18
18
import os
19
19
import platform
20
20
import psutil
21
+ import pymupdf4llm
21
22
import random
22
23
import regex as re
23
24
import requests
@@ -353,7 +354,7 @@ def recursive_copy(source, visited):
353
354
return str (source ) # Convert non-serializable types to strings
354
355
return recursive_copy (proxy_obj , set ())
355
356
356
- def maths_to_words (text , lang , lang_iso1 , tts_engine ):
357
+ def math2word (text , lang , lang_iso1 , tts_engine ):
357
358
def check_compat ():
358
359
try :
359
360
num2words (1 , lang = lang_iso1 )
@@ -400,7 +401,8 @@ def replace_ambiguous(match):
400
401
if ambiguous_replacements :
401
402
text = re .sub (ambiguous_pattern , replace_ambiguous , text )
402
403
# Regex pattern for detecting numbers (handles negatives, commas, decimals, scientific notation)
403
- number_pattern = r'(?<!\S)(-?\d{1,3}(?:,\d{3})*(?:\.\d+)?(?:[eE][-+]?\d+)?)(?!\S)'
404
+ #number_pattern = r'(?<!\S)(-?\d{1,3}(?:,\d{3})*(?:\.\d+)?(?:[eE][-+]?\d+)?)(?!\S)'
405
+ number_pattern = r'(?<!\S)(-?\d{1,3}(?:,\d{3})*(?:\.\d+(?!\s|$))?(?:[eE][-+]?\d+)?)(?!\S)'
404
406
if tts_engine != XTTSv2 :
405
407
if is_num2words_compat :
406
408
# Pattern 2: Split big numbers into groups of 4
@@ -438,7 +440,7 @@ def normalize_text(text, lang, lang_iso1, tts_engine):
438
440
# Pattern 1: Add a space between UTF-8 characters and numbers
439
441
text = re .sub (r'(?<=[\p{L}])(?=\d)|(?<=\d)(?=[\p{L}])' , ' ' , text )
440
442
# Replace math symbols with words
441
- text = maths_to_words (text , lang , lang_iso1 , tts_engine )
443
+ text = math2word (text , lang , lang_iso1 , tts_engine )
442
444
return text
443
445
444
446
def convert_to_epub (session ):
@@ -451,10 +453,20 @@ def convert_to_epub(session):
451
453
error = "The 'ebook-convert' utility is not installed or not found."
452
454
print (error )
453
455
return False
454
- print (f"Running command: { util_app } { session ['ebook' ]} { session ['epub_path' ]} " )
456
+ file_input = session ['ebook' ]
457
+ file_ext = os .path .splitext (session ['ebook' ])[1 ].lower ()
458
+ if file_ext == '.pdf' :
459
+ msg = 'File input is a PDF. flatten it in MD format...'
460
+ print (msg )
461
+ file_input = f"{ os .path .splitext (session ['epub_path' ])[0 ]} .md"
462
+ markdown_text = pymupdf4llm .to_markdown (session ['ebook' ])
463
+ with open (file_input , "w" , encoding = "utf-8" ) as md_file :
464
+ md_file .write (markdown_text )
465
+ msg = f"Running command: { util_app } { file_input } { session ['epub_path' ]} "
466
+ print (msg )
455
467
result = subprocess .run (
456
468
[
457
- util_app , session [ 'ebook' ] , session ['epub_path' ],
469
+ util_app , file_input , session ['epub_path' ],
458
470
'--input-encoding=utf-8' ,
459
471
'--output-profile=generic_eink' ,
460
472
'--epub-version=3' ,
0 commit comments