Merge pull request #466 from DrewThomasson/v25

ROBERT-MCDOWELL · web-flow · commit 45932f1220e7 · 2025-03-10T15:23:44.000-07:00
Merge pull request #461 from DrewThomasson/workflow-patch-1
diff --git a/.github/workflows/dev-docker-test.yml b/.github/workflows/dev-docker-test.yml
@@ -2,9 +2,6 @@ name: Full Dev Docker Test
 
 on:
   workflow_dispatch: {}
-  release:
-    types:
-      - published
   push:
     branches:
       - v25
@@ -16,6 +13,7 @@ on:
       - dockerfiles/**
       - Notebooks/**
 
+
 jobs:
   build:
     runs-on: [self-hosted, Linux, ARM64]
diff --git a/README.md b/README.md
@@ -11,6 +11,7 @@ Use this tool responsibly and in accordance with all applicable laws.
 Thanks to support ebook2audiobook developers!<br>
 [![Ko-Fi](https://img.shields.io/badge/Ko--fi-F16061?style=for-the-badge&logo=ko-fi&logoColor=white)](https://ko-fi.com/athomasson2) 
 
+[![Ubuntu Build+Test](https://github.com/DrewThomasson/ebook2audiobook/actions/workflows/ubuntu-build+test-docker.yml/badge.svg)](https://github.com/DrewThomasson/ebook2audiobook/actions/workflows/ubuntu-build+test-docker.yml)
 
 #### GUI Interface
 ![demo_web_gui](assets/demo_web_gui.gif)
diff --git a/VERSION.txt b/VERSION.txt
@@ -1 +1 @@
-25.3.9
+25.3.10
diff --git a/ebook2audiobook.egg-info/requires.txt b/ebook2audiobook.egg-info/requires.txt
@@ -22,11 +22,12 @@ pypinyin
 ray
 regex
 sentencepiece
-torchvggish
 transformers
 translate
 tqdm
 unidic
+torchvggish
+pymupdf4llm
 torch==2.4.1
 torchaudio==2.4.1
 torchvision==0.19.1
diff --git a/lib/functions.py b/lib/functions.py
@@ -18,6 +18,7 @@
 import os
 import platform
 import psutil
+import pymupdf4llm
 import random
 import regex as re
 import requests
@@ -353,7 +354,7 @@ def recursive_copy(source, visited):
             return str(source)  # Convert non-serializable types to strings
     return recursive_copy(proxy_obj, set())
 
-def maths_to_words(text, lang, lang_iso1, tts_engine):
+def math2word(text, lang, lang_iso1, tts_engine):
     def check_compat():
         try:
             num2words(1, lang=lang_iso1)
@@ -400,7 +401,8 @@ def replace_ambiguous(match):
     if ambiguous_replacements:
         text = re.sub(ambiguous_pattern, replace_ambiguous, text)
     # Regex pattern for detecting numbers (handles negatives, commas, decimals, scientific notation)
-    number_pattern = r'(?<!\S)(-?\d{1,3}(?:,\d{3})*(?:\.\d+)?(?:[eE][-+]?\d+)?)(?!\S)'
+    # Previous pattern, superseded by the one below: number_pattern = r'(?<!\S)(-?\d{1,3}(?:,\d{3})*(?:\.\d+)?(?:[eE][-+]?\d+)?)(?!\S)'
+    number_pattern = r'(?<!\S)(-?\d{1,3}(?:,\d{3})*(?:\.\d+(?!\s|$))?(?:[eE][-+]?\d+)?)(?!\S)'
     if tts_engine != XTTSv2:
         if is_num2words_compat:
             # Pattern 2: Split big numbers into groups of 4
@@ -438,7 +440,7 @@ def normalize_text(text, lang, lang_iso1, tts_engine):
     # Pattern 1: Add a space between UTF-8 characters and numbers
     text = re.sub(r'(?<=[\p{L}])(?=\d)|(?<=\d)(?=[\p{L}])', ' ', text)
     # Replace math symbols with words
-    text = maths_to_words(text, lang, lang_iso1, tts_engine)
+    text = math2word(text, lang, lang_iso1, tts_engine)
     return text
 
 def convert_to_epub(session):
@@ -451,10 +453,20 @@ def convert_to_epub(session):
             error = "The 'ebook-convert' utility is not installed or not found."
             print(error)
             return False
-        print(f"Running command: {util_app} {session['ebook']} {session['epub_path']}")
+        file_input = session['ebook']
+        file_ext = os.path.splitext(session['ebook'])[1].lower()
+        if file_ext == '.pdf':
+            msg = 'File input is a PDF. flatten it in MD format...'
+            print(msg)
+            file_input = f"{os.path.splitext(session['epub_path'])[0]}.md"
+            markdown_text = pymupdf4llm.to_markdown(session['ebook'])
+            with open(file_input, "w", encoding="utf-8") as md_file:
+                md_file.write(markdown_text)
+        msg = f"Running command: {util_app} {file_input} {session['epub_path']}"
+        print(msg)
         result = subprocess.run(
             [
-                util_app, session['ebook'], session['epub_path'],
+                util_app, file_input, session['epub_path'],
                 '--input-encoding=utf-8',
                 '--output-profile=generic_eink',
                 '--epub-version=3',
diff --git a/pyproject.toml b/pyproject.toml
@@ -46,6 +46,7 @@ dependencies = [
 	"tqdm",
 	"unidic",
 	"torchvggish",
+	"pymupdf4llm",
 	"torch==2.4.1",
 	"torchaudio==2.4.1",
 	"torchvision==0.19.1",
diff --git a/requirements.txt b/requirements.txt
@@ -27,7 +27,8 @@ translate
 tqdm
 unidic
 torchvggish
+pymupdf4llm
 torch==2.4.1
 torchaudio==2.4.1
 torchvision==0.19.1
-coqui-tts==0.25.3
+coqui-tts==0.26.0