Skip to content

Commit 45932f1

Browse files
Merge pull request #466 from DrewThomasson/v25
Merge pull request #461 from DrewThomasson/workflow-patch-1
2 parents 8d153e6 + c442318 commit 45932f1

File tree

7 files changed

+25
-11
lines changed

7 files changed

+25
-11
lines changed

.github/workflows/dev-docker-test.yml

+1-3
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@ name: Full Dev Docker Test
22

33
on:
44
workflow_dispatch: {}
5-
release:
6-
types:
7-
- published
85
push:
96
branches:
107
- v25
@@ -16,6 +13,7 @@ on:
1613
- dockerfiles/**
1714
- Notebooks/**
1815

16+
1917
jobs:
2018
build:
2119
runs-on: [self-hosted, Linux, ARM64]

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ Use this tool responsibly and in accordance with all applicable laws.
1111
Thanks for supporting the ebook2audiobook developers!<br>
1212
[![Ko-Fi](https://img.shields.io/badge/Ko--fi-F16061?style=for-the-badge&logo=ko-fi&logoColor=white)](https://ko-fi.com/athomasson2)
1313

14+
[![Ubuntu Build+Test](https://github.com/DrewThomasson/ebook2audiobook/actions/workflows/ubuntu-build+test-docker.yml/badge.svg)](https://github.com/DrewThomasson/ebook2audiobook/actions/workflows/ubuntu-build+test-docker.yml)
1415

1516
#### GUI Interface
1617
![demo_web_gui](assets/demo_web_gui.gif)

VERSION.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
25.3.9
1+
25.3.10

ebook2audiobook.egg-info/requires.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,12 @@ pypinyin
2222
ray
2323
regex
2424
sentencepiece
25-
torchvggish
2625
transformers
2726
translate
2827
tqdm
2928
unidic
29+
torchvggish
30+
pymupdf4llm
3031
torch==2.4.1
3132
torchaudio==2.4.1
3233
torchvision==0.19.1

lib/functions.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import os
1919
import platform
2020
import psutil
21+
import pymupdf4llm
2122
import random
2223
import regex as re
2324
import requests
@@ -353,7 +354,7 @@ def recursive_copy(source, visited):
353354
return str(source) # Convert non-serializable types to strings
354355
return recursive_copy(proxy_obj, set())
355356

356-
def maths_to_words(text, lang, lang_iso1, tts_engine):
357+
def math2word(text, lang, lang_iso1, tts_engine):
357358
def check_compat():
358359
try:
359360
num2words(1, lang=lang_iso1)
@@ -400,7 +401,8 @@ def replace_ambiguous(match):
400401
if ambiguous_replacements:
401402
text = re.sub(ambiguous_pattern, replace_ambiguous, text)
402403
# Regex pattern for detecting numbers (handles negatives, commas, decimals, scientific notation)
403-
number_pattern = r'(?<!\S)(-?\d{1,3}(?:,\d{3})*(?:\.\d+)?(?:[eE][-+]?\d+)?)(?!\S)'
404+
#number_pattern = r'(?<!\S)(-?\d{1,3}(?:,\d{3})*(?:\.\d+)?(?:[eE][-+]?\d+)?)(?!\S)'
405+
number_pattern = r'(?<!\S)(-?\d{1,3}(?:,\d{3})*(?:\.\d+(?!\s|$))?(?:[eE][-+]?\d+)?)(?!\S)'
404406
if tts_engine != XTTSv2:
405407
if is_num2words_compat:
406408
# Pattern 2: Split big numbers into groups of 4
@@ -438,7 +440,7 @@ def normalize_text(text, lang, lang_iso1, tts_engine):
438440
# Pattern 1: Add a space at every boundary between a letter (Unicode \p{L}) and a digit
439441
text = re.sub(r'(?<=[\p{L}])(?=\d)|(?<=\d)(?=[\p{L}])', ' ', text)
440442
# Replace math symbols with words
441-
text = maths_to_words(text, lang, lang_iso1, tts_engine)
443+
text = math2word(text, lang, lang_iso1, tts_engine)
442444
return text
443445

444446
def convert_to_epub(session):
@@ -451,10 +453,20 @@ def convert_to_epub(session):
451453
error = "The 'ebook-convert' utility is not installed or not found."
452454
print(error)
453455
return False
454-
print(f"Running command: {util_app} {session['ebook']} {session['epub_path']}")
456+
file_input = session['ebook']
457+
file_ext = os.path.splitext(session['ebook'])[1].lower()
458+
if file_ext == '.pdf':
459+
msg = 'File input is a PDF. flatten it in MD format...'
460+
print(msg)
461+
file_input = f"{os.path.splitext(session['epub_path'])[0]}.md"
462+
markdown_text = pymupdf4llm.to_markdown(session['ebook'])
463+
with open(file_input, "w", encoding="utf-8") as md_file:
464+
md_file.write(markdown_text)
465+
msg = f"Running command: {util_app} {file_input} {session['epub_path']}"
466+
print(msg)
455467
result = subprocess.run(
456468
[
457-
util_app, session['ebook'], session['epub_path'],
469+
util_app, file_input, session['epub_path'],
458470
'--input-encoding=utf-8',
459471
'--output-profile=generic_eink',
460472
'--epub-version=3',

pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ dependencies = [
4646
"tqdm",
4747
"unidic",
4848
"torchvggish",
49+
"pymupdf4llm",
4950
"torch==2.4.1",
5051
"torchaudio==2.4.1",
5152
"torchvision==0.19.1",

requirements.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ translate
2727
tqdm
2828
unidic
2929
torchvggish
30+
pymupdf4llm
3031
torch==2.4.1
3132
torchaudio==2.4.1
3233
torchvision==0.19.1
33-
coqui-tts==0.25.3
34+
coqui-tts==0.26.0

0 commit comments

Comments
 (0)