Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge pull request #461 from DrewThomasson/workflow-patch-1 #466

Merged
merged 11 commits into from
Mar 10, 2025
Merged
4 changes: 1 addition & 3 deletions .github/workflows/dev-docker-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@ name: Full Dev Docker Test

on:
workflow_dispatch: {}
release:
types:
- published
push:
branches:
- v25
Expand All @@ -16,6 +13,7 @@ on:
- dockerfiles/**
- Notebooks/**


jobs:
build:
runs-on: [self-hosted, Linux, ARM64]
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Use this tool responsibly and in accordance with all applicable laws.
Thanks for supporting the ebook2audiobook developers!<br>
[![Ko-Fi](https://img.shields.io/badge/Ko--fi-F16061?style=for-the-badge&logo=ko-fi&logoColor=white)](https://ko-fi.com/athomasson2)

[![Ubuntu Build+Test](https://github.com/DrewThomasson/ebook2audiobook/actions/workflows/ubuntu-build+test-docker.yml/badge.svg)](https://github.com/DrewThomasson/ebook2audiobook/actions/workflows/ubuntu-build+test-docker.yml)

#### GUI Interface
![demo_web_gui](assets/demo_web_gui.gif)
Expand Down
2 changes: 1 addition & 1 deletion VERSION.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
25.3.9
25.3.10
3 changes: 2 additions & 1 deletion ebook2audiobook.egg-info/requires.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,12 @@ pypinyin
ray
regex
sentencepiece
torchvggish
transformers
translate
tqdm
unidic
torchvggish
pymupdf4llm
torch==2.4.1
torchaudio==2.4.1
torchvision==0.19.1
Expand Down
22 changes: 17 additions & 5 deletions lib/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import os
import platform
import psutil
import pymupdf4llm
import random
import regex as re
import requests
Expand Down Expand Up @@ -353,7 +354,7 @@ def recursive_copy(source, visited):
return str(source) # Convert non-serializable types to strings
return recursive_copy(proxy_obj, set())

def maths_to_words(text, lang, lang_iso1, tts_engine):
def math2word(text, lang, lang_iso1, tts_engine):
def check_compat():
try:
num2words(1, lang=lang_iso1)
Expand Down Expand Up @@ -400,7 +401,8 @@ def replace_ambiguous(match):
if ambiguous_replacements:
text = re.sub(ambiguous_pattern, replace_ambiguous, text)
# Regex pattern for detecting numbers (handles negatives, commas, decimals, scientific notation)
number_pattern = r'(?<!\S)(-?\d{1,3}(?:,\d{3})*(?:\.\d+)?(?:[eE][-+]?\d+)?)(?!\S)'
#number_pattern = r'(?<!\S)(-?\d{1,3}(?:,\d{3})*(?:\.\d+)?(?:[eE][-+]?\d+)?)(?!\S)'
number_pattern = r'(?<!\S)(-?\d{1,3}(?:,\d{3})*(?:\.\d+(?!\s|$))?(?:[eE][-+]?\d+)?)(?!\S)'
if tts_engine != XTTSv2:
if is_num2words_compat:
# Pattern 2: Split big numbers into groups of 4
Expand Down Expand Up @@ -438,7 +440,7 @@ def normalize_text(text, lang, lang_iso1, tts_engine):
# Pattern 1: Add a space between UTF-8 characters and numbers
text = re.sub(r'(?<=[\p{L}])(?=\d)|(?<=\d)(?=[\p{L}])', ' ', text)
# Replace math symbols with words
text = maths_to_words(text, lang, lang_iso1, tts_engine)
text = math2word(text, lang, lang_iso1, tts_engine)
return text

def convert_to_epub(session):
Expand All @@ -451,10 +453,20 @@ def convert_to_epub(session):
error = "The 'ebook-convert' utility is not installed or not found."
print(error)
return False
print(f"Running command: {util_app} {session['ebook']} {session['epub_path']}")
file_input = session['ebook']
file_ext = os.path.splitext(session['ebook'])[1].lower()
if file_ext == '.pdf':
msg = 'File input is a PDF. flatten it in MD format...'
print(msg)
file_input = f"{os.path.splitext(session['epub_path'])[0]}.md"
markdown_text = pymupdf4llm.to_markdown(session['ebook'])
with open(file_input, "w", encoding="utf-8") as md_file:
md_file.write(markdown_text)
msg = f"Running command: {util_app} {file_input} {session['epub_path']}"
print(msg)
result = subprocess.run(
[
util_app, session['ebook'], session['epub_path'],
util_app, file_input, session['epub_path'],
'--input-encoding=utf-8',
'--output-profile=generic_eink',
'--epub-version=3',
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ dependencies = [
"tqdm",
"unidic",
"torchvggish",
"pymupdf4llm",
"torch==2.4.1",
"torchaudio==2.4.1",
"torchvision==0.19.1",
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ translate
tqdm
unidic
torchvggish
pymupdf4llm
torch==2.4.1
torchaudio==2.4.1
torchvision==0.19.1
coqui-tts==0.25.3
coqui-tts==0.26.0