Skip to content

Commit

Permalink
feat: update version to 2.8.1 and enhance installation script
Browse files Browse the repository at this point in the history
  • Loading branch information
zackees committed Feb 1, 2025
1 parent 8d40b8e commit 65e47a0
Show file tree
Hide file tree
Showing 6 changed files with 264 additions and 40 deletions.
1 change: 1 addition & 0 deletions install
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/bin/bash
set -e
set -x
uv venv --python 3.11
uv pip install -r requirements.testing.txt
uv pip install -e . --refresh-package uv-iso-env
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "transcribe-anything"
version = "2.3.7" # Update this manually or configure setuptools-scm for automatic versioning
version = "2.8.1" # Update this manually or configure setuptools-scm for automatic versioning
readme = "README.md"
description = "Uses Whisper AI to transcribe speech from video and audio files. Also accepts URLs for YouTube, Rumble, BitChute, clear file links, etc."
requires-python = ">=3.10"
Expand All @@ -18,6 +18,7 @@ dependencies = [
"FileLock",
"webvtt-py==0.4.6",
"uv-iso-env>=1.0.33",
"python-dotenv>=1.0.1",
]

maintainers = [{ name = "Zachary Vorhies", email = "dont@email.me" }]
Expand Down
3 changes: 3 additions & 0 deletions src/transcribe_anything/insanely_fast_whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ def run_insanely_fast_whisper(
json_data_str = json.dumps(json_data, indent=2)

if hugging_face_token:
print("### HUGGING FACE TOKEN IS ACTIVE - GENERATING SPEAKER JSON ###")
# Speaker diarization is active so generate the file
try:
speaker_json = generate_speaker_json(json_data)
Expand All @@ -282,6 +283,8 @@ def run_insanely_fast_whisper(
speaker_json_file.write_text(speaker_json_str, encoding="utf-8")
except Exception as exc:
            warnings.warn(f"Failed to generate speaker json because of exception: {exc}")
else:
print("### HUGGING FACE TOKEN IS NOT ACTIVE - NO SPEAKER JSON GENERATED ###")

# now write the pretty formatted json data back to the text file.
outfile.write_text(json_data_str, encoding="utf-8")
Expand Down
239 changes: 201 additions & 38 deletions src/transcribe_anything/insanley_fast_whisper_reqs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,26 +23,219 @@ def get_current_python_version() -> str:
return f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"


# for insanely fast whisper, use:
# pipx install insanely-fast-whisper --python python3.11
# All deps for CUDA because it's the most finicky.
_WIN_COMPILED_212: str = """
accelerate==1.3.0
aiohappyeyeballs==2.4.4
aiohttp==3.11.11
aiosignal==1.3.2
alembic==1.14.1
ansicon==1.89.0
antlr4-python3-runtime==4.9.3
anyio==4.8.0
asteroid-filterbanks==0.4.0
async-generator==1.10
attrs==22.2.0
beautifulsoup4==4.11.1
blessed==1.19.1
brotli==1.1.0
certifi==2022.12.7
cffi==1.15.1
charset-normalizer==2.1.1
click==8.1.8
colorama==0.4.6
colorlog==6.9.0
contourpy==1.3.1
cycler==0.12.1
datasets==2.17.1
deepl==1.14.0
dill==0.3.8
docopt==0.6.2
einops==0.8.0
exceptiongroup==1.1.0
filelock==3.17.0
fonttools==4.55.8
free-proxy==1.1.0
frozenlist==1.5.0
fsspec==2023.10.0
greenlet==3.1.1
h11==0.14.0
httpcore==1.0.7
httpx==0.28.1
huggingface-hub==0.28.1
hyperpyyaml==1.2.2
idna==3.4
inquirer==3.1.1
insanely-fast-whisper==0.0.15
intel-openmp==2024.0.3
jinja2==3.1.5
jinxed==1.2.0
joblib==1.4.2
julius==0.2.7
kiwisolver==1.4.8
langdetect==1.0.9
lightning==2.5.0.post0
lightning-utilities==0.12.0
llvmlite==0.44.0
lxml==4.9.3
mako==1.3.8
markdown-it-py==3.0.0
markupsafe==3.0.2
matplotlib==3.10.0
mdurl==0.1.2
more-itertools==10.6.0
mpmath==1.3.0
multidict==6.1.0
multiprocess==0.70.16
networkx==3.4.2
numba==0.61.0
numpy==1.26.4
omegaconf==2.3.0
openai-whisper==20240930
optuna==4.2.0
outcome==1.2.0
packaging==24.2
pandas==2.2.3
pillow==11.1.0
primepy==1.3
propcache==0.2.1
protobuf==5.29.3
psutil==6.1.1
pyannote-audio==3.1.0
pyannote-core==5.0.0
pyannote-database==5.1.3
pyannote-metrics==3.2.1
pyannote-pipeline==3.0.1
pyarrow==19.0.0
pyarrow-hotfix==0.6
pycparser==2.21
pydeeplx==1.0.4
pygments==2.19.1
pyparsing==3.2.1
pysocks==1.7.1
python-dateutil==2.9.0.post0
python-editor==1.0.4
pytorch-lightning==2.1.4
pytorch-metric-learning==2.8.1
pytz==2025.1
pyuseragents==1.0.5
pyyaml==6.0.2
readchar==4.0.3
regex==2024.11.6
requests==2.28.1
rich==13.9.4
ruamel-yaml==0.18.10
ruamel-yaml-clib==0.2.12
safeio==1.2
safetensors==0.5.2
scikit-learn==1.6.1
scipy==1.15.1
selenium==4.7.2
semver==3.0.4
sentencepiece==0.2.0
setuptools==75.8.0
shellingham==1.5.4
six==1.16.0
sniffio==1.3.0
socksio==1.0.0
sortedcontainers==2.4.0
soundfile==0.13.1
soupsieve==2.3.2.post1
speechbrain==1.0.2
sqlalchemy==2.0.37
srt==3.5.2
srtranslator==0.3.5
sympy==1.13.3
tabulate==0.9.0
tensorboardx==2.6.2.2
threadpoolctl==3.5.0
tiktoken==0.8.0
tokenizers==0.21.0
torch==2.1.2+cu121
torch-audiomentations==0.12.0
torch-pitch-shift==1.2.5
torchaudio==2.1.2
torchmetrics==1.3.2
tqdm==4.64.1
transformers==4.48.2
translatepy==2.3
trio==0.22.0
trio-websocket==0.9.2
typer==0.15.1
typing-extensions==4.12.2
tzdata==2025.1
urllib3==1.26.13
typing-extensions==4.12.2
tzdata==2025.1
urllib3==1.26.13
typing-extensions==4.12.2
tzdata==2025.1
urllib3==1.26.13
tzdata==2025.1
urllib3==1.26.13
tzdata==2025.1
urllib3==1.26.13
tzdata==2025.1
urllib3==1.26.13
urllib3==1.26.13
wcwidth==0.2.5
wcwidth==0.2.5
wcwidth==0.2.5
wcwidth==0.2.5
webdriverdownloader==1.1.0.3
wsproto==1.2.0
xxhash==3.5.0
yarl==1.18.3
"""

# Pre-compiled (fully pinned) requirement sets, keyed by a platform/accelerator
# tag; get_environment() selects one of these when the runtime matches,
# otherwise falls back to _get_reqs_generic().
_COMPILED: dict[str, str] = {
    "WIN_CUDA_212": _WIN_COMPILED_212,
}

def get_environment() -> IsoEnv:
"""Returns the environment."""
venv_dir = HERE / "venv" / "insanely_fast_whisper"

def _get_reqs_generic(has_nvidia: bool) -> list[str]:
    """Build the requirement lines for platforms without a pre-compiled set.

    Args:
        has_nvidia: True when an NVIDIA GPU was detected, selecting the CUDA
            build of torch (TENSOR_CUDA_VERSION) instead of the default build
            (TENSOR_VERSION).

    Returns:
        A list of pip requirement strings, one per dependency, suitable for
        insertion into the generated pyproject dependency table.
    """
    deps = [
        "pyannote.audio==3.1.0",
        "openai-whisper==20240930",
        "insanely-fast-whisper==0.0.15",
        "torchaudio==2.1.2",
        "datasets==2.17.1",
        "pytorch-lightning==2.1.4",
        "torchmetrics~=1.3.0",
        # Single srtranslator pin only: the list previously contained both
        # 0.2.6 and 0.3.5, which are conflicting pins for the same package.
        "srtranslator==0.3.5",
        "numpy==1.26.4",
        "safeIO==1.2",
    ]

    content_lines: list[str] = list(deps)

    # Select the torch build matching the available accelerator.
    if has_nvidia:
        content_lines.append(f"torch=={TENSOR_CUDA_VERSION}")
    else:
        content_lines.append(f"torch=={TENSOR_VERSION}")
    if sys.platform != "darwin":
        # Required on every non-macOS platform (Windows and Linux).
        content_lines.append("intel-openmp==2024.0.3")

    return content_lines


def get_environment() -> IsoEnv:
"""Returns the environment."""
venv_dir = HERE / "venv" / "insanely_fast_whisper"
has_nvidia = has_nvidia_smi()
is_windows = sys.platform == "win32"
if has_nvidia and TENSOR_VERSION == "2.1.2" and is_windows:
dep_lines = _COMPILED["WIN_CUDA_212"].splitlines()
else:
dep_lines = _get_reqs_generic(has_nvidia)
# filter out empty lines
dep_lines = [line.strip() for line in dep_lines if line.strip()]
content_lines: list[str] = []

content_lines.append("[build-system]")
content_lines.append('requires = ["setuptools", "wheel"]')
content_lines.append('build-backend = "setuptools.build_meta"')
Expand All @@ -53,50 +246,20 @@ def get_environment() -> IsoEnv:
content_lines.append('version = "0.1.0"')
content_lines.append('requires-python = "==3.10.*"')
content_lines.append("dependencies = [")
for dep in deps:
for dep in dep_lines:
content_lines.append(f' "{dep}",')
needs_extra_index = False
if has_nvidia_smi():
needs_extra_index = True
content_lines.append(f' "torch=={TENSOR_CUDA_VERSION}",')
else:
content_lines.append(f' "torch=={TENSOR_VERSION}",')
if sys.platform != "darwin":
# Add the windows specific dependencies.
content_lines.append(' "intel-openmp==2024.0.3",')
content_lines.append("]")
content_lines.append("")

# if has_nvidia_smi():
# deps.append(f"torch=={TENSOR_CUDA_VERSION} --extra-index-url {EXTRA_INDEX_URL}")
# else:
# deps.append(f"torch=={TENSOR_VERSION}")
# if sys.platform != "darwin":
# # Add the windows specific dependencies.
# deps.append("intel-openmp==2024.0.3")

if needs_extra_index:
# [tool.uv.sources]
# torch = [
# { index = "pytorch-cu121", marker = "platform_system == 'Windows'" },
# ]
if has_nvidia:
content_lines.append("[tool.uv.sources]")
content_lines.append("torch = [")
content_lines.append(" { index = 'pytorch-cu121' },")
content_lines.append("]")

# [[tool.uv.index]]
# name = "pytorch-cu121"
# url = "https://download.pytorch.org/whl/cu121"
# explicit = true

content_lines.append("[[tool.uv.index]]")
content_lines.append('name = "pytorch-cu121"')
content_lines.append(f'url = "{EXTRA_INDEX_URL}"')
content_lines.append("explicit = true")

# deps.append(f"--extra-index-url {EXTRA_INDEX_URL}")

content = "\n".join(content_lines)
build_info = PyProjectToml(content)
args = IsoEnvArgs(venv_path=venv_dir, build_info=build_info)
Expand Down
2 changes: 1 addition & 1 deletion test
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
. ./activate
set -e
# uv run python -m unittest discover tests
pytest
pytest --tb=long -v
56 changes: 56 additions & 0 deletions tests/test_insanley_fast_whisper_diarization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# pylint: disable=bad-option-value,useless-option-value,no-self-use,protected-access,R0801
# flake8: noqa E501


"""
Tests transcribe_anything
"""


import os
import shutil
import unittest
from pathlib import Path

from dotenv import load_dotenv

from transcribe_anything.insanely_fast_whisper import (
run_insanely_fast_whisper,
)
from transcribe_anything.util import has_nvidia_smi, is_mac_arm

load_dotenv()  # take environment variables from .env.

# Hugging Face API token; speaker diarization requires it (None when unset).
HF_TOKEN = os.getenv("HF_TOKEN")


# Path layout for test fixtures and outputs.
HERE = Path(os.path.abspath(os.path.dirname(__file__)))
LOCALFILE_DIR = HERE / "localfile"  # presumably local-only media fixtures — verify
TESTS_DATA_DIR = LOCALFILE_DIR / "text_video_insane" / "en"  # transcription output dir
TEST_WAV = LOCALFILE_DIR / "video.wav"  # input fixture for the test
PROJECT_ROOT = HERE.parent

# Diarization needs a GPU backend (NVIDIA or Apple Silicon) plus an HF token.
CAN_RUN_TEST = (has_nvidia_smi() or is_mac_arm()) and HF_TOKEN is not None


class InsanelFastWhisperDiarizationTester(unittest.TestCase):
    """Diarization test for the insanely-fast-whisper backend."""

    @unittest.skipUnless(CAN_RUN_TEST, "No GPU, or HF_TOKEN not set")
    def test_local_file(self) -> None:
        """Transcribe a local wav with a HF token and expect speaker.json."""
        # Start from a clean output directory so stale results can't pass.
        shutil.rmtree(TESTS_DATA_DIR, ignore_errors=True)
        run_insanely_fast_whisper(
            input_wav=TEST_WAV,
            model="small",
            output_dir=TESTS_DATA_DIR,
            task="transcribe",
            language="en",
            hugging_face_token=HF_TOKEN,
        )
        # Diarization output is written alongside the transcription.
        speaker_file = TESTS_DATA_DIR / "speaker.json"
        self.assertTrue(speaker_file.exists())


if __name__ == "__main__":
unittest.main()

0 comments on commit 65e47a0

Please sign in to comment.