Skip to content

Commit

Permalink
feat: update version to 2.8.1 and enhance installation script
Browse files Browse the repository at this point in the history
  • Loading branch information
zackees committed Feb 1, 2025
1 parent 8d40b8e commit 65e47a0
Show file tree
Hide file tree
Showing 6 changed files with 264 additions and 40 deletions.
1 change: 1 addition & 0 deletions install
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/bin/bash
set -e
set -x
uv venv --python 3.11
uv pip install -r requirements.testing.txt
uv pip install -e . --refresh-package uv-iso-env
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "transcribe-anything"
version = "2.3.7" # Update this manually or configure setuptools-scm for automatic versioning
version = "2.8.1" # Update this manually or configure setuptools-scm for automatic versioning
readme = "README.md"
description = "Uses Whisper AI to transcribe speech from video and audio files. Also accepts URLs for YouTube, Rumble, BitChute, clear file links, etc."
requires-python = ">=3.10"
Expand All @@ -18,6 +18,7 @@ dependencies = [
"FileLock",
"webvtt-py==0.4.6",
"uv-iso-env>=1.0.33",
"python-dotenv>=1.0.1",
]

maintainers = [{ name = "Zachary Vorhies", email = "dont@email.me" }]
Expand Down
3 changes: 3 additions & 0 deletions src/transcribe_anything/insanely_fast_whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ def run_insanely_fast_whisper(
json_data_str = json.dumps(json_data, indent=2)

if hugging_face_token:
print("### HUGGING FACE TOKEN IS ACTIVE - GENERATING SPEAKER JSON ###")
# Speaker diarization is active so generate the file
try:
speaker_json = generate_speaker_json(json_data)
Expand All @@ -282,6 +283,8 @@ def run_insanely_fast_whisper(
speaker_json_file.write_text(speaker_json_str, encoding="utf-8")
except Exception as exc:
            warnings.warn(f"Failed to generate speaker json because of exception: {exc}")
else:
print("### HUGGING FACE TOKEN IS NOT ACTIVE - NO SPEAKER JSON GENERATED ###")

# now write the pretty formatted json data back to the text file.
outfile.write_text(json_data_str, encoding="utf-8")
Expand Down
239 changes: 201 additions & 38 deletions src/transcribe_anything/insanley_fast_whisper_reqs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,26 +23,219 @@ def get_current_python_version() -> str:
return f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"


# for insanely fast whisper, use:
# pipx install insanely-fast-whisper --python python3.11
# All deps for CUDA because it's the most finicky.
_WIN_COMPILED_212: str = """
accelerate==1.3.0
aiohappyeyeballs==2.4.4
aiohttp==3.11.11
aiosignal==1.3.2
alembic==1.14.1
ansicon==1.89.0
antlr4-python3-runtime==4.9.3
anyio==4.8.0
asteroid-filterbanks==0.4.0
async-generator==1.10
attrs==22.2.0
beautifulsoup4==4.11.1
blessed==1.19.1
brotli==1.1.0
certifi==2022.12.7
cffi==1.15.1
charset-normalizer==2.1.1
click==8.1.8
colorama==0.4.6
colorlog==6.9.0
contourpy==1.3.1
cycler==0.12.1
datasets==2.17.1
deepl==1.14.0
dill==0.3.8
docopt==0.6.2
einops==0.8.0
exceptiongroup==1.1.0
filelock==3.17.0
fonttools==4.55.8
free-proxy==1.1.0
frozenlist==1.5.0
fsspec==2023.10.0
greenlet==3.1.1
h11==0.14.0
httpcore==1.0.7
httpx==0.28.1
huggingface-hub==0.28.1
hyperpyyaml==1.2.2
idna==3.4
inquirer==3.1.1
insanely-fast-whisper==0.0.15
intel-openmp==2024.0.3
jinja2==3.1.5
jinxed==1.2.0
joblib==1.4.2
julius==0.2.7
kiwisolver==1.4.8
langdetect==1.0.9
lightning==2.5.0.post0
lightning-utilities==0.12.0
llvmlite==0.44.0
lxml==4.9.3
mako==1.3.8
markdown-it-py==3.0.0
markupsafe==3.0.2
matplotlib==3.10.0
mdurl==0.1.2
more-itertools==10.6.0
mpmath==1.3.0
multidict==6.1.0
multiprocess==0.70.16
networkx==3.4.2
numba==0.61.0
numpy==1.26.4
omegaconf==2.3.0
openai-whisper==20240930
optuna==4.2.0
outcome==1.2.0
packaging==24.2
pandas==2.2.3
pillow==11.1.0
primepy==1.3
propcache==0.2.1
protobuf==5.29.3
psutil==6.1.1
pyannote-audio==3.1.0
pyannote-core==5.0.0
pyannote-database==5.1.3
pyannote-metrics==3.2.1
pyannote-pipeline==3.0.1
pyarrow==19.0.0
pyarrow-hotfix==0.6
pycparser==2.21
pydeeplx==1.0.4
pygments==2.19.1
pyparsing==3.2.1
pysocks==1.7.1
python-dateutil==2.9.0.post0
python-editor==1.0.4
pytorch-lightning==2.1.4
pytorch-metric-learning==2.8.1
pytz==2025.1
pyuseragents==1.0.5
pyyaml==6.0.2
readchar==4.0.3
regex==2024.11.6
requests==2.28.1
rich==13.9.4
ruamel-yaml==0.18.10
ruamel-yaml-clib==0.2.12
safeio==1.2
safetensors==0.5.2
scikit-learn==1.6.1
scipy==1.15.1
selenium==4.7.2
semver==3.0.4
sentencepiece==0.2.0
setuptools==75.8.0
shellingham==1.5.4
six==1.16.0
sniffio==1.3.0
socksio==1.0.0
sortedcontainers==2.4.0
soundfile==0.13.1
soupsieve==2.3.2.post1
speechbrain==1.0.2
sqlalchemy==2.0.37
srt==3.5.2
srtranslator==0.3.5
sympy==1.13.3
tabulate==0.9.0
tensorboardx==2.6.2.2
threadpoolctl==3.5.0
tiktoken==0.8.0
tokenizers==0.21.0
torch==2.1.2+cu121
torch-audiomentations==0.12.0
torch-pitch-shift==1.2.5
torchaudio==2.1.2
torchmetrics==1.3.2
tqdm==4.64.1
transformers==4.48.2
translatepy==2.3
trio==0.22.0
trio-websocket==0.9.2
typer==0.15.1
typing-extensions==4.12.2
tzdata==2025.1
urllib3==1.26.13
typing-extensions==4.12.2
tzdata==2025.1
urllib3==1.26.13
typing-extensions==4.12.2
tzdata==2025.1
urllib3==1.26.13
tzdata==2025.1
urllib3==1.26.13
tzdata==2025.1
urllib3==1.26.13
tzdata==2025.1
urllib3==1.26.13
urllib3==1.26.13
wcwidth==0.2.5
wcwidth==0.2.5
wcwidth==0.2.5
wcwidth==0.2.5
webdriverdownloader==1.1.0.3
wsproto==1.2.0
xxhash==3.5.0
yarl==1.18.3
"""

# Pre-compiled (fully pinned) requirement sets, keyed by a platform/accelerator
# tag; get_environment() selects one of these when the runtime matches,
# otherwise falls back to _get_reqs_generic().
_COMPILED: dict[str, str] = {
    "WIN_CUDA_212": _WIN_COMPILED_212,
}

def get_environment() -> IsoEnv:
"""Returns the environment."""
venv_dir = HERE / "venv" / "insanely_fast_whisper"

def _get_reqs_generic(has_nvidia: bool) -> list[str]:
    """Build the requirement lines for platforms without a pre-compiled set.

    Args:
        has_nvidia: True when an NVIDIA GPU was detected, selecting the CUDA
            build of torch (TENSOR_CUDA_VERSION) instead of the default build
            (TENSOR_VERSION).

    Returns:
        A list of pip requirement strings, one per dependency, suitable for
        insertion into the generated pyproject dependency table.
    """
    deps = [
        "pyannote.audio==3.1.0",
        "openai-whisper==20240930",
        "insanely-fast-whisper==0.0.15",
        "torchaudio==2.1.2",
        "datasets==2.17.1",
        "pytorch-lightning==2.1.4",
        "torchmetrics~=1.3.0",
        # Single srtranslator pin only: the list previously contained both
        # 0.2.6 and 0.3.5, which are conflicting pins for the same package.
        "srtranslator==0.3.5",
        "numpy==1.26.4",
        "safeIO==1.2",
    ]

    content_lines: list[str] = list(deps)

    # Select the torch build matching the available accelerator.
    if has_nvidia:
        content_lines.append(f"torch=={TENSOR_CUDA_VERSION}")
    else:
        content_lines.append(f"torch=={TENSOR_VERSION}")
    if sys.platform != "darwin":
        # Required on every non-macOS platform (Windows and Linux).
        content_lines.append("intel-openmp==2024.0.3")

    return content_lines


def get_environment() -> IsoEnv:
"""Returns the environment."""
venv_dir = HERE / "venv" / "insanely_fast_whisper"
has_nvidia = has_nvidia_smi()
is_windows = sys.platform == "win32"
if has_nvidia and TENSOR_VERSION == "2.1.2" and is_windows:
dep_lines = _COMPILED["WIN_CUDA_212"].splitlines()
else:
dep_lines = _get_reqs_generic(has_nvidia)
# filter out empty lines
dep_lines = [line.strip() for line in dep_lines if line.strip()]
content_lines: list[str] = []

content_lines.append("[build-system]")
content_lines.append('requires = ["setuptools", "wheel"]')
content_lines.append('build-backend = "setuptools.build_meta"')
Expand All @@ -53,50 +246,20 @@ def get_environment() -> IsoEnv:
content_lines.append('version = "0.1.0"')
content_lines.append('requires-python = "==3.10.*"')
content_lines.append("dependencies = [")
for dep in deps:
for dep in dep_lines:
content_lines.append(f' "{dep}",')
needs_extra_index = False
if has_nvidia_smi():
needs_extra_index = True
content_lines.append(f' "torch=={TENSOR_CUDA_VERSION}",')
else:
content_lines.append(f' "torch=={TENSOR_VERSION}",')
if sys.platform != "darwin":
# Add the windows specific dependencies.
content_lines.append(' "intel-openmp==2024.0.3",')
content_lines.append("]")
content_lines.append("")

# if has_nvidia_smi():
# deps.append(f"torch=={TENSOR_CUDA_VERSION} --extra-index-url {EXTRA_INDEX_URL}")
# else:
# deps.append(f"torch=={TENSOR_VERSION}")
# if sys.platform != "darwin":
# # Add the windows specific dependencies.
# deps.append("intel-openmp==2024.0.3")

if needs_extra_index:
# [tool.uv.sources]
# torch = [
# { index = "pytorch-cu121", marker = "platform_system == 'Windows'" },
# ]
if has_nvidia:
content_lines.append("[tool.uv.sources]")
content_lines.append("torch = [")
content_lines.append(" { index = 'pytorch-cu121' },")
content_lines.append("]")

# [[tool.uv.index]]
# name = "pytorch-cu121"
# url = "https://download.pytorch.org/whl/cu121"
# explicit = true

content_lines.append("[[tool.uv.index]]")
content_lines.append('name = "pytorch-cu121"')
content_lines.append(f'url = "{EXTRA_INDEX_URL}"')
content_lines.append("explicit = true")

# deps.append(f"--extra-index-url {EXTRA_INDEX_URL}")

content = "\n".join(content_lines)
build_info = PyProjectToml(content)
args = IsoEnvArgs(venv_path=venv_dir, build_info=build_info)
Expand Down
2 changes: 1 addition & 1 deletion test
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
. ./activate
set -e
# uv run python -m unittest discover tests
pytest
pytest --tb=long -v
56 changes: 56 additions & 0 deletions tests/test_insanley_fast_whisper_diarization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# pylint: disable=bad-option-value,useless-option-value,no-self-use,protected-access,R0801
# flake8: noqa E501


"""
Tests transcribe_anything
"""


import os
import shutil
import unittest
from pathlib import Path

from dotenv import load_dotenv

from transcribe_anything.insanely_fast_whisper import (
run_insanely_fast_whisper,
)
from transcribe_anything.util import has_nvidia_smi, is_mac_arm

load_dotenv()  # take environment variables from .env.

# Hugging Face API token; speaker diarization requires it (None when unset).
HF_TOKEN = os.getenv("HF_TOKEN")


# Path layout for test fixtures and outputs.
HERE = Path(os.path.abspath(os.path.dirname(__file__)))
LOCALFILE_DIR = HERE / "localfile"  # presumably local-only media fixtures — verify
TESTS_DATA_DIR = LOCALFILE_DIR / "text_video_insane" / "en"  # transcription output dir
TEST_WAV = LOCALFILE_DIR / "video.wav"  # input fixture for the test
PROJECT_ROOT = HERE.parent

# Diarization needs a GPU backend (NVIDIA or Apple Silicon) plus an HF token.
CAN_RUN_TEST = (has_nvidia_smi() or is_mac_arm()) and HF_TOKEN is not None


class InsanelFastWhisperDiarizationTester(unittest.TestCase):
    """Diarization test for the insanely-fast-whisper backend."""

    @unittest.skipUnless(CAN_RUN_TEST, "No GPU, or HF_TOKEN not set")
    def test_local_file(self) -> None:
        """Transcribe a local wav with a HF token and expect speaker.json."""
        # Start from a clean output directory so stale results can't pass.
        shutil.rmtree(TESTS_DATA_DIR, ignore_errors=True)
        run_insanely_fast_whisper(
            input_wav=TEST_WAV,
            model="small",
            output_dir=TESTS_DATA_DIR,
            task="transcribe",
            language="en",
            hugging_face_token=HF_TOKEN,
        )
        # Diarization output is written alongside the transcription.
        speaker_file = TESTS_DATA_DIR / "speaker.json"
        self.assertTrue(speaker_file.exists())


if __name__ == "__main__":
unittest.main()

0 comments on commit 65e47a0

Please sign in to comment.