Skip to content

Commit

Permalink
chore: remove unnecessary files and update build scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
zackees committed Jan 31, 2025
1 parent 3ae9743 commit 8d40b8e
Show file tree
Hide file tree
Showing 17 changed files with 76 additions and 151 deletions.
2 changes: 0 additions & 2 deletions MANIFEST.in

This file was deleted.

3 changes: 3 additions & 0 deletions clean
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ rm -rf venv
rm -rf .venv
rm -rf .env_*
rm -rf src/transcribe_anything/venv
rm -rf src/transcribe_anything/transcribe_anything.egg-info
rm -rf src/transcribe_anything.egg-info
rm -rf src/transcribe_anything/__pycache__
# remove all *.pyc files
find . -name "*.pyc" -exec rm -rf {} \;
# remove all *.egg files
Expand Down
12 changes: 6 additions & 6 deletions lint
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,25 @@ echo activating venv...
set -e

echo installing pip dependencies
pip install -r requirements.txt


echo running black...
black src tests --exclude 'src/transcribe_anything/venv'
uv run black src tests --exclude 'src/transcribe_anything/venv'

echo running isort...
isort src tests
uv run isort src tests

echo re-running black now
black src tests --exclude 'src/transcribe_anything/venv'
uv run black src tests --exclude 'src/transcribe_anything/venv'

# skip ruff when --no-ruff is passed
if [[ $* == *--no-ruff* ]]; then
echo skipping ruff...
else
echo running ruff...
ruff check --fix src tests
uv run ruff check --fix src tests
fi


echo running mypy...
mypy src tests --exclude 'src/transcribe_anything/venv'
uv run mypy src tests --exclude 'src/transcribe_anything/venv'
56 changes: 32 additions & 24 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,35 +4,46 @@ build-backend = "setuptools.build_meta"

[project]
name = "transcribe-anything"
version = "2.3.7" # Update this manually or configure setuptools-scm for automatic versioning
readme = "README.md"
maintainers = [
{name = "Zachary Vorhies", email = "dont@email.me"}
description = "Uses Whisper AI to transcribe speech from video and audio files. Also accepts URLs for YouTube, Rumble, BitChute, clear file links, etc."
requires-python = ">=3.10"
keywords = ["transcribe", "openai", "whisper"]
license = { text = "BSD-3-Clause" }
dependencies = [
"static-ffmpeg>=2.7",
"yt-dlp>=2025.1.15",
"appdirs>=1.4.4",
"disklru>=1.0.7",
"FileLock",
"webvtt-py==0.4.6",
"uv-iso-env>=1.0.33",
]

description = "Uses whisper AI to transcribe speach from video and audio files. Also accepts urls for youtube, rumble, bitchute, clear file, etc."
requires-python = ">=3.10"
keywords = ["transcribe", "open ai", "whisper"]
license = { text = "BSD 3-Clause License" }
classifiers = ["Programming Language :: Python :: 3"]
dynamic = ["dependencies"]
# Change this with the version number bump.
version = "2.3.0"
maintainers = [{ name = "Zachary Vorhies", email = "dont@email.me" }]

[project.urls]
homepage = "https://github.com/zackees/transcribe-anything"

[tool.setuptools]
package-dir = {"" = "src"}

[tool.setuptools.packages.find]
where = ["src"]
include = ["transcribe_anything*"]
exclude = ["tests*", "docs*", "examples*"]
namespaces = false # Prevent setuptools from looking elsewhere


[project.scripts]
transcribe_anything = "transcribe_anything._cmd:main"
transcribe-anything = "transcribe_anything._cmd:main"

[tool.ruff]
line-length = 200

[tool.pylint."MESSAGES CONTROL"]
good-names = [
"c",
"i",
"ok",
"id",
"e",
"f"
]
good-names = ["c", "i", "ok", "id", "e", "f"]
disable = [
"missing-function-docstring",
"missing-module-docstring"
Expand All @@ -45,9 +56,6 @@ profile = "black"
ignore_missing_imports = true
disable_error_code = ["import-untyped"]

[tool.setuptools.dynamic]
dependencies = {file = ["requirements.txt"]}

[project.scripts]
transcribe_anything = "transcribe_anything._cmd:main"
transcribe-anything = "transcribe_anything._cmd:main"
[tool.black]
line-length = 200
target-version = ['py310']
8 changes: 0 additions & 8 deletions requirements.txt

This file was deleted.

15 changes: 1 addition & 14 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,6 @@



import os

from setuptools import setup

URL = "https://github.com/zackees/transcribe-anything"
KEYWORDS = "isolated environment, uv pip python build tools"
HERE = os.path.dirname(os.path.abspath(__file__))

if __name__ == "__main__":
setup(
keywords=KEYWORDS,
url=URL,
package_data={"": ["assets/example.txt"]},
include_package_data=True)

setup()

32 changes: 7 additions & 25 deletions src/transcribe_anything/_cmd.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Entry point for running the transcribe-anything program.
Entry point for running the transcribe-anything program.
"""

# flake8: noqa E501
Expand Down Expand Up @@ -59,11 +59,7 @@ def parse_arguments() -> argparse.Namespace:
"""Parse arguments."""
whisper_options = get_whisper_options()
device = get_computing_device()
help_str = (
f'transcribe_anything is using a "{device}" device.'
" Any unrecognized args are assumed to be for whisper"
" ai and will be passed as is to whisper ai."
)
help_str = f'transcribe_anything is using a "{device}" device.' " Any unrecognized args are assumed to be for whisper" " ai and will be passed as is to whisper ai."
parser = argparse.ArgumentParser(description=help_str)
parser.add_argument(
"url_or_file",
Expand All @@ -72,10 +68,7 @@ def parse_arguments() -> argparse.Namespace:
)
parser.add_argument(
"--query-gpu-json-path",
help=(
"Query the GPU and store it in the given path,"
" warning takes a long time on first load!"
),
help=("Query the GPU and store it in the given path," " warning takes a long time on first load!"),
type=Path,
)
parser.add_argument(
Expand Down Expand Up @@ -119,18 +112,12 @@ def parse_arguments() -> argparse.Namespace:
)
parser.add_argument(
"--diarization_model",
help=(
"Name of the pretrained model/ checkpoint to perform diarization."
+ " (default: pyannote/speaker-diarization). Only works for --device insane."
),
help=("Name of the pretrained model/ checkpoint to perform diarization." + " (default: pyannote/speaker-diarization). Only works for --device insane."),
default="pyannote/speaker-diarization-3.1",
)
parser.add_argument(
"--timestamp",
help=(
"Whisper supports both chunked as well as word level timestamps. (default: chunk)."
+ " Only works for --device insane."
),
help=("Whisper supports both chunked as well as word level timestamps. (default: chunk)." + " Only works for --device insane."),
choices=["chunk", "word"],
default=None,
)
Expand Down Expand Up @@ -163,10 +150,7 @@ def main() -> int:
if args.model == "large-legacy":
args.model = "large"
elif args.model == "large":
print(
"Defaulting to large-v3 model for --model large,"
+ " use --model large-legacy for the old model"
)
print("Defaulting to large-v3 model for --model large," + " use --model large-legacy for the old model")
args.model = "large-v3"
elif args.model is None and args.device == "insane":
print("Defaulting to large-v3 model for --device insane")
Expand All @@ -187,9 +171,7 @@ def main() -> int:
# For now, just stuff --diarization_model and --timestamp into unknown
if args.diarization_model:
if args.device != "insane":
print(
"--diarization_model only works with --device insane. Ignoring --diarization_model"
)
print("--diarization_model only works with --device insane. Ignoring --diarization_model")
else:
unknown.append(f"--diarization_model {args.diarization_model}")

Expand Down
32 changes: 7 additions & 25 deletions src/transcribe_anything/api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Api for using transcribe_anything from python. Allows bulk processing.
Api for using transcribe_anything from python. Allows bulk processing.
"""

# pylint: disable=too-many-arguments,broad-except,too-many-locals,unsupported-binary-operation,too-many-branches,too-many-statements,disable=notimplemented-raised,unused-variable,line-too-long
Expand Down Expand Up @@ -42,14 +42,7 @@

CACHE_FILE = os.path.join(user_config_dir("transcript-anything", "cache", roaming=True))

PERMS = (
stat.S_IRUSR
| stat.S_IRGRP
| stat.S_IROTH
| stat.S_IWOTH
| stat.S_IWUSR
| stat.S_IWGRP
)
PERMS = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH | stat.S_IWOTH | stat.S_IWUSR | stat.S_IWGRP


class Device(Enum):
Expand Down Expand Up @@ -81,9 +74,7 @@ def make_temp_wav() -> str:
"""
Makes a temporary wav file and returns the path to it.
"""
tmp = tempfile.NamedTemporaryFile( # pylint: disable=consider-using-with
suffix=".wav", delete=False
)
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False) # pylint: disable=consider-using-with

tmp.close()

Expand Down Expand Up @@ -151,9 +142,7 @@ def get_video_name_from_url(url: str) -> str:
log_error("yt-dlp failed to get title, using basename instead.")
return os.path.basename(url)
except subprocess.CalledProcessError as exc:
log_error(
f"yt-dlp failed with {exc}, using basename instead\n{exc.stdout}\n{exc.stderr}"
)
log_error(f"yt-dlp failed with {exc}, using basename instead\n{exc.stdout}\n{exc.stderr}")
return os.path.basename(url)
except Exception as exc:
log_error(f"yt-dlp failed with {exc}, using basename instead.")
Expand Down Expand Up @@ -192,10 +181,7 @@ def transcribe(
static_ffmpeg.add_paths()
check_python_in_range()
if not os.path.isfile(url_or_file) and embed:
raise NotImplementedError(
"Embedding is only supported for local files. "
+ "Please download the file first."
)
raise NotImplementedError("Embedding is only supported for local files. " + "Please download the file first.")
# cache = DiskLRUCache(CACHE_FILE, 16)
basename = os.path.basename(url_or_file)
if not basename or basename == ".": # if url_or_file is a directory
Expand All @@ -216,9 +202,7 @@ def transcribe(
print(f"making dir {output_dir}")
os.makedirs(output_dir, exist_ok=True)
tmp_wav = make_temp_wav()
assert os.path.isdir(
output_dir
), f"Path {output_dir} is not found or not a directory."
assert os.path.isdir(output_dir), f"Path {output_dir} is not found or not a directory."
# tmp_mp3 = os.path.join(output_dir, "out.mp3")
fetch_audio(url_or_file, tmp_wav)
assert os.path.exists(tmp_wav), f"Path {tmp_wav} doesn't exist."
Expand Down Expand Up @@ -319,9 +303,7 @@ def transcribe(
except subprocess.CalledProcessError as exc:
stdout = exc.stdout
stderr = exc.stderr
warnings.warn(
f"ffmpeg failed with return code {exc.returncode}\n{stdout}\n{stderr}"
)
warnings.warn(f"ffmpeg failed with return code {exc.returncode}\n{stdout}\n{stderr}")
raise
print(f"Done! Files were saved to {output_dir}")
return output_dir
Expand Down
10 changes: 3 additions & 7 deletions src/transcribe_anything/audio.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Fetches audio and handles transcoding it.
Fetches audio and handles transcoding it.
"""

import os
Expand All @@ -12,15 +12,11 @@
from transcribe_anything.ytldp_download import ytdlp_download


def _convert_to_wav(
inpath: str, outpath: str, speech_normalization: bool = False
) -> None:
def _convert_to_wav(inpath: str, outpath: str, speech_normalization: bool = False) -> None:
"""Converts a file to wav."""
# static_ffmpeg -y -i C:\Users\niteris\AppData\Local\Temp\tmp3xhzm1sn\out.webm -filter:a "speechnorm=e=12.5:r=0.00001:l=1" -acodec pcm_s16le -ar 44100 -ac 1 C:\Users\niteris\AppData\Local\Temp\tmpu32zsjov.wav

tmpwav = tempfile.NamedTemporaryFile( # pylint: disable=consider-using-with
suffix=".wav", delete=False
)
tmpwav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False) # pylint: disable=consider-using-with
tmpwav.close()
tmpwavepath = tmpwav.name

Expand Down
5 changes: 1 addition & 4 deletions src/transcribe_anything/cuda_available.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@ class CudaDevice:
device_id: int

def __str__(self):
return (
f"{self.name} - VRAM: {self.vram / (1024 ** 3):.2f} GB, "
f"Multiprocessors: {self.multiprocessors}"
)
return f"{self.name} - VRAM: {self.vram / (1024 ** 3):.2f} GB, " f"Multiprocessors: {self.multiprocessors}"

def to_json(self) -> dict[str, str | int]:
"""Returns a dictionary representation of the object."""
Expand Down
9 changes: 2 additions & 7 deletions src/transcribe_anything/generate_speaker_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,7 @@ def to_json(self) -> dict:

def can_combine(chunk1: Chunk, chunk2: Chunk) -> bool:
"""Check if two chunks can be combined."""
return (
chunk1.speaker == chunk2.speaker
and abs(chunk1.timestamp_end - chunk2.timestamp_start) <= 0.1
)
return chunk1.speaker == chunk2.speaker and abs(chunk1.timestamp_end - chunk2.timestamp_start) <= 0.1


def reduce(dat: list[Chunk]) -> list[Chunk]:
Expand All @@ -46,9 +43,7 @@ def reduce(dat: list[Chunk]) -> list[Chunk]:
continue
last_chunk = out[-1]
if not can_combine(last_chunk, chunk):
chunk.reason = (
"speaker-switch" if last_chunk.speaker != chunk.speaker else "timeout"
)
chunk.reason = "speaker-switch" if last_chunk.speaker != chunk.speaker else "timeout"
out.append(chunk)
continue
# combine
Expand Down
Loading

0 comments on commit 8d40b8e

Please sign in to comment.