Skip to content

Commit

Permalink
adds insane mode for transcribe anything
Browse files Browse the repository at this point in the history
  • Loading branch information
zackees committed Jan 12, 2024
1 parent d024607 commit a3dadd0
Show file tree
Hide file tree
Showing 8 changed files with 159 additions and 65 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,5 @@ dmypy.json
activate.sh
tests/test_data
tests/localfile/text_video
tests/localfile/text_video_insane
tests/localfile/text_video_insane
tests/localfile/text_video_api_insane
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ static-ffmpeg>=2.5
yt-dlp>=2023.3.4
appdirs==1.4.4
disklru>=1.0.7
isolated-environment>=1.0.1
isolated-environment>=1.0.1
FileLock
41 changes: 41 additions & 0 deletions tests/test_insane_whisper_cmd_arg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
Tests transcribe_anything
"""

# pylint: disable=bad-option-value,useless-option-value,no-self-use,protected-access,R0801
# flake8: noqa E501

import os
import unittest
import shutil

from transcribe_anything.api import transcribe

from transcribe_anything.insanely_fast_whisper import has_nvidia_smi


HERE = os.path.abspath(os.path.dirname(__file__))
LOCALFILE_DIR = os.path.join(HERE, "localfile")
TESTS_DATA_DIR = os.path.join(LOCALFILE_DIR, "text_video_api_insane", "en")


class InsaneWhisperModeTester(unittest.TestCase):
    """Exercises transcribe() end-to-end with the "insane" GPU device."""

    @unittest.skipUnless(has_nvidia_smi(), "No GPU detected")
    def test_local_file(self) -> None:
        """Check that the command works on a local file."""
        # Start from a clean output directory so stale results can't pass.
        shutil.rmtree(TESTS_DATA_DIR, ignore_errors=True)
        source_video = os.path.join(LOCALFILE_DIR, "video.mp4")
        transcribe(
            url_or_file=source_video,
            language="en",
            model="tiny",
            device="insane",
            output_dir=TESTS_DATA_DIR,
        )



if __name__ == "__main__":
unittest.main()
13 changes: 8 additions & 5 deletions tests/test_local_file_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@ class TranscribeAnythingTester(unittest.TestCase):
def test_local_file(self) -> None:
"""Check that the command works on a local file."""
shutil.rmtree(TESTS_DATA_DIR, ignore_errors=True)
subprocess.check_output(
["transcribe_anything", "video.mp4", "--language", "en", "--model", "tiny"],
cwd=LOCALFILE_DIR,
)

try:
subprocess.check_output(
["transcribe_anything", "video.mp4", "--language", "en", "--model", "tiny"],
cwd=LOCALFILE_DIR,
)
except subprocess.CalledProcessError as e: # pylint: disable=R0801
print(e.output)
raise e

if __name__ == "__main__":
unittest.main()
15 changes: 9 additions & 6 deletions tests/test_whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,15 @@ class WhisperTester(unittest.TestCase):
def test_whisper_cmd(self) -> None:
"""Check that the command is installed by the setup process."""
env = get_environment().environment()
subprocess.check_output(
"whisper --help",
shell=True,
env=env,
)

try:
subprocess.check_output(
"whisper --help",
shell=True,
env=env,
)
except subprocess.CalledProcessError as e:
print(e.output)
raise e

if __name__ == "__main__":
unittest.main()
69 changes: 55 additions & 14 deletions transcribe_anything/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import tempfile
import shutil
from pathlib import Path
from enum import Enum

from appdirs import user_config_dir # type: ignore
# from disklru import DiskLRUCache # type: ignore # pylint: disable=unused-import
Expand All @@ -30,6 +31,7 @@
)
from transcribe_anything.logger import log_error
from transcribe_anything.whisper import run_whisper, get_computing_device
from transcribe_anything.insanely_fast_whisper import run_insanely_fast_whisper

os.environ["PYTHONIOENCODING"] = "utf-8"

Expand All @@ -44,6 +46,30 @@
| stat.S_IWGRP
)

class Device(Enum):
    """Compute devices the transcriber can target."""

    CPU = "cpu"
    CUDA = "cuda"
    INSANE = "insane"

    def __str__(self) -> str:
        return self.value

    def __repr__(self) -> str:
        return str(self)

    @staticmethod
    def from_str(device: str) -> "Device":
        """Returns the device from a string."""
        # Scan members by value rather than spelling out an if-chain.
        for member in Device:
            if member.value == device:
                return member
        raise ValueError(f"Unknown device {device}")


ffmpeg_add_paths()


Expand Down Expand Up @@ -145,30 +171,45 @@ def transcribe(
#cached_data = cache.get_json(key)
# print(f"Todo: cached data: {cached_data}")
device = device or get_computing_device()
if device == "cuda":
device_enum = Device.from_str(device)
if device_enum == Device.CUDA:
print("#####################################")
print("######### GPU ACCELERATED! ##########")
print("#####################################")
elif device == "cpu":
elif device_enum == Device.INSANE:
print("#####################################")
print("####### INSANE GPU MODE! ############")
print("#####################################")
elif device_enum == Device.CPU:
print("WARNING: NOT using GPU acceleration, using 10x slower CPU instead.")
else:
raise ValueError(f"Unknown device {device}")
print(f"Using device {device}")
model_str = f" --model {model}" if model else ""
task_str = f" --task {task}" if task else ""
language_str = f" --language {language}" if language else ""
model_str = f"{model}" if model else ""
task_str = f"{task}" if task else "transcribe"
language_str = f"{language}" if language else ""

print(f"Running whisper on {tmp_wav} (will install models on first run)")
with tempfile.TemporaryDirectory() as tmpdir:
run_whisper(
Path(tmp_wav),
device,
model_str,
Path(tmpdir),
task_str,
language_str,
other_args or [],
)
if device_enum == Device.INSANE:
run_insanely_fast_whisper(
Path(tmp_wav),
model_str,
Path(tmpdir),
task_str,
language_str,
other_args or [],
)
else:
run_whisper(
Path(tmp_wav),
str(device),
model_str,
Path(tmpdir),
task_str,
language_str,
other_args or [],
)
files = [os.path.join(tmpdir, name) for name in os.listdir(tmpdir)]
srt_file: Optional[str] = None
for file in files:
Expand Down
32 changes: 17 additions & 15 deletions transcribe_anything/insanely_fast_whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@
from pathlib import Path
import subprocess
from typing import Optional, Any
from filelock import FileLock

from isolated_environment import IsolatedEnvironment # type: ignore
from transcribe_anything.cuda_available import CudaInfo

HERE = Path(__file__).parent
ENV: Optional[IsolatedEnvironment] = None
CUDA_INFO: Optional[CudaInfo] = None

ENV_LOCK = FileLock(HERE / "insane_whisper_env.lock")

# Set the versions
TENSOR_VERSION = "2.1.2"
Expand All @@ -41,20 +42,21 @@ def has_nvidia_smi() -> bool:
def get_environment() -> IsolatedEnvironment:
    """Returns the isolated venv for insanely-fast-whisper, building it on first use.

    The scraped diff concatenated the old (unlocked) and new (locked) bodies;
    this is the locked version, plus a fast path: once ENV is set in this
    process the lock is skipped entirely. ENV_LOCK serializes venv creation
    across processes so concurrent runs cannot corrupt a half-installed venv.
    """
    global ENV  # pylint: disable=global-statement
    if ENV is not None:
        # Fast path: already initialized in this process; no lock needed.
        return ENV
    with ENV_LOCK:
        # Re-check under the lock in case another thread initialized it.
        if ENV is not None:
            return ENV
        venv_dir = HERE / "venv" / "insanely_fast_whisper"
        env = IsolatedEnvironment(venv_dir)
        if not venv_dir.exists():
            env.install_environment()
            # GPU boxes get the CUDA torch wheel from the extra index.
            if has_nvidia_smi():
                env.pip_install(f"torch=={TENSOR_VERSION}", extra_index=EXTRA_INDEX_URL)
            else:
                env.pip_install(f"torch=={TENSOR_VERSION}")
            env.pip_install("openai-whisper")
            env.pip_install("insanely-fast-whisper")
        ENV = env
        return env


def get_cuda_info() -> CudaInfo:
Expand Down
48 changes: 25 additions & 23 deletions transcribe_anything/whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@
from pathlib import Path
import subprocess
from typing import Optional
from filelock import FileLock

from isolated_environment import IsolatedEnvironment # type: ignore

HERE = Path(__file__).parent
ENV: Optional[IsolatedEnvironment] = None
CUDA_AVAILABLE: Optional[bool] = None
ENV_LOCK = FileLock(HERE / "whisper_env.lock")

# Set the versions
TENSOR_VERSION = "2.1.2"
Expand Down Expand Up @@ -48,18 +50,20 @@ def get_environment() -> IsolatedEnvironment:
return env



def get_computing_device() -> str:
    """Get the computing device: "cuda" if the probe script succeeds, else "cpu".

    The scraped diff concatenated the old and new bodies; this is the locked
    version. The `global` statement is hoisted to function top (the diff had
    it inside the `with` block, which is legal but unidiomatic), and the lock
    is only taken when the cached probe result is missing.
    """
    global CUDA_AVAILABLE  # pylint: disable=global-statement
    if CUDA_AVAILABLE is None:
        with ENV_LOCK:
            # Re-check under the lock in case another thread probed already.
            if CUDA_AVAILABLE is None:
                iso_env = get_environment()
                env = iso_env.environment()
                py_file = HERE / "cuda_available.py"
                # Probe script exits 0 iff torch reports a usable CUDA device.
                rtn = subprocess.run(
                    ["python", py_file], check=False, env=env
                ).returncode
                CUDA_AVAILABLE = rtn == 0
    return "cuda" if CUDA_AVAILABLE else "cpu"

def run_whisper( # pylint: disable=too-many-arguments
input_wav: Path,
Expand All @@ -71,25 +75,23 @@ def run_whisper( # pylint: disable=too-many-arguments
other_args: Optional[list[str]]
) -> None:
"""Runs whisper."""

iso_env = get_environment()
cmd_list = []
if sys.platform == "win32":
# Set the text mode to UTF-8 on Windows.
cmd_list.extend(["chcp", "65001", "&&"])
cmd_list.append("whisper")
cmd_list.append(f'"{input_wav}"')
cmd_list.append("--device")
cmd_list.append(device)
cmd_list.append("--model")
cmd_list.append(model)
cmd_list.append(f'--output_dir "{output_dir}"')
cmd_list.append("--task")
cmd_list.append(task)
if language:
cmd_list.append(f'--language "{language}"')

cmd_list.extend(
[
"whisper",
f'"{input_wav}"',
"--device",
device,
model,
f'--output_dir "{output_dir}"',
task,
language,
]
)
if other_args:
cmd_list.extend(other_args)
# Remove the empty strings.
Expand Down

0 comments on commit a3dadd0

Please sign in to comment.