Commit d5c9c71

Authored by Ettore Di Giacinto (mudler)
feat(chatterbox): add new backend (#5524)
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent dd7fa6b commit d5c9c71

15 files changed: +330 -3 lines changed
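The commit wires a new "chatterbox" text-to-speech backend (built on the chatterbox-tts Python package) into LocalAI as an external gRPC backend. As orientation before the per-file diffs, the sketch below shows one way the backend could be exercised end to end through LocalAI's HTTP TTS endpoint once a CUDA-enabled extras image is running. This is not part of the commit: the /tts route and the backend/model/input fields reflect LocalAI's TTS API as I understand it, and the host, port, and output filename are placeholders, so treat it as an illustrative smoke test only.

# Hypothetical smoke test: ask a running LocalAI instance to synthesize speech
# with the new chatterbox backend and save the returned audio locally.
# Endpoint shape and field names are assumptions; adjust to your deployment.
import json
import urllib.request

payload = {
    "backend": "chatterbox",                       # route the request to the new backend
    "model": "chatterbox",                         # placeholder model name
    "input": "Hello from the chatterbox backend."  # text to synthesize
}

req = urllib.request.Request(
    "http://localhost:8080/tts",                   # assumed LocalAI address
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)

with urllib.request.urlopen(req) as resp, open("chatterbox-test.wav", "wb") as f:
    f.write(resp.read())                           # response body is the generated audio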

.github/workflows/test-extra.yml

Lines changed: 22 additions & 0 deletions
@@ -14,6 +14,28 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
+  # Requires CUDA
+  # tests-chatterbox-tts:
+  #   runs-on: ubuntu-latest
+  #   steps:
+  #   - name: Clone
+  #     uses: actions/checkout@v4
+  #     with:
+  #       submodules: true
+  #   - name: Dependencies
+  #     run: |
+  #       sudo apt-get update
+  #       sudo apt-get install build-essential ffmpeg
+  #       # Install UV
+  #       curl -LsSf https://astral.sh/uv/install.sh | sh
+  #       sudo apt-get install -y ca-certificates cmake curl patch python3-pip
+  #       sudo apt-get install -y libopencv-dev
+  #       pip install --user --no-cache-dir grpcio-tools==1.64.1
+
+  #   - name: Test chatterbox-tts
+  #     run: |
+  #       make --jobs=5 --output-sync=target -C backend/python/chatterbox
+  #       make --jobs=5 --output-sync=target -C backend/python/chatterbox test
   tests-transformers:
     runs-on: ubuntu-latest
     steps:

Dockerfile

Lines changed: 4 additions & 1 deletion
@@ -15,7 +15,7 @@ ARG TARGETARCH
 ARG TARGETVARIANT
 
 ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,faster-whisper:/build/backend/python/faster-whisper/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh,chatterbox:/build/backend/python/chatterbox/run.sh"
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \

@@ -434,6 +434,9 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "coqui" || -z "${EXTRA_BACKENDS}" ) && "$IMAG
     ; fi && \
     if [[ ( "${EXTRA_BACKENDS}" =~ "diffusers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
         make -C backend/python/diffusers \
+    ; fi && \
+    if [[ ( "${EXTRA_BACKENDS}" =~ "chatterbox" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" && "${BUILD_TYPE}" = "cublas" ]]; then \
+        make -C backend/python/chatterbox \
     ; fi
 
 RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \

Makefile

Lines changed: 13 additions & 2 deletions
@@ -549,10 +549,10 @@ protogen-go-clean:
 	$(RM) bin/*
 
 .PHONY: protogen-python
-protogen-python: bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
+protogen-python: bark-protogen coqui-protogen chatterbox-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen kokoro-protogen vllm-protogen faster-whisper-protogen
 
 .PHONY: protogen-python-clean
-protogen-python-clean: bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
+protogen-python-clean: bark-protogen-clean coqui-protogen-clean chatterbox-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean kokoro-protogen-clean vllm-protogen-clean faster-whisper-protogen-clean
 
 .PHONY: bark-protogen
 bark-protogen:

@@ -574,10 +574,18 @@ coqui-protogen-clean:
 diffusers-protogen:
 	$(MAKE) -C backend/python/diffusers protogen
 
+.PHONY: chatterbox-protogen
+chatterbox-protogen:
+	$(MAKE) -C backend/python/chatterbox protogen
+
 .PHONY: diffusers-protogen-clean
 diffusers-protogen-clean:
 	$(MAKE) -C backend/python/diffusers protogen-clean
 
+.PHONY: chatterbox-protogen-clean
+chatterbox-protogen-clean:
+	$(MAKE) -C backend/python/chatterbox protogen-clean
+
 .PHONY: faster-whisper-protogen
 faster-whisper-protogen:
 	$(MAKE) -C backend/python/faster-whisper protogen

@@ -632,6 +640,7 @@ prepare-extra-conda-environments: protogen-python
 	$(MAKE) -C backend/python/bark
 	$(MAKE) -C backend/python/coqui
 	$(MAKE) -C backend/python/diffusers
+	$(MAKE) -C backend/python/chatterbox
 	$(MAKE) -C backend/python/faster-whisper
 	$(MAKE) -C backend/python/vllm
 	$(MAKE) -C backend/python/rerankers

@@ -642,11 +651,13 @@ prepare-extra-conda-environments: protogen-python
 prepare-test-extra: protogen-python
 	$(MAKE) -C backend/python/transformers
 	$(MAKE) -C backend/python/diffusers
+	$(MAKE) -C backend/python/chatterbox
 	$(MAKE) -C backend/python/vllm
 
 test-extra: prepare-test-extra
 	$(MAKE) -C backend/python/transformers test
 	$(MAKE) -C backend/python/diffusers test
+	$(MAKE) -C backend/python/chatterbox test
 	$(MAKE) -C backend/python/vllm test
 
 backend-assets:

backend/python/chatterbox/Makefile

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+.PHONY: coqui
+coqui: protogen
+	bash install.sh
+
+.PHONY: run
+run: protogen
+	@echo "Running coqui..."
+	bash run.sh
+	@echo "coqui run."
+
+.PHONY: test
+test: protogen
+	@echo "Testing coqui..."
+	bash test.sh
+	@echo "coqui tested."
+
+.PHONY: protogen
+protogen: backend_pb2_grpc.py backend_pb2.py
+
+.PHONY: protogen-clean
+protogen-clean:
+	$(RM) backend_pb2_grpc.py backend_pb2.py
+
+backend_pb2_grpc.py backend_pb2.py:
+	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
+
+.PHONY: clean
+clean: protogen-clean
+	rm -rf venv __pycache__
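The target names in this Makefile still say "coqui", a leftover from the coqui backend it mirrors, but the default target, run, test, and protogen rules behave like those of the other Python backends. The protogen rule shells out to grpc_tools.protoc; for reference, the same stub generation could be driven from Python as sketched below, where the include path and output directory simply mirror the Makefile and are otherwise assumptions.

# Rough Python equivalent of the Makefile's protogen rule.
# Generates backend_pb2.py and backend_pb2_grpc.py from backend.proto.
from grpc_tools import protoc

exit_code = protoc.main([
    "protoc",
    "-I../..",              # repository root that contains backend.proto
    "--python_out=.",       # plain protobuf messages -> backend_pb2.py
    "--grpc_python_out=.",  # gRPC service stubs      -> backend_pb2_grpc.py
    "backend.proto",
])
raise SystemExit(exit_code)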

backend/python/chatterbox/backend.py

Lines changed: 117 additions & 0 deletions
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+"""
+This is an extra gRPC server of LocalAI for Bark TTS
+"""
+from concurrent import futures
+import time
+import argparse
+import signal
+import sys
+import os
+import backend_pb2
+import backend_pb2_grpc
+
+import torch
+import torchaudio as ta
+from chatterbox.tts import ChatterboxTTS
+
+import grpc
+
+
+_ONE_DAY_IN_SECONDS = 60 * 60 * 24
+
+# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
+MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
+COQUI_LANGUAGE = os.environ.get('COQUI_LANGUAGE', None)
+
+# Implement the BackendServicer class with the service methods
+class BackendServicer(backend_pb2_grpc.BackendServicer):
+    """
+    BackendServicer is the class that implements the gRPC service
+    """
+    def Health(self, request, context):
+        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
+    def LoadModel(self, request, context):
+
+        # Get device
+        # device = "cuda" if request.CUDA else "cpu"
+        if torch.cuda.is_available():
+            print("CUDA is available", file=sys.stderr)
+            device = "cuda"
+        else:
+            print("CUDA is not available", file=sys.stderr)
+            device = "cpu"
+
+        if not torch.cuda.is_available() and request.CUDA:
+            return backend_pb2.Result(success=False, message="CUDA is not available")
+
+        self.AudioPath = None
+
+        if os.path.isabs(request.AudioPath):
+            self.AudioPath = request.AudioPath
+        elif request.AudioPath and request.ModelFile != "" and not os.path.isabs(request.AudioPath):
+            # get base path of modelFile
+            modelFileBase = os.path.dirname(request.ModelFile)
+            # modify LoraAdapter to be relative to modelFileBase
+            self.AudioPath = os.path.join(modelFileBase, request.AudioPath)
+
+        try:
+            print("Preparing models, please wait", file=sys.stderr)
+            self.model = ChatterboxTTS.from_pretrained(device=device)
+        except Exception as err:
+            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+        # Implement your logic here for the LoadModel service
+        # Replace this with your desired response
+        return backend_pb2.Result(message="Model loaded successfully", success=True)
+
+    def TTS(self, request, context):
+        try:
+            # Generate audio using ChatterboxTTS
+            if self.AudioPath is not None:
+                wav = self.model.generate(request.text, audio_prompt_path=self.AudioPath)
+            else:
+                wav = self.model.generate(request.text)
+
+            # Save the generated audio
+            ta.save(request.dst, wav, self.model.sr)
+
+        except Exception as err:
+            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
+        return backend_pb2.Result(success=True)
+
+def serve(address):
+    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS),
+                         options=[
+                             ('grpc.max_message_length', 50 * 1024 * 1024),  # 50MB
+                             ('grpc.max_send_message_length', 50 * 1024 * 1024),  # 50MB
+                             ('grpc.max_receive_message_length', 50 * 1024 * 1024),  # 50MB
+                         ])
+    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
+    server.add_insecure_port(address)
+    server.start()
+    print("Server started. Listening on: " + address, file=sys.stderr)
+
+    # Define the signal handler function
+    def signal_handler(sig, frame):
+        print("Received termination signal. Shutting down...")
+        server.stop(0)
+        sys.exit(0)
+
+    # Set the signal handlers for SIGINT and SIGTERM
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
+
+    try:
+        while True:
+            time.sleep(_ONE_DAY_IN_SECONDS)
+    except KeyboardInterrupt:
+        server.stop(0)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run the gRPC server.")
+    parser.add_argument(
+        "--addr", default="localhost:50051", help="The address to bind the server to."
+    )
+    args = parser.parse_args()
+
+    serve(args.addr)
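The servicer above follows the template shared by LocalAI's other Python backends (the "Bark TTS" docstring and the COQUI_LANGUAGE variable are carried over from the Bark and Coqui servers it was adapted from). For local debugging, a minimal client along the following lines could drive it directly over gRPC. It assumes the ModelOptions and TTSRequest messages from LocalAI's backend.proto, with the AudioPath, text, and dst fields the servicer reads, so message and field names should be checked against the generated stubs before use.

# Hypothetical standalone client for the chatterbox gRPC backend started via
# "python backend.py --addr localhost:50051". Message names are assumptions
# based on LocalAI's backend.proto; verify against backend_pb2.py.
import grpc
import backend_pb2
import backend_pb2_grpc

def main():
    channel = grpc.insecure_channel("localhost:50051")
    stub = backend_pb2_grpc.BackendStub(channel)

    # Load the model; AudioPath may optionally point to a reference voice prompt.
    load = stub.LoadModel(backend_pb2.ModelOptions())
    print("LoadModel:", load.success, load.message)

    # Synthesize speech; the backend writes the waveform to the dst path.
    tts = stub.TTS(backend_pb2.TTSRequest(text="Testing chatterbox.", dst="/tmp/chatterbox.wav"))
    print("TTS:", tts.success, tts.message)

if __name__ == "__main__":
    main()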

backend/python/chatterbox/install.sh

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -e
+
+source $(dirname $0)/../common/libbackend.sh
+
+# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
+# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
+# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
+# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
+if [ "x${BUILD_PROFILE}" == "xintel" ]; then
+	EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
+fi
+
+installRequirements
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+accelerate
+torch==2.6.0
+torchaudio==2.6.0
+transformers==4.46.3
+chatterbox-tts
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch==2.6.0+cu118
+torchaudio==2.6.0+cu118
+transformers==4.46.3
+chatterbox-tts
+accelerate
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+torch==2.6.0
+torchaudio==2.6.0
+transformers==4.46.3
+chatterbox-tts
+accelerate
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+--extra-index-url https://download.pytorch.org/whl/rocm6.0
+torch==2.6.0+rocm6.0
+torchaudio==2.6.0+rocm6.0
+transformers==4.46.3
+chatterbox-tts
+accelerate
Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+intel-extension-for-pytorch==2.3.110+xpu
+torch==2.3.1+cxx11.abi
+torchaudio==2.3.1+cxx11.abi
+transformers==4.46.3
+chatterbox-tts
+accelerate
+oneccl_bind_pt==2.3.100+xpu
+optimum[openvino]
+setuptools
+transformers==4.48.3
+accelerate
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+grpcio==1.72.0
+protobuf
+certifi
+packaging
+setuptools

backend/python/chatterbox/run.sh

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+#!/bin/bash
+source $(dirname $0)/../common/libbackend.sh
+
+startBackend $@

Comments (0)