Skip to content

Commit 658867f

Browse files
Add multi-language AudioQnA on Xeon (#982)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 620ef76 commit 658867f

File tree

5 files changed

+201
-1
lines changed

5 files changed

+201
-1
lines changed

AudioQnA/Dockerfile.multilang

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

# Runtime libraries plus git (git is needed only to clone GenAIComps below).
# Remove the apt package lists in the same layer so they do not bloat the image.
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    libgl1-mesa-glx \
    libjemalloc-dev \
    git && \
    rm -rf /var/lib/apt/lists/*

# Create an unprivileged user to run the service.
RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

WORKDIR /home/user/
RUN git clone https://github.com/opea-project/GenAIComps.git

WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt

COPY ./audioqna_multilang.py /home/user/audioqna_multilang.py

# Make the cloned GenAIComps package importable by the service script.
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps

USER user

WORKDIR /home/user

ENTRYPOINT ["python", "audioqna_multilang.py"]

AudioQnA/audioqna_multilang.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import asyncio
import base64
import os

from comps import AudioQnAGateway, MicroService, ServiceOrchestrator, ServiceType

# Host/port the AudioQnA megaservice gateway binds to.
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))

# Upstream microservice endpoints; all overridable via environment variables.
WHISPER_SERVER_HOST_IP = os.getenv("WHISPER_SERVER_HOST_IP", "0.0.0.0")
WHISPER_SERVER_PORT = int(os.getenv("WHISPER_SERVER_PORT", 7066))
GPT_SOVITS_SERVER_HOST_IP = os.getenv("GPT_SOVITS_SERVER_HOST_IP", "0.0.0.0")
# NOTE(review): the compose file publishes gpt-sovits on 9880 — confirm this 9088 default.
GPT_SOVITS_SERVER_PORT = int(os.getenv("GPT_SOVITS_SERVER_PORT", 9088))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
# NOTE(review): default equals MEGA_SERVICE_PORT (8888); harmless when overridden, but verify.
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 8888))
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
    """Adapt the payload handed to `cur_node` to the schema that node expects."""
    print(inputs)
    service_type = self.services[cur_node].service_type
    if service_type == ServiceType.ASR:
        # Gateway delivers audio as {'byte_str': '<base64 wav>'}; the ASR service wants "audio".
        inputs["audio"] = inputs.pop("byte_str")
    elif service_type == ServiceType.LLM:
        # Convert the TGI/vLLM payload into the unified OpenAI /v1/chat/completions format.
        inputs = {
            "model": "tgi",  # deliberately fake model name, kept only to unify the format
            "messages": [{"role": "user", "content": inputs["asr_result"]}],
            "max_tokens": llm_parameters_dict["max_tokens"],
            "top_p": llm_parameters_dict["top_p"],
            "stream": inputs["streaming"],  # False as default
            "frequency_penalty": inputs["frequency_penalty"],
            # "presence_penalty": inputs["presence_penalty"],
            # "repetition_penalty": inputs["repetition_penalty"],
            "temperature": inputs["temperature"],
        }
    elif service_type == ServiceType.TTS:
        # TTS consumes the first LLM choice; language defaults to Chinese ("zh").
        inputs = {
            "text": inputs["choices"][0]["message"]["content"],
            "text_language": kwargs.get("tts_text_language", "zh"),
        }
    return inputs
def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
    """Post-process the raw response from `cur_node` before it flows onward."""
    if self.services[cur_node].service_type != ServiceType.TTS:
        return data
    # TTS returns raw audio bytes; the gateway expects a base64 string under "byte_str".
    return {"byte_str": base64.b64encode(data).decode("utf-8")}
class AudioQnAService:
    """Megaservice chaining ASR (whisper) -> LLM -> TTS (GPT-SoVITS) behind an AudioQnA gateway."""

    def __init__(self, host="0.0.0.0", port=8000):
        self.host = host
        self.port = port
        # Install the AudioQnA-specific payload-adaptation hooks on the orchestrator class.
        ServiceOrchestrator.align_inputs = align_inputs
        ServiceOrchestrator.align_outputs = align_outputs
        self.megaservice = ServiceOrchestrator()

    def add_remote_service(self):
        """Register the three remote microservices, wire the ASR->LLM->TTS flow, create the gateway."""
        asr_service = MicroService(
            name="asr",
            host=WHISPER_SERVER_HOST_IP,
            port=WHISPER_SERVER_PORT,
            # endpoint="/v1/audio/transcriptions",
            endpoint="/v1/asr",
            use_remote_service=True,
            service_type=ServiceType.ASR,
        )
        llm_service = MicroService(
            name="llm",
            host=LLM_SERVER_HOST_IP,
            port=LLM_SERVER_PORT,
            endpoint="/v1/chat/completions",
            use_remote_service=True,
            service_type=ServiceType.LLM,
        )
        tts_service = MicroService(
            name="tts",
            host=GPT_SOVITS_SERVER_HOST_IP,
            port=GPT_SOVITS_SERVER_PORT,
            # endpoint="/v1/audio/speech",
            endpoint="/",
            use_remote_service=True,
            service_type=ServiceType.TTS,
        )
        self.megaservice.add(asr_service).add(llm_service).add(tts_service)
        self.megaservice.flow_to(asr_service, llm_service)
        self.megaservice.flow_to(llm_service, tts_service)
        self.gateway = AudioQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
95+
if __name__ == "__main__":
    # Build the megaservice and register the remote ASR/LLM/TTS services.
    # NOTE(review): no explicit serve/run call follows — presumably AudioQnAGateway
    # (created inside add_remote_service) starts the HTTP server itself; confirm.
    audioqna = AudioQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
    audioqna.add_remote_service()

AudioQnA/docker_compose/intel/cpu/xeon/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,5 +131,5 @@ curl http://${host_ip}:3002/v1/audio/speech \
131131
curl http://${host_ip}:3008/v1/audioqna \
132132
-X POST \
133133
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
134-
-H 'Content-Type: application/json'
134+
-H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
135135
```
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  # Speech-to-text (ASR) microservice.
  whisper-service:
    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
    container_name: whisper-service
    ports:
      - "7066:7066"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    restart: unless-stopped
    command: --language "zh"  # default transcription language
  # Text-to-speech (TTS) microservice.
  gpt-sovits-service:
    image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
    container_name: gpt-sovits-service
    ports:
      # NOTE(review): published on 9880, but the backend's built-in default is 9088 —
      # GPT_SOVITS_SERVER_PORT must be set accordingly; verify the env wiring.
      - "9880:9880"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    restart: unless-stopped
  # LLM inference via Hugging Face TGI (Intel CPU build).
  tgi-service:
    image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
    container_name: tgi-service
    ports:
      - "3006:80"
    volumes:
      - "./data:/data"  # model cache persisted on the host
    shm_size: 1g
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
  # AudioQnA megaservice gateway (audioqna_multilang.py) orchestrating ASR -> LLM -> TTS.
  audioqna-xeon-backend-server:
    image: ${REGISTRY:-opea}/audioqna-multilang:${TAG:-latest}
    container_name: audioqna-xeon-backend-server
    ports:
      - "3008:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
      - LLM_SERVER_PORT=${LLM_SERVER_PORT}
      - LLM_MODEL_ID=${LLM_MODEL_ID}
      - WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
      - WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
      - GPT_SOVITS_SERVER_HOST_IP=${GPT_SOVITS_SERVER_HOST_IP}
      - GPT_SOVITS_SERVER_PORT=${GPT_SOVITS_SERVER_PORT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge

AudioQnA/docker_image_build/build.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,9 @@ services:
5353
dockerfile: comps/tts/speecht5/Dockerfile
5454
extends: audioqna
5555
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
  # Build the GPT-SoVITS TTS image from the GenAIComps checkout, inheriting
  # the shared build settings from the audioqna service.
  gpt-sovits:
    build:
      context: GenAIComps
      dockerfile: comps/tts/gpt-sovits/Dockerfile
    extends: audioqna
    image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}

0 commit comments

Comments
 (0)