
Commit 9f36e84

Refactor AudioQnA README (#1508)

Signed-off-by: Spycsh <sihan.chen@intel.com>

1 parent 8c547c2 commit 9f36e84

File tree: 3 files changed, +34 −10 lines changed


AudioQnA/docker_compose/intel/cpu/xeon/README.md

Lines changed: 19 additions & 9 deletions

````diff
@@ -25,6 +25,9 @@ Intel Xeon optimized image hosted in huggingface repo will be used for TGI servi
 
 ```bash
 docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile .
+
+# multilang tts (optional)
+docker build -t opea/gpt-sovits:latest --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -f comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile .
 ```
 
 ### 5. Build MegaService Docker Image
````
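The two TTS image builds in the hunk above differ only in the integration directory name. A dry-run convenience loop (not part of the commit) that prints both commands; remove the leading `echo` to actually build:

```shell
# Dry-run sketch: print the build command for each TTS integration.
# speecht5 and gpt-sovits are the directory names used in the README above.
for svc in speecht5 gpt-sovits; do
  echo docker build -t "opea/${svc}:latest" \
    --build-arg http_proxy="$http_proxy" --build-arg https_proxy="$https_proxy" \
    -f "comps/tts/src/integrations/dependency/${svc}/Dockerfile" .
done
```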
````diff
@@ -42,6 +45,7 @@ Then run the command `docker images`, you will have following images ready:
 1. `opea/whisper:latest`
 2. `opea/speecht5:latest`
 3. `opea/audioqna:latest`
+4. `opea/gpt-sovits:latest` (optional)
 
 ## 🚀 Set the environment variables
 
````
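A quick way to confirm the four images listed above actually exist is to grep a `docker images` listing for each tag. A hypothetical check, shown against a hard-coded listing so it runs without Docker; on a real build host, replace `listing` with the output of `docker images --format '{{.Repository}}:{{.Tag}}'`:

```shell
# Hypothetical sanity check: every expected image tag should be present.
# "listing" stands in for real `docker images` output.
listing="opea/whisper:latest
opea/speecht5:latest
opea/audioqna:latest
opea/gpt-sovits:latest"

missing=0
for img in opea/whisper:latest opea/speecht5:latest opea/audioqna:latest opea/gpt-sovits:latest; do
  printf '%s\n' "$listing" | grep -qx "$img" || { echo "missing: $img"; missing=1; }
done
[ "$missing" -eq 0 ] && echo "all images present"
```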
````diff
@@ -57,9 +61,11 @@ export MEGA_SERVICE_HOST_IP=${host_ip}
 export WHISPER_SERVER_HOST_IP=${host_ip}
 export SPEECHT5_SERVER_HOST_IP=${host_ip}
 export LLM_SERVER_HOST_IP=${host_ip}
+export GPT_SOVITS_SERVER_HOST_IP=${host_ip}
 
 export WHISPER_SERVER_PORT=7066
 export SPEECHT5_SERVER_PORT=7055
+export GPT_SOVITS_SERVER_PORT=9880
 export LLM_SERVER_PORT=3006
 
 export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna
````
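A minimal sketch of how these exports compose (the `host_ip` value here is a placeholder for illustration; the ports are the ones from the hunk above):

```shell
# host_ip must be the machine's external IP, not localhost/127.0.0.1.
host_ip=192.168.1.1   # placeholder value, replace with your own
export GPT_SOVITS_SERVER_HOST_IP=${host_ip}
export GPT_SOVITS_SERVER_PORT=9880
# The megaservice endpoint is host_ip plus the fixed backend port 3008:
export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna
echo "$BACKEND_SERVICE_ENDPOINT"
```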
````diff
@@ -74,16 +80,20 @@ Note: Please replace with host_ip with your external IP address, do not use loca
 ```bash
 cd GenAIExamples/AudioQnA/docker_compose/intel/cpu/xeon/
 docker compose up -d
+
+# multilang tts (optional)
+docker compose -f compose_multilang.yaml up -d
 ```
 
 ## 🚀 Test MicroServices
 
 ```bash
 # whisper service
-curl http://${host_ip}:7066/v1/asr \
-  -X POST \
-  -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-  -H 'Content-Type: application/json'
+wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav
+curl http://${host_ip}:7066/v1/audio/transcriptions \
+  -H "Content-Type: multipart/form-data" \
+  -F file="@./sample.wav" \
+  -F model="openai/whisper-small"
 
 # tgi service
 curl http://${host_ip}:3006/generate \
````
````diff
@@ -92,11 +102,10 @@ curl http://${host_ip}:3006/generate \
   -H 'Content-Type: application/json'
 
 # speecht5 service
-curl http://${host_ip}:7055/v1/tts \
-  -X POST \
-  -d '{"text": "Who are you?"}' \
-  -H 'Content-Type: application/json'
+curl http://${host_ip}:7055/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3
 
+# gpt-sovits service (optional)
+curl http://${host_ip}:9880/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3
 ```
 
 ## 🚀 Test MegaService
````
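Both TTS curls above write whatever body the service returns to `speech.mp3`. A hypothetical helper (not from the commit) that inspects the file's first bytes to see which audio container you actually received, demonstrated here on a fabricated RIFF header so it runs without the services:

```shell
# Hypothetical helper: classify an audio file by its magic bytes.
audio_kind() {
  case "$(head -c 4 "$1")" in
    RIFF) echo wav ;;      # RIFF/WAVE container
    ID3*) echo mp3 ;;      # ID3-tagged mp3
    *)    echo unknown ;;
  esac
}

# Demo on a fabricated WAV-style header (stand-in for a real speech.mp3):
printf 'RIFF....WAVEfmt ' > sample_header.bin
audio_kind sample_header.bin
```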
````diff
@@ -106,7 +115,8 @@ base64 string to the megaservice endpoint. The megaservice will return a spoken
 to the response, decode the base64 string and save it as a .wav file.
 
 ```bash
-# voice can be "default" or "male"
+# if you are using speecht5 as the tts service, voice can be "default" or "male"
+# if you are using gpt-sovits for the tts service, you can set the reference audio following https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/integrations/dependency/gpt-sovits/README.md
 curl http://${host_ip}:3008/v1/audioqna \
   -X POST \
   -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64, "voice":"default"}' \
````
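The decode step the README describes (decode the base64 string, save it as a .wav file) can be sketched with the sample payload from the request above, which decodes to a RIFF/WAVE header; in practice you would first extract the base64 audio field from the megaservice JSON response:

```shell
# Decode a base64 audio string to a .wav file and check the RIFF signature.
# b64 is the sample payload from the curl request above.
b64="UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"
printf '%s' "$b64" | base64 -d > output.wav
head -c 4 output.wav; echo    # a valid WAV file starts with "RIFF"
```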

AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml

Lines changed: 14 additions & 1 deletion

````diff
@@ -58,7 +58,20 @@ services:
       - GPT_SOVITS_SERVER_PORT=${GPT_SOVITS_SERVER_PORT}
     ipc: host
     restart: always
-
+  audioqna-xeon-ui-server:
+    image: ${REGISTRY:-opea}/audioqna-ui:${TAG:-latest}
+    container_name: audioqna-xeon-ui-server
+    depends_on:
+      - audioqna-xeon-backend-server
+    ports:
+      - "5175:5173"
+    environment:
+      - no_proxy=${no_proxy}
+      - https_proxy=${https_proxy}
+      - http_proxy=${http_proxy}
+      - CHAT_URL=${BACKEND_SERVICE_ENDPOINT}
+    ipc: host
+    restart: always
 networks:
   default:
     driver: bridge
````

docker_images_list.md

Lines changed: 1 addition & 0 deletions

````diff
@@ -65,6 +65,7 @@ Take ChatQnA for example. ChatQnA is a chatbot application service based on the
 | [opea/feedbackmanagement-mongo](https://hub.docker.com/r/opea/feedbackmanagement-mongo) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/feedback_management/src/Dockerfile) | The docker image exposes that the OPEA feedback management microservice uses a MongoDB database for GenAI applications. |
 | [opea/finetuning](https://hub.docker.com/r/opea/finetuning) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/finetuning/src/Dockerfile) | The docker image exposed the OPEA Fine-tuning microservice for GenAI application use |
 | [opea/finetuning-gaudi](https://hub.docker.com/r/opea/finetuning-gaudi) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/finetuning/src/Dockerfile.intel_hpu) | The docker image exposed the OPEA Fine-tuning microservice for GenAI application use on the Gaudi |
+| [opea/gpt-sovits](https://hub.docker.com/r/opea/gpt-sovits) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile) | The docker image exposed the OPEA GPT-SoVITS service for GenAI application use |
 | [opea/guardrails](https://hub.docker.com/r/opea/guardrails) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/guardrails/Dockerfile) | The docker image exposed the OPEA guardrail microservice for GenAI application use |
 | [opea/guardrails-toxicity-predictionguard](https://hub.docker.com/r/opea/guardrails-toxicity-predictionguard) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/toxicity_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide toxicity detection for GenAI application use |
 | [opea/guardrails-pii-predictionguard](https://hub.docker.com/r/opea/guardrails-pii-predictionguard) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/guardrails/src/pii_detection/Dockerfile) | The docker image exposed the OPEA guardrail microservice to provide PII detection for GenAI application use |
````
