curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
  -H "Content-Type: application/json"
```

### Profile Microservices

To further analyze MicroService performance, users can follow the instructions below to profile MicroServices.

#### 1. vLLM backend Service

Users can follow the previous section to test the vLLM MicroService or the ChatQnA MegaService.
By default, vLLM profiling is not enabled. Users can start and stop profiling with the following commands.

##### Start vLLM profiling

```bash
curl http://${host_ip}:9009/start_profile \
  -H "Content-Type: application/json" \
  -d '{"model": "Intel/neural-chat-7b-v3-3"}'
```

Users should see the docker logs below from vllm-service if profiling started correctly.

```bash
INFO api_server.py:361] Starting profiler...
INFO api_server.py:363] Profiler started.
INFO: x.x.x.x:35940 - "POST /start_profile HTTP/1.1" 200 OK
```

After vLLM profiling is started, users can start asking questions and get responses from the vLLM MicroService
or the ChatQnA MegaService.

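For example, a completion request like the one below keeps the profiler busy while it runs (a minimal sketch: it assumes the vllm-service on port `9009` exposes the standard OpenAI-compatible `/v1/completions` route, which is not stated in this section):

```bash
# Hedged example: send one completion request to the vLLM service while the
# profiler is active, so the trace captures real inference work.
# Assumes the OpenAI-compatible /v1/completions route on port 9009.
curl http://${host_ip}:9009/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is deep learning?", "max_tokens": 32}'
```
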
##### Stop vLLM profiling

With the following command, users can stop vLLM profiling and generate a \*.pt.trace.json.gz file as the profiling result
under the /mnt folder in the vllm-service docker instance.

```bash
# vLLM Service
curl http://${host_ip}:9009/stop_profile \
  -H "Content-Type: application/json" \
  -d '{"model": "Intel/neural-chat-7b-v3-3"}'
```

Users should see the docker logs below from vllm-service if profiling stopped correctly.

```bash
INFO api_server.py:368] Stopping profiler...
INFO api_server.py:370] Profiler stopped.
INFO: x.x.x.x:41614 - "POST /stop_profile HTTP/1.1" 200 OK
```

After vLLM profiling is stopped, users can use the command below to copy the \*.pt.trace.json.gz file out of the /mnt folder.

```bash
docker cp vllm-service:/mnt/ .
```
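
Once copied, a quick listing confirms the trace reached the host (a minimal sketch; the exact trace file names are generated by vLLM and will differ per run):

```bash
# `docker cp` above copies the container's /mnt directory to ./mnt on the host;
# list the trace files it contains (file names are generated by vLLM).
ls -lh mnt/*.pt.trace.json.gz
```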

##### Check profiling result

Open a web browser and go to `chrome://tracing` or `ui.perfetto.dev`, then load the json.gz file. You should be able
to see the vLLM profiling result as in the diagram below.
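
If the trace viewer does not accept the compressed file directly, it can be decompressed first with standard gzip (a minimal sketch; nothing vLLM-specific is assumed):

```bash
# Decompress the trace while keeping the original .gz file (-k).
gunzip -k mnt/*.pt.trace.json.gz
```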

## 🚀 Launch the UI
### Launch with origin port