Commit 464e2d3

Rename streaming to stream to align with OpenAI API (#1332)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
1 parent 1f29eca commit 464e2d3
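The change itself is mechanical: every request payload, YAML config, and gateway parameter previously named streaming is now named stream, the field name the OpenAI Chat Completions API uses. As a rough illustration only (the host, port, and payload below are placeholders, not taken from this commit), a request to an OPEA megaservice after the rename would look like:

import requests

# Hypothetical endpoint; substitute the address of your deployed megaservice.
url = "http://localhost:8888/v1/chatqna"

payload = {
    "messages": "What is OPEA?",
    "max_tokens": 128,
    "stream": False,  # previously "streaming"; renamed to match the OpenAI API
}

print(requests.post(url, json=payload, timeout=120).json())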

53 files changed: +70 -57 lines changed

Note: this is a large commit, so some changed files are hidden by default and are not shown below.

AgentQnA/docker_compose/amd/gpu/rocm/compose.yaml

Lines changed: 2 additions & 2 deletions
@@ -49,7 +49,7 @@ services:
 model: ${LLM_MODEL_ID}
 temperature: ${temperature}
 max_new_tokens: ${max_new_tokens}
-streaming: false
+stream: false
 tools: /home/user/tools/worker_agent_tools.yaml
 require_human_feedback: false
 RETRIEVAL_TOOL_URL: ${RETRIEVAL_TOOL_URL}
@@ -83,7 +83,7 @@ services:
 model: ${LLM_MODEL_ID}
 temperature: ${temperature}
 max_new_tokens: ${max_new_tokens}
-streaming: false
+stream: false
 tools: /home/user/tools/supervisor_agent_tools.yaml
 require_human_feedback: false
 no_proxy: ${no_proxy}

AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml

Lines changed: 2 additions & 2 deletions
@@ -19,7 +19,7 @@ services:
 model: ${model}
 temperature: ${temperature}
 max_new_tokens: ${max_new_tokens}
-streaming: false
+stream: false
 tools: /home/user/tools/worker_agent_tools.yaml
 require_human_feedback: false
 RETRIEVAL_TOOL_URL: ${RETRIEVAL_TOOL_URL}
@@ -51,7 +51,7 @@ services:
 model: ${model}
 temperature: ${temperature}
 max_new_tokens: ${max_new_tokens}
-streaming: false
+stream: false
 tools: /home/user/tools/supervisor_agent_tools.yaml
 require_human_feedback: false
 no_proxy: ${no_proxy}

AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml

Lines changed: 2 additions & 2 deletions
@@ -21,7 +21,7 @@ services:
 model: ${LLM_MODEL_ID}
 temperature: ${temperature}
 max_new_tokens: ${max_new_tokens}
-streaming: false
+stream: false
 tools: /home/user/tools/worker_agent_tools.yaml
 require_human_feedback: false
 RETRIEVAL_TOOL_URL: ${RETRIEVAL_TOOL_URL}
@@ -55,7 +55,7 @@ services:
 model: ${LLM_MODEL_ID}
 temperature: ${temperature}
 max_new_tokens: ${max_new_tokens}
-streaming: false
+stream: false
 tools: /home/user/tools/supervisor_agent_tools.yaml
 require_human_feedback: false
 no_proxy: ${no_proxy}

AgentQnA/tests/step2_start_retrieval_tool.sh

Lines changed: 1 addition & 0 deletions
@@ -7,6 +7,7 @@ WORKPATH=$(dirname "$PWD")
 export WORKDIR=$WORKPATH/../../
 echo "WORKDIR=${WORKDIR}"
 export ip_address=$(hostname -I | awk '{print $1}')
+export host_ip=${ip_address}

 export HF_CACHE_DIR=$WORKDIR/hf_cache
 if [ ! -d "$HF_CACHE_DIR" ]; then

AgentQnA/tests/test_compose_on_gaudi.sh

Lines changed: 2 additions & 2 deletions
@@ -2,7 +2,7 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

-set -e
+set -xe

 WORKPATH=$(dirname "$PWD")
 export WORKDIR=$WORKPATH/../../
@@ -82,4 +82,4 @@ echo "=================== #5 Agent and API server stopped===================="

 echo y | docker system prune

-echo "ALL DONE!"
+echo "ALL DONE!!"

AgentQnA/tests/test_compose_on_rocm.sh

Lines changed: 2 additions & 2 deletions
@@ -2,7 +2,7 @@
 # Copyright (C) 2024 Advanced Micro Devices, Inc.
 # SPDX-License-Identifier: Apache-2.0

-set -e
+set -xe

 WORKPATH=$(dirname "$PWD")
 export WORKDIR=$WORKPATH/../../
@@ -72,4 +72,4 @@ echo "=================== #5 Agent and API server stopped===================="

 echo y | docker system prune

-echo "ALL DONE!"
+echo "ALL DONE!!"

AudioQnA/audioqna.py

Lines changed: 2 additions & 2 deletions
@@ -26,7 +26,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
 next_inputs["messages"] = [{"role": "user", "content": inputs["asr_result"]}]
 next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
 next_inputs["top_p"] = llm_parameters_dict["top_p"]
-next_inputs["stream"] = inputs["streaming"] # False as default
+next_inputs["stream"] = inputs["stream"] # False as default
 next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
 # next_inputs["presence_penalty"] = inputs["presence_penalty"]
 # next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
@@ -91,7 +91,7 @@ async def handle_request(self, request: Request):
 frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0,
 presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
 repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
-streaming=False, # TODO add streaming LLM output as input to TTS
+stream=False, # TODO add stream LLM output as input to TTS
 )
 result_dict, runtime_graph = await self.megaservice.schedule(
 initial_inputs={"audio": chat_request.audio},

AudioQnA/audioqna_multilang.py

Lines changed: 2 additions & 2 deletions
@@ -28,7 +28,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
 next_inputs["messages"] = [{"role": "user", "content": inputs["asr_result"]}]
 next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
 next_inputs["top_p"] = llm_parameters_dict["top_p"]
-next_inputs["stream"] = inputs["streaming"] # False as default
+next_inputs["stream"] = inputs["stream"] # False as default
 next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
 # next_inputs["presence_penalty"] = inputs["presence_penalty"]
 # next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
@@ -103,7 +103,7 @@ async def handle_request(self, request: Request):
 frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0,
 presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
 repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
-streaming=False, # TODO add streaming LLM output as input to TTS
+stream=False, # TODO add stream LLM output as input to TTS
 )
 result_dict, runtime_graph = await self.megaservice.schedule(
 initial_inputs={"audio": chat_request.audio}, llm_parameters=parameters

AudioQnA/benchmark/performance/benchmark.yaml

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@ test_cases:
 top_k: 10
 top_p: 0.95
 repetition_penalty: 1.03
-streaming: true
+stream: true
 llmserve:
 run_test: true
 service_name: "llm-svc" # Replace with your service name

AudioQnA/docker_compose/amd/gpu/rocm/compose.yaml

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@ services:
 ipc: host
 audioqna-backend-server:
 image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
-container_name: audioqna-xeon-backend-server
+container_name: audioqna-rocm-backend-server
 depends_on:
 - whisper-service
 - tgi-service

AudioQnA/kubernetes/intel/README_gmc.md

Lines changed: 1 addition & 1 deletion
@@ -66,7 +66,7 @@ This involves deploying the AudioQnA custom resource. You can use audioQnA_xeon.
 ```sh
 export CLIENT_POD=$(kubectl get pod -n audioqa -l app=client-test -o jsonpath={.items..metadata.name})
 export accessUrl=$(kubectl get gmc -n audioqa -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
-kubectl exec "$CLIENT_POD" -n audioqa -- curl -s --no-buffer $accessUrl -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json'
+kubectl exec "$CLIENT_POD" -n audioqa -- curl -s --no-buffer $accessUrl -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "stream":false}}' -H 'Content-Type: application/json'
 ```

 > [NOTE]

AudioQnA/tests/test_compose_on_gaudi.sh

Lines changed: 1 addition & 0 deletions
@@ -44,6 +44,7 @@ function start_services() {
 # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

 # Start Docker Containers
+sed -i "s|container_name: audioqna-gaudi-backend-server|container_name: audioqna-gaudi-backend-server\n volumes:\n - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
 docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
 n=0
 until [[ "$n" -ge 200 ]]; do

AudioQnA/tests/test_compose_on_rocm.sh

Lines changed: 2 additions & 1 deletion
@@ -46,6 +46,7 @@ function start_services() {
 # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

 # Start Docker Containers
+sed -i "s|container_name: audioqna-rocm-backend-server|container_name: audioqna-rocm-backend-server\n volumes:\n - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
 docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
 n=0
 until [[ "$n" -ge 200 ]]; do
@@ -63,7 +64,7 @@ function validate_megaservice() {
 docker logs whisper-service > $LOG_PATH/whisper-service.log
 docker logs speecht5-service > $LOG_PATH/tts-service.log
 docker logs tgi-service > $LOG_PATH/tgi-service.log
-docker logs audioqna-xeon-backend-server > $LOG_PATH/audioqna-xeon-backend-server.log
+docker logs audioqna-rocm-backend-server > $LOG_PATH/audioqna-rocm-backend-server.log
 echo "$response" | sed 's/^"//;s/"$//' | base64 -d > speech.mp3

 if [[ $(file speech.mp3) == *"RIFF"* ]]; then

AudioQnA/tests/test_compose_on_xeon.sh

Lines changed: 1 addition & 0 deletions
@@ -45,6 +45,7 @@ function start_services() {
 # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

 # Start Docker Containers
+sed -i "s|container_name: audioqna-xeon-backend-server|container_name: audioqna-xeon-backend-server\n volumes:\n - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
 docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
 n=0
 until [[ "$n" -ge 200 ]]; do

AudioQnA/tests/test_gmc_on_gaudi.sh

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ function validate_audioqa() {
 export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
 echo "$CLIENT_POD"
 accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
-byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
+byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_tokens":64, "do_sample": true, "stream":false}}' -H 'Content-Type: application/json' | jq .byte_str)
 echo "$byte_str" > $LOG_PATH/curl_audioqa.log
 if [ -z "$byte_str" ]; then
 echo "audioqa failed, please check the logs in ${LOG_PATH}!"

AudioQnA/tests/test_gmc_on_xeon.sh

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ function validate_audioqa() {
 export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
 echo "$CLIENT_POD"
 accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}")
-byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str)
+byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_tokens":64, "do_sample": true, "stream":false}}' -H 'Content-Type: application/json' | jq .byte_str)
 echo "$byte_str" > $LOG_PATH/curl_audioqa.log
 if [ -z "$byte_str" ]; then
 echo "audioqa failed, please check the logs in ${LOG_PATH}!"

AvatarChatbot/avatarchatbot.py

Lines changed: 2 additions & 2 deletions
@@ -29,7 +29,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
 next_inputs["messages"] = [{"role": "user", "content": inputs["asr_result"]}]
 next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
 next_inputs["top_p"] = llm_parameters_dict["top_p"]
-next_inputs["stream"] = inputs["streaming"] # False as default
+next_inputs["stream"] = inputs["stream"] # False as default
 next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
 # next_inputs["presence_penalty"] = inputs["presence_penalty"]
 # next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
@@ -112,7 +112,7 @@ async def handle_request(self, request: Request):
 top_p=chat_request.top_p if chat_request.top_p else 0.95,
 temperature=chat_request.temperature if chat_request.temperature else 0.01,
 repetition_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 1.03,
-streaming=False, # TODO add streaming LLM output as input to TTS
+stream=False, # TODO add stream LLM output as input to TTS
 )
 # print(parameters)

AvatarChatbot/tests/test_compose_on_gaudi.sh

Lines changed: 1 addition & 0 deletions
@@ -71,6 +71,7 @@ function start_services() {
 export FPS=10

 # Start Docker Containers
+sed -i "s|container_name: avatarchatbot-gaudi-backend-server|container_name: avatarchatbot-gaudi-backend-server\n volumes:\n - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
 docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
 n=0
 until [[ "$n" -ge 200 ]]; do

AvatarChatbot/tests/test_compose_on_xeon.sh

Lines changed: 1 addition & 0 deletions
@@ -71,6 +71,7 @@ function start_services() {
 export FPS=10

 # Start Docker Containers
+sed -i "s|container_name: avatarchatbot-xeon-backend-server|container_name: avatarchatbot-xeon-backend-server\n volumes:\n - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
 docker compose up -d
 n=0
 until [[ "$n" -ge 100 ]]; do

ChatQnA/benchmark/performance/kubernetes/intel/gaudi/benchmark.yaml

Lines changed: 1 addition & 1 deletion
@@ -58,7 +58,7 @@ test_cases:
 top_k: 10
 top_p: 0.95
 repetition_penalty: 1.03
-streaming: true
+stream: true
 llmserve:
 run_test: false
 service_name: "chatqna-tgi" # Replace with your service name

ChatQnA/chatqna.py

Lines changed: 3 additions & 3 deletions
@@ -76,7 +76,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
 next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
 next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
 next_inputs["top_p"] = llm_parameters_dict["top_p"]
-next_inputs["stream"] = inputs["streaming"]
+next_inputs["stream"] = inputs["stream"]
 next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
 # next_inputs["presence_penalty"] = inputs["presence_penalty"]
 # next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
@@ -158,7 +158,7 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di

 next_data["inputs"] = prompt

-elif self.services[cur_node].service_type == ServiceType.LLM and not llm_parameters_dict["streaming"]:
+elif self.services[cur_node].service_type == ServiceType.LLM and not llm_parameters_dict["stream"]:
 next_data["text"] = data["choices"][0]["message"]["content"]
 else:
 next_data = data
@@ -342,7 +342,7 @@ async def handle_request(self, request: Request):
 frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0,
 presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
 repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
-streaming=stream_opt,
+stream=stream_opt,
 chat_template=chat_request.chat_template if chat_request.chat_template else None,
 )
 retriever_parameters = RetrieverParms(

ChatQnA/chatqna_wrapper.py

Lines changed: 1 addition & 1 deletion
@@ -86,7 +86,7 @@ async def handle_request(self, request: Request):
 frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0,
 presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
 repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
-streaming=stream_opt,
+stream=stream_opt,
 chat_template=chat_request.chat_template if chat_request.chat_template else None,
 )
 retriever_parameters = RetrieverParms(

ChatQnA/tests/test_compose_on_gaudi.sh

Lines changed: 1 addition & 0 deletions
@@ -38,6 +38,7 @@ function start_services() {
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}

 # Start Docker Containers
+sed -i "s|container_name: chatqna-gaudi-backend-server|container_name: chatqna-gaudi-backend-server\n volumes:\n - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
 docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

 n=0

ChatQnA/tests/test_compose_on_rocm.sh

Lines changed: 1 addition & 0 deletions
@@ -65,6 +65,7 @@ function start_services() {
 cd "$WORKPATH"/docker_compose/amd/gpu/rocm

 # Start Docker Containers
+sed -i "s|container_name: chatqna-backend-server|container_name: chatqna-backend-server\n volumes:\n - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
 docker compose -f compose.yaml up -d > "${LOG_PATH}"/start_services_with_compose.log

 n=0

ChatQnA/tests/test_compose_on_xeon.sh

Lines changed: 1 addition & 0 deletions
@@ -38,6 +38,7 @@ function start_services() {
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}

 # Start Docker Containers
+sed -i "s|container_name: chatqna-xeon-backend-server|container_name: chatqna-xeon-backend-server\n volumes:\n - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
 docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

 n=0

CodeGen/benchmark/performance/benchmark.yaml

Lines changed: 1 addition & 1 deletion
@@ -38,7 +38,7 @@ test_cases:
 top_k: 10
 top_p: 0.95
 repetition_penalty: 1.03
-streaming: true
+stream: true
 llmserve:
 run_test: true
 service_name: "llm-svc" # Replace with your service name

CodeGen/codegen.py

Lines changed: 1 addition & 1 deletion
@@ -53,7 +53,7 @@ async def handle_request(self, request: Request):
 frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0,
 presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0,
 repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
-streaming=stream_opt,
+stream=stream_opt,
 )
 result_dict, runtime_graph = await self.megaservice.schedule(
 initial_inputs={"query": prompt}, llm_parameters=parameters

CodeGen/docker_compose/amd/gpu/rocm/README.md

Lines changed: 1 addition & 1 deletion
@@ -113,7 +113,7 @@ curl http://${HOST_IP}:${CODEGEN_TGI_SERVICE_PORT}/generate \
 ```bash
 curl http://${HOST_IP}:${CODEGEN_LLM_SERVICE_PORT}/v1/chat/completions\
 -X POST \
--d '{"query":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","max_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+-d '{"query":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","max_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"stream":true}' \
 -H 'Content-Type: application/json'
 ```

CodeGen/docker_compose/intel/cpu/xeon/README.md

Lines changed: 2 additions & 2 deletions
@@ -138,7 +138,7 @@ docker compose up -d
 ```bash
 curl http://${host_ip}:9000/v1/chat/completions\
 -X POST \
--d '{"query":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","max_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+-d '{"query":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","max_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"stream":true}' \
 -H 'Content-Type: application/json'
 ```

@@ -250,7 +250,7 @@ There are 4 areas worth noting as shown in the screenshot above:

 1. Enter and submit your question
 2. Your previous questions
-3. Answers from AI assistant (Code will be highlighted properly according to the programming language it is written in, also support streaming output)
+3. Answers from AI assistant (Code will be highlighted properly according to the programming language it is written in, also support stream output)
 4. Copy or replace code with one click (Note that you need to select the code in the editor first and then click "replace", otherwise the code will be inserted)

 You can also select the code in the editor and ask the AI assistant questions about the code directly.
