
Commit 3fb6060

Use official tei gaudi image and update tgi gaudi version (opea-project#810)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent c35fe0b commit 3fb6060

File tree

72 files changed (+8024, -154 lines)


.github/workflows/_example-workflow.yml

Lines changed: 0 additions & 4 deletions

```diff
@@ -64,10 +64,6 @@ jobs:
         run: |
           cd ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
           docker_compose_path=${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
-          if [[ $(grep -c "tei-gaudi:" ${docker_compose_path}) != 0 ]]; then
-            git clone https://github.com/huggingface/tei-gaudi.git
-            cd tei-gaudi && git rev-parse HEAD && cd ../
-          fi
           if [[ $(grep -c "vllm:" ${docker_compose_path}) != 0 ]]; then
             git clone https://github.com/vllm-project/vllm.git
             cd vllm && git rev-parse HEAD && cd ../
```
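The deleted tei-gaudi branch mirrored the vllm branch that remains: grep the build file for a service reference, and clone its source only when found. A standalone sketch of that pattern, using an illustrative build.yaml rather than a real example's file:

```shell
# Sketch of the workflow's grep-and-clone pattern; the build.yaml content
# here is illustrative, not taken from the repository.
docker_compose_path=$(mktemp)
printf 'services:\n  vllm:\n    build: ./vllm\n' > "${docker_compose_path}"

# grep -c prints the number of matching lines; it exits non-zero when the
# count is 0, hence the `|| true` guard on the query that may find nothing.
vllm_refs=$(grep -c "vllm:" "${docker_compose_path}")
tei_refs=$(grep -c "tei-gaudi:" "${docker_compose_path}") || true

if [ "${vllm_refs}" != 0 ]; then
  echo "vllm referenced: the workflow would clone vllm-project/vllm here"
fi
if [ "${tei_refs}" = 0 ]; then
  echo "no tei-gaudi clone needed: the official prebuilt image is pulled instead"
fi
rm -f "${docker_compose_path}"
```

After this commit, no branch of the conditional builds tei-gaudi from source; the official prebuilt image is used.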

AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml

Lines changed: 6 additions & 2 deletions

```diff
@@ -3,7 +3,7 @@

 services:
   tgi-server:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.4
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
     container_name: tgi-server
     ports:
       - "8085:80"
@@ -13,12 +13,16 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
       HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       PT_HPU_ENABLE_LAZY_COLLECTIVES: true
+      ENABLE_HPU_GRAPH: true
+      LIMIT_HPU_GRAPH: true
+      USE_FLASH_ATTENTION: true
+      FLASH_ATTENTION_RECOMPUTE: true
     runtime: habana
     cap_add:
       - SYS_NICE
```
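Alongside the version bump, the diff renames the in-container token variable from HF_TOKEN to HUGGING_FACE_HUB_TOKEN while the host still exports HUGGINGFACEHUB_API_TOKEN. A minimal sketch of that mapping, with a placeholder value rather than a real credential:

```shell
# Placeholder token standing in for the host's exported credential.
HUGGINGFACEHUB_API_TOKEN="hf_placeholder"

# compose maps the host variable onto the name the tgi-gaudi container reads,
# per this diff; only the variable name changes, not the value.
HUGGING_FACE_HUB_TOKEN="${HUGGINGFACEHUB_API_TOKEN}"
echo "container env: HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN}"
```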

AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml

Lines changed: 6 additions & 2 deletions

```diff
@@ -51,7 +51,7 @@ services:
     environment:
       TTS_ENDPOINT: ${TTS_ENDPOINT}
   tgi-service:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.1
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
     container_name: tgi-gaudi-server
     ports:
       - "3006:80"
@@ -61,11 +61,15 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       HF_HUB_DISABLE_PROGRESS_BARS: 1
       HF_HUB_ENABLE_HF_TRANSFER: 0
       HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
+      ENABLE_HPU_GRAPH: true
+      LIMIT_HPU_GRAPH: true
+      USE_FLASH_ATTENTION: true
+      FLASH_ATTENTION_RECOMPUTE: true
     runtime: habana
     cap_add:
       - SYS_NICE
```

AudioQnA/kubernetes/intel/README_gmc.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -25,7 +25,7 @@ The AudioQnA uses the below prebuilt images if you choose a Xeon deployment
 Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
 For Gaudi:

-- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1
+- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.5
 - whisper-gaudi: opea/whisper-gaudi:latest
 - speecht5-gaudi: opea/speecht5-gaudi:latest
```

AudioQnA/kubernetes/intel/hpu/gaudi/manifest/audioqna.yaml

Lines changed: 10 additions & 2 deletions

```diff
@@ -271,7 +271,7 @@ spec:
       - envFrom:
         - configMapRef:
             name: audio-qna-config
-        image: ghcr.io/huggingface/tgi-gaudi:2.0.1
+        image: ghcr.io/huggingface/tgi-gaudi:2.0.5
         name: llm-dependency-deploy-demo
         securityContext:
           capabilities:
@@ -303,6 +303,14 @@ spec:
           value: none
         - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
           value: 'true'
+        - name: ENABLE_HPU_GRAPH
+          value: 'true'
+        - name: LIMIT_HPU_GRAPH
+          value: 'true'
+        - name: USE_FLASH_ATTENTION
+          value: 'true'
+        - name: FLASH_ATTENTION_RECOMPUTE
+          value: 'true'
         - name: runtime
           value: habana
         - name: HABANA_VISIBLE_DEVICES
@@ -315,7 +323,7 @@ spec:
       volumes:
       - name: model-volume
         hostPath:
-          path: /home/sdp/cesg
+          path: /mnt/models
         type: Directory
       - name: shm
         emptyDir:
```
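The manifest change adds the same four HPU-graph and flash-attention flags as the compose files. A hedged pre-apply check that all four are present in a rendered manifest, using a sample fragment in place of the repository file:

```shell
# Sample manifest fragment standing in for the repo's audioqna.yaml;
# in practice, point `manifest` at the real rendered file.
manifest=$(mktemp)
cat > "${manifest}" <<'EOF'
- name: ENABLE_HPU_GRAPH
  value: 'true'
- name: LIMIT_HPU_GRAPH
  value: 'true'
- name: USE_FLASH_ATTENTION
  value: 'true'
- name: FLASH_ATTENTION_RECOMPUTE
  value: 'true'
EOF

# Count how many of the four expected flags are absent.
missing=0
for flag in ENABLE_HPU_GRAPH LIMIT_HPU_GRAPH USE_FLASH_ATTENTION FLASH_ATTENTION_RECOMPUTE; do
  grep -q "name: ${flag}" "${manifest}" || missing=$((missing + 1))
done
echo "missing flags: ${missing}"
rm -f "${manifest}"
```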

AudioQnA/tests/test_compose_on_gaudi.sh

Lines changed: 1 addition & 1 deletion

```diff
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="audioqna whisper-gaudi asr llm-tgi speecht5-gaudi tts"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
     docker images && sleep 1s
 }
```

AudioQnA/tests/test_compose_on_xeon.sh

Lines changed: 1 addition & 1 deletion

```diff
@@ -22,7 +22,7 @@ function build_docker_images() {
     service_list="audioqna whisper asr llm-tgi speecht5 tts"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
     docker images && sleep 1s
 }
```
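Since the commit bumps tgi-gaudi tags in several files (2.0.1, 2.0.4, and 1.2.1 all move to 2.0.5), a quick audit can confirm nothing still pins a superseded tag. A hedged sketch, with a sample file standing in for the repository's scripts:

```shell
# Sample script standing in for the repo's test scripts; in practice,
# run the grep recursively over the repository checkout instead.
script=$(mktemp)
echo 'docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5' > "${script}"

# Count lines that still reference one of the superseded tags.
# grep -c exits non-zero on zero matches, hence the `|| true` guard.
stale=$(grep -cE 'tgi-gaudi:(1\.2\.1|2\.0\.1|2\.0\.4)' "${script}") || true
echo "stale tag references: ${stale}"
rm -f "${script}"
```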
2828
