
Commit 3299e5c

ChatQnA: Update chatqna-vllm-remote-inference (#1224)

Signed-off-by: sgurunat <gurunath.s@intel.com>

1 parent 340796b, commit 3299e5c

1 file changed: 11 additions, 4 deletions


ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm-remote-inference.yaml

Lines changed: 11 additions & 4 deletions
@@ -76,8 +76,8 @@ data:
   no_proxy: ""
   LOGFLAG: ""
   vLLM_ENDPOINT: "insert-your-remote-vllm-inference-endpoint"
-  LLM_MODEL: "meta-llama/Meta-Llama-3.1-8B-Instruct"
-  MODEL_ID: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+  LLM_MODEL: "meta-llama/Meta-Llama-3.1-70B-Instruct"
+  MODEL_ID: "meta-llama/Meta-Llama-3.1-70B-Instruct"
   CLIENTID: ""
   CLIENT_SECRET: ""
   TOKEN_URL: ""
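
Both keys move in lockstep: LLM_MODEL and MODEL_ID should normally name the same model that the remote vLLM endpoint actually serves. For reference, a minimal sketch of how these ConfigMap entries read after the change; the endpoint value is still the placeholder from the manifest and must be replaced with the real remote vLLM inference URL:

data:
  vLLM_ENDPOINT: "insert-your-remote-vllm-inference-endpoint"   # replace with the actual remote vLLM URL
  LLM_MODEL: "meta-llama/Meta-Llama-3.1-70B-Instruct"
  MODEL_ID: "meta-llama/Meta-Llama-3.1-70B-Instruct"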
@@ -174,6 +174,10 @@ data:
       proxy_set_header X-Real-IP $remote_addr;
       proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
       proxy_set_header X-Forwarded-Proto $scheme;
+      proxy_buffering off;
+      proxy_cache off;
+      proxy_request_buffering off;
+      gzip off;
     }

     location /v1/dataprep {
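
The added directives disable response buffering, caching, request buffering, and gzip compression for this location, which is what typically lets the backend's streamed (chunked) LLM output reach the client token by token instead of being held back until the whole response is ready. Below is a minimal sketch of how such a location block can sit inside the nginx config carried by this ConfigMap; the data key name and the upstream address are illustrative assumptions, not values taken from this diff:

data:
  default.conf: |        # key name is an assumption for illustration
    location / {
      # proxy to the ChatQnA backend service (address is an assumption for illustration)
      proxy_pass http://chatqna-backend-server-svc:8888;
      proxy_set_header Host $host;
      proxy_set_header X-Real-IP $remote_addr;
      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
      proxy_set_header X-Forwarded-Proto $scheme;
      # keep streamed responses flowing instead of buffering or compressing them
      proxy_buffering off;
      proxy_cache off;
      proxy_request_buffering off;
      gzip off;
    }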
@@ -459,6 +463,9 @@ spec:
         {}
       containers:
         - name: chatqna-ui
+          env:
+            - name: MODEL_ID
+              value: "meta-llama/Meta-Llama-3.1-70B-Instruct"
           securityContext:
             {}
           image: "opea/chatqna-ui:latest"
@@ -981,7 +988,7 @@ spec:
             - name: EMBEDDING_SERVICE_HOST_IP
               value: chatqna-embedding-usvc
             - name: MODEL_ID
-              value: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+              value: "meta-llama/Meta-Llama-3.1-70B-Instruct"
           securityContext:
             allowPrivilegeEscalation: false
             capabilities:
@@ -993,7 +1000,7 @@ spec:
             seccompProfile:
               type: RuntimeDefault
           image: "opea/chatqna-wrapper:latest"
-          imagePullPolicy: IfNotPresent
+          imagePullPolicy: Always
           volumeMounts:
             - mountPath: /tmp
               name: tmp
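
With imagePullPolicy set to Always, the kubelet re-pulls opea/chatqna-wrapper:latest every time the container starts, so restarting or rolling the Deployment picks up newly pushed images instead of reusing a stale cached copy. A minimal sketch of the resulting container fragment after this change; the container name is assumed from the image and unrelated fields are elided:

containers:
  - name: chatqna-wrapper              # name assumed for illustration
    image: "opea/chatqna-wrapper:latest"
    imagePullPolicy: Always            # always pull the :latest tag on container start
    env:
      - name: MODEL_ID
        value: "meta-llama/Meta-Llama-3.1-70B-Instruct"
    volumeMounts:
      - mountPath: /tmp
        name: tmp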
