
Commit 3299e5c

ChatQnA: Update chatqna-vllm-remote-inference (#1224)

Signed-off-by: sgurunat <gurunath.s@intel.com>

1 parent 340796b, commit 3299e5c

1 file changed: 11 additions, 4 deletions


ChatQnA/kubernetes/intel/hpu/gaudi/manifest/chatqna-vllm-remote-inference.yaml

Lines changed: 11 additions & 4 deletions
@@ -76,8 +76,8 @@ data:
   no_proxy: ""
   LOGFLAG: ""
   vLLM_ENDPOINT: "insert-your-remote-vllm-inference-endpoint"
-  LLM_MODEL: "meta-llama/Meta-Llama-3.1-8B-Instruct"
-  MODEL_ID: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+  LLM_MODEL: "meta-llama/Meta-Llama-3.1-70B-Instruct"
+  MODEL_ID: "meta-llama/Meta-Llama-3.1-70B-Instruct"
   CLIENTID: ""
   CLIENT_SECRET: ""
   TOKEN_URL: ""
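
Both keys move in lockstep: LLM_MODEL and MODEL_ID should normally name the same model that the remote vLLM endpoint actually serves. For reference, a minimal sketch of how these ConfigMap entries read after the change; the endpoint value is still the placeholder from the manifest and must be replaced with the real remote vLLM inference URL:

data:
  vLLM_ENDPOINT: "insert-your-remote-vllm-inference-endpoint"   # replace with the actual remote vLLM URL
  LLM_MODEL: "meta-llama/Meta-Llama-3.1-70B-Instruct"
  MODEL_ID: "meta-llama/Meta-Llama-3.1-70B-Instruct"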
@@ -174,6 +174,10 @@ data:
       proxy_set_header X-Real-IP $remote_addr;
       proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
       proxy_set_header X-Forwarded-Proto $scheme;
+      proxy_buffering off;
+      proxy_cache off;
+      proxy_request_buffering off;
+      gzip off;
     }

     location /v1/dataprep {
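
The added directives disable response buffering, caching, request buffering, and gzip compression for this location, which is what typically lets the backend's streamed (chunked) LLM output reach the client token by token instead of being held back until the whole response is ready. Below is a minimal sketch of how such a location block can sit inside the nginx config carried by this ConfigMap; the data key name and the upstream address are illustrative assumptions, not values taken from this diff:

data:
  default.conf: |        # key name is an assumption for illustration
    location / {
      # proxy to the ChatQnA backend service (address is an assumption for illustration)
      proxy_pass http://chatqna-backend-server-svc:8888;
      proxy_set_header Host $host;
      proxy_set_header X-Real-IP $remote_addr;
      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
      proxy_set_header X-Forwarded-Proto $scheme;
      # keep streamed responses flowing instead of buffering or compressing them
      proxy_buffering off;
      proxy_cache off;
      proxy_request_buffering off;
      gzip off;
    }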
@@ -459,6 +463,9 @@ spec:
         {}
       containers:
         - name: chatqna-ui
+          env:
+            - name: MODEL_ID
+              value: "meta-llama/Meta-Llama-3.1-70B-Instruct"
           securityContext:
             {}
           image: "opea/chatqna-ui:latest"
@@ -981,7 +988,7 @@ spec:
             - name: EMBEDDING_SERVICE_HOST_IP
               value: chatqna-embedding-usvc
             - name: MODEL_ID
-              value: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+              value: "meta-llama/Meta-Llama-3.1-70B-Instruct"
           securityContext:
             allowPrivilegeEscalation: false
             capabilities:
@@ -993,7 +1000,7 @@ spec:
             seccompProfile:
               type: RuntimeDefault
           image: "opea/chatqna-wrapper:latest"
-          imagePullPolicy: IfNotPresent
+          imagePullPolicy: Always
           volumeMounts:
             - mountPath: /tmp
               name: tmp
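
With imagePullPolicy set to Always, the kubelet re-pulls opea/chatqna-wrapper:latest every time the container starts, so restarting or rolling the Deployment picks up newly pushed images instead of reusing a stale cached copy. A minimal sketch of the resulting container fragment after this change; the container name is assumed from the image and unrelated fields are elided:

containers:
  - name: chatqna-wrapper              # name assumed for illustration
    image: "opea/chatqna-wrapper:latest"
    imagePullPolicy: Always            # always pull the :latest tag on container start
    env:
      - name: MODEL_ID
        value: "meta-llama/Meta-Llama-3.1-70B-Instruct"
    volumeMounts:
      - mountPath: /tmp
        name: tmp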
