
Commit 031cf6e

ChatQnA: Update kubernetes xeon chatqna remote inference and svelte UI (#1215)

Signed-off-by: sgurunat <gurunath.s@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

1 parent 3299e5c · commit 031cf6e

File tree: 2 files changed, +21 −9 lines

ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-remote-inference.yaml

Lines changed: 14 additions & 8 deletions

@@ -70,9 +70,8 @@ data:
   no_proxy: ""
   LOGFLAG: ""
   vLLM_ENDPOINT: "insert-your-remote-inference-endpoint"
-  LLM_MODEL: "meta-llama/Meta-Llama-3.1-8B-Instruct"
-  LLM_MODEL_ID: "meta-llama/Meta-Llama-3.1-8B-Instruct"
-  MODEL_ID: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+  LLM_MODEL: "meta-llama/Meta-Llama-3.1-70B-Instruct"
+  MODEL_ID: "meta-llama/Meta-Llama-3.1-70B-Instruct"
   CLIENTID: ""
   CLIENT_SECRET: ""
   TOKEN_URL: ""
@@ -216,6 +215,10 @@ data:
       proxy_set_header X-Real-IP $remote_addr;
       proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
       proxy_set_header X-Forwarded-Proto $scheme;
+      proxy_buffering off;
+      proxy_cache off;
+      proxy_request_buffering off;
+      gzip off;
     }

     location /v1/dataprep {
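The four nginx directives added here disable response buffering, request buffering, caching, and compression, so the gateway forwards the backend's server-sent token stream as it is produced instead of holding the whole answer; with buffering on, the UI would see one large chunk at the end. A minimal TypeScript sketch of a client reading such a stream, assuming the gateway URL and the JSON payload shape shown elsewhere in this commit (the function name and logging are illustrative, not from the repo):

// Hypothetical reader for the token stream that the buffering changes enable.
async function readTokenStream(gatewayUrl: string, question: string): Promise<void> {
  const res = await fetch(gatewayUrl, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: "meta-llama/Meta-Llama-3.1-70B-Instruct", // value from the ConfigMap above
      messages: question,
    }),
  });
  if (!res.body) throw new Error("response has no body to stream");

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    // With proxy_buffering off, each chunk arrives as soon as the model emits it.
    console.log(decoder.decode(value, { stream: true }));
  }
}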
@@ -552,6 +555,9 @@ spec:
         {}
       containers:
         - name: chatqna-ui
+          env:
+            - name: MODEL_ID
+              value: "meta-llama/Meta-Llama-3.1-70B-Instruct"
           securityContext:
             {}
           image: "opea/chatqna-ui:latest"
@@ -691,7 +697,7 @@ spec:
             seccompProfile:
               type: RuntimeDefault
           image: "opea/embedding-tei:latest"
-          imagePullPolicy: IfNotPresent
+          imagePullPolicy: Always
           ports:
             - name: embedding-usvc
               containerPort: 6000
@@ -769,7 +775,7 @@ spec:
             seccompProfile:
               type: RuntimeDefault
           image: "opea/llm-vllm:latest"
-          imagePullPolicy: IfNotPresent
+          imagePullPolicy: Always
           ports:
             - name: llm-uservice
               containerPort: 9000
@@ -919,7 +925,7 @@ spec:
             seccompProfile:
               type: RuntimeDefault
           image: "opea/reranking-tei:latest"
-          imagePullPolicy: IfNotPresent
+          imagePullPolicy: Always
           ports:
             - name: reranking-usvc
               containerPort: 8000
@@ -1257,7 +1263,7 @@ spec:
             - name: EMBEDDING_SERVICE_HOST_IP
               value: chatqna-embedding-usvc
             - name: MODEL_ID
-              value: "meta-llama/Meta-Llama-3.1-8B-Instruct"
+              value: "meta-llama/Meta-Llama-3.1-70B-Instruct"
           securityContext:
             allowPrivilegeEscalation: false
             capabilities:
@@ -1269,7 +1275,7 @@ spec:
             seccompProfile:
               type: RuntimeDefault
           image: "opea/chatqna-wrapper:latest"
-          imagePullPolicy: IfNotPresent
+          imagePullPolicy: Always
           volumeMounts:
             - mountPath: /tmp
               name: tmp
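Taken together, the ConfigMap and container changes plumb the model name from the manifest into the UI at runtime, so switching models requires no image rebuild. A sketch of the reading side, assuming unprefixed variables are exposed through SvelteKit's $env/dynamic/public as the existing env.CHAT_BASE_URL usage in this repo suggests (currentModel is a hypothetical helper, not from the repo):

import { env } from "$env/dynamic/public";

// Hypothetical helper: resolve the model advertised by the deployment,
// falling back to the UI's previous hard-coded default when MODEL_ID
// is unset or empty, mirroring the commit's fallback logic.
export function currentModel(): string {
  return env.MODEL_ID || "Intel/neural-chat-7b-v3-3";
}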

ChatQnA/ui/svelte/src/lib/network/chat/Network.ts

Lines changed: 7 additions & 1 deletion

@@ -16,13 +16,19 @@ import { env } from "$env/dynamic/public";
 import { SSE } from "sse.js";

 const CHAT_BASE_URL = env.CHAT_BASE_URL;
+const MODEL_ID = env.MODEL_ID;

 export async function fetchTextStream(query: string) {
   let payload = {};
   let url = "";
+  let modelId = "Intel/neural-chat-7b-v3-3";
+
+  if (MODEL_ID) {
+    modelId = MODEL_ID;
+  }

   payload = {
-    model: "Intel/neural-chat-7b-v3-3",
+    model: `${modelId}`,
     messages: query,
   };
   url = `${CHAT_BASE_URL}`;
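One small observation on the new code: since modelId is already a string, the template literal `${modelId}` is equivalent to passing modelId directly; the wrapping is harmless but unnecessary. A quick standalone check of the fallback behavior the hunk introduces (pick is an illustrative name, not from the repo):

// Reproduces the fallback above: use the env value when set,
// otherwise keep the previous hard-coded default model.
function pick(envValue: string | undefined): string {
  let modelId = "Intel/neural-chat-7b-v3-3";
  if (envValue) {
    modelId = envValue;
  }
  return modelId;
}

console.log(pick(undefined)); // "Intel/neural-chat-7b-v3-3"
console.log(pick("meta-llama/Meta-Llama-3.1-70B-Instruct")); // the deployed model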
