Skip to content

Commit e6f7eb1

Browse files
Zhenzhong1 and pre-commit-ci[bot]
authored and committed
update manifests for v0.9 (opea-project#623)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent ad017ba commit e6f7eb1

33 files changed

+765
-15
lines changed
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# ConfigMap `qna-config`: key/value settings that the ChatQnA Deployments in
# this manifest set load as environment variables (via `envFrom` or
# `configMapKeyRef`).
apiVersion: v1
kind: ConfigMap
metadata:
  name: qna-config
  namespace: default
data:
  # Model identifiers; the *_MODEL_ID values are passed as `--model-id`
  # arguments by the dependency Deployments that reference them.
  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
  RERANK_MODEL_ID: BAAI/bge-reranker-base
  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
  # In-cluster endpoints. The host names hard-code the `default` namespace,
  # so the matching Services must live there.
  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
  INDEX_NAME: rag-redis
  # Placeholder to be substituted before applying. It is quoted because a
  # bare `{...}` is parsed by YAML as a flow mapping, and ConfigMap data
  # values must be strings.
  # NOTE(review): a token is sensitive; consider moving it to a Secret.
  HUGGINGFACEHUB_API_TOKEN: '{HF_TOKEN}'
  # Service names of the microservices, exposed as *_SERVICE_HOST_IP.
  EMBEDDING_SERVICE_HOST_IP: embedding-svc
  RETRIEVER_SERVICE_HOST_IP: retriever-svc
  RERANK_SERVICE_HOST_IP: reranking-svc
  NODE_SELECTOR: chatqna-opea
  LLM_SERVICE_HOST_IP: llm-svc
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Deployment: a single replica of the ChatQnA backend server
# (opea/chatqna:latest) listening on container port 8888.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: chatqna-backend-server-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: chatqna-backend-server-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: chatqna-backend-server-deploy
    spec:
      # Restrict scheduling to nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      # Soft spreading across hosts: ScheduleAnyway means the constraint is a
      # preference and never blocks scheduling.
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: chatqna-backend-server-deploy
      hostIPC: true
      containers:
        - envFrom:
            # Load every key of qna-config as an environment variable.
            - configMapRef:
                name: qna-config
          image: opea/chatqna:latest
          imagePullPolicy: IfNotPresent
          name: chatqna-backend-server-deploy
          args: null
          ports:
            - containerPort: 8888
          # Requests equal limits for both cpu and memory.
          resources:
            limits:
              cpu: 8
              memory: 4000Mi
            requests:
              cpu: 8
              memory: 4000Mi
      serviceAccountName: default
---
# Service: exposes the backend on port 8888 inside the cluster and on
# node port 30888 on every node.
kind: Service
apiVersion: v1
metadata:
  # NOTE(review): the name reads `chaqna-...` (missing `t`). It is left
  # unchanged here because clients may already address the Service by name.
  name: chaqna-backend-server-svc
  # Pinned to match the Deployment; a Service only selects pods in its own
  # namespace.
  namespace: default
spec:
  type: NodePort
  selector:
    app: chatqna-backend-server-deploy
  ports:
    - name: service
      port: 8888
      targetPort: 8888
      nodePort: 30888
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Deployment: a single replica of opea/dataprep-redis:latest exposing
# container ports 6007, 6008 and 6009.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dataprep-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: dataprep-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: dataprep-deploy
    spec:
      # Restrict scheduling to nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      # Soft spreading across hosts (ScheduleAnyway never blocks scheduling).
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: dataprep-deploy
      hostIPC: true
      containers:
        # Only REDIS_URL and INDEX_NAME are taken from qna-config here,
        # rather than importing the whole ConfigMap with envFrom.
        - env:
            - name: REDIS_URL
              valueFrom:
                configMapKeyRef:
                  name: qna-config
                  key: REDIS_URL
            - name: INDEX_NAME
              valueFrom:
                configMapKeyRef:
                  name: qna-config
                  key: INDEX_NAME
          image: opea/dataprep-redis:latest
          imagePullPolicy: IfNotPresent
          name: dataprep-deploy
          args: null
          ports:
            - containerPort: 6007
            - containerPort: 6008
            - containerPort: 6009
      serviceAccountName: default
---
# Service: cluster-internal access to the three dataprep ports, mapped 1:1.
kind: Service
apiVersion: v1
metadata:
  name: dataprep-svc
  # Pinned to match the Deployment; a Service only selects pods in its own
  # namespace.
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: dataprep-deploy
  ports:
    - name: port1
      port: 6007
      targetPort: 6007
    - name: port2
      port: 6008
      targetPort: 6008
    - name: port3
      port: 6009
      targetPort: 6009
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Deployment: six replicas of the Hugging Face text-embeddings-inference
# CPU image, started with the model named by EMBEDDING_MODEL_ID.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: embedding-dependency-deploy
  namespace: default
spec:
  replicas: 6
  selector:
    matchLabels:
      app: embedding-dependency-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: embedding-dependency-deploy
    spec:
      # Restrict scheduling to nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      containers:
        - envFrom:
            # Load every key of qna-config as an environment variable.
            - configMapRef:
                name: qna-config
          image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
          name: embedding-dependency-deploy
          args:
            - --model-id
            # $(VAR) is expanded by Kubernetes from the container's
            # environment, which envFrom populates from qna-config.
            - $(EMBEDDING_MODEL_ID)
            - --auto-truncate
          volumeMounts:
            - mountPath: /data
              name: model-volume
            - mountPath: /dev/shm
              name: shm
          ports:
            - containerPort: 80
          # Requests equal limits for both cpu and memory.
          resources:
            limits:
              cpu: 80
              memory: 20000Mi
            requests:
              cpu: 80
              memory: 20000Mi
      serviceAccountName: default
      volumes:
        # Host directory mounted at /data; `type: Directory` requires the
        # path to already exist on the node.
        - name: model-volume
          hostPath:
            path: /home/sdp/cesg
            type: Directory
        # Memory-backed emptyDir mounted at /dev/shm, capped at 1Gi.
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: 1Gi
---
# Service: maps cluster port 6006 to container port 80.
kind: Service
apiVersion: v1
metadata:
  name: embedding-dependency-svc
  # Pinned to match the Deployment; a Service only selects pods in its own
  # namespace.
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: embedding-dependency-deploy
  ports:
    - name: service
      port: 6006
      targetPort: 80
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Deployment: a single replica of opea/embedding-tei:latest listening on
# container port 6000.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: embedding-deploy
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: embedding-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: embedding-deploy
    spec:
      # Restrict scheduling to nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      # Soft spreading across hosts (ScheduleAnyway never blocks scheduling).
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              app: embedding-deploy
      hostIPC: true
      containers:
        - envFrom:
            # Load every key of qna-config as an environment variable.
            - configMapRef:
                name: qna-config
          image: opea/embedding-tei:latest
          imagePullPolicy: IfNotPresent
          name: embedding-deploy
          args: null
          ports:
            - containerPort: 6000
          # Only cpu is constrained; no memory request or limit is set.
          resources:
            limits:
              cpu: 4
            requests:
              cpu: 4
      serviceAccountName: default
---
# Service: cluster-internal access to the embedding microservice on 6000.
kind: Service
apiVersion: v1
metadata:
  name: embedding-svc
  # Pinned to match the Deployment; a Service only selects pods in its own
  # namespace.
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: embedding-deploy
  ports:
    - name: service
      port: 6000
      targetPort: 6000
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Deployment: 31 replicas of the tgi_gaudi:2.0.1 image, each requesting one
# habana.ai/gaudi device and started with the model named by LLM_MODEL_ID.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-dependency-deploy
  namespace: default
spec:
  replicas: 31
  selector:
    matchLabels:
      app: llm-dependency-deploy
  template:
    metadata:
      annotations:
        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
      labels:
        app: llm-dependency-deploy
    spec:
      # Restrict scheduling to nodes labelled node-type=chatqna-opea.
      nodeSelector:
        node-type: chatqna-opea
      hostIPC: true
      containers:
        - envFrom:
            # Load every key of qna-config as an environment variable.
            - configMapRef:
                name: qna-config
          image: tgi_gaudi:2.0.1
          name: llm-dependency-deploy-demo
          securityContext:
            capabilities:
              add:
                - SYS_NICE
          args:
            - --model-id
            # $(VAR) is expanded by Kubernetes from the container's
            # environment, which envFrom populates from qna-config.
            - $(LLM_MODEL_ID)
            # Numeric flag values are quoted so they stay strings in args.
            - --max-input-length
            - '1024'
            - --max-total-tokens
            - '2048'
            - --max-batch-total-tokens
            - '65536'
            - --max-batch-prefill-tokens
            - '4096'
          volumeMounts:
            - mountPath: /data
              name: model-volume
            - mountPath: /dev/shm
              name: shm
          ports:
            - containerPort: 80
          resources:
            limits:
              habana.ai/gaudi: 1
          env:
            - name: OMPI_MCA_btl_vader_single_copy_mechanism
              value: none
            - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
              value: 'true'
            - name: runtime
              value: habana
            - name: HABANA_VISIBLE_DEVICES
              value: all
            # Take the token from the HUGGINGFACEHUB_API_TOKEN key that
            # envFrom loads from qna-config. The previous `$(HF_TOKEN)` was a
            # self-reference that no earlier variable defines, so the literal
            # text "$(HF_TOKEN)" was passed to the container.
            - name: HF_TOKEN
              value: $(HUGGINGFACEHUB_API_TOKEN)
      serviceAccountName: default
      volumes:
        # Host directory mounted at /data; `type: Directory` requires the
        # path to already exist on the node.
        - name: model-volume
          hostPath:
            path: /home/sdp/cesg
            type: Directory
        # Memory-backed emptyDir mounted at /dev/shm, capped at 1Gi.
        - name: shm
          emptyDir:
            medium: Memory
            sizeLimit: 1Gi
---
# Service: maps cluster port 9009 to container port 80.
kind: Service
apiVersion: v1
metadata:
  name: llm-dependency-svc
  # Pinned to match the Deployment; a Service only selects pods in its own
  # namespace.
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: llm-dependency-deploy
  ports:
    - name: service
      port: 9009
      targetPort: 80

0 commit comments

Comments
 (0)