Commit 5db12a5

Docker compose updated
Signed-off-by: Ezequiel Lanza <ezequiel.lanza@gmail.com>
1 parent 6ebe408 commit 5db12a5

File tree

ChatQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml
ChatQnA/tests/test_compose_milvus_on_xeon.sh

2 files changed, 18 insertions(+), 13 deletions(-)

ChatQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml

Lines changed: 13 additions & 8 deletions
@@ -138,22 +138,27 @@ services:
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate

-  tgi-service:
-    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
-    container_name: tgi-service
+  vllm-service:
+    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
+    container_name: vllm-service
     ports:
       - "9009:80"
     volumes:
       - "./data:/data"
-    shm_size: 1g
+    shm_size: 128g
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      HF_HUB_DISABLE_PROGRESS_BARS: 1
-      HF_HUB_ENABLE_HF_TRANSFER: 0
-    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
+      VLLM_TORCH_PROFILER_DIR: "/mnt"
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:9009/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
+    command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80

   chatqna-xeon-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
@@ -164,7 +169,7 @@ services:
       - dataprep-milvus-service
       - retriever
       - tei-reranking-service
-      - tgi-service
+      - vllm-service
     ports:
       - "8888:8888"
     environment:
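
A minimal sketch of how the new healthcheck can be exercised by hand once the stack is up, assuming host_ip and the usual ChatQnA variables (HUGGINGFACEHUB_API_TOKEN, LLM_MODEL_ID, proxies) are already exported per the example's setup instructions; the endpoint and port mapping come straight from the compose change above.

    # Start the Milvus variant of the stack, then poll the same endpoint the
    # compose healthcheck probes: vLLM serves /health on port 80 in the
    # container, published as 9009 on the host.
    docker compose -f compose_milvus.yaml up -d

    until curl -sf "http://${host_ip}:9009/health" > /dev/null; do
      echo "waiting for vllm-service to become healthy..."
      sleep 10
    done
    echo "vllm-service is ready"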

ChatQnA/tests/test_compose_milvus_on_xeon.sh

Lines changed: 5 additions & 5 deletions
@@ -38,7 +38,7 @@ function build_docker_images() {
     cd ../

     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-    service_list="chatqna chatqna-ui dataprep retriever nginx"
+    service_list="chatqna chatqna-ui dataprep retriever vllm nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -59,8 +59,8 @@ function start_services() {

     n=0
     until [[ "$n" -ge 100 ]]; do
-        docker logs tgi-service > ${LOG_PATH}/tgi_service_start.log
-        if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
+        docker logs vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1
+        if grep -q complete ${LOG_PATH}/vllm_service_start.log; then
             break
         fi
         sleep 5s
@@ -171,8 +171,8 @@ function validate_microservices() {
     validate_service \
         "${ip_address}:9009/v1/chat/completions" \
         "content" \
-        "tgi-llm" \
-        "tgi-service" \
+        "vllm-llm" \
+        "vllm-service" \
         '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}'
 }
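
For reference, the LLM check in validate_microservices() can be reproduced by hand; ip_address stands in for the host IP as in the test script, and the URL and payload are exactly what the updated test sends to the OpenAI-compatible vLLM endpoint.

    # Manual version of the vllm-service check: POST the test's payload and
    # look for "content" in the JSON response, as validate_service does.
    curl -sS "http://${ip_address}:9009/v1/chat/completions" \
      -H "Content-Type: application/json" \
      -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}'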