
Commit 6ebe408

Docker compose updated

Signed-off-by: Ezequiel Lanza <ezequiel.lanza@gmail.com>
1 parent: 80d9b32 · commit: 6ebe408

2 files changed: +13 additions, -18 deletions

ChatQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml

Lines changed: 8 additions & 13 deletions

@@ -138,27 +138,22 @@ services:
       HF_HUB_ENABLE_HF_TRANSFER: 0
     command: --model-id ${RERANK_MODEL_ID} --auto-truncate
 
-  vllm-service:
-    image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
-    container_name: vllm-service
+  tgi-service:
+    image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
+    container_name: tgi-service
     ports:
       - "9009:80"
     volumes:
       - "./data:/data"
-    shm_size: 128g
+    shm_size: 1g
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      LLM_MODEL_ID: ${LLM_MODEL_ID}
-      VLLM_TORCH_PROFILER_DIR: "/mnt"
-    healthcheck:
-      test: ["CMD-SHELL", "curl -f http://$host_ip:9009/health || exit 1"]
-      interval: 10s
-      timeout: 10s
-      retries: 100
-    command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
 
   chatqna-xeon-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}

@@ -169,7 +164,7 @@ services:
       - dataprep-milvus-service
       - retriever
       - tei-reranking-service
-      - vllm-service
+      - tgi-service
     ports:
       - "8888:8888"
     environment:
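Note that the vllm-service healthcheck block is removed outright rather than ported to tgi-service; readiness is instead detected by grepping the container logs in the test script below. If a compose-level healthcheck were still wanted, a minimal sketch against TGI's GET /health endpoint could look like the following. This is a hypothetical addition, not part of this commit, and it assumes curl is available inside the TGI image, which listens on port 80 in-container per the 9009:80 mapping above:

    # Hypothetical healthcheck (not in this commit); TGI serves GET /health on its in-container port 80.
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 100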

ChatQnA/tests/test_compose_milvus_on_xeon.sh

Lines changed: 5 additions & 5 deletions

@@ -38,7 +38,7 @@ function build_docker_images() {
     cd ../
 
     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-    service_list="chatqna chatqna-ui dataprep retriever vllm nginx"
+    service_list="chatqna chatqna-ui dataprep retriever nginx"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5

@@ -59,8 +59,8 @@ function start_services() {
 
     n=0
     until [[ "$n" -ge 100 ]]; do
-        docker logs vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1
-        if grep -q complete ${LOG_PATH}/vllm_service_start.log; then
+        docker logs tgi-service > ${LOG_PATH}/tgi_service_start.log
+        if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
             break
         fi
         sleep 5s

@@ -171,8 +171,8 @@ function validate_microservices() {
     validate_service \
         "${ip_address}:9009/v1/chat/completions" \
         "content" \
-        "vllm-llm" \
-        "vllm-service" \
+        "tgi-llm" \
+        "tgi-service" \
         '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}'
}
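The same LLM request that validate_service sends can also be issued by hand against a running stack; a quick manual sketch, assuming the compose stack is up on the local host with TGI mapped to port 9009 as above (TGI 2.4.0 serves the OpenAI-compatible /v1/chat/completions route):

    # Manual smoke test (assumes a locally running stack); mirrors the validate_service call above.
    curl -s http://localhost:9009/v1/chat/completions \
        -H "Content-Type: application/json" \
        -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}'

A successful reply contains a "content" field, which is the string the test greps for.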
