Skip to content

Commit 2309fd3

Browse files
artem-astafev authored and cwlacewe committed
Adding files to deploy MultimodalQnA application on ROCm vLLM (opea-project#1737)
Signed-off-by: Artem Astafev <a.astafev@datamonsters.com> Signed-off-by: Lacewell, Chaunte W <chaunte.w.lacewell@intel.com>
1 parent cfb2c5c commit 2309fd3

File tree

8 files changed

+948
-166
lines changed

8 files changed

+948
-166
lines changed

MultimodalQnA/docker_compose/amd/gpu/rocm/README.md

Lines changed: 367 additions & 163 deletions
Large diffs are not rendered by default.

MultimodalQnA/docker_compose/amd/gpu/rocm/compose.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ services:
105105
HUGGINGFACEHUB_API_TOKEN: ${MULTIMODAL_HUGGINGFACEHUB_API_TOKEN}
106106
HUGGING_FACE_HUB_TOKEN: ${MULTIMODAL_HUGGINGFACEHUB_API_TOKEN}
107107
volumes:
108-
- "/var/opea/multimodalqna-service/data:/data"
108+
- "${MODEL_CACHE:-./data}:/data"
109109
shm_size: 64g
110110
devices:
111111
- /dev/kfd:/dev/kfd
@@ -156,7 +156,7 @@ services:
156156
MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE}
157157
MM_RETRIEVER_SERVICE_HOST_IP: ${MM_RETRIEVER_SERVICE_HOST_IP}
158158
LVM_SERVICE_HOST_IP: ${LVM_SERVICE_HOST_IP}
159-
WHISPER_SERVER_PORT: ${WHISPER_SERVER_PORT}
159+
WHISPER_SERVER_PORT: ${WHISPER_PORT}
160160
WHISPER_SERVER_ENDPOINT: ${WHISPER_SERVER_ENDPOINT}
161161
ipc: host
162162
restart: always
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
# Copyright (C) 2024 Advanced Micro Devices, Inc.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
services:
5+
whisper-service:
6+
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
7+
container_name: whisper-service
8+
ports:
9+
- "7066:7066"
10+
ipc: host
11+
environment:
12+
no_proxy: ${no_proxy}
13+
http_proxy: ${http_proxy}
14+
https_proxy: ${https_proxy}
15+
restart: unless-stopped
16+
redis-vector-db:
17+
image: redis/redis-stack:7.2.0-v9
18+
container_name: redis-vector-db
19+
ports:
20+
- "6379:6379"
21+
- "8001:8001"
22+
dataprep-multimodal-redis:
23+
image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
24+
container_name: dataprep-multimodal-redis
25+
depends_on:
26+
- redis-vector-db
27+
- lvm
28+
ports:
29+
- "6007:5000"
30+
environment:
31+
no_proxy: ${no_proxy}
32+
http_proxy: ${http_proxy}
33+
https_proxy: ${https_proxy}
34+
REDIS_URL: ${REDIS_URL}
35+
REDIS_HOST: ${REDIS_HOST}
36+
INDEX_NAME: ${INDEX_NAME}
37+
LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:9399/v1/lvm"
38+
HUGGINGFACEHUB_API_TOKEN: ${MULTIMODAL_HUGGINGFACEHUB_API_TOKEN}
39+
MULTIMODAL_DATAPREP: true
40+
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALREDIS"
41+
restart: unless-stopped
42+
embedding-multimodal-bridgetower:
43+
image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower:${TAG:-latest}
44+
container_name: embedding-multimodal-bridgetower
45+
ports:
46+
- ${EMBEDDER_PORT}:${EMBEDDER_PORT}
47+
environment:
48+
no_proxy: ${no_proxy}
49+
http_proxy: ${http_proxy}
50+
https_proxy: ${https_proxy}
51+
PORT: ${EMBEDDER_PORT}
52+
healthcheck:
53+
test: ["CMD-SHELL", "http_proxy='' curl -f http://localhost:${EMBEDDER_PORT}/v1/health_check"]
54+
interval: 10s
55+
timeout: 6s
56+
retries: 18
57+
start_period: 30s
58+
entrypoint: ["python", "bridgetower_server.py", "--device", "cpu", "--model_name_or_path", $EMBEDDING_MODEL_ID]
59+
restart: unless-stopped
60+
embedding:
61+
image: ${REGISTRY:-opea}/embedding:${TAG:-latest}
62+
container_name: embedding
63+
depends_on:
64+
embedding-multimodal-bridgetower:
65+
condition: service_healthy
66+
ports:
67+
- ${MM_EMBEDDING_PORT_MICROSERVICE}:${MM_EMBEDDING_PORT_MICROSERVICE}
68+
ipc: host
69+
environment:
70+
no_proxy: ${no_proxy}
71+
http_proxy: ${http_proxy}
72+
https_proxy: ${https_proxy}
73+
MMEI_EMBEDDING_ENDPOINT: ${MMEI_EMBEDDING_ENDPOINT}
74+
MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE}
75+
MULTIMODAL_EMBEDDING: true
76+
restart: unless-stopped
77+
retriever-redis:
78+
image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
79+
container_name: retriever-redis
80+
depends_on:
81+
- redis-vector-db
82+
ports:
83+
- "7000:7000"
84+
ipc: host
85+
environment:
86+
no_proxy: ${no_proxy}
87+
http_proxy: ${http_proxy}
88+
https_proxy: ${https_proxy}
89+
REDIS_URL: ${REDIS_URL}
90+
INDEX_NAME: ${INDEX_NAME}
91+
BRIDGE_TOWER_EMBEDDING: ${BRIDGE_TOWER_EMBEDDING}
92+
LOGFLAG: ${LOGFLAG}
93+
RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
94+
restart: unless-stopped
95+
multimodalqna-vllm-service:
96+
image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
97+
container_name: multimodalqna-vllm-service
98+
ports:
99+
- "${MULTIMODAL_VLLM_SERVICE_PORT:-8081}:8011"
100+
environment:
101+
no_proxy: ${no_proxy}
102+
http_proxy: ${http_proxy}
103+
https_proxy: ${https_proxy}
104+
HUGGINGFACEHUB_API_TOKEN: ${MULTIMODAL_HUGGINGFACEHUB_API_TOKEN}
105+
HF_TOKEN: ${MULTIMODAL_HUGGINGFACEHUB_API_TOKEN}
106+
HF_HUB_DISABLE_PROGRESS_BARS: 1
107+
HF_HUB_ENABLE_HF_TRANSFER: 0
108+
WILM_USE_TRITON_FLASH_ATTENTION: 0
109+
PYTORCH_JIT: 0
110+
volumes:
111+
- "${MODEL_CACHE:-./data}:/data"
112+
shm_size: 20G
113+
devices:
114+
- /dev/kfd:/dev/kfd
115+
- /dev/dri/:/dev/dri/
116+
cap_add:
117+
- SYS_PTRACE
118+
group_add:
119+
- video
120+
security_opt:
121+
- seccomp:unconfined
122+
- apparmor=unconfined
123+
command: "--model ${MULTIMODAL_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 1 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\""
124+
ipc: host
125+
lvm:
126+
image: ${REGISTRY:-opea}/lvm:${TAG:-latest}
127+
container_name: lvm
128+
depends_on:
129+
- multimodalqna-vllm-service
130+
ports:
131+
- "9399:9399"
132+
ipc: host
133+
environment:
134+
no_proxy: ${no_proxy}
135+
http_proxy: ${http_proxy}
136+
https_proxy: ${https_proxy}
137+
LVM_COMPONENT_NAME: "OPEA_VLLM_LVM"
138+
LVM_ENDPOINT: ${LVM_ENDPOINT}
139+
LLM_MODEL_ID: ${MULTIMODAL_LLM_MODEL_ID}
140+
HF_HUB_DISABLE_PROGRESS_BARS: 1
141+
HF_HUB_ENABLE_HF_TRANSFER: 0
142+
restart: unless-stopped
143+
multimodalqna:
144+
image: ${REGISTRY:-opea}/multimodalqna:${TAG:-latest}
145+
container_name: multimodalqna-backend-server
146+
depends_on:
147+
- redis-vector-db
148+
- dataprep-multimodal-redis
149+
- embedding
150+
- retriever-redis
151+
- lvm
152+
ports:
153+
- "8888:8888"
154+
environment:
155+
no_proxy: ${no_proxy}
156+
https_proxy: ${https_proxy}
157+
http_proxy: ${http_proxy}
158+
MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
159+
MM_EMBEDDING_SERVICE_HOST_IP: ${MM_EMBEDDING_SERVICE_HOST_IP}
160+
MM_EMBEDDING_PORT_MICROSERVICE: ${MM_EMBEDDING_PORT_MICROSERVICE}
161+
MM_RETRIEVER_SERVICE_HOST_IP: ${MM_RETRIEVER_SERVICE_HOST_IP}
162+
LVM_SERVICE_HOST_IP: ${LVM_SERVICE_HOST_IP}
163+
WHISPER_SERVER_PORT: ${WHISPER_PORT}
164+
WHISPER_SERVER_ENDPOINT: ${WHISPER_SERVER_ENDPOINT}
165+
ipc: host
166+
restart: always
167+
multimodalqna-ui:
168+
image: ${REGISTRY:-opea}/multimodalqna-ui:${TAG:-latest}
169+
container_name: multimodalqna-gradio-ui-server
170+
depends_on:
171+
- multimodalqna
172+
ports:
173+
- "5173:5173"
174+
environment:
175+
- no_proxy=${no_proxy}
176+
- https_proxy=${https_proxy}
177+
- http_proxy=${http_proxy}
178+
- BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT}
179+
- DATAPREP_INGEST_SERVICE_ENDPOINT=${DATAPREP_INGEST_SERVICE_ENDPOINT}
180+
- DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT=${DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT}
181+
- DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT=${DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT}
182+
ipc: host
183+
restart: always
184+
185+
networks:
186+
default:
187+
driver: bridge

MultimodalQnA/docker_compose/amd/gpu/rocm/set_env.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,5 @@ export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/datap
3131
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/generate_captions"
3232
export DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/get"
3333
export DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/delete"
34+
export WHISPER_PORT="7066"
35+
export WHISPER_SERVER_ENDPOINT="http://${HOST_IP}:${WHISPER_PORT}/v1/asr"
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#!/usr/bin/env bash
2+
3+
# Copyright (C) 2024 Advanced Micro Devices, Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
6+
export HOST_IP=${your_host_ip_address}
7+
export MULTIMODAL_HUGGINGFACEHUB_API_TOKEN=${your_huggingfacehub_token}
8+
export MULTIMODAL_TGI_SERVICE_PORT="8399"
9+
export no_proxy=${your_no_proxy}
10+
export http_proxy=${your_http_proxy}
11+
export https_proxy=${your_http_proxy}
12+
export BRIDGE_TOWER_EMBEDDING=true
13+
export EMBEDDER_PORT=6006
14+
export MMEI_EMBEDDING_ENDPOINT="http://${HOST_IP}:$EMBEDDER_PORT"
15+
export MM_EMBEDDING_PORT_MICROSERVICE=6000
16+
export REDIS_URL="redis://${HOST_IP}:6379"
17+
export REDIS_HOST=${HOST_IP}
18+
export INDEX_NAME="mm-rag-redis"
19+
export VLLM_SERVER_PORT=8081
20+
export LVM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVER_PORT}"
21+
export EMBEDDING_MODEL_ID="BridgeTower/bridgetower-large-itm-mlm-itc"
22+
export LVM_MODEL_ID="Xkev/Llama-3.2V-11B-cot"
23+
export WHISPER_MODEL="base"
24+
export MM_EMBEDDING_SERVICE_HOST_IP=${HOST_IP}
25+
export MM_RETRIEVER_SERVICE_HOST_IP=${HOST_IP}
26+
export LVM_SERVICE_HOST_IP=${HOST_IP}
27+
export MEGA_SERVICE_HOST_IP=${HOST_IP}
28+
export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:8888/v1/multimodalqna"
29+
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/ingest"
30+
export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/generate_transcripts"
31+
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/generate_captions"
32+
export DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/get"
33+
export DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/delete"
34+
export WHISPER_PORT="7066"
35+
export WHISPER_SERVER_ENDPOINT="http://${HOST_IP}:${WHISPER_PORT}/v1/asr"

MultimodalQnA/docker_image_build/build.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,8 @@ services:
7777
dockerfile: comps/tts/src/Dockerfile
7878
extends: multimodalqna
7979
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
80+
vllm-rocm:
81+
build:
82+
context: GenAIComps
83+
dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu
84+
image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}

MultimodalQnA/tests/test_compose_on_rocm.sh

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,21 @@ function setup_env() {
7272
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/generate_captions"
7373
export DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/get"
7474
export DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/delete"
75+
export MODEL_CACHE=${model_cache:-"/var/opea/multimodalqna-service/data"}
7576
}
7677

7778
function start_services() {
7879
cd $WORKPATH/docker_compose/amd/gpu/rocm
7980
docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
80-
sleep 1m
81+
n=0
82+
until [[ "$n" -ge 100 ]]; do
83+
docker logs tgi-llava-rocm-server >& $LOG_PATH/tgi-llava-rocm-server_start.log
84+
if grep -q "Connected" $LOG_PATH/tgi-llava-rocm-server_start.log; then
85+
break
86+
fi
87+
sleep 10s
88+
n=$((n+1))
89+
done
8190
}
8291

8392
function prepare_data() {

0 commit comments

Comments (0)