Skip to content

Adding files to deploy VisualQnA application on ROCm vLLM #1751

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Apr 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
417 changes: 340 additions & 77 deletions VisualQnA/docker_compose/amd/gpu/rocm/README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion VisualQnA/docker_compose/amd/gpu/rocm/compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${VISUALQNA_HUGGINGFACEHUB_API_TOKEN}
HUGGING_FACE_HUB_TOKEN: ${VISUALQNA_HUGGINGFACEHUB_API_TOKEN}
volumes:
- "/var/opea/visualqna-service/data:/data"
- "${MODEL_CACHE:-./data}:/data"
shm_size: 64g
devices:
- /dev/kfd:/dev/kfd
Expand Down
105 changes: 105 additions & 0 deletions VisualQnA/docker_compose/amd/gpu/rocm/compose_vllm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Copyright (C) 2024 Advanced Micro Devices, Inc.
# SPDX-License-Identifier: Apache-2.0

# Docker Compose deployment of the VisualQnA megaservice on AMD ROCm GPUs,
# serving the LVM model with vLLM instead of TGI.
services:
  # vLLM inference server for the LVM model, built for ROCm.
  visualqna-vllm-service:
    image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
    container_name: visualqna-vllm-service
    ports:
      # Host port is configurable; vLLM listens on 8011 inside the container
      # (matches --port in the command below).
      - "${VISUALQNA_VLLM_SERVICE_PORT:-8081}:8011"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${VISUALQNA_HUGGINGFACEHUB_API_TOKEN}
      HF_TOKEN: ${VISUALQNA_HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: "1"
      HF_HUB_ENABLE_HF_TRANSFER: "0"
      # Bug fix: was misspelled WILM_USE_TRITON_FLASH_ATTENTION, which vLLM
      # never reads, so the setting was silently ignored.
      VLLM_USE_TRITON_FLASH_ATTENTION: "0"
      PYTORCH_JIT: "0"
    volumes:
      # Model cache; defaults to ./data when MODEL_CACHE is unset.
      - "${MODEL_CACHE:-./data}:/data"
    shm_size: 20G
    devices:
      # AMD GPU device nodes required by ROCm.
      - /dev/kfd:/dev/kfd
      - /dev/dri/:/dev/dri/
    cap_add:
      - SYS_PTRACE
    group_add:
      - video
    security_opt:
      - seccomp:unconfined
      - apparmor=unconfined
    command: >-
      --model ${VISUALQNA_LVM_MODEL_ID} --swap-space 16 --disable-log-requests
      --dtype float16 --tensor-parallel-size 1 --host 0.0.0.0 --port 8011
      --num-scheduler-steps 1 --distributed-executor-backend "mp"
    ipc: host

  # OPEA LVM microservice wrapping the vLLM endpoint.
  lvm:
    image: ${REGISTRY:-opea}/lvm:${TAG:-latest}
    container_name: lvm-server
    depends_on:
      - visualqna-vllm-service
    ports:
      - "9399:9399"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LVM_COMPONENT_NAME: "OPEA_VLLM_LVM"
      # Must point at the vLLM service above (set via set_env_vllm.sh).
      LVM_ENDPOINT: ${LVM_ENDPOINT}
      LLM_MODEL_ID: ${VISUALQNA_LVM_MODEL_ID}
      HF_HUB_DISABLE_PROGRESS_BARS: "1"
      HF_HUB_ENABLE_HF_TRANSFER: "0"
    restart: unless-stopped

  # VisualQnA megaservice orchestrator (backend API).
  visualqna-rocm-backend-server:
    image: ${REGISTRY:-opea}/visualqna:${TAG:-latest}
    container_name: visualqna-rocm-backend-server
    depends_on:
      - visualqna-vllm-service
      - lvm
    ports:
      - "${BACKEND_SERVICE_PORT:-8888}:8888"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
      - LVM_SERVICE_HOST_IP=${LVM_SERVICE_HOST_IP}
    ipc: host
    restart: always

  # Web UI frontend.
  visualqna-rocm-ui-server:
    image: ${REGISTRY:-opea}/visualqna-ui:${TAG:-latest}
    container_name: visualqna-rocm-ui-server
    depends_on:
      - visualqna-rocm-backend-server
    ports:
      - "${FRONTEND_SERVICE_PORT:-5173}:5173"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - BACKEND_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
    ipc: host
    restart: always

  # Nginx reverse proxy in front of UI and backend.
  # NOTE(review): service key lacks the "rocm" infix used by its siblings and
  # its own container_name — presumably intentional for cross-platform
  # overrides; confirm before renaming.
  visualqna-nginx-server:
    image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
    container_name: visualqna-rocm-nginx-server
    depends_on:
      - visualqna-rocm-backend-server
      - visualqna-rocm-ui-server
    ports:
      - "${NGINX_PORT:-80}:80"
    environment:
      - no_proxy=${no_proxy}
      - https_proxy=${https_proxy}
      - http_proxy=${http_proxy}
      - FRONTEND_SERVICE_IP=${HOST_IP}
      - FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT}
      - BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME}
      - BACKEND_SERVICE_IP=${HOST_IP}
      - BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
3 changes: 2 additions & 1 deletion VisualQnA/docker_compose/amd/gpu/rocm/set_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
# Copyright (C) 2024 Advanced Micro Devices, Inc
# SPDX-License-Identifier: Apache-2.0

export HOST_IP=${Your_host_ip_address}
export HOST_IP=${host_ip}
export EXTERNAL_HOST_IP=${host_ip}
export VISUALQNA_TGI_SERVICE_PORT="8399"
export VISUALQNA_HUGGINGFACEHUB_API_TOKEN=${Your_HUGGINGFACEHUB_API_TOKEN}
export VISUALQNA_CARD_ID="card1"
Expand Down
23 changes: 23 additions & 0 deletions VisualQnA/docker_compose/amd/gpu/rocm/set_env_vllm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env bash

# Copyright (C) 2024 Advanced Micro Devices, Inc
# SPDX-License-Identifier: Apache-2.0

# Environment setup for the vLLM-based VisualQnA deployment on AMD ROCm.
# Callers must export `host_ip` and `Your_HUGGINGFACEHUB_API_TOKEN` first;
# source this file before running `docker compose -f compose_vllm.yaml up`.

export HOST_IP=${host_ip}
export EXTERNAL_HOST_IP=${host_ip}
export VISUALQNA_VLLM_SERVICE_PORT="8081"
export VISUALQNA_HUGGINGFACEHUB_API_TOKEN=${Your_HUGGINGFACEHUB_API_TOKEN}
export VISUALQNA_CARD_ID="card1"
export VISUALQNA_RENDER_ID="renderD136"
export VISUALQNA_LVM_MODEL_ID="Xkev/Llama-3.2V-11B-cot"
export LVM_ENDPOINT="http://${HOST_IP}:${VISUALQNA_VLLM_SERVICE_PORT}"
export LVM_SERVICE_PORT=9399
export MEGA_SERVICE_HOST_IP=${HOST_IP}
export LVM_SERVICE_HOST_IP=${HOST_IP}
export FRONTEND_SERVICE_IP=${HOST_IP}
export FRONTEND_SERVICE_PORT=18001
export BACKEND_SERVICE_NAME=visualqna
export BACKEND_SERVICE_IP=${HOST_IP}
export BACKEND_SERVICE_PORT=18002
# Bug fix: this must be set AFTER BACKEND_SERVICE_PORT. Previously it was
# expanded before the port was defined, yielding "http://<host>:/v1/visualqna"
# (empty port) in the UI's BACKEND_BASE_URL.
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/visualqna"
export NGINX_PORT=18003
5 changes: 5 additions & 0 deletions VisualQnA/docker_image_build/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,8 @@ services:
dockerfile: comps/third_parties/nginx/src/Dockerfile
extends: visualqna
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
vllm-rocm:
build:
context: GenAIComps
dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu
image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
5 changes: 3 additions & 2 deletions VisualQnA/tests/test_compose_on_rocm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ export BACKEND_SERVICE_IP=${HOST_IP}
export BACKEND_SERVICE_PORT=8888
export NGINX_PORT=18003
export PATH="~/miniconda3/bin:$PATH"
export MODEL_CACHE=${model_cache:-"/var/opea/multimodalqna-service/data"}

function build_docker_images() {
opea_branch=${opea_branch:-"main"}
Expand Down Expand Up @@ -63,11 +64,11 @@ function start_services() {
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env

# Start Docker Containers
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

n=0
until [[ "$n" -ge 100 ]]; do
docker logs visualqna-tgi-service > ${LOG_PATH}/lvm_tgi_service_start.log
docker logs visualqna-tgi-service >& ${LOG_PATH}/lvm_tgi_service_start.log
if grep -q Connected ${LOG_PATH}/lvm_tgi_service_start.log; then
break
fi
Expand Down
Loading