
Commit 319dbda

Adding files to deploy DocSum application on ROCm vLLM (#1572)

Signed-off-by: Chingis Yundunov <YundunovCN@sibedge.com>
1 parent 1a0c5f0 commit 319dbda

File tree

10 files changed: +794 −175 lines changed

DocSum/assets/img/ui-result-page.png

Binary image replaced (117 KB → 21.9 KB)

DocSum/docker_compose/amd/gpu/rocm/README.md

Lines changed: 338 additions & 108 deletions
Large diffs are not rendered by default.

DocSum/docker_compose/amd/gpu/rocm/compose.yaml

Lines changed: 23 additions & 33 deletions

@@ -6,7 +6,7 @@ services:
     image: ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
     container_name: docsum-tgi-service
     ports:
-      - "${DOCSUM_TGI_SERVICE_PORT}:80"
+      - "${DOCSUM_TGI_SERVICE_PORT:-8008}:80"
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
@@ -16,12 +16,11 @@ services:
       host_ip: ${host_ip}
       DOCSUM_TGI_SERVICE_PORT: ${DOCSUM_TGI_SERVICE_PORT}
     volumes:
-      - "/var/opea/docsum-service/data:/data"
-    shm_size: 1g
+      - "${MODEL_CACHE:-./data}:/data"
+    shm_size: 20g
     devices:
       - /dev/kfd:/dev/kfd
-      - /dev/dri/${DOCSUM_CARD_ID}:/dev/dri/${DOCSUM_CARD_ID}
-      - /dev/dri/${DOCSUM_RENDER_ID}:/dev/dri/${DOCSUM_RENDER_ID}
+      - /dev/dri/:/dev/dri/
     cap_add:
       - SYS_PTRACE
     group_add:
@@ -34,7 +33,7 @@ services:
       interval: 10s
       timeout: 10s
       retries: 100
-    command: --model-id ${DOCSUM_LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}
+    command: --model-id ${DOCSUM_LLM_MODEL_ID} --max-input-length ${DOCSUM_MAX_INPUT_TOKENS} --max-total-tokens ${DOCSUM_MAX_TOTAL_TOKENS}

   docsum-llm-server:
     image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
@@ -45,34 +44,24 @@ services:
     ports:
       - "${DOCSUM_LLM_SERVER_PORT}:9000"
     ipc: host
-    group_add:
-      - video
-    security_opt:
-      - seccomp:unconfined
-    cap_add:
-      - SYS_PTRACE
-    devices:
-      - /dev/kfd:/dev/kfd
-      - /dev/dri/${DOCSUM_CARD_ID}:/dev/dri/${DOCSUM_CARD_ID}
-      - /dev/dri/${DOCSUM_RENDER_ID}:/dev/dri/${DOCSUM_RENDER_ID}
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
+      LLM_ENDPOINT: ${DOCSUM_TGI_LLM_ENDPOINT}
       HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
-      MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
-      MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
+      MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS}
+      MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS}
       LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID}
-      DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
+      DocSum_COMPONENT_NAME: "OpeaDocSumTgi"
       LOGFLAG: ${LOGFLAG:-False}
     restart: unless-stopped

   whisper:
     image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
     container_name: whisper-service
     ports:
-      - "7066:7066"
+      - "${DOCSUM_WHISPER_PORT:-7066}:7066"
     ipc: host
     environment:
       no_proxy: ${no_proxy}
@@ -89,13 +78,14 @@ services:
     ports:
       - "${DOCSUM_BACKEND_SERVER_PORT}:8888"
     environment:
-      - no_proxy=${no_proxy}
-      - https_proxy=${https_proxy}
-      - http_proxy=${http_proxy}
-      - MEGA_SERVICE_HOST_IP=${HOST_IP}
-      - LLM_SERVICE_HOST_IP=${HOST_IP}
-      - ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
-
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      http_proxy: ${http_proxy}
+      MEGA_SERVICE_HOST_IP: ${HOST_IP}
+      LLM_SERVICE_HOST_IP: ${HOST_IP}
+      LLM_SERVICE_PORT: ${DOCSUM_LLM_SERVER_PORT}
+      ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP}
+      ASR_SERVICE_PORT: ${DOCSUM_WHISPER_PORT}
     ipc: host
     restart: always

@@ -107,11 +97,11 @@ services:
     ports:
       - "5173:5173"
     environment:
-      - no_proxy=${no_proxy}
-      - https_proxy=${https_proxy}
-      - http_proxy=${http_proxy}
-      - BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT}
-      - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      http_proxy: ${http_proxy}
+      BACKEND_SERVICE_ENDPOINT: ${BACKEND_SERVICE_ENDPOINT}
+      DOC_BASE_URL: ${BACKEND_SERVICE_ENDPOINT}
     ipc: host
     restart: always
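The reworked TGI service now defaults its host port to 8008, caches models under ${MODEL_CACHE:-./data}, and passes through the whole /dev/dri directory instead of per-card nodes, so DOCSUM_CARD_ID and DOCSUM_RENDER_ID are no longer required. A minimal smoke test of those defaults (a sketch, assuming set_env.sh has been sourced from this directory and that TGI's standard /health endpoint is reachable):

    # Bring up the TGI-backed stack and probe the serving endpoint.
    cd DocSum/docker_compose/amd/gpu/rocm
    source set_env.sh
    docker compose -f compose.yaml up -d
    # DOCSUM_TGI_SERVICE_PORT now falls back to 8008 when unset:
    curl http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT:-8008}/health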

DocSum/docker_compose/amd/gpu/rocm/compose_vllm.yaml (new file)

Lines changed: 111 additions & 0 deletions

# Copyright (C) 2024 Advanced Micro Devices, Inc.
# SPDX-License-Identifier: Apache-2.0

services:
  docsum-vllm-service:
    image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
    container_name: docsum-vllm-service
    ports:
      - "${DOCSUM_VLLM_SERVICE_PORT:-8081}:8011"
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
      HF_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      VLLM_USE_TRITON_FLASH_ATTENTION: 0
      PYTORCH_JIT: 0
    healthcheck:
      test: [ "CMD-SHELL", "curl -f http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT:-8081}/health || exit 1" ]
      interval: 10s
      timeout: 10s
      retries: 100
    volumes:
      - "${MODEL_CACHE:-./data}:/data"
    shm_size: 20G
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri/:/dev/dri/
    cap_add:
      - SYS_PTRACE
    group_add:
      - video
    security_opt:
      - seccomp:unconfined
      - apparmor=unconfined
    command: "--model ${DOCSUM_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\""
    ipc: host

  docsum-llm-server:
    image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
    container_name: docsum-llm-server
    depends_on:
      docsum-vllm-service:
        condition: service_healthy
    ports:
      - "${DOCSUM_LLM_SERVER_PORT}:9000"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      LLM_ENDPOINT: ${DOCSUM_LLM_ENDPOINT}
      HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
      MAX_INPUT_TOKENS: ${DOCSUM_MAX_INPUT_TOKENS}
      MAX_TOTAL_TOKENS: ${DOCSUM_MAX_TOTAL_TOKENS}
      LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID}
      DocSum_COMPONENT_NAME: "OpeaDocSumvLLM"
      LOGFLAG: ${LOGFLAG:-False}
    restart: unless-stopped

  whisper:
    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
    container_name: whisper-service
    ports:
      - "${DOCSUM_WHISPER_PORT:-7066}:7066"
    ipc: host
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    restart: unless-stopped

  docsum-backend-server:
    image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
    container_name: docsum-backend-server
    depends_on:
      - docsum-vllm-service
      - docsum-llm-server
    ports:
      - "${DOCSUM_BACKEND_SERVER_PORT}:8888"
    environment:
      no_proxy: ${no_proxy}
      https_proxy: ${https_proxy}
      http_proxy: ${http_proxy}
      MEGA_SERVICE_HOST_IP: ${HOST_IP}
      LLM_SERVICE_HOST_IP: ${HOST_IP}
      ASR_SERVICE_HOST_IP: ${ASR_SERVICE_HOST_IP}
    ipc: host
    restart: always

  docsum-gradio-ui:
    image: ${REGISTRY:-opea}/docsum-gradio-ui:${TAG:-latest}
    container_name: docsum-ui-server
    depends_on:
      - docsum-backend-server
    ports:
      - "${DOCSUM_FRONTEND_PORT:-5173}:5173"
    environment:
      no_proxy: ${no_proxy}
      https_proxy: ${https_proxy}
      http_proxy: ${http_proxy}
      BACKEND_SERVICE_ENDPOINT: ${BACKEND_SERVICE_ENDPOINT}
      DOC_BASE_URL: ${BACKEND_SERVICE_ENDPOINT}
    ipc: host
    restart: always

networks:
  default:
    driver: bridge
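In this new compose file, vLLM serves on container port 8011 (host port ${DOCSUM_VLLM_SERVICE_PORT:-8081}), and docsum-llm-server waits for its /health check to pass before starting; the command line assumes four GPUs via --tensor-parallel-size 4, so adjust that to the actual card count. A hedged end-to-end check once the stack is up (the multipart request shape follows the /v1/docsum curl examples used elsewhere in DocSum; the sample text and token values are illustrative):

    # Confirm the vLLM engine is healthy, then request a short summary
    # through the megaservice gateway.
    curl http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT:-8081}/health
    curl -X POST http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \
      -H "Content-Type: multipart/form-data" \
      -F "type=text" \
      -F "messages=DocSum condenses long documents into short summaries." \
      -F "max_tokens=32" \
      -F "language=en"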

DocSum/docker_compose/amd/gpu/rocm/set_env.sh

Lines changed: 8 additions & 7 deletions

@@ -3,15 +3,16 @@
 # Copyright (C) 2024 Advanced Micro Devices, Inc.
 # SPDX-License-Identifier: Apache-2.0

-export MAX_INPUT_TOKENS=2048
-export MAX_TOTAL_TOKENS=4096
-export DOCSUM_TGI_IMAGE="ghcr.io/huggingface/text-generation-inference:2.4.1-rocm"
+export HOST_IP=''
+export DOCSUM_MAX_INPUT_TOKENS="2048"
+export DOCSUM_MAX_TOTAL_TOKENS="4096"
 export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
-export HOST_IP=${host_ip}
 export DOCSUM_TGI_SERVICE_PORT="8008"
 export DOCSUM_TGI_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
-export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export DOCSUM_WHISPER_PORT="7066"
+export ASR_SERVICE_HOST_IP="${HOST_IP}"
 export DOCSUM_LLM_SERVER_PORT="9000"
-export DOCSUM_BACKEND_SERVER_PORT="8888"
-export DOCSUM_FRONTEND_PORT="5173"
+export DOCSUM_BACKEND_SERVER_PORT="18072"
+export DOCSUM_FRONTEND_PORT="18073"
 export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
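Note that the un-prefixed MAX_INPUT_TOKENS/MAX_TOTAL_TOKENS exports are gone, DOCSUM_HUGGINGFACEHUB_API_TOKEN is now taken from a pre-set HUGGINGFACEHUB_API_TOKEN, and the backend/frontend ports moved to 18072/18073. A quick way to catch stale references to the old names in local overrides (a sketch, assuming GNU grep):

    # Flag remaining un-prefixed token-limit variables; the \b word
    # boundaries keep the new DOCSUM_MAX_* names from matching.
    grep -rnE '\bMAX_(INPUT|TOTAL)_TOKENS\b' --include='*.sh' --include='*.yaml' .
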
DocSum/docker_compose/amd/gpu/rocm/set_env_vllm.sh (new file)

Lines changed: 18 additions & 0 deletions

#!/usr/bin/env bash

# Copyright (C) 2024 Advanced Micro Devices, Inc.
# SPDX-License-Identifier: Apache-2.0

export HOST_IP=''
export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export DOCSUM_MAX_INPUT_TOKENS=2048
export DOCSUM_MAX_TOTAL_TOKENS=4096
export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export DOCSUM_VLLM_SERVICE_PORT="8008"
export DOCSUM_LLM_ENDPOINT="http://${HOST_IP}:${DOCSUM_VLLM_SERVICE_PORT}"
export DOCSUM_WHISPER_PORT="7066"
export ASR_SERVICE_HOST_IP="${HOST_IP}"
export DOCSUM_LLM_SERVER_PORT="9000"
export DOCSUM_BACKEND_SERVER_PORT="18072"
export DOCSUM_FRONTEND_PORT="18073"
export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum"
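This script hard-codes HOST_IP='' and derives DOCSUM_LLM_ENDPOINT and BACKEND_SERVICE_ENDPOINT from it, so the address must be filled in before the file is sourced. A usage sketch (the compose_vllm.yaml filename matches the vLLM compose file above; the sed edit is one hypothetical way to inject the host address):

    cd DocSum/docker_compose/amd/gpu/rocm
    export HUGGINGFACEHUB_API_TOKEN="<your HF token>"   # consumed by set_env_vllm.sh
    # Fill in HOST_IP before sourcing, since the endpoint URLs are built from it.
    sed -i "s|^export HOST_IP=''|export HOST_IP='$(hostname -I | awk '{print $1}')'|" set_env_vllm.sh
    source set_env_vllm.sh
    docker compose -f compose_vllm.yaml up -d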

DocSum/docker_image_build/build.yaml

Lines changed: 5 additions & 0 deletions

@@ -49,6 +49,11 @@ services:
       dockerfile: comps/llms/src/doc-summarization/Dockerfile
     extends: docsum
     image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
+  vllm-rocm:
+    build:
+      context: GenAIComps
+      dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu
+    image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest}
   vllm:
     build:
       context: vllm
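The new vllm-rocm entry builds the ROCm vLLM image from a GenAIComps checkout sitting next to build.yaml, as the context: GenAIComps line implies. A build sketch under that assumption:

    cd DocSum/docker_image_build
    git clone https://github.com/opea-project/GenAIComps.git   # provides the build context
    docker compose -f build.yaml build vllm-rocm --no-cache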
