
Commit 1a0c5f0

Code Enhancement for vLLM inference (#1729)

Signed-off-by: Yongbozzz <yongbo.zhu@intel.com>

1 parent bbd5344 · commit 1a0c5f0

File tree: 4 files changed, +127 −11 lines changed

EdgeCraftRAG/README.md

Lines changed: 21 additions & 1 deletion
````diff
@@ -17,7 +17,7 @@ quality and performance.
 
 ### (Optional) Build Docker Images for Mega Service, Server and UI by your own
 
-If you want to build the images by your own, please follow the steps:
+**All the Docker images can be pulled automatically.** If you want to build the images on your own, please follow these steps:
 
 ```bash
 cd GenAIExamples/EdgeCraftRAG
@@ -101,6 +101,26 @@ export HUGGINGFACEHUB_API_TOKEN=#your HF token
 docker compose -f compose_vllm.yaml up -d
 ```
 
+#### Launch services with vLLM for inference on multiple Intel Arc GPUs
+
+The Docker image is pulled automatically; you can also pull it manually:
+
+```bash
+docker pull intelanalytics/ipex-llm-serving-xpu:latest
+```
+
+Set up the additional environment variables and start the services with compose_vllm_multi-arc.yaml:
+
+```bash
+export LLM_MODEL=#your model id
+export VLLM_SERVICE_PORT=8008
+export vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT}"
+export LLM_MODEL_PATH=#your model path
+export TENSOR_PARALLEL_SIZE=#number of Intel Arc GPUs used for inference
+
+docker compose -f compose_vllm_multi-arc.yaml up -d
+```
+
 ### ChatQnA with LLM Example (Command Line)
 
 ```bash
````
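For illustration, a concrete multi-GPU launch might look like the following sketch. The model ID, model path, and GPU count below are hypothetical placeholders, not values from this commit:

```bash
# Hypothetical values -- substitute your own model ID, path, and GPU count.
export HOST_IP=$(hostname -I | awk '{print $1}')   # host address reachable by the other services
export LLM_MODEL="Qwen/Qwen2-7B-Instruct"          # model name vLLM will serve
export LLM_MODEL_PATH="${HOME}/models"             # host directory mounted into the container
export VLLM_SERVICE_PORT=8008
export vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT}"
export TENSOR_PARALLEL_SIZE=2                      # shard the model across 2 Intel Arc GPUs

docker compose -f compose_vllm_multi-arc.yaml up -d
```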
EdgeCraftRAG/compose_vllm_multi-arc.yaml (new file)

Lines changed: 93 additions & 0 deletions
```yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  server:
    image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest}
    container_name: edgecraftrag-server
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_ENDPOINT: ${HF_ENDPOINT}
      vLLM_ENDPOINT: ${vLLM_ENDPOINT}
      LLM_MODEL: ${LLM_MODEL}
      ENABLE_BENCHMARK: ${ENABLE_BENCHMARK:-false}
    volumes:
      - ${MODEL_PATH:-${PWD}}:/home/user/models
      - ${DOC_PATH:-${PWD}}:/home/user/docs
      - ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache
      - ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
      - ${PROMPT_PATH:-${PWD}}:/templates/custom
    ports:
      - ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010}
    devices:
      - /dev/dri:/dev/dri
    group_add:
      - ${VIDEOGROUPID:-44}
      - ${RENDERGROUPID:-109}
  ecrag:
    image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest}
    container_name: edgecraftrag
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011}
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}}
      PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010}
      PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}}
    ports:
      - ${MEGA_SERVICE_PORT:-16011}:${MEGA_SERVICE_PORT:-16011}
    depends_on:
      - server
  ui:
    image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest}
    container_name: edgecraftrag-ui
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011}
      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}}
      PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010}
      PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}}
      UI_SERVICE_PORT: ${UI_SERVICE_PORT:-8082}
      UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0}
    volumes:
      - ${UI_TMPFILE_PATH:-${PWD}}:/home/user/ui_cache
    ports:
      - ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082}
    restart: always
    depends_on:
      - server
      - ecrag
  llm-serving-xpu:
    container_name: ipex-llm-serving-xpu-container
    image: intelanalytics/ipex-llm-serving-xpu:latest
    privileged: true
    ports:
      - ${VLLM_SERVICE_PORT:-8008}:8000
    group_add:
      - video
      - ${VIDEOGROUPID:-44}
      - ${RENDERGROUPID:-109}
    volumes:
      - ${LLM_MODEL_PATH:-${PWD}}:/llm/models
    devices:
      - /dev/dri
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HF_ENDPOINT: ${HF_ENDPOINT}
      MODEL_PATH: "/llm/models"
      SERVED_MODEL_NAME: ${LLM_MODEL}
      TENSOR_PARALLEL_SIZE: ${TENSOR_PARALLEL_SIZE:-1}
    shm_size: '16g'
    entrypoint: /bin/bash -c "\
      cd /llm && \
      bash start-vllm-service.sh"
networks:
  default:
    driver: bridge
```
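Once the stack is up, one way to sanity-check the serving container is to hit its endpoint directly. This is a sketch, assuming vLLM's OpenAI-compatible API; the compose file maps host port ${VLLM_SERVICE_PORT:-8008} to container port 8000:

```bash
# List served models; the response should include ${LLM_MODEL}.
curl -s "http://${HOST_IP}:${VLLM_SERVICE_PORT:-8008}/v1/models"

# Minimal chat completion request against the OpenAI-compatible API.
curl -s "http://${HOST_IP}:${VLLM_SERVICE_PORT:-8008}/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -d "{
        \"model\": \"${LLM_MODEL}\",
        \"messages\": [{\"role\": \"user\", \"content\": \"Hello\"}],
        \"max_tokens\": 32
      }"
```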

EdgeCraftRAG/ui/gradio/ecrag_client.py

Lines changed: 10 additions & 7 deletions
```diff
@@ -156,13 +156,16 @@ def get_benchmark(name):
 
     if data.get("Benchmark enabled", False):
         benchmark_data = data.get("last_benchmark_data", {})
-        if benchmark_data.get("generator", "N/A"):
-            benchmark = (
-                f"Retrieval: {benchmark_data.get('retriever', 0.0):.4f}s "
-                f"Post-process: {benchmark_data.get('postprocessor', 0.0):.4f}s "
-                f"Generation: {benchmark_data.get('generator', 0.0):.4f}s"
-            ).rstrip()
-            return benchmark
+        if benchmark_data and "generator" in benchmark_data:
+            if benchmark_data.get("generator", "N/A"):
+                benchmark = (
+                    f"Retrieval: {benchmark_data.get('retriever', 0.0):.4f}s "
+                    f"Post-process: {benchmark_data.get('postprocessor', 0.0):.4f}s "
+                    f"Generation: {benchmark_data.get('generator', 0.0):.4f}s"
+                ).rstrip()
+                return benchmark
+            else:
+                return None
         else:
             return None
     else:
```

EdgeCraftRAG/ui/gradio/ecragui.py

Lines changed: 3 additions & 3 deletions
```diff
@@ -85,9 +85,9 @@ def get_system_status():
 
 def get_benchmark():
     time.sleep(0.5)
-    active_pipeline_nam = get_actived_pipeline()
-    if active_pipeline_nam:
-        data = cli.get_benchmark(active_pipeline_nam)
+    active_pipeline_name = get_actived_pipeline()
+    if active_pipeline_name:
+        data = cli.get_benchmark(active_pipeline_name)
         if data:
             return gr.update(
                 visible=True,
```
