Commit ff1310b

Refactor docsum (#1336)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
1 parent ca15fe9 commit ff1310b

16 files changed, +94 −75 lines changed

DocSum/docker_compose/amd/gpu/rocm/README.md

Lines changed: 3 additions & 2 deletions
@@ -11,7 +11,7 @@ First of all, you need to build Docker Images locally and install the python pac
 ```bash
 git clone https://github.com/opea-project/GenAIComps.git
 cd GenAIComps
-docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/summarization/tgi/langchain/Dockerfile .
+docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/doc-summarization/Dockerfile .
 ```

 Then run the command `docker images`, you will have the following four Docker Images:

@@ -81,6 +81,7 @@ export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
 export DOCSUM_LLM_SERVER_PORT="8008"
 export DOCSUM_BACKEND_SERVER_PORT="8888"
 export DOCSUM_FRONTEND_PORT="5173"
+export DocSum_COMPONENT_NAME="OPEADocSum_TGI"
 ```

 Note: Please replace with `host_ip` with your external IP address, do not use localhost.

@@ -126,7 +127,7 @@ docker compose up -d
 2. LLM Microservice

 ```bash
-curl http://${host_ip}:9000/v1/chat/docsum \
+curl http://${host_ip}:9000/v1/docsum \
 -X POST \
 -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
 -H 'Content-Type: application/json'

DocSum/docker_compose/amd/gpu/rocm/compose.yaml

Lines changed: 13 additions & 3 deletions
@@ -13,6 +13,8 @@ services:
       https_proxy: ${https_proxy}
       TGI_LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
       HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
+      host_ip: ${host_ip}
+      DOCSUM_TGI_SERVICE_PORT: ${DOCSUM_TGI_SERVICE_PORT}
     volumes:
       - "/var/opea/docsum-service/data:/data"
     shm_size: 1g

@@ -27,13 +29,19 @@ services:
     security_opt:
       - seccomp:unconfined
     ipc: host
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://${host_ip}:${DOCSUM_TGI_SERVICE_PORT}/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model-id ${DOCSUM_LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}

   docsum-llm-server:
-    image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
+    image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
     container_name: docsum-llm-server
     depends_on:
-      - docsum-tgi-service
+      docsum-tgi-service:
+        condition: service_healthy
     ports:
       - "${DOCSUM_LLM_SERVER_PORT}:9000"
     ipc: host

@@ -51,11 +59,13 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
+      LLM_ENDPOINT: "http://${HOST_IP}:${DOCSUM_TGI_SERVICE_PORT}"
       HUGGINGFACEHUB_API_TOKEN: ${DOCSUM_HUGGINGFACEHUB_API_TOKEN}
       MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
       MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
       LLM_MODEL_ID: ${DOCSUM_LLM_MODEL_ID}
+      DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
+      LOGFLAG: ${LOGFLAG:-False}
     restart: unless-stopped

   whisper:
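The healthcheck added above makes `docsum-llm-server` start only after TGI reports healthy (`condition: service_healthy`) instead of immediately. A minimal way to verify the gate by hand, assuming `host_ip` and `DOCSUM_TGI_SERVICE_PORT` are already exported as in the README above:

```bash
# Probe the same endpoint the compose healthcheck polls; curl -f exits non-zero on HTTP errors.
curl -f "http://${host_ip}:${DOCSUM_TGI_SERVICE_PORT}/health" && echo "TGI is ready"

# The status column shows "(healthy)" once the check passes; docsum-llm-server
# is only started after docsum-tgi-service reaches that state.
docker ps --format 'table {{.Names}}\t{{.Status}}'
```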

DocSum/docker_compose/intel/cpu/xeon/README.md

Lines changed: 2 additions & 2 deletions
@@ -123,7 +123,7 @@ You will have the following Docker Images:

 1. `opea/docsum-ui:latest`
 2. `opea/docsum:latest`
-3. `opea/llm-docsum-tgi:latest`
+3. `opea/llm-docsum:latest`
 4. `opea/whisper:latest`

 ### Validate Microservices

@@ -140,7 +140,7 @@ You will have the following Docker Images:
 2. LLM Microservice

 ```bash
-curl http://${host_ip}:9000/v1/chat/docsum \
+curl http://${host_ip}:9000/v1/docsum \
 -X POST \
 -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
 -H 'Content-Type: application/json'

DocSum/docker_compose/intel/cpu/xeon/compose.yaml

Lines changed: 15 additions & 6 deletions
@@ -6,36 +6,45 @@ services:
     image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
     container_name: tgi-server
     ports:
-      - "8008:80"
+      - ${LLM_ENDPOINT_PORT:-8008}:80
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      host_ip: ${host_ip}
+      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     volumes:
       - "./data:/data"
     shm_size: 1g
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}

   llm-docsum-tgi:
-    image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
+    image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
     container_name: llm-docsum-server
     depends_on:
-      - tgi-server
+      tgi-server:
+        condition: service_healthy
     ports:
-      - "9000:9000"
+      - ${DOCSUM_PORT:-9000}:9000
     ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      LLM_ENDPOINT: ${LLM_ENDPOINT}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
       MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
-      LOGFLAG: True
+      DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
+      LOGFLAG: ${LOGFLAG:-False}
     restart: unless-stopped

   whisper:
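The fixed host ports are replaced with `${LLM_ENDPOINT_PORT:-8008}` and `${DOCSUM_PORT:-9000}`, so the defaults still apply when nothing is exported, but both services can be remapped without editing the YAML. A sketch of an override run, assuming `LLM_ENDPOINT` is kept in sync with the TGI port the way `set_env.sh` derives it:

```bash
# Default ports: TGI on 8008, DocSum LLM microservice on 9000.
docker compose -f compose.yaml up -d

# Remap both host ports; LLM_ENDPOINT must point at the new TGI port.
export LLM_ENDPOINT_PORT=18008
export DOCSUM_PORT=19000
export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
docker compose -f compose.yaml up -d
```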

DocSum/docker_compose/intel/hpu/gaudi/README.md

Lines changed: 2 additions & 2 deletions
@@ -115,7 +115,7 @@ You will have the following Docker Images:

 1. `opea/docsum-ui:latest`
 2. `opea/docsum:latest`
-3. `opea/llm-docsum-tgi:latest`
+3. `opea/llm-docsum:latest`
 4. `opea/whisper:latest`

 ### Validate Microservices

@@ -132,7 +132,7 @@ You will have the following Docker Images:
 2. LLM Microservice

 ```bash
-curl http://${host_ip}:9000/v1/chat/docsum \
+curl http://${host_ip}:9000/v1/docsum \
 -X POST \
 -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
 -H 'Content-Type: application/json'

DocSum/docker_compose/intel/hpu/gaudi/compose.yaml

Lines changed: 24 additions & 12 deletions
@@ -2,47 +2,59 @@
 # SPDX-License-Identifier: Apache-2.0

 services:
-  tgi-server:
-    image: ghcr.io/huggingface/tgi-gaudi:2.0.6
+  tgi-gaudi-server:
+    image: ghcr.io/huggingface/tgi-gaudi:2.3.1
     container_name: tgi-gaudi-server
     ports:
-      - "8008:80"
+      - ${LLM_ENDPOINT_PORT:-8008}:80
+    volumes:
+      - "./data:/data"
     environment:
-      HABANA_VISIBLE_DEVICES: all
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
       ENABLE_HPU_GRAPH: true
       LIMIT_HPU_GRAPH: true
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
-    volumes:
-      - "./data:/data"
+      host_ip: ${host_ip}
+      LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT}
     runtime: habana
     cap_add:
       - SYS_NICE
     ipc: host
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://${host_ip}:${LLM_ENDPOINT_PORT}/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model-id ${LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS}

   llm-docsum-tgi:
-    image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
+    image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
     container_name: llm-docsum-gaudi-server
     depends_on:
-      - tgi-server
+      tgi-gaudi-server:
+        condition: service_healthy
     ports:
-      - "9000:9000"
+      - ${DOCSUM_PORT:-9000}:9000
     ipc: host
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
       MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
+      LLM_ENDPOINT: ${LLM_ENDPOINT}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
-      LOGFLAG: True
+      DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
+      LOGFLAG: ${LOGFLAG:-False}
     restart: unless-stopped

   whisper:

@@ -66,7 +78,7 @@ services:
     image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
     container_name: docsum-gaudi-backend-server
     depends_on:
-      - tgi-server
+      - tgi-gaudi-server
       - llm-docsum-tgi
     ports:
       - "8888:8888"

DocSum/docker_compose/set_env.sh

Lines changed: 5 additions & 1 deletion
@@ -10,10 +10,14 @@ export MAX_INPUT_TOKENS=1024
 export MAX_TOTAL_TOKENS=2048

 export no_proxy="${no_proxy},${host_ip}"
-export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export LLM_SERVICE_HOST_IP=${host_ip}
 export ASR_SERVICE_HOST_IP=${host_ip}
 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"

 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
+
+export LLM_ENDPOINT_PORT=8008
+export DOCSUM_PORT=9000
+export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
+export DocSum_COMPONENT_NAME="OPEADocSum_TGI"
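With `TGI_LLM_ENDPOINT` dropped, the compose files now read `LLM_ENDPOINT`, `LLM_ENDPOINT_PORT`, `DOCSUM_PORT`, and `DocSum_COMPONENT_NAME`, all of which this script exports. A minimal sketch of the intended flow, assuming `host_ip` is exported before sourcing, since the derived values depend on it:

```bash
export host_ip=$(hostname -I | awk '{print $1}')   # external IP, not localhost (Linux example)
source DocSum/docker_compose/set_env.sh

echo "$LLM_ENDPOINT"              # -> http://<host_ip>:8008
echo "$DocSum_COMPONENT_NAME"     # -> OPEADocSum_TGI
```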

DocSum/docker_image_build/build.yaml

Lines changed: 3 additions & 3 deletions
@@ -41,9 +41,9 @@ services:
       dockerfile: comps/asr/src/integrations/dependency/whisper/Dockerfile
     extends: docsum
     image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
-  llm-docsum-tgi:
+  llm-docsum:
     build:
       context: GenAIComps
-      dockerfile: comps/llms/summarization/tgi/langchain/Dockerfile
+      dockerfile: comps/llms/src/doc-summarization/Dockerfile
     extends: docsum
-    image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
+    image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}
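Since the build target is renamed from `llm-docsum-tgi` to `llm-docsum`, local image builds must use the new service name (the updated test script below does the same via its `service_list`). A sketch, assuming GenAIComps has been cloned next to `build.yaml` as the `context: GenAIComps` entry expects:

```bash
cd DocSum/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git   # build context for llm-docsum

# Build only the renamed summarization image, opea/llm-docsum:latest.
docker compose -f build.yaml build llm-docsum --no-cache
```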

DocSum/docsum.py

Lines changed: 1 addition & 1 deletion
@@ -146,7 +146,7 @@ def add_remote_service(self):
             name="llm",
             host=LLM_SERVICE_HOST_IP,
             port=LLM_SERVICE_PORT,
-            endpoint="/v1/chat/docsum",
+            endpoint="/v1/docsum",
             use_remote_service=True,
             service_type=ServiceType.LLM,
         )

DocSum/kubernetes/gmc/docsum_gaudi.yaml

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ spec:
       internalService:
         serviceName: docsum-llm-uservice
         config:
-          endpoint: /v1/chat/docsum
+          endpoint: /v1/docsum
           PORT: "9009"
           TGI_LLM_ENDPOINT: tgi-gaudi-svc
     - name: TgiGaudi

DocSum/kubernetes/gmc/docsum_xeon.yaml

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ spec:
       internalService:
         serviceName: docsum-llm-uservice
         config:
-          endpoint: /v1/chat/docsum
+          endpoint: /v1/docsum
           PORT: "9009"
           TGI_LLM_ENDPOINT: tgi-svc
     - name: Tgi

DocSum/tests/test_compose_on_gaudi.sh

Lines changed: 9 additions & 14 deletions
@@ -17,13 +17,17 @@ export TAG=${IMAGE_TAG}
 export MAX_INPUT_TOKENS=2048
 export MAX_TOTAL_TOKENS=4096
 export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
-export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export LLM_SERVICE_HOST_IP=${host_ip}
 export ASR_SERVICE_HOST_IP=${host_ip}
 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/docsum"
 export no_proxy="${no_proxy},${host_ip}"
+export LLM_ENDPOINT_PORT=8008
+export DOCSUM_PORT=9000
+export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
+export DocSum_COMPONENT_NAME="OPEADocSum_TGI"
+export LOGFLAG=True

 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"

@@ -37,10 +41,10 @@ function build_docker_images() {
     git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../

     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-    service_list="docsum docsum-gradio-ui whisper llm-docsum-tgi"
+    service_list="docsum docsum-gradio-ui whisper llm-docsum"
     docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

-    docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
+    docker pull ghcr.io/huggingface/tgi-gaudi:2.3.1
     docker images && sleep 1s
 }

@@ -49,15 +53,6 @@ function start_services() {

     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
     sleep 3m
-
-    until [[ "$n" -ge 100 ]]; do
-        docker logs tgi-gaudi-server > ${LOG_PATH}/tgi_service_start.log
-        if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
-            break
-        fi
-        sleep 5s
-        n=$((n+1))
-    done
 }

 get_base64_str() {

@@ -156,13 +151,13 @@ function validate_microservices() {
     validate_services_json \
         "${host_ip}:8008/generate" \
         "generated_text" \
-        "tgi-gaudi" \
+        "tgi-gaudi-server" \
         "tgi-gaudi-server" \
         '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'

     # llm microservice
     validate_services_json \
-        "${host_ip}:9000/v1/chat/docsum" \
+        "${host_ip}:9000/v1/docsum" \
         "data: " \
         "llm-docsum-tgi" \
         "llm-docsum-gaudi-server" \
