
Commit 60871f2

Merge branch 'main' into feature/GenAIExample_ChatQnA_AMD_vLLM

2 parents 2634171 + edcd7c9

File tree

18 files changed: 78 additions & 117 deletions

ChatQnA/benchmark/performance/kubernetes/intel/gaudi/README.md

Lines changed: 2 additions & 10 deletions
@@ -69,10 +69,6 @@ Results will be displayed in the terminal and saved as CSV file named `1_stats.c
 - Persistent Volume Claim (PVC): This is the recommended approach for production setups. For more details on using PVC, refer to [PVC](https://github.com/opea-project/GenAIInfra/blob/main/helm-charts/README.md#using-persistent-volume).
 - Local Host Path: For simpler testing, ensure that each node involved in the deployment follows the steps above to locally prepare the models. After preparing the models, use `--set global.modelUseHostPath=${MODELDIR}` in the deployment command.
 
-- Add OPEA Helm Repository:
-  ```bash
-  python deploy.py --add-repo
-  ```
 - Label Nodes
   ```base
   python deploy.py --add-label --num-nodes 2
@@ -192,13 +188,9 @@ All the test results will come to the folder `GenAIEval/evals/benchmark/benchmar
 
 ## Teardown
 
-After completing the benchmark, use the following commands to clean up the environment:
+After completing the benchmark, use the following command to clean up the environment:
 
 Remove Node Labels:
-```base
-python deploy.py --delete-label
-```
-Delete the OPEA Helm Repository:
 ```bash
-python deploy.py --delete-repo
+python deploy.py --delete-label
 ```
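
Taken together, these README edits collapse the benchmark workflow to label → deploy → unlabel, with no Helm repository registration step. A minimal sketch of the post-change flow, assuming the deploy step uses the flags retained in the deploy.py diff below (the token value is a placeholder):

```bash
# Label the nodes that will host the benchmark (command from the diff above)
python deploy.py --add-label --num-nodes 2

# Deploy the chart; --add-repo is gone because the chart is now pulled
# from an OCI registry (see the deploy.py change below). Flags assumed
# from the retained argparse options.
python deploy.py --chart-name chatqna --namespace default --hf-token "<your-hf-token>"

# Teardown now only removes the node labels
python deploy.py --delete-label
```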

ChatQnA/benchmark/performance/kubernetes/intel/gaudi/deploy.py

Lines changed: 8 additions & 44 deletions
@@ -83,26 +83,6 @@ def clear_labels_from_nodes(label, node_names=None):
             print(f"Label {label_key} not found on node {node_name}, skipping.")
 
 
-def add_helm_repo(repo_name, repo_url):
-    # Add the repo if it does not exist
-    add_command = ["helm", "repo", "add", repo_name, repo_url]
-    try:
-        subprocess.run(add_command, check=True)
-        print(f"Added Helm repo {repo_name} from {repo_url}.")
-    except subprocess.CalledProcessError as e:
-        print(f"Failed to add Helm repo {repo_name}: {e}")
-
-
-def delete_helm_repo(repo_name):
-    """Delete Helm repo if it exists."""
-    command = ["helm", "repo", "remove", repo_name]
-    try:
-        subprocess.run(command, check=True)
-        print(f"Deleted Helm repo {repo_name}.")
-    except subprocess.CalledProcessError:
-        print(f"Failed to delete Helm repo {repo_name}. It may not exist.")
-
-
 def install_helm_release(release_name, chart_name, namespace, values_file, device_type):
     """Deploy a Helm release with a specified name and chart.
 
@@ -132,14 +112,14 @@ def install_helm_release(release_name, chart_name, namespace, values_file, devic
     if device_type == "gaudi":
         print("Device type is gaudi. Pulling Helm chart to get gaudi-values.yaml...")
 
-        # Pull and untar the chart
-        subprocess.run(["helm", "pull", chart_name, "--untar"], check=True)
+        # Combine chart_name with fixed prefix
+        chart_pull_url = f"oci://ghcr.io/opea-project/charts/{chart_name}"
 
-        # Determine the directory name (get the actual chart_name if chart_name is in the format 'repo_name/chart_name', else use chart_name directly)
-        chart_dir_name = chart_name.split("/")[-1] if "/" in chart_name else chart_name
+        # Pull and untar the chart
+        subprocess.run(["helm", "pull", chart_pull_url, "--untar"], check=True)
 
-        # Find the untarred directory (assumes only one directory matches chart_dir_name)
-        untar_dirs = glob.glob(f"{chart_dir_name}*")
+        # Find the untarred directory
+        untar_dirs = glob.glob(f"{chart_name}*")
         if untar_dirs:
             untar_dir = untar_dirs[0]
             hw_values_file = os.path.join(untar_dir, "gaudi-values.yaml")
@@ -210,20 +190,14 @@ def main():
     parser.add_argument(
         "--chart-name",
         type=str,
-        default="opea/chatqna",
-        help="The chart name to deploy, composed of repo name and chart name (default: opea/chatqna).",
+        default="chatqna",
+        help="The chart name to deploy, composed of repo name and chart name (default: chatqna).",
     )
     parser.add_argument("--namespace", default="default", help="Kubernetes namespace (default: default).")
     parser.add_argument("--hf-token", help="Hugging Face API token.")
     parser.add_argument(
         "--model-dir", help="Model directory, mounted as volumes for service access to pre-downloaded models"
     )
-    parser.add_argument("--repo-name", default="opea", help="Helm repo name to add/delete (default: opea).")
-    parser.add_argument(
-        "--repo-url",
-        default="https://opea-project.github.io/GenAIInfra",
-        help="Helm repository URL (default: https://opea-project.github.io/GenAIInfra).",
-    )
     parser.add_argument("--user-values", help="Path to a user-specified values.yaml file.")
     parser.add_argument(
         "--create-values-only", action="store_true", help="Only create the values.yaml file without deploying."
@@ -244,8 +218,6 @@ def main():
         action="store_true",
         help="Modify resources for services and change extraCmdArgs when creating values.yaml.",
     )
-    parser.add_argument("--add-repo", action="store_true", help="Add the Helm repo specified by --repo-url.")
-    parser.add_argument("--delete-repo", action="store_true", help="Delete the Helm repo specified by --repo-name.")
     parser.add_argument(
         "--device-type",
         type=str,
@@ -264,14 +236,6 @@ def main():
     else:
         args.num_nodes = num_node_names
 
-    # Helm repository management
-    if args.add_repo:
-        add_helm_repo(args.repo_name, args.repo_url)
-        return
-    elif args.delete_repo:
-        delete_helm_repo(args.repo_name)
-        return
-
     # Node labeling management
     if args.add_label:
         add_labels_to_nodes(args.num_nodes, args.label, args.node_names)
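
The functional core of this change swaps a classic Helm repository for OCI-based chart distribution, which `helm pull` supports natively in modern Helm 3. A before/after sketch of the equivalent CLI calls, using the repo URL from the removed code and the registry path from the added code:

```bash
# Before: register the repo, then pull by repo/chart name
helm repo add opea https://opea-project.github.io/GenAIInfra
helm pull opea/chatqna --untar

# After: one stateless pull from the GitHub container registry
helm pull oci://ghcr.io/opea-project/charts/chatqna --untar
```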

DocIndexRetriever/tests/test.py

Lines changed: 3 additions & 4 deletions
@@ -6,7 +6,7 @@
 import requests
 
 
-def search_knowledge_base(query: str, url: str, request_type="chat_completion") -> str:
+def search_knowledge_base(query: str, url: str, request_type: str) -> str:
     """Search the knowledge base for a specific query."""
     print(url)
     proxies = {"http": ""}
@@ -18,12 +18,13 @@ def search_knowledge_base(query: str, url: str, request_type="chat_completion")
             "top_n": 2,
         }
     else:
-        print("Sending text request")
+        print("Sending textdoc request")
         payload = {
             "text": query,
         }
     response = requests.post(url, json=payload, proxies=proxies)
     print(response)
+    print(response.json().keys())
     if "documents" in response.json():
         docs = response.json()["documents"]
         context = ""
@@ -32,7 +33,6 @@ def search_knowledge_base(query: str, url: str, request_type="chat_completion")
                 context = str(i) + ": " + doc
             else:
                 context += "\n" + str(i) + ": " + doc
-        # print(context)
         return context
     elif "text" in response.json():
         return response.json()["text"]
@@ -44,7 +44,6 @@ def search_knowledge_base(query: str, url: str, request_type="chat_completion")
                 context = doc["text"]
             else:
                 context += "\n" + doc["text"]
-        # print(context)
         return context
     else:
         return "Error parsing response from the knowledge base."

DocIndexRetriever/tests/test_compose_on_gaudi.sh

Lines changed: 4 additions & 0 deletions
@@ -15,6 +15,7 @@ LOG_PATH="$WORKPATH/tests"
 ip_address=$(hostname -I | awk '{print $1}')
 
 function build_docker_images() {
+    echo "Building Docker Images...."
     cd $WORKPATH/docker_image_build
     if [ ! -d "GenAIComps" ] ; then
         git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
@@ -26,9 +27,11 @@ function build_docker_images() {
     docker pull redis/redis-stack:7.2.0-v9
     docker pull ghcr.io/huggingface/tei-gaudi:1.5.0
     docker images && sleep 1s
+    echo "Docker images built!"
 }
 
 function start_services() {
+    echo "Starting Docker Services...."
     cd $WORKPATH/docker_compose/intel/hpu/gaudi
     export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
     export RERANK_MODEL_ID="BAAI/bge-reranker-base"
@@ -47,6 +50,7 @@ function start_services() {
     # Start Docker Containers
     docker compose up -d
     sleep 20
+    echo "Docker services started!"
 }
 
 function validate() {

DocIndexRetriever/tests/test_compose_on_xeon.sh

Lines changed: 29 additions & 10 deletions
@@ -15,8 +15,10 @@ LOG_PATH="$WORKPATH/tests"
 ip_address=$(hostname -I | awk '{print $1}')
 
 function build_docker_images() {
+    echo "Building Docker Images...."
     cd $WORKPATH/docker_image_build
     if [ ! -d "GenAIComps" ] ; then
+        echo "Cloning GenAIComps repository"
         git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
     fi
     service_list="dataprep-redis embedding-tei retriever-redis reranking-tei doc-index-retriever"
@@ -25,9 +27,12 @@ function build_docker_images() {
     docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     docker pull redis/redis-stack:7.2.0-v9
     docker images && sleep 1s
+
+    echo "Docker images built!"
 }
 
 function start_services() {
+    echo "Starting Docker Services...."
     cd $WORKPATH/docker_compose/intel/cpu/xeon
     export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
     export RERANK_MODEL_ID="BAAI/bge-reranker-base"
@@ -45,7 +50,8 @@ function start_services() {
 
     # Start Docker Containers
     docker compose up -d
-    sleep 20
+    sleep 5m
+    echo "Docker services started!"
 }
 
 function validate() {
@@ -66,7 +72,7 @@ function validate_megaservice() {
     echo "===========Ingest data=================="
     local CONTENT=$(http_proxy="" curl -X POST "http://${ip_address}:6007/v1/dataprep" \
         -H "Content-Type: multipart/form-data" \
-        -F 'link_list=["https://opea.dev"]')
+        -F 'link_list=["https://opea.dev/"]')
     local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-service-xeon")
     echo "$EXIT_CODE"
     local EXIT_CODE="${EXIT_CODE:0-1}"
@@ -77,19 +83,26 @@ function validate_megaservice() {
     fi
 
     # Curl the Mega Service
-    echo "================Testing retriever service: Default params================"
-
-    local CONTENT=$(curl http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
-      "messages": "Explain the OPEA project?"
+    echo "================Testing retriever service: Text Request ================"
+    cd $WORKPATH/tests
+    local CONTENT=$(http_proxy="" curl http://${ip_address}:8889/v1/retrievaltool -X POST -H "Content-Type: application/json" -d '{
+      "text": "Explain the OPEA project?"
     }')
+    # local CONTENT=$(python test.py --host_ip ${ip_address} --request_type text)
     local EXIT_CODE=$(validate "$CONTENT" "OPEA" "doc-index-retriever-service-xeon")
     echo "$EXIT_CODE"
     local EXIT_CODE="${EXIT_CODE:0-1}"
     echo "return value is $EXIT_CODE"
     if [ "$EXIT_CODE" == "1" ]; then
-        docker logs tei-embedding-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
+        echo "=============Embedding container log=================="
+        docker logs embedding-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
+        echo "=============Retriever container log=================="
         docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
-        docker logs reranking-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
+        echo "=============TEI Reranking log=================="
+        docker logs tei-reranking-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
+        echo "=============Reranking container log=================="
+        docker logs reranking-tei-xeon-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
+        echo "=============Doc-index-retriever container log=================="
         docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
         exit 1
     fi
@@ -102,9 +115,15 @@ function validate_megaservice() {
     local EXIT_CODE="${EXIT_CODE:0-1}"
     echo "return value is $EXIT_CODE"
     if [ "$EXIT_CODE" == "1" ]; then
-        docker logs tei-embedding-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
+        echo "=============Embedding container log=================="
+        docker logs embedding-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
+        echo "=============Retriever container log=================="
         docker logs retriever-redis-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
-        docker logs reranking-tei-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
+        echo "=============TEI Reranking log=================="
+        docker logs tei-reranking-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
+        echo "=============Reranking container log=================="
+        docker logs reranking-tei-xeon-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
+        echo "=============Doc-index-retriever container log=================="
         docker logs doc-index-retriever-server | tee -a ${LOG_PATH}/doc-index-retriever-service-xeon.log
         exit 1
     fi

EdgeCraftRAG/Dockerfile.server

Lines changed: 5 additions & 0 deletions
@@ -23,6 +23,11 @@ RUN useradd -m -s /bin/bash user && \
     mkdir -p /home/user && \
     chown -R user /home/user/
 
+RUN mkdir /templates && \
+    chown -R user /templates
+COPY ./edgecraftrag/prompt_template/default_prompt.txt /templates/
+RUN chown -R user /templates/default_prompt.txt
+
 COPY ./edgecraftrag /home/user/edgecraftrag
 
 RUN mkdir -p /home/user/gradio_cache
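
The image now ships a default prompt template at `/templates/default_prompt.txt`, while the compose changes below mount a host directory at `/templates/custom`. A minimal usage sketch, with a hypothetical host path:

```bash
# Optional: point PROMPT_PATH at a directory of custom prompt templates;
# the compose files changed below mount it to /templates/custom.
export PROMPT_PATH="/path/to/your/prompt_templates"   # hypothetical path
docker compose -f compose.yaml up -d
```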

EdgeCraftRAG/README.md

Lines changed: 10 additions & 34 deletions
@@ -32,14 +32,14 @@ Please follow this link [vLLM with OpenVINO](https://github.com/opea-project/Gen
 
 ### Start Edge Craft RAG Services with Docker Compose
 
-If you want to enable vLLM with OpenVINO service, please finish the steps in [Launch vLLM with OpenVINO service](#optional-launch-vllm-with-openvino-service) first.
-
 ```bash
 cd GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc
 
 export MODEL_PATH="your model path for all your models"
 export DOC_PATH="your doc path for uploading a dir of files"
 export GRADIO_PATH="your gradio cache path for transferring files"
+# If you have a specific prompt template, please uncomment the following line
+# export PROMPT_PATH="your prompt path for prompt templates"
 
 # Make sure all 3 folders have 1000:1000 permission, otherwise
 # chown 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${GRADIO_PATH}
@@ -70,49 +70,25 @@ optimum-cli export openvino -m BAAI/bge-small-en-v1.5 ${MODEL_PATH}/BAAI/bge-sma
 optimum-cli export openvino -m BAAI/bge-reranker-large ${MODEL_PATH}/BAAI/bge-reranker-large --task sentence-similarity
 optimum-cli export openvino -m Qwen/Qwen2-7B-Instruct ${MODEL_PATH}/Qwen/Qwen2-7B-Instruct/INT4_compressed_weights --weight-format int4
 
-docker compose up -d
+```
+
+#### Launch services with local inference
 
+```bash
+docker compose -f compose.yaml up -d
 ```
 
-#### (Optional) Launch vLLM with OpenVINO service
+#### Launch services with vLLM + OpenVINO inference service
 
-1. Set up Environment Variables
+Set up Additional Environment Variables and start with compose_vllm.yaml
 
 ```bash
 export LLM_MODEL=#your model id
 export VLLM_SERVICE_PORT=8008
 export vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT}"
 export HUGGINGFACEHUB_API_TOKEN=#your HF token
-```
-
-2. Uncomment below code in 'GenAIExamples/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml'
 
-```bash
-#   vllm-openvino-server:
-#     container_name: vllm-openvino-server
-#     image: opea/vllm-arc:latest
-#     ports:
-#       - ${VLLM_SERVICE_PORT:-8008}:80
-#     environment:
-#       HTTPS_PROXY: ${https_proxy}
-#       HTTP_PROXY: ${https_proxy}
-#       VLLM_OPENVINO_DEVICE: GPU
-#       HF_ENDPOINT: ${HF_ENDPOINT}
-#       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-#     volumes:
-#       - /dev/dri/by-path:/dev/dri/by-path
-#       - $HOME/.cache/huggingface:/root/.cache/huggingface
-#     devices:
-#       - /dev/dri
-#     entrypoint: /bin/bash -c "\
-#       cd / && \
-#       export VLLM_CPU_KVCACHE_SPACE=50 && \
-#       export VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS=ON && \
-#       python3 -m vllm.entrypoints.openai.api_server \
-#       --model '${LLM_MODEL}' \
-#       --max_model_len=1024 \
-#       --host 0.0.0.0 \
-#       --port 80"
+docker compose -f compose_vllm.yaml up -d
 ```
 
 ### ChatQnA with LLM Example (Command Line)

EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ services:
       - ${DOC_PATH:-${PWD}}:/home/user/docs
       - ${GRADIO_PATH:-${PWD}}:/home/user/gradio_cache
       - ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
+      - ${PROMPT_PATH:-${PWD}}:/templates/custom
     ports:
      - ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010}
     devices:

EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ services:
       - ${DOC_PATH:-${PWD}}:/home/user/docs
       - ${GRADIO_PATH:-${PWD}}:/home/user/gradio_cache
       - ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache
+      - ${PROMPT_PATH:-${PWD}}:/templates/custom
     ports:
      - ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010}
     devices:
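
Note the `${PROMPT_PATH:-${PWD}}` fallback in both compose files: if `PROMPT_PATH` is unset, the current working directory gets mounted at `/templates/custom`. One way to check the mount after startup, assuming a container name not given in this diff:

```bash
docker compose -f compose_vllm.yaml up -d
docker exec <edgecraftrag-server-container> ls /templates/custom   # container name assumed
```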
