
Commit be5933a

Update benchmark scripts (#1883)

Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

1 parent: 18b4f39

6 files changed: +21 / -20 lines

ChatQnA/benchmark_chatqna.yaml (1 addition, 1 deletion)

```diff
@@ -3,7 +3,7 @@
 
 deploy:
   device: gaudi
-  version: 1.2.0
+  version: 1.3.0
   modelUseHostPath: /mnt/models
   HUGGINGFACEHUB_API_TOKEN: "" # mandatory
   node: [1, 2, 4, 8]
```
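The `deploy` block above is plain YAML, so the bump is just a string field. A minimal sketch of how a script might consume it, assuming PyYAML; the field names come from the diff, but the loop body is hypothetical and not the repo's actual deploy logic:

```python
# Minimal sketch, assuming PyYAML is installed. Field names are taken from
# the diff above; the loop body is illustrative, not the repo's deploy code.
import yaml

with open("ChatQnA/benchmark_chatqna.yaml") as f:
    config = yaml.safe_load(f)

deploy = config["deploy"]
print(deploy["version"])  # "1.3.0" after this commit
for node_count in deploy["node"]:  # [1, 2, 4, 8]: one run per cluster size
    print(f"would deploy ChatQnA on {node_count} Gaudi node(s)")
```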

DocSum/benchmark_docsum.yaml (3 additions, 7 deletions)

```diff
@@ -3,7 +3,7 @@
 
 deploy:
   device: gaudi
-  version: 1.2.0
+  version: 1.3.0
   modelUseHostPath: /mnt/models
   HUGGINGFACEHUB_API_TOKEN: "" # mandatory
   node: [1]
@@ -20,14 +20,10 @@ deploy:
       memory_capacity: "8000Mi"
     replicaCount: [1]
 
-  teirerank:
-    enabled: False
-
   llm:
     engine: vllm # or tgi
     model_id: "meta-llama/Llama-3.2-3B-Instruct" # mandatory
-    replicaCount:
-      without_teirerank: [1] # When teirerank.enabled is False
+    replicaCount: [1]
     resources:
       enabled: False
       cards_per_instance: 1
@@ -78,7 +74,7 @@ benchmark:
 
   # workload, all of the test cases will run for benchmark
   bench_target: ["docsumfixed"] # specify the bench_target for benchmark
-  dataset: "/home/sdp/upload.txt" # specify the absolute path to the dataset file
+  dataset: "/home/sdp/pubmed_10.txt" # specify the absolute path to the dataset file
   summary_type: "stuff"
   stream: True
 
```
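After this change the `llm` service's replica count is a flat list rather than a mapping keyed on `teirerank.enabled`. The resulting shape, reconstructed from the diff with surrounding keys elided (indentation is best-effort, not copied from the full file):

```yaml
# Reconstructed from the diff above; sibling services are elided.
services:
  llm:
    engine: vllm # or tgi
    model_id: "meta-llama/Llama-3.2-3B-Instruct" # mandatory
    replicaCount: [1] # flat list; no longer nested under without_teirerank
    resources:
      enabled: False
      cards_per_instance: 1
```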

README-deploy-benchmark.md (7 additions, 1 deletion)

````diff
@@ -1,4 +1,4 @@
-# ChatQnA Benchmarking
+# Deploy and Benchmark
 
 ## Purpose
 
@@ -8,6 +8,11 @@ We aim to run these benchmarks and share them with the OPEA community for three
 - To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case.
 - To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with current leading LLMs, serving frameworks etc.
 
+### Supported Examples
+
+- ChatQnA
+- DocSum
+
 ## Table of Contents
 
 - [Prerequisites](#prerequisites)
@@ -68,6 +73,7 @@ Before running the benchmarks, ensure you have:
 ```bash
 pip install -r requirements.txt
 ```
+Note: the benchmark requires `opea-eval>=1.3`; if v1.3 is not yet released, build `opea-eval` from [source](https://github.com/opea-project/GenAIEval).
 
 ## Data Preparation
 
````
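If v1.3 is not yet on PyPI, a from-source install might look like the following. This is a sketch assuming the standard pip-from-source flow; consult the GenAIEval README for the authoritative steps:

```bash
# Sketch of a from-source install; assumes the repo supports `pip install -e .`.
git clone https://github.com/opea-project/GenAIEval.git
cd GenAIEval
pip install -e .
# verify the installed version (package name assumed to match requirements.txt)
python -c "import importlib.metadata as m; print(m.version('opea-eval'))"
```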

deploy.py (8 additions, 9 deletions)

```diff
@@ -224,6 +224,7 @@ def generate_helm_values(example_type, deploy_config, chart_dir, action_type, no
             "modelUseHostPath": deploy_config.get("modelUseHostPath", ""),
         }
     }
+    os.environ["HF_TOKEN"] = deploy_config.get("HUGGINGFACEHUB_API_TOKEN", "")
 
     # Configure components
     values = configure_node_selectors(values, node_selector or {}, deploy_config)
```
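Exporting the token via `os.environ` matters because environment variables set in the parent process are inherited by any subprocess launched afterwards, such as later helm or kubectl invocations. A standalone sketch with hypothetical names, not the repo's code:

```python
# Standalone sketch (hypothetical): children spawned via subprocess inherit
# os.environ by default, so HF_TOKEN set here is visible to them.
import os
import subprocess
import sys

os.environ["HF_TOKEN"] = "hf_dummy_token_for_illustration"  # placeholder value
out = subprocess.run(
    [sys.executable, "-c", "import os; print(os.environ['HF_TOKEN'])"],
    capture_output=True,
    text=True,
)
print(out.stdout.strip())  # -> hf_dummy_token_for_illustration
```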
Later in the same file, `get_hw_values_file` drops the version-based branching in favor of an existence-based fallback:

```diff
@@ -338,17 +339,15 @@ def get_hw_values_file(deploy_config, chart_dir):
     version = deploy_config.get("version", "1.1.0")
 
     if os.path.isdir(chart_dir):
-        # Determine which values file to use based on version
-        if version in ["1.0.0", "1.1.0"]:
-            hw_values_file = os.path.join(chart_dir, f"{device_type}-values.yaml")
-        else:
-            hw_values_file = os.path.join(chart_dir, f"{device_type}-{llm_engine}-values.yaml")
-
+        hw_values_file = os.path.join(chart_dir, f"{device_type}-{llm_engine}-values.yaml")
         if not os.path.exists(hw_values_file):
             print(f"Warning: {hw_values_file} not found")
-            hw_values_file = None
-        else:
-            print(f"Device-specific values file found: {hw_values_file}")
+            hw_values_file = os.path.join(chart_dir, f"{device_type}-values.yaml")
+            if not os.path.exists(hw_values_file):
+                print(f"Warning: {hw_values_file} not found")
+                print(f"Error: cannot find a valid values file for {device_type} with {llm_engine}")
+                sys.exit(1)
+        print(f"Device-specific values file found: {hw_values_file}")
     else:
         print(f"Error: Could not find directory for {chart_dir}")
         hw_values_file = None
```
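The new logic is a two-step fallback: try the engine-specific file, then the generic device file, and exit if neither exists. An illustrative re-expression of that lookup order (the real function also reads `deploy_config` and prints diagnostics):

```python
# Illustrative re-expression of the new lookup order; not the repo's code.
import os
import sys


def pick_values_file(chart_dir: str, device_type: str, llm_engine: str) -> str:
    """Prefer e.g. gaudi-vllm-values.yaml, fall back to gaudi-values.yaml."""
    candidates = (
        f"{device_type}-{llm_engine}-values.yaml",  # engine-specific first
        f"{device_type}-values.yaml",               # generic device file next
    )
    for name in candidates:
        path = os.path.join(chart_dir, name)
        if os.path.exists(path):
            return path
    sys.exit(f"no values file for {device_type} with {llm_engine} in {chart_dir}")
```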

deploy_and_benchmark.py (1 addition, 1 deletion)

```diff
@@ -54,7 +54,7 @@ def construct_deploy_config(deploy_config, target_node, batch_param_value=None,
 
     # First determine which llm replicaCount to use based on teirerank.enabled
     services = new_config.get("services", {})
-    teirerank_enabled = services.get("teirerank", {}).get("enabled", True)
+    teirerank_enabled = services.get("teirerank", {}).get("enabled", False)
 
     # Process each service's configuration
     for service_name, service_config in services.items():
```
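The flipped default matters because DocSum's YAML no longer carries a `teirerank` block at all, so the nested `.get()` chain falls through to the default. A quick illustration:

```python
# Quick illustration of the default flip; the dict below is a stand-in for
# DocSum's services config, which no longer has a teirerank block.
services = {"llm": {"engine": "vllm"}}

old_default = services.get("teirerank", {}).get("enabled", True)   # -> True
new_default = services.get("teirerank", {}).get("enabled", False)  # -> False
print(old_default, new_default)  # the old default wrongly implied a reranker
```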

requirements.txt (1 addition, 1 deletion)

```diff
@@ -1,7 +1,7 @@
 kubernetes
 locust
 numpy
-opea-eval>=1.2
+opea-eval>=1.3
 prometheus_client
 pytest
 pyyaml
```
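A hedged sanity check that the new floor is satisfied after installing (`pip show` output formatting may vary across pip versions):

```bash
# Assumes pip is on PATH and resolves opea-eval from PyPI or a local build.
pip install -r requirements.txt
pip show opea-eval | grep -i '^version'   # expect 1.3 or newer
```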
