
Commit be5933a

Update benchmark scripts (#1883)

Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

1 parent: 18b4f39

6 files changed: +21 / -20 lines

ChatQnA/benchmark_chatqna.yaml (1 addition, 1 deletion)

```diff
@@ -3,7 +3,7 @@
 
 deploy:
   device: gaudi
-  version: 1.2.0
+  version: 1.3.0
   modelUseHostPath: /mnt/models
   HUGGINGFACEHUB_API_TOKEN: "" # mandatory
   node: [1, 2, 4, 8]
```
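The `deploy` block above is plain YAML, so the bump is just a string field. A minimal sketch of how a script might consume it, assuming PyYAML; the field names come from the diff, but the loop body is hypothetical and not the repo's actual deploy logic:

```python
# Minimal sketch, assuming PyYAML is installed. Field names are taken from
# the diff above; the loop body is illustrative, not the repo's deploy code.
import yaml

with open("ChatQnA/benchmark_chatqna.yaml") as f:
    config = yaml.safe_load(f)

deploy = config["deploy"]
print(deploy["version"])  # "1.3.0" after this commit
for node_count in deploy["node"]:  # [1, 2, 4, 8]: one run per cluster size
    print(f"would deploy ChatQnA on {node_count} Gaudi node(s)")
```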

DocSum/benchmark_docsum.yaml (3 additions, 7 deletions)

```diff
@@ -3,7 +3,7 @@
 
 deploy:
   device: gaudi
-  version: 1.2.0
+  version: 1.3.0
   modelUseHostPath: /mnt/models
   HUGGINGFACEHUB_API_TOKEN: "" # mandatory
   node: [1]
@@ -20,14 +20,10 @@ deploy:
       memory_capacity: "8000Mi"
     replicaCount: [1]
 
-  teirerank:
-    enabled: False
-
   llm:
     engine: vllm # or tgi
     model_id: "meta-llama/Llama-3.2-3B-Instruct" # mandatory
-    replicaCount:
-      without_teirerank: [1] # When teirerank.enabled is False
+    replicaCount: [1]
     resources:
       enabled: False
       cards_per_instance: 1
@@ -78,7 +74,7 @@ benchmark:
 
   # workload, all of the test cases will run for benchmark
   bench_target: ["docsumfixed"] # specify the bench_target for benchmark
-  dataset: "/home/sdp/upload.txt" # specify the absolute path to the dataset file
+  dataset: "/home/sdp/pubmed_10.txt" # specify the absolute path to the dataset file
   summary_type: "stuff"
   stream: True
 
```
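After this change the `llm` service's replica count is a flat list rather than a mapping keyed on `teirerank.enabled`. The resulting shape, reconstructed from the diff with surrounding keys elided (indentation is best-effort, not copied from the full file):

```yaml
# Reconstructed from the diff above; sibling services are elided.
services:
  llm:
    engine: vllm # or tgi
    model_id: "meta-llama/Llama-3.2-3B-Instruct" # mandatory
    replicaCount: [1] # flat list; no longer nested under without_teirerank
    resources:
      enabled: False
      cards_per_instance: 1
```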

README-deploy-benchmark.md (7 additions, 1 deletion)

````diff
@@ -1,4 +1,4 @@
-# ChatQnA Benchmarking
+# Deploy and Benchmark
 
 ## Purpose
 
@@ -8,6 +8,11 @@ We aim to run these benchmarks and share them with the OPEA community for three
 - To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case.
 - To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with current leading LLMs, serving frameworks etc.
 
+### Supported Examples
+
+- ChatQnA
+- DocSum
+
 ## Table of Contents
 
 - [Prerequisites](#prerequisites)
@@ -68,6 +73,7 @@ Before running the benchmarks, ensure you have:
 ```bash
 pip install -r requirements.txt
 ```
+Note: the benchmark requires `opea-eval>=1.3`; if v1.3 is not yet released, build `opea-eval` from [source](https://github.com/opea-project/GenAIEval).
 
 ## Data Preparation
 
````
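If v1.3 is not yet on PyPI, a from-source install might look like the following. This is a sketch assuming the standard pip-from-source flow; consult the GenAIEval README for the authoritative steps:

```bash
# Sketch of a from-source install; assumes the repo supports `pip install -e .`.
git clone https://github.com/opea-project/GenAIEval.git
cd GenAIEval
pip install -e .
# verify the installed version (package name assumed to match requirements.txt)
python -c "import importlib.metadata as m; print(m.version('opea-eval'))"
```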

deploy.py (8 additions, 9 deletions)

```diff
@@ -224,6 +224,7 @@ def generate_helm_values(example_type, deploy_config, chart_dir, action_type, no
             "modelUseHostPath": deploy_config.get("modelUseHostPath", ""),
         }
     }
+    os.environ["HF_TOKEN"] = deploy_config.get("HUGGINGFACEHUB_API_TOKEN", "")
 
     # Configure components
     values = configure_node_selectors(values, node_selector or {}, deploy_config)
```
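Exporting the token via `os.environ` matters because environment variables set in the parent process are inherited by any subprocess launched afterwards, such as later helm or kubectl invocations. A standalone sketch with hypothetical names, not the repo's code:

```python
# Standalone sketch (hypothetical): children spawned via subprocess inherit
# os.environ by default, so HF_TOKEN set here is visible to them.
import os
import subprocess
import sys

os.environ["HF_TOKEN"] = "hf_dummy_token_for_illustration"  # placeholder value
out = subprocess.run(
    [sys.executable, "-c", "import os; print(os.environ['HF_TOKEN'])"],
    capture_output=True,
    text=True,
)
print(out.stdout.strip())  # -> hf_dummy_token_for_illustration
```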
Later in the same file, `get_hw_values_file` drops the version-based branching in favor of an existence-based fallback:

```diff
@@ -338,17 +339,15 @@ def get_hw_values_file(deploy_config, chart_dir):
     version = deploy_config.get("version", "1.1.0")
 
     if os.path.isdir(chart_dir):
-        # Determine which values file to use based on version
-        if version in ["1.0.0", "1.1.0"]:
-            hw_values_file = os.path.join(chart_dir, f"{device_type}-values.yaml")
-        else:
-            hw_values_file = os.path.join(chart_dir, f"{device_type}-{llm_engine}-values.yaml")
-
+        hw_values_file = os.path.join(chart_dir, f"{device_type}-{llm_engine}-values.yaml")
         if not os.path.exists(hw_values_file):
             print(f"Warning: {hw_values_file} not found")
-            hw_values_file = None
-        else:
-            print(f"Device-specific values file found: {hw_values_file}")
+            hw_values_file = os.path.join(chart_dir, f"{device_type}-values.yaml")
+            if not os.path.exists(hw_values_file):
+                print(f"Warning: {hw_values_file} not found")
+                print(f"Error: cannot find a valid values file for {device_type} with {llm_engine}")
+                sys.exit(1)
+        print(f"Device-specific values file found: {hw_values_file}")
     else:
         print(f"Error: Could not find directory for {chart_dir}")
         hw_values_file = None
```
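The new logic is a two-step fallback: try the engine-specific file, then the generic device file, and exit if neither exists. An illustrative re-expression of that lookup order (the real function also reads `deploy_config` and prints diagnostics):

```python
# Illustrative re-expression of the new lookup order; not the repo's code.
import os
import sys


def pick_values_file(chart_dir: str, device_type: str, llm_engine: str) -> str:
    """Prefer e.g. gaudi-vllm-values.yaml, fall back to gaudi-values.yaml."""
    candidates = (
        f"{device_type}-{llm_engine}-values.yaml",  # engine-specific first
        f"{device_type}-values.yaml",               # generic device file next
    )
    for name in candidates:
        path = os.path.join(chart_dir, name)
        if os.path.exists(path):
            return path
    sys.exit(f"no values file for {device_type} with {llm_engine} in {chart_dir}")
```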

deploy_and_benchmark.py (1 addition, 1 deletion)

```diff
@@ -54,7 +54,7 @@ def construct_deploy_config(deploy_config, target_node, batch_param_value=None,
 
     # First determine which llm replicaCount to use based on teirerank.enabled
     services = new_config.get("services", {})
-    teirerank_enabled = services.get("teirerank", {}).get("enabled", True)
+    teirerank_enabled = services.get("teirerank", {}).get("enabled", False)
 
     # Process each service's configuration
     for service_name, service_config in services.items():
```
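The flipped default matters because DocSum's YAML no longer carries a `teirerank` block at all, so the nested `.get()` chain falls through to the default. A quick illustration:

```python
# Quick illustration of the default flip; the dict below is a stand-in for
# DocSum's services config, which no longer has a teirerank block.
services = {"llm": {"engine": "vllm"}}

old_default = services.get("teirerank", {}).get("enabled", True)   # -> True
new_default = services.get("teirerank", {}).get("enabled", False)  # -> False
print(old_default, new_default)  # the old default wrongly implied a reranker
```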

requirements.txt (1 addition, 1 deletion)

```diff
@@ -1,7 +1,7 @@
 kubernetes
 locust
 numpy
-opea-eval>=1.2
+opea-eval>=1.3
 prometheus_client
 pytest
 pyyaml
```
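A hedged sanity check that the new floor is satisfied after installing (`pip show` output formatting may vary across pip versions):

```bash
# Assumes pip is on PATH and resolves opea-eval from PyPI or a local build.
pip install -r requirements.txt
pip show opea-eval | grep -i '^version'   # expect 1.3 or newer
```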
