# only support chatqna for now
service_endpoints = {
    "chatqna": "/v1/chatqna",
+    "docsum": "/v1/docsum",
}
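For reference, the endpoint map above is keyed by service name; a minimal sketch of how the new `docsum` entry could be resolved into a request URL (the `base_url`/`service_name` variables and the join logic are illustrative assumptions, not taken from this patch):

```python
# Illustration only: look up the REST path registered for a service and
# join it with a base URL. The real script may assemble URLs differently.
service_endpoints = {
    "chatqna": "/v1/chatqna",
    "docsum": "/v1/docsum",
}

def resolve_url(base_url: str, service_name: str) -> str:
    """Return the full request URL for a registered service."""
    return base_url.rstrip("/") + service_endpoints[service_name]

print(resolve_url("http://localhost:8888", "docsum"))  # http://localhost:8888/v1/docsum
```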
@@ -35,6 +36,9 @@ def construct_benchmark_config(test_suite_config):
        "dataset": test_suite_config.get("dataset", ""),
        "prompt": test_suite_config.get("prompt", [10]),
        "llm_max_token_size": test_suite_config.get("llm", {}).get("max_token_size", [128]),
+        "collect_service_metric": test_suite_config.get("collect_service_metric", False),
+        "summary_type": test_suite_config.get("summary_type", "auto"),
+        "stream": test_suite_config.get("stream", "auto"),
    }
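The `.get()` defaults above describe the expected shape of `test_suite_config`; a hedged sketch of an input dict exercising the new keys (values are examples mirroring the defaults, not taken from this patch):

```python
# Illustrative test_suite_config for construct_benchmark_config();
# every value below is an example, chosen to match the defaults above.
test_suite_config = {
    "dataset": "",                        # document(s) to summarize for DocSum
    "prompt": [10],
    "llm": {"max_token_size": [128]},
    "collect_service_metric": False,      # new: whether to collect service metrics
    "summary_type": "auto",               # new: summarization strategy for DocSum
    "stream": "auto",                     # new: streaming behavior for responses
}
```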
@@ -144,6 +148,8 @@ def _create_yaml_content(service, base_url, bench_target, test_phase, num_querie
            "llm-model": test_params["llm_model"],
            "deployment-type": test_params["deployment_type"],
            "load-shape": load_shape,
+            "summary_type": test_params.get("summary_type", "auto"),
+            "stream": test_params.get("stream", True),
        },
        "runs": [{"name": test_phase, "users": concurrency, "max-request": num_queries}],
    }
@@ -373,7 +379,9 @@ def run_benchmark(benchmark_config, chart_name, namespace, node_num=1, llm_model
        "user_queries": parsed_data["user_queries"],  # num of user queries
        "random_prompt": False,  # whether to use random prompt, set to False by default
        "run_time": "30m",  # The max total run time for the test suite, set to 60m by default
-        "collect_service_metric": False,  # whether to collect service metrics, set to False by default
+        "collect_service_metric": (
+            parsed_data["collect_service_metric"] if parsed_data["collect_service_metric"] else False
+        ),  # Metrics collection set to False by default
        "llm_model": llm_model,  # The LLM model used for the test
        "deployment_type": "k8s",  # Default is "k8s", can also be "docker"
        "service_ip": None,  # Leave as None for k8s, specify for Docker
@@ -398,9 +406,15 @@ def run_benchmark(benchmark_config, chart_name, namespace, node_num=1, llm_model
        "dataset": parsed_data["dataset"],
        "prompt": parsed_data["prompt"],
        "llm_max_token_size": parsed_data["llm_max_token_size"],
+        "summary_type": parsed_data["summary_type"],
+        "stream": parsed_data["stream"],
    }

-    dataset = None
+    if parsed_data["dataset"]:  # This checks if user provided dataset/document for DocSum service
+        dataset = parsed_data["dataset"]
+    else:
+        dataset = None
+
    query_data = None
    os.environ["MODEL_NAME"] = test_suite_config.get("llm_model", "meta-llama/Meta-Llama-3-8B-Instruct")
    # Do benchmark in for-loop for different llm_max_token_size
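Likewise, the added `dataset` branch is a plain truthiness check and reduces to a one-liner; again an editorial observation rather than part of the patch:

```python
# `x if x else None` is the same as `x or None`.
parsed_data = {"dataset": ""}  # illustrative input: no document provided
dataset = parsed_data["dataset"] or None
print(dataset)  # None
```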
@@ -428,6 +442,21 @@ def run_benchmark(benchmark_config, chart_name, namespace, node_num=1, llm_model
            "max_output": llm_max_token,  # max number of output tokens
            "k": 1,  # number of retrieved documents
        }
+        if chart_name == "docsum":
+            case_data = {
+                "run_test": True,
+                "service_name": "docsum",
+                "service_list": [
+                    "docsum",
+                    "docsum-llm-uservice",
+                    "docsum-vllm",
+                ],
+                "stream": parsed_data["stream"],
+                "max_output": llm_max_token,  # max number of output tokens
+                "summary_type": parsed_data["summary_type"],  # Summary_type for DocSum
+                "dataset": dataset,  # Dataset used for document summary
+            }
+
        output_folder = _run_service_test(chart_name, case_data, test_suite_config, namespace)

        print(f"[OPEA BENCHMARK] 🚀 Test Finished. Output saved in {output_folder}.")