Skip to content

Commit b47ff92

Browse files
authored
Add max-num-seqs
1 parent d5a66d6 commit b47ff92

File tree

1 file changed

+11
-1
lines changed

1 file changed

+11
-1
lines changed

benchmarks/benchmark_throughput.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,7 @@ def run_vllm(
8383
enable_prefix_caching: bool,
8484
enable_chunked_prefill: bool,
8585
max_num_batched_tokens: int,
86+
max_num_seqs: int,
8687
distributed_executor_backend: Optional[str],
8788
gpu_memory_utilization: float = 0.9,
8889
num_scheduler_steps: int = 1,
@@ -111,6 +112,7 @@ def run_vllm(
111112
download_dir=download_dir,
112113
enable_chunked_prefill=enable_chunked_prefill,
113114
max_num_batched_tokens=max_num_batched_tokens,
115+
max_num_seqs=max_num_seqs,
114116
distributed_executor_backend=distributed_executor_backend,
115117
load_format=load_format,
116118
num_scheduler_steps=num_scheduler_steps,
@@ -172,6 +174,7 @@ async def run_vllm_async(
172174
enable_prefix_caching: bool,
173175
enable_chunked_prefill: bool,
174176
max_num_batched_tokens: int,
177+
max_num_seqs: int,
175178
distributed_executor_backend: Optional[str],
176179
gpu_memory_utilization: float = 0.9,
177180
num_scheduler_steps: int = 1,
@@ -200,6 +203,7 @@ async def run_vllm_async(
200203
download_dir=download_dir,
201204
enable_chunked_prefill=enable_chunked_prefill,
202205
max_num_batched_tokens=max_num_batched_tokens,
206+
max_num_seqs=max_num_seqs,
203207
distributed_executor_backend=distributed_executor_backend,
204208
load_format=load_format,
205209
num_scheduler_steps=num_scheduler_steps,
@@ -341,7 +345,8 @@ def main(args: argparse.Namespace):
341345
args.enforce_eager, args.kv_cache_dtype,
342346
args.quantization_param_path, args.device,
343347
args.enable_prefix_caching, args.enable_chunked_prefill,
344-
args.max_num_batched_tokens, args.distributed_executor_backend,
348+
args.max_num_batched_tokens, args.max_num_seqs,
349+
args.distributed_executor_backend,
345350
args.gpu_memory_utilization, args.num_scheduler_steps,
346351
args.use_v2_block_manager, args.download_dir, args.load_format,
347352
args.disable_async_output_proc
@@ -494,6 +499,11 @@ def main(args: argparse.Namespace):
494499
default=None,
495500
help='maximum number of batched tokens per '
496501
'iteration')
502+
parser.add_argument('--max-num-seqs',
503+
type=int,
504+
default=None,
505+
help='maximum number of sequences per '
506+
'iteration')
497507
parser.add_argument('--download-dir',
498508
type=str,
499509
default=None,

0 commit comments

Comments
 (0)