Skip to content

Commit 5e4fa88

Browse files
heheda12345 authored and Alvant committed
[Bugfix] Support testing prefill throughput with benchmark_serving.py --hf-output-len 1 (vllm-project#8891)
Signed-off-by: Alvant <alvasian@yandex.ru>
1 parent 6711f4c commit 5e4fa88

File tree

1 file changed

+4
-5
lines changed

1 file changed

+4
-5
lines changed

benchmarks/benchmark_serving.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -89,8 +89,6 @@ def sample_sharegpt_requests(
8989
tokenizer: PreTrainedTokenizerBase,
9090
fixed_output_len: Optional[int] = None,
9191
) -> List[Tuple[str, int, int, None]]:
92-
if fixed_output_len is not None and fixed_output_len < 4:
93-
raise ValueError("output_len too small")
9492
# Load the dataset.
9593
with open(dataset_path) as f:
9694
dataset = json.load(f)
@@ -117,7 +115,7 @@ def sample_sharegpt_requests(
117115
prompt_len = len(prompt_token_ids)
118116
output_len = len(completion_token_ids
119117
) if fixed_output_len is None else fixed_output_len
120-
if prompt_len < 4 or output_len < 4:
118+
if prompt_len < 4 or (fixed_output_len is None and output_len < 4):
121119
# Prune too short sequences.
122120
continue
123121
if prompt_len > 1024 or prompt_len + output_len > 2048:
@@ -228,10 +226,11 @@ def sample_hf_requests(
228226
prompt_len = len(prompt_token_ids)
229227
output_len = len(completion_token_ids
230228
) if fixed_output_len is None else fixed_output_len
231-
if prompt_len < 4 or output_len < 4:
229+
if fixed_output_len is None and (prompt_len < 4 or output_len < 4):
232230
# Prune too short sequences.
233231
continue
234-
if prompt_len > 1024 or prompt_len + output_len > 2048:
232+
if fixed_output_len is None and \
233+
(prompt_len > 1024 or prompt_len + output_len > 2048):
235234
# Prune too long sequences.
236235
continue
237236

0 commit comments

Comments
 (0)