Commit 5b2dcbf

Fix Whisper crash caused by invalid max_num_batched_tokens config (#17853)
Signed-off-by: inkcherry <mingzhi.liu@intel.com>
1 parent: 6e4a93e
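
The commit's added comment explains the crash: some models, e.g. Whisper, have embeddings tied to max length, so a default token budget larger than the model limit can index past the end of a fixed-size embedding table. A minimal sketch of that failure mode with purely illustrative numbers (the Whisper-style max_model_len of 448 and the 5120 budget are assumptions, not values taken from this commit):

# Sketch of the failure mode, as described by the commit's own comment.
# All numbers are illustrative assumptions.
max_model_len = 448                # e.g. a Whisper-style decoder limit (assumed)
pos_embed = [0.0] * max_model_len  # embedding table tied to max length

max_num_batched_tokens = 5120      # token budget far above the model limit
try:
    _ = [pos_embed[p] for p in range(max_num_batched_tokens)]
except IndexError as err:
    print("crash:", err)           # list index out of range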

1 file changed: +14 -0 lines changed


vllm/config.py

Lines changed: 14 additions & 0 deletions
@@ -2050,6 +2050,13 @@ def __post_init__(self) -> None:
                     _MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS,
                 )
 
+            # When using default settings,
+            # Ensure max_num_batched_tokens does not exceed model limit.
+            # Some models (e.g., Whisper) have embeddings tied to max length.
+            self.max_num_batched_tokens = min(
+                self.max_num_seqs * self.max_model_len,
+                self.max_num_batched_tokens)
+
         self.max_num_encoder_input_tokens = self.max_num_batched_tokens
         self.encoder_cache_size = self.max_num_batched_tokens
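
The hunk above is the fix itself: on the default-settings path, the token budget is clamped to the largest batch the model can hold, i.e. max_num_seqs sequences of at most max_model_len tokens each. A standalone sketch of the clamp under assumed names and numbers (SchedulerCfg is a hypothetical stand-in, not vLLM's actual SchedulerConfig):

from dataclasses import dataclass

@dataclass
class SchedulerCfg:  # hypothetical stand-in for vLLM's SchedulerConfig
    max_num_seqs: int
    max_model_len: int
    max_num_batched_tokens: int

    def clamp(self) -> None:
        # Cap the budget at the largest legal batch:
        # max_num_seqs sequences of at most max_model_len tokens each.
        self.max_num_batched_tokens = min(
            self.max_num_seqs * self.max_model_len,
            self.max_num_batched_tokens)

cfg = SchedulerCfg(max_num_seqs=2, max_model_len=448,
                   max_num_batched_tokens=5120)
cfg.clamp()
print(cfg.max_num_batched_tokens)  # 896 (= 2 * 448), no longer 5120

Taking the min makes the clamp a no-op whenever the configured budget is already within the model limit.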

@@ -2090,6 +2097,13 @@ def _verify_args(self) -> None:
                 "be greater than or equal to max_num_seqs "
                 f"({self.max_num_seqs}).")
 
+        if self.max_num_batched_tokens > self.max_num_seqs * self.max_model_len:
+            logger.warning(
+                "max_num_batched_tokens (%d) exceeds max_num_seqs "
+                "* max_model_len (%d). This may lead to unexpected behavior.",
+                self.max_num_batched_tokens,
+                self.max_num_seqs * self.max_model_len)
+
         if self.num_lookahead_slots < 0:
             raise ValueError(
                 "num_lookahead_slots "
