File tree 1 file changed +14
-0
lines changed
1 file changed +14
-0
lines changed Original file line number Diff line number Diff line change @@ -2050,6 +2050,13 @@ def __post_init__(self) -> None:
2050
2050
_MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS ,
2051
2051
)
2052
2052
2053
+ # When using default settings,
2054
+ # ensure max_num_batched_tokens does not exceed the model limit.
2055
+ # Some models (e.g., Whisper) have embeddings tied to max length.
2056
+ self .max_num_batched_tokens = min (
2057
+ self .max_num_seqs * self .max_model_len ,
2058
+ self .max_num_batched_tokens )
2059
+
2053
2060
self .max_num_encoder_input_tokens = self .max_num_batched_tokens
2054
2061
self .encoder_cache_size = self .max_num_batched_tokens
2055
2062
@@ -2090,6 +2097,13 @@ def _verify_args(self) -> None:
2090
2097
"be greater than or equal to max_num_seqs "
2091
2098
f"({ self .max_num_seqs } )." )
2092
2099
2100
+ if self .max_num_batched_tokens > self .max_num_seqs * self .max_model_len :
2101
+ logger .warning (
2102
+ "max_num_batched_tokens (%d) exceeds max_num_seqs"
2103
+ "* max_model_len (%d). This may lead to unexpected behavior." ,
2104
+ self .max_num_batched_tokens ,
2105
+ self .max_num_seqs * self .max_model_len )
2106
+
2093
2107
if self .num_lookahead_slots < 0 :
2094
2108
raise ValueError (
2095
2109
"num_lookahead_slots "
You can’t perform that action at this time.
0 commit comments