File tree: 2 files changed, +13 -3 lines changed
Original file line number | Diff line number | Diff line change
@@ -843,6 +843,13 @@ def create_engine_config(self) -> EngineConfig:
843
843
device_config = DeviceConfig (device = self .device )
844
844
model_config = self .create_model_config ()
845
845
846
+ if model_config .is_multimodal_model :
847
+ if self .enable_prefix_caching :
848
+ logger .warning (
849
+ "--enable-prefix-caching is currently not "
850
+ "supported for multimodal models and has been disabled." )
851
+ self .enable_prefix_caching = False
852
+
846
853
cache_config = CacheConfig (
847
854
block_size = self .block_size if self .device != "neuron" else
848
855
self .max_model_len , # neuron needs block_size = max_model_len
@@ -874,7 +881,10 @@ def create_engine_config(self) -> EngineConfig:
874
881
# If not explicitly set, enable chunked prefill by default for
875
882
# long context (> 32K) models. This is to avoid OOM errors in the
876
883
# initial memory profiling phase.
877
- if use_long_context :
884
+
885
+ # Chunked prefill is currently disabled for multimodal models by
886
+ # default.
887
+ if use_long_context and not model_config .is_multimodal_model :
878
888
is_gpu = device_config .device_type == "cuda"
879
889
use_sliding_window = (model_config .get_sliding_window ()
880
890
is not None )
Original file line number | Diff line number | Diff line change
90
90
"PaliGemmaForConditionalGeneration" : ("paligemma" ,
91
91
"PaliGemmaForConditionalGeneration" ),
92
92
"Phi3VForCausalLM" : ("phi3v" , "Phi3VForCausalLM" ),
93
- "UltravoxModel" : ("ultravox" , "UltravoxModel" ),
94
- "QWenLMHeadModel" : ("qwen" , "QWenLMHeadModel" ),
95
93
"PixtralForConditionalGeneration" : ("pixtral" ,
96
94
"PixtralForConditionalGeneration" ),
95
+ "QWenLMHeadModel" : ("qwen" , "QWenLMHeadModel" ),
97
96
"Qwen2VLForConditionalGeneration" : ("qwen2_vl" ,
98
97
"Qwen2VLForConditionalGeneration" ),
98
+ "UltravoxModel" : ("ultravox" , "UltravoxModel" ),
99
99
}
100
100
_CONDITIONAL_GENERATION_MODELS = {
101
101
"BartModel" : ("bart" , "BartForConditionalGeneration" ),
You can’t perform that action at this time.
0 commit comments