1 file changed: +5 -3 lines changed
vllm/model_executor/layers/fused_moe
@@ -492,12 +492,14 @@ def fused_experts(hidden_states: torch.Tensor,
492
492
if tokens_in_chunk == 0 :
493
493
break
494
494
495
- if tokens_in_chunk < CHUNK_SIZE :
496
- # will only happen in the last chunk
495
+ if tokens_in_chunk < CHUNK_SIZE and chunk > 0 :
496
+ # Adjust the intermediate cache size and config for the last
497
+ # chunk. Note that in most cases we only have one chunk
498
+ # so the cache size and config are already set correctly and
499
+ # do not need to be adjusted.
497
500
intermediate_cache1 = intermediate_cache1 [:tokens_in_chunk ]
498
501
intermediate_cache2 = intermediate_cache2 [:tokens_in_chunk ]
499
502
intermediate_cache3 = intermediate_cache3 [:tokens_in_chunk ]
500
- # reload config to get better performance on the last chunk
501
503
config = get_config_func (tokens_in_chunk )
502
504
503
505
curr_topk_ids = topk_ids [begin_chunk_idx :end_chunk_idx ]
You can’t perform that action at this time.
0 commit comments