diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index 0b3c02d1ba2..f1cb77f64ea 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -503,7 +503,6 @@ def forward_cuda( indices_type=torch.uint32 if self.moe.use_pplx_kernels else None) if self.rocm_aiter_moe_enabled: - assert not apply_router_weight_on_input assert expert_map is None return self.rocm_aiter_fused_experts( hidden_states=x,