Fix arg checking for GGUF/Quark/GPTQMarlin quantized MoE methods #17491
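
This PR tightens argument validation in three quantized fused-MoE `apply` methods: the GGUF method now asserts that no `expert_map` is passed, the GPTQMarlin method tests the value of `apply_router_weight_on_input` rather than `is not None` (which is always true for a boolean), and the Quark fp8 method forwards the `activation` argument into its fused-experts call instead of silently dropping it.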

Open · wants to merge 2 commits into main

vllm/model_executor/layers/quantization/gguf.py (1 addition, 0 deletions)

@@ -343,6 +343,7 @@ def apply(
         activation: str = "silu",
     ):
         assert activation == "silu", "Only SiLU activation is supported."
+        assert expert_map is None, "Expert maps not supported yet"
         if apply_router_weight_on_input:
             raise NotImplementedError(
                 "Apply router weight on input is not supported for"
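
The new assert matters under expert parallelism: there, `expert_map` translates the global expert IDs produced by the router into rank-local indices (with -1 for experts owned by other ranks), a translation the GGUF MoE path does not perform, per the assert message. A minimal sketch of what such a map encodes, with hypothetical values, not vLLM code:

```python
# Sketch only: under expert parallelism each rank holds a subset of
# experts, and expert_map translates global expert IDs into local
# indices (-1 for experts living on another rank).
import torch

global_num_experts = 8
local_expert_ids = [4, 5, 6, 7]          # experts owned by this rank (assumed)

expert_map = torch.full((global_num_experts,), -1, dtype=torch.int32)
expert_map[local_expert_ids] = torch.arange(len(local_expert_ids),
                                            dtype=torch.int32)

topk_ids = torch.tensor([[1, 4], [6, 3]])  # global IDs chosen by the router
print(expert_map[topk_ids])
# tensor([[-1,  0],
#         [ 2, -1]], dtype=torch.int32) -> -1 entries must be skipped locally,
# which a kernel that ignores expert_map cannot do.
```
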
vllm/model_executor/layers/quantization/gptq_marlin.py (1 addition, 1 deletion)

@@ -607,7 +607,7 @@ def apply(
         activation: str = "silu",
     ) -> torch.Tensor:
         assert activation == "silu", "Only SiLU activation is supported."
-        if apply_router_weight_on_input is not None:
+        if apply_router_weight_on_input:
             raise NotImplementedError(
                 "Apply router weight on input is not supported for"
                 "fused Marlin MoE method.")
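
This one-line change fixes an inverted guard: `apply_router_weight_on_input` is a boolean, so the old `is not None` test was true even for the default `False`, and the method raised `NotImplementedError` unconditionally. A quick illustration:

```python
# Why the old check was wrong: the flag is a bool, so `is not None`
# is True even for the default False value.
apply_router_weight_on_input = False             # the default

print(apply_router_weight_on_input is not None)  # True  -> old check always raised
print(bool(apply_router_weight_on_input))        # False -> new check raises only
                                                 #          when the flag is set
```
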
Quark MoE method (file path not captured; 1 addition, 0 deletions)

@@ -226,6 +226,7 @@ def apply(
             topk_weights=topk_weights,
             topk_ids=topk_ids,
             inplace=True,
+            activation=activation,
             use_fp8_w8a8=True,
             global_num_experts=global_num_experts,
             apply_router_weight_on_input=apply_router_weight_on_input,
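
Here the `activation` argument received by `apply` was previously not forwarded into the fused-experts call, so a non-default activation was silently replaced by the callee's default. A minimal sketch of that bug class, using a hypothetical stand-in rather than the real `fused_experts` signature:

```python
# Sketch only: a wrapper that accepts `activation` but forgets to
# forward it silently falls back to the callee's default.
def fused_experts_stub(x, activation="silu"):   # hypothetical stand-in
    return f"ran with activation={activation}"

def apply_before(x, activation="silu"):
    return fused_experts_stub(x)                # drops the argument

def apply_after(x, activation="silu"):
    return fused_experts_stub(x, activation=activation)  # forwards it

print(apply_before(0, activation="gelu"))   # ran with activation=silu  (wrong)
print(apply_after(0, activation="gelu"))    # ran with activation=gelu  (fixed)
```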