Skip to content

Commit 1df491c

Browse files
authored
[Bugfix] Fixes for new marlin moe usage (#18017)
Signed-off-by: mgoin <mgoin64@gmail.com>
1 parent d8487ef commit 1df491c

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,10 @@ def get_moe_method(
                 "input_activations")

             if quant_config._is_wNa16_group_channel(weight_quant, input_quant):
+                # group_size=None means channelwise
+                group_size = weight_quant.group_size or -1
                 # Prefer to use the MarlinMoE kernel when it is supported.
-                if not check_moe_marlin_supports_layer(layer,
-                                                       weight_quant.group_size):
+                if not check_moe_marlin_supports_layer(layer, group_size):
                     if (weight_quant.strategy in QuantizationStrategy.GROUP and
                             weight_quant.actorder in (ActivationOrdering.GROUP,
                                                       ActivationOrdering.DYNAMIC)):

vllm/model_executor/layers/quantization/gptq_marlin.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -610,9 +610,9 @@ def apply(
         activation: str = "silu",
     ) -> torch.Tensor:
         assert activation == "silu", "Only SiLU activation is supported."
-        if apply_router_weight_on_input is not None:
+        if apply_router_weight_on_input:
             raise NotImplementedError(
-                "Apply router weight on input is not supported for"
+                "Apply router weight on input is not supported for "
                 "fused Marlin MoE method.")

         topk_weights, topk_ids = FusedMoE.select_experts(

0 commit comments

Comments (0)