File tree: vllm/model_executor/layers/quantization
2 files changed: +5 −4 lines changed

@@ -57,9 +57,10 @@ def get_moe_method(
             "input_activations")
 
         if quant_config._is_wNa16_group_channel(weight_quant, input_quant):
+            # group_size=None means channelwise
+            group_size = weight_quant.group_size or -1
             # Prefer to use the MarlinMoE kernel when it is supported.
-            if not check_moe_marlin_supports_layer(layer,
-                                                   weight_quant.group_size):
+            if not check_moe_marlin_supports_layer(layer, group_size):
                 if (weight_quant.strategy in QuantizationStrategy.GROUP and
                         weight_quant.actorder in (ActivationOrdering.GROUP,
                                                   ActivationOrdering.DYNAMIC)):
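
The new lines normalize group_size before the Marlin support check: in compressed-tensors configs, group_size=None denotes channelwise quantization, while check_moe_marlin_supports_layer expects the conventional -1 sentinel. A minimal sketch of the idiom (the helper name is hypothetical, not part of the diff):

    def normalize_group_size(group_size):
        # None marks channelwise quantization; the Marlin support check
        # expects the -1 sentinel instead.
        return group_size or -1

    assert normalize_group_size(None) == -1   # channelwise -> sentinel
    assert normalize_group_size(128) == 128   # grouped size passes through

Note that `or` would also coerce group_size=0 to -1, which is harmless here since 0 is not a valid group size.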

@@ -610,9 +610,9 @@ def apply(
         activation: str = "silu",
     ) -> torch.Tensor:
         assert activation == "silu", "Only SiLU activation is supported."
-        if apply_router_weight_on_input is not None:
+        if apply_router_weight_on_input:
             raise NotImplementedError(
-                "Apply router weight on input is not supported for"
+                "Apply router weight on input is not supported for "
                 "fused Marlin MoE method.")
 
         topk_weights, topk_ids = FusedMoE.select_experts(
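
This hunk fixes two small bugs in the guard. The flag apply_router_weight_on_input is a boolean, so the old `is not None` test raised NotImplementedError even for an explicit False; testing truthiness raises only when the feature is actually requested. The message fix matters because Python joins adjacent string literals with no separator, so the trailing space is needed to render "...for fused..." rather than "...forfused...". A quick sketch of both behaviors (assuming the flag defaults to False, as the truthiness test suggests):

    # Old guard: `is not None` treats an explicit False as "set".
    assert (False is not None) is True     # would have raised for False
    # New guard: truthiness rejects only a True flag.
    assert bool(False) is False            # False now passes through

    # Adjacent string literals concatenate with no separator.
    broken = ("Apply router weight on input is not supported for"
              "fused Marlin MoE method.")
    fixed = ("Apply router weight on input is not supported for "
             "fused Marlin MoE method.")
    assert "forfused" in broken and "for fused" in fixed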