
Commit 5f2cb45

Author: rshaw@neuralmagic.com
Commit message: update condition slightly
1 parent: 9f9d039

File tree: 1 file changed (+2 additions, -3 deletions)

vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py

Lines changed: 2 additions & 3 deletions
@@ -36,9 +36,8 @@ def __init__(self, strategy: str, is_static_input_scheme: bool):
     def process_weights_after_loading(self, layer) -> None:
         # If per tensor, when we have a fused module (e.g. QKV) with per
         # tensor scales (thus N scales being passed to the kernel),
-        # requantize so we can always run per tensor with torch._scaled_mm
-        if (self.strategy == QuantizationStrategy.TENSOR
-                or not self.cutlass_fp8_supported):
+        # requantize so we can always run per tensor
+        if self.strategy == QuantizationStrategy.TENSOR:
             max_w_scale, weight = requantize_with_max_scale(
                 weight=layer.weight,
                 weight_scale=layer.weight_scale,
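For context, the requantization step collapses the N per-shard scales of a fused module (e.g. QKV) into a single per-tensor scale, so the kernel only ever receives one scale. Below is a minimal, hypothetical sketch of that idea; the function name, the logical_widths argument, and the fp8 dtype are illustrative assumptions, not vLLM's actual requantize_with_max_scale implementation:

    import torch

    # Sketch only: shows the idea behind collapsing N per-shard fp8
    # scales into one per-tensor scale. Names, shapes, and the
    # logical_widths argument are assumptions, not vLLM's real helper.
    def requantize_with_max_scale_sketch(
        weight: torch.Tensor,        # fused fp8 weight, shards stacked on dim 0
        weight_scale: torch.Tensor,  # one scale per shard (N scales)
        logical_widths: list[int],   # rows belonging to each shard (assumed)
    ) -> tuple[torch.Tensor, torch.Tensor]:
        # The shared per-tensor scale is the largest per-shard scale, so
        # no shard's values overflow the fp8 range after requantization.
        max_w_scale = weight_scale.max()
        finfo = torch.finfo(torch.float8_e4m3fn)
        requantized = torch.empty_like(weight)
        start = 0
        for shard_scale, width in zip(weight_scale, logical_widths):
            end = start + width
            # Dequantize the shard with its own scale, then requantize
            # with the shared max scale so one scale serves the whole tensor.
            dequant = weight[start:end].to(torch.float32) * shard_scale
            requant = (dequant / max_w_scale).clamp(finfo.min, finfo.max)
            requantized[start:end] = requant.to(torch.float8_e4m3fn)
            start = end
        return max_w_scale, requantized

Requantizing with the maximum shard scale keeps every value representable in fp8; the trade-off is slightly coarser resolution for shards whose original scales were smaller.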
