
Commit e5d9c7a

charlifu committed
disable padding for non-cuda cases.
Signed-off-by: charlifu <charlifu@amd.com>
1 parent 8ce4101

File tree

2 files changed
+2 -3 lines changed

tests/compile/test_fusion.py

Lines changed: 1 addition & 2 deletions

@@ -51,8 +51,7 @@ def forward(self, x):
 @pytest.mark.parametrize("hidden_size", [64, 3392, 4096])
 @pytest.mark.parametrize("num_tokens", [7, 256, 533, 2048, 2049])
 @pytest.mark.parametrize("eps", [1e-5, 1e-6])
-@pytest.mark.skipif(envs.VLLM_TARGET_DEVICE != "cuda"
-                    and envs.VLLM_TARGET_DEVICE != "rocm",
+@pytest.mark.skipif(envs.VLLM_TARGET_DEVICE not in ["cuda", "rocm"],
                     reason="Only test on CUDA and Rocm")
 def test_fusion_rmsnorm_quant(dtype, hidden_size, num_tokens, eps):
     torch.set_default_device("cuda")
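Side note on the test change: the chained inequalities and the membership test make the same skip decision, so this is a pure simplification. A minimal sketch (not part of the commit; the device loop below is a stand-in for envs.VLLM_TARGET_DEVICE):

for device in ["cuda", "rocm", "cpu", "tpu", "xpu"]:
    old = device != "cuda" and device != "rocm"
    new = device not in ["cuda", "rocm"]
    assert old == new  # identical skip decision on every platform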

vllm/model_executor/layers/quantization/utils/w8a8_utils.py

Lines changed: 1 addition & 1 deletion

@@ -125,7 +125,7 @@ def apply_fp8_linear(
     qinput, x_scale = ops.scaled_fp8_quant(
         input_2d,
         input_scale,
-        num_token_padding=17,
+        num_token_padding=17 if current_platform.is_cuda() else None,
         use_per_token_if_dynamic=use_per_token_if_dynamic)

     per_tensor_weights = (weight_scale.numel() == 1)
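For context, num_token_padding in ops.scaled_fp8_quant pads the first (token) dimension of the quantized output up to at least the given number of rows; after this commit the pad-to-17 behavior applies only on CUDA, and passing None disables padding everywhere else. A minimal sketch of that padding behavior under this assumption; pad_tokens is a hypothetical stand-in, not vLLM code:

import torch

def pad_tokens(x: torch.Tensor, num_token_padding: int | None) -> torch.Tensor:
    # Pad dim 0 up to at least num_token_padding rows; None means no padding,
    # which is the non-CUDA path after this commit.
    if num_token_padding is None or x.shape[0] >= num_token_padding:
        return x
    return torch.nn.functional.pad(x, (0, 0, 0, num_token_padding - x.shape[0]))

x = torch.randn(7, 64)
print(pad_tokens(x, 17).shape)    # torch.Size([17, 64]) -- CUDA path
print(pad_tokens(x, None).shape)  # torch.Size([7, 64])  -- non-CUDA path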

0 commit comments