1 parent 33e3b37 commit e38042d
vllm/model_executor/layers/quantization/fp8.py
@@ -257,7 +257,9 @@ def apply(self,
         # If dynamic, layer.input_scale is None and x_scale computed from x.
         # If static, layer.input_scale is scalar and x_scale is input_scale.

-        if bias is None and self.cutlass_fp8_supported:
+        # Temporarily disable CUTLASS kernels due to an illegal memory access
+        #if bias is None and self.cutlass_fp8_supported:
+        if False:
             qinput, x_scale = ops.scaled_fp8_quant(x, layer.input_scale)

         # Fused GEMM_DQ
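For context, here is a minimal sketch of how the surrounding apply() path behaves once the CUTLASS branch is forced off. Only ops.scaled_fp8_quant and the `if False:` guard come from the diff above; the torch._scaled_mm fallback, the import path, and the layer attribute names are assumptions for illustration, not the actual vLLM implementation.

import torch
from vllm import _custom_ops as ops  # assumed import path for the ops module


def apply_sketch(layer, x: torch.Tensor, bias=None):
    if False:
        # CUTLASS fused GEMM_DQ branch, disabled by this commit and never taken.
        qinput, x_scale = ops.scaled_fp8_quant(x, layer.input_scale)
    else:
        # Every call now falls through to the non-CUTLASS path (assumed shape):
        # quantize the activations, then run a scaled matmul in PyTorch.
        qinput, x_scale = ops.scaled_fp8_quant(x, layer.input_scale)
        out = torch._scaled_mm(
            qinput,
            layer.weight,
            out_dtype=x.dtype,
            scale_a=x_scale,
            scale_b=layer.weight_scale,
            bias=bias,
        )
        # Some PyTorch builds return (output, amax); normalize to a tensor.
        if isinstance(out, tuple):
            out = out[0]
        return out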