
Commit f4a8a37

[Minor] Rename quantization nvfp4 to modelopt_fp4 (#18356)
Signed-off-by: mgoin <mgoin64@gmail.com>
Parent: 8f55962

4 files changed (+7, -7 lines)

tests/models/quantization/test_nvfp4.py

Lines changed: 3 additions & 3 deletions
@@ -41,16 +41,16 @@
     reason=
     "Prevent unstable test based on golden strings from breaking the build "
     " and test input model being too large and hanging the system.")
-@pytest.mark.skipif(not is_quant_method_supported("nvfp4"),
-                    reason="nvfp4 is not supported on this GPU type.")
+@pytest.mark.skipif(not is_quant_method_supported("modelopt_fp4"),
+                    reason="modelopt_fp4 is not supported on this GPU type.")
 @pytest.mark.parametrize("model_name", MODELS)
 def test_models(example_prompts, model_name) -> None:
     model = LLM(
         model=model_name,
         max_model_len=MAX_MODEL_LEN,
         trust_remote_code=True,
         enforce_eager=True,
-        quantization="nvfp4",
+        quantization="modelopt_fp4",
     )
 
     tokenizer = AutoTokenizer.from_pretrained(model_name)
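This commit renames the user-facing quantization method string from "nvfp4" to "modelopt_fp4" in the test above, the config check, the method registry, and the ModelOpt config class. After the rename, selecting an NVFP4 checkpoint looks like the minimal sketch below; the model path is a hypothetical placeholder, not taken from the diff.

from vllm import LLM, SamplingParams

# Minimal sketch: pass the renamed method string when loading a model.
# The model path is a hypothetical placeholder for any ModelOpt NVFP4
# checkpoint; "modelopt_fp4" was "nvfp4" before this commit.
llm = LLM(
    model="path/to/nvfp4-checkpoint",  # hypothetical placeholder
    quantization="modelopt_fp4",
    enforce_eager=True,
)
outputs = llm.generate(["Hello, my name is"],
                       SamplingParams(temperature=0.0, max_tokens=16))
print(outputs[0].outputs[0].text)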

vllm/config.py

Lines changed: 1 addition & 1 deletion
@@ -824,7 +824,7 @@ def _verify_quantization(self) -> None:
         optimized_quantization_methods = [
             "fp8", "marlin", "modelopt", "gptq_marlin_24", "gptq_marlin",
             "awq_marlin", "fbgemm_fp8", "compressed-tensors", "experts_int8",
-            "quark", "nvfp4", "bitblas", "gptq_bitblas"
+            "quark", "modelopt_fp4", "bitblas", "gptq_bitblas"
         ]
         if self.quantization is not None:
             self.quantization = cast(QuantizationMethods,
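In _verify_quantization, this list gates a performance warning: a selected method that is not in it gets logged as not fully optimized. A simplified sketch of that gating pattern, not the exact vLLM source:

from typing import Optional
import warnings

# Simplified sketch of the gating pattern shown in the hunk above.
OPTIMIZED_QUANTIZATION_METHODS = [
    "fp8", "marlin", "modelopt", "gptq_marlin_24", "gptq_marlin",
    "awq_marlin", "fbgemm_fp8", "compressed-tensors", "experts_int8",
    "quark", "modelopt_fp4", "bitblas", "gptq_bitblas",
]

def warn_if_unoptimized(quantization: Optional[str]) -> None:
    """Warn when the chosen method lacks fully optimized kernels."""
    if (quantization is not None
            and quantization not in OPTIMIZED_QUANTIZATION_METHODS):
        warnings.warn(f"{quantization} quantization is not fully optimized "
                      "yet; performance may be lower than expected.")

warn_if_unoptimized("modelopt_fp4")  # silent: listed as optimized
warn_if_unoptimized("gguf")          # emits a warning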

vllm/model_executor/layers/quantization/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -14,7 +14,7 @@
     "ptpc_fp8",
     "fbgemm_fp8",
     "modelopt",
-    "nvfp4",
+    "modelopt_fp4",
     "marlin",
     "bitblas",
     "gguf",
@@ -120,7 +120,7 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
         "fp8": Fp8Config,
         "fbgemm_fp8": FBGEMMFp8Config,
         "modelopt": ModelOptFp8Config,
-        "nvfp4": ModelOptNvFp4Config,
+        "modelopt_fp4": ModelOptNvFp4Config,
         "marlin": MarlinConfig,
         "bitblas": BitBLASConfig,
         "gguf": GGUFConfig,

vllm/model_executor/layers/quantization/modelopt.py

Lines changed: 1 addition & 1 deletion
@@ -192,7 +192,7 @@ def __init__(
 
     @classmethod
     def get_name(cls) -> QuantizationMethods:
-        return "nvfp4"
+        return "modelopt_fp4"
 
     @classmethod
     def get_supported_act_dtypes(cls) -> list[torch.dtype]:
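get_name must return the same string as the registry key above; otherwise the QuantizationMethods literal, the registry, and the config class drift apart. A small consistency check, again assuming a build with this commit:

from vllm.model_executor.layers.quantization import (
    QUANTIZATION_METHODS, get_quantization_config)

# The renamed string should appear in the public method list and round-trip
# through the registry back to the class's own get_name().
assert "modelopt_fp4" in QUANTIZATION_METHODS
assert get_quantization_config("modelopt_fp4").get_name() == "modelopt_fp4"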
