
Commit f4a8a37

[Minor] Rename quantization nvfp4 to modelopt_fp4 (#18356)
Signed-off-by: mgoin <mgoin64@gmail.com>
Parent: 8f55962

4 files changed (+7, -7 lines)

tests/models/quantization/test_nvfp4.py

Lines changed: 3 additions & 3 deletions
@@ -41,16 +41,16 @@
     reason=
     "Prevent unstable test based on golden strings from breaking the build "
     " and test input model being too large and hanging the system.")
-@pytest.mark.skipif(not is_quant_method_supported("nvfp4"),
-                    reason="nvfp4 is not supported on this GPU type.")
+@pytest.mark.skipif(not is_quant_method_supported("modelopt_fp4"),
+                    reason="modelopt_fp4 is not supported on this GPU type.")
 @pytest.mark.parametrize("model_name", MODELS)
 def test_models(example_prompts, model_name) -> None:
     model = LLM(
         model=model_name,
         max_model_len=MAX_MODEL_LEN,
         trust_remote_code=True,
         enforce_eager=True,
-        quantization="nvfp4",
+        quantization="modelopt_fp4",
     )
 
     tokenizer = AutoTokenizer.from_pretrained(model_name)
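This commit renames the user-facing quantization method string from "nvfp4" to "modelopt_fp4" in the test above, the config check, the method registry, and the ModelOpt config class. After the rename, selecting an NVFP4 checkpoint looks like the minimal sketch below; the model path is a hypothetical placeholder, not taken from the diff.

from vllm import LLM, SamplingParams

# Minimal sketch: pass the renamed method string when loading a model.
# The model path is a hypothetical placeholder for any ModelOpt NVFP4
# checkpoint; "modelopt_fp4" was "nvfp4" before this commit.
llm = LLM(
    model="path/to/nvfp4-checkpoint",  # hypothetical placeholder
    quantization="modelopt_fp4",
    enforce_eager=True,
)
outputs = llm.generate(["Hello, my name is"],
                       SamplingParams(temperature=0.0, max_tokens=16))
print(outputs[0].outputs[0].text)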

vllm/config.py

Lines changed: 1 addition & 1 deletion
@@ -824,7 +824,7 @@ def _verify_quantization(self) -> None:
         optimized_quantization_methods = [
             "fp8", "marlin", "modelopt", "gptq_marlin_24", "gptq_marlin",
             "awq_marlin", "fbgemm_fp8", "compressed-tensors", "experts_int8",
-            "quark", "nvfp4", "bitblas", "gptq_bitblas"
+            "quark", "modelopt_fp4", "bitblas", "gptq_bitblas"
         ]
         if self.quantization is not None:
             self.quantization = cast(QuantizationMethods,
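In _verify_quantization, this list gates a performance warning: a selected method that is not in it gets logged as not fully optimized. A simplified sketch of that gating pattern, not the exact vLLM source:

from typing import Optional
import warnings

# Simplified sketch of the gating pattern shown in the hunk above.
OPTIMIZED_QUANTIZATION_METHODS = [
    "fp8", "marlin", "modelopt", "gptq_marlin_24", "gptq_marlin",
    "awq_marlin", "fbgemm_fp8", "compressed-tensors", "experts_int8",
    "quark", "modelopt_fp4", "bitblas", "gptq_bitblas",
]

def warn_if_unoptimized(quantization: Optional[str]) -> None:
    """Warn when the chosen method lacks fully optimized kernels."""
    if (quantization is not None
            and quantization not in OPTIMIZED_QUANTIZATION_METHODS):
        warnings.warn(f"{quantization} quantization is not fully optimized "
                      "yet; performance may be lower than expected.")

warn_if_unoptimized("modelopt_fp4")  # silent: listed as optimized
warn_if_unoptimized("gguf")          # emits a warning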

vllm/model_executor/layers/quantization/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -14,7 +14,7 @@
     "ptpc_fp8",
     "fbgemm_fp8",
     "modelopt",
-    "nvfp4",
+    "modelopt_fp4",
     "marlin",
     "bitblas",
     "gguf",
@@ -120,7 +120,7 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
         "fp8": Fp8Config,
         "fbgemm_fp8": FBGEMMFp8Config,
         "modelopt": ModelOptFp8Config,
-        "nvfp4": ModelOptNvFp4Config,
+        "modelopt_fp4": ModelOptNvFp4Config,
         "marlin": MarlinConfig,
         "bitblas": BitBLASConfig,
         "gguf": GGUFConfig,

vllm/model_executor/layers/quantization/modelopt.py

Lines changed: 1 addition & 1 deletion
@@ -192,7 +192,7 @@ def __init__(
 
     @classmethod
     def get_name(cls) -> QuantizationMethods:
-        return "nvfp4"
+        return "modelopt_fp4"
 
     @classmethod
     def get_supported_act_dtypes(cls) -> list[torch.dtype]:
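get_name must return the same string as the registry key above; otherwise the QuantizationMethods literal, the registry, and the config class drift apart. A small consistency check, again assuming a build with this commit:

from vllm.model_executor.layers.quantization import (
    QUANTIZATION_METHODS, get_quantization_config)

# The renamed string should appear in the public method list and round-trip
# through the registry back to the class's own get_name().
assert "modelopt_fp4" in QUANTIZATION_METHODS
assert get_quantization_config("modelopt_fp4").get_name() == "modelopt_fp4"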
