
Commit d65ca6f

ko3n1g and suiyoubi authored
Fix Exporter for baichuan and chatglm (#13095) (#13126)
Signed-off-by: Ao Tang <aot@nvidia.com>
Co-authored-by: Ao Tang <aot@nvidia.com>
1 parent c2bf5f0 commit d65ca6f

2 files changed: +6 −2 lines changed

nemo/collections/llm/gpt/model/baichuan.py: +3 −1

@@ -224,7 +224,7 @@ class HFBaichuan2Exporter(io.ModelConnector[Baichuan2Model, "AutoModelForCausalLM"]):
     BaichuanForCausalLM format, including weight mapping and configuration translation.
     """
 
-    def init(self, dtype=torch.bfloat16, model_name="baichuan-inc/Baichuan2-7B-Base") -> "AutoModelForCausalLM":
+    def init(self, dtype=torch.bfloat16, model_name=None) -> "AutoModelForCausalLM":
         """
         Initialize a HF BaichuanForCausalLM instance.
 
@@ -237,6 +237,8 @@ def init(self, dtype=torch.bfloat16, model_name="baichuan-inc/Baichuan2-7B-Base"
         from transformers import AutoModelForCausalLM
         from transformers.modeling_utils import no_init_weights
 
+        if model_name is None:
+            model_name = "baichuan-inc/Baichuan2-7B-Base"
         with no_init_weights(True):
             # Since Baichuan2 is not importable from transformers, we can only initialize the HF model
             # from a known checkpoint. The model_name will need to be passed in.
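
Both files apply the same fix: the default Hugging Face checkpoint name moves out of the init signature into the body behind a None sentinel, so the resolved default is unchanged but the signature no longer hard-codes a checkpoint string. A minimal standalone sketch of this sentinel-default pattern, with hypothetical names (load_checkpoint, DEFAULT_CHECKPOINT) that are not part of NeMo:

    # Sketch of the None-sentinel default used in this commit; names are illustrative.
    DEFAULT_CHECKPOINT = "baichuan-inc/Baichuan2-7B-Base"

    def load_checkpoint(model_name=None):
        # Resolve the fallback inside the body: the signature advertises the
        # argument as optional rather than baking in one concrete checkpoint,
        # and callers that explicitly pass None still get a usable default.
        if model_name is None:
            model_name = DEFAULT_CHECKPOINT
        return model_name

    assert load_checkpoint() == "baichuan-inc/Baichuan2-7B-Base"
    assert load_checkpoint("my/custom-ckpt") == "my/custom-ckpt"

The commit message does not spell out the motivation, but one practical effect of this shape is that explicitly passing model_name=None now behaves the same as omitting the argument, which a signature-level string default does not give you; a caller that forwards None would previously have handed None straight to the checkpoint loader.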

nemo/collections/llm/gpt/model/chatglm.py: +3 −1

@@ -218,10 +218,12 @@ class HFChatGLMExporter(io.ModelConnector[ChatGLMModel, "AutoModelForCausalLM"])
     ChatGLMForCausalLM format, including weight mapping and configuration translation.
     """
 
-    def init(self, dtype=torch.bfloat16, model_name="THUDM/chatglm3-6b") -> "AutoModelForCausalLM":
+    def init(self, dtype=torch.bfloat16, model_name=None) -> "AutoModelForCausalLM":
         from transformers import AutoModelForCausalLM
         from transformers.modeling_utils import no_init_weights
 
+        if model_name is None:
+            model_name = "THUDM/chatglm3-6b"
         with no_init_weights(True):
             # Since ChatGLM is not importable from transformers, we can only initialize the HF model
             # from a known checkpoint. The model_name will need to be passed in.
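
For context on the unchanged surrounding code: no_init_weights from transformers.modeling_utils is a context manager that skips weight initialization when constructing a model skeleton, which fits this exporter because the parameters are immediately overwritten with converted weights. A hedged sketch, assuming a transformers version where no_init_weights accepts an enable flag as in the diff above (newer releases take no argument) and that ChatGLM's remote code is trusted:

    import torch
    from transformers import AutoConfig, AutoModelForCausalLM
    from transformers.modeling_utils import no_init_weights

    # Build the model structure without running random weight init; the
    # parameters are expected to be overwritten afterwards, e.g. by weights
    # converted from a NeMo checkpoint.
    config = AutoConfig.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
    with no_init_weights(True):
        model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)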
