File tree: nemo/collections/vlm/mllama/model (2 files changed, +6 −0 lines changed)

First changed file:

@@ -553,13 +553,15 @@ def __init__(
         submodules: MLlamaCrossAttentionSubmodules,
         layer_number: int,
         attn_mask_type=AttnMaskType.padding,
+        **kwargs,
     ):
         super().__init__(
             config=config,
             submodules=submodules,
             layer_number=layer_number,
             attn_mask_type=attn_mask_type,
             attention_type="cross",
+            **kwargs,
         )
 
         # TODO might need special care when TP>8
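Both files make the same change: each overridden __init__ now accepts **kwargs and forwards them to super().__init__(), so keyword arguments introduced by the base class can pass through the subclass without being listed explicitly. Below is a minimal, runnable sketch of that forwarding pattern; BaseAttention, CrossAttentionLayer, and the comm_overlap option are hypothetical stand-ins, not the actual NeMo or Megatron-Core classes.

# Hypothetical stand-ins illustrating the **kwargs pass-through in this diff.
class BaseAttention:
    def __init__(self, config, layer_number, attention_type="self", **kwargs):
        self.config = config
        self.layer_number = layer_number
        self.attention_type = attention_type
        # Options a newer base class might accept (e.g. the made-up
        # comm_overlap flag below); the subclass never has to name them.
        self.extra_options = kwargs


class CrossAttentionLayer(BaseAttention):
    def __init__(self, config, layer_number, **kwargs):
        # Forward everything this subclass does not handle itself,
        # mirroring the "+ **kwargs," lines added in the diff.
        super().__init__(
            config=config,
            layer_number=layer_number,
            attention_type="cross",
            **kwargs,
        )


layer = CrossAttentionLayer(config={}, layer_number=1, comm_overlap=True)
print(layer.extra_options)  # {'comm_overlap': True}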
Second changed file:

@@ -355,12 +355,14 @@ def __init__(
         submodules: SelfAttentionSubmodules,
         layer_number: int,
         attn_mask_type=AttnMaskType.padding,
+        **kwargs,
     ):
         super().__init__(
             config=config,
             submodules=submodules,
             layer_number=layer_number,
             attn_mask_type=attn_mask_type,
+            **kwargs,
         )
 
         # Override to remove bias since we don't have a good config for this.
@@ -408,12 +410,14 @@ def __init__(
         submodules: TransformerLayerSubmodules,
         layer_number: int = 1,
         hidden_dropout: float = None,
+        **kwargs,
     ):
         super().__init__(
             config=config,
             submodules=submodules,
             layer_number=layer_number,
             hidden_dropout=hidden_dropout,
+            **kwargs,
         )
         self.gated = self.config.gated
         if self.gated:
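For contrast, here is a short sketch (again with hypothetical classes, not the real NeMo or Megatron-Core layers) of the failure mode the pass-through presumably avoids: without **kwargs in the subclass signature, any extra keyword supplied by the layer-building code is rejected with a TypeError before the base class ever sees it.

# Hypothetical classes showing what happens without the **kwargs pass-through.
class FlexibleBase:
    def __init__(self, layer_number, **kwargs):
        self.layer_number = layer_number
        self.options = kwargs  # extra options added by newer base versions


class StrictLayer(FlexibleBase):
    def __init__(self, layer_number):
        # No **kwargs here: unknown keywords never reach FlexibleBase.
        super().__init__(layer_number=layer_number)


try:
    StrictLayer(layer_number=1, comm_overlap=True)
except TypeError as exc:
    print(exc)  # ... got an unexpected keyword argument 'comm_overlap'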