File tree: nemo/collections/vlm/mllama/model (2 files changed, +6 −0 lines changed)

First changed file:

@@ -553,13 +553,15 @@ def __init__(
         submodules: MLlamaCrossAttentionSubmodules,
         layer_number: int,
         attn_mask_type=AttnMaskType.padding,
+        **kwargs,
     ):
         super().__init__(
             config=config,
             submodules=submodules,
             layer_number=layer_number,
             attn_mask_type=attn_mask_type,
             attention_type="cross",
+            **kwargs,
         )
 
         # TODO might need special care when TP>8
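Both files make the same change: each overridden __init__ now accepts **kwargs and forwards them to super().__init__(), so keyword arguments introduced by the base class can pass through the subclass without being listed explicitly. Below is a minimal, runnable sketch of that forwarding pattern; BaseAttention, CrossAttentionLayer, and the comm_overlap option are hypothetical stand-ins, not the actual NeMo or Megatron-Core classes.

# Hypothetical stand-ins illustrating the **kwargs pass-through in this diff.
class BaseAttention:
    def __init__(self, config, layer_number, attention_type="self", **kwargs):
        self.config = config
        self.layer_number = layer_number
        self.attention_type = attention_type
        # Options a newer base class might accept (e.g. the made-up
        # comm_overlap flag below); the subclass never has to name them.
        self.extra_options = kwargs


class CrossAttentionLayer(BaseAttention):
    def __init__(self, config, layer_number, **kwargs):
        # Forward everything this subclass does not handle itself,
        # mirroring the "+ **kwargs," lines added in the diff.
        super().__init__(
            config=config,
            layer_number=layer_number,
            attention_type="cross",
            **kwargs,
        )


layer = CrossAttentionLayer(config={}, layer_number=1, comm_overlap=True)
print(layer.extra_options)  # {'comm_overlap': True}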
Second changed file:

@@ -355,12 +355,14 @@ def __init__(
         submodules: SelfAttentionSubmodules,
         layer_number: int,
         attn_mask_type=AttnMaskType.padding,
+        **kwargs,
     ):
         super().__init__(
             config=config,
             submodules=submodules,
             layer_number=layer_number,
             attn_mask_type=attn_mask_type,
+            **kwargs,
         )
 
         # Override to remove bias since we don't have a good config for this.
@@ -408,12 +410,14 @@ def __init__(
         submodules: TransformerLayerSubmodules,
         layer_number: int = 1,
         hidden_dropout: float = None,
+        **kwargs,
     ):
         super().__init__(
             config=config,
             submodules=submodules,
             layer_number=layer_number,
             hidden_dropout=hidden_dropout,
+            **kwargs,
         )
         self.gated = self.config.gated
         if self.gated:
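For contrast, here is a short sketch (again with hypothetical classes, not the real NeMo or Megatron-Core layers) of the failure mode the pass-through presumably avoids: without **kwargs in the subclass signature, any extra keyword supplied by the layer-building code is rejected with a TypeError before the base class ever sees it.

# Hypothetical classes showing what happens without the **kwargs pass-through.
class FlexibleBase:
    def __init__(self, layer_number, **kwargs):
        self.layer_number = layer_number
        self.options = kwargs  # extra options added by newer base versions


class StrictLayer(FlexibleBase):
    def __init__(self, layer_number):
        # No **kwargs here: unknown keywords never reach FlexibleBase.
        super().__init__(layer_number=layer_number)


try:
    StrictLayer(layer_number=1, comm_overlap=True)
except TypeError as exc:
    print(exc)  # ... got an unexpected keyword argument 'comm_overlap'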