@@ -287,6 +287,7 @@ class MODEL_ARCH(IntEnum):
287
287
CHAMELEON = auto ()
288
288
WAVTOKENIZER_DEC = auto ()
289
289
PLM = auto ()
290
+ BAILINGMOE = auto ()
290
291
291
292
292
293
class MODEL_TENSOR (IntEnum ):
@@ -490,6 +491,7 @@ class MODEL_TENSOR(IntEnum):
490
491
MODEL_ARCH .CHAMELEON : "chameleon" ,
491
492
MODEL_ARCH .WAVTOKENIZER_DEC : "wavtokenizer-dec" ,
492
493
MODEL_ARCH .PLM : "plm" ,
494
+ MODEL_ARCH .BAILINGMOE : "bailingmoe" ,
493
495
}
494
496
495
497
TENSOR_NAMES : dict [MODEL_TENSOR , str ] = {
@@ -1667,6 +1669,28 @@ class MODEL_TENSOR(IntEnum):
1667
1669
MODEL_TENSOR .POSNET_ATTN_V ,
1668
1670
MODEL_TENSOR .POSNET_ATTN_OUT ,
1669
1671
],
1672
+ MODEL_ARCH .BAILINGMOE : [
1673
+ MODEL_TENSOR .TOKEN_EMBD ,
1674
+ MODEL_TENSOR .OUTPUT_NORM ,
1675
+ MODEL_TENSOR .OUTPUT ,
1676
+ MODEL_TENSOR .ROPE_FREQS ,
1677
+ MODEL_TENSOR .ATTN_NORM ,
1678
+ MODEL_TENSOR .ATTN_Q ,
1679
+ MODEL_TENSOR .ATTN_K ,
1680
+ MODEL_TENSOR .ATTN_V ,
1681
+ MODEL_TENSOR .ATTN_OUT ,
1682
+ MODEL_TENSOR .FFN_GATE_INP ,
1683
+ MODEL_TENSOR .FFN_NORM ,
1684
+ MODEL_TENSOR .FFN_GATE ,
1685
+ MODEL_TENSOR .FFN_DOWN ,
1686
+ MODEL_TENSOR .FFN_UP ,
1687
+ MODEL_TENSOR .FFN_GATE_EXP ,
1688
+ MODEL_TENSOR .FFN_DOWN_EXP ,
1689
+ MODEL_TENSOR .FFN_UP_EXP ,
1690
+ MODEL_TENSOR .FFN_GATE_SHEXP ,
1691
+ MODEL_TENSOR .FFN_DOWN_SHEXP ,
1692
+ MODEL_TENSOR .FFN_UP_SHEXP ,
1693
+ ],
1670
1694
# TODO
1671
1695
}
1672
1696
@@ -1719,6 +1743,9 @@ class MODEL_TENSOR(IntEnum):
1719
1743
MODEL_TENSOR .ROPE_FREQS ,
1720
1744
MODEL_TENSOR .ATTN_ROT_EMBD ,
1721
1745
],
1746
+ MODEL_ARCH .BAILINGMOE : [
1747
+ MODEL_TENSOR .ROPE_FREQS ,
1748
+ ],
1722
1749
}
1723
1750
1724
1751
#
0 commit comments