@@ -69,6 +69,7 @@ class Model:
    model_name: str | None
    metadata_override: Path | None
    dir_model_card: Path
+    is_lora: bool

    model_arch: gguf.MODEL_ARCH
@@ -86,6 +87,7 @@ def __init__(
        split_max_size: int = 0,
        dry_run: bool = False,
        small_first_shard: bool = False,
+        is_lora: bool = False,
    ):
        if type(self) is Model:
            raise TypeError(
@@ -118,6 +120,7 @@ def __init__(
        self.metadata_override = metadata_override
        self.model_name = model_name
        self.dir_model_card = dir_model
+        self.is_lora = is_lora

        if self.ftype == gguf.LlamaFileType.GUESSED:
@@ -381,6 +384,7 @@ def prepare_tensors(self):
                        gguf.MODEL_TENSOR.FFN_GATE_INP,
                        gguf.MODEL_TENSOR.POS_EMBD,
                        gguf.MODEL_TENSOR.TOKEN_TYPES,
+                        gguf.MODEL_TENSOR.SSM_CONV1D,
                    )
                )
                or not name.endswith(".weight")
@@ -1831,7 +1835,10 @@ def prepare_tensors(self):
        if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
            if rope_scaling.get("rope_type", "").lower() == "llama3":
                base = self.hparams.get("rope_theta", 10000.0)
-                dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+                dim = self.hparams.get(
+                    "head_dim",
+                    self.hparams["hidden_size"] // self.hparams["num_attention_heads"],
+                )
                freqs = 1.0 / (
                    base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim)
                )
@@ -1860,10 +1867,11 @@ def prepare_tensors(self):
                    )
                    rope_factors.append(1 / ((1 - smooth) / factor + smooth))

-                self.gguf_writer.add_tensor(
-                    self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS),
-                    np.array(rope_factors, dtype=np.float32),
-                )
+                if not self.is_lora:
+                    self.gguf_writer.add_tensor(
+                        self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS),
+                        np.array(rope_factors, dtype=np.float32),
+                    )

        super().prepare_tensors()
@@ -2472,14 +2480,15 @@ def set_gguf_parameters(self):
                    f"The length of rope long and short factors must be {rope_dims / 2}"
                )

-        self.gguf_writer.add_tensor(
-            gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight",
-            np.array(long_factors, dtype=np.float32),
-        )
-        self.gguf_writer.add_tensor(
-            gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight",
-            np.array(short_factors, dtype=np.float32),
-        )
+        if not self.is_lora:
+            self.gguf_writer.add_tensor(
+                gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight",
+                np.array(long_factors, dtype=np.float32),
+            )
+            self.gguf_writer.add_tensor(
+                gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight",
+                np.array(short_factors, dtype=np.float32),
+            )


@Model.register("PlamoForCausalLM")
@@ -3081,7 +3090,7 @@ class StarCoder2Model(Model):
    model_arch = gguf.MODEL_ARCH.STARCODER2


-@Model.register("MambaForCausalLM", "MambaLMHeadModel")
+@Model.register("MambaForCausalLM", "MambaLMHeadModel", "FalconMambaForCausalLM")
class MambaModel(Model):
    model_arch = gguf.MODEL_ARCH.MAMBA
@@ -3117,19 +3126,24 @@ def set_gguf_parameters(self):
            self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True)
            or 1e-5
        )
+        use_dt_b_c_norm = False
+
+        if self.find_hparam(["model_type"], optional=True) in ("falcon_mamba",):
+            use_dt_b_c_norm = True

        assert d_inner == 2 * d_model

        self.gguf_writer.add_context_length(2**20)
        self.gguf_writer.add_embedding_length(d_model)
        self.gguf_writer.add_feed_forward_length(0)
        self.gguf_writer.add_head_count(0)
-        self.gguf_writer.add_block_count(self.hparams["n_layer"])
+        self.gguf_writer.add_block_count(self.block_count)
        self.gguf_writer.add_ssm_conv_kernel(d_conv)
        self.gguf_writer.add_ssm_inner_size(d_inner)
        self.gguf_writer.add_ssm_state_size(d_state)
        self.gguf_writer.add_ssm_time_step_rank(dt_rank)
        self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps)
+        self.gguf_writer.add_ssm_dt_b_c_rms(use_dt_b_c_norm)
        self.gguf_writer.add_file_type(self.ftype)

    _tok_embd = None
@@ -3159,25 +3173,6 @@ def modify_tensors(

        return [(new_name, data_torch)]

-    def tensor_force_quant(
-        self, name: str, new_name: str, bid: int | None, n_dims: int
-    ) -> gguf.GGMLQuantizationType | bool:
-        if bid is not None and new_name in (
-            self.format_tensor_name(
-                n, bid, ".weight" if name.endswith(".weight") else ""
-            )
-            for n in [
-                gguf.MODEL_TENSOR.SSM_CONV1D,
-                gguf.MODEL_TENSOR.SSM_X,
-                gguf.MODEL_TENSOR.SSM_DT,
-                gguf.MODEL_TENSOR.SSM_A,
-                gguf.MODEL_TENSOR.SSM_D,
-            ]
-        ):
-            return gguf.GGMLQuantizationType.F32
-
-        return super().tensor_force_quant(name, new_name, bid, n_dims)
-

@Model.register("CohereForCausalLM")
class CommandR2Model(Model):
@@ -4301,7 +4296,10 @@ def prepare_tensors(self):
        if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
            if rope_scaling.get("rope_type", "").lower() == "llama3":
                base = self.hparams.get("rope_theta", 10000.0)
-                dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+                dim = self.hparams.get(
+                    "head_dim",
+                    self.hparams["hidden_size"] // self.hparams["num_attention_heads"],
+                )
                freqs = 1.0 / (
                    base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim)
                )
@@ -4330,10 +4328,11 @@ def prepare_tensors(self):
                    )
                    rope_factors.append(1 / ((1 - smooth) / factor + smooth))

-                self.gguf_writer.add_tensor(
-                    self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS),
-                    np.array(rope_factors, dtype=np.float32),
-                )
+                if not self.is_lora:
+                    self.gguf_writer.add_tensor(
+                        self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS),
+                        np.array(rope_factors, dtype=np.float32),
+                    )

        super().prepare_tensors()
@@ -4403,82 +4402,26 @@ def __torch_function__(cls, func, types, args=(), kwargs=None):


def parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(description="")
-    parser.add_argument(
-        "--vocab-only",
-        action="store_true",
-        help="",
-    )
-    parser.add_argument(
-        "--outfile",
-        type=Path,
-        help="",
-    )
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--vocab-only", action="store_true")
+    parser.add_argument("--outfile", type=Path)
    parser.add_argument(
        "--outtype",
        type=str,
        choices=["f32", "f16", "bf16", "q8_0", "auto"],
        default="f16",
-        help="",
-    )
-    parser.add_argument(
-        "--bigendian",
-        action="store_true",
-        help="",
-    )
-    parser.add_argument(
-        "model",
-        type=Path,
-        help="",
-    )
-    parser.add_argument(
-        "--use-temp-file",
-        action="store_true",
-        help="",
-    )
-    parser.add_argument(
-        "--no-lazy",
-        action="store_true",
-        help="",
-    )
-    parser.add_argument(
-        "--model-name",
-        type=str,
-        default=None,
-        help="",
-    )
-    parser.add_argument(
-        "--verbose",
-        action="store_true",
-        help="",
-    )
-    parser.add_argument(
-        "--split-max-tensors",
-        type=int,
-        default=0,
-        help="",
-    )
-    parser.add_argument(
-        "--split-max-size",
-        type=str,
-        default="0",
-        help="",
-    )
-    parser.add_argument(
-        "--dry-run",
-        action="store_true",
-        help="",
-    )
-    parser.add_argument(
-        "--no-tensor-first-split",
-        action="store_true",
-        help="",
-    )
-    parser.add_argument(
-        "--metadata",
-        type=Path,
-        help="",
    )
+    parser.add_argument("--bigendian", action="store_true")
+    parser.add_argument("model", type=Path)
+    parser.add_argument("--use-temp-file", action="store_true")
+    parser.add_argument("--no-lazy", action="store_true")
+    parser.add_argument("--model-name", type=str, default=None)
+    parser.add_argument("--verbose", action="store_true")
+    parser.add_argument("--split-max-tensors", type=int, default=0)
+    parser.add_argument("--split-max-size", type=str, default="0")
+    parser.add_argument("--dry-run", action="store_true")
+    parser.add_argument("--no-tensor-first-split", action="store_true")
+    parser.add_argument("--metadata", type=Path)

    return parser.parse_args()
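Note: a minimal sketch (not part of the diff) of the gating pattern the new is_lora flag introduces in prepare_tensors() above — a LoRA adapter conversion skips writing the base model's rope-factor tensors, while a full-model conversion still emits them. The helper name maybe_write_rope_factors and the generic writer parameter are illustrative assumptions; only add_tensor and the np.float32 conversion come from the diff itself.

import numpy as np

def maybe_write_rope_factors(writer, tensor_name: str, rope_factors: list[float], is_lora: bool) -> None:
    # Mirrors the "if not self.is_lora:" guard added in this change:
    # when converting a LoRA adapter, the rope-factor tensor belongs to the
    # base model and is not duplicated into the adapter output.
    if not is_lora:
        writer.add_tensor(tensor_name, np.array(rope_factors, dtype=np.float32))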