@@ -52,6 +52,7 @@ class Model:
     endianess: gguf.GGUFEndian
     use_temp_file: bool
     lazy: bool
+    model_name: str | None
     part_names: list[str]
     is_safetensors: bool
     hparams: dict[str, Any]
@@ -64,7 +65,7 @@ class Model:
     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH
 
-    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool):
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool, model_name: str | None):
         if type(self) is Model:
             raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
         self.dir_model = dir_model
@@ -73,6 +74,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
         self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
         self.use_temp_file = use_temp_file
         self.lazy = not eager
+        self.model_name = model_name
         self.part_names = Model.get_model_part_names(self.dir_model, "model", ".safetensors")
         self.is_safetensors = len(self.part_names) > 0
         if not self.is_safetensors:
@@ -182,7 +184,7 @@ def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", "
         return new_name
 
     def set_gguf_parameters(self):
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_block_count(self.block_count)
 
         if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)) is not None:
@@ -665,7 +667,7 @@ class GPTNeoXModel(Model):
     def set_gguf_parameters(self):
         block_count = self.hparams["num_hidden_layers"]
 
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -798,7 +800,7 @@ def set_vocab(self):
 
     def set_gguf_parameters(self):
         block_count = self.hparams["n_layers"]
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_context_length(self.hparams["max_seq_len"])
         self.gguf_writer.add_embedding_length(self.hparams["d_model"])
         self.gguf_writer.add_block_count(block_count)
@@ -850,7 +852,7 @@ def set_gguf_parameters(self):
             raise ValueError("gguf: can not find ctx length parameter.")
 
         self.gguf_writer.add_file_type(self.ftype)
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_source_hf_repo(hf_repo)
         self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
         self.gguf_writer.add_context_length(ctx_length)
@@ -887,7 +889,7 @@ def set_gguf_parameters(self):
         else:
             raise ValueError("gguf: can not find ctx length parameter.")
 
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_source_hf_repo(hf_repo)
         self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
         self.gguf_writer.add_context_length(ctx_length)
@@ -1010,7 +1012,7 @@ def set_gguf_parameters(self):
         else:
             raise ValueError("gguf: can not find ctx length parameter.")
 
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_source_hf_repo(hf_repo)
         self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
         self.gguf_writer.add_context_length(ctx_length)
@@ -1206,7 +1208,7 @@ def set_gguf_parameters(self):
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]
 
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -1681,7 +1683,7 @@ class GPT2Model(Model):
     model_arch = gguf.MODEL_ARCH.GPT2
 
     def set_gguf_parameters(self):
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_block_count(self.hparams["n_layer"])
         self.gguf_writer.add_context_length(self.hparams["n_ctx"])
         self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
@@ -2248,7 +2250,7 @@ def set_gguf_parameters(self):
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]
 
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -2348,7 +2350,7 @@ def set_gguf_parameters(self):
         # Fail early for models which don't have a block expansion factor of 2
         assert d_inner == 2 * d_model
 
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_context_length(2 ** 20) # arbitrary value; for those who use the default
         self.gguf_writer.add_embedding_length(d_model)
         self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading
@@ -2852,7 +2854,7 @@ def main() -> None:
             logger.error(f"Model {hparams['architectures'][0]} is not supported")
             sys.exit(1)
 
-        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy)
+        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy, args.model_name)
 
         logger.info("Set model parameters")
         model_instance.set_gguf_parameters()
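
Not shown in this excerpt is where `args.model_name` comes from; presumably the same change adds a `--model-name` option to the converter's argument parser, defaulting to `None` so the existing `dir_model.name` fallback in every `add_name(...)` call above still applies. A minimal sketch of that wiring, assuming standard argparse; the real flag name, help text, and parser setup in convert-hf-to-gguf.py may differ:

```python
# Hypothetical sketch of the CLI wiring implied by `args.model_name` in main();
# not part of this diff.
import argparse

def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Convert a Hugging Face model to GGUF")
    # ... existing options such as --outtype, --bigendian, --use-temp-file, --no-lazy ...
    parser.add_argument(
        "--model-name", type=str, default=None,
        help="name to write into the GGUF metadata; falls back to the model directory name",
    )
    return parser.parse_args()
```

With `--model-name` left unset, each overridden `set_gguf_parameters()` keeps writing `self.dir_model.name`, so existing conversions produce the same metadata as before.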