Skip to content

Commit 2decf57

Browse files
authored (author name not captured in extraction)
convert-hf : set the model name based on cli arg, if present (ggml-org#7693)
The `--model-name` argument was added a while ago but did not do anything. This commit fixes that issue and enables the feature.
1 parent 5795b94 commit 2decf57

File tree

1 file changed

+14
-12
lines changed

1 file changed

+14
-12
lines changed

convert-hf-to-gguf.py

Lines changed: 14 additions & 12 deletions
Original file line number | Diff line number | Diff line change
(Note: in the lines below, the old and new file line numbers are fused together by extraction, e.g. "5252" means old line 52 / new line 52.)
@@ -52,6 +52,7 @@ class Model:
5252
endianess: gguf.GGUFEndian
5353
use_temp_file: bool
5454
lazy: bool
55+
model_name: str | None
5556
part_names: list[str]
5657
is_safetensors: bool
5758
hparams: dict[str, Any]
@@ -64,7 +65,7 @@ class Model:
6465
# subclasses should define this!
6566
model_arch: gguf.MODEL_ARCH
6667

67-
def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool):
68+
def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool, model_name: str | None):
6869
if type(self) is Model:
6970
raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
7071
self.dir_model = dir_model
@@ -73,6 +74,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
7374
self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
7475
self.use_temp_file = use_temp_file
7576
self.lazy = not eager
77+
self.model_name = model_name
7678
self.part_names = Model.get_model_part_names(self.dir_model, "model", ".safetensors")
7779
self.is_safetensors = len(self.part_names) > 0
7880
if not self.is_safetensors:
@@ -182,7 +184,7 @@ def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", "
182184
return new_name
183185

184186
def set_gguf_parameters(self):
185-
self.gguf_writer.add_name(self.dir_model.name)
187+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
186188
self.gguf_writer.add_block_count(self.block_count)
187189

188190
if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)) is not None:
@@ -665,7 +667,7 @@ class GPTNeoXModel(Model):
665667
def set_gguf_parameters(self):
666668
block_count = self.hparams["num_hidden_layers"]
667669

668-
self.gguf_writer.add_name(self.dir_model.name)
670+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
669671
self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
670672
self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
671673
self.gguf_writer.add_block_count(block_count)
@@ -798,7 +800,7 @@ def set_vocab(self):
798800

799801
def set_gguf_parameters(self):
800802
block_count = self.hparams["n_layers"]
801-
self.gguf_writer.add_name(self.dir_model.name)
803+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
802804
self.gguf_writer.add_context_length(self.hparams["max_seq_len"])
803805
self.gguf_writer.add_embedding_length(self.hparams["d_model"])
804806
self.gguf_writer.add_block_count(block_count)
@@ -850,7 +852,7 @@ def set_gguf_parameters(self):
850852
raise ValueError("gguf: can not find ctx length parameter.")
851853

852854
self.gguf_writer.add_file_type(self.ftype)
853-
self.gguf_writer.add_name(self.dir_model.name)
855+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
854856
self.gguf_writer.add_source_hf_repo(hf_repo)
855857
self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
856858
self.gguf_writer.add_context_length(ctx_length)
@@ -887,7 +889,7 @@ def set_gguf_parameters(self):
887889
else:
888890
raise ValueError("gguf: can not find ctx length parameter.")
889891

890-
self.gguf_writer.add_name(self.dir_model.name)
892+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
891893
self.gguf_writer.add_source_hf_repo(hf_repo)
892894
self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
893895
self.gguf_writer.add_context_length(ctx_length)
@@ -1010,7 +1012,7 @@ def set_gguf_parameters(self):
10101012
else:
10111013
raise ValueError("gguf: can not find ctx length parameter.")
10121014

1013-
self.gguf_writer.add_name(self.dir_model.name)
1015+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
10141016
self.gguf_writer.add_source_hf_repo(hf_repo)
10151017
self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
10161018
self.gguf_writer.add_context_length(ctx_length)
@@ -1206,7 +1208,7 @@ def set_gguf_parameters(self):
12061208
hparams = self.hparams
12071209
block_count = hparams["num_hidden_layers"]
12081210

1209-
self.gguf_writer.add_name(self.dir_model.name)
1211+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
12101212
self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
12111213
self.gguf_writer.add_embedding_length(hparams["hidden_size"])
12121214
self.gguf_writer.add_block_count(block_count)
@@ -1681,7 +1683,7 @@ class GPT2Model(Model):
16811683
model_arch = gguf.MODEL_ARCH.GPT2
16821684

16831685
def set_gguf_parameters(self):
1684-
self.gguf_writer.add_name(self.dir_model.name)
1686+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
16851687
self.gguf_writer.add_block_count(self.hparams["n_layer"])
16861688
self.gguf_writer.add_context_length(self.hparams["n_ctx"])
16871689
self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
@@ -2248,7 +2250,7 @@ def set_gguf_parameters(self):
22482250
hparams = self.hparams
22492251
block_count = hparams["num_hidden_layers"]
22502252

2251-
self.gguf_writer.add_name(self.dir_model.name)
2253+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
22522254
self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
22532255
self.gguf_writer.add_embedding_length(hparams["hidden_size"])
22542256
self.gguf_writer.add_block_count(block_count)
@@ -2348,7 +2350,7 @@ def set_gguf_parameters(self):
23482350
# Fail early for models which don't have a block expansion factor of 2
23492351
assert d_inner == 2 * d_model
23502352

2351-
self.gguf_writer.add_name(self.dir_model.name)
2353+
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
23522354
self.gguf_writer.add_context_length(2**20) # arbitrary value; for those who use the default
23532355
self.gguf_writer.add_embedding_length(d_model)
23542356
self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading
@@ -2852,7 +2854,7 @@ def main() -> None:
28522854
logger.error(f"Model {hparams['architectures'][0]} is not supported")
28532855
sys.exit(1)
28542856

2855-
model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy)
2857+
model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy, args.model_name)
28562858

28572859
logger.info("Set model parameters")
28582860
model_instance.set_gguf_parameters()

0 commit comments

Comments (0)