
Commit dc36338

Workaround for incorrect weight map
It appears transformer.wte.weight is in the weight map even though the weights are not actually there; remove it from the expected tensor names if the output weights are encountered first.
1 parent faea5ff commit dc36338
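
For context, a minimal standalone sketch of the situation this commit works around (this is not the converter's code; the shard filenames and the single layer tensor are made up for the example). It shows how a phantom weight-map entry trips the missing-tensor check, and why dropping that entry once the output weights are seen lets a tied-embedding checkpoint convert:

# Illustrative weight map, as a safetensors index would list it.
weight_map = {
    "transformer.wte.weight": "model-00001-of-00002.safetensors",  # listed, but tied to lm_head
    "transformer.h.0.attn.c_attn.weight": "model-00001-of-00002.safetensors",
    "lm_head.weight": "model-00002-of-00002.safetensors",
}
expected_tensor_names = set(weight_map)

# What the shards actually store: the tied embedding is not saved separately.
tensor_names_from_parts = {
    "transformer.h.0.attn.c_attn.weight",
    "lm_head.weight",
}

missing = sorted(expected_tensor_names - tensor_names_from_parts)
extra = sorted(tensor_names_from_parts - expected_tensor_names)
print(missing, extra)  # ['transformer.wte.weight'] [] -> spurious "missing model files" error

# The workaround, simplified: if the output weights show up while no token
# embedding has been seen, assume the embeddings are tied and drop the phantom
# entry from the expected set so the consistency check passes.
if "lm_head.weight" in tensor_names_from_parts and "transformer.wte.weight" not in tensor_names_from_parts:
    expected_tensor_names.discard("transformer.wte.weight")

print(sorted(expected_tensor_names - tensor_names_from_parts))  # [] -> conversion proceeds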

File tree

1 file changed (+23, −1 lines)


convert_hf_to_gguf.py

Lines changed: 23 additions & 1 deletion
@@ -180,7 +180,8 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
             extra = sorted(tensor_names_from_parts.difference(self.tensor_names))
             missing_files = sorted(set(weight_map[n] for n in missing if n in weight_map))
             if len(extra) == 0 and len(missing_files) > 0:
-                raise ValueError(f"Missing or incomplete model files: {missing_files}")
+                raise ValueError(f"Missing or incomplete model files: {missing_files}\n"
+                                 f"Missing tensors: {missing}")
             else:
                 raise ValueError("Mismatch between weight map and model parts for tensor names:\n"
                                  f"Missing tensors: {missing}\n"
@@ -2726,6 +2727,27 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
         self.gguf_writer.add_rope_scaling_factor(1.0)
 
+    _has_tok_embd = False
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+
+        output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT)
+        tok_embd_name = self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD)
+
+        new_name = self.map_tensor_name(name)
+
+        # assuming token_embd.weight is seen before output.weight
+        if not self._has_tok_embd and new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
+            # even though the tensor file(s) does not contain the word embeddings they are still in the weight map
+            if "transformer.wte.weight" in self.tensor_names:
+                logger.debug(f"{tok_embd_name} not found before {output_name}, assuming they are tied")
+                self.tensor_names.remove("transformer.wte.weight")
+        elif new_name == tok_embd_name:
+            self._has_tok_embd = True
+
+        return [(new_name, data_torch)]
+
 
 @Model.register("InternLM2ForCausalLM")
 class InternLM2Model(Model):
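
To confirm that a given checkpoint actually exhibits this mismatch, a small standalone check along the following lines could compare the index against what the shards really store (not part of the commit; the model directory path is a placeholder, and it assumes the safetensors and torch packages are installed):

import json
from pathlib import Path

from safetensors import safe_open

model_dir = Path("path/to/checkpoint")  # placeholder: local HF checkpoint directory

# Tensor names the index claims exist.
index = json.loads((model_dir / "model.safetensors.index.json").read_text())
listed = set(index["weight_map"])

# Tensor names the shards actually contain.
stored: set[str] = set()
for shard in sorted(set(index["weight_map"].values())):
    with safe_open(str(model_dir / shard), framework="pt") as f:
        stored.update(f.keys())

# On affected checkpoints this prints {'transformer.wte.weight'}.
print(listed - stored)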
