Skip to content

Commit 83a2ef3

Browse files
authored
Copy EXAONE word embeddings weights to output weights if missing
1 parent 8551c44 commit 83a2ef3

File tree

1 file changed

+16
-0
lines changed

1 file changed

+16
-0
lines changed

convert_hf_to_gguf.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5027,6 +5027,22 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
50275027

50285028
yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))
50295029

5030+
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
    """Map a checkpoint tensor to its output name, duplicating the token embedding as the output head if needed.

    The tensor is always emitted under the name returned by
    ``self.map_tensor_name(name)``. When that name is the token-embedding
    tensor name and neither ``"lm_head.weight"`` nor ``"output.weight"`` is
    listed in ``self.tensor_names``, the very same tensor object is emitted a
    second time under the output tensor name.

    Args:
        data_torch: Tensor data for the entry being converted.
        name: Tensor name as it appears in the source checkpoint.
        bid: Block index; not used by this implementation.

    Returns:
        A list of ``(tensor_name, tensor)`` pairs with one or two entries.
    """
    del bid  # unused

    mapped_name = self.map_tensor_name(name)
    result: list[tuple[str, Tensor]] = [(mapped_name, data_torch)]

    # Only the token embedding can stand in for a missing output head.
    if mapped_name != self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
        return result

    assert self.tensor_names is not None

    has_output_head = any(
        head_name in self.tensor_names
        for head_name in ("lm_head.weight", "output.weight")
    )
    if not has_output_head:
        # No dedicated output weights listed: reuse tok_embd.weight as output.weight.
        output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT)
        result.append((output_name, data_torch))

    return result
5045+
50305046

50315047
@Model.register("GraniteForCausalLM")
50325048
class GraniteModel(LlamaModel):

0 commit comments

Comments
 (0)