Skip to content

Commit 29fff30

Browse files
authored
llama : support converting Mistral Small text-only (ggml-org#12450)
1 parent c6af216 commit 29fff30

File tree

1 file changed

+19
-0
lines changed

1 file changed

+19
-0
lines changed

convert_hf_to_gguf.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1747,6 +1747,25 @@ def prepare_tensors(self):
17471747
raise ValueError(f"Unprocessed experts: {experts}")
17481748

17491749

1750+
@Model.register("Mistral3ForConditionalGeneration")
class Mistral3Model(LlamaModel):
    """Text-only GGUF conversion for Mistral Small 3 checkpoints.

    The HF model is multimodal (`Mistral3ForConditionalGeneration`), but the
    language tower is plain LLaMA-architecture, so we reuse LlamaModel and
    simply drop the vision components during tensor conversion.
    """

    model_arch = gguf.MODEL_ARCH.LLAMA

    def __init__(self, *args, **kwargs):
        # The HF config nests the language-model settings under "text_config";
        # flatten them into the top-level hparams dict so the LlamaModel base
        # class can find them where it expects.
        hparams = Model.load_hparams(kwargs["dir_model"])
        if "text_config" in hparams:
            hparams = {**hparams, **hparams["text_config"]}
        kwargs["hparams"] = hparams
        super().__init__(*args, **kwargs)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
        # Strip the multimodal wrapper prefix so tensor names line up with
        # the plain LLaMA naming that LlamaModel.modify_tensors expects.
        name = name.replace("language_model.", "")
        # Skip vision-side tensors entirely — this converter is text-only.
        if "multi_modal_projector" in name or "vision_tower" in name:
            return []
        return super().modify_tensors(data_torch, name, bid)
1767+
1768+
17501769
@Model.register("DeciLMForCausalLM")
17511770
class DeciModel(Model):
17521771
model_arch = gguf.MODEL_ARCH.DECI

0 commit comments

Comments
 (0)