Commit a59b49f

chore: update llama.cpp convert scripts

1 parent: fb9addb

3 files changed: +75 −178 lines

requirements.txt: +1 −0

@@ -11,3 +11,4 @@ python-dotenv~=1.0.1
 safetensors~=0.4.4
 setuptools~=68.2.0
 huggingface-hub~=0.24.6
+transformers~=4.44.2

src/convert_hf_to_gguf.py: +52 −109
@@ -69,6 +69,7 @@ class Model:
     model_name: str | None
     metadata_override: Path | None
     dir_model_card: Path
+    is_lora: bool

     model_arch: gguf.MODEL_ARCH

@@ -86,6 +87,7 @@ def __init__(
         split_max_size: int = 0,
         dry_run: bool = False,
         small_first_shard: bool = False,
+        is_lora: bool = False,
     ):
         if type(self) is Model:
             raise TypeError(
@@ -118,6 +120,7 @@ def __init__(
         self.metadata_override = metadata_override
         self.model_name = model_name
         self.dir_model_card = dir_model
+        self.is_lora = is_lora

         if self.ftype == gguf.LlamaFileType.GUESSED:
@@ -381,6 +384,7 @@ def prepare_tensors(self):
                             gguf.MODEL_TENSOR.FFN_GATE_INP,
                             gguf.MODEL_TENSOR.POS_EMBD,
                             gguf.MODEL_TENSOR.TOKEN_TYPES,
+                            gguf.MODEL_TENSOR.SSM_CONV1D,
                         )
                     )
                     or not name.endswith(".weight")
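
Note: adding SSM_CONV1D to this shared "always keep as F32" list is presumably what makes the Mamba-specific tensor_force_quant override (deleted further down in this diff) redundant. A minimal sketch of the effect, using stand-in tensor-type strings rather than the converter's real matching helpers:

# Illustrative only -- simplified stand-in for the check in the hunk above.
ALWAYS_F32_TENSOR_TYPES = {
    "ffn_gate_inp",
    "pos_embd",
    "token_types",
    "ssm_conv1d",  # newly added: Mamba/FalconMamba conv1d weights stay in float32
}

def keeps_f32(tensor_type: str, name: str) -> bool:
    # mirrors "any(match ...) or not name.endswith('.weight')" from the hunk above
    return tensor_type in ALWAYS_F32_TENSOR_TYPES or not name.endswith(".weight")

assert keeps_f32("ssm_conv1d", "blk.0.ssm_conv1d.weight")
assert not keeps_f32("attn_q", "blk.0.attn_q.weight")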
@@ -1831,7 +1835,10 @@ def prepare_tensors(self):
         if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
             if rope_scaling.get("rope_type", "").lower() == "llama3":
                 base = self.hparams.get("rope_theta", 10000.0)
-                dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+                dim = self.hparams.get(
+                    "head_dim",
+                    self.hparams["hidden_size"] // self.hparams["num_attention_heads"],
+                )
                 freqs = 1.0 / (
                     base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim)
                 )
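
Note: the rewritten line lets the llama3 rope-scaling path honour an explicit head_dim from the model config and only fall back to hidden_size // num_attention_heads when the key is absent. A quick worked example with illustrative values:

# Illustrative hparams; real values come from the model's config.json.
hparams = {"hidden_size": 4096, "num_attention_heads": 32}

# No "head_dim" key -> derived: 4096 // 32 = 128
dim = hparams.get("head_dim", hparams["hidden_size"] // hparams["num_attention_heads"])
assert dim == 128

# With an explicit "head_dim" (e.g. some GQA configs), that value wins.
hparams["head_dim"] = 64
dim = hparams.get("head_dim", hparams["hidden_size"] // hparams["num_attention_heads"])
assert dim == 64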
@@ -1860,10 +1867,11 @@ def prepare_tensors(self):
                         )
                         rope_factors.append(1 / ((1 - smooth) / factor + smooth))

-                self.gguf_writer.add_tensor(
-                    self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS),
-                    np.array(rope_factors, dtype=np.float32),
-                )
+                if not self.is_lora:
+                    self.gguf_writer.add_tensor(
+                        self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS),
+                        np.array(rope_factors, dtype=np.float32),
+                    )

         super().prepare_tensors()
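
Note: the new is_lora check skips emitting the derived rope_freqs tensor (and, below, Phi-3's long/short rope-factor tensors), presumably because a LoRA adapter GGUF should carry only adapter tensors while the base model's GGUF already contains these. How the flag is set is not visible in this excerpt; it most likely comes from the LoRA conversion entry point in the third changed file. A minimal, self-contained sketch of the gating (illustrative classes, not the converter's own):

import numpy as np

# Stand-in for the gguf writer used above.
class TensorSink:
    def __init__(self) -> None:
        self.tensors: dict[str, np.ndarray] = {}

    def add_tensor(self, name: str, data: np.ndarray) -> None:
        self.tensors[name] = data

def write_rope_freqs(sink: TensorSink, rope_factors: list[float], is_lora: bool) -> None:
    # Full model conversion writes the tensor; LoRA conversion skips it.
    if not is_lora:
        sink.add_tensor("rope_freqs.weight", np.array(rope_factors, dtype=np.float32))

sink = TensorSink()
write_rope_freqs(sink, [1.0, 1.0, 4.0], is_lora=True)
assert "rope_freqs.weight" not in sink.tensors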

@@ -2472,14 +2480,15 @@ def set_gguf_parameters(self):
                 f"The length of rope long and short factors must be {rope_dims / 2}"
             )

-        self.gguf_writer.add_tensor(
-            gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight",
-            np.array(long_factors, dtype=np.float32),
-        )
-        self.gguf_writer.add_tensor(
-            gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight",
-            np.array(short_factors, dtype=np.float32),
-        )
+        if not self.is_lora:
+            self.gguf_writer.add_tensor(
+                gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight",
+                np.array(long_factors, dtype=np.float32),
+            )
+            self.gguf_writer.add_tensor(
+                gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight",
+                np.array(short_factors, dtype=np.float32),
+            )


 @Model.register("PlamoForCausalLM")
@@ -3081,7 +3090,7 @@ class StarCoder2Model(Model):
     model_arch = gguf.MODEL_ARCH.STARCODER2


-@Model.register("MambaForCausalLM", "MambaLMHeadModel")
+@Model.register("MambaForCausalLM", "MambaLMHeadModel", "FalconMambaForCausalLM")
 class MambaModel(Model):
     model_arch = gguf.MODEL_ARCH.MAMBA

@@ -3117,19 +3126,24 @@ def set_gguf_parameters(self):
             self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True)
             or 1e-5
         )
+        use_dt_b_c_norm = False
+
+        if self.find_hparam(["model_type"], optional=True) in ("falcon_mamba",):
+            use_dt_b_c_norm = True

         assert d_inner == 2 * d_model

         self.gguf_writer.add_context_length(2**20)
         self.gguf_writer.add_embedding_length(d_model)
         self.gguf_writer.add_feed_forward_length(0)
         self.gguf_writer.add_head_count(0)
-        self.gguf_writer.add_block_count(self.hparams["n_layer"])
+        self.gguf_writer.add_block_count(self.block_count)
         self.gguf_writer.add_ssm_conv_kernel(d_conv)
         self.gguf_writer.add_ssm_inner_size(d_inner)
         self.gguf_writer.add_ssm_state_size(d_state)
         self.gguf_writer.add_ssm_time_step_rank(dt_rank)
         self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps)
+        self.gguf_writer.add_ssm_dt_b_c_rms(use_dt_b_c_norm)
         self.gguf_writer.add_file_type(self.ftype)

     _tok_embd = None
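
Note: FalconMamba checkpoints identify themselves via model_type in config.json and, unlike plain Mamba, apply an RMS norm to the dt/B/C tensors; the converter now records that in the GGUF metadata through the new add_ssm_dt_b_c_rms call. A small illustration of the detection, with made-up config fragments:

# Illustrative config fragments; real values come from each model's config.json.
falcon_mamba_cfg = {"model_type": "falcon_mamba", "hidden_size": 4096}
plain_mamba_cfg = {"model_type": "mamba", "hidden_size": 2560}

def uses_dt_b_c_rms(hparams: dict) -> bool:
    # mirrors the check added above: only "falcon_mamba" enables the flag
    return hparams.get("model_type") in ("falcon_mamba",)

assert uses_dt_b_c_rms(falcon_mamba_cfg) is True
assert uses_dt_b_c_rms(plain_mamba_cfg) is False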
@@ -3159,25 +3173,6 @@ def modify_tensors(

         return [(new_name, data_torch)]

-    def tensor_force_quant(
-        self, name: str, new_name: str, bid: int | None, n_dims: int
-    ) -> gguf.GGMLQuantizationType | bool:
-        if bid is not None and new_name in (
-            self.format_tensor_name(
-                n, bid, ".weight" if name.endswith(".weight") else ""
-            )
-            for n in [
-                gguf.MODEL_TENSOR.SSM_CONV1D,
-                gguf.MODEL_TENSOR.SSM_X,
-                gguf.MODEL_TENSOR.SSM_DT,
-                gguf.MODEL_TENSOR.SSM_A,
-                gguf.MODEL_TENSOR.SSM_D,
-            ]
-        ):
-            return gguf.GGMLQuantizationType.F32
-
-        return super().tensor_force_quant(name, new_name, bid, n_dims)
-

 @Model.register("CohereForCausalLM")
 class CommandR2Model(Model):
@@ -4301,7 +4296,10 @@ def prepare_tensors(self):
         if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
             if rope_scaling.get("rope_type", "").lower() == "llama3":
                 base = self.hparams.get("rope_theta", 10000.0)
-                dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
+                dim = self.hparams.get(
+                    "head_dim",
+                    self.hparams["hidden_size"] // self.hparams["num_attention_heads"],
+                )
                 freqs = 1.0 / (
                     base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim)
                 )
@@ -4330,10 +4328,11 @@ def prepare_tensors(self):
                         )
                         rope_factors.append(1 / ((1 - smooth) / factor + smooth))

-                self.gguf_writer.add_tensor(
-                    self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS),
-                    np.array(rope_factors, dtype=np.float32),
-                )
+                if not self.is_lora:
+                    self.gguf_writer.add_tensor(
+                        self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS),
+                        np.array(rope_factors, dtype=np.float32),
+                    )

         super().prepare_tensors()

@@ -4403,82 +4402,26 @@ def __torch_function__(cls, func, types, args=(), kwargs=None):


 def parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(description="")
-    parser.add_argument(
-        "--vocab-only",
-        action="store_true",
-        help="",
-    )
-    parser.add_argument(
-        "--outfile",
-        type=Path,
-        help="",
-    )
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--vocab-only", action="store_true")
+    parser.add_argument("--outfile", type=Path)
     parser.add_argument(
         "--outtype",
         type=str,
         choices=["f32", "f16", "bf16", "q8_0", "auto"],
         default="f16",
-        help="",
-    )
-    parser.add_argument(
-        "--bigendian",
-        action="store_true",
-        help="",
-    )
-    parser.add_argument(
-        "model",
-        type=Path,
-        help="",
-    )
-    parser.add_argument(
-        "--use-temp-file",
-        action="store_true",
-        help="",
-    )
-    parser.add_argument(
-        "--no-lazy",
-        action="store_true",
-        help="",
-    )
-    parser.add_argument(
-        "--model-name",
-        type=str,
-        default=None,
-        help="",
-    )
-    parser.add_argument(
-        "--verbose",
-        action="store_true",
-        help="",
-    )
-    parser.add_argument(
-        "--split-max-tensors",
-        type=int,
-        default=0,
-        help="",
-    )
-    parser.add_argument(
-        "--split-max-size",
-        type=str,
-        default="0",
-        help="",
-    )
-    parser.add_argument(
-        "--dry-run",
-        action="store_true",
-        help="",
-    )
-    parser.add_argument(
-        "--no-tensor-first-split",
-        action="store_true",
-        help="",
-    )
-    parser.add_argument(
-        "--metadata",
-        type=Path,
-        help="",
     )
+    parser.add_argument("--bigendian", action="store_true")
+    parser.add_argument("model", type=Path)
+    parser.add_argument("--use-temp-file", action="store_true")
+    parser.add_argument("--no-lazy", action="store_true")
+    parser.add_argument("--model-name", type=str, default=None)
+    parser.add_argument("--verbose", action="store_true")
+    parser.add_argument("--split-max-tensors", type=int, default=0)
+    parser.add_argument("--split-max-size", type=str, default="0")
+    parser.add_argument("--dry-run", action="store_true")
+    parser.add_argument("--no-tensor-first-split", action="store_true")
+    parser.add_argument("--metadata", type=Path)

     return parser.parse_args()
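
Note: the rebuilt parser keeps the same options and defaults; only the empty help strings are gone. A short sanity check assembled from the add_argument calls that appear in the diff (sample paths are illustrative):

import argparse
from pathlib import Path

parser = argparse.ArgumentParser()
parser.add_argument("--vocab-only", action="store_true")
parser.add_argument("--outfile", type=Path)
parser.add_argument(
    "--outtype",
    type=str,
    choices=["f32", "f16", "bf16", "q8_0", "auto"],
    default="f16",
)
parser.add_argument("model", type=Path)

args = parser.parse_args(["models/my-hf-model", "--outtype", "q8_0", "--outfile", "out.gguf"])
assert args.outtype == "q8_0"
assert args.model == Path("models/my-hf-model")
assert args.outfile == Path("out.gguf")

A typical invocation would then look like: python src/convert_hf_to_gguf.py models/my-hf-model --outfile out.gguf --outtype f16 (paths illustrative).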
