
Commit b4817ee
refactor(ggml): update safetensor conversion scripts
Parent: c9c2b04

10 files changed: +1306 -655 lines

src/convert_hf_to_gguf.py (+876 -555)
Large diffs are not rendered by default.

src/convert_lora_to_gguf.py (+21 -39)
@@ -27,7 +27,6 @@
 
 import gguf
 
-# reuse model definitions from convert_hf_to_gguf.py
 from convert_hf_to_gguf import LazyTorchTensor, Model
 
 logger = logging.getLogger("lora-to-gguf")
@@ -39,10 +38,9 @@ class PartialLoraTensor:
     B: Tensor | None = None
 
 
-# magic to support tensor shape modifications and splitting
 class LoraTorchTensor:
-    _lora_A: Tensor  # (n_rank, row_size)
-    _lora_B: Tensor  # (col_size, n_rank)
+    _lora_A: Tensor
+    _lora_B: Tensor
     _rank: int
 
     def __init__(self, A: Tensor, B: Tensor):
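
(Background, not part of the commit: the shape comments deleted above described the standard LoRA factorization, in which the adapter's weight delta is the product of the two low-rank factors. A minimal sketch, assuming the shapes the removed comments documented, (n_rank, row_size) for _lora_A and (col_size, n_rank) for _lora_B; the sizes below are made up for illustration.)

    import torch

    n_rank, row_size, col_size = 8, 4096, 4096   # illustrative sizes only
    lora_A = torch.randn(n_rank, row_size)       # (n_rank, row_size)
    lora_B = torch.randn(col_size, n_rank)       # (col_size, n_rank)

    # The effective weight delta encoded by the adapter is the low-rank
    # product B @ A, matching the (col_size, row_size) base weight it patches.
    delta_w = lora_B @ lora_A
    assert delta_w.shape == (col_size, row_size)
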
@@ -60,20 +58,14 @@ def get_lora_A_B(self) -> tuple[Tensor, Tensor]:
 
     def __getitem__(
         self,
-        indices: (
-            SupportsIndex
-            | slice
-            | tuple[
-                SupportsIndex | slice | Tensor, ...
-            ]  # TODO: add ellipsis in the type signature
-        ),
+        indices: SupportsIndex | slice | tuple[SupportsIndex | slice | Tensor, ...],
     ) -> LoraTorchTensor:
         shape = self.shape
         if isinstance(indices, SupportsIndex):
             if len(shape) > 2:
                 return LoraTorchTensor(self._lora_A[indices], self._lora_B[indices])
             else:
-                raise NotImplementedError  # can't return a vector
+                raise NotImplementedError
         elif isinstance(indices, slice):
             if len(shape) > 2:
                 return LoraTorchTensor(self._lora_A[indices], self._lora_B[indices])
@@ -83,7 +75,7 @@ def __getitem__(
             assert len(indices) > 0
             if indices[-1] is Ellipsis:
                 return self[indices[:-1]]
-            # expand ellipsis
+
             indices = tuple(
                 u
                 for v in (
@@ -103,7 +95,6 @@ def __getitem__(
                 *(slice(None, None) for _ in range(len(indices), len(shape))),
             )
 
-            # TODO: make sure this is correct
             indices_A = (
                 *(
                     (
@@ -119,7 +110,7 @@ def __getitem__(
             indices_B = indices[:-1]
             return LoraTorchTensor(self._lora_A[indices_A], self._lora_B[indices_B])
         else:
-            raise NotImplementedError  # unknown indice type
+            raise NotImplementedError
 
     @property
     def dtype(self) -> torch.dtype:
@@ -142,9 +133,8 @@ def reshape(self, *shape: int | tuple[int, ...]) -> LoraTorchTensor:
         new_shape = cast(tuple[int, ...], shape)
         orig_shape = self.shape
         if len(new_shape) < 2:
-            raise NotImplementedError  # can't become a vector
+            raise NotImplementedError
 
-        # expand -1 in the shape
         if any(dim == -1 for dim in new_shape):
             n_elems = prod(orig_shape)
             n_new_elems = prod(dim if dim != -1 else 1 for dim in new_shape)
@@ -154,7 +144,7 @@ def reshape(self, *shape: int | tuple[int, ...]) -> LoraTorchTensor:
             )
 
         if new_shape[-1] != orig_shape[-1]:
-            raise NotImplementedError  # can't reshape the row size trivially
+            raise NotImplementedError
 
         shape_A = (*(1 for _ in new_shape[:-2]), self._rank, orig_shape[-1])
         shape_B = (*new_shape[:-1], self._rank)
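
(A worked example of the two shape expressions above, with made-up numbers: reshaping a rank-8 (4096, 4096) LoRA tensor to (32, 128, 4096) keeps the row size, so the A factor only gains broadcastable leading dimensions while the B factor absorbs the split.)

    rank = 8
    orig_shape = (4096, 4096)
    new_shape = (32, 128, 4096)

    shape_A = (*(1 for _ in new_shape[:-2]), rank, orig_shape[-1])  # -> (1, 8, 4096)
    shape_B = (*new_shape[:-1], rank)                               # -> (32, 128, 8)
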
@@ -173,15 +163,15 @@ def permute(self, *dims: int) -> LoraTorchTensor:
         shape = self.shape
         dims = tuple(dim - len(shape) if dim >= 0 else dim for dim in dims)
         if dims[-1] == -1:
-            # TODO: support higher dimensional A shapes bigger than 1
+
             assert all(dim == 1 for dim in self._lora_A.shape[:-2])
             return LoraTorchTensor(self._lora_A, self._lora_B.permute(*dims))
         if len(shape) == 2 and dims[-1] == -2 and dims[-2] == -1:
             return LoraTorchTensor(
                 self._lora_B.permute(*dims), self._lora_A.permute(*dims)
             )
         else:
-            # TODO: compose the above two
+
             raise NotImplementedError
 
     def transpose(self, dim0: int, dim1: int) -> LoraTorchTensor:
@@ -200,7 +190,7 @@ def to(self, *args, **kwargs):
 
     @classmethod
     def __torch_function__(cls, func: Callable, types, args=(), kwargs=None):
-        del types  # unused
+        del types
 
         if kwargs is None:
             kwargs = {}
@@ -241,7 +231,7 @@ def get_base_tensor_name(lora_tensor_name: str) -> str:
     base_name = lora_tensor_name.replace("base_model.model.", "")
     base_name = base_name.replace(".lora_A.weight", ".weight")
     base_name = base_name.replace(".lora_B.weight", ".weight")
-    # models produced by mergekit-extract-lora have token embeddings in the adapter
+
     base_name = base_name.replace(".lora_embedding_A", ".weight")
     base_name = base_name.replace(".lora_embedding_B", ".weight")
     return base_name
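
(Illustration of the renaming above, using a hypothetical adapter tensor name in the usual PEFT layout; the name is not taken from the commit.)

    name = "base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight"
    base_name = get_base_tensor_name(name)
    # base_name == "model.layers.0.self_attn.q_proj.weight"
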
@@ -303,7 +293,7 @@ def parse_args() -> argparse.Namespace:
 
 
 def load_hparams_from_hf(hf_model_id: str) -> dict[str, Any]:
-    # normally, adapter does not come with base model config, we need to load it from AutoConfig
+
     config = AutoConfig.from_pretrained(hf_model_id)
     return config.to_dict()
 
@@ -331,23 +321,21 @@ def load_hparams_from_hf(hf_model_id: str) -> dict[str, Any]:
     if args.outfile is not None:
         fname_out = args.outfile
     else:
-        # output in the same directory as the model by default
+
         fname_out = dir_lora
 
     if os.path.exists(input_model):
-        # lazy import load_file only if lora is in safetensors format.
+
         from safetensors.torch import load_file
 
         lora_model = load_file(input_model, device="cpu")
     else:
         input_model = os.path.join(dir_lora, "adapter_model.bin")
         lora_model = torch.load(input_model, map_location="cpu", weights_only=True)
 
-    # load LoRA config
     with open(lora_config, "r") as f:
         lparams: dict[str, Any] = json.load(f)
 
-    # load base model
     if base_model_id is not None:
         logger.info(f"Loading base model from Hugging Face: {base_model_id}")
         hparams = load_hparams_from_hf(base_model_id)
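
(The loading branch itself is unchanged by this hunk; only comments were dropped. A self-contained sketch of the same logic under an assumed adapter directory: safetensors adapters go through safetensors' own loader, anything else falls back to the legacy PyTorch .bin format.)

    import os
    import torch

    dir_lora = "./my-lora-adapter"  # hypothetical path, for illustration only
    input_model = os.path.join(dir_lora, "adapter_model.safetensors")

    if os.path.exists(input_model):
        # safetensors adapters are loaded with safetensors' loader
        from safetensors.torch import load_file
        lora_model = load_file(input_model, device="cpu")
    else:
        # otherwise fall back to the legacy PyTorch checkpoint format
        input_model = os.path.join(dir_lora, "adapter_model.bin")
        lora_model = torch.load(input_model, map_location="cpu", weights_only=True)
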
@@ -409,7 +397,7 @@ def set_gguf_parameters(self):
         )
 
     def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
-        # Never add extra tensors (e.g. rope_freqs) for LoRA adapters
+
        return ()
 
     def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
@@ -419,13 +407,13 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
             if self.lazy:
                 tensor = LazyTorchTensor.from_eager(tensor)
             base_name = get_base_tensor_name(name)
-            # note: mergekit-extract-lora also adds token embeddings to the adapter
+
             is_lora_a = ".lora_A.weight" in name or ".lora_embedding_A" in name
             is_lora_b = ".lora_B.weight" in name or ".lora_embedding_B" in name
             if not is_lora_a and not is_lora_b:
                 if ".base_layer.weight" in name:
                     continue
-                # mergekit-extract-lora add these layernorm to the adapter, we need to keep them
+
                 if "_layernorm" in name or ".norm" in name:
                     yield (base_name, tensor)
                     continue
@@ -437,7 +425,7 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
                     "Embeddings is present in the adapter. This can be due to new tokens added during fine tuning"
                 )
                 logger.error(
-                    "Please refer to https://github.com/ggerganov/llama.cpp/pull/9948"
+                    "Please refer to https://github.com/ggml-org/llama.cpp/pull/9948"
                 )
                 sys.exit(1)
 
@@ -464,27 +452,21 @@ def modify_tensors(
         self, data_torch: Tensor, name: str, bid: int | None
     ) -> Iterable[tuple[str, Tensor]]:
         dest = list(super().modify_tensors(data_torch, name, bid))
-        # some archs may have the same tensor for lm_head and output (tie word embeddings)
-        # in this case, adapters targeting lm_head will fail when using llama-export-lora
-        # therefore, we ignore them for now
-        # see: https://github.com/ggerganov/llama.cpp/issues/9065
+
         if name == "lm_head.weight" and len(dest) == 0:
             raise ValueError(
                 "lm_head is present in adapter, but is ignored in base model"
             )
         for dest_name, dest_data in dest:
-            # mergekit-extract-lora add these layernorm to the adapter
+
             if "_norm" in dest_name:
                 assert dest_data.dim() == 1
                 yield (dest_name, dest_data)
                 continue
 
-            # otherwise, we must get the lora_A and lora_B tensors
             assert isinstance(dest_data, LoraTorchTensor)
             lora_a, lora_b = dest_data.get_lora_A_B()
 
-            # note: mergekit-extract-lora flip and transpose A and B
-            # here we only need to transpose token_embd.lora_a, see llm_build_inp_embd()
             if "token_embd.weight" in dest_name:
                 lora_a = lora_a.T
 
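
(The comments deleted in this last hunk carried the most context: adapters targeting lm_head are rejected when the base model ties lm_head to the output tensor, because such adapters break llama-export-lora, see https://github.com/ggerganov/llama.cpp/issues/9065; mergekit-extract-lora adapters may carry extra layernorm tensors, which are forwarded unchanged; and only token_embd's A factor needs a transpose to match llm_build_inp_embd(). A condensed sketch of that per-tensor flow follows; the output naming suffixes are assumptions for illustration, not taken from the diff.)

    for dest_name, dest_data in dest:
        if "_norm" in dest_name:
            # mergekit-extract-lora layernorms: 1-D tensors, forwarded as-is
            yield (dest_name, dest_data)
            continue
        lora_a, lora_b = dest_data.get_lora_A_B()
        if "token_embd.weight" in dest_name:
            lora_a = lora_a.T  # only the token-embedding A factor is transposed
        yield (dest_name + ".lora_a", lora_a)  # assumed naming, illustrative only
        yield (dest_name + ".lora_b", lora_b)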