
import gguf

-# reuse model definitions from convert_hf_to_gguf.py
from convert_hf_to_gguf import LazyTorchTensor, Model

logger = logging.getLogger("lora-to-gguf")
@@ -39,10 +38,9 @@ class PartialLoraTensor:
    B: Tensor | None = None


-# magic to support tensor shape modifications and splitting
class LoraTorchTensor:
-    _lora_A: Tensor  # (n_rank, row_size)
-    _lora_B: Tensor  # (col_size, n_rank)
+    _lora_A: Tensor
+    _lora_B: Tensor
    _rank: int

    def __init__(self, A: Tensor, B: Tensor):
@@ -60,20 +58,14 @@ def get_lora_A_B(self) -> tuple[Tensor, Tensor]:

    def __getitem__(
        self,
-        indices: (
-            SupportsIndex
-            | slice
-            | tuple[
-                SupportsIndex | slice | Tensor, ...
-            ]  # TODO: add ellipsis in the type signature
-        ),
+        indices: SupportsIndex | slice | tuple[SupportsIndex | slice | Tensor, ...],
    ) -> LoraTorchTensor:
        shape = self.shape
        if isinstance(indices, SupportsIndex):
            if len(shape) > 2:
                return LoraTorchTensor(self._lora_A[indices], self._lora_B[indices])
            else:
-                raise NotImplementedError  # can't return a vector
+                raise NotImplementedError
        elif isinstance(indices, slice):
            if len(shape) > 2:
                return LoraTorchTensor(self._lora_A[indices], self._lora_B[indices])
@@ -83,7 +75,7 @@ def __getitem__(
            assert len(indices) > 0
            if indices[-1] is Ellipsis:
                return self[indices[:-1]]
-            # expand ellipsis
+
            indices = tuple(
                u
                for v in (
@@ -103,7 +95,6 @@ def __getitem__(
                *(slice(None, None) for _ in range(len(indices), len(shape))),
            )

-            # TODO: make sure this is correct
            indices_A = (
                *(
                    (
@@ -119,7 +110,7 @@ def __getitem__(
            indices_B = indices[:-1]
            return LoraTorchTensor(self._lora_A[indices_A], self._lora_B[indices_B])
        else:
-            raise NotImplementedError  # unknown indice type
+            raise NotImplementedError

    @property
    def dtype(self) -> torch.dtype:
@@ -142,9 +133,8 @@ def reshape(self, *shape: int | tuple[int, ...]) -> LoraTorchTensor:
            new_shape = cast(tuple[int, ...], shape)
        orig_shape = self.shape
        if len(new_shape) < 2:
-            raise NotImplementedError  # can't become a vector
+            raise NotImplementedError

-        # expand -1 in the shape
        if any(dim == -1 for dim in new_shape):
            n_elems = prod(orig_shape)
            n_new_elems = prod(dim if dim != -1 else 1 for dim in new_shape)
@@ -154,7 +144,7 @@ def reshape(self, *shape: int | tuple[int, ...]) -> LoraTorchTensor:
            )

        if new_shape[-1] != orig_shape[-1]:
-            raise NotImplementedError  # can't reshape the row size trivially
+            raise NotImplementedError

        shape_A = (*(1 for _ in new_shape[:-2]), self._rank, orig_shape[-1])
        shape_B = (*new_shape[:-1], self._rank)
@@ -173,15 +163,15 @@ def permute(self, *dims: int) -> LoraTorchTensor:
        shape = self.shape
        dims = tuple(dim - len(shape) if dim >= 0 else dim for dim in dims)
        if dims[-1] == -1:
-            # TODO: support higher dimensional A shapes bigger than 1
+
            assert all(dim == 1 for dim in self._lora_A.shape[:-2])
            return LoraTorchTensor(self._lora_A, self._lora_B.permute(*dims))
        if len(shape) == 2 and dims[-1] == -2 and dims[-2] == -1:
            return LoraTorchTensor(
                self._lora_B.permute(*dims), self._lora_A.permute(*dims)
            )
        else:
-            # TODO: compose the above two
+
            raise NotImplementedError

    def transpose(self, dim0: int, dim1: int) -> LoraTorchTensor:
@@ -200,7 +190,7 @@ def to(self, *args, **kwargs):

    @classmethod
    def __torch_function__(cls, func: Callable, types, args=(), kwargs=None):
-        del types  # unused
+        del types

        if kwargs is None:
            kwargs = {}
@@ -241,7 +231,7 @@ def get_base_tensor_name(lora_tensor_name: str) -> str:
    base_name = lora_tensor_name.replace("base_model.model.", "")
    base_name = base_name.replace(".lora_A.weight", ".weight")
    base_name = base_name.replace(".lora_B.weight", ".weight")
-    # models produced by mergekit-extract-lora have token embeddings in the adapter
+
    base_name = base_name.replace(".lora_embedding_A", ".weight")
    base_name = base_name.replace(".lora_embedding_B", ".weight")
    return base_name
@@ -303,7 +293,7 @@ def parse_args() -> argparse.Namespace:


def load_hparams_from_hf(hf_model_id: str) -> dict[str, Any]:
-    # normally, adapter does not come with base model config, we need to load it from AutoConfig
+
    config = AutoConfig.from_pretrained(hf_model_id)
    return config.to_dict()

@@ -331,23 +321,21 @@ def load_hparams_from_hf(hf_model_id: str) -> dict[str, Any]:
    if args.outfile is not None:
        fname_out = args.outfile
    else:
-        # output in the same directory as the model by default
+
        fname_out = dir_lora

    if os.path.exists(input_model):
-        # lazy import load_file only if lora is in safetensors format.
+
        from safetensors.torch import load_file

        lora_model = load_file(input_model, device="cpu")
    else:
        input_model = os.path.join(dir_lora, "adapter_model.bin")
        lora_model = torch.load(input_model, map_location="cpu", weights_only=True)

-    # load LoRA config
    with open(lora_config, "r") as f:
        lparams: dict[str, Any] = json.load(f)

-    # load base model
    if base_model_id is not None:
        logger.info(f"Loading base model from Hugging Face: {base_model_id}")
        hparams = load_hparams_from_hf(base_model_id)
@@ -409,7 +397,7 @@ def set_gguf_parameters(self):
            )

        def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
-            # Never add extra tensors (e.g. rope_freqs) for LoRA adapters
+
            return ()

        def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
@@ -419,13 +407,13 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
                if self.lazy:
                    tensor = LazyTorchTensor.from_eager(tensor)
                base_name = get_base_tensor_name(name)
-                # note: mergekit-extract-lora also adds token embeddings to the adapter
+
                is_lora_a = ".lora_A.weight" in name or ".lora_embedding_A" in name
                is_lora_b = ".lora_B.weight" in name or ".lora_embedding_B" in name
                if not is_lora_a and not is_lora_b:
                    if ".base_layer.weight" in name:
                        continue
-                    # mergekit-extract-lora add these layernorm to the adapter, we need to keep them
+
                    if "_layernorm" in name or ".norm" in name:
                        yield (base_name, tensor)
                        continue
@@ -437,7 +425,7 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
                            "Embeddings is present in the adapter. This can be due to new tokens added during fine tuning"
                        )
                        logger.error(
-                            "Please refer to https://github.com/ggerganov/llama.cpp/pull/9948"
+                            "Please refer to https://github.com/ggml-org/llama.cpp/pull/9948"
                        )
                    sys.exit(1)

@@ -464,27 +452,21 @@ def modify_tensors(
            self, data_torch: Tensor, name: str, bid: int | None
        ) -> Iterable[tuple[str, Tensor]]:
            dest = list(super().modify_tensors(data_torch, name, bid))
-            # some archs may have the same tensor for lm_head and output (tie word embeddings)
-            # in this case, adapters targeting lm_head will fail when using llama-export-lora
-            # therefore, we ignore them for now
-            # see: https://github.com/ggerganov/llama.cpp/issues/9065
+
            if name == "lm_head.weight" and len(dest) == 0:
                raise ValueError(
                    "lm_head is present in adapter, but is ignored in base model"
                )
            for dest_name, dest_data in dest:
-                # mergekit-extract-lora add these layernorm to the adapter
+
                if "_norm" in dest_name:
                    assert dest_data.dim() == 1
                    yield (dest_name, dest_data)
                    continue

-                # otherwise, we must get the lora_A and lora_B tensors
                assert isinstance(dest_data, LoraTorchTensor)
                lora_a, lora_b = dest_data.get_lora_A_B()

-                # note: mergekit-extract-lora flip and transpose A and B
-                # here we only need to transpose token_embd.lora_a, see llm_build_inp_embd()
                if "token_embd.weight" in dest_name:
                    lora_a = lora_a.T