
Commit b2d13f0 (1 parent: 32c639e)

lora: lokr tweaks + reorganize

1 file changed: lora.hpp (+30, -21 lines)
@@ -266,8 +266,13 @@ struct LoraModel : public GGMLRunner {
                 float scale_value = 1.0f;
                 std::string fk = lora_pre[type] + key;
                 if (lora_tensors.find(fk + ".hada_w1_a") != lora_tensors.end()) {
-                    // loHa mode
+                    // LoHa mode

+                    // TODO: split qkv convention for LoHas (is it ever used?)
+                    if (is_qkv_split || is_qkvm_split) {
+                        LOG_ERROR("Split qkv isn't supported for LoHa models.");
+                        break;
+                    }
                     std::string alpha_name = "";

                     ggml_tensor* hada_1_mid = NULL; // tau for tucker decomposition
@@ -286,11 +291,6 @@ struct LoraModel : public GGMLRunner {
                     std::string hada_2_down_name = "";
                     std::string hada_2_up_name = "";

-                    // TODO: split qkv convention for LoHas (is it ever used?)
-                    if (is_qkv_split || is_qkvm_split) {
-                        LOG_ERROR("Split qkv isn't supported for LoHa models.");
-                        break;
-                    }

                     hada_1_down_name = fk + ".hada_w1_b";
                     hada_1_up_name = fk + ".hada_w1_a";
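
Note (not part of the diff): with the two hunks above, the split-qkv guard now runs before any LoHa tensor names are even set up. For background, a LoHa adapter stores two low-rank pairs and reconstructs the weight delta as the element-wise (Hadamard) product of the two reconstructed matrices, scaled by alpha / rank. A minimal standalone sketch in plain C++ with toy sizes and made-up values (matmul, out_dim, in_dim and so on are illustrative names, not from lora.hpp):

    #include <cstdio>
    #include <vector>

    // LoHa-style delta on row-major float matrices:
    // delta = scale * (w1_a * w1_b) hadamard (w2_a * w2_b), scale = alpha / rank.
    // w1_a, w2_a are [out x rank]; w1_b, w2_b are [rank x in].
    static std::vector<float> matmul(const std::vector<float>& a, const std::vector<float>& b,
                                     int rows, int inner, int cols) {
        std::vector<float> out(rows * cols, 0.0f);
        for (int i = 0; i < rows; i++)
            for (int k = 0; k < inner; k++)
                for (int j = 0; j < cols; j++)
                    out[i * cols + j] += a[i * inner + k] * b[k * cols + j];
        return out;
    }

    int main() {
        const int out_dim = 4, in_dim = 4, rank = 2;
        const float alpha = 1.0f;
        // Toy factors filled with constants; a real LoHa adapter provides these tensors.
        std::vector<float> w1_a(out_dim * rank, 0.5f), w1_b(rank * in_dim, 0.25f);
        std::vector<float> w2_a(out_dim * rank, 1.0f), w2_b(rank * in_dim, 0.5f);

        std::vector<float> m1 = matmul(w1_a, w1_b, out_dim, rank, in_dim);
        std::vector<float> m2 = matmul(w2_a, w2_b, out_dim, rank, in_dim);

        float scale = alpha / rank;  // the alpha-over-rank scaling this commit renames dim -> rank
        std::vector<float> delta(out_dim * in_dim);
        for (std::size_t i = 0; i < delta.size(); i++)
            delta[i] = scale * m1[i] * m2[i];  // Hadamard product of the two low-rank products

        printf("delta[0][0] = %f\n", delta[0]);  // 0.5 * (0.25 * 1.0) = 0.125
        return 0;
    }
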
@@ -340,12 +340,20 @@ struct LoraModel : public GGMLRunner {

                     // calc_scale
                     // TODO: .dora_scale?
-                    int64_t dim = hada_1_down->ne[ggml_n_dims(hada_1_down) - 1];
+                    int64_t rank = hada_1_down->ne[ggml_n_dims(hada_1_down) - 1];
                     if (lora_tensors.find(alpha_name) != lora_tensors.end()) {
                         float alpha = ggml_backend_tensor_get_f32(lora_tensors[alpha_name]);
-                        scale_value = alpha / dim;
+                        scale_value = alpha / rank;
                     }
                 } else if (lora_tensors.find(fk + ".lokr_w1") != lora_tensors.end() || lora_tensors.find(fk + ".lokr_w1_a") != lora_tensors.end()) {
+                    // LoKr mode
+
+                    // TODO: split qkv convention for LoKrs (is it ever used?)
+                    if (is_qkv_split || is_qkvm_split) {
+                        LOG_ERROR("Split qkv isn't supported for LoKr models.");
+                        break;
+                    }
+
                     std::string alpha_name = fk + ".alpha";

                     ggml_tensor* lokr_w1 = NULL;
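
The dim to rank rename in this hunk is cosmetic: the scale is still alpha divided by the last dimension of the down factor, and stays at 1.0 when no alpha tensor is present. A hypothetical standalone helper illustrating just that rule (the names are invented, and the rank is passed in directly rather than read from a ggml tensor's ne array):

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <string>

    // Pick the scale from an optional alpha value. In lora.hpp the rank is the
    // last dimension of the down tensor, down->ne[ggml_n_dims(down) - 1].
    static float lora_scale(const std::map<std::string, float>& alphas,
                            const std::string& alpha_name,
                            int64_t rank) {
        float scale_value = 1.0f;  // default when no alpha tensor is stored
        auto it = alphas.find(alpha_name);
        if (it != alphas.end() && rank > 0) {
            scale_value = it->second / static_cast<float>(rank);
        }
        return scale_value;
    }

    int main() {
        std::map<std::string, float> alphas = {{"model.layer.alpha", 8.0f}};
        // alpha = 8 over rank = 16 gives 0.5; a missing alpha entry would give 1.0.
        printf("scale = %f\n", lora_scale(alphas, "model.layer.alpha", 16));
        return 0;
    }
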
@@ -354,12 +362,6 @@ struct LoraModel : public GGMLRunner {
                     std::string lokr_w1_name = "";
                     std::string lokr_w2_name = "";

-                    // TODO: split qkv convention for LoKrs (is it ever used?)
-                    if (is_qkv_split || is_qkvm_split) {
-                        LOG_ERROR("Split qkv isn't supported for LoKr models.");
-                        break;
-                    }
-
                     lokr_w1_name = fk + ".lokr_w1";
                     lokr_w2_name = fk + ".lokr_w2";

@@ -372,14 +374,14 @@ struct LoraModel : public GGMLRunner {
                         std::string down_name = lokr_w1_name + "_b";
                         std::string up_name = lokr_w1_name + "_a";
                         if (lora_tensors.find(down_name) != lora_tensors.end()) {
+                            // w1 should not be low rank normally, sometimes w1 and w2 are swapped
                             down = to_f32(compute_ctx, lora_tensors[down_name]);
                             applied_lora_tensors.insert(down_name);

-                            // scale != 1 only when using Low rank form (?)
-                            int64_t dim = down->ne[ggml_n_dims(down) - 1];
+                            int64_t rank = down->ne[ggml_n_dims(down) - 1];
                             if (lora_tensors.find(alpha_name) != lora_tensors.end()) {
                                 float alpha = ggml_backend_tensor_get_f32(lora_tensors[alpha_name]);
-                                scale_value = alpha / dim;
+                                scale_value = alpha / rank;
                             }
                         }
                         if (lora_tensors.find(up_name) != lora_tensors.end()) {
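
As the new comment notes, lokr_w1 is normally stored as a full tensor and only w2 tends to be low rank, but either factor may arrive as an "_a"/"_b" pair, in which case it is rebuilt (ggml_merge_lora in this file) and the alpha / rank scale applies. A small standalone sketch of just that lookup order, with placeholder tensor names and plain structs standing in for ggml tensors:

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    // Use the full "<name>" tensor when stored, otherwise fall back to the
    // "<name>_a" (up) / "<name>_b" (down) pair and read the rank from the
    // down factor's last dimension.
    struct Tensor {
        std::vector<int64_t> ne;  // dimensions, innermost first, as in ggml
        std::vector<float> data;
    };

    static bool has(const std::map<std::string, Tensor>& t, const std::string& name) {
        return t.find(name) != t.end();
    }

    int main() {
        std::map<std::string, Tensor> lora_tensors;
        // Toy case: only the low-rank pair is present, no full "lokr_w1" entry.
        lora_tensors["lokr_w1_a"] = Tensor{{2, 4}, std::vector<float>(8, 1.0f)};  // up
        lora_tensors["lokr_w1_b"] = Tensor{{3, 2}, std::vector<float>(6, 0.5f)};  // down

        std::string w1_name = "lokr_w1";
        if (has(lora_tensors, w1_name)) {
            printf("using full %s tensor as-is\n", w1_name.c_str());
        } else if (has(lora_tensors, w1_name + "_a") && has(lora_tensors, w1_name + "_b")) {
            const Tensor& down = lora_tensors[w1_name + "_b"];
            int64_t rank = down.ne.back();  // like down->ne[ggml_n_dims(down) - 1]
            printf("rebuilding %s from _a/_b, rank = %lld\n", w1_name.c_str(), (long long)rank);
        }
        return 0;
    }
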
@@ -399,18 +401,25 @@ struct LoraModel : public GGMLRunner {
                         if (lora_tensors.find(down_name) != lora_tensors.end()) {
                             down = to_f32(compute_ctx, lora_tensors[down_name]);
                             applied_lora_tensors.insert(down_name);
+
+                            int64_t rank = down->ne[ggml_n_dims(down) - 1];
+                            if (lora_tensors.find(alpha_name) != lora_tensors.end()) {
+                                float alpha = ggml_backend_tensor_get_f32(lora_tensors[alpha_name]);
+                                scale_value = alpha / rank;
+                            }
                         }
                         if (lora_tensors.find(up_name) != lora_tensors.end()) {
                             up = to_f32(compute_ctx, lora_tensors[up_name]);
                             applied_lora_tensors.insert(up_name);
                         }
                         lokr_w2 = ggml_merge_lora(compute_ctx, down, up);
                     }
+
+                    // Technically it might be unused, but I believe it's the expected behavior
+                    applied_lora_tensors.insert(alpha_name);

                     updown = ggml_kronecker(compute_ctx, lokr_w1, lokr_w2);

-                    // TODO: double check alpha implementation, it seems strange to not use them most of the time
-                    applied_lora_tensors.insert(alpha_name);
                 } else {
                     // LoRA mode
                     ggml_tensor* lora_mid = NULL; // tau for tucker decomposition
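
The LoKr delta itself is the Kronecker product of the two factors, which is what the ggml_kronecker call above computes; the reorganized alpha handling now marks the alpha tensor as applied even when neither factor is low rank. A standalone sketch of the Kronecker composition in plain C++ (toy 2x2 and 3x3 factors, purely illustrative, not the ggml graph code):

    #include <cstdio>
    #include <vector>

    // delta = scale * kron(w1, w2): small factors expand into a
    // (rows1*rows2) x (cols1*cols2) weight delta. Row-major floats.
    static std::vector<float> kron(const std::vector<float>& a, int ar, int ac,
                                   const std::vector<float>& b, int br, int bc) {
        std::vector<float> out(ar * br * ac * bc);
        for (int i = 0; i < ar; i++)
            for (int j = 0; j < ac; j++)
                for (int k = 0; k < br; k++)
                    for (int l = 0; l < bc; l++)
                        out[(i * br + k) * (ac * bc) + (j * bc + l)] = a[i * ac + j] * b[k * bc + l];
        return out;
    }

    int main() {
        // w1 is 2x2, w2 is 3x3, so the delta is 6x6.
        std::vector<float> w1 = {1.0f, 2.0f, 3.0f, 4.0f};
        std::vector<float> w2(9, 0.5f);
        float scale = 1.0f;  // alpha / rank when a factor was rebuilt from a low-rank pair

        std::vector<float> delta = kron(w1, 2, 2, w2, 3, 3);
        for (float& x : delta)
            x *= scale;
        printf("delta is 6x6, delta[0][0] = %f\n", delta[0]);  // 1.0 * 0.5 = 0.5
        return 0;
    }

Because the Kronecker product multiplies the shapes of its inputs, small factors are enough to cover a large weight matrix, which is the point of the LoKr parametrization.
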
@@ -770,12 +779,12 @@ struct LoraModel : public GGMLRunner {
                     }
                     // calc_scale
                     // TODO: .dora_scale?
-                    int64_t dim = lora_down->ne[ggml_n_dims(lora_down) - 1];
+                    int64_t rank = lora_down->ne[ggml_n_dims(lora_down) - 1];
                     if (lora_tensors.find(scale_name) != lora_tensors.end()) {
                         scale_value = ggml_backend_tensor_get_f32(lora_tensors[scale_name]);
                     } else if (lora_tensors.find(alpha_name) != lora_tensors.end()) {
                         float alpha = ggml_backend_tensor_get_f32(lora_tensors[alpha_name]);
-                        scale_value = alpha / dim;
+                        scale_value = alpha / rank;
                     }

                     updown = ggml_merge_lora(compute_ctx, lora_down, lora_up, lora_mid);
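
For comparison, the plain LoRA branch touched by this last hunk ends up with delta = scale * (up x down), where scale comes from an explicit .scale tensor when present and otherwise from alpha / rank, and the delta is then merged into the base weight. A minimal standalone sketch with toy matrices, ignoring lora_mid (Tucker decomposition) and the qkv-split handling:

    #include <cstdio>
    #include <vector>

    // W += scale * up * down, with up [out x rank] and down [rank x in],
    // scale = alpha / rank. Row-major toy matrices, not the ggml graph code.
    int main() {
        const int out_dim = 2, in_dim = 3, rank = 1;
        const float alpha = 2.0f;
        const float scale = alpha / rank;

        std::vector<float> up(out_dim * rank, 1.0f);         // [out x rank]
        std::vector<float> down(rank * in_dim, 0.25f);       // [rank x in]
        std::vector<float> weight(out_dim * in_dim, 1.0f);   // base W

        for (int i = 0; i < out_dim; i++)
            for (int k = 0; k < rank; k++)
                for (int j = 0; j < in_dim; j++)
                    weight[i * in_dim + j] += scale * up[i * rank + k] * down[k * in_dim + j];

        printf("merged W[0][0] = %f\n", weight[0]);  // 1.0 + 2.0 * 1.0 * 0.25 = 1.5
        return 0;
    }
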
