Skip to content

Commit dbb3a47

Browse files
authored
llama : make Qwen2MoE QKV bias optional (ggml-org#12477)
1 parent 3d82dbc commit dbb3a47

File tree

1 file changed

+15
-6
lines changed

1 file changed

+15
-6
lines changed

src/llama-model.cpp

+15 -6
Original file line number | Diff line number | Diff line change
@@ -2210,9 +2210,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
22102210
layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0);
22112211

22122212
// optional bias tensors
2213-
layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), {n_embd}, 0);
2214-
layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), {n_embd_gqa}, 0);
2215-
layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa}, 0);
2213+
layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), {n_embd}, TENSOR_NOT_REQUIRED);
2214+
layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), {n_embd_gqa}, TENSOR_NOT_REQUIRED);
2215+
layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa}, TENSOR_NOT_REQUIRED);
22162216

22172217
layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);
22182218

@@ -6193,16 +6193,25 @@ struct llm_build_qwen2moe : public llm_graph_context {
61936193
{
61946194
// compute Q and K and RoPE them
61956195
ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur);
6196-
Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq);
61976196
cb(Qcur, "Qcur", il);
6197+
if (model.layers[il].bq) {
6198+
Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq);
6199+
cb(Qcur, "Qcur", il);
6200+
}
61986201

61996202
ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur);
6200-
Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk);
62016203
cb(Kcur, "Kcur", il);
6204+
if (model.layers[il].bk) {
6205+
Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk);
6206+
cb(Kcur, "Kcur", il);
6207+
}
62026208

62036209
ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur);
6204-
Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv);
62056210
cb(Vcur, "Vcur", il);
6211+
if (model.layers[il].bv) {
6212+
Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv);
6213+
cb(Vcur, "Vcur", il);
6214+
}
62066215

62076216
Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens);
62086217
Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);

0 commit comments

Comments
 (0)