@@ -2210,9 +2210,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0);
 
             // optional bias tensors
-            layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), {n_embd}, 0);
-            layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), {n_embd_gqa}, 0);
-            layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa}, 0);
+            layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), {n_embd}, TENSOR_NOT_REQUIRED);
+            layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), {n_embd_gqa}, TENSOR_NOT_REQUIRED);
+            layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa}, TENSOR_NOT_REQUIRED);
 
             layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);
 
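In the loader, the only change is the flag passed to create_tensor: with TENSOR_NOT_REQUIRED, a bias tensor that is absent from the GGUF file comes back as a null pointer instead of aborting the load, so Qwen2-MoE checkpoints exported without attention biases still load. A minimal, self-contained sketch of that optional-tensor behaviour (a toy stand-in, not the actual llama_model_loader::create_tensor implementation; the g_tensors map and its contents are illustrative only):

    // toy_optional_tensor.cpp -- illustration of TENSOR_NOT_REQUIRED semantics (not llama.cpp code)
    #include <cstdio>
    #include <map>
    #include <stdexcept>
    #include <string>

    enum tensor_flags { TENSOR_REQUIRED = 0, TENSOR_NOT_REQUIRED = 1 };

    // Stand-in for the set of tensors actually present in a GGUF file.
    static std::map<std::string, int> g_tensors = {
        { "blk.0.attn_q.weight", 0 },
        // note: no "blk.0.attn_q.bias" entry -- this checkpoint ships without attention bias
    };

    // Simplified create_tensor: a missing required tensor throws, a missing optional one returns nullptr.
    static int * create_tensor(const std::string & name, int flags) {
        auto it = g_tensors.find(name);
        if (it == g_tensors.end()) {
            if (flags & TENSOR_NOT_REQUIRED) {
                return nullptr; // caller must null-check before using the tensor
            }
            throw std::runtime_error("missing required tensor: " + name);
        }
        return &it->second;
    }

    int main() {
        int * bq = create_tensor("blk.0.attn_q.bias", TENSOR_NOT_REQUIRED);
        std::printf("bias tensor %s\n", bq ? "loaded" : "absent, bias add will be skipped");
        return 0;
    }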
@@ -6193,16 +6193,25 @@ struct llm_build_qwen2moe : public llm_graph_context {
             {
                 // compute Q and K and RoPE them
                 ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur);
-                Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq);
                 cb(Qcur, "Qcur", il);
+                if (model.layers[il].bq) {
+                    Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq);
+                    cb(Qcur, "Qcur", il);
+                }
 
                 ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur);
-                Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk);
                 cb(Kcur, "Kcur", il);
+                if (model.layers[il].bk) {
+                    Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk);
+                    cb(Kcur, "Kcur", il);
+                }
 
                 ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur);
-                Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv);
                 cb(Vcur, "Vcur", il);
+                if (model.layers[il].bv) {
+                    Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv);
+                    cb(Vcur, "Vcur", il);
+                }
 
                 Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens);
                 Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
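On the graph-building side, model.layers[il].bq/bk/bv may now be null, so the ggml_add for the bias (and the second cb tag on the resulting node) is emitted only when the tensor was actually loaded; otherwise the bare projection output is used directly. The same guard could be folded into a small helper, sketched below assuming only the standard ggml_add signature (add_bias_if_present is a hypothetical name, not an existing llama.cpp function):

    #include "ggml.h"

    // Hypothetical convenience wrapper: apply an optional bias tensor only if it was loaded.
    static ggml_tensor * add_bias_if_present(ggml_context * ctx, ggml_tensor * cur, ggml_tensor * bias) {
        return bias ? ggml_add(ctx, cur, bias) : cur;
    }

    // usage sketch inside the builder:
    //   Qcur = add_bias_if_present(ctx0, Qcur, model.layers[il].bq);
    //   cb(Qcur, "Qcur", il);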