
Commit cab7355

Update llama.cpp
1 parent d336217 commit cab7355

1 file changed: +14 -14 lines changed

Diff for: src/llama.cpp

@@ -16676,7 +16676,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     }
     else if (name.find("attn_k.weight") != std::string::npos) {
         if (qs.model.hparams.n_expert >= 8) new_type = GGML_TYPE_Q6_K;
-        else if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q5_K;
+        else if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q4_K;
         else new_type = GGML_TYPE_IQ4_XS;
     }
     else if (name.find("attn_v.weight") != std::string::npos) {
@@ -16712,7 +16712,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     }
     else if (name.find("attn_k.weight") != std::string::npos) {
         if (qs.model.hparams.n_expert >= 8) new_type = GGML_TYPE_Q6_K;
-        else if (qs.model.hparams.n_gqa() >= 2) new_type = GGML_TYPE_Q5_K;
+        else if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q4_K;
         else new_type = GGML_TYPE_IQ4_XS;
     }
     else if (name.find("attn_v.weight") != std::string::npos) {
@@ -16761,7 +16761,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     }
     else if (name.find("attn_k.weight") != std::string::npos) {
         if (qs.model.hparams.n_expert >= 8) new_type = GGML_TYPE_Q6_K;
-        else if (qs.model.hparams.n_gqa() >= 2) new_type = GGML_TYPE_Q5_K;
+        else if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q4_K;
         else new_type = GGML_TYPE_IQ4_XS;
     }
     else if (name.find("attn_v.weight") != std::string::npos) {
@@ -16798,7 +16798,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     }
     else if (name.find("attn_k.weight") != std::string::npos) {
         if (qs.model.hparams.n_expert >= 8) new_type = GGML_TYPE_Q6_K;
-        else if (qs.model.hparams.n_gqa() >= 2) new_type = GGML_TYPE_Q5_K;
+        else if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q4_K;
         else new_type = GGML_TYPE_IQ4_XS;
     }
     else if (name.find("attn_v.weight") != std::string::npos) {
@@ -16863,8 +16863,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     }
     else if (name.find("attn_k.weight") != std::string::npos) {
         if (qs.model.hparams.n_expert >= 8) new_type = GGML_TYPE_Q6_K;
-        else if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q5_K;
-        else new_type = GGML_TYPE_Q4_K;
+        else if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q4_K;
+        else new_type = GGML_TYPE_IQ4_XS;
     }
     else if (name.find("attn_v.weight") != std::string::npos) {
         if (qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q6_K;
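Here the change is two-sided: the n_gqa() >= 8 arm drops from Q5_K to Q4_K and the fallback drops from Q4_K to IQ4_XS. The same shape recurs in the hunks near lines 16949, 16999, and 17049 below. A sketch under the same hypothetical stand-ins as above:

    // Post-commit attn_k.weight selection for the lower-bit ftype branches
    // (hypothetical helper, reusing the stand-in ggml_type enum from the
    // sketch above).
    ggml_type pick_attn_k_type_lowbit(uint32_t n_expert, uint32_t n_gqa) {
        if (n_expert >= 8) return ggml_type::Q6_K;
        if (n_gqa >= 8)    return ggml_type::Q4_K;   // was GGML_TYPE_Q5_K
        return ggml_type::IQ4_XS;                    // was GGML_TYPE_Q4_K
    }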
@@ -16904,8 +16904,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     }
     else if (name.find("attn_v.weight") != std::string::npos) {
         if (qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q6_K;
-        else if (qs.model.hparams.n_gqa() >= 2) new_type = GGML_TYPE_Q5_K;
-        else new_type = GGML_TYPE_Q5_K;
+        else if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q4_K;
+        else new_type = GGML_TYPE_IQ4_XS;
         ++qs.i_attention_wv;
     }
     else if (name.find("attn_output.weight") != std::string::npos) {
@@ -16949,8 +16949,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     }
     else if (name.find("attn_k.weight") != std::string::npos) {
         if (qs.model.hparams.n_expert >= 8) new_type = GGML_TYPE_Q6_K;
-        else if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q5_K;
-        else new_type = GGML_TYPE_Q4_K;
+        else if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q4_K;
+        else new_type = GGML_TYPE_IQ4_XS;
     }
     else if (name.find("attn_v.weight") != std::string::npos) {
         if (qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q6_K;
@@ -16999,8 +16999,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     }
     else if (name.find("attn_k.weight") != std::string::npos) {
         if (qs.model.hparams.n_expert >= 8) new_type = GGML_TYPE_Q6_K;
-        else if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q5_K;
-        else new_type = GGML_TYPE_Q4_K;
+        else if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q4_K;
+        else new_type = GGML_TYPE_IQ4_XS;
     }
     else if (name.find("attn_v.weight") != std::string::npos) {
         if (qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q6_K;
@@ -17049,8 +17049,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     }
     else if (name.find("attn_k.weight") != std::string::npos) {
         if (qs.model.hparams.n_expert >= 8) new_type = GGML_TYPE_Q6_K;
-        else if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q5_K;
-        else new_type = GGML_TYPE_Q4_K;
+        else if (qs.model.hparams.n_gqa() >= 8) new_type = GGML_TYPE_Q4_K;
+        else new_type = GGML_TYPE_IQ4_XS;
     }
     else if (name.find("attn_v.weight") != std::string::npos) {
         if (qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q6_K;
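Taken together, the nine hunks apply the same one-tier downgrade everywhere, matching the +14/-14 line count: Q5_K arms become Q4_K, Q4_K and Q5_K fallbacks become IQ4_XS, and several GQA thresholds tighten from 2 to 8. A quick check of the common cases, assuming the hypothetical helpers from the sketches above are in scope:

    #include <cassert>

    int main() {
        // Large MoE models (>= 8 experts) keep Q6_K for attn_k in every branch.
        assert(pick_attn_k_type(/*n_expert=*/8, /*n_gqa=*/1) == ggml_type::Q6_K);
        // Plain MHA models (n_gqa == 1, no experts) now fall back to IQ4_XS
        // where the lower-bit branches previously used Q4_K or Q5_K.
        assert(pick_attn_k_type_lowbit(0, 1) == ggml_type::IQ4_XS);
        assert(pick_attn_v_type(0, 1)        == ggml_type::IQ4_XS);
        return 0;
    }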
