@@ -15603,7 +15603,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
15603
15603
new_type = GGML_TYPE_Q6_K;
15604
15604
}
15605
15605
else if (ftype == LLAMA_FTYPE_MOSTLY_Q6_K_ML) {
15606
- new_type = GGML_TYPE_Q8_0 ;
15606
+ new_type = GGML_TYPE_Q6_K ;
15607
15607
}
15608
15608
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ5_XSR || ftype == LLAMA_FTYPE_MOSTLY_IQ5_SR || ftype == LLAMA_FTYPE_MOSTLY_IQ5_MR ||
15609
15609
ftype == LLAMA_FTYPE_MOSTLY_IQ5_LR || ftype == LLAMA_FTYPE_MOSTLY_IQ5_BLR) {
@@ -15741,7 +15741,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
15741
15741
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_SR || ftype == LLAMA_FTYPE_MOSTLY_IQ4_MR || ftype == LLAMA_FTYPE_MOSTLY_IQ4_LR ||
15742
15742
ftype == LLAMA_FTYPE_MOSTLY_IQ4_BLR) {
15743
15743
if (qs.model.hparams.n_expert >= 8 || qs.model.hparams.n_gqa() >= 12) new_type = GGML_TYPE_Q5_K;
15744
- else new_type = GGML_TYPE_IQ4_XS ;
15744
+ else new_type = GGML_TYPE_Q8_0 ;
15745
15745
}
15746
15746
else if (ftype == LLAMA_FTYPE_MOSTLY_Q4_K_ML) {
15747
15747
if (qs.model.hparams.n_expert >= 8 || qs.model.hparams.n_gqa() >= 12) new_type = GGML_TYPE_Q5_K;
@@ -15753,7 +15753,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
15753
15753
}
15754
15754
else if (ftype == LLAMA_FTYPE_MOSTLY_Q6_K_ML) {
15755
15755
if (qs.model.hparams.n_expert >= 8 || qs.model.hparams.n_gqa() >= 12) new_type = GGML_TYPE_Q8_0;
15756
- else new_type = GGML_TYPE_Q6_K ;
15756
+ else new_type = GGML_TYPE_Q8_0 ;
15757
15757
}
15758
15758
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ5_XSR || ftype == LLAMA_FTYPE_MOSTLY_IQ5_SR || ftype == LLAMA_FTYPE_MOSTLY_IQ5_MR ||
15759
15759
ftype == LLAMA_FTYPE_MOSTLY_IQ5_LR || ftype == LLAMA_FTYPE_MOSTLY_IQ5_BLR) {
0 commit comments