Embeddings and Output changes

Nexesenex · Nexesenex · commit d336217739de · 2024-08-05T16:12:26.000+02:00
diff --git a/src/llama.cpp b/src/llama.cpp
@@ -15603,7 +15603,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                 new_type = GGML_TYPE_Q6_K;
             }
             else if (ftype == LLAMA_FTYPE_MOSTLY_Q6_K_ML) {
-                new_type = GGML_TYPE_Q8_0;
+                new_type = GGML_TYPE_Q6_K;
             }
             else if (ftype == LLAMA_FTYPE_MOSTLY_IQ5_XSR || ftype == LLAMA_FTYPE_MOSTLY_IQ5_SR || ftype == LLAMA_FTYPE_MOSTLY_IQ5_MR ||
                      ftype == LLAMA_FTYPE_MOSTLY_IQ5_LR  || ftype == LLAMA_FTYPE_MOSTLY_IQ5_BLR) {
@@ -15741,7 +15741,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_SR || ftype == LLAMA_FTYPE_MOSTLY_IQ4_MR  || ftype == LLAMA_FTYPE_MOSTLY_IQ4_LR  ||
                      ftype == LLAMA_FTYPE_MOSTLY_IQ4_BLR) {
                 if (qs.model.hparams.n_expert >= 8 || qs.model.hparams.n_gqa() >= 12) new_type = GGML_TYPE_Q5_K;
-                else new_type = GGML_TYPE_IQ4_XS;
+                else new_type = GGML_TYPE_Q8_0;
             }
             else if (ftype == LLAMA_FTYPE_MOSTLY_Q4_K_ML) {
                 if (qs.model.hparams.n_expert >= 8 || qs.model.hparams.n_gqa() >= 12) new_type = GGML_TYPE_Q5_K;
@@ -15753,7 +15753,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             }
             else if (ftype == LLAMA_FTYPE_MOSTLY_Q6_K_ML) {
                 if (qs.model.hparams.n_expert >= 8 || qs.model.hparams.n_gqa() >= 12) new_type = GGML_TYPE_Q8_0;
-                else new_type = GGML_TYPE_Q6_K;
+                else new_type = GGML_TYPE_Q8_0;
             }
             else if (ftype == LLAMA_FTYPE_MOSTLY_IQ5_XSR || ftype == LLAMA_FTYPE_MOSTLY_IQ5_SR || ftype == LLAMA_FTYPE_MOSTLY_IQ5_MR ||
                      ftype == LLAMA_FTYPE_MOSTLY_IQ5_LR  || ftype == LLAMA_FTYPE_MOSTLY_IQ5_BLR) {

Original file line number	Diff line number	Diff line change
`@@ -15603,7 +15603,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n`
`15603`	`15603`	`new_type = GGML_TYPE_Q6_K;`
`15604`	`15604`	`}`
`15605`	`15605`	`else if (ftype == LLAMA_FTYPE_MOSTLY_Q6_K_ML) {`
`15606`		`- new_type = GGML_TYPE_Q8_0;`
	`15606`	`+ new_type = GGML_TYPE_Q6_K;`
`15607`	`15607`	`}`
`15608`	`15608`	`else if (ftype == LLAMA_FTYPE_MOSTLY_IQ5_XSR || ftype == LLAMA_FTYPE_MOSTLY_IQ5_SR || ftype == LLAMA_FTYPE_MOSTLY_IQ5_MR ||`
`15609`	`15609`	`ftype == LLAMA_FTYPE_MOSTLY_IQ5_LR || ftype == LLAMA_FTYPE_MOSTLY_IQ5_BLR) {`
`@@ -15741,7 +15741,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n`
`15741`	`15741`	`else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_SR || ftype == LLAMA_FTYPE_MOSTLY_IQ4_MR || ftype == LLAMA_FTYPE_MOSTLY_IQ4_LR ||`
`15742`	`15742`	`ftype == LLAMA_FTYPE_MOSTLY_IQ4_BLR) {`
`15743`	`15743`	`if (qs.model.hparams.n_expert >= 8 || qs.model.hparams.n_gqa() >= 12) new_type = GGML_TYPE_Q5_K;`
`15744`		`- else new_type = GGML_TYPE_IQ4_XS;`
	`15744`	`+ else new_type = GGML_TYPE_Q8_0;`
`15745`	`15745`	`}`
`15746`	`15746`	`else if (ftype == LLAMA_FTYPE_MOSTLY_Q4_K_ML) {`
`15747`	`15747`	`if (qs.model.hparams.n_expert >= 8 || qs.model.hparams.n_gqa() >= 12) new_type = GGML_TYPE_Q5_K;`
`@@ -15753,7 +15753,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n`
`15753`	`15753`	`}`
`15754`	`15754`	`else if (ftype == LLAMA_FTYPE_MOSTLY_Q6_K_ML) {`
`15755`	`15755`	`if (qs.model.hparams.n_expert >= 8 || qs.model.hparams.n_gqa() >= 12) new_type = GGML_TYPE_Q8_0;`
`15756`		`- else new_type = GGML_TYPE_Q6_K;`
	`15756`	`+ else new_type = GGML_TYPE_Q8_0;`
`15757`	`15757`	`}`
`15758`	`15758`	`else if (ftype == LLAMA_FTYPE_MOSTLY_IQ5_XSR || ftype == LLAMA_FTYPE_MOSTLY_IQ5_SR || ftype == LLAMA_FTYPE_MOSTLY_IQ5_MR ||`
`15759`	`15759`	`ftype == LLAMA_FTYPE_MOSTLY_IQ5_LR || ftype == LLAMA_FTYPE_MOSTLY_IQ5_BLR) {`