@@ -16,41 +16,41 @@ struct quant_option {
16
16
};
17
17
18
18
static const std::vector<struct quant_option > QUANT_OPTIONS = {
19
- { " Q4_0" , LLAMA_FTYPE_MOSTLY_Q4_0, " 3.56G , +0.2166 ppl @ LLaMA-v1-7B " , },
20
- { " Q4_1" , LLAMA_FTYPE_MOSTLY_Q4_1, " 3.90G , +0.1585 ppl @ LLaMA-v1-7B " , },
21
- { " Q5_0" , LLAMA_FTYPE_MOSTLY_Q5_0, " 4.33G , +0.0683 ppl @ LLaMA-v1-7B " , },
22
- { " Q5_1" , LLAMA_FTYPE_MOSTLY_Q5_1, " 4.70G , +0.0349 ppl @ LLaMA-v1-7B " , },
19
+ { " Q4_0" , LLAMA_FTYPE_MOSTLY_Q4_0, " 4.34G , +0.4685 ppl @ Llama-3-8B " , },
20
+ { " Q4_1" , LLAMA_FTYPE_MOSTLY_Q4_1, " 4.78G , +0.4511 ppl @ Llama-3-8B " , },
21
+ { " Q5_0" , LLAMA_FTYPE_MOSTLY_Q5_0, " 5.21G , +0.1316 ppl @ Llama-3-8B " , },
22
+ { " Q5_1" , LLAMA_FTYPE_MOSTLY_Q5_1, " 5.65G , +0.1062 ppl @ Llama-3-8B " , },
23
23
{ " IQ2_XXS" ,LLAMA_FTYPE_MOSTLY_IQ2_XXS," 2.06 bpw quantization" , },
24
24
{ " IQ2_XS" , LLAMA_FTYPE_MOSTLY_IQ2_XS, " 2.31 bpw quantization" , },
25
25
{ " IQ2_S" , LLAMA_FTYPE_MOSTLY_IQ2_S, " 2.5 bpw quantization" , },
26
26
{ " IQ2_M" , LLAMA_FTYPE_MOSTLY_IQ2_M, " 2.7 bpw quantization" , },
27
27
{ " IQ1_S" , LLAMA_FTYPE_MOSTLY_IQ1_S, " 1.56 bpw quantization" , },
28
28
{ " IQ1_M" , LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.75 bpw quantization" , },
29
- { " Q2_K" , LLAMA_FTYPE_MOSTLY_Q2_K, " 2.63G , +0.6717 ppl @ LLaMA-v1-7B " , },
30
- { " Q2_K_S" , LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.16G , +9.0634 ppl @ LLaMA-v1-7B " , },
29
+ { " Q2_K" , LLAMA_FTYPE_MOSTLY_Q2_K, " 2.96G , +3.5199 ppl @ Llama-3-8B " , },
30
+ { " Q2_K_S" , LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.96G , +3.1836 ppl @ Llama-3-8B " , },
31
31
{ " IQ3_XXS" ,LLAMA_FTYPE_MOSTLY_IQ3_XXS," 3.06 bpw quantization" , },
32
32
{ " IQ3_S" , LLAMA_FTYPE_MOSTLY_IQ3_S, " 3.44 bpw quantization" , },
33
33
{ " IQ3_M" , LLAMA_FTYPE_MOSTLY_IQ3_M, " 3.66 bpw quantization mix" , },
34
- { " Q3_K" , LLAMA_FTYPE_MOSTLY_Q3_K_M, " alias for Q3_K_M" },
35
- { " IQ3_XS" , LLAMA_FTYPE_MOSTLY_IQ3_XS, " 3.3 bpw quantization" , },
36
- { " Q3_K_S" , LLAMA_FTYPE_MOSTLY_Q3_K_S, " 2.75G , +0.5551 ppl @ LLaMA-v1-7B " , },
37
- { " Q3_K_M" , LLAMA_FTYPE_MOSTLY_Q3_K_M, " 3.07G , +0.2496 ppl @ LLaMA-v1-7B " , },
38
- { " Q3_K_L" , LLAMA_FTYPE_MOSTLY_Q3_K_L, " 3.35G , +0.1764 ppl @ LLaMA-v1-7B " , },
34
+ { " Q3_K" , LLAMA_FTYPE_MOSTLY_Q3_K_M, " alias for Q3_K_M" },
35
+ { " IQ3_XS" , LLAMA_FTYPE_MOSTLY_IQ3_XS, " 3.3 bpw quantization" , },
36
+ { " Q3_K_S" , LLAMA_FTYPE_MOSTLY_Q3_K_S, " 3.41G , +1.6321 ppl @ Llama-3-8B " , },
37
+ { " Q3_K_M" , LLAMA_FTYPE_MOSTLY_Q3_K_M, " 3.74G , +0.6569 ppl @ Llama-3-8B " , },
38
+ { " Q3_K_L" , LLAMA_FTYPE_MOSTLY_Q3_K_L, " 4.03G , +0.5562 ppl @ Llama-3-8B " , },
39
39
{ " IQ4_NL" , LLAMA_FTYPE_MOSTLY_IQ4_NL, " 4.50 bpw non-linear quantization" , },
40
40
{ " IQ4_XS" , LLAMA_FTYPE_MOSTLY_IQ4_XS, " 4.25 bpw non-linear quantization" , },
41
- { " Q4_K" , LLAMA_FTYPE_MOSTLY_Q4_K_M, " alias for Q4_K_M" , },
42
- { " Q4_K_S" , LLAMA_FTYPE_MOSTLY_Q4_K_S, " 3.59G , +0.0992 ppl @ LLaMA-v1-7B " , },
43
- { " Q4_K_M" , LLAMA_FTYPE_MOSTLY_Q4_K_M, " 3.80G , +0.0532 ppl @ LLaMA-v1-7B " , },
44
- { " Q5_K" , LLAMA_FTYPE_MOSTLY_Q5_K_M, " alias for Q5_K_M" , },
45
- { " Q5_K_S" , LLAMA_FTYPE_MOSTLY_Q5_K_S, " 4.33G , +0.0400 ppl @ LLaMA-v1-7B " , },
46
- { " Q5_K_M" , LLAMA_FTYPE_MOSTLY_Q5_K_M, " 4.45G , +0.0122 ppl @ LLaMA-v1-7B " , },
47
- { " Q6_K" , LLAMA_FTYPE_MOSTLY_Q6_K, " 5.15G , +0.0008 ppl @ LLaMA-v1-7B " , },
48
- { " Q8_0" , LLAMA_FTYPE_MOSTLY_Q8_0, " 6.70G , +0.0004 ppl @ LLaMA-v1-7B " , },
49
- { " F16" , LLAMA_FTYPE_MOSTLY_F16, " 14.00G, - 0.0020 ppl @ Mistral-7B" , },
50
- { " BF16" , LLAMA_FTYPE_MOSTLY_BF16, " 14.00G, -0.0050 ppl @ Mistral-7B" , },
51
- { " F32" , LLAMA_FTYPE_ALL_F32, " 26.00G @ 7B" , },
41
+ { " Q4_K" , LLAMA_FTYPE_MOSTLY_Q4_K_M, " alias for Q4_K_M" , },
42
+ { " Q4_K_S" , LLAMA_FTYPE_MOSTLY_Q4_K_S, " 4.37G , +0.2689 ppl @ Llama-3-8B " , },
43
+ { " Q4_K_M" , LLAMA_FTYPE_MOSTLY_Q4_K_M, " 4.58G , +0.1754 ppl @ Llama-3-8B " , },
44
+ { " Q5_K" , LLAMA_FTYPE_MOSTLY_Q5_K_M, " alias for Q5_K_M" , },
45
+ { " Q5_K_S" , LLAMA_FTYPE_MOSTLY_Q5_K_S, " 5.21G , +0.1049 ppl @ Llama-3-8B " , },
46
+ { " Q5_K_M" , LLAMA_FTYPE_MOSTLY_Q5_K_M, " 5.33G , +0.0569 ppl @ Llama-3-8B " , },
47
+ { " Q6_K" , LLAMA_FTYPE_MOSTLY_Q6_K, " 6.14G , +0.0217 ppl @ Llama-3-8B " , },
48
+ { " Q8_0" , LLAMA_FTYPE_MOSTLY_Q8_0, " 7.96G , +0.0026 ppl @ Llama-3-8B " , },
49
+ { " F16" , LLAMA_FTYPE_MOSTLY_F16, " 14.00G, + 0.0020 ppl @ Mistral-7B" , },
50
+ { " BF16" , LLAMA_FTYPE_MOSTLY_BF16, " 14.00G, -0.0050 ppl @ Mistral-7B" , },
51
+ { " F32" , LLAMA_FTYPE_ALL_F32, " 26.00G @ 7B" , },
52
52
// Note: Keep COPY after F32 (both use LLAMA_FTYPE_ALL_F32) so that ftype 0 matches F32 rather than COPY.
53
- { " COPY" , LLAMA_FTYPE_ALL_F32, " only copy tensors, no quantizing" , },
53
+ { " COPY" , LLAMA_FTYPE_ALL_F32, " only copy tensors, no quantizing" , },
54
54
};
55
55
56
56
static const char * const LLM_KV_QUANTIZE_IMATRIX_FILE = " quantize.imatrix.file" ;
0 commit comments