@@ -26,13 +26,14 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
     { "IQ2_S",  LLAMA_FTYPE_MOSTLY_IQ2_S,  " 2.5  bpw quantization",            },
     { "IQ2_M",  LLAMA_FTYPE_MOSTLY_IQ2_M,  " 2.7  bpw quantization",            },
     { "IQ1_S",  LLAMA_FTYPE_MOSTLY_IQ1_S,  " 1.56 bpw quantization",            },
+    { "IQ1_XS", LLAMA_FTYPE_MOSTLY_IQ1_XS, " 1.6-1.7 bpw quantization mix",     },
     { "Q2_K",   LLAMA_FTYPE_MOSTLY_Q2_K,   " 2.63G, +0.6717 ppl @ LLaMA-v1-7B", },
     { "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.16G, +9.0634 ppl @ LLaMA-v1-7B", },
     { "IQ3_XXS",LLAMA_FTYPE_MOSTLY_IQ3_XXS," 3.06 bpw quantization",            },
     { "IQ3_S",  LLAMA_FTYPE_MOSTLY_IQ3_S,  " 3.44 bpw quantization",            },
     { "IQ3_M",  LLAMA_FTYPE_MOSTLY_IQ3_M,  " 3.66 bpw quantization mix",        },
-    { "Q3_K",   LLAMA_FTYPE_MOSTLY_Q3_K_M, "alias for Q3_K_M" },
-    { "IQ3_XS", LLAMA_FTYPE_MOSTLY_IQ3_XS, " 3.3 bpw quantization"  ,           },
+    { "Q3_K",   LLAMA_FTYPE_MOSTLY_Q3_K_M, "alias for Q3_K_M",                  },
+    { "IQ3_XS", LLAMA_FTYPE_MOSTLY_IQ3_XS, " 3.3 bpw quantization",             },
     { "Q3_K_S", LLAMA_FTYPE_MOSTLY_Q3_K_S, " 2.75G, +0.5551 ppl @ LLaMA-v1-7B", },
     { "Q3_K_M", LLAMA_FTYPE_MOSTLY_Q3_K_M, " 3.07G, +0.2496 ppl @ LLaMA-v1-7B", },
     { "Q3_K_L", LLAMA_FTYPE_MOSTLY_Q3_K_L, " 3.35G, +0.1764 ppl @ LLaMA-v1-7B", },