
Commit 2d32883

Quant strategies for IKL quants & custom quants,
plus a typo fix for the IQ3_KS ftype name. NXS_Llama.cpp Alpha 0.06 - b5506
1 parent: a17aab4

File tree

4 files changed: +527, -79 lines


include/llama.h

Lines changed: 8 additions & 0 deletions
@@ -426,6 +426,14 @@ extern "C" {
         enum llama_ftype ftype;               // quantize to this llama_ftype
         enum ggml_type output_tensor_type;    // output tensor type
         enum ggml_type token_embedding_type;  // token embeddings tensor type
+        enum ggml_type attn_q_type;           // attention query tensor type
+        enum ggml_type attn_k_type;           // attention key tensor type
+        enum ggml_type attn_v_type;           // attention value tensor type
+        enum ggml_type attn_qkv_type;         // attention query-key-value tensor type
+        enum ggml_type attn_output_type;      // attention output tensor type
+        enum ggml_type ffn_gate_type;         // feedforward network gate type
+        enum ggml_type ffn_down_type;         // feedforward network down type
+        enum ggml_type ffn_up_type;           // feedforward network up type
         bool allow_requantize;                // allow quantizing non-f32/f16 tensors
         bool quantize_output_tensor;          // quantize output.weight
         bool only_copy;                       // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
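
The new fields let a caller pin a specific ggml_type for each tensor category instead of relying only on the ftype-driven defaults. Below is a minimal sketch of how these overrides might be used through the existing quantization API; llama_model_quantize_default_params() and llama_model_quantize() are the existing llama.cpp entry points, while the helper name quantize_with_overrides and the concrete type choices are assumptions made for the example, not part of the commit.

// Sketch (illustrative): quantize a GGUF model while overriding the types of
// selected tensor categories via the fields added in this commit.
#include "llama.h"

uint32_t quantize_with_overrides(const char * fname_inp, const char * fname_out) {
    llama_model_quantize_params params = llama_model_quantize_default_params();

    // Base strategy applied to tensors without an explicit override.
    params.ftype = LLAMA_FTYPE_MOSTLY_IQ3_KS;

    // Pre-existing per-tensor overrides.
    params.output_tensor_type   = GGML_TYPE_Q6_K;
    params.token_embedding_type = GGML_TYPE_Q6_K;

    // New per-tensor overrides from this commit (fields left at their default
    // value presumably keep following the ftype strategy; the exact sentinel
    // depends on the implementation in the remaining changed files).
    params.attn_v_type   = GGML_TYPE_Q5_K;   // keep attn_v heavier than the base type
    params.ffn_down_type = GGML_TYPE_Q4_K;   // example override for ffn_down
    params.ffn_up_type   = GGML_TYPE_Q4_K;   // example override for ffn_up

    // 0 indicates success.
    return llama_model_quantize(fname_inp, fname_out, &params);
}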

src/llama-model-loader.cpp

Lines changed: 1 addition & 1 deletion
@@ -68,7 +68,7 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
         case LLAMA_FTYPE_MOSTLY_IQ2_KT: return "IQ2_KT - 2.125 bpw";
         case LLAMA_FTYPE_MOSTLY_IQ2_K:  return "IQ2_K - 2.375 bpw";
         case LLAMA_FTYPE_MOSTLY_IQ3_K:  return "IQ3_K - 3.4325 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_KS: return "IQ3_K - 3.25 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_KS: return "IQ3_KS - 3.25 bpw";
         case LLAMA_FTYPE_MOSTLY_IQ3_KL: return "IQ3_KL - 4 bpw";
         case LLAMA_FTYPE_MOSTLY_IQ3_KT: return "IQ3_KT - 3.125 bpw";
         case LLAMA_FTYPE_MOSTLY_IQ4_KT: return "IQ4_KT - 4.0 bpw";
