4 files changed (+527, -79 lines)
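The first hunk adds per-tensor quantization type overrides, alongside the existing `output_tensor_type` and `token_embedding_type` fields. The hunk header only shows the enclosing `extern "C"` block; in mainline llama.cpp these fields live in `struct llama_model_quantize_params` in `llama.h`, which is presumably the struct being extended here.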
```diff
@@ -426,6 +426,14 @@ extern "C" {
         enum llama_ftype ftype;              // quantize to this llama_ftype
         enum ggml_type output_tensor_type;   // output tensor type
         enum ggml_type token_embedding_type; // token embeddings tensor type
+        enum ggml_type attn_q_type;          // attention query tensor type
+        enum ggml_type attn_k_type;          // attention key tensor type
+        enum ggml_type attn_v_type;          // attention value tensor type
+        enum ggml_type attn_qkv_type;        // attention query-key-value tensor type
+        enum ggml_type attn_output_type;     // attention output tensor type
+        enum ggml_type ffn_gate_type;        // feedforward network gate type
+        enum ggml_type ffn_down_type;        // feedforward network down type
+        enum ggml_type ffn_up_type;          // feedforward network up type
         bool allow_requantize;               // allow quantizing non-f32/f16 tensors
         bool quantize_output_tensor;         // quantize output.weight
         bool only_copy;                      // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
```
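The diff itself shows only the struct change, so here is a minimal sketch of how the new fields could be driven from client code. It assumes the public `llama_model_quantize_default_params()` / `llama_model_quantize()` API from `llama.h`, and that the new fields follow the convention of the existing overrides, where `GGML_TYPE_COUNT` acts as the "unset, fall back to the ftype recipe" sentinel; neither assumption is confirmed by this diff.

```cpp
#include "llama.h" // also pulls in ggml.h for the GGML_TYPE_* enums

int main() {
    // Start from the library defaults. The defaults are assumed to leave every
    // per-tensor override unset (GGML_TYPE_COUNT), matching the behavior of the
    // pre-existing output_tensor_type / token_embedding_type fields.
    llama_model_quantize_params params = llama_model_quantize_default_params();

    params.ftype         = LLAMA_FTYPE_MOSTLY_IQ3_KS; // base recipe for all tensors
    params.attn_v_type   = GGML_TYPE_Q6_K;            // but keep attention V weights larger
    params.ffn_down_type = GGML_TYPE_Q5_K;            // and the FFN down projection as well

    // Returns 0 on success; tensors without an override follow the ftype recipe.
    return llama_model_quantize("model-f16.gguf", "model-iq3_ks.gguf", &params);
}
```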
The second hunk fixes a copy-paste typo in the human-readable ftype name table: the `LLAMA_FTYPE_MOSTLY_IQ3_KS` case previously printed `IQ3_K` instead of `IQ3_KS`.

```diff
@@ -68,7 +68,7 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
         case LLAMA_FTYPE_MOSTLY_IQ2_KT: return "IQ2_KT - 2.125 bpw";
         case LLAMA_FTYPE_MOSTLY_IQ2_K:  return "IQ2_K - 2.375 bpw";
         case LLAMA_FTYPE_MOSTLY_IQ3_K:  return "IQ3_K - 3.4325 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_KS: return "IQ3_K - 3.25 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_KS: return "IQ3_KS - 3.25 bpw";
         case LLAMA_FTYPE_MOSTLY_IQ3_KL: return "IQ3_KL - 4 bpw";
         case LLAMA_FTYPE_MOSTLY_IQ3_KT: return "IQ3_KT - 3.125 bpw";
         case LLAMA_FTYPE_MOSTLY_IQ4_KT: return "IQ4_KT - 4.0 bpw";
```