@@ -811,6 +811,7 @@ class llama_context_params(ctypes.Structure):
 #     bool quantize_output_tensor;         // quantize output.weight
 #     bool only_copy;                      // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
 #     bool pure;                           // quantize all tensors to the default type
+#     bool keep_split;                     // quantize to the same number of shards
 #     void * imatrix;                      // pointer to importance matrix data
 #     void * kv_overrides;                 // pointer to vector containing overrides
 # } llama_model_quantize_params;
@@ -826,6 +827,7 @@ class llama_model_quantize_params(ctypes.Structure):
         quantize_output_tensor (bool): quantize output.weight
         only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
         pure (bool): quantize all tensors to the default type
+        keep_split (bool): quantize to the same number of shards
         imatrix (ctypes.c_void_p): pointer to importance matrix data
         kv_overrides (ctypes.c_void_p): pointer to vector containing overrides
     """
@@ -839,6 +841,7 @@ class llama_model_quantize_params(ctypes.Structure):
         quantize_output_tensor: bool
         only_copy: bool
         pure: bool
+        keep_split: bool
         imatrix: ctypes.c_void_p
         kv_overrides: ctypes.c_void_p

@@ -851,6 +854,7 @@ class llama_model_quantize_params(ctypes.Structure):
         ("quantize_output_tensor", ctypes.c_bool),
         ("only_copy", ctypes.c_bool),
         ("pure", ctypes.c_bool),
+        ("keep_split", ctypes.c_bool),
         ("imatrix", ctypes.c_void_p),
         ("kv_overrides", ctypes.c_void_p),
     ]
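A minimal usage sketch (not part of the diff) of how the new keep_split field could be set through the low-level bindings. llama_model_quantize_default_params, llama_model_quantize, and LLAMA_FTYPE_MOSTLY_Q4_K_M are existing names in llama_cpp; the file names below are placeholders.

import ctypes

import llama_cpp

# Start from the library defaults, then opt into shard-preserving output.
params = llama_cpp.llama_model_quantize_default_params()
params.ftype = llama_cpp.LLAMA_FTYPE_MOSTLY_Q4_K_M  # target quantization type
params.keep_split = True  # write the same number of GGUF shards as the input

# Hypothetical input/output paths, for illustration only.
ret = llama_cpp.llama_model_quantize(
    b"model-f16-00001-of-00003.gguf",
    b"model-q4_k_m.gguf",
    ctypes.byref(params),
)
if ret != 0:
    raise RuntimeError(f"llama_model_quantize returned {ret}")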