Skip to content

Commit 7f52335

Browse files
committed
feat: Update llama.cpp
1 parent 266abfc commit 7f52335

File tree

2 files changed

+5
-1
lines changed

2 files changed

+5
-1
lines changed

llama_cpp/llama_cpp.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -811,6 +811,7 @@ class llama_context_params(ctypes.Structure):
811 811   # bool quantize_output_tensor; // quantize output.weight
812 812   # bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
813 813   # bool pure; // quantize all tensors to the default type
    814 + # bool keep_split; // quantize to the same number of shards
814 815   # void * imatrix; // pointer to importance matrix data
815 816   # void * kv_overrides; // pointer to vector containing overrides
816 817   # } llama_model_quantize_params;
@@ -826,6 +827,7 @@ class llama_model_quantize_params(ctypes.Structure):
826 827   quantize_output_tensor (bool): quantize output.weight
827 828   only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
828 829   pure (bool): quantize all tensors to the default type
    830 + keep_split (bool): quantize to the same number of shards
829 831   imatrix (ctypes.c_void_p): pointer to importance matrix data
830 832   kv_overrides (ctypes.c_void_p): pointer to vector containing overrides
831 833   """
@@ -839,6 +841,7 @@ class llama_model_quantize_params(ctypes.Structure):
839 841   quantize_output_tensor: bool
840 842   only_copy: bool
841 843   pure: bool
    844 + keep_split: bool
842 845   imatrix: ctypes.c_void_p
843 846   kv_overrides: ctypes.c_void_p
844 847

@@ -851,6 +854,7 @@ class llama_model_quantize_params(ctypes.Structure):
851 854   ("quantize_output_tensor", ctypes.c_bool),
852 855   ("only_copy", ctypes.c_bool),
853 856   ("pure", ctypes.c_bool),
    857 + ("keep_split", ctypes.c_bool),
854 858   ("imatrix", ctypes.c_void_p),
855 859   ("kv_overrides", ctypes.c_void_p),
856 860   ]

vendor/llama.cpp

0 commit comments

Comments
 (0)