@@ -811,6 +811,7 @@ class llama_context_params(ctypes.Structure):
 #     bool quantize_output_tensor;         // quantize output.weight
 #     bool only_copy;                      // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
 #     bool pure;                           // quantize all tensors to the default type
+#     bool keep_split;                     // quantize to the same number of shards
 #     void * imatrix;                      // pointer to importance matrix data
 #     void * kv_overrides;                 // pointer to vector containing overrides
 # } llama_model_quantize_params;
@@ -826,6 +827,7 @@ class llama_model_quantize_params(ctypes.Structure):
         quantize_output_tensor (bool): quantize output.weight
         only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
         pure (bool): quantize all tensors to the default type
+        keep_split (bool): quantize to the same number of shards
         imatrix (ctypes.c_void_p): pointer to importance matrix data
         kv_overrides (ctypes.c_void_p): pointer to vector containing overrides
     """
@@ -839,6 +841,7 @@ class llama_model_quantize_params(ctypes.Structure):
         quantize_output_tensor: bool
         only_copy: bool
         pure: bool
+        keep_split: bool
         imatrix: ctypes.c_void_p
         kv_overrides: ctypes.c_void_p

@@ -851,6 +854,7 @@ class llama_model_quantize_params(ctypes.Structure):
         ("quantize_output_tensor", ctypes.c_bool),
         ("only_copy", ctypes.c_bool),
         ("pure", ctypes.c_bool),
+        ("keep_split", ctypes.c_bool),
         ("imatrix", ctypes.c_void_p),
         ("kv_overrides", ctypes.c_void_p),
     ]
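A minimal usage sketch (not part of the diff) of how the new keep_split field could be set through the low-level bindings. llama_model_quantize_default_params, llama_model_quantize, and LLAMA_FTYPE_MOSTLY_Q4_K_M are existing names in llama_cpp; the file names below are placeholders.

import ctypes

import llama_cpp

# Start from the library defaults, then opt into shard-preserving output.
params = llama_cpp.llama_model_quantize_default_params()
params.ftype = llama_cpp.LLAMA_FTYPE_MOSTLY_Q4_K_M  # target quantization type
params.keep_split = True  # write the same number of GGUF shards as the input

# Hypothetical input/output paths, for illustration only.
ret = llama_cpp.llama_model_quantize(
    b"model-f16-00001-of-00003.gguf",
    b"model-q4_k_m.gguf",
    ctypes.byref(params),
)
if ret != 0:
    raise RuntimeError(f"llama_model_quantize returned {ret}")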