Skip to content

Commit 83ed554

Browse files
committed
feat: Sync with llama.cpp
Add `no_perf` field to `llama_context_params` to optionally disable performance timing measurements.
1 parent 0580cf2 commit 83ed554

File tree

1 file changed

+3
-0
lines changed

1 file changed

+3
-0
lines changed

llama_cpp/llama_cpp.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -782,6 +782,7 @@ class llama_context_params(ctypes.Structure):
782782
embeddings (bool): if true, extract embeddings (together with logits)
783783
offload_kqv (bool): whether to offload the KQV ops (including the KV cache) to GPU
784784
flash_attn (bool): whether to use flash attention
785+
no_perf (bool): whether to disable performance timing measurements
785786
abort_callback (ggml_abort_callback): abort callback if it returns true, execution of llama_decode() will be aborted
786787
abort_callback_data (ctypes.c_void_p): data for abort_callback
787788
"""
@@ -812,6 +813,7 @@ class llama_context_params(ctypes.Structure):
812813
embeddings: bool
813814
offload_kqv: bool
814815
flash_attn: bool
816+
no_perf: bool
815817
abort_callback: Callable[[ctypes.c_void_p], bool]
816818
abort_callback_data: ctypes.c_void_p
817819

@@ -841,6 +843,7 @@ class llama_context_params(ctypes.Structure):
841843
("embeddings", ctypes.c_bool),
842844
("offload_kqv", ctypes.c_bool),
843845
("flash_attn", ctypes.c_bool),
846+
("no_perf", ctypes.c_bool),
844847
("abort_callback", ggml_abort_callback),
845848
("abort_callback_data", ctypes.c_void_p),
846849
]

0 commit comments

Comments
 (0)