Skip to content

Commit 3c7501b

Browse files
authored
fix: Llama.close didn't free lora adapter (#1679)
1 parent 78e35c4 commit 3c7501b

File tree

1 file changed

+11
-8
lines changed

1 file changed

+11
-8
lines changed

llama_cpp/llama.py

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -198,6 +198,7 @@ def __init__(
198198
A Llama instance.
199199
"""
200200
self.verbose = verbose
201+
self._stack = contextlib.ExitStack()
201202

202203
set_verbose(verbose)
203204

@@ -365,8 +366,6 @@ def __init__(
365366
if not os.path.exists(model_path):
366367
raise ValueError(f"Model path does not exist: {model_path}")
367368

368-
self._stack = contextlib.ExitStack()
369-
370369
self._model = self._stack.enter_context(
371370
contextlib.closing(
372371
_LlamaModel(
@@ -420,6 +419,15 @@ def __init__(
420419
raise RuntimeError(
421420
f"Failed to initialize LoRA adapter from lora path: {self.lora_path}"
422421
)
422+
423+
def free_lora_adapter():
424+
if self._lora_adapter is None:
425+
return
426+
llama_cpp.llama_lora_adapter_free(self._lora_adapter)
427+
self._lora_adapter = None
428+
429+
self._stack.callback(free_lora_adapter)
430+
423431
assert self._ctx.ctx is not None
424432
if llama_cpp.llama_lora_adapter_set(
425433
self._ctx.ctx, self._lora_adapter, self.lora_scale
@@ -2085,14 +2093,9 @@ def pooling_type(self) -> str:
20852093

20862094
def close(self) -> None:
20872095
"""Explicitly free the model from memory."""
2088-
if hasattr(self,'_stack'):
2089-
if self._stack is not None:
2090-
self._stack.close()
2096+
self._stack.close()
20912097

20922098
def __del__(self) -> None:
2093-
if hasattr(self,'_lora_adapter'):
2094-
if self._lora_adapter is not None:
2095-
llama_cpp.llama_lora_adapter_free(self._lora_adapter)
20962099
self.close()
20972100

20982101
@staticmethod

0 commit comments

Comments
 (0)