8 files changed (+24 / -12 lines).
CHANGELOG.md:

```diff
@@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.85]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@398ede5efeb07b9adf9fbda7ea63f630d476a792
+- fix: Missing LoRA adapter after API change by @shamitv in #1630
+- fix(docker): Update Dockerfile BLAS options by @olivierdebauche in #1632
+- fix(docker): Fix GGML_CUDA param by @olivierdebauche in #1633
+- fix(docker): Update Dockerfile build options from `LLAMA_` to `GGML_` by @olivierdebauche in #1634
+- feat: FreeBSD compatibility by @yurivict in #1635
+
 ## [0.2.84]
 
 - feat: Update llama.cpp to ggerganov/llama.cpp@4730faca618ff9cee0780580145e3cbe86f24876
```
CUDA Dockerfile:

```diff
@@ -15,13 +15,13 @@ COPY . .
 
 # setting build related env vars
 ENV CUDA_DOCKER_ARCH=all
-ENV LLAMA_CUBLAS=1
+ENV GGML_CUDA=1
 
 # Install depencencies
 RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context
 
 # Install llama-cpp-python (build with cuda)
-RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
+RUN CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python
 
 # Run the server
 CMD python3 -m llama_cpp.server
```
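Upstream llama.cpp renamed its CMake options from the `LLAMA_` prefix to `GGML_` (see #1634), so the old `LLAMA_CUBLAS` switch no longer has any effect. A minimal sketch of how the new flag reaches a source build outside of Docker; the use of `subprocess` and the `--no-cache-dir` flag here are illustrative assumptions, not part of this change:

```python
import os
import subprocess

# Build llama-cpp-python from source with CUDA enabled. After this
# change the CMake option is GGML_CUDA; the old LLAMA_CUBLAS flag is
# silently ignored by newer llama.cpp.
env = dict(os.environ)
env["CMAKE_ARGS"] = "-DGGML_CUDA=on"

subprocess.run(
    ["pip", "install", "--no-cache-dir", "--verbose", "llama-cpp-python"],
    env=env,
    check=True,
)
```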
Multi-image Dockerfile:

```diff
@@ -20,13 +20,13 @@ RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fa
 
 # Perform the conditional installations based on the image
 RUN echo "Image: ${IMAGE}" && \
-    if [ "${IMAGE}" = "python:3-slim-bullseye" ] ; then \
+    if [ "${IMAGE}" = "python:3-slim-bookworm" ] ; then \
     echo "OpenBLAS install:" && \
     apt-get install -y --no-install-recommends libopenblas-dev && \
-    LLAMA_OPENBLAS=1 pip install llama-cpp-python --verbose; \
+    CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-python --verbose; \
     else \
     echo "CuBLAS install:" && \
-    LLAMA_CUBLAS=1 pip install llama-cpp-python --verbose; \
+    CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python --verbose; \
     fi
 
 # Clean up apt cache
```
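This hunk makes two changes at once: the OpenBLAS branch moves from Debian bullseye to bookworm, and both branches drop the deprecated `LLAMA_OPENBLAS=1` / `LLAMA_CUBLAS=1` environment variables in favor of explicit CMake flags. The same `CMAKE_ARGS` pattern from the sketch above applies here, with `-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS` in place of `-DGGML_CUDA=on` for the OpenBLAS branch.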
OpenBLAS Dockerfile:

```diff
@@ -12,7 +12,7 @@ RUN apt update && apt install -y libopenblas-dev ninja-build build-essential pkg
 
 RUN python -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context
 
-RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama_cpp_python --verbose
+RUN CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" pip install llama_cpp_python --verbose
 
 # Run the server
 CMD python3 -m llama_cpp.server
```
llama_cpp/__init__.py:

```diff
@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.2.84"
+__version__ = "0.2.85"
```
llama_cpp/llama.py:

```diff
@@ -2083,11 +2083,14 @@ def pooling_type(self) -> str:
 
     def close(self) -> None:
         """Explicitly free the model from memory."""
-        self._stack.close()
+        if hasattr(self, '_stack'):
+            if self._stack is not None:
+                self._stack.close()
 
     def __del__(self) -> None:
-        if self._lora_adapter is not None:
-            llama_cpp.llama_lora_adapter_free(self._lora_adapter)
+        if hasattr(self, '_lora_adapter'):
+            if self._lora_adapter is not None:
+                llama_cpp.llama_lora_adapter_free(self._lora_adapter)
         self.close()
 
     @staticmethod
```
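The `hasattr` guards matter because `__del__` runs even when `__init__` aborts partway through (for example, when a model file fails to load), at which point attributes such as `_stack` or `_lora_adapter` may never have been assigned. A self-contained sketch of the failure mode and the guard; the `Resource` class is hypothetical, not part of the library:

```python
class Resource:
    def __init__(self, fail: bool):
        if fail:
            raise ValueError("init aborted")  # _handle is never assigned
        self._handle = object()

    def __del__(self) -> None:
        # Without the hasattr guard, this would hit an AttributeError
        # ("Exception ignored in __del__") whenever __init__ failed early.
        if hasattr(self, "_handle"):
            if self._handle is not None:
                print("freeing handle")


try:
    Resource(fail=True)  # __init__ raises; __del__ still runs afterwards
except ValueError:
    pass
```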
llama_cpp/llama_cpp.py:

```diff
@@ -28,7 +28,7 @@ def _load_shared_library(lib_base_name: str):
     # for llamacpp) and "llama" (default name for this repo)
     _lib_paths: List[pathlib.Path] = []
     # Determine the file extension based on the platform
-    if sys.platform.startswith("linux"):
+    if sys.platform.startswith("linux") or sys.platform.startswith("freebsd"):
         _lib_paths += [
             _base_path / f"lib{lib_base_name}.so",
         ]
```
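FreeBSD uses the same `.so` suffix for shared libraries as Linux, and its `sys.platform` value includes the major version (e.g. `"freebsd13"`), which is why a `startswith` check is used. A standalone sketch of the idea; the non-Linux branches are assumptions about typical platform values, not code from this diff:

```python
import sys


def shared_lib_suffix() -> str:
    # FreeBSD reports versioned values such as "freebsd13", hence startswith.
    if sys.platform.startswith("linux") or sys.platform.startswith("freebsd"):
        return ".so"
    if sys.platform == "darwin":
        return ".dylib"
    if sys.platform in ("win32", "cygwin"):
        return ".dll"
    raise RuntimeError(f"unsupported platform: {sys.platform}")


print(shared_lib_suffix())
```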