
Commit c00eba8

Merge remote-tracking branch 'origin/main' into recook

2 parents 5df7a07 + b564d05
18 files changed: +785 additions, -221 deletions

.github/dependabot.yml (6 additions, 2 deletions)

@@ -8,8 +8,12 @@ updates:
   - package-ecosystem: "pip" # See documentation for possible values
     directory: "/" # Location of package manifests
     schedule:
-      interval: "weekly"
+      interval: "daily"
   - package-ecosystem: "github-actions"
     directory: "/"
     schedule:
-      interval: "weekly"
+      interval: "daily"
+  - package-ecosystem: "docker"
+    directory: "/"
+    schedule:
+      interval: "daily"

.github/workflows/build-and-release.yaml (2 additions, 2 deletions)

@@ -29,7 +29,7 @@ jobs:
         python -m pip install -e .[all]

       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.17.0
+        uses: pypa/cibuildwheel@v2.18.0
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""
@@ -56,7 +56,7 @@ jobs:
          platforms: linux/arm64

       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.17.0
+        uses: pypa/cibuildwheel@v2.18.0
         env:
           CIBW_SKIP: "*musllinux* pp*"
           CIBW_REPAIR_WHEEL_COMMAND: ""

CHANGELOG.md (53 additions, 1 deletion)

@@ -7,9 +7,61 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+## [0.2.75]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@13ad16af1231ab2d245d35df3295bcfa23de1305
+- fix: segfault for models without eos / bos tokens by @abetlen in d99a6ba607a4885fb00e63e967964aa41bdbbbcb
+- feat: add MinTokensLogitProcessor and min_tokens argument to server by @twaka in #1333
+- misc: Remove unnecessary metadata lookups by @CISC in #1448
+
+## [0.2.74]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@b228aba91ac2cd9eb90e9d423ba1d0d20e0117e2
+- fix: Enable CUDA backend for llava by @abetlen in 7f59856fa6f3e23f07e12fc15aeb9359dc6c3bb4
+- docs: Fix typo in README.md by @yupbank in #1444
+
+## [0.2.73]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@25c6e82e7a1ad25a42b0894e87d9b5c557409516
+- fix: Clear kv cache at beginning of image chat formats to avoid bug when image is evaluated first by @abetlen in ac55d0a175115d1e719672ce1cb1bec776c738b1
+
+## [0.2.72]
+
+- fix(security): Remote Code Execution by Server-Side Template Injection in Model Metadata by @retr0reg in b454f40a9a1787b2b5659cd2cb00819d983185df
+- fix(security): Update remaining jinja chat templates to use immutable sandbox by @CISC in #1441
+
+## [0.2.71]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@911b3900dded9a1cfe0f0e41b82c7a29baf3a217
+- fix: Make leading bos_token optional for image chat formats, fix nanollava system message by @abetlen in 77122638b4153e31d9f277b3d905c2900b536632
+- fix: free last image embed in llava chat handler by @abetlen in 3757328b703b2cd32dcbd5853271e3a8c8599fe7
+
+## [0.2.70]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@c0e6fbf8c380718102bd25fcb8d2e55f8f9480d1
+- feat: fill-in-middle support by @CISC in #1386
+- fix: adding missing args in create_completion for functionary chat handler by @skalade in #1430
+- docs: update README.md @eltociear in #1432
+- fix: chat_format log where auto-detected format prints None by @balvisio in #1434
+- feat(server): Add support for setting root_path by @abetlen in 0318702cdc860999ee70f277425edbbfe0e60419
+- feat(ci): Add docker checks and check deps more frequently by @Smartappli in #1426
+- fix: detokenization case where first token does not start with a leading space by @noamgat in #1375
+- feat: Implement streaming for Functionary v2 + Bug fixes by @jeffrey-fong in #1419
+- fix: Use memmove to copy str_value kv_override by @abetlen in 9f7a85571ae80d3b6ddbd3e1bae407b9f1e3448a
+- feat(server): Remove temperature bounds checks for server by @abetlen in 0a454bebe67d12a446981eb16028c168ca5faa81
+- fix(server): Propagate flash_attn to model load by @dthuerck in #1424
+
+## [0.2.69]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@6ecf3189e00a1e8e737a78b6d10e1d7006e050a2
+- feat: Add llama-3-vision-alpha chat format by @abetlen in 31b1d95a6c19f5b615a3286069f181a415f872e8
+- fix: Change default verbose value of verbose in image chat format handlers to True to match Llama by @abetlen in 4f01c452b6c738dc56eacac3758119b12c57ea94
+- fix: Suppress all logs when verbose=False, use hardcoded fileno's to work in colab notebooks by @abetlen in f116175a5a7c84569c88cad231855c1e6e59ff6e
+- fix: UTF-8 handling with grammars by @jsoma in #1415
+
 ## [0.2.68]

-- feat: Update llama.cpp to ggerganov/llama.cpp@
+- feat: Update llama.cpp to ggerganov/llama.cpp@77e15bec6217a39be59b9cc83d6b9afb6b0d8167
 - feat: Add option to enable flash_attn to Lllama params and ModelSettings by @abetlen in 22d77eefd2edaf0148f53374d0cac74d0e25d06e
 - fix(ci): Fix build-and-release.yaml by @Smartappli in #1413

Makefile (1 addition, 1 deletion)

@@ -16,7 +16,7 @@ build.debug:
 	CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Debug" python3 -m pip install --verbose --config-settings=cmake.verbose=true --config-settings=logging.level=INFO --config-settings=install.strip=false --editable .

 build.cuda:
-	CMAKE_ARGS="-DLLAMA_CUBLAS=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DLLAMA_CUDA=on" python3 -m pip install --verbose -e .

 build.opencl:
 	CMAKE_ARGS="-DLLAMA_CLBLAST=on" python3 -m pip install --verbose -e .

README.md (3 additions, 3 deletions)

@@ -516,7 +516,7 @@ chat_handler = Llava15ChatHandler(clip_model_path="path/to/llava/mmproj.bin")
 llm = Llama(
   model_path="./path/to/llava/llama-model.gguf",
   chat_handler=chat_handler,
-  n_ctx=2048, # n_ctx should be increased to accomodate the image embedding
+  n_ctx=2048, # n_ctx should be increased to accommodate the image embedding
 )
 llm.create_chat_completion(
     messages = [
@@ -547,10 +547,10 @@ llm = Llama.from_pretrained(
     repo_id="vikhyatk/moondream2",
     filename="*text-model*",
     chat_handler=chat_handler,
-    n_ctx=2048, # n_ctx should be increased to accomodate the image embedding
+    n_ctx=2048, # n_ctx should be increased to accommodate the image embedding
 )

-respoonse = llm.create_chat_completion(
+response = llm.create_chat_completion(
     messages = [
         {
             "role": "user",

llama_cpp/__init__.py (1 addition, 1 deletion)

@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *

-__version__ = "0.2.68a1"
+__version__ = "0.2.75"

llama_cpp/_internals.py (7 additions, 5 deletions)

@@ -15,6 +15,7 @@

 from .llama_types import *
 from .llama_grammar import LlamaGrammar
+from ._utils import suppress_stdout_stderr

 import llama_cpp.llama_cpp as llama_cpp

@@ -47,9 +48,10 @@ def __init__(
         if not os.path.exists(path_model):
             raise ValueError(f"Model path does not exist: {path_model}")

-        self.model = llama_cpp.llama_load_model_from_file(
-            self.path_model.encode("utf-8"), self.params
-        )
+        with suppress_stdout_stderr(disable=verbose):
+            self.model = llama_cpp.llama_load_model_from_file(
+                self.path_model.encode("utf-8"), self.params
+            )

         if self.model is None:
             raise ValueError(f"Failed to load model from file: {path_model}")
@@ -201,7 +203,7 @@ def detokenize(self, tokens: List[int], special: bool = False) -> bytes:
         # NOTE: Llama1 models automatically added a space at the start of the prompt
         # this line removes a leading space if the first token is a beginning of sentence token
         return (
-            output[1:] if len(tokens) > 0 and tokens[0] == self.token_bos() else output
+            output[1:] if len(tokens) > 0 and tokens[0] == self.token_bos() and output[0:1] == b' ' else output
         )

@@ -810,4 +812,4 @@ def sample(
     def accept(self, ctx_main: _LlamaContext, id: int, apply_grammar: bool):
         if apply_grammar and self.grammar is not None:
             ctx_main.grammar_accept_token(self.grammar, id)
-        self.prev.append(id)
+        self.prev.append(id)
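
The detokenize change tightens the condition for stripping the first byte: it now also requires that the decoded output actually begins with a space. A standalone sketch of that guard (not the library code; the token ids and bos id below are made up for illustration):

    def strip_leading_space(output: bytes, tokens: list[int], bos_id: int) -> bytes:
        # Drop the first byte only when the prompt started with BOS *and* the
        # detokenized bytes really begin with a space; previously the byte was
        # dropped on BOS alone, which corrupted first tokens that emit no space.
        if len(tokens) > 0 and tokens[0] == bos_id and output[0:1] == b" ":
            return output[1:]
        return output

    # Made-up ids: bos_id=1
    assert strip_leading_space(b" Hello", [1, 15043], 1) == b"Hello"
    assert strip_leading_space(b"Hello", [1, 15043], 1) == b"Hello"  # no leading space -> unchanged
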

llama_cpp/_utils.py (11 additions, 14 deletions)

@@ -1,13 +1,15 @@
 import os
 import sys

-import sys
 from typing import Any, Dict

 # Avoid "LookupError: unknown encoding: ascii" when open() called in a destructor
 outnull_file = open(os.devnull, "w")
 errnull_file = open(os.devnull, "w")

+STDOUT_FILENO = 1
+STDERR_FILENO = 2
+
 class suppress_stdout_stderr(object):
     # NOTE: these must be "saved" here to avoid exceptions when using
     # this context manager inside of a __del__ method
@@ -22,12 +24,8 @@ def __enter__(self):
         if self.disable:
             return self

-        # Check if sys.stdout and sys.stderr have fileno method
-        if not hasattr(self.sys.stdout, 'fileno') or not hasattr(self.sys.stderr, 'fileno'):
-            return self # Return the instance without making changes
-
-        self.old_stdout_fileno_undup = self.sys.stdout.fileno()
-        self.old_stderr_fileno_undup = self.sys.stderr.fileno()
+        self.old_stdout_fileno_undup = STDOUT_FILENO
+        self.old_stderr_fileno_undup = STDERR_FILENO

         self.old_stdout_fileno = self.os.dup(self.old_stdout_fileno_undup)
         self.old_stderr_fileno = self.os.dup(self.old_stderr_fileno_undup)
@@ -47,15 +45,14 @@ def __exit__(self, *_):
             return

         # Check if sys.stdout and sys.stderr have fileno method
-        if hasattr(self.sys.stdout, 'fileno') and hasattr(self.sys.stderr, 'fileno'):
-            self.sys.stdout = self.old_stdout
-            self.sys.stderr = self.old_stderr
+        self.sys.stdout = self.old_stdout
+        self.sys.stderr = self.old_stderr

-            self.os.dup2(self.old_stdout_fileno, self.old_stdout_fileno_undup)
-            self.os.dup2(self.old_stderr_fileno, self.old_stderr_fileno_undup)
+        self.os.dup2(self.old_stdout_fileno, self.old_stdout_fileno_undup)
+        self.os.dup2(self.old_stderr_fileno, self.old_stderr_fileno_undup)

-            self.os.close(self.old_stdout_fileno)
-            self.os.close(self.old_stderr_fileno)
+        self.os.close(self.old_stdout_fileno)
+        self.os.close(self.old_stderr_fileno)


 class MetaSingleton(type):
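
This patch stops calling sys.stdout.fileno() / sys.stderr.fileno() and uses the hardcoded descriptors 1 and 2 instead, so the redirect keeps working when sys.stdout has been replaced by an object without a real file descriptor (as in Colab notebooks). A minimal standalone sketch of the dup/dup2 pattern the context manager relies on; run_silenced is an illustrative helper, not the library's API:

    import os

    STDOUT_FILENO = 1  # hardcoded, as in the patch, so a replaced sys.stdout cannot break it

    def run_silenced(fn):
        """Run fn() with OS-level stdout pointed at /dev/null, then restore it."""
        devnull = open(os.devnull, "w")
        saved_fd = os.dup(STDOUT_FILENO)              # keep a copy of the real stdout fd
        try:
            os.dup2(devnull.fileno(), STDOUT_FILENO)  # fd 1 now writes to /dev/null
            return fn()
        finally:
            os.dup2(saved_fd, STDOUT_FILENO)          # restore the original stdout
            os.close(saved_fd)
            devnull.close()

    # Example: even direct writes to fd 1 (e.g. from native code) are silenced.
    run_silenced(lambda: os.write(STDOUT_FILENO, b"this never reaches the terminal\n"))
    print("back to normal stdout")

Because the redirection happens at the file-descriptor level, it also silences output produced by the underlying llama.cpp C/C++ code, which ordinary sys.stdout swapping cannot catch.
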
