Commit 239200a
Fix tokenization edge case where llama output does not start with a space
See this notebook: https://colab.research.google.com/drive/1Ooz11nFPk19zyJdMDx42CeesU8aWZMdI#scrollTo=oKpHw5PZ30uC
1 parent c50d330 commit 239200a

File tree: 1 file changed (+2 −2 lines)
llama_cpp/_internals.py

@@ -201,7 +201,7 @@ def detokenize(self, tokens: List[int], special: bool = False) -> bytes:
         # NOTE: Llama1 models automatically added a space at the start of the prompt
         # this line removes a leading space if the first token is a beginning of sentence token
         return (
-            output[1:] if len(tokens) > 0 and tokens[0] == self.token_bos() else output
+            output[1:] if len(tokens) > 0 and tokens[0] == self.token_bos() and output[0:1] == b" " else output
         )

     # Extra
@@ -796,4 +796,4 @@ def sample(
     def accept(self, ctx_main: _LlamaContext, id: int, apply_grammar: bool):
         if apply_grammar and self.grammar is not None:
             ctx_main.grammar_accept_token(self.grammar, id)
-        self.prev.append(id)
\ No newline at end of file
+        self.prev.append(id)
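
For context, here is a minimal standalone sketch of the bug this commit fixes. It is an illustration, not the library's actual code: the `bos` value and the byte strings are made up for the example. Before the change, detokenize unconditionally stripped the first byte whenever the token list began with a BOS token; the fix adds a guard so the byte is only stripped when it actually is a space.

# A minimal standalone sketch of the edge case (not the library's code):
# `bos` stands in for self.token_bos(), and `output` for the bytes produced
# by the llama.cpp detokenizer.

def old_detokenize(tokens: list[int], output: bytes, bos: int = 1) -> bytes:
    # Old behavior: after a BOS token, always drop the first byte, on the
    # assumption that the detokenizer emitted a leading space.
    return output[1:] if len(tokens) > 0 and tokens[0] == bos else output

def new_detokenize(tokens: list[int], output: bytes, bos: int = 1) -> bytes:
    # Fixed behavior: only drop the first byte when it really is a space.
    return (
        output[1:]
        if len(tokens) > 0 and tokens[0] == bos and output[0:1] == b" "
        else output
    )

# When the output starts with a space, both versions agree:
assert old_detokenize([1, 2], b" hello") == b"hello"
assert new_detokenize([1, 2], b" hello") == b"hello"

# When it does not (the case from the linked notebook), the old version
# silently ate the first real byte of the output:
assert old_detokenize([1, 2], b"hello") == b"ello"   # corrupted
assert new_detokenize([1, 2], b"hello") == b"hello"  # preserved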
