Skip to content

Commit b65e2e2

Browse files
authored
Fall back to internal prefix/middle/suffix token ids
In some cases llama.cpp will make its own guess at the FIM (fill-in-the-middle) token ids; use those when the model metadata does not provide them.
1 parent a2017ea commit b65e2e2

File tree

1 file changed

+10
-10
lines changed

1 file changed

+10
-10
lines changed

llama_cpp/llama.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -940,17 +940,17 @@ def _create_completion(
940940

941941
completion_id: str = f"cmpl-{str(uuid.uuid4())}"
942942
created: int = int(time.time())
943-
prefix_token_id: Optional[int] = self.metadata.get("tokenizer.ggml.prefix_token_id")
944-
middle_token_id: Optional[int] = self.metadata.get("tokenizer.ggml.middle_token_id")
945-
suffix_token_id: Optional[int] = self.metadata.get("tokenizer.ggml.suffix_token_id")
943+
prefix_token_id: int = int(self.metadata.get("tokenizer.ggml.prefix_token_id", self.token_prefix()))
944+
middle_token_id: int = int(self.metadata.get("tokenizer.ggml.middle_token_id", self.token_middle()))
945+
suffix_token_id: int = int(self.metadata.get("tokenizer.ggml.suffix_token_id", self.token_suffix()))
946946
# If prompt is empty, initialize completion with BOS token to avoid
947947
# detokenization including a space at the beginning of the completion
948948
completion_tokens: List[int] = [] if len(prompt) > 0 else [self.token_bos()]
949949
# Add blank space to start of prompt to match OG llama tokenizer
950950
prompt_tokens: List[int] = (
951951
(
952-
[int(prefix_token_id)]
953-
if prefix_token_id and suffix is not None
952+
[prefix_token_id]
953+
if prefix_token_id >= 0 and suffix is not None
954954
else []
955955
)
956956
+
@@ -965,8 +965,8 @@ def _create_completion(
965965
)
966966
+
967967
(
968-
[int(suffix_token_id)]
969-
if suffix_token_id and suffix is not None
968+
[suffix_token_id]
969+
if suffix_token_id >= 0 and suffix is not None
970970
else []
971971
)
972972
+
@@ -977,8 +977,8 @@ def _create_completion(
977977
)
978978
+
979979
(
980-
[int(middle_token_id)]
981-
if middle_token_id and suffix is not None
980+
[middle_token_id]
981+
if middle_token_id >= 0 and suffix is not None
982982
else []
983983
)
984984
)
@@ -1360,7 +1360,7 @@ def logit_bias_processor(
13601360
if echo:
13611361
text_str = prompt + text_str
13621362

1363-
if not suffix_token_id and suffix is not None:
1363+
if suffix_token_id < 0 and suffix is not None:
13641364
text_str = text_str + suffix
13651365

13661366
logprobs_or_none: Optional[CompletionLogprobs] = None

0 commit comments

Comments
 (0)