Skip to content

Commit 5816343

Browse files
committed
enable detokenizing special tokens
1 parent 5e39a85 commit 5816343

File tree

3 files changed: +6 −6 lines changed

llama_cpp/_internals.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ def token_to_piece(self, token: int, special: bool = False) -> bytes:
214214
llama_cpp.llama_token_to_piece(self.model, token, buf, 32, 0, special)
215215
return bytes(buf)
216216

217-
def detokenize(self, tokens: List[int], special: bool = False) -> bytes:
217+
def detokenize(self, tokens: List[int], prev_tokens: Optional[List[int]] = None, special: bool = False) -> bytes:
218218
assert self.model is not None
219219
output = b""
220220
size = 32

llama_cpp/llama.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,7 @@ def tokenize(
580580
return self.tokenizer_.tokenize(text, add_bos, special)
581581

582582
def detokenize(
583-
self, tokens: List[int], prev_tokens: Optional[List[int]] = None
583+
self, tokens: List[int], prev_tokens: Optional[List[int]] = None, special: bool = False
584584
) -> bytes:
585585
"""Detokenize a list of tokens.
586586
@@ -591,7 +591,7 @@ def detokenize(
591591
Returns:
592592
The detokenized string.
593593
"""
594-
return self.tokenizer_.detokenize(tokens, prev_tokens=prev_tokens)
594+
return self.tokenizer_.detokenize(tokens, prev_tokens=prev_tokens, special=special)
595595

596596
def set_cache(self, cache: Optional[BaseLlamaCache]):
597597
"""Set the cache.

llama_cpp/llama_tokenizer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def tokenize(
2626

2727
@abc.abstractmethod
2828
def detokenize(
29-
self, tokens: List[int], prev_tokens: Optional[List[int]] = None
29+
self, tokens: List[int], prev_tokens: Optional[List[int]] = None, special: bool = True
3030
) -> bytes:
3131
"""Detokenize the tokens into text.
3232
@@ -47,9 +47,9 @@ def tokenize(
4747
return self._model.tokenize(text, add_bos=add_bos, special=special)
4848

4949
def detokenize(
50-
self, tokens: List[int], prev_tokens: Optional[List[int]] = None
50+
self, tokens: List[int], prev_tokens: Optional[List[int]] = None, special: bool = True
5151
) -> bytes:
52-
return self._model.detokenize(tokens)
52+
return self._model.detokenize(tokens, prev_tokens=prev_tokens, special=special)
5353

5454
def encode(
5555
self, text: str, add_bos: bool = True, special: bool = True

0 commit comments

Comments (0)