2 files changed: +10 -5 lines changed
Original file line number | Diff line number | Diff line change
@@ -578,6 +578,8 @@ def tokenize(
578
578
579
579
Args:
580
580
text: The utf-8 encoded string to tokenize.
581
+ add_bos: Whether to add a beginning of sequence token.
582
+ special: Whether to tokenize special tokens.
581
583
582
584
Raises:
583
585
RuntimeError: If the tokenization failed.
@@ -594,7 +596,8 @@ def detokenize(
594
596
595
597
Args:
596
598
tokens: The list of tokens to detokenize.
597
- prev_tokens: The list of previous tokens. Offset mapping will be performed if provided
599
+ prev_tokens: The list of previous tokens. Offset mapping will be performed if provided.
600
+ special: Whether to detokenize special tokens.
598
601
599
602
Returns:
600
603
The detokenized string.
Original file line number Diff line number Diff line change @@ -19,9 +19,10 @@ def tokenize(
19
19
"""Tokenize the text into tokens.
20
20
21
21
Args:
22
- text: The text to tokenize.
22
+ text: The utf-8 encoded string to tokenize.
23
23
add_bos: Whether to add a beginning of sequence token.
24
- special: Whether to tokenize text literally or as special tokens."""
24
+ special: Whether to tokenize special tokens.
25
+ """
25
26
raise NotImplementedError
26
27
27
28
@abc.abstractmethod
@@ -31,8 +32,9 @@ def detokenize(
31
32
"""Detokenize the tokens into text.
32
33
33
34
Args:
34
- tokens: The tokens to detokenize.
35
- prev_tokens: If tokens is a continuation of a previous sequence, the previous tokens.
35
+ tokens: The list of tokens to detokenize.
36
+ prev_tokens: The list of previous tokens. Offset mapping will be performed if provided.
37
+ special: Whether to detokenize special tokens.
36
38
"""
37
39
raise NotImplementedError
38
40
You can’t perform that action at this time.
0 commit comments