Skip to content

Commit 2a428e8

Browse files
committed
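Better handling of special-token lookup: the xgen tokenizer breaks with convert_tokens_to_ids, so the resulting KeyError is now caught and the lookup falls back to returning False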
1 parent 06c61d6 commit 2a428e8

File tree

1 file changed

+12
-6
lines changed

1 file changed

+12
-6
lines changed

src/axolotl/prompt_tokenizers.py

+12-6
Original file line number | Diff line number | Diff line change
@@ -48,16 +48,22 @@ def tokenize_prompt(self, prompt):
4848

4949
@functools.lru_cache(maxsize=128)
5050
def _get_user_token(self):
51-
id_or_ids = self.tokenizer.convert_tokens_to_ids("<|USER|>")
52-
if isinstance(id_or_ids, (int,)):
53-
return id_or_ids
51+
try:
52+
id_or_ids = self.tokenizer.convert_tokens_to_ids("<|USER|>")
53+
if isinstance(id_or_ids, (int,)):
54+
return id_or_ids
55+
except KeyError:
56+
pass
5457
return False
5558

5659
@functools.lru_cache(maxsize=128)
5760
def _get_assistant_token(self):
58-
id_or_ids = self.tokenizer.convert_tokens_to_ids("<|ASSISTANT|>")
59-
if isinstance(id_or_ids, (int,)):
60-
return id_or_ids
61+
try:
62+
id_or_ids = self.tokenizer.convert_tokens_to_ids("<|ASSISTANT|>")
63+
if isinstance(id_or_ids, (int,)):
64+
return id_or_ids
65+
except KeyError:
66+
pass
6167
return False
6268

6369
def _tokenize(self, prompt: str, add_eos_token=True, strip_bos_token=False):

0 commit comments

Comments
 (0)