Skip to content

Commit 06808a3

Browse files
authored
Support converting models with multiple chat templates
Adds the following metadata:
* tokenizer.chat_templates
* tokenizer.chat_template.<name1>
* tokenizer.chat_template.<name2>
* tokenizer.chat_template.<...>

Here `tokenizer.chat_templates` is an array of the template names (except `default`); the `default` template is stored under the regular `tokenizer.chat_template` key.
1 parent 67fac4b commit 06808a3

File tree

3 files changed

+32
-3
lines changed

3 files changed

+32
-3
lines changed

gguf-py/gguf/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ class Tokenizer:
9090
HF_JSON = "tokenizer.huggingface.json"
9191
RWKV = "tokenizer.rwkv.world"
9292
CHAT_TEMPLATE = "tokenizer.chat_template"
93+
CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
94+
CHAT_TEMPLATES = "tokenizer.chat_templates"
9395

9496

9597
#

gguf-py/gguf/gguf_writer.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
import tempfile
77
from enum import Enum, auto
88
from io import BufferedWriter
9-
from typing import IO, Any, Sequence
9+
from typing import IO, Any, Sequence, Mapping
10+
from string import ascii_letters, digits
1011

1112
import numpy as np
1213

@@ -466,7 +467,33 @@ def add_add_eos_token(self, value: bool) -> None:
466467
def add_add_space_prefix(self, value: bool) -> None:
467468
self.add_bool(Keys.Tokenizer.ADD_PREFIX, value)
468469

469-
def add_chat_template(self, value: str) -> None:
470+
def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
    """Write the chat template metadata.

    ``value`` is either a single template string or a sequence of mappings,
    each carrying a ``'name'`` and a ``'template'`` entry.

    * A plain string is written under ``Keys.Tokenizer.CHAT_TEMPLATE``.
    * For a sequence, the entry named ``default`` is written under
      ``Keys.Tokenizer.CHAT_TEMPLATE``; every other entry is written under
      ``Keys.Tokenizer.CHAT_TEMPLATE_N`` formatted with its name, and the
      list of those names is written under ``Keys.Tokenizer.CHAT_TEMPLATES``.
      When the sequence has no ``default`` entry, ``CHAT_TEMPLATE`` is not
      written at all.

    Names are reduced to ASCII letters, digits and ``_``. Entries whose name
    is missing, not a string, or empty after filtering, and entries without
    a template, are ignored. If two entries end up with the same name, the
    later template replaces the earlier one so each key is written once.
    """
    # Any non-string sequence (list, tuple, ...) is a collection of named
    # templates; checking for `list` only would let a tuple fall through
    # and be written as if it were a template string.
    if not isinstance(value, str):
        # Built once instead of once per character of every name.
        allowed_chars = frozenset('_' + ascii_letters + digits)

        template_default = None
        # A dict keeps first-seen order, so the emitted name array is
        # reproducible from run to run (a set would not guarantee that),
        # and it also collapses names that collide after filtering.
        named_templates: dict[str, str] = {}

        for choice in value:
            raw_name = choice.get('name', '')
            template = choice.get('template')

            if not isinstance(raw_name, str) or template is None:
                continue

            # Allowing non-alphanumerical characters in template name is
            # probably not a good idea, so filter it
            name = ''.join(c for c in raw_name if c in allowed_chars)
            if not name:
                continue

            if name == 'default':
                template_default = template
            else:
                named_templates[name] = template

        for name, template in named_templates.items():
            self.add_string(Keys.Tokenizer.CHAT_TEMPLATE_N.format(name=name), template)

        if named_templates:
            self.add_array(Keys.Tokenizer.CHAT_TEMPLATES, list(named_templates))

        if template_default is None:
            return

        value = template_default

    self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
471498

472499
def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:

gguf-py/gguf/vocab.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def _try_load_from_tokenizer_json(self, path: Path) -> bool:
141141
with open(tokenizer_config_file, encoding = 'utf-8') as f:
142142
tokenizer_config = json.load(f)
143143
chat_template = tokenizer_config.get('chat_template')
144-
if chat_template is None or isinstance(chat_template, str):
144+
if chat_template is None or isinstance(chat_template, (str, list)):
145145
self.chat_template = chat_template
146146
else:
147147
print(

0 commit comments

Comments
 (0)