Skip to content

Commit a11d9ac

Browse files
committed
Add flag for prefix matching
1 parent d84402b commit a11d9ac

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

mergekit/scripts/tokensurgeon.py

+9 -1
Original file line number | Diff line number | Diff line change
@@ -687,6 +687,13 @@ def build_embedding_matrix(
687687
help="Number of tokens to process in each batch",
688688
show_default=True,
689689
)
690+
@click.option(
691+
"--allow-lm-head-prefix-match/--no-allow-lm-head-prefix-match",
692+
is_flag=True,
693+
default=True,
694+
help="Allow prefix matches for LM head tokens",
695+
show_default=True,
696+
)
690697
@add_merge_options
691698
def main(
692699
model: str,
@@ -699,6 +706,7 @@ def main(
699706
weight_scheme: str,
700707
subword_method: str,
701708
batch_size: Optional[int],
709+
allow_lm_head_prefix_match: bool,
702710
merge_options: MergeOptions,
703711
):
704712
merge_options.apply_global_options()
@@ -764,7 +772,7 @@ def main(
764772
donor_lm_head,
765773
orig_vocab=orig_vocab,
766774
donor_vocab=donor_vocab,
767-
allow_prefix=True,
775+
allow_prefix=allow_lm_head_prefix_match,
768776
allow_byte=True,
769777
is_lm_head=True,
770778
options=options,

0 commit comments

Comments
 (0)