Skip to content

Commit cc48bff

Browse files
committed
Update defaults
1 parent a11d9ac commit cc48bff

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

mergekit/scripts/tokensurgeon.py

+4 -3
Original file line number | Diff line number | Diff line change
@@ -555,6 +555,7 @@ def compute_new_embeddings(
555555
]
556556
targets = donor_embed[torch.tensor([donor_vocab[t] for t in target_tokens])]
557557
indices, coeffs = batch_omp(targets, donor_shared_embeds, options.k)
558+
558559
res = (
559560
torch.bmm(coeffs.unsqueeze(1), orig_shared_embeds[indices].to(torch.float))
560561
.squeeze(1)
@@ -660,7 +661,7 @@ def build_embedding_matrix(
660661
"--approximation-method",
661662
"-a",
662663
type=click.Choice([m.value for m in ApproximationMethod]),
663-
default=ApproximationMethod.COMMON_INTERPOLATION.value,
664+
default=ApproximationMethod.ORTHOGONAL_MATCHING_PURSUIT.value,
664665
help="Method for approximating missing tokens",
665666
show_default=True,
666667
)
@@ -669,7 +670,7 @@ def build_embedding_matrix(
669670
"-w",
670671
type=click.Choice([w.value for w in WeightingScheme]),
671672
default=WeightingScheme.DISTANCE_PROPORTIONAL.value,
672-
help="Weighting scheme for KNN interpolation",
673+
help="Weighting scheme for common-vocabulary interpolation",
673674
show_default=True,
674675
)
675676
@click.option(
@@ -690,7 +691,7 @@ def build_embedding_matrix(
690691
@click.option(
691692
"--allow-lm-head-prefix-match/--no-allow-lm-head-prefix-match",
692693
is_flag=True,
693-
default=True,
694+
default=False,
694695
help="Allow prefix matches for LM head tokens",
695696
show_default=True,
696697
)

0 commit comments

Comments
 (0)