@@ -38,6 +38,7 @@ struct whisper_params {
38
38
int32_t max_len = 0 ;
39
39
int32_t best_of = whisper_full_default_params(WHISPER_SAMPLING_GREEDY).greedy.best_of;
40
40
int32_t beam_size = whisper_full_default_params(WHISPER_SAMPLING_BEAM_SEARCH).beam_search.beam_size;
41
+ int32_t max_decoders = whisper_context_default_params().max_decoders;
41
42
int32_t audio_ctx = 0 ;
42
43
43
44
float word_thold = 0 .01f ;
@@ -131,6 +132,7 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
131
132
else if (arg == " -ml" || arg == " --max-len" ) { params.max_len = std::stoi (argv[++i]); }
132
133
else if (arg == " -bo" || arg == " --best-of" ) { params.best_of = std::stoi (argv[++i]); }
133
134
else if (arg == " -bs" || arg == " --beam-size" ) { params.beam_size = std::stoi (argv[++i]); }
135
+ else if (arg == " -md" || arg == " --max-decoders" ) { params.max_decoders = std::stoi (argv[++i]); }
134
136
else if (arg == " -ac" || arg == " --audio-ctx" ) { params.audio_ctx = std::stoi (argv[++i]); }
135
137
else if (arg == " -wt" || arg == " --word-thold" ) { params.word_thold = std::stof (argv[++i]); }
136
138
else if (arg == " -et" || arg == " --entropy-thold" ) { params.entropy_thold = std::stof (argv[++i]); }
@@ -198,6 +200,7 @@ static void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params
198
200
fprintf (stderr, " -sow, --split-on-word [%-7s] split on word rather than on token\n " , params.split_on_word ? " true" : " false" );
199
201
fprintf (stderr, " -bo N, --best-of N [%-7d] number of best candidates to keep\n " , params.best_of );
200
202
fprintf (stderr, " -bs N, --beam-size N [%-7d] beam size for beam search\n " , params.beam_size );
203
+ fprintf (stderr, " -md N, --max-decoders N [%-7d] Max decoders, used to set the text context cache factor\n " , params.max_decoders );
201
204
fprintf (stderr, " -ac N, --audio-ctx N [%-7d] audio context size (0 - all)\n " , params.audio_ctx );
202
205
fprintf (stderr, " -wt N, --word-thold N [%-7.2f] word timestamp probability threshold\n " , params.word_thold );
203
206
fprintf (stderr, " -et N, --entropy-thold N [%-7.2f] entropy threshold for decoder fail\n " , params.entropy_thold );
@@ -981,6 +984,7 @@ int main(int argc, char ** argv) {
981
984
982
985
cparams.use_gpu = params.use_gpu ;
983
986
cparams.flash_attn = params.flash_attn ;
987
+ cparams.max_decoders = params.max_decoders ;
984
988
985
989
if (!params.dtw .empty ()) {
986
990
cparams.dtw_token_timestamps = true ;
0 commit comments