@@ -66,6 +66,7 @@ struct whisper_params {
66
66
bool no_timestamps = true ;
67
67
bool verbose_prompt = false ;
68
68
bool use_gpu = true ;
69
+ bool flash_attn = false ;
69
70
70
71
std::string person = " Georgi" ;
71
72
std::string bot_name = " LLaMA" ;
@@ -105,6 +106,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
105
106
else if (arg == " -pe" || arg == " --print-energy" ) { params.print_energy = true ; }
106
107
else if (arg == " -vp" || arg == " --verbose-prompt" ) { params.verbose_prompt = true ; }
107
108
else if (arg == " -ng" || arg == " --no-gpu" ) { params.use_gpu = false ; }
109
+ else if (arg == " -fa" || arg == " --flash-attn" ) { params.flash_attn = true ; }
108
110
else if (arg == " -p" || arg == " --person" ) { params.person = argv[++i]; }
109
111
else if (arg == " -bn" || arg == " --bot-name" ) { params.bot_name = argv[++i]; }
110
112
else if (arg == " --session" ) { params.path_session = argv[++i]; }
@@ -123,7 +125,6 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
123
125
}
124
126
}
125
127
else if (arg == " -f" || arg == " --file" ) { params.fname_out = argv[++i]; }
126
- else if (arg == " -ng" || arg == " --no-gpu" ) { params.use_gpu = false ; }
127
128
else {
128
129
fprintf (stderr, " error: unknown argument: %s\n " , arg.c_str ());
129
130
whisper_print_usage (argc, argv, params);
@@ -154,6 +155,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
154
155
fprintf (stderr, " -pe, --print-energy [%-7s] print sound energy (for debugging)\n " , params.print_energy ? " true" : " false" );
155
156
fprintf (stderr, " -vp, --verbose-prompt [%-7s] print prompt at start\n " , params.verbose_prompt ? " true" : " false" );
156
157
fprintf (stderr, " -ng, --no-gpu [%-7s] disable GPU\n " , params.use_gpu ? " false" : " true" );
158
+ fprintf (stderr, " -fa, --flash-attn [%-7s] flash attention\n " , params.flash_attn ? " true" : " false" );
157
159
fprintf (stderr, " -p NAME, --person NAME [%-7s] person name (for prompt selection)\n " , params.person .c_str ());
158
160
fprintf (stderr, " -bn NAME, --bot-name NAME [%-7s] bot name (to display)\n " , params.bot_name .c_str ());
159
161
fprintf (stderr, " -w TEXT, --wake-command T [%-7s] wake-up command to listen for\n " , params.wake_cmd .c_str ());
@@ -285,7 +287,9 @@ int main(int argc, char ** argv) {
285
287
// whisper init
286
288
287
289
struct whisper_context_params cparams = whisper_context_default_params ();
288
- cparams.use_gpu = params.use_gpu ;
290
+
291
+ cparams.use_gpu = params.use_gpu ;
292
+ cparams.flash_attn = params.flash_attn ;
289
293
290
294
struct whisper_context * ctx_wsp = whisper_init_from_file_with_params (params.model_wsp .c_str (), cparams);
291
295
if (!ctx_wsp) {
@@ -316,6 +320,7 @@ int main(int argc, char ** argv) {
316
320
lcparams.n_ctx = 2048 ;
317
321
lcparams.seed = 1 ;
318
322
lcparams.n_threads = params.n_threads ;
323
+ lcparams.flash_attn = params.flash_attn ;
319
324
320
325
struct llama_context * ctx_llama = llama_new_context_with_model (model_llama, lcparams);
321
326
0 commit comments