@@ -334,24 +334,24 @@ struct server_task {
334
334
if (data.contains (" json_schema" ) && !data.contains (" grammar" )) {
335
335
try {
336
336
auto schema = json_value (data, " json_schema" , json::object ());
337
- LOG_DBG (" JSON schema: %s\n " , schema.dump (2 ).c_str ());
337
+ SRV_DBG (" JSON schema: %s\n " , schema.dump (2 ).c_str ());
338
338
params.sampling .grammar = json_schema_to_grammar (schema);
339
- LOG_DBG (" Converted grammar: %s\n " , params.sampling .grammar .c_str ());
339
+ SRV_DBG (" Converted grammar: %s\n " , params.sampling .grammar .c_str ());
340
340
} catch (const std::exception & e) {
341
341
throw std::runtime_error (std::string (" \" json_schema\" : " ) + e.what ());
342
342
}
343
343
} else {
344
344
params.sampling .grammar = json_value (data, " grammar" , defaults.sampling .grammar );
345
- LOG_DBG (" Grammar: %s\n " , params.sampling .grammar .c_str ());
345
+ SRV_DBG (" Grammar: %s\n " , params.sampling .grammar .c_str ());
346
346
params.sampling .grammar_lazy = json_value (data, " grammar_lazy" , defaults.sampling .grammar_lazy );
347
- LOG_DBG (" Grammar lazy: %s\n " , params.sampling .grammar_lazy ? " true" : " false" );
347
+ SRV_DBG (" Grammar lazy: %s\n " , params.sampling .grammar_lazy ? " true" : " false" );
348
348
}
349
349
350
350
{
351
351
auto it = data.find (" chat_format" );
352
352
if (it != data.end ()) {
353
353
params.oaicompat_chat_format = static_cast <common_chat_format>(it->get <int >());
354
- LOG_INF (" Chat format: %s\n " , common_chat_format_name (params.oaicompat_chat_format ).c_str ());
354
+ SRV_INF (" Chat format: %s\n " , common_chat_format_name (params.oaicompat_chat_format ).c_str ());
355
355
} else {
356
356
params.oaicompat_chat_format = defaults.oaicompat_chat_format ;
357
357
}
@@ -367,12 +367,12 @@ struct server_task {
367
367
368
368
auto ids = common_tokenize (vocab, trigger.word , /* add_special= */ false , /* parse_special= */ true );
369
369
if (ids.size () == 1 ) {
370
- LOG_DBG (" Grammar trigger token: %d (`%s`)\n " , ids[0 ], trigger.word .c_str ());
370
+ SRV_DBG (" Grammar trigger token: %d (`%s`)\n " , ids[0 ], trigger.word .c_str ());
371
371
params.sampling .grammar_trigger_tokens .push_back (ids[0 ]);
372
372
params.sampling .preserved_tokens .insert (ids[0 ]);
373
373
continue ;
374
374
}
375
- LOG_DBG (" Grammar trigger word: `%s`\n " , trigger.word .c_str ());
375
+ SRV_DBG (" Grammar trigger word: `%s`\n " , trigger.word .c_str ());
376
376
params.sampling .grammar_trigger_words .push_back (trigger);
377
377
}
378
378
}
@@ -381,11 +381,11 @@ struct server_task {
381
381
for (const auto & t : *preserved_tokens) {
382
382
auto ids = common_tokenize (vocab, t.get <std::string>(), /* add_special= */ false , /* parse_special= */ true );
383
383
if (ids.size () == 1 ) {
384
- LOG_DBG (" Preserved token: %d\n " , ids[0 ]);
384
+ SRV_DBG (" Preserved token: %d\n " , ids[0 ]);
385
385
params.sampling .preserved_tokens .insert (ids[0 ]);
386
386
} else {
387
387
// This may happen when using a tool call style meant for a model with special tokens to preserve on a model without said tokens.
388
- LOG_WRN (" Not preserved because more than 1 token (wrong chat template override?): %s\n " , t.get <std::string>().c_str ());
388
+ SRV_WRN (" Not preserved because more than 1 token (wrong chat template override?): %s\n " , t.get <std::string>().c_str ());
389
389
}
390
390
}
391
391
}
@@ -717,7 +717,7 @@ struct server_task_result_cmpl_final : server_task_result {
717
717
std::string finish_reason = " length" ;
718
718
common_chat_msg msg;
719
719
if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
720
- LOG_DBG (" Parsing chat message: %s\n " , content.c_str ());
720
+ SRV_DBG (" Parsing chat message: %s\n " , content.c_str ());
721
721
msg = common_chat_parse (content, oaicompat_chat_format);
722
722
finish_reason = msg.tool_calls .empty () ? " stop" : " tool_calls" ;
723
723
} else {
@@ -1885,7 +1885,7 @@ struct server_context {
1885
1885
}
1886
1886
1887
1887
if (params_base.chat_template .empty () && !validate_builtin_chat_template (params.use_jinja )) {
1888
- LOG_WRN (" %s: The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses\n " , __func__);
1888
+ SRV_WRN (" %s: The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses\n " , __func__);
1889
1889
chat_templates = common_chat_templates_from_model (model, " chatml" );
1890
1890
} else {
1891
1891
chat_templates = common_chat_templates_from_model (model, params_base.chat_template );
@@ -3355,10 +3355,10 @@ static void log_server_request(const httplib::Request & req, const httplib::Resp
3355
3355
3356
3356
// reminder: this function is not covered by httplib's exception handler; if someone does more complicated stuff, think about wrapping it in try-catch
3357
3357
3358
- LOG_INF (" request: %s %s %s %d\n " , req.method .c_str (), req.path .c_str (), req.remote_addr .c_str (), res.status );
3358
+ SRV_INF (" request: %s %s %s %d\n " , req.method .c_str (), req.path .c_str (), req.remote_addr .c_str (), res.status );
3359
3359
3360
- LOG_DBG (" request: %s\n " , req.body .c_str ());
3361
- LOG_DBG (" response: %s\n " , res.body .c_str ());
3360
+ SRV_DBG (" request: %s\n " , req.body .c_str ());
3361
+ SRV_DBG (" response: %s\n " , res.body .c_str ());
3362
3362
}
3363
3363
3364
3364
std::function<void (int )> shutdown_handler;
@@ -3860,7 +3860,9 @@ int main(int argc, char ** argv) {
3860
3860
3861
3861
try {
3862
3862
const auto & prompt = data.at (" prompt" );
3863
- LOG_DBG (" Prompt: %s\n " , prompt.is_string () ? prompt.get <std::string>().c_str () : prompt.dump (2 ).c_str ());
3863
+ // TODO: this log can become very long, put it behind a flag or think about a more compact format
3864
+ // SRV_DBG("Prompt: %s\n", prompt.is_string() ? prompt.get<std::string>().c_str() : prompt.dump(2).c_str());
3865
+
3864
3866
std::vector<llama_tokens> tokenized_prompts = tokenize_input_prompts (ctx_server.vocab , prompt, true , true );
3865
3867
tasks.reserve (tokenized_prompts.size ());
3866
3868
for (size_t i = 0 ; i < tokenized_prompts.size (); i++) {
@@ -4376,6 +4378,9 @@ int main(int argc, char ** argv) {
4376
4378
res.set_content (" Error: gzip is not supported by this browser" , " text/plain" );
4377
4379
} else {
4378
4380
res.set_header (" Content-Encoding" , " gzip" );
4381
+ // COEP and COOP headers, required by pyodide (python interpreter)
4382
+ res.set_header (" Cross-Origin-Embedder-Policy" , " require-corp" );
4383
+ res.set_header (" Cross-Origin-Opener-Policy" , " same-origin" );
4379
4384
res.set_content (reinterpret_cast <const char *>(index_html_gz), index_html_gz_len, " text/html; charset=utf-8" );
4380
4385
}
4381
4386
return false ;
0 commit comments