Skip to content

Commit e3b6c27

Browse files
committed
Merge branch 'concedo_experimental' into croco_nex_0
2 parents 9cbf737 + 076e61e commit e3b6c27

21 files changed

+978
-345
lines changed

examples/server/public/index.html.gz

4.82 KB
Binary file not shown.

examples/server/server.cpp

+20-15
Original file line numberDiff line numberDiff line change
@@ -334,24 +334,24 @@ struct server_task {
334334
if (data.contains("json_schema") && !data.contains("grammar")) {
335335
try {
336336
auto schema = json_value(data, "json_schema", json::object());
337-
LOG_DBG("JSON schema: %s\n", schema.dump(2).c_str());
337+
SRV_DBG("JSON schema: %s\n", schema.dump(2).c_str());
338338
params.sampling.grammar = json_schema_to_grammar(schema);
339-
LOG_DBG("Converted grammar: %s\n", params.sampling.grammar.c_str());
339+
SRV_DBG("Converted grammar: %s\n", params.sampling.grammar.c_str());
340340
} catch (const std::exception & e) {
341341
throw std::runtime_error(std::string("\"json_schema\": ") + e.what());
342342
}
343343
} else {
344344
params.sampling.grammar = json_value(data, "grammar", defaults.sampling.grammar);
345-
LOG_DBG("Grammar: %s\n", params.sampling.grammar.c_str());
345+
SRV_DBG("Grammar: %s\n", params.sampling.grammar.c_str());
346346
params.sampling.grammar_lazy = json_value(data, "grammar_lazy", defaults.sampling.grammar_lazy);
347-
LOG_DBG("Grammar lazy: %s\n", params.sampling.grammar_lazy ? "true" : "false");
347+
SRV_DBG("Grammar lazy: %s\n", params.sampling.grammar_lazy ? "true" : "false");
348348
}
349349

350350
{
351351
auto it = data.find("chat_format");
352352
if (it != data.end()) {
353353
params.oaicompat_chat_format = static_cast<common_chat_format>(it->get<int>());
354-
LOG_INF("Chat format: %s\n", common_chat_format_name(params.oaicompat_chat_format).c_str());
354+
SRV_INF("Chat format: %s\n", common_chat_format_name(params.oaicompat_chat_format).c_str());
355355
} else {
356356
params.oaicompat_chat_format = defaults.oaicompat_chat_format;
357357
}
@@ -367,12 +367,12 @@ struct server_task {
367367

368368
auto ids = common_tokenize(vocab, trigger.word, /* add_special= */ false, /* parse_special= */ true);
369369
if (ids.size() == 1) {
370-
LOG_DBG("Grammar trigger token: %d (`%s`)\n", ids[0], trigger.word.c_str());
370+
SRV_DBG("Grammar trigger token: %d (`%s`)\n", ids[0], trigger.word.c_str());
371371
params.sampling.grammar_trigger_tokens.push_back(ids[0]);
372372
params.sampling.preserved_tokens.insert(ids[0]);
373373
continue;
374374
}
375-
LOG_DBG("Grammar trigger word: `%s`\n", trigger.word.c_str());
375+
SRV_DBG("Grammar trigger word: `%s`\n", trigger.word.c_str());
376376
params.sampling.grammar_trigger_words.push_back(trigger);
377377
}
378378
}
@@ -381,11 +381,11 @@ struct server_task {
381381
for (const auto & t : *preserved_tokens) {
382382
auto ids = common_tokenize(vocab, t.get<std::string>(), /* add_special= */ false, /* parse_special= */ true);
383383
if (ids.size() == 1) {
384-
LOG_DBG("Preserved token: %d\n", ids[0]);
384+
SRV_DBG("Preserved token: %d\n", ids[0]);
385385
params.sampling.preserved_tokens.insert(ids[0]);
386386
} else {
387387
// This may happen when using a tool call style meant for a model with special tokens to preserve on a model without said tokens.
388-
LOG_WRN("Not preserved because more than 1 token (wrong chat template override?): %s\n", t.get<std::string>().c_str());
388+
SRV_WRN("Not preserved because more than 1 token (wrong chat template override?): %s\n", t.get<std::string>().c_str());
389389
}
390390
}
391391
}
@@ -717,7 +717,7 @@ struct server_task_result_cmpl_final : server_task_result {
717717
std::string finish_reason = "length";
718718
common_chat_msg msg;
719719
if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
720-
LOG_DBG("Parsing chat message: %s\n", content.c_str());
720+
SRV_DBG("Parsing chat message: %s\n", content.c_str());
721721
msg = common_chat_parse(content, oaicompat_chat_format);
722722
finish_reason = msg.tool_calls.empty() ? "stop" : "tool_calls";
723723
} else {
@@ -1885,7 +1885,7 @@ struct server_context {
18851885
}
18861886

18871887
if (params_base.chat_template.empty() && !validate_builtin_chat_template(params.use_jinja)) {
1888-
LOG_WRN("%s: The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses\n", __func__);
1888+
SRV_WRN("%s: The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses\n", __func__);
18891889
chat_templates = common_chat_templates_from_model(model, "chatml");
18901890
} else {
18911891
chat_templates = common_chat_templates_from_model(model, params_base.chat_template);
@@ -3355,10 +3355,10 @@ static void log_server_request(const httplib::Request & req, const httplib::Resp
33553355

33563356
// reminder: this function is not covered by httplib's exception handler; if someone does more complicated stuff, think about wrapping it in try-catch
33573357

3358-
LOG_INF("request: %s %s %s %d\n", req.method.c_str(), req.path.c_str(), req.remote_addr.c_str(), res.status);
3358+
SRV_INF("request: %s %s %s %d\n", req.method.c_str(), req.path.c_str(), req.remote_addr.c_str(), res.status);
33593359

3360-
LOG_DBG("request: %s\n", req.body.c_str());
3361-
LOG_DBG("response: %s\n", res.body.c_str());
3360+
SRV_DBG("request: %s\n", req.body.c_str());
3361+
SRV_DBG("response: %s\n", res.body.c_str());
33623362
}
33633363

33643364
std::function<void(int)> shutdown_handler;
@@ -3860,7 +3860,9 @@ int main(int argc, char ** argv) {
38603860

38613861
try {
38623862
const auto & prompt = data.at("prompt");
3863-
LOG_DBG("Prompt: %s\n", prompt.is_string() ? prompt.get<std::string>().c_str() : prompt.dump(2).c_str());
3863+
// TODO: this log can become very long, put it behind a flag or think about a more compact format
3864+
//SRV_DBG("Prompt: %s\n", prompt.is_string() ? prompt.get<std::string>().c_str() : prompt.dump(2).c_str());
3865+
38643866
std::vector<llama_tokens> tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true);
38653867
tasks.reserve(tokenized_prompts.size());
38663868
for (size_t i = 0; i < tokenized_prompts.size(); i++) {
@@ -4376,6 +4378,9 @@ int main(int argc, char ** argv) {
43764378
res.set_content("Error: gzip is not supported by this browser", "text/plain");
43774379
} else {
43784380
res.set_header("Content-Encoding", "gzip");
4381+
// COEP and COOP headers, required by pyodide (python interpreter)
4382+
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
4383+
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
43794384
res.set_content(reinterpret_cast<const char*>(index_html_gz), index_html_gz_len, "text/html; charset=utf-8");
43804385
}
43814386
return false;

examples/server/webui/package-lock.json

+10
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

examples/server/webui/package.json

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
"preview": "vite preview"
1212
},
1313
"dependencies": {
14+
"@heroicons/react": "^2.2.0",
1415
"@sec-ant/readable-stream": "^0.6.0",
1516
"@vscode/markdown-it-katex": "^1.1.1",
1617
"autoprefixer": "^10.4.20",

examples/server/webui/src/App.tsx

+13-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import { HashRouter, Outlet, Route, Routes } from 'react-router';
22
import Header from './components/Header';
33
import Sidebar from './components/Sidebar';
4-
import { AppContextProvider } from './utils/app.context';
4+
import { AppContextProvider, useAppContext } from './utils/app.context';
55
import ChatScreen from './components/ChatScreen';
6+
import SettingDialog from './components/SettingDialog';
67

78
function App() {
89
return (
@@ -22,13 +23,23 @@ function App() {
2223
}
2324

2425
function AppLayout() {
26+
const { showSettings, setShowSettings } = useAppContext();
2527
return (
2628
<>
2729
<Sidebar />
28-
<div className="chat-screen drawer-content grow flex flex-col h-screen w-screen mx-auto px-4">
30+
<div
31+
className="drawer-content grow flex flex-col h-screen w-screen mx-auto px-4 overflow-auto"
32+
id="main-scroll"
33+
>
2934
<Header />
3035
<Outlet />
3136
</div>
37+
{
38+
<SettingDialog
39+
show={showSettings}
40+
onClose={() => setShowSettings(false)}
41+
/>
42+
}
3243
</>
3344
);
3445
}

examples/server/webui/src/Config.ts

+3
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export const BASE_URL = new URL('.', document.baseURI).href
1010

1111
export const CONFIG_DEFAULT = {
1212
// Note: in order not to introduce breaking changes, please keep the same data type (number, string, etc) if you want to change the default value. Do not use null or undefined for default value.
13+
// Do not use nested objects, keep it single level. Prefix the key if you need to group them.
1314
apiKey: '',
1415
systemMessage: 'You are a helpful assistant.',
1516
showTokensPerSecond: false,
@@ -36,6 +37,8 @@ export const CONFIG_DEFAULT = {
3637
dry_penalty_last_n: -1,
3738
max_tokens: -1,
3839
custom: '', // custom json-stringified object
40+
// experimental features
41+
pyIntepreterEnabled: false,
3942
};
4043
export const CONFIG_INFO: Record<string, string> = {
4144
apiKey: 'Set the API Key if you are using --api-key option for the server.',
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
import { useEffect, useState } from 'react';
2+
import { useAppContext } from '../utils/app.context';
3+
import { OpenInNewTab, XCloseButton } from '../utils/common';
4+
import { CanvasType } from '../utils/types';
5+
import { PlayIcon, StopIcon } from '@heroicons/react/24/outline';
6+
import StorageUtils from '../utils/storage';
7+
8+
// Interruption needs a SharedArrayBuffer shared between this thread and the worker.
const canInterrupt = typeof SharedArrayBuffer === 'function';

// adapted from https://pyodide.org/en/stable/usage/webworker.html
/**
 * Source of the web worker that hosts Pyodide.
 *
 * Protocol (per message `{ id, python, context, interruptBuffer }`):
 *  - posts `{ id, running: true }` right before the code is executed,
 *  - then posts `{ id, result, stdOutAndErr }` on success,
 *  - or `{ id, error }` if loading Pyodide, loading packages or running the code fails.
 * Every failure path posts a reply, so the caller is never left waiting.
 */
const WORKER_CODE = `
importScripts("https://cdn.jsdelivr.net/pyodide/v0.27.2/full/pyodide.js");

let stdOutAndErr = [];

let pyodideReadyPromise = loadPyodide({
  stdout: (data) => stdOutAndErr.push(data),
  stderr: (data) => stdOutAndErr.push(data),
});

let alreadySetBuff = false;

self.onmessage = async (event) => {
  stdOutAndErr = [];
  const { id, python, context, interruptBuffer } = event.data;

  try {
    // make sure loading is done
    const pyodide = await pyodideReadyPromise;

    if (interruptBuffer && !alreadySetBuff) {
      pyodide.setInterruptBuffer(interruptBuffer);
      alreadySetBuff = true;
    }

    // Now load any packages we need, run the code, and send the result back.
    await pyodide.loadPackagesFromImports(python);

    // make a Python dictionary with the data from context
    const dict = pyodide.globals.get("dict");
    const globals = dict(Object.entries(context));

    self.postMessage({ id, running: true });
    // Execute the python code in this context
    const result = pyodide.runPython(python, { globals });
    self.postMessage({ result, id, stdOutAndErr });
  } catch (error) {
    // always send a non-empty error text, even if something other than an Error was thrown
    self.postMessage({ error: (error && error.message) || String(error), id });
  }

  // interruptBuffer is null when SharedArrayBuffer is unavailable
  if (interruptBuffer) {
    interruptBuffer[0] = 0;
  }
};
`;
52+
53+
// Single worker instance shared by all runs; created on first use.
let worker: Worker;

// One-byte shared buffer passed to the worker with every run request;
// null when SharedArrayBuffer is not available.
const interruptBuffer = canInterrupt
  ? new Uint8Array(new SharedArrayBuffer(1))
  : null;

/** Create the worker from WORKER_CODE on the first call; later calls do nothing. */
const startWorker = () => {
  if (worker) return;
  const workerSource = new Blob([WORKER_CODE], { type: 'text/javascript' });
  worker = new Worker(URL.createObjectURL(workerSource));
};

// Start the worker at module load when the feature is enabled in the stored config.
if (StorageUtils.getConfig().pyIntepreterEnabled) {
  startWorker();
}
69+
70+
/**
 * Run a piece of Python code in the shared worker.
 *
 * @param pyCode Python source to execute.
 * @param callbackRunning Called once the worker reports that execution has started.
 * @returns `donePromise`, which resolves to the captured stdout/stderr lines joined
 *          with newlines, or to the error text if the run (or the worker itself) fails;
 *          and `interrupt`, which requests interruption of the running code
 *          (does nothing when no interrupt buffer is available).
 */
const runCodeInWorker = (
  pyCode: string,
  callbackRunning: () => void
): {
  donePromise: Promise<string>;
  interrupt: () => void;
} => {
  startWorker();
  // random id used to match worker replies to this particular run
  const id = Math.random() * 1e8;
  const context = {};
  if (interruptBuffer) {
    // clear any interrupt request left over from a previous run
    interruptBuffer[0] = 0;
  }

  const donePromise = new Promise<string>((resolve) => {
    worker.onmessage = (event) => {
      const { error, stdOutAndErr, running } = event.data;
      if (id !== event.data.id) return;
      if (running) {
        callbackRunning();
        return;
      } else if (error) {
        resolve(error.toString());
      } else {
        // tolerate a reply without captured output instead of throwing inside the handler
        resolve((stdOutAndErr ?? []).join('\n'));
      }
    };
    // An uncaught error inside the worker would otherwise leave this promise pending forever.
    worker.onerror = (event) => {
      resolve(event.message || 'Python worker error');
    };
    worker.postMessage({ id, python: pyCode, context, interruptBuffer });
  });

  const interrupt = () => {
    console.log('Interrupting...');
    console.trace();
    if (interruptBuffer) {
      // 2 is the value Pyodide's interrupt-buffer documentation uses to request SIGINT
      interruptBuffer[0] = 2;
    }
  };

  return { donePromise, interrupt };
};
110+
111+
/**
 * Canvas panel with an editable Python snippet, Run/Stop controls and a read-only output box.
 * The snippet is (re)run whenever the canvas content changes, including on mount.
 * Renders nothing unless the current canvas data is of type PY_INTERPRETER.
 */
export default function CanvasPyInterpreter() {
  const { canvasData, setCanvasData } = useAppContext();
  const canvasContent = canvasData?.content ?? '';

  // local editable copy, so the canvas data itself is never mutated
  const [code, setCode] = useState(canvasContent);
  const [running, setRunning] = useState(false);
  const [output, setOutput] = useState('');
  const [interruptFn, setInterruptFn] = useState<() => void>();
  const [showStopBtn, setShowStopBtn] = useState(false);

  const runCode = async (pycode: string) => {
    // ask a previous run (if any) to stop first
    if (interruptFn) {
      interruptFn();
    }
    setRunning(true);
    setOutput('Loading Pyodide...');
    const execution = runCodeInWorker(pycode, () => {
      setOutput('Running...');
      setShowStopBtn(canInterrupt);
    });
    // wrapped in a function because React treats a bare function argument as a state updater
    setInterruptFn(() => execution.interrupt);
    const finalOutput = await execution.donePromise;
    setOutput(finalOutput);
    setRunning(false);
    setShowStopBtn(false);
  };

  // run code on mount and whenever the canvas content changes
  useEffect(() => {
    setCode(canvasContent);
    runCode(canvasContent);
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [canvasData?.content]);

  // must stay below the hooks so they are called on every render
  if (canvasData?.type !== CanvasType.PY_INTERPRETER) {
    return null;
  }

  const handleClose = () => setCanvasData(null);
  const handleRun = () => runCode(code);
  const handleStop = () => interruptFn?.();

  return (
    <div className="card bg-base-200 w-full h-full shadow-xl">
      <div className="card-body">
        <div className="flex justify-between items-center mb-4">
          <span className="text-lg font-bold">Python Interpreter</span>
          <XCloseButton className="bg-base-100" onClick={handleClose} />
        </div>
        <div className="grid grid-rows-3 gap-4 h-full">
          <textarea
            className="textarea textarea-bordered w-full h-full font-mono"
            value={code}
            onChange={(e) => setCode(e.target.value)}
          />
          <div className="font-mono flex flex-col row-span-2">
            <div className="flex items-center mb-2">
              <button
                className="btn btn-sm bg-base-100"
                onClick={handleRun}
                disabled={running}
              >
                <PlayIcon className="h-6 w-6" /> Run
              </button>
              {showStopBtn && (
                <button
                  className="btn btn-sm bg-base-100 ml-2"
                  onClick={handleStop}
                >
                  <StopIcon className="h-6 w-6" /> Stop
                </button>
              )}
              <span className="grow text-right text-xs">
                <OpenInNewTab href="https://github.com/ggerganov/llama.cpp/issues/11762">
                  Report a bug
                </OpenInNewTab>
              </span>
            </div>
            <textarea
              className="textarea textarea-bordered h-full dark-color"
              value={output}
              readOnly
            />
          </div>
        </div>
      </div>
    </div>
  );
}

0 commit comments

Comments
 (0)