@@ -187,7 +187,7 @@ class HttpRequest {
187
187
}
188
188
189
189
void writeNotFound () {
190
- const char * data = " HTTP/1.1 404 Not Found\r\n " ;
190
+ const char * data = " HTTP/1.1 404 Not Found\r\n " ;
191
191
writeSocket (serverSocket, data, strlen (data));
192
192
}
193
193
@@ -218,7 +218,7 @@ class HttpRequest {
218
218
}
219
219
220
220
void writeStreamEndChunk () {
221
- const char * endChunk = " 0000\r\n\r\n " ;
221
+ const char * endChunk = " 0000\r\n\r\n " ;
222
222
writeSocket (serverSocket, endChunk, strlen (endChunk));
223
223
}
224
224
};
@@ -310,24 +310,24 @@ class NaiveCache {
310
310
311
311
class ApiServer {
312
312
private:
313
- RootLlmInference* inference;
314
- Tokenizer* tokenizer;
315
- Sampler* sampler;
316
- AppCliArgs* args;
317
- LlmHeader* header;
318
- EosDetector* eosDetector;
319
- ChatTemplate* chatTemplate ;
313
+ RootLlmInference * inference;
314
+ Tokenizer * tokenizer;
315
+ Sampler * sampler;
316
+ AppCliArgs * args;
317
+ LlmHeader * header;
318
+ EosDetector * eosDetector;
319
+ ChatTemplateGenerator *templateGenerator ;
320
320
NaiveCache naiveCache;
321
321
322
322
public:
323
- ApiServer ( RootLlmInference* inference, Tokenizer* tokenizer, Sampler* sampler, AppCliArgs* args, LlmHeader* header, EosDetector* eosDetector, ChatTemplate* chatTemplate ) {
323
+ ApiServer (RootLlmInference * inference, Tokenizer * tokenizer, Sampler * sampler, AppCliArgs * args, LlmHeader * header, EosDetector * eosDetector, ChatTemplateGenerator *templateGenerator ) {
324
324
this ->inference = inference;
325
325
this ->tokenizer = tokenizer;
326
326
this ->sampler = sampler;
327
327
this ->args = args;
328
328
this ->header = header;
329
329
this ->eosDetector = eosDetector;
330
- this ->chatTemplate = chatTemplate ;
330
+ this ->templateGenerator = templateGenerator ;
331
331
}
332
332
333
333
void complete (HttpRequest& request) {
@@ -345,7 +345,7 @@ class ApiServer {
345
345
inputItems[i].message = deltaPrompt[i].content ;
346
346
}
347
347
348
- GeneratedChat inputPrompt = chatTemplate ->generate (nInputItems, inputItems, true );
348
+ GeneratedChat inputPrompt = templateGenerator ->generate (nInputItems, inputItems, true );
349
349
printf (" 🔹%s🔸" , inputPrompt.content );
350
350
351
351
int nPromptTokens;
@@ -484,7 +484,7 @@ class ApiServer {
484
484
}
485
485
};
486
486
487
- void handleCompletionsRequest (HttpRequest& request, ApiServer* api) {
487
+ void handleCompletionsRequest (HttpRequest& request, ApiServer * api) {
488
488
api->complete (request);
489
489
}
490
490
@@ -500,9 +500,9 @@ static void server(AppInferenceContext *context) {
500
500
int serverSocket = createServerSocket (context->args ->port );
501
501
502
502
TokenizerChatStops stops (context->tokenizer );
503
- ChatTemplate chatTemplate (context->args ->chatTemplateType , context->tokenizer ->chatTemplate , stops.stops [0 ]);
503
+ ChatTemplateGenerator templateGenerator (context->args ->chatTemplateType , context->tokenizer ->chatTemplate , stops.stops [0 ]);
504
504
EosDetector eosDetector (stops.nStops , context->tokenizer ->eosTokenIds .data (), stops.stops , stops.maxStopLength , stops.maxStopLength );
505
- ApiServer api (context->inference , context->tokenizer , context->sampler , context->args , context->header , &eosDetector, &chatTemplate );
505
+ ApiServer api (context->inference , context->tokenizer , context->sampler , context->args , context->header , &eosDetector, &templateGenerator );
506
506
507
507
printf (" Server URL: http://127.0.0.1:%d/v1/\n " , context->args ->port );
508
508
0 commit comments