@@ -7,7 +7,7 @@

 from anyio import Lock
 from functools import partial
-from typing import Iterator, List, Optional, Union, Dict
+from typing import List, Optional, Union, Dict

 import llama_cpp

@@ -154,11 +154,13 @@ def create_app(

    return app

+
def prepare_request_resources(
    body: CreateCompletionRequest | CreateChatCompletionRequest,
    llama_proxy: LlamaProxy,
    body_model: str,
-    kwargs) -> llama_cpp.Llama:
+    kwargs,
+) -> llama_cpp.Llama:
    if llama_proxy is None:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
@@ -199,7 +201,9 @@ async def get_event_publisher(
        server_settings.interrupt_requests if server_settings else False
    )
    async with contextlib.AsyncExitStack() as exit_stack:
-        llama_proxy: LlamaProxy = await exit_stack.enter_async_context(contextlib.asynccontextmanager(get_llama_proxy)())
+        llama_proxy: LlamaProxy = await exit_stack.enter_async_context(
+            contextlib.asynccontextmanager(get_llama_proxy)()
+        )
        llama = prepare_request_resources(body, llama_proxy, body_model, kwargs)
        async with inner_send_chan:
            try:
@@ -215,7 +219,9 @@ async def get_event_publisher(
            except anyio.get_cancelled_exc_class() as e:
                print("disconnected")
                with anyio.move_on_after(1, shield=True):
-                    print(f"Disconnected from client (via refresh/close) {request.client}")
+                    print(
+                        f"Disconnected from client (via refresh/close) {request.client}"
+                    )
                    raise e

@@ -340,11 +346,15 @@ async def create_completion(

    # handle regular request
    async with contextlib.AsyncExitStack() as exit_stack:
-        llama_proxy: LlamaProxy = await exit_stack.enter_async_context(contextlib.asynccontextmanager(get_llama_proxy)())
+        llama_proxy: LlamaProxy = await exit_stack.enter_async_context(
+            contextlib.asynccontextmanager(get_llama_proxy)()
+        )
        llama = prepare_request_resources(body, llama_proxy, body_model, kwargs)

        if await request.is_disconnected():
-            print(f"Disconnected from client (via refresh/close) before llm invoked {request.client}")
+            print(
+                f"Disconnected from client (via refresh/close) before llm invoked {request.client}"
+            )
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Client closed request",
@@ -507,12 +517,16 @@ async def create_chat_completion(
    )

    # handle regular request
-    with contextlib.AsyncExitStack() as exit_stack:
-        llama_proxy: LlamaProxy = await exit_stack.enter_async_context(contextlib.asynccontextmanager(get_llama_proxy)())
+    async with contextlib.AsyncExitStack() as exit_stack:
+        llama_proxy: LlamaProxy = await exit_stack.enter_async_context(
+            contextlib.asynccontextmanager(get_llama_proxy)()
+        )
        llama = prepare_request_resources(body, llama_proxy, body_model, kwargs)

        if await request.is_disconnected():
-            print(f"Disconnected from client (via refresh/close) before llm invoked {request.client}")
+            print(
+                f"Disconnected from client (via refresh/close) before llm invoked {request.client}"
+            )
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Client closed request",