@@ -198,27 +198,25 @@ async def get_event_publisher(
     interrupt_requests = (
         server_settings.interrupt_requests if server_settings else False
     )
-    exit_stack = contextlib.AsyncExitStack()
-    llama_proxy: LlamaProxy = await exit_stack.enter_async_context(contextlib.asynccontextmanager(get_llama_proxy)())
-    llama = prepare_request_resources(body, llama_proxy, body_model, kwargs)
-    async with inner_send_chan:
-        try:
-            iterator = await run_in_threadpool(llama_call, llama, **kwargs)
-            async for chunk in iterate_in_threadpool(iterator):
-                await inner_send_chan.send(dict(data=json.dumps(chunk)))
-                if await request.is_disconnected():
-                    raise anyio.get_cancelled_exc_class()()
-                if interrupt_requests and llama_outer_lock.locked():
-                    await inner_send_chan.send(dict(data="[DONE]"))
-                    raise anyio.get_cancelled_exc_class()()
-            await inner_send_chan.send(dict(data="[DONE]"))
-        except anyio.get_cancelled_exc_class() as e:
-            print("disconnected")
-            with anyio.move_on_after(1, shield=True):
-                print(f"Disconnected from client (via refresh/close) {request.client}")
-            raise e
-        finally:
-            await exit_stack.aclose()
+    async with contextlib.AsyncExitStack() as exit_stack:
+        llama_proxy: LlamaProxy = await exit_stack.enter_async_context(contextlib.asynccontextmanager(get_llama_proxy)())
+        llama = prepare_request_resources(body, llama_proxy, body_model, kwargs)
+        async with inner_send_chan:
+            try:
+                iterator = await run_in_threadpool(llama_call, llama, **kwargs)
+                async for chunk in iterate_in_threadpool(iterator):
+                    await inner_send_chan.send(dict(data=json.dumps(chunk)))
+                    if await request.is_disconnected():
+                        raise anyio.get_cancelled_exc_class()()
+                    if interrupt_requests and llama_outer_lock.locked():
+                        await inner_send_chan.send(dict(data="[DONE]"))
+                        raise anyio.get_cancelled_exc_class()()
+                await inner_send_chan.send(dict(data="[DONE]"))
+            except anyio.get_cancelled_exc_class() as e:
+                print("disconnected")
+                with anyio.move_on_after(1, shield=True):
+                    print(f"Disconnected from client (via refresh/close) {request.client}")
+                raise e


 def _logit_bias_tokens_to_input_ids(
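For reference, a minimal self-contained sketch of the pattern the hunk above adopts: resources are entered on a contextlib.AsyncExitStack that is scoped by "async with", so cleanup runs on normal exit, on exceptions, and on cancellation, instead of calling exit_stack.aclose() by hand in a finally block. The resource name and helper below are illustrative, not part of this codebase.

import asyncio
import contextlib


@contextlib.asynccontextmanager
async def acquire_resource(name):
    # Stand-in for an expensive resource such as a loaded model.
    print(f"acquire {name}")
    try:
        yield name
    finally:
        # Runs on normal exit, on exceptions, and on task cancellation.
        print(f"release {name}")


async def handler():
    async with contextlib.AsyncExitStack() as stack:
        res = await stack.enter_async_context(acquire_resource("model"))
        print(f"using {res}")
        # No explicit stack.aclose() needed: leaving the "async with"
        # block unwinds every context entered on the stack.


asyncio.run(handler())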
@@ -341,23 +339,18 @@ async def create_completion(
     )

     # handle regular request
-    exit_stack = contextlib.AsyncExitStack()
-    llama_proxy: LlamaProxy = await exit_stack.enter_async_context(contextlib.asynccontextmanager(get_llama_proxy)())
-    llama = prepare_request_resources(body, llama_proxy, body_model, kwargs)
+    async with contextlib.AsyncExitStack() as exit_stack:
+        llama_proxy: LlamaProxy = await exit_stack.enter_async_context(contextlib.asynccontextmanager(get_llama_proxy)())
+        llama = prepare_request_resources(body, llama_proxy, body_model, kwargs)

-    if await request.is_disconnected():
-        print(f"Disconnected from client (via refresh/close) before llm invoked {request.client}")
-        await exit_stack.aclose()
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="Client closed request",
-        )
+        if await request.is_disconnected():
+            print(f"Disconnected from client (via refresh/close) before llm invoked {request.client}")
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail="Client closed request",
+            )

-    try:
-        completion: llama_cpp.CreateCompletionResponse = await run_in_threadpool(llama, **kwargs)
-    finally:
-        await exit_stack.aclose()
-    return completion
+        return await run_in_threadpool(llama, **kwargs)


 @router.post(
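The llama_proxy line in these hunks wraps a yield-based dependency with contextlib.asynccontextmanager so the exit stack can own its teardown. A hedged sketch of that mechanism, with get_proxy standing in for the project's get_llama_proxy:

import asyncio
import contextlib


async def get_proxy():
    # Stand-in dependency written as an async generator, in the style of a
    # yield-based dependency: set up, yield, then clean up.
    proxy = {"model": "stub"}
    try:
        yield proxy
    finally:
        proxy.clear()


async def endpoint():
    async with contextlib.AsyncExitStack() as exit_stack:
        # asynccontextmanager turns the generator function into a context
        # manager factory; enter_async_context ties its cleanup to the stack.
        proxy = await exit_stack.enter_async_context(
            contextlib.asynccontextmanager(get_proxy)()
        )
        return proxy["model"]


print(asyncio.run(endpoint()))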
@@ -514,23 +507,18 @@ async def create_chat_completion(
     )

     # handle regular request
-    exit_stack = contextlib.AsyncExitStack()
-    llama_proxy: LlamaProxy = await exit_stack.enter_async_context(contextlib.asynccontextmanager(get_llama_proxy)())
-    llama = prepare_request_resources(body, llama_proxy, body_model, kwargs)
-
-    if await request.is_disconnected():
-        print(f"Disconnected from client (via refresh/close) before llm invoked {request.client}")
-        await exit_stack.aclose()
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="Client closed request",
-        )
-
-    try:
-        completion: llama_cpp.ChatCompletion = await run_in_threadpool(llama.create_chat_completion, **kwargs)
-    finally:
-        await exit_stack.aclose()
-    return completion
+    async with contextlib.AsyncExitStack() as exit_stack:
+        llama_proxy: LlamaProxy = await exit_stack.enter_async_context(contextlib.asynccontextmanager(get_llama_proxy)())
+        llama = prepare_request_resources(body, llama_proxy, body_model, kwargs)
+
+        if await request.is_disconnected():
+            print(f"Disconnected from client (via refresh/close) before llm invoked {request.client}")
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail="Client closed request",
+            )
+
+        return await run_in_threadpool(llama.create_chat_completion, **kwargs)


 @router.get(
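Both non-streaming handlers above now check for a client disconnect and raise a 400 before the model call starts. A minimal FastAPI-style sketch of that guard; the route path and response are placeholders, not part of this diff:

from fastapi import FastAPI, HTTPException, Request, status

app = FastAPI()


@app.post("/demo")
async def demo(request: Request):
    if await request.is_disconnected():
        # The client went away before any work started, so fail fast
        # instead of running the expensive call.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Client closed request",
        )
    return {"ok": True}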