@@ -158,7 +158,7 @@ def create_app(
 def prepare_request_resources(
     body: CreateCompletionRequest | CreateChatCompletionRequest,
     llama_proxy: LlamaProxy,
-    body_model: str,
+    body_model: str | None,
     kwargs,
 ) -> llama_cpp.Llama:
     if llama_proxy is None:
@@ -192,18 +192,15 @@ async def get_event_publisher(
     request: Request,
     inner_send_chan: MemoryObjectSendStream[typing.Any],
     body: CreateCompletionRequest | CreateChatCompletionRequest,
-    body_model: str,
+    body_model: str | None,
     llama_call,
     kwargs,
 ):
     server_settings = next(get_server_settings())
     interrupt_requests = (
         server_settings.interrupt_requests if server_settings else False
     )
-    async with contextlib.AsyncExitStack() as exit_stack:
-        llama_proxy: LlamaProxy = await exit_stack.enter_async_context(
-            contextlib.asynccontextmanager(get_llama_proxy)()
-        )
+    async with contextlib.asynccontextmanager(get_llama_proxy)() as llama_proxy:
         llama = prepare_request_resources(body, llama_proxy, body_model, kwargs)
         async with inner_send_chan:
             try:
@@ -345,10 +342,7 @@ async def create_completion(
     )

     # handle regular request
-    async with contextlib.AsyncExitStack() as exit_stack:
-        llama_proxy: LlamaProxy = await exit_stack.enter_async_context(
-            contextlib.asynccontextmanager(get_llama_proxy)()
-        )
+    async with contextlib.asynccontextmanager(get_llama_proxy)() as llama_proxy:
         llama = prepare_request_resources(body, llama_proxy, body_model, kwargs)

         if await request.is_disconnected():
@@ -517,10 +511,7 @@ async def create_chat_completion(
     )

     # handle regular request
-    async with contextlib.AsyncExitStack() as exit_stack:
-        llama_proxy: LlamaProxy = await exit_stack.enter_async_context(
-            contextlib.asynccontextmanager(get_llama_proxy)()
-        )
+    async with contextlib.asynccontextmanager(get_llama_proxy)() as llama_proxy:
         llama = prepare_request_resources(body, llama_proxy, body_model, kwargs)

         if await request.is_disconnected():
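
The diff above replaces the AsyncExitStack-based entry into the llama proxy dependency with a direct `async with` over the generator wrapped by `contextlib.asynccontextmanager`. A minimal standalone sketch of that pattern, assuming a hypothetical async-generator dependency `get_resource` (the names below are illustrative, not part of this PR):

import asyncio
import contextlib


async def get_resource():
    # Hypothetical async-generator dependency, analogous to get_llama_proxy.
    resource = "resource"
    try:
        yield resource
    finally:
        pass  # release the resource here


async def handler_before():
    # Old pattern: enter the dependency via an AsyncExitStack.
    async with contextlib.AsyncExitStack() as exit_stack:
        resource = await exit_stack.enter_async_context(
            contextlib.asynccontextmanager(get_resource)()
        )
        print(resource)


async def handler_after():
    # New pattern: use the wrapped generator directly as an async context manager.
    async with contextlib.asynccontextmanager(get_resource)() as resource:
        print(resource)


asyncio.run(handler_before())
asyncio.run(handler_after())

Both handlers acquire and release the resource the same way; the second form simply drops the intermediate exit stack when only one context manager is being entered.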