
Commit 1ee719a (1 parent: f4fb0ce)

fix: remove unnecessary explicit use of AsyncExitStack
fix: correct type hints for body_model

1 file changed (+5 −14 lines)

llama_cpp/server/app.py: 5 additions & 14 deletions
@@ -158,7 +158,7 @@ def create_app(
 def prepare_request_resources(
     body: CreateCompletionRequest | CreateChatCompletionRequest,
     llama_proxy: LlamaProxy,
-    body_model: str,
+    body_model: str | None,
     kwargs,
 ) -> llama_cpp.Llama:
     if llama_proxy is None:
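On the type-hint fix: clients may omit the model field of the request body, in which case body_model is None and the proxy falls back to a default model, so str | None is the accurate annotation. A minimal sketch of the case the old hint hid (ExampleRequest is a hypothetical stand-in for CreateCompletionRequest, assuming its model field is optional as in the server's pydantic models):

from typing import Optional

from pydantic import BaseModel


class ExampleRequest(BaseModel):
    # hypothetical stand-in: clients may omit "model" entirely
    model: Optional[str] = None


body = ExampleRequest()               # no model supplied by the client
body_model: str | None = body.model   # annotating this as plain str would be wrong
assert body_model is None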
@@ -192,18 +192,15 @@ async def get_event_publisher(
     request: Request,
     inner_send_chan: MemoryObjectSendStream[typing.Any],
     body: CreateCompletionRequest | CreateChatCompletionRequest,
-    body_model: str,
+    body_model: str | None,
     llama_call,
     kwargs,
 ):
     server_settings = next(get_server_settings())
     interrupt_requests = (
         server_settings.interrupt_requests if server_settings else False
     )
-    async with contextlib.AsyncExitStack() as exit_stack:
-        llama_proxy: LlamaProxy = await exit_stack.enter_async_context(
-            contextlib.asynccontextmanager(get_llama_proxy)()
-        )
+    async with contextlib.asynccontextmanager(get_llama_proxy)() as llama_proxy:
         llama = prepare_request_resources(body, llama_proxy, body_model, kwargs)
         async with inner_send_chan:
             try:
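On the AsyncExitStack removal: contextlib.asynccontextmanager turns an async generator into an async context manager factory, so the result can be entered directly with async with; an AsyncExitStack only earns its keep when several context managers must be composed dynamically. A minimal runnable sketch of the equivalence, using a hypothetical get_resource generator in place of get_llama_proxy:

import asyncio
import contextlib


async def get_resource():
    # hypothetical async generator dependency, standing in for get_llama_proxy
    resource = "llama"
    try:
        yield resource
    finally:
        print("released", resource)


async def old_form():
    # before: a single context manager routed through an AsyncExitStack
    async with contextlib.AsyncExitStack() as exit_stack:
        resource = await exit_stack.enter_async_context(
            contextlib.asynccontextmanager(get_resource)()
        )
        print("using", resource)


async def new_form():
    # after: enter the context manager directly
    async with contextlib.asynccontextmanager(get_resource)() as resource:
        print("using", resource)


asyncio.run(old_form())   # using llama / released llama
asyncio.run(new_form())   # same output, less machinery

Cleanup semantics are unchanged: in both forms the generator's finally block runs when the async with block exits.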
@@ -345,10 +342,7 @@ async def create_completion(
         )
 
     # handle regular request
-    async with contextlib.AsyncExitStack() as exit_stack:
-        llama_proxy: LlamaProxy = await exit_stack.enter_async_context(
-            contextlib.asynccontextmanager(get_llama_proxy)()
-        )
+    async with contextlib.asynccontextmanager(get_llama_proxy)() as llama_proxy:
        llama = prepare_request_resources(body, llama_proxy, body_model, kwargs)
 
        if await request.is_disconnected():
@@ -517,10 +511,7 @@ async def create_chat_completion(
         )
 
     # handle regular request
-    async with contextlib.AsyncExitStack() as exit_stack:
-        llama_proxy: LlamaProxy = await exit_stack.enter_async_context(
-            contextlib.asynccontextmanager(get_llama_proxy)()
-        )
+    async with contextlib.asynccontextmanager(get_llama_proxy)() as llama_proxy:
        llama = prepare_request_resources(body, llama_proxy, body_model, kwargs)
 
        if await request.is_disconnected():
