Skip to content

[BugFix] Nonzero exit code if MQLLMEngine startup fails #8572

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 18, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 5 additions & 11 deletions vllm/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from contextlib import asynccontextmanager
from functools import partial
from http import HTTPStatus
-from typing import AsyncIterator, Optional, Set
+from typing import AsyncIterator, Set

import uvloop
from fastapi import APIRouter, FastAPI, Request
Expand Down Expand Up @@ -95,7 +95,7 @@ async def _force_log():

@asynccontextmanager
async def build_async_engine_client(
-args: Namespace) -> AsyncIterator[Optional[EngineClient]]:
+args: Namespace) -> AsyncIterator[EngineClient]:

# Context manager to handle engine_client lifecycle
# Ensures everything is shutdown and cleaned up on error/exit
Expand All @@ -110,7 +110,7 @@ async def build_async_engine_client(
async def build_async_engine_client_from_engine_args(
engine_args: AsyncEngineArgs,
disable_frontend_multiprocessing: bool = False,
-) -> AsyncIterator[Optional[EngineClient]]:
+) -> AsyncIterator[EngineClient]:
"""
Create EngineClient, either:
- in-process using the AsyncLLMEngine Directly
Expand Down Expand Up @@ -188,10 +188,8 @@ async def build_async_engine_client_from_engine_args(
break
except TimeoutError:
if not engine_process.is_alive():
-logger.error("Engine process died before responding "
-"to readiness probe")
-yield None
-return
+raise RuntimeError(
+"Engine process failed to start") from None

yield mp_engine_client # type: ignore[misc]
finally:
Expand Down Expand Up @@ -532,10 +530,6 @@ def signal_handler(*_) -> None:
signal.signal(signal.SIGTERM, signal_handler)

async with build_async_engine_client(args) as engine_client:
-# If None, creation of the client failed and we exit.
-if engine_client is None:
-return
-
app = build_app(args)

model_config = await engine_client.get_model_config()
Expand Down
Loading