Skip to content

Commit f61c984

Browse files
Abatom authored and LeiWang1999 committed
[Frontend] MQLLMEngine supports profiling. (vllm-project#8761)
Signed-off-by: LeiWang1999 <leiwang1999@outlook.com>
1 parent 2161ca3 commit f61c984

File tree

3 files changed

+45
-7
lines changed

3 files changed

+45
-7
lines changed

vllm/engine/multiprocessing/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,13 @@ class RPCStartupResponse:
107107
tracing_enabled: bool
108108

109109

110-
RPC_REQUEST_T = Union[RPCProcessRequest, RPCAbortRequest, RPCStartupRequest]
110+
class RPCUProfileRequest(Enum):
111+
START_PROFILE = 1
112+
STOP_PROFILE = 2
113+
114+
115+
RPC_REQUEST_T = Union[RPCProcessRequest, RPCAbortRequest, RPCStartupRequest,
116+
RPCUProfileRequest]
111117

112118
REQUEST_OUTPUTS_T = Union[List[RequestOutput], RPCError]
113119

vllm/engine/multiprocessing/client.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
IPC_OUTPUT_EXT, RPC_REQUEST_T,
2222
VLLM_RPC_SUCCESS_STR, RPCAbortRequest,
2323
RPCError, RPCProcessRequest,
24-
RPCStartupRequest, RPCStartupResponse)
24+
RPCStartupRequest, RPCStartupResponse,
25+
RPCUProfileRequest)
2526
# yapf: enable
2627
from vllm.envs import VLLM_RPC_TIMEOUT
2728
from vllm.inputs import PromptType
@@ -38,10 +39,10 @@
3839

3940
class MQClientClosedError(Exception):
4041
"""Exception class raised when the client is used post-close.
41-
42+
4243
The client can be closed, which closes the ZMQ context. This normally
43-
happens on server shutdown. In some cases, methods like abort and
44-
do_log_stats will still be called and then try to open a socket, which
44+
happens on server shutdown. In some cases, methods like abort and
45+
do_log_stats will still be called and then try to open a socket, which
4546
causes a ZMQError and creates a huge stack trace.
4647
So, we throw this error such that we can suppress it.
4748
"""
@@ -345,7 +346,7 @@ async def do_log_stats(self):
345346
async def check_health(self):
346347
"""
347348
The check health loop probes the health status of the
348-
Engine's health every N seconds and sets _errored_with
349+
Engine's health every N seconds and sets _errored_with
349350
if the engine is unhealthy.
350351
"""
351352
if self._errored_with is not None:
@@ -561,3 +562,15 @@ async def _process_request(
561562
await self.abort(request_id)
562563
finally:
563564
self.output_queues.pop(request_id)
565+
566+
async def start_profile(self) -> None:
567+
"""Start profiling the engine"""
568+
569+
await self._send_one_way_rpc_request(
570+
request=RPCUProfileRequest.START_PROFILE, socket=self.input_socket)
571+
572+
async def stop_profile(self) -> None:
573+
"""Stop profiling the engine"""
574+
575+
await self._send_one_way_rpc_request(
576+
request=RPCUProfileRequest.STOP_PROFILE, socket=self.input_socket)

vllm/engine/multiprocessing/engine.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@
1818
IPC_OUTPUT_EXT, REQUEST_OUTPUTS_T,
1919
VLLM_RPC_SUCCESS_STR, RPCAbortRequest,
2020
RPCError, RPCProcessRequest,
21-
RPCStartupRequest, RPCStartupResponse)
21+
RPCStartupRequest, RPCStartupResponse,
22+
RPCUProfileRequest)
2223
# yapf: enable
2324
from vllm.envs import VLLM_RPC_TIMEOUT
25+
from vllm.executor.gpu_executor import GPUExecutor
2426
from vllm.logger import init_logger
2527
from vllm.outputs import RequestOutput
2628
from vllm.usage.usage_lib import UsageContext
@@ -249,6 +251,11 @@ def handle_new_input(self):
249251
self._handle_process_request(request)
250252
elif isinstance(request, RPCAbortRequest):
251253
self._handle_abort_request(request)
254+
elif isinstance(request, RPCUProfileRequest):
255+
if request == RPCUProfileRequest.START_PROFILE:
256+
self.start_profile()
257+
else:
258+
self.stop_profile()
252259
else:
253260
raise ValueError("Unknown RPCRequest Type: "
254261
f"{type(request)}")
@@ -356,6 +363,18 @@ def _set_errored(self, e: BaseException):
356363
def _alive(self):
357364
self._last_alive_time = time.time()
358365

366+
def start_profile(self) -> None:
367+
if type(self.engine.model_executor) is GPUExecutor:
368+
self.engine.model_executor.start_profile()
369+
else:
370+
self.engine.model_executor._run_workers("start_profile")
371+
372+
def stop_profile(self) -> None:
373+
if type(self.engine.model_executor) is GPUExecutor:
374+
self.engine.model_executor.stop_profile()
375+
else:
376+
self.engine.model_executor._run_workers("stop_profile")
377+
359378

360379
def run_mp_engine(engine_args: AsyncEngineArgs, usage_context: UsageContext,
361380
ipc_path: str):

0 commit comments

Comments
 (0)