Skip to content

Commit afaa863

Browse files
SolitaryThinker authored and LeiWang1999 committed
[bugfix] torch profiler bug for single gpu with GPUExecutor (vllm-project#8354)
Signed-off-by: LeiWang1999 <leiwang1999@outlook.com>
1 parent 2df5e25 commit afaa863

File tree

3 files changed

+27
-5
lines changed

3 files changed

+27
-5
lines changed

examples/offline_inference_with_profiler.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
1717

1818
# Create an LLM.
19-
llm = LLM(model="facebook/opt-125m")
19+
llm = LLM(model="facebook/opt-125m", tensor_parallel_size=1)
2020

2121
llm.start_profile()
2222

vllm/engine/async_llm_engine.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
from vllm.engine.llm_engine import LLMEngine, SchedulerOutputState
1414
from vllm.engine.metrics_types import StatLoggerBase
1515
from vllm.executor.executor_base import ExecutorAsyncBase
16+
from vllm.executor.gpu_executor import GPUExecutorAsync
1617
from vllm.executor.ray_utils import initialize_ray_cluster
1718
from vllm.inputs import PromptInputs
1819
from vllm.logger import init_logger
@@ -1019,7 +1020,17 @@ def remove_logger(self, logger_name: str) -> None:
10191020
self.engine.remove_logger(logger_name=logger_name)
10201021

10211022
async def start_profile(self) -> None:
1022-
self.engine.model_executor._run_workers("start_profile")
1023+
# using type instead of isinstance to check to avoid capturing
1024+
# inherited classes
1025+
if type(self.engine.model_executor) == GPUExecutorAsync:
1026+
self.engine.model_executor.start_profile()
1027+
else:
1028+
self.engine.model_executor._run_workers("start_profile")
10231029

10241030
async def stop_profile(self) -> None:
1025-
self.engine.model_executor._run_workers("stop_profile")
1031+
# using type instead of isinstance to check to avoid capturing
1032+
# inherited classes
1033+
if type(self.engine.model_executor) == GPUExecutorAsync:
1034+
self.engine.model_executor.stop_profile()
1035+
else:
1036+
self.engine.model_executor._run_workers("stop_profile")

vllm/engine/llm_engine.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
from vllm.engine.output_processor.stop_checker import StopChecker
2727
from vllm.engine.output_processor.util import create_output_by_sequence_group
2828
from vllm.executor.executor_base import ExecutorBase
29+
from vllm.executor.gpu_executor import GPUExecutor
2930
from vllm.executor.ray_utils import initialize_ray_cluster
3031
from vllm.inputs import (INPUT_REGISTRY, EncoderDecoderLLMInputs,
3132
InputRegistry, LLMInputs, PromptInputs)
@@ -1597,10 +1598,20 @@ def check_health(self) -> None:
15971598
self.model_executor.check_health()
15981599

15991600
def start_profile(self) -> None:
1600-
self.model_executor.start_profile()
1601+
# using type instead of isinstance to check to avoid capturing
1602+
# inherited classes (MultiprocessingGPUExecutor)
1603+
if type(self.model_executor) == GPUExecutor:
1604+
self.model_executor.start_profile()
1605+
else:
1606+
self.model_executor._run_workers("start_profile")
16011607

16021608
def stop_profile(self) -> None:
1603-
self.model_executor.stop_profile()
1609+
# using type instead of isinstance to check to avoid capturing
1610+
# inherited classes (MultiprocessingGPUExecutor)
1611+
if type(self.model_executor) == GPUExecutor:
1612+
self.model_executor.stop_profile()
1613+
else:
1614+
self.model_executor._run_workers("stop_profile")
16041615

16051616
def is_tracing_enabled(self) -> bool:
16061617
return self.tracer is not None

0 commit comments

Comments (0)