
Commit 21dbdf5

hongxiayang authored and Alvant committed
[ROCm][AMD] unify CUDA_VISIBLE_DEVICES usage in cuda/rocm (vllm-project#6352)
Signed-off-by: Alvant <alvasian@yandex.ru>
1 parent 52adf89 commit 21dbdf5

5 files changed, +10 -34 lines changed

Dockerfile.rocm

Lines changed: 7 additions & 7 deletions
@@ -52,25 +52,25 @@ RUN pip install --upgrade pip
 # Remove sccache so it doesn't interfere with ccache
 # TODO: implement sccache support across components
 RUN apt-get purge -y sccache; pip uninstall -y sccache; rm -f "$(which sccache)"
-# Install torch == 2.4.0 on ROCm
+# Install torch == 2.5.0 on ROCm
 RUN case "$(ls /opt | grep -Po 'rocm-[0-9]\.[0-9]')" in \
     *"rocm-5.7"*) \
         pip uninstall -y torch torchaudio torchvision \
         && pip install --no-cache-dir --pre \
-            torch==2.4.0.dev20240612 torchaudio==2.4.0.dev20240612 \
-            torchvision==0.19.0.dev20240612 \
+            torch==2.5.0.dev20240710 torchaudio==2.4.0.dev20240710 \
+            torchvision==0.20.0.dev20240710 \
            --index-url https://download.pytorch.org/whl/nightly/rocm5.7;; \
     *"rocm-6.0"*) \
         pip uninstall -y torch torchaudio torchvision \
         && pip install --no-cache-dir --pre \
-            torch==2.4.0.dev20240612 torchaudio==2.4.0.dev20240612 \
-            torchvision==0.19.0.dev20240612 \
+            torch==2.5.0.dev20240710 torchaudio==2.4.0.dev20240710 \
+            torchvision==0.20.0.dev20240710 \
            --index-url https://download.pytorch.org/whl/nightly/rocm6.0;; \
     *"rocm-6.1"*) \
         pip uninstall -y torch torchaudio torchvision \
         && pip install --no-cache-dir --pre \
-            torch==2.4.0.dev20240612 torchaudio==2.4.0.dev20240612 \
-            torchvision==0.19.0.dev20240612 \
+            torch==2.5.0.dev20240710 torchaudio==2.4.0.dev20240710 \
+            torchvision==0.20.0.dev20240710 \
            --index-url https://download.pytorch.org/whl/nightly/rocm6.1;; \
     *) ;; esac
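
The bump moves all three ROCm branches from the 2024-06-12 nightlies to the 2024-07-10 nightlies (torch 2.4.0 to 2.5.0, torchvision 0.19 to 0.20). A quick way to sanity-check the result inside the built image, as a sketch (the exact version suffix depends on which ROCm branch the wheel targets):

import torch

print(torch.__version__)          # expected to start with "2.5.0.dev20240710"
print(torch.version.hip)          # HIP version on ROCm builds of torch, None on CUDA builds
print(torch.cuda.is_available())  # ROCm devices are exposed through the torch.cuda API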

tests/distributed/test_utils.py

Lines changed: 1 addition & 6 deletions
@@ -1,7 +1,7 @@
 import ray
 
 import vllm.envs as envs
-from vllm.utils import (cuda_device_count_stateless, is_hip,
+from vllm.utils import (cuda_device_count_stateless,
                         update_environment_variables)
 
 
@@ -22,11 +22,6 @@ def get_cuda_visible_devices(self):
 def test_cuda_device_count_stateless():
     """Test that cuda_device_count_stateless changes return value if
     CUDA_VISIBLE_DEVICES is changed."""
-    if is_hip():
-        # Set HIP_VISIBLE_DEVICES == CUDA_VISIBLE_DEVICES. Conversion
-        # is handled by `update_environment_variables`
-        update_environment_variables(
-            {"CUDA_VISIBLE_DEVICES": envs.CUDA_VISIBLE_DEVICES})
     actor = _CUDADeviceCountStatelessTestActor.options(  # type: ignore
         num_gpus=2).remote()
     assert sorted(ray.get(
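
With the is_hip() branch removed, the test exercises the same code path on CUDA and ROCm. The property under test, as a minimal sketch (not the actual test; it assumes a machine with at least two visible GPUs):

from vllm.utils import cuda_device_count_stateless, update_environment_variables

# cuda_device_count_stateless re-reads CUDA_VISIBLE_DEVICES on every call,
# unlike torch.cuda.device_count(), which caches its result.
update_environment_variables({"CUDA_VISIBLE_DEVICES": "0,1"})
assert cuda_device_count_stateless() == 2

update_environment_variables({"CUDA_VISIBLE_DEVICES": "0"})
assert cuda_device_count_stateless() == 1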

vllm/config.py

Lines changed: 1 addition & 8 deletions
@@ -6,15 +6,14 @@
 import torch
 from transformers import PretrainedConfig
 
-import vllm.envs as envs
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
 from vllm.model_executor.models import ModelRegistry
 from vllm.tracing import is_otel_installed
 from vllm.transformers_utils.config import get_config, get_hf_text_config
 from vllm.utils import (cuda_device_count_stateless, get_cpu_memory, is_cpu,
                         is_hip, is_neuron, is_openvino, is_tpu, is_xpu,
-                        print_warning_once, update_environment_variables)
+                        print_warning_once)
 
 if TYPE_CHECKING:
     from ray.util.placement_group import PlacementGroup
@@ -695,12 +694,6 @@ def __init__(
             self.distributed_executor_backend = backend
             logger.info("Defaulting to use %s for distributed inference",
                         backend)
-        # If CUDA_VISIBLE_DEVICES is set on ROCm prior to vLLM init,
-        # propagate changes to HIP_VISIBLE_DEVICES (conversion handled by
-        # the update_environment_variables function)
-        if is_hip() and envs.CUDA_VISIBLE_DEVICES:
-            update_environment_variables(
-                {"CUDA_VISIBLE_DEVICES": envs.CUDA_VISIBLE_DEVICES})
 
         self._verify_args()
         self.rank = 0
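
With the mirroring removed from ParallelConfig, GPU selection reads the same on both platforms; the premise of this commit is that the ROCm runtime honors CUDA_VISIBLE_DEVICES directly. A usage sketch under that assumption (the model name is only an example):

import os

# One variable selects devices on both CUDA and ROCm builds of vLLM.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

from vllm import LLM

llm = LLM(model="facebook/opt-125m", tensor_parallel_size=2)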

vllm/utils.py

Lines changed: 0 additions & 4 deletions
@@ -386,10 +386,6 @@ def get_open_port() -> int:
 
 
 def update_environment_variables(envs: Dict[str, str]):
-    if is_hip() and "CUDA_VISIBLE_DEVICES" in envs:
-        # Propagate changes to CUDA_VISIBLE_DEVICES to
-        # ROCm's HIP_VISIBLE_DEVICES as well
-        envs["HIP_VISIBLE_DEVICES"] = envs["CUDA_VISIBLE_DEVICES"]
     for k, v in envs.items():
         if k in os.environ and os.environ[k] != v:
             logger.warning(
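
After the deletion, update_environment_variables is platform-agnostic: it applies the requested variables and warns when it overwrites a different pre-existing value. A self-contained sketch of the remaining behavior (the real helper emits the warning through vLLM's logger; print keeps this example standalone):

import os
from typing import Dict


def update_environment_variables(envs: Dict[str, str]):
    for k, v in envs.items():
        if k in os.environ and os.environ[k] != v:
            # The actual implementation logs this via logger.warning.
            print(f"Overwriting environment variable {k} "
                  f"({os.environ[k]!r} -> {v!r})")
        os.environ[k] = v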

vllm/worker/worker_base.py

Lines changed: 1 addition & 9 deletions
@@ -11,7 +11,7 @@
 from vllm.lora.request import LoRARequest
 from vllm.sequence import (ExecuteModelRequest, IntermediateTensors,
                            SamplerOutput)
-from vllm.utils import (enable_trace_function_call_for_thread, is_hip,
+from vllm.utils import (enable_trace_function_call_for_thread,
                         update_environment_variables)
 from vllm.worker.model_runner_base import ModelRunnerBase, ModelRunnerInputBase
 
@@ -309,14 +309,6 @@ def update_environment_variables(envs: Dict[str, str]) -> None:
             # overwriting CUDA_VISIBLE_DEVICES is desired behavior
             # suppress the warning in `update_environment_variables`
             del os.environ[key]
-        if is_hip():
-            hip_env_var = "HIP_VISIBLE_DEVICES"
-            if hip_env_var in os.environ:
-                logger.warning(
-                    "Ignoring pre-set environment variable `%s=%s` as "
-                    "%s has also been set, which takes precedence.",
-                    hip_env_var, os.environ[hip_env_var], key)
-                os.environ.pop(hip_env_var, None)
         update_environment_variables(envs)
 
     def init_worker(self, *args, **kwargs):
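
The worker wrapper's static method is now reduced to its platform-neutral part: drop any pre-set CUDA_VISIBLE_DEVICES so the shared helper does not warn, then delegate to it. A sketch of that flow (the enclosing class name is assumed; only the relevant method is shown):

import os
from typing import Dict

from vllm.utils import update_environment_variables


class WorkerWrapperBase:  # class name assumed for illustration

    @staticmethod
    def update_environment_variables(envs: Dict[str, str]) -> None:
        key = "CUDA_VISIBLE_DEVICES"
        if key in envs and key in os.environ:
            # Overwriting CUDA_VISIBLE_DEVICES is the desired behavior here,
            # so remove the stale value to suppress the helper's warning.
            del os.environ[key]
        update_environment_variables(envs)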
