@@ -14,17 +14,22 @@ ARG PYTHON_VERSION=3.12
ARG TARGETPLATFORM
ENV DEBIAN_FRONTEND=noninteractive

- # Install minimal dependencies and uv
- RUN apt-get update -y \
-     && apt-get install -y ccache git curl wget sudo \
-     && curl -LsSf https://astral.sh/uv/install.sh | sh
-
- # Add uv to PATH
- ENV PATH="/root/.local/bin:$PATH"
- # Create venv with specified Python and activate by placing at the front of path
- ENV VIRTUAL_ENV="/opt/venv"
- RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
- ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+ # Install Python and other dependencies
+ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
+     && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
+     && apt-get update -y \
+     && apt-get install -y ccache software-properties-common git curl sudo \
+     && add-apt-repository ppa:deadsnakes/ppa \
+     && apt-get update -y \
+     && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \
+     && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
+     && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
+     && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
+     && curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \
+     && python3 --version && python3 -m pip --version
+ # Install uv for faster pip installs
+ RUN --mount=type=cache,target=/root/.cache/uv \
+     python3 -m pip install uv

# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
@@ -46,20 +51,22 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/

WORKDIR /workspace

+ # install build and runtime dependencies
+
# arm64 (GH200) build follows the practice of "use existing pytorch" build,
# we need to install torch and torchvision from the nightly builds first,
# pytorch will not appear as a vLLM dependency in all of the following steps
# after this step
RUN --mount=type=cache,target=/root/.cache/uv \
    if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
-         uv pip install --index-url https://download.pytorch.org/whl/nightly/cu128 "torch==2.8.0.dev20250318+cu128" "torchvision==0.22.0.dev20250319" ; \
-         uv pip install --index-url https://download.pytorch.org/whl/nightly/cu128 --pre pytorch_triton==3.3.0+gitab727c40; \
+         uv pip install --system --index-url https://download.pytorch.org/whl/nightly/cu128 "torch==2.8.0.dev20250318+cu128" "torchvision==0.22.0.dev20250319" ; \
+         uv pip install --system --index-url https://download.pytorch.org/whl/nightly/cu128 --pre pytorch_triton==3.3.0+gitab727c40; \
    fi

COPY requirements/common.txt requirements/common.txt
COPY requirements/cuda.txt requirements/cuda.txt
RUN --mount=type=cache,target=/root/.cache/uv \
-     uv pip install -r requirements/cuda.txt
+     uv pip install --system -r requirements/cuda.txt

# cuda arch list used by torch
# can be useful for both `dev` and `test`
@@ -84,7 +91,7 @@ COPY requirements/build.txt requirements/build.txt
ENV UV_HTTP_TIMEOUT=500

RUN --mount=type=cache,target=/root/.cache/uv \
-     uv pip install -r requirements/build.txt
+     uv pip install --system -r requirements/build.txt

COPY . .
ARG GIT_REPO_CHECK=0
@@ -156,7 +163,7 @@ COPY requirements/lint.txt requirements/lint.txt
COPY requirements/test.txt requirements/test.txt
COPY requirements/dev.txt requirements/dev.txt
RUN --mount=type=cache,target=/root/.cache/uv \
-     uv pip install -r requirements/dev.txt
+     uv pip install --system -r requirements/dev.txt
# ################### DEV IMAGE ####################

# ################### vLLM installation IMAGE ####################
@@ -172,18 +179,23 @@ ARG TARGETPLATFORM
RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \
    echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment

- # Install minimal dependencies and uv
- RUN apt-get update -y \
-     && apt-get install -y ccache git curl wget sudo vim \
-     && apt-get install -y ffmpeg libsm6 libxext6 libgl1 libibverbs-dev \
-     && curl -LsSf https://astral.sh/uv/install.sh | sh
-
- # Add uv to PATH
- ENV PATH="/root/.local/bin:$PATH"
- # Create venv with specified Python and activate by placing at the front of path
- ENV VIRTUAL_ENV="/opt/venv"
- RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
- ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+ # Install Python and other dependencies
+ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
+     && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
+     && apt-get update -y \
+     && apt-get install -y ccache software-properties-common git curl wget sudo vim python3-pip \
+     && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
+     && add-apt-repository ppa:deadsnakes/ppa \
+     && apt-get update -y \
+     && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv libibverbs-dev \
+     && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
+     && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
+     && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
+     && curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \
+     && python3 --version && python3 -m pip --version
+ # Install uv for faster pip installs
+ RUN --mount=type=cache,target=/root/.cache/uv \
+     python3 -m pip install uv

# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out
# Reference: https://github.com/astral-sh/uv/pull/1694
@@ -201,14 +213,14 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
# after this step
RUN --mount=type=cache,target=/root/.cache/uv \
    if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
-         uv pip install --index-url https://download.pytorch.org/whl/nightly/cu128 "torch==2.8.0.dev20250318+cu128" "torchvision==0.22.0.dev20250319" ; \
-         uv pip install --index-url https://download.pytorch.org/whl/nightly/cu128 --pre pytorch_triton==3.3.0+gitab727c40; \
+         uv pip install --system --index-url https://download.pytorch.org/whl/nightly/cu128 "torch==2.8.0.dev20250318+cu128" "torchvision==0.22.0.dev20250319" ; \
+         uv pip install --system --index-url https://download.pytorch.org/whl/nightly/cu128 --pre pytorch_triton==3.3.0+gitab727c40; \
    fi

# Install vllm wheel first, so that torch etc will be installed.
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
    --mount=type=cache,target=/root/.cache/uv \
-     uv pip install dist/*.whl --verbose
+     uv pip install --system dist/*.whl --verbose

# If we need to build FlashInfer wheel before its release:
# $ export FLASHINFER_ENABLE_AOT=1
@@ -223,8 +235,9 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
# $ # upload the wheel to a public location, e.g. https://wheels.vllm.ai/flashinfer/524304395bd1d8cd7d07db083859523fcaa246a4/flashinfer_python-0.2.1.post1+cu124torch2.5-cp38-abi3-linux_x86_64.whl

RUN --mount=type=cache,target=/root/.cache/uv \
+     . /etc/environment && \
    if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
-         uv pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post2/flashinfer_python-0.2.1.post2+cu124torch2.6-cp38-abi3-linux_x86_64.whl ; \
+         uv pip install --system https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post2/flashinfer_python-0.2.1.post2+cu124torch2.6-cp38-abi3-linux_x86_64.whl ; \
    fi
COPY examples examples
@@ -234,7 +247,7 @@ COPY examples examples
# TODO: Remove this once FlashInfer AOT wheel is fixed
COPY requirements/build.txt requirements/build.txt
RUN --mount=type=cache,target=/root/.cache/uv \
-     uv pip install -r requirements/build.txt
+     uv pip install --system -r requirements/build.txt

# ################### vLLM installation IMAGE ####################
@@ -251,15 +264,15 @@ ENV UV_HTTP_TIMEOUT=500

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
-     uv pip install -r requirements/dev.txt
+     uv pip install --system -r requirements/dev.txt

# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
-     uv pip install -e tests/vllm_test_utils
+     uv pip install --system -e tests/vllm_test_utils

# enable fast downloads from hf (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
-     uv pip install hf_transfer
+     uv pip install --system hf_transfer
ENV HF_HUB_ENABLE_HF_TRANSFER 1

# Copy in the v1 package for testing (it isn't distributed yet)
@@ -284,9 +297,9 @@ ENV UV_HTTP_TIMEOUT=500
# install additional dependencies for openai api server
RUN --mount=type=cache,target=/root/.cache/uv \
    if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
-         uv pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
+         uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
    else \
-         uv pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.3' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
+         uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.3' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
    fi

ENV VLLM_USAGE_SOURCE production-docker-image