| ARG RELEASE |
| ARG LAUNCHPAD_BUILD_ARCH |
| LABEL org.opencontainers.image.ref.name=ubuntu |
| LABEL org.opencontainers.image.version=22.04 |
| ADD file:1b6c8c9518be42fa2afe5e241ca31677fce58d27cdfa88baa91a65a259be3637 in / |
| CMD ["/bin/bash"] |
| ENV NVARCH=x86_64 |
| ENV NVIDIA_REQUIRE_CUDA=cuda>=12.8 brand=unknown,driver>=470,driver<471 brand=grid,driver>=470,driver<471 brand=tesla,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=vapps,driver>=470,driver<471 brand=vpc,driver>=470,driver<471 brand=vcs,driver>=470,driver<471 brand=vws,driver>=470,driver<471 brand=cloudgaming,driver>=470,driver<471 brand=unknown,driver>=535,driver<536 brand=grid,driver>=535,driver<536 brand=tesla,driver>=535,driver<536 brand=nvidia,driver>=535,driver<536 brand=quadro,driver>=535,driver<536 brand=quadrortx,driver>=535,driver<536 brand=nvidiartx,driver>=535,driver<536 brand=vapps,driver>=535,driver<536 brand=vpc,driver>=535,driver<536 brand=vcs,driver>=535,driver<536 brand=vws,driver>=535,driver<536 brand=cloudgaming,driver>=535,driver<536 brand=unknown,driver>=550,driver<551 brand=grid,driver>=550,driver<551 brand=tesla,driver>=550,driver<551 brand=nvidia,driver>=550,driver<551 brand=quadro,driver>=550,driver<551 brand=quadrortx,driver>=550,driver<551 brand=nvidiartx,driver>=550,driver<551 brand=vapps,driver>=550,driver<551 brand=vpc,driver>=550,driver<551 brand=vcs,driver>=550,driver<551 brand=vws,driver>=550,driver<551 brand=cloudgaming,driver>=550,driver<551 brand=unknown,driver>=560,driver<561 brand=grid,driver>=560,driver<561 brand=tesla,driver>=560,driver<561 brand=nvidia,driver>=560,driver<561 brand=quadro,driver>=560,driver<561 brand=quadrortx,driver>=560,driver<561 brand=nvidiartx,driver>=560,driver<561 brand=vapps,driver>=560,driver<561 brand=vpc,driver>=560,driver<561 brand=vcs,driver>=560,driver<561 brand=vws,driver>=560,driver<561 brand=cloudgaming,driver>=560,driver<561 brand=unknown,driver>=565,driver<566 brand=grid,driver>=565,driver<566 brand=tesla,driver>=565,driver<566 brand=nvidia,driver>=565,driver<566 brand=quadro,driver>=565,driver<566 brand=quadrortx,driver>=565,driver<566 
brand=nvidiartx,driver>=565,driver<566 brand=vapps,driver>=565,driver<566 brand=vpc,driver>=565,driver<566 brand=vcs,driver>=565,driver<566 brand=vws,driver>=565,driver<566 brand=cloudgaming,driver>=565,driver<566 |
| ENV NV_CUDA_CUDART_VERSION=12.8.90-1 |
| ARG TARGETARCH |
| LABEL maintainer=NVIDIA CORPORATION <cudatools@nvidia.com> |
| RUN |1 TARGETARCH=amd64 /bin/sh -c apt-get update && apt-get install -y --no-install-recommends gnupg2 curl ca-certificates && curl -fsSLO https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${NVARCH}/cuda-keyring_1.1-1_all.deb && dpkg -i cuda-keyring_1.1-1_all.deb && apt-get purge --autoremove -y curl && rm -rf /var/lib/apt/lists/* # buildkit |
| ENV CUDA_VERSION=12.8.1 |
| RUN |1 TARGETARCH=amd64 /bin/sh -c apt-get update && apt-get install -y --no-install-recommends cuda-cudart-12-8=${NV_CUDA_CUDART_VERSION} cuda-compat-12-8 && rm -rf /var/lib/apt/lists/* # buildkit |
| RUN |1 TARGETARCH=amd64 /bin/sh -c echo "/usr/local/cuda/lib64" >> /etc/ld.so.conf.d/nvidia.conf # buildkit |
| ENV PATH=/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin |
| ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64 |
| COPY NGC-DL-CONTAINER-LICENSE / # buildkit |
| ENV NVIDIA_VISIBLE_DEVICES=all |
| ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility |
| ENV NV_CUDA_LIB_VERSION=12.8.1-1 |
| ENV NV_NVTX_VERSION=12.8.90-1 |
| ENV NV_LIBNPP_VERSION=12.3.3.100-1 |
| ENV NV_LIBNPP_PACKAGE=libnpp-12-8=12.3.3.100-1 |
| ENV NV_LIBCUSPARSE_VERSION=12.5.8.93-1 |
| ENV NV_LIBCUBLAS_PACKAGE_NAME=libcublas-12-8 |
| ENV NV_LIBCUBLAS_VERSION=12.8.4.1-1 |
| ENV NV_LIBCUBLAS_PACKAGE=libcublas-12-8=12.8.4.1-1 |
| ENV NV_LIBNCCL_PACKAGE_NAME=libnccl2 |
| ENV NV_LIBNCCL_PACKAGE_VERSION=2.25.1-1 |
| ENV NCCL_VERSION=2.25.1-1 |
| ENV NV_LIBNCCL_PACKAGE=libnccl2=2.25.1-1+cuda12.8 |
| ARG TARGETARCH |
| LABEL maintainer=NVIDIA CORPORATION <cudatools@nvidia.com> |
| RUN |1 TARGETARCH=amd64 /bin/sh -c apt-get update && apt-get install -y --no-install-recommends cuda-libraries-12-8=${NV_CUDA_LIB_VERSION} ${NV_LIBNPP_PACKAGE} cuda-nvtx-12-8=${NV_NVTX_VERSION} libcusparse-12-8=${NV_LIBCUSPARSE_VERSION} ${NV_LIBCUBLAS_PACKAGE} ${NV_LIBNCCL_PACKAGE} && rm -rf /var/lib/apt/lists/* # buildkit |
| RUN |1 TARGETARCH=amd64 /bin/sh -c apt-mark hold ${NV_LIBCUBLAS_PACKAGE_NAME} ${NV_LIBNCCL_PACKAGE_NAME} # buildkit |
| COPY entrypoint.d/ /opt/nvidia/entrypoint.d/ # buildkit |
| COPY nvidia_entrypoint.sh /opt/nvidia/ # buildkit |
| ENV NVIDIA_PRODUCT_NAME=CUDA |
| ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] |
| ENV NV_CUDA_LIB_VERSION=12.8.1-1 |
| ENV NV_CUDA_CUDART_DEV_VERSION=12.8.90-1 |
| ENV NV_NVML_DEV_VERSION=12.8.90-1 |
| ENV NV_LIBCUSPARSE_DEV_VERSION=12.5.8.93-1 |
| ENV NV_LIBNPP_DEV_VERSION=12.3.3.100-1 |
| ENV NV_LIBNPP_DEV_PACKAGE=libnpp-dev-12-8=12.3.3.100-1 |
| ENV NV_LIBCUBLAS_DEV_VERSION=12.8.4.1-1 |
| ENV NV_LIBCUBLAS_DEV_PACKAGE_NAME=libcublas-dev-12-8 |
| ENV NV_LIBCUBLAS_DEV_PACKAGE=libcublas-dev-12-8=12.8.4.1-1 |
| ENV NV_CUDA_NSIGHT_COMPUTE_VERSION=12.8.1-1 |
| ENV NV_CUDA_NSIGHT_COMPUTE_DEV_PACKAGE=cuda-nsight-compute-12-8=12.8.1-1 |
| ENV NV_NVPROF_VERSION=12.8.90-1 |
| ENV NV_NVPROF_DEV_PACKAGE=cuda-nvprof-12-8=12.8.90-1 |
| ENV NV_LIBNCCL_DEV_PACKAGE_NAME=libnccl-dev |
| ENV NV_LIBNCCL_DEV_PACKAGE_VERSION=2.25.1-1 |
| ENV NCCL_VERSION=2.25.1-1 |
| ENV NV_LIBNCCL_DEV_PACKAGE=libnccl-dev=2.25.1-1+cuda12.8 |
| ARG TARGETARCH |
| LABEL maintainer=NVIDIA CORPORATION <cudatools@nvidia.com> |
| RUN |1 TARGETARCH=amd64 /bin/sh -c apt-get update && apt-get install -y --no-install-recommends cuda-cudart-dev-12-8=${NV_CUDA_CUDART_DEV_VERSION} cuda-command-line-tools-12-8=${NV_CUDA_LIB_VERSION} cuda-minimal-build-12-8=${NV_CUDA_LIB_VERSION} cuda-libraries-dev-12-8=${NV_CUDA_LIB_VERSION} cuda-nvml-dev-12-8=${NV_NVML_DEV_VERSION} ${NV_NVPROF_DEV_PACKAGE} ${NV_LIBNPP_DEV_PACKAGE} libcusparse-dev-12-8=${NV_LIBCUSPARSE_DEV_VERSION} ${NV_LIBCUBLAS_DEV_PACKAGE} ${NV_LIBNCCL_DEV_PACKAGE} ${NV_CUDA_NSIGHT_COMPUTE_DEV_PACKAGE} && rm -rf /var/lib/apt/lists/* # buildkit |
| RUN |1 TARGETARCH=amd64 /bin/sh -c apt-mark hold ${NV_LIBCUBLAS_DEV_PACKAGE_NAME} ${NV_LIBNCCL_DEV_PACKAGE_NAME} # buildkit |
| ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs |
| ARG CUDA_VERSION |
| ARG PYTHON_VERSION |
| ARG INSTALL_KV_CONNECTORS=false |
| WORKDIR /vllm-workspace |
| ENV DEBIAN_FRONTEND=noninteractive |
| ARG TARGETPLATFORM |
| ARG GDRCOPY_CUDA_VERSION=12.8 |
| ARG GDRCOPY_OS_VERSION=Ubuntu22_04 |
| SHELL [/bin/bash -c] |
| ARG DEADSNAKES_MIRROR_URL |
| ARG DEADSNAKES_GPGKEY_URL |
| ARG GET_PIP_URL |
| RUN |9 CUDA_VERSION=12.8.1 PYTHON_VERSION=3.12 INSTALL_KV_CONNECTORS=true TARGETPLATFORM=linux/amd64 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py /bin/bash -c PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment # buildkit |
| RUN |9 CUDA_VERSION=12.8.1 PYTHON_VERSION=3.12 INSTALL_KV_CONNECTORS=true TARGETPLATFORM=linux/amd64 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py /bin/bash -c echo 'tzdata tzdata/Areas select America' | debconf-set-selections && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections && apt-get update -y && apt-get install -y ccache software-properties-common git curl wget sudo vim python3-pip && apt-get install -y ffmpeg libsm6 libxext6 libgl1 && if [ ! -z ${DEADSNAKES_MIRROR_URL} ] ; then if [ ! -z "${DEADSNAKES_GPGKEY_URL}" ] ; then mkdir -p -m 0755 /etc/apt/keyrings ; curl -L ${DEADSNAKES_GPGKEY_URL} | gpg --dearmor > /etc/apt/keyrings/deadsnakes.gpg ; sudo chmod 644 /etc/apt/keyrings/deadsnakes.gpg ; echo "deb [signed-by=/etc/apt/keyrings/deadsnakes.gpg] ${DEADSNAKES_MIRROR_URL} $(lsb_release -cs) main" > /etc/apt/sources.list.d/deadsnakes.list ; fi ; else for i in 1 2 3; do add-apt-repository -y ppa:deadsnakes/ppa && break || { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; done ; fi && apt-get update -y && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv libibverbs-dev && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config && curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION} && python3 --version && python3 -m pip --version # buildkit |
| ARG PIP_INDEX_URL UV_INDEX_URL |
| ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL |
| ARG PYTORCH_CUDA_INDEX_BASE_URL |
| ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL |
| ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER |
| RUN |17 CUDA_VERSION=12.8.1 PYTHON_VERSION=3.12 INSTALL_KV_CONNECTORS=true TARGETPLATFORM=linux/amd64 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled /bin/bash -c python3 -m pip install uv # buildkit |
| ENV UV_HTTP_TIMEOUT=500 |
| ENV UV_INDEX_STRATEGY=unsafe-best-match |
| ENV UV_LINK_MODE=copy |
| RUN |17 CUDA_VERSION=12.8.1 PYTHON_VERSION=3.12 INSTALL_KV_CONNECTORS=true TARGETPLATFORM=linux/amd64 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled /bin/bash -c ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ # buildkit |
| RUN |17 CUDA_VERSION=12.8.1 PYTHON_VERSION=3.12 INSTALL_KV_CONNECTORS=true TARGETPLATFORM=linux/amd64 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled /bin/bash -c if [ "$TARGETPLATFORM" = "linux/arm64" ]; then uv pip install --system --index-url ${PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') "torch==2.8.0.dev20250318+cu128" "torchvision==0.22.0.dev20250319" ; uv pip install --system --index-url ${PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') --pre pytorch_triton==3.3.0+gitab727c40 ; fi # buildkit |
| RUN |17 CUDA_VERSION=12.8.1 PYTHON_VERSION=3.12 INSTALL_KV_CONNECTORS=true TARGETPLATFORM=linux/amd64 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled /bin/bash -c uv pip install --system dist/*.whl --verbose --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') # buildkit |
| ARG FLASHINFER_GIT_REPO=https://github.com/flashinfer-ai/flashinfer.git |
| ARG FLASHINFER_GIT_REF=v0.3.1 |
| ARG FLASHINFER_AOT_COMPILE=false |
| RUN |20 CUDA_VERSION=12.8.1 PYTHON_VERSION=3.12 INSTALL_KV_CONNECTORS=true TARGETPLATFORM=linux/amd64 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled FLASHINFER_GIT_REPO=https://github.com/flashinfer-ai/flashinfer.git FLASHINFER_GIT_REF=v0.3.1 FLASHINFER_AOT_COMPILE=true /bin/bash -c bash - <<'BASH'
# Build and install FlashInfer from a git checkout.
#
# Inputs (passed as build args to the enclosing RUN step):
#   FLASHINFER_GIT_REPO          - repository to clone
#   FLASHINFER_GIT_REF           - branch/tag to check out
#   FLASHINFER_AOT_COMPILE       - "true": pre-build AOT kernels, then install;
#                                  anything else: plain install (JIT mode)
#   CUDA_VERSION                 - picks the arch list and the cuXY wheel index
#   PYTORCH_CUDA_INDEX_BASE_URL  - base URL of the extra wheel index
#
# Abort on the first failing command. This script is read by `bash -` from
# stdin, so without this its exit status is that of the final `rm -rf`, and a
# failed clone/build/install would leave a "successful" layer with no
# FlashInfer in it (or run `uv pip install .` in the wrong directory).
set -eo pipefail

. /etc/environment

# Derive the CUDA version fragments once instead of re-running the pipeline
# at every use site.
cuda_major_minor="$(echo "${CUDA_VERSION}" | cut -d. -f1,2)"   # e.g. 12.8
cuda_major="$(echo "${CUDA_VERSION}" | cut -d. -f1)"           # e.g. 12
# Wheel index suffix is the major.minor version with the dot removed (cu128).
torch_index_url="${PYTORCH_CUDA_INDEX_BASE_URL}/cu${cuda_major_minor//./}"

git clone --depth 1 --recursive --shallow-submodules \
    --branch "${FLASHINFER_GIT_REF}" \
    "${FLASHINFER_GIT_REPO}" flashinfer
pushd flashinfer
if [ "${FLASHINFER_AOT_COMPILE}" = "true" ]; then
    # Exclude CUDA arches for older versions (11.x and 12.0-12.7)
    # TODO: Update this to allow setting TORCH_CUDA_ARCH_LIST as a build arg.
    if [[ "${CUDA_VERSION}" == 11.* ]]; then
        FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9"
    elif [[ "${CUDA_VERSION}" == 12.[0-7]* ]]; then
        FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a"
    else
        # CUDA 12.8+ supports 10.0a and 12.0
        FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a 10.0a 12.0"
    fi
    echo "🏗️ Installing FlashInfer with AOT compilation for arches: ${FI_TORCH_CUDA_ARCH_LIST}"
    export FLASHINFER_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}"
    # HACK: We need these to run flashinfer.aot before installing flashinfer, get from the package in the future
    uv pip install --system \
        "cuda-python==${cuda_major_minor}" \
        "pynvml==${cuda_major}" \
        "nvidia-nvshmem-cu${cuda_major}"
    # Build AOT kernels
    TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" \
        python3 -m flashinfer.aot
    # Install with no-build-isolation since we already built AOT kernels
    TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" \
        uv pip install --system --no-build-isolation . \
        --extra-index-url "${torch_index_url}"
    # Download pre-compiled cubins. Deliberately best-effort: a failure here
    # only prints a warning and does not abort the build.
    TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" \
        python3 -m flashinfer --download-cubin || echo "WARNING: Failed to download flashinfer cubins."
else
    echo "🏗️ Installing FlashInfer without AOT compilation in JIT mode"
    uv pip install --system . \
        --extra-index-url "${torch_index_url}"
fi
popd
# The checkout is only needed for the build; drop it to keep the layer small.
rm -rf flashinfer
BASH # buildkit |
| COPY examples examples # buildkit |
| COPY benchmarks benchmarks # buildkit |
| COPY ./vllm/collect_env.py . # buildkit |
| RUN |20 CUDA_VERSION=12.8.1 PYTHON_VERSION=3.12 INSTALL_KV_CONNECTORS=true TARGETPLATFORM=linux/amd64 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled FLASHINFER_GIT_REPO=https://github.com/flashinfer-ai/flashinfer.git FLASHINFER_GIT_REF=v0.3.1 FLASHINFER_AOT_COMPILE=true /bin/bash -c . /etc/environment && uv pip list # buildkit |
| COPY requirements/build.txt requirements/build.txt # buildkit |
| RUN |20 CUDA_VERSION=12.8.1 PYTHON_VERSION=3.12 INSTALL_KV_CONNECTORS=true TARGETPLATFORM=linux/amd64 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled FLASHINFER_GIT_REPO=https://github.com/flashinfer-ai/flashinfer.git FLASHINFER_GIT_REF=v0.3.1 FLASHINFER_AOT_COMPILE=true /bin/bash -c uv pip install --system -r requirements/build.txt --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') # buildkit |
| ARG DEEPGEMM_GIT_REF |
| COPY tools/install_deepgemm.sh /tmp/install_deepgemm.sh # buildkit |
| RUN |21 CUDA_VERSION=12.8.1 PYTHON_VERSION=3.12 INSTALL_KV_CONNECTORS=true TARGETPLATFORM=linux/amd64 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled FLASHINFER_GIT_REPO=https://github.com/flashinfer-ai/flashinfer.git FLASHINFER_GIT_REF=v0.3.1 FLASHINFER_AOT_COMPILE=true DEEPGEMM_GIT_REF= /bin/bash -c VLLM_DOCKER_BUILD_CONTEXT=1 /tmp/install_deepgemm.sh --cuda-version "${CUDA_VERSION}" ${DEEPGEMM_GIT_REF:+--ref "$DEEPGEMM_GIT_REF"} # buildkit |
| COPY tools/install_gdrcopy.sh install_gdrcopy.sh # buildkit |
| RUN |21 CUDA_VERSION=12.8.1 PYTHON_VERSION=3.12 INSTALL_KV_CONNECTORS=true TARGETPLATFORM=linux/amd64 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled FLASHINFER_GIT_REPO=https://github.com/flashinfer-ai/flashinfer.git FLASHINFER_GIT_REF=v0.3.1 FLASHINFER_AOT_COMPILE=true DEEPGEMM_GIT_REF= /bin/bash -c set -eux; case "${TARGETPLATFORM}" in linux/arm64) UUARCH="aarch64" ;; linux/amd64) UUARCH="x64" ;; *) echo "Unsupported TARGETPLATFORM: ${TARGETPLATFORM}" >&2; exit 1 ;; esac; ./install_gdrcopy.sh "${GDRCOPY_OS_VERSION}" "${GDRCOPY_CUDA_VERSION}" "${UUARCH}"; rm ./install_gdrcopy.sh # buildkit |
| COPY tools/ep_kernels/install_python_libraries.sh install_python_libraries.sh # buildkit |
| ENV CUDA_HOME=/usr/local/cuda |
| RUN |21 CUDA_VERSION=12.8.1 PYTHON_VERSION=3.12 INSTALL_KV_CONNECTORS=true TARGETPLATFORM=linux/amd64 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled FLASHINFER_GIT_REPO=https://github.com/flashinfer-ai/flashinfer.git FLASHINFER_GIT_REF=v0.3.1 FLASHINFER_AOT_COMPILE=true DEEPGEMM_GIT_REF= /bin/bash -c export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-9.0a+PTX}" && bash install_python_libraries.sh # buildkit |
| ARG TARGETPLATFORM |
| ARG INSTALL_KV_CONNECTORS=false |
| ARG PIP_INDEX_URL UV_INDEX_URL |
| ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL |
| ENV UV_HTTP_TIMEOUT=500 |
| COPY requirements/kv_connectors.txt requirements/kv_connectors.txt # buildkit |
| RUN |6 TARGETPLATFORM=linux/amd64 INSTALL_KV_CONNECTORS=true PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= /bin/bash -c if [ "$INSTALL_KV_CONNECTORS" = "true" ]; then uv pip install --system -r requirements/kv_connectors.txt; fi; if [ "$TARGETPLATFORM" = "linux/arm64" ]; then BITSANDBYTES_VERSION="0.42.0"; else BITSANDBYTES_VERSION="0.46.1"; fi; uv pip install --system accelerate hf_transfer modelscope "bitsandbytes>=${BITSANDBYTES_VERSION}" 'timm>=1.0.17' boto3 runai-model-streamer runai-model-streamer[s3] # buildkit |
| ENV VLLM_USAGE_SOURCE=production-docker-image |
| ENTRYPOINT ["python3" "-m" "vllm.entrypoints.openai.api_server"] |
| /bin/sh -c pip install qwen-vl-utils==0.0.14 |