Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pytorch 2.3.1 #172

Merged
merged 4 commits into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
209 changes: 209 additions & 0 deletions platform/Dockerfiles/pytorch/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
FROM debian:bookworm AS bookworm_cuda

###
#
# Define environment

WORKDIR /workspace


###
#
# Set global environment variables
#
# NOTE(review): CUDA_VERSION is not referenced by any later instruction
# (the toolkit is installed as the pinned package cuda-toolkit-12-5);
# kept as documentation of the intended driver/toolkit pairing.

ENV PYTORCH_VERSION="v2.3.1"
ENV CUDA_VERSION="12.5.1_555.42.06"
ENV PATH="$PATH:/usr/local/cuda/bin"
# Build-time only: use ARG instead of ENV so the apt frontend setting does
# not leak into the runtime environment of this stage's image.
ARG DEBIAN_FRONTEND="noninteractive"
ENV NVIDIA_DRIVER_CAPABILITIES="compute,utility"
ENV NVIDIA_VISIBLE_DEVICES="all"
# Python virtual environment used for the build; PIP_BIN/PYTHON_VENV are
# convenience shortcuts used by the install and build steps below.
ENV VENV_PATH="/workspace/v"
ENV PYTHON_VENV="${VENV_PATH}/bin/python"
ENV PIP_BIN="${VENV_PATH}/bin/pip"

###
#
# Workaround gcc-12 issue:
# https://github.com/pytorch/pytorch/issues/77939#issuecomment-1526844015

ENV CXXFLAGS='-Wno-maybe-uninitialized -Wno-uninitialized -Wno-free-nonheap-object -Wno-dev'
ENV CFLAGS='-Wno-maybe-uninitialized -Wno-uninitialized -Wno-free-nonheap-object -Wno-dev'


###
#
# Set pytorch specific build environment variables
#
# PyTorch's setup.py treats ON/1/YES/TRUE as equivalent truthy values;
# "ON" is used consistently here (the original mixed "ON" and 1).

ENV REL_WITH_DEB_INFO="ON"
# Cap on parallel compile jobs -- tune to the build host's cores/RAM.
ENV MAX_JOBS="32"
ENV USE_CUDA="ON"
ENV USE_CUDNN="ON"
ENV USE_CUSPARSELT="ON"
ENV USE_FBGEMM="ON"
ENV USE_KINETO="ON"
ENV USE_NUMPY="ON"
ENV USE_NNPACK="ON"
ENV USE_DISTRIBUTED="ON"
ENV USE_TENSORPIPE="ON"
ENV USE_GLOO="ON"
# NOTE(review): USE_MPI is ON but no MPI dev package (e.g. libopenmpi-dev)
# is installed below -- confirm MPI support actually builds, or drop it.
ENV USE_MPI="ON"
ENV USE_SYSTEM_NCCL="OFF"
ENV USE_OPENMP="ON"
ENV USE_FLASH_ATTENTION="ON"
ENV USE_MEM_EFF_ATTENTION="ON"
ENV PYTORCH_BUILD_VERSION="2.3.1"
ENV PYTORCH_BUILD_NUMBER="1"
# Target GPU architectures: Ampere (8.0/8.6), Ada (8.9), Hopper (9.0).
ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0"
ENV CUDA_PATH="/usr/local/cuda"
ENV CUDA_HOME="/usr/local/cuda"
ENV CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda"
ENV CUDA_NVCC_EXECUTABLE="/usr/local/cuda/bin/nvcc"
ENV CUDA_INCLUDE_DIRS="/usr/local/cuda/include"
# cuSPARSELt / cuDNN come from Debian multiarch packages, hence these paths.
ENV CUSPARSELT_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu"
ENV CUSPARSE_INCLUDE_PATH="/usr/include/x86_64-linux-gnu"
ENV CUDNN_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu"
ENV CUDNN_INCLUDE_PATH="/usr/include/x86_64-linux-gnu"

#ENV USE_UCC="ON"
ENV USE_MIMALLOC="ON"
ENV USE_NCCL="ON"

#ENV ATEN_THREADING="NATIVE"
#ENV USE_SYSTEM_LIBS ON

###
#
# Install toolchain and system dependencies
#
# Single layer: `apt-get update` must share a layer with `install` so a
# stale cached package index can never be reused, and the lists are removed
# in the same layer so they never persist in the image.

RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      build-essential \
      ca-certificates \
      cmake \
      curl \
      git \
      gpg \
      libfftw3-dev \
      libgmp3-dev \
      libjpeg-dev \
      libmpfr-dev \
      libnuma-dev \
      libpng-dev \
      libssl-dev \
      libucx-dev \
      libzstd-dev \
      ninja-build \
      python3 \
      python3-full \
      python3-pip \
      python3-venv \
      swig \
      zstd \
 && rm -rf /var/lib/apt/lists/*

# NOTE(review): libjpeg-dev/libpng-dev were marked "not sure if needed" in
# the original -- confirm whether any image ops are compiled here before
# removing them.

# Optional extras kept for reference (disabled in the original):
#   ccache  (plus: /usr/sbin/update-ccache-symlinks and
#            `ccache --set-config=cache_dir=/opt/ccache`)
#   libmagma-dev

# Pre-created ccache directory; harmless when ccache is not installed.
RUN mkdir -p /opt/ccache


###
#
# Setup build environment and clone pytorch

# Create the whole workspace layout in one layer instead of seven.
RUN mkdir -p \
      /workspace/build \
      /workspace/${PYTORCH_VERSION} \
      /workspace/tmp \
      /workspace/added \
      /workspace/uncompressed \
      /workspace/target \
      /workspace/patches

# Shallow clone of the pinned release tag, including submodules, into ./build.
RUN git clone --depth 1 --jobs ${MAX_JOBS} "https://github.com/pytorch/pytorch" --branch "${PYTORCH_VERSION}" --recurse-submodules --shallow-submodules build

###
#
# Copy build patches into the image

# COPY sources are resolved relative to the build context, not the image
# filesystem -- the patch sits next to this Dockerfile in the context, so it
# must be referenced by its context-relative name.
COPY pytorch-compute-86-override.patch /workspace/patches/
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
COPY /workspace/patches/pytorch-compute-86-override.patch /workspace/patches
COPY pytorch-compute-86-override.patch /workspace/patches

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for catching that. I had a diverged branch and an update that had fixed this wasn't pushed.

# Apply the copied patch to the pytorch checkout. GNU patch must receive the
# patch file via --input: a bare positional argument is interpreted as the
# file *to be patched*, with the patch itself expected on stdin, so the
# original command would have hung/failed.
# NOTE(review): the patch also carries a submodule-pointer hunk for
# third_party/cutlass, which GNU patch cannot apply -- confirm it is
# intentionally a no-op or bump the submodule separately.
RUN patch --directory build -p1 --input /workspace/patches/pytorch-compute-86-override.patch

###
#
# Install NVIDIA CUDA SDK
#
# One layer: fetch NVIDIA's apt keyring (-f makes curl fail on HTTP errors
# instead of saving an error page), enable Debian's "contrib" component,
# install the CUDA 12.5 toolchain, then clean up the .deb and apt lists.

RUN curl -fsSLO https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb \
 && dpkg -i cuda-keyring_1.1-1_all.deb \
 && rm cuda-keyring_1.1-1_all.deb \
 && apt-get update \
 && apt-get install -y --no-install-recommends software-properties-common \
 && add-apt-repository contrib \
 && apt-get update \
 && apt-get install -y --no-install-recommends \
      cuda-toolkit-12-5 \
      cudnn \
      libcusparselt-dev \
 && rm -rf /var/lib/apt/lists/*


###
#
# Install Intel MKL BLAS

# Intel oneAPI apt repository: dearmored key + signed-by sources entry.
RUN curl -fsSL "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB" | gpg --dearmor > /usr/share/keyrings/oneapi-archive-keyring.gpg
RUN echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list

# update + install + cleanup in one layer (see toolchain section above).
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      intel-oneapi-mkl \
      intel-oneapi-mkl-devel \
 && rm -rf /var/lib/apt/lists/*

ENV MKL_VERSION="2024.2"
ENV MKL_ROOT="/opt/intel/oneapi/mkl/${MKL_VERSION}/lib/intel64"
# ilp64: 64-bit integer interface, matching BLA_VENDOR=Intel10_64ilp below.
ENV MKL_MODEL="ilp64"
ENV MKL_LIBRARIES="-Wl,--start-group;${MKL_ROOT}/libmkl_intel_${MKL_MODEL}.a;${MKL_ROOT}/libmkl_gnu_thread.a;${MKL_ROOT}/libmkl_core.a;-Wl,--end-group"
ENV CUDA_ARCHS="80;86;89;90"
ENV BLA_VENDOR=Intel10_64ilp
# Fixed typo: BLA_STATIS -> BLA_STATIC (CMake's FindBLAS static-link switch;
# the misspelled variable was silently ignored).
ENV BLA_STATIC=True


###
#
# Install Python virtual environment

RUN python3 -m venv ${VENV_PATH}

# Build tooling in one layer; --no-cache-dir keeps pip's download cache out
# of the image. requirements.txt comes from the pytorch checkout above.
RUN ${PIP_BIN} install --no-cache-dir \
      build \
      cmake \
      ninja \
      numpy \
      pyyaml \
      six \
      swig \
      wheel \
 && ${PIP_BIN} install --no-cache-dir -r /workspace/build/requirements.txt


###
#
# Hardcode the cuda library path for the system loader
#
# The cuda-toolkit packages installed above place the toolkit under
# /usr/local/cuda (consistent with PATH and CUDA_HOME set earlier); the
# original /opt/nvidia/cuda/lib64 path does not exist in this image, so the
# loader entry was a no-op.

RUN echo "/usr/local/cuda/lib64" > /etc/ld.so.conf.d/cuda.conf \
 && ldconfig


###
#
# Build pytorch

# Build wheel + sdist with the venv interpreter. --no-isolation makes
# `python -m build` use the packages installed above (and the exported
# USE_*/CUDA_* environment) instead of a fresh isolated build env.
WORKDIR /workspace/build
RUN ${PYTHON_VENV} -m build --wheel --sdist --no-isolation


###
#
# Produce a clean image of build results for output from buildx
#
# Final stage starts from scratch and contains only the dist/ artifacts, so
# `docker buildx build --output type=local,...` exports just the wheel/sdist
# while the heavy build stage is discarded.

FROM scratch
COPY --from=bookworm_cuda /workspace/build/dist /
6 changes: 6 additions & 0 deletions platform/Dockerfiles/pytorch/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/sh
###
#
# Build pytorch and output the build results to "${PWD}/target"

# Fail fast on any error and on use of unset variables.
set -eu

mkdir -p "${PWD}/target"
# --output type=local exports the scratch stage's files (the built wheel and
# sdist) into ./target instead of loading an image into the daemon.
docker buildx build --progress plain --output type=local,dest="${PWD}/target" . -t pytorch:v2.3.1
20 changes: 20 additions & 0 deletions platform/Dockerfiles/pytorch/pytorch-compute-86-override.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
diff --git a/aten/src/ATen/native/cuda/Blas.cpp b/aten/src/ATen/native/cuda/Blas.cpp
index 8d3b3dbea7..5f04c0cdd1 100644
--- a/aten/src/ATen/native/cuda/Blas.cpp
+++ b/aten/src/ATen/native/cuda/Blas.cpp
@@ -820,7 +820,7 @@ static bool _scaled_mm_allowed_device() {
}
return false;
#else
- return dprops->major >= 9 || (dprops->major == 8 && dprops->minor == 9);
+ return (dprops->major == 8 && dprops->minor >= 0);
#endif
}

diff --git a/third_party/cutlass b/third_party/cutlass
index bbe579a9e3..56b46e2d13 160000
--- a/third_party/cutlass
+++ b/third_party/cutlass
@@ -1 +1 @@
-Subproject commit bbe579a9e3beb6ea6626d9227ec32d0dae119a49
+Subproject commit 56b46e2d13875b46b8f6a03f9f5ac91e2bfdc01a
143 changes: 0 additions & 143 deletions platform/packaging/build/pytorch/Dockerfile

This file was deleted.

6 changes: 0 additions & 6 deletions platform/packaging/build/pytorch/build.sh

This file was deleted.

Loading