Skip to content

Commit bcf247c

Browse files
robotcator, Your Name
and
Your Name
authored
cuda 12.4 (#50)
* update to cuda12
* upgrade to cuda12.1
* use self-hosted runner
* upgrade to torch2.3

---------

Co-authored-by: nobody <nobody>
Co-authored-by: Your Name <you@example.com>
1 parent a0f432d commit bcf247c

File tree

2 files changed

+10
-22
lines changed

2 files changed

+10
-22
lines changed

.github/workflows/docker_rdma_latest.yml

+4 -3
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,11 @@ on:
44
push:
55
branches:
66
- main
7+
- cuda12
78

89
jobs:
910
docker:
10-
runs-on: ubuntu-latest
11+
runs-on: self-hosted
1112
steps:
1213
-
1314
name: Checkout
@@ -25,9 +26,9 @@ jobs:
2526
username: ${{ secrets.DOCKERHUB_USERNAME }}
2627
password: ${{ secrets.DOCKERHUB_TOKEN }}
2728
-
28-
name: Build and push cu117 with rdma
29+
name: Build and push cu12.4 with rdma
2930
uses: docker/build-push-action@v3
3031
with:
3132
context: ./docker/rdma/
3233
push: true
33-
tags: dptechnology/unicore:latest-pytorch2.0.1-cuda11.7-rdma
34+
tags: dptechnology/unicore:latest-pytorch2.3.0-cuda12.4-rdma

docker/rdma/Dockerfile

+6 -19
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
FROM nvcr.io/nvidia/pytorch:22.05-py3
1+
FROM nvcr.io/nvidia/pytorch:24.03-py3
22

33
RUN APT_INSTALL="apt-get install -y --no-install-recommends" && \
44
rm -rf /var/lib/apt/lists/* \
@@ -50,38 +50,25 @@ RUN APT_INSTALL="apt-get install -y --no-install-recommends" && \
5050
nfs-common \
5151
bc
5252

53-
RUN pip uninstall -y torch torchvision torchtext && \
54-
pip uninstall -y torch torchvision torchtext && \
55-
rm -rf ~/.cache/pip && \
56-
conda clean -ya
57-
58-
RUN conda install -y pyyaml tensorboardX && \
59-
conda clean -ya
60-
61-
# RUN ldconfig
6253

6354
# # ==================================================================
6455
# # pytorch
6556
# # ------------------------------------------------------------------
66-
ENV TORCH_CUDA_ARCH_LIST "7.0;7.5;8.0"
67-
68-
RUN conda install -y ninja typing && \
69-
conda clean -ya
57+
ENV TORCH_CUDA_ARCH_LIST "7.0;7.5;8.0;9.0"
7058

7159
RUN pip3 install --upgrade sentry-sdk requests
7260

73-
RUN pip3 install torch==2.0.1+cu117 --index-url https://download.pytorch.org/whl/cu117 && rm -rf ~/.cache/pip
74-
7561
RUN cd /tmp && \
7662
git clone https://github.com/dptech-corp/Uni-Core && \
7763
cd Uni-Core && \
7864
python setup.py install && \
79-
rm -rf /tmp/* && rm -rf ~/.cache/pip
65+
rm -rf /tmp/* && rm -rf ~/.cache/pip
8066

8167
RUN pip3 install --no-cache-dir tokenizers lmdb biopython ml-collections timeout-decorator urllib3 tree dm-tree && rm -rf ~/.cache/pip
8268

69+
RUN MAX_JOBS=4 pip3 install -U 'flash-attn<2.5.0' --no-build-isolation --no-cache-dir
70+
8371
RUN ldconfig && \
8472
apt-get clean && \
8573
apt-get autoremove && \
86-
rm -rf /var/lib/apt/lists/* /tmp/* && \
87-
conda clean -ya
74+
rm -rf /var/lib/apt/lists/* /tmp/*

0 commit comments

Comments
 (0)