@@ -11,7 +11,7 @@ FROM ubuntu:22.04 AS sdk-base
1111
1212SHELL ["/bin/bash", "-c"]
1313
14- ENV DEBIAN_FRONTEND noninteractive
# key=value form of ENV: there must be no whitespace after '=', otherwise the value is
# parsed as a second (invalid) name=value pair and the build is rejected.
14+ ENV DEBIAN_FRONTEND=noninteractive
1515
1616# Install python
1717RUN apt-get update && \
@@ -20,20 +20,34 @@ RUN apt-get update && \
2020
2121# nodesource: nvdashboard requires nodejs>=10
# NOTE(review): the nodesource remark above does not describe the command below — confirm whether it is stale.
# Download the NVIDIA HPC SDK signing key with curl, pipe it through `gpg --dearmor`, and write the
# result to /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg (`--yes` answers gpg's prompts).
2222RUN curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | gpg --yes --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg
23- RUN echo 'deb [trusted=yes, signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | tee /etc/apt/sources.list.d/nvhpc.list
# Register the NVIDIA HPC SDK apt repository for the architecture reported by `uname -m`.
# The machine name is mapped to the repository's platform directory (x86_64 -> amd64,
# aarch64/arm64 -> arm64); any other value prints an error to stderr and aborts with exit 1.
# The resulting `deb` line is written to /etc/apt/sources.list.d/nvhpc.list via tee.
23+ RUN arch="$(uname -m)" && \
24+ case "$arch" in \
25+ x86_64) nvplat=amd64 ;; \
26+ aarch64|arm64) nvplat=arm64 ;; \
27+ *) echo "Unsupported architecture: $arch" >&2; exit 1 ;; \
28+ esac && \
29+ echo "deb [trusted=yes, signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/${nvplat} /" | tee /etc/apt/sources.list.d/nvhpc.list
# Refresh the package index so the repository added above is visible to apt-get.
# NOTE(review): the `*` before `&&` is handed to apt-key as an argument (a shell glob of the
# working directory) — confirm whether it is intentional.
2430RUN apt-key update *&& apt-get update -y
2531
2632# Install nvhpc. `nvhpc` is the alias for the latest available version
2733ARG ver=nvhpc
2834# We use the standard apt-get for the default (latest) nvhpc. For earlier versions, apt has a bug where it always
2935# installs the latest nvhpc-x-y no matter which version nvhpc-x-z is requested, which would double the size of the image (an extra 10GB).
3036# So for a specific version we directly download that version's deb and install it.
31- RUN if [ "$ver" = "nvhpc" ]; then \
# Install the NVIDIA HPC SDK selected by the `ver` build argument.
#  - `uname -m` is mapped to the repository platform name (x86_64 -> amd64, aarch64/arm64 -> arm64);
#    any other architecture prints an error to stderr and aborts with exit 1.
#  - ver = "nvhpc": the package of that name is installed through apt-get.
#  - otherwise: the 2nd and 3rd "-"-separated fields of `ver` are used as `year` and `minor`, and
#    nvhpc_<year>.<minor>_<plat>.deb is downloaded to ./nvhpc.deb; if that download fails,
#    nvhpc_<year>.<minor>-0_<plat>.deb is tried instead. The downloaded file is then installed with apt-get.
37+ RUN arch="$(uname -m)" && \
38+ case "$arch" in \
39+ x86_64) nvplat=amd64 ;; \
40+ aarch64|arm64) nvplat=arm64 ;; \
41+ *) echo "Unsupported architecture: $arch" >&2; exit 1 ;; \
42+ esac && \
43+ if [ "$ver" = "nvhpc" ]; then \
3244 apt-get install -y -q --allow-unauthenticated ${ver}; \
3345 else \
34- export year=$(echo $ver | cut -d "-" -f 2) && export minor=$(echo $ver | cut -d "-" -f 3) && \
35- wget https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64/nvhpc_${year}.${minor}_amd64.deb && \
36- apt-get install --allow-unauthenticated -y -q ./nvhpc_${year}.${minor}_amd64.deb; \
46+ export year=$(echo $ver | cut -d "-" -f 2) && \
47+ export minor=$(echo $ver | cut -d "-" -f 3) && \
48+ wget -O nvhpc.deb "https://developer.download.nvidia.com/hpc-sdk/ubuntu/${nvplat}/nvhpc_${year}.${minor}_${nvplat}.deb" \
49+ || wget -O nvhpc.deb "https://developer.download.nvidia.com/hpc-sdk/ubuntu/${nvplat}/nvhpc_${year}.${minor}-0_${nvplat}.deb" && \
50+ apt-get install --allow-unauthenticated -y -q ./nvhpc.deb; \
3751 fi;
3852
3953# Nodejs https://github.com/nodesource/distributions
@@ -43,12 +57,15 @@ RUN apt-get update && apt-get install -y -q \
4357 liblapack-dev libblas-dev \
4458 libibverbs-dev libmlx4-1 libmlx5-1 ibutils \
4559 # Devito Jupyter Notebooks and Ux experience
46- nodejs ffmpeg gcc-offload-nvptx \
60+ nodejs ffmpeg \
4761 texlive-latex-extra texlive-fonts-recommended dvipng cm-super
4862
63+ # nvptx only available on x86_64: install it there (a failed install fails the build) and skip it on any other architecture
64+ RUN if [ "$(uname -m)" = "x86_64" ]; then apt-get install -y -q gcc-offload-nvptx; else echo "Skipping nvptx compiler installation on non-x86_64 architecture"; fi
65+
4966# nvidia-container-runtime
50- ENV NVIDIA_VISIBLE_DEVICES all
51- ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
# key=value form of ENV: no whitespace after '=', otherwise the value is parsed as a
# separate (invalid) name=value pair and the build is rejected.
67+ ENV NVIDIA_VISIBLE_DEVICES=all
68+ ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
5269
5370# MPI ROOT USER DEFAULTS
5471ENV OMPI_ALLOW_RUN_AS_ROOT=1
@@ -72,13 +89,19 @@ ENV UCX_TLS=cuda,cuda_copy,cuda_ipc,sm,shm,self
7289#ENV UCX_TLS=cuda,cuda_copy,cuda_ipc,sm,shm,self,rc_x,gdr_copy
7390
7491# Make symlinks for path setup since ENV doesn't accept shell commands.
75- RUN export NVARCH=$(ls -1 /opt/nvidia/hpc_sdk/Linux_x86_64/ | grep '\.' | head -n 1) && \
76- export CUDA_V=$(ls /opt/nvidia/hpc_sdk/Linux_x86_64/${NVARCH}/cuda/ | grep '\.') && \
77- ln -sf /opt/nvidia/hpc_sdk/Linux_x86_64/${NVARCH} /opt/nvhpc && \
78- ln -sf /opt/nvidia/hpc_sdk/Linux_x86_64/${NVARCH}/cuda/${CUDA_V}/extras/CUPTI /opt/CUPTI && \
79- ln -sf /opt/nvidia/hpc_sdk/Linux_x86_64/comm_libs/${CUDA_V}/nvshmem /opt/nvhpc/comm_libs/nvshmem && \
80- ln -sf /opt/nvidia/hpc_sdk/Linux_x86_64/comm_libs/${CUDA_V}/nccl /opt/nvhpc/comm_libs/nccl && \
81- ln -sf /opt/nvidia/hpc_sdk/Linux_x86_64/${NVARCH}/cuda/${CUDA_V}/compute-sanitizer/compute-sanitizer /opt/nvhpc/compilers/bin/compute-sanitizer
# Create fixed paths into the HPC SDK install tree so later ENV lines need no shell logic.
# `uname -m` selects the SDK directory (x86_64 -> Linux_x86_64, aarch64/arm64 -> Linux_aarch64);
# any other architecture prints an error to stderr and aborts with exit 1.
# NVARCH is the first entry of /opt/nvidia/hpc_sdk/<linux>/ whose name contains a dot;
# CUDA_V is the set of entries of <NVARCH>/cuda/ whose names contain a dot.
# NOTE(review): CUDA_V is not limited with `head`, so this presumably expects exactly one
# CUDA version directory — confirm.
# Symlinks created (with -sf, replacing existing ones): /opt/nvhpc, /opt/CUPTI,
# /opt/nvhpc/comm_libs/nvshmem, /opt/nvhpc/comm_libs/nccl, /opt/nvhpc/compilers/bin/compute-sanitizer.
92+ RUN arch="$(uname -m)" && \
93+ case "$arch" in \
94+ x86_64) linux=Linux_x86_64 ;; \
95+ aarch64|arm64) linux=Linux_aarch64 ;; \
96+ *) echo "Unsupported architecture: $arch" >&2; exit 1 ;; \
97+ esac && \
98+ export NVARCH=$(ls -1 /opt/nvidia/hpc_sdk/${linux}/ | grep '\.' | head -n 1) && \
99+ export CUDA_V=$(ls /opt/nvidia/hpc_sdk/${linux}/${NVARCH}/cuda/ | grep '\.') && \
100+ ln -sf /opt/nvidia/hpc_sdk/${linux}/${NVARCH} /opt/nvhpc && \
101+ ln -sf /opt/nvidia/hpc_sdk/${linux}/${NVARCH}/cuda/${CUDA_V}/extras/CUPTI /opt/CUPTI && \
102+ ln -sf /opt/nvidia/hpc_sdk/${linux}/comm_libs/${CUDA_V}/nvshmem /opt/nvhpc/comm_libs/nvshmem && \
103+ ln -sf /opt/nvidia/hpc_sdk/${linux}/comm_libs/${CUDA_V}/nccl /opt/nvhpc/comm_libs/nccl && \
104+ ln -sf /opt/nvidia/hpc_sdk/${linux}/${NVARCH}/cuda/${CUDA_V}/compute-sanitizer/compute-sanitizer /opt/nvhpc/compilers/bin/compute-sanitizer
82105
83106# Starting nvhpc 23.5 and cuda 12.1, hpcx and openmpi are inside the cuda version folder, only the bin is in the comm_libs path
84107RUN export CUDA_V=$(/opt/nvhpc/cuda/bin/nvcc --version | sed -n 's/^.*release \([0-9]\+\.[0-9]\+\).*$/\1/p') && \
@@ -102,12 +125,12 @@ RUN echo "$HPCSDK_HOME/cuda/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
102125
103126# Compiler, CUDA, and Library paths
104127# CUDA_HOME has been deprecated but keep for now because of other dependencies (@mloubout).
105- ENV CUDA_HOME $HPCSDK_HOME/cuda
106- ENV NVHPC_CUDA_HOME $HPCSDK_HOME/cuda
107- ENV CUDA_ROOT $HPCSDK_HOME/cuda/bin
108- ENV PATH $HPCSDK_HOME/compilers/bin:$HPCSDK_HOME/cuda/bin:$HPCSDK_HOME/comm_libs/mpi/bin:${PATH}
109- ENV LD_LIBRARY_PATH $HPCSDK_HOME/cuda/lib:$HPCSDK_HOME/cuda/lib64:$HPCSDK_HOME/compilers/lib:$HPCSDK_HOME/math_libs/lib64:$HPCSDK_HOME/comm_libs/mpi/lib:$HPCSDK_CUPTI/lib64:bitcomp_DIR:${LD_LIBRARY_PATH}
110- ENV CPATH $HPCSDK_HOME/comm_libs/mpi/include:$HPCSDK_HOME/comm_libs/nvshmem/include:$HPCSDK_HOME/comm_libs/nccl/include:$HPCSDK_HOME/math_libs/include:${CPATH}
# key=value form of ENV: no whitespace after '=', otherwise the value is parsed as a
# separate (invalid) name=value pair and the build is rejected.
# PATH is prepended to the existing ${PATH}; LD_LIBRARY_PATH and CPATH are set outright.
# NOTE(review): `bitcomp_DIR` in LD_LIBRARY_PATH is a literal path entry, not a variable
# reference — confirm whether `$bitcomp_DIR` was intended.
128+ ENV CUDA_HOME=$HPCSDK_HOME/cuda
129+ ENV NVHPC_CUDA_HOME=$HPCSDK_HOME/cuda
130+ ENV CUDA_ROOT=$HPCSDK_HOME/cuda/bin
131+ ENV PATH=$HPCSDK_HOME/compilers/bin:$HPCSDK_HOME/cuda/bin:$HPCSDK_HOME/comm_libs/mpi/bin:${PATH}
132+ ENV LD_LIBRARY_PATH=$HPCSDK_HOME/cuda/lib:$HPCSDK_HOME/cuda/lib64:$HPCSDK_HOME/compilers/lib:$HPCSDK_HOME/math_libs/lib64:$HPCSDK_HOME/comm_libs/mpi/lib:$HPCSDK_CUPTI/lib64:bitcomp_DIR
133+ ENV CPATH=$HPCSDK_HOME/comm_libs/mpi/include:$HPCSDK_HOME/comm_libs/nvshmem/include:$HPCSDK_HOME/comm_libs/nccl/include:$HPCSDK_HOME/math_libs/include
111134
112135# MPI
113136RUN rm -f $HPCSDK_HOME/comm_libs/mpi && \
0 commit comments