Skip to content

Commit edeb3b6

Browse files
committed
compiler: better handle buffer ispace
1 parent a4060b6 commit edeb3b6

6 files changed

Lines changed: 83 additions & 48 deletions

File tree

.github/workflows/docker-bases.yml

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,16 @@ on:
1818
inputs:
1919
cpu:
2020
type: boolean
21-
default: false
21+
default: true
2222
nvidia:
2323
type: boolean
24-
default: false
24+
default: true
2525
amd:
2626
type: boolean
27-
default: false
27+
default: true
2828
intel:
2929
type: boolean
30-
default: false
30+
default: true
3131

3232
tags:
3333
description: "Build compiler bases"
@@ -227,7 +227,9 @@ jobs:
227227
push: true
228228
target: "nvc"
229229
platforms: ${{ matrix.platform }}
230-
build-args: "arch=nvc"
230+
build-args: |
231+
arch=nvc
232+
ver=nvhpc-25-7
231233
# Label (not tag) with runner name for traceability without changing image tags
232234
labels: builder-runner=${{ runner.name }}
233235
tags: "devitocodes/bases:nvidia-nvc-${{ matrix.arch }}"
@@ -240,7 +242,9 @@ jobs:
240242
push: true
241243
target: "nvcc"
242244
platforms: ${{ matrix.platform }}
243-
build-args: "arch=nvcc"
245+
build-args: |
246+
arch=nvcc
247+
ver=nvhpc-25-7
244248
labels: builder-runner=${{ runner.name }}
245249
tags: "devitocodes/bases:nvidia-nvcc-${{ matrix.arch }}"
246250

@@ -252,7 +256,9 @@ jobs:
252256
push: true
253257
target: "nvc-host"
254258
platforms: ${{ matrix.platform }}
255-
build-args: "arch=nvc-host"
259+
build-args: |
260+
arch=nvc-host
261+
ver=nvhpc-25-7
256262
labels: builder-runner=${{ runner.name }}
257263
tags: "devitocodes/bases:cpu-nvc-${{ matrix.arch }}"
258264

.github/workflows/docker-devito.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ jobs:
215215
echo "No tags generated, skipping"
216216
exit 0
217217
fi
218-
for tag in $TAGS; do
218+
echo "$TAGS" | while read -r tag; do
219219
refs=""
220220
for arch in $ARCHES; do
221221
refs="$refs devitocodes/devito:${tag}-${arch}"

devito/passes/clusters/buffering.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -405,8 +405,10 @@ def generate_buffers(clusters, key, sregistry, options, **kwargs):
405405
name = sregistry.make_name(prefix='%sb' % f.name)
406406
# We specify the padding to match the input Function's one, so that
407407
# the array can be used in place of the Function with valid strides
408+
# Plain Array do not track mapped so we default to no padding
409+
padding = 0 if cls is Array else f.padding
408410
mapper[f] = cls(name=name, dimensions=dimensions, dtype=f.dtype,
409-
padding=f.padding, grid=f.grid, halo=f.halo,
411+
padding=padding, grid=f.grid, halo=f.halo,
410412
space='mapped', mapped=f, f=f)
411413

412414
return mapper
@@ -461,12 +463,13 @@ def itdims(self):
461463
@cached_property
462464
def ispace(self):
463465
# The IterationSpace within which the buffer will be accessed
466+
464467
# NOTE: The `key` is to avoid Clusters including `f` but not directly
465468
# using it in an expression, such as HaloTouch Clusters
466469
def key(c):
467470
bufferdim = any(i in c.ispace.dimensions for i in self.bdims)
468-
timeonly = all(d.is_Time for d in c.ispace.dimensions)
469-
return bufferdim or timeonly
471+
xd_only = all(d._defines & self.xd._defines for d in c.ispace.dimensions)
472+
return bufferdim or xd_only
470473

471474
ispaces = set()
472475
for c in self.clusters:
@@ -478,14 +481,13 @@ def key(c):
478481
continue
479482

480483
# Iterations space and buffering dims
481-
ispace = c.ispace
482-
edims = [d for d in self.bdims if d not in ispace.dimensions]
484+
edims = [d for d in self.bdims if d not in c.ispace.dimensions]
483485
if not edims:
484-
ispaces.add(ispace)
486+
ispaces.add(c.ispace)
485487
else:
486488
# Add all missing buffering dimensions and reorder to
487489
# avoid duplicates with different ordering
488-
ispaces.add(ispace.insert(self.dim, edims).reorder())
490+
ispaces.add(c.ispace.insert(self.dim, edims).reorder())
489491

490492
if len(ispaces) > 1:
491493
# Best effort to make buffering work in the presence of multiple

devito/passes/iet/languages/CXX.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ def std_arith(prefix=None):
6767

6868

6969
def split_pointer(i, idx):
70+
"""
71+
Splits complex pointer std::complex<T> *a as
72+
(float *)(&a)[idx] for real/imag parts.
73+
"""
7074
dtype = i.dtype(0).real.__class__
7175
ptr = cast(dtype, stars='*')(Byref(i), reinterpret=True)
7276
return IndexedPointer(ptr, idx)

devito/passes/iet/languages/utils.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,24 +18,24 @@ def _atomic_add_split(i, pragmas, real, imag):
1818
lhs, rhs = i.expr.lhs, i.expr.rhs
1919
if (np.issubdtype(lhs.dtype, np.complexfloating)
2020
and np.issubdtype(rhs.dtype, np.complexfloating)):
21-
# Complex i, complex j
21+
# Complex lhs, complex rhs
2222
# Atomic add real and imaginary parts separately
2323
lhsr, rhsr = real(lhs), Real(rhs)
2424
lhsi, rhsi = imag(lhs), Imag(rhs)
25-
real = i._rebuild(expr=i.expr._rebuild(lhs=lhsr, rhs=rhsr),
26-
pragmas=pragmas)
27-
imag = i._rebuild(expr=i.expr._rebuild(lhs=lhsi, rhs=rhsi),
28-
pragmas=pragmas)
29-
return List(body=[real, imag])
25+
real_eq = i._rebuild(expr=i.expr._rebuild(lhs=lhsr, rhs=rhsr),
26+
pragmas=pragmas)
27+
imag_eq = i._rebuild(expr=i.expr._rebuild(lhs=lhsi, rhs=rhsi),
28+
pragmas=pragmas)
29+
return List(body=[real_eq, imag_eq])
3030

3131
elif (np.issubdtype(lhs.dtype, np.complexfloating)
3232
and not np.issubdtype(rhs.dtype, np.complexfloating)):
33-
# Complex i, real j
34-
# Atomic add j to real part of i
33+
# Complex lhs, real rhs
34+
# Atomic add rhs to real part of lhs
3535
lhsr, rhsr = real(lhs), rhs
36-
real = i._rebuild(expr=i.expr._rebuild(lhs=lhsr, rhs=rhsr),
37-
pragmas=pragmas)
38-
return real
36+
real_eq = i._rebuild(expr=i.expr._rebuild(lhs=lhsr, rhs=rhsr),
37+
pragmas=pragmas)
38+
return real_eq
3939
else:
4040
# Real i, complex j
4141
raise InvalidOperator("Atomic add not implemented for real "

docker/Dockerfile.nvidia

Lines changed: 45 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ FROM ubuntu:22.04 AS sdk-base
1111

1212
SHELL ["/bin/bash", "-c"]
1313

14-
ENV DEBIAN_FRONTEND noninteractive
14+
ENV DEBIAN_FRONTEND=noninteractive
1515

1616
# Install python
1717
RUN apt-get update && \
@@ -20,20 +20,34 @@ RUN apt-get update && \
2020

2121
# nodesource: nvdashboard requires nodejs>=10
2222
RUN curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | gpg --yes --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg
23-
RUN echo 'deb [trusted=yes, signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | tee /etc/apt/sources.list.d/nvhpc.list
23+
RUN arch="$(uname -m)" && \
24+
case "$arch" in \
25+
x86_64) nvplat=amd64 ;; \
26+
aarch64|arm64) nvplat=arm64 ;; \
27+
*) echo "Unsupported architecture: $arch" >&2; exit 1 ;; \
28+
esac && \
29+
echo "deb [trusted=yes, signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/${nvplat} /" | tee /etc/apt/sources.list.d/nvhpc.list
2430
RUN apt-key update *&& apt-get update -y
2531

2632
# Install nvhpc. `nvhpc` is the alias for the latest available version
2733
ARG ver=nvhpc
2834
# We use the standard apt-get for the default latest nvhpc. For earlier version, apt has a bug that it will always
2935
# install the latest nvhpc-x-y no matter which version nvhpc-x-z is requested which would double (extra 10Gb) the size of the image.
3036
# So for specific version we directly download the specific deb and install it.
31-
RUN if [ "$ver" = "nvhpc" ]; then \
37+
RUN arch="$(uname -m)" && \
38+
case "$arch" in \
39+
x86_64) nvplat=amd64 ;; \
40+
aarch64|arm64) nvplat=arm64 ;; \
41+
*) echo "Unsupported architecture: $arch" >&2; exit 1 ;; \
42+
esac && \
43+
if [ "$ver" = "nvhpc" ]; then \
3244
apt-get install -y -q --allow-unauthenticated ${ver}; \
3345
else \
34-
export year=$(echo $ver | cut -d "-" -f 2) && export minor=$(echo $ver | cut -d "-" -f 3) && \
35-
wget https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64/nvhpc_${year}.${minor}_amd64.deb && \
36-
apt-get install --allow-unauthenticated -y -q ./nvhpc_${year}.${minor}_amd64.deb; \
46+
export year=$(echo $ver | cut -d "-" -f 2) && \
47+
export minor=$(echo $ver | cut -d "-" -f 3) && \
48+
wget -O nvhpc.deb "https://developer.download.nvidia.com/hpc-sdk/ubuntu/${nvplat}/nvhpc_${year}.${minor}_${nvplat}.deb" \
49+
|| wget -O nvhpc.deb "https://developer.download.nvidia.com/hpc-sdk/ubuntu/${nvplat}/nvhpc_${year}.${minor}-0_${nvplat}.deb" && \
50+
apt-get install --allow-unauthenticated -y -q ./nvhpc.deb; \
3751
fi;
3852

3953
# Nodejs https://github.com/nodesource/distributions
@@ -43,12 +57,15 @@ RUN apt-get update && apt-get install -y -q \
4357
liblapack-dev libblas-dev \
4458
libibverbs-dev libmlx4-1 libmlx5-1 ibutils \
4559
# Devito Jupyter Notebooks and Ux experience
46-
nodejs ffmpeg gcc-offload-nvptx \
60+
nodejs ffmpeg \
4761
texlive-latex-extra texlive-fonts-recommended dvipng cm-super
4862

63+
# nvptx only available on x86_64
64+
RUN apt-get install -y -q gcc-offload-nvptx || echo "Skipping nvptx compiler installation on non-x86_64 architecture"
65+
4966
# nvidia-container-runtime
50-
ENV NVIDIA_VISIBLE_DEVICES all
51-
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
67+
ENV NVIDIA_VISIBLE_DEVICES=all
68+
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
5269

5370
# MPI ROOT USER DEFAULTS
5471
ENV OMPI_ALLOW_RUN_AS_ROOT=1
@@ -72,13 +89,19 @@ ENV UCX_TLS=cuda,cuda_copy,cuda_ipc,sm,shm,self
7289
#ENV UCX_TLS=cuda,cuda_copy,cuda_ipc,sm,shm,self,rc_x,gdr_copy
7390

7491
# Make symlink for path setup since ENV doesn't accept shell commands.
75-
RUN export NVARCH=$(ls -1 /opt/nvidia/hpc_sdk/Linux_x86_64/ | grep '\.' | head -n 1) && \
76-
export CUDA_V=$(ls /opt/nvidia/hpc_sdk/Linux_x86_64/${NVARCH}/cuda/ | grep '\.') && \
77-
ln -sf /opt/nvidia/hpc_sdk/Linux_x86_64/${NVARCH} /opt/nvhpc && \
78-
ln -sf /opt/nvidia/hpc_sdk/Linux_x86_64/${NVARCH}/cuda/${CUDA_V}/extras/CUPTI /opt/CUPTI && \
79-
ln -sf /opt/nvidia/hpc_sdk/Linux_x86_64/comm_libs/${CUDA_V}/nvshmem /opt/nvhpc/comm_libs/nvshmem && \
80-
ln -sf /opt/nvidia/hpc_sdk/Linux_x86_64/comm_libs/${CUDA_V}/nccl /opt/nvhpc/comm_libs/nccl && \
81-
ln -sf /opt/nvidia/hpc_sdk/Linux_x86_64/${NVARCH}/cuda/${CUDA_V}/compute-sanitizer/compute-sanitizer /opt/nvhpc/compilers/bin/compute-sanitizer
92+
RUN arch="$(uname -m)" && \
93+
case "$arch" in \
94+
x86_64) linux=Linux_x86_64 ;; \
95+
aarch64|arm64) linux=Linux_aarch64 ;; \
96+
*) echo "Unsupported architecture: $arch" >&2; exit 1 ;; \
97+
esac && \
98+
export NVARCH=$(ls -1 /opt/nvidia/hpc_sdk/${linux}/ | grep '\.' | head -n 1) && \
99+
export CUDA_V=$(ls /opt/nvidia/hpc_sdk/${linux}/${NVARCH}/cuda/ | grep '\.') && \
100+
ln -sf /opt/nvidia/hpc_sdk/${linux}/${NVARCH} /opt/nvhpc && \
101+
ln -sf /opt/nvidia/hpc_sdk/${linux}/${NVARCH}/cuda/${CUDA_V}/extras/CUPTI /opt/CUPTI && \
102+
ln -sf /opt/nvidia/hpc_sdk/${linux}/comm_libs/${CUDA_V}/nvshmem /opt/nvhpc/comm_libs/nvshmem && \
103+
ln -sf /opt/nvidia/hpc_sdk/${linux}/comm_libs/${CUDA_V}/nccl /opt/nvhpc/comm_libs/nccl && \
104+
ln -sf /opt/nvidia/hpc_sdk/${linux}/${NVARCH}/cuda/${CUDA_V}/compute-sanitizer/compute-sanitizer /opt/nvhpc/compilers/bin/compute-sanitizer
82105

83106
# Starting nvhpc 23.5 and cuda 12.1, hpcx and openmpi are inside the cuda version folder, only the bin is in the comm_libs path
84107
RUN export CUDA_V=$(/opt/nvhpc/cuda/bin/nvcc --version | sed -n 's/^.*release \([0-9]\+\.[0-9]\+\).*$/\1/p') && \
@@ -102,12 +125,12 @@ RUN echo "$HPCSDK_HOME/cuda/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
102125

103126
# Compiler, CUDA, and Library paths
104127
# CUDA_HOME has been deprecated but keep for now because of other dependencies (@mloubout).
105-
ENV CUDA_HOME $HPCSDK_HOME/cuda
106-
ENV NVHPC_CUDA_HOME $HPCSDK_HOME/cuda
107-
ENV CUDA_ROOT $HPCSDK_HOME/cuda/bin
108-
ENV PATH $HPCSDK_HOME/compilers/bin:$HPCSDK_HOME/cuda/bin:$HPCSDK_HOME/comm_libs/mpi/bin:${PATH}
109-
ENV LD_LIBRARY_PATH $HPCSDK_HOME/cuda/lib:$HPCSDK_HOME/cuda/lib64:$HPCSDK_HOME/compilers/lib:$HPCSDK_HOME/math_libs/lib64:$HPCSDK_HOME/comm_libs/mpi/lib:$HPCSDK_CUPTI/lib64:bitcomp_DIR:${LD_LIBRARY_PATH}
110-
ENV CPATH $HPCSDK_HOME/comm_libs/mpi/include:$HPCSDK_HOME/comm_libs/nvshmem/include:$HPCSDK_HOME/comm_libs/nccl/include:$HPCSDK_HOME/math_libs/include:${CPATH}
128+
ENV CUDA_HOME=$HPCSDK_HOME/cuda
129+
ENV NVHPC_CUDA_HOME=$HPCSDK_HOME/cuda
130+
ENV CUDA_ROOT=$HPCSDK_HOME/cuda/bin
131+
ENV PATH=$HPCSDK_HOME/compilers/bin:$HPCSDK_HOME/cuda/bin:$HPCSDK_HOME/comm_libs/mpi/bin:${PATH}
132+
ENV LD_LIBRARY_PATH=$HPCSDK_HOME/cuda/lib:$HPCSDK_HOME/cuda/lib64:$HPCSDK_HOME/compilers/lib:$HPCSDK_HOME/math_libs/lib64:$HPCSDK_HOME/comm_libs/mpi/lib:$HPCSDK_CUPTI/lib64:bitcomp_DIR
133+
ENV CPATH=$HPCSDK_HOME/comm_libs/mpi/include:$HPCSDK_HOME/comm_libs/nvshmem/include:$HPCSDK_HOME/comm_libs/nccl/include:$HPCSDK_HOME/math_libs/include
111134

112135
# MPI
113136
RUN rm -f $HPCSDK_HOME/comm_libs/mpi && \

0 commit comments

Comments
 (0)