Skip to content

Commit d0e8220

Browse files
glesurnicolasaunaivdbmaMarc Van den Bosschedutta-alankar
authored
V2.1.02 RC (#288)
## [2.1.02] 2024-10-24 ### Changed - Fix a bug that could lead to corrupted VTK file when using single precision arithmetic (#255) - Fix a bug that could lead to incorrect central mass gravitational potential upon restart (#287) - Changed the way magnetic field is reconstructed when using grid coarsening to reduce roundoff errors on div(B). This can have an impact on the results of models using grid coarsening+MHD (#284) - Ensure that XDMF outputs are precision agnostic (#261) - Bump up Kokkos version to 4.4.01 (#289) - Check that writes are successfull in serial, otherwise throw an error (#260) - Ensure that shock flattening flags can be modified by user (#260) - Throw an error when user enables Fargo without enough DIMENSIONS (#250) - Fix linting errors following upgrade to cpplint 2.0 (#278, #279, #281) - Update idfx_io to numpy 2.0 (#283) ### Added - Allow the user to define the grid and boundary conditions only on active dimensions (#274) - Configuration for Nvidia H100 on Jean Zay in the documentation --------- Co-authored-by: Nicolas Aunai <nicolas.aunai@lpp.polytechnique.fr> Co-authored-by: vdbma <93188557+vdbma@users.noreply.github.com> Co-authored-by: Marc Van den Bossche <marc.vanden-bossche@univ-grenoble-alpes.fr> Co-authored-by: Alankar Dutta <dutta.alankar@gmail.com> Co-authored-by: Alankar Dutta <alankard@MB-167.local> Co-authored-by: ThomasJannaudCAM <159052976+ThomasJannaudCAM@users.noreply.github.com> Co-authored-by: Clément Robert <cr52@protonmail.com> Co-authored-by: marc <vandenbossche.marc@hotmail.com> Co-authored-by: Antonin Borderies <89980449+Anto6453@users.noreply.github.com>
1 parent 2f15373 commit d0e8220

103 files changed

Lines changed: 387 additions & 432 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.pre-commit-config.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ exclude: "^(src/kokkos)"
33
repos:
44

55
- repo: https://github.com/pre-commit/pre-commit-hooks
6-
rev: v4.4.0
6+
rev: v5.0.0
77
hooks:
88
- id: trailing-whitespace # auto-fix trailing whitespaces
99
- id: end-of-file-fixer # add EOF "\n" if missing
@@ -23,7 +23,7 @@ repos:
2323

2424

2525
- repo: https://github.com/astral-sh/ruff-pre-commit
26-
rev: v0.0.278
26+
rev: v0.6.9
2727
hooks:
2828
- id: ruff
2929
args:
@@ -33,14 +33,15 @@ repos:
3333
- F # pyflakes
3434
- B # flake8-bugbear
3535
- I # isort
36+
- NPY # numpy-specific rules
3637

3738
- repo: https://github.com/neutrinoceros/inifix
38-
rev: v4.4.0
39+
rev: v5.0.2
3940
hooks:
4041
- id: inifix-format
4142

4243
- repo: https://github.com/Lucas-C/pre-commit-hooks
43-
rev: v1.5.1
44+
rev: v1.5.5
4445
hooks:
4546
- id: remove-tabs # auto-fix tab/space mixing
4647
- id: insert-license

CHANGELOG.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,26 @@ All notable changes to this project will be documented in this file.
44
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

7+
## [2.1.02] 2024-10-24
8+
### Changed
9+
10+
- Fix a bug that could lead to corrupted VTK file when using single precision arithmetic (#255)
11+
- Fix a bug that could lead to incorrect central mass gravitational potential upon restart (#287)
12+
- Changed the way magnetic field is reconstructed when using grid coarsening to reduce roundoff errors on div(B). This can have an impact on the results of models using grid coarsening+MHD (#284)
13+
- Ensure that XDMF outputs are precision agnostic (#261)
14+
- Bump up Kokkos version to 4.4.01 (#289)
15+
- Check that writes are successful in serial, otherwise throw an error (#260)
16+
- Ensure that shock flattening flags can be modified by user (#260)
17+
- Throw an error when user enables Fargo without enough DIMENSIONS (#250)
18+
- Fix linting errors following upgrade to cpplint 2.0 (#278, #279, #281)
19+
- Update idfx_io to numpy 2.0 (#283)
20+
21+
### Added
22+
23+
- Allow the user to define the grid and boundary conditions only on active dimensions (#274)
24+
- Configuration for Nvidia H100 on Jean Zay in the documentation
25+
26+
727
## [2.1.01] 2024-06-20
828
### Changed
929
- Fix a bug that could result in too restrictive timesteps when resistivity is enabled (#244)

CMakeLists.txt

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
cmake_minimum_required(VERSION 3.16)
2-
set(CMAKE_BUILD_TYPE Release)
2+
if (NOT CMAKE_BUILD_TYPE)
3+
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
4+
endif()
35
set (CMAKE_CXX_STANDARD 17)
46

57
set(Idefix_VERSION_MAJOR 2)
68
set(Idefix_VERSION_MINOR 1)
7-
set(Idefix_VERSION_PATCH 01)
9+
set(Idefix_VERSION_PATCH 02)
810

9-
project (idefix VERSION 2.1.00)
11+
project (idefix VERSION 2.1.02)
1012
option(Idefix_MHD "enable MHD" OFF)
1113
option(Idefix_MPI "enable Message Passing Interface parallelisation" OFF)
1214
option(Idefix_HIGH_ORDER_FARGO "Force Fargo to use a PPM reconstruction scheme" OFF)
@@ -43,10 +45,12 @@ include(SetRequiredBuildSettingsForGCC8)
4345
#Idefix requires Cuda Lambdas (experimental)
4446
if(Kokkos_ENABLE_CUDA)
4547
set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "Idefix requires lambdas on Cuda" FORCE)
48+
set(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC OFF CACHE BOOL "Disable Async malloc to avoid bugs on PSM2" FORCE)
4649
endif()
4750

4851
# Add kokkos CMAKE files (required early since these set compiler options)
4952
add_subdirectory(src/kokkos build/kokkos)
53+
include_directories(${Kokkos_INCLUDE_DIRS_RET})
5054

5155
# Add Idefix CXX Flags
5256
add_compile_options(${Idefix_CXX_FLAGS})

CPPLINT.cfg

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ filter=-whitespace/comma # 6027 errors
1616
filter=-whitespace/comments # 881 errors
1717
filter=-whitespace/operators # 5240 errors
1818
filter=-whitespace/parens # 413 error
19+
filter=-whitespace/newline
20+
filter=-whitespace/indent_namespace
1921
filter=-readability/multiline_string
2022
filter=-build/include_subdir # 296 errors
2123
#filter=-whitespace/end_of_line

doc/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
author = 'Geoffroy Lesur'
2424

2525
# The full version, including alpha/beta/rc tags
26-
release = '2.1.01'
26+
release = '2.1.02'
2727

2828

2929

doc/source/performances.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ Performances
44

55
We report below the performances obtained on various architectures using Idefix. The reference test
66
is the 3D MHD Orszag-Tang test problem with 2nd order reconstruction and uct_contact EMFS bundled in
7-
Idefix test suite, computed with a 128\ :sup:`3` resolution per MPI sub-domain on GPUs or 32\ :sup:`3`
8-
per MPI sub-domain on CPUs. All of the performances measures have been obtained enabling MPI on
9-
*one full node*, but we report here the performance *per GPU*
7+
Idefix test suite, disabling passive tracers. The test is computed with a 128\ :sup:`3` resolution per
8+
MPI sub-domain on GPUs or 32\ :sup:`3` per MPI sub-domain on CPUs. All of the performance measures
9+
have been obtained enabling MPI on *one full node*, but we report here the performance *per GPU*
1010
(i.e. with 2 GCDs on AMD Mi250) or *per core* (on CPU), i.e. dividing the node performance by the number of GPU/core
1111
to simplify the comparison with other clusters.
1212

doc/source/reference/idefix.ini.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ allows for comments, which should start with ``#``.
1616

1717
``Grid`` section
1818
--------------------
19-
The grid section defines the grid total dimension. It consists of 3 entries ``X1-grid``, ``X2-grid`` and ``X3-grid``. Each entry defines the repartition of the grid points in the corresponding direction (the grid is always rectilinear).
19+
The grid section defines the grid total dimension. It consists of 3 entries ``X1-grid``, ``X2-grid`` (when DIMENSIONS>=2) and ``X3-grid`` (when DIMENSIONS=3). Each entry defines the repartition of the grid points in the corresponding direction (the grid is always rectilinear).
2020
Each entry defines a series of grid blocks which are concatenated along the direction. Each block in a direction can have a different spacing rule (uniform, log or stretched). The definition of the Grid entries is as follows
2121

2222
+----------------------------+-------------------------+------------------------------+
@@ -332,8 +332,8 @@ this block is simply ignored.
332332
------------------------
333333

334334
This section describes the boundary conditions used by the code. There are 6 entries
335-
which need to be defined: ``X1-beg``, ``X2-beg``, ``X3-beg`` for the left boundaries in the direction X1, X2, X3,
336-
and ``X1-end``, ``X2-end``, ``X3-end`` for the right boundaries. Each boundary can be assigned the following types of conditions
335+
that need to be defined: ``X1-beg``, ``X2-beg``, ``X3-beg`` for the left boundaries in the direction X1, X2, X3,
336+
and ``X1-end``, ``X2-end``, ``X3-end`` for the right boundaries. ``X2`` boundaries are mandatory only when DIMENSIONS>=2 and ``X3`` when DIMENSIONS=3. Each boundary can be assigned the following types of conditions
337337

338338
+----------------+------------------------------------------------------------------------------------------------------------------+
339339
| Boundary type | Comment |

doc/source/reference/makefile.rst

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -125,37 +125,61 @@ Finally, *Idefix* can be configured to run on Mi250 by enabling HIP and the desi
125125

126126
.. code-block:: bash
127127
128-
-DKokkos_ENABLE_HIP=ON -DKokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATION=ON -DKokkos_ARCH_VEGA90A=ON
128+
-DKokkos_ENABLE_HIP=ON -DKokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS=ON -DKokkos_ARCH_VEGA90A=ON
129129
130130
131131
MPI (multi-GPU) can be enabled by adding ``-DIdefix_MPI=ON`` as usual.
132132

133-
Jean Zay at IDRIS, Nvidia V100 and A100 GPUs
134-
--------------------------------------------
133+
Jean Zay at IDRIS, Nvidia V100/A100/H100 GPUs
134+
---------------------------------------------
135135

136-
We recommend the following modules and environement variables on Jean Zay:
136+
We recommend the following modules and environment variables on Jean Zay V100/A100:
137137

138138
.. code-block:: bash
139139
140+
module load arch/a100 # ONLY for A100
140141
module load cuda/12.1.0
141142
module load gcc/12.2.0
142143
module load openmpi/4.1.1-cuda
143-
module load cmake/3.18.0
144+
module load cmake/3.25.2
145+
146+
While for H100:
147+
148+
.. code-block:: bash
149+
150+
module load arch/h100
151+
module load cmake/3.30.1
152+
module load cuda/12.1.0
153+
module load openmpi/4.1.5-cuda
144154
145155
*Idefix* can then be configured to run on Nvidia V100 with the following options to ccmake:
146156

147157
.. code-block:: bash
148158
149-
-DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_VOLTA70=ON -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF
159+
-DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_VOLTA70=ON
150160
151161
While Ampere A100 GPUs are enabled with
152162

153163
.. code-block:: bash
154164
155-
-DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_AMPERE80=ON -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF
165+
-DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_AMPERE80=ON
166+
167+
And for H100 GPUs:
168+
169+
.. code-block:: bash
170+
171+
-DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_HOPPER90=ON
172+
173+
174+
MPI (multi-GPU) can be enabled by adding ``-DIdefix_MPI=ON`` as usual.
175+
176+
177+
.. warning::
178+
179+
As of *Idefix* 2.1.02, we automatically disable Cuda Malloc async (``-DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF``). However, earlier versions of
180+
*Idefix* require this flag when calling cmake to prevent a bug when using PSM2 with async Cuda malloc, which can lead to openmpi crashes or hangs on Jean Zay.
181+
156182

157-
MPI (multi-GPU) can be enabled by adding ``-DIdefix_MPI=ON`` as usual. The malloc async option is here to prevent a bug when using PSM2 with async
158-
Cuda malloc possibly leading to openmpi crash or hangs on Jean Zay.
159183

160184
.. _setupSpecificOptions:
161185

doc/source/reference/setup.cpp.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ assuming we want to create a dump from ``mydump.dmp``:
369369
int iglob=i-2*d.beg[IDIR]+d.gbeg[IDIR];
370370
int jglob=j-2*d.beg[JDIR]+d.gbeg[JDIR];
371371
int kglob=k-2*d.beg[KDIR]+d.gbeg[KDIR];
372-
d.Vs(BX1s,k,j,i) = image->arrays["Vs-BX1s"](kglob,jglob,iglob);
372+
d.Vs(BX1s,k,j,i) = image.arrays["Vs-BX1s"](kglob,jglob,iglob);
373373
}}}
374374

375375
// And so on for the other components

pytools/idfx_io.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def __init__(self, fh, byteorder="little"):
2929
dims = []
3030
for dim in range(self.ndims):
3131
dims.append(int.from_bytes(fh.read(INT_SIZE), byteorder))
32-
ntot = int(np.product(dims))
32+
ntot = int(np.prod(dims))
3333
raw = struct.unpack(str(ntot) + "d", fh.read(DOUBLE_SIZE * ntot))
3434
self.array = np.asarray(raw).reshape(dims[::-1])
3535

0 commit comments

Comments
 (0)