idefix-code
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 5 additions & 4 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 5 additions & 4 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 20 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 7 additions & 3 deletions b/‎CMakeLists.txt‎
Lines changed: 7 additions & 3 deletions
diff --git a/‎CPPLINT.cfg‎
Lines changed: 2 additions & 0 deletions b/‎CPPLINT.cfg‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎doc/source/conf.py‎
Lines changed: 1 addition & 1 deletion b/‎doc/source/conf.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/performances.rst‎
Lines changed: 3 additions & 3 deletions b/‎doc/source/performances.rst‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎doc/source/reference/idefix.ini.rst‎
Lines changed: 3 additions & 3 deletions b/‎doc/source/reference/idefix.ini.rst‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎doc/source/reference/makefile.rst‎
Lines changed: 33 additions & 9 deletions b/‎doc/source/reference/makefile.rst‎
Lines changed: 33 additions & 9 deletions
diff --git a/‎doc/source/reference/setup.cpp.rst‎
Lines changed: 1 addition & 1 deletion b/‎doc/source/reference/setup.cpp.rst‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pytools/idfx_io.py‎
Lines changed: 1 addition & 1 deletion b/‎pytools/idfx_io.py‎
Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@ exclude: "^(src/kokkos)"
 repos:
 
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v5.0.0
     hooks:
       - id: trailing-whitespace  # auto-fix trailing whitespaces
       - id: end-of-file-fixer  # add EOF "\n" if missing
@@ -23,7 +23,7 @@ repos:
 
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.0.278
+    rev: v0.6.9
     hooks:
       - id: ruff
         args:
@@ -33,14 +33,15 @@ repos:
         - F # pyflakes
         - B # flake8-bugbear
         - I # isort
+        - NPY # numpy-specific rules
 
   - repo: https://github.com/neutrinoceros/inifix
-    rev: v4.4.0
+    rev: v5.0.2
     hooks:
       - id: inifix-format
 
   - repo: https://github.com/Lucas-C/pre-commit-hooks
-    rev: v1.5.1
+    rev: v1.5.5
     hooks:
       - id: remove-tabs  # auto-fix tab/space mixing
       - id: insert-license
 
@@ -4,6 +4,26 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [2.1.02] 2024-10-24
+### Changed
+
+- Fix a bug that could lead to corrupted VTK file when using single precision arithmetic (#255)
+- Fix a bug that could lead to incorrect central mass gravitational potential upon restart (#287)
+- Changed the way magnetic field is reconstructed when using grid coarsening to reduce roundoff errors on div(B). This can have an impact on the results of models using grid coarsening+MHD (#284)
+- Ensure that XDMF outputs are precision agnostic (#261)
+- Bump up Kokkos version to 4.4.01 (#289)
+- Check that writes are successful in serial, otherwise throw an error (#260)
+- Ensure that shock flattening flags can be modified by user (#260)
+- Throw an error when user enables Fargo without enough DIMENSIONS (#250)
+- Fix linting errors following upgrade to cpplint 2.0 (#278, #279, #281)
+- Update idfx_io to numpy 2.0 (#283)
+
+### Added
+
+- Allow the user to define the grid and boundary conditions only on active dimensions (#274)
+- Configuration for Nvidia H100 on Jean Zay in the documentation
+
+
 ## [2.1.01] 2024-06-20
 ### Changed
 - Fix a bug that could result in too restrictive timesteps when resistivity is enabled (#244)
 
@@ -1,12 +1,14 @@
 cmake_minimum_required(VERSION 3.16)
-set(CMAKE_BUILD_TYPE Release)
+if (NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
+endif()
 set (CMAKE_CXX_STANDARD 17)
 
 set(Idefix_VERSION_MAJOR 2)
 set(Idefix_VERSION_MINOR 1)
-set(Idefix_VERSION_PATCH 01)
+set(Idefix_VERSION_PATCH 02)
 
-project (idefix VERSION 2.1.00)
+project (idefix VERSION 2.1.02)
 option(Idefix_MHD "enable MHD" OFF)
 option(Idefix_MPI "enable Message Passing Interface parallelisation" OFF)
 option(Idefix_HIGH_ORDER_FARGO "Force Fargo to use a PPM reconstruction scheme" OFF)
@@ -43,10 +45,12 @@ include(SetRequiredBuildSettingsForGCC8)
 #Idefix requires Cuda Lambdas (experimental)
 if(Kokkos_ENABLE_CUDA)
   set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "Idefix requires lambdas on Cuda" FORCE)
+  set(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC OFF CACHE BOOL "Disable Async malloc to avoid bugs on PSM2" FORCE)
 endif()
 
 # Add kokkos CMAKE files (required early since these set compiler options)
 add_subdirectory(src/kokkos build/kokkos)
+include_directories(${Kokkos_INCLUDE_DIRS_RET})
 
 # Add Idefix CXX Flags
 add_compile_options(${Idefix_CXX_FLAGS})
 
@@ -16,6 +16,8 @@ filter=-whitespace/comma  # 6027 errors
 filter=-whitespace/comments  # 881 errors
 filter=-whitespace/operators  # 5240 errors
 filter=-whitespace/parens  # 413 error
+filter=-whitespace/newline
+filter=-whitespace/indent_namespace
 filter=-readability/multiline_string
 filter=-build/include_subdir  # 296 errors
 #filter=-whitespace/end_of_line
@@ -23,7 +23,7 @@
 author = 'Geoffroy Lesur'
 
 # The full version, including alpha/beta/rc tags
-release = '2.1.01'
+release = '2.1.02'
 
 
 
 
@@ -4,9 +4,9 @@ Performances
 
 We report below the performances obtained on various architectures using Idefix. The reference test
 is the 3D MHD Orszag-Tang test problem with 2nd order reconstruction and uct_contact EMFS bundled in
-Idefix test suite, computed with a 128\ :sup:`3` resolution per MPI sub-domain on GPUs or 32\ :sup:`3`
-per MPI sub-domain on CPUs. All of the performances measures have been obtained enabling MPI on
-*one full node*, but we report here the performance *per GPU*
+Idefix test suite, disabling passive tracers. The test is computed with a 128\ :sup:`3` resolution per
+MPI sub-domain on GPUs or 32\ :sup:`3` per MPI sub-domain on CPUs. All of the performance measurements
+have been obtained enabling MPI on *one full node*, but we report here the performance *per GPU*
 (i.e. with 2 GCDs on AMD Mi250) or *per core* (on CPU), i.e. dividing the node performance by the number of GPU/core
 to simplify the comparison with other clusters.
 
 
@@ -16,7 +16,7 @@ allows for comments, which should start with ``#``.
 
 ``Grid`` section
 --------------------
-The grid section defines the grid total dimension. It consists of 3 entries ``X1-grid``, ``X2-grid`` and ``X3-grid``. Each entry defines the repartition of the grid points in the corresponding direction (the grid is always rectilinear).
+The grid section defines the grid total dimension. It consists of 3 entries ``X1-grid``, ``X2-grid`` (when DIMENSIONS>=2) and ``X3-grid`` (when DIMENSIONS=3). Each entry defines the repartition of the grid points in the corresponding direction (the grid is always rectilinear).
 Each entry defines a series of grid blocks which are concatenated along the direction. Each block in a direction can have a different spacing rule (uniform, log or stretched). The definition of the Grid entries is as follows
 
 +----------------------------+-------------------------+------------------------------+
@@ -332,8 +332,8 @@ this block is simply ignored.
 ------------------------
 
 This section describes the boundary conditions used by the code. There are 6 entries
-which need to be defined: ``X1-beg``, ``X2-beg``, ``X3-beg`` for the left boundaries in the direction X1, X2, X3,
-and ``X1-end``, ``X2-end``, ``X3-end`` for the right boundaries. Each boundary can be assigned the following types of conditions
+that need to be defined: ``X1-beg``, ``X2-beg``, ``X3-beg`` for the left boundaries in the direction X1, X2, X3,
+and ``X1-end``, ``X2-end``, ``X3-end`` for the right boundaries. ``X2`` boundaries are mandatory only when DIMENSIONS>=2 and ``X3`` when DIMENSIONS=3. Each boundary can be assigned the following types of conditions
 
 +----------------+------------------------------------------------------------------------------------------------------------------+
 | Boundary type  | Comment                                                                                                          |
 
@@ -125,37 +125,61 @@ Finally, *Idefix* can be configured to run on Mi250 by enabling HIP and the desi
 
 .. code-block:: bash
 
-    -DKokkos_ENABLE_HIP=ON -DKokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATION=ON -DKokkos_ARCH_VEGA90A=ON
+    -DKokkos_ENABLE_HIP=ON -DKokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS=ON -DKokkos_ARCH_VEGA90A=ON
 
 
 MPI (multi-GPU) can be enabled by adding ``-DIdefix_MPI=ON`` as usual.
 
-Jean Zay at IDRIS, Nvidia V100 and A100 GPUs
---------------------------------------------
+Jean Zay at IDRIS, Nvidia V100/A100/H100 GPUs
+---------------------------------------------
 
-We recommend the following modules and environement variables on Jean Zay:
+We recommend the following modules and environment variables on Jean Zay V100/A100:
 
 .. code-block:: bash
 
+    module load arch/a100 # ONLY for A100
     module load cuda/12.1.0
     module load gcc/12.2.0
     module load openmpi/4.1.1-cuda
-    module load cmake/3.18.0
+    module load cmake/3.25.2
+
+While for H100:
+
+.. code-block:: bash
+
+    module load arch/h100
+    module load cmake/3.30.1
+    module load cuda/12.1.0
+    module load openmpi/4.1.5-cuda
 
 *Idefix* can then be configured to run on Nvidia V100 with the following options to ccmake:
 
 .. code-block:: bash
 
-    -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_VOLTA70=ON -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF
+    -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_VOLTA70=ON
 
 While Ampere A100 GPUs are enabled with
 
 .. code-block:: bash
 
-    -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_AMPERE80=ON -DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF
+    -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_AMPERE80=ON
+
+And for H100 GPUs:
+
+.. code-block:: bash
+
+    -DKokkos_ENABLE_CUDA=ON -DKokkos_ARCH_HOPPER90=ON
+
+
+MPI (multi-GPU) can be enabled by adding ``-DIdefix_MPI=ON`` as usual.
+
+
+.. warning::
+
+  As of *Idefix* 2.1.02, we automatically disable Cuda Malloc async (``-DKokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC=OFF``). However, earlier versions of
+  *Idefix* require this flag to be passed explicitly to cmake, to prevent a bug when using PSM2 with async Cuda malloc that can lead to openmpi crashes or hangs on Jean Zay.
+
 
-MPI (multi-GPU) can be enabled by adding ``-DIdefix_MPI=ON`` as usual. The malloc async option is here to prevent a bug when using PSM2 with async
-Cuda malloc possibly leading to openmpi crash or hangs on Jean Zay.
 
 .. _setupSpecificOptions:
 
 
@@ -369,7 +369,7 @@ assuming we want to create a dump from ``mydump.dmp``:
             int iglob=i-2*d.beg[IDIR]+d.gbeg[IDIR];
             int jglob=j-2*d.beg[JDIR]+d.gbeg[JDIR];
             int kglob=k-2*d.beg[KDIR]+d.gbeg[KDIR];
-            d.Vs(BX1s,k,j,i) = image->arrays["Vs-BX1s"](kglob,jglob,iglob);
+            d.Vs(BX1s,k,j,i) = image.arrays["Vs-BX1s"](kglob,jglob,iglob);
     }}}
 
     // And so on for the other components
 
@@ -29,7 +29,7 @@ def __init__(self, fh, byteorder="little"):
         dims = []
         for dim in range(self.ndims):
             dims.append(int.from_bytes(fh.read(INT_SIZE), byteorder))
-        ntot = int(np.product(dims))
+        ntot = int(np.prod(dims))
         raw = struct.unpack(str(ntot) + "d", fh.read(DOUBLE_SIZE * ntot))
         self.array = np.asarray(raw).reshape(dims[::-1])