tests: Update estimate-memory tests and consolidate boilerplate

EdCaunt · EdCaunt · commit 0d1acf6f5edc · 2025-08-04T09:44:35.000+01:00
diff --git a/.github/workflows/pytest-gpu.yml b/.github/workflows/pytest-gpu.yml
@@ -43,7 +43,7 @@ jobs:
         include:
           # -------------------- NVIDIA job --------------------
           - name: pytest-gpu-acc-nvidia
-            test_files: "tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openacc.py"
+            test_files: "tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openacc.py tests/test_operator.py::TestEstimateMemory"
             base: "devitocodes/bases:nvidia-nvc"
             runner_label: nvidiagpu
             test_drive_cmd: "nvidia-smi"
@@ -56,7 +56,7 @@ jobs:
 
           # -------------------- AMD job -----------------------
           - name: pytest-gpu-omp-amd
-            test_files: "tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openmp.py"
+            test_files: "tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openmp.py tests/test_operator.py::TestEstimateMemory"
             runner_label: amdgpu
             base: "devitocodes/bases:amd"
             test_drive_cmd: "rocm-smi"
diff --git a/devito/operator/operator.py b/devito/operator/operator.py
@@ -34,7 +34,7 @@
 from devito.tools import (DAG, OrderedSet, Signer, ReducerMap, as_mapper, as_tuple,
                           flatten, filter_sorted, frozendict, is_integer,
                           split, timed_pass, timed_region, contains_val,
-                          CacheInstances, MemoryEstimate, humanbytes)
+                          CacheInstances, MemoryEstimate)
 from devito.types import (Buffer, Evaluable, host_layer, device_layer,
                           disk_layer)
 from devito.types.dimension import Thickness
@@ -897,8 +897,6 @@ def estimate_memory(self, **kwargs):
 
         Parameters
         ----------
-        human_readable: bool
-            Return human-readable values, rather than raw byte counts. Default is False.
         **kwargs: dict
             As per `Operator.apply()`.
 
@@ -1341,6 +1339,36 @@ def saved_mapper(self):
 
         return mapper
 
+    @cached_property
+    def _op_symbols(self):
+        """Symbols in the Operator which may or may not carry data"""
+        return FindSymbols().visit(self.op)
+
+    def _apply_override(self, i):
+        try:
+            return self.get(i.name, i)._obj
+        except AttributeError:
+            return self.get(i.name, i)
+
+    def _get_nbytes(self, i):
+        """
+        Extract the allocated size of a symbol, accounting for any
+        overrides.
+        """
+        obj = self._apply_override(i)
+        try:
+            # Non-regular AbstractFunction (compressed, etc)
+            nbytes = obj.nbytes_max
+        except AttributeError:
+            # Garden-variety AbstractFunction
+            nbytes = obj.nbytes
+
+        # Could nominally have symbolic nbytes at this point
+        if isinstance(nbytes, SympyBasic):
+            return subs_op_args(nbytes, self)
+
+        return nbytes
+
     @cached_property
     def nbytes_avail_mapper(self):
         """
@@ -1400,32 +1428,14 @@ def nbytes_consumed_functions(self):
         Memory consumed on both device and host by Functions in the
         corresponding Operator.
         """
-        def get_nbytes(obj):
-            if obj.is_regular:
-                nbytes = obj.nbytes
-            else:
-                nbytes = obj.nbytes_max
-
-            # Could nominally have symbolic nbytes at this point
-            if isinstance(nbytes, SympyBasic):
-                return subs_op_args(nbytes, self)
-            else:
-                return nbytes
-
         host = 0
         device = 0
-
         # Filter out arrays, aliases and non-AbstractFunction objects
         op_symbols = [i for i in self._op_symbols if i.is_AbstractFunction
                       and not i.is_ArrayBasic and not i.alias]
 
         for i in op_symbols:
-            try:
-                # TODO: is _obj even needed?
-                v = get_nbytes(self[i.name]._obj)
-            except AttributeError:
-                v = get_nbytes(self.get(i.name, i))
-
+            v = self._get_nbytes(i)
             if i._mem_host or i._mem_mapped:
                 # No need to add to device , as it will be counted
                 # by nbytes_consumed_memmapped
@@ -1446,7 +1456,6 @@ def nbytes_consumed_arrays(self):
         """
         host = 0
         device = 0
-
         # Temporaries such as Arrays are allocated and deallocated on-the-fly
         # while in C land, so they need to be accounted for as well
         for i in self._op_symbols:
@@ -1492,11 +1501,7 @@ def nbytes_consumed_memmapped(self):
                     continue
                 try:
                     if i._mem_mapped:
-                        try:
-                            v = self[i.name]._obj.nbytes
-                        except AttributeError:
-                            v = i.nbytes
-                        device += v
+                        device += self._get_nbytes(i)
                 except AttributeError:
                     pass
 
@@ -1511,22 +1516,16 @@ def nbytes_snapshots(self):
         disk = 0
         for i in op_symbols:
             try:
-                v = self[i.name]._obj
-            except AttributeError:
-                v = self.get(i.name, i)
-
-            try:
-                disk += v.size_snapshot*v._time_size_ideal*np.dtype(v.dtype).itemsize
+                if i._child not in op_symbols:
+                    # Use only the "innermost" layer to avoid counting snapshots
+                    # twice
+                    v = self._apply_override(i)
+                    disk += v.size_snapshot*v._time_size_ideal*np.dtype(v.dtype).itemsize
             except AttributeError:
                 pass
 
         return {disk_layer: disk, host_layer: 0, device_layer: 0}
 
-    @cached_property
-    def _op_symbols(self):
-        """Symbols in the Operator which may or may not carry data"""
-        return FindSymbols().visit(self.op)
-
 
 def parse_kwargs(**kwargs):
     """
diff --git a/devito/tools/data_structures.py b/devito/tools/data_structures.py
@@ -662,10 +662,16 @@ def __hash__(self):
 
 class MemoryEstimate(frozendict):
     """
-    An immutable wrapper for a memory estimate, showing the
-    various values.
+    An immutable mapper for a memory estimate, providing the estimated memory
+    consumption across host, device, and so forth.
 
-    TODO: Finish this docstring
+    Attributes
+    ----------
+    name: str
+        The name of the Operator for which this estimate was generated
+    human_readable: frozendict
+        The mapper, albeit with human-readable memory usage (MB, GB, etc)
+        rather than raw bytes.
     """
 
     def __init__(self, *args, **kwargs):
diff --git a/tests/test_operator.py b/tests/test_operator.py
@@ -20,7 +20,8 @@
                     NODE, CELL, dimensions, configuration, TensorFunction,
                     TensorTimeFunction, VectorFunction, VectorTimeFunction,
                     div, grad, switchconfig, exp, Buffer)
-from devito import  Inc, Le, Lt, Ge, Gt  # noqa
+from devito import  Inc, Le, Lt, Ge, Gt, sin  # noqa
+from devito.arch.archinfo import Device
 from devito.exceptions import InvalidOperator
 from devito.finite_differences.differentiable import diff2sympy
 from devito.ir.equations import ClusterizedEq
@@ -2066,12 +2067,17 @@ def test_indirection(self):
 class TestEstimateMemory:
     """Tests for the Operator.estimate_memory() utility"""
 
-    _array_temp = "r0L0(x, y)" if "CXX" in configuration['language'] else "r0[x][y]"
+    _array_temp = "r0L0(" if "CXX" in configuration['language'] else "r0["
 
-    def parse_output(self, summary, expected):
-        """Parse estimate_memory machine-readable output"""
+    def parse_output(self, summary, check, arrays=0):
+        device = isinstance(configuration['platform'], Device)
+        expected = ((check, check + arrays) if device else (check + arrays, 0))
         assert (summary['host'], summary['device']) == expected
 
+    def sum_sizes(self, funcs):
+        return sum(reduce(mul, func.shape_allocated)*np.dtype(func.dtype).itemsize
+                   for func in funcs)
+
     @pytest.mark.parametrize('shape', [(11,), (101, 101), (101, 101, 101)])
     @pytest.mark.parametrize('dtype', [np.int8, np.int16, np.float32,
                                        np.float32, np.complex64])
@@ -2088,8 +2094,7 @@ def test_basic_usage(self, caplog, shape, dtype, so):
 
             # Check output of estimate_memory
             host = reduce(mul, f.shape_allocated)*np.dtype(f.dtype).itemsize
-            expected = (host, 0)
-            self.parse_output(summary, expected)
+            self.parse_output(summary, host)
 
     def test_multiple_objects(self, caplog):
         grid = Grid(shape=(101, 101))
@@ -2101,10 +2106,8 @@ def test_multiple_objects(self, caplog):
             summary = op.estimate_memory()
             assert "Allocating" not in caplog.text
 
-            check = sum(reduce(mul, func.shape_allocated)*np.dtype(func.dtype).itemsize
-                        for func in (f, g))
-            expected = (check, 0)
-            self.parse_output(summary, expected)
+            check = self.sum_sizes((f, g))
+            self.parse_output(summary, check)
 
     @pytest.mark.parametrize('time', [True, False])
     def test_sparse(self, caplog, time):
@@ -2121,10 +2124,8 @@ def test_sparse(self, caplog, time):
             summary = op.estimate_memory()
             assert "Allocating" not in caplog.text
 
-            check = sum(reduce(mul, func.shape_allocated)*np.dtype(func.dtype).itemsize
-                        for func in (f, src, src.coordinates))
-            expected = (check, 0)
-            self.parse_output(summary, expected)
+            check = self.sum_sizes((f, src, src.coordinates))
+            self.parse_output(summary, check)
 
     @pytest.mark.parametrize('save', [None, Buffer(3), 10])
     def test_timefunction(self, caplog, save):
@@ -2136,8 +2137,7 @@ def test_timefunction(self, caplog, save):
             summary = op.estimate_memory()
             assert "Allocating" not in caplog.text
             check = reduce(mul, f.shape_allocated)*np.dtype(f.dtype).itemsize
-            expected = (check, 0)
-            self.parse_output(summary, expected)
+            self.parse_output(summary, check)
 
     def test_mashup(self, caplog):
         grid = Grid(shape=(101, 101))
@@ -2158,11 +2158,8 @@ def test_mashup(self, caplog):
             summary = op.estimate_memory()
             assert "Allocating" not in caplog.text
 
-            check = sum(reduce(mul, func.shape_allocated)*np.dtype(func.dtype).itemsize
-                        for func in (f, g, src0, src0.coordinates,
-                                     src1, src1.coordinates))
-            expected = (check, 0)
-            self.parse_output(summary, expected)
+            check = self.sum_sizes((f, g, src0, src0.coordinates, src1, src1.coordinates))
+            self.parse_output(summary, check)
 
     @pytest.mark.parametrize('override', [True, False])
     def test_temp_array(self, caplog, override):
@@ -2188,8 +2185,8 @@ def test_temp_array(self, caplog, override):
             b = Function(name='b', grid=grid, space_order=0)
 
         # Reuse an expensive function to encourage generation of an array temp
-        eq0 = Eq(f.forward, g + sympy.sin(a))
-        eq1 = Eq(g.forward, f + sympy.sin(a))
+        eq0 = Eq(f.forward, g + sin(a).dx)
+        eq1 = Eq(g.forward, f + sin(a).dx)
 
         with switchconfig(log_level='DEBUG'), caplog.at_level(logging.DEBUG):
             op = Operator([eq0, eq1])
@@ -2201,37 +2198,32 @@ def test_temp_array(self, caplog, override):
             summary = op.estimate_memory(**kwargs)
             assert "Allocating" not in caplog.text
 
-            check = sum(reduce(mul, func.shape_allocated)*np.dtype(func.dtype).itemsize
-                        for func in funcs)
+            check = self.sum_sizes(funcs)
 
             # Factor in the temp array
-            check += reduce(mul, b.shape_allocated)*np.dtype(b.dtype).itemsize
-
-            expected = (check, 0)
-            self.parse_output(summary, expected)
+            # Note: the temp array is one point larger in the x dimension
+            # because of the derivative.
+            array_check = (b.shape_allocated[0]+1)*b.shape_allocated[1]
+            array_check *= np.dtype(b.dtype).itemsize
+            self.parse_output(summary, check, arrays=array_check)
 
     def test_overrides(self, caplog):
-        # TODO: Consolidate this boilerplate
-        grid0 = Grid(shape=(101, 101))
+        def setup(size, npoint, nt, counter):
+            grid = Grid(shape=(size, size))
+            # Original fields
+            f = Function(name=f'f{counter}', grid=grid, space_order=4)
+            tf = TimeFunction(name=f'tf{counter}', grid=grid, space_order=4)
+            s = SparseFunction(name=f's{counter}', grid=grid, npoint=npoint)
+            st = SparseTimeFunction(name=f'st{counter}', grid=grid, npoint=npoint, nt=nt)
+
+            return f, tf, s, st
+
         # Original fields
-        f0 = Function(name='f0', grid=grid0, space_order=4)
-        tf0 = TimeFunction(name='tf0', grid=grid0, space_order=4)
-        s0 = SparseFunction(name='s0', grid=grid0, npoint=100)
-        st0 = SparseTimeFunction(name='st0', grid=grid0, npoint=100, nt=10)
-
-        grid1 = Grid(shape=(201, 201))  # Bigger grid so overrides are distinct
-        # Replacement fields
-        f1 = Function(name='f1', grid=grid1, space_order=4)
-        tf1 = TimeFunction(name='tf1', grid=grid1, space_order=4)
-        s1 = SparseFunction(name='s1', grid=grid1, npoint=200)
-        st1 = SparseTimeFunction(name='st1', grid=grid1, npoint=200, nt=20)
-
-        grid2 = Grid(shape=(51, 51))  # Smaller grid so overrides are distinct
-        # Alternative replacement fields
-        f2 = Function(name='f2', grid=grid2, space_order=4)
-        tf2 = TimeFunction(name='tf2', grid=grid2, space_order=4)
-        s2 = SparseFunction(name='s2', grid=grid2, npoint=50)
-        st2 = SparseTimeFunction(name='st2', grid=grid2, npoint=50, nt=5)
+        f0, tf0, s0, st0 = setup(101, 100, 10, 0)
+        # Replacement fields with bigger grid, etc
+        f1, tf1, s1, st1 = setup(201, 200, 20, 1)
+        # Replacement fields with smaller grid, etc
+        f2, tf2, s2, st2 = setup(51, 50, 5, 2)
 
         eq0 = Eq(f0, 1)
         eq1 = Eq(tf0, 1)
@@ -2244,21 +2236,15 @@ def test_overrides(self, caplog):
             # Apply overrides for the check
             summary0 = op.estimate_memory(f0=f1, tf0=tf1, s0=s1, st0=st1)
 
-            check0 = sum(reduce(mul, func.shape_allocated)*np.dtype(func.dtype).itemsize
-                         for func in (f1, tf1, s1, s1.coordinates, st1, st1.coordinates))
-
-            expected0 = (check0, 0)
-            self.parse_output(summary0, expected0)
+            check0 = self.sum_sizes((f1, tf1, s1, s1.coordinates, st1, st1.coordinates))
+            self.parse_output(summary0, check0)
 
             # Check with a second set of overrides
             summary1 = op.estimate_memory(f0=f2, tf0=tf2, s0=s2, st0=st2)
             assert "Allocating" not in caplog.text
 
-            check1 = sum(reduce(mul, func.shape_allocated)*np.dtype(func.dtype).itemsize
-                         for func in (f2, tf2, s2, s2.coordinates, st2, st2.coordinates))
-
-            expected1 = (check1, 0)
-            self.parse_output(summary1, expected1)
+            check1 = self.sum_sizes((f2, tf2, s2, s2.coordinates, st2, st2.coordinates))
+            self.parse_output(summary1, check1)
 
     def test_device(self, caplog):
         # Note: this uses switchconfig and runs on all backends to reflect expected
@@ -2269,7 +2255,7 @@ def test_device(self, caplog):
 
         f = Function(name='f', grid=grid, space_order=2)
 
-        # Compiler is never invoked, so this should be fine
+        # Compiler is never invoked, so this is fine
         config = {'log_level': 'DEBUG', 'language': 'openacc',
                   'platform': 'nvidiaX'}
         with switchconfig(**config), caplog.at_level(logging.DEBUG):
@@ -2281,5 +2267,4 @@ def test_device(self, caplog):
             check = reduce(mul, f.shape_allocated)*np.dtype(f.dtype).itemsize
 
             # Matching memory allocated both on host and device for memmap
-            expected = (check, check)
-            self.parse_output(summary, expected)
+            self.parse_output(summary, check)