tests: Add test for memory estimate on devices

EdCaunt · EdCaunt · commit 3fce82bc3f00 · 2025-08-04T09:42:22.000+01:00
diff --git a/devito/operator/operator.py b/devito/operator/operator.py
@@ -1429,8 +1429,8 @@ def get_nbytes(obj):
                 else:
                     host += v
             elif i._mem_mapped:
-                if isinstance(self.platform, Device):
-                    device += v
+                # No need to add to device, as it will be counted
+                # by nbytes_consumed_memmapped
                 host += v
 
         return {disk_layer: 0, host_layer: host, device_layer: device}
diff --git a/tests/test_operator.py b/tests/test_operator.py
@@ -7,7 +7,14 @@
 import sympy
 
 import pytest
-from conftest import assert_structure, skipif
+
+# Try the package-relative import first so that classes in this file can be
+# imported for testing in PRO; fall back to the plain import otherwise
+try:
+    from ..conftest import assert_structure, skipif
+except ImportError:
+    from conftest import assert_structure, skipif
+
 from devito import (Grid, Eq, Operator, Constant, Function, TimeFunction,
                     SparseFunction, SparseTimeFunction, Dimension, error, SpaceDimension,
                     NODE, CELL, dimensions, configuration, TensorFunction,
@@ -2059,6 +2066,8 @@ def test_indirection(self):
 class TestEstimateMemory:
     """Tests for the Operator.estimate_memory() utility"""
 
+    _array_temp = "r0[x][y]"
+
     def parse_output(self, output, expected):
         """Parse estimate_memory machine-readable output"""
         # Check that no allocation occurs as estimate_memory should avoid data touch
@@ -2083,7 +2092,7 @@ def test_basic_usage(self, caplog, shape, dtype, so):
             op.estimate_memory(human_readable=False)
 
             # Check output of estimate_memory
-            host = reduce(mul, [s + 2*so for s in shape])*np.dtype(dtype).itemsize
+            host = reduce(mul, f.shape_allocated)*np.dtype(f.dtype).itemsize
             expected = ("Kernel", 0, host, 0)
             self.parse_output(caplog, expected)
 
@@ -2163,6 +2172,9 @@ def test_temp_array(self, caplog):
         g = TimeFunction(name='g', grid=grid, space_order=2)
         a = Function(name='a', grid=grid, space_order=2)
 
+        # Fake array allocated in Python land so that shape_allocated can be used
+        b = Function(name='b', grid=grid, space_order=0)
+
         # Reuse an expensive function to encourage generation of an array temp
         eq0 = Eq(f.forward, g + sympy.sin(a))
         eq1 = Eq(g.forward, f + sympy.sin(a))
@@ -2172,15 +2184,15 @@ def test_temp_array(self, caplog):
 
             # Regression to ensure this test functions as intended
             # Ensure an array temporary is created
-            assert "r0[x][y]" in str(op.ccode)
+            assert self._array_temp in str(op.ccode)
 
             op.estimate_memory(human_readable=False)
 
             check = sum(reduce(mul, func.shape_allocated)*np.dtype(func.dtype).itemsize
                         for func in (f, g, a))
 
             # Factor in the temp array
-            check += reduce(mul, a.shape)*np.dtype(a.dtype).itemsize
+            check += reduce(mul, b.shape_allocated)*np.dtype(a.dtype).itemsize
 
             expected = ("Kernel", 0, check, 0)
             self.parse_output(caplog, expected)
@@ -2217,4 +2229,25 @@ def test_overrides(self, caplog):
             expected = ("Kernel", 0, check, 0)
             self.parse_output(caplog, expected)
 
-    # Test with OpenACC
+    def test_device(self, caplog):
+        # Check the memory estimate for a device target. switchconfig is used so that
+        # this runs on all backends, reflecting expected usage: users are likely to
+        # run the estimate on an orchestration node, which may lack the intended
+        # hardware, and then use the output to decide which nodes to farm jobs out to.
+        grid = Grid(shape=(101, 101))
+
+        f = Function(name='f', grid=grid, space_order=2)
+
+        # Compiler is never invoked, so requesting an unavailable device should be fine
+        config = {'log_level': 'DEBUG', 'language': 'openacc',
+                  'platform': 'nvidiaX'}
+        with switchconfig(**config), caplog.at_level(logging.DEBUG):
+            op = Operator(Eq(f, 1))
+
+            op.estimate_memory(human_readable=False)
+
+            check = reduce(mul, f.shape_allocated)*np.dtype(f.dtype).itemsize
+
+            # Memory-mapped data: the same size is expected on both host and device
+            expected = ("Kernel", 0, check, check)
+            self.parse_output(caplog, expected)