Skip to content

Commit b19e707

Browse files
authored
Merge pull request #2664 from devitocodes/index-mode
compiler: fix temp/array alloc
2 parents 598fd62 + e906679 commit b19e707

8 files changed

Lines changed: 70 additions & 34 deletions

File tree

devito/arch/compiler.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,7 @@ def __init_finalize__(self, **kwargs):
468468
# they support `#pragma omp simd`
469469
self.ldflags += ['-fopenmp']
470470
except (TypeError, ValueError):
471-
if language == 'openmp':
471+
if 'openmp' in language:
472472
self.ldflags += ['-fopenmp']
473473

474474
def __lookup_cmds__(self):
@@ -517,7 +517,7 @@ def __init_finalize__(self, **kwargs):
517517
# The compiler can be installed with Homebrew or can be built from scratch.
518518
# Check if installed and set compiler flags accordingly
519519
llvmm1 = get_m1_llvm_path(language)
520-
if llvmm1 and language == 'openmp':
520+
if llvmm1 and 'openmp' in language:
521521
mx = platform.march
522522
self.ldflags += [f'-mcpu=apple-{mx}',
523523
'-fopenmp', f'-L{llvmm1["libs"]}']
@@ -528,7 +528,7 @@ def __init_finalize__(self, **kwargs):
528528
self.cflags += ['-mcpu=native']
529529
else:
530530
self.cflags += ['-march=native']
531-
if language == 'openmp':
531+
if 'openmp' in language:
532532
self.ldflags += ['-fopenmp']
533533

534534
def __lookup_cmds__(self):
@@ -562,7 +562,7 @@ def __init_finalize__(self, **kwargs):
562562
if not configuration['safe-math']:
563563
self.cflags.append('-ffast-math')
564564

565-
if language == 'openmp':
565+
if 'openmp' in language:
566566
self.ldflags += ['-fopenmp']
567567

568568
if isinstance(platform, NvidiaDevice):
@@ -621,10 +621,10 @@ def __init_finalize__(self, **kwargs):
621621

622622
if language == 'openacc':
623623
self.cflags.extend(['-mp', '-acc:gpu'])
624-
elif language == 'openmp':
624+
elif 'openmp' in language:
625625
self.cflags.extend(['-mp=gpu'])
626626
elif isinstance(platform, Cpu64):
627-
if language == 'openmp':
627+
if 'openmp' in language:
628628
self.cflags.append('-mp')
629629
if isinstance(platform, NvidiaArm):
630630
self.cflags.append(f'-mcpu={platform.march}')
@@ -770,7 +770,7 @@ def __init_finalize__(self, **kwargs):
770770
# Systematically use 512-bit vectors if avx512 is available.
771771
self.cflags.append("-qopt-zmm-usage=high")
772772

773-
if language == 'openmp':
773+
if 'openmp' in language:
774774
self.ldflags.append('-qopenmp')
775775

776776
if kwargs.get('mpi'):
@@ -841,7 +841,7 @@ def __init_finalize__(self, **kwargs):
841841
if language == 'sycl':
842842
warning(f"Use SyclCompiler (`sycl`) to jit-compile sycl, not {self.name}")
843843

844-
elif language == 'openmp':
844+
elif 'openmp' in language:
845845
# Earlier versions to OneAPI 2023.2.0 (clang17 underneath), have an
846846
# OpenMP bug concerning reductions, hence with them we're forced to
847847
# use the obsolete -fopenmp

devito/ir/cgen/printer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ def _print_UnaryOp(self, expr, op=None, parenthesize=False):
370370

371371
def _print_Cast(self, expr):
372372
cast = f'({self._print(expr._C_ctype)}{self._print(expr.stars)})'
373-
return self._print_UnaryOp(expr, op=cast)
373+
return self._print_UnaryOp(expr, op=cast, parenthesize=not expr.base.is_Atom)
374374

375375
def _print_ComponentAccess(self, expr):
376376
return f"{self._print(expr.base)}.{expr.sindex}"

devito/passes/iet/definitions.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from devito.passes.iet.langbase import LangBB
2020
from devito.symbolics import (
2121
Byref, DefFunction, FieldFromPointer, IndexedPointer, ListInitializer,
22-
SizeOf, VOID, pow_to_mul, unevaluate
22+
SizeOf, VOID, pow_to_mul, unevaluate, as_long
2323
)
2424
from devito.tools import as_mapper, as_list, as_tuple, filter_sorted, flatten
2525
from devito.types import (
@@ -136,7 +136,7 @@ def _alloc_array_on_global_mem(self, site, obj, storage):
136136

137137
# Copy input array into global array
138138
name = self.sregistry.make_name(prefix='init_global')
139-
nbytes = SizeOf(obj._C_typedata)*obj.size
139+
nbytes = SizeOf(obj._C_typedata)*as_long(obj.size)
140140
body = [Definition(src),
141141
self.langbb['alloc-global-symbol'](obj.indexed, src.indexed, nbytes)]
142142
efunc = make_callable(name, body)
@@ -159,7 +159,7 @@ def _alloc_host_array_on_high_bw_mem(self, site, obj, storage, *args):
159159

160160
memptr = VOID(Byref(obj._C_symbol), '**')
161161
alignment = obj._data_alignment
162-
nbytes = SizeOf(obj._C_typedata)*obj.size
162+
nbytes = SizeOf(obj._C_typedata)*as_long(obj.size)
163163
alloc = self.langbb['host-alloc'](memptr, alignment, nbytes)
164164

165165
free = self.langbb['host-free'](obj._C_symbol)
@@ -358,15 +358,15 @@ def _alloc_pointed_array_on_high_bw_mem(self, site, obj, storage):
358358

359359
memptr = VOID(Byref(obj._C_symbol), '**')
360360
alignment = obj._data_alignment
361-
nbytes = SizeOf(obj._C_typedata, stars='*')*obj.dim.symbolic_size
361+
nbytes = SizeOf(obj._C_typedata, stars='*')*as_long(obj.dim.symbolic_size)
362362
alloc0 = self.langbb['host-alloc'](memptr, alignment, nbytes)
363363

364364
free0 = self.langbb['host-free'](obj._C_symbol)
365365

366366
# The pointee Array
367367
pobj = IndexedPointer(obj._C_symbol, obj.dim)
368368
memptr = VOID(Byref(pobj), '**')
369-
nbytes = SizeOf(obj._C_typedata)*obj.array.size
369+
nbytes = SizeOf(obj._C_typedata)*as_long(obj.array.size)
370370
alloc1 = self.langbb['host-alloc'](memptr, alignment, nbytes)
371371

372372
free1 = self.langbb['host-free'](pobj)
@@ -551,10 +551,10 @@ def process(self, graph):
551551

552552
class DeviceAwareDataManager(DataManager):
553553

554-
def __init__(self, **kwargs):
555-
self.gpu_fit = kwargs['options']['gpu-fit']
556-
self.gpu_create = kwargs['options']['gpu-create']
557-
self.pmode = kwargs['options'].get('place-transfers')
554+
def __init__(self, options=None, **kwargs):
555+
self.gpu_fit = options['gpu-fit']
556+
self.gpu_create = options['gpu-create']
557+
self.pmode = options.get('place-transfers')
558558

559559
super().__init__(**kwargs)
560560

devito/symbolics/extended_dtypes.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
from devito.tools.dtypes_lowering import dtype_mapper
88

99
__all__ = ['cast', 'CustomType', 'limits_mapper', 'INT', 'FLOAT', 'BaseCast', # noqa
10-
'DOUBLE', 'VOID', 'NoDeclStruct', 'c_complex', 'c_double_complex']
10+
'DOUBLE', 'VOID', 'NoDeclStruct', 'c_complex', 'c_double_complex',
11+
'LONG']
1112

1213

1314
limits_mapper = {
@@ -72,6 +73,7 @@ def cast(casttype, stars=None):
7273

7374
ULONG = cast(np.uint64)
7475
UINTP = cast(np.uint32, '*')
76+
LONG = cast(np.int64)
7577

7678

7779
# Standard ones, needed as class for e.g. single dispatch

devito/symbolics/manipulation.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,11 @@
1111
EvalDerivative, IndexDerivative
1212
)
1313
from devito.symbolics.extended_sympy import DefFunction, rfunc
14+
from devito.symbolics.extended_dtypes import LONG
1415
from devito.symbolics.queries import q_leaf
15-
from devito.symbolics.search import retrieve_indexed, retrieve_functions
16+
from devito.symbolics.search import (
17+
retrieve_indexed, retrieve_functions, retrieve_symbols
18+
)
1619
from devito.symbolics.unevaluation import (
1720
Add as UnevalAdd, Mul as UnevalMul, Pow as UnevalPow, UnevaluableMixin
1821
)
@@ -24,7 +27,8 @@
2427

2528
__all__ = ['xreplace_indices', 'pow_to_mul', 'indexify', 'subs_op_args',
2629
'normalize_args', 'uxreplace', 'Uxmapper', 'subs_if_composite',
27-
'reuse_if_untouched', 'evalrel', 'flatten_args', 'unevaluate']
30+
'reuse_if_untouched', 'evalrel', 'flatten_args', 'unevaluate',
31+
'as_long']
2832

2933

3034
def uxreplace(expr, rule):
@@ -523,3 +527,14 @@ def unevaluate(expr):
523527
return uneval_mapper[expr.func](*args)
524528
except KeyError:
525529
return reuse_if_untouched(expr, args)
530+
531+
532+
def as_long(expr):
    """
    Cast the symbols appearing in ``expr`` to long integers.

    Every symbol found by `retrieve_symbols` is substituted with ``LONG(symbol)``
    and the resulting expression is returned. If an `AttributeError` is raised
    along the way, ``LONG(expr)`` is returned instead, that is the cast is
    applied to ``expr`` as a whole.
    """
    try:
        casts = {}
        for symbol in retrieve_symbols(expr):
            casts[symbol] = LONG(symbol)
        return expr.subs(casts)
    except AttributeError:
        # NOTE(review): presumably `expr` is a plain Python number here, which
        # has no `subs` method -- confirm against the callers
        return LONG(expr)

examples/mpi/overview.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -486,9 +486,9 @@
486486
" MPI_Request rsend;\n",
487487
"\n",
488488
" float *restrict bufg_vec __attribute__ ((aligned (64)));\n",
489-
" posix_memalign((void**)(&bufg_vec),64,x_size*y_size*sizeof(float));\n",
489+
" posix_memalign((void**)(&bufg_vec),64,sizeof(float)*(long)y_size*(long)x_size);\n",
490490
" float *restrict bufs_vec __attribute__ ((aligned (64)));\n",
491-
" posix_memalign((void**)(&bufs_vec),64,x_size*y_size*sizeof(float));\n",
491+
" posix_memalign((void**)(&bufs_vec),64,sizeof(float)*(long)y_size*(long)x_size);\n",
492492
"\n",
493493
" MPI_Irecv(bufs_vec,x_size*y_size,MPI_FLOAT,fromrank,13,comm,&(rrecv));\n",
494494
" if (torank != MPI_PROC_NULL)\n",

examples/performance/00_overview.ipynb

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1192,13 +1192,13 @@
11921192
"int Kernel(struct dataobj *restrict f_vec, struct dataobj *restrict u_vec, const float h_y, const int time_M, const int time_m, const int x0_blk0_size, const int x_M, const int x_m, const int y0_blk0_size, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, const int z_size, const int x_size, const int y_size, struct profiler * timers)\n",
11931193
"{\n",
11941194
" float **restrict pr2_vec __attribute__ ((aligned (64)));\n",
1195-
" posix_memalign((void**)(&pr2_vec),64,nthreads*sizeof(float*));\n",
1195+
" posix_memalign((void**)(&pr2_vec),64,sizeof(float*)*(long)nthreads);\n",
11961196
" float *restrict r0_vec __attribute__ ((aligned (64)));\n",
1197-
" posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n",
1197+
" posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)z_size*(long)y_size*(long)x_size);\n",
11981198
" #pragma omp parallel num_threads(nthreads)\n",
11991199
" {\n",
12001200
" const int tid = omp_get_thread_num();\n",
1201-
" posix_memalign((void**)(&(pr2_vec[tid])),64,z_size*(y0_blk0_size + 4)*sizeof(float));\n",
1201+
" posix_memalign((void**)(&(pr2_vec[tid])),64,sizeof(float)*(long)z_size*(4 + (long)y0_blk0_size));\n",
12021202
" }\n",
12031203
"\n",
12041204
" float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n",
@@ -1412,7 +1412,7 @@
14121412
"name": "stdout",
14131413
"output_type": "stream",
14141414
"text": [
1415-
"posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n"
1415+
"posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)z_size*(long)y_size*(long)x_size);\n"
14161416
]
14171417
}
14181418
],
@@ -1483,13 +1483,13 @@
14831483
"int Kernel(struct dataobj *restrict f_vec, struct dataobj *restrict u_vec, const float h_y, const int time_M, const int time_m, const int x_M, const int x_m, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, const int z_size, const int x_size, const int y_size, struct profiler * timers)\n",
14841484
"{\n",
14851485
" float **restrict pr2_vec __attribute__ ((aligned (64)));\n",
1486-
" posix_memalign((void**)(&pr2_vec),64,nthreads*sizeof(float*));\n",
1486+
" posix_memalign((void**)(&pr2_vec),64,sizeof(float*)*(long)nthreads);\n",
14871487
" float *restrict r0_vec __attribute__ ((aligned (64)));\n",
1488-
" posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n",
1488+
" posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)z_size*(long)y_size*(long)x_size);\n",
14891489
" #pragma omp parallel num_threads(nthreads)\n",
14901490
" {\n",
14911491
" const int tid = omp_get_thread_num();\n",
1492-
" posix_memalign((void**)(&(pr2_vec[tid])),64,z_size*(y_size + 4)*sizeof(float));\n",
1492+
" posix_memalign((void**)(&(pr2_vec[tid])),64,sizeof(float)*(long)z_size*(4 + (long)y_size));\n",
14931493
" }\n",
14941494
"\n",
14951495
" float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n",
@@ -1626,11 +1626,11 @@
16261626
"int Kernel(struct dataobj *restrict f_vec, struct dataobj *restrict u_vec, const float h_x, const float h_y, const int time_M, const int time_m, const int x0_blk0_size, const int x1_blk0_size, const int x_M, const int x_m, const int y0_blk0_size, const int y1_blk0_size, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, const int x_size, const int y_size, const int z_size, struct profiler * timers)\n",
16271627
"{\n",
16281628
" float *restrict r0_vec __attribute__ ((aligned (64)));\n",
1629-
" posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n",
1629+
" posix_memalign((void**)(&r0_vec),64,sizeof(float)*(long)z_size*(long)y_size*(long)x_size);\n",
16301630
" float *restrict r3_vec __attribute__ ((aligned (64)));\n",
1631-
" posix_memalign((void**)(&r3_vec),64,z_size*(x_size + 4)*(y_size + 4)*sizeof(float));\n",
1631+
" posix_memalign((void**)(&r3_vec),64,sizeof(float)*(long)z_size*(4 + (long)y_size)*(4 + (long)x_size));\n",
16321632
" float *restrict r4_vec __attribute__ ((aligned (64)));\n",
1633-
" posix_memalign((void**)(&r4_vec),64,z_size*(x_size + 4)*(y_size + 4)*sizeof(float));\n",
1633+
" posix_memalign((void**)(&r4_vec),64,sizeof(float)*(long)z_size*(4 + (long)y_size)*(4 + (long)x_size));\n",
16341634
"\n",
16351635
" float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n",
16361636
" float (*restrict r0)[y_size][z_size] __attribute__ ((aligned (64))) = (float (*)[y_size][z_size]) r0_vec;\n",

tests/test_linearize.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import scipy.sparse
44

55
from devito import (Grid, Function, TimeFunction, SparseTimeFunction, Operator, Eq,
6-
Inc, MatrixSparseTimeFunction, sin, switchconfig)
6+
Inc, MatrixSparseTimeFunction, sin, switchconfig, configuration)
77
from devito.ir import Call, Callable, DummyExpr, Expression, FindNodes, SymbolRegistry
88
from devito.passes import Graph, linearize, generate_macros
99
from devito.types import Array, Bundle, DefaultDimension
@@ -640,3 +640,22 @@ def _test_different_dtype():
640640
assert "L0(x,y) f[(x)*y_stride0 + (y)]" in str(op1)
641641

642642
_test_different_dtype()
643+
644+
645+
@pytest.mark.parametrize('order', [2, 4])
def test_int64_array(order):
    """
    Check that, with linearization and ``index-mode='int64'``, the generated
    code contains the product of the ``2*order``-padded `y_size` and `x_size`
    with both size symbols cast to long.
    """
    grid = Grid(shape=(4, 4))
    f = Function(name='f', grid=grid, space_order=order)

    # The Array is given the same halo as `f`
    a = Array(name='a', dimensions=grid.dimensions, shape=grid.shape,
              halo=f.halo)

    opt = ('advanced', {'linearize': True, 'index-mode': 'int64'})
    op = Operator([Eq(f, a.indexify() + 1)], opt=opt)
    code = str(op)

    # The spelling of the cast depends on the configured language
    if 'CXX' not in configuration['language']:
        long = '(long)'
        assert f'({2*order} + {long}y_size)*({2*order} + {long}x_size))' in code
    else:
        long = 'static_cast<long>'
        assert f'({2*order} + {long}(y_size))*({2*order} + {long}(x_size)))' in code

0 commit comments

Comments
 (0)