7373
7474#define MKL_INT_MAX ((npy_intp) ((~((MKL_UINT) 0)) >> 1))
7575
/*
 * CHUNKED_VML_CALL2: invoke a two-pointer VML routine (one input array, one
 * output array) over n elements in pieces of at most MKL_INT_MAX elements;
 * the per-call element count is handed to vml_func cast to MKL_INT.
 *
 * In the updated ("+") form the macro additionally takes mkl_type and
 * mkl_ftype: at the call site the input pointer is cast to (mkl_type *) and
 * the output pointer to (mkl_ftype *), each going through (void *).
 * Between chunks both pointers are still advanced in units of `type`.
 */
76- #define CHUNKED_VML_CALL2(vml_func, n, type, in1, op1) \
76+ #define CHUNKED_VML_CALL2(vml_func, n, type, mkl_type, mkl_ftype, in1, op1) \
7777 do { \
7878 npy_intp _n_ = (n); \
7979 const npy_intp _chunk_size = MKL_INT_MAX; \
8080 type *in1p = (type *) (in1); \
8181 type *op1p = (type *) (op1); \
8282 while (_n_ > 0) { \
8383 npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \
84- vml_func((MKL_INT) _current_chunk, in1p, op1p); \
84+ vml_func((MKL_INT) _current_chunk, (mkl_type *)(void *) in1p, \
85+ (mkl_ftype *)(void *) op1p); \
8586 _n_ -= _current_chunk; \
8687 in1p += _current_chunk; \
8788 op1p += _current_chunk; \
8889 } \
8990 } while (0)
9091
91- #define CHUNKED_VML_CALL3(vml_func, n, type, in1, in2, op1) \
92+ #define CHUNKED_VML_CALL3(vml_func, n, type, mkl_type, in1, in2, op1) \
9293 do { \
9394 npy_intp _n_ = (n); \
9495 const npy_intp _chunk_size = MKL_INT_MAX; \
9798 type *op1p = (type *) (op1); \
9899 while (_n_ > 0) { \
99100 npy_intp _current_chunk = (_n_ > _chunk_size) ? _chunk_size : _n_; \
100- vml_func((MKL_INT) _current_chunk, in1p, in2p, op1p); \
101+ vml_func((MKL_INT) _current_chunk, (mkl_type *)(void *) in1p, \
102+ (mkl_type *)(void *) in2p, (mkl_type *)(void *) op1p); \
101103 _n_ -= _current_chunk; \
102104 in1p += _current_chunk; \
103105 in2p += _current_chunk; \
@@ -323,7 +325,7 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp
323325
324326 if (IS_BINARY_CONT(@type@, @type@)) {
325327 if (dimensions[0] > VML_ASM_THRESHOLD && disjoint_or_same1 && disjoint_or_same2) {
326- CHUNKED_VML_CALL3(v@s@@VML@, dimensions[0], @type@, args[0], args[1], args[2]);
328+ CHUNKED_VML_CALL3(v@s@@VML@, dimensions[0], @type@, @type@, args[0], args[1], args[2]);
327329 /* v@s@@VML@(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
328330 }
329331 else {
@@ -482,7 +484,7 @@ mkl_umath_@TYPE@_multiply(char **args, const npy_intp *dimensions, const npy_int
482484
483485 if (IS_BINARY_CONT(@type@, @type@)) {
484486 if (dimensions[0] > VML_ASM_THRESHOLD && disjoint_or_same1 && disjoint_or_same2) {
485- CHUNKED_VML_CALL3(v@s@Mul, dimensions[0], @type@, args[0], args[1], args[2]);
487+ CHUNKED_VML_CALL3(v@s@Mul, dimensions[0], @type@, @type@, args[0], args[1], args[2]);
486488 /* v@s@Mul(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
487489 }
488490 else {
@@ -633,7 +635,7 @@ mkl_umath_@TYPE@_divide(char **args, const npy_intp *dimensions, const npy_intp
633635
634636 if (IS_BINARY_CONT(@type@, @type@)) {
635637 if (dimensions[0] > VML_D_THRESHOLD && disjoint_or_same1 && disjoint_or_same2) {
636- CHUNKED_VML_CALL3(v@s@Div, dimensions[0], @type@, args[0], args[1], args[2]);
638+ CHUNKED_VML_CALL3(v@s@Div, dimensions[0], @type@, @type@, args[0], args[1], args[2]);
637639 /* v@s@Div(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
638640 }
639641 else {
@@ -784,7 +786,7 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp
784786 const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
785787
786788 if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
787- CHUNKED_VML_CALL3(v@s@@VML@, dimensions[0], @type@, args[0], args[1], args[2]);
789+ CHUNKED_VML_CALL3(v@s@@VML@, dimensions[0], @type@, @type@, args[0], args[1], args[2]);
788790 /* v@s@@VML@(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
789791 }
790792 else {
@@ -822,7 +824,7 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp
822824 const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
823825
824826 if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
825- CHUNKED_VML_CALL3(v@s@@VML@, dimensions[0], @type@, args[0], args[1], args[2]);
827+ CHUNKED_VML_CALL3(v@s@@VML@, dimensions[0], @type@, @type@, args[0], args[1], args[2]);
826828 /* v@s@@VML@(dimensions[0], (@type@*) args[0], (@type@*) args[1], (@type@*) args[2]); */
827829 }
828830 else {
@@ -849,7 +851,7 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp
849851
850852 if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD)
851853 {
852- CHUNKED_VML_CALL2(v@s@@VML@, dimensions[0], @type@, args[0], args[1]);
854+ CHUNKED_VML_CALL2(v@s@@VML@, dimensions[0], @type@, @type@, @type@, args[0], args[1]);
853855 /* v@s@@VML@(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
854856 }
855857 else {
@@ -880,7 +882,7 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp
880882 if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD)
881883 {
882884 ignore_fpstatus = 1;
883- CHUNKED_VML_CALL2(v@s@@VML@, dimensions[0], @type@, args[0], args[1]);
885+ CHUNKED_VML_CALL2(v@s@@VML@, dimensions[0], @type@, @type@, @type@, args[0], args[1]);
884886 /* v@s@@VML@(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
885887 }
886888 else {
@@ -909,7 +911,7 @@ mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_int
909911 const int can_vectorize = contig && disjoint_or_same;
910912
911913 if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
912- CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @type@, args[0], args[1]);
914+ CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @type@, @type@, @type@, args[0], args[1]);
913915 /* v@s@Abs(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
914916 }
915917 else {
@@ -931,7 +933,7 @@ mkl_umath_@TYPE@_reciprocal(char **args, const npy_intp *dimensions, const npy_i
931933 const int can_vectorize = contig && disjoint_or_same;
932934
933935 if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
934- CHUNKED_VML_CALL2(v@s@Inv, dimensions[0], @type@, args[0], args[1]);
936+ CHUNKED_VML_CALL2(v@s@Inv, dimensions[0], @type@, @type@, @type@, args[0], args[1]);
935937 /* v@s@Inv(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
936938 }
937939 else {
@@ -950,7 +952,7 @@ mkl_umath_@TYPE@_square(char **args, const npy_intp *dimensions, const npy_intp
950952 const int can_vectorize = contig && disjoint_or_same;
951953
952954 if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
953- CHUNKED_VML_CALL2(v@s@Sqr, dimensions[0], @type@, args[0], args[1]);
955+ CHUNKED_VML_CALL2(v@s@Sqr, dimensions[0], @type@, @type@, @type@, args[0], args[1]);
954956 /* v@s@Sqr(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
955957 }
956958 else {
@@ -1187,7 +1189,9 @@ mkl_umath_@TYPE@_ldexp_long(char **args, const npy_intp *dimensions, const npy_i
11871189 * complex types
11881190 * #TYPE = CFLOAT, CDOUBLE#
11891191 * #ftype = npy_float, npy_double#
1190- * #type = npy_cfloat, npy_cdouble#
1192+ * #type = npy_cfloat, npy_cdouble#
1193+ * #mkl_type = MKL_Complex8, MKL_Complex16#
1194+ * #mkl_ftype = float, double#
11911195 * #c = f, #
11921196 * #C = F, #
11931197 * #s = c, z#
@@ -1281,14 +1285,14 @@ pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_intp n, npy_intp st
12811285void
12821286mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
12831287{
1284- const int contig = IS_BINARY_CONT(@type @, @type @);
1288+ const int contig = IS_BINARY_CONT(@mkl_type @, @mkl_type @);
12851289 const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
12861290 const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
12871291 const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
12881292
12891293 if (can_vectorize && dimensions[0] > VML_ASM_THRESHOLD) {
1290- CHUNKED_VML_CALL3(v@s@@VML@, dimensions[0], @type@, args[0], args[1], args[2]);
1291- /* v@s@@VML@(dimensions[0], (@type @*) args[0], (@type @*) args[1], (@type @*) args[2]); */
1294+ CHUNKED_VML_CALL3(v@s@@VML@, dimensions[0], @type@, @mkl_type@, args[0], args[1], args[2]);
1295+ /* v@s@@VML@(dimensions[0], (@mkl_type @*) args[0], (@mkl_type @*) args[1], (@mkl_type @*) args[2]); */
12921296 }
12931297 else {
12941298 if (IS_BINARY_REDUCE && @PW@) {
@@ -1319,14 +1323,14 @@ mkl_umath_@TYPE@_@kind@(char **args, const npy_intp *dimensions, const npy_intp
13191323void
13201324mkl_umath_@TYPE@_multiply(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
13211325{
1322- const int contig = IS_BINARY_CONT(@type @, @type @);
1326+ const int contig = IS_BINARY_CONT(@mkl_type @, @mkl_type @);
13231327 const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
13241328 const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
13251329 const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
13261330
13271331 if (can_vectorize && dimensions[0] > VML_ASM_THRESHOLD) {
1328- CHUNKED_VML_CALL3(v@s@Mul, dimensions[0], @type@, args[0], args[1], args[2]);
1329- /* v@s@Mul(dimensions[0], (@type @*) args[0], (@type @*) args[1], (@type @*) args[2]); */
1332+ CHUNKED_VML_CALL3(v@s@Mul, dimensions[0], @type@, @mkl_type@, args[0], args[1], args[2]);
1333+ /* v@s@Mul(dimensions[0], (@mkl_type @*) args[0], (@mkl_type @*) args[1], (@mkl_type @*) args[2]); */
13301334 }
13311335 else {
13321336 BINARY_LOOP {
@@ -1343,14 +1347,14 @@ mkl_umath_@TYPE@_multiply(char **args, const npy_intp *dimensions, const npy_int
13431347void
13441348mkl_umath_@TYPE@_divide(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
13451349{
1346- const int contig = IS_BINARY_CONT(@type @, @type @);
1350+ const int contig = IS_BINARY_CONT(@mkl_type @, @mkl_type @);
13471351 const int disjoint_or_same1 = DISJOINT_OR_SAME(args[0], args[2], dimensions[0], sizeof(@type@));
13481352 const int disjoint_or_same2 = DISJOINT_OR_SAME(args[1], args[2], dimensions[0], sizeof(@type@));
13491353 const int can_vectorize = contig && disjoint_or_same1 && disjoint_or_same2;
13501354
13511355 if (can_vectorize && dimensions[0] > VML_D_THRESHOLD) {
1352- CHUNKED_VML_CALL3(v@s@Div, dimensions[0], @type@, args[0], args[1], args[2]);
1353- /* v@s@Div(dimensions[0], (@type @*) args[0], (@type @*) args[1], (@type @*) args[2]); */
1356+ CHUNKED_VML_CALL3(v@s@Div, dimensions[0], @type@, @mkl_type@, args[0], args[1], args[2]);
1357+ /* v@s@Div(dimensions[0], (@mkl_type @*) args[0], (@mkl_type @*) args[1], (@mkl_type @*) args[2]); */
13541358 }
13551359 else {
13561360 BINARY_LOOP {
@@ -1493,13 +1497,13 @@ mkl_umath_@TYPE@_reciprocal(char **args, const npy_intp *dimensions, const npy_i
14931497
14941498void
14951499mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) {
1496- const int contig = IS_UNARY_CONT(@type @, @type @);
1500+ const int contig = IS_UNARY_CONT(@mkl_type @, @mkl_type @);
14971501 const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
14981502 const int can_vectorize = contig && disjoint_or_same;
14991503
15001504 if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
1501- CHUNKED_VML_CALL2(v@s@Conj, dimensions[0], @type@, args[0], args[1]);
1502- /* v@s@Conj(dimensions[0], (@type @*) args[0], (@type @*) args[1]); */
1505+ CHUNKED_VML_CALL2(v@s@Conj, dimensions[0], @type@, @mkl_type@, @mkl_type@, args[0], args[1]);
1506+ /* v@s@Conj(dimensions[0], (@mkl_type @*) args[0], (@mkl_type @*) args[1]); */
15031507 }
15041508 else {
15051509 UNARY_LOOP {
@@ -1511,18 +1515,27 @@ mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_in
15111515 }
15121516}
15131517
1518+ /**begin repeat
1519+ * complex types
1520+ * #TYPE = CFLOAT, CDOUBLE#
1521+ * #ftype = npy_float, npy_double#
1522+ * #type = npy_cfloat, npy_cdouble#
1523+ * #mkl_ftype = float, double#
1524+ * #mkl_type = MKL_Complex8, MKL_Complex16#
1525+
1526+ */
15141527void
15151528mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
15161529{
1517- const int contig = IS_UNARY_CONT(@type @, @ftype @);
1530+ const int contig = IS_UNARY_CONT(@mkl_type @, @mkl_ftype @);
15181531 const int disjoint_or_same = DISJOINT_OR_SAME_TWO_DTYPES(args[0], args[1], dimensions[0], sizeof(@type@), sizeof(@ftype@));
15191532 const int can_vectorize = contig && disjoint_or_same;
15201533 int ignore_fpstatus = 0;
15211534
15221535 if (can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
15231536 ignore_fpstatus = 1;
1524- CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @type@, args[0], args[1]);
1525- /* v@s@Abs(dimensions[0], (@type @*) args[0], (@type @*) args[1]); */
1537+ CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @type@, @mkl_type@, @mkl_ftype@, args[0], args[1]);
1538+ /* v@s@Abs(dimensions[0], (@mkl_type @*) args[0], (@mkl_ftype @*) args[1]); */
15261539 }
15271540 else {
15281541 UNARY_LOOP {
0 commit comments