|
35 | 35 | namespace mkl_blas = oneapi::mkl::blas; |
36 | 36 | namespace mkl_lapack = oneapi::mkl::lapack; |
37 | 37 |
|
38 | | -template <typename _KernelNameSpecialization> |
39 | | -class dpnp_matmul_c_kernel; |
40 | | - |
41 | | -template <typename _DataType> |
42 | | -void dpnp_matmul_c(void* array1_in, void* array2_in, void* result1, size_t size_m, size_t size_n, size_t size_k) |
43 | | -{ |
44 | | - cl::sycl::event event; |
45 | | - _DataType* array_1 = reinterpret_cast<_DataType*>(array1_in); |
46 | | - _DataType* array_2 = reinterpret_cast<_DataType*>(array2_in); |
47 | | - _DataType* result = reinterpret_cast<_DataType*>(result1); |
48 | | - |
49 | | - if (!size_m || !size_n || !size_k) |
50 | | - { |
51 | | - return; |
52 | | - } |
53 | | - |
54 | | - if constexpr (std::is_same<_DataType, double>::value || std::is_same<_DataType, float>::value) |
55 | | - { |
56 | | - // using std::max for these ldx variables is required by math library |
57 | | - const std::int64_t lda = std::max<size_t>(1UL, size_k); // First dimensions of array_1 |
58 | | - const std::int64_t ldb = std::max<size_t>(1UL, size_n); // First dimensions of array_2 |
59 | | - const std::int64_t ldc = std::max<size_t>(1UL, size_n); // Fast dimensions of result |
60 | | - |
61 | | - event = mkl_blas::gemm(DPNP_QUEUE, |
62 | | - oneapi::mkl::transpose::nontrans, |
63 | | - oneapi::mkl::transpose::nontrans, |
64 | | - size_n, |
65 | | - size_m, |
66 | | - size_k, |
67 | | - _DataType(1), |
68 | | - array_2, |
69 | | - ldb, |
70 | | - array_1, |
71 | | - lda, |
72 | | - _DataType(0), |
73 | | - result, |
74 | | - ldc); |
75 | | - } |
76 | | - else |
77 | | - { |
78 | | - // input1: M x K |
79 | | - // input2: K x N |
80 | | - // result: M x N |
81 | | - const size_t dim_m = size_m; // shape1.front(); // First dimensions of array1 |
82 | | - const size_t dim_n = size_n; // shape2.back(); // Last dimensions of array2 |
83 | | - const size_t dim_k = size_k; // shape1.back(); // First dimensions of array2 |
84 | | - |
85 | | - cl::sycl::range<2> gws(dim_m, dim_n); // dimensions are: "i" and "j" |
86 | | - |
87 | | - auto kernel_parallel_for_func = [=](cl::sycl::id<2> global_id) { |
88 | | - size_t i = global_id[0]; //for (size_t i = 0; i < size; ++i) |
89 | | - { |
90 | | - size_t j = global_id[1]; //for (size_t j = 0; j < size; ++j) |
91 | | - { |
92 | | - _DataType acc = _DataType(0); |
93 | | - for (size_t k = 0; k < dim_k; ++k) |
94 | | - { |
95 | | - const size_t index_1 = i * dim_k + k; |
96 | | - const size_t index_2 = k * dim_n + j; |
97 | | - acc += array_1[index_1] * array_2[index_2]; |
98 | | - } |
99 | | - const size_t index_result = i * dim_n + j; |
100 | | - result[index_result] = acc; |
101 | | - } |
102 | | - } |
103 | | - }; |
104 | | - |
105 | | - auto kernel_func = [&](cl::sycl::handler& cgh) { |
106 | | - cgh.parallel_for<class dpnp_matmul_c_kernel<_DataType>>(gws, kernel_parallel_for_func); |
107 | | - }; |
108 | | - |
109 | | - event = DPNP_QUEUE.submit(kernel_func); |
110 | | - } |
111 | | - event.wait(); |
112 | | -} |
113 | | - |
114 | 38 | template <typename _KernelNameSpecialization1, typename _KernelNameSpecialization2, typename _KernelNameSpecialization3> |
115 | 39 | class dpnp_dot_c_kernel; |
116 | 40 |
|
@@ -293,6 +217,111 @@ void dpnp_eigvals_c(const void* array_in, void* result1, size_t size) |
293 | 217 | dpnp_memory_free_c(result_val_kern); |
294 | 218 | } |
295 | 219 |
|
| 220 | +template <typename _DataType> |
| 221 | +class dpnp_initval_c_kernel; |
| 222 | + |
| 223 | +template <typename _DataType> |
| 224 | +void dpnp_initval_c(void* result1, void* value, size_t size) |
| 225 | +{ |
| 226 | + if (!size) |
| 227 | + { |
| 228 | + return; |
| 229 | + } |
| 230 | + |
| 231 | + _DataType* result = reinterpret_cast<_DataType*>(result1); |
| 232 | + _DataType val = *(reinterpret_cast<_DataType*>(value)); |
| 233 | + |
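| | +    // one work-item per element; every work-item stores the same fill value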
| 234 | + cl::sycl::range<1> gws(size); |
| 235 | + auto kernel_parallel_for_func = [=](cl::sycl::id<1> global_id) { |
| 236 | + const size_t idx = global_id[0]; |
| 237 | + result[idx] = val; |
| 238 | + }; |
| 239 | + |
| 240 | + auto kernel_func = [&](cl::sycl::handler& cgh) { |
| 241 | + cgh.parallel_for<class dpnp_initval_c_kernel<_DataType>>(gws, kernel_parallel_for_func); |
| 242 | + }; |
| 243 | + |
| 244 | + cl::sycl::event event = DPNP_QUEUE.submit(kernel_func); |
| 245 | + |
| 246 | + event.wait(); |
| 247 | +} |
| 248 | + |
| 249 | +template <typename _KernelNameSpecialization> |
| 250 | +class dpnp_matmul_c_kernel; |
| 251 | + |
| 252 | +template <typename _DataType> |
| 253 | +void dpnp_matmul_c(void* array1_in, void* array2_in, void* result1, size_t size_m, size_t size_n, size_t size_k) |
| 254 | +{ |
| 255 | + cl::sycl::event event; |
| 256 | + _DataType* array_1 = reinterpret_cast<_DataType*>(array1_in); |
| 257 | + _DataType* array_2 = reinterpret_cast<_DataType*>(array2_in); |
| 258 | + _DataType* result = reinterpret_cast<_DataType*>(result1); |
| 259 | + |
| 260 | + if (!size_m || !size_n || !size_k) |
| 261 | + { |
| 262 | + return; |
| 263 | + } |
| 264 | + |
| 265 | + if constexpr (std::is_same<_DataType, double>::value || std::is_same<_DataType, float>::value) |
| 266 | + { |
| 267 | +        // the math library requires leading dimensions of at least 1, hence the std::max guard
| 268 | +        const std::int64_t lda = std::max<size_t>(1UL, size_k); // leading dimension (row stride) of array_1, M x K
| 269 | +        const std::int64_t ldb = std::max<size_t>(1UL, size_n); // leading dimension (row stride) of array_2, K x N
| 270 | +        const std::int64_t ldc = std::max<size_t>(1UL, size_n); // leading dimension (row stride) of result, M x N
| 271 | + |
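| | +        // Note: mkl_blas::gemm assumes column-major storage, while dpnp data is row-major.
| | +        // Passing array_2 first and size_n before size_m computes, in column-major terms,
| | +        // result^T = array_2^T * array_1^T, whose memory layout is exactly the row-major
| | +        // result = array_1 * array_2.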
| 272 | + event = mkl_blas::gemm(DPNP_QUEUE, |
| 273 | + oneapi::mkl::transpose::nontrans, |
| 274 | + oneapi::mkl::transpose::nontrans, |
| 275 | + size_n, |
| 276 | + size_m, |
| 277 | + size_k, |
| 278 | + _DataType(1), |
| 279 | + array_2, |
| 280 | + ldb, |
| 281 | + array_1, |
| 282 | + lda, |
| 283 | + _DataType(0), |
| 284 | + result, |
| 285 | + ldc); |
| 286 | + } |
| 287 | + else |
| 288 | + { |
| 289 | + // input1: M x K |
| 290 | + // input2: K x N |
| 291 | + // result: M x N |
| 292 | +        const size_t dim_m = size_m; // rows of array1 and of result
| 293 | +        const size_t dim_n = size_n; // columns of array2 and of result
| 294 | +        const size_t dim_k = size_k; // shared inner dimension: columns of array1 == rows of array2
| 295 | + |
| 296 | + cl::sycl::range<2> gws(dim_m, dim_n); // dimensions are: "i" and "j" |
| 297 | + |
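| | +        // Fallback for types not routed to gemm above (int and long in the func_map below):
| | +        // one work-item per output element, each computing a naive dot product of
| | +        // row i of array_1 with column j of array_2.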
| 298 | + auto kernel_parallel_for_func = [=](cl::sycl::id<2> global_id) { |
| 299 | +            size_t i = global_id[0]; // for (size_t i = 0; i < dim_m; ++i)
| 300 | +            {
| 301 | +                size_t j = global_id[1]; // for (size_t j = 0; j < dim_n; ++j)
| 302 | + { |
| 303 | + _DataType acc = _DataType(0); |
| 304 | + for (size_t k = 0; k < dim_k; ++k) |
| 305 | + { |
| 306 | + const size_t index_1 = i * dim_k + k; |
| 307 | + const size_t index_2 = k * dim_n + j; |
| 308 | + acc += array_1[index_1] * array_2[index_2]; |
| 309 | + } |
| 310 | + const size_t index_result = i * dim_n + j; |
| 311 | + result[index_result] = acc; |
| 312 | + } |
| 313 | + } |
| 314 | + }; |
| 315 | + |
| 316 | + auto kernel_func = [&](cl::sycl::handler& cgh) { |
| 317 | + cgh.parallel_for<class dpnp_matmul_c_kernel<_DataType>>(gws, kernel_parallel_for_func); |
| 318 | + }; |
| 319 | + |
| 320 | + event = DPNP_QUEUE.submit(kernel_func); |
| 321 | + } |
| 322 | + event.wait(); |
| 323 | +} |
| 324 | + |
296 | 325 | void func_map_init_linalg(func_map_t& fmap) |
297 | 326 | { |
298 | 327 | fmap[DPNPFuncName::DPNP_FN_DOT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_dot_c<int, int, int>}; |
@@ -321,6 +350,13 @@ void func_map_init_linalg(func_map_t& fmap) |
321 | 350 | fmap[DPNPFuncName::DPNP_FN_EIGVALS][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_eigvals_c<float, float>}; |
322 | 351 | fmap[DPNPFuncName::DPNP_FN_EIGVALS][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_eigvals_c<double, double>}; |
323 | 352 |
|
| 353 | + fmap[DPNPFuncName::DPNP_FN_INITVAL][eft_BOOL][eft_BOOL] = {eft_BOOL, (void*)dpnp_initval_c<bool>}; |
| 354 | + fmap[DPNPFuncName::DPNP_FN_INITVAL][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_initval_c<int>}; |
| 355 | + fmap[DPNPFuncName::DPNP_FN_INITVAL][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_initval_c<long>}; |
| 356 | + fmap[DPNPFuncName::DPNP_FN_INITVAL][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_initval_c<float>}; |
| 357 | + fmap[DPNPFuncName::DPNP_FN_INITVAL][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_initval_c<double>}; |
| 358 | + fmap[DPNPFuncName::DPNP_FN_INITVAL][eft_C128][eft_C128] = {eft_C128, (void*)dpnp_initval_c<std::complex<double>>}; |
| 359 | + |
324 | 360 | fmap[DPNPFuncName::DPNP_FN_MATMUL][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_matmul_c<int>}; |
325 | 361 | fmap[DPNPFuncName::DPNP_FN_MATMUL][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_matmul_c<long>}; |
326 | 362 | fmap[DPNPFuncName::DPNP_FN_MATMUL][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_matmul_c<float>}; |
|