@@ -298,6 +298,9 @@ void dpnp_rng_geometric_c(void* result, const float p, const size_t size)
298298 event_out.wait ();
299299}
300300
// Forward-declared class template with no definition in this view. It is used
// only as the kernel-name template argument of cgh.parallel_for inside
// dpnp_rng_gumbel_c, where it is instantiated with that function's _DataType.
301+ template <typename _KernelNameSpecialization>
302+ class dpnp_blas_scal_c_kernel ;
303+
// Fills `result` (reinterpreted as _DataType*) with `size` values drawn from
// mkl_rng::gumbel<_DataType> constructed with (-loc, scale), then multiplies
// every generated element by -1.
//
// result : output buffer, accessed as _DataType* for `size` elements.
// loc    : location parameter; its negation is what is passed to the distribution.
// scale  : scale parameter, passed to the distribution unchanged.
// size   : number of elements generated and then scaled.
//
// NOTE(review): this text is a diff view. Lines marked `-` are the removed
// version, lines marked `+` the added one, and the hunk header below hides
// part of the function body (including where `event` is declared) — confirm
// against the full file.
301304template <typename _DataType>
302305void dpnp_rng_gumbel_c (void * result, const double loc, const double scale, const size_t size)
303306{
@@ -308,15 +311,33 @@ void dpnp_rng_gumbel_c(void* result, const double loc, const double scale, const
308311 }
309312
// alpha is the factor (-1) applied to every generated element further down.
310313 const _DataType alpha = (_DataType (-1.0 ));
311- const _DataType stride = ( _DataType ( 1.0 )) ;
// incx is the integer increment argument handed to mkl_blas::scal.
314+ std:: int64_t incx = 1 ;
312315 _DataType* result1 = reinterpret_cast <_DataType*>(result);
// The distribution is built with the negated location.
313316 double negloc = loc * (double (-1.0 ));
314317
315318 mkl_rng::gumbel<_DataType> distribution (negloc, scale);
316- // perform generation
// Generate `size` values into result1 and wait for the event before scaling.
317319 event = mkl_rng::generate (distribution, DPNP_RNG_ENGINE, size, result1);
318320 event.wait ();
319- event = mkl_blas::scal (DPNP_QUEUE, size, alpha, result1, stride);
321+
// Scaling step: mkl_blas::scal is used only when dpnp_queue_is_cpu_c() is true;
// per the author's note below, the same call segfaults on a GPU device, so the
// else-branch submits a hand-written element-wise kernel instead.
322+ // OK for CPU and segfault for GPU device
323+ // event = mkl_blas::scal(DPNP_QUEUE, size, alpha, result1, incx);
324+ if (dpnp_queue_is_cpu_c ())
325+ {
326+ event = mkl_blas::scal (DPNP_QUEUE, size, alpha, result1, incx);
327+ }
328+ else
329+ {
330+ // for (size_t i = 0; i < size; i++) result1[i] *= alpha;
// One work-item per element: the range has `size` items.
331+ cl::sycl::range<1 > gws (size);
// Captures result1 and alpha by copy ([=]); each work-item scales one element.
332+ auto kernel_parallel_for_func = [=](cl::sycl::id<1 > global_id) {
333+ size_t i = global_id[0 ];
334+ result1[i] *= alpha;
335+ };
// Command group: launches the kernel under the name dpnp_blas_scal_c_kernel<_DataType>.
336+ auto kernel_func = [&](cl::sycl::handler& cgh) {
337+ cgh.parallel_for <class dpnp_blas_scal_c_kernel <_DataType>>(gws, kernel_parallel_for_func);
338+ };
339+ event = DPNP_QUEUE.submit (kernel_func);
340+ }
// Wait on whichever scaling event was produced above before returning.
320341 event.wait ();
321342}
322343
0 commit comments