@@ -1331,11 +1331,6 @@ void copy_numpy_ndarray_into_usm_ndarray(
13311331 // Create shared pointers with shape and src/dst strides, copy into device
13321332 // memory
13331333 using shT = std::vector<py::ssize_t >;
1334- std::shared_ptr<shT> shp_shape = std::make_shared<shT>(simplified_shape);
1335- std::shared_ptr<shT> shp_src_strides =
1336- std::make_shared<shT>(simplified_src_strides);
1337- std::shared_ptr<shT> shp_dst_strides =
1338- std::make_shared<shT>(simplified_dst_strides);
13391334
13401335 // Get implementation function pointer
13411336 auto copy_and_cast_from_host_blocking_fn =
@@ -1351,42 +1346,22 @@ void copy_numpy_ndarray_into_usm_ndarray(
13511346 throw std::runtime_error (" Unabled to allocate device memory" );
13521347 }
13531348
1354- sycl::event copy_shape_ev =
1355- exec_q.copy <py::ssize_t >(shp_shape->data (), shape_strides, nd);
1356-
1357- exec_q.submit ([&](sycl::handler &cgh) {
1358- cgh.depends_on (copy_shape_ev);
1359- cgh.host_task ([shp_shape]() {
1360- // increment shared pointer ref-count to keep it alive
1361- // till copy operation completes;
1362- });
1363- });
1364-
1365- sycl::event copy_src_strides_ev = exec_q.copy <py::ssize_t >(
1366- shp_src_strides->data (), shape_strides + nd, nd);
1367- exec_q.submit ([&](sycl::handler &cgh) {
1368- cgh.depends_on (copy_src_strides_ev);
1369- cgh.host_task ([shp_src_strides]() {
1370- // increment shared pointer ref-count to keep it alive
1371- // till copy operation completes;
1372- });
1373- });
1349+ std::shared_ptr<shT> host_shape_strides_shp = std::make_shared<shT>(3 * nd);
1350+ std::copy (simplified_shape.begin (), simplified_shape.end (),
1351+ host_shape_strides_shp->begin ());
1352+ std::copy (simplified_src_strides.begin (), simplified_src_strides.end (),
1353+ host_shape_strides_shp->begin () + nd);
1354+ std::copy (simplified_dst_strides.begin (), simplified_dst_strides.end (),
1355+ host_shape_strides_shp->begin () + 2 * nd);
13741356
1375- sycl::event copy_dst_strides_ev = exec_q.copy <py::ssize_t >(
1376- shp_dst_strides->data (), shape_strides + 2 * nd, nd);
1377- exec_q.submit ([&](sycl::handler &cgh) {
1378- cgh.depends_on (copy_dst_strides_ev);
1379- cgh.host_task ([shp_dst_strides]() {
1380- // increment shared pointer ref-count to keep it alive
1381- // till copy operation completes;
1382- });
1383- });
1357+ sycl::event copy_packed_ev =
1358+ exec_q.copy <py::ssize_t >(host_shape_strides_shp->data (), shape_strides,
1359+ host_shape_strides_shp->size ());
13841360
13851361 copy_and_cast_from_host_blocking_fn (
13861362 exec_q, src_nelems, nd, shape_strides, src_data, src_offset,
13871363 npy_src_min_nelem_offset, npy_src_max_nelem_offset, dst_data,
1388- dst_offset, depends,
1389- {copy_shape_ev, copy_src_strides_ev, copy_dst_strides_ev});
1364+ dst_offset, depends, {copy_packed_ev});
13901365
13911366 sycl::free (shape_strides, exec_q);
13921367
0 commit comments