@@ -64,16 +64,46 @@ static sycl::event div_impl(sycl::queue exec_q,
6464{
6565 type_utils::validate_type_for_device<T>(exec_q);
6666
67- const T* a = reinterpret_cast <const T*>(in_a);
68- const T* b = reinterpret_cast <const T*>(in_b);
69- T* y = reinterpret_cast <T*>(out_y);
67+ std::cerr << " enter div_impl" << std::endl;
7068
71- return mkl_vm::div (exec_q,
69+ const T* _a = reinterpret_cast <const T*>(in_a);
70+ const T* _b = reinterpret_cast <const T*>(in_b);
71+ T* _y = reinterpret_cast <T*>(out_y);
72+
73+ std::cerr << " casting is done" << std::endl;
74+
75+ T* a = sycl::malloc_device<T>(n, exec_q);
76+ T* b = sycl::malloc_device<T>(n, exec_q);
77+ T* y = sycl::malloc_device<T>(n, exec_q);
78+
79+ std::cerr << " malloc is done" << std::endl;
80+
81+ exec_q.copy (_a, a, n).wait ();
82+ exec_q.copy (_b, b, n).wait ();
83+ exec_q.copy (_y, y, n).wait ();
84+
85+ std::cerr << " copy is done" << std::endl;
86+
87+ sycl::event ev = mkl_vm::div (exec_q,
7288 n, // number of elements to be calculated
7389 a, // pointer `a` containing 1st input vector of size n
7490 b, // pointer `b` containing 2nd input vector of size n
7591 y, // pointer `y` to the output vector of size n
7692 depends);
93+ ev.wait ();
94+
95+ std::cerr << " div is done" << std::endl;
96+
97+ exec_q.copy (y, _y, n).wait ();
98+
99+ std::cerr << " copy is done" << std::endl;
100+
101+ sycl::free (a, exec_q);
102+ sycl::free (b, exec_q);
103+ sycl::free (y, exec_q);
104+
105+ std::cerr << " leaving div_impl" << std::endl;
106+ return sycl::event ();
77107}
78108
79109std::pair<sycl::event, sycl::event> div (sycl::queue exec_q,
@@ -175,9 +205,21 @@ std::pair<sycl::event, sycl::event> div(sycl::queue exec_q,
175205 throw py::value_error (" No div implementation defined" );
176206 }
177207 sycl::event sum_ev = div_fn (exec_q, src_nelems, src1_data, src2_data, dst_data, depends);
178-
179- sycl::event ht_ev = dpctl::utils::keep_args_alive (exec_q, {src1, src2, dst}, {sum_ev});
180- return std::make_pair (ht_ev, sum_ev);
208+ // sum_ev.wait();
209+
210+ // int* dummy = sycl::malloc_device<int>(1, exec_q);
211+ // sycl::event cleanup_ev = exec_q.submit([&](sycl::handler& cgh) {
212+ // // cgh.depends_on(sum_ev);
213+ // auto ctx = exec_q.get_context();
214+ // cgh.host_task([dummy, ctx]() {
215+ // // dummy host task to pass into keep_args_alive
216+ // sycl::free(dummy, ctx);
217+ // });
218+ // });
219+
220+ // sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {sum_ev});
221+ // return std::make_pair(ht_ev, sum_ev);
222+ return std::make_pair (sycl::event (), sycl::event ());
181223}
182224
183225bool can_call_div (sycl::queue exec_q,
0 commit comments