We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 94d2159 commit f25053f (Copy full SHA for f25053f)
1 file changed
dpctl/tensor/libtensor/include/kernels/elementwise_functions/common.hpp
@@ -705,7 +705,9 @@ template <typename argTy1,
705
typename T3,
706
unsigned int vs,
707
unsigned int nv>
708
- class kernel_name>
+ class kernel_name,
709
+ unsigned int vec_sz = 4,
710
+ unsigned int n_vecs = 2>
711
sycl::event binary_contig_impl(sycl::queue exec_q,
712
size_t nelems,
713
const char *arg1_p,
@@ -720,8 +722,6 @@ sycl::event binary_contig_impl(sycl::queue exec_q,
720
722
cgh.depends_on(depends);
721
723
724
size_t lws = 64;
- constexpr unsigned int vec_sz = 4;
- constexpr unsigned int n_vecs = 2;
725
const size_t n_groups =
726
((nelems + lws * n_vecs * vec_sz - 1) / (lws * n_vecs * vec_sz));
727
const auto gws_range = sycl::range<1>(n_groups * lws);
0 commit comments