Feature/1854 OpenCL /prim signatures part 2 (#1869)

rok-cesnovar · stan-buildbot · yashikno · web-flow · commit 17a67ad20446 · 2020-05-12T19:44:34.000+02:00
* added dims

* finished add, rows, cols

* scalar + matrix

* simplify with tadej's suggestion

* add col and tests and add the block boundary checks

* add row and row tests

* inv_sqrt done

* inv_square

* inv logit

* add inv_cloglog

* [Jenkins] auto-formatting by clang-format version 6.0.0-1ubuntu2~16.04.1 (tags/RELEASE_600/final)

* add inv()

* [Jenkins] auto-formatting by clang-format version 6.0.0

* fix doxygen, cpplint

* add header guards

* revising the block checks, cpplint

* [Jenkins] auto-formatting by clang-format version 6.0.0

* missing ifdef

* newline

* fix comments and use of x instead of a

* expression is now kernel_expression

* fix row/col

* fix comments, change throw behaviour of block

* fix block test

* add row/col tests with expressions

* [Jenkins] auto-formatting by clang-format version 6.0.0

* rename require

* [Jenkins] auto-formatting by clang-format version 6.0.0-1ubuntu2 (tags/RELEASE_600/final)

* rename require

* rename require

* [Jenkins] auto-formatting by clang-format version 6.0.0-1ubuntu2 (tags/RELEASE_600/final)

* fix tests

* add forward

* is_valid_kernel_expression to is_kernel_expression

* fix inv_square and inv_logit

* cleanup after merge

* replace with EXPECT_MATRIX_NEAR

* [Jenkins] auto-formatting by clang-format version 6.0.0

* remove duplicate include

* Update stan/math/opencl/prim/cols.hpp

Co-authored-by: Tadej Ciglarič <tadej.c@gmail.com>

* Update stan/math/opencl/prim/dims.hpp

Co-authored-by: Tadej Ciglarič <tadej.c@gmail.com>

* Update stan/math/opencl/prim/rows.hpp

Co-authored-by: Tadej Ciglarič <tadej.c@gmail.com>

* removed comment

* fix rows/cols template

* rename elewise functions

* added crossprod and tcrossprod

* added fabs

* use T_A of (t)crossprod

* added logit

* added log1m_inv_logit

* add divide

* add trunc, fix names of tests

* [Jenkins] auto-formatting by clang-format version 6.0.0-1ubuntu2~16.04.1 (tags/RELEASE_600/final)

* newlines

* fix merge

* missing includes

* address review comments

* [Jenkins] auto-formatting by clang-format version 6.0.0-1ubuntu2~16.04.1 (tags/RELEASE_600/final)

* Apply suggestions from code review

Co-authored-by: Tadej Ciglarič <tadej.c@gmail.com>

Co-authored-by: Stan Jenkins <mc.stanislaw@gmail.com>
Co-authored-by: Jenkins <nobody@nowhere>
Co-authored-by: Tadej Ciglarič <tadej.c@gmail.com>
diff --git a/stan/math/opencl/kernel_generator/binary_operation.hpp b/stan/math/opencl/kernel_generator/binary_operation.hpp
@@ -187,8 +187,7 @@ ADD_BINARY_OPERATION(addition_, operator+, common_scalar_t<T_a COMMA T_b>, "+");
 ADD_BINARY_OPERATION(subtraction_, operator-, common_scalar_t<T_a COMMA T_b>,
                      "-");
 ADD_BINARY_OPERATION_WITH_CUSTOM_CODE(
-    elewise_multiplication_, elewise_multiplication,
-    common_scalar_t<T_a COMMA T_b>, "*",
+    elt_multiply_, elt_multiply, common_scalar_t<T_a COMMA T_b>, "*",
     using view_transitivity = std::tuple<std::true_type, std::true_type>;
     inline std::pair<int, int> extreme_diagonals() const {
       std::pair<int, int> diags0
@@ -200,7 +199,7 @@ ADD_BINARY_OPERATION_WITH_CUSTOM_CODE(
     });
 
 ADD_BINARY_OPERATION_WITH_CUSTOM_CODE(
-    elewise_division_, elewise_division, common_scalar_t<T_a COMMA T_b>, "/",
+    elt_divide_, elt_divide, common_scalar_t<T_a COMMA T_b>, "/",
     inline std::pair<int, int> extreme_diagonals() const {
       return {-rows() + 1, cols() - 1};
     });
@@ -246,7 +245,7 @@ ADD_BINARY_OPERATION_WITH_CUSTOM_CODE(
  */
 template <typename T_a, typename T_b, typename = require_arithmetic_t<T_a>,
           typename = require_all_kernel_expressions_t<T_b>>
-inline elewise_multiplication_<scalar_<T_a>, as_operation_cl_t<T_b>> operator*(
+inline elt_multiply_<scalar_<T_a>, as_operation_cl_t<T_b>> operator*(
     T_a&& a, T_b&& b) {  // NOLINT
   return {as_operation_cl(std::forward<T_a>(a)),
           as_operation_cl(std::forward<T_b>(b))};
@@ -263,7 +262,7 @@ inline elewise_multiplication_<scalar_<T_a>, as_operation_cl_t<T_b>> operator*(
 template <typename T_a, typename T_b,
           typename = require_all_kernel_expressions_t<T_a>,
           typename = require_arithmetic_t<T_b>>
-inline elewise_multiplication_<as_operation_cl_t<T_a>, scalar_<T_b>> operator*(
+inline elt_multiply_<as_operation_cl_t<T_a>, scalar_<T_b>> operator*(
     T_a&& a, const T_b b) {  // NOLINT
   return {as_operation_cl(std::forward<T_a>(a)), as_operation_cl(b)};
 }
diff --git a/stan/math/opencl/kernel_generator/matrix_vector_multiply.hpp b/stan/math/opencl/kernel_generator/matrix_vector_multiply.hpp
@@ -15,7 +15,7 @@ namespace math {
 template <typename T_matrix, typename T_vector,
           typename = require_all_kernel_expressions_t<T_matrix, T_vector>>
 inline auto matrix_vector_multiply(T_matrix&& matrix, T_vector&& vector) {
-  return rowwise_sum(elewise_multiplication(
+  return rowwise_sum(elt_multiply(
       std::forward<T_matrix>(matrix),
       colwise_broadcast(transpose(std::forward<T_vector>(vector)))));
 }
diff --git a/stan/math/opencl/kernel_generator/rowwise_reduction.hpp b/stan/math/opencl/kernel_generator/rowwise_reduction.hpp
@@ -39,12 +39,11 @@ struct matvec_mul_opt {
 };
 
 template <typename Mat, typename VecT>
-struct matvec_mul_opt<
-    elewise_multiplication_<Mat, broadcast_<VecT, true, false>>> {
+struct matvec_mul_opt<elt_multiply_<Mat, broadcast_<VecT, true, false>>> {
   // if the argument of rowwise reduction is multiplication with a broadcast
   // vector we can do the optimization
   enum { is_possible = 1 };
-  using Arg = elewise_multiplication_<Mat, broadcast_<VecT, true, false>>;
+  using Arg = elt_multiply_<Mat, broadcast_<VecT, true, false>>;
 
   /**
    * Return view of the vector.
diff --git a/stan/math/opencl/kernel_generator/unary_function_cl.hpp b/stan/math/opencl/kernel_generator/unary_function_cl.hpp
@@ -6,7 +6,9 @@
 #include <stan/math/opencl/err.hpp>
 #include <stan/math/opencl/kernels/device_functions/digamma.hpp>
 #include <stan/math/opencl/kernels/device_functions/log1m_exp.hpp>
+#include <stan/math/opencl/kernels/device_functions/log1m_inv_logit.hpp>
 #include <stan/math/opencl/kernels/device_functions/log1p_exp.hpp>
+#include <stan/math/opencl/kernels/device_functions/logit.hpp>
 #include <stan/math/opencl/kernels/device_functions/inv_logit.hpp>
 #include <stan/math/opencl/kernels/device_functions/inv_square.hpp>
 #include <stan/math/opencl/matrix_cl_view.hpp>
@@ -227,6 +229,8 @@ ADD_UNARY_FUNCTION(erfc)
 ADD_UNARY_FUNCTION_PASS_ZERO(floor)
 ADD_UNARY_FUNCTION_PASS_ZERO(round)
 ADD_UNARY_FUNCTION_PASS_ZERO(ceil)
+ADD_UNARY_FUNCTION_PASS_ZERO(fabs)
+ADD_UNARY_FUNCTION_PASS_ZERO(trunc)
 
 ADD_UNARY_FUNCTION_WITH_INCLUDE(digamma,
                                 opencl_kernels::digamma_device_function)
@@ -238,6 +242,9 @@ ADD_UNARY_FUNCTION_WITH_INCLUDE(inv_square,
                                 opencl_kernels::inv_square_device_function)
 ADD_UNARY_FUNCTION_WITH_INCLUDE(inv_logit,
                                 opencl_kernels::inv_logit_device_function)
+ADD_UNARY_FUNCTION_WITH_INCLUDE(logit, opencl_kernels::logit_device_function)
+ADD_UNARY_FUNCTION_WITH_INCLUDE(log1m_inv_logit,
+                                opencl_kernels::log1m_inv_logit_device_function)
 
 ADD_CLASSIFICATION_FUNCTION(isfinite, {-rows() + 1, cols() - 1})
 ADD_CLASSIFICATION_FUNCTION(isinf,
diff --git a/stan/math/opencl/kernels/device_functions/log1m_inv_logit.hpp b/stan/math/opencl/kernels/device_functions/log1m_inv_logit.hpp
@@ -0,0 +1,56 @@
+#ifndef STAN_MATH_OPENCL_KERNELS_DEVICE_FUNCTIONS_LOG1M_INV_LOGIT_HPP
+#define STAN_MATH_OPENCL_KERNELS_DEVICE_FUNCTIONS_LOG1M_INV_LOGIT_HPP
+#ifdef STAN_OPENCL
+
+#include <stan/math/opencl/stringify.hpp>
+#include <string>
+
+namespace stan {
+namespace math {
+namespace opencl_kernels {
+
+// \cond
+static const char* log1m_inv_logit_device_function
+    = "\n"
+      "#ifndef STAN_MATH_OPENCL_KERNELS_DEVICE_FUNCTIONS_LOG1M_INV_LOGIT\n"
+      "#define "
+      "STAN_MATH_OPENCL_KERNELS_DEVICE_FUNCTIONS_LOG1M_INV_LOGIT\n" STRINGIFY(
+          // \endcond
+          /** \ingroup opencl_kernels
+           *
+           * Return the natural logarithm of 1 minus the inverse logit
+           * applied to the kernel generator expression.
+           *
+             \f[
+             \mbox{log1m\_inv\_logit}(x) =
+            \begin{cases}
+              -\ln(\exp(x)+1) & \mbox{if } -\infty\leq x \leq \infty \\[6pt]
+              \textrm{NaN} & \mbox{if } x = \textrm{NaN}
+            \end{cases}
+            \f]
+
+            \f[
+            \frac{\partial\, \mbox{log1m\_inv\_logit}(x)}{\partial x} =
+            \begin{cases}
+              -\frac{\exp(x)}{\exp(x)+1} & \mbox{if } -\infty\leq x\leq \infty
+          \\[6pt] \textrm{NaN} & \mbox{if } x = \textrm{NaN} \end{cases} \f]
+          *
+          * @param x argument
+          * @return log of one minus the inverse logit of the argument
+          */
+          inline double log1m_inv_logit(double x) {
+            if (x > 0.0) {
+              return -x - log1p(exp(-x));  // exp(-x) cannot overflow for x > 0
+            }
+            return -log1p(exp(x));
+          }
+          // \cond
+          ) "\n#endif\n";  // NOLINT
+// \endcond
+
+}  // namespace opencl_kernels
+}  // namespace math
+}  // namespace stan
+
+#endif
+#endif
diff --git a/stan/math/opencl/kernels/device_functions/logit.hpp b/stan/math/opencl/kernels/device_functions/logit.hpp
@@ -0,0 +1,62 @@
+#ifndef STAN_MATH_OPENCL_KERNELS_DEVICE_FUNCTIONS_LOGIT_HPP
+#define STAN_MATH_OPENCL_KERNELS_DEVICE_FUNCTIONS_LOGIT_HPP
+#ifdef STAN_OPENCL
+
+#include <stan/math/opencl/stringify.hpp>
+#include <string>
+
+namespace stan {
+namespace math {
+namespace opencl_kernels {
+
+// \cond
+static const char* logit_device_function
+    = "\n"
+      "#ifndef STAN_MATH_OPENCL_KERNELS_DEVICE_FUNCTIONS_LOGIT\n"
+      "#define STAN_MATH_OPENCL_KERNELS_DEVICE_FUNCTIONS_LOGIT\n" STRINGIFY(
+          // \endcond
+          /** \ingroup opencl_kernels
+           *
+           * Return the log odds applied to the kernel generator
+           * expression.
+           *
+           * The logit function is defined for \f$x \in [0, 1]\f$ by
+           * returning the log odds of \f$x\f$ treated as a probability,
+           *
+           * \f$\mbox{logit}(x) = \log \left( \frac{x}{1 - x} \right)\f$.
+           *
+           * The inverse to this function is <code>inv_logit</code>.
+           *
+           *
+           \f[
+           \mbox{logit}(x) =
+          \begin{cases}
+          \textrm{NaN}& \mbox{if } x < 0 \textrm{ or } x > 1\\
+          \ln\frac{x}{1-x} & \mbox{if } 0\leq x \leq 1 \\[6pt]
+          \textrm{NaN} & \mbox{if } x = \textrm{NaN}
+          \end{cases}
+          \f]
+
+          \f[
+          \frac{\partial\, \mbox{logit}(x)}{\partial x} =
+          \begin{cases}
+          \textrm{NaN}& \mbox{if } x < 0 \textrm{ or } x > 1\\
+          \frac{1}{x-x^2}& \mbox{if } 0\leq x\leq 1 \\[6pt]
+          \textrm{NaN} & \mbox{if } x = \textrm{NaN}
+          \end{cases}
+          \f]
+          *
+          * @param x argument
+          * @return log odds of argument
+          */
+          double logit(double x) { return log(x / (1 - x)); }
+          // \cond
+          ) "\n#endif\n";  // NOLINT
+// \endcond
+
+}  // namespace opencl_kernels
+}  // namespace math
+}  // namespace stan
+
+#endif
+#endif
diff --git a/stan/math/opencl/opencl.hpp b/stan/math/opencl/opencl.hpp
@@ -103,7 +103,9 @@
 #include <stan/math/opencl/prim/cholesky_decompose.hpp>
 #include <stan/math/opencl/prim/col.hpp>
 #include <stan/math/opencl/prim/cols.hpp>
+#include <stan/math/opencl/prim/crossprod.hpp>
 #include <stan/math/opencl/prim/dims.hpp>
+#include <stan/math/opencl/prim/divide.hpp>
 #include <stan/math/opencl/prim/divide_columns.hpp>
 #include <stan/math/opencl/prim/gp_exp_quad_cov.hpp>
 #include <stan/math/opencl/prim/inv.hpp>
@@ -120,6 +122,7 @@
 #include <stan/math/opencl/prim/rep_vector.hpp>
 #include <stan/math/opencl/prim/row.hpp>
 #include <stan/math/opencl/prim/rows.hpp>
+#include <stan/math/opencl/prim/tcrossprod.hpp>
 
 #include <stan/math/opencl/err.hpp>
 
diff --git a/stan/math/opencl/prim/crossprod.hpp b/stan/math/opencl/prim/crossprod.hpp
@@ -0,0 +1,29 @@
+#ifndef STAN_MATH_OPENCL_PRIM_FUN_CROSSPROD_HPP
+#define STAN_MATH_OPENCL_PRIM_FUN_CROSSPROD_HPP
+#ifdef STAN_OPENCL
+
+#include <stan/math/opencl/matrix_cl.hpp>
+#include <stan/math/opencl/multiply.hpp>
+#include <stan/math/opencl/kernel_generator.hpp>
+
+namespace stan {
+namespace math {
+/**
+ * Returns the result of pre-multiplying a matrix by its
+ * own transpose.
+ *
+ * @tparam T_A type of the kernel generator expression A
+ * @param A input matrix
+ * @return transpose(A) * A
+ */
+template <typename T_A,
+          typename = require_all_kernel_expressions_and_none_scalar_t<T_A>>
+inline matrix_cl<typename std::decay_t<T_A>::Scalar> crossprod(T_A&& A) {
+  const matrix_cl<typename std::decay_t<T_A>::Scalar>& A_eval
+      = transpose(std::forward<T_A>(A));
+  return multiply_transpose(A_eval);
+}
+}  // namespace math
+}  // namespace stan
+#endif
+#endif
diff --git a/stan/math/opencl/prim/divide.hpp b/stan/math/opencl/prim/divide.hpp
@@ -0,0 +1,26 @@
+#ifndef STAN_MATH_OPENCL_PRIM_DIVIDE_HPP
+#define STAN_MATH_OPENCL_PRIM_DIVIDE_HPP
+#ifdef STAN_OPENCL
+
+#include <stan/math/opencl/matrix_cl.hpp>
+#include <stan/math/opencl/kernel_generator.hpp>
+
+namespace stan {
+namespace math {
+/** \ingroup opencl
+ * Returns the kernel generator expression divided elementwise by a scalar.
+ *
+ * @tparam T_a type of input kernel generator expression a
+ * @param a expression to divide
+ * @param d scalar to divide by
+ * @return the elements of expression a divided by d
+ */
+template <typename T_a,
+          typename = require_all_kernel_expressions_and_none_scalar_t<T_a>>
+inline auto divide(T_a&& a, double d) {  // NOLINT
+  return elt_divide(std::forward<T_a>(a), d);
+}
+}  // namespace math
+}  // namespace stan
+#endif
+#endif
diff --git a/stan/math/opencl/prim/divide_columns.hpp b/stan/math/opencl/prim/divide_columns.hpp
@@ -54,7 +54,7 @@ inline void divide_columns(const matrix_cl<T1>& A, const matrix_cl<T2>& B) {
  */
 template <typename T1, typename T2, typename = require_all_arithmetic_t<T1, T2>>
 inline void divide_columns(const matrix_cl<T1>& A, const T2& divisor) {
-  A = elewise_division(A, divisor);
+  A = elt_divide(A, divisor);
 }
 
 }  // namespace math
diff --git a/stan/math/opencl/prim/inv.hpp b/stan/math/opencl/prim/inv.hpp
@@ -19,7 +19,7 @@ namespace math {
 template <typename T_x,
           typename = require_all_kernel_expressions_and_none_scalar_t<T_x>>
 inline auto inv(T_x&& x) {  // NOLINT
-  return elewise_division(1.0, std::forward<T_x>(x));
+  return elt_divide(1.0, std::forward<T_x>(x));
 }
 }  // namespace math
 }  // namespace stan
diff --git a/stan/math/opencl/prim/tcrossprod.hpp b/stan/math/opencl/prim/tcrossprod.hpp
@@ -0,0 +1,29 @@
+#ifndef STAN_MATH_OPENCL_PRIM_FUN_TCROSSPROD_HPP
+#define STAN_MATH_OPENCL_PRIM_FUN_TCROSSPROD_HPP
+#ifdef STAN_OPENCL
+
+#include <stan/math/opencl/matrix_cl.hpp>
+#include <stan/math/opencl/multiply.hpp>
+#include <stan/math/opencl/kernel_generator.hpp>
+
+namespace stan {
+namespace math {
+/**
+ * Returns the result of post-multiplying a matrix by its
+ * own transpose.
+ *
+ * @tparam T_A type of the kernel generator expression A
+ * @param A input matrix
+ * @return A * transpose(A)
+ */
+template <typename T_A,
+          typename = require_all_kernel_expressions_and_none_scalar_t<T_A>>
+inline matrix_cl<typename std::decay_t<T_A>::Scalar> tcrossprod(T_A&& A) {
+  const matrix_cl<typename std::decay_t<T_A>::Scalar>& A_eval
+      = std::forward<T_A>(A);
+  return multiply_transpose(A_eval);
+}
+}  // namespace math
+}  // namespace stan
+#endif
+#endif
diff --git a/test/unit/math/opencl/kernel_generator/binary_operation_test.cpp b/test/unit/math/opencl/kernel_generator/binary_operation_test.cpp
@@ -66,7 +66,7 @@ TEST(KernelGenerator, addition_test) {
 
 BINARY_OPERATION_TEST(subtraction_test, -, double);
 
-TEST(KernelGenerator, elewise_multiplication_test) {
+TEST(KernelGenerator, elt_multiply_test) {
   MatrixXd m1(3, 3);
   m1 << 1, 2.5, 3, 4, 5, 6.3, 7, -8, -9.5;
   MatrixXi m2(3, 3);
@@ -75,23 +75,23 @@ TEST(KernelGenerator, elewise_multiplication_test) {
   matrix_cl<double> m1_cl(m1);
   matrix_cl<int> m2_cl(m2);
 
-  auto tmp = elewise_multiplication(m1_cl, m2_cl);
+  auto tmp = elt_multiply(m1_cl, m2_cl);
   matrix_cl<double> res_cl = tmp;
   MatrixXd res = stan::math::from_matrix_cl(res_cl);
 
   MatrixXd correct = m1.array() * m2.cast<double>().array();
   EXPECT_MATRIX_NEAR(res, correct, 1e-9);
 }
 
-TEST(KernelGenerator, elewise_division_test) {
+TEST(KernelGenerator, elt_divide_test) {
   MatrixXd m1(3, 3);
   m1 << 1, 2.5, 3, 4, 5, 6.3, 7, -8, -9.5;
   MatrixXi m2(3, 3);
   m2 << 10, 100, 1000, 1, -10, -12, 2, 4, 8;
 
   matrix_cl<double> m1_cl(m1);
   matrix_cl<int> m2_cl(m2);
-  auto tmp = elewise_division(m1_cl, m2_cl);
+  auto tmp = elt_divide(m1_cl, m2_cl);
   matrix_cl<double> res_cl = tmp;
 
   MatrixXd res = stan::math::from_matrix_cl(res_cl);
@@ -231,8 +231,8 @@ TEST(KernelGenerator, reuse_expression_simple) {
 
   matrix_cl<double> m1_cl(m1);
   matrix_cl<double> m2_cl(m2);
-  auto tmp = stan::math::elewise_division(m1_cl, m2_cl);
-  auto tmp2 = stan::math::elewise_multiplication(tmp, tmp);
+  auto tmp = stan::math::elt_divide(m1_cl, m2_cl);
+  auto tmp2 = stan::math::elt_multiply(tmp, tmp);
   matrix_cl<double> res_cl;
   std::string kernel_src = tmp2.get_kernel_source_for_evaluating_into(res_cl);
   // if the expression is correctly reused, division will only occur once in the
@@ -259,10 +259,8 @@ TEST(KernelGenerator, reuse_expression_complicated) {
   matrix_cl<double> m1_cl(m1);
   matrix_cl<double> m2_cl(m2);
   auto tmp = m1_cl + m2_cl;
-  auto tmp2 = stan::math::elewise_division(
-      stan::math::elewise_multiplication(tmp, tmp), m1_cl);
-  auto tmp3 = stan::math::elewise_multiplication(
-      stan::math::elewise_division(tmp, tmp2), tmp2);
+  auto tmp2 = stan::math::elt_divide(stan::math::elt_multiply(tmp, tmp), m1_cl);
+  auto tmp3 = stan::math::elt_multiply(stan::math::elt_divide(tmp, tmp2), tmp2);
   matrix_cl<double> res_cl;
   std::string kernel_src = tmp3.get_kernel_source_for_evaluating_into(res_cl);
   stan::test::store_reference_kernel_if_needed(kernel_filename, kernel_src);
diff --git a/test/unit/math/opencl/kernel_generator/unary_function_cl_test.cpp b/test/unit/math/opencl/kernel_generator/unary_function_cl_test.cpp
diff --git a/test/unit/math/opencl/prim/crossprod_test.cpp b/test/unit/math/opencl/prim/crossprod_test.cpp
diff --git a/test/unit/math/opencl/prim/divide_test.cpp b/test/unit/math/opencl/prim/divide_test.cpp
diff --git a/test/unit/math/opencl/prim/log1m_inv_logit_test.cpp b/test/unit/math/opencl/prim/log1m_inv_logit_test.cpp
diff --git a/test/unit/math/opencl/prim/logit_test.cpp b/test/unit/math/opencl/prim/logit_test.cpp
diff --git a/test/unit/math/opencl/prim/tcrossprod_test.cpp b/test/unit/math/opencl/prim/tcrossprod_test.cpp

Original file line number	Diff line number	Diff line change
`@@ -15,7 +15,7 @@ namespace math {`
`15`	`15`	`template <typename T_matrix, typename T_vector,`
`16`	`16`	`typename = require_all_kernel_expressions_t<T_matrix, T_vector>>`
`17`	`17`	`inline auto matrix_vector_multiply(T_matrix&& matrix, T_vector&& vector) {`
`18`		`- return rowwise_sum(elewise_multiplication(`
	`18`	`+ return rowwise_sum(elt_multiply(`
`19`	`19`	`std::forward<T_matrix>(matrix),`
`20`	`20`	`colwise_broadcast(transpose(std::forward<T_vector>(vector)))));`
`21`	`21`	`}`
Original file line number	Diff line number	Diff line change
`@@ -54,7 +54,7 @@ inline void divide_columns(const matrix_cl<T1>& A, const matrix_cl<T2>& B) {`
`54`	`54`	`*/`
`55`	`55`	`template <typename T1, typename T2, typename = require_all_arithmetic_t<T1, T2>>`
`56`	`56`	`inline void divide_columns(const matrix_cl<T1>& A, const T2& divisor) {`
`57`		`- A = elewise_division(A, divisor);`
	`57`	`+ A = elt_divide(A, divisor);`
`58`	`58`	`}`
`59`	`59`
`60`	`60`	`} // namespace math`