stan-dev
diff --git a/‎stan/math/opencl/kernel_generator.hpp‎
Lines changed: 1 addition & 0 deletions b/‎stan/math/opencl/kernel_generator.hpp‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎stan/math/opencl/kernel_generator/cast.hpp‎
Lines changed: 101 additions & 0 deletions b/‎stan/math/opencl/kernel_generator/cast.hpp‎
Lines changed: 101 additions & 0 deletions
diff --git a/‎stan/math/opencl/prim/bernoulli_cdf.hpp‎
Lines changed: 2 additions & 2 deletions b/‎stan/math/opencl/prim/bernoulli_cdf.hpp‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎stan/math/opencl/prim/bernoulli_lccdf.hpp‎
Lines changed: 4 additions & 4 deletions b/‎stan/math/opencl/prim/bernoulli_lccdf.hpp‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎stan/math/opencl/prim/bernoulli_lcdf.hpp‎
Lines changed: 2 additions & 2 deletions b/‎stan/math/opencl/prim/bernoulli_lcdf.hpp‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎stan/math/opencl/prim/cauchy_cdf.hpp‎
Lines changed: 2 additions & 3 deletions b/‎stan/math/opencl/prim/cauchy_cdf.hpp‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎stan/math/opencl/prim/exp_mod_normal_cdf.hpp‎
Lines changed: 2 additions & 3 deletions b/‎stan/math/opencl/prim/exp_mod_normal_cdf.hpp‎
Lines changed: 2 additions & 3 deletions
diff --git a/‎stan/math/opencl/prim/exp_mod_normal_lccdf.hpp‎
Lines changed: 4 additions & 5 deletions b/‎stan/math/opencl/prim/exp_mod_normal_lccdf.hpp‎
Lines changed: 4 additions & 5 deletions
diff --git a/‎stan/math/opencl/prim/exp_mod_normal_lcdf.hpp‎
Lines changed: 4 additions & 5 deletions b/‎stan/math/opencl/prim/exp_mod_normal_lcdf.hpp‎
Lines changed: 4 additions & 5 deletions
diff --git a/‎stan/math/opencl/prim/gamma_lpdf.hpp‎
Lines changed: 2 additions & 2 deletions b/‎stan/math/opencl/prim/gamma_lpdf.hpp‎
Lines changed: 2 additions & 2 deletions
@@ -131,6 +131,7 @@
 #include <stan/math/opencl/kernel_generator/index.hpp>
 #include <stan/math/opencl/kernel_generator/indexing.hpp>
 #include <stan/math/opencl/kernel_generator/opencl_code.hpp>
+#include <stan/math/opencl/kernel_generator/cast.hpp>
 
 #include <stan/math/opencl/kernel_generator/multi_result_kernel.hpp>
 #include <stan/math/opencl/kernel_generator/get_kernel_source_for_evaluating_into.hpp>
 
@@ -0,0 +1,101 @@
+#ifndef STAN_MATH_OPENCL_KERNEL_GENERATOR_CAST_HPP
+#define STAN_MATH_OPENCL_KERNEL_GENERATOR_CAST_HPP
+#ifdef STAN_OPENCL
+
+#include <stan/math/prim/meta.hpp>
+#include <stan/math/opencl/matrix_cl_view.hpp>
+#include <stan/math/opencl/kernel_generator/common_return_scalar.hpp>
+#include <stan/math/opencl/kernel_generator/type_str.hpp>
+#include <stan/math/opencl/kernel_generator/name_generator.hpp>
+#include <stan/math/opencl/kernel_generator/operation_cl.hpp>
+#include <stan/math/opencl/kernel_generator/as_operation_cl.hpp>
+#include <array>
+#include <string>
+#include <type_traits>
+#include <set>
+#include <utility>
+
+namespace stan {
+namespace math {
+
+/** \addtogroup opencl_kernel_generator
+ *  @{
+ */
+
+/**
+ * Represents a typecast of a scalar in kernel generator expressions.
+ * Generated kernel code C-style casts the argument's value to `Scal`.
+ * @tparam Scal type of the scalar of result
+ * @tparam T type of argument
+ */
+template <typename Scal, typename T>
+class cast_ : public operation_cl<cast_<Scal, T>, Scal, T> {
+ public:
+  using Scalar = Scal;
+  using base = operation_cl<cast_<Scal, T>, Scalar, T>;
+  using base::var_name_;
+
+  /**
+   * Constructor
+   * @param arg argument expression, forwarded to the base operation
+   */
+  explicit cast_(T&& arg) : base(std::forward<T>(arg)) {}
+
+  /**
+   * Generates kernel code `Scalar var_name_ = (Scalar)var_name_arg;`.
+   * @param row_index_name row index variable name (not used by the cast)
+   * @param col_index_name column index variable name (not used by the cast)
+   * @param view_handled whether caller already handled matrix view (unused)
+   * @param var_name_arg variable name of the nested expression
+   * @return part of kernel with code for this expression
+   */
+  inline kernel_parts generate(const std::string& row_index_name,
+                               const std::string& col_index_name,
+                               const bool view_handled,
+                               const std::string& var_name_arg) const {
+    kernel_parts res{};
+
+    res.body = type_str<Scalar>() + " " + var_name_ + " = ("
+               + type_str<Scalar>() + ")" + var_name_arg + ";\n";
+    return res;
+  }
+
+  inline auto deep_copy() const {  // new cast_ around a deep copy of the arg
+    auto&& arg_copy = this->template get_arg<0>().deep_copy();
+    return cast_<Scalar, std::remove_reference_t<decltype(arg_copy)>>{
+        std::move(arg_copy)};
+  }
+};
+
+/**
+ * Typecast a kernel generator expression scalar (argument is deep copied).
+ * @tparam Scalar scalar type to cast the elements of the expression to
+ * @tparam T type of argument
+ * @param a input argument
+ * @return Typecast of given expression
+ */
+template <typename Scalar, typename T,
+          require_all_kernel_expressions_and_none_scalar_t<T>* = nullptr>
+inline auto cast(T&& a) {
+  auto&& a_operation = as_operation_cl(std::forward<T>(a)).deep_copy();
+  return cast_<Scalar, std::remove_reference_t<decltype(a_operation)>>(
+      std::move(a_operation));
+}
+
+/**
+ * Typecast a scalar.
+ * @tparam Scalar type to convert the scalar to
+ * @tparam T type of argument
+ * @param a input argument
+ * @return value of `a` implicitly converted to `Scalar`
+ */
+template <typename Scalar, typename T, require_stan_scalar_t<T>* = nullptr>
+inline Scalar cast(T a) {
+  return a;
+}
+
+/** @}*/
+}  // namespace math
+}  // namespace stan
+#endif
+#endif
@@ -49,13 +49,13 @@ return_type_t<T_prob_cl> bernoulli_cdf(const T_n_cl& n,
                                       theta_val, "in the interval [0, 1]");
   auto theta_bounded_expr = 0.0 <= theta_val && theta_val <= 1.0;
 
-  auto any_n_negative = colwise_max(constant(0, N, 1) + (n < 0));
+  auto any_n_negative = colwise_max(cast<char>(n < 0));
   auto cond = n >= 1;
   auto Pi_uncond = 1.0 - theta_val;
   auto Pi = select(cond, INFTY, Pi_uncond);
   auto P_expr = colwise_prod(select(cond, 1.0, Pi_uncond));
 
-  matrix_cl<double> any_n_negative_cl;
+  matrix_cl<char> any_n_negative_cl;
   matrix_cl<double> Pi_cl;
   matrix_cl<double> P_cl;
 
 
@@ -50,13 +50,13 @@ return_type_t<T_prob_cl> bernoulli_lccdf(const T_n_cl& n,
                                       theta_val, "in the interval [0, 1]");
   auto theta_bounded_expr = 0.0 <= theta_val && theta_val <= 1.0;
 
-  auto any_n_negative = colwise_max(0 + (n < 0));
-  auto any_n_over_one = colwise_max(constant(0, N, 1) + (n >= 1));
+  auto any_n_negative = colwise_max(cast<char>(n < 0));
+  auto any_n_over_one = colwise_max(cast<char>(n >= 1));
   auto P_expr = colwise_sum(log(theta_val));
   auto deriv = elt_divide(1.0, theta_val);
 
-  matrix_cl<double> any_n_negative_cl;
-  matrix_cl<double> any_n_over_one_cl;
+  matrix_cl<char> any_n_negative_cl;
+  matrix_cl<char> any_n_over_one_cl;
   matrix_cl<double> P_cl;
   matrix_cl<double> deriv_cl;
 
 
@@ -50,13 +50,13 @@ return_type_t<T_prob_cl> bernoulli_lcdf(const T_n_cl& n,
                                       theta_val, "in the interval [0, 1]");
   auto theta_bounded_expr = 0.0 <= theta_val && theta_val <= 1.0;
 
-  auto any_n_negative = colwise_max(0 + (n < 0));
+  auto any_n_negative = colwise_max(cast<char>(n < 0));
   auto Pi = 1.0 - theta_val;
   auto cond = n >= 1;
   auto P_expr = colwise_sum(select(cond, 0.0, log(Pi)));
   auto deriv = select(cond, 0.0, elt_divide(-1.0, Pi));
 
-  matrix_cl<double> any_n_negative_cl;
+  matrix_cl<char> any_n_negative_cl;
   matrix_cl<double> P_cl;
   matrix_cl<double> deriv_cl;
 
 
@@ -63,8 +63,7 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> cauchy_cdf(
       = check_cl(function, "Scale parameter", sigma_val, "positive finite");
   auto sigma_positive_finite_expr = 0 < sigma_val && isfinite(sigma_val);
 
-  auto any_y_neg_inf
-      = colwise_max(constant(0, N, 1) + (y_val == NEGATIVE_INFTY));
+  auto any_y_neg_inf = colwise_max(cast<char>(y_val == NEGATIVE_INFTY));
   auto cond = y_val == INFTY;
   auto sigma_inv = elt_divide(1.0, sigma_val);
   auto z = elt_multiply(y_val - mu_val, sigma_inv);
@@ -76,7 +75,7 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl> cauchy_cdf(
       elt_divide(sigma_inv, -pi() * elt_multiply(1.0 + square(z), Pn)));
   auto sigma_deriv_tmp = elt_multiply(z, mu_deriv_tmp);
 
-  matrix_cl<double> any_y_neg_inf_cl;
+  matrix_cl<char> any_y_neg_inf_cl;
   matrix_cl<double> P_cl;
   matrix_cl<double> mu_deriv_cl;
   matrix_cl<double> y_deriv_cl;
 
@@ -72,8 +72,7 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl, T_inv_scale_cl> exp_mod_normal_cdf(
       = check_cl(function, "Inv_cale parameter", lambda_val, "positive finite");
   auto lambda_positive_finite_expr = 0 < lambda_val && isfinite(lambda_val);
 
-  auto any_y_neg_inf
-      = colwise_max(constant(0, N, 1) + (y_val == NEGATIVE_INFTY));
+  auto any_y_neg_inf = colwise_max(cast<char>(y_val == NEGATIVE_INFTY));
   auto inv_sigma = elt_divide(1.0, sigma_val);
   auto diff = y_val - mu_val;
   auto v = elt_multiply(lambda_val, sigma_val);
@@ -102,7 +101,7 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl, T_inv_scale_cl> exp_mod_normal_cdf(
               - elt_multiply(elt_multiply(v, sigma_val) - diff, erf_calc)),
       cdf_n);
 
-  matrix_cl<double> any_y_neg_inf_cl;
+  matrix_cl<char> any_y_neg_inf_cl;
   matrix_cl<double> cdf_cl;
   matrix_cl<double> y_deriv_cl;
   matrix_cl<double> mu_deriv_cl;
 
@@ -73,9 +73,8 @@ exp_mod_normal_lccdf(const T_y_cl& y, const T_loc_cl& mu,
       = check_cl(function, "Inv_cale parameter", lambda_val, "positive finite");
   auto lambda_positive_finite_expr = 0 < lambda_val && isfinite(lambda_val);
 
-  auto any_y_neg_inf
-      = colwise_max(constant(0, N, 1) + (y_val == NEGATIVE_INFTY));
-  auto any_y_pos_inf = colwise_max(constant(0, N, 1) + (y_val == INFTY));
+  auto any_y_neg_inf = colwise_max(cast<char>(y_val == NEGATIVE_INFTY));
+  auto any_y_pos_inf = colwise_max(cast<char>(y_val == INFTY));
   auto inv_sigma = elt_divide(1.0, sigma_val);
   auto diff = y_val - mu_val;
   auto scaled_diff = elt_multiply(diff, inv_sigma * INV_SQRT_TWO);
@@ -104,8 +103,8 @@ exp_mod_normal_lccdf(const T_y_cl& y, const T_loc_cl& mu,
                        - INV_SQRT_TWO_PI * elt_multiply(sigma_val, exp_term_2)),
       ccdf_n);
 
-  matrix_cl<double> any_y_neg_inf_cl;
-  matrix_cl<double> any_y_pos_inf_cl;
+  matrix_cl<char> any_y_neg_inf_cl;
+  matrix_cl<char> any_y_pos_inf_cl;
   matrix_cl<double> ccdf_log_cl;
   matrix_cl<double> mu_deriv_cl;
   matrix_cl<double> y_deriv_cl;
 
@@ -73,9 +73,8 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl, T_inv_scale_cl> exp_mod_normal_lcdf(
       = check_cl(function, "Inv_cale parameter", lambda_val, "positive finite");
   auto lambda_positive_finite_expr = 0 < lambda_val && isfinite(lambda_val);
 
-  auto any_y_neg_inf
-      = colwise_max(constant(0, N, 1) + (y_val == NEGATIVE_INFTY));
-  auto any_y_pos_inf = colwise_max(constant(0, N, 1) + (y_val == INFTY));
+  auto any_y_neg_inf = colwise_max(cast<char>(y_val == NEGATIVE_INFTY));
+  auto any_y_pos_inf = colwise_max(cast<char>(y_val == INFTY));
   auto sigma_inv = elt_divide(1.0, sigma_val);
   auto diff = y_val - mu_val;
   auto scaled_diff = elt_multiply(diff * INV_SQRT_TWO, sigma_inv);
@@ -105,8 +104,8 @@ return_type_t<T_y_cl, T_loc_cl, T_scale_cl, T_inv_scale_cl> exp_mod_normal_lcdf(
               - elt_multiply(elt_multiply(v, sigma_val) - diff, erf_calc)),
       cdf_n);
 
-  matrix_cl<double> any_y_neg_inf_cl;
-  matrix_cl<double> any_y_pos_inf_cl;
+  matrix_cl<char> any_y_neg_inf_cl;
+  matrix_cl<char> any_y_pos_inf_cl;
   matrix_cl<double> cdf_log_cl;
   matrix_cl<double> mu_deriv_cl;
   matrix_cl<double> y_deriv_cl;
 
@@ -80,7 +80,7 @@ return_type_t<T_y_cl, T_shape_cl, T_inv_scale_cl> gamma_lpdf(
                                         beta_val, "positive finite");
   auto beta_pos_finite_expr = beta_val > 0 && isfinite(beta_val);
 
-  auto any_y_negative_expr = colwise_max(constant(0, N, 1) + (y_val < 0));
+  auto any_y_negative_expr = colwise_max(cast<char>(y_val < 0));
   auto log_y_expr = log(y_val);
   auto log_beta_expr = log(beta_val);
   auto logp1_expr = static_select<include_summand<propto, T_shape_cl>::value>(
@@ -99,7 +99,7 @@ return_type_t<T_y_cl, T_shape_cl, T_inv_scale_cl> gamma_lpdf(
   auto alpha_deriv_expr = log_beta_expr + log_y_expr - digamma(alpha_val);
   auto beta_deriv_expr = elt_divide(alpha_val, beta_val) - y_val;
 
-  matrix_cl<int> any_y_negative_cl;
+  matrix_cl<char> any_y_negative_cl;
   matrix_cl<double> logp_cl;
   matrix_cl<double> y_deriv_cl;
   matrix_cl<double> alpha_deriv_cl;