stan-dev
diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py
Lines changed: 4 additions & 3 deletions
diff --git a/stan/math/prim/fun.hpp b/stan/math/prim/fun.hpp
Lines changed: 1 addition & 0 deletions
diff --git a/stan/math/prim/fun/csr_matrix_times_vector.hpp b/stan/math/prim/fun/csr_matrix_times_vector.hpp
Lines changed: 3 additions & 1 deletion
diff --git a/stan/math/prim/fun/gp_dot_prod_cov.hpp b/stan/math/prim/fun/gp_dot_prod_cov.hpp
Lines changed: 37 additions & 25 deletions
diff --git a/stan/math/prim/fun/gp_exp_quad_cov.hpp b/stan/math/prim/fun/gp_exp_quad_cov.hpp
Lines changed: 26 additions & 71 deletions
diff --git a/stan/math/prim/fun/gp_exponential_cov.hpp b/stan/math/prim/fun/gp_exponential_cov.hpp
Lines changed: 46 additions & 17 deletions
@@ -351,7 +351,7 @@ def benchmark(
             if "matrix" in arg:
                 arg_dimm = 2
             if "[" in arg:
-                arg_dimm += len(arg.split("[")[1])
+                arg_dimm += len(arg.split("]")[0].split("[")[1])
             if arg_dimm == dimm:
                 args_with_max_dimm += 1
             elif arg_dimm > dimm:
@@ -480,11 +480,12 @@ def benchmark(
                     code += "stan::math::to_var_value({}), ".format(var_name)
                 else:
                     code += var_name + ", "
-            if opencl == "base":
-                var_conversions += "  stan::math::opencl_context.queue().finish();\n"
             code = code[:-2] + "));\n"
             if "Rev" in arg_overloads:
                 code += "    stan::math::grad();\n"
+            if opencl == "base":
+              code += "    stan::math::opencl_context.queue().finish();\n"
+              var_conversions += "    stan::math::opencl_context.queue().finish();\n"
             result += BENCHMARK_TEMPLATE.format(
                 benchmark_name=benchmark_name,
                 setup=setup,
 
@@ -264,6 +264,7 @@
 #include <stan/math/prim/fun/quad_form.hpp>
 #include <stan/math/prim/fun/quad_form_diag.hpp>
 #include <stan/math/prim/fun/quad_form_sym.hpp>
+#include <stan/math/prim/fun/quantile.hpp>
 #include <stan/math/prim/fun/rank.hpp>
 #include <stan/math/prim/fun/read_corr_L.hpp>
 #include <stan/math/prim/fun/read_corr_matrix.hpp>
 
@@ -69,7 +69,9 @@ namespace math {
  *   for a given sparse matrix.
  * @throw std::out_of_range if any of the indexes are out of range.
  */
-template <typename T1, typename T2>
+template <typename T1, typename T2,
+          require_not_t<conjunction<std::is_arithmetic<scalar_type_t<T1>>,
+                                    is_var<scalar_type_t<T2>>>>* = nullptr>
 inline Eigen::Matrix<return_type_t<T1, T2>, Eigen::Dynamic, 1>
 csr_matrix_times_vector(int m, int n, const T1& w, const std::vector<int>& v,
                         const std::vector<int>& u, const T2& b) {
 
@@ -54,15 +54,22 @@ gp_dot_prod_cov(const std::vector<Eigen::Matrix<T_x, Eigen::Dynamic, 1>> &x,
   }
 
   T_sigma sigma_sq = square(sigma);
-
-  for (size_t i = 0; i < (x_size - 1); ++i) {
-    cov(i, i) = sigma_sq + dot_self(x[i]);
-    for (size_t j = i + 1; j < x_size; ++j) {
-      cov(i, j) = sigma_sq + dot_product(x[i], x[j]);
-      cov(j, i) = cov(i, j);
+  size_t block_size = 10;
+
+  for (size_t jb = 0; jb < x_size; jb += block_size) {
+    for (size_t ib = jb; ib < x_size; ib += block_size) {
+      size_t j_end = std::min(x_size, jb + block_size);
+      for (size_t j = jb; j < j_end; ++j) {
+        cov.coeffRef(j, j) = sigma_sq + dot_self(x[j]);
+        size_t i_end = std::min(x_size, ib + block_size);
+        for (size_t i = std::max(ib, j + 1); i < i_end; ++i) {
+          cov.coeffRef(j, i) = cov.coeffRef(i, j)
+              = sigma_sq + dot_product(x[i], x[j]);
+        }
+      }
     }
   }
-  cov(x_size - 1, x_size - 1) = sigma_sq + dot_self(x[x_size - 1]);
+  cov.coeffRef(x_size - 1, x_size - 1) = sigma_sq + dot_self(x[x_size - 1]);
   return cov;
 }
 
@@ -91,12 +98,10 @@ gp_dot_prod_cov(const std::vector<Eigen::Matrix<T_x, Eigen::Dynamic, 1>> &x,
 template <typename T_x, typename T_sigma>
 Eigen::Matrix<return_type_t<T_x, T_sigma>, Eigen::Dynamic, Eigen::Dynamic>
 gp_dot_prod_cov(const std::vector<T_x> &x, const T_sigma &sigma) {
-  check_not_nan("gp_dot_prod_cov", "sigma", sigma);
   check_nonnegative("gp_dot_prod_cov", "sigma", sigma);
   check_finite("gp_dot_prod_cov", "sigma", sigma);
 
   size_t x_size = x.size();
-  check_not_nan("gp_dot_prod_cov", "x", x);
   check_finite("gp_dot_prod_cov", "x", x);
 
   Eigen::Matrix<return_type_t<T_x, T_sigma>, Eigen::Dynamic, Eigen::Dynamic>
@@ -106,12 +111,18 @@ gp_dot_prod_cov(const std::vector<T_x> &x, const T_sigma &sigma) {
   }
 
   T_sigma sigma_sq = square(sigma);
-
-  for (size_t i = 0; i < (x_size - 1); ++i) {
-    cov(i, i) = sigma_sq + x[i] * x[i];
-    for (size_t j = i + 1; j < x_size; ++j) {
-      cov(i, j) = sigma_sq + x[i] * x[j];
-      cov(j, i) = cov(i, j);
+  size_t block_size = 10;
+
+  for (size_t jb = 0; jb < x_size; jb += block_size) {
+    for (size_t ib = jb; ib < x_size; ib += block_size) {
+      size_t j_end = std::min(x_size, jb + block_size);
+      for (size_t j = jb; j < j_end; ++j) {
+        cov.coeffRef(j, j) = sigma_sq + x[j] * x[j];
+        size_t i_end = std::min(x_size, ib + block_size);
+        for (size_t i = std::max(ib, j + 1); i < i_end; ++i) {
+          cov.coeffRef(j, i) = cov.coeffRef(i, j) = sigma_sq + x[i] * x[j];
+        }
+      }
     }
   }
   cov(x_size - 1, x_size - 1) = sigma_sq + x[x_size - 1] * x[x_size - 1];
@@ -146,18 +157,15 @@ Eigen::Matrix<return_type_t<T_x1, T_x2, T_sigma>, Eigen::Dynamic,
 gp_dot_prod_cov(const std::vector<Eigen::Matrix<T_x1, Eigen::Dynamic, 1>> &x1,
                 const std::vector<Eigen::Matrix<T_x2, Eigen::Dynamic, 1>> &x2,
                 const T_sigma &sigma) {
-  check_not_nan("gp_dot_prod_cov", "sigma", sigma);
   check_nonnegative("gp_dot_prod_cov", "sigma", sigma);
   check_finite("gp_dot_prod_cov", "sigma", sigma);
 
   size_t x1_size = x1.size();
   size_t x2_size = x2.size();
   for (size_t i = 0; i < x1_size; ++i) {
-    check_not_nan("gp_dot_prod_cov", "x1", x1[i]);
     check_finite("gp_dot_prod_cov", "x1", x1[i]);
   }
   for (size_t i = 0; i < x2_size; ++i) {
-    check_not_nan("gp_dot_prod_cov", "x2", x2[i]);
     check_finite("gp_dot_prod_cov", "x2", x2[i]);
   }
   Eigen::Matrix<return_type_t<T_x1, T_x2, T_sigma>, Eigen::Dynamic,
@@ -169,10 +177,17 @@ gp_dot_prod_cov(const std::vector<Eigen::Matrix<T_x1, Eigen::Dynamic, 1>> &x1,
   }
 
   T_sigma sigma_sq = square(sigma);
-
-  for (size_t i = 0; i < x1_size; ++i) {
-    for (size_t j = 0; j < x2_size; ++j) {
-      cov(i, j) = sigma_sq + dot_product(x1[i], x2[j]);
+  size_t block_size = 10;
+
+  for (size_t ib = 0; ib < x1_size; ib += block_size) {
+    for (size_t jb = 0; jb < x2_size; jb += block_size) {
+      size_t j_end = std::min(x2_size, jb + block_size);
+      for (size_t j = jb; j < j_end; ++j) {
+        size_t i_end = std::min(x1_size, ib + block_size);
+        for (size_t i = ib; i < i_end; ++i) {
+          cov(i, j) = sigma_sq + dot_product(x1[i], x2[j]);
+        }
+      }
     }
   }
   return cov;
@@ -205,15 +220,12 @@ Eigen::Matrix<return_type_t<T_x1, T_x2, T_sigma>, Eigen::Dynamic,
               Eigen::Dynamic>
 gp_dot_prod_cov(const std::vector<T_x1> &x1, const std::vector<T_x2> &x2,
                 const T_sigma &sigma) {
-  check_not_nan("gp_dot_prod_cov", "sigma", sigma);
   check_nonnegative("gp_dot_prod_cov", "sigma", sigma);
   check_finite("gp_dot_prod_cov", "sigma", sigma);
 
   size_t x1_size = x1.size();
   size_t x2_size = x2.size();
-  check_not_nan("gp_dot_prod_cov", "x1", x1);
   check_finite("gp_dot_prod_cov", "x1", x1);
-  check_not_nan("gp_dot_prod_cov", "x2", x2);
   check_finite("gp_dot_prod_cov", "x2", x2);
 
   Eigen::Matrix<return_type_t<T_x1, T_x2, T_sigma>, Eigen::Dynamic,
 
@@ -41,40 +41,18 @@ gp_exp_quad_cov(const std::vector<T_x> &x, const T_sigma &sigma_sq,
                 Eigen::Dynamic>
       cov(x_size, x_size);
   cov.diagonal().array() = sigma_sq;
-  for (size_t j = 0; j < x_size; ++j) {
-    for (size_t i = j + 1; i < x_size; ++i) {
-      cov(i, j)
-          = sigma_sq * exp(squared_distance(x[i], x[j]) * neg_half_inv_l_sq);
-    }
-  }
-  cov.template triangularView<Eigen::Upper>() = cov.transpose();
-  return cov;
-}
-
-/**
- * Returns a squared exponential kernel.
- *
- * @tparam T_x type for each scalar
- * @tparam T_sigma type of parameter sigma
- *
- * @param x std::vector of Eigen vectors of scalars.
- * @param sigma_sq square root of the marginal standard deviation or magnitude
- * @return squared distance
- *   x is nan or infinite
- */
-template <typename T_x, typename T_sigma>
-inline typename Eigen::Matrix<return_type_t<T_x, T_sigma>, Eigen::Dynamic,
-                              Eigen::Dynamic>
-gp_exp_quad_cov(const std::vector<Eigen::Matrix<T_x, -1, 1>> &x,
-                const T_sigma &sigma_sq) {
-  using std::exp;
-  const auto x_size = x.size();
-  Eigen::Matrix<return_type_t<T_x, T_sigma>, Eigen::Dynamic, Eigen::Dynamic>
-      cov(x_size, x_size);
-  cov.diagonal().array() = sigma_sq;
-  for (size_t j = 0; j < x_size; ++j) {
-    for (size_t i = j + 1; i < x_size; ++i) {
-      cov(i, j) = sigma_sq * exp(-0.5 * (x[i] - x[j]).squaredNorm());
+  size_t block_size = 10;
+  for (size_t jb = 0; jb < x.size(); jb += block_size) {
+    for (size_t ib = jb; ib < x.size(); ib += block_size) {
+      size_t j_end = std::min(x_size, jb + block_size);
+      for (size_t j = jb; j < j_end; ++j) {
+        size_t i_end = std::min(x_size, ib + block_size);
+        for (size_t i = std::max(ib, j + 1); i < i_end; ++i) {
+          cov.coeffRef(i, j)
+              = sigma_sq
+                * exp(squared_distance(x[i], x[j]) * neg_half_inv_l_sq);
+        }
+      }
     }
   }
   cov.template triangularView<Eigen::Upper>() = cov.transpose();
@@ -108,42 +86,19 @@ gp_exp_quad_cov(const std::vector<T_x1> &x1, const std::vector<T_x2> &x2,
   Eigen::Matrix<return_type_t<T_x1, T_x2, T_sigma, T_l>, Eigen::Dynamic,
                 Eigen::Dynamic>
       cov(x1.size(), x2.size());
-  for (size_t i = 0; i < x1.size(); ++i) {
-    for (size_t j = 0; j < x2.size(); ++j) {
-      cov(i, j)
-          = sigma_sq * exp(squared_distance(x1[i], x2[j]) * neg_half_inv_l_sq);
-    }
-  }
-  return cov;
-}
+  size_t block_size = 10;
 
-/**
- * Returns a squared exponential kernel.
- *
- * This function is for the cross covariance
- * matrix needed to compute the posterior predictive density.
- *
- * @tparam T_x1 type of first std::vector of elements
- * @tparam T_x2 type of second std::vector of elements
- * @tparam T_s type of sigma
- *
- * @param x1 std::vector of Eigen vectors of scalars.
- * @param x2 std::vector of Eigen vectors of scalars.
- * @param sigma_sq square root of the marginal standard deviation or magnitude
- * @return squared distance
- */
-template <typename T_x1, typename T_x2, typename T_s>
-inline typename Eigen::Matrix<return_type_t<T_x1, T_x2, T_s>, Eigen::Dynamic,
-                              Eigen::Dynamic>
-gp_exp_quad_cov(const std::vector<Eigen::Matrix<T_x1, -1, 1>> &x1,
-                const std::vector<Eigen::Matrix<T_x2, -1, 1>> &x2,
-                const T_s &sigma_sq) {
-  using std::exp;
-  Eigen::Matrix<return_type_t<T_x1, T_x2, T_s>, Eigen::Dynamic, Eigen::Dynamic>
-      cov(x1.size(), x2.size());
-  for (size_t i = 0; i < x1.size(); ++i) {
-    for (size_t j = 0; j < x2.size(); ++j) {
-      cov(i, j) = sigma_sq * exp(-0.5 * (x1[i] - x2[j]).squaredNorm());
+  for (size_t ib = 0; ib < x1.size(); ib += block_size) {
+    for (size_t jb = 0; jb < x2.size(); jb += block_size) {
+      size_t j_end = std::min(x2.size(), jb + block_size);
+      for (size_t j = jb; j < j_end; ++j) {
+        size_t i_end = std::min(x1.size(), ib + block_size);
+        for (size_t i = ib; i < i_end; ++i) {
+          cov.coeffRef(i, j)
+              = sigma_sq
+                * exp(squared_distance(x1[i], x2[j]) * neg_half_inv_l_sq);
+        }
+      }
     }
   }
   return cov;
@@ -224,7 +179,7 @@ gp_exp_quad_cov(const std::vector<Eigen::Matrix<T_x, -1, 1>> &x,
   check_size_match("gp_exp_quad_cov", "x dimension", x[0].size(),
                    "number of length scales", length_scale.size());
   cov = internal::gp_exp_quad_cov(divide_columns(x, length_scale),
-                                  square(sigma));
+                                  square(sigma), -0.5);
   return cov;
 }
 
@@ -330,7 +285,7 @@ gp_exp_quad_cov(const std::vector<Eigen::Matrix<T_x1, -1, 1>> &x1,
                    "number of length scales", l_size);
   cov = internal::gp_exp_quad_cov(divide_columns(x1, length_scale),
                                   divide_columns(x2, length_scale),
-                                  square(sigma));
+                                  square(sigma), -0.5);
   return cov;
 }
 
 
@@ -64,11 +64,18 @@ gp_exponential_cov(const std::vector<T_x> &x, const T_s &sigma,
   T_s sigma_sq = square(sigma);
   T_l neg_inv_l = -1.0 / length_scale;
 
-  for (size_t i = 0; i < x_size; ++i) {
-    cov(i, i) = sigma_sq;
-    for (size_t j = i + 1; j < x_size; ++j) {
-      cov(i, j) = sigma_sq * exp(neg_inv_l * distance(x[i], x[j]));
-      cov(j, i) = cov(i, j);
+  size_t block_size = 10;
+  for (size_t jb = 0; jb < x_size; jb += block_size) {
+    for (size_t ib = jb; ib < x_size; ib += block_size) {
+      size_t j_end = std::min(x_size, jb + block_size);
+      for (size_t j = jb; j < j_end; ++j) {
+        cov(j, j) = sigma_sq;
+        size_t i_end = std::min(x_size, ib + block_size);
+        for (size_t i = std::max(ib, j + 1); i < i_end; ++i) {
+          cov.coeffRef(j, i) = cov.coeffRef(i, j)
+              = sigma_sq * exp(neg_inv_l * distance(x[i], x[j]));
+        }
+      }
     }
   }
   return cov;
@@ -121,12 +128,18 @@ gp_exponential_cov(const std::vector<Eigen::Matrix<T_x, -1, 1>> &x,
       = divide_columns(x, length_scale);
 
   T_s sigma_sq = square(sigma);
-  for (size_t i = 0; i < x_size; ++i) {
-    cov(i, i) = sigma_sq;
-    for (size_t j = i + 1; j < x_size; ++j) {
-      return_type_t<T_x, T_l> dist = distance(x_new[i], x_new[j]);
-      cov(i, j) = sigma_sq * exp(-dist);
-      cov(j, i) = cov(i, j);
+  size_t block_size = 10;
+  for (size_t jb = 0; jb < x_size; jb += block_size) {
+    for (size_t ib = jb; ib < x_size; ib += block_size) {
+      size_t j_end = std::min(x_size, jb + block_size);
+      for (size_t j = jb; j < j_end; ++j) {
+        cov(j, j) = sigma_sq;
+        size_t i_end = std::min(x_size, ib + block_size);
+        for (size_t i = std::max(ib, j + 1); i < i_end; ++i) {
+          return_type_t<T_x, T_l> dist = distance(x_new[i], x_new[j]);
+          cov.coeffRef(j, i) = cov.coeffRef(i, j) = sigma_sq * exp(-dist);
+        }
+      }
     }
   }
   return cov;
@@ -192,9 +205,17 @@ gp_exponential_cov(const std::vector<T_x1> &x1, const std::vector<T_x2> &x2,
 
   T_s sigma_sq = square(sigma);
   T_l neg_inv_l = -1.0 / length_scale;
-  for (size_t i = 0; i < x1_size; ++i) {
-    for (size_t j = 0; j < x2_size; ++j) {
-      cov(i, j) = sigma_sq * exp(neg_inv_l * distance(x1[i], x2[j]));
+  size_t block_size = 10;
+
+  for (size_t ib = 0; ib < x1_size; ib += block_size) {
+    for (size_t jb = 0; jb < x2_size; jb += block_size) {
+      size_t j_end = std::min(x2_size, jb + block_size);
+      for (size_t j = jb; j < j_end; ++j) {
+        size_t i_end = std::min(x1_size, ib + block_size);
+        for (size_t i = ib; i < i_end; ++i) {
+          cov(i, j) = sigma_sq * exp(neg_inv_l * distance(x1[i], x2[j]));
+        }
+      }
     }
   }
   return cov;
@@ -268,9 +289,17 @@ gp_exponential_cov(const std::vector<Eigen::Matrix<T_x1, -1, 1>> &x1,
   std::vector<Eigen::Matrix<return_type_t<T_x2, T_l>, -1, 1>> x2_new
       = divide_columns(x2, length_scale);
 
-  for (size_t i = 0; i < x1_size; ++i) {
-    for (size_t j = 0; j < x2_size; ++j) {
-      cov(i, j) = sigma_sq * exp(-distance(x1_new[i], x2_new[j]));
+  size_t block_size = 10;
+
+  for (size_t ib = 0; ib < x1_size; ib += block_size) {
+    for (size_t jb = 0; jb < x2_size; jb += block_size) {
+      size_t j_end = std::min(x2_size, jb + block_size);
+      for (size_t j = jb; j < j_end; ++j) {
+        size_t i_end = std::min(x1_size, ib + block_size);
+        for (size_t i = ib; i < i_end; ++i) {
+          cov(i, j) = sigma_sq * exp(-distance(x1_new[i], x2_new[j]));
+        }
+      }
     }
   }
   return cov;