streams in kernels

alphaville · alphaville · commit c8b26cf8e954 · 2025-03-28T22:53:45.000Z
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+<!-- ---------------------
+      v1.9.1
+     --------------------- -->
+## v1.9.1 - 28-03-2025
+
+### Fixed
+
+- Custom kernels are now launched on the CUDA stream of the tensor they operate on, instead of the default stream
+
+
 <!-- ---------------------
       v1.9.0
      --------------------- -->
diff --git a/include/tensor.cuh b/include/tensor.cuh
@@ -197,6 +197,13 @@ public:
      */
     cusolverDnHandle_t &cuSolverHandle(size_t idx = 0) { return m_cusolverHandles[idx]; }
 
+    /**
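+     * Provides access to the CUDA stream stored at the given index.
+     * @param idx index of the stream (defaults to 0)
+     * @return reference to the stream at position idx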
+     */
+    cudaStream_t &stream(size_t idx = 0) { return m_cublasStreams[idx]; }
+
     /**
      * Preferred method for CUDA memory allocation; it allocated memory on the device
      * and counts the allocated bytes (you can then call #totalAllocatedBytes()).
@@ -1602,7 +1609,8 @@ public:
         for (size_t i = 0; i < m_rank->numMats(); i++) {
             DTensor<T> Si(*m_S, 2, i, i);
             DTensor<unsigned int> rankI(*m_rank, 2, i, i);
-            k_countNonzeroSingularValues<T><<<numBlocks(numElS), THREADS_PER_BLOCK>>>(Si.raw(), numElS,
+            cudaStream_t s = Session::getInstance().stream(m_tensor->streamIdx());
+            k_countNonzeroSingularValues<T><<<numBlocks(numElS), THREADS_PER_BLOCK, 0, s>>>(Si.raw(), numElS,
                 rankI.raw(), epsilon);
         }
         return *m_rank;
@@ -2301,7 +2309,8 @@ inline void GivensAnnihilator<T>::annihilate(size_t i, size_t k, size_t j) {
     T *matData = m_matrix->raw();
 
     /* Call kernel to determine 1/sqrt(Ai^2 + Ak^2) */
-    k_givensAnnihilateRHypot<<<1, 1>>>(m_matrix->raw(), aux, i, k, j, nR);
+    cudaStream_t s = Session::getInstance().stream(m_matrix->streamIdx());
+    k_givensAnnihilateRHypot<<<1, 1, 0, s>>>(m_matrix->raw(), aux, i, k, j, nR);
 
     /* Apply Givens rotation */
     m_matrix->applyLeftGivensRotation(i, k, aux + 1, aux + 2);