Skip to content

Commit a424837

Browse files
committed
After making a CUDA API call, always clear the global CUDA error state by calling
cudaGetLastError. Otherwise, if the CUDA API call is followed directly by a kernel launch, checking for a synchronous error during the kernel launch by calling cudaGetLastError may potentially return the error code from the CUDA API call. This type of error leakage is very subtle and difficult to trace. Bug 2720132
1 parent 93ad270 commit a424837

10 files changed

Lines changed: 107 additions & 117 deletions

File tree

testing/out_of_memory_recovery.cu

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Regression test for NVBug 2720132.
2+
3+
#include <unittest/unittest.h>
4+
#include <thrust/device_vector.h>
5+
#include <thrust/detail/cstdint.h>
6+
7+
struct non_trivial
8+
{
9+
__host__ __device__ non_trivial() {}
10+
__host__ __device__ ~non_trivial() {}
11+
};
12+
13+
void test_out_of_memory_recovery()
14+
{
15+
try
16+
{
17+
thrust::device_vector<non_trivial> x(1);
18+
19+
for (thrust::detail::uint64_t n = 1 ;; n <<= 1)
20+
thrust::device_vector<thrust::detail::uint32_t> y(n);
21+
}
22+
catch (...) { }
23+
}
24+
DECLARE_UNITTEST(test_out_of_memory_recovery);

thrust/system/cuda/detail/cub/iterator/tex_obj_input_iterator.cuh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/******************************************************************************
22
* Copyright (c) 2011, Duane Merrill. All rights reserved.
33
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4-
*
4+
*
55
* Redistribution and use in source and binary forms, with or without
66
* modification, are permitted provided that the following conditions are met:
77
* * Redistributions of source code must retain the above copyright
@@ -12,7 +12,7 @@
1212
* * Neither the name of the NVIDIA CORPORATION nor the
1313
* names of its contributors may be used to endorse or promote products
1414
* derived from this software without specific prior written permission.
15-
*
15+
*
1616
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
1717
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1818
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -177,13 +177,13 @@ public:
177177
res_desc.res.linear.desc = channel_desc;
178178
res_desc.res.linear.sizeInBytes = bytes;
179179
tex_desc.readMode = cudaReadModeElementType;
180-
return cudaCreateTextureObject(&tex_obj, &res_desc, &tex_desc, NULL);
180+
return CubDebug(cudaCreateTextureObject(&tex_obj, &res_desc, &tex_desc, NULL));
181181
}
182182

183183
/// Unbind this iterator from its texture reference
184184
cudaError_t UnbindTexture()
185185
{
186-
return cudaDestroyTextureObject(tex_obj);
186+
return CubDebug(cudaDestroyTextureObject(tex_obj));
187187
}
188188

189189
/// Postfix increment

thrust/system/cuda/detail/cub/util_allocator.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@ struct CachingDeviceAllocator
406406
// in use by the device, only consider cached blocks that are
407407
// either (from the active stream) or (from an idle stream)
408408
if ((active_stream == block_itr->associated_stream) ||
409-
(cudaEventQuery(block_itr->ready_event) != cudaErrorNotReady))
409+
(CubDebug(cudaEventQuery(block_itr->ready_event)) != cudaErrorNotReady))
410410
{
411411
// Reuse existing cache block. Insert into live blocks.
412412
found = true;

thrust/system/cuda/detail/cub/util_debug.cuh

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/******************************************************************************
22
* Copyright (c) 2011, Duane Merrill. All rights reserved.
33
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4-
*
4+
*
55
* Redistribution and use in source and binary forms, with or without
66
* modification, are permitted provided that the following conditions are met:
77
* * Redistributions of source code must retain the above copyright
@@ -12,7 +12,7 @@
1212
* * Neither the name of the NVIDIA CORPORATION nor the
1313
* names of its contributors may be used to endorse or promote products
1414
* derived from this software without specific prior written permission.
15-
*
15+
*
1616
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
1717
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1818
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -72,6 +72,13 @@ __host__ __device__ __forceinline__ cudaError_t Debug(
7272
{
7373
(void)filename;
7474
(void)line;
75+
76+
#ifdef CUB_RUNTIME_ENABLED
77+
// Clear the global CUDA error state which may have been set by the last
78+
// call. Otherwise, errors may "leak" to unrelated kernel launches.
79+
cudaGetLastError();
80+
#endif
81+
7582
#ifdef CUB_STDERR
7683
if (error)
7784
{

thrust/system/cuda/detail/cub/util_device.cuh

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/******************************************************************************
22
* Copyright (c) 2011, Duane Merrill. All rights reserved.
33
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4-
*
4+
*
55
* Redistribution and use in source and binary forms, with or without
66
* modification, are permitted provided that the following conditions are met:
77
* * Redistributions of source code must retain the above copyright
@@ -12,7 +12,7 @@
1212
* * Neither the name of the NVIDIA CORPORATION nor the
1313
* names of its contributors may be used to endorse or promote products
1414
* derived from this software without specific prior written permission.
15-
*
15+
*
1616
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
1717
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1818
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -199,11 +199,11 @@ CUB_RUNTIME_FUNCTION __forceinline__
199199
static cudaError_t SyncStream(cudaStream_t stream)
200200
{
201201
#if (CUB_PTX_ARCH == 0)
202-
return cudaStreamSynchronize(stream);
202+
return CubDebug(cudaStreamSynchronize(stream));
203203
#else
204204
(void)stream;
205205
// Device can't yet sync on a specific stream
206-
return cudaDeviceSynchronize();
206+
return CubDebug(cudaDeviceSynchronize());
207207
#endif
208208
}
209209

@@ -255,15 +255,12 @@ cudaError_t MaxSmOccupancy(
255255

256256
// CUDA API calls not supported from this device
257257
return CubDebug(cudaErrorInvalidConfiguration);
258-
259258
#else
260-
261-
return cudaOccupancyMaxActiveBlocksPerMultiprocessor (
259+
return CubDebug(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
262260
&max_sm_occupancy,
263261
kernel_ptr,
264262
block_threads,
265-
dynamic_smem_bytes);
266-
263+
dynamic_smem_bytes));
267264
#endif // CUB_RUNTIME_ENABLED
268265
}
269266

thrust/system/cuda/detail/malloc_and_free.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,9 @@ void *malloc(execution_policy<DerivedPolicy> &, std::size_t n)
6262

6363
if(status != cudaSuccess)
6464
{
65-
// cuda_cub::throw_on_error(status, "device malloc failed");
65+
cudaGetLastError(); // Clear global CUDA error state.
6666
thrust::system::detail::bad_alloc(thrust::cuda_category().message(status).c_str());
67-
}
67+
}
6868
#else
6969
result = thrust::raw_pointer_cast(thrust::malloc(thrust::seq, n));
7070
#endif

thrust/system/cuda/detail/par.h

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include <thrust/detail/config.h>
3030
#include <thrust/system/cuda/detail/guarded_cuda_runtime_api.h>
3131
#include <thrust/system/cuda/detail/execution_policy.h>
32+
#include <thrust/system/cuda/detail/util.h>
3233

3334
#include <thrust/detail/allocator_aware_execution_policy.h>
3435

@@ -40,36 +41,6 @@
4041
THRUST_BEGIN_NS
4142
namespace cuda_cub {
4243

43-
inline __host__ __device__
44-
cudaStream_t
45-
default_stream()
46-
{
47-
return cudaStreamLegacy;
48-
}
49-
50-
template <class Derived>
51-
__host__ __device__
52-
cudaStream_t
53-
get_stream(execution_policy<Derived> &)
54-
{
55-
return default_stream();
56-
}
57-
58-
__thrust_exec_check_disable__
59-
template <class Derived>
60-
__host__ __device__
61-
cudaError_t
62-
synchronize_stream(execution_policy<Derived> &)
63-
{
64-
#if __THRUST_HAS_CUDART__
65-
cudaDeviceSynchronize();
66-
return cudaGetLastError();
67-
#else
68-
return cudaSuccess;
69-
#endif
70-
}
71-
72-
7344
template <class Derived>
7445
struct execute_on_stream_base : execution_policy<Derived>
7546
{

0 commit comments

Comments
 (0)