3939#include < thrust/system/cuda/detail/par_to_seq.h>
4040#include < thrust/system/cuda/detail/get_value.h>
4141#include < thrust/system/cuda/detail/dispatch.h>
42+ #include < thrust/system/cuda/detail/make_unsigned_special.h>
4243#include < thrust/functional.h>
4344#include < thrust/system/cuda/detail/core/agent_launcher.h>
4445#include < thrust/detail/minmax.h>
@@ -64,9 +65,6 @@ namespace cuda_cub {
6465
6566namespace __reduce {
6667
67- // XXX should GridSizeType also be able accomodate 64 bit integers
68- typedef int GridSizeType;
69-
7068 template <bool >
7169 struct is_true : thrust::detail::false_type {};
7270 template <>
@@ -149,6 +147,8 @@ namespace __reduce {
149147 class ReductionOp >
150148 struct ReduceAgent
151149 {
150+ typedef typename detail::make_unsigned_special<Size>::type UnsignedSize;
151+
152152 template <class Arch >
153153 struct PtxPlan : Tuning<Arch,T>::type
154154 {
@@ -457,8 +457,8 @@ namespace __reduce {
457457 //
458458 THRUST_DEVICE_FUNCTION T
459459 consume_tiles (Size /* num_items*/ ,
460- cub::GridEvenShare<GridSizeType > &even_share,
461- cub::GridQueue<GridSizeType > & /* queue*/ ,
460+ cub::GridEvenShare<Size > &even_share,
461+ cub::GridQueue<UnsignedSize > & /* queue*/ ,
462462 thrust::detail::integral_constant<cub::GridMappingStrategy, cub::GRID_MAPPING_RAKE> /* is_rake*/ )
463463 {
464464 typedef is_true<ATTEMPT_VECTORIZATION> attempt_vec;
@@ -488,7 +488,7 @@ namespace __reduce {
488488 template <class CAN_VECTORIZE >
489489 THRUST_DEVICE_FUNCTION T
490490 consume_tiles_impl (Size num_items,
491- cub::GridQueue<GridSizeType > queue,
491+ cub::GridQueue<UnsignedSize > queue,
492492 CAN_VECTORIZE can_vectorize)
493493 {
494494 using core::sync_threadblock;
@@ -575,8 +575,8 @@ namespace __reduce {
575575 THRUST_DEVICE_FUNCTION T
576576 consume_tiles (
577577 Size num_items,
578- cub::GridEvenShare<GridSizeType > &/* even_share*/ ,
579- cub::GridQueue<GridSizeType > & queue,
578+ cub::GridEvenShare<Size > &/* even_share*/ ,
579+ cub::GridQueue<UnsignedSize > & queue,
580580 thrust::detail::integral_constant<cub::GridMappingStrategy, cub::GRID_MAPPING_DYNAMIC>)
581581 {
582582 typedef is_true<ATTEMPT_VECTORIZATION> attempt_vec;
@@ -643,8 +643,8 @@ namespace __reduce {
643643 THRUST_AGENT_ENTRY (InputIt input_it,
644644 OutputIt output_it,
645645 Size num_items,
646- cub::GridEvenShare<GridSizeType > even_share,
647- cub::GridQueue<GridSizeType > queue,
646+ cub::GridEvenShare<Size > even_share,
647+ cub::GridQueue<UnsignedSize > queue,
648648 ReductionOp reduction_op,
649649 char * shmem)
650650 {
@@ -664,6 +664,8 @@ namespace __reduce {
664664 template <class Size >
665665 struct DrainAgent
666666 {
667+ typedef typename detail::make_unsigned_special<Size>::type UnsignedSize;
668+
667669 template <class Arch >
668670 struct PtxPlan : PtxPolicy<1 > {};
669671 typedef core::specialize_plan<PtxPlan> ptx_plan;
@@ -672,7 +674,7 @@ namespace __reduce {
672674 // Agent entry point
673675 // ---------------------------------------------------------------------
674676
675- THRUST_AGENT_ENTRY (cub::GridQueue<GridSizeType > grid_queue,
677+ THRUST_AGENT_ENTRY (cub::GridQueue<UnsignedSize > grid_queue,
676678 Size num_items,
677679 char * /* shmem*/ )
678680 {
@@ -702,6 +704,8 @@ namespace __reduce {
702704 using core::get_agent_plan;
703705 using core::cuda_optional;
704706
707+ typedef typename detail::make_unsigned_special<Size>::type UnsignedSize;
708+
705709 if (num_items == 0 )
706710 return cudaErrorNotSupported;
707711
@@ -742,8 +746,8 @@ namespace __reduce {
742746 template get_max_blocks_per_sm<InputIt,
743747 OutputIt,
744748 Size,
745- cub::GridEvenShare<GridSizeType >,
746- cub::GridQueue<GridSizeType >,
749+ cub::GridEvenShare<Size >,
750+ cub::GridQueue<UnsignedSize >,
747751 ReductionOp>(reduce_plan);
748752 CUDA_CUB_RET_IF_FAIL (max_blocks_per_sm.status ());
749753
@@ -754,7 +758,7 @@ namespace __reduce {
754758 int sm_oversubscription = 5 ;
755759 int max_blocks = reduce_device_occupancy * sm_oversubscription;
756760
757- cub::GridEvenShare<GridSizeType > even_share;
761+ cub::GridEvenShare<Size > even_share;
// NOTE(review): even_share is now parameterized on Size, but the call below
// still narrows num_items with static_cast<int>. If Size is wider than int
// (the removed GridSizeType comment mentions 64-bit support), item counts
// above INT_MAX would be truncated before the even-share split is computed.
// Presumably DispatchInit takes the GridEvenShare offset type as its first
// parameter, in which case passing num_items uncast is the intended fix --
// TODO confirm against the CUB GridEvenShare declaration.
758762 even_share.DispatchInit (static_cast <int >(num_items), max_blocks,
759763 reduce_plan.items_per_tile );
760764
@@ -769,7 +773,7 @@ namespace __reduce {
769773 size_t allocation_sizes[3 ] =
770774 {
771775 max_blocks * sizeof (T), // bytes needed for privatized block reductions
772- cub::GridQueue<GridSizeType >::AllocationSize (), // bytes needed for grid queue descriptor0
776+ cub::GridQueue<UnsignedSize >::AllocationSize (), // bytes needed for grid queue descriptor0
773777 vshmem_size // size of virtualized shared memory storage
774778 };
775779 status = cub::AliasTemporaries (d_temp_storage,
@@ -783,7 +787,7 @@ namespace __reduce {
783787 }
784788
785789 T *d_block_reductions = (T*) allocations[0 ];
786- cub::GridQueue<GridSizeType > queue (allocations[1 ]);
790+ cub::GridQueue<UnsignedSize > queue (allocations[1 ]);
787791 char *vshmem_ptr = vshmem_size > 0 ? (char *)allocations[2 ] : NULL ;
788792
789793
0 commit comments