@@ -92,7 +92,7 @@ namespace __binary_search {
9292 typedef typename iterator_traits<NeedlesIt>::value_type T;
9393
9494 template <class It, class CompareOp>
95- THRUST_DEVICE_FUNCTION bool
95+ THRUST_DEVICE_FUNCTION bool
9696 operator()(It begin, It end, T const& value, CompareOp comp)
9797 {
9898 HaystackIt iter = system::detail::generic::scalar::lower_bound(begin,
@@ -110,7 +110,7 @@ namespace __binary_search {
110110 class KeysIt2,
111111 class Size,
112112 class BinaryPred>
113- THRUST_DEVICE_FUNCTION Size
113+ THRUST_DEVICE_FUNCTION Size
114114 merge_path(KeysIt1 keys1,
115115 KeysIt2 keys2,
116116 Size keys1_count,
@@ -143,7 +143,7 @@ namespace __binary_search {
143143 }
144144
145145 template <class It, class T2, class CompareOp, int ITEMS_PER_THREAD>
146- THRUST_DEVICE_FUNCTION void
146+ THRUST_DEVICE_FUNCTION void
147147 serial_merge(It keys_shared,
148148 int keys1_beg,
149149 int keys2_beg,
@@ -155,7 +155,7 @@ namespace __binary_search {
155155 {
156156 int keys1_end = keys1_beg + keys1_count;
157157 int keys2_end = keys2_beg + keys2_count;
158-
158+
159159 typedef typename iterator_value<It>::type key_type;
160160
161161 key_type key1 = keys_shared[keys1_beg];
@@ -185,7 +185,6 @@ namespace __binary_search {
185185
186186 template <int _BLOCK_THREADS,
187187 int _ITEMS_PER_THREAD = 1,
188- int _MIN_BLOCKS = 1,
189188 cub::BlockLoadAlgorithm _LOAD_ALGORITHM = cub::BLOCK_LOAD_DIRECT,
190189 cub::CacheLoadModifier _LOAD_MODIFIER = cub::LOAD_LDG,
191190 cub::BlockStoreAlgorithm _STORE_ALGORITHM = cub::BLOCK_STORE_DIRECT>
@@ -195,19 +194,18 @@ namespace __binary_search {
195194 {
196195 BLOCK_THREADS = _BLOCK_THREADS,
197196 ITEMS_PER_THREAD = _ITEMS_PER_THREAD,
198- MIN_BLOCKS = _MIN_BLOCKS,
199- ITEMS_PER_TILE = _BLOCK_THREADS * _ITEMS_PER_THREAD,
197+ ITEMS_PER_TILE = _BLOCK_THREADS * _ITEMS_PER_THREAD
200198 };
201199
202200 static const cub::BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM;
203201 static const cub::CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER;
204202 static const cub::BlockStoreAlgorithm STORE_ALGORITHM = _STORE_ALGORITHM;
205203 }; // PtxPolicy
206-
204+
207205 template <class Arch, class T>
208206 struct Tuning;
209207
210- template<class T>
208+ template<class T>
211209 struct Tuning<sm30,T>
212210 {
213211 enum
@@ -218,13 +216,12 @@ namespace __binary_search {
218216
219217 typedef PtxPolicy<128,
220218 ITEMS_PER_THREAD,
221- 1,
222219 cub::BLOCK_LOAD_WARP_TRANSPOSE,
223220 cub::LOAD_LDG,
224221 cub::BLOCK_STORE_TRANSPOSE>
225222 type;
226223 };
227-
224+
228225 template<class T>
229226 struct Tuning<sm52,T>
230227 {
@@ -238,13 +235,12 @@ namespace __binary_search {
238235
239236 typedef PtxPolicy<128,
240237 ITEMS_PER_THREAD,
241- 1,
242238 cub::BLOCK_LOAD_WARP_TRANSPOSE,
243239 cub::LOAD_LDG,
244240 cub::BLOCK_STORE_WARP_TRANSPOSE>
245241 type;
246242 };
247-
243+
248244 template <class NeedlesIt,
249245 class HaystackIt,
250246 class Size,
@@ -429,7 +425,7 @@ namespace __binary_search {
429425 needle_type needles_loc[ITEMS_PER_THREAD];
430426 BlockLoadNeedles(storage.load_needles)
431427 .Load(needles_load_it + tile_base, needles_loc, num_remaining);
432-
428+
433429#ifdef BS_SIMPLE
434430
435431 result_type results_loc[ITEMS_PER_THREAD];
@@ -499,7 +495,7 @@ namespace __binary_search {
499495 needles_loc[ITEM],
500496 compare_op);
501497 }
502-
498+
503499 sync_threadblock();
504500
505501 result_type results_loc[ITEMS_PER_THREAD];
@@ -627,7 +623,7 @@ namespace __binary_search {
627623 result,
628624 compare_op,
629625 search_op);
630-
626+
631627 CUDA_CUB_RET_IF_FAIL(cudaPeekAtLastError());
632628
633629 return status;
@@ -692,7 +688,7 @@ namespace __binary_search {
692688 stream,
693689 debug_sync);
694690 cuda_cub::throw_on_error(status, "binary_search: failed on 2nt call");
695-
691+
696692 status = cuda_cub::synchronize(policy);
697693 cuda_cub::throw_on_error(status, "binary_search: failed to synchronize");
698694
0 commit comments