File tree Expand file tree Collapse file tree
thrust/system/cuda/detail Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -334,6 +334,7 @@ namespace __merge_sort {
334334 // Parallel thread block merge sort
335335 // ---------------------------------------------------------------------
336336
337+ template <bool IS_LAST_TILE>
337338 THRUST_DEVICE_FUNCTION void
338339 block_mergesort (int tid,
339340 int count,
@@ -343,9 +344,12 @@ namespace __merge_sort {
343344 using core::uninitialized_array;
344345 using core::sync_threadblock;
345346
346- // stable sort items in a single thread
347+ // stable sort the items held by this thread; on the last tile, skip threads whose first item lies past the end of the input range
347348 //
348- stable_odd_even_sort (keys_loc,items_loc);
349+ if (!IS_LAST_TILE || ITEMS_PER_THREAD * tid < count)
350+ {
351+ stable_odd_even_sort (keys_loc, items_loc);
352+ }
349353
350354 // each thread has sorted keys_loc
351355 // merge sort keys_loc in shared memory
@@ -499,17 +503,17 @@ namespace __merge_sort {
499503
500504 if (IS_LAST_TILE)
501505 {
502- block_mergesort (tid,
503- num_remaining,
504- keys_loc,
505- items_loc);
506+ block_mergesort<IS_LAST_TILE> (tid,
507+ num_remaining,
508+ keys_loc,
509+ items_loc);
506510 }
507511 else
508512 {
509- block_mergesort (tid,
510- ITEMS_PER_TILE,
511- keys_loc,
512- items_loc);
513+ block_mergesort<IS_LAST_TILE> (tid,
514+ ITEMS_PER_TILE,
515+ keys_loc,
516+ items_loc);
513517 }
514518
515519 sync_threadblock ();
You can’t perform that action at this time.
0 commit comments