Skip to content

Commit 1865104

Browse files
authored
Merge pull request NVIDIA#1442 from senior-zero/main-feature/github/thrust_merge_sort_comparisons_count_reduction/1436
Reduce comparisons count in merge sort
2 parents 6cf65fc + 403829e commit 1865104

1 file changed

Lines changed: 14 additions & 10 deletions

File tree

  • thrust/system/cuda/detail

thrust/system/cuda/detail/sort.h

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -334,6 +334,7 @@ namespace __merge_sort {
334334
// Parallel thread block merge sort
335335
//---------------------------------------------------------------------
336336

337+
template <bool IS_LAST_TILE>
337338
THRUST_DEVICE_FUNCTION void
338339
block_mergesort(int tid,
339340
int count,
@@ -343,9 +344,12 @@ namespace __merge_sort {
343344
using core::uninitialized_array;
344345
using core::sync_threadblock;
345346

346-
// stable sort items in a single thread
347+
// if first element of thread is in input range, stable sort items
347348
//
348-
stable_odd_even_sort(keys_loc,items_loc);
349+
if (!IS_LAST_TILE || ITEMS_PER_THREAD * tid < count)
350+
{
351+
stable_odd_even_sort(keys_loc, items_loc);
352+
}
349353

350354
// each thread has sorted keys_loc
351355
// merge sort keys_loc in shared memory
@@ -499,17 +503,17 @@ namespace __merge_sort {
499503

500504
if (IS_LAST_TILE)
501505
{
502-
block_mergesort(tid,
503-
num_remaining,
504-
keys_loc,
505-
items_loc);
506+
block_mergesort<IS_LAST_TILE>(tid,
507+
num_remaining,
508+
keys_loc,
509+
items_loc);
506510
}
507511
else
508512
{
509-
block_mergesort(tid,
510-
ITEMS_PER_TILE,
511-
keys_loc,
512-
items_loc);
513+
block_mergesort<IS_LAST_TILE>(tid,
514+
ITEMS_PER_TILE,
515+
keys_loc,
516+
items_loc);
513517
}
514518

515519
sync_threadblock();

0 commit comments

Comments
 (0)