
Commit e1664a3

Use lock-free stack for swept_pages
1 parent 0a920cb commit e1664a3
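This commit replaces the mutex-protected, FIFO-ordered swept_pages list (and its latest_swept_page tail pointer) with a lock-free Treiber stack: the background sweep thread pushes swept pages by compare-and-swapping the list head, and the consumer pops the most recently swept page the same way. As a rough illustration of the pattern only, here is a minimal, self-contained sketch using C11 <stdatomic.h> in place of the rbimpl_atomic_* wrappers the commit uses; the node type, function names, and memory orders below are illustrative assumptions, not code from gc/default/default.c.

    /* Illustrative Treiber stack: a singly linked LIFO whose head is only
     * ever updated with atomic compare-and-swap, so no mutex is needed. */
    #include <stdatomic.h>
    #include <stddef.h>
    #include <stdio.h>

    struct node {
        struct node *next;   /* plays the role of heap_page.free_next */
        int value;
    };

    static _Atomic(struct node *) stack_head;   /* plays the role of heap->swept_pages */

    /* Push: link the new node to the current head, then CAS the head to it.
     * A failed CAS reloads `head` into the loop, and we retry. */
    static void
    stack_push(struct node *n)
    {
        struct node *head = atomic_load_explicit(&stack_head, memory_order_acquire);
        do {
            n->next = head;
        } while (!atomic_compare_exchange_weak_explicit(&stack_head, &head, n,
                                                        memory_order_release,
                                                        memory_order_acquire));
    }

    /* Pop: swing the head to head->next; returns NULL when the stack is empty. */
    static struct node *
    stack_pop(void)
    {
        struct node *head = atomic_load_explicit(&stack_head, memory_order_acquire);
        while (head != NULL &&
               !atomic_compare_exchange_weak_explicit(&stack_head, &head, head->next,
                                                      memory_order_acquire,
                                                      memory_order_acquire)) {
            /* a failed CAS reloaded `head`; retry against the new top */
        }
        return head;
    }

    int
    main(void)
    {
        struct node a = { .value = 1 }, b = { .value = 2 };
        stack_push(&a);
        stack_push(&b);                      /* LIFO: b is now on top */
        printf("%d\n", stack_pop()->value);  /* prints 2 */
        printf("%d\n", stack_pop()->value);  /* prints 1 */
        return 0;
    }

One consequence visible in the diff: the old code kept swept_pages in swept (FIFO) order and carried a TODO suggesting a switch to LIFO for better cross-thread cache behavior; a Treiber stack is inherently LIFO, so that TODO and the latest_swept_page tail pointer are removed.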

1 file changed

Lines changed: 29 additions & 50 deletions

gc/default/default.c

@@ -487,8 +487,7 @@ typedef struct rb_heap_struct {
     struct ccan_list_head pages;
     struct heap_page *sweeping_page; /* iterator for .pages. It always points to the next page to sweep. */
     struct heap_page *pre_sweeping_page; /* Background thread is currently sweeping this page */
-    struct heap_page *swept_pages; /* pages claimed and swept by background thread */
-    struct heap_page *latest_swept_page; // tail of `swept_pages`
+    struct heap_page *swept_pages; /* lock-free Treiber stack of pages swept by background thread */
     struct heap_page *compact_cursor;
     uintptr_t compact_cursor_index;
     struct heap_page *pooled_pages;
@@ -501,7 +500,6 @@ typedef struct rb_heap_struct {
     rb_atomic_t foreground_sweep_steps; // incremented by ruby thread, checked by sweep thread
     rb_atomic_t background_sweep_steps; // only incremented/checked by sweep thread
     rb_nativethread_cond_t sweep_page_cond; // associated with global sweep lock
-    rb_nativethread_lock_t swept_pages_lock;
     size_t pre_swept_slots_deferred;
     bool is_finished_sweeping;
     bool done_background_sweep;
@@ -1087,7 +1085,6 @@ typedef struct lock_stats {
 } lock_stats_t;
 
 static lock_stats_t sweep_lock_stats = {"objspace->sweep_lock", {{0}}, 0};
-static lock_stats_t swept_pages_lock_stats = {"heap->swept_pages_lock", {{0}}, 0};
 
 
 static lock_callsite_stats_t*
@@ -1139,9 +1136,9 @@ print_lock_stats(void)
     fprintf(stderr, "%-40s %-30s %12s %12s %10s\n", "Lock Name", "Callsite", "Uncontended", "Contended", "Ratio");
     fprintf(stderr, "%-40s %-30s %12s %12s %10s\n", "---------", "--------", "-----------", "---------", "-----");
 
-    lock_stats_t *all_stats[] = {&sweep_lock_stats, &swept_pages_lock_stats};
+    lock_stats_t *all_stats[] = {&sweep_lock_stats};
 
-    for (int i = 0; i < 2; i++) {
+    for (int i = 0; i < 1; i++) {
         lock_stats_t *stats = all_stats[i];
 
         /* Sort callsites by total contentions (descending) */
@@ -4624,8 +4621,7 @@ gc_sweep_step_worker(rb_objspace_t *objspace, rb_heap_t *heap)
         return;
     }
     while (1) {
-      try_again:
-        struct heap_page *sweep_page = RUBY_ATOMIC_PTR_LOAD(heap->sweeping_page);
+        struct heap_page *sweep_page = rbimpl_atomic_ptr_load((void **)&heap->sweeping_page, RBIMPL_ATOMIC_ACQUIRE);
         if (!sweep_page) {
             GC_ASSERT(!heap->done_background_sweep);
             GC_ASSERT(objspace->heaps_done_background_sweep < HEAP_COUNT);
@@ -4647,9 +4643,9 @@ gc_sweep_step_worker(rb_objspace_t *objspace, rb_heap_t *heap)
             break;
         }
 
-        struct heap_page *prev = RUBY_ATOMIC_PTR_CAS(heap->sweeping_page, sweep_page, next);
-        if (prev != sweep_page) {
-            goto try_again;
+        struct heap_page *prev = rbimpl_atomic_ptr_cas((void **)&heap->sweeping_page, sweep_page, next, RBIMPL_ATOMIC_ACQ_REL, RBIMPL_ATOMIC_ACQUIRE);
+        if (prev != sweep_page) { // ruby thread won the race
+            continue;
         }
         heap->pre_sweeping_page = sweep_page;
 
@@ -4665,25 +4661,14 @@ gc_sweep_step_worker(rb_objspace_t *objspace, rb_heap_t *heap)
         int pre_empty_slots = sweep_page->pre_empty_slots;
         int free_slots = pre_freed_slots + pre_empty_slots;
 
-#if PSWEEP_LOCK_STATS > 0
-        instrumented_lock_acquire(&heap->swept_pages_lock, &swept_pages_lock_stats);
-#else
-        rb_native_mutex_lock(&heap->swept_pages_lock);
-#endif
+        /* Treiber stack push: lock-free LIFO */
         {
-            if (heap->swept_pages) {
-                // NOTE: heap->swept_pages needs to be in swept order for gc_sweep_step to work properly.
-                // TODO: Change to LIFO to get better shared memory cache benefits across threads (L2/L3)
-                struct heap_page *latest = heap->latest_swept_page;
-                GC_ASSERT(latest);
-                latest->free_next = sweep_page;
-            }
-            else {
-                heap->swept_pages = sweep_page;
-            }
-            heap->latest_swept_page = sweep_page;
+            struct heap_page *head;
+            do {
+                head = rbimpl_atomic_ptr_load((void **)&heap->swept_pages, RBIMPL_ATOMIC_ACQUIRE);
+                sweep_page->free_next = head;
+            } while (rbimpl_atomic_ptr_cas((void **)&heap->swept_pages, head, sweep_page, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED) != head);
         }
-        rb_native_mutex_unlock(&heap->swept_pages_lock);
 
         psweep_debug(-2, "[sweep] gc_sweep_step_worker: heap:%p (%ld) - swept page:%p\n", heap, heap - heaps, sweep_page);
 
@@ -4775,7 +4760,6 @@ gc_sweep_start_heap(rb_objspace_t *objspace, rb_heap_t *heap)
     heap->free_pages = NULL;
     heap->swept_pages = NULL;
     heap->pooled_pages = NULL;
-    heap->latest_swept_page = NULL;
     heap->pre_swept_slots_deferred = 0;
 #if RUBY_DEBUG
     heap->made_zombies = 0;
@@ -5044,49 +5028,47 @@ gc_sweep_dequeue_page(rb_objspace_t *objspace, rb_heap_t *heap, bool free_in_use
 
     struct heap_page *page = NULL;
 
-    // Avoid taking the global sweep_lock if we can
-#if PSWEEP_LOCK_STATS > 0
-    instrumented_lock_acquire(&heap->swept_pages_lock, &swept_pages_lock_stats);
-#else
-    rb_native_mutex_lock(&heap->swept_pages_lock);
-#endif
+    /* Treiber stack pop: try lock-free swept_pages first, then sweeping_page */
     {
-        if (heap->swept_pages) {
-            page = heap->swept_pages;
-            psweep_debug(0, "[gc] gc_sweep_dequeue_page: got page:%p from heap(%p)->swept_pages (swept_pages lock) (heap %ld)\n", page, heap, heap - heaps);
-            heap->swept_pages = page->free_next;
-        } else if (!(page = RUBY_ATOMIC_PTR_LOAD(heap->sweeping_page))) {
+        struct heap_page *head;
+        do {
+            head = rbimpl_atomic_ptr_load((void **)&heap->swept_pages, RBIMPL_ATOMIC_RELAXED);
+            if (!head) break;
+        } while (rbimpl_atomic_ptr_cas((void **)&heap->swept_pages, head, head->free_next, RBIMPL_ATOMIC_ACQUIRE, RBIMPL_ATOMIC_RELAXED) != head);
+        if (head) {
+            page = head;
+            psweep_debug(0, "[gc] gc_sweep_dequeue_page: got page:%p from heap(%p)->swept_pages (lock-free) (heap %ld)\n", page, heap, heap - heaps);
+        }
+        else if (!RB_LIKELY((page = rbimpl_atomic_ptr_load((void **)&heap->sweeping_page, RBIMPL_ATOMIC_ACQUIRE)))) {
         }
         else {
            while (page) {
                struct heap_page *next = ccan_list_next(&heap->pages, page, page_node);
-                struct heap_page *prev = RUBY_ATOMIC_PTR_CAS(heap->sweeping_page, page, next);
+                struct heap_page *prev = rbimpl_atomic_ptr_cas((void **)&heap->sweeping_page, page, next, RBIMPL_ATOMIC_ACQ_REL, RBIMPL_ATOMIC_ACQUIRE);
                if (prev == page) {
                    *dequeued_unswept_page = true;
                    break;
                }
-                else if (prev == NULL) {
+                else if (RB_UNLIKELY(prev == NULL)) {
                    page = NULL;
                    break;
                }
-                page = RUBY_ATOMIC_PTR_LOAD(heap->sweeping_page);
+                page = rbimpl_atomic_ptr_load((void **)&heap->sweeping_page, RBIMPL_ATOMIC_RELAXED);
                psweep_debug(0, "[gc] gc_sweep_dequeue_page: dequeued unswept page from heap(%p) (heap %ld)\n", heap, heap - heaps);
            }
        }
    }
-    rb_native_mutex_unlock(&heap->swept_pages_lock);
    if (page) return page;
 
    sweep_lock_lock(&objspace->sweep_lock);
    {
        GC_ASSERT(!objspace->background_sweep_mode);
      retry_swept_pages:
-        if (heap->swept_pages) { // grab the earliest page that the sweep thread swept (ie: it dequeues in swept order)
+        if (heap->swept_pages) {
            page = heap->swept_pages;
-            psweep_debug(0, "[gc] gc_sweep_dequeue_page: got page:%p from heap(%p)->swept_pages (sweep_lock) (heap %ld)\n", page, heap, heap - heaps);
            heap->swept_pages = page->free_next;
        }
-        else if (!RUBY_ATOMIC_PTR_LOAD(heap->sweeping_page)) { // This heap is finished
+        else if (!rbimpl_atomic_ptr_load((void **)&heap->sweeping_page, RBIMPL_ATOMIC_ACQUIRE)) { // This heap is finished
            while (heap->pre_sweeping_page) {
                sweep_lock_set_unlocked();
                rb_native_cond_wait(&heap->sweep_page_cond, &objspace->sweep_lock);
@@ -11057,7 +11039,6 @@ rb_gc_impl_objspace_free(void *objspace_ptr)
 
     for (int i = 0; i < HEAP_COUNT; i++) {
         rb_heap_t *heap = &heaps[i];
-        rb_native_mutex_destroy(&heap->swept_pages_lock);
         rb_native_cond_destroy(&heap->sweep_page_cond);
         heap->total_pages = 0;
         heap->total_slots = 0;
@@ -11136,7 +11117,6 @@ rb_gc_impl_after_fork(void *objspace_ptr, rb_pid_t pid)
     for (int i = 0; i < HEAP_COUNT; i++) {
         rb_heap_t *heap = &heaps[i];
 
-        rb_native_mutex_initialize(&heap->swept_pages_lock);
         rb_native_cond_initialize(&heap->sweep_page_cond);
         heap->pre_sweeping_page = NULL;
         heap->background_sweep_steps = heap->foreground_sweep_steps;
@@ -11248,7 +11228,6 @@ rb_gc_impl_objspace_init(void *objspace_ptr)
         slot_div_magics[i] = (uint32_t)((uint64_t)UINT32_MAX / heap->slot_size + 1);
 
         ccan_list_head_init(&heap->pages);
-        rb_native_mutex_initialize(&heap->swept_pages_lock);
         rb_native_cond_initialize(&heap->sweep_page_cond);
     }
 
0 commit comments
