 #else
 #define psweep_debug(...) (void)0
 #endif
-#define PSWEEP_LOCK_STATS 1
-#define PSWEEP_COLLECT_TIMINGS 1
+#define PSWEEP_LOCK_STATS 0
+#define PSWEEP_COLLECT_TIMINGS 0

 #ifndef GC_HEAP_FREE_SLOTS
 #define GC_HEAP_FREE_SLOTS 4096
@@ -487,8 +487,7 @@ typedef struct rb_heap_struct {
     struct ccan_list_head pages;
     struct heap_page *sweeping_page;     /* iterator for .pages. It always points to the next page to sweep. */
     struct heap_page *pre_sweeping_page; /* Background thread is currently sweeping this page */
-    struct heap_page *swept_pages;       /* pages claimed and swept by background thread */
-    struct heap_page *latest_swept_page; // tail of `swept_pages`
+    struct heap_page *swept_pages;       /* lock-free Treiber stack of pages swept by background thread */
     struct heap_page *compact_cursor;
     uintptr_t compact_cursor_index;
     struct heap_page *pooled_pages;
@@ -501,7 +500,6 @@ typedef struct rb_heap_struct {
     rb_atomic_t foreground_sweep_steps;     // incremented by ruby thread, checked by sweep thread
     rb_atomic_t background_sweep_steps;     // only incremented/checked by sweep thread
     rb_nativethread_cond_t sweep_page_cond; // associated with global sweep lock
-    rb_nativethread_lock_t swept_pages_lock;
     size_t pre_swept_slots_deferred;
     bool is_finished_sweeping;
     bool done_background_sweep;
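
A Treiber stack needs only that single atomic head: pages link through the `free_next` field they already carry, so the tail pointer (`latest_swept_page`) and the mutex that guarded the head/tail pair both disappear. A minimal sketch of the resulting shared state in C11 atomics (illustrative types, not the gc.c declarations):

    #include <stdatomic.h>

    struct page {
        struct page *free_next;  /* existing per-page field, reused as the stack link */
    };

    /* One atomic head replaces swept_pages + latest_swept_page + swept_pages_lock. */
    static _Atomic(struct page *) swept_head;
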
@@ -1087,7 +1085,6 @@ typedef struct lock_stats {
 } lock_stats_t;

 static lock_stats_t sweep_lock_stats = {"objspace->sweep_lock", {{0}}, 0};
-static lock_stats_t swept_pages_lock_stats = {"heap->swept_pages_lock", {{0}}, 0};


 static lock_callsite_stats_t *
@@ -1139,9 +1136,9 @@ print_lock_stats(void)
     fprintf(stderr, "%-40s %-30s %12s %12s %10s\n", "Lock Name", "Callsite", "Uncontended", "Contended", "Ratio");
     fprintf(stderr, "%-40s %-30s %12s %12s %10s\n", "---------", "--------", "-----------", "---------", "-----");

-    lock_stats_t *all_stats[] = {&sweep_lock_stats, &swept_pages_lock_stats};
+    lock_stats_t *all_stats[] = {&sweep_lock_stats};

-    for (int i = 0; i < 2; i++) {
+    for (int i = 0; i < 1; i++) {
         lock_stats_t *stats = all_stats[i];

         /* Sort callsites by total contentions (descending) */
@@ -4624,8 +4621,7 @@ gc_sweep_step_worker(rb_objspace_t *objspace, rb_heap_t *heap)
         return;
     }
     while (1) {
-      try_again:
-        struct heap_page *sweep_page = RUBY_ATOMIC_PTR_LOAD(heap->sweeping_page);
+        struct heap_page *sweep_page = rbimpl_atomic_ptr_load((void **)&heap->sweeping_page, RBIMPL_ATOMIC_ACQUIRE);
         if (!sweep_page) {
             GC_ASSERT(!heap->done_background_sweep);
             GC_ASSERT(objspace->heaps_done_background_sweep < HEAP_COUNT);
@@ -4647,9 +4643,9 @@ gc_sweep_step_worker(rb_objspace_t *objspace, rb_heap_t *heap)
             break;
         }

-        struct heap_page *prev = RUBY_ATOMIC_PTR_CAS(heap->sweeping_page, sweep_page, next);
-        if (prev != sweep_page) {
-            goto try_again;
+        struct heap_page *prev = rbimpl_atomic_ptr_cas((void **)&heap->sweeping_page, sweep_page, next, RBIMPL_ATOMIC_ACQ_REL, RBIMPL_ATOMIC_ACQUIRE);
+        if (prev != sweep_page) { // ruby thread won the race
+            continue;
         }
         heap->pre_sweeping_page = sweep_page;
@@ -4665,25 +4661,14 @@ gc_sweep_step_worker(rb_objspace_t *objspace, rb_heap_t *heap)
         int pre_empty_slots = sweep_page->pre_empty_slots;
         int free_slots = pre_freed_slots + pre_empty_slots;

-#if PSWEEP_LOCK_STATS > 0
-        instrumented_lock_acquire(&heap->swept_pages_lock, &swept_pages_lock_stats);
-#else
-        rb_native_mutex_lock(&heap->swept_pages_lock);
-#endif
+        /* Treiber stack push: lock-free LIFO */
         {
-            if (heap->swept_pages) {
-                // NOTE: heap->swept_pages needs to be in swept order for gc_sweep_step to work properly.
-                // TODO: Change to LIFO to get better shared memory cache benefits across threads (L2/L3)
-                struct heap_page *latest = heap->latest_swept_page;
-                GC_ASSERT(latest);
-                latest->free_next = sweep_page;
-            }
-            else {
-                heap->swept_pages = sweep_page;
-            }
-            heap->latest_swept_page = sweep_page;
+            struct heap_page *head;
+            do {
+                head = rbimpl_atomic_ptr_load((void **)&heap->swept_pages, RBIMPL_ATOMIC_ACQUIRE);
+                sweep_page->free_next = head;
+            } while (rbimpl_atomic_ptr_cas((void **)&heap->swept_pages, head, sweep_page, RBIMPL_ATOMIC_RELEASE, RBIMPL_ATOMIC_RELAXED) != head);
         }
-        rb_native_mutex_unlock(&heap->swept_pages_lock);

         psweep_debug(-2, "[sweep] gc_sweep_step_worker: heap:%p (%ld) - swept page:%p\n", heap, heap - heaps, sweep_page);

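The push writes `free_next` first and only then publishes the new head with a release CAS, so a consumer popping with acquire ordering sees the page's fully rebuilt freelist; relaxed ordering on a failed CAS is enough because failure publishes nothing. Switching from the old FIFO append to this LIFO also settles the removed TODO: the most recently swept page, likely still warm in L2/L3, is handed out first. A self-contained C11 sketch of the same push (illustrative names; gc.c uses its own `rbimpl_atomic_*` wrappers):

    #include <stdatomic.h>
    #include <stddef.h>

    struct page { struct page *free_next; };
    static _Atomic(struct page *) swept_head;

    static void
    swept_push(struct page *p)
    {
        struct page *head = atomic_load_explicit(&swept_head, memory_order_relaxed);
        do {
            p->free_next = head;                /* link before publishing */
        } while (!atomic_compare_exchange_weak_explicit(&swept_head, &head, p,
                     memory_order_release,      /* success: publish the swept page */
                     memory_order_relaxed));    /* failure: head was reloaded, retry */
    }
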
@@ -4775,7 +4760,6 @@ gc_sweep_start_heap(rb_objspace_t *objspace, rb_heap_t *heap)
     heap->free_pages = NULL;
     heap->swept_pages = NULL;
     heap->pooled_pages = NULL;
-    heap->latest_swept_page = NULL;
     heap->pre_swept_slots_deferred = 0;
 #if RUBY_DEBUG
     heap->made_zombies = 0;
@@ -5044,49 +5028,47 @@ gc_sweep_dequeue_page(rb_objspace_t *objspace, rb_heap_t *heap, bool free_in_use

     struct heap_page *page = NULL;

-    // Avoid taking the global sweep_lock if we can
-#if PSWEEP_LOCK_STATS > 0
-    instrumented_lock_acquire(&heap->swept_pages_lock, &swept_pages_lock_stats);
-#else
-    rb_native_mutex_lock(&heap->swept_pages_lock);
-#endif
+    /* Treiber stack pop: try lock-free swept_pages first, then sweeping_page */
     {
-        if (heap->swept_pages) {
-            page = heap->swept_pages;
-            psweep_debug(0, "[gc] gc_sweep_dequeue_page: got page:%p from heap(%p)->swept_pages (swept_pages lock) (heap %ld)\n", page, heap, heap - heaps);
-            heap->swept_pages = page->free_next;
-        } else if (!(page = RUBY_ATOMIC_PTR_LOAD(heap->sweeping_page))) {
+        struct heap_page *head;
+        do {
+            head = rbimpl_atomic_ptr_load((void **)&heap->swept_pages, RBIMPL_ATOMIC_RELAXED);
+            if (!head) break;
+        } while (rbimpl_atomic_ptr_cas((void **)&heap->swept_pages, head, head->free_next, RBIMPL_ATOMIC_ACQUIRE, RBIMPL_ATOMIC_RELAXED) != head);
+        if (head) {
+            page = head;
+            psweep_debug(0, "[gc] gc_sweep_dequeue_page: got page:%p from heap(%p)->swept_pages (lock-free) (heap %ld)\n", page, heap, heap - heaps);
+        }
+        else if (!RB_LIKELY((page = rbimpl_atomic_ptr_load((void **)&heap->sweeping_page, RBIMPL_ATOMIC_ACQUIRE)))) {
         }
         else {
             while (page) {
                 struct heap_page *next = ccan_list_next(&heap->pages, page, page_node);
-                struct heap_page *prev = RUBY_ATOMIC_PTR_CAS(heap->sweeping_page, page, next);
+                struct heap_page *prev = rbimpl_atomic_ptr_cas((void **)&heap->sweeping_page, page, next, RBIMPL_ATOMIC_ACQ_REL, RBIMPL_ATOMIC_ACQUIRE);
                 if (prev == page) {
                     *dequeued_unswept_page = true;
                     break;
                 }
-                else if (prev == NULL) {
+                else if (RB_UNLIKELY(prev == NULL)) {
                     page = NULL;
                     break;
                 }
-                page = RUBY_ATOMIC_PTR_LOAD(heap->sweeping_page);
+                page = rbimpl_atomic_ptr_load((void **)&heap->sweeping_page, RBIMPL_ATOMIC_RELAXED);
             }
             psweep_debug(0, "[gc] gc_sweep_dequeue_page: dequeued unswept page from heap(%p) (heap %ld)\n", heap, heap - heaps);
         }
     }
-    rb_native_mutex_unlock(&heap->swept_pages_lock);
     if (page) return page;

     sweep_lock_lock(&objspace->sweep_lock);
     {
         GC_ASSERT(!objspace->background_sweep_mode);
   retry_swept_pages:
-        if (heap->swept_pages) { // grab the earliest page that the sweep thread swept (ie: it dequeues in swept order)
+        if (heap->swept_pages) {
             page = heap->swept_pages;
-            psweep_debug(0, "[gc] gc_sweep_dequeue_page: got page:%p from heap(%p)->swept_pages (sweep_lock) (heap %ld)\n", page, heap, heap - heaps);
             heap->swept_pages = page->free_next;
         }
-        else if (!RUBY_ATOMIC_PTR_LOAD(heap->sweeping_page)) { // This heap is finished
+        else if (!rbimpl_atomic_ptr_load((void **)&heap->sweeping_page, RBIMPL_ATOMIC_ACQUIRE)) { // This heap is finished
             while (heap->pre_sweeping_page) {
                 sweep_lock_set_unlocked();
                 rb_native_cond_wait(&heap->sweep_page_cond, &objspace->sweep_lock);
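
The lock-free fast path above is the matching pop. It stays safe without version counters because this stack is single-consumer: only the Ruby thread pops while the sweep thread pushes, and the classic ABA hazard of a Treiber pop requires a node to be popped and re-pushed behind the popper's back, which a lone popper rules out. A sketch under the same C11 assumptions as the push sketch:

    #include <stdatomic.h>
    #include <stddef.h>

    struct page { struct page *free_next; };
    static _Atomic(struct page *) swept_head;

    static struct page *
    swept_pop(void)
    {
        struct page *head = atomic_load_explicit(&swept_head, memory_order_acquire);
        while (head &&
               !atomic_compare_exchange_weak_explicit(&swept_head, &head,
                    head->free_next,
                    memory_order_acquire,    /* success: see the page's swept state */
                    memory_order_acquire)) { /* failure: head reloaded with acquire,
                                                so its free_next is safe to read */
        }
        return head;  /* NULL when the stack is empty */
    }
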
@@ -11057,7 +11039,6 @@ rb_gc_impl_objspace_free(void *objspace_ptr)

     for (int i = 0; i < HEAP_COUNT; i++) {
         rb_heap_t *heap = &heaps[i];
-        rb_native_mutex_destroy(&heap->swept_pages_lock);
         rb_native_cond_destroy(&heap->sweep_page_cond);
         heap->total_pages = 0;
         heap->total_slots = 0;
@@ -11136,7 +11117,6 @@ rb_gc_impl_after_fork(void *objspace_ptr, rb_pid_t pid)
     for (int i = 0; i < HEAP_COUNT; i++) {
         rb_heap_t *heap = &heaps[i];

-        rb_native_mutex_initialize(&heap->swept_pages_lock);
         rb_native_cond_initialize(&heap->sweep_page_cond);
         heap->pre_sweeping_page = NULL;
         heap->background_sweep_steps = heap->foreground_sweep_steps;
@@ -11248,7 +11228,6 @@ rb_gc_impl_objspace_init(void *objspace_ptr)
         slot_div_magics[i] = (uint32_t)((uint64_t)UINT32_MAX / heap->slot_size + 1);

         ccan_list_head_init(&heap->pages);
-        rb_native_mutex_initialize(&heap->swept_pages_lock);
         rb_native_cond_initialize(&heap->sweep_page_cond);
     }
