Skip to content

Commit 5c71c8c

Browse files
gc: implement slot-based bitmap indexing with division magic
Replace the BASE_SLOT_SIZE-granularity bitmap scheme with slot-based indexing, where each bit represents one slot regardless of slot size.

Key changes:
- Add a slot_div_magic field to heap_page for fast division
- Use a Go-inspired formula: slot_index = (offset * div_magic) >> 32
- Update all bitmap iteration to use the one-bit-per-slot scheme
- Remove slot_bits_mask from rb_heap_t (no longer needed)

This enables arbitrary slot sizes (not just power-of-two multiples of BASE_SLOT_SIZE) by decoupling bitmap indexing from slot size.

Functions updated:
- gc_sweep_plane/gc_sweep_page
- rgengc_rememberset_mark/rgengc_rememberset_mark_plane
- gc_marks_wb_unprotected_objects/gc_marks_wb_unprotected_objects_plane
- gc_compact_plane/gc_compact_page
- invalidate_moved_plane/invalidate_moved_page
- RVALUE_AGE_GET/RVALUE_AGE_SET_BITMAP

Inspired by the Go runtime's mbitmap.go divideByElemSize().
1 parent c9a4941 commit 5c71c8c

1 file changed

Lines changed: 78 additions & 103 deletions

File tree

gc/default/default.c

Lines changed: 78 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -452,7 +452,6 @@ typedef int (*gc_compact_compare_func)(const void *l, const void *r, void *d);
452452

453453
typedef struct rb_heap_struct {
454454
short slot_size;
455-
bits_t slot_bits_mask;
456455

457456
/* Basic statistics */
458457
size_t total_allocated_pages;
@@ -765,6 +764,7 @@ struct free_slot {
765764

766765
struct heap_page {
767766
unsigned short slot_size;
767+
uint32_t slot_div_magic;
768768
unsigned short total_slots;
769769
unsigned short free_slots;
770770
unsigned short final_slots;
@@ -841,17 +841,33 @@ heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page *
841841
#define GET_PAGE_HEADER(x) (&GET_PAGE_BODY(x)->header)
842842
#define GET_HEAP_PAGE(x) (GET_PAGE_HEADER(x)->page)
843843

844-
#define NUM_IN_PAGE(p) (((bits_t)(p) & HEAP_PAGE_ALIGN_MASK) / BASE_SLOT_SIZE)
845-
#define BITMAP_INDEX(p) (NUM_IN_PAGE(p) / BITS_BITLENGTH )
846-
#define BITMAP_OFFSET(p) (NUM_IN_PAGE(p) & (BITS_BITLENGTH-1))
847-
#define BITMAP_BIT(p) ((bits_t)1 << BITMAP_OFFSET(p))
844+
/*
 * Precompute the 32-bit reciprocal multiplier ("division magic") for a slot
 * size: floor(UINT32_MAX / slot_size) + 1. The result is meant to be passed
 * as div_magic to slot_index_for_offset(), which multiplies by it and keeps
 * the upper 32 bits instead of dividing by slot_size.
 *
 * slot_size: size of one slot in bytes; must be non-zero (a zero value
 *            divides by zero here).
 *
 * NOTE(review): slot_size == 1 would need the value 2^32, which does not fit
 * in uint32_t; the "+ 1" presumably wraps to 0 in that case. Callers visible
 * here pass heap->slot_size -- confirm it is always >= 2.
 */
static inline uint32_t
845+
compute_slot_div_magic(unsigned short slot_size)
846+
{
847+
return (uint32_t)(UINT32_MAX / slot_size) + 1;
848+
}
849+
850+
/*
 * Convert a byte offset into a slot index without a hardware divide:
 * widen offset to 64 bits, multiply by div_magic, and keep the bits above
 * bit 31 (i.e. (offset * div_magic) >> 32).
 *
 * offset:    byte distance being converted (SLOT_INDEX passes the distance
 *            of a pointer from page->start).
 * div_magic: multiplier produced by compute_slot_div_magic() for the slot
 *            size in question.
 *
 * NOTE(review): the result is intended to equal offset / slot_size; that
 * presumably holds only while offset stays small (within one heap page) --
 * TODO confirm the bound against the page size. An offset of 2^32 or more
 * could also overflow the 64-bit product.
 */
static inline size_t
851+
slot_index_for_offset(size_t offset, uint32_t div_magic)
852+
{
853+
return (size_t)(((uint64_t)offset * div_magic) >> 32);
854+
}
855+
856+
/*
 * Slot-based bitmap addressing: one bitmap bit per slot.
 *
 *   SLOT_INDEX(page, p)         - slot number of p, computed from its byte
 *                                 offset from page->start using the page's
 *                                 slot_div_magic.
 *   SLOT_BITMAP_INDEX(page, p)  - index of the bits_t word holding that
 *                                 slot's bit (slot index / BITS_BITLENGTH).
 *   SLOT_BITMAP_OFFSET(page, p) - bit position inside that word (slot index
 *                                 masked with BITS_BITLENGTH - 1; assumes
 *                                 BITS_BITLENGTH is a power of two -- TODO
 *                                 confirm).
 *   SLOT_BITMAP_BIT(page, p)    - single-bit bits_t mask for that position.
 *
 * These are macros: `page` and `p` are expanded more than once, so
 * arguments with side effects must not be passed.
 */
#define SLOT_INDEX(page, p) slot_index_for_offset((uintptr_t)(p) - (page)->start, (page)->slot_div_magic)
857+
#define SLOT_BITMAP_INDEX(page, p) (SLOT_INDEX(page, p) / BITS_BITLENGTH)
858+
#define SLOT_BITMAP_OFFSET(page, p) (SLOT_INDEX(page, p) & (BITS_BITLENGTH - 1))
859+
#define SLOT_BITMAP_BIT(page, p) ((bits_t)1 << SLOT_BITMAP_OFFSET(page, p))
860+
861+
/*
 * Bitmap operations for a caller that already has the heap page in hand.
 * Each one locates the word and bit for p via SLOT_BITMAP_INDEX /
 * SLOT_BITMAP_BIT and then tests (_MARKED_), sets (_MARK_) or clears
 * (_CLEAR_) that bit in `bits`.
 *
 * `bits`, `page` and `p` are each expanded more than once.
 *
 * NOTE(review): identifiers beginning with an underscore followed by an
 * uppercase letter are reserved for the implementation in C; consider a
 * different prefix.
 */
#define _MARKED_IN_BITMAP(bits, page, p) ((bits)[SLOT_BITMAP_INDEX(page, p)] & SLOT_BITMAP_BIT(page, p))
862+
#define _MARK_IN_BITMAP(bits, page, p) ((bits)[SLOT_BITMAP_INDEX(page, p)] |= SLOT_BITMAP_BIT(page, p))
863+
#define _CLEAR_IN_BITMAP(bits, page, p) ((bits)[SLOT_BITMAP_INDEX(page, p)] &= ~SLOT_BITMAP_BIT(page, p))
848864

849-
/* Bitmap Operations */
850-
#define MARKED_IN_BITMAP(bits, p) ((bits)[BITMAP_INDEX(p)] & BITMAP_BIT(p))
851-
#define MARK_IN_BITMAP(bits, p) ((bits)[BITMAP_INDEX(p)] = (bits)[BITMAP_INDEX(p)] | BITMAP_BIT(p))
852-
#define CLEAR_IN_BITMAP(bits, p) ((bits)[BITMAP_INDEX(p)] = (bits)[BITMAP_INDEX(p)] & ~BITMAP_BIT(p))
865+
/*
 * Bitmap operations keyed by object pointer only: the owning page is looked
 * up with GET_HEAP_PAGE(p) and the work is delegated to the page-taking
 * _MARKED_IN_BITMAP / _MARK_IN_BITMAP / _CLEAR_IN_BITMAP variants.
 * `p` is expanded more than once.
 */
#define MARKED_IN_BITMAP(bits, p) _MARKED_IN_BITMAP(bits, GET_HEAP_PAGE(p), p)
866+
#define MARK_IN_BITMAP(bits, p) _MARK_IN_BITMAP(bits, GET_HEAP_PAGE(p), p)
867+
#define CLEAR_IN_BITMAP(bits, p) _CLEAR_IN_BITMAP(bits, GET_HEAP_PAGE(p), p)
868+
869+
#define NUM_IN_PAGE(p) (((bits_t)(p) & HEAP_PAGE_ALIGN_MASK) / BASE_SLOT_SIZE)
853870

854-
/* getting bitmap */
855871
#define GET_HEAP_MARK_BITS(x) (&GET_HEAP_PAGE(x)->mark_bits[0])
856872
#define GET_HEAP_PINNED_BITS(x) (&GET_HEAP_PAGE(x)->pinned_bits[0])
857873
#define GET_HEAP_UNCOLLECTIBLE_BITS(x) (&GET_HEAP_PAGE(x)->uncollectible_bits[0])
@@ -861,9 +877,11 @@ heap_page_in_global_empty_pages_pool(rb_objspace_t *objspace, struct heap_page *
861877
static int
862878
RVALUE_AGE_GET(VALUE obj)
863879
{
864-
bits_t *age_bits = GET_HEAP_PAGE(obj)->age_bits;
865-
int idx = BITMAP_INDEX(obj) * 2;
866-
int shift = BITMAP_OFFSET(obj);
880+
struct heap_page *page = GET_HEAP_PAGE(obj);
881+
bits_t *age_bits = page->age_bits;
882+
size_t slot_idx = SLOT_INDEX(page, obj);
883+
size_t idx = (slot_idx / BITS_BITLENGTH) * 2;
884+
int shift = (int)(slot_idx & (BITS_BITLENGTH - 1));
867885
int lo = (age_bits[idx] >> shift) & 1;
868886
int hi = (age_bits[idx + 1] >> shift) & 1;
869887
return lo | (hi << 1);
@@ -873,9 +891,11 @@ static void
873891
RVALUE_AGE_SET_BITMAP(VALUE obj, int age)
874892
{
875893
RUBY_ASSERT(age <= RVALUE_OLD_AGE);
876-
bits_t *age_bits = GET_HEAP_PAGE(obj)->age_bits;
877-
int idx = BITMAP_INDEX(obj) * 2;
878-
int shift = BITMAP_OFFSET(obj);
894+
struct heap_page *page = GET_HEAP_PAGE(obj);
895+
bits_t *age_bits = page->age_bits;
896+
size_t slot_idx = SLOT_INDEX(page, obj);
897+
size_t idx = (slot_idx / BITS_BITLENGTH) * 2;
898+
int shift = (int)(slot_idx & (BITS_BITLENGTH - 1));
879899
bits_t mask = (bits_t)1 << shift;
880900

881901
age_bits[idx] = (age_bits[idx] & ~mask) | ((bits_t)(age & 1) << shift);
@@ -1986,6 +2006,7 @@ heap_add_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page)
19862006
page->start = start;
19872007
page->total_slots = slot_count;
19882008
page->slot_size = heap->slot_size;
2009+
page->slot_div_magic = compute_slot_div_magic(heap->slot_size);
19892010
page->heap = heap;
19902011

19912012
asan_unlock_freelist(page);
@@ -2598,7 +2619,7 @@ is_pointer_to_heap(rb_objspace_t *objspace, const void *ptr)
25982619
else {
25992620
if (p < page->start) return FALSE;
26002621
if (p >= page->start + (page->total_slots * page->slot_size)) return FALSE;
2601-
if ((NUM_IN_PAGE(p) * BASE_SLOT_SIZE) % page->slot_size != 0) return FALSE;
2622+
if ((p - page->start) % page->slot_size != 0) return FALSE;
26022623

26032624
return TRUE;
26042625
}
@@ -3489,8 +3510,6 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit
34893510
{
34903511
struct heap_page *sweep_page = ctx->page;
34913512
short slot_size = sweep_page->slot_size;
3492-
short slot_bits = slot_size / BASE_SLOT_SIZE;
3493-
GC_ASSERT(slot_bits > 0);
34943513

34953514
do {
34963515
VALUE vp = (VALUE)p;
@@ -3566,7 +3585,7 @@ gc_sweep_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bit
35663585
}
35673586
}
35683587
p += slot_size;
3569-
bitset >>= slot_bits;
3588+
bitset >>= 1;
35703589
} while (bitset);
35713590
}
35723591

@@ -3591,50 +3610,33 @@ gc_sweep_page(rb_objspace_t *objspace, rb_heap_t *heap, struct gc_sweep_context
35913610

35923611
p = (uintptr_t)sweep_page->start;
35933612
bits = sweep_page->mark_bits;
3613+
short slot_size = sweep_page->slot_size;
3614+
int total_slots = sweep_page->total_slots;
3615+
int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH);
35943616

3595-
int page_rvalue_count = sweep_page->total_slots * (sweep_page->slot_size / BASE_SLOT_SIZE);
3596-
int out_of_range_bits = (NUM_IN_PAGE(p) + page_rvalue_count) % BITS_BITLENGTH;
3597-
if (out_of_range_bits != 0) { // sizeof(RVALUE) == 64
3598-
bits[BITMAP_INDEX(p) + page_rvalue_count / BITS_BITLENGTH] |= ~(((bits_t)1 << out_of_range_bits) - 1);
3617+
int out_of_range_bits = total_slots % BITS_BITLENGTH;
3618+
if (out_of_range_bits != 0) {
3619+
bits[bitmap_plane_count - 1] |= ~(((bits_t)1 << out_of_range_bits) - 1);
35993620
}
36003621

3601-
/* The last bitmap plane may not be used if the last plane does not
3602-
* have enough space for the slot_size. In that case, the last plane must
3603-
* be skipped since none of the bits will be set. */
3604-
int bitmap_plane_count = CEILDIV(NUM_IN_PAGE(p) + page_rvalue_count, BITS_BITLENGTH);
3605-
GC_ASSERT(bitmap_plane_count == HEAP_PAGE_BITMAP_LIMIT - 1 ||
3606-
bitmap_plane_count == HEAP_PAGE_BITMAP_LIMIT);
3607-
3608-
bits_t slot_mask = heap->slot_bits_mask;
3609-
36103622
// Clear wb_unprotected and age bits for all unmarked slots
36113623
{
36123624
bits_t *wb_unprotected_bits = sweep_page->wb_unprotected_bits;
36133625
bits_t *age_bits = sweep_page->age_bits;
36143626
for (int i = 0; i < bitmap_plane_count; i++) {
3615-
bits_t unmarked = ~bits[i] & slot_mask;
3627+
bits_t unmarked = ~bits[i];
36163628
wb_unprotected_bits[i] &= ~unmarked;
36173629
age_bits[i * 2] &= ~unmarked;
36183630
age_bits[i * 2 + 1] &= ~unmarked;
36193631
}
36203632
}
36213633

3622-
// Skip out of range slots at the head of the page
3623-
bitset = ~bits[0];
3624-
bitset >>= NUM_IN_PAGE(p);
3625-
bitset &= slot_mask;
3626-
if (bitset) {
3627-
gc_sweep_plane(objspace, heap, p, bitset, ctx);
3628-
}
3629-
p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
3630-
3631-
for (int i = 1; i < bitmap_plane_count; i++) {
3634+
for (int i = 0; i < bitmap_plane_count; i++) {
36323635
bitset = ~bits[i];
3633-
bitset &= slot_mask;
36343636
if (bitset) {
36353637
gc_sweep_plane(objspace, heap, p, bitset, ctx);
36363638
}
3637-
p += BITS_BITLENGTH * BASE_SLOT_SIZE;
3639+
p += BITS_BITLENGTH * slot_size;
36383640
}
36393641

36403642
if (!heap->compact_cursor) {
@@ -4086,7 +4088,7 @@ invalidate_moved_plane(rb_objspace_t *objspace, struct heap_page *page, uintptr_
40864088
GC_ASSERT(BUILTIN_TYPE(forwarding_object) != T_NONE);
40874089
}
40884090
}
4089-
p += BASE_SLOT_SIZE;
4091+
p += page->slot_size;
40904092
bitset >>= 1;
40914093
} while (bitset);
40924094
}
@@ -4098,25 +4100,21 @@ invalidate_moved_page(rb_objspace_t *objspace, struct heap_page *page)
40984100
int i;
40994101
bits_t *mark_bits, *pin_bits;
41004102
bits_t bitset;
4103+
short slot_size = page->slot_size;
4104+
int total_slots = page->total_slots;
4105+
int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH);
41014106

41024107
mark_bits = page->mark_bits;
41034108
pin_bits = page->pinned_bits;
41044109

41054110
uintptr_t p = page->start;
41064111

4107-
// Skip out of range slots at the head of the page
4108-
bitset = pin_bits[0] & ~mark_bits[0];
4109-
bitset >>= NUM_IN_PAGE(p);
4110-
invalidate_moved_plane(objspace, page, p, bitset);
4111-
p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
4112-
4113-
for (i=1; i < HEAP_PAGE_BITMAP_LIMIT; i++) {
4112+
for (i=0; i < bitmap_plane_count; i++) {
41144113
/* Moved objects are pinned but never marked. We reuse the pin bits
41154114
* to indicate there is a moved object in this slot. */
41164115
bitset = pin_bits[i] & ~mark_bits[i];
4117-
41184116
invalidate_moved_plane(objspace, page, p, bitset);
4119-
p += BITS_BITLENGTH * BASE_SLOT_SIZE;
4117+
p += BITS_BITLENGTH * slot_size;
41204118
}
41214119
}
41224120
#endif
@@ -5310,7 +5308,7 @@ gc_remember_unprotected(rb_objspace_t *objspace, VALUE obj)
53105308
}
53115309

53125310
static inline void
5313-
gc_marks_wb_unprotected_objects_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bits)
5311+
gc_marks_wb_unprotected_objects_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bits, short slot_size)
53145312
{
53155313
if (bits) {
53165314
do {
@@ -5320,7 +5318,7 @@ gc_marks_wb_unprotected_objects_plane(rb_objspace_t *objspace, uintptr_t p, bits
53205318
GC_ASSERT(RVALUE_MARKED(objspace, (VALUE)p));
53215319
gc_mark_children(objspace, (VALUE)p);
53225320
}
5323-
p += BASE_SLOT_SIZE;
5321+
p += slot_size;
53245322
bits >>= 1;
53255323
} while (bits);
53265324
}
@@ -5335,18 +5333,15 @@ gc_marks_wb_unprotected_objects(rb_objspace_t *objspace, rb_heap_t *heap)
53355333
bits_t *mark_bits = page->mark_bits;
53365334
bits_t *wbun_bits = page->wb_unprotected_bits;
53375335
uintptr_t p = page->start;
5336+
short slot_size = page->slot_size;
5337+
int total_slots = page->total_slots;
5338+
int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH);
53385339
size_t j;
53395340

5340-
bits_t bits = mark_bits[0] & wbun_bits[0];
5341-
bits >>= NUM_IN_PAGE(p);
5342-
gc_marks_wb_unprotected_objects_plane(objspace, p, bits);
5343-
p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
5344-
5345-
for (j=1; j<HEAP_PAGE_BITMAP_LIMIT; j++) {
5341+
for (j=0; j<(size_t)bitmap_plane_count; j++) {
53465342
bits_t bits = mark_bits[j] & wbun_bits[j];
5347-
5348-
gc_marks_wb_unprotected_objects_plane(objspace, p, bits);
5349-
p += BITS_BITLENGTH * BASE_SLOT_SIZE;
5343+
gc_marks_wb_unprotected_objects_plane(objspace, p, bits, slot_size);
5344+
p += BITS_BITLENGTH * slot_size;
53505345
}
53515346
}
53525347

@@ -5601,8 +5596,6 @@ static bool
56015596
gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t bitset, struct heap_page *page)
56025597
{
56035598
short slot_size = page->slot_size;
5604-
short slot_bits = slot_size / BASE_SLOT_SIZE;
5605-
GC_ASSERT(slot_bits > 0);
56065599

56075600
do {
56085601
VALUE vp = (VALUE)p;
@@ -5619,7 +5612,7 @@ gc_compact_plane(rb_objspace_t *objspace, rb_heap_t *heap, uintptr_t p, bits_t b
56195612
}
56205613
}
56215614
p += slot_size;
5622-
bitset >>= slot_bits;
5615+
bitset >>= 1;
56235616
} while (bitset);
56245617

56255618
return true;
@@ -5634,26 +5627,21 @@ gc_compact_page(rb_objspace_t *objspace, rb_heap_t *heap, struct heap_page *page
56345627
bits_t *mark_bits, *pin_bits;
56355628
bits_t bitset;
56365629
uintptr_t p = page->start;
5630+
short slot_size = page->slot_size;
5631+
int total_slots = page->total_slots;
5632+
int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH);
56375633

56385634
mark_bits = page->mark_bits;
56395635
pin_bits = page->pinned_bits;
56405636

5641-
// objects that can be moved are marked and not pinned
5642-
bitset = (mark_bits[0] & ~pin_bits[0]);
5643-
bitset >>= NUM_IN_PAGE(p);
5644-
if (bitset) {
5645-
if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page))
5646-
return false;
5647-
}
5648-
p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
5649-
5650-
for (int j = 1; j < HEAP_PAGE_BITMAP_LIMIT; j++) {
5637+
for (int j = 0; j < bitmap_plane_count; j++) {
5638+
// objects that can be moved are marked and not pinned
56515639
bitset = (mark_bits[j] & ~pin_bits[j]);
56525640
if (bitset) {
56535641
if (!gc_compact_plane(objspace, heap, (uintptr_t)p, bitset, page))
56545642
return false;
56555643
}
5656-
p += BITS_BITLENGTH * BASE_SLOT_SIZE;
5644+
p += BITS_BITLENGTH * slot_size;
56575645
}
56585646

56595647
return true;
@@ -5940,7 +5928,7 @@ rgengc_remember(rb_objspace_t *objspace, VALUE obj)
59405928
#endif
59415929

59425930
static inline void
5943-
rgengc_rememberset_mark_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bitset)
5931+
rgengc_rememberset_mark_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bitset, short slot_size)
59445932
{
59455933
if (bitset) {
59465934
do {
@@ -5956,7 +5944,7 @@ rgengc_rememberset_mark_plane(rb_objspace_t *objspace, uintptr_t p, bits_t bitse
59565944
rb_darray_append_without_gc(&objspace->weak_references, obj);
59575945
}
59585946
}
5959-
p += BASE_SLOT_SIZE;
5947+
p += slot_size;
59605948
bitset >>= 1;
59615949
} while (bitset);
59625950
}
@@ -5975,6 +5963,9 @@ rgengc_rememberset_mark(rb_objspace_t *objspace, rb_heap_t *heap)
59755963
ccan_list_for_each(&heap->pages, page, page_node) {
59765964
if (page->flags.has_remembered_objects | page->flags.has_uncollectible_wb_unprotected_objects) {
59775965
uintptr_t p = page->start;
5966+
short slot_size = page->slot_size;
5967+
int total_slots = page->total_slots;
5968+
int bitmap_plane_count = CEILDIV(total_slots, BITS_BITLENGTH);
59785969
bits_t bitset, bits[HEAP_PAGE_BITMAP_LIMIT];
59795970
bits_t *remembered_bits = page->remembered_bits;
59805971
bits_t *uncollectible_bits = page->uncollectible_bits;
@@ -5984,21 +5975,16 @@ rgengc_rememberset_mark(rb_objspace_t *objspace, rb_heap_t *heap)
59845975
else if (page->flags.has_remembered_objects) has_old++;
59855976
else if (page->flags.has_uncollectible_wb_unprotected_objects) has_shady++;
59865977
#endif
5987-
for (j=0; j<HEAP_PAGE_BITMAP_LIMIT; j++) {
5978+
for (j=0; j < (size_t)bitmap_plane_count; j++) {
59885979
bits[j] = remembered_bits[j] | (uncollectible_bits[j] & wb_unprotected_bits[j]);
59895980
remembered_bits[j] = 0;
59905981
}
59915982
page->flags.has_remembered_objects = FALSE;
59925983

5993-
bitset = bits[0];
5994-
bitset >>= NUM_IN_PAGE(p);
5995-
rgengc_rememberset_mark_plane(objspace, p, bitset);
5996-
p += (BITS_BITLENGTH - NUM_IN_PAGE(p)) * BASE_SLOT_SIZE;
5997-
5998-
for (j=1; j < HEAP_PAGE_BITMAP_LIMIT; j++) {
5984+
for (j=0; j < (size_t)bitmap_plane_count; j++) {
59995985
bitset = bits[j];
6000-
rgengc_rememberset_mark_plane(objspace, p, bitset);
6001-
p += BITS_BITLENGTH * BASE_SLOT_SIZE;
5986+
rgengc_rememberset_mark_plane(objspace, p, bitset, slot_size);
5987+
p += BITS_BITLENGTH * slot_size;
60025988
}
60035989
}
60045990
#if PROFILE_REMEMBERSET_MARK
@@ -9527,17 +9513,6 @@ rb_gc_impl_objspace_init(void *objspace_ptr)
95279513

95289514
heap->slot_size = (1 << i) * BASE_SLOT_SIZE;
95299515

9530-
// Bitmask with every (1 << i)th bit set, representing aligned slot positions
9531-
static const bits_t slot_bits_masks[] = {
9532-
~(bits_t)0, // i=0: every 1st bit
9533-
(bits_t)0x5555555555555555ULL, // i=1: every 2nd bit
9534-
(bits_t)0x1111111111111111ULL, // i=2: every 4th bit
9535-
(bits_t)0x0101010101010101ULL, // i=3: every 8th bit
9536-
(bits_t)0x0001000100010001ULL, // i=4: every 16th bit
9537-
};
9538-
GC_ASSERT(HEAP_COUNT == sizeof(slot_bits_masks) / sizeof(slot_bits_masks[0]));
9539-
heap->slot_bits_mask = slot_bits_masks[i];
9540-
95419516
ccan_list_head_init(&heap->pages);
95429517
}
95439518

0 commit comments

Comments
 (0)