
Commit 07501e3

Merge pull request #88 from struct/5_19_2022_perf_formatting
5 19 2022 perf formatting
2 parents 7261d22 + e1ac190 commit 07501e3

7 files changed

Lines changed: 37 additions & 59 deletions

File tree

.cirrus.yml
include/conf.h
include/iso_alloc_internal.h
src/iso_alloc.c
src/iso_alloc_profiler.c
src/iso_alloc_util.c
tests/alloc_fuzz.c

.cirrus.yml

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@ freebsd_instance:
 task:
   name: testsuite-freebsd-amd64
   install_script:
-    - pkg install bash gmake
+    - pkg install -y bash gmake
   script:
     - gmake tests
     - gmake cpp_tests

include/conf.h

Lines changed: 1 addition & 4 deletions
@@ -49,9 +49,6 @@
  * of its current chunks are free */
 #define ZONE_ALLOC_RETIRE 32
 
-/* The size of our bit slot freelist */
-#define BIT_SLOT_CACHE_SZ 255
-
 /* This byte value will overwrite the contents
  * of all free'd user chunks if -DSANITIZE_CHUNKS
  * is enabled in the Makefile */
@@ -60,7 +57,7 @@
 /* See PERFORMANCE.md for notes on huge page sizes.
  * If your system uses a non-default value for huge
  * page sizes you will need to adjust that here */
-#if (__linux__ && MAP_HUGETLB) || (__APPLE__ && VM_FLAGS_SUPERPAGE_SIZE_2MB) || (__FreeBSD__ && MAP_HUGETLB) && HUGE_PAGES
+#if(__linux__ && MAP_HUGETLB) || (__APPLE__ && VM_FLAGS_SUPERPAGE_SIZE_2MB) || (__FreeBSD__ && MAP_HUGETLB) && HUGE_PAGES
 #define HUGE_PAGE_SZ 2097152
 #endif
 

include/iso_alloc_internal.h

Lines changed: 8 additions & 9 deletions
@@ -236,20 +236,17 @@ using namespace std;
     n &= ~(1UL << k);
 
 #define ALIGN_SZ_UP(n) \
-    ((((n) + (ALIGNMENT) - 1) >> 3 ) * ALIGNMENT)
+    ((((n + ALIGNMENT) - 1) >> 3) * ALIGNMENT)
 
 #define ALIGN_SZ_DOWN(n) \
-    ((((n) + (ALIGNMENT) -1) >> 3) * ALIGNMENT) - ALIGNMENT
+    ((((n + ALIGNMENT) - 1) >> 3) * ALIGNMENT) - ALIGNMENT
 
 #define ROUND_UP_PAGE(n) \
-    ((((n) + (g_page_size) - 1) >> g_page_size_shift) * (g_page_size))
+    ((((n + g_page_size) - 1) >> g_page_size_shift) * (g_page_size))
 
 #define ROUND_DOWN_PAGE(n) \
     (ROUND_UP_PAGE(n) - g_page_size)
 
-#define GET_MAX_BITMASK_INDEX(zone) \
-    (zone->bitmap_size >> 3)
-
 #define MASK_ZONE_PTRS(zone) \
     MASK_BITMAP_PTRS(zone); \
     MASK_USER_PTRS(zone);
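
A note on the alignment macros touched above: the hard-coded >> 3 implies ALIGNMENT is 8, so ALIGN_SZ_UP rounds a size up to the next 8-byte boundary. A minimal standalone sketch of that arithmetic, illustrative only and not the project's header:

#include <assert.h>

#define ALIGNMENT 8
#define ALIGN_SZ_UP(n) ((((n) + (ALIGNMENT) -1) >> 3) * ALIGNMENT)

int main(void) {
    assert(ALIGN_SZ_UP(1) == 8);   /* 1..8  round up to 8  */
    assert(ALIGN_SZ_UP(9) == 16);  /* 9..16 round up to 16 */
    assert(ALIGN_SZ_UP(16) == 16); /* already aligned      */
    assert(ALIGN_SZ_UP(17) == 24);
    return 0;
}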
@@ -336,8 +333,7 @@ extern uint32_t g_page_size_shift;
 /* iso_alloc makes a number of default zones for common
  * allocation sizes. Allocations are 'first fit' up until
  * ZONE_1024 at which point a new zone is created for that
- * specific size request. You can create additional startup
- * profile by adjusting the next few lines below. */
+ * specific size request. */
 #define DEFAULT_ZONE_COUNT sizeof(default_zones) >> 3
 
 #define MEM_TAG_SIZE 1
@@ -351,6 +347,8 @@ typedef int64_t bitmap_index_t;
 typedef uint16_t zone_lookup_table_t;
 typedef uint16_t chunk_lookup_table_t;
 
+#define BIT_SLOT_CACHE_SZ 255
+
 typedef struct {
     void *user_pages_start; /* Start of the pages backing this zone */
     void *bitmap_start; /* Start of the bitmap */
@@ -363,14 +361,15 @@ typedef struct {
     uint64_t pointer_mask; /* Each zone has its own pointer protection secret */
     uint32_t chunk_size; /* Size of chunks managed by this zone */
     uint32_t bitmap_size; /* Size of the bitmap in bytes */
+    bitmap_index_t max_bitmap_idx; /* Max bitmap index for this bitmap */
     bool internal; /* Zones can be managed by iso_alloc or private */
     bool is_full; /* Flags whether this zone is full to avoid bit slot searches */
     uint16_t index; /* Zone index */
     uint16_t next_sz_index; /* What is the index of the next zone of this size */
     uint32_t alloc_count; /* Total number of lifetime allocations */
     uint32_t af_count; /* Increment/Decrement with each alloc/free operation */
     uint32_t chunk_count; /* Total number of chunks in this zone */
-    uint8_t chunk_size_pow2; /* Computed by _log2(chunk_size) */
+    uint8_t chunk_size_pow2; /* Computed by _log2(chunk_size) at zone creation */
 #if MEMORY_TAGGING
     bool tagged; /* Zone supports memory tagging */
 #endif
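
The new max_bitmap_idx field replaces the per-call GET_MAX_BITMASK_INDEX(zone) macro removed above: the value (bitmap_size >> 3, the number of 64-bit words in the bitmap) is computed once when the zone is created and then read from the struct on every hot-path scan. A minimal sketch of the equivalence, illustrative only and not iso_alloc's real types:

#include <stdint.h>

typedef int64_t bitmap_index_t;

/* Illustrative zone stub */
typedef struct {
    uint32_t bitmap_size;          /* size of the bitmap in bytes */
    bitmap_index_t max_bitmap_idx; /* cached bitmap_size >> 3 */
} zone_sketch_t;

/* Old hot-path cost: recompute on every call (what the macro did) */
static bitmap_index_t max_index_recompute(const zone_sketch_t *z) {
    return z->bitmap_size >> 3; /* number of 64-bit words in the bitmap */
}

/* New: compute once at zone creation, read the field everywhere else */
static void zone_init_sketch(zone_sketch_t *z, uint32_t bitmap_size) {
    z->bitmap_size = bitmap_size;
    z->max_bitmap_idx = bitmap_size >> 3;
}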

src/iso_alloc.c

Lines changed: 17 additions & 35 deletions
@@ -72,7 +72,7 @@ INTERNAL_HIDDEN void create_canary_chunks(iso_alloc_zone_t *zone) {
     bitmap_index_t *bm = (bitmap_index_t *) zone->bitmap_start;
     bit_slot_t bit_slot;
 
-    const bitmap_index_t max_bitmap_idx = GET_MAX_BITMASK_INDEX(zone) - 1;
+    const bitmap_index_t max_bitmap_idx = (zone->max_bitmap_idx - 1);
 
     /* Roughly %1 of the chunks in this zone will become a canary */
     const uint64_t canary_count = (zone->chunk_count >> CANARY_COUNT_DIV);
@@ -174,7 +174,6 @@ INTERNAL_HIDDEN void _verify_all_zones(void) {
 INTERNAL_HIDDEN void _verify_zone(iso_alloc_zone_t *zone) {
     UNMASK_ZONE_PTRS(zone);
     const bitmap_index_t *bm = (bitmap_index_t *) zone->bitmap_start;
-    const bitmap_index_t max_bm_idx = GET_MAX_BITMASK_INDEX(zone);
     bit_slot_t bit_slot;
 
     if(zone->next_sz_index > _root->zones_used) {
@@ -188,7 +187,7 @@ INTERNAL_HIDDEN void _verify_zone(iso_alloc_zone_t *zone) {
         }
     }
 
-    for(bitmap_index_t i = 0; i < max_bm_idx; i++) {
+    for(bitmap_index_t i = 0; i < zone->max_bitmap_idx; i++) {
         for(int64_t j = 1; j < BITS_PER_QWORD; j += BITS_PER_CHUNK) {
             /* If this bit is set it is either a free chunk or
              * a canary chunk. Either way it should have a set
@@ -213,18 +212,17 @@ INTERNAL_HIDDEN void _verify_zone(iso_alloc_zone_t *zone) {
  * find any free slots. */
 INTERNAL_HIDDEN INLINE void fill_free_bit_slot_cache(iso_alloc_zone_t *zone) {
     const bitmap_index_t *bm = (bitmap_index_t *) zone->bitmap_start;
-    const bitmap_index_t max_bitmap_idx = GET_MAX_BITMASK_INDEX(zone);
 
     /* This gives us an arbitrary spot in the bitmap to
      * start searching but may mean we end up with a smaller
      * cache. This may negatively affect performance but
      * leads to a less predictable free list */
     bitmap_index_t bm_idx;
 
-    /* The largest max_bitmap_idx we will ever
+    /* The largest zone->max_bitmap_idx we will ever
      * have is 8192 for SMALLEST_CHUNK_SZ (16) */
-    if(max_bitmap_idx > ALIGNMENT) {
-        bm_idx = ((uint32_t) rand_uint64() * (max_bitmap_idx - 1) >> 32);
+    if(zone->max_bitmap_idx > ALIGNMENT) {
+        bm_idx = ((uint32_t) rand_uint64() * (zone->max_bitmap_idx - 1) >> 32);
     } else {
         bm_idx = 0;
     }
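
The bm_idx expression above uses the multiply-and-shift range reduction: a 32-bit random value times the bound, shifted right by 32, lands in [0, bound) without the cost or modulo bias of a % operation. A hedged standalone sketch of the same technique; rand_uint64_sketch() here is only a placeholder for iso_alloc's internal RNG:

#include <stdint.h>
#include <stdlib.h>

/* Placeholder 64-bit random source, for illustration only */
static uint64_t rand_uint64_sketch(void) {
    return ((uint64_t) rand() << 32) | (uint64_t) rand();
}

/* Maps a 32-bit random value r into [0, bound) as (r * bound) >> 32,
 * which is what the bm_idx line does with bound = zone->max_bitmap_idx - 1 */
static uint32_t random_start_index(uint32_t bound) {
    uint32_t r = (uint32_t) rand_uint64_sketch();
    return (uint32_t) (((uint64_t) r * bound) >> 32);
}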
@@ -236,7 +234,7 @@ INTERNAL_HIDDEN INLINE void fill_free_bit_slot_cache(iso_alloc_zone_t *zone) {
     for(free_bit_slot_cache_index = 0; free_bit_slot_cache_index < BIT_SLOT_CACHE_SZ; bm_idx++) {
         /* Don't index outside of the bitmap or
          * we will return inaccurate bit slots */
-        if(UNLIKELY(bm_idx >= max_bitmap_idx)) {
+        if(UNLIKELY(bm_idx >= zone->max_bitmap_idx)) {
             zone->free_bit_slot_cache_index = free_bit_slot_cache_index;
             return;
         }
@@ -452,18 +450,11 @@ INTERNAL_HIDDEN void _unmap_zone(iso_alloc_zone_t *zone) {
     chunk_lookup_table[ADDR_TO_CHUNK_TABLE(zone->user_pages_start)] = 0;
 
     munmap(zone->bitmap_start, zone->bitmap_size);
-    madvise(zone->bitmap_start, zone->bitmap_size, MADV_DONTNEED);
     munmap(zone->bitmap_start - _root->system_page_size, _root->system_page_size);
-    madvise(zone->bitmap_start - _root->system_page_size, _root->system_page_size, MADV_DONTNEED);
     munmap(zone->bitmap_start + zone->bitmap_size, _root->system_page_size);
-    madvise(zone->bitmap_start + zone->bitmap_size, _root->system_page_size, MADV_DONTNEED);
-
     munmap(zone->user_pages_start, ZONE_USER_SIZE);
-    madvise(zone->user_pages_start, ZONE_USER_SIZE, MADV_DONTNEED);
     munmap(zone->user_pages_start - _root->system_page_size, _root->system_page_size);
-    madvise(zone->user_pages_start - _root->system_page_size, _root->system_page_size, MADV_DONTNEED);
     munmap(zone->user_pages_start + ZONE_USER_SIZE, _root->system_page_size);
-    madvise(zone->user_pages_start + ZONE_USER_SIZE, _root->system_page_size, MADV_DONTNEED);
 }
 
 INTERNAL_HIDDEN void _iso_alloc_destroy_zone(iso_alloc_zone_t *zone) {
@@ -690,6 +681,7 @@ INTERNAL_HIDDEN iso_alloc_zone_t *_iso_new_zone(size_t size, bool internal) {
      * then we need to allocate a minimum size bitmap */
     uint32_t bitmap_size = (new_zone->chunk_count << BITS_PER_CHUNK_SHIFT) >> BITS_PER_BYTE_SHIFT;
     new_zone->bitmap_size = (bitmap_size > sizeof(bitmap_index_t)) ? bitmap_size : sizeof(bitmap_index_t);
+    new_zone->max_bitmap_idx = new_zone->bitmap_size >> 3;
 
     /* All of the following fields are immutable
      * and should not change once they are set */
@@ -840,10 +832,9 @@ INTERNAL_HIDDEN iso_alloc_zone_t *_iso_new_zone(size_t size, bool internal) {
  * looking for empty holes (i.e. slot == 0) */
 INTERNAL_HIDDEN bit_slot_t iso_scan_zone_free_slot(iso_alloc_zone_t *zone) {
     const bitmap_index_t *bm = (bitmap_index_t *) zone->bitmap_start;
-    const bitmap_index_t max_bm_idx = GET_MAX_BITMASK_INDEX(zone);
 
     /* Iterate the entire bitmap a qword at a time */
-    for(bitmap_index_t i = 0; i < max_bm_idx; i++) {
+    for(bitmap_index_t i = 0; i < zone->max_bitmap_idx; i++) {
         /* If the byte is 0 then there are some free
          * slots we can use at this location */
         if(bm[i] == 0x0) {
@@ -861,9 +852,8 @@ INTERNAL_HIDDEN bit_slot_t iso_scan_zone_free_slot(iso_alloc_zone_t *zone) {
  * that indicates there is at least 1 free bit slot */
 INTERNAL_HIDDEN bit_slot_t iso_scan_zone_free_slot_slow(iso_alloc_zone_t *zone) {
     const bitmap_index_t *bm = (bitmap_index_t *) zone->bitmap_start;
-    const bitmap_index_t max_bm_idx = GET_MAX_BITMASK_INDEX(zone);
 
-    for(bitmap_index_t i = 0; i < max_bm_idx; i++) {
+    for(bitmap_index_t i = 0; i < zone->max_bitmap_idx; i++) {
         for(int64_t j = 0; j < BITS_PER_QWORD; j += BITS_PER_CHUNK) {
             /* We can easily check if every bitslot represented by
              * this qword is allocated with or without canaries */
@@ -1175,10 +1165,6 @@ INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_alloc_bitslot_from_zone(bit_slot_t bit
     void *p = POINTER_FROM_BITSLOT(zone, bitslot);
     UNPOISON_ZONE_CHUNK(zone, p);
 
-#if !ENABLE_ASAN && !DISABLE_CANARY
-    __builtin_prefetch(p, 1);
-#endif
-
     bitmap_index_t *bm = (bitmap_index_t *) zone->bitmap_start;
 
     /* Read out 64 bits from the bitmap. We will write
@@ -1187,9 +1173,9 @@
      * which could result in a page fault */
     bitmap_index_t b = bm[dwords_to_bit_slot];
 
-    if(UNLIKELY(p > zone->user_pages_start + ZONE_USER_SIZE)) {
+    if(UNLIKELY(p >= zone->user_pages_start + ZONE_USER_SIZE)) {
         LOG_AND_ABORT("Allocating an address 0x%p from zone[%d], bit slot %lu %ld bytes %ld pages outside zones user pages 0x%p 0x%p",
-                      p, zone->index, bitslot, p - (zone->user_pages_start + ZONE_USER_SIZE), (p - (zone->user_pages_start + ZONE_USER_SIZE)) / _root->system_page_size,
+                      p, zone->index, bitslot, p - zone->user_pages_start + ZONE_USER_SIZE, (p - zone->user_pages_start + ZONE_USER_SIZE) / _root->system_page_size,
                       zone->user_pages_start, zone->user_pages_start + ZONE_USER_SIZE);
     }
 
@@ -1257,7 +1243,7 @@ INTERNAL_HIDDEN uint8_t _iso_alloc_get_mem_tag(void *p, iso_alloc_zone_t *zone)
         LOG_AND_ABORT("Chunk offset %d not an alignment of %d", chunk_offset, zone->chunk_size);
     }
 
-    _mtp += (chunk_offset / zone->chunk_size);
+    _mtp += (chunk_offset >> zone->chunk_size_pow2);
     return *_mtp;
 #else
     return 0;
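
The memory-tag change above is a straight strength reduction: chunk sizes are powers of two and chunk_size_pow2 stores _log2(chunk_size), so shifting right by that amount gives the same result as dividing by the chunk size. A small standalone check with hypothetical values, for illustration only:

#include <assert.h>
#include <stdint.h>

int main(void) {
    uint32_t chunk_size = 64;    /* any power-of-two chunk size */
    uint8_t chunk_size_pow2 = 6; /* _log2(64), stored at zone creation */

    for(uint32_t chunk_offset = 0; chunk_offset < 4096; chunk_offset += chunk_size) {
        assert((chunk_offset >> chunk_size_pow2) == (chunk_offset / chunk_size));
    }

    return 0;
}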
@@ -1351,7 +1337,7 @@ INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_alloc(iso_alloc_zone_t *zone, size_t s
      * thread recently used for an alloc/free operation.
      * It's likely we are allocating a similar size chunk
      * and this will speed up that operation */
-    for(int64_t i = 0; i < zone_cache_count; i++) {
+    for(size_t i = 0; i < zone_cache_count; i++) {
         if(zone_cache[i].chunk_size >= size) {
             bool fit = iso_does_zone_fit(zone_cache[i].zone, size);

@@ -1504,7 +1490,7 @@ INTERNAL_HIDDEN iso_alloc_zone_t *iso_find_zone_bitmap_range(const void *restric
     iso_alloc_zone_t *tmp_zone = NULL;
 
     /* Now we check the MRU thread zone cache */
-    for(int64_t i = 0; i < zone_cache_count; i++) {
+    for(size_t i = 0; i < zone_cache_count; i++) {
         tmp_zone = zone_cache[i].zone;
         bitmap_start = UNMASK_BITMAP_PTR(tmp_zone);
 
@@ -1548,7 +1534,7 @@ INTERNAL_HIDDEN iso_alloc_zone_t *iso_find_zone_range(const void *restrict p) {
     iso_alloc_zone_t *tmp_zone = NULL;
 
     /* Now we check the MRU thread zone cache */
-    for(int64_t i = 0; i < zone_cache_count; i++) {
+    for(size_t i = 0; i < zone_cache_count; i++) {
         tmp_zone = zone_cache[i].zone;
         user_pages_start = UNMASK_USER_PTR(tmp_zone);
 
@@ -1757,10 +1743,6 @@ INTERNAL_HIDDEN void iso_free_chunk_from_zone(iso_alloc_zone_t *zone, void *rest
     /* Set the next bit so we know this chunk was used */
     SET_BIT(b, (which_bit + 1));
 
-#if !ENABLE_ASAN && (!DISABLE_CANARY || SANITIZE_CHUNKS)
-    __builtin_prefetch(p, 1);
-#endif
-
     /* Unset the bit and write the value into the bitmap
      * if this is not a permanent free. A permanent free
      * means this chunk will be marked as if it is a canary */
@@ -1850,7 +1832,7 @@ INTERNAL_HIDDEN void _iso_free(void *p, bool permanent) {
     }
 
 #if NO_ZERO_ALLOCATIONS
-    if(p == _zero_alloc_page) {
+    if(UNLIKELY(p == _zero_alloc_page)) {
         return;
     }
 #endif
@@ -1894,7 +1876,7 @@ INTERNAL_HIDDEN void _iso_free_size(void *p, size_t size) {
     }
 
 #if NO_ZERO_ALLOCATIONS
-    if(p == _zero_alloc_page && size != 0) {
+    if(UNLIKELY(p == _zero_alloc_page && size != 0)) {
         LOG_AND_ABORT("Zero sized chunk (0x%p) with non-zero (%d) size passed to free", p, size);
     }
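
The two UNLIKELY() wrappers added in the free paths above are branch hints: the zero-allocation page is rarely the pointer being freed, so the compiler can lay out the common path as the fall-through case. A sketch under the assumption that UNLIKELY expands to the conventional GCC/Clang form (the authoritative macro lives in iso_alloc's headers):

#include <stdbool.h>

/* Assumed conventional definition, not copied from the project */
#define UNLIKELY(x) __builtin_expect((x), 0)

static const void *_zero_alloc_page_sketch;

static bool free_is_zero_page(const void *p) {
    /* Rarely true, so the compiler favors the path after the if */
    if(UNLIKELY(p == _zero_alloc_page_sketch)) {
        return true;
    }

    return false;
}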

src/iso_alloc_profiler.c

Lines changed: 1 addition & 1 deletion
@@ -104,7 +104,7 @@ INTERNAL_HIDDEN uint64_t _iso_alloc_zone_leak_detector(iso_alloc_zone_t *zone, b
              * canary value. If it doesn't validate then we assume
              * its a true leak and increment the in_use counter */
             bit_slot_t bit_slot = (i * BITS_PER_QWORD) + j;
-            const void *leak = (zone->user_pages_start + ((bit_slot / BITS_PER_CHUNK) * zone->chunk_size));
+            const void *leak = (zone->user_pages_start + ((bit_slot >> 1) * zone->chunk_size));
 
             if(bit_two == 1 && (check_canary_no_abort(zone, leak) != ERR)) {
                 continue;
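
The leak-detector change above relies on each chunk owning BITS_PER_CHUNK (2) bits in the bitmap, so bit_slot >> 1 is the chunk number and the shift replaces the old division. A standalone sketch of the bit slot to chunk address math, assuming BITS_PER_CHUNK is 2 as the replacement implies:

#include <stdint.h>

#define BITS_PER_CHUNK 2

/* Mirrors the leak pointer computation above, illustrative only */
static void *chunk_from_bit_slot(void *user_pages_start, uint64_t bit_slot, uint32_t chunk_size) {
    /* Each chunk owns BITS_PER_CHUNK (2) bits, so bit_slot >> 1 is the chunk
     * number; the old bit_slot / BITS_PER_CHUNK computed the same value */
    return (uint8_t *) user_pages_start + ((bit_slot >> 1) * chunk_size);
}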

src/iso_alloc_util.c

Lines changed: 8 additions & 8 deletions
@@ -48,15 +48,15 @@ INTERNAL_HIDDEN void *mmap_pages(size_t size, bool populate, const char *name, i
 #if MAP_HUGETLB && HUGE_PAGES
     /* If we are allocating pages for a user zone
      * then take advantage of the huge TLB */
-    if(size == ZONE_USER_SIZE || size == (ZONE_USER_SIZE / 2)) {
+    if(size == ZONE_USER_SIZE || size == (ZONE_USER_SIZE >> 1)) {
         flags |= MAP_HUGETLB;
     }
 #endif
 #elif __APPLE__
 #if VM_FLAGS_SUPERPAGE_SIZE_2MB && HUGE_PAGES
     /* If we are allocating pages for a user zone
      * we are going to use the 2 MB superpage flag */
-    if(size == ZONE_USER_SIZE || size == (ZONE_USER_SIZE / 2)) {
+    if(size == ZONE_USER_SIZE || size == (ZONE_USER_SIZE >> 1)) {
         fd = VM_FLAGS_SUPERPAGE_SIZE_2MB;
     }
 #endif
@@ -70,7 +70,7 @@ INTERNAL_HIDDEN void *mmap_pages(size_t size, bool populate, const char *name, i
     }
 
 #if __linux__ && MAP_HUGETLB && HUGE_PAGES && MADV_HUGEPAGE
-    if(size == ZONE_USER_SIZE || size == (ZONE_USER_SIZE / 2)) {
+    if(size == ZONE_USER_SIZE || size == (ZONE_USER_SIZE >> 1)) {
         madvise(p, size, MADV_HUGEPAGE);
     }
 #endif
@@ -121,10 +121,10 @@ INTERNAL_HIDDEN INLINE CONST size_t next_pow2(size_t sz) {
 }
 
 const uint32_t _log_table[32] = {
-    0, 9, 1, 10, 13, 21, 2, 29,
-    11, 14, 16, 18, 22, 25, 3, 30,
-    8, 12, 20, 28, 15, 17, 24, 7,
-    19, 27, 23, 6, 26, 5, 4, 31};
+    0, 9, 1, 10, 13, 21, 2, 29,
+    11, 14, 16, 18, 22, 25, 3, 30,
+    8, 12, 20, 28, 15, 17, 24, 7,
+    19, 27, 23, 6, 26, 5, 4, 31};
 
 /* Fast log2() implementation for 32 bit integers */
 INTERNAL_HIDDEN uint32_t _log2(uint32_t v) {
@@ -133,5 +133,5 @@ INTERNAL_HIDDEN uint32_t _log2(uint32_t v) {
     v |= v >> 4;
     v |= v >> 8;
     v |= v >> 16;
-    return _log_table[(uint32_t)(v*0x07C4ACDD) >> 27];
+    return _log_table[(uint32_t) (v * 0x07C4ACDD) >> 27];
 }
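
The _log2() touched above (whitespace only in this commit) is the classic multiply-and-lookup floor-log2 for 32-bit integers: the OR cascade smears the highest set bit downward, then the multiply by 0x07C4ACDD and shift by 27 select one of 32 table slots. A standalone version of the same technique, for illustration only; the authoritative copy is the one in src/iso_alloc_util.c:

#include <assert.h>
#include <stdint.h>

static const uint32_t log_table[32] = {
    0, 9, 1, 10, 13, 21, 2, 29,
    11, 14, 16, 18, 22, 25, 3, 30,
    8, 12, 20, 28, 15, 17, 24, 7,
    19, 27, 23, 6, 26, 5, 4, 31};

static uint32_t fast_log2(uint32_t v) {
    /* Fill all bits below the highest set bit */
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    return log_table[(uint32_t) (v * 0x07C4ACDD) >> 27];
}

int main(void) {
    assert(fast_log2(1) == 0);
    assert(fast_log2(64) == 6);
    assert(fast_log2(65) == 6);
    assert(fast_log2(4096) == 12);
    return 0;
}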

tests/alloc_fuzz.c

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ uint32_t allocation_sizes[] = {ZONE_16, ZONE_32, ZONE_64, ZONE_128,
                                ZONE_256, ZONE_512, ZONE_1024,
                                ZONE_2048, ZONE_4096, ZONE_8192,
                                SMALL_SZ_MAX / 4, SMALL_SZ_MAX / 2,
-                               SMALL_SZ_MAX - 1, SMALL_SZ_MAX };
+                               SMALL_SZ_MAX - 1, SMALL_SZ_MAX};
 
 uint32_t array_sizes[] = {16, 32, 64, 128, 256, 512, 1024, 2048};
