Skip to content

Commit 5f36c99

Browse files
committed
add support for the zone lookup table and next_sz list
1 parent 049c12e commit 5f36c99

6 files changed

Lines changed: 125 additions & 22 deletions

File tree

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ malloc_cmp_test: clean
270270
$(CC) $(CFLAGS) $(OPTIMIZE) $(EXE_CFLAGS) $(OS_FLAGS) -DMALLOC_PERF_TEST $(ISO_ALLOC_PRINTF_SRC) tests/tests.c -o $(BUILD_DIR)/malloc_tests
271271
echo "Running IsoAlloc Performance Test"
272272
build/tests
273-
echo "Running glibc malloc Performance Test"
273+
echo "Running system malloc Performance Test"
274274
build/malloc_tests
275275

276276
## C++ Support - Build a debug version of the unit test

PERFORMANCE.md

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -80,16 +80,31 @@ The same test run on an AWS t2.xlarge Ubuntu 20.04 instance with 4 `Intel(R) Xeo
8080
```
8181
Running IsoAlloc Performance Test
8282
83-
iso_alloc/iso_free 1441616 tests completed in 0.418426 seconds
84-
iso_calloc/iso_free 1441616 tests completed in 0.578068 seconds
85-
iso_realloc/iso_free 1441616 tests completed in 0.681393 seconds
83+
iso_alloc/iso_free 1441616 tests completed in 0.147336 seconds
84+
iso_calloc/iso_free 1441616 tests completed in 0.161482 seconds
85+
iso_realloc/iso_free 1441616 tests completed in 0.244981 seconds
8686
8787
Running glibc malloc Performance Test
8888
89-
malloc/free 1441616 tests completed in 0.352161 seconds
90-
calloc/free 1441616 tests completed in 0.562425 seconds
91-
realloc/free 1441616 tests completed in 0.590622 seconds
89+
malloc/free 1441616 tests completed in 0.182437 seconds
90+
calloc/free 1441616 tests completed in 0.246065 seconds
91+
realloc/free 1441616 tests completed in 0.332292 seconds
92+
```
93+
94+
Here is the same test as above on Mac OS 11.6
95+
96+
```
97+
Running IsoAlloc Performance Test
98+
99+
iso_alloc/iso_free 1441616 tests completed in 0.124150 seconds
100+
iso_calloc/iso_free 1441616 tests completed in 0.182955 seconds
101+
iso_realloc/iso_free 1441616 tests completed in 0.275084 seconds
102+
103+
Running system malloc Performance Test
92104
105+
malloc/free 1441616 tests completed in 0.090845 seconds
106+
calloc/free 1441616 tests completed in 0.200397 seconds
107+
realloc/free 1441616 tests completed in 0.254574 seconds
93108
```
94109

95110
This same test can be used with the `perf` utility to measure basic stats like page faults and CPU utilization using both heap implementations. The output below is on the same AWS t2.xlarge instance as above.
@@ -163,10 +178,10 @@ cache-thrashN mimalloc 00.36 3356 1.44 0.00 0 229
163178
cache-thrashN tcmalloc 01.87 6880 7.42 0.00 0 1138
164179
cache-thrashN jemalloc 00.37 3760 1.46 0.00 0 296
165180
166-
redis isoalloc 9.335 71048 4.35 0.36 0 19326 ops/sec: 214227.92
167-
redis mimalloc 4.611 28932 2.13 0.20 4 6657 ops/sec: 433692.97
168-
redis tcmalloc 5.055 37088 2.37 0.19 3 8444 ops/sec: 395588.59
169-
redis jemalloc 5.150 30964 2.42 0.19 5 7024 ops/sec: 388279.50
181+
redis isoalloc 8.669 76240 4.07 0.30 1 21473 ops/sec: 230702.66, relative time: 8.669s
182+
redis mimalloc 4.555 28968 2.13 0.17 4 6655 ops/sec: 439023.69, relative time: 4.555s
183+
redis tcmalloc 4.715 37120 2.21 0.17 3 8446 ops/sec: 424108.56, relative time: 4.715s
184+
redis jemalloc 5.125 30836 2.41 0.17 0 7034 ops/sec: 390174.03, relative time: 5.125s
170185
```
171186

172187
IsoAlloc isn't quite ready for performance sensitive server workloads but it's more than fast enough for client side mobile/desktop applications with risky C/C++ attack surface.

include/iso_alloc_internal.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,8 +268,8 @@ using namespace std;
268268
* create. This is a completely arbitrary number but
269269
* it does correspond to the size of the _root.zones
270270
* array that lives in global memory. Currently the
271-
* iso_alloc_zone structure is roughly 1088 bytes so
272-
* this allocates 8912896 bytes (~8.5 MB) for _root */
271+
* iso_alloc_zone structure is roughly 1090 bytes so
272+
* this allocates 8929280 bytes (~8.9 MB) for _root */
273273
#define MAX_ZONES 8192
274274

275275
/* Each user allocation zone we make is 4mb in size.
@@ -296,6 +296,8 @@ using namespace std;
296296
#define BIG_ZONE_USER_PAGE_COUNT 2
297297
#define BIG_ZONE_USER_PAGE_COUNT_SHIFT 1
298298

299+
#define ZONE_LOOKUP_TABLE_SZ ((SMALL_SZ_MAX+1) * sizeof(uint16_t))
300+
299301
/* We allocate zones at startup for common sizes.
300302
* Each of these default zones is ZONE_USER_SIZE bytes
301303
* so ZONE_8192 holds less chunks than ZONE_128 for
@@ -414,6 +416,7 @@ static uint64_t default_zones[] = {ZONE_512, ZONE_512, ZONE_512, ZONE_1024};
414416

415417
typedef uint64_t bit_slot_t;
416418
typedef int64_t bitmap_index_t;
419+
typedef uint16_t zone_lookup_table_t;
417420

418421
typedef struct {
419422
void *user_pages_start; /* Start of the pages backing this zone */
@@ -430,6 +433,7 @@ typedef struct {
430433
bool internally_managed; /* Zones can be managed by iso_alloc or custom */
431434
bool is_full; /* Indicates whether this zone is full to avoid expensive free bit slot searches */
432435
uint16_t index; /* Zone index */
436+
uint16_t next_sz_index; /* What is the index of the next zone of this size */
433437
#if CPU_PIN
434438
uint8_t cpu_core; /* What CPU core this zone is pinned to */
435439
#endif

misc/commands.gdb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,5 @@ i r
44
x/i $pc
55
thread apply all bt
66
thread apply all info locals
7+
p *_root
8+
p _zone_lookup_table

src/iso_alloc.c

Lines changed: 90 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,13 @@ uint32_t g_page_size;
1717
uint32_t _default_zone_count;
1818
iso_alloc_root *_root;
1919

20+
/* Zones are linked by their next_sz_index member which
21+
* tells the allocator where in the _root->zones array
22+
* it can find the next zone that holds the same size
23+
* chunks. The lookup table helps us find the first zone
24+
* that holds a specific size in O(1) time */
25+
static zone_lookup_table_t *_zone_lookup_table;
26+
2027
#if NO_ZERO_ALLOCATIONS
2128
void *_zero_alloc_page;
2229
#endif
@@ -363,6 +370,10 @@ INTERNAL_HIDDEN void iso_alloc_initialize_global_root(void) {
363370
LOG_AND_ABORT("Could not initialize global root");
364371
}
365372

373+
/* We mlock the root or every allocation would
374+
* result in a soft page fault */
375+
mlock(&_root, sizeof(iso_alloc_root));
376+
366377
_default_zone_count = sizeof(default_zones) >> 3;
367378

368379
_root->zones_size = (MAX_ZONES * sizeof(iso_alloc_zone));
@@ -377,18 +388,17 @@ INTERNAL_HIDDEN void iso_alloc_initialize_global_root(void) {
377388
_root->zones = (void *) (p + g_page_size);
378389
name_mapping(p, _root->zones_size, "isoalloc zone metadata");
379390

391+
/* If we don't lock the zone lookup table we will incur a
392+
* soft page fault with almost every allocation */
393+
_zone_lookup_table = mmap_rw_pages(ZONE_LOOKUP_TABLE_SZ, true, NULL);
394+
mlock(&_zone_lookup_table, ZONE_LOOKUP_TABLE_SZ);
395+
380396
for(int64_t i = 0; i < _default_zone_count; i++) {
381397
if((_iso_new_zone(default_zones[i], true)) == NULL) {
382398
LOG_AND_ABORT("Failed to create a new zone");
383399
}
384400
}
385401

386-
/* This call to mlock may fail if memory limits
387-
* are set too low. This will not affect us
388-
* at runtime. It just means some of the default
389-
* root meta data may get swapped to disk */
390-
mlock(&_root, sizeof(iso_alloc_root));
391-
392402
_root->zone_handle_mask = rand_uint64();
393403
_root->big_zone_next_mask = rand_uint64();
394404
_root->big_zone_canary_secret = rand_uint64();
@@ -574,6 +584,8 @@ __attribute__((destructor(LAST_DTOR))) void iso_alloc_dtor(void) {
574584
munmap(_root, sizeof(iso_alloc_root));
575585
#endif
576586

587+
munmap(_zone_lookup_table, ZONE_LOOKUP_TABLE_SZ);
588+
577589
UNLOCK_ROOT();
578590
}
579591

@@ -600,9 +612,10 @@ INTERNAL_HIDDEN iso_alloc_zone *iso_new_zone(size_t size, bool internal) {
600612
return zone;
601613
}
602614

615+
/* Requires the root is locked */
603616
INTERNAL_HIDDEN iso_alloc_zone *_iso_new_zone(size_t size, bool internal) {
604617
if(_root->zones_used >= MAX_ZONES) {
605-
LOG_AND_ABORT("Cannot allocate additional zones");
618+
LOG_AND_ABORT("Cannot allocate additional zones. I have already allocated %d", _root->zones_used);
606619
}
607620

608621
if(size > SMALL_SZ_MAX) {
@@ -691,6 +704,36 @@ INTERNAL_HIDDEN iso_alloc_zone *_iso_new_zone(size_t size, bool internal) {
691704
POISON_ZONE(new_zone);
692705
MASK_ZONE_PTRS(new_zone);
693706

707+
/* The lookup table is never used for custom zones */
708+
if(internal == true) {
709+
/* If no other zones of this size exist then set the
710+
* index in the zone lookup table to its index */
711+
if(_zone_lookup_table[size] == 0) {
712+
_zone_lookup_table[size] = _root->zones_used;
713+
} else {
714+
/* Other zones exist that hold this size. We need to
715+
 * fix up the most recent one's next_sz_index member.
716+
* We do this by walking the list using next_sz_index */
717+
for(int32_t i = _zone_lookup_table[size]; i < _root->zones_used;) {
718+
iso_alloc_zone *zt = &_root->zones[i];
719+
720+
if(zt->chunk_size != size) {
721+
LOG_AND_ABORT("Inconsistent lookup table for zone[%d] chunk size %d (%d)", zt->index, zt->chunk_size, size);
722+
}
723+
724+
/* Follow this zone's next_sz_index member */
725+
if(zt->next_sz_index != 0) {
726+
i = zt->next_sz_index;
727+
} else {
728+
/* If this zone's next_sz_index is zero then set
729+
* it to the zone we just created and break */
730+
zt->next_sz_index = new_zone->index;
731+
break;
732+
}
733+
}
734+
}
735+
}
736+
694737
_root->zones_used++;
695738

696739
return new_zone;
@@ -831,7 +874,46 @@ INTERNAL_HIDDEN iso_alloc_zone *iso_find_zone_fit(size_t size) {
831874
iso_alloc_zone *zone = NULL;
832875
int32_t i = 0;
833876

834-
#if !SMALL_MEM_STARTUP
877+
if(IS_ALIGNED(size) != 0) {
878+
size = ALIGN_SZ_UP(size);
879+
}
880+
881+
/* Fast path via lookup table */
882+
if(_zone_lookup_table[size] != 0) {
883+
i = _zone_lookup_table[size];
884+
885+
for(; i < _root->zones_used;) {
886+
zone = &_root->zones[i];
887+
888+
if(zone->chunk_size != size) {
889+
LOG_AND_ABORT("Zone lookup table failed to match sizes for zone[%d](%d) for chunk size (%d)", zone->index, zone->chunk_size, size);
890+
}
891+
892+
if(zone->internally_managed == false) {
893+
LOG_AND_ABORT("Lookup table should never contain custom zones");
894+
}
895+
896+
bool fits = iso_does_zone_fit(zone, size);
897+
898+
if(fits == true) {
899+
return zone;
900+
}
901+
902+
if(zone->next_sz_index != 0) {
903+
i = zone->next_sz_index;
904+
} else {
905+
/* We have reached the end of our linked zones. The
906+
* lookup table failed to find us a usable zone.
907+
* Instead of creating a new one we will break out
908+
* of this loop and try iterating through all zones,
909+
* including ones we may have skipped over, to find
910+
* a suitable candidate. */
911+
break;
912+
}
913+
}
914+
}
915+
916+
#if SMALL_MEM_STARTUP
835917
/* A simple optimization to find which default zone
836918
* should fit this allocation. If we fail then a
837919
* slower iterative approach is used. The longer a

src/iso_alloc_profiler.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ INTERNAL_HIDDEN uint64_t __iso_alloc_mem_usage() {
148148
iso_alloc_zone *zone = &_root->zones[i];
149149
mem_usage += zone->bitmap_size;
150150
mem_usage += ZONE_USER_SIZE;
151-
LOG("Zone[%d] holds %d byte chunks, megabytes (%d)", zone->index, zone->chunk_size, (ZONE_USER_SIZE / MEGABYTE_SIZE));
151+
LOG("Zone[%d] holds %d byte chunks, megabytes (%d) next zone = %d", zone->index, zone->chunk_size, (ZONE_USER_SIZE / MEGABYTE_SIZE), zone->next_sz_index);
152152
}
153153

154154
return (mem_usage / MEGABYTE_SIZE);

0 commit comments

Comments
 (0)