Skip to content

Commit 06458cc

Browse files
authored
Merge pull request #101 from struct/6_11_22_perf_v2
doc fixes, perf fixes through removing redundant code
2 parents 4b1cd9c + e9cf8dc commit 06458cc

6 files changed

Lines changed: 58 additions & 58 deletions

File tree

.github/workflows/testsuite.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@ jobs:
4949
- run: sudo apt install clang-12
5050
- run: make tests
5151
- run: make cpp_tests
52+
testsuite-clang12-perf:
53+
runs-on: ubuntu-latest
54+
steps:
55+
- uses: actions/checkout@v2
56+
- run: sudo apt install clang-12
57+
- run: make malloc_cmp_test
5258
testsuite-gcc:
5359
runs-on: ubuntu-latest
5460
steps:

PERFORMANCE.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ All data fetches from a zone bitmap are 64 bits at a time which takes advantage
1414

1515
All bitmap pages allocated with `mmap` are passed to the `madvise` syscall with the advice argument `MADV_WILLNEED`. All user pages allocated with `mmap` are passed to the `madvise` syscall with the advice argument `MADV_WILLNEED`. Global caches, the root, and zone bitmaps (but not pages that hold user data) are created with `MAP_POPULATE`, which instructs the kernel to pre-populate the page tables; this reduces page faults and results in better performance. You can disable this with the `PRE_POPULATE_PAGES` Makefile flag. Note that by default at zone creation time user pages will have canaries written at random aligned offsets. This will cause page faults and populate those PTEs when the pages are first written to, whether those pages are ever used at runtime or not.
1616

17+
The `MAX_ZONES` value in `conf.h` limits the total number of zones that can be allocated at runtime. If your program is being killed with OOM errors you can safely increase this value up to its maximum of 65535. However, increasing it will result in a larger allocation for the `root->zones` array, which holds metadata for each zone whether that zone is currently mapped and in use or not. To calculate the total number of bytes available for allocations you can do (`MAX_ZONES * ZONE_USER_SIZE`). Note that `ZONE_USER_SIZE` is not configurable in `conf.h`.
18+
1719
Default zones for common sizes are created in the library constructor. This helps speed up allocations for long running programs. New zones are created on demand when needed but this will incur a small performance penalty in the allocation path.
1820

1921
By default user chunks are not sanitized upon free. While this helps mitigate uninitialized memory vulnerabilities it is a very slow operation. You can enable this feature by changing the `SANITIZE_CHUNKS` flag in the Makefile.

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ If all else fails please file an issue on the [github project](https://github.co
153153

154154
`void iso_free_permanently(void *p)` - Same as `iso_free` but marks the chunk in such a way that it will not be reallocated
155155

156-
`void iso_free_from_zone(void *p, iso_alloc_zone_handle *zone)` - Free's a chunk from a private zone. Can take a tagged or untagged pointer if `MEMORY_TAGGING` is enabled.
156+
`void iso_free_from_zone(void *p, iso_alloc_zone_handle *zone)` - Frees a chunk from a private zone. Can take a tagged or untagged pointer if `MEMORY_TAGGING` is enabled. These chunks are not quarantined.
157157

158158
`void iso_free_from_zone_permanently(void *p, iso_alloc_zone_handle *zone)` - Permanently frees a chunk from a zone
159159

include/conf.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
#define CANARY_COUNT_DIV 7
2222

2323
/* If you're compiling for Android and want custom names
24-
* for internal mappings you can modify those here */
24+
* for internal mappings that are viewable from procfs
25+
* (i.e. /proc/pid/maps) you can modify those names here */
2526
#if NAMED_MAPPINGS
2627
#define SAMPLED_ALLOC_NAME "isoalloc sampled allocation"
2728
#define BIG_ZONE_UD_NAME "isoalloc big zone user data"
@@ -72,7 +73,8 @@
7273
* it does correspond to the size of the _root.zones
7374
* array. Currently the iso_alloc_zone_t structure is
7475
* roughly 2112 bytes so this results in 17301504 bytes
75-
* (~17 MB) for zone meta data */
76+
* (~17 MB) for zone meta data. See PERFORMANCE.md for
77+
* more information on this value. Max is 65535 */
7678
#define MAX_ZONES 8192
7779

7880
/* We allocate zones at startup for common sizes.
@@ -94,7 +96,7 @@
9496
* certain decisions based on this value such as the
9597
* number of canary values in a zone. It is safe to
9698
* modify to a larger value but you will likely be
97-
* wasting memory by doing so */
99+
* wasting memory by doing so. */
98100
#define MAX_DEFAULT_ZONE_SZ ZONE_8192
99101

100102
/* If you have specific allocation pattern requirements

include/iso_alloc_internal.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,7 @@ INTERNAL_HIDDEN INLINE void _flush_chunk_quarantine(void);
521521
INTERNAL_HIDDEN INLINE void clear_chunk_quarantine(void);
522522
INTERNAL_HIDDEN INLINE void clear_zone_cache(void);
523523
INTERNAL_HIDDEN iso_alloc_zone_t *is_zone_usable(iso_alloc_zone_t *zone, size_t size);
524-
INTERNAL_HIDDEN iso_alloc_zone_t *iso_find_zone_fit(size_t size);
524+
INTERNAL_HIDDEN iso_alloc_zone_t *find_suitable_zone(size_t size);
525525
INTERNAL_HIDDEN iso_alloc_zone_t *iso_new_zone(size_t size, bool internal);
526526
INTERNAL_HIDDEN iso_alloc_zone_t *_iso_new_zone(size_t size, bool internal, int32_t index);
527527
INTERNAL_HIDDEN iso_alloc_zone_t *iso_find_zone_bitmap_range(const void *p);
@@ -532,7 +532,6 @@ INTERNAL_HIDDEN bit_slot_t iso_scan_zone_free_slot(iso_alloc_zone_t *zone);
532532
INTERNAL_HIDDEN bit_slot_t get_next_free_bit_slot(iso_alloc_zone_t *zone);
533533
INTERNAL_HIDDEN iso_alloc_root *iso_alloc_new_root(void);
534534
INTERNAL_HIDDEN bool is_pow2(uint64_t sz);
535-
INTERNAL_HIDDEN bool iso_does_zone_fit(iso_alloc_zone_t *zone, size_t size);
536535
INTERNAL_HIDDEN bool _is_zone_retired(iso_alloc_zone_t *zone);
537536
INTERNAL_HIDDEN bool _refresh_zone_mem_tags(iso_alloc_zone_t *zone);
538537
INTERNAL_HIDDEN iso_alloc_zone_t *_iso_free_internal_unlocked(void *p, bool permanent, iso_alloc_zone_t *zone);

src/iso_alloc.c

Lines changed: 43 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,7 @@ INTERNAL_HIDDEN void iso_alloc_initialize_global_root(void) {
380380

381381
_root->zones = (void *) (p + g_page_size);
382382
name_mapping(p, _root->zones_size, "isoalloc zone metadata");
383+
MLOCK(_root->zones, _root->zones_size);
383384

384385
#if !THREAD_SUPPORT
385386
size_t c = ROUND_UP_PAGE(CHUNK_QUARANTINE_SZ * sizeof(uintptr_t));
@@ -653,7 +654,7 @@ INTERNAL_HIDDEN iso_alloc_zone_t *iso_new_zone(size_t size, bool internal) {
653654
/* Requires the root is locked */
654655
INTERNAL_HIDDEN iso_alloc_zone_t *_iso_new_zone(size_t size, bool internal, int32_t index) {
655656
if(UNLIKELY(_root->zones_used >= MAX_ZONES) || UNLIKELY(index >= MAX_ZONES)) {
656-
LOG_AND_ABORT("Cannot allocate additional zones. I have already allocated %d", _root->zones_used);
657+
LOG_AND_ABORT("Cannot allocate additional zones. I have already allocated %d zones", _root->zones_used);
657658
}
658659

659660
/* In order for our bitmap to be a power of 2
@@ -901,6 +902,12 @@ INTERNAL_HIDDEN bit_slot_t iso_scan_zone_free_slot_slow(iso_alloc_zone_t *zone)
901902
}
902903

903904
INTERNAL_HIDDEN iso_alloc_zone_t *is_zone_usable(iso_alloc_zone_t *zone, size_t size) {
905+
#if CPU_PIN
906+
if(zone->cpu_core != sched_getcpu()) {
907+
return false;
908+
}
909+
#endif
910+
904911
/* If the zone is full it is not usable */
905912
if(zone->is_full == true) {
906913
return NULL;
@@ -913,7 +920,7 @@ INTERNAL_HIDDEN iso_alloc_zone_t *is_zone_usable(iso_alloc_zone_t *zone, size_t
913920
* sizes beyond ZONE_1024 bytes. In other words we can
914921
* live with some wasted space in zones that manage
915922
* chunks smaller than ZONE_1024 */
916-
if(zone->internal == true && size > ZONE_1024 && zone->chunk_size >= (size << WASTED_SZ_MULTIPLIER_SHIFT)) {
923+
if(size > ZONE_1024 && zone->chunk_size >= (size << WASTED_SZ_MULTIPLIER_SHIFT)) {
917924
return NULL;
918925
}
919926

@@ -962,38 +969,8 @@ INTERNAL_HIDDEN iso_alloc_zone_t *is_zone_usable(iso_alloc_zone_t *zone, size_t
962969
}
963970
}
964971

965-
/* Implements the check for iso_find_zone_fit */
966-
INTERNAL_HIDDEN bool iso_does_zone_fit(iso_alloc_zone_t *zone, size_t size) {
967-
#if CPU_PIN
968-
if(zone->cpu_core != sched_getcpu()) {
969-
return false;
970-
}
971-
#endif
972-
973-
/* Don't return a zone that handles a size far larger
974-
* than we need. This could lead to high memory usage
975-
* depending on allocation patterns but helps enforce
976-
* spatial separation based on sized */
977-
if(zone->chunk_size >= ZONE_1024 && size <= ZONE_128) {
978-
return false;
979-
}
980-
981-
if(zone->chunk_size < size || zone->internal == false || zone->is_full == true) {
982-
return false;
983-
}
984-
985-
/* We found a zone, lets try to find a free slot in it */
986-
zone = is_zone_usable(zone, size);
987-
988-
if(zone == NULL) {
989-
return false;
990-
} else {
991-
return true;
992-
}
993-
}
994-
995972
/* Finds a zone that can fit this allocation request */
996-
INTERNAL_HIDDEN iso_alloc_zone_t *iso_find_zone_fit(size_t size) {
973+
INTERNAL_HIDDEN iso_alloc_zone_t *find_suitable_zone(size_t size) {
997974
iso_alloc_zone_t *zone = NULL;
998975
int32_t i = 0;
999976

@@ -1016,7 +993,7 @@ INTERNAL_HIDDEN iso_alloc_zone_t *iso_find_zone_fit(size_t size) {
1016993
LOG_AND_ABORT("Lookup table should never contain private zones");
1017994
}
1018995

1019-
if(iso_does_zone_fit(zone, size) == true) {
996+
if(is_zone_usable(zone, size) != NULL) {
1020997
return zone;
1021998
}
1022999

@@ -1052,7 +1029,16 @@ INTERNAL_HIDDEN iso_alloc_zone_t *iso_find_zone_fit(size_t size) {
10521029
for(; i < _root->zones_used; i++) {
10531030
zone = &_root->zones[i];
10541031

1055-
if(iso_does_zone_fit(zone, size) == true) {
1032+
if(zone->chunk_size < size || zone->internal == false) {
1033+
continue;
1034+
}
1035+
1036+
/* Don't waste memory, enforce spatial separation by size */
1037+
if(zone->chunk_size >= ZONE_1024 && size <= ZONE_128) {
1038+
continue;
1039+
}
1040+
1041+
if(is_zone_usable(zone, size) != NULL) {
10561042
return zone;
10571043
}
10581044
}
@@ -1366,8 +1352,15 @@ INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_alloc(iso_alloc_zone_t *zone, size_t s
13661352
* and this will speed up that operation */
13671353
for(size_t i = 0; i < zone_cache_count; i++) {
13681354
if(zone_cache[i].chunk_size >= size) {
1369-
if(iso_does_zone_fit(zone_cache[i].zone, size) == true) {
1370-
zone = zone_cache[i].zone;
1355+
iso_alloc_zone_t *_zone = zone_cache[i].zone;
1356+
1357+
/* Don't waste memory, enforce spatial separation by size */
1358+
if(_zone->chunk_size >= ZONE_1024 && size <= ZONE_128) {
1359+
continue;
1360+
}
1361+
1362+
if(is_zone_usable(_zone, size) != NULL) {
1363+
zone = _zone;
13711364
break;
13721365
}
13731366
}
@@ -1380,13 +1373,13 @@ INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_alloc(iso_alloc_zone_t *zone, size_t s
13801373
* looking for a suitable one, this includes the
13811374
* zones we cached above */
13821375
if(zone == NULL) {
1383-
zone = iso_find_zone_fit(size);
1376+
zone = find_suitable_zone(size);
13841377
}
13851378

1386-
/* We only need to check if the zone is usable
1387-
* if it's a private zone. If we chose this zone
1388-
* then its guaranteed to already be usable */
13891379
if(LIKELY(zone != NULL)) {
1380+
/* We only need to check if the zone is usable
1381+
* if it's a private zone. If we chose this zone
1382+
* then its guaranteed to already be usable */
13901383
if(zone->internal == false) {
13911384
zone = is_zone_usable(zone, size);
13921385

@@ -1919,7 +1912,7 @@ INTERNAL_HIDDEN void _iso_free_size(void *p, size_t size) {
19191912
iso_alloc_zone_t *zone = iso_find_zone_range(p);
19201913

19211914
if(UNLIKELY(zone == NULL)) {
1922-
LOG_AND_ABORT("Could not find zone for %p", p);
1915+
LOG_AND_ABORT("Could not find zone for 0x%p", p);
19231916
}
19241917

19251918
/* We can't check for an exact size match because
@@ -2001,16 +1994,14 @@ INTERNAL_HIDDEN iso_alloc_zone_t *_iso_free_internal_unlocked(void *p, bool perm
20011994
if(zone->tagged == true) {
20021995
if(_refresh_zone_mem_tags(zone) == false) {
20031996
/* We only need to refresh this single tag */
2004-
if(zone->tagged == true) {
2005-
void *user_pages_start = UNMASK_USER_PTR(zone);
2006-
uint8_t *_mtp = (user_pages_start - _root->system_page_size - ROUND_UP_PAGE(zone->chunk_count * MEM_TAG_SIZE));
2007-
uint64_t chunk_offset = (uint64_t) (p - user_pages_start);
2008-
_mtp += (chunk_offset >> zone->chunk_size_pow2);
2009-
2010-
/* Generate and write a new tag for this chunk */
2011-
uint8_t mem_tag = (uint8_t) rand_uint64();
2012-
*_mtp = mem_tag;
2013-
}
1997+
void *user_pages_start = UNMASK_USER_PTR(zone);
1998+
uint8_t *_mtp = (user_pages_start - _root->system_page_size - ROUND_UP_PAGE(zone->chunk_count * MEM_TAG_SIZE));
1999+
uint64_t chunk_offset = (uint64_t) (p - user_pages_start);
2000+
_mtp += (chunk_offset >> zone->chunk_size_pow2);
2001+
2002+
/* Generate and write a new tag for this chunk */
2003+
uint8_t mem_tag = (uint8_t) rand_uint64();
2004+
*_mtp = mem_tag;
20142005
}
20152006
}
20162007
#endif

0 commit comments

Comments
 (0)