Skip to content

Commit 2a336e5

Browse files
authored
Merge pull request #107 from struct/new_libc_hook
add guarded memcpy with libc hook and security checks, update docs
2 parents 208d1ea + 32efa55 commit 2a336e5

12 files changed

Lines changed: 125 additions & 18 deletions

Makefile

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,6 @@ HUGE_PAGES = -DHUGE_PAGES=1
9292
## README for more detailed information. This is Linux only
9393
## and has negative performance implications
9494
CPU_PIN = -DCPU_PIN=0
95-
9695
SCHED_GETCPU =
9796

9897
## Enable the allocation sanity feature. This works a lot
@@ -104,6 +103,11 @@ SCHED_GETCPU =
104103
## much of a performance penalty
105104
ALLOC_SANITY = -DALLOC_SANITY=0
106105

106+
## Enable hooking of memcpy to detect out of bounds r/w
107+
## operations on chunks allocated with IsoAlloc. Does
108+
## not require that ALLOC_SANITY be enabled
109+
MEMCPY_SANITY = -DMEMCPY_SANITY=0
110+
107111
## Enable the userfaultfd based uninitialized read detection
108112
## feature. This samples calls to malloc, and allocates raw
109113
## pages of memory with mmap which are registered with the
@@ -192,9 +196,10 @@ else
192196
BUILD_ERROR_FLAGS := $(BUILD_ERROR_FLAGS) -Wno-attributes -Wno-unused-variable
193197
endif
194198
CFLAGS = $(COMMON_CFLAGS) $(SECURITY_FLAGS) $(BUILD_ERROR_FLAGS) $(HOOKS) $(HEAP_PROFILER) -fvisibility=hidden \
195-
-std=c11 $(SANITIZER_SUPPORT) $(ALLOC_SANITY) $(UNINIT_READ_SANITY) $(CPU_PIN) $(SCHED_GETCPU) $(EXPERIMENTAL) $(UAF_PTR_PAGE) \
196-
$(VERIFY_BIT_SLOT_CACHE) $(NAMED_MAPPINGS) $(ABORT_ON_NULL) $(NO_ZERO_ALLOCATIONS) $(ABORT_NO_ENTROPY) \
197-
$(ISO_DTOR_CLEANUP) $(SHUFFLE_BIT_SLOT_CACHE) $(USE_SPINLOCK) $(HUGE_PAGES) $(USE_MLOCK) $(MEMORY_TAGGING)
199+
-std=c11 $(SANITIZER_SUPPORT) $(ALLOC_SANITY) $(MEMCPY_SANITY) $(UNINIT_READ_SANITY) $(CPU_PIN) $(SCHED_GETCPU) \
200+
$(EXPERIMENTAL) $(UAF_PTR_PAGE) $(VERIFY_BIT_SLOT_CACHE) $(NAMED_MAPPINGS) $(ABORT_ON_NULL) $(NO_ZERO_ALLOCATIONS) \
201+
$(ABORT_NO_ENTROPY) $(ISO_DTOR_CLEANUP) $(SHUFFLE_BIT_SLOT_CACHE) $(USE_SPINLOCK) $(HUGE_PAGES) $(USE_MLOCK) \
202+
$(MEMORY_TAGGING)
198203
CXXFLAGS = $(COMMON_CFLAGS) -DCPP_SUPPORT=1 -std=c++17 $(SANITIZER_SUPPORT) $(HOOKS)
199204
EXE_CFLAGS = -fPIE
200205
GDB_FLAGS = -g -ggdb3 -fno-omit-frame-pointer

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ When enabled, the `CPU_PIN` feature will restrict allocations from a given zone
8585
* When destroying private zones if `NEVER_REUSE_ZONES` is enabled IsoAlloc won't attempt to repurpose the zone
8686
* Zones are retired and replaced after they've allocated and freed a specific number of chunks. This is calculated as `ZONE_ALLOC_RETIRE * max_chunk_count_for_zone`.
8787
* When `MEMORY_TAGGING` is enabled IsoAlloc will create a 1 byte tag for each chunk in private zones. See the [MEMORY_TAGGING.md](MEMORY_TAGGING.md) documentation, or [this test](tests/tagged_ptr_test.cpp) for an example of how to use it.
88+
* When `MEMCPY_SANITY` is enabled the allocator will hook all calls to `memcpy` and check for out of bounds r/w operations when either src or dst points to a chunk allocated by IsoAlloc
8889

8990
## Building
9091

SECURITY_COMPARISON.MD

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ Heap allocators hide incredible complexity behind `malloc` and `free`. They must
3737
|Zero Size Allocation Special Handling|:heavy_check_mark:|:x: |:grey_question: |:grey_question: |:x: |:x: |:x: |:heavy_check_mark:|:x: |:x:
3838
|Read-only global structure|:heavy_minus_sign:|:x: |:x: |:x: |:x: |:x: |:x: |:heavy_check_mark:|:x: |:x:
3939
|SW Memory Tagging |:heavy_minus_sign:|:x: |:x: |:x: |:x: |:x: |:x: |:x: |:heavy_check_mark:|:x:
40-
|Guarded Memcpy |:x: |:x: |:x: |:x: |:x: |:x: |:x: |:x: |:x: |:heavy_check_mark:
40+
|Guarded Memcpy |:heavy_check_mark: |:x: |:x: |:x: |:x: |:x: |:x: |:x: |:x: |:heavy_check_mark:
4141

4242
**Lexicon**
4343

include/iso_alloc_sanity.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#pragma message "IsoAlloc is untested and unsupported on 32 bit platforms"
1212
#endif
1313

14+
#if ALLOC_SANITY
1415
#if UNINIT_READ_SANITY
1516
#include <fcntl.h>
1617
#include <linux/userfaultfd.h>
@@ -79,3 +80,9 @@ INTERNAL_HIDDEN void *_iso_alloc_sample(const size_t size);
7980
INTERNAL_HIDDEN int32_t _iso_alloc_free_sane_sample(void *p);
8081
INTERNAL_HIDDEN int32_t _remove_from_sane_trace(void *p);
8182
INTERNAL_HIDDEN _sane_allocation_t *_get_sane_alloc(void *p);
83+
#endif
84+
85+
#if MEMCPY_SANITY
86+
INTERNAL_HIDDEN void *__iso_memcpy(void *restrict dest, const void *restrict src, size_t n);
87+
INTERNAL_HIDDEN void *_iso_alloc_memcpy(void *restrict dest, const void *restrict src, size_t n);
88+
#endif

src/iso_alloc.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1186,6 +1186,9 @@ INTERNAL_HIDDEN iso_alloc_zone_t *search_chunk_lookup_table(const void *restrict
11861186
LOG_AND_ABORT("Pointer to zone lookup table corrupted at position %zu", ADDR_TO_CHUNK_TABLE(p));
11871187
}
11881188

1189+
/* It's possible that zone_index is 0 and this is
1190+
* actually a cache miss. In this case we return
1191+
* the first zone and let the caller figure it out */
11891192
return &_root->zones[zone_index];
11901193
}
11911194

src/iso_alloc_interfaces.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
#include "iso_alloc.h"
55
#include "iso_alloc_internal.h"
66

7+
#if MEMCPY_SANITY
8+
#include "iso_alloc_sanity.h"
9+
#endif
10+
711
EXTERNAL_API NO_DISCARD MALLOC_ATTR ALLOC_SIZE ASSUME_ALIGNED void *iso_alloc(size_t size) {
812
return _iso_alloc(NULL, size);
913
}
@@ -57,7 +61,11 @@ EXTERNAL_API NO_DISCARD REALLOC_SIZE ASSUME_ALIGNED void *iso_realloc(void *p, s
5761
}
5862

5963
if(p != NULL) {
64+
#if MEMCPY_SANITY
65+
__iso_memcpy(r, p, size);
66+
#else
6067
__builtin_memcpy(r, p, size);
68+
#endif
6169
}
6270

6371
#if PERM_FREE_REALLOC
@@ -100,7 +108,11 @@ EXTERNAL_API NO_DISCARD ASSUME_ALIGNED char *iso_strdup_from_zone(iso_alloc_zone
100108
return NULL;
101109
}
102110

111+
#if MEMCPY_SANITY
112+
__iso_memcpy(p, str, size);
113+
#else
103114
__builtin_memcpy(p, str, size);
115+
#endif
104116
return p;
105117
}
106118

@@ -126,10 +138,18 @@ EXTERNAL_API NO_DISCARD ASSUME_ALIGNED char *iso_strndup_from_zone(iso_alloc_zon
126138
}
127139

128140
if(s_size > n) {
141+
#if MEMCPY_SANITY
142+
__iso_memcpy(p, str, n);
143+
#else
129144
__builtin_memcpy(p, str, n);
145+
#endif
130146
p[n - 1] = '\0';
131147
} else {
148+
#if MEMCPY_SANITY
149+
__iso_memcpy(p, str, s_size);
150+
#else
132151
__builtin_memcpy(p, str, s_size);
152+
#endif
133153
}
134154

135155
return p;

src/iso_alloc_profiler.c

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22
* Copyright 2022 - chris.rohlf@gmail.com */
33

44
#include "iso_alloc_internal.h"
5+
6+
#if MEMCPY_SANITY
7+
#include "iso_alloc_sanity.h"
8+
#endif
9+
510
#include <dlfcn.h>
611

712
INTERNAL_HIDDEN uint64_t _iso_alloc_detect_leaks_in_zone(iso_alloc_zone_t *zone) {
@@ -219,15 +224,23 @@ INTERNAL_HIDDEN uint64_t __iso_alloc_big_zone_mem_usage() {
219224
* be used to interpret allocation patterns */
220225
INTERNAL_HIDDEN size_t _iso_get_alloc_traces(iso_alloc_traces_t *traces_out) {
221226
LOCK_ROOT();
222-
memcpy(traces_out, _alloc_bts, sizeof(iso_alloc_traces_t));
227+
#if MEMCPY_SANITY
228+
__iso_memcpy(traces_out, _alloc_bts, sizeof(iso_alloc_traces_t));
229+
#else
230+
__builtin_memcpy(traces_out, _alloc_bts, sizeof(iso_alloc_traces_t));
231+
#endif
223232
size_t sz = _alloc_bts_count;
224233
UNLOCK_ROOT();
225234
return sz;
226235
}
227236

228237
INTERNAL_HIDDEN size_t _iso_get_free_traces(iso_free_traces_t *traces_out) {
229238
LOCK_ROOT();
230-
memcpy(traces_out, _free_bts, sizeof(iso_free_traces_t));
239+
#if MEMCPY_SANITY
240+
__iso_memcpy(traces_out, _free_bts, sizeof(iso_free_traces_t));
241+
#else
242+
__builtin_memcpy(traces_out, _free_bts, sizeof(iso_free_traces_t));
243+
#endif
231244
size_t sz = _free_bts_count;
232245
UNLOCK_ROOT();
233246
return sz;

src/iso_alloc_sanity.c

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,5 +274,43 @@ INTERNAL_HIDDEN void *_iso_alloc_sample(const size_t size) {
274274
UNLOCK_SANITY_CACHE();
275275
return p;
276276
}
277+
#endif
278+
279+
#if MEMCPY_SANITY
280+
/* Naive byte-at-a-time copy used in place of libc memcpy when the
 * memcpy hook is enabled, so the allocator never recurses back into
 * its own hook. Returns dest, matching the standard memcpy contract.
 * dest and src must not overlap (restrict-qualified, as with memcpy) */
INTERNAL_HIDDEN void *__iso_memcpy(void *restrict dest, const void *restrict src, size_t n) {
    char *out = (char *) dest;
    const char *in = (const char *) src;

    for(size_t i = 0; i < n; i++) {
        out[i] = in[i];
    }

    return dest;
}
290+
291+
INTERNAL_HIDDEN void *_iso_alloc_memcpy(void *restrict dest, const void *restrict src, size_t n) {
292+
if(n > SMALLEST_CHUNK_SZ) {
293+
/* We don't want to add too much overhead here so we only
294+
* check the chunk-to-zone cache for zone data and we don't
295+
* need to lock the root for that. Its possible for a cache
296+
* miss to mean a security check doesn't happen here but
297+
* this feature is more for catching bugs than it is for
298+
* mitigating them */
299+
iso_alloc_zone_t *zone = search_chunk_lookup_table(dest);
300+
void *user_pages_start = UNMASK_USER_PTR(zone);
301+
302+
if(user_pages_start <= dest && (user_pages_start + ZONE_USER_SIZE) > dest && n > zone->chunk_size) {
303+
LOG_AND_ABORT("Detected an out of bounds write memcpy: dest=0x%p (%d bytes) src=0x%p size=%d", dest, zone->chunk_size, src, n);
304+
}
305+
306+
zone = search_chunk_lookup_table(src);
307+
user_pages_start = UNMASK_USER_PTR(zone);
308+
309+
if(user_pages_start <= src && (user_pages_start + ZONE_USER_SIZE) > src && n > zone->chunk_size) {
310+
LOG_AND_ABORT("Detected an out of bounds read memcpy: dest=0x%p src=0x%p (%d bytes) size=%d", dest, src, zone->chunk_size, n);
311+
}
312+
}
313+
314+
return __iso_memcpy(dest, src, n);
315+
}
278316
#endif

src/iso_alloc_util.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,14 @@
66
#if CPU_PIN
77
/* sched_getcpu's performance depends on the
88
* architecture/kernel version, so we lower
9-
* the cost of feature's abstraction here.
10-
*/
9+
* the cost of feature's abstraction here. */
1110
INTERNAL_HIDDEN INLINE int _iso_getcpu(void) {
1211
#if defined(SCHED_GETCPU)
1312
return sched_getcpu();
1413
#elif defined(__x86_64__)
1514
/* rdtscp is not always available and is pretty slow
1615
* we instead load from the global descriptor table
17-
* then "mov" it to a.
18-
*/
16+
* then "mov" it to 'a' */
1917
unsigned int a;
2018
const unsigned int cpunodesegment = 15 * 8 + 3;
2119
__asm__ volatile("lsl %1, %0"
@@ -26,11 +24,11 @@ INTERNAL_HIDDEN INLINE int _iso_getcpu(void) {
2624
#if __APPLE__
2725
/* unlike other operating systems, the tpidr_el0 register on macOs
2826
* is unused; data stored for the current thread is instead fetchable
29-
* from "tpidrro_el0".
30-
*/
27+
* from "tpidrro_el0". */
3128
uintptr_t a;
32-
__asm__ volatile("mrs %x0, tpidrro_el0" : "=r"(a) :: "memory");
33-
return (int)((a & 0x8) - 1);
29+
__asm__ volatile("mrs %x0, tpidrro_el0"
30+
: "=r"(a)::"memory");
31+
return (int) ((a & 0x8) - 1);
3432
#else
3533
/* TODO most likely a different register/masking on other platforms */
3634
return -1;

src/libc_hook.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/* libc_hook.c - Provides low level hooks for libc functions
2+
* Copyright 2022 - chris.rohlf@gmail.com */
3+
4+
#include "iso_alloc_internal.h"
5+
#include "iso_alloc_sanity.h"
6+
7+
#if MEMCPY_SANITY
8+
9+
/* Interposes on the libc memcpy symbol so every memcpy call in the
 * process is routed through the IsoAlloc bounds-checking wrapper.
 * Only compiled in when MEMCPY_SANITY is enabled */
EXTERNAL_API void *memcpy(void *restrict dest, const void *restrict src, size_t n) {
    void *ret = _iso_alloc_memcpy(dest, src, n);
    return ret;
}
12+
13+
#endif

0 commit comments

Comments
 (0)