diff --git a/include/conf.h b/include/conf.h index fb5dc6b..e0e8bdf 100644 --- a/include/conf.h +++ b/include/conf.h @@ -42,10 +42,9 @@ #endif /* If you're using the UAF_PTR_PAGE functionality and - * want to change the frequency it is triggered or the - * magic value that is written */ + * want to change the frequency it is triggered */ #if UAF_PTR_PAGE -#define UAF_PTR_PAGE_ODDS 1000000 +#define UAF_PTR_PAGE_ODDS 250000 #endif /* Zones can be retired after a certain number of diff --git a/include/iso_alloc_internal.h b/include/iso_alloc_internal.h index 647f4d7..b8a036f 100644 --- a/include/iso_alloc_internal.h +++ b/include/iso_alloc_internal.h @@ -418,7 +418,9 @@ INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_big_alloc(size_t size); INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_alloc(iso_alloc_zone_t *zone, size_t size); INTERNAL_HIDDEN INLINE ASSUME_ALIGNED void *_iso_alloc_bitslot_from_zone(bit_slot_t bitslot, iso_alloc_zone_t *zone); INTERNAL_HIDDEN ASSUME_ALIGNED void *_iso_calloc(size_t nmemb, size_t size); -INTERNAL_HIDDEN void *_iso_alloc_ptr_search(void *n, bool poison); +#if UAF_PTR_PAGE +INTERNAL_HIDDEN void *_iso_alloc_ptr_search(void *n); +#endif INTERNAL_HIDDEN INLINE uint64_t us_rand_uint64(uint64_t *seed); INTERNAL_HIDDEN INLINE uint64_t rand_uint64(void); INTERNAL_HIDDEN uint8_t _iso_alloc_get_mem_tag(void *p, iso_alloc_zone_t *zone); diff --git a/src/iso_alloc.c b/src/iso_alloc.c index 0a1d7ff..79c01ed 100644 --- a/src/iso_alloc.c +++ b/src/iso_alloc.c @@ -1727,7 +1727,7 @@ INTERNAL_HIDDEN iso_alloc_zone_t *_iso_free_internal_unlocked(void *p, bool perm #if UAF_PTR_PAGE if(UNLIKELY((us_rand_uint64(&_root->seed) % UAF_PTR_PAGE_ODDS) == 1)) { - _iso_alloc_ptr_search(p, true); + _iso_alloc_ptr_search(p); } #endif diff --git a/src/iso_alloc_sanity.c b/src/iso_alloc_sanity.c index 56cea1a..1e601fd 100644 --- a/src/iso_alloc_sanity.c +++ b/src/iso_alloc_sanity.c @@ -285,8 +285,18 @@ INTERNAL_HIDDEN void *_page_fault_thread_handler(void *unused) { INTERNAL_HIDDEN INLINE void write_sanity_canary(void *p) { const uint64_t canary = (_sanity_canary & SANITY_CANARY_VALIDATE_MASK); + int32_t qwords = (int32_t) (g_page_size / sizeof(uint64_t)); - for(int32_t i = 0; i < (g_page_size / sizeof(uint64_t)); i++) { +#if USE_NEON + const uint64x2_t cv = vdupq_n_u64(canary); + while(qwords >= 2) { + vst1q_u64((uint64_t *) p, cv); + p += sizeof(uint64x2_t); + qwords -= 2; + } +#endif + + while(qwords--) { *(uint64_t *) p = canary; p += sizeof(uint64_t); } @@ -305,9 +315,36 @@ INTERNAL_HIDDEN INLINE void check_sanity_canary(_sane_allocation_t *sane_alloc) start = sane_alloc->address + sane_alloc->orig_size; } - while(start < end) { + const uint64_t canary = (_sanity_canary & SANITY_CANARY_VALIDATE_MASK); + + /* orig_size is unaligned in general, so [start, end) may begin or + * end mid-qword. The expected byte at offset (p & 7) is that byte + * within the canary qword, since write_sanity_canary stored qwords + * starting at the page base. Walk the partial bytes at the head + * until start is 8-aligned. */ + while(start < end && ((uintptr_t) start & 7)) { + uint8_t expected = (uint8_t) (canary >> (((uintptr_t) start & 7) << 3)); + if(UNLIKELY(*(uint8_t *) start != expected)) { + LOG_AND_ABORT("Sanity canary byte at 0x%p has been corrupted! Value: 0x%x Expected: 0x%x", start, *(uint8_t *) start, expected); + } + start++; + } + +#if USE_NEON + /* Compare two qwords at a time and reduce. On any mismatch, fall + * through to the scalar loop which will pinpoint and abort. */ + const uint64x2_t cv = vdupq_n_u64(canary); + while((start + sizeof(uint64x2_t)) <= end) { + uint64x2_t v = vld1q_u64((const uint64_t *) start); + if(UNLIKELY(vmaxvq_u32(vreinterpretq_u32_u64(veorq_u64(v, cv))) != 0)) { + break; + } + start += sizeof(uint64x2_t); + } +#endif + + while((start + sizeof(uint64_t)) <= end) { uint64_t v = *((uint64_t *) start); - uint64_t canary = (_sanity_canary & SANITY_CANARY_VALIDATE_MASK); if(UNLIKELY(v != canary)) { LOG_AND_ABORT("Sanity canary at 0x%p has been corrupted! Value: 0x%x Expected: 0x%x", start, v, canary); @@ -315,6 +352,16 @@ INTERNAL_HIDDEN INLINE void check_sanity_canary(_sane_allocation_t *sane_alloc) start += sizeof(uint64_t); } + + /* Tail: 0–7 partial bytes when end isn't 8-aligned (right-aligned + * sample with unaligned orig_size). */ + while(start < end) { + uint8_t expected = (uint8_t) (canary >> (((uintptr_t) start & 7) << 3)); + if(UNLIKELY(*(uint8_t *) start != expected)) { + LOG_AND_ABORT("Sanity canary byte at 0x%p has been corrupted! Value: 0x%x Expected: 0x%x", start, *(uint8_t *) start, expected); + } + start++; + } } /* Callers of this function should hold the sanity cache lock */ diff --git a/src/iso_alloc_search.c b/src/iso_alloc_search.c index aa42a59..ac1e948 100644 --- a/src/iso_alloc_search.c +++ b/src/iso_alloc_search.c @@ -3,10 +3,11 @@ #include "iso_alloc_internal.h" -/* Search all zones for either the first instance of a pointer - * value and return it or overwrite the first potentially - * dangling pointer with the address of an unmapped page */ -INTERNAL_HIDDEN void *_iso_alloc_ptr_search(void *n, bool poison) { +#if UAF_PTR_PAGE +/* Search all zones for the first 8-byte sequence equal to n and overwrite + * it with the address of the PROT_NONE uaf_ptr_page so the next deref + * faults at a known address. Sampled from the free path. */ +INTERNAL_HIDDEN void *_iso_alloc_ptr_search(void *n) { uint8_t *search = NULL; uint8_t *end = NULL; const size_t zones_used = _root->zones_used; @@ -17,30 +18,58 @@ INTERNAL_HIDDEN void *_iso_alloc_ptr_search(void *n, bool poison) { n = (void *) ((uintptr_t) n & TAGGED_PTR_MASK); #endif +#if USE_NEON + /* Per-call invariants — n is fixed for the entire search, so broadcast + * the two pre-filter bytes once instead of once per zone. */ + const uint8x16_t b0 = vdupq_n_u8((uint8_t) (uintptr_t) n); + const uint8x16_t b1 = vdupq_n_u8((uint8_t) ((uintptr_t) n >> 8)); +#endif + for(int32_t i = 0; i < zones_used; i++) { iso_alloc_zone_t *zone = &_root->zones[i]; search = UNMASK_USER_PTR(zone); end = search + ZONE_USER_SIZE; - while(search <= (uint8_t *) (end - sizeof(uint64_t))) { - if(LIKELY((uint64_t) * (uint64_t *) search != (uint64_t) n)) { - search++; - } else { - if(poison == false) { - return search; - } else { -#if UAF_PTR_PAGE +#if USE_NEON + /* A u64 at byte position k can match n only if bytes[k] == n[0] + * AND bytes[k+1] == n[1]. AND the two shifted byte-equality + * vectors before reducing — collapses false positives 256x and + * keeps the filter useful when chunks are filled with POISON_BYTE + * (which would defeat a single-byte filter when n[0] == 0xde). + * Stop 23 bytes before end so the last candidate u64 read fits. */ + uint8_t *neon_end = end - 23; + while(search <= neon_end) { + uint8x16_t eq = vandq_u8(vceqq_u8(vld1q_u8(search), b0), + vceqq_u8(vld1q_u8(search + 1), b1)); + if(LIKELY(vmaxvq_u8(eq) == 0)) { + search += 16; + continue; + } + uint8_t *win_end = search + 16; + while(search < win_end) { + if(UNLIKELY(*(uint64_t *) search == (uint64_t) n)) { *(uint64_t *) search = (uint64_t) (_root->uaf_ptr_page); return search; -#endif } + search++; + } + } +#endif + + uint8_t *tail_end = end - sizeof(uint64_t); + while(search <= tail_end) { + if(UNLIKELY((uint64_t) * (uint64_t *) search == (uint64_t) n)) { + *(uint64_t *) search = (uint64_t) (_root->uaf_ptr_page); + return search; } + search++; } } return NULL; } +#endif #if EXPERIMENTAL /* These functions are all experimental and subject to change */ diff --git a/utils/run_tests.sh b/utils/run_tests.sh index 4a2dc60..488a391 100755 --- a/utils/run_tests.sh +++ b/utils/run_tests.sh @@ -7,6 +7,7 @@ tests=("tests" "big_tests" "interfaces_test" "thread_tests" "pool_test" "rand_freelist") failure=0 succeeded=0 +start=$SECONDS $(ulimit -c 0) @@ -52,6 +53,7 @@ done echo "$succeeded Tests passed" echo "$failure Tests failed" +echo "Total runtime: $((SECONDS - start))s" unset LD_LIBRARY_PATH unset LD_PRELOAD