From 3e42dc3c305dd22a22e7d2996c7e24177f3a43ad Mon Sep 17 00:00:00 2001 From: Xinhao Yuan Date: Tue, 17 Mar 2026 10:42:56 -0700 Subject: [PATCH] Enable stack depth tracking via -fsanitize-coverage=stack-depth. When combined with -fsanitize-coverage-stack-depth-callback-min=, the stack depth is tracked in the __sanitizer_cov_stack_depth() callbacks. But this may cause binary data deps to have missing references to the callback (because sanitizers do not come with a default definition for it). So tracking without the callback is implemented too, which checks __sancov_lowest_stack at the end of each test. PiperOrigin-RevId: 885099833 --- centipede/runner.cc | 15 +++--- centipede/runner_utils.h | 4 ++ centipede/sancov_callbacks.cc | 84 ++++++++++++++++++++------------ centipede/sancov_interceptors.cc | 24 ++++----- centipede/sancov_state.cc | 52 +++++++++++++++----- centipede/sancov_state.h | 22 +++++++-- centipede/weak_sancov_stubs.cc | 6 +++ 7 files changed, 143 insertions(+), 64 deletions(-) diff --git a/centipede/runner.cc b/centipede/runner.cc index 3cf488818..e085b54e9 100644 --- a/centipede/runner.cc +++ b/centipede/runner.cc @@ -216,20 +216,21 @@ static void CheckWatchdogLimits() { } } -__attribute__((noinline)) void CheckStackLimit(uintptr_t sp) { +__attribute__((noinline)) void CheckStackLimit(size_t stack_usage, + bool is_current_stack) { static std::atomic_flag stack_limit_exceeded = ATOMIC_FLAG_INIT; const size_t stack_limit = state->run_time_flags.stack_limit_kb.load() << 10; // Check for the stack limit only if sp is inside the stack region. 
- if (stack_limit > 0 && tls.stack_region_low && - tls.top_frame_sp - sp > stack_limit) { + if (stack_limit > 0 && stack_usage > stack_limit) { const bool test_not_running = state->input_start_time == 0; - if (test_not_running) return; + if (test_not_running && is_current_stack) return; if (stack_limit_exceeded.test_and_set()) return; fprintf(stderr, - "========= Stack limit exceeded: %" PRIuPTR + "========= Stack limit exceeded: %zu" " > %zu" - " (byte); aborting\n", - tls.top_frame_sp - sp, stack_limit); + " (byte) in %s; aborting\n", + stack_usage, stack_limit, + is_current_stack ? "the current stack" : "a previous stack"); CentipedeSetFailureDescription( fuzztest::internal::kExecutionFailureStackLimitExceeded.data()); std::abort(); diff --git a/centipede/runner_utils.h b/centipede/runner_utils.h index 20fa40eb4..71b300520 100644 --- a/centipede/runner_utils.h +++ b/centipede/runner_utils.h @@ -24,6 +24,10 @@ #include "absl/base/nullability.h" +// Use this attribute for functions that must not be instrumented even if +// the library is built with sanitizers (asan, etc). +#define FUZZTEST_NO_SANITIZE __attribute__((no_sanitize("all"))) + namespace fuzztest::internal { // If `condition` prints `error` and calls exit(1). diff --git a/centipede/sancov_callbacks.cc b/centipede/sancov_callbacks.cc index 7e7fd15eb..3e5d0c504 100644 --- a/centipede/sancov_callbacks.cc +++ b/centipede/sancov_callbacks.cc @@ -29,6 +29,7 @@ #include "./centipede/pc_info.h" #include "./centipede/reverse_pc_table.h" #include "./centipede/runner_dl_info.h" +#include "./centipede/runner_utils.h" #include "./centipede/sancov_state.h" namespace fuzztest::internal { @@ -61,10 +62,6 @@ using fuzztest::internal::tls; // https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html. #define ENFORCE_INLINE __attribute__((always_inline)) inline -// Use this attribute for functions that must not be instrumented even if -// the runner is built with sanitizers (asan, etc). 
-#define NO_SANITIZE __attribute__((no_sanitize("all"))) - // NOTE: Enforce inlining so that `__builtin_return_address` works. ENFORCE_INLINE static void TraceLoad(void *addr) { if (ABSL_PREDICT_FALSE(!tls.traced) || @@ -127,55 +124,65 @@ ENFORCE_INLINE void TraceCmp(T a, T b, uintptr_t pc) { //------------------------------------------------------------------------------ extern "C" { -NO_SANITIZE void __sanitizer_cov_load1(uint8_t *addr) { TraceLoad(addr); } -NO_SANITIZE void __sanitizer_cov_load2(uint16_t *addr) { TraceLoad(addr); } -NO_SANITIZE void __sanitizer_cov_load4(uint32_t *addr) { TraceLoad(addr); } -NO_SANITIZE void __sanitizer_cov_load8(uint64_t *addr) { TraceLoad(addr); } -NO_SANITIZE void __sanitizer_cov_load16(__uint128_t *addr) { TraceLoad(addr); } +FUZZTEST_NO_SANITIZE void __sanitizer_cov_load1(uint8_t* addr) { + TraceLoad(addr); +} +FUZZTEST_NO_SANITIZE void __sanitizer_cov_load2(uint16_t* addr) { + TraceLoad(addr); +} +FUZZTEST_NO_SANITIZE void __sanitizer_cov_load4(uint32_t* addr) { + TraceLoad(addr); +} +FUZZTEST_NO_SANITIZE void __sanitizer_cov_load8(uint64_t* addr) { + TraceLoad(addr); +} +FUZZTEST_NO_SANITIZE void __sanitizer_cov_load16(__uint128_t* addr) { + TraceLoad(addr); +} -NO_SANITIZE +FUZZTEST_NO_SANITIZE void __sanitizer_cov_trace_const_cmp1(uint8_t Arg1, uint8_t Arg2) { if (ABSL_PREDICT_FALSE(!tls.traced)) return; TraceCmp(Arg1, Arg2, reinterpret_cast<uintptr_t>(__builtin_return_address(0))); } -NO_SANITIZE +FUZZTEST_NO_SANITIZE void __sanitizer_cov_trace_const_cmp2(uint16_t Arg1, uint16_t Arg2) { if (ABSL_PREDICT_FALSE(!tls.traced)) return; TraceCmp(Arg1, Arg2, reinterpret_cast<uintptr_t>(__builtin_return_address(0))); } -NO_SANITIZE +FUZZTEST_NO_SANITIZE void __sanitizer_cov_trace_const_cmp4(uint32_t Arg1, uint32_t Arg2) { if (ABSL_PREDICT_FALSE(!tls.traced)) return; TraceCmp(Arg1, Arg2, reinterpret_cast<uintptr_t>(__builtin_return_address(0))); } -NO_SANITIZE +FUZZTEST_NO_SANITIZE void __sanitizer_cov_trace_const_cmp8(uint64_t Arg1, uint64_t Arg2) { if 
(ABSL_PREDICT_FALSE(!tls.traced)) return; TraceCmp(Arg1, Arg2, reinterpret_cast<uintptr_t>(__builtin_return_address(0))); } -NO_SANITIZE +FUZZTEST_NO_SANITIZE void __sanitizer_cov_trace_cmp1(uint8_t Arg1, uint8_t Arg2) { if (ABSL_PREDICT_FALSE(!tls.traced)) return; TraceCmp(Arg1, Arg2, reinterpret_cast<uintptr_t>(__builtin_return_address(0))); } -NO_SANITIZE +FUZZTEST_NO_SANITIZE void __sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2) { if (ABSL_PREDICT_FALSE(!tls.traced)) return; TraceCmp(Arg1, Arg2, reinterpret_cast<uintptr_t>(__builtin_return_address(0))); } -NO_SANITIZE +FUZZTEST_NO_SANITIZE void __sanitizer_cov_trace_cmp4(uint32_t Arg1, uint32_t Arg2) { if (ABSL_PREDICT_FALSE(!tls.traced)) return; TraceCmp(Arg1, Arg2, reinterpret_cast<uintptr_t>(__builtin_return_address(0))); } -NO_SANITIZE +FUZZTEST_NO_SANITIZE void __sanitizer_cov_trace_cmp8(uint64_t Arg1, uint64_t Arg2) { if (ABSL_PREDICT_FALSE(!tls.traced)) return; TraceCmp(Arg1, Arg2, @@ -188,7 +195,7 @@ void __sanitizer_cov_trace_cmp8(uint64_t Arg1, uint64_t Arg2) { // LLVM/libFuzzer implementation). // // Source: https://clang.llvm.org/docs/SanitizerCoverage.html -NO_SANITIZE +FUZZTEST_NO_SANITIZE void __sanitizer_cov_trace_switch(uint64_t val, uint64_t* cases) { if (ABSL_PREDICT_FALSE(!tls.traced)) return; const auto num_cases = cases[0]; @@ -262,6 +269,23 @@ __attribute__((noinline)) static void HandlePath(uintptr_t normalized_pc) { sancov_state->path_feature_set.set(hash); } +// Updates the lowest stack using the current stack pointer `sp` and checks +// against the stack limit if needed. +static ENFORCE_INLINE void UpdateLowestStackAndCheckLimit(uintptr_t sp) { + // It should be rare for the stack pointer to be valid and exceed the previous + // record. 
+ if (ABSL_PREDICT_FALSE(sp < tls.lowest_sp && sp <= tls.top_frame_sp && + sp >= tls.stack_region_low && + tls.stack_region_low > 0)) { + tls.lowest_sp = sp; + if (fuzztest::internal::CheckStackLimit == nullptr) { + return; + } + fuzztest::internal::CheckStackLimit(tls.top_frame_sp - sp, + /*is_current_stack=*/true); + } +} + // Handles one observed PC. // `normalized_pc` is an integer representation of PC that is stable between // the executions. @@ -278,18 +302,7 @@ static ENFORCE_INLINE void HandleOnePc(PCGuard pc_guard) { if (pc_guard.is_function_entry) { uintptr_t sp = reinterpret_cast<uintptr_t>(__builtin_frame_address(0)); - // It should be rare for the stack depth to exceed the previous record. - if (__builtin_expect( - sp < tls.lowest_sp && - // And ignore the stack pointer when it is not in the known - // region (e.g. for signal handling with an alternative stack). - (tls.stack_region_low == 0 || sp >= tls.stack_region_low), - 0)) { - tls.lowest_sp = sp; - if (fuzztest::internal::CheckStackLimit != nullptr) { - fuzztest::internal::CheckStackLimit(sp); - } - } + UpdateLowestStackAndCheckLimit(sp); if (sancov_state->flags.callstack_level != 0) { tls.call_stack.OnFunctionEntry(pc_guard.pc_index, sp); sancov_state->callstack_set.set(tls.call_stack.Hash()); @@ -361,6 +374,7 @@ __attribute__((noinline)) static void MainObjectLazyInit() { // This instrumentation is redundant if other instrumentation // (e.g. trace-pc-guard) is available, but GCC as of 2022-04 only supports // this variant. +FUZZTEST_NO_SANITIZE void __sanitizer_cov_trace_pc() { if (ABSL_PREDICT_FALSE(!tls.traced)) return; uintptr_t pc = reinterpret_cast<uintptr_t>(__builtin_return_address(0)); @@ -386,7 +400,7 @@ void __sanitizer_cov_trace_pc_guard_init(PCGuard *absl_nonnull start, } // This function is called on every instrumented edge. 
-NO_SANITIZE +FUZZTEST_NO_SANITIZE void __sanitizer_cov_trace_pc_guard(PCGuard *absl_nonnull guard) { if (ABSL_PREDICT_FALSE(!tls.traced)) return; // This function may be called very early during the DSO initialization, @@ -397,4 +411,12 @@ void __sanitizer_cov_trace_pc_guard(PCGuard *absl_nonnull guard) { HandleOnePc(*guard); } +// This callback is called by the compiler on every function entry when enabled. +// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-stack-depth +FUZZTEST_NO_SANITIZE void __sanitizer_cov_stack_depth() { + if (ABSL_PREDICT_FALSE(!tls.traced)) return; + UpdateLowestStackAndCheckLimit( + reinterpret_cast<uintptr_t>(__builtin_frame_address(0))); +} + } // extern "C" diff --git a/centipede/sancov_interceptors.cc b/centipede/sancov_interceptors.cc index 304d9496b..cf1e9ace1 100644 --- a/centipede/sancov_interceptors.cc +++ b/centipede/sancov_interceptors.cc @@ -22,6 +22,7 @@ #include "absl/base/nullability.h" #include "absl/base/optimization.h" +#include "./centipede/runner_utils.h" #include "./centipede/sancov_state.h" using fuzztest::internal::tls; @@ -30,7 +31,6 @@ using fuzztest::internal::tls; // before or during the sanitizer initialization. Instead, we check if the // current thread is marked as started by the runner as the proxy of sanitizier // initialization. If not, we skip the interception logic. -#define NO_SANITIZE __attribute__((no_sanitize("all"))) namespace { @@ -131,8 +131,8 @@ DECLARE_CENTIPEDE_ORIG_FUNC(int, pthread_create, // Fallback for the case *cmp_orig is null. // Will be executed several times at process startup, if at all. 
-static NO_SANITIZE int memcmp_fallback(const void *s1, const void *s2, - size_t n) { +static FUZZTEST_NO_SANITIZE int memcmp_fallback(const void* s1, const void* s2, + size_t n) { const auto *p1 = static_cast(s1); const auto *p2 = static_cast(s2); for (size_t i = 0; i < n; ++i) { @@ -143,8 +143,8 @@ static NO_SANITIZE int memcmp_fallback(const void *s1, const void *s2, } // Fallback for case insensitive comparison. -static NO_SANITIZE int memcasecmp_fallback(const void* s1, const void* s2, - size_t n) { +static FUZZTEST_NO_SANITIZE int memcasecmp_fallback(const void* s1, + const void* s2, size_t n) { static uint8_t to_lower[256]; [[maybe_unused]] static bool initialize_to_lower = [&] { for (size_t i = 0; i < sizeof(to_lower); ++i) { @@ -166,7 +166,8 @@ static NO_SANITIZE int memcasecmp_fallback(const void* s1, const void* s2, // memcmp interceptor. // Calls the real memcmp() and possibly modifies state.cmp_feature_set. -extern "C" NO_SANITIZE int memcmp(const void *s1, const void *s2, size_t n) { +extern "C" FUZZTEST_NO_SANITIZE int memcmp(const void* s1, const void* s2, + size_t n) { const int result = memcmp_orig ? memcmp_orig(s1, s2, n) : memcmp_fallback(s1, s2, n); if (ABSL_PREDICT_FALSE(!tls.traced)) { @@ -183,7 +184,7 @@ extern "C" NO_SANITIZE int memcmp(const void *s1, const void *s2, size_t n) { // strcmp interceptor. // Calls the real strcmp() and possibly modifies state.cmp_feature_set. -extern "C" NO_SANITIZE int strcmp(const char *s1, const char *s2) { +extern "C" FUZZTEST_NO_SANITIZE int strcmp(const char* s1, const char* s2) { // Find the length of the shorter string, as this determines the actual number // of bytes that are compared. Note that this is needed even if we call // `strcmp_orig` because we're passing it to `TraceMemCmp()`. @@ -205,7 +206,8 @@ extern "C" NO_SANITIZE int strcmp(const char *s1, const char *s2) { // strncmp interceptor. // Calls the real strncmp() and possibly modifies state.cmp_feature_set. 
-extern "C" NO_SANITIZE int strncmp(const char *s1, const char *s2, size_t n) { +extern "C" FUZZTEST_NO_SANITIZE int strncmp(const char* s1, const char* s2, + size_t n) { // Find the length of the shorter string, as this determines the actual number // of bytes that are compared. Note that this is needed even if we call // `strncmp_orig` because we're passing it to `TraceMemCmp()`. @@ -228,7 +230,7 @@ extern "C" NO_SANITIZE int strncmp(const char *s1, const char *s2, size_t n) { // strcasecmp interceptor. // Calls the real strcasecmp() and possibly modifies state.cmp_feature_set. -extern "C" NO_SANITIZE int strcasecmp(const char* s1, const char* s2) { +extern "C" FUZZTEST_NO_SANITIZE int strcasecmp(const char* s1, const char* s2) { // Find the length of the shorter string, as this determines the actual number // of bytes that are compared. Note that this is needed even if we call // `strcasecmp_orig` because we're passing it to `TraceMemCmp()`. @@ -251,8 +253,8 @@ extern "C" NO_SANITIZE int strcasecmp(const char* s1, const char* s2) { // strncasecmp interceptor. // Calls the real strncasecmp() and possibly modifies state.cmp_feature_set. -extern "C" NO_SANITIZE int strncasecmp(const char* s1, const char* s2, - size_t n) { +extern "C" FUZZTEST_NO_SANITIZE int strncasecmp(const char* s1, const char* s2, + size_t n) { // Find the length of the shorter string, as this determines the actual number // of bytes that are compared. Note that this is needed even if we call // `strncasecmp_orig` because we're passing it to `TraceMemCmp()`. diff --git a/centipede/sancov_state.cc b/centipede/sancov_state.cc index 25bcd7353..19570d6a1 100644 --- a/centipede/sancov_state.cc +++ b/centipede/sancov_state.cc @@ -38,6 +38,10 @@ __attribute__((weak)) extern fuzztest::internal::feature_t __attribute__((weak)) extern fuzztest::internal::feature_t __stop___centipede_extra_features; +// May be updated by sancov with -fsanitize-coverage=stack-depth. 
+__attribute__((visibility("default"))) +__attribute__((weak)) thread_local uintptr_t __sancov_lowest_stack; + namespace fuzztest::internal { ExplicitLifetime sancov_state; @@ -56,9 +60,8 @@ namespace { // // Must not be sanitized because sanitizers may trigger this on unsanitized // data, causing false positives and nested failures. -__attribute__((no_sanitize("all"))) size_t LengthOfCommonPrefix(const void* s1, - const void* s2, - size_t n) { +FUZZTEST_NO_SANITIZE size_t LengthOfCommonPrefix(const void* s1, const void* s2, + size_t n) { const auto *p1 = static_cast(s1); const auto *p2 = static_cast(s2); static constexpr size_t kMaxLen = feature_domains::kCMPScoreBitmask; @@ -122,7 +125,8 @@ void ThreadLocalSancovState::OnThreadStart() { // Always trace threads by default. Internal threads that do not want tracing // will set this to false later. tls.traced = true; - tls.lowest_sp = tls.top_frame_sp = + tls.sancov_lowest_sp = &__sancov_lowest_stack; + *tls.sancov_lowest_sp = tls.lowest_sp = tls.top_frame_sp = reinterpret_cast(__builtin_frame_address(0)); tls.stack_region_low = GetCurrentThreadStackRegionLow(); if (tls.stack_region_low == 0) { @@ -142,6 +146,13 @@ void ThreadLocalSancovState::OnThreadStart() { void ThreadLocalSancovState::OnThreadStop() { tls.traced = false; LockGuard lock(sancov_state->tls_list_mu); + const size_t sancov_lowest_sp = *tls.sancov_lowest_sp; + tls.sancov_lowest_sp = nullptr; + if (sancov_lowest_sp <= tls.top_frame_sp && + sancov_lowest_sp < tls.lowest_sp && + sancov_lowest_sp >= tls.stack_region_low && tls.stack_region_low > 0) { + tls.lowest_sp = sancov_lowest_sp; + } // Remove myself from state.tls_list. The list never // becomes empty because the main thread does not call OnThreadStop(). 
if (&tls == sancov_state->tls_list) { @@ -297,13 +308,17 @@ void MaybeAddFeature(feature_t feature) { void CleanUpSancovTls() { sancov_state->CleanUpDetachedTls(); - if (sancov_state->flags.path_level != 0) { - sancov_state->ForEachTls([](ThreadLocalSancovState& tls) { + sancov_state->ForEachTls([](ThreadLocalSancovState& tls) { + if (sancov_state->flags.path_level != 0) { tls.path_ring_buffer.Reset(sancov_state->flags.path_level); + } + if (sancov_state->flags.callstack_level != 0) { tls.call_stack.Reset(sancov_state->flags.callstack_level); - tls.lowest_sp = tls.top_frame_sp; - }); - } + } + RunnerCheck(tls.sancov_lowest_sp != nullptr, + "sancov_lowest_sp is null for a live thread"); + *tls.sancov_lowest_sp = tls.lowest_sp = tls.top_frame_sp; + }); } void PrepareSancov(bool full_clear) { @@ -439,10 +454,23 @@ void PostProcessSancov(bool reject_input) { // Iterate all threads and get features from TLS data. sancov_state->ForEachTls([&feature_handler](ThreadLocalSancovState& tls) { + RunnerCheck(tls.top_frame_sp >= tls.lowest_sp, + "bad values of tls.top_frame_sp and tls.lowest_sp"); + uintptr_t lowest_sp = tls.lowest_sp; + if (tls.sancov_lowest_sp != nullptr) { + const uintptr_t sancov_lowest_sp = *tls.sancov_lowest_sp; + if (sancov_lowest_sp <= tls.top_frame_sp && + sancov_lowest_sp <= lowest_sp && + sancov_lowest_sp >= tls.stack_region_low && + tls.stack_region_low > 0) { + lowest_sp = sancov_lowest_sp; + } + } + const size_t sp_diff = tls.top_frame_sp - lowest_sp; + if (CheckStackLimit != nullptr) { + CheckStackLimit(sp_diff, /*is_current_stack=*/false); + } if (sancov_state->flags.callstack_level != 0) { - RunnerCheck(tls.top_frame_sp >= tls.lowest_sp, - "bad values of tls.top_frame_sp and tls.lowest_sp"); - size_t sp_diff = tls.top_frame_sp - tls.lowest_sp; feature_handler(feature_domains::kCallStack.ConvertToMe(sp_diff)); } }); diff --git a/centipede/sancov_state.h b/centipede/sancov_state.h index 3afb6c4e1..85407e3de 100644 --- a/centipede/sancov_state.h 
+++ b/centipede/sancov_state.h @@ -110,8 +110,22 @@ struct ThreadLocalSancovState { uintptr_t top_frame_sp; // The lower bound of the stack region of this thread. 0 means unknown. uintptr_t stack_region_low; - // Lowest observed value of SP. + + // `lowest_sp` and `*sancov_lowest_sp` are read and written by both + // the current thread and the sancov processing thread. Thus race conditions + // may happen. We don't use mutex/atomic because they are slow (and it is not + // possible on sancov_lowest_sp). Instead we let race conditions happen and + // tolerate them with the best effort. An SP value is valid if and only if + // `stack_region_low <= SP <= top_frame_sp && stack_region_low > 0`. + // This should reject most bad values caused by race conditions. + // + // Lowest sp observed by this library. uintptr_t lowest_sp; + // A pointer to the lowest sp updated by sancov (if enabled). It is constant + // and non-null when the thread is alive, and set to null when the thread + // is terminated, guarded by `state.tls_list_mu`. So no race conditions on + // the pointer itself. + uintptr_t* sancov_lowest_sp; // The (imprecise) call stack is updated by the PC callback. CallStack<> call_stack; @@ -296,8 +310,10 @@ SanCovRuntimeRawFeatureParts SanCovRuntimeGetFeatures(); // Gets the execution metadata gathered in `PostProcessSancov`. const ExecutionMetadata& SanCovRuntimeGetExecutionMetadata(); -// Check for stack limit for the stack pointer `sp` in the current thread. -__attribute__((weak)) void CheckStackLimit(uintptr_t sp); +// Check for stack limit for `stack_usage`, with `is_current_stack` set if it +// is for the current calling stack. 
+__attribute__((weak)) void CheckStackLimit(size_t stack_usage, + bool is_current_stack); extern ExplicitLifetime sancov_state; extern __thread ThreadLocalSancovState tls; diff --git a/centipede/weak_sancov_stubs.cc b/centipede/weak_sancov_stubs.cc index c9bfa63d8..66f78a66c 100644 --- a/centipede/weak_sancov_stubs.cc +++ b/centipede/weak_sancov_stubs.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include <cstdint> + #define WEAK_SANCOV_DEF(return_type, name, ...) \ extern "C" __attribute__((visibility("default"))) __attribute__((weak)) \ return_type \ @@ -42,3 +44,7 @@ WEAK_SANCOV_DEF(void, __sanitizer_cov_load2, void) {} WEAK_SANCOV_DEF(void, __sanitizer_cov_load4, void) {} WEAK_SANCOV_DEF(void, __sanitizer_cov_load8, void) {} WEAK_SANCOV_DEF(void, __sanitizer_cov_load16, void) {} + +WEAK_SANCOV_DEF(void, __sanitizer_cov_stack_depth, void) {} +extern "C" __attribute__((visibility("default"))) +__attribute__((weak)) thread_local uintptr_t __sancov_lowest_stack;