From 3f3c57371b315e6454f233665202175d19c187cb Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Thu, 11 Jun 2026 17:10:27 -0400 Subject: [PATCH] fix: data race on last_storage_ptr_ cache in gil_safe_call_once_and_store Under free-threaded CPython (Py_GIL_DISABLED) the GIL provides no mutual exclusion, so the plain pointer last_storage_ptr_ was read and written concurrently without synchronization, a C++ data race. Make it a std::atomic. Also reorder get_stored() to check is_last_storage_valid() before loading the cached pointer. The writer publishes the pointer before setting the validity flag, so the flag must be observed first for correct acquire/release ordering. Assisted-by: ClaudeCode:claude-fable-5 --- include/pybind11/gil_safe_call_once.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/include/pybind11/gil_safe_call_once.h b/include/pybind11/gil_safe_call_once.h index 770ed49998..af26272d48 100644 --- a/include/pybind11/gil_safe_call_once.h +++ b/include/pybind11/gil_safe_call_once.h @@ -193,6 +193,8 @@ class gil_safe_call_once_and_store { ::new (value->storage) T(fn()); value->finalize = finalize_fn; value->is_initialized = true; + // Publish the cached pointer before setting the validity flag, so that any reader + // which observes the flag as true is guaranteed to also observe this pointer. last_storage_ptr_ = reinterpret_cast(value->storage); is_initialized_by_at_least_one_interpreter_ = true; }); @@ -204,11 +206,17 @@ class gil_safe_call_once_and_store { // This must only be called after `call_once_and_store_result()` was called. T &get_stored() { - T *result = last_storage_ptr_; - if (!is_last_storage_valid()) { + T *result = nullptr; + // Check the validity flag *before* loading the cached pointer: the writer publishes the + // pointer before setting the flag, so observing the flag as true guarantees the load below + // sees the published pointer. + if (is_last_storage_valid()) { + result = last_storage_ptr_; + } else { gil_scoped_acquire gil_acq; auto *value = get_or_create_storage_in_state_dict(); - result = last_storage_ptr_ = reinterpret_cast(value->storage); + result = reinterpret_cast(value->storage); + last_storage_ptr_ = result; } assert(result != nullptr); return *result; @@ -260,10 +268,12 @@ class gil_safe_call_once_and_store { // Fast local cache to avoid repeated lookups when there are no multiple interpreters. // This is only valid if there is a single interpreter. Otherwise, it is not used. + // It is atomic because under free-threading (`Py_GIL_DISABLED`) the GIL provides no mutual + // exclusion, so this pointer may be read and written concurrently by multiple threads. // WARNING: We cannot use thread local cache similar to `internals_pp_manager::internals_p_tls` // because the thread local storage cannot be explicitly invalidated when interpreters // are destroyed (unlike `internals_pp_manager` which has explicit hooks for that). - T *last_storage_ptr_ = nullptr; + std::atomic last_storage_ptr_{nullptr}; // This flag is true if the value has been initialized by any interpreter (may not be the // current one). detail::atomic_bool is_initialized_by_at_least_one_interpreter_{false};