From 6142b58f137db78571f9ebad2e9efdf8edf8ea30 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 17 Mar 2026 12:53:55 -0400 Subject: [PATCH 1/3] gh-146041: Avoid lock in sys.intern() for already interned strings --- InternalDocs/string_interning.md | 10 ++------ ...-03-17-00-00-00.gh-issue-146041.7799bb.rst | 3 +++ Objects/object.c | 7 ------ Objects/unicodeobject.c | 24 +++++++++++++++++-- 4 files changed, 27 insertions(+), 17 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-03-17-00-00-00.gh-issue-146041.7799bb.rst diff --git a/InternalDocs/string_interning.md b/InternalDocs/string_interning.md index 26a5197c6e70f3..0913b1a3471ef4 100644 --- a/InternalDocs/string_interning.md +++ b/InternalDocs/string_interning.md @@ -52,15 +52,9 @@ The key and value of each entry in this dict reference the same object. ## Immortality and reference counting -Invariant: Every immortal string is interned. +In the GIL-enabled build interned strings may be mortal or immortal. In the +free-threaded build, interned strings are always immortal. -In practice, this means that you must not use `_Py_SetImmortal` on -a string. (If you know it's already immortal, don't immortalize it; -if you know it's not interned you might be immortalizing a redundant copy; -if it's interned and mortal it needs extra processing in -`_PyUnicode_InternImmortal`.) - -The converse is not true: interned strings can be mortal. For mortal interned strings: - the 2 references from the interned dict (key & value) are excluded from diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-17-00-00-00.gh-issue-146041.7799bb.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-17-00-00-00.gh-issue-146041.7799bb.rst new file mode 100644 index 00000000000000..812f023266bd76 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-17-00-00-00.gh-issue-146041.7799bb.rst @@ -0,0 +1,3 @@ +Fix free-threading scaling bottleneck in :func:`sys.intern` and +:c:func:`PyObject_SetAttr` by avoiding the interpreter-wide lock when the string +is already interned and immortalized. diff --git a/Objects/object.c b/Objects/object.c index e405963614689f..585f6da6f839ed 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2728,13 +2728,6 @@ _Py_NewReferenceNoTotal(PyObject *op) void _Py_SetImmortalUntracked(PyObject *op) { -#ifdef Py_DEBUG - // For strings, use _PyUnicode_InternImmortal instead. - if (PyUnicode_CheckExact(op)) { - assert(PyUnicode_CHECK_INTERNED(op) == SSTATE_INTERNED_IMMORTAL - || PyUnicode_CHECK_INTERNED(op) == SSTATE_INTERNED_IMMORTAL_STATIC); - } -#endif // Check if already immortal to avoid degrading from static immortal to plain immortal if (_Py_IsImmortal(op)) { return; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index dd2482ca653fb1..481172d8af5d43 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14188,8 +14188,11 @@ immortalize_interned(PyObject *s) _Py_DecRefTotal(_PyThreadState_GET()); } #endif - FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL); _Py_SetImmortal(s); + // The switch to SSTATE_INTERNED_IMMORTAL must be the last thing done here + // to synchronize with the check in intern_common() that avoids locking if + // the string is already immortal. + FT_ATOMIC_STORE_UINT8(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL); } static /* non-null */ PyObject* @@ -14271,6 +14274,23 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, assert(interned != NULL); #ifdef Py_GIL_DISABLED # define INTERN_MUTEX &_Py_INTERP_CACHED_OBJECT(interp, interned_mutex) + // Lock-free fast path: check if there's already an interned copy that + // is in its final immortal state. + PyObject *r; + int res = PyDict_GetItemRef(interned, s, &r); + if (res < 0) { + PyErr_Clear(); + return s; + } + if (res > 0) { + unsigned int state = _Py_atomic_load_uint8(&_PyUnicode_STATE(r).interned); + if (state == SSTATE_INTERNED_IMMORTAL) { + Py_DECREF(s); + return r; + } + // Not yet fully interned; fall through to the locking path. + Py_DECREF(r); + } #endif FT_MUTEX_LOCK(INTERN_MUTEX); PyObject *t; @@ -14308,7 +14328,7 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, Py_DECREF(s); Py_DECREF(s); } - FT_ATOMIC_STORE_UINT8_RELAXED(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL); + FT_ATOMIC_STORE_UINT8(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_MORTAL); /* INTERNED_MORTAL -> INTERNED_IMMORTAL (if needed) */ From 4f977e04389c4a899b4f06b7a2e2ea2a6f89ae9c Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 17 Mar 2026 22:20:21 -0400 Subject: [PATCH 2/3] Add scaling benchmark and fix refcount contention on type --- Objects/object.c | 4 ++-- Tools/ftscalingbench/ftscalingbench.py | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/Objects/object.c b/Objects/object.c index 585f6da6f839ed..09d14d22d295a4 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1999,7 +1999,7 @@ _PyObject_GenericSetAttrWithDict(PyObject *obj, PyObject *name, } Py_INCREF(name); - Py_INCREF(tp); + _Py_INCREF_TYPE(tp); PyThreadState *tstate = _PyThreadState_GET(); _PyCStackRef cref; @@ -2074,7 +2074,7 @@ _PyObject_GenericSetAttrWithDict(PyObject *obj, PyObject *name, } done: _PyThreadState_PopCStackRef(tstate, &cref); - Py_DECREF(tp); + _Py_DECREF_TYPE(tp); Py_DECREF(name); return res; } diff --git a/Tools/ftscalingbench/ftscalingbench.py b/Tools/ftscalingbench/ftscalingbench.py index 8d8bbc88e7f30a..546f64b430ce83 100644 --- a/Tools/ftscalingbench/ftscalingbench.py +++ b/Tools/ftscalingbench/ftscalingbench.py @@ -264,6 +264,15 @@ def deepcopy(): for i in range(40 * WORK_SCALE): copy.deepcopy(x) +@register_benchmark +def setattr_non_interned(): + prefix = sys.implementation.name + obj = MyObject() + for _ in range(1000 * WORK_SCALE): + setattr(obj, f"{prefix}_a", None) + setattr(obj, f"{prefix}_b", None) + setattr(obj, f"{prefix}_c", None) + def bench_one_thread(func): t0 = time.perf_counter_ns() From 5ba9f631bebc80ad3150a31024fa7f5cc8663337 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 17 Mar 2026 22:22:03 -0400 Subject: [PATCH 3/3] Any string for prefix is fine --- Tools/ftscalingbench/ftscalingbench.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/ftscalingbench/ftscalingbench.py b/Tools/ftscalingbench/ftscalingbench.py index 546f64b430ce83..7e7b7bee471c76 100644 --- a/Tools/ftscalingbench/ftscalingbench.py +++ b/Tools/ftscalingbench/ftscalingbench.py @@ -266,7 +266,7 @@ def deepcopy(): @register_benchmark def setattr_non_interned(): - prefix = sys.implementation.name + prefix = "prefix" obj = MyObject() for _ in range(1000 * WORK_SCALE): setattr(obj, f"{prefix}_a", None)