From 4c9f1aaf5870307efdd233538c28bdd090405560 Mon Sep 17 00:00:00 2001 From: abebus Date: Mon, 27 Oct 2025 23:21:36 +0300 Subject: [PATCH 01/24] this reuses SOME interned strings, but not `utf-8` and friends --- Objects/codeobject.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 3aea2038fd17e7..fc196b1886cc43 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -113,7 +113,7 @@ PyCode_ClearWatcher(int watcher_id) #define _PyCodeObject_CAST(op) (assert(PyCode_Check(op)), (PyCodeObject *)(op)) -static int +static inline int should_intern_string(PyObject *o) { #ifdef Py_GIL_DISABLED @@ -196,6 +196,22 @@ intern_strings(PyObject *tuple) return 0; } +static inline PyObject* +get_interned_string(PyObject *interned_dict, PyObject *s) { + if (!PyUnicode_CheckExact(s)) { + return NULL; + } + + PyObject *existing = PyDict_GetItemWithError(interned_dict, s); + if (existing == NULL) { + if (PyErr_Occurred()) { + return NULL; + } + return NULL; + } + return existing; +} + /* Intern constants. In the default build, this interns selected string constants. In the free-threaded build, this also interns non-string constants. 
*/ @@ -203,10 +219,22 @@ static int intern_constants(PyObject *tuple, int *modified) { PyInterpreterState *interp = _PyInterpreterState_GET(); + PyObject *interned_dict = _Py_INTERP_CACHED_OBJECT(interp, interned_strings); for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); - if (PyUnicode_CheckExact(v)) { - if (should_intern_string(v)) { + if (PyUnicode_CheckExact(v) && PyUnicode_GET_LENGTH(v) > 1) { + if (PyUnicode_CHECK_INTERNED(v) != 0) { + continue; + } + PyObject *interned = get_interned_string(interned_dict, v); + if (interned != NULL && interned != v) { + Py_INCREF(interned); + PyTuple_SET_ITEM(tuple, i, interned); + Py_DECREF(v); + if (modified) { + *modified = 1; + } + } else if (should_intern_string(v)) { PyObject *w = v; _PyUnicode_InternMortal(interp, &v); if (w != v) { From d287cc6fe26de0d31e79f5c3c3fb672b2c637e23 Mon Sep 17 00:00:00 2001 From: "Albert Eduardovich N." Date: Tue, 28 Oct 2025 00:27:32 +0300 Subject: [PATCH 02/24] Update codeobject.c: - unnecessary check --- Objects/codeobject.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index fc196b1886cc43..b7dcdbb359204c 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -198,10 +198,6 @@ intern_strings(PyObject *tuple) static inline PyObject* get_interned_string(PyObject *interned_dict, PyObject *s) { - if (!PyUnicode_CheckExact(s)) { - return NULL; - } - PyObject *existing = PyDict_GetItemWithError(interned_dict, s); if (existing == NULL) { if (PyErr_Occurred()) { From d9eaaf6249cb0e4fd82bd1fa9df6fdf6b6ee1046 Mon Sep 17 00:00:00 2001 From: "Albert Eduardovich N." 
Date: Tue, 28 Oct 2025 07:16:51 +0000 Subject: [PATCH 03/24] correct error handling, refcount interned_dict --- Objects/codeobject.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index b7dcdbb359204c..95b20adf0da812 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -196,17 +196,6 @@ intern_strings(PyObject *tuple) return 0; } -static inline PyObject* -get_interned_string(PyObject *interned_dict, PyObject *s) { - PyObject *existing = PyDict_GetItemWithError(interned_dict, s); - if (existing == NULL) { - if (PyErr_Occurred()) { - return NULL; - } - return NULL; - } - return existing; -} /* Intern constants. In the default build, this interns selected string constants. In the free-threaded build, this also interns non-string @@ -216,13 +205,17 @@ intern_constants(PyObject *tuple, int *modified) { PyInterpreterState *interp = _PyInterpreterState_GET(); PyObject *interned_dict = _Py_INTERP_CACHED_OBJECT(interp, interned_strings); + Py_INCREF(interned_dict); for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); if (PyUnicode_CheckExact(v) && PyUnicode_GET_LENGTH(v) > 1) { if (PyUnicode_CHECK_INTERNED(v) != 0) { continue; } - PyObject *interned = get_interned_string(interned_dict, v); + PyObject *interned = PyDict_GetItemWithError(interned_dict, v); + if (interned == NULL && PyErr_Occurred()) { + goto error; + } if (interned != NULL && interned != v) { Py_INCREF(interned); PyTuple_SET_ITEM(tuple, i, interned); @@ -243,25 +236,25 @@ intern_constants(PyObject *tuple, int *modified) } else if (PyTuple_CheckExact(v)) { if (intern_constants(v, NULL) < 0) { - return -1; + goto error; } } else if (PyFrozenSet_CheckExact(v)) { PyObject *w = v; PyObject *tmp = PySequence_Tuple(v); if (tmp == NULL) { - return -1; + goto error; } int tmp_modified = 0; if (intern_constants(tmp, &tmp_modified) < 0) { Py_DECREF(tmp); - 
return -1; + goto error; } if (tmp_modified) { v = PyFrozenSet_New(tmp); if (v == NULL) { Py_DECREF(tmp); - return -1; + goto error; } PyTuple_SET_ITEM(tuple, i, v); @@ -277,7 +270,7 @@ intern_constants(PyObject *tuple, int *modified) PySliceObject *slice = (PySliceObject *)v; PyObject *tmp = PyTuple_New(3); if (tmp == NULL) { - return -1; + goto error; } PyTuple_SET_ITEM(tmp, 0, Py_NewRef(slice->start)); PyTuple_SET_ITEM(tmp, 1, Py_NewRef(slice->stop)); @@ -285,7 +278,7 @@ intern_constants(PyObject *tuple, int *modified) int tmp_modified = 0; if (intern_constants(tmp, &tmp_modified) < 0) { Py_DECREF(tmp); - return -1; + goto error; } if (tmp_modified) { v = PySlice_New(PyTuple_GET_ITEM(tmp, 0), @@ -293,7 +286,7 @@ intern_constants(PyObject *tuple, int *modified) PyTuple_GET_ITEM(tmp, 2)); if (v == NULL) { Py_DECREF(tmp); - return -1; + goto error; } PyTuple_SET_ITEM(tuple, i, v); Py_DECREF(slice); @@ -312,7 +305,7 @@ intern_constants(PyObject *tuple, int *modified) { PyObject *interned = intern_one_constant(v); if (interned == NULL) { - return -1; + goto error; } else if (interned != v) { PyTuple_SET_ITEM(tuple, i, interned); @@ -324,7 +317,12 @@ intern_constants(PyObject *tuple, int *modified) } #endif } + Py_DECREF(interned_dict); return 0; + +error: + Py_DECREF(interned_dict); + return -1; } /* Return a shallow copy of a tuple that is From 5a8b4ce87e1dffb635484b8f36ff6e1cab199440 Mon Sep 17 00:00:00 2001 From: "Albert Eduardovich N." Date: Fri, 19 Dec 2025 10:49:59 +0000 Subject: [PATCH 04/24] get interned strings from another dict with interned strings? 
--- Objects/codeobject.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 95b20adf0da812..606b5d386bc8a2 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -25,6 +25,9 @@ #define INITIAL_SPECIALIZED_CODE_SIZE 16 +// copypaste from unicodeobject.c +#define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings + static const char * code_event_name(PyCodeEvent event) { switch (event) { @@ -196,7 +199,6 @@ intern_strings(PyObject *tuple) return 0; } - /* Intern constants. In the default build, this interns selected string constants. In the free-threaded build, this also interns non-string constants. */ @@ -205,6 +207,11 @@ intern_constants(PyObject *tuple, int *modified) { PyInterpreterState *interp = _PyInterpreterState_GET(); PyObject *interned_dict = _Py_INTERP_CACHED_OBJECT(interp, interned_strings); + // copypaste from unicodeobject.c +#ifdef Py_GIL_DISABLED +# define INTERN_MUTEX &_Py_INTERP_CACHED_OBJECT(interp, interned_mutex) +#endif + FT_MUTEX_LOCK(INTERN_MUTEX); Py_INCREF(interned_dict); for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); @@ -212,10 +219,17 @@ intern_constants(PyObject *tuple, int *modified) if (PyUnicode_CHECK_INTERNED(v) != 0) { continue; } + // PyObject *interned = PyDict_GetItemWithError(interned_dict, v); if (interned == NULL && PyErr_Occurred()) { goto error; } + if (!interned) { + interned = (PyObject *)_Py_hashtable_get(INTERNED_STRINGS, v); + if (interned == NULL && PyErr_Occurred()) { + goto error; + } + } if (interned != NULL && interned != v) { Py_INCREF(interned); PyTuple_SET_ITEM(tuple, i, interned); @@ -317,10 +331,12 @@ intern_constants(PyObject *tuple, int *modified) } #endif } + FT_MUTEX_UNLOCK(INTERN_MUTEX); Py_DECREF(interned_dict); return 0; error: + FT_MUTEX_UNLOCK(INTERN_MUTEX); Py_DECREF(interned_dict); return -1; } From 7ab67c422c2c132562c5009ca6205f24ebe59057 Mon 
Sep 17 00:00:00 2001 From: "Albert Eduardovich N." Date: Fri, 19 Dec 2025 11:39:26 +0000 Subject: [PATCH 05/24] [SKIP CI] cancelling previous workflow From 4a9e55e417f86959d10732c1008a41be665fd55a Mon Sep 17 00:00:00 2001 From: "Albert Eduardovich N." Date: Fri, 19 Dec 2025 11:46:38 +0000 Subject: [PATCH 06/24] deadlock --- Objects/codeobject.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 606b5d386bc8a2..4a7758a8379f9f 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -206,12 +206,9 @@ static int intern_constants(PyObject *tuple, int *modified) { PyInterpreterState *interp = _PyInterpreterState_GET(); - PyObject *interned_dict = _Py_INTERP_CACHED_OBJECT(interp, interned_strings); // copypaste from unicodeobject.c -#ifdef Py_GIL_DISABLED -# define INTERN_MUTEX &_Py_INTERP_CACHED_OBJECT(interp, interned_mutex) -#endif - FT_MUTEX_LOCK(INTERN_MUTEX); + PyObject *interned_dict = _Py_INTERP_CACHED_OBJECT(interp, interned_strings); + Py_BEGIN_CRITICAL_SECTION(interned_dict); Py_INCREF(interned_dict); for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); @@ -219,7 +216,6 @@ intern_constants(PyObject *tuple, int *modified) if (PyUnicode_CHECK_INTERNED(v) != 0) { continue; } - // PyObject *interned = PyDict_GetItemWithError(interned_dict, v); if (interned == NULL && PyErr_Occurred()) { goto error; @@ -331,12 +327,12 @@ intern_constants(PyObject *tuple, int *modified) } #endif } - FT_MUTEX_UNLOCK(INTERN_MUTEX); + Py_END_CRITICAL_SECTION(); Py_DECREF(interned_dict); return 0; error: - FT_MUTEX_UNLOCK(INTERN_MUTEX); + Py_END_CRITICAL_SECTION(); Py_DECREF(interned_dict); return -1; } From bc861a6aa3cffcc7e205c02fc439ca16d2489cc9 Mon Sep 17 00:00:00 2001 From: "Albert Eduardovich N." 
Date: Fri, 19 Dec 2025 11:56:46 +0000 Subject: [PATCH 07/24] fix usage of Py_BEGIN_CRITICAL_SECTION --- Objects/codeobject.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 4a7758a8379f9f..1b632dabee154e 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -206,9 +206,7 @@ static int intern_constants(PyObject *tuple, int *modified) { PyInterpreterState *interp = _PyInterpreterState_GET(); - // copypaste from unicodeobject.c PyObject *interned_dict = _Py_INTERP_CACHED_OBJECT(interp, interned_strings); - Py_BEGIN_CRITICAL_SECTION(interned_dict); Py_INCREF(interned_dict); for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); @@ -216,7 +214,10 @@ intern_constants(PyObject *tuple, int *modified) if (PyUnicode_CHECK_INTERNED(v) != 0) { continue; } - PyObject *interned = PyDict_GetItemWithError(interned_dict, v); + PyObject *interned; + Py_BEGIN_CRITICAL_SECTION(interned_dict); + interned = PyDict_GetItemWithError(interned_dict, v); + Py_END_CRITICAL_SECTION(); if (interned == NULL && PyErr_Occurred()) { goto error; } @@ -327,12 +328,10 @@ intern_constants(PyObject *tuple, int *modified) } #endif } - Py_END_CRITICAL_SECTION(); Py_DECREF(interned_dict); return 0; error: - Py_END_CRITICAL_SECTION(); Py_DECREF(interned_dict); return -1; } From 45129f88dd1b54da91f5ab8a34592a5e5eb4445d Mon Sep 17 00:00:00 2001 From: "Albert Eduardovich N." 
Date: Fri, 19 Dec 2025 12:23:51 +0000 Subject: [PATCH 08/24] just guessing --- Objects/codeobject.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 1b632dabee154e..99e86fc298590c 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -214,10 +214,8 @@ intern_constants(PyObject *tuple, int *modified) if (PyUnicode_CHECK_INTERNED(v) != 0) { continue; } - PyObject *interned; Py_BEGIN_CRITICAL_SECTION(interned_dict); - interned = PyDict_GetItemWithError(interned_dict, v); - Py_END_CRITICAL_SECTION(); + PyObject *interned = PyDict_GetItemWithError(interned_dict, v); if (interned == NULL && PyErr_Occurred()) { goto error; } @@ -244,6 +242,7 @@ intern_constants(PyObject *tuple, int *modified) } } } + Py_END_CRITICAL_SECTION(); } else if (PyTuple_CheckExact(v)) { if (intern_constants(v, NULL) < 0) { From 485414cca0498d47c5f3fd96638f8e7746958c60 Mon Sep 17 00:00:00 2001 From: abebus Date: Sat, 20 Dec 2025 20:44:36 +0300 Subject: [PATCH 09/24] ft build interns and immortalizes everything anyway --- Include/internal/pycore_unicodeobject.h | 4 ++++ Objects/codeobject.c | 16 ++++++++++------ Objects/unicodeobject.c | 5 ----- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 97dda73f9b584d..9a404b6b38677e 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -15,6 +15,10 @@ extern "C" { // Maximum code point of Unicode 6.0: 0x10ffff (1,114,111). #define _Py_MAX_UNICODE 0x10ffff +/* This hashtable holds statically allocated interned strings. + * See InternalDocs/string_interning.md for details. 
+ */ +#define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings extern int _PyUnicode_IsModifiable(PyObject *unicode); extern void _PyUnicodeWriter_InitWithBuffer( diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 99e86fc298590c..e8b518e61ac9e3 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -25,9 +25,6 @@ #define INITIAL_SPECIALIZED_CODE_SIZE 16 -// copypaste from unicodeobject.c -#define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings - static const char * code_event_name(PyCodeEvent event) { switch (event) { @@ -206,15 +203,17 @@ static int intern_constants(PyObject *tuple, int *modified) { PyInterpreterState *interp = _PyInterpreterState_GET(); +#if !defined(Py_GIL_DISABLED) PyObject *interned_dict = _Py_INTERP_CACHED_OBJECT(interp, interned_strings); Py_INCREF(interned_dict); +#endif for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); if (PyUnicode_CheckExact(v) && PyUnicode_GET_LENGTH(v) > 1) { if (PyUnicode_CHECK_INTERNED(v) != 0) { continue; } - Py_BEGIN_CRITICAL_SECTION(interned_dict); +#if !defined(Py_GIL_DISABLED) PyObject *interned = PyDict_GetItemWithError(interned_dict, v); if (interned == NULL && PyErr_Occurred()) { goto error; @@ -232,7 +231,9 @@ intern_constants(PyObject *tuple, int *modified) if (modified) { *modified = 1; } - } else if (should_intern_string(v)) { + } else +#endif + if (should_intern_string(v)) { PyObject *w = v; _PyUnicode_InternMortal(interp, &v); if (w != v) { @@ -242,7 +243,6 @@ intern_constants(PyObject *tuple, int *modified) } } } - Py_END_CRITICAL_SECTION(); } else if (PyTuple_CheckExact(v)) { if (intern_constants(v, NULL) < 0) { @@ -327,11 +327,15 @@ intern_constants(PyObject *tuple, int *modified) } #endif } +#if !defined(Py_GIL_DISABLED) Py_DECREF(interned_dict); +#endif return 0; error: +#if !defined(Py_GIL_DISABLED) Py_DECREF(interned_dict); +#endif return -1; } diff --git a/Objects/unicodeobject.c 
b/Objects/unicodeobject.c index f737a885f197a0..db11a19d8c2bfc 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -215,11 +215,6 @@ static inline PyObject *get_interned_dict(PyInterpreterState *interp) return _Py_INTERP_CACHED_OBJECT(interp, interned_strings); } -/* This hashtable holds statically allocated interned strings. - * See InternalDocs/string_interning.md for details. - */ -#define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings - /* Get number of all interned strings for the current interpreter. */ Py_ssize_t _PyUnicode_InternedSize(void) From cc63fa2a4c37b6f8224a36bacf8637d273a868f0 Mon Sep 17 00:00:00 2001 From: abebus Date: Sun, 21 Dec 2025 00:48:16 +0300 Subject: [PATCH 10/24] initial tests --- Lib/test/test_code.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index 655f5a9be7fa31..d0faca89f9d993 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -1168,6 +1168,34 @@ def test_stateless(self): def isinterned(s): return s is sys.intern(('_' + s + '_')[1:-1]) +# copypaste from 'Tools/build/generate_global_objects.py' +import os +import re +from pathlib import Path +ROOT = Path(__file__).resolve().parents[2] +def iter_files(): + for name in ('Modules', 'Objects', 'Parser', 'PC', 'Programs', 'Python'): + root = os.path.join(ROOT, name) + for dirname, _, files in os.walk(root): + for name in files: + if not name.endswith(('.c', '.h')): + continue + yield os.path.join(dirname, name) + +def iter_global_strings(): + str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)') + for filename in iter_files(): + try: + infile = open(filename, encoding='utf-8') + except FileNotFoundError: + # The file must have been a temporary file. 
+ continue + with infile: + for lno, line in enumerate(infile, 1): + for m in str_regex.finditer(line): + varname, string = m.groups() + yield string + class CodeConstsTest(unittest.TestCase): def find_const(self, consts, value): @@ -1251,6 +1279,10 @@ class MyInt(int): self.assertIsInstance(code.co_consts[1], Unhashable) self.assertEqual(code.co_consts[2], code.co_consts[3]) + @cpython_only + def test__Py_DECLARE_STR_is_interned(self): + for global_string in iter_global_strings(): + self.assertIsInterned(global_string) class CodeWeakRefTest(unittest.TestCase): From ad6af24e7093539757db6daae9f631fb8665c3e6 Mon Sep 17 00:00:00 2001 From: abebus Date: Sun, 21 Dec 2025 12:34:01 +0300 Subject: [PATCH 11/24] fix tests --- Lib/test/test_code.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index d0faca89f9d993..39d6e8cbb51130 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -1282,8 +1282,15 @@ class MyInt(int): @cpython_only def test__Py_DECLARE_STR_is_interned(self): for global_string in iter_global_strings(): + # compile given string to a codeobject + global_string = eval(f"'{global_string}'") self.assertIsInterned(global_string) + @cpython_only + def test_non_internable_strings_not_interned(self): + self.assertIsNotInterned("not-internable") + self.assertIsNotInterned("not.internable") + class CodeWeakRefTest(unittest.TestCase): def test_basic(self): From 903cc962fe96fceddc95168af411ce98f49fe76b Mon Sep 17 00:00:00 2001 From: abebus Date: Sun, 21 Dec 2025 23:53:50 +0300 Subject: [PATCH 12/24] fix tests --- Lib/test/test_code.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index 39d6e8cbb51130..9bda4bae61ab2c 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -1280,6 +1280,7 @@ class MyInt(int): self.assertEqual(code.co_consts[2], code.co_consts[3]) @cpython_only + @unittest.skipIf(Py_GIL_DISABLED, "free-threaded build interns 
all string constants") def test__Py_DECLARE_STR_is_interned(self): for global_string in iter_global_strings(): # compile given string to a codeobject @@ -1287,6 +1288,7 @@ def test__Py_DECLARE_STR_is_interned(self): self.assertIsInterned(global_string) @cpython_only + @unittest.skipIf(Py_GIL_DISABLED, "free-threaded build interns all string constants") def test_non_internable_strings_not_interned(self): self.assertIsNotInterned("not-internable") self.assertIsNotInterned("not.internable") From 9ec2e09cf9fe76beb5082e88eaccba91a961bc9d Mon Sep 17 00:00:00 2001 From: abebus Date: Mon, 22 Dec 2025 00:36:09 +0300 Subject: [PATCH 13/24] global cache first --- Objects/codeobject.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index e8b518e61ac9e3..1045d5027c59c9 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -214,15 +214,10 @@ intern_constants(PyObject *tuple, int *modified) continue; } #if !defined(Py_GIL_DISABLED) - PyObject *interned = PyDict_GetItemWithError(interned_dict, v); - if (interned == NULL && PyErr_Occurred()) { - goto error; - } - if (!interned) { - interned = (PyObject *)_Py_hashtable_get(INTERNED_STRINGS, v); - if (interned == NULL && PyErr_Occurred()) { - goto error; - } + PyObject *interned = _Py_hashtable_get(INTERNED_STRINGS, v); + if (interned == NULL) { + interned = PyDict_GetItemWithError(interned_dict, v); + if (PyErr_Occurred()) goto error; } if (interned != NULL && interned != v) { Py_INCREF(interned); From a1655f1fa396f164d0c78f05f496af19717349a8 Mon Sep 17 00:00:00 2001 From: "Albert Eduardovich N." 
Date: Mon, 22 Dec 2025 11:02:16 +0000 Subject: [PATCH 14/24] reorganize tests --- Lib/test/support/constants_helper.py | 22 ++++++++++++ Lib/test/support/project_files_helper.py | 21 +++++++++++ Lib/test/test_code.py | 46 +++++++----------------- 3 files changed, 56 insertions(+), 33 deletions(-) create mode 100644 Lib/test/support/constants_helper.py create mode 100644 Lib/test/support/project_files_helper.py diff --git a/Lib/test/support/constants_helper.py b/Lib/test/support/constants_helper.py new file mode 100644 index 00000000000000..bac33fca9bc584 --- /dev/null +++ b/Lib/test/support/constants_helper.py @@ -0,0 +1,22 @@ +import re +from pathlib import Path + +from typing import Iterable + +from test.support.project_files_helper import iter_all_c_files + + +def iter_global_strings() -> Iterable[str]: + id_regex = re.compile(r"\b_Py_ID\((\w+)\)") + str_regex = re.compile(r'\b_Py_DECLARE_STR\((?:\w+), "(.*?)"\)') + for filename in iter_all_c_files(): + infile = Path(filename) + if not infile.exists(): + # The file must have been a temporary file. 
+ continue + with infile.open(encoding="utf-8") as infile_open: + for line in infile_open: + for m in id_regex.finditer(line): + yield m.group(1) + for m in str_regex.finditer(line): + yield m.group(1) diff --git a/Lib/test/support/project_files_helper.py b/Lib/test/support/project_files_helper.py new file mode 100644 index 00000000000000..ed24df1603b604 --- /dev/null +++ b/Lib/test/support/project_files_helper.py @@ -0,0 +1,21 @@ +from pathlib import Path + +from typing import Iterable + +python_root = Path(__file__).resolve().parents[3] + + +def iter_all_c_files() -> Iterable[Path]: + for top_directory_name in ( + "Modules", + "Objects", + "Parser", + "PC", + "Programs", + "Python", + ): + for dirname, _, files in (python_root / top_directory_name).walk(): + for name in files: + if not name.endswith((".c", ".h")): + continue + yield dirname / name diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index 9bda4bae61ab2c..43eacafceef2ad 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -204,6 +204,8 @@ import weakref import dis +from test.support.constants_helper import iter_global_strings + try: import ctypes except ImportError: @@ -1168,34 +1170,6 @@ def test_stateless(self): def isinterned(s): return s is sys.intern(('_' + s + '_')[1:-1]) -# copypaste from 'Tools/build/generate_global_objects.py' -import os -import re -from pathlib import Path -ROOT = Path(__file__).resolve().parents[2] -def iter_files(): - for name in ('Modules', 'Objects', 'Parser', 'PC', 'Programs', 'Python'): - root = os.path.join(ROOT, name) - for dirname, _, files in os.walk(root): - for name in files: - if not name.endswith(('.c', '.h')): - continue - yield os.path.join(dirname, name) - -def iter_global_strings(): - str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)') - for filename in iter_files(): - try: - infile = open(filename, encoding='utf-8') - except FileNotFoundError: - # The file must have been a temporary file. 
- continue - with infile: - for lno, line in enumerate(infile, 1): - for m in str_regex.finditer(line): - varname, string = m.groups() - yield string - class CodeConstsTest(unittest.TestCase): def find_const(self, consts, value): @@ -1283,15 +1257,21 @@ class MyInt(int): @unittest.skipIf(Py_GIL_DISABLED, "free-threaded build interns all string constants") def test__Py_DECLARE_STR_is_interned(self): for global_string in iter_global_strings(): - # compile given string to a codeobject - global_string = eval(f"'{global_string}'") - self.assertIsInterned(global_string) + with self.subTest(global_string=global_string): + self.assertIsInterned(eval(f"'{global_string}'")) @cpython_only @unittest.skipIf(Py_GIL_DISABLED, "free-threaded build interns all string constants") def test_non_internable_strings_not_interned(self): - self.assertIsNotInterned("not-internable") - self.assertIsNotInterned("not.internable") + noninternable_strings = ( + "not-internable", + "not.internable", + "не_интернируемый", + "􀀀", + ) + for noninternable in noninternable_strings: + with self.subTest(noninternable=noninternable): + self.assertIsNotInterned(eval(f"'{noninternable}'")) class CodeWeakRefTest(unittest.TestCase): From 8863b2e2327457bbe666ac98469421b4e44d5f20 Mon Sep 17 00:00:00 2001 From: "Albert Eduardovich N." Date: Mon, 22 Dec 2025 12:13:34 +0000 Subject: [PATCH 15/24] unnecessary, but pretty --- Objects/codeobject.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 1045d5027c59c9..b14a8ff4d04b83 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -196,6 +196,8 @@ intern_strings(PyObject *tuple) return 0; } +#define _constants_tuple_modified(modified) if (modified) *modified = 1 + /* Intern constants. In the default build, this interns selected string constants. In the free-threaded build, this also interns non-string constants. 
*/ @@ -223,9 +225,7 @@ intern_constants(PyObject *tuple, int *modified) Py_INCREF(interned); PyTuple_SET_ITEM(tuple, i, interned); Py_DECREF(v); - if (modified) { - *modified = 1; - } + _constants_tuple_modified(modified); } else #endif if (should_intern_string(v)) { @@ -233,9 +233,7 @@ intern_constants(PyObject *tuple, int *modified) _PyUnicode_InternMortal(interp, &v); if (w != v) { PyTuple_SET_ITEM(tuple, i, v); - if (modified) { - *modified = 1; - } + _constants_tuple_modified(modified); } } } @@ -264,9 +262,7 @@ intern_constants(PyObject *tuple, int *modified) PyTuple_SET_ITEM(tuple, i, v); Py_DECREF(w); - if (modified) { - *modified = 1; - } + _constants_tuple_modified(modified); } Py_DECREF(tmp); } @@ -295,9 +291,7 @@ intern_constants(PyObject *tuple, int *modified) } PyTuple_SET_ITEM(tuple, i, v); Py_DECREF(slice); - if (modified) { - *modified = 1; - } + _constants_tuple_modified(modified); } Py_DECREF(tmp); } @@ -315,9 +309,7 @@ intern_constants(PyObject *tuple, int *modified) else if (interned != v) { PyTuple_SET_ITEM(tuple, i, interned); Py_SETREF(v, interned); - if (modified) { - *modified = 1; - } + _constants_tuple_modified(modified); } } #endif From 1df49ee789f3f77a4ac25a334d45871810adba4e Mon Sep 17 00:00:00 2001 From: "Albert Eduardovich N." 
Date: Mon, 22 Dec 2025 12:28:34 +0000 Subject: [PATCH 16/24] move dicts of interned strings to appropriate section in header --- Include/internal/pycore_unicodeobject.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 9a404b6b38677e..fd097a0ef75c7e 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -10,15 +10,12 @@ extern "C" { #include "pycore_fileutils.h" // _Py_error_handler #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI +#include "pycore_global_objects.h"// _Py_INTERP_CACHED_OBJECT // Maximum code point of Unicode 6.0: 0x10ffff (1,114,111). #define _Py_MAX_UNICODE 0x10ffff -/* This hashtable holds statically allocated interned strings. - * See InternalDocs/string_interning.md for details. - */ -#define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings extern int _PyUnicode_IsModifiable(PyObject *unicode); extern void _PyUnicodeWriter_InitWithBuffer( @@ -362,6 +359,19 @@ extern PyTypeObject _PyUnicodeASCIIIter_Type; // All these are "ref-neutral", like the public PyUnicode_InternInPlace. +/* This hashtable holds statically allocated interned strings. + * See InternalDocs/string_interning.md for details. + */ +#define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings + +/* This dictionary holds per-interpreter interned strings. + * See InternalDocs/string_interning.md for details. + */ +static inline PyObject *get_interned_dict(PyInterpreterState *interp) +{ + return _Py_INTERP_CACHED_OBJECT(interp, interned_strings); +} + // Explicit interning routines: PyAPI_FUNC(void) _PyUnicode_InternMortal(PyInterpreterState *interp, PyObject **); PyAPI_FUNC(void) _PyUnicode_InternImmortal(PyInterpreterState *interp, PyObject **); From 98ac326f14f548045f27e50aaa12781ea214c151 Mon Sep 17 00:00:00 2001 From: "Albert Eduardovich N." 
Date: Mon, 22 Dec 2025 12:29:34 +0000 Subject: [PATCH 17/24] add notes in comments that this is copypaste --- Lib/test/support/constants_helper.py | 1 + Lib/test/support/project_files_helper.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Lib/test/support/constants_helper.py b/Lib/test/support/constants_helper.py index bac33fca9bc584..5dbf418110a325 100644 --- a/Lib/test/support/constants_helper.py +++ b/Lib/test/support/constants_helper.py @@ -6,6 +6,7 @@ from test.support.project_files_helper import iter_all_c_files +# copypaste from 'Tools/build/generate_global_objects.py' def iter_global_strings() -> Iterable[str]: id_regex = re.compile(r"\b_Py_ID\((\w+)\)") str_regex = re.compile(r'\b_Py_DECLARE_STR\((?:\w+), "(.*?)"\)') diff --git a/Lib/test/support/project_files_helper.py b/Lib/test/support/project_files_helper.py index ed24df1603b604..59f278821e1702 100644 --- a/Lib/test/support/project_files_helper.py +++ b/Lib/test/support/project_files_helper.py @@ -2,9 +2,10 @@ from typing import Iterable -python_root = Path(__file__).resolve().parents[3] +ROOT = Path(__file__).resolve().parents[3] +# copypaste from 'Tools/build/generate_global_objects.py' def iter_all_c_files() -> Iterable[Path]: for top_directory_name in ( "Modules", @@ -14,7 +15,7 @@ def iter_all_c_files() -> Iterable[Path]: "Programs", "Python", ): - for dirname, _, files in (python_root / top_directory_name).walk(): + for dirname, _, files in (ROOT / top_directory_name).walk(): for name in files: if not name.endswith((".c", ".h")): continue From 0c6d4501727dbd86631faf08be054bba294b4939 Mon Sep 17 00:00:00 2001 From: "Albert Eduardovich N." 
Date: Mon, 22 Dec 2025 12:29:47 +0000 Subject: [PATCH 18/24] move import --- Lib/test/test_code.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index 43eacafceef2ad..b8b7b16fad3417 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -204,8 +204,6 @@ import weakref import dis -from test.support.constants_helper import iter_global_strings - try: import ctypes except ImportError: @@ -213,6 +211,7 @@ from test.support import (cpython_only, check_impl_detail, requires_debug_ranges, gc_collect, Py_GIL_DISABLED) +from test.support.constants_helper import iter_global_strings from test.support.script_helper import assert_python_ok from test.support import threading_helper, import_helper from test.support.bytecode_helper import instructions_with_positions From 26fe5d5c50ef53c7526219ee9642804f947db943 Mon Sep 17 00:00:00 2001 From: "Albert Eduardovich N." Date: Mon, 22 Dec 2025 12:30:04 +0000 Subject: [PATCH 19/24] move dicts of interned strings to appropriate section in header --- Objects/unicodeobject.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index db11a19d8c2bfc..711f828eb603e5 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -207,13 +207,6 @@ _PyUnicode_GetEmpty(void) return &_Py_STR(empty); } -/* This dictionary holds per-interpreter interned strings. - * See InternalDocs/string_interning.md for details. - */ -static inline PyObject *get_interned_dict(PyInterpreterState *interp) -{ - return _Py_INTERP_CACHED_OBJECT(interp, interned_strings); -} /* Get number of all interned strings for the current interpreter. */ Py_ssize_t From f10e201851612660aa7a5feb3b73d145263433e9 Mon Sep 17 00:00:00 2001 From: "Albert Eduardovich N." 
Date: Mon, 22 Dec 2025 12:30:56 +0000 Subject: [PATCH 20/24] reuse `get_interned_dict` from header, and do not refcount it --- Objects/codeobject.c | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index b14a8ff4d04b83..5c4eb2a2fb3225 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -206,8 +206,7 @@ intern_constants(PyObject *tuple, int *modified) { PyInterpreterState *interp = _PyInterpreterState_GET(); #if !defined(Py_GIL_DISABLED) - PyObject *interned_dict = _Py_INTERP_CACHED_OBJECT(interp, interned_strings); - Py_INCREF(interned_dict); + PyObject *interned_dict = get_interned_dict(interp); #endif for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); @@ -219,7 +218,7 @@ intern_constants(PyObject *tuple, int *modified) PyObject *interned = _Py_hashtable_get(INTERNED_STRINGS, v); if (interned == NULL) { interned = PyDict_GetItemWithError(interned_dict, v); - if (PyErr_Occurred()) goto error; + if (PyErr_Occurred()) return -1; } if (interned != NULL && interned != v) { Py_INCREF(interned); @@ -239,25 +238,25 @@ intern_constants(PyObject *tuple, int *modified) } else if (PyTuple_CheckExact(v)) { if (intern_constants(v, NULL) < 0) { - goto error; + return -1; } } else if (PyFrozenSet_CheckExact(v)) { PyObject *w = v; PyObject *tmp = PySequence_Tuple(v); if (tmp == NULL) { - goto error; + return -1; } int tmp_modified = 0; if (intern_constants(tmp, &tmp_modified) < 0) { Py_DECREF(tmp); - goto error; + return -1; } if (tmp_modified) { v = PyFrozenSet_New(tmp); if (v == NULL) { Py_DECREF(tmp); - goto error; + return -1; } PyTuple_SET_ITEM(tuple, i, v); @@ -271,7 +270,7 @@ intern_constants(PyObject *tuple, int *modified) PySliceObject *slice = (PySliceObject *)v; PyObject *tmp = PyTuple_New(3); if (tmp == NULL) { - goto error; + return -1; } PyTuple_SET_ITEM(tmp, 0, Py_NewRef(slice->start)); PyTuple_SET_ITEM(tmp, 1, 
Py_NewRef(slice->stop)); @@ -279,7 +278,7 @@ intern_constants(PyObject *tuple, int *modified) int tmp_modified = 0; if (intern_constants(tmp, &tmp_modified) < 0) { Py_DECREF(tmp); - goto error; + return -1; } if (tmp_modified) { v = PySlice_New(PyTuple_GET_ITEM(tmp, 0), @@ -287,7 +286,7 @@ intern_constants(PyObject *tuple, int *modified) PyTuple_GET_ITEM(tmp, 2)); if (v == NULL) { Py_DECREF(tmp); - goto error; + return -1; } PyTuple_SET_ITEM(tuple, i, v); Py_DECREF(slice); @@ -304,7 +303,7 @@ intern_constants(PyObject *tuple, int *modified) { PyObject *interned = intern_one_constant(v); if (interned == NULL) { - goto error; + return -1; } else if (interned != v) { PyTuple_SET_ITEM(tuple, i, interned); @@ -314,16 +313,7 @@ intern_constants(PyObject *tuple, int *modified) } #endif } -#if !defined(Py_GIL_DISABLED) - Py_DECREF(interned_dict); -#endif return 0; - -error: -#if !defined(Py_GIL_DISABLED) - Py_DECREF(interned_dict); -#endif - return -1; } /* Return a shallow copy of a tuple that is From 9cec5f2720a81ddd03ccebaeff60f41c49c433d3 Mon Sep 17 00:00:00 2001 From: abebus Date: Tue, 23 Dec 2025 00:45:13 +0300 Subject: [PATCH 21/24] add test, confuse myself even more --- Lib/test/test_code.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index b8b7b16fad3417..baca016bf410cf 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -1259,19 +1259,32 @@ def test__Py_DECLARE_STR_is_interned(self): with self.subTest(global_string=global_string): self.assertIsInterned(eval(f"'{global_string}'")) + noninternable_by_default = textwrap.dedent(''' + not-internable + not.internable + не_интернируемый + str with spaces + ''' + '\U00100000') + @cpython_only @unittest.skipIf(Py_GIL_DISABLED, "free-threaded build interns all string constants") def test_non_internable_strings_not_interned(self): - noninternable_strings = ( - "not-internable", - "not.internable", - 
"не_интернируемый", - "􀀀", - ) - for noninternable in noninternable_strings: + for noninternable in self.noninternable_by_default.strip().splitlines(): with self.subTest(noninternable=noninternable): self.assertIsNotInterned(eval(f"'{noninternable}'")) + @cpython_only + @unittest.skipIf(Py_GIL_DISABLED, "free-threaded build interns all string constants") + def test_explicitly_interned_strings(self): + for noninternable in self.noninternable_by_default.strip().splitlines(): + self.assertIsNotInterned(noninternable) + sys.intern(noninternable) + with self.subTest(noninternable=noninternable): + self.assertIsInterned(noninternable) + interned_from_code = eval(f"'{noninternable}'") + self.assertIsInterned(interned_from_code) + self.assertIs(noninternable, interned_from_code) + class CodeWeakRefTest(unittest.TestCase): def test_basic(self): From 4d9f06885ca10e9eacb73aada694712a04c77588 Mon Sep 17 00:00:00 2001 From: abebus Date: Tue, 23 Dec 2025 01:13:34 +0300 Subject: [PATCH 22/24] why --- Lib/test/test_code.py | 7 +++++-- Objects/codeobject.c | 5 +---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index baca016bf410cf..fd5e09edfddfe3 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -1259,12 +1259,15 @@ def test__Py_DECLARE_STR_is_interned(self): with self.subTest(global_string=global_string): self.assertIsInterned(eval(f"'{global_string}'")) - noninternable_by_default = textwrap.dedent(''' + noninternable_by_default = textwrap.dedent(f''' not-internable not.internable не_интернируемый str with spaces - ''' + '\U00100000') + {chr(0x011111)} + {chr(0x9999)} + {chr(0x100)} + ''') @cpython_only @unittest.skipIf(Py_GIL_DISABLED, "free-threaded build interns all string constants") diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 5c4eb2a2fb3225..99d78a70771a16 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -205,9 +205,6 @@ static int intern_constants(PyObject *tuple, 
int *modified) { PyInterpreterState *interp = _PyInterpreterState_GET(); -#if !defined(Py_GIL_DISABLED) - PyObject *interned_dict = get_interned_dict(interp); -#endif for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); if (PyUnicode_CheckExact(v) && PyUnicode_GET_LENGTH(v) > 1) { @@ -217,7 +214,7 @@ intern_constants(PyObject *tuple, int *modified) #if !defined(Py_GIL_DISABLED) PyObject *interned = _Py_hashtable_get(INTERNED_STRINGS, v); if (interned == NULL) { - interned = PyDict_GetItemWithError(interned_dict, v); + interned = PyDict_GetItemWithError(get_interned_dict(interp), v); if (PyErr_Occurred()) return -1; } if (interned != NULL && interned != v) { From 7e072792a28b92890ad9767966e2ccc0bb6e9b47 Mon Sep 17 00:00:00 2001 From: abebus Date: Tue, 23 Dec 2025 01:15:27 +0300 Subject: [PATCH 23/24] :) --- Objects/codeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 99d78a70771a16..620eaa0f082629 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -207,7 +207,7 @@ intern_constants(PyObject *tuple, int *modified) PyInterpreterState *interp = _PyInterpreterState_GET(); for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); - if (PyUnicode_CheckExact(v) && PyUnicode_GET_LENGTH(v) > 1) { + if (PyUnicode_CheckExact(v)) { if (PyUnicode_CHECK_INTERNED(v) != 0) { continue; } From 056e2c55d57481fd5ccf184201af849c02562326 Mon Sep 17 00:00:00 2001 From: abebus Date: Tue, 23 Dec 2025 10:40:36 +0300 Subject: [PATCH 24/24] rerun workflow