From a6c942cbbad909fe81aa76afeb58d5c2f077d107 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Tue, 17 Mar 2026 11:50:30 +0000 Subject: [PATCH 1/4] Limit the growth of `encodings.search_function` cache --- Lib/encodings/__init__.py | 5 +++-- Lib/test/test_codecs.py | 11 +++++++++++ .../2026-03-17-11-46-20.gh-issue-146054.udYcqn.rst | 1 + 3 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-03-17-11-46-20.gh-issue-146054.udYcqn.rst diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py index e205ec326376d8..40f0737f7d4ce0 100644 --- a/Lib/encodings/__init__.py +++ b/Lib/encodings/__init__.py @@ -34,6 +34,7 @@ from . import aliases _cache = {} +_MAXCACHE = 500 _unknown = '--unknown--' _import_tail = ['*'] _aliases = aliases.aliases @@ -110,8 +111,8 @@ def search_function(encoding): mod = None if mod is None: - # Cache misses - _cache[encoding] = None + if len(_cache) < _MAXCACHE: + _cache[encoding] = None return None # Now ask the module for the registry entry diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index c31faec9ee5214..79c8a7ef886482 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -3908,5 +3908,16 @@ def test_encodings_normalize_encoding(self): self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8') +class CodecCacheTest(unittest.TestCase): + def test_cache_bounded(self): + for i in range(encodings._MAXCACHE + 1000): + try: + b'x'.decode(f'nonexist_{i}') + except LookupError: + pass + + self.assertLessEqual(len(encodings._cache), encodings._MAXCACHE) + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2026-03-17-11-46-20.gh-issue-146054.udYcqn.rst b/Misc/NEWS.d/next/Library/2026-03-17-11-46-20.gh-issue-146054.udYcqn.rst new file mode 100644 index 00000000000000..c549b197a10b5c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-03-17-11-46-20.gh-issue-146054.udYcqn.rst @@ -0,0 +1 @@ +Limit the size of 
:func:`encodings.search_function` cache. From ed48ab8ca680045868fe7619ca9b56792e5691e8 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Tue, 17 Mar 2026 11:52:35 +0000 Subject: [PATCH 2/4] Add OSS Fuzz issue --- .../next/Library/2026-03-17-11-46-20.gh-issue-146054.udYcqn.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Misc/NEWS.d/next/Library/2026-03-17-11-46-20.gh-issue-146054.udYcqn.rst b/Misc/NEWS.d/next/Library/2026-03-17-11-46-20.gh-issue-146054.udYcqn.rst index c549b197a10b5c..8692c7f171d0fb 100644 --- a/Misc/NEWS.d/next/Library/2026-03-17-11-46-20.gh-issue-146054.udYcqn.rst +++ b/Misc/NEWS.d/next/Library/2026-03-17-11-46-20.gh-issue-146054.udYcqn.rst @@ -1 +1,2 @@ Limit the size of :func:`encodings.search_function` cache. +Found by OSS Fuzz in :oss-fuzz:`493449985`. From d57d3979f1bb82ce820689fade010247957ddcf3 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Tue, 17 Mar 2026 12:21:45 +0000 Subject: [PATCH 3/4] Clear the cache when it is full instead of refusing new entries --- Lib/encodings/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py index 40f0737f7d4ce0..fbf5a3a7f2ce18 100644 --- a/Lib/encodings/__init__.py +++ b/Lib/encodings/__init__.py @@ -111,8 +111,9 @@ def search_function(encoding): mod = None if mod is None: - if len(_cache) < _MAXCACHE: - _cache[encoding] = None + if len(_cache) >= _MAXCACHE: + _cache.clear() + _cache[encoding] = None return None # Now ask the module for the registry entry @@ -133,6 +134,8 @@ def search_function(encoding): entry = codecs.CodecInfo(*entry) # Cache the codec registry entry + if len(_cache) >= _MAXCACHE: + _cache.clear() _cache[encoding] = entry # Register its aliases (without overwriting previously registered From ef2851732705d9ace2b80b9d6aa2d32d174b97a0 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Tue, 17 Mar 2026 12:23:54 +0000 Subject: [PATCH 4/4] Restore accidentally removed comment
--- Lib/encodings/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py index fbf5a3a7f2ce18..169c48324f227b 100644 --- a/Lib/encodings/__init__.py +++ b/Lib/encodings/__init__.py @@ -111,6 +111,7 @@ def search_function(encoding): mod = None if mod is None: + # Cache misses if len(_cache) >= _MAXCACHE: _cache.clear() _cache[encoding] = None