diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py index f9075b8f0d98ac..1c420ba1c0b7a2 100644 --- a/Lib/encodings/__init__.py +++ b/Lib/encodings/__init__.py @@ -33,6 +33,7 @@ from . import aliases _cache = {} +_MAXCACHE = 500 _unknown = '--unknown--' _import_tail = ['*'] _aliases = aliases.aliases @@ -115,6 +116,8 @@ def search_function(encoding): if mod is None: # Cache misses + if len(_cache) >= _MAXCACHE: + _cache.clear() _cache[encoding] = None return None @@ -136,6 +139,8 @@ def search_function(encoding): entry = codecs.CodecInfo(*entry) # Cache the codec registry entry + if len(_cache) >= _MAXCACHE: + _cache.clear() _cache[encoding] = entry # Register its aliases (without overwriting previously registered diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 2d20efe74f2ea6..9eca8fff62d84d 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -3822,5 +3822,16 @@ def test_encodings_normalize_encoding(self): self.assertEqual(normalize('utf...8'), 'utf...8') +class CodecCacheTest(unittest.TestCase): + def test_cache_bounded(self): + for i in range(encodings._MAXCACHE + 1000): + try: + b'x'.decode(f'nonexist_{i}') + except LookupError: + pass + + self.assertLessEqual(len(encodings._cache), encodings._MAXCACHE) + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2026-03-17-11-46-20.gh-issue-146054.udYcqn.rst b/Misc/NEWS.d/next/Library/2026-03-17-11-46-20.gh-issue-146054.udYcqn.rst new file mode 100644 index 00000000000000..8692c7f171d0fb --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-03-17-11-46-20.gh-issue-146054.udYcqn.rst @@ -0,0 +1,2 @@ +Limit the size of :func:`encodings.search_function` cache. +Found by OSS Fuzz in :oss-fuzz:`493449985`.