python · malemburg · Nov 9, 2025 · Oct 13, 2025 · Oct 13, 2025 · Oct 13, 2025
diff --git a/Doc/deprecations/pending-removal-in-3.17.rst b/Doc/deprecations/pending-removal-in-3.17.rst
@@ -23,6 +23,12 @@ Pending removal in Python 3.17
     (Contributed by Shantanu Jain in :gh:`91896`.)
 
 
+* :mod:`encodings`:
+
+  - Passing non-ASCII *encoding* names to :func:`encodings.normalize_encoding`
+    is deprecated and scheduled for removal in Python 3.17.
+    (Contributed by Stan Ulbrych in :gh:`136702`.)
+
 * :mod:`typing`:
 
   - Before Python 3.14, old-style unions were implemented using the private class

@@ -796,13 +796,14 @@ def params(self):
                         value = urllib.parse.unquote(value, encoding='latin-1')
                     else:
                         try:
+                            charset = utils._sanitize_charset_name(charset, 'ascii')
                             value = value.decode(charset, 'surrogateescape')
                         except (LookupError, UnicodeEncodeError):
                             # XXX: there should really be a custom defect for
                             # unknown character set to make it easy to find,
                             # because otherwise unknown charset is a silent
                             # failure.
-                            value = value.decode('us-ascii', 'surrogateescape')
+                            value = value.decode('ascii', 'surrogateescape')
                         if utils._has_surrogates(value):
                             param.defects.append(errors.UndecodableBytesDefect())
                 value_parts.append(value)

@@ -446,8 +446,16 @@ def decode_params(params):
                 new_params.append((name, '"%s"' % value))
     return new_params
 
+def _sanitize_charset_name(charset, fallback_charset):
+    # Drop every non-ASCII character, including astral code points and
+    # lone surrogates; fall back to fallback_charset if nothing remains.
+    if not charset:
+        return charset
+    sanitized = charset.encode('ascii', 'ignore').decode('ascii')
+    return sanitized if sanitized else fallback_charset
+
 def collapse_rfc2231_value(value, errors='replace',
-                           fallback_charset='us-ascii'):
+                           fallback_charset='ascii'):
     if not isinstance(value, tuple) or len(value) != 3:
         return unquote(value)
     # While value comes to us as a unicode string, we need it to be a bytes
@@ -458,6 +466,7 @@ def collapse_rfc2231_value(value, errors='replace',
         # Issue 17369: if charset/lang is None, decode_rfc2231 couldn't parse
         # the value, so use the fallback_charset.
         charset = fallback_charset
+    charset = _sanitize_charset_name(charset, fallback_charset)
     rawbytes = bytes(text, 'raw-unicode-escape')
     try:
         return str(rawbytes, charset, errors)

diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
@@ -26,7 +26,7 @@
 
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 
-"""#"
+"""
 
 import codecs
 import sys
@@ -56,6 +56,12 @@ def normalize_encoding(encoding):
     if isinstance(encoding, bytes):
         encoding = str(encoding, "ascii")
 
+    if not encoding.isascii():
+        import warnings
+        warnings.warn(
+            "Support for non-ascii encoding names will be removed in 3.17",
+            DeprecationWarning, stacklevel=2)
+
     return _normalize_encoding(encoding)
 
 def search_function(encoding):

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
@@ -3886,22 +3886,26 @@ def search_function(encoding):
         self.assertEqual(codecs.lookup('TEST.AAA 8'), ('test.aaa-8', 2, 3, 4))
         self.assertEqual(codecs.lookup('TEST.AAA---8'), ('test.aaa---8', 2, 3, 4))
         self.assertEqual(codecs.lookup('TEST.AAA   8'), ('test.aaa---8', 2, 3, 4))
-        self.assertEqual(codecs.lookup('TEST.AAA\xe9\u20ac-8'), ('test.aaa\xe9\u20ac-8', 2, 3, 4))
         self.assertEqual(codecs.lookup('TEST.AAA.8'), ('test.aaa.8', 2, 3, 4))
         self.assertEqual(codecs.lookup('TEST.AAA...8'), ('test.aaa...8', 2, 3, 4))
+        with self.assertWarns(DeprecationWarning):
+            self.assertEqual(codecs.lookup('TEST.AAA\xe9\u20ac-8'), ('test.aaa\xe9\u20ac-8', 2, 3, 4))
 
     def test_encodings_normalize_encoding(self):
-        # encodings.normalize_encoding() ignores non-ASCII characters.
         normalize = encodings.normalize_encoding
         self.assertEqual(normalize('utf_8'), 'utf_8')
-        self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
         self.assertEqual(normalize('utf   8'), 'utf_8')
         # encodings.normalize_encoding() doesn't convert
         # characters to lower case.
         self.assertEqual(normalize('UTF 8'), 'UTF_8')
         self.assertEqual(normalize('utf.8'), 'utf.8')
         self.assertEqual(normalize('utf...8'), 'utf...8')
 
+        # Non-ASCII *encoding* is deprecated.
+        with self.assertWarnsRegex(DeprecationWarning,
+                "Support for non-ascii encoding names will be removed in 3.17"):
+            self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2025-10-13-11-25-41.gh-issue-136702.uvLGK1.rst b/Misc/NEWS.d/next/Library/2025-10-13-11-25-41.gh-issue-136702.uvLGK1.rst
@@ -0,0 +1,3 @@
+:mod:`encodings`: Deprecate passing a non-ASCII *encoding* name to
+:func:`encodings.normalize_encoding`; support for such names is scheduled
+for removal in Python 3.17.