From 4fb3b6f230fbf96613531874d3960117b6892687 Mon Sep 17 00:00:00 2001 From: eternalrights <3147268827@qq.com> Date: Fri, 15 May 2026 10:32:55 +0800 Subject: [PATCH 1/3] fix junitxml bin_xml_escape: use \U for supplementary plane range in illegal_xml_re --- changelog/14483.bugfix.rst | 1 + src/_pytest/junitxml.py | 2 +- testing/test_junitxml.py | 10 ++++++++-- 3 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 changelog/14483.bugfix.rst diff --git a/changelog/14483.bugfix.rst b/changelog/14483.bugfix.rst new file mode 100644 index 00000000000..c1088d8f602 --- /dev/null +++ b/changelog/14483.bugfix.rst @@ -0,0 +1 @@ +Fixed ``bin_xml_escape`` in junitxml incorrectly escaping supplementary plane characters (U+10000 and above, including emoji) due to using ``\u`` instead of ``\U`` for the supplementary plane range in the ``illegal_xml_re`` regex. diff --git a/src/_pytest/junitxml.py b/src/_pytest/junitxml.py index ae8d2b94d36..a1f2eb265fd 100644 --- a/src/_pytest/junitxml.py +++ b/src/_pytest/junitxml.py @@ -56,7 +56,7 @@ def repl(matchobj: re.Match[str]) -> str: # Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] # For an unknown(?) reason, we disallow #x7F (DEL) as well. illegal_xml_re = ( - "[^\u0009\u000a\u000d\u0020-\u007e\u0080-\ud7ff\ue000-\ufffd\u10000-\u10ffff]" + "[^\u0009\u000a\u000d\u0020-\u007e\u0080-\ud7ff\ue000-\ufffd\U00010000-\U0010ffff]" ) return re.sub(illegal_xml_re, repl, str(arg)) diff --git a/testing/test_junitxml.py b/testing/test_junitxml.py index 5a603c05bc8..a774874f4c6 100644 --- a/testing/test_junitxml.py +++ b/testing/test_junitxml.py @@ -1122,8 +1122,7 @@ def test_invalid_xml_escape() -> None: 0xFFFE, 0x0FFFF, ) # , 0x110000) - valid = (0x9, 0xA, 0x20) - # 0xD, 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF) + valid = (0x9, 0xA, 0x20, 0xD, 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF) for i in invalid: got = bin_xml_escape(chr(i)) @@ -1136,6 +1135,13 @@ def test_invalid_xml_escape() -> None: assert chr(i) == bin_xml_escape(chr(i)) +def test_bin_xml_escape_supplementary_plane() -> None: + assert bin_xml_escape(chr(0x1F600)) == chr(0x1F600) + assert bin_xml_escape("test_😀") == "test_😀" + assert bin_xml_escape("test_𠀀") == "test_𠀀" + assert bin_xml_escape("test_𝄞") == "test_𝄞" + + def test_logxml_path_expansion(tmp_path: Path, monkeypatch: MonkeyPatch) -> None: home_tilde = Path(os.path.expanduser("~")).joinpath("test.xml") xml_tilde = LogXML(Path("~", "test.xml"), None) From e694e4b7fb226a8228b8488cd9bd97b985e7d9b8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 15 May 2026 02:33:34 +0000 Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/_pytest/junitxml.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/_pytest/junitxml.py b/src/_pytest/junitxml.py index a1f2eb265fd..30c93c88c07 100644 --- a/src/_pytest/junitxml.py +++ b/src/_pytest/junitxml.py @@ -55,9 +55,7 @@ def repl(matchobj: re.Match[str]) -> str: # The spec range of valid chars is: # Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] # For an unknown(?) reason, we disallow #x7F (DEL) as well. - illegal_xml_re = ( - "[^\u0009\u000a\u000d\u0020-\u007e\u0080-\ud7ff\ue000-\ufffd\U00010000-\U0010ffff]" - ) + illegal_xml_re = "[^\u0009\u000a\u000d\u0020-\u007e\u0080-\ud7ff\ue000-\ufffd\U00010000-\U0010ffff]" return re.sub(illegal_xml_re, repl, str(arg)) From 7521f4dee7e191f7ca2f9113329eac764165a5b9 Mon Sep 17 00:00:00 2001 From: eternalrights <3147268827@qq.com> Date: Fri, 15 May 2026 14:35:07 +0800 Subject: [PATCH 3/3] fold supplementary plane checks into existing test_invalid_xml_escape, drop separate function --- testing/test_junitxml.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/testing/test_junitxml.py b/testing/test_junitxml.py index a774874f4c6..0e002d4dde2 100644 --- a/testing/test_junitxml.py +++ b/testing/test_junitxml.py @@ -1135,13 +1135,6 @@ def test_invalid_xml_escape() -> None: assert chr(i) == bin_xml_escape(chr(i)) -def test_bin_xml_escape_supplementary_plane() -> None: - assert bin_xml_escape(chr(0x1F600)) == chr(0x1F600) - assert bin_xml_escape("test_😀") == "test_😀" - assert bin_xml_escape("test_𠀀") == "test_𠀀" - assert bin_xml_escape("test_𝄞") == "test_𝄞" - - def test_logxml_path_expansion(tmp_path: Path, monkeypatch: MonkeyPatch) -> None: home_tilde = Path(os.path.expanduser("~")).joinpath("test.xml") xml_tilde = LogXML(Path("~", "test.xml"), None)