Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/14483.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Preserved valid supplementary-plane Unicode characters, such as emoji, in JUnit XML output instead of visually escaping them as though they were invalid XML characters.
4 changes: 1 addition & 3 deletions src/_pytest/junitxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,7 @@ def repl(matchobj: re.Match[str]) -> str:
# The spec range of valid chars is:
# Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
# For an unknown(?) reason, we disallow #x7F (DEL) as well.
illegal_xml_re = (
"[^\u0009\u000a\u000d\u0020-\u007e\u0080-\ud7ff\ue000-\ufffd\u10000-\u10ffff]"
)
illegal_xml_re = "[^\u0009\u000a\u000d\u0020-\u007e\u0080-\ud7ff\ue000-\ufffd\U00010000-\U0010ffff]"
return re.sub(illegal_xml_re, repl, str(arg))


Expand Down
19 changes: 12 additions & 7 deletions testing/test_junitxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -1104,11 +1104,6 @@ def test_invalid_xml_escape() -> None:
# Test some more invalid XML chars. The full range should really be
# tested, but for now we only test the edges of the ranges
# instead.
# XXX This only tests low unicode character points for now as
# there are some issues with the testing infrastructure for
# the higher ones.
# XXX Testing 0xD (\r) is tricky as it overwrites the just written
# line in the output, so we skip it too.
invalid = (
0x00,
0x1,
Expand All @@ -1122,8 +1117,18 @@ def test_invalid_xml_escape() -> None:
0xFFFE,
0x0FFFF,
) # , 0x110000)
valid = (0x9, 0xA, 0x20)
# 0xD, 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF)
valid = (
0x9,
0xA,
0xD,
0x20,
0xD7FF,
0xE000,
0xFFFD,
0x10000,
0x1F600,
0x10FFFF,
)

for i in invalid:
got = bin_xml_escape(chr(i))
Expand Down
Loading