From 71718030c607fe07c97ac7947d42443d4920843a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 13 Jun 2025 12:16:37 +0300 Subject: [PATCH 1/5] gh-135661: Fix parsing start and end tags in HTMLParser * Whitespaces no longer accepted between `</` and the tag name. E.g. `</ script>` does not end the script section. * Vertical tabulation (`\v`) and non-ASCII whitespaces no longer recognized as whitespaces. The only whitespaces are `\t\n\r\f `. * Null character (U+0000) no longer ends the tag name. * End tag can have attributes and slashes after tag name. It no longer ends after the first `>` in quoted attribute value. E.g. `</script/foo=">"/>`. * Multiple slashes and whitespaces between the last attribute and closing `>` are now accepted in both start and end tags. E.g. `<a foo=bar/ //>`. * Multiple `=` between attribute name and value are no longer collapsed. E.g. `<a foo==bar>` produces attribute "foo" with value "=bar". * Whitespaces between the `=` separator and attribute name or value are no longer ignored. E.g. `<a foo =bar>` produces two attributes "foo" and "=bar", both with value None; `<a foo= bar>` produces two attributes: "foo" with value "" and "bar" with value None. --- Lib/html/parser.py | 136 ++++++--------- Lib/test/test_htmlparser.py | 155 +++++++++++------- ...-06-25-14-13-39.gh-issue-135661.idjQ0B.rst | 23 +++ 3 files changed, 175 insertions(+), 139 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst diff --git a/Lib/html/parser.py b/Lib/html/parser.py index ba416e7fa6e3fe..accd220cca1b85 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -36,29 +36,33 @@ # explode, so don't do it. 
# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state # and http://www.w3.org/TR/html5/tokenization.html#tag-name-state -tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*') -attrfind_tolerant = re.compile( - r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*' - r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*') -locatestarttagend_tolerant = re.compile(r""" - <[a-zA-Z][^\t\n\r\f />\x00]* # tag name - (?:[\s/]* # optional whitespace before attribute name - (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name - (?:\s*=+\s* # value indicator - (?:'[^']*' # LITA-enclosed value - |"[^"]*" # LIT-enclosed value - |(?!['"])[^>\s]* # bare value - ) - \s* # possibly followed by a space - )?(?:\s|/(?!>))* - )* +tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />]*)(?:[\t\n\r\f ]|/(?!>))*') +attrfind_tolerant = re.compile(r""" + ( + (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name + ) + (= # value indicator + ('[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^>\t\n\r\f ]* # bare value + ) )? - \s* # trailing whitespace + (?:[\t\n\r\f ]|/(?!>))* # possibly followed by a space +""", re.VERBOSE) +locatetagend_tolerant = re.compile(r""" + [a-zA-Z][^\t\n\r\f />]* # tag name + [\t\n\r\f /]* # optional whitespace before attribute name + (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name + (?:= # value indicator + (?:'[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^>\t\n\r\f ]* # bare value + ) + )? + [\t\n\r\f /]* # possibly followed by a space + )* + >? 
""", re.VERBOSE) -endendtag = re.compile('>') -# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between -# ') # Character reference processing logic specific to attribute values # See: https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state @@ -141,7 +145,8 @@ def get_starttag_text(self): def set_cdata_mode(self, elem): self.cdata_elem = elem.lower() - self.interesting = re.compile(r'' % self.cdata_elem, re.I) + self.interesting = re.compile(r'])' % self.cdata_elem, + re.IGNORECASE|re.ASCII) def clear_cdata_mode(self): self.interesting = interesting_normal @@ -166,7 +171,7 @@ def goahead(self, end): # & near the end and see if it's followed by a space or ;. amppos = rawdata.rfind('&', max(i, n-34)) if (amppos >= 0 and - not re.compile(r'[\s;]').search(rawdata, amppos)): + not re.compile(r'[\t\n\r\f ;]').search(rawdata, amppos)): break # wait till we get all the text j = n else: @@ -381,76 +386,39 @@ def parse_starttag(self, i): # or -1 if incomplete. def check_for_whole_start_tag(self, i): rawdata = self.rawdata - m = locatestarttagend_tolerant.match(rawdata, i) - if m: - j = m.end() - next = rawdata[j:j+1] - if next == ">": - return j + 1 - if next == "/": - if rawdata.startswith("/>", j): - return j + 2 - if rawdata.startswith("/", j): - # buffer boundary - return -1 - # else bogus input - if j > i: - return j - else: - return i + 1 - if next == "": - # end of input - return -1 - if next in ("abcdefghijklmnopqrstuvwxyz=/" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ"): - # end of input in or before attribute value, or we have the - # '/' from a '/>' ending - return -1 - if j > i: - return j - else: - return i + 1 - raise AssertionError("we should not get here!") + match = locatetagend_tolerant.match(rawdata, i+1) + assert match + j = match.end() + if rawdata[j-1] != ">": + return -1 + return j # Internal -- parse endtag, return end or -1 if incomplete def parse_endtag(self, i): rawdata = self.rawdata assert rawdata[i:i+2] == " - if not match: 
+ if rawdata.find('>', i+2) < 0: return -1 - gtpos = match.end() - match = endtagfind.match(rawdata, i) # - if not match: - if self.cdata_elem is not None: - self.handle_data(rawdata[i:gtpos]) - return gtpos - # find the name: w3.org/TR/html5/tokenization.html#tag-name-state - namematch = tagfind_tolerant.match(rawdata, i+2) - if not namematch: - # w3.org/TR/html5/tokenization.html#end-tag-open-state - if rawdata[i:i+3] == '': - return i+3 - else: - return self.parse_bogus_comment(i) - tagname = namematch.group(1).lower() - # consume and ignore other stuff between the name and the > - # Note: this is not 100% correct, since we might have things like - # , but looking for > after the name should cover - # most of the cases and is much simpler - gtpos = rawdata.find('>', namematch.end()) - self.handle_endtag(tagname) - return gtpos+1 + if not endtagopen.match(rawdata, i): # ': + return i+3 + else: + return self.parse_bogus_comment(i) - elem = match.group(1).lower() # script or style - if self.cdata_elem is not None: - if elem != self.cdata_elem: - self.handle_data(rawdata[i:gtpos]) - return gtpos + match = locatetagend_tolerant.match(rawdata, i+2) + assert match + j = match.end() + if rawdata[j-1] != ">": + return -1 - self.handle_endtag(elem) + # find the name: w3.org/TR/html5/tokenization.html#tag-name-state + match = tagfind_tolerant.match(rawdata, i+2) + assert match + tag = match.group(1).lower() + self.handle_endtag(tag) self.clear_cdata_mode() - return gtpos + return j # Overridable -- finish processing of start+end tag: def handle_startendtag(self, tag, attrs): diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 65a4bee72b9775..d0d2c54217ccaf 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -81,6 +81,13 @@ def handle_entityref(self, data): self.fail('This should never be called with convert_charrefs=True') +# The normal event collector normalizes the events in get_events, +# so we override it to return the 
original list of events. +class EventCollectorNoNormalize(EventCollector): + def get_events(self): + return self.events + + class TestCaseBase(unittest.TestCase): def get_collector(self): @@ -265,8 +272,7 @@ def test_get_starttag_text(self): ("starttag", "foo:bar", [("one", "1"), ("two", "2")]), ("starttag_text", s)]) - def test_cdata_content(self): - contents = [ + @support.subTests('content', [ ' ¬-an-entity-ref;', "", '

', @@ -279,54 +285,83 @@ def test_cdata_content(self): 'src="http://www.example.org/r=\'+new ' 'Date().getTime()+\'"><\\/s\'+\'cript>\');\n//]]>'), '\n\n', - 'foo = "";', '', - # these two should be invalid according to the HTML 5 spec, - # section 8.1.2.2 - #'foo = ', - #'foo = ', - ] - elements = ['script', 'style', 'SCRIPT', 'STYLE', 'Script', 'Style'] - for content in contents: - for element in elements: - element_lower = element.lower() - s = '<{element}>{content}'.format(element=element, - content=content) - self._run_check(s, [("starttag", element_lower, []), - ("data", content), - ("endtag", element_lower)]) - - def test_cdata_with_closing_tags(self): + 'foo = ""', + 'foo = ""', + 'foo = ""', + 'foo = ""', + 'foo = ""', + 'foo = ""', + ]) + def test_script_content(self, content): + s = f'' + self._run_check(s, [("starttag", "script", []), + ("data", content), + ("endtag", "script")]) + + @support.subTests('content', [ + 'a::before { content: ""; }', + 'a::before { content: "¬-an-entity-ref;"; }', + 'a::before { content: ""; }', + 'a::before { content: "\u2603"; }', + 'a::before { content: "< /style>"; }', + 'a::before { content: ""; }', + 'a::before { content: ""; }', + 'a::before { content: ""; }', + 'a::before { content: ""; }', + 'a::before { content: ""; }', + ]) + def test_style_content(self, content): + s = f'' + self._run_check(s, [("starttag", "style", []), + ("data", content), + ("endtag", "style")]) + + @support.subTests('endtag', ['script', 'SCRIPT', 'script ', 'script\n', + 'script/', 'script foo=bar', 'script foo=">"']) + def test_script_closing_tag(self, endtag): # see issue #13358 # make sure that HTMLParser calls handle_data only once for each CDATA. - # The normal event collector normalizes the events in get_events, - # so we override it to return the original list of events. - class Collector(EventCollector): - def get_events(self): - return self.events - content = """ ¬-an-entity-ref;

''""" - for element in [' script', 'script ', ' script ', - '\nscript', 'script\n', '\nscript\n']: - element_lower = element.lower().strip() - s = '{content}{tail}' + self._run_check(s, [("starttag", "script", []), + ("data", content if end else content + tail)], + collector=EventCollectorNoNormalize(convert_charrefs=False)) def test_comments(self): html = ("" @@ -443,7 +478,7 @@ def test_starttag_junk_chars(self): self._run_check("", [('comment', '$')]) self._run_check("", [('endtag', 'a')]) + self._run_check("", [('comment', ' a')]) self._run_check("", [('starttag', 'a", [('endtag', 'a', [('endtag', 'a')]) + self._run_check('', [('endtag', 'a')]) + def test_declaration_junk_chars(self): self._run_check("", [('decl', 'DOCTYPE foo $ ')]) @@ -525,15 +564,11 @@ def test_invalid_end_tags(self): self._run_check(html, expected) def test_broken_invalid_end_tag(self): - # This is technically wrong (the "> shouldn't be included in the 'data') - # but is probably not worth fixing it (in addition to all the cases of - # the previous test, it would require a full attribute parsing). - # see #13993 html = 'This confuses the parser' expected = [('starttag', 'b', []), ('data', 'This'), ('endtag', 'b'), - ('data', '"> confuses the parser')] + ('data', ' confuses the parser')] self._run_check(html, expected) def test_correct_detection_of_start_tags(self): @@ -560,7 +595,7 @@ def test_correct_detection_of_start_tags(self): html = '

The rain' expected = [ - ('starttag', 'div', [('style', ''), (',', None), ('foo', 'bar')]), + ('starttag', 'div', [('style', ''), (',', None), ('foo', None), ('=', None), ('"bar"', None)]), ('starttag', 'b', []), ('data', 'The '), ('starttag', 'a', [('href', 'some_url')]), @@ -749,9 +784,15 @@ def test_attr_syntax(self): ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)]) ] self._run_check("""""", output) - self._run_check("""""", output) - self._run_check("""""", output) - self._run_check("""""", output) + self._run_check("", [('starttag', 'a', [('foo', '=bar')])]) + self._run_check("", [('starttag', 'a', [('foo', None), ('=bar', None)])]) + self._run_check("", [('starttag', 'a', [('foo', None), ('=bar', None)])]) + self._run_check("", [('starttag', 'a', [('foo\v', 'bar')])]) + self._run_check("", [('starttag', 'a', [('foo\xa0', 'bar')])]) + self._run_check("", [('starttag', 'a', [('foo', ''), ('bar', None)])]) + self._run_check("", [('starttag', 'a', [('foo', ''), ('bar', None)])]) + self._run_check("", [('starttag', 'a', [('foo', '\vbar')])]) + self._run_check("", [('starttag', 'a', [('foo', '\xa0bar')])]) def test_attr_values(self): self._run_check("""""", @@ -760,6 +801,10 @@ def test_attr_values(self): ("d", "\txyz\n")])]) self._run_check("""""", [("starttag", "a", [("b", ""), ("c", "")])]) + self._run_check("", + [("starttag", "a", [("b", ""), ("c", "")])]) + self._run_check("", + [("starttag", "a", [("b", "\v"), ("c", "\xa0")])]) # Regression test for SF patch #669683. 
self._run_check("", [("starttag", "e", [("a", "rgb(1,2,3)")])]) @@ -831,7 +876,7 @@ def test_malformed_attributes(self): ('data', 'test - bad2'), ('endtag', 'a'), ('starttag', 'a', [('href', "test'\xa0style='color:red;bad3'")]), ('data', 'test - bad3'), ('endtag', 'a'), - ('starttag', 'a', [('href', "test'\xa0style='color:red;bad4'")]), + ('starttag', 'a', [('href', None), ('=', None), ("test' style", 'color:red;bad4')]), ('data', 'test - bad4'), ('endtag', 'a') ] self._run_check(html, expected) diff --git a/Misc/NEWS.d/next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst b/Misc/NEWS.d/next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst new file mode 100644 index 00000000000000..f3186e17b78d91 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst @@ -0,0 +1,23 @@ +Fix parsing start and end tags in :class:`html.parser.HTMLParser`. + +* Whitespaces no longer accepted between ```` does not end the script section. + +* Vertical tabulation (``\v``) and non-ASCII whitespaces no longer recognized + as whitespaces. The only whitespaces are ``\t\n\r\f ``. + +* Null character (U+0000) no longer ends the tag name. + +* End tag can have attributes and slashes after tag name. It no longer ends + after the first ``>`` in quoted attribute value. E.g. ````. + +* Multiple slashes and whitespaces between the last attribute and closing ``>`` + are now accepted in both start and end tags. E.g. ````. + +* Multiple ``=`` between attribute name and value are no longer collapsed. + E.g. ```` produces attribute "foo" with value "=bar". + +* Whitespaces between the ``=`` separator and attribute name or value are no + longer ignored. E.g. ```` produces two attributes "foo" and + "=bar", both with value None; ```` produces two attributes: + "foo" with value "" and "bar" with value None. 
From 182b16f0d7d01d10b6ab46a28bab20876e94226b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 25 Jun 2025 15:40:19 +0300 Subject: [PATCH 2/5] Fix Sphinx errors. --- .../next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst b/Misc/NEWS.d/next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst index f3186e17b78d91..948c867c83d209 100644 --- a/Misc/NEWS.d/next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst +++ b/Misc/NEWS.d/next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst @@ -4,7 +4,7 @@ Fix parsing start and end tags in :class:`html.parser.HTMLParser`. E.g. ``</ script>`` does not end the script section. * Vertical tabulation (``\v``) and non-ASCII whitespaces no longer recognized - as whitespaces. The only whitespaces are ``\t\n\r\f ``. + as whitespaces. The only whitespaces are ``\t\n\r\f`` and space. * Null character (U+0000) no longer ends the tag name. 
From ebf8ce3a11e7ebd824f25ac20ae44a1f550dc766 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 2 Jul 2025 21:16:17 +0300 Subject: [PATCH 3/5] Apply suggestions from code review Co-authored-by: Ezio Melotti --- Lib/html/parser.py | 2 +- .../2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Lib/html/parser.py b/Lib/html/parser.py index accd220cca1b85..b4251639bf5938 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -401,7 +401,7 @@ def parse_endtag(self, i): return -1 if not endtagopen.match(rawdata, i): # </ + letter - if rawdata[i+2:i+3] == '>': + if rawdata[i+2:i+3] == '>': # </> is ignored return i+3 else: return self.parse_bogus_comment(i) diff --git a/Misc/NEWS.d/next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst b/Misc/NEWS.d/next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst index 948c867c83d209..a6152ea9634d1f 100644 --- a/Misc/NEWS.d/next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst +++ b/Misc/NEWS.d/next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst @@ -1,4 +1,5 @@ -Fix parsing start and end tags in :class:`html.parser.HTMLParser`. +Fix parsing start and end tags in :class:`html.parser.HTMLParser` +according to the HTML5 standard. * Whitespaces no longer accepted between ``</`` and the tag name. E.g. ``</ script>`` does not end the script section. @@ -8,8 +9,9 @@ Fix parsing start and end tags in :class:`html.parser.HTMLParser`. * Null character (U+0000) no longer ends the tag name. -* End tag can have attributes and slashes after tag name. It no longer ends - after the first ``>`` in quoted attribute value. E.g. ``</script/foo=">"/>``. +* Attributes and slashes after the tag name in end tags are now correctly + parsed as comments, instead of terminating after the first ``>`` + in quoted attribute value. E.g. ``</script/foo=">"/>``. * Multiple slashes and whitespaces between the last attribute and closing ``>`` are now accepted in both start and end tags. E.g. ``<a foo=bar/ //>``. 
From 955db4e153ff49ef3abdc3b2418c7e4fa32bff09 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 2 Jul 2025 21:44:27 +0300 Subject: [PATCH 4/5] Address review comments. --- Lib/html/parser.py | 49 ++++++++++++++----- ...-06-25-14-13-39.gh-issue-135661.idjQ0B.rst | 8 +-- 2 files changed, 42 insertions(+), 15 deletions(-) diff --git a/Lib/html/parser.py b/Lib/html/parser.py index b4251639bf5938..cc15de07b5bae6 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -31,11 +31,14 @@ piclose = re.compile('>') commentclose = re.compile(r'--\s*>') # Note: -# 1) if you change tagfind/attrfind remember to update locatestarttagend too; -# 2) if you change tagfind/attrfind and/or locatestarttagend the parser will +# 1) if you change tagfind/attrfind remember to update locatetagend too; +# 2) if you change tagfind/attrfind and/or locatetagend the parser will # explode, so don't do it. -# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state -# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state +# see the HTML5 specs section "13.2.5.6 Tag open state", +# "13.2.5.8 Tag name state" and "13.2.5.33 Attribute name state". +# https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state +# https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state +# https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />]*)(?:[\t\n\r\f ]|/(?!>))*') attrfind_tolerant = re.compile(r""" ( @@ -49,7 +52,7 @@ )? (?:[\t\n\r\f ]|/(?!>))* # possibly followed by a space """, re.VERBOSE) -locatetagend_tolerant = re.compile(r""" +locatetagend = re.compile(r""" [a-zA-Z][^\t\n\r\f />]* # tag name [\t\n\r\f /]* # optional whitespace before attribute name (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name @@ -63,6 +66,25 @@ )* >? """, re.VERBOSE) +# The following variables are not used, but are temporarily left for +# backward compatibility. 
+locatestarttagend_tolerant = re.compile(r""" + <[a-zA-Z][^\t\n\r\f />\x00]* # tag name + (?:[\s/]* # optional whitespace before attribute name + (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name + (?:\s*=+\s* # value indicator + (?:'[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^>\s]* # bare value + ) + \s* # possibly followed by a space + )?(?:\s|/(?!>))* + )* + )? + \s* # trailing whitespace +""", re.VERBOSE) +endendtag = re.compile('>') +endtagfind = re.compile(r'') # Character reference processing logic specific to attribute values # See: https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state @@ -315,7 +337,7 @@ def parse_html_declaration(self, i): return self.parse_bogus_comment(i) # Internal -- parse bogus comment, return length or -1 if not terminated - # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state + # see https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state def parse_bogus_comment(self, i, report=1): rawdata = self.rawdata assert rawdata[i:i+2] in ('": @@ -395,24 +419,27 @@ def check_for_whole_start_tag(self, i): # Internal -- parse endtag, return end or -1 if incomplete def parse_endtag(self, i): + # See the HTML5 specs section "13.2.5.7 End tag open state" + # https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state rawdata = self.rawdata assert rawdata[i:i+2] == "', i+2) < 0: + if rawdata.find('>', i+2) < 0: # fast check return -1 if not endtagopen.match(rawdata, i): # ': # is ignored + # "missing-end-tag-name" parser error return i+3 else: return self.parse_bogus_comment(i) - match = locatetagend_tolerant.match(rawdata, i+2) + match = locatetagend.match(rawdata, i+2) assert match j = match.end() if rawdata[j-1] != ">": return -1 - # find the name: w3.org/TR/html5/tokenization.html#tag-name-state + # find the name: "13.2.5.8 Tag name state" + # https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state match = 
tagfind_tolerant.match(rawdata, i+2) assert match tag = match.group(1).lower() diff --git a/Misc/NEWS.d/next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst b/Misc/NEWS.d/next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst index a6152ea9634d1f..b6f9e104e44047 100644 --- a/Misc/NEWS.d/next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst +++ b/Misc/NEWS.d/next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst @@ -9,12 +9,12 @@ according to the HTML5 standard. * Null character (U+0000) no longer ends the tag name. -* Attributes and slashes after the tag name in end tags are now correctly - parsed as comments, instead of terminating after the first ``>`` - in quoted attribute value. E.g. ``</script/foo=">"/>``. +* Attributes and slashes after the tag name in end tags are now ignored, + instead of terminating after the first ``>`` in quoted attribute value. + E.g. ``</script/foo=">"/>``. * Multiple slashes and whitespaces between the last attribute and closing ``>`` - are now accepted in both start and end tags. E.g. ``<a foo=bar/ //>``. + are now ignored in both start and end tags. E.g. ``<a foo=bar/ //>``. * Multiple ``=`` between attribute name and value are no longer collapsed. E.g. ``<a foo==bar>`` produces attribute "foo" with value "=bar". From f38ad41e6db72f96c91a4394acc356acda2fb003 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 3 Jul 2025 18:24:23 +0300 Subject: [PATCH 5/5] Move to Security. --- .../2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/{Library => Security}/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst (100%) diff --git a/Misc/NEWS.d/next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst b/Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst similarity index 100% rename from Misc/NEWS.d/next/Library/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst rename to Misc/NEWS.d/next/Security/2025-06-25-14-13-39.gh-issue-135661.idjQ0B.rst