Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ See the [Contributing Guide](contributing.md) for details.
### Fixed

* Fix a regression related to comment handling (#1590).
* More reliable fix for the handling of unclosed end tags (`</`).

## [3.10.1] - 2026-01-21

Expand Down
46 changes: 26 additions & 20 deletions markdown/htmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@

# Included for versions which do not have current comment fix
commentclose = re.compile(r'--!?>')
commentabruptclose = re.compile(r'-?>')

# Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it.
# Users can still do `from html import parser` and get the default behavior.
Expand All @@ -48,6 +47,8 @@
# throwing it away. When we see it, we will process it as data.
htmlparser.starttagopen = re.compile('<[a-zA-Z]|</>')

htmlparser.endtagopen = re.compile('</[a-zA-Z]?')

# Monkeypatch `HTMLParser` to only accept `?>` to close Processing Instructions.
htmlparser.piclose = re.compile(r'\?>')
# Monkeypatch `HTMLParser` to only recognize entity references with a closing semicolon.
Expand Down Expand Up @@ -92,6 +93,30 @@
blank_line_re = re.compile(r'^([ ]*\n){2}')


class _HTMLParser(htmlparser.HTMLParser):
"""Handle special start and end tags."""

def parse_endtag(self, i):
start = self.rawdata[i:i+3]
c = ord(start[-1])
if len(start) < 3 or not (65 <= c <= 90 or 97 <= c <= 122):
self.handle_data(self.rawdata[i:i + 2])
return i + 2
return super().parse_endtag(i)

def parse_starttag(self, i: int) -> int: # pragma: no cover
# Treat `</>` as normal data as it is not a real tag.
if self.rawdata[i:i + 3] == '</>':
self.handle_data(self.rawdata[i:i + 3])
return i + 3

return super().parse_starttag(i)


# Replace `HTMLParser` on our `htmlparser` copy so that projects which import it from here (e.g. MkDocs) get the patched parser too.
htmlparser.HTMLParser = _HTMLParser


class HTMLExtractor(htmlparser.HTMLParser):
"""
Extract raw HTML from text.
Expand All @@ -110,9 +135,6 @@ def __init__(self, md: Markdown, *args, **kwargs):

self.lineno_start_cache = [0]

self.override_comment_update = False
self.override_comment_start = 0

# This calls self.reset
super().__init__(*args, **kwargs)
self.md = md
Expand All @@ -125,8 +147,6 @@ def reset(self):
self._cache: list[str] = []
self.cleandoc: list[str] = []
self.lineno_start_cache = [0]
self.override_comment_start = 0
self.override_comment_update = False

super().reset()

Expand Down Expand Up @@ -276,22 +296,8 @@ def handle_entityref(self, name: str):

def handle_comment(self, data: str):
    """
    Pass a comment on to `handle_empty_tag` as a block-level empty tag.

    Arguments:
        data: The text between `<!--` and `-->`.
    """
    # Do not try to detect an unclosed `</` here by searching `self.rawdata` for
    # `data`: `str.find` returns the first occurrence anywhere in the buffer, so a
    # real comment whose text also appears right after an earlier `</` would be
    # dropped. A stray `</` is already emitted as data by `_HTMLParser.parse_endtag`.
    self.handle_empty_tag('<!--{}-->'.format(data), is_block=True)

def updatepos(self, i: int, j: int) -> int:
if self.override_comment_update:
self.override_comment_update = False
i = self.override_comment_start
j = self.override_comment_start + 1
return super().updatepos(i, j)

def handle_decl(self, data: str):
    """Wrap a declaration in `<!` and `>` and pass it to `handle_empty_tag` as a block-level tag."""
    declaration = f'<!{data}>'
    self.handle_empty_tag(declaration, is_block=True)

Expand Down
55 changes: 55 additions & 0 deletions tests/test_syntax/blocks/test_html_blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1732,3 +1732,58 @@ def test_issue_1590(self):
'''
)
)

def test_stress_comment_handling(self):
    """
    Stress test the comment handling.

    The input mixes, in varying order, code spans containing `</` and
    `<!--[if mso]>`, bare unclosed `<!--` openers, the empty tag `</>`, and one
    well-formed comment.
    """

    # Expected: inside paragraphs every marker is HTML-escaped text and each code span
    # becomes `<code>`; only `<!-- Real comment -->` is passed through unchanged.
    self.assertMarkdownRenders(
        self.dedent(
'''
`</` <!-- `<!--[if mso]>` and <!-- </> and `<!--[if mso]>`

<!-- and <!-- `<!--[if mso]>` and </> `</` and `<!--[if mso]>`

<!-- Real comment -->

`<!--[if mso]>` `</` `<!--[if mso]>` and </> <!-- and <!--

</> `<!--[if mso]>` `</` <!-- and <!-- and `<!--[if mso]>`
'''
        ),
        self.dedent(
'''
<p><code>&lt;/</code> &lt;!-- <code>&lt;!--[if mso]&gt;</code> and &lt;!-- &lt;/&gt; and <code>&lt;!--[if mso]&gt;</code></p>
<p>&lt;!-- and &lt;!-- <code>&lt;!--[if mso]&gt;</code> and &lt;/&gt; <code>&lt;/</code> and <code>&lt;!--[if mso]&gt;</code></p>
<!-- Real comment -->
<p><code>&lt;!--[if mso]&gt;</code> <code>&lt;/</code> <code>&lt;!--[if mso]&gt;</code> and &lt;/&gt; &lt;!-- and &lt;!--</p>
<p>&lt;/&gt; <code>&lt;!--[if mso]&gt;</code> <code>&lt;/</code> &lt;!-- and &lt;!-- and <code>&lt;!--[if mso]&gt;</code></p>
''' # noqa: E501
        )
    )

def test_unclosed_endtag(self):
    """
    Ensure unclosed end tag does not have side effects.

    A code span containing `</` is followed by a `<div>` block which holds two
    comments around a `<p>` element.
    """

    # Expected: the code span is escaped inside its own paragraph and the `<div>`
    # block, including both comments, is emitted exactly as written.
    self.assertMarkdownRenders(
        self.dedent(
'''
`</`

<div>
<!--[if mso]>-->
<p>foo</p>
<!--<!endif]-->
</div>
'''
        ),
        self.dedent(
'''
<p><code>&lt;/</code></p>
<div>
<!--[if mso]>-->
<p>foo</p>
<!--<!endif]-->
</div>
'''
        )
    )