Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions monty/exts/info/github/_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import enum
import re
from abc import abstractmethod
from typing import Generic, Literal, NamedTuple, TypeVar, overload
from typing import Generic, Literal, NamedTuple, TypeVar, cast, overload

import disnake
import disnake.utils
Expand Down Expand Up @@ -171,7 +171,8 @@ def render_markdown(self, body: str, *, repo_url: str, limit: int = 2700) -> str
"url",
],
)
body = markdown(body) or ""
# this will always be str, unless renderer above is set to None
body = cast("str", markdown(body))

body = body.strip()

Expand Down
231 changes: 124 additions & 107 deletions monty/utils/markdown.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,30 @@
import itertools
import re
from typing import Any
from typing import TYPE_CHECKING, Any
from urllib.parse import urljoin

import mistune.renderers
import mistune.renderers._list
import mistune.renderers.markdown
from bs4.element import PageElement, Tag
from markdownify import MarkdownConverter
from mistune.core import BlockState
from typing_extensions import override

from monty import constants


if TYPE_CHECKING:
from collections.abc import Iterator


__all__ = (
"DiscordRenderer",
"DocMarkdownConverter",
"remove_codeblocks",
)

RenderToken = dict[str, Any]


CODE_BLOCK_RE = re.compile(
r"```(.+?)```|(?P<delim>`{1,2})([^\n]+?)(?P=delim)",
Expand Down Expand Up @@ -100,155 +110,162 @@ def convert_hr(self, el: PageElement, text: str, parent_tags: set[str]) -> str:
return ""


# TODO: this will be expanded over time as necessary
class DiscordRenderer(mistune.renderers.BaseRenderer):
"""Custom renderer for markdown to discord compatiable markdown."""
class DiscordRenderer(mistune.renderers.markdown.MarkdownRenderer):
"""Custom renderer for markdown to discord compatible markdown."""

def __init__(self, repo: str | None = None) -> None:
super().__init__()
self._repo = (repo or "").rstrip("/")

def text(self, text: str) -> str:
@override
def text(self, token: RenderToken, state: BlockState) -> str:
"""Replace GitHub links with their expanded versions."""
text: str = token["raw"]
if self._repo:
# TODO: expand this to all different varieties of automatic links
# FIXME: this shouldn't expand shorthands inside []() links
# if a repository is provided, replace each issue shorthand with a markdown link to that issue in the repository
def replacement(match: re.Match[str]) -> str:
return self.link(self._repo + "/issues/" + match[1], text=match[0])
full, num = match[0], match[1]
url = f"{self._repo}/issues/{num}"
# NOTE: until the above fixme is resolved, we can't use self.link here,
# since it would recurse indefinitely.
return f"[{full}]({url})"

text = GH_ISSUE_RE.sub(replacement, text)
return text

def link(self, link: str, text: str | None = None, title: str | None = None) -> str:
"""Properly format a link."""
if text or title:
if not text:
text = link
if title:
paran = f'({link} "{title}")'
else:
paran = f"({link})"
return f"[{text}]{paran}"
else:
return link

def image(self, src: str, alt: str | None = None, title: str | None = None) -> str:
"""Return a link to the provided image."""
return "!" + self.link(src, text="image", title=alt)

def emphasis(self, text: str) -> str:
"""Return italiced text."""
return f"*{text}*"

def strong(self, text: str) -> str:
"""Return bold text."""
return f"**{text}**"

def strikethrough(self, text: str) -> str:
# Discord renders links regardless of whether it's `link` or `<link>`
@override
def link(self, token: RenderToken, state: BlockState) -> str:
"""Format links, removing unnecessary angle brackets."""
s = super().link(token, state)
if s.startswith("<") and s.endswith(">"):
s = s[1:-1]
return s

# provided by plugin, so not part of base MarkdownRenderer
def strikethrough(self, token: RenderToken, state: BlockState) -> str:
"""Return crossed-out text."""
text = self.render_children(token, state)
return f"~~{text}~~"

def heading(self, text: str, level: int) -> str:
@override
def heading(self, token: RenderToken, state: BlockState) -> str:
"""Format the heading normally if it's large enough, or underline it."""
level: int = token["attrs"]["level"]
text = self.render_children(token, state)
if level in (1, 2, 3):
return "#" * level + f" {text.strip()}\n"
else:
# TODO: consider `-# __text__` for level 5 (smallest) headings?
return f"__{text}__\n"

def newline(self) -> str:
"""No op."""
@override
def inline_html(self, token: RenderToken, state: BlockState) -> str:
"""No op, Discord doesn't render HTML."""
return ""

# this is for forced breaks like `text \ntext`; for Discord these are rendered as a plain newline
def linebreak(self) -> str:
"""Return a new line."""
return "\n"

def inline_html(self, html: str) -> str:
"""No op."""
return ""

def thematic_break(self) -> str:
"""No op."""
@override
def thematic_break(self, token: RenderToken, state: BlockState) -> str:
"""No op, Discord doesn't render breaks as horizontal rules."""
return ""

def block_text(self, text: str) -> str:
"""Return text in lists as-is."""
return text + "\n"
# Block code can be fenced by 3+ backticks or 3+ tildes, or be an indented block.
# Discord only renders code blocks with exactly 3 backticks, so we have to force this format.
@override
def block_code(self, token: RenderToken, state: BlockState) -> str:
"""Put code in a codeblock with triple backticks."""
code: str = token["raw"]
info: str | None = token.get("attrs", {}).get("info")

def block_code(self, code: str, info: str | None = None) -> str:
"""Put the code in a codeblock."""
md = "```"
if info is not None:
info = info.strip()
if info:
lang = info.split(None, 1)[0]
md += lang
lang = info.strip().split(None, 1)[0]
if lang:
md += lang
md += "\n"

return md + code.replace("`" * 3, "`\u200b" * 3) + "\n```\n"

def block_quote(self, text: str) -> str:
"""Quote the provided text."""
if text:
return "> " + "> ".join(text.rstrip().splitlines(keepends=True)) + "\n\n"
@override
def block_html(self, token: RenderToken, state: BlockState) -> str:
"""No op, Discord doesn't render HTML."""
return ""

def block_html(self, html: str) -> str:
@override
def block_error(self, token: RenderToken, state: BlockState) -> str:
"""No op."""
return ""

def block_error(self, html: str) -> str:
"""No op."""
return ""

def codespan(self, text: str) -> str:
# Codespans can be delimited with two backticks as well, which allows having
# single backticks in the contents.
# Additionally, the delimiters may include one space, e.g. "`` text ``", for text that starts/ends
# with a backtick. Mistune strips these spaces, but we need them to avoid breaking formatting.
# Discord renders these spaces (even though it shouldn't), but that's better than no formatting at all.
# TODO: instead of spaces, we could use \u200b?
@override
def codespan(self, token: RenderToken, state: BlockState) -> str:
"""Return the text in a codeblock."""
char = "``" if "`" in text else "`"
return char + text + char

def paragraph(self, text: str) -> str:
"""Return a paragraph with a newline postceeding."""
return f"{text}\n\n"

def list(self, text: str, ordered: bool, level: int, start: Any = None) -> str:
"""Return the unedited list."""
# TODO: figure out how this should actually work
if level == 1:
return text.lstrip("\n") + "\n"
return text
text: str = token["raw"]

delim = "``" if "`" in text else "`"

def list_item(self, text: str, level: int) -> str:
"""Show the list, indented to its proper level."""
lines = text.rstrip().splitlines()
if text.startswith("`") or text.endswith("`"):
text = f" {text} "

prefix = "- "
result: list[str] = [prefix + lines[0]]
return delim + text + delim

# just add one level of indentation; any outer lists will indent this again as needed
indent = " " * len(prefix)
in_codeblock = "```" in lines[0]
for line in lines[1:]:
if not line.strip():
# whitespace-only lines can be rendered as empty
result.append("")
continue
@override
def list(self, token: RenderToken, state: BlockState) -> str:
"""Render lists for Discord's (relatively limited subset of) markdown.

if in_codeblock:
# don't indent lines inside codeblocks
result.append(line)
else:
result.append(indent + line)
This includes:
- For ordered lists, enforce 1. instead of 1)
- For unordered lists, enforce - instead of * or +
- Discord technically supports *, but might as well use - for all of them
- Always use "tight" list spacing, Discord does not render loose list items properly

# check this at the end, since the first codeblock line should generally be indented
if "```" in line:
in_codeblock = not in_codeblock
Moreover, this renders list items with the generic token renderer instead of directly
calling into list_item(), which allows custom list items (such as `task_list_item`)
to work (unlike the builtin list renderer :( ).
"""
prefix_gen: Iterator[str]
if token["attrs"]["ordered"]:
start = token["attrs"].get("start", 1)
prefix_gen = (f"{i}. " for i in itertools.count(start))
else:
prefix_gen = itertools.repeat("- ")

text = ""
for child, prefix in zip(token["children"], prefix_gen, strict=False):
child = {**child, "parent": {"leading": prefix}}
text += self.render_token(child, state)

# if this is a nested list, strip trailing newlines
if token.get("parent"):
text = text.rstrip()
return text + "\n"

return "\n".join(result) + "\n"
def list_item(self, token: RenderToken, state: BlockState) -> str:
"""Render a single list item.

def task_list_item(self, text: Any, level: int, checked: bool = False, **attrs) -> str:
"""Convert task list options to emoji."""
See `list()` above for details.
"""
for child in token["children"]:
# force tight list
if child["type"] == "paragraph":
child["type"] = "block_text"

return mistune.renderers._list._render_list_item(self, token["parent"], token, state)

def task_list_item(self, token: RenderToken, state: BlockState) -> str:
"""Render a task list item, e.g. `- [x] stuff`."""
checked: bool = token["attrs"]["checked"]
emoji = constants.Emojis.confirmation if checked else constants.Emojis.no_choice_light
return self.list_item(emoji + " " + text, level=level)

def finalize(self, data: Any) -> str:
"""Finalize the data."""
return "".join(data)
prefix = {"type": "text", "raw": f"{emoji} "}
token["children"].insert(0, prefix)

# treat this like a normal list item now
return self.list_item(token, state)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ dependencies = [
"httpx-aiohttp>=0.1.9",
"lxml>=5.4.0,<7.0.0",
"markdownify~=1.1.0",
"mistune<3.0.0,>=2.0.4",
"mistune<4.0.0,>=3.2.0",
"msgpack<2.0.0,>=1.1.0",
"orjson<4.0.0,>=3.10.18",
"Pillow<12.0,>=11.2",
Expand Down
11 changes: 7 additions & 4 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.