Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions generator/_scripts/cfdoc_ip_autolink.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import re

# Matches a bare http(s) URL whose host is a dotted-quad IPv4 address.
# Group 1 is the whole URL (scheme, host, optional port, optional path).
IPV4_URL_RE = re.compile(
    r"""
    (?<![<(\[`/\w"'])           # skip if already inside <autolink>, a
                                # [text](link) -- either as the link text or as
                                # the destination --, `code`, an
                                # href="..."/'...' attribute, or a longer
                                # token (word char / slash) that we'd be splitting
    (                           # capture group 1: the URL itself
      https?://                 # scheme
      (?:\d{1,3}\.){3}\d{1,3}   # IPv4
      (?!\.?[\w-])              # the host must end here: refuse to cut a longer
                                # host short (10.0.0.1.nip.io, 1.2.3.4567), but
                                # still allow a sentence-ending "." after the IP
      (?::\d+)?                 # optional :port
      (?:/[^\s<>)\]`'"]*)?      # optional /path -- stop at whitespace or chars
                                # that typically close/quote the URL
    )
    """,
    re.VERBOSE,
)

# Matches the opening line of a fenced code block. Group 2 is the fence marker
# (the full run of backticks or tildes).
FENCE_RE = re.compile(
    r"""
    ^(\s*)                  # leading indentation (captured but unused)
    (                       # capture group 2: the fence marker
        ```+                # 3+ backticks ...
        (?![^`\n]*`)        # ... with no further backtick on the line; otherwise
                            # the line starts with an inline code span such as
                            # ```x``` and is not a fence at all
      | ~~~+                # or 3+ tildes (any info string is allowed)
    )
    """,
    re.VERBOSE,
)


def run(config):
    """Run the IPv4 autolink pass over every file in ``config["markdown_files"]``.

    Args:
        config: Mapping with a ``"markdown_files"`` entry that yields the
            paths to process.
    """
    markdown_paths = config["markdown_files"]
    for markdown_path in markdown_paths:
        process(markdown_path)


def process(file_path):
    """Rewrite one Markdown file in place with bare IPv4 URLs autolinked.

    The file is read as UTF-8, passed through ``transform``, and written back
    only when the result differs from what was read.

    Args:
        file_path: Path of the file to process.

    Raises:
        Exception: Any error raised while reading, transforming or writing is
            reported on stdout and then re-raised unchanged.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as source:
            original = source.read()

        rewritten = transform(original)

        # Leave files that need no changes untouched.
        if rewritten == original:
            return

        with open(file_path, "w", encoding="utf-8") as target:
            target.write(rewritten)
    except Exception as e:
        print(f"cfdoc_ip_autolink: error processing {file_path}: {e}")
        raise


def transform(content):
    """Autolink bare IPv4 URLs in ``content``, leaving fenced code untouched.

    The text is processed line by line. Every line from an opening code fence
    (as recognised by ``FENCE_RE``) through its closing fence is copied
    through unchanged; all other lines go through ``transform_line``.

    A fenced block is closed only by a line that, ignoring surrounding
    whitespace, consists solely of the opening fence's character repeated at
    least as many times as in the opening fence. Lines inside the block that
    merely *start* with the marker (for example a nested fence opener with an
    info string, or the marker followed by text) do not close it.

    Args:
        content: Full text of a Markdown document.

    Returns:
        The transformed text. Line endings are kept as they were.
    """
    out_lines = []
    # Marker (run of backticks/tildes) that opened the fence we are currently
    # inside, or None while outside any fenced block.
    fence_marker = None

    for line in content.splitlines(keepends=True):
        if fence_marker is not None:
            out_lines.append(line)
            candidate = line.strip()
            # A closer is a bare run of the fence character, at least as long
            # as the opener. strip(char) leaves "" only if every remaining
            # character is the fence character.
            if (
                len(candidate) >= len(fence_marker)
                and not candidate.strip(fence_marker[0])
            ):
                fence_marker = None
            continue

        opener = FENCE_RE.match(line)
        if opener:
            fence_marker = opener.group(2)
            out_lines.append(line)
            continue

        out_lines.append(transform_line(line))

    return "".join(out_lines)


# Sentence punctuation: when a matched URL ends with any of these characters
# they are kept in the surrounding prose instead of inside the link.
TRAILING_PUNCT = ".,;:!?"


def _wrap(match):
    """Return the URL in group 1 of ``match`` as a ``<url>`` autolink.

    Any run of ``TRAILING_PUNCT`` characters at the end of the URL is moved
    outside the angle brackets, in its original order.
    """
    matched = match.group(1)
    bare_url = matched.rstrip(TRAILING_PUNCT)
    punctuation = matched[len(bare_url):]
    return f"<{bare_url}>{punctuation}"


# An inline code span: a run of N backticks, then the shortest non-empty text
# that is followed by a run of exactly N backticks. Backtick runs of any other
# length inside the span do not close it, so ``a ` b`` is a single span.
_CODE_SPAN_RE = re.compile(r"(?<!`)(`+)(?!`).+?(?<!`)\1(?!`)")


def transform_line(line):
    """Autolink bare IPv4 URLs in one line, leaving inline code spans alone.

    The line is cut into inline code spans and the text between them. Code
    spans are copied through verbatim; each stretch of text between them is
    rewritten independently with ``IPV4_URL_RE`` and ``_wrap``.

    Args:
        line: A single line of Markdown (outside any fenced code block).

    Returns:
        The line with bare IPv4 URLs outside code spans wrapped as ``<url>``.
    """
    pieces = []
    cursor = 0
    for span in _CODE_SPAN_RE.finditer(line):
        # Text before this code span is eligible for autolinking ...
        pieces.append(IPV4_URL_RE.sub(_wrap, line[cursor:span.start()]))
        # ... the code span itself is not.
        pieces.append(span.group(0))
        cursor = span.end()
    # Whatever follows the last code span (or the whole line if there is none).
    pieces.append(IPV4_URL_RE.sub(_wrap, line[cursor:]))
    return "".join(pieces)
2 changes: 2 additions & 0 deletions generator/_scripts/cfdoc_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import cfdoc_shortcodes_resolver as shortcodes_resolver
import cfdoc_images_path_resolver as images_path_resolver
import cfdoc_codeblock_resolver as codeblock_resolver
import cfdoc_ip_autolink as ip_autolink
import sys
import os

Expand All @@ -50,3 +51,4 @@
# Run the preprocessing passes one after another; each is handed the same
# config object. The IPv4 autolink pass runs after the code block resolver.
shortcodes_resolver.run(config)
images_path_resolver.run(config)
codeblock_resolver.run(config)
ip_autolink.run(config)
5 changes: 5 additions & 0 deletions hugo/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ wrapperClass = 'hlc'
[markup.goldmark.renderer]
unsafe = true # Allow HTML in md files

# Automatically convert bare URLs into clickable links
# Goldmark's linkify skips URLs whose host is an IP address; those are wrapped
# as <...> autolinks by the cfdoc_ip_autolink.py pre-process script instead
[markup.goldmark.extensions]
linkify = true

[params.sitemap]
baseUrl = "https://docs.cfengine.com/docs/%branch%"

Expand Down
Loading