diff --git a/generator/_scripts/cfdoc_ip_autolink.py b/generator/_scripts/cfdoc_ip_autolink.py
new file mode 100644
index 000000000..8720cc7ec
--- /dev/null
+++ b/generator/_scripts/cfdoc_ip_autolink.py
@@ -0,0 +1,113 @@
+import re
+
+# Matches bare http(s) URLs whose host is a dotted-quad IPv4 address.
+# NOTE(review): the look-behind character class was rebuilt from its
+# description; confirm it lists every intended "already linked" prefix.
+IPV4_URL_RE = re.compile(
+    r"""
+    (?<![<(\[`"'\w/])            # not preceded by <...>, [text](link),
+                                 # `code`, an href="..."/'...' attribute, or a longer
+                                 # token (word char / slash) that we'd be splitting
+    (                            # capture group 1: the URL itself
+        https?://                # scheme
+        (?:\d{1,3}\.){3}\d{1,3}  # IPv4
+        (?!\d|\.\w)              # host ends here: no further digit or .label
+        (?::\d+)?                # optional :port
+        (?:/[^\s<>)\]`'"]*)?     # optional /path — stop at whitespace or chars
+                                 # that typically close/quote the URL
+    )
+    """,
+    re.VERBOSE,
+)
+
+# Detects the opening line of a fenced code block.
+FENCE_RE = re.compile(
+    r"""
+    ^(\s*)         # leading indentation (captured but unused)
+    (```+|~~~+)    # fence marker: 3+ backticks or 3+ tildes
+    """,
+    re.VERBOSE,
+)
+
+
+def run(config):
+    """Autolink bare IPv4 URLs in every file of config["markdown_files"]."""
+    for file in config["markdown_files"]:
+        process(file)
+
+
+def process(file_path):
+    """Rewrite file_path in place when transform() changes its content.
+
+    Errors are reported on stdout with the file name, then re-raised.
+    """
+    try:
+        # newline="" turns off newline translation so the file keeps its
+        # original line endings (e.g. CRLF) when it is written back.
+        with open(file_path, "r", encoding="utf-8", newline="") as f:
+            content = f.read()
+
+        transformed = transform(content)
+
+        if transformed != content:
+            with open(file_path, "w", encoding="utf-8", newline="") as f:
+                f.write(transformed)
+    except Exception as e:
+        print(f"cfdoc_ip_autolink: error processing {file_path}: {e}")
+        raise
+
+
+def transform(content):
+    """Return content with bare IPv4 URLs wrapped in <...> autolinks.
+
+    Lines inside fenced code blocks (``` or ~~~) are copied unchanged; all
+    other lines go through transform_line().
+    """
+    out_lines = []
+    fence_marker = None  # opening fence string while inside a fenced block
+
+    for line in content.splitlines(keepends=True):
+        if fence_marker is not None:
+            out_lines.append(line)
+            # A closing fence repeats the opening character at least as many
+            # times and carries nothing else (so "```python" does not close).
+            stripped = line.strip()
+            only_fence_chars = set(stripped) == {fence_marker[0]}
+            if only_fence_chars and stripped.startswith(fence_marker):
+                fence_marker = None
+            continue
+
+        m = FENCE_RE.match(line)
+        # A backtick fence cannot have backticks in its info string, so a line
+        # such as "```x``` text" is inline code rather than a fence opener.
+        if m and not (m.group(2)[0] == "`" and "`" in line[m.end():]):
+            fence_marker = m.group(2)
+            out_lines.append(line)
+            continue
+
+        out_lines.append(transform_line(line))
+
+    return "".join(out_lines)
+
+
+# Stripped from the end of a URL so sentence punctuation stays in the prose.
+TRAILING_PUNCT = ".,;:!?"
+
+
+def _wrap(match):
+    """Turn the URL in group 1 into a <...> autolink for re.sub()."""
+    matched = match.group(1)
+    url = matched.rstrip(TRAILING_PUNCT)
+    # Sentence punctuation removed above goes back *after* the closing
+    # bracket so it stays in the prose instead of inside the link.
+    return f"<{url}>{matched[len(url):]}"
+
+
+def transform_line(line):
+    """Autolink IPv4 URLs in one line, leaving inline `code` spans alone."""
+    # re.split() with a capturing group alternates prose and code spans:
+    # even indexes are prose, odd indexes are the backtick-delimited spans.
+    segments = re.split(r"(`+[^`\n]*`+)", line)
+    prose = segments[::2]
+    segments[::2] = [IPV4_URL_RE.sub(_wrap, text) for text in prose]
+    return "".join(segments)
diff --git a/generator/_scripts/cfdoc_preprocess.py b/generator/_scripts/cfdoc_preprocess.py index 4c04a7487..20820c546 100755 --- a/generator/_scripts/cfdoc_preprocess.py +++ b/generator/_scripts/cfdoc_preprocess.py @@ -33,6 +33,7 @@ import cfdoc_shortcodes_resolver as shortcodes_resolver import cfdoc_images_path_resolver as images_path_resolver import cfdoc_codeblock_resolver as codeblock_resolver +import cfdoc_ip_autolink as ip_autolink import sys import os @@ -50,3 +51,4 @@ shortcodes_resolver.run(config) images_path_resolver.run(config) codeblock_resolver.run(config) +ip_autolink.run(config) diff --git a/hugo/config.toml b/hugo/config.toml index aa18a86e8..feea1bdb3 100644 --- a/hugo/config.toml +++ b/hugo/config.toml @@ -42,6 +42,11 @@ wrapperClass = 'hlc' [markup.goldmark.renderer] unsafe = true # Allow HTML in md files +# Automatically convert bare URLs into clickable links +# linkify skips IP addresses, which are handled by pre-process scripts +[markup.goldmark.extensions] + linkify = true + [params.sitemap] baseUrl = "https://docs.cfengine.com/docs/%branch%"