|
| 1 | +import re |
| 2 | +import sys |
| 3 | +from pathlib import Path |
| 4 | + |
| 5 | + |
def _mask_code_spans(text):
    """Return *text* with fenced and inline code blanked out, length preserved."""

    def _blank(match):
        # Replace everything except newlines with spaces so that every
        # character offset (and therefore every line number) in the masked
        # text still lines up with the original text.
        return re.sub(r"[^\n]", " ", match.group(0))

    # Fenced code blocks ```...``` (including ```{.md} etc.) first, then
    # inline code `...` in whatever remains.
    masked = re.sub(r"```[\s\S]*?```", _blank, text)
    return re.sub(r"`[^`]*`", _blank, masked)


def find_images_without_alt(file_path):
    """Find images in a qmd file that are missing alt text.

    Two kinds of image are checked, and anything inside fenced code blocks
    or inline code spans is ignored for both:

    * markdown images ``![](path)`` whose alt text is empty or whitespace
      and which have no ``{... fig-alt=...}`` attribute later on the line;
    * HTML ``<img ...>`` tags that carry no ``alt=`` attribute.

    Args:
        file_path: Path of the file to scan; it is read as UTF-8.

    Returns:
        A list of ``(kind, line_num, text)`` tuples, where ``kind`` is
        ``"markdown"`` or ``"html"``, ``line_num`` is the 1-based line in
        the file on which the image starts, and ``text`` is the matched
        source text. All markdown issues come before all HTML issues.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Search a masked copy rather than a copy with code *removed*: removing
    # code shifts every later offset, which made the reported line numbers
    # wrong for any image that followed a code block or inline code span.
    searchable = _mask_code_spans(content)

    # Markdown image with empty/whitespace alt text.
    # Negative lookahead (?!.*?\{[^}]*fig-alt=) rejects images that are
    # followed by an attribute block containing fig-alt.
    md_pattern = r"(?<!\[)!\[(?:\s*)\]\([^)]+\)(?!.*?\{[^}]*fig-alt=)"

    # <img ...> tag that does not contain alt= anywhere before its closing >.
    img_pattern = r"<img\s+(?![^>]*\balt=)[^>]*>"

    checks = (
        ("markdown", md_pattern, 0),
        ("html", img_pattern, re.IGNORECASE),
    )

    issues = []
    for kind, pattern, flags in checks:
        for match in re.finditer(pattern, searchable, flags):
            line_num = searchable.count("\n", 0, match.start()) + 1
            # Report the text as written in the file, not the masked copy.
            snippet = content[match.start():match.end()]
            issues.append((kind, line_num, snippet))

    return issues
| 39 | + |
| 40 | + |
# Walk every .qmd file below the current directory and report images
# that find_images_without_alt() flags as lacking alt text.
qmd_files = Path(".").rglob("*.qmd")
found_issues = False

for file in qmd_files:
    issues = find_images_without_alt(file)
    if not issues:
        # Nothing to report for this file.
        continue

    found_issues = True
    print(f"\n{file}:")
    for img_type, line_num, img in issues:
        print(f" Line {line_num} [{img_type}]: {img}")

if found_issues:
    # Non-zero exit status so a CI/CD job fails when anything was reported.
    sys.exit(1)

print("✓ All images have alt text!")
0 commit comments