# Lychee link checker configuration
# https://lychee.cli.rs/

# Accept these status codes as valid (in addition to 200)
# 403: Sites that block automated scrapers (LinkedIn, HackerNews, Medium, etc.)
# 405: POST-only endpoints (e.g. newsletter subscribe forms) return 405 to GET requests
# 429: Rate limiting (site exists but we're being throttled)
# 999: LinkedIn's custom "bot detected" status
# NOTE(review): confirm that the lychee version in use still treats 2xx
# responses as valid when `accept` is set; if the list is exhaustive in that
# version, the success range must be added here explicitly.
accept = [403, 405, 429, 999]

# Exclude known false-positive domains that block link checkers.
# Each entry is a pattern matched against the URL, so it must not contain
# stray whitespace.
exclude = [
    # Social/professional networks that block bots
    "linkedin.com",
    "archive.ph",
    "pod.link",

    # Sites that return 403 to crawlers but are actually live
    "news.ycombinator.com",
    "medium.com",
    "freepik.com",
    "www.freepik.com",
    "shnatsel.medium.com",
    "mdwdotla.medium.com",
    "shahbhargav.medium.com",

    # Newsletter subscribe endpoint (POST-only, returns 405 to GET requests from lychee)
    "corrode-newsletter.fly.dev",

    # Sites that occasionally rate-limit or block crawlers
    "rustjobs.dev",
    "mend.io",
    "npmjs.com",
    "premiumbeat.com",
    "cacm.acm.org",
    "nixos.wiki",
    "raspberrypi.com",
    "cvedetails.com",
    "volvocars.com",
    "crunchbase.com",
    "kraken.com",
    "blueorigin.com",
    "uppbeat.io",
    "gnu.org",
]

# Maximum number of concurrent link checks
max_concurrency = 16

# Timeout for each request (in seconds)
timeout = 20

# Number of retries for failed requests
max_retries = 1