|
1 | 1 | import type { MetadataRoute } from 'next' |
2 | 2 | import { getBaseUrl } from '@/lib/core/utils/urls' |
3 | 3 |
|
| 4 | +/** |
| 5 | + * Default disallow list, shared by the wildcard (`*`) rule and the explicitly listed search/AI crawler rule. Blocks |
| 6 | + * authenticated surfaces, internal endpoints, and one-time-use links. The `/blog*tag=` entry relies on robots.txt `*` wildcard matching to cover tag-filtered blog URLs. |
| 7 | + */ |
| 8 | +const DISALLOWED_PATHS = [ |
| 9 | + '/api/', |
| 10 | + '/workspace/', |
| 11 | + '/chat/', |
| 12 | + '/playground/', |
| 13 | + '/resume/', |
| 14 | + '/invite/', |
| 15 | + '/unsubscribe/', |
| 16 | + '/w/', |
| 17 | + '/form/', |
| 18 | + '/credential-account/', |
| 19 | + '/_next/', |
| 20 | + '/private/', |
| 21 | + '/blog*tag=', |
| 22 | +] |
| 23 | + |
| 24 | +/** |
| 25 | + * Shorter (less restrictive) disallow list for link-preview bots. They fetch single URLs to |
| 26 | + * render Open Graph cards rather than crawl, so publicly-shareable surfaces |
| 27 | + * like /chat/, /form/, and /w/ must be reachable for previews to render. NOTE(review): compared with DISALLOWED_PATHS this list also omits /playground/ and /blog*tag= — confirm that is intentional. |
| 28 | + */ |
| 29 | +const LINK_PREVIEW_DISALLOWED_PATHS = [ |
| 30 | + '/api/', |
| 31 | + '/workspace/', |
| 32 | + '/resume/', |
| 33 | + '/invite/', |
| 34 | + '/unsubscribe/', |
| 35 | + '/credential-account/', |
| 36 | + '/_next/', |
| 37 | + '/private/', |
| 38 | +] |
| 39 | + |
| 40 | +/** |
| 41 | + * Search engines and AI/answer-engine crawlers explicitly allow-listed for |
| 42 | + * SEO/AEO/GEO (search, answer-engine, and generative-engine optimization). Explicit Allow rules ensure these bots are not accidentally |
| 43 | + * suppressed by downstream filters and signal intent to operators that |
| 44 | + * publish allow-list audits (Profound, Scrunch, Otterly, etc.). This group is given the same allow/disallow values as the wildcard rule; it exists to name these user agents explicitly. |
| 45 | + */ |
| 46 | +const SEARCH_AND_AI_BOTS = [ |
| 47 | + // Western search engines |
| 48 | + 'Googlebot', |
| 49 | + 'Bingbot', |
| 50 | + 'DuckDuckBot', |
| 51 | + 'Kagibot', |
| 52 | + 'Bravebot', |
| 53 | + // Regional search engines |
| 54 | + 'YandexBot', |
| 55 | + 'Baiduspider', |
| 56 | + 'Sogou web spider', |
| 57 | + 'Yeti', |
| 58 | + 'SeznamBot', |
| 59 | + 'PetalBot', |
| 60 | + // OpenAI |
| 61 | + 'GPTBot', |
| 62 | + 'OAI-SearchBot', |
| 63 | + 'ChatGPT-User', |
| 64 | + // Anthropic |
| 65 | + 'ClaudeBot', |
| 66 | + 'Claude-SearchBot', |
| 67 | + 'Claude-User', |
| 68 | + // Google AI |
| 69 | + 'Google-Extended', |
| 70 | + // Perplexity |
| 71 | + 'PerplexityBot', |
| 72 | + 'Perplexity-User', |
| 73 | + // Apple |
| 74 | + 'Applebot', |
| 75 | + 'Applebot-Extended', |
| 76 | + // Meta |
| 77 | + 'Meta-ExternalAgent', |
| 78 | + 'Meta-ExternalFetcher', |
| 79 | + 'FacebookBot', |
| 80 | + // Other major AI / answer engines |
| 81 | + 'Amazonbot', |
| 82 | + 'CCBot', |
| 83 | + 'cohere-ai', |
| 84 | + 'cohere-training-data-crawler', |
| 85 | + 'Grok-web-crawl', |
| 86 | + 'MistralAI-User', |
| 87 | + 'DeepSeek-AI', |
| 88 | + 'YouBot', |
| 89 | + 'Diffbot', |
| 90 | + 'AI2Bot', |
| 91 | + 'Timpibot', |
| 92 | + 'ImageSiftBot', |
| 93 | +] |
| 94 | + |
| 95 | +/** |
| 96 | + * Social and messaging platforms that fetch URLs to render link previews |
| 97 | + * (Open Graph / Twitter Card images). These need access to publicly-shared |
| 98 | + * surfaces like /chat/, /form/, and /w/ that DISALLOWED_PATHS blocks, so their rule is paired with LINK_PREVIEW_DISALLOWED_PATHS instead. |
| 99 | + */ |
| 100 | +const LINK_PREVIEW_BOTS = [ |
| 101 | + 'Twitterbot', |
| 102 | + 'LinkedInBot', |
| 103 | + 'Slackbot', |
| 104 | + 'Slack-ImgProxy', |
| 105 | + 'Discordbot', |
| 106 | + 'TelegramBot', |
| 107 | + 'WhatsApp', |
| 108 | + 'facebookexternalhit', |
| 109 | + 'Pinterestbot', |
| 110 | + 'redditbot', |
| 111 | +] |
| 112 | + |
4 | 113 | export default function robots(): MetadataRoute.Robots { |
5 | 114 | const baseUrl = getBaseUrl() |
6 | 115 |
|
7 | 116 | return { |
8 | 117 | rules: [ |
| 118 | + { userAgent: '*', allow: '/', disallow: DISALLOWED_PATHS }, |
| 119 | + { userAgent: SEARCH_AND_AI_BOTS, allow: '/', disallow: DISALLOWED_PATHS }, |
9 | 120 | { |
10 | | - userAgent: '*', |
| 121 | + userAgent: LINK_PREVIEW_BOTS, |
11 | 122 | allow: '/', |
12 | | - disallow: [ |
13 | | - '/api/', |
14 | | - '/workspace/', |
15 | | - '/chat/', |
16 | | - '/playground/', |
17 | | - '/resume/', |
18 | | - '/invite/', |
19 | | - '/unsubscribe/', |
20 | | - '/w/', |
21 | | - '/form/', |
22 | | - '/credential-account/', |
23 | | - '/_next/', |
24 | | - '/private/', |
25 | | - '/blog*tag=', |
26 | | - ], |
| 123 | + disallow: LINK_PREVIEW_DISALLOWED_PATHS, |
27 | 124 | }, |
28 | 125 | ], |
29 | 126 | sitemap: [`${baseUrl}/sitemap.xml`, `${baseUrl}/blog/sitemap-images.xml`], |
|
0 commit comments