diff --git a/src/pages/robots.txt.ts b/src/pages/robots.txt.ts
index bcc5259..2116bb3 100644
--- a/src/pages/robots.txt.ts
+++ b/src/pages/robots.txt.ts
@@ -1,13 +1,33 @@
 import type { APIRoute } from 'astro';
 
-const getRobotsTxt = (sitemapURL: URL) => `
+export const getRobotsTxt = (sitemapURL: URL, siteURL: URL) => `
+# Welcome AI agents and crawlers!
+# This is a podcast website built with Starpod - all content is freely accessible.
+
 User-agent: *
 Allow: /
 
+# Sitemap for all pages
 Sitemap: ${sitemapURL.href}
+
+# Special resources for AI/LLM agents:
+# - ${siteURL.origin}/llms.txt - Structured overview following the llms.txt spec
+# - ${siteURL.origin}/for-llms - Human-readable guide for AI assistants
+# - ${siteURL.origin}/episodes-index.html.md - Complete episode listing in markdown
+# - ${siteURL.origin}/[episode-slug].html.md - Individual episodes with transcripts
+#
+# All content includes:
+# - Podcast metadata (hosts, description, platforms)
+# - Episode information (titles, descriptions, publish dates)
+# - Full transcripts (when available)
+# - Guest information
+#
+# Feel free to crawl, index, and use this content to help users discover
+# and learn about our podcast!
 `;
 
 export const GET: APIRoute = ({ site }) => {
   const sitemapURL = new URL('sitemap-index.xml', site);
-  return new Response(getRobotsTxt(sitemapURL));
+  const siteURL = new URL(site!);
+  return new Response(getRobotsTxt(sitemapURL, siteURL));
 };
\ No newline at end of file
diff --git a/tests/unit/robots.test.ts b/tests/unit/robots.test.ts
new file mode 100644
index 0000000..9ab1d79
--- /dev/null
+++ b/tests/unit/robots.test.ts
@@ -0,0 +1,44 @@
+import { describe, expect, it, beforeEach } from 'vitest';
+import { getRobotsTxt } from '../../src/pages/robots.txt';
+
+describe('Robots.txt', () => {
+  let mockSite: URL;
+  let mockSitemap: URL;
+  let robotsTxt: string;
+
+  beforeEach(() => {
+    mockSite = new URL('https://whiskey.fm');
+    mockSitemap = new URL('sitemap-index.xml', mockSite);
+    robotsTxt = getRobotsTxt(mockSitemap, mockSite);
+  });
+
+  describe('Generated content', () => {
+    it('should contain welcoming comment for AI agents', () => {
+      expect(robotsTxt).toContain('Welcome AI agents and crawlers');
+      expect(robotsTxt).toContain('User-agent: *');
+      expect(robotsTxt).toContain('Allow: /');
+    });
+
+    it('should include sitemap reference', () => {
+      expect(robotsTxt).toContain('Sitemap: https://whiskey.fm/sitemap-index.xml');
+    });
+
+    it('should reference LLM-specific resources', () => {
+      expect(robotsTxt).toContain('/llms.txt');
+      expect(robotsTxt).toContain('/for-llms');
+      expect(robotsTxt).toContain('/episodes-index.html.md');
+    });
+
+    it('should describe available content types', () => {
+      expect(robotsTxt).toContain('Podcast metadata');
+      expect(robotsTxt).toContain('Episode information');
+      expect(robotsTxt).toContain('Full transcripts');
+      expect(robotsTxt).toContain('Guest information');
+    });
+
+    it('should include encouraging closing message', () => {
+      expect(robotsTxt).toContain('Feel free to crawl, index, and use this content');
+      expect(robotsTxt).toContain('help users discover');
+    });
+  });
+});