Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions src/lib/trafficClassification.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/**
* Shared traffic classification types and patterns.
* Used by middleware (for classification) and tracesSampler (for metrics).
*/

/**
 * Traffic type classification for metrics and sampling decisions.
 *
 * As assigned by `classifyTraffic` in the middleware:
 * - `ai_agent`: the user-agent matched `AI_AGENT_PATTERN`.
 * - `bot`: Next.js `userAgent()` reported `isBot`.
 * - `user`: a user-agent was present and matched neither of the above.
 * - `unknown`: the request carried no (or an empty) user-agent header.
 *
 * Every member needs an entry in `SAMPLE_RATES`, which is keyed by this type.
 */
export type TrafficType = 'ai_agent' | 'bot' | 'user' | 'unknown';

/**
 * AI agents we want to track for docs/markdown consumption visibility.
 * These fetch markdown content and we need performance data on serving to agentic tools.
 * Also used by middleware to decide whether to serve markdown content.
 *
 * The pattern is case-insensitive and unanchored, so it matches any user-agent
 * that contains one of the listed tokens anywhere in the string.
 * It has no `g`/`y` flag, so `.test()` keeps no state between calls.
 */
export const AI_AGENT_PATTERN =
/claude|anthropic|gptbot|chatgpt|openai|cursor|codex|copilot|perplexity|cohere|gemini/i;

/**
 * Bots/crawlers to filter out (SEO crawlers, social media, testing tools, monitors).
 * Used as fallback when Next.js isBot detection isn't available.
 *
 * Case-insensitive and unanchored: besides specific product names it contains
 * generic tokens (`crawler`, `spider`, `scraper`, `headless`) that match any
 * user-agent containing them as a substring.
 */
export const BOT_PATTERN =
/googlebot|bingbot|yandexbot|baiduspider|duckduckbot|applebot|ahrefsbot|semrushbot|dotbot|mj12bot|slackbot|twitterbot|linkedinbot|telegrambot|discordbot|facebookexternalhit|whatsapp|crawler|spider|scraper|headless|phantomjs|selenium|puppeteer|playwright|lighthouse|pagespeed|gtmetrix|pingdom|uptimerobot/i;

/**
 * Sample rates by traffic type, expressed as a fraction (0 = never, 1 = always).
 *
 * Typed as `Record<TrafficType, number>` so that adding a member to
 * `TrafficType` without a rate here is a compile error.
 * NOTE(review): presumably consumed by the tracesSampler mentioned in the
 * file header; that consumer is not visible here.
 */
export const SAMPLE_RATES: Record<TrafficType, number> = {
ai_agent: 1, // 100% - full visibility into agentic docs consumption
bot: 0, // 0% - filter out noise
user: 0.3, // 30% - reasonable sample of real users
unknown: 0.3, // 30% - same as users, but tracked separately
};

/**
 * Looks for `pattern` inside `input` and reports what was found.
 *
 * @param input - The string to search (e.g. a user-agent header value).
 * @param pattern - The regular expression to search with.
 * @returns The first matched substring, lowercased, or `undefined` when
 * `pattern` does not match anywhere in `input`.
 */
export function matchPattern(input: string, pattern: RegExp): string | undefined {
  const found = input.match(pattern);

  // `String.prototype.match` yields null (never an empty array) on no match.
  if (found === null) {
    return undefined;
  }

  // Index 0 is the full match (or the first match for a global pattern).
  return found[0].toLowerCase();
}
123 changes: 97 additions & 26 deletions src/middleware.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import * as Sentry from '@sentry/nextjs';
import type {NextRequest} from 'next/server';
import {NextResponse} from 'next/server';
import {NextResponse, userAgent} from 'next/server';

import {AI_AGENT_PATTERN, type TrafficType} from './lib/trafficClassification';

// This env var is set in next.config.js based on the `NEXT_PUBLIC_DEVELOPER_DOCS` env var at build time
// a workaround for edge middleware not having access to env vars
Expand Down Expand Up @@ -35,24 +37,56 @@ const redirectStatusCode = process.env.NODE_ENV === 'development' ? 302 : 301;

/**
* Detects if the user agent belongs to an AI/LLM tool or development environment
* that would benefit from markdown format
* that would benefit from markdown format.
* Uses shared AI_AGENT_PATTERN from trafficClassification.ts.
*/
function isAIOrDevTool(userAgent: string): boolean {
const patterns = [
/claude/i, // Claude Desktop/Code
/cursor/i, // Cursor IDE
/copilot/i, // GitHub Copilot
/chatgpt/i, // ChatGPT
/openai/i, // OpenAI tools
/anthropic/i, // Anthropic tools
/vscode/i, // VS Code extensions
/intellij/i, // IntelliJ plugins
/sublime/i, // Sublime Text plugins
/got/i, // Got HTTP library (sindresorhus/got)
// Add more patterns as needed
];
function isAIOrDevTool(userAgentString: string): boolean {
  // Single source of truth: the shared pattern also drives traffic classification.
  const matchesKnownAgent = AI_AGENT_PATTERN.test(userAgentString);
  return matchesKnownAgent;
}

/**
 * Classifies an incoming request for metrics tracking.
 * Uses Next.js userAgent() for enhanced bot detection plus custom AI agent patterns.
 *
 * Checks run in priority order and the first hit wins:
 * 1. missing/empty user-agent header -> 'unknown'
 * 2. user-agent matches AI_AGENT_PATTERN -> 'ai_agent' (also reported as isBot: true)
 * 3. Next.js reports isBot -> 'bot'
 * 4. anything else -> 'user'
 *
 * @param request - The incoming middleware request; only its headers are read.
 * @returns The traffic type, a device type label, and whether the client is automated.
 */
function classifyTraffic(request: NextRequest): {
deviceType: string;
isBot: boolean;
trafficType: TrafficType;
} {
const userAgentString = request.headers.get('user-agent');

// No user-agent = unknown traffic (the falsy check also covers an empty header value)
if (!userAgentString) {
return {trafficType: 'unknown', deviceType: 'unknown', isBot: false};
}

return patterns.some(pattern => pattern.test(userAgent));
// Use Next.js built-in userAgent() for enhanced parsing
const ua = userAgent(request);

// Check for AI agents first (higher priority than generic bot detection)
if (AI_AGENT_PATTERN.test(userAgentString)) {
return {
trafficType: 'ai_agent',
// Fall back to 'desktop' when Next.js reports no device type
deviceType: ua.device.type || 'desktop',
isBot: true,
};
}

// Use Next.js isBot detection (covers major search engines, social crawlers, etc.)
if (ua.isBot) {
return {
trafficType: 'bot',
// Bots without a detected device type are labelled 'crawler'
deviceType: ua.device.type || 'crawler',
isBot: true,
};
}

// Real user traffic - include device type for richer metrics
return {
trafficType: 'user',
// Fall back to 'desktop' when Next.js reports no device type
deviceType: ua.device.type || 'desktop',
isBot: false,
};
}

/**
Expand All @@ -70,7 +104,7 @@ function wantsMarkdownViaAccept(acceptHeader: string): boolean {
* Detects if client wants markdown via Accept header or user-agent
*/
function wantsMarkdown(request: NextRequest): boolean {
const userAgent = request.headers.get('user-agent') || '';
const uaString = request.headers.get('user-agent') || '';
const acceptHeader = request.headers.get('accept') || '';

// Strategy 1: Accept header content negotiation (standards-compliant)
Expand All @@ -79,14 +113,49 @@ function wantsMarkdown(request: NextRequest): boolean {
}

// Strategy 2: User-agent detection (fallback for tools that don't set Accept)
return isAIOrDevTool(userAgent);
return isAIOrDevTool(uaString);
}

/**
 * Builds the header set to forward downstream, tagged with the traffic classification.
 *
 * The result is a copy of the incoming REQUEST headers (not response headers) plus
 * `x-traffic-type` and `x-device-type`, so that tracesSampler can read them.
 * Intended for the `NextResponse.next({ request: { headers } })` pattern.
 *
 * @param request - The incoming middleware request.
 * @returns A new `Headers` object; the request's own headers are not mutated.
 */
function createClassifiedRequestHeaders(request: NextRequest): Headers {
  const {trafficType, deviceType} = classifyTraffic(request);

  // `set` replaces any value the client may have sent under the same name.
  const forwarded = new Headers(request.headers);
  forwarded.set('x-traffic-type', trafficType);
  forwarded.set('x-device-type', deviceType);

  return forwarded;
}

/**
 * Lets the request continue unchanged, except that the forwarded request
 * headers carry the traffic classification.
 *
 * @param request - The incoming middleware request.
 * @returns A pass-through `NextResponse` with classified request headers.
 */
function nextWithClassification(request: NextRequest): NextResponse {
  const headers = createClassifiedRequestHeaders(request);
  return NextResponse.next({request: {headers}});
}

/**
 * Rewrites the request to `destination` while forwarding request headers
 * that carry the traffic classification.
 *
 * @param request - The incoming middleware request.
 * @param destination - The URL whose content should be served instead.
 * @returns A rewrite `NextResponse` with classified request headers.
 */
function rewriteWithClassification(request: NextRequest, destination: URL): NextResponse {
  const headers = createClassifiedRequestHeaders(request);
  return NextResponse.rewrite(destination, {request: {headers}});
}

/**
* Handles redirection to markdown versions for AI/LLM clients
*/
const handleAIClientRedirect = (request: NextRequest) => {
const userAgent = request.headers.get('user-agent') || '';
const userAgentString = request.headers.get('user-agent') || '';
const acceptHeader = request.headers.get('accept') || '';
const url = request.nextUrl;

Expand All @@ -99,7 +168,7 @@ const handleAIClientRedirect = (request: NextRequest) => {
// Determine detection method for logging
const detectionMethod = wantsMarkdownViaAccept(acceptHeader)
? 'Accept header'
: isAIOrDevTool(userAgent)
: isAIOrDevTool(userAgentString)
? 'User-agent'
: 'Manual';

Expand All @@ -118,20 +187,21 @@ const handleAIClientRedirect = (request: NextRequest) => {
});
}

// Skip if already requesting a markdown file
// Skip if already requesting a markdown file - pass through with classification headers
if (url.pathname.endsWith('.md')) {
return undefined;
return nextWithClassification(request);
}

// Skip API routes and static assets (should already be filtered by matcher)
// Pass through with classification headers
if (
url.pathname.startsWith('/api/') ||
url.pathname.startsWith('/_next/') ||
/\.(js|json|png|jpg|jpeg|gif|ico|pdf|css|woff|woff2|ttf|map|xml|txt|zip|svg)$/i.test(
url.pathname
)
) {
return undefined;
return nextWithClassification(request);
}

// Check for markdown request (Accept header, user-agent, or manual)
Expand All @@ -158,10 +228,11 @@ const handleAIClientRedirect = (request: NextRequest) => {

// Rewrite to serve markdown inline (same URL, different content)
// The next.config.ts rewrite rule maps *.md to /md-exports/*.md
return NextResponse.rewrite(newUrl);
return rewriteWithClassification(request, newUrl);
}

return undefined;
// Default: pass through with traffic classification headers
return nextWithClassification(request);
};

const handleRedirects = (request: NextRequest) => {
Expand Down
Loading
Loading