getsentry · jaffrepaul · Feb 9, 2026 · Feb 2, 2026 · Feb 9, 2026 · Feb 9, 2026
diff --git a/src/lib/trafficClassification.ts b/src/lib/trafficClassification.ts
@@ -0,0 +1,43 @@
+/**
+ * Shared traffic classification types and patterns.
+ * Used by middleware (for classification) and tracesSampler (for metrics).
+ */
+
+/**
+ * Category assigned to a request; used as the key of SAMPLE_RATES and for metrics.
+ */
+export type TrafficType = 'ai_agent' | 'bot' | 'user' | 'unknown';
+
+/**
+ * AI agents we want to track for docs/markdown consumption visibility.
+ * These agents fetch markdown content, and we need performance data on serving them.
+ * Case-insensitive substring match; middleware also uses it to choose markdown responses.
+ */
+export const AI_AGENT_PATTERN =
+  /claude|anthropic|gptbot|chatgpt|openai|cursor|codex|copilot|perplexity|cohere|gemini/i;
+
+/**
+ * Bots/crawlers to filter out (SEO crawlers, social media, testing tools, monitors).
+ * Case-insensitive substring match; a fallback when Next.js isBot detection isn't available.
+ */
+export const BOT_PATTERN =
+  /googlebot|bingbot|yandexbot|baiduspider|duckduckbot|applebot|ahrefsbot|semrushbot|dotbot|mj12bot|slackbot|twitterbot|linkedinbot|telegrambot|discordbot|facebookexternalhit|whatsapp|crawler|spider|scraper|headless|phantomjs|selenium|puppeteer|playwright|lighthouse|pagespeed|gtmetrix|pingdom|uptimerobot/i;
+
+/**
+ * Sample rates by traffic type, expressed as fractions (1 = 100%, 0 = 0%).
+ */
+export const SAMPLE_RATES: Record<TrafficType, number> = {
+  ai_agent: 1, // 100% - full visibility into agentic docs consumption
+  bot: 0, // 0% - filter out noise
+  user: 0.3, // 30% - reasonable sample of real users
+  unknown: 0.3, // 30% - same as users, but tracked separately
+};
+
+/**
+ * Finds the first substring of input that matches pattern.
+ * Returns that substring lowercased, or undefined if there is no match.
+ */
+export function matchPattern(input: string, pattern: RegExp): string | undefined {
+  const match = input.match(pattern);
+  return match ? match[0].toLowerCase() : undefined;
+}
diff --git a/src/middleware.ts b/src/middleware.ts
@@ -1,6 +1,8 @@
 import * as Sentry from '@sentry/nextjs';
 import type {NextRequest} from 'next/server';
-import {NextResponse} from 'next/server';
+import {NextResponse, userAgent} from 'next/server';
+
+import {AI_AGENT_PATTERN, type TrafficType} from './lib/trafficClassification';
 
 // This env var is set in next.config.js based on the `NEXT_PUBLIC_DEVELOPER_DOCS` env var at build time
 // a workaround edge middleware not having access to env vars
@@ -35,24 +37,56 @@ const redirectStatusCode = process.env.NODE_ENV === 'development' ? 302 : 301;
 
 /**
  * Detects if the user agent belongs to an AI/LLM tool or development environment
- * that would benefit from markdown format
+ * that would benefit from markdown format.
+ * Tests the shared AI_AGENT_PATTERN; vscode/intellij/sublime/got user agents are not in it.
  */
-function isAIOrDevTool(userAgent: string): boolean {
-  const patterns = [
-    /claude/i, // Claude Desktop/Code
-    /cursor/i, // Cursor IDE
-    /copilot/i, // GitHub Copilot
-    /chatgpt/i, // ChatGPT
-    /openai/i, // OpenAI tools
-    /anthropic/i, // Anthropic tools
-    /vscode/i, // VS Code extensions
-    /intellij/i, // IntelliJ plugins
-    /sublime/i, // Sublime Text plugins
-    /got/i, // Got HTTP library (sindresorhus/got)
-    // Add more patterns as needed
-  ];
+function isAIOrDevTool(userAgentString: string): boolean {
+  return AI_AGENT_PATTERN.test(userAgentString);
+}
+
+/**
+ * Classifies a request as 'unknown', 'ai_agent', 'bot', or 'user' for metrics tracking.
+ * AI agents are matched before Next.js isBot, so they are never reported as plain 'bot'.
+ */
+function classifyTraffic(request: NextRequest): {
+  deviceType: string;
+  isBot: boolean;
+  trafficType: TrafficType;
+} {
+  const userAgentString = request.headers.get('user-agent');
+
+  // Missing or empty user-agent header = unknown traffic
+  if (!userAgentString) {
+    return {trafficType: 'unknown', deviceType: 'unknown', isBot: false};
+  }
 
-  return patterns.some(pattern => pattern.test(userAgent));
+  // Parse with Next.js built-in userAgent() to get isBot and device.type
+  const ua = userAgent(request);
+
+  // AI agents take priority over generic bot detection; they still report isBot: true
+  if (AI_AGENT_PATTERN.test(userAgentString)) {
+    return {
+      trafficType: 'ai_agent',
+      deviceType: ua.device.type || 'desktop',
+      isBot: true,
+    };
+  }
+
+  // Fall back to Next.js isBot detection (covers major search engines, social crawlers, etc.)
+  if (ua.isBot) {
+    return {
+      trafficType: 'bot',
+      deviceType: ua.device.type || 'crawler',
+      isBot: true,
+    };
+  }
+
+  // Everything else counts as a real user; device type falls back to 'desktop' when unset
+  return {
+    trafficType: 'user',
+    deviceType: ua.device.type || 'desktop',
+    isBot: false,
+  };
 }
 
 /**
@@ -70,7 +104,7 @@ function wantsMarkdownViaAccept(acceptHeader: string): boolean {
  * Detects if client wants markdown via Accept header or user-agent
  */
 function wantsMarkdown(request: NextRequest): boolean {
-  const userAgent = request.headers.get('user-agent') || '';
+  const uaString = request.headers.get('user-agent') || '';
   const acceptHeader = request.headers.get('accept') || '';
 
   // Strategy 1: Accept header content negotiation (standards-compliant)
@@ -79,14 +113,49 @@ function wantsMarkdown(request: NextRequest): boolean {
   }
 
   // Strategy 2: User-agent detection (fallback for tools that don't set Accept)
-  return isAIOrDevTool(userAgent);
+  return isAIOrDevTool(uaString);
+}
+
+/**
+ * Copies the request headers and sets x-traffic-type / x-device-type from classifyTraffic().
+ * These headers are added to the REQUEST (not response) so tracesSampler can read them.
+ * Pass the result as { request: { headers } } to NextResponse.next() or rewrite().
+ */
+function createClassifiedRequestHeaders(request: NextRequest): Headers {
+  const classification = classifyTraffic(request);
+  const headers = new Headers(request.headers);
+  headers.set('x-traffic-type', classification.trafficType);
+  headers.set('x-device-type', classification.deviceType);
+  return headers;
+}
+
+/**
+ * Returns NextResponse.next() with the classified request headers applied to the request.
+ */
+function nextWithClassification(request: NextRequest): NextResponse {
+  return NextResponse.next({
+    request: {
+      headers: createClassifiedRequestHeaders(request),
+    },
+  });
+}
+
+/**
+ * Rewrites to destination, applying the classified request headers to the request.
+ */
+function rewriteWithClassification(request: NextRequest, destination: URL): NextResponse {
+  return NextResponse.rewrite(destination, {
+    request: {
+      headers: createClassifiedRequestHeaders(request),
+    },
+  });
 }
 
 /**
  * Handles redirection to markdown versions for AI/LLM clients
  */
 const handleAIClientRedirect = (request: NextRequest) => {
-  const userAgent = request.headers.get('user-agent') || '';
+  const userAgentString = request.headers.get('user-agent') || '';
   const acceptHeader = request.headers.get('accept') || '';
   const url = request.nextUrl;
 
@@ -99,7 +168,7 @@ const handleAIClientRedirect = (request: NextRequest) => {
   // Determine detection method for logging
   const detectionMethod = wantsMarkdownViaAccept(acceptHeader)
     ? 'Accept header'
-    : isAIOrDevTool(userAgent)
+    : isAIOrDevTool(userAgentString)
       ? 'User-agent'
       : 'Manual';
 
@@ -118,20 +187,21 @@ const handleAIClientRedirect = (request: NextRequest) => {
     });
   }
 
-  // Skip if already requesting a markdown file
+  // Skip if already requesting a markdown file - pass through with classification headers
   if (url.pathname.endsWith('.md')) {
-    return undefined;
+    return nextWithClassification(request);
   }
 
   // Skip API routes and static assets (should already be filtered by matcher)
+  // Pass through with classification headers
   if (
     url.pathname.startsWith('/api/') ||
     url.pathname.startsWith('/_next/') ||
     /\.(js|json|png|jpg|jpeg|gif|ico|pdf|css|woff|woff2|ttf|map|xml|txt|zip|svg)$/i.test(
       url.pathname
     )
   ) {
-    return undefined;
+    return nextWithClassification(request);
   }
 
   // Check for markdown request (Accept header, user-agent, or manual)
@@ -158,10 +228,11 @@ const handleAIClientRedirect = (request: NextRequest) => {
 
     // Rewrite to serve markdown inline (same URL, different content)
     // The next.config.ts rewrite rule maps *.md to /md-exports/*.md
-    return NextResponse.rewrite(newUrl);
+    return rewriteWithClassification(request, newUrl);
   }
 
-  return undefined;
+  // Default: pass through with traffic classification headers
+  return nextWithClassification(request);
 };
 
 const handleRedirects = (request: NextRequest) => {