diff --git a/.claude/worktrees/llm-gateway-fixes b/.claude/worktrees/llm-gateway-fixes
new file mode 160000
index 000000000..d6dc4f11b
--- /dev/null
+++ b/.claude/worktrees/llm-gateway-fixes
@@ -0,0 +1 @@
+Subproject commit d6dc4f11bfa37e875142c512149c9c49df55ab3e
diff --git a/cloudflare-ai-attribution/src/ai-attribution.worker.ts b/cloudflare-ai-attribution/src/ai-attribution.worker.ts
index 1a529de01..3b149d402 100644
--- a/cloudflare-ai-attribution/src/ai-attribution.worker.ts
+++ b/cloudflare-ai-attribution/src/ai-attribution.worker.ts
@@ -30,7 +30,6 @@ export type HonoContext = {
 
 const app = new Hono<HonoContext>();
 
-// @ts-expect-error workers-tagged-logger returns Handler typed against an older hono; incompatible with hono 4.12+
 app.use('*', useWorkersLogger('ai-attribution'));
 
 // Health check endpoint (no auth required)
diff --git a/cloudflare-o11y/src/api-metrics-routes.ts b/cloudflare-o11y/src/api-metrics-routes.ts
index 9fb28c134..34caa9ced 100644
--- a/cloudflare-o11y/src/api-metrics-routes.ts
+++ b/cloudflare-o11y/src/api-metrics-routes.ts
@@ -1,36 +1,8 @@
 import type { Hono } from 'hono';
-import { z } from 'zod';
-import { zodJsonValidator } from '@kilocode/worker-utils';
+import { zodJsonValidator, ApiMetricsParamsSchema } from '@kilocode/worker-utils';
 import { writeApiMetricsDataPoint } from './o11y-analytics';
 import { requireAdmin } from './admin-middleware';
 
-export const ApiMetricsParamsSchema = z.object({
-	kiloUserId: z.string().min(1),
-	organizationId: z.string().min(1).optional(),
-	isAnonymous: z.boolean(),
-	isStreaming: z.boolean(),
-	userByok: z.boolean(),
-	mode: z.string().min(1).optional(),
-	provider: z.string().min(1),
-	inferenceProvider: z.string().optional().default(''),
-	requestedModel: z.string().min(1),
-	resolvedModel: z.string().min(1),
-	toolsAvailable: z.array(z.string().min(1)),
-	toolsUsed: z.array(z.string().min(1)),
-	ttfbMs: z.number().int().nonnegative(),
-	completeRequestMs: z.number().int().nonnegative(),
-	statusCode: z.number().int().min(100).max(599),
-	tokens: z
-		.object({
-			inputTokens: z.number().int().nonnegative().optional(),
-			outputTokens: z.number().int().nonnegative().optional(),
-			cacheWriteTokens: z.number().int().nonnegative().optional(),
-			cacheHitTokens: z.number().int().nonnegative().optional(),
-			totalTokens: z.number().int().nonnegative().optional(),
-		})
-		.optional(),
-});
-
 export function registerApiMetricsRoutes(app: Hono<{ Bindings: Env }>): void {
 	app.post('/ingest/api-metrics', requireAdmin, zodJsonValidator(ApiMetricsParamsSchema), async (c) => {
 		const params = c.req.valid('json');
diff --git a/cloudflare-o11y/src/index.ts b/cloudflare-o11y/src/index.ts
index 57b3c6d7d..52dd40055 100644
--- a/cloudflare-o11y/src/index.ts
+++ b/cloudflare-o11y/src/index.ts
@@ -1,11 +1,12 @@
 import { WorkerEntrypoint } from 'cloudflare:workers';
 import { Hono } from 'hono';
+import { ApiMetricsParamsSchema, SessionMetricsParamsSchema } from '@kilocode/worker-utils';
+import type { ApiMetricsParams, SessionMetricsParams } from '@kilocode/worker-utils';
 import { registerApiMetricsRoutes } from './api-metrics-routes';
 import { evaluateAlerts } from './alerting/evaluate';
 import { registerAlertingConfigRoutes } from './alerting/config-routes';
-import { SessionMetricsParamsSchema } from './session-metrics-schema';
-import type { SessionMetricsParams } from './session-metrics-schema';
 import { writeSessionMetricsDataPoint } from './session-metrics-analytics';
+import { writeApiMetricsDataPoint } from './o11y-analytics';
 
 export { AlertConfigDO } from './alerting/AlertConfigDO';
 
@@ -28,4 +29,10 @@ export default class extends WorkerEntrypoint<Env> {
 		const parsed = SessionMetricsParamsSchema.parse(params);
 		await writeSessionMetricsDataPoint(parsed, this.env);
 	}
+
+	/** RPC method called by llm-gateway via service binding; rejects when `params` fails ApiMetricsParamsSchema validation. */
+	async ingestApiMetrics(params: ApiMetricsParams): Promise<void> {
+		const parsed = ApiMetricsParamsSchema.parse(params);
+		writeApiMetricsDataPoint(parsed, 'kilo-gateway', this.env, (p) => this.ctx.waitUntil(p));
+	}
 }
diff --git a/cloudflare-o11y/src/o11y-analytics.ts b/cloudflare-o11y/src/o11y-analytics.ts
index d492b4987..515c5b0d3 100644
--- a/cloudflare-o11y/src/o11y-analytics.ts
+++ b/cloudflare-o11y/src/o11y-analytics.ts
@@ -1,7 +1,4 @@
-import type { z } from 'zod';
-import type { ApiMetricsParamsSchema } from './api-metrics-routes';
-
-type ApiMetricsParams = z.infer<typeof ApiMetricsParamsSchema>;
+import type { ApiMetricsParamsParsed as ApiMetricsParams } from '@kilocode/worker-utils';
 
 /**
  * Write an API metrics data point to Analytics Engine for alerting queries,
diff --git a/cloudflare-o11y/src/session-metrics-analytics.ts b/cloudflare-o11y/src/session-metrics-analytics.ts
index d6441ee2c..192bb1edb 100644
--- a/cloudflare-o11y/src/session-metrics-analytics.ts
+++ b/cloudflare-o11y/src/session-metrics-analytics.ts
@@ -1,4 +1,4 @@
-import type { SessionMetricsParams } from './session-metrics-schema';
+import type { SessionMetricsParamsParsed as SessionMetricsParams } from '@kilocode/worker-utils';
 
 /**
  * Write a session metrics data point to Analytics Engine,
diff --git a/cloudflare-o11y/src/session-metrics-schema.ts b/cloudflare-o11y/src/session-metrics-schema.ts
deleted file mode 100644
index 6ddee8fe1..000000000
--- a/cloudflare-o11y/src/session-metrics-schema.ts
+++ /dev/null
@@ -1,43 +0,0 @@
-import { z } from 'zod';
-
-export const TerminationReasons = ['completed', 'error', 'interrupted', 'abandoned', 'unknown'] as const;
-
-export const SessionMetricsParamsSchema = z.object({
-	kiloUserId: z.string().min(1),
-	organizationId: z.string().optional().default(''),
-	sessionId: z.string().min(1),
-	platform: z.string().min(1),
-
-	sessionDurationMs: z.number().int().nonnegative(),
-	timeToFirstResponseMs: z.number().int().nonnegative().optional(),
-
-	totalTurns: z.number().int().nonnegative(),
-	totalSteps: z.number().int().nonnegative(),
-
-	toolCallsByType: z.record(z.string(), z.number().int().nonnegative()),
-	toolErrorsByType: z.record(z.string(), z.number().int().nonnegative()),
-
-	totalErrors: z.number().int().nonnegative(),
-	errorsByType: z.record(z.string(), z.number().int().nonnegative()),
-	stuckToolCallCount: z.number().int().nonnegative(),
-
-	totalTokens: z.object({
-		input: z.number().int().nonnegative(),
-		output: z.number().int().nonnegative(),
-		reasoning: z.number().int().nonnegative(),
-		cacheRead: z.number().int().nonnegative(),
-		cacheWrite: z.number().int().nonnegative(),
-	}),
-	totalCost: z.number().nonnegative(),
-
-	compactionCount: z.number().int().nonnegative(),
-	autoCompactionCount: z.number().int().nonnegative(),
-
-	terminationReason: z.enum(TerminationReasons),
-
-	model: z.string().optional().default(''),
-
-	ingestVersion: z.number().int().nonnegative().default(0),
-});
-
-export type SessionMetricsParams = z.infer<typeof SessionMetricsParamsSchema>;
diff --git a/cloudflare-session-ingest/src/env.ts b/cloudflare-session-ingest/src/env.ts
index 11d765f81..85b5c7367 100644
--- a/cloudflare-session-ingest/src/env.ts
+++ b/cloudflare-session-ingest/src/env.ts
@@ -1 +1,3 @@
+import type { O11YBinding } from './o11y-binding';
+
 export type Env = Omit<Cloudflare.Env, 'O11Y'> & { O11Y: O11YBinding };
diff --git a/cloudflare-session-ingest/src/middleware/kilo-jwt-auth.ts b/cloudflare-session-ingest/src/middleware/kilo-jwt-auth.ts
index 509037535..82b6b6255 100644
--- a/cloudflare-session-ingest/src/middleware/kilo-jwt-auth.ts
+++ b/cloudflare-session-ingest/src/middleware/kilo-jwt-auth.ts
@@ -1,46 +1,12 @@
 import { createMiddleware } from 'hono/factory';
-import { verifyKiloToken, extractBearerToken } from '@kilocode/worker-utils';
-import { eq } from 'drizzle-orm';
+import { verifyKiloToken, extractBearerToken, userExistsWithCache } from '@kilocode/worker-utils';
 import { getWorkerDb } from '@kilocode/db/client';
-import { kilocode_users } from '@kilocode/db/schema';
 
 import type { Env } from '../env';
 
-const USER_EXISTS_TTL_SECONDS = 24 * 60 * 60; // 24h
-const USER_NOT_FOUND_TTL_SECONDS = 5 * 60; // 5m
-
-/**
- * Check whether a user exists, using KV as a cache in front of Postgres.
- * Positive results are cached for 24h. Negative results are cached for 5m
- * to rate-limit DB hits from deleted/nonexistent users with valid tokens.
- */
-async function userExists(env: Env, userId: string): Promise<boolean> {
-  const cacheKey = `user-exists:${userId}`;
-
-  const cached = await env.USER_EXISTS_CACHE.get(cacheKey);
-  if (cached === '1') {
-    return true;
-  }
-  if (cached === '0') {
-    return false;
-  }
-
+function userExists(env: Env, userId: string): Promise<boolean> {
   const db = getWorkerDb(env.HYPERDRIVE.connectionString);
-  const rows = await db
-    .select({ id: kilocode_users.id })
-    .from(kilocode_users)
-    .where(eq(kilocode_users.id, userId))
-    .limit(1);
-
-  const row = rows[0];
-
-  if (!row) {
-    void env.USER_EXISTS_CACHE.put(cacheKey, '0', { expirationTtl: USER_NOT_FOUND_TTL_SECONDS });
-    return false;
-  }
-
-  void env.USER_EXISTS_CACHE.put(cacheKey, '1', { expirationTtl: USER_EXISTS_TTL_SECONDS });
-  return true;
+  return userExistsWithCache(env.USER_EXISTS_CACHE, db, userId);
 }
 
 export const kiloJwtAuthMiddleware = createMiddleware<{
diff --git a/cloudflare-session-ingest/src/o11y-binding.d.ts b/cloudflare-session-ingest/src/o11y-binding.d.ts
index 329c87090..c7e026610 100644
--- a/cloudflare-session-ingest/src/o11y-binding.d.ts
+++ b/cloudflare-session-ingest/src/o11y-binding.d.ts
@@ -1,41 +1,5 @@
-/**
- * Augment the wrangler-generated Env to give the O11Y service binding its RPC
- * method types.  `wrangler types` only sees `Fetcher` for service bindings;
- * the actual RPC shape comes from the o11y worker's WorkerEntrypoint and is
- * declared here so the generated file can be freely regenerated.
- *
- * Keep in sync with: cloudflare-o11y/src/session-metrics-schema.ts
- */
+import type { SessionMetricsParams } from '@kilocode/worker-utils';
 
-type O11YSessionMetricsParams = {
-  kiloUserId: string;
-  organizationId?: string;
-  sessionId: string;
-  platform: string;
-  sessionDurationMs: number;
-  timeToFirstResponseMs?: number;
-  totalTurns: number;
-  totalSteps: number;
-  toolCallsByType: Record<string, number>;
-  toolErrorsByType: Record<string, number>;
-  totalErrors: number;
-  errorsByType: Record<string, number>;
-  stuckToolCallCount: number;
-  totalTokens: {
-    input: number;
-    output: number;
-    reasoning: number;
-    cacheRead: number;
-    cacheWrite: number;
-  };
-  totalCost: number;
-  compactionCount: number;
-  autoCompactionCount: number;
-  terminationReason: 'completed' | 'error' | 'interrupted' | 'abandoned' | 'unknown';
-  model?: string;
-  ingestVersion: number;
-};
-
-type O11YBinding = Fetcher & {
-  ingestSessionMetrics(params: O11YSessionMetricsParams): Promise<void>;
+export type O11YBinding = Fetcher & {
+  ingestSessionMetrics(params: SessionMetricsParams): Promise<void>;
 };
diff --git a/cloudflare-webhook-agent-ingest/src/index.ts b/cloudflare-webhook-agent-ingest/src/index.ts
index bc2331f0b..f8ce1d6d6 100644
--- a/cloudflare-webhook-agent-ingest/src/index.ts
+++ b/cloudflare-webhook-agent-ingest/src/index.ts
@@ -18,7 +18,6 @@ export type HonoContext = {
 
 const app = new Hono<HonoContext>();
 
-// @ts-expect-error workers-tagged-logger returns Handler typed against an older hono; incompatible with hono 4.12+
 app.use('*', useWorkersLogger('webhook-agent'));
 
 app.get('/health', c => {
diff --git a/llm-gateway/eslint.config.mjs b/llm-gateway/eslint.config.mjs
new file mode 100644
index 000000000..4792c67f5
--- /dev/null
+++ b/llm-gateway/eslint.config.mjs
@@ -0,0 +1,16 @@
+import { dirname } from 'path';
+import { fileURLToPath } from 'url';
+import { defineConfig } from 'eslint/config';
+import baseConfig from '@kilocode/eslint-config';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+
+export default defineConfig([
+  ...baseConfig(__dirname),
+  {
+    files: ['**/*.ts'],
+    rules: {
+      '@typescript-eslint/restrict-template-expressions': 'off',
+    },
+  },
+]);
diff --git a/llm-gateway/package.json b/llm-gateway/package.json
new file mode 100644
index 000000000..33cddd33b
--- /dev/null
+++ b/llm-gateway/package.json
@@ -0,0 +1,51 @@
+{
+  "name": "llm-gateway",
+  "version": "1.0.0",
+  "type": "module",
+  "private": true,
+  "description": "LLM Gateway Cloudflare Worker — transparent drop-in replacement for /api/openrouter",
+  "scripts": {
+    "preinstall": "npx only-allow pnpm",
+    "deploy": "wrangler deploy",
+    "dev": "wrangler dev",
+    "start": "wrangler dev",
+    "types": "wrangler types",
+    "lint": "eslint --config eslint.config.mjs --cache 'src/**/*.ts'",
+    "lint:fix": "eslint --config eslint.config.mjs --cache --fix 'src/**/*.ts'",
+    "format": "prettier --write 'src/**/*.ts'",
+    "format:check": "prettier --check 'src/**/*.ts'",
+    "test": "vitest run",
+    "test:watch": "vitest",
+    "test:integration": "vitest run --config vitest.workers.config.ts",
+    "test:integration:watch": "vitest --config vitest.workers.config.ts",
+    "typecheck": "tsgo --noEmit --incremental false"
+  },
+  "dependencies": {
+    "@sentry/cloudflare": "^10.25.0",
+    "@ai-sdk/anthropic": "^3.0.41",
+    "@ai-sdk/openai": "^3.0.27",
+    "@kilocode/db": "workspace:*",
+    "@kilocode/encryption": "workspace:*",
+    "@kilocode/worker-utils": "workspace:*",
+    "ai": "^6.0.78",
+    "drizzle-orm": "catalog:",
+    "eventsource-parser": "^3.0.6",
+    "hono": "catalog:",
+    "workers-tagged-logger": "catalog:",
+    "zod": "catalog:"
+  },
+  "devDependencies": {
+    "@cloudflare/vitest-pool-workers": "^0.12.8",
+    "jose": "catalog:",
+    "@kilocode/eslint-config": "workspace:*",
+    "@types/node": "^22",
+    "@typescript/native-preview": "7.0.0-dev.20251019.1",
+    "@vitest/ui": "^3.2.4",
+    "drizzle-kit": "catalog:",
+    "eslint": "catalog:",
+    "prettier": "catalog:",
+    "typescript": "catalog:",
+    "vitest": "^3.2.4",
+    "wrangler": "catalog:"
+  }
+}
diff --git a/llm-gateway/src/background/api-metrics.ts b/llm-gateway/src/background/api-metrics.ts
new file mode 100644
index 000000000..87fab960c
--- /dev/null
+++ b/llm-gateway/src/background/api-metrics.ts
@@ -0,0 +1,283 @@
+// Background task: emit API metrics to the O11Y service binding via RPC.
+// The O11Y worker exposes an ingestApiMetrics RPC method on its WorkerEntrypoint,
+// eliminating the need for HTTP routing, JSON serialization, and admin token auth.
+
+import { createParser } from 'eventsource-parser';
+import type { EventSourceMessage } from 'eventsource-parser';
+import { z } from 'zod';
+import type { ApiMetricsParams } from '@kilocode/worker-utils';
+
+export type { ApiMetricsParams };
+
+export type ApiMetricsTokens = NonNullable<ApiMetricsParams['tokens']>;
+
+// ─── Token extraction ─────────────────────────────────────────────────────────
+
+type OpenAICompletionUsage = {
+  prompt_tokens?: number;
+  completion_tokens?: number;
+  total_tokens?: number;
+  prompt_tokens_details?: { cached_tokens?: number };
+};
+
+/**
+ * Map an OpenAI-style `usage` object onto the metrics token shape.
+ * Returns undefined when usage is missing or carries no token counts at all.
+ */
+export function getTokensFromCompletionUsage(
+  usage: OpenAICompletionUsage | null | undefined
+): ApiMetricsTokens | undefined {
+  if (!usage) {
+    return undefined;
+  }
+  const { prompt_tokens, completion_tokens, total_tokens, prompt_tokens_details } = usage;
+  const mapped: ApiMetricsTokens = {
+    inputTokens: prompt_tokens,
+    outputTokens: completion_tokens,
+    cacheHitTokens: prompt_tokens_details?.cached_tokens,
+    totalTokens: total_tokens,
+    cacheWriteTokens: undefined, // this usage shape has no cache-write counter
+  };
+  // An all-undefined record carries no information, so report "no tokens".
+  const isEmpty = Object.values(mapped).every(count => count === undefined);
+  return isEmpty ? undefined : mapped;
+}
+
+type ChatCompletionTool = {
+  type?: string;
+  function?: { name?: string };
+  custom?: { name?: string };
+};
+
+function isTool(item: unknown): item is ChatCompletionTool {
+  return typeof item === 'object' && item !== null; // any non-null object passes: every ChatCompletionTool field is optional
+}
+
+/**
+ * Describe each entry of a request's `tools` array as a `<kind>:<name>` label.
+ * Non-object entries and unrecognised kinds become `unknown:unknown`; a missing,
+ * non-string or blank name becomes `<kind>:unknown`. Names are trimmed.
+ */
+export function getToolsAvailable(tools: unknown[] | undefined): string[] {
+  return (tools ?? []).map(entry => {
+    const tool: ChatCompletionTool = isTool(entry) ? entry : {};
+    const kind = tool.type === 'function' || tool.type === 'custom' ? tool.type : 'unknown';
+    // Only the two known kinds carry a name, stored under a key equal to the kind.
+    const rawName = kind === 'unknown' ? undefined : tool[kind]?.name;
+    const name = typeof rawName === 'string' ? rawName.trim() : '';
+    return `${kind}:${name || 'unknown'}`;
+  });
+}
+
+type AssistantMessage = {
+  role?: string;
+  tool_calls?: Array<{
+    type?: string;
+    function?: { name?: string };
+    custom?: { name?: string };
+  }>;
+};
+
+/**
+ * Collect a `<kind>:<name>` label for every tool call on assistant messages, in order.
+ * Calls of an unrecognised kind become `unknown:unknown`; a missing, non-string or blank
+ * name becomes `<kind>:unknown`. Names are trimmed.
+ */
+export function getToolsUsed(messages: AssistantMessage[] | undefined): string[] {
+  const labelFor = (kind: string, name: unknown): string => {
+    const trimmed = typeof name === 'string' ? name.trim() : '';
+    return `${kind}:${trimmed || 'unknown'}`;
+  };
+  // Tool calls on non-assistant messages are ignored.
+  const assistantCalls = (messages ?? [])
+    .filter(message => message.role === 'assistant')
+    .flatMap(message => message.tool_calls ?? []);
+  return assistantCalls.map(call => {
+    if (call.type === 'function') return labelFor('function', call.function?.name);
+    if (call.type === 'custom') return labelFor('custom', call.custom?.name);
+    return 'unknown:unknown';
+  });
+}
+
+// ─── Inference provider extraction ───────────────────────────────────────────
+
+const inferenceProviderSchema = z.object({
+  provider: z.string().min(1).optional(),
+  choices: z
+    .array(
+      z.object({
+        message: z
+          .object({
+            provider_metadata: z
+              .object({
+                gateway: z
+                  .object({ routing: z.object({ finalProvider: z.string().min(1).optional() }) })
+                  .partial()
+                  .optional(),
+              })
+              .partial()
+              .optional(),
+          })
+          .partial()
+          .optional(),
+        delta: z
+          .object({
+            provider_metadata: z
+              .object({
+                gateway: z
+                  .object({ routing: z.object({ finalProvider: z.string().min(1).optional() }) })
+                  .partial()
+                  .optional(),
+              })
+              .partial()
+              .optional(),
+          })
+          .partial()
+          .optional(),
+      })
+    )
+    .optional(),
+});
+
+/** Provider named by a response payload: top-level `provider`, else the first choice's gateway `finalProvider` (message, then delta). Undefined when the payload does not match the schema or names no provider. */
+function extractInferenceProvider(data: unknown): string | undefined {
+  const parsed = inferenceProviderSchema.safeParse(data);
+  if (!parsed.success) return undefined;
+  const directProvider = parsed.data.provider?.trim();
+  if (directProvider) return directProvider;
+  const routed = (part: 'message' | 'delta') =>
+    parsed.data.choices?.[0]?.[part]?.provider_metadata?.gateway?.routing?.finalProvider?.trim();
+  // `||` rather than `??`: a whitespace-only value trims to '' and must still fall through to the next source.
+  return routed('message') || routed('delta') || undefined;
+}
+
+function safeParseJson(payload: string): unknown {
+  try {
+    return JSON.parse(payload) as unknown;
+  } catch {
+    return null; // malformed JSON yields null instead of throwing (a literal JSON `null` also yields null)
+  }
+}
+
+/**
+ * Drain `response` for at most `timeoutMs` and return the inference provider named in it, if any.
+ * SSE bodies are scanned event by event, keeping the latest provider seen; other bodies are
+ * buffered (up to MAX_BUFFER_CHARS) and parsed as one JSON document when the stream ends.
+ */
+async function drainResponseBodyForInferenceProvider(
+  response: Response,
+  timeoutMs: number
+): Promise<string | undefined> {
+  if (!response.body) return undefined;
+  const reader = response.body.getReader() as ReadableStreamDefaultReader<Uint8Array>;
+  const contentType = response.headers.get('content-type') ?? '';
+  const isEventStream = contentType.includes('text/event-stream');
+  const cancelQuietly = async (): Promise<void> => {
+    try {
+      await reader.cancel();
+    } catch {
+      /* intentionally empty */
+    }
+  };
+
+  try {
+    const startedAt = performance.now();
+    const decoder = new TextDecoder();
+    let inferenceProvider: string | undefined;
+
+    const sseParser = isEventStream
+      ? createParser({
+          onEvent(event: EventSourceMessage) {
+            if (event.data === '[DONE]') return;
+            const json = safeParseJson(event.data);
+            if (!json) return;
+            // Keep the last provider seen: an event without one must not erase it.
+            inferenceProvider = extractInferenceProvider(json) ?? inferenceProvider;
+          },
+        })
+      : null;
+
+    let buffered = '';
+    const MAX_BUFFER_CHARS = 512_000;
+
+    while (true) {
+      const remainingMs = timeoutMs - (performance.now() - startedAt);
+      if (remainingMs <= 0) {
+        await cancelQuietly();
+        return inferenceProvider;
+      }
+      const result = await Promise.race([
+        reader.read(),
+        scheduler.wait(remainingMs).then(() => ({ timeout: true }) as const),
+      ]);
+      if ('timeout' in result) {
+        await cancelQuietly();
+        return inferenceProvider;
+      }
+
+      if (result.done) {
+        if (!inferenceProvider && !isEventStream && buffered) {
+          const json = safeParseJson(buffered);
+          inferenceProvider = json ? extractInferenceProvider(json) : undefined;
+        }
+        return inferenceProvider;
+      }
+
+      if (result.value) {
+        const chunk = decoder.decode(result.value, { stream: true });
+        if (sseParser) {
+          sseParser.feed(chunk);
+        } else if (buffered.length < MAX_BUFFER_CHARS) {
+          buffered += chunk;
+        }
+      }
+    }
+  } finally {
+    reader.releaseLock();
+  }
+}
+
+// ─── O11Y service binding type (RPC) ──────────────────────────────────────────
+
+type O11YRpc = { ingestApiMetrics(params: ApiMetricsParams): Promise<void> };
+
+// ─── Main entry point ─────────────────────────────────────────────────────────
+
+async function sendApiMetrics(o11y: O11YRpc, params: ApiMetricsParams): Promise<void> {
+  try {
+    await o11y.ingestApiMetrics(params);
+  } catch (err) {
+    console.error('[api-metrics] Failed to send metrics:', err); // best-effort: the failure is logged, never rethrown
+  }
+}
+
+/**
+ * Drain `backgroundStream` (bounded to 60s) to learn the inference provider, then send `params` plus
+ * `completeRequestMs` (performance.now() minus `requestStartedAt`) to O11Y via RPC. Drain and send failures are swallowed.
+ */
+export async function runApiMetrics(
+  o11y: O11YRpc,
+  params: Omit<ApiMetricsParams, 'completeRequestMs'>,
+  backgroundStream: ReadableStream,
+  requestStartedAt: number
+): Promise<void> {
+  let inferenceProvider: string | undefined;
+  try {
+    inferenceProvider = await drainResponseBodyForInferenceProvider(
+      new Response(backgroundStream, {
+        headers: { 'content-type': params.isStreaming ? 'text/event-stream' : 'application/json' },
+      }),
+      60_000
+    );
+  } catch {
+    /* ignore drain errors — still emit timing */
+  }
+
+  const completeRequestMs = Math.max(0, Math.round(performance.now() - requestStartedAt));
+
+  await sendApiMetrics(o11y, {
+    ...params,
+    inferenceProvider: inferenceProvider ?? params.inferenceProvider,
+    completeRequestMs,
+  });
+}
diff --git a/llm-gateway/src/background/kilo-pass.ts b/llm-gateway/src/background/kilo-pass.ts
new file mode 100644
index 000000000..1fae22547
--- /dev/null
+++ b/llm-gateway/src/background/kilo-pass.ts
@@ -0,0 +1,518 @@
+// KiloPass bonus credit issuance triggered by usage threshold.
+// Port of src/lib/kilo-pass/usage-triggered-bonus.ts and related files.
+//
+// Simplified for CF Workers:
+//   - No Sentry error captures (use console.error)
+//   - No server-only imports
+//   - Uses vanilla Date arithmetic instead of dayjs
+//   - Direct credit grant (insert credit_transactions + update kilocode_users)
+//     instead of processTopUp/grantCreditForCategory
+
+import { sql, eq, and, ne, desc, inArray } from 'drizzle-orm';
+import type { WorkerDb } from '@kilocode/db/client';
+import {
+  kilocode_users,
+  kilo_pass_subscriptions,
+  kilo_pass_issuances,
+  kilo_pass_issuance_items,
+  kilo_pass_audit_log,
+  credit_transactions,
+} from '@kilocode/db/schema';
+import {
+  KiloPassTier,
+  KiloPassCadence,
+  KiloPassIssuanceSource,
+  KiloPassIssuanceItemKind,
+  KiloPassAuditLogAction,
+  KiloPassAuditLogResult,
+} from '@kilocode/db/schema-types';
+
+// ─── Constants ────────────────────────────────────────────────────────────────
+
+const KILO_PASS_YEARLY_MONTHLY_BONUS_PERCENT = 0.5;
+const KILO_PASS_FIRST_MONTH_PROMO_BONUS_PERCENT = 0.5;
+const KILO_PASS_MONTHLY_FIRST_2_MONTHS_PROMO_BONUS_PERCENT = 0.5;
+// First-time subscribers who started strictly before this cutoff get 50% bonus for first 2 months.
+const KILO_PASS_MONTHLY_FIRST_2_MONTHS_PROMO_CUTOFF_ISO = '2026-03-07T07:59:59Z';
+
+const KILO_PASS_MONTHLY_RAMP_BASE = 0.05;
+const KILO_PASS_MONTHLY_RAMP_STEP = 0.05;
+const KILO_PASS_MONTHLY_RAMP_CAP = 0.4;
+
+const KILO_PASS_TIER_CONFIG: Record<KiloPassTier, { monthlyPriceUsd: number }> = {
+  [KiloPassTier.Tier19]: { monthlyPriceUsd: 19 },
+  [KiloPassTier.Tier49]: { monthlyPriceUsd: 49 },
+  [KiloPassTier.Tier199]: { monthlyPriceUsd: 199 },
+};
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+/** Lower a stored threshold by 1_000_000 (never below 0); a null threshold stays null. */
+export function getEffectiveKiloPassThreshold(threshold: number | null): number | null {
+  return threshold === null ? null : Math.max(0, threshold - 1_000_000);
+}
+
+function toMicrodollars(usd: number): number {
+  return Math.round(usd * 1_000_000); // rounded to a whole number of microdollars (1 USD = 1_000_000)
+}
+
+function roundUsdToCents(usd: number): number {
+  return Math.round(usd * 100); // result is an integer count of cents, not a USD amount
+}
+
+function centsToUsd(cents: number): number {
+  return cents / 100; // plain division, no rounding applied
+}
+
+/** Format the first day of `date`'s UTC month as `YYYY-MM-01`. */
+function computeIssueMonth(date: Date): string {
+  const year = date.getUTCFullYear();
+  const month = date.getUTCMonth() + 1;
+  return `${year}-${month < 10 ? '0' : ''}${month}-01`;
+}
+
+/** Add months to a UTC date, clamping to the target month's last day (Jan 31 + 1 month = Feb 28/29, not early March). Does not mutate `date`. */
+function addMonths(date: Date, months: number): Date {
+  const target = date.getUTCMonth() + months;
+  const lastDay = new Date(Date.UTC(date.getUTCFullYear(), target + 1, 0)).getUTCDate(); // day 0 of the next month
+  return new Date(new Date(date).setUTCMonth(target, Math.min(date.getUTCDate(), lastDay)));
+}
+
+/** Parse an ISO timestamp; returns null for null, undefined, empty or unparseable input. */
+function parseIso(iso: string | null | undefined): Date | null {
+  if (iso === null || iso === undefined || iso === '') return null;
+  const parsed = new Date(iso);
+  return Number.isNaN(parsed.getTime()) ? null : parsed;
+}
+
+/**
+ * Bonus fraction for a monthly subscriber. First-time subscribers get the promo rate in
+ * month 1, and in months 1-2 when they started before the promo cutoff; otherwise the rate
+ * is RAMP_BASE plus RAMP_STEP per month after the first, capped at RAMP_CAP. `tier` is unused.
+ */
+function computeMonthlyCadenceBonusPercent(params: {
+  tier: KiloPassTier;
+  streakMonths: number;
+  isFirstTimeSubscriberEver: boolean;
+  subscriptionStartedAtIso: string | null;
+}): number {
+  const month = Math.max(1, params.streakMonths); // streaks below 1 count as the first month
+  const promoEligible = params.isFirstTimeSubscriberEver && month <= 2;
+  if (promoEligible) {
+    const started = parseIso(params.subscriptionStartedAtIso);
+    const startedBeforeCutoff =
+      started !== null && started < new Date(KILO_PASS_MONTHLY_FIRST_2_MONTHS_PROMO_CUTOFF_ISO);
+    if (startedBeforeCutoff) return KILO_PASS_MONTHLY_FIRST_2_MONTHS_PROMO_BONUS_PERCENT;
+    if (month === 1) return KILO_PASS_FIRST_MONTH_PROMO_BONUS_PERCENT;
+  }
+
+  const ramp = KILO_PASS_MONTHLY_RAMP_BASE + KILO_PASS_MONTHLY_RAMP_STEP * (month - 1);
+  return Math.min(KILO_PASS_MONTHLY_RAMP_CAP, ramp);
+}
+
+// ─── Types ────────────────────────────────────────────────────────────────────
+
+type KiloPassSubscriptionState = {
+  subscriptionId: string;
+  tier: KiloPassTier;
+  cadence: KiloPassCadence;
+  status: string;
+  cancelAtPeriodEnd: boolean;
+  currentStreakMonths: number;
+  nextYearlyIssueAt: string | null;
+  startedAt: string | null;
+};
+
+type Tx = Parameters<WorkerDb['transaction']>[0] extends (tx: infer T) => unknown ? T : never;
+
+// ─── DB helpers ───────────────────────────────────────────────────────────────
+
+/**
+ * Rank a subscription row for selection; lower ranks sort first. Active rows lead (not cancelling before cancelling), ended rows rank 6, unrecognised statuses 7.
+ */
+function getStatusPriority(row: { status: string; cancelAtPeriodEnd: boolean }): number {
+  if (row.status === 'active') return row.cancelAtPeriodEnd ? 1 : 0;
+  const endedStatuses = ['incomplete_expired', 'canceled', 'unpaid'];
+  if (endedStatuses.includes(row.status)) return 6;
+  // The remaining live statuses rank 2-5 in this order.
+  const position = ['trialing', 'past_due', 'paused', 'incomplete'].indexOf(row.status);
+  return position === -1 ? 7 : position + 2;
+}
+
+async function getKiloPassStateForUser(
+  tx: Tx,
+  kiloUserId: string
+): Promise<KiloPassSubscriptionState | null> {
+  const rows = await tx
+    .select({
+      subscriptionId: kilo_pass_subscriptions.id,
+      tier: kilo_pass_subscriptions.tier,
+      cadence: kilo_pass_subscriptions.cadence,
+      status: kilo_pass_subscriptions.status,
+      cancelAtPeriodEnd: kilo_pass_subscriptions.cancel_at_period_end,
+      currentStreakMonths: kilo_pass_subscriptions.current_streak_months,
+      nextYearlyIssueAt: kilo_pass_subscriptions.next_yearly_issue_at,
+      startedAt: kilo_pass_subscriptions.started_at,
+    })
+    .from(kilo_pass_subscriptions)
+    .where(eq(kilo_pass_subscriptions.kilo_user_id, kiloUserId));
+  // All subscription rows for the user are loaded; the best-ranked one is returned (null when there are none).
+  if (rows.length === 0) return null;
+  // Best candidate first: lowest getStatusPriority, ties broken by most recent startedAt (unparseable dates last).
+  const sorted = [...rows].sort((a, b) => {
+    const pd = getStatusPriority(a) - getStatusPriority(b);
+    if (pd !== 0) return pd;
+    const aMs = parseIso(a.startedAt)?.getTime() ?? Number.NEGATIVE_INFINITY;
+    const bMs = parseIso(b.startedAt)?.getTime() ?? Number.NEGATIVE_INFINITY;
+    return bMs - aMs;
+  });
+
+  const s = sorted[0];
+  if (!s) return null;
+  return {
+    subscriptionId: s.subscriptionId,
+    tier: s.tier,
+    cadence: s.cadence,
+    status: s.status,
+    cancelAtPeriodEnd: s.cancelAtPeriodEnd,
+    currentStreakMonths: s.currentStreakMonths,
+    nextYearlyIssueAt: s.nextYearlyIssueAt,
+    startedAt: s.startedAt,
+  };
+}
+
+async function clearKiloPassThreshold(tx: Tx, kiloUserId: string): Promise<void> {
+  await tx
+    .update(kilocode_users)
+    .set({ kilo_pass_threshold: null })
+    .where(eq(kilocode_users.id, kiloUserId));
+}
+
+/**
+ * Bonus expiry for an issuance: the next yearly issue date (yearly cadence), or one month after the monthly period start that matches the issue month; null when rows or dates are missing.
+ */
+async function computeBonusExpiryDate(
+  tx: Tx,
+  issuanceId: string,
+  subscriptionId: string
+): Promise<Date | null> {
+  const issuanceRows = await tx
+    .select({ issueMonth: kilo_pass_issuances.issue_month })
+    .from(kilo_pass_issuances)
+    .where(eq(kilo_pass_issuances.id, issuanceId))
+    .limit(1);
+  const issueMonth = issuanceRows[0]?.issueMonth;
+  if (!issueMonth) return null;
+
+  const subRows = await tx
+    .select({
+      cadence: kilo_pass_subscriptions.cadence,
+      nextYearlyIssueAt: kilo_pass_subscriptions.next_yearly_issue_at,
+      startedAt: kilo_pass_subscriptions.started_at,
+    })
+    .from(kilo_pass_subscriptions)
+    .where(eq(kilo_pass_subscriptions.id, subscriptionId))
+    .limit(1);
+  const sub = subRows[0];
+  if (!sub) return null;
+
+  if (sub.cadence === KiloPassCadence.Yearly) {
+    return parseIso(sub.nextYearlyIssueAt);
+  }
+
+  if (sub.cadence === KiloPassCadence.Monthly) {
+    const startedAt = parseIso(sub.startedAt);
+    if (!startedAt) return null;
+    const issueMonthStart = parseIso(`${issueMonth}T00:00:00.000Z`);
+    if (!issueMonthStart) return null;
+    // Whole calendar months between the subscription's start month and the issue month.
+    // Exact year/month arithmetic: dividing elapsed ms by a fixed 30-day month drifts by one after ~3 years.
+    const monthOffset =
+      (issueMonthStart.getUTCFullYear() - startedAt.getUTCFullYear()) * 12 +
+      (issueMonthStart.getUTCMonth() - startedAt.getUTCMonth());
+    if (monthOffset < 0) return null;
+    const periodStart = addMonths(startedAt, monthOffset);
+    return addMonths(periodStart, 1);
+  }
+
+  return null;
+}
+
+/** Grant bonus credits directly: insert a free 'kilo-pass-bonus' credit_transactions row and add the same amount to the user's total_microdollars_acquired. Returns the new credit id. */
+async function grantBonusCredit(
+  tx: Tx,
+  params: {
+    kiloUserId: string;
+    amountMicrodollars: number;
+    description: string;
+    expiryDate: Date | null;
+  }
+): Promise<string> {
+  const creditId = crypto.randomUUID();
+  await tx.insert(credit_transactions).values({
+    id: creditId,
+    kilo_user_id: params.kiloUserId,
+    amount_microdollars: params.amountMicrodollars,
+    is_free: true,
+    description: params.description,
+    credit_category: 'kilo-pass-bonus',
+    expiry_date: params.expiryDate?.toISOString() ?? null,
+  });
+  await tx
+    .update(kilocode_users)
+    .set({
+      total_microdollars_acquired: sql`${kilocode_users.total_microdollars_acquired} + ${params.amountMicrodollars}`,
+    })
+    .where(eq(kilocode_users.id, params.kiloUserId));
+  return creditId;
+}
+
+/**
+ * Return the id of the issuance header for a subscription and month, creating it if needed.
+ *
+ * The insert uses ON CONFLICT DO NOTHING; when it inserts no row, the header is looked up
+ * by subscription id and issue month instead.
+ *
+ * @returns The issuance id, or `null` when neither the insert nor the lookup yields a row.
+ */
+async function createOrGetIssuanceHeader(
+  tx: Tx,
+  subscriptionId: string,
+  issueMonth: string
+): Promise<string | null> {
+  const [inserted] = await tx
+    .insert(kilo_pass_issuances)
+    .values({
+      kilo_pass_subscription_id: subscriptionId,
+      issue_month: issueMonth,
+      source: KiloPassIssuanceSource.Cron,
+      stripe_invoice_id: null,
+    })
+    .onConflictDoNothing()
+    .returning({ id: kilo_pass_issuances.id });
+
+  if (inserted?.id) {
+    return inserted.id;
+  }
+
+  // Nothing was inserted, so fall back to reading the header for this subscription + month.
+  const sameSubscription = eq(kilo_pass_issuances.kilo_pass_subscription_id, subscriptionId);
+  const sameMonth = eq(kilo_pass_issuances.issue_month, issueMonth);
+  const [found] = await tx
+    .select({ id: kilo_pass_issuances.id })
+    .from(kilo_pass_issuances)
+    .where(and(sameSubscription, sameMonth))
+    .limit(1);
+
+  return found?.id ?? null;
+}
+
+/**
+ * Compute the current issuance month for a yearly subscription.
+ *
+ * The current period is taken to start one month before `nextYearlyIssueAtIso`. When that
+ * timestamp is absent or `parseIso` yields nothing for it, `startedAtIso` is used as the
+ * period start instead.
+ *
+ * @param nextYearlyIssueAtIso - Timestamp of the next yearly issuance, if any.
+ * @param startedAtIso - Subscription start timestamp, used as the fallback.
+ * @returns The result of `computeIssueMonth` for the period start, or `null` when neither
+ *   timestamp can be parsed.
+ */
+function computeYearlyIssueMonth(
+  nextYearlyIssueAtIso: string | null,
+  startedAtIso: string | null
+): string | null {
+  // Parse once and reuse the result. Building a second Date straight from the raw string
+  // could feed an Invalid Date into addMonths for input that parseIso had already rejected.
+  const nextYearlyIssueAt = parseIso(nextYearlyIssueAtIso);
+  if (nextYearlyIssueAt) {
+    return computeIssueMonth(addMonths(nextYearlyIssueAt, -1));
+  }
+
+  const startedAt = parseIso(startedAtIso);
+  return startedAt ? computeIssueMonth(startedAt) : null;
+}
+
+/**
+ * Issue the Kilo Pass bonus credit for the subscription's current issuance, if eligible.
+ *
+ * Steps, all on the supplied transaction:
+ *  1. Pick the issuance: latest by `issue_month` (monthly) or get-or-create for the
+ *     computed current month (any other cadence).
+ *  2. Require a Base item on that issuance.
+ *  3. Skip if a Bonus or PromoFirstMonth50Pct item already exists.
+ *  4. Compute the bonus percent and amount, grant the credit, record the issuance item
+ *     and an audit-log row.
+ *
+ * Every exit path — including the successful one — clears the user's
+ * `kilo_pass_threshold` via `clearKiloPassThreshold`.
+ */
+async function maybeIssueBonusFromUsageThreshold(
+  tx: Tx,
+  subscription: KiloPassSubscriptionState,
+  kiloUserId: string
+): Promise<void> {
+  const monthlyBaseAmountUsd = KILO_PASS_TIER_CONFIG[subscription.tier].monthlyPriceUsd;
+
+  // Determine the issuance to attach the bonus to
+  let issuanceId: string | null;
+  let issueMonth: string;
+
+  if (subscription.cadence === KiloPassCadence.Monthly) {
+    // Monthly: use the latest issuance for this subscription
+    const latest = await tx
+      .select({
+        id: kilo_pass_issuances.id,
+        issueMonth: kilo_pass_issuances.issue_month,
+      })
+      .from(kilo_pass_issuances)
+      .where(eq(kilo_pass_issuances.kilo_pass_subscription_id, subscription.subscriptionId))
+      .orderBy(desc(kilo_pass_issuances.issue_month))
+      .limit(1);
+
+    if (!latest[0]) {
+      await clearKiloPassThreshold(tx, kiloUserId);
+      return;
+    }
+    issuanceId = latest[0].id;
+    issueMonth = latest[0].issueMonth;
+  } else {
+    // Yearly: get or create an issuance for the current period
+    const ym = computeYearlyIssueMonth(subscription.nextYearlyIssueAt, subscription.startedAt);
+    if (!ym) {
+      await clearKiloPassThreshold(tx, kiloUserId);
+      return;
+    }
+    issueMonth = ym;
+    issuanceId = await createOrGetIssuanceHeader(tx, subscription.subscriptionId, issueMonth);
+    if (!issuanceId) {
+      await clearKiloPassThreshold(tx, kiloUserId);
+      return;
+    }
+  }
+
+  // Check that the base item exists (issuance must be funded before bonus can be issued)
+  const baseItem = await tx
+    .select({ id: kilo_pass_issuance_items.id })
+    .from(kilo_pass_issuance_items)
+    .where(
+      and(
+        eq(kilo_pass_issuance_items.kilo_pass_issuance_id, issuanceId),
+        eq(kilo_pass_issuance_items.kind, KiloPassIssuanceItemKind.Base)
+      )
+    )
+    .limit(1);
+
+  if (!baseItem[0]) {
+    await clearKiloPassThreshold(tx, kiloUserId);
+    return;
+  }
+
+  // Idempotency: skip if bonus or promo item already issued
+  const alreadyIssued = await tx
+    .select({ id: kilo_pass_issuance_items.id })
+    .from(kilo_pass_issuance_items)
+    .where(
+      and(
+        eq(kilo_pass_issuance_items.kilo_pass_issuance_id, issuanceId),
+        inArray(kilo_pass_issuance_items.kind, [
+          KiloPassIssuanceItemKind.Bonus,
+          KiloPassIssuanceItemKind.PromoFirstMonth50Pct,
+        ])
+      )
+    )
+    .limit(1);
+
+  if (alreadyIssued[0]) {
+    await clearKiloPassThreshold(tx, kiloUserId);
+    return;
+  }
+
+  // Compute bonus percent
+  let bonusPercentApplied: number;
+  let description: string;
+  let auditPayload: Record<string, unknown>;
+
+  if (subscription.cadence !== KiloPassCadence.Monthly) {
+    // Non-monthly cadence gets the fixed yearly percentage.
+    bonusPercentApplied = KILO_PASS_YEARLY_MONTHLY_BONUS_PERCENT;
+    description = `Kilo Pass yearly monthly bonus (${subscription.tier}, ${issueMonth})`;
+    auditPayload = { bonusKind: 'yearly-monthly' };
+  } else {
+    // Check if first-time subscriber
+    // (no other subscription row exists for this user besides the current one)
+    const otherSubs = await tx
+      .select({ id: kilo_pass_subscriptions.id })
+      .from(kilo_pass_subscriptions)
+      .where(
+        and(
+          eq(kilo_pass_subscriptions.kilo_user_id, kiloUserId),
+          ne(kilo_pass_subscriptions.id, subscription.subscriptionId)
+        )
+      )
+      .limit(1);
+
+    const isFirstTimeSubscriberEver = otherSubs.length === 0;
+    // Streak is floored at 1 before being passed to the percent computation.
+    const streakMonths = Math.max(1, subscription.currentStreakMonths);
+    bonusPercentApplied = computeMonthlyCadenceBonusPercent({
+      tier: subscription.tier,
+      streakMonths,
+      isFirstTimeSubscriberEver,
+      subscriptionStartedAtIso: subscription.startedAt,
+    });
+    // NOTE(review): a promo is detected from the resulting percent (exactly 0.5) plus a
+    // streak of at most 2; it only changes the description and audit payload. The item
+    // inserted below is still recorded with kind Bonus — confirm that is intended.
+    const isPromo = bonusPercentApplied === 0.5 && streakMonths <= 2;
+    description = isPromo
+      ? `Kilo Pass promo 50% bonus (${subscription.tier}, streak=${streakMonths})`
+      : `Kilo Pass monthly bonus (${subscription.tier}, streak=${streakMonths})`;
+    auditPayload = {
+      bonusKind: isPromo ? 'promo-50pct' : 'monthly-ramp',
+      streakMonths,
+      issueMonth,
+    };
+  }
+
+  // Compute credit amount
+  // (percent is applied to the tier's monthly price in cents and rounded to a whole cent)
+  const baseCents = roundUsdToCents(monthlyBaseAmountUsd);
+  const bonusCents = Math.round(baseCents * bonusPercentApplied);
+  const bonusUsd = centsToUsd(bonusCents);
+  const bonusMicrodollars = toMicrodollars(bonusUsd);
+
+  const expiryDate = await computeBonusExpiryDate(tx, issuanceId, subscription.subscriptionId);
+
+  const creditTransactionId = await grantBonusCredit(tx, {
+    kiloUserId,
+    amountMicrodollars: bonusMicrodollars,
+    description,
+    expiryDate,
+  });
+
+  // Record issuance item
+  await tx.insert(kilo_pass_issuance_items).values({
+    kilo_pass_issuance_id: issuanceId,
+    kind: KiloPassIssuanceItemKind.Bonus,
+    credit_transaction_id: creditTransactionId,
+    amount_usd: bonusUsd,
+    bonus_percent_applied: bonusPercentApplied,
+  });
+
+  // Audit log
+  await tx.insert(kilo_pass_audit_log).values({
+    action: KiloPassAuditLogAction.BonusCreditsIssued,
+    result: KiloPassAuditLogResult.Success,
+    kilo_user_id: kiloUserId,
+    kilo_pass_subscription_id: subscription.subscriptionId,
+    related_credit_transaction_id: creditTransactionId,
+    related_monthly_issuance_id: issuanceId,
+    payload_json: {
+      source: 'usage_threshold',
+      kind: KiloPassIssuanceItemKind.Bonus,
+      bonusPercentApplied,
+      bonusAmountUsd: bonusUsd,
+      creditCategory: 'kilo-pass-bonus',
+      ...auditPayload,
+    },
+  });
+
+  // Clear threshold so we don't trigger again until Stripe sets a new one
+  await clearKiloPassThreshold(tx, kiloUserId);
+}
+
+// ─── Main export ──────────────────────────────────────────────────────────────
+
+/**
+ * Entry point: issue a Kilo Pass bonus once a user's usage has reached their threshold.
+ *
+ * Runs in a single transaction. The user row is selected FOR UPDATE first; nothing happens
+ * when the user is missing, no effective threshold applies, or usage is still below it.
+ * When the user has no active subscription state the threshold is cleared instead.
+ *
+ * @param db - Worker database handle used to open the transaction.
+ * @param kiloUserId - User to evaluate.
+ * @param _nowIso - Unused by this implementation.
+ */
+export async function maybeIssueKiloPassBonusFromUsageThreshold(
+  db: WorkerDb,
+  kiloUserId: string,
+  _nowIso: string
+): Promise<void> {
+  await db.transaction(async tx => {
+    // The helpers in this file are typed against Tx, so cast once and reuse the handle.
+    const helperTx = tx as unknown as Tx;
+
+    // Lock the user row to prevent concurrent issuance
+    const [user] = await tx
+      .select({
+        microdollarsUsed: kilocode_users.microdollars_used,
+        kiloPassThreshold: kilocode_users.kilo_pass_threshold,
+      })
+      .from(kilocode_users)
+      .where(eq(kilocode_users.id, kiloUserId))
+      .for('update')
+      .limit(1);
+    if (!user) return;
+
+    const threshold = getEffectiveKiloPassThreshold(user.kiloPassThreshold ?? null);
+    if (threshold === null) return;
+    if (user.microdollarsUsed < threshold) return;
+
+    const state = await getKiloPassStateForUser(helperTx, kiloUserId);
+    if (state && state.status === 'active') {
+      await maybeIssueBonusFromUsageThreshold(helperTx, state, kiloUserId);
+      return;
+    }
+
+    // No active subscription: drop the threshold so this check does not fire again.
+    await clearKiloPassThreshold(helperTx, kiloUserId);
+  });
+}
diff --git a/llm-gateway/src/background/request-logging.ts b/llm-gateway/src/background/request-logging.ts
new file mode 100644
index 000000000..6250733a6
--- /dev/null
+++ b/llm-gateway/src/background/request-logging.ts
@@ -0,0 +1,58 @@
+// Background task: insert api_request_log for Kilo employees.
+// Port of src/lib/handleRequestLogging.ts — uses WorkerDb instead of the global db.
+
+import type { WorkerDb } from '@kilocode/db/client';
+import { api_request_log } from '@kilocode/db/schema';
+import type { OpenRouterChatCompletionRequest } from '../types/request';
+
+// Organization id that isKiloEmployee treats as belonging to Kilo itself.
+export const KILO_ORGANIZATION_ID = '9d278969-5453-4ae3-a51f-a8d2274a7b56';
+
+// Minimal view of a user needed by request logging; both fields may be absent.
+type RequestLoggingUser = {
+  id?: string;
+  google_user_email?: string;
+};
+
+/**
+ * Decide whether a request counts as coming from a Kilo employee.
+ *
+ * True when the user's Google email ends with `@kilo.ai` or `@kilocode.ai`, or when the
+ * request's organization is KILO_ORGANIZATION_ID.
+ */
+function isKiloEmployee(
+  user: RequestLoggingUser | null | undefined,
+  organizationId: string | null | undefined
+): boolean {
+  const email = user?.google_user_email;
+  const hasKiloEmail = ['@kilo.ai', '@kilocode.ai'].some(
+    suffix => email?.endsWith(suffix) === true
+  );
+  return hasKiloEmail || organizationId === KILO_ORGANIZATION_ID;
+}
+
+/**
+ * Store the request and the full response body in `api_request_log` for Kilo employees.
+ *
+ * Returns without reading `responseStream` when the caller is not a Kilo employee.
+ * Failures while reading the stream or inserting are logged and swallowed, so this
+ * function does not reject for those errors.
+ * NOTE(review): on the early return the stream is neither read nor cancelled — confirm the
+ * caller does not depend on it being drained.
+ */
+export async function runRequestLogging(params: {
+  db: WorkerDb;
+  responseStream: ReadableStream;
+  statusCode: number;
+  user: RequestLoggingUser | null;
+  organizationId: string | null | undefined;
+  provider: string;
+  model: string;
+  request: OpenRouterChatCompletionRequest;
+}): Promise<void> {
+  if (!isKiloEmployee(params.user, params.organizationId)) {
+    return;
+  }
+
+  try {
+    // Buffer the whole body as text before writing the log row.
+    const responseText = await new Response(params.responseStream).text();
+    const [inserted] = await params.db
+      .insert(api_request_log)
+      .values({
+        kilo_user_id: params.user?.id,
+        organization_id: params.organizationId ?? null,
+        status_code: params.statusCode,
+        model: params.model,
+        provider: params.provider,
+        request: params.request,
+        response: responseText,
+      })
+      .returning({ id: api_request_log.id });
+    console.log('[request-logging] Inserted api_request_log', inserted?.id);
+  } catch (err) {
+    console.error('[request-logging] Failed to insert api_request_log', err);
+  }
+}
diff --git a/llm-gateway/src/background/usage-accounting.ts b/llm-gateway/src/background/usage-accounting.ts
new file mode 100644
index 000000000..20aa9e6a9
--- /dev/null
+++ b/llm-gateway/src/background/usage-accounting.ts
@@ -0,0 +1,1018 @@
+// Background task: parse response stream for token usage, insert microdollar_usage,
+// update balances, and track org per-user daily limits.
+// Port of src/lib/processUsage.ts — simplified:
+//   - No Sentry spans/captures (use console.error/warn)
+//   - No PostHog first-usage events
+//   - KiloPass threshold check is delegated to ./kilo-pass (see the import below)
+//   - Uses crypto.randomUUID() (Web Crypto global) instead of Node `randomUUID`
+//   - Uses scheduler.wait() instead of setTimeout for CF Workers backoff
+
+import { createParser } from 'eventsource-parser';
+import type { EventSourceMessage } from 'eventsource-parser';
+import { sql } from 'drizzle-orm';
+import { eq } from 'drizzle-orm';
+import type { WorkerDb } from '@kilocode/db/client';
+import { organizations, organization_user_usage } from '@kilocode/db/schema';
+import type { FraudDetectionHeaders } from '../lib/extract-headers';
+import type { FeatureValue } from '../lib/feature-detection';
+import type { PromptInfo } from '../lib/prompt-info';
+import { isFreeModel } from '../lib/models';
+import { isActiveReviewPromo, isActiveCloudAgentPromo } from '../lib/promotions';
+import {
+  getEffectiveKiloPassThreshold,
+  maybeIssueKiloPassBonusFromUsageThreshold,
+} from './kilo-pass';
+
+// ─── Types ───────────────────────────────────────────────────────────────────
+
+/**
+ * JSON body returned by a provider's `/generation?id=` endpoint (see fetchGeneration).
+ * `total_cost`, `upstream_inference_cost` and `cache_discount` are treated as USD amounts:
+ * mapToUsageStats converts them with toMicrodollars.
+ */
+export type OpenRouterGeneration = {
+  data: {
+    id: string;
+    is_byok?: boolean | null;
+    total_cost: number;
+    upstream_inference_cost?: number | null;
+    created_at: string;
+    model: string;
+    origin: string;
+    usage: number;
+    upstream_id?: string | null;
+    cache_discount?: number | null;
+    app_id?: number | null;
+    streamed?: boolean | null;
+    cancelled?: boolean | null;
+    provider_name?: string | null;
+    latency?: number | null;
+    moderation_latency?: number | null;
+    generation_time?: number | null;
+    finish_reason?: string | null;
+    native_finish_reason?: string | null;
+    tokens_prompt?: number | null;
+    tokens_completion?: number | null;
+    native_tokens_prompt?: number | null;
+    native_tokens_completion?: number | null;
+    native_tokens_reasoning?: number | null;
+    native_tokens_cached?: number | null;
+    num_media_prompt?: number | null;
+    num_media_completion?: number | null;
+    num_search_results?: number | null;
+  };
+};
+
+/**
+ * `usage` object carried by a chat-completion chunk or non-streaming response.
+ * `cost` and `cost_details.upstream_inference_cost` are read as USD by
+ * processOpenRouterUsage.
+ */
+export type OpenRouterUsage = {
+  cost?: number;
+  is_byok?: boolean | null;
+  cost_details?: { upstream_inference_cost: number };
+  completion_tokens: number;
+  completion_tokens_details: { reasoning_tokens: number };
+  prompt_tokens: number;
+  prompt_tokens_details: { cached_tokens: number };
+  total_tokens: number;
+};
+
+// Part of a chunk that may carry token usage and the top-level `provider` field.
+type MaybeHasOpenRouterUsage = {
+  usage?: OpenRouterUsage | null;
+  provider?: string | null;
+};
+
+// Provider metadata shape from which `gateway.routing.finalProvider` is read.
+type VercelProviderMetaData = { gateway?: { routing?: { finalProvider?: string } } };
+
+// Part of a chunk holding choices, each with an optional streaming `delta` or full `message`.
+type MaybeHasVercelProviderMetaDataChunk = {
+  choices?: {
+    delta?: { provider_metadata?: VercelProviderMetaData; content?: string | null };
+    message?: { provider_metadata?: VercelProviderMetaData; content?: string | null };
+    finish_reason?: string | null;
+  }[];
+};
+
+// One parsed SSE `data:` payload as consumed by parseMicrodollarUsageFromStream.
+type ChatCompletionChunk = MaybeHasOpenRouterUsage &
+  MaybeHasVercelProviderMetaDataChunk & {
+    id?: string | null;
+    model?: string | null;
+    error?: unknown;
+  };
+
+/**
+ * Per-request context carried into usage accounting.
+ * NOTE(review): the code that builds and consumes this type is outside this section;
+ * field notes below are the ones already present in the source.
+ */
+export type MicrodollarUsageContext = {
+  kiloUserId: string;
+  fraudHeaders: FraudDetectionHeaders;
+  organizationId?: string;
+  /** ProviderId string */
+  provider: string;
+  requested_model: string;
+  promptInfo: PromptInfo;
+  max_tokens: number | null;
+  has_middle_out_transform: boolean | null;
+  estimatedInputTokens: number;
+  estimatedOutputTokens: number;
+  isStreaming: boolean;
+  /** User's microdollars_used before this request (for first-usage detection). */
+  prior_microdollar_usage: number;
+  /** User email for authenticated users — used as PostHog distinctId. Undefined for anonymous users. */
+  posthog_distinct_id?: string;
+  /** PostHog API key for first-usage event capture. Undefined when not configured. */
+  posthogApiKey?: string;
+  /** Provider base URL — used to call the /generation endpoint for accurate cost data. */
+  providerApiUrl: string;
+  /** Provider API key — used to authenticate /generation endpoint requests. */
+  providerApiKey: string;
+  /** Whether the provider supports the /generation?id= endpoint for post-stream cost lookup. */
+  providerHasGenerationEndpoint: boolean;
+  project_id: string | null;
+  status_code: number | null;
+  editor_name: string | null;
+  machine_id: string | null;
+  user_byok: boolean;
+  has_tools: boolean;
+  botId?: string;
+  tokenSource?: string;
+  /** Request ID from abuse service classify response; 0 means skip. */
+  abuse_request_id?: number;
+  feature: FeatureValue | null;
+  session_id: string | null;
+  mode: string | null;
+  auto_model: string | null;
+};
+
+// Response facts gathered by the parsers before any cost has been attached.
+type NotYetCostedUsageStats = {
+  messageId: string | null;
+  model: string | null;
+  responseContent: string;
+  hasError: boolean;
+  inference_provider: string | null;
+  upstream_id: string | null;
+  finish_reason: string | null;
+  latency: number | null;
+  moderation_latency: number | null;
+  generation_time: number | null;
+  streamed: boolean | null;
+  cancelled: boolean | null;
+};
+
+// Cost and token counts; `cost_mUsd` is produced by toMicrodollars (microdollars).
+type JustTheCostsUsageStats = {
+  cost_mUsd: number;
+  cacheDiscount_mUsd?: number;
+  market_cost?: number;
+  inputTokens: number;
+  outputTokens: number;
+  cacheWriteTokens: number;
+  cacheHitTokens: number;
+  is_byok: boolean | null;
+};
+
+// Complete usage record: response facts combined with cost and token counts.
+export type MicrodollarUsageStats = NotYetCostedUsageStats & JustTheCostsUsageStats;
+
+/**
+ * Values written to `microdollar_usage_metadata` by insertUsageAndMetadataWithBalanceUpdate.
+ * Several string fields are stored through lookup tables rather than inline, and some are
+ * renamed on the way in: `http_x_forwarded_for` -> `http_ip`, `geo_country` ->
+ * `vercel_ip_country`, `geo_city` -> `vercel_ip_city`, `ja3_hash` -> `ja4_digest`,
+ * `geo_latitude` / `geo_longitude` -> `vercel_ip_latitude` / `vercel_ip_longitude`.
+ */
+type UsageMetaData = {
+  id: string;
+  message_id: string;
+  created_at: string;
+  http_x_forwarded_for: string | null;
+  geo_city: string | null;
+  geo_country: string | null;
+  geo_latitude: number | null;
+  geo_longitude: number | null;
+  ja3_hash: string | null;
+  user_prompt_prefix: string | null;
+  system_prompt_prefix: string | null;
+  system_prompt_length: number | null;
+  http_user_agent: string | null;
+  max_tokens: number | null;
+  has_middle_out_transform: boolean | null;
+  status_code: number | null;
+  upstream_id: string | null;
+  finish_reason: string | null;
+  latency: number | null;
+  moderation_latency: number | null;
+  generation_time: number | null;
+  is_byok: boolean | null;
+  is_user_byok: boolean;
+  streamed: boolean | null;
+  cancelled: boolean | null;
+  editor_name: string | null;
+  has_tools: boolean | null;
+  machine_id: string | null;
+  feature: string | null;
+  session_id: string | null;
+  mode: string | null;
+  auto_model: string | null;
+  market_cost: number | null;
+};
+
+/**
+ * Column values for the `microdollar_usage` row inserted by
+ * insertUsageAndMetadataWithBalanceUpdate. `cost` is also the amount added to the user's
+ * `microdollars_used` by the same statement.
+ */
+type CoreUsageFields = {
+  id: string;
+  kilo_user_id: string;
+  organization_id: string | null;
+  provider: string;
+  cost: number;
+  input_tokens: number;
+  output_tokens: number;
+  cache_write_tokens: number;
+  cache_hit_tokens: number;
+  created_at: string;
+  model: string | null;
+  requested_model: string;
+  cache_discount: number | null;
+  has_error: boolean;
+  abuse_classification: number;
+  inference_provider: string | null;
+  project_id: string | null;
+};
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
+/** Convert a USD amount to microdollars (1 USD = 1,000,000), rounded to the nearest integer. */
+function toMicrodollars(usd: number): number {
+  const microdollarsPerUsd = 1_000_000;
+  return Math.round(usd * microdollarsPerUsd);
+}
+
+// OpenRouter reports only 5% of the real cost for BYOK requests; multiplying by 20 recovers it.
+const OPENROUTER_BYOK_COST_MULTIPLIER = 20.0;
+
+/**
+ * Derive cost and token counts from an OpenRouter `usage` object.
+ *
+ * BYOK requests are charged the upstream inference cost, all others the reported cost.
+ * A warning is logged when the BYOK flag is missing although a cost was reported, or when
+ * a BYOK request's reported cost disagrees with the upstream cost by more than 1.1
+ * microdollars after scaling.
+ *
+ * @param usage - Usage object from the response; missing values count as zero.
+ * @param coreProps - Response facts included in the warning (minus the response text).
+ */
+function processOpenRouterUsage(
+  usage: OpenRouterUsage | null | undefined,
+  coreProps: NotYetCostedUsageStats
+): JustTheCostsUsageStats {
+  const isByok = usage?.is_byok ?? null;
+  const reportedCostUsd = usage?.cost ?? 0;
+  const upstreamCostUsd = usage?.cost_details?.upstream_inference_cost ?? 0;
+  const chargedUsd = isByok ? upstreamCostUsd : reportedCostUsd;
+  const cost_mUsd = toMicrodollars(chargedUsd);
+
+  // Gap, in microdollars, between the upstream cost implied by the reported cost and the
+  // upstream cost actually reported.
+  const impliedUpstreamUsd = reportedCostUsd * OPENROUTER_BYOK_COST_MULTIPLIER;
+  const mismatchMicrodollars = (impliedUpstreamUsd - upstreamCostUsd) * 1000000;
+
+  const byokFlagMissing = isByok == null && Boolean(reportedCostUsd || upstreamCostUsd);
+  const byokCostMismatch =
+    Boolean(isByok) && usage?.cost !== 0 && 1.1 < Math.abs(mismatchMicrodollars);
+
+  if (byokFlagMissing || byokCostMismatch) {
+    // Leave the (potentially large) response text out of the log entry.
+    const { responseContent: _ignore, ...logProps } = coreProps;
+    console.warn("SUSPICIOUS: openrouter's cost accounting doesn't make sense", {
+      ...logProps,
+      cost_mUsd,
+      is_byok: isByok,
+      openrouterCost_USD: reportedCostUsd,
+      upstream_inference_cost_USD: upstreamCostUsd,
+    });
+  }
+
+  return {
+    inputTokens: usage?.prompt_tokens ?? 0,
+    cacheHitTokens: usage?.prompt_tokens_details?.cached_tokens ?? 0,
+    cacheWriteTokens: 0,
+    outputTokens: usage?.completion_tokens ?? 0,
+    cost_mUsd,
+    is_byok: isByok,
+  };
+}
+
+// ─── Generation endpoint refetch ─────────────────────────────────────────────
+
+/**
+ * Fetch `url`, retrying with exponential backoff while `shouldRetry` accepts the response.
+ *
+ * The first delay is about 200 ms (with ±5% jitter) and grows by 1.5x per attempt.
+ * Retrying stops once the next wait would push total elapsed time past 20 seconds; the
+ * last response is then returned even though `shouldRetry` still accepted it.
+ *
+ * @param url - Request URL.
+ * @param init - Fetch options, reused unchanged on every attempt.
+ * @param shouldRetry - Returns true when the response should be discarded and retried.
+ * @returns The first response not selected for retry, or the last one received.
+ */
+async function fetchWithBackoff(
+  url: string,
+  init: RequestInit,
+  shouldRetry: (r: Response) => boolean
+): Promise<Response> {
+  const maxElapsedMs = 20_000;
+  const startedAt = Date.now();
+  let nextDelayMs = 200 * (1 + (Math.random() - 0.5) / 10);
+  while (true) {
+    const response = await fetch(url, init);
+    if (!shouldRetry(response)) return response;
+    if (Date.now() - startedAt + nextDelayMs > maxElapsedMs) return response;
+    // This response is being thrown away: cancel its body so the underlying connection is
+    // released instead of staying open, unread, while we wait and retry.
+    try {
+      await response.body?.cancel();
+    } catch {
+      // Best effort only — failing to cancel must not prevent the retry.
+    }
+    await scheduler.wait(nextDelayMs);
+    nextDelayMs = nextDelayMs * 1.5;
+  }
+}
+
+/**
+ * Look up generation details for a finished request via the provider's
+ * `/generation?id=` endpoint.
+ *
+ * Waits 200 ms before the first attempt, then retries with backoff (see fetchWithBackoff)
+ * while the provider answers with any status >= 400.
+ *
+ * @param apiUrl - Provider base URL.
+ * @param apiKey - Provider API key, sent as a Bearer token.
+ * @param messageId - Generation id taken from the provider's response.
+ * @returns The parsed response body, or `null` when the final response is not ok or the
+ *   request/JSON parsing throws. Failures are logged as warnings, never thrown.
+ */
+async function fetchGeneration(
+  apiUrl: string,
+  apiKey: string,
+  messageId: string
+): Promise<OpenRouterGeneration | null> {
+  // Delay 200ms — the provider may not have the cost ready immediately after streaming.
+  await scheduler.wait(200);
+  try {
+    // The id originates from an upstream response body, so encode it before placing it in
+    // the query string; reserved characters would otherwise alter the URL.
+    const response = await fetchWithBackoff(
+      `${apiUrl}/generation?id=${encodeURIComponent(messageId)}`,
+      {
+        method: 'GET',
+        headers: {
+          Authorization: `Bearer ${apiKey}`,
+          'HTTP-Referer': 'https://kilocode.ai',
+          'X-Title': 'Kilo Code',
+        },
+      },
+      r => r.status >= 400 // retry on any error status, e.g. 404 while the generation is not yet available
+    );
+    if (!response.ok) {
+      console.warn('fetchGeneration: non-ok response', {
+        status: response.status,
+        messageId,
+      });
+      return null;
+    }
+    return await response.json();
+  } catch (err) {
+    console.warn('fetchGeneration: fetch error', { messageId, err });
+    return null;
+  }
+}
+
+/**
+ * Convert a `/generation` lookup result into usage stats.
+ *
+ * Cost selection: non-BYOK uses `total_cost`; BYOK uses `upstream_inference_cost`, or
+ * `total_cost` scaled by OPENROUTER_BYOK_COST_MULTIPLIER (with a warning) when that
+ * field is missing. `hasError` is always false and `cacheWriteTokens` always 0.
+ *
+ * @param generation - Body returned by the generation endpoint.
+ * @param responseContent - Response text collected earlier, passed through unchanged.
+ */
+export function mapToUsageStats(
+  generation: OpenRouterGeneration,
+  responseContent: string
+): MicrodollarUsageStats {
+  const { data } = generation;
+
+  const resolveCostUsd = (): number => {
+    if (!data.is_byok) {
+      return data.total_cost;
+    }
+    if (data.upstream_inference_cost != null) {
+      return data.upstream_inference_cost;
+    }
+    console.warn('SUSPICIOUS: openrouter missing upstream_inference_cost', { id: data.id });
+    return data.total_cost * OPENROUTER_BYOK_COST_MULTIPLIER;
+  };
+  const llmCostUsd = resolveCostUsd();
+
+  const cacheDiscount_mUsd =
+    data.cache_discount == null ? undefined : toMicrodollars(data.cache_discount);
+
+  return {
+    messageId: data.id,
+    hasError: false,
+    model: data.model,
+    responseContent,
+    inputTokens: data.native_tokens_prompt ?? 0,
+    cacheHitTokens: data.native_tokens_cached ?? 0,
+    cacheWriteTokens: 0,
+    outputTokens: data.native_tokens_completion ?? 0,
+    cost_mUsd: toMicrodollars(llmCostUsd),
+    is_byok: data.is_byok ?? null,
+    cacheDiscount_mUsd,
+    inference_provider: data.provider_name ?? null,
+    upstream_id: data.upstream_id ?? null,
+    finish_reason: data.finish_reason ?? null,
+    latency: data.latency ?? null,
+    moderation_latency: data.moderation_latency ?? null,
+    generation_time: data.generation_time ?? null,
+    streamed: data.streamed ?? null,
+    cancelled: data.cancelled ?? null,
+  };
+}
+
+// ─── Stream/string parsers ────────────────────────────────────────────────────
+
+/**
+ * Read an SSE response stream to the end and extract usage stats from its chunks.
+ *
+ * Each event's data is parsed as a JSON chunk; `[DONE]` and unparseable events are
+ * skipped. For `model`, `id`, `usage`, provider and `finish_reason` the most recent
+ * non-nullish value wins, while content deltas of the first choice are concatenated.
+ *
+ * @param stream - Response body to consume; its reader lock is released afterwards.
+ * @param kiloUserId - Used only in warning logs.
+ * @param provider - Used only in warning logs.
+ * @param statusCode - A status >= 400 marks the result as an error up front.
+ * @returns Usage stats with `streamed: true`. `hasError` is set when the status is >= 400,
+ *   any chunk has an `error` key, or the read was aborted.
+ * @throws Any read error other than one named `ResponseAborted`.
+ */
+export async function parseMicrodollarUsageFromStream(
+  stream: ReadableStream<Uint8Array>,
+  kiloUserId: string,
+  provider: string,
+  statusCode: number
+): Promise<MicrodollarUsageStats> {
+  let messageId: string | null = null;
+  let model: string | null = null;
+  let responseContent = '';
+  let reportedError = statusCode >= 400;
+  let usage: OpenRouterUsage | null = null;
+  let inference_provider: string | null = null;
+  let finish_reason: string | null = null;
+
+  const reader = stream.getReader();
+  const decoder = new TextDecoder();
+
+  const sseStreamParser = createParser({
+    onEvent(event: EventSourceMessage) {
+      if (event.data === '[DONE]') return;
+
+      let json: ChatCompletionChunk | undefined;
+      try {
+        json = JSON.parse(event.data) as ChatCompletionChunk;
+      } catch {
+        return;
+      }
+
+      if (!json) return;
+
+      if ('error' in json) {
+        reportedError = true;
+        console.warn('OpenRouter error in SSE stream', { error: json.error, kiloUserId, provider });
+      }
+
+      // Keep the previous value whenever this chunk does not supply a new one.
+      model = json.model ?? model;
+      messageId = json.id ?? messageId;
+      usage = json.usage ?? usage;
+      const choice = json.choices?.[0];
+      // Prefer the top-level provider, then the gateway routing metadata on the delta.
+      inference_provider =
+        json.provider ??
+        choice?.delta?.provider_metadata?.gateway?.routing?.finalProvider ??
+        inference_provider;
+      finish_reason = choice?.finish_reason ?? finish_reason;
+
+      const contentDelta = choice?.delta?.content;
+      if (typeof contentDelta === 'string') {
+        responseContent += contentDelta;
+      }
+    },
+  });
+
+  let wasAborted = false;
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      // `stream: true` lets multi-byte characters split across reads decode correctly.
+      sseStreamParser.feed(decoder.decode(value, { stream: true }));
+    }
+  } catch (error) {
+    // An aborted response is recorded as an error result rather than thrown.
+    if (error instanceof Error && error.name === 'ResponseAborted') {
+      wasAborted = true;
+    } else {
+      throw error;
+    }
+  } finally {
+    reader.releaseLock();
+  }
+
+  if (!reportedError && !usage) {
+    console.warn('SUSPICIOUS: No usage chunk in stream', {
+      kiloUserId,
+      provider,
+      messageId,
+      model,
+    });
+  }
+
+  const coreProps: NotYetCostedUsageStats = {
+    messageId,
+    hasError: reportedError || wasAborted,
+    model,
+    responseContent,
+    inference_provider,
+    finish_reason,
+    upstream_id: null,
+    latency: null,
+    moderation_latency: null,
+    generation_time: null,
+    streamed: true,
+    cancelled: null,
+  };
+
+  return { ...coreProps, ...processOpenRouterUsage(usage, coreProps) };
+}
+
+// Fields read from a non-streaming completion body by parseMicrodollarUsageFromString.
+type NonStreamingResponseJson = {
+  id?: string | null;
+  model?: string | null;
+  provider?: string | null;
+  usage?: OpenRouterUsage | null;
+  choices?: {
+    finish_reason?: string | null;
+    message?: {
+      content?: string | null;
+      provider_metadata?: VercelProviderMetaData;
+    };
+  }[];
+};
+
+/**
+ * Extract usage stats from a complete (non-streaming) response body.
+ *
+ * A body that is not valid JSON is logged and treated as an empty response. The result has
+ * `streamed: false`; `hasError` is set when the body carries no `model` or the status code
+ * is >= 400.
+ *
+ * @param fullResponse - Raw response text.
+ * @param kiloUserId - Used only in warning logs.
+ * @param statusCode - HTTP status of the response.
+ */
+export function parseMicrodollarUsageFromString(
+  fullResponse: string,
+  kiloUserId: string,
+  statusCode: number
+): MicrodollarUsageStats {
+  let parsed: NonStreamingResponseJson | null = null;
+  try {
+    parsed = JSON.parse(fullResponse) as NonStreamingResponseJson;
+  } catch {
+    console.warn('parseMicrodollarUsageFromString: failed to parse JSON', { kiloUserId });
+  }
+
+  const usage = parsed?.usage;
+  if (usage?.is_byok == null && usage?.cost) {
+    console.warn('SUSPICIOUS: is_byok is null', { kiloUserId });
+  }
+
+  const firstChoice = parsed?.choices?.[0];
+  const message = firstChoice?.message;
+  // Prefer the top-level provider, then the gateway routing metadata on the message.
+  const inferenceProvider =
+    parsed?.provider ?? message?.provider_metadata?.gateway?.routing?.finalProvider ?? null;
+
+  const coreProps: NotYetCostedUsageStats = {
+    messageId: parsed?.id ?? null,
+    hasError: !parsed?.model || statusCode >= 400,
+    model: parsed?.model ?? null,
+    responseContent: message?.content ?? '',
+    inference_provider: inferenceProvider,
+    upstream_id: null,
+    finish_reason: firstChoice?.finish_reason ?? null,
+    latency: null,
+    moderation_latency: null,
+    generation_time: null,
+    streamed: false,
+    cancelled: null,
+  };
+
+  const costs = processOpenRouterUsage(usage, coreProps);
+  return { ...coreProps, ...costs };
+}
+
+// ─── DB insertion ─────────────────────────────────────────────────────────────
+
+/**
+ * CTE fragment that upserts a value into a small lookup table.
+ * Returns CTEs: `{name}_value`, `{name}_existing`, `{name}_ins`, `{name}_cte`
+ * containing the ID of the (possibly newly inserted) row.
+ *
+ * The fragment uses `{name}` as the table name and as its value column, and `{name}_id`
+ * as the id column. A NULL `value` leaves `{name}_value` empty, so nothing is inserted
+ * and `{name}_cte` has no rows. The insert only runs when no existing row matched.
+ *
+ * @param metaDataKindName - SQL fragment spliced in as the table/column/CTE name prefix.
+ * @param value - Value to look up or insert; bound as a query parameter.
+ */
+function createUpsertCTE(metaDataKindName: ReturnType<typeof sql>, value: string | null) {
+  return sql`
+${metaDataKindName}_value AS (
+  SELECT value
+  FROM (VALUES (${value})) v(value)
+  WHERE value IS NOT NULL
+),
+${metaDataKindName}_existing AS (
+  SELECT ${metaDataKindName}_id
+  FROM ${metaDataKindName}, ${metaDataKindName}_value
+  WHERE ${metaDataKindName}.${metaDataKindName} = ${metaDataKindName}_value.value
+),
+${metaDataKindName}_ins AS (
+  INSERT INTO ${metaDataKindName} (${metaDataKindName})
+  SELECT ${metaDataKindName}_value.value FROM ${metaDataKindName}_value
+  WHERE NOT EXISTS (SELECT 1 FROM ${metaDataKindName}_existing)
+  ON CONFLICT (${metaDataKindName}) DO UPDATE SET ${metaDataKindName} = EXCLUDED.${metaDataKindName}
+  RETURNING ${metaDataKindName}_id
+),
+${metaDataKindName}_cte AS (
+  SELECT ${metaDataKindName}_id FROM ${metaDataKindName}_existing
+  UNION ALL
+  SELECT ${metaDataKindName}_id FROM ${metaDataKindName}_ins
+)`;
+}
+
+async function insertUsageAndMetadataWithBalanceUpdate(
+  db: WorkerDb,
+  coreUsageFields: CoreUsageFields,
+  metadataFields: UsageMetaData
+): Promise<{ newMicrodollarsUsed: number; kiloPassThreshold: number | null } | null> {
+  const result = await db.execute<{
+    new_microdollars_used: number | bigint | string;
+    kilo_pass_threshold: number | bigint | string | null;
+  }>(sql`
+    WITH microdollar_usage_ins AS (
+      INSERT INTO microdollar_usage (
+        id, kilo_user_id, organization_id, provider, cost,
+        input_tokens, output_tokens, cache_write_tokens, cache_hit_tokens,
+        created_at, model, requested_model, cache_discount, has_error, abuse_classification,
+        inference_provider, project_id
+      ) VALUES (
+        ${coreUsageFields.id},
+        ${coreUsageFields.kilo_user_id},
+        ${coreUsageFields.organization_id},
+        ${coreUsageFields.provider},
+        ${coreUsageFields.cost},
+        ${coreUsageFields.input_tokens},
+        ${coreUsageFields.output_tokens},
+        ${coreUsageFields.cache_write_tokens},
+        ${coreUsageFields.cache_hit_tokens},
+        ${coreUsageFields.created_at},
+        ${coreUsageFields.model},
+        ${coreUsageFields.requested_model},
+        ${coreUsageFields.cache_discount},
+        ${coreUsageFields.has_error},
+        ${coreUsageFields.abuse_classification},
+        ${coreUsageFields.inference_provider},
+        ${coreUsageFields.project_id}
+      )
+    )
+    , ${createUpsertCTE(sql`http_user_agent`, metadataFields.http_user_agent)}
+    , ${createUpsertCTE(sql`http_ip`, metadataFields.http_x_forwarded_for)}
+    , ${createUpsertCTE(sql`vercel_ip_country`, metadataFields.geo_country)}
+    , ${createUpsertCTE(sql`vercel_ip_city`, metadataFields.geo_city)}
+    , ${createUpsertCTE(sql`ja4_digest`, metadataFields.ja3_hash)}
+    , ${createUpsertCTE(sql`system_prompt_prefix`, metadataFields.system_prompt_prefix)}
+    , ${createUpsertCTE(sql`finish_reason`, metadataFields.finish_reason)}
+    , ${createUpsertCTE(sql`editor_name`, metadataFields.editor_name)}
+    , ${createUpsertCTE(sql`feature`, metadataFields.feature)}
+    , ${createUpsertCTE(sql`mode`, metadataFields.mode)}
+    , ${createUpsertCTE(sql`auto_model`, metadataFields.auto_model)}
+    , metadata_ins AS (
+      INSERT INTO microdollar_usage_metadata (
+        id,
+        message_id,
+        created_at,
+        user_prompt_prefix,
+        vercel_ip_latitude,
+        vercel_ip_longitude,
+        system_prompt_length,
+        max_tokens,
+        has_middle_out_transform,
+        status_code,
+        upstream_id,
+        latency,
+        moderation_latency,
+        generation_time,
+        is_byok,
+        is_user_byok,
+        streamed,
+        cancelled,
+        has_tools,
+        machine_id,
+        session_id,
+        market_cost,
+
+        http_user_agent_id,
+        http_ip_id,
+        vercel_ip_country_id,
+        vercel_ip_city_id,
+        ja4_digest_id,
+        system_prompt_prefix_id,
+        finish_reason_id,
+        editor_name_id,
+        feature_id,
+        mode_id,
+        auto_model_id
+      )
+      SELECT
+        ${metadataFields.id},
+        ${metadataFields.message_id},
+        ${metadataFields.created_at},
+        ${metadataFields.user_prompt_prefix},
+        ${metadataFields.geo_latitude},
+        ${metadataFields.geo_longitude},
+        ${metadataFields.system_prompt_length},
+        ${metadataFields.max_tokens},
+        ${metadataFields.has_middle_out_transform},
+        ${metadataFields.status_code},
+        ${metadataFields.upstream_id},
+        ${metadataFields.latency},
+        ${metadataFields.moderation_latency},
+        ${metadataFields.generation_time},
+        ${metadataFields.is_byok},
+        ${metadataFields.is_user_byok},
+        ${metadataFields.streamed},
+        ${metadataFields.cancelled},
+        ${metadataFields.has_tools},
+        ${metadataFields.machine_id},
+        ${metadataFields.session_id},
+        ${metadataFields.market_cost},
+
+        (SELECT http_user_agent_id FROM http_user_agent_cte),
+        (SELECT http_ip_id FROM http_ip_cte),
+        (SELECT vercel_ip_country_id FROM vercel_ip_country_cte),
+        (SELECT vercel_ip_city_id FROM vercel_ip_city_cte),
+        (SELECT ja4_digest_id FROM ja4_digest_cte),
+        (SELECT system_prompt_prefix_id FROM system_prompt_prefix_cte),
+        (SELECT finish_reason_id FROM finish_reason_cte),
+        (SELECT editor_name_id FROM editor_name_cte),
+        (SELECT feature_id FROM feature_cte),
+        (SELECT mode_id FROM mode_cte),
+        (SELECT auto_model_id FROM auto_model_cte)
+    )
+    UPDATE kilocode_users
+    SET microdollars_used = microdollars_used + ${coreUsageFields.cost}
+    WHERE id = ${coreUsageFields.kilo_user_id}
+      AND ${coreUsageFields.organization_id}::uuid IS NULL
+      AND ${coreUsageFields.cost} > 0
+    RETURNING microdollars_used AS new_microdollars_used, kilo_pass_threshold
+  `);
+
+  if (!result.rows[0]) {
+    if (!coreUsageFields.organization_id && coreUsageFields.cost && coreUsageFields.cost > 0) {
+      console.error('impossible: missing user in balance update', {
+        kilo_user_id: coreUsageFields.kilo_user_id,
+        cost: coreUsageFields.cost,
+      });
+    }
+    return null;
+  }
+
+  const newMicrodollarsUsed = Number(result.rows[0].new_microdollars_used);
+  const kiloPassThreshold =
+    result.rows[0].kilo_pass_threshold == null ? null : Number(result.rows[0].kilo_pass_threshold);
+
+  return { newMicrodollarsUsed, kiloPassThreshold };
+}
+
+// Record org-billed usage atomically: bump the org's totals, then upsert the member's daily row.
+async function ingestOrganizationTokenUsage(
+  db: WorkerDb,
+  usage: { cost: number; kilo_user_id: string; organization_id: string | null }
+): Promise<void> {
+  const { cost, kilo_user_id: memberId, organization_id: orgId } = usage;
+  if (!orgId) return; // not an organization request — nothing to record
+
+  await db.transaction(async trx => {
+    // Org-level counters: usage goes up and the remaining balance goes down by the same cost.
+    const orgTotals = {
+      microdollars_used: sql`${organizations.microdollars_used} + ${cost}`,
+      microdollars_balance: sql`${organizations.microdollars_balance} - ${cost}`,
+    };
+    await trx.update(organizations).set(orgTotals).where(eq(organizations.id, orgId));
+    // Per-member daily usage: insert today's row, or add to it when one already exists.
+    await trx.execute(sql`
+      INSERT INTO ${organization_user_usage} (
+        organization_id,
+        kilo_user_id,
+        usage_date,
+        limit_type,
+        microdollar_usage,
+        created_at,
+        updated_at
+      )
+      SELECT
+        ${orgId},
+        ${memberId},
+        CURRENT_DATE,
+        ${'daily'},
+        ${cost},
+        NOW(),
+        NOW()
+      ON CONFLICT (organization_id, kilo_user_id, limit_type, usage_date)
+      DO UPDATE SET
+        microdollar_usage = ${organization_user_usage.microdollar_usage} + ${cost},
+        updated_at = NOW()
+    `);
+  });
+}
+
+// ─── PostHog first-usage events ───────────────────────────────────────────────
+
+const POSTHOG_CAPTURE_URL = 'https://us.i.posthog.com/capture';
+// Best-effort PostHog capture. Never throws: network errors and non-2xx replies are only logged.
+async function sendPostHogEvent(
+  apiKey: string,
+  distinctId: string,
+  event: string,
+  properties: Record<string, unknown>
+): Promise<void> {
+  try {
+    const body = JSON.stringify({ api_key: apiKey, distinct_id: distinctId, event, properties });
+    const headers = { 'Content-Type': 'application/json' };
+    const res = await fetch(POSTHOG_CAPTURE_URL, { method: 'POST', headers, body });
+    if (!res.ok) console.warn(`[posthog] ${event} event rejected with HTTP ${res.status}`);
+    await res.body?.cancel(); // the reply payload is not needed; release the connection
+  } catch (err) {
+    console.warn(`[posthog] Failed to send ${event} event`, err);
+  }
+}
+
+// Non-org user with no prior spend and no microdollar_usage row; call before inserting a new row.
+async function isFirstUsageEver(
+  db: WorkerDb,
+  kiloUserId: string,
+  priorMicrodollarUsage: number,
+  organizationId: string | undefined
+): Promise<boolean> {
+  if (organizationId || priorMicrodollarUsage > 0) return false;
+  const { rows } = await db.execute<{ exists: boolean }>(sql`
+    SELECT EXISTS (
+      SELECT 1 FROM microdollar_usage WHERE kilo_user_id = ${kiloUserId} LIMIT 1
+    ) AS exists
+  `);
+  return !rows[0]?.exists;
+}
+
+// ─── Main entry point ─────────────────────────────────────────────────────────
+
+/**
+ * Parse usage from the background response stream, persist it (usage row plus user/org
+ * balances) and emit PostHog first-usage events. Returns the MicrodollarUsageStats for
+ * downstream background tasks, or null when the stream is missing or cannot be parsed.
+ */
+export async function runUsageAccounting(
+  stream: ReadableStream<Uint8Array> | null,
+  usageContext: MicrodollarUsageContext,
+  db: WorkerDb
+): Promise<MicrodollarUsageStats | null> {
+  if (!stream) {
+    console.warn('runUsageAccounting: no stream provided', {
+      kiloUserId: usageContext.kiloUserId,
+    });
+    return null;
+  }
+
+  let usageStats: MicrodollarUsageStats;
+  try {
+    if (usageContext.isStreaming) {
+      usageStats = await parseMicrodollarUsageFromStream(
+        stream,
+        usageContext.kiloUserId,
+        usageContext.provider,
+        usageContext.status_code ?? 200
+      );
+    } else {
+      const text = await new Response(stream).text();
+      usageStats = parseMicrodollarUsageFromString(
+        text,
+        usageContext.kiloUserId,
+        usageContext.status_code ?? 200
+      );
+    }
+  } catch (err) {
+    console.error('runUsageAccounting: parse error', err);
+    return null;
+  }
+
+  // Refetch accurate cost/token data from the provider's generation endpoint when available.
+  // OpenRouter's /generation?id= gives more precise token counts and cost data than the SSE stream.
+  if (usageContext.providerHasGenerationEndpoint && usageStats.messageId && !usageStats.hasError) {
+    try {
+      const generation = await fetchGeneration(
+        usageContext.providerApiUrl,
+        usageContext.providerApiKey,
+        usageStats.messageId
+      );
+      if (generation) {
+        const genStats = mapToUsageStats(generation, usageStats.responseContent);
+        // Preserve stream-derived fields that the generation endpoint may not have.
+        genStats.model = usageStats.model;
+        genStats.hasError = usageStats.hasError;
+        genStats.streamed ??= usageContext.isStreaming;
+        if (genStats.cost_mUsd !== usageStats.cost_mUsd) {
+          console.warn('DEV ODDITY: usage stats do not match generation data', {
+            model: genStats.model,
+            gen_cost: genStats.cost_mUsd,
+            stream_cost: usageStats.cost_mUsd,
+          });
+        }
+        usageStats = genStats;
+      }
+    } catch (err) {
+      console.warn('runUsageAccounting: fetchGeneration failed', err);
+    }
+  }
+
+  // Use requested_model as model fallback
+  if (!usageStats.model) {
+    usageStats.model = usageContext.requested_model;
+  }
+
+  // Preserve the real cost before zeroing for free/BYOK/promo
+  usageStats.market_cost = usageStats.cost_mUsd;
+
+  // Zero out cost for free/BYOK/promo requests
+  if (
+    isFreeModel(usageContext.requested_model) ||
+    usageContext.user_byok ||
+    isActiveReviewPromo(usageContext.botId, usageContext.requested_model) ||
+    isActiveCloudAgentPromo(usageContext.tokenSource, usageContext.requested_model)
+  ) {
+    usageStats.cost_mUsd = 0;
+    usageStats.cacheDiscount_mUsd = 0;
+  }
+
+  // Build DB records
+  const id = crypto.randomUUID();
+  const created_at = new Date().toISOString();
+
+  const coreUsageFields: CoreUsageFields = {
+    id,
+    kilo_user_id: usageContext.kiloUserId,
+    organization_id: usageContext.organizationId ?? null,
+    provider: usageContext.provider,
+    cost: usageStats.cost_mUsd,
+    input_tokens: usageStats.inputTokens,
+    output_tokens: usageStats.outputTokens,
+    cache_write_tokens: usageStats.cacheWriteTokens,
+    cache_hit_tokens: usageStats.cacheHitTokens,
+    created_at,
+    model: usageStats.model,
+    requested_model: usageContext.requested_model,
+    cache_discount: usageStats.cacheDiscount_mUsd ?? null,
+    has_error: usageStats.hasError,
+    abuse_classification: 0,
+    inference_provider: usageStats.inference_provider,
+    project_id: usageContext.project_id,
+  };
+
+  let system_prompt_prefix: string | null = usageContext.promptInfo.system_prompt_prefix;
+  let user_prompt_prefix: string | null = usageContext.promptInfo.user_prompt_prefix;
+
+  // Never log sensitive data for org requests
+  if (usageContext.organizationId) {
+    system_prompt_prefix = '';
+    user_prompt_prefix = null;
+  }
+
+  const metadataFields: UsageMetaData = {
+    id,
+    created_at,
+    message_id: usageStats.messageId ?? '<missing>',
+    http_x_forwarded_for: usageContext.fraudHeaders.http_x_forwarded_for,
+    geo_city: usageContext.fraudHeaders.geo_city,
+    geo_country: usageContext.fraudHeaders.geo_country,
+    geo_latitude: usageContext.fraudHeaders.geo_latitude,
+    geo_longitude: usageContext.fraudHeaders.geo_longitude,
+    ja3_hash: usageContext.fraudHeaders.ja3_hash,
+    user_prompt_prefix: user_prompt_prefix ?? null,
+    system_prompt_prefix: system_prompt_prefix || null,
+    system_prompt_length: usageContext.promptInfo.system_prompt_length,
+    http_user_agent: usageContext.fraudHeaders.http_user_agent,
+    max_tokens: usageContext.max_tokens,
+    has_middle_out_transform: usageContext.has_middle_out_transform,
+    status_code: usageContext.status_code,
+    upstream_id: usageStats.upstream_id,
+    finish_reason: usageStats.finish_reason,
+    latency: usageStats.latency,
+    moderation_latency: usageStats.moderation_latency,
+    generation_time: usageStats.generation_time,
+    is_byok: usageStats.is_byok,
+    is_user_byok: usageContext.user_byok,
+    streamed: usageStats.streamed,
+    cancelled: usageStats.cancelled,
+    editor_name: usageContext.editor_name,
+    has_tools: usageContext.has_tools,
+    machine_id: usageContext.machine_id,
+    feature: usageContext.feature,
+    session_id: usageContext.session_id,
+    mode: usageContext.mode,
+    auto_model: usageContext.auto_model,
+    market_cost: usageStats.market_cost ?? null,
+  };
+
+  // PostHog events need both a distinct id for the user and a configured API key.
+  const posthog =
+    usageContext.posthog_distinct_id && usageContext.posthogApiKey
+      ? { apiKey: usageContext.posthogApiKey, distinctId: usageContext.posthog_distinct_id }
+      : null;
+
+  // Probe for first-ever usage BEFORE the insert below: the probe asks whether any usage row
+  // exists for this user, so once this request's own row is written it can never be "first".
+  let isFirstUsage = false;
+  if (posthog) {
+    try {
+      isFirstUsage = await isFirstUsageEver(
+        db,
+        coreUsageFields.kilo_user_id,
+        usageContext.prior_microdollar_usage,
+        usageContext.organizationId
+      );
+    } catch (err) {
+      console.warn('[posthog] first_usage check failed', err);
+    }
+  }
+
+  let balanceUpdateResult: {
+    newMicrodollarsUsed: number;
+    kiloPassThreshold: number | null;
+  } | null = null;
+  try {
+    let attempt = 0;
+    while (true) {
+      try {
+        balanceUpdateResult = await insertUsageAndMetadataWithBalanceUpdate(
+          db,
+          coreUsageFields,
+          metadataFields
+        );
+        break;
+      } catch (err) {
+        if (attempt >= 2) throw err;
+        console.warn('insertUsageRecord concurrency failure, retrying', { attempt });
+        await scheduler.wait(Math.random() * 100);
+        attempt++;
+      }
+    }
+  } catch (err) {
+    console.error('insertUsageRecord failed', err);
+    // Don't return null — we still want to return stats for abuse cost reporting
+  }
+
+  // KiloPass: trigger bonus credit issuance if usage threshold is crossed.
+  if (balanceUpdateResult) {
+    const effectiveThreshold = getEffectiveKiloPassThreshold(balanceUpdateResult.kiloPassThreshold);
+    if (
+      effectiveThreshold !== null &&
+      balanceUpdateResult.newMicrodollarsUsed >= effectiveThreshold
+    ) {
+      // Fire async — do not await; errors are logged inside.
+      void maybeIssueKiloPassBonusFromUsageThreshold(
+        db,
+        coreUsageFields.kilo_user_id,
+        coreUsageFields.created_at
+      ).catch(err => {
+        console.error('[kilo-pass] maybeIssueKiloPassBonusFromUsageThreshold failed', err);
+      });
+    }
+  }
+
+  try {
+    await ingestOrganizationTokenUsage(db, {
+      cost: coreUsageFields.cost,
+      kilo_user_id: coreUsageFields.kilo_user_id,
+      organization_id: coreUsageFields.organization_id,
+    });
+  } catch (err) {
+    console.error('ingestOrganizationTokenUsage failed', err);
+  }
+
+  // PostHog first-usage events (authenticated non-org users only)
+  if (posthog) {
+    const { apiKey, distinctId } = posthog;
+    const eventProps = { model: usageStats.model, cost_mUsd: coreUsageFields.cost };
+    if (isFirstUsage) {
+      // sendPostHogEvent is best-effort and logs its own failures, so no try/catch is needed.
+      await sendPostHogEvent(apiKey, distinctId, 'first_usage', eventProps);
+      console.log('first_usage PostHog event sent');
+    }
+
+    // first_microdollar_usage: fires the first time the user crosses the 1 microdollar threshold
+    if (balanceUpdateResult) {
+      const priorUsage = Math.abs(balanceUpdateResult.newMicrodollarsUsed - coreUsageFields.cost);
+      if (priorUsage < 1) {
+        await sendPostHogEvent(apiKey, distinctId, 'first_microdollar_usage', eventProps);
+      }
+    }
+  }
+
+  return usageStats;
+}
diff --git a/llm-gateway/src/dos/RateLimitDO.ts b/llm-gateway/src/dos/RateLimitDO.ts
new file mode 100644
index 000000000..f74d56109
--- /dev/null
+++ b/llm-gateway/src/dos/RateLimitDO.ts
@@ -0,0 +1,112 @@
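+// Per-IP Durable Object for rate limiting.
+// Each IP gets its own DO instance (via idFromName(ip)), so every read-modify-write of a window
+// is serialized and strongly consistent. Checking and incrementing are separate calls, though,
+// so concurrent requests can still slip past a limit between a check and its later increment.
+// Uses ctx.storage KV API with alarms for automatic expiry.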
+
+import { DurableObject } from 'cloudflare:workers';
+import type { Env } from '../env';
+
+const FREE_MODEL_WINDOW_MS = 60 * 60 * 1000; // 1 hour
+const FREE_MODEL_MAX_REQUESTS = 200; // free-model requests allowed inside one window
+
+const PROMOTION_WINDOW_MS = 24 * 60 * 60 * 1000; // 24 hours
+const PROMOTION_MAX_REQUESTS = 10_000; // promotion requests allowed inside one window
+
+// Storage keys for the two sliding window timestamp arrays
+const FREE_KEY = 'free';
+const PROMO_KEY = 'promo';
+
+export type RateLimitResult = {
+  allowed: boolean; // true while the current window count is still below the limit
+  requestCount: number; // number of timestamps currently inside the window
+};
+
+// Sliding-window counters: each window is a stored array of request timestamps (epoch ms).
+export class RateLimitDO extends DurableObject<Env> {
+  // Timestamps stored under `key` that are still inside the window ending at `now`.
+  private async liveTimestamps(key: string, windowMs: number, now: number): Promise<number[]> {
+    const stored = (await this.ctx.storage.get<number[]>(key)) ?? [];
+    return stored.filter(t => t >= now - windowMs);
+  }
+
+  // Read the current window count without modifying state.
+  private async peekCount(key: string, windowMs: number): Promise<number> {
+    return (await this.liveTimestamps(key, windowMs, Date.now())).length;
+  }
+
+  // Append "now" to the sliding window and return the new count. The read-modify-write cannot
+  // interleave with other calls because the DO serializes requests to the same instance.
+  private async appendTimestamp(key: string, windowMs: number): Promise<number> {
+    const now = Date.now();
+    const timestamps = await this.liveTimestamps(key, windowMs, now);
+    timestamps.push(now);
+    await this.ctx.storage.put(key, timestamps);
+    await this.scheduleCleanup(now + windowMs + 1000);
+    return timestamps.length;
+  }
+
+  // Check-only — does NOT increment the counter. Used by rate-limit middleware
+  // so that the log middleware is the sole place that increments.
+  async checkFreeModel(): Promise<RateLimitResult> {
+    const count = await this.peekCount(FREE_KEY, FREE_MODEL_WINDOW_MS);
+    return { allowed: count < FREE_MODEL_MAX_REQUESTS, requestCount: count };
+  }
+
+  // Record one request in the free-model window.
+  async incrementFreeModel(): Promise<void> {
+    await this.appendTimestamp(FREE_KEY, FREE_MODEL_WINDOW_MS);
+  }
+
+  // Check-only — does NOT increment the counter.
+  async checkPromotion(): Promise<RateLimitResult> {
+    const count = await this.peekCount(PROMO_KEY, PROMOTION_WINDOW_MS);
+    return { allowed: count < PROMOTION_MAX_REQUESTS, requestCount: count };
+  }
+
+  // Record one request in the promotion window.
+  async incrementPromotion(): Promise<void> {
+    await this.appendTimestamp(PROMO_KEY, PROMOTION_WINDOW_MS);
+  }
+
+  // Ensure a cleanup alarm fires no later than `dueAt` so expired entries get pruned and the DO
+  // can be evicted. A DO has a single alarm and setAlarm() REPLACES it, so only (re)schedule
+  // when nothing is pending or the pending alarm is later than `dueAt`; otherwise steady
+  // traffic would keep pushing the cleanup back. Awaited so storage errors are not dropped.
+  private async scheduleCleanup(dueAt: number): Promise<void> {
+    const pending = await this.ctx.storage.getAlarm();
+    if (pending === null || dueAt < pending) {
+      await this.ctx.storage.setAlarm(dueAt);
+    }
+  }
+
+  // Drop expired timestamps stored under `key`; resolves to true when live entries remain.
+  private async prune(key: string, windowMs: number, now: number): Promise<boolean> {
+    const live = await this.liveTimestamps(key, windowMs, now);
+    if (live.length > 0) {
+      await this.ctx.storage.put(key, live);
+    } else {
+      await this.ctx.storage.delete(key);
+    }
+    return live.length > 0;
+  }
+
+  // Alarm handler: prune both windows, then re-arm (longest window + 1s) while entries remain.
+  override async alarm() {
+    const now = Date.now();
+    const freeLeft = await this.prune(FREE_KEY, FREE_MODEL_WINDOW_MS, now);
+    const promoLeft = await this.prune(PROMO_KEY, PROMOTION_WINDOW_MS, now);
+    if (freeLeft || promoLeft) {
+      const nextCleanup = Math.max(FREE_MODEL_WINDOW_MS, PROMOTION_WINDOW_MS);
+      await this.ctx.storage.setAlarm(now + nextCleanup + 1000);
+    }
+  }
+}
+
+export function getRateLimitDO(
+  env: { RATE_LIMIT_DO: DurableObjectNamespace<RateLimitDO> },
+  ip: string
+): DurableObjectStub<RateLimitDO> {
+  // One instance per IP: the IP string is the instance name, so equal IPs share one stub target.
+  return env.RATE_LIMIT_DO.get(env.RATE_LIMIT_DO.idFromName(ip));
+}
diff --git a/llm-gateway/src/env.ts b/llm-gateway/src/env.ts
new file mode 100644
index 000000000..825c4f4e3
--- /dev/null
+++ b/llm-gateway/src/env.ts
@@ -0,0 +1,7 @@
+// Env type for the llm-gateway worker.
+// Cloudflare.Env is declared in worker-configuration.d.ts (generated by `wrangler types`).
+// O11YBinding is declared in o11y-binding.d.ts with the RPC method types.
+
+import type { O11YBinding } from './o11y-binding';
+// Same as the generated Cloudflare.Env, except that O11Y is re-typed as O11YBinding.
+export type Env = Omit<Cloudflare.Env, 'O11Y'> & { O11Y: O11YBinding };
diff --git a/llm-gateway/src/handler/background-tasks.ts b/llm-gateway/src/handler/background-tasks.ts
new file mode 100644
index 000000000..debe37eed
--- /dev/null
+++ b/llm-gateway/src/handler/background-tasks.ts
@@ -0,0 +1,243 @@
+// Background tasks scheduled via ctx.waitUntil() after the client response is sent.
+// Handles usage accounting, API metrics, request logging, and abuse cost reporting.
+
+import { getWorkerDb } from '@kilocode/db/client';
+import {
+  runUsageAccounting,
+  type MicrodollarUsageContext,
+  type MicrodollarUsageStats,
+} from '../background/usage-accounting';
+import { runApiMetrics } from '../background/api-metrics';
+import { runRequestLogging } from '../background/request-logging';
+import { reportAbuseCost, type AbuseServiceSecrets } from '../lib/abuse-service';
+import { extractPromptInfo, estimateChatTokens } from '../lib/prompt-info';
+import { normalizeModelId } from '../lib/models';
+import { getToolsAvailable, type getToolsUsed } from '../background/api-metrics';
+import type { FraudDetectionHeaders } from '../lib/extract-headers';
+import type { FeatureValue } from '../lib/feature-detection';
+import type { OpenRouterChatCompletionRequest } from '../types/request';
+import type { ApiMetricsParams } from '@kilocode/worker-utils';
+
+const BACKGROUND_TASK_TIMEOUT_MS = 25_000;
+
+// Bound a waitUntil task: settle with `p`, or with undefined after `ms` via scheduler.wait.
+function withTimeout<T>(p: Promise<T>, ms: number): Promise<T | undefined> {
+  const deadline: Promise<undefined> = scheduler.wait(ms).then(() => undefined);
+  return Promise.race([p, deadline]);
+}
+
+type BgUser = {
+  id: string;
+  google_user_email?: string; // forwarded as the PostHog distinct id and to request logging
+  microdollars_used?: number; // forwarded as prior_microdollar_usage (0 when absent)
+};
+
+export type BackgroundTaskParams = {
+  accountingStream: ReadableStream | null; // read by usage accounting (cancelled if anonymous)
+  metricsStream: ReadableStream | null; // read by API metrics
+  loggingStream: ReadableStream | null; // read by request logging (cancelled if anonymous)
+  upstreamStatusCode: number;
+  abuseServiceUrl: string;
+  abuseSecrets: AbuseServiceSecrets | undefined;
+  abuseRequestId: number | undefined; // abuse cost is only reported when this is truthy
+  isStreaming: boolean;
+  requestStartedAt: number;
+  provider: string;
+  providerApiUrl: string;
+  providerApiKey: string;
+  providerHasGenerationEndpoint: boolean;
+  resolvedModel: string; // also passed on as requested_model to accounting and abuse reporting
+  requestBody: OpenRouterChatCompletionRequest;
+  user: BgUser;
+  organizationId: string | undefined;
+  modeHeader: string | null;
+  fraudHeaders: FraudDetectionHeaders;
+  projectId: string | null;
+  editorName: string | null;
+  machineId: string | null;
+  feature: FeatureValue | null;
+  autoModel: string | null; // when set, reported to API metrics as the requested model
+  botId: string | undefined;
+  tokenSource: string | undefined;
+  userByok: boolean;
+  isAnon: boolean; // true skips usage accounting and request logging
+  sessionId: string | null;
+  ttfbMs: number;
+  toolsUsed: ReturnType<typeof getToolsUsed>;
+  posthogApiKey: string | undefined;
+  connectionString: string; // passed to getWorkerDb by the accounting and logging tasks
+  o11y: { ingestApiMetrics(params: ApiMetricsParams): Promise<void> };
+};
+
+// Start the post-response work for one proxied request and register it with ctx.waitUntil():
+// usage accounting, API metrics, request logging, and abuse-cost reporting (which consumes the
+// usage-accounting result). Every task is capped at BACKGROUND_TASK_TIMEOUT_MS, and a stream
+// whose task is skipped is cancelled rather than left unread. Task failures are logged here
+// and are not propagated to the caller; the function itself returns immediately without
+// waiting for any of the tasks.
+export function scheduleBackgroundTasks(
+  ctx: { waitUntil(p: Promise<unknown>): void },
+  params: BackgroundTaskParams
+): void {
+  // Response stream copies, one per consumer.
+  const { accountingStream, metricsStream, loggingStream } = params;
+  // Upstream outcome and timing.
+  const { upstreamStatusCode, isStreaming, requestStartedAt, ttfbMs, toolsUsed } = params;
+  // Abuse-service inputs.
+  const { abuseServiceUrl, abuseSecrets, abuseRequestId, fraudHeaders } = params;
+  // Provider and model selection.
+  const { provider, providerApiUrl, providerApiKey, providerHasGenerationEndpoint } = params;
+  const { resolvedModel, autoModel, requestBody, modeHeader, feature } = params;
+  // Caller identity and client attribution.
+  const { user, organizationId, isAnon, userByok, botId, tokenSource } = params;
+  const { projectId, editorName, machineId, sessionId } = params;
+  // Service handles and credentials.
+  const { posthogApiKey, connectionString, o11y } = params;
+
+  // ── Usage accounting ───────────────────────────────────────────────────────
+  // Skipped for anonymous callers; in that case the stream is cancelled instead of read.
+  const usageTask: Promise<MicrodollarUsageStats | null | undefined> =
+    accountingStream && !isAnon
+      ? withTimeout(
+          (async () => {
+            // Set up inside the task so a setup error rejects the task instead of throwing here.
+            const db = getWorkerDb(connectionString);
+            const promptInfo = extractPromptInfo(requestBody);
+            const { estimatedInputTokens, estimatedOutputTokens } = estimateChatTokens(requestBody);
+
+            const usageContext: MicrodollarUsageContext = {
+              kiloUserId: user.id,
+              fraudHeaders,
+              organizationId,
+              provider,
+              requested_model: resolvedModel,
+              promptInfo,
+              max_tokens: requestBody.max_tokens ?? null,
+              has_middle_out_transform: requestBody.transforms?.includes('middle-out') ?? false,
+              estimatedInputTokens,
+              estimatedOutputTokens,
+              isStreaming,
+              prior_microdollar_usage: user.microdollars_used ?? 0,
+              posthog_distinct_id: user.google_user_email,
+              posthogApiKey,
+              providerApiUrl,
+              providerApiKey,
+              providerHasGenerationEndpoint,
+              project_id: projectId,
+              status_code: upstreamStatusCode,
+              editor_name: editorName,
+              machine_id: machineId,
+              user_byok: userByok,
+              has_tools: Array.isArray(requestBody.tools) && requestBody.tools.length > 0,
+              botId,
+              tokenSource,
+              abuse_request_id: abuseRequestId,
+              feature,
+              session_id: sessionId,
+              mode: modeHeader,
+              auto_model: autoModel,
+            };
+
+            return runUsageAccounting(accountingStream, usageContext, db);
+          })(),
+          BACKGROUND_TASK_TIMEOUT_MS
+        )
+      : (accountingStream?.cancel(), Promise.resolve(null));
+
+  // ── API metrics ────────────────────────────────────────────────────────────
+  // Runs for anonymous callers too; skipped only without a stream or an o11y handle.
+  // requestedModel reports autoModel when one is set, otherwise the resolved model.
+  const metricsTask =
+    metricsStream && o11y
+      ? withTimeout(
+          (async () => {
+            await runApiMetrics(
+              o11y,
+              {
+                kiloUserId: user.id,
+                organizationId,
+                isAnonymous: isAnon,
+                isStreaming,
+                userByok,
+                mode: modeHeader ?? undefined,
+                provider,
+                requestedModel: autoModel ?? resolvedModel,
+                resolvedModel: normalizeModelId(resolvedModel),
+                toolsAvailable: getToolsAvailable(requestBody.tools),
+                toolsUsed,
+                ttfbMs,
+                statusCode: upstreamStatusCode,
+              },
+              metricsStream,
+              requestStartedAt
+            );
+          })(),
+          BACKGROUND_TASK_TIMEOUT_MS
+        )
+      : (metricsStream?.cancel(), Promise.resolve(undefined));
+
+  // ── Request logging (Kilo employees only) ──────────────────────────────────
+  // Skipped for anonymous callers; in that case the stream is cancelled instead of read.
+  const loggingTask =
+    loggingStream && !isAnon
+      ? withTimeout(
+          (async () => {
+            const db = getWorkerDb(connectionString);
+            await runRequestLogging({
+              db,
+              responseStream: loggingStream,
+              statusCode: upstreamStatusCode,
+              user: { id: user.id, google_user_email: user.google_user_email },
+              organizationId,
+              provider,
+              model: resolvedModel,
+              request: requestBody,
+            });
+          })(),
+          BACKGROUND_TASK_TIMEOUT_MS
+        )
+      : (loggingStream?.cancel(), Promise.resolve(undefined));
+
+  // ── Abuse cost (depends on usage accounting result) ────────────────────────
+  // `!usageStats` covers null (accounting skipped or unparseable) and undefined (timed out).
+  // market_cost is preferred so the report carries the cost before any free/BYOK/promo zeroing.
+  const abuseCostTask = withTimeout(
+    usageTask.then(usageStats => {
+      if (!usageStats || !abuseRequestId) return;
+      return reportAbuseCost(
+        abuseServiceUrl,
+        abuseSecrets,
+        {
+          kiloUserId: user.id,
+          fraudHeaders,
+          requested_model: resolvedModel,
+          abuse_request_id: abuseRequestId,
+        },
+        {
+          messageId: usageStats.messageId,
+          cost_mUsd: usageStats.market_cost ?? usageStats.cost_mUsd,
+          inputTokens: usageStats.inputTokens,
+          outputTokens: usageStats.outputTokens,
+          cacheWriteTokens: usageStats.cacheWriteTokens,
+          cacheHitTokens: usageStats.cacheHitTokens,
+        }
+      );
+    }),
+    BACKGROUND_TASK_TIMEOUT_MS
+  );
+
+  // Use allSettled rather than all: Promise.all rejects as soon as ONE task rejects, which
+  // would settle the promise handed to waitUntil while the other tasks are still running, and
+  // work no longer covered by waitUntil may be cut short by the runtime (usage accounting
+  // included). allSettled waits for every task (each already bounded by withTimeout) and lets
+  // each failure be logged on its own.
+  ctx.waitUntil(
+    Promise.allSettled([usageTask, metricsTask, loggingTask, abuseCostTask]).then(outcomes => {
+      for (const outcome of outcomes) {
+        if (outcome.status === 'rejected') {
+          console.error('[proxy] Background task error', outcome.reason);
+        }
+      }
+    })
+  );
+}
diff --git a/llm-gateway/src/handler/proxy.ts b/llm-gateway/src/handler/proxy.ts
new file mode 100644
index 000000000..066bb7b03
--- /dev/null
+++ b/llm-gateway/src/handler/proxy.ts
@@ -0,0 +1,422 @@
+// Core proxy handler — the final step in the middleware chain.
+//
+// Responsibilities:
+//   1. Make upstream request (custom LLM or provider API)
+//   2. Start abuse classification early (non-blocking)
+//   3. Log proxy errors for 4xx/5xx responses
+//   4. Await abuse classification result (2s timeout)
+//   5. Schedule background tasks (always, even for error responses)
+//   6. Handle 402 → 503 conversion for non-BYOK cases (after bg tasks)
+//   7. Apply makeErrorReadable for BYOK/context-length errors
+//   8. Rewrite free model response (SSE or JSON)
+
+import type { Handler } from 'hono';
+import type { HonoContext } from '../types/hono';
+import { isAnonymousContext } from '../lib/anonymous';
+import { isKiloFreeModel } from '../lib/models';
+import { customLlmRequest } from '../lib/custom-llm/index';
+import { getOutputHeaders, wrapResponse, makeErrorReadable } from '../lib/response-helpers';
+import { rewriteFreeModelResponse } from '../lib/rewrite-free-model-response';
+import { classifyAbuse, type AbuseServiceSecrets } from '../lib/abuse-service';
+import { isActiveReviewPromo, isActiveCloudAgentPromo } from '../lib/promotions';
+import { getWorkerDb } from '@kilocode/db/client';
+import { scheduleBackgroundTasks } from './background-tasks';
+import { getToolsUsed } from '../background/api-metrics';
+import { captureException } from '../lib/sentry';
+
+// Ten minutes expressed in milliseconds (ms/second × seconds/minute × minutes).
+const TEN_MINUTES_MS = 1000 * 60 * 10;
+
+// Build the upstream fetch URL — always /chat/completions on the provider base URL,
+// preserving any query string from the original request.
+//
+// Trailing slashes on `providerApiUrl` are stripped first so a base URL configured
+// as `https://host/v1/` does not produce `https://host/v1//chat/completions`.
+// `search` is appended verbatim (either '' or a string starting with '?').
+function buildUpstreamUrl(providerApiUrl: string, search: string): string {
+  const baseUrl = providerApiUrl.replace(/\/+$/, '');
+  return `${baseUrl}/chat/completions${search}`;
+}
+
+// POST the chat-completion body to the provider API (non-custom-LLM path).
+//
+// Default headers are set first and `extraHeaders` afterwards, so caller-supplied
+// values override the defaults. The fetch is aborted by whichever fires first:
+// the client's abort signal or the TEN_MINUTES_MS hard timeout.
+async function openRouterRequest(
+  providerApiUrl: string,
+  apiKey: string,
+  body: unknown,
+  extraHeaders: Record<string, string>,
+  search: string,
+  clientSignal: AbortSignal
+): Promise<Response> {
+  const requestHeaders = new Headers();
+  requestHeaders.set('Authorization', `Bearer ${apiKey}`);
+  requestHeaders.set('HTTP-Referer', 'https://kilocode.ai');
+  requestHeaders.set('X-Title', 'Kilo Code');
+  requestHeaders.set('Content-Type', 'application/json');
+  Object.entries(extraHeaders).forEach(([name, value]) => {
+    requestHeaders.set(name, value);
+  });
+
+  const signal = AbortSignal.any([clientSignal, AbortSignal.timeout(TEN_MINUTES_MS)]);
+  const upstreamUrl = buildUpstreamUrl(providerApiUrl, search);
+
+  return fetch(upstreamUrl, {
+    method: 'POST',
+    headers: requestHeaders,
+    body: JSON.stringify(body),
+    signal,
+  });
+}
+
+// ─── Stream helpers ───────────────────────────────────────────────────────────
+
+// Build a fresh ReadableStream that emits the given chunks in order and closes.
+// The chunk objects themselves are shared (not copied) between replays.
+function streamFromChunks(chunks: readonly Uint8Array[]): ReadableStream<Uint8Array> {
+  return new ReadableStream<Uint8Array>({
+    start(controller) {
+      for (const chunk of chunks) controller.enqueue(chunk);
+      controller.close();
+    },
+  });
+}
+
+type BufferedForward = {
+  // Stream to hand to the client; receives every upstream chunk as it arrives.
+  clientStream: ReadableStream<Uint8Array>;
+  // Resolves once the upstream body was fully forwarded; rejects on read/write failure.
+  done: Promise<void>;
+  // Creates a new stream over the chunks buffered so far (complete once `done` resolves).
+  replay: () => ReadableStream<Uint8Array>;
+};
+
+// Forward `body` to the client chunk-by-chunk while keeping a copy of every chunk.
+//
+// Used instead of .tee(): tee couples consumer speeds via backpressure and can stall
+// the client when background consumers are slow. Here the client is the only live
+// consumer; background tasks replay the buffered copy after the stream completes.
+function forwardAndBuffer(body: ReadableStream): BufferedForward {
+  const chunks: Uint8Array[] = [];
+  const { readable: clientStream, writable } = new TransformStream<Uint8Array, Uint8Array>();
+  const writer = writable.getWriter();
+
+  const done = (async () => {
+    const reader = body.getReader() as ReadableStreamDefaultReader<Uint8Array>;
+    try {
+      for (;;) {
+        const result = await reader.read();
+        if (result.done) break;
+        chunks.push(result.value);
+        await writer.write(result.value);
+      }
+      await writer.close();
+    } catch (err) {
+      // Tear down both sides, then surface the failure to the caller's .catch().
+      await reader.cancel().catch(() => {});
+      await writer.abort(err).catch(() => {});
+      throw err;
+    }
+  })();
+
+  return { clientStream, done, replay: () => streamFromChunks(chunks) };
+}
+
+// ─── Main handler ─────────────────────────────────────────────────────────────
+
+/**
+ * Final handler of the chat-completions chain: sends the request upstream, runs abuse
+ * classification alongside it, schedules background tasks and returns the (possibly
+ * rewritten) upstream response.
+ *
+ * Reads all of its inputs from the Hono context variables set by earlier middleware.
+ * Secret lookups and abuse classification are fail-open: their failures never fail
+ * the request. Errors thrown by the upstream request itself propagate to the caller.
+ */
+export const proxyHandler: Handler<HonoContext> = async c => {
+  const requestBody = c.get('requestBody');
+  const resolvedModel = c.get('resolvedModel');
+  const provider = c.get('provider');
+  const userByok = c.get('userByok');
+  const customLlm = c.get('customLlm');
+  const user = c.get('user');
+  const organizationId = c.get('organizationId');
+  const projectId = c.get('projectId');
+  const extraHeaders = c.get('extraHeaders');
+  const fraudHeaders = c.get('fraudHeaders');
+  const editorName = c.get('editorName');
+  const machineId = c.get('machineId');
+  const taskId = c.get('taskId');
+  const botId = c.get('botId');
+  const tokenSource = c.get('tokenSource');
+  const feature = c.get('feature');
+  const autoModel = c.get('autoModel');
+  const requestStartedAt = c.get('requestStartedAt');
+  const modeHeader = c.get('modeHeader');
+  const isAnon = isAnonymousContext(user);
+
+  // Preserve query string so it is forwarded to the upstream provider.
+  const { search } = new URL(c.req.url);
+
+  // Fetch PostHog + abuse secrets in parallel — all fail-open.
+  let posthogApiKey: string | undefined;
+  let abuseSecrets: AbuseServiceSecrets | undefined;
+
+  const [abuseServiceUrl] = await Promise.all([
+    // An empty URL makes the abuse-service client skip its calls, so a failed
+    // lookup degrades to "no classification" instead of failing the request.
+    c.env.ABUSE_SERVICE_URL.get().catch(() => ''),
+    c.env.POSTHOG_API_KEY.get()
+      .then(k => {
+        posthogApiKey = k;
+      })
+      .catch(() => {
+        /* fail-open */
+      }),
+  ]);
+
+  // Abuse classification starts non-blocking — we hold a promise and
+  // await it (with a 2s timeout) after the upstream response arrives.
+  const abuseSecretsPromise = Promise.all([
+    c.env.ABUSE_CF_ACCESS_CLIENT_ID.get(),
+    c.env.ABUSE_CF_ACCESS_CLIENT_SECRET.get(),
+  ])
+    .then(([id, secret]) => {
+      abuseSecrets = { cfAccessClientId: id, cfAccessClientSecret: secret };
+    })
+    .catch(() => {
+      /* fail-open */
+    });
+
+  // Start classification in parallel with the upstream request. The rejection
+  // handler is attached immediately so a classification failure can never surface
+  // as an unhandled rejection, even if the upstream request throws first.
+  const classifyPromise = abuseSecretsPromise
+    .then(() =>
+      classifyAbuse(abuseServiceUrl, abuseSecrets, fraudHeaders, editorName, requestBody, {
+        kiloUserId: user.id,
+        organizationId,
+        projectId,
+        provider: provider.id,
+        isByok: !!userByok,
+      })
+    )
+    .catch((err: unknown) => {
+      console.warn('[proxy] Abuse classification failed', err);
+      return null;
+    });
+
+  // ── Upstream request ────────────────────────────────────────────────────────
+  let response: Response;
+  if (customLlm) {
+    const db = getWorkerDb(c.env.HYPERDRIVE.connectionString);
+    const isLegacyExtension = !!fraudHeaders.http_user_agent?.startsWith('Kilo-Code/');
+    response = await customLlmRequest(
+      customLlm,
+      requestBody,
+      user.id,
+      taskId ?? undefined,
+      isLegacyExtension,
+      db
+    );
+  } else {
+    response = await openRouterRequest(
+      provider.apiUrl,
+      provider.apiKey,
+      requestBody,
+      extraHeaders,
+      search,
+      c.req.raw.signal
+    );
+  }
+
+  // Record time-to-first-byte (wall-clock from request start to upstream response).
+  const ttfbMs = Math.max(0, Math.round(performance.now() - requestStartedAt));
+
+  console.debug(`Upstream ${provider.id} responded with ${response.status}`);
+
+  // ── Error logging ────────────────────────────────────────────────────────────
+  // Reads a clone so the original body stays available for the error path below.
+  if (response.status >= 400) {
+    const responseClone = response.clone();
+    const logLevel = response.status >= 500 ? 'error' : 'warn';
+    c.executionCtx.waitUntil(
+      responseClone
+        .text()
+        .then(body => {
+          const errorMessage = `${provider.id} returned error ${response.status}`;
+          const extra = {
+            kiloUserId: user.id,
+            model: requestBody.model,
+            organizationId,
+            status: response.status,
+            first4k: body.slice(0, 4096),
+          };
+          console[logLevel](errorMessage, extra);
+          if (response.status >= 500) {
+            captureException(new Error(errorMessage), extra);
+          }
+        })
+        .catch(() => {
+          /* ignore */
+        })
+    );
+  }
+
+  // ── Await abuse classification (2s timeout) ───────────────────────────────────
+  // classifyPromise never rejects (see above); the timer side resolves to null.
+  const classifyResult = await Promise.race([
+    classifyPromise,
+    scheduler.wait(2000).then(() => null),
+  ]);
+
+  if (classifyResult) {
+    console.log('Abuse classification result', {
+      verdict: classifyResult.verdict,
+      risk_score: classifyResult.risk_score,
+      signals: classifyResult.signals,
+      identity_key: classifyResult.context.identity_key,
+      kilo_user_id: user.id,
+      requested_model: resolvedModel,
+      rps: classifyResult.context.requests_per_second,
+      request_id: classifyResult.request_id,
+    });
+  }
+
+  const abuseRequestId = classifyResult?.request_id ?? undefined;
+
+  // ── Shared background task context ──────────────────────────────────────────
+  const bgCommon = {
+    upstreamStatusCode: response.status,
+    abuseServiceUrl,
+    abuseSecrets,
+    abuseRequestId,
+    isStreaming: requestBody.stream === true,
+    requestStartedAt,
+    ttfbMs,
+    provider: provider.id,
+    providerApiUrl: provider.apiUrl,
+    providerApiKey: provider.apiKey,
+    providerHasGenerationEndpoint: provider.hasGenerationEndpoint,
+    resolvedModel,
+    requestBody,
+    user,
+    organizationId,
+    modeHeader,
+    fraudHeaders,
+    projectId,
+    editorName,
+    machineId,
+    feature,
+    autoModel,
+    botId,
+    tokenSource,
+    userByok: !!userByok,
+    isAnon,
+    sessionId: taskId,
+    toolsUsed: getToolsUsed(requestBody.messages),
+    posthogApiKey,
+    connectionString: c.env.HYPERDRIVE.connectionString,
+    o11y: c.env.O11Y,
+  } as const;
+
+  // Schedule the background tasks, giving each consumer its own stream from
+  // `makeStream`. Anonymous requests get no accounting or logging stream.
+  const scheduleWithStreams = (makeStream: () => ReadableStream<Uint8Array>): void => {
+    scheduleBackgroundTasks(c.executionCtx, {
+      ...bgCommon,
+      accountingStream: !isAnon ? makeStream() : null,
+      metricsStream: makeStream(),
+      loggingStream: !isAnon ? makeStream() : null,
+    });
+  };
+
+  // ── Error responses: schedule background tasks before returning ──────────────
+  // Background tasks must be scheduled even when makeErrorReadable intercepts,
+  // matching the reference implementation which always runs accounting + logging.
+  if (response.status >= 400) {
+    // Error bodies are small JSON — buffer them fully so background tasks can
+    // read the body independently of whatever response we send to the client.
+    const errorBodyBytes = new Uint8Array(await response.arrayBuffer());
+
+    scheduleWithStreams(() => streamFromChunks([errorBodyBytes]));
+
+    // ── 402 → 503 conversion (non-BYOK) ───────────────────────────────────────
+    // Placed after scheduling so metrics/accounting/logging are emitted even
+    // for 402 responses, matching the reference implementation.
+    if (response.status === 402 && !userByok) {
+      captureException(new Error(`${provider.id} returned 402 Payment Required`), {
+        kiloUserId: user.id,
+        model: requestBody.model,
+        organizationId,
+      });
+      return c.json(
+        {
+          error: 'Service Unavailable',
+          message: 'The service is temporarily unavailable. Please try again later.',
+        },
+        503
+      );
+    }
+
+    // BYOK / context-length readable error — return a custom message instead of
+    // the raw upstream body.
+    const errorResponse = await makeErrorReadable({
+      requestedModel: resolvedModel,
+      request: requestBody,
+      response: new Response(errorBodyBytes, response),
+      isUserByok: !!userByok,
+    });
+    if (errorResponse) return errorResponse;
+
+    return wrapResponse(new Response(errorBodyBytes, response));
+  }
+
+  // ── Free model response rewrite ───────────────────────────────────────────────
+  const shouldRewrite =
+    provider.id !== 'custom' &&
+    (isKiloFreeModel(resolvedModel) ||
+      isActiveReviewPromo(botId, resolvedModel) ||
+      isActiveCloudAgentPromo(tokenSource, resolvedModel));
+
+  if (shouldRewrite) {
+    if (response.body) {
+      const { clientStream, done, replay } = forwardAndBuffer(response.body);
+      // Background tasks run after the stream completes (all chunks buffered).
+      // NOTE(review): if the pipe fails (e.g. client disconnect) no background
+      // tasks are scheduled for this request — confirm that is intended.
+      c.executionCtx.waitUntil(
+        done
+          .then(() => scheduleWithStreams(replay))
+          .catch(err => {
+            console.error('[proxy] Free model stream pipe error', err);
+          })
+      );
+      return rewriteFreeModelResponse(new Response(clientStream, response), resolvedModel);
+    }
+    return rewriteFreeModelResponse(response, resolvedModel);
+  }
+
+  // ── Pass-through with background tasks (buffer-based, no .tee()) ────────────
+  if (response.body) {
+    const { clientStream, done, replay } = forwardAndBuffer(response.body);
+    // Background tasks run after the stream completes (all chunks buffered).
+    // NOTE(review): a failed pipe skips accounting/metrics/logging for the
+    // partial response — confirm that is intended.
+    c.executionCtx.waitUntil(
+      done
+        .then(() => scheduleWithStreams(replay))
+        .catch(err => {
+          console.error('[proxy] Stream pipe error', err);
+        })
+    );
+
+    return wrapResponse(new Response(clientStream, response));
+  }
+
+  return wrapResponse(response);
+};
+
+// Re-export output headers helper for tests.
+export { getOutputHeaders };
diff --git a/llm-gateway/src/index.ts b/llm-gateway/src/index.ts
new file mode 100644
index 000000000..891371232
--- /dev/null
+++ b/llm-gateway/src/index.ts
@@ -0,0 +1,79 @@
+export { RateLimitDO } from './dos/RateLimitDO';
+import * as Sentry from '@sentry/cloudflare';
+import { SENTRY_DSN } from './lib/sentry';
+import { Hono } from 'hono';
+import { useWorkersLogger } from 'workers-tagged-logger';
+import type { HonoContext } from './types/hono';
+import { requestTimingMiddleware } from './middleware/request-timing';
+import { parseBodyMiddleware } from './middleware/parse-body';
+import { extractIpMiddleware } from './middleware/extract-ip';
+import { resolveAutoModelMiddleware } from './middleware/resolve-auto-model';
+import { authMiddleware } from './middleware/auth';
+import { anonymousGateMiddleware } from './middleware/anonymous-gate';
+import { freeModelRateLimitMiddleware } from './middleware/free-model-rate-limit';
+import { promotionLimitMiddleware } from './middleware/promotion-limit';
+import { logFreeModelUsageMiddleware } from './middleware/log-free-model-usage';
+import { providerResolutionMiddleware } from './middleware/provider-resolution';
+import { requestValidationMiddleware } from './middleware/request-validation';
+import { balanceAndOrgCheckMiddleware } from './middleware/balance-and-org';
+import { requestTransformMiddleware } from './middleware/request-transform';
+import { proxyHandler } from './handler/proxy';
+import { captureException } from './lib/sentry';
+
+// The single Hono application; every route and handler below is attached to it.
+const app = new Hono<HonoContext>();
+
+// Logger middleware for all paths. The cast converts the handler returned by
+// workers-tagged-logger to the middleware parameter type of this app's `use`
+// (presumably a hono typing mismatch between the two packages — confirm).
+app.use('*', useWorkersLogger('llm-gateway') as Parameters<typeof app.use>[1]);
+
+// Register the chat-completions POST route at `path`.
+//
+// The handlers run in the order listed and `proxyHandler` is the final step.
+// Note that the free-model rate limit is placed before authentication, and the
+// request is only transformed after validation and the balance/org check.
+function registerChatCompletions(path: string) {
+  app.post(
+    path,
+    requestTimingMiddleware,
+    parseBodyMiddleware,
+    extractIpMiddleware,
+    resolveAutoModelMiddleware,
+    freeModelRateLimitMiddleware,
+    authMiddleware,
+    anonymousGateMiddleware,
+    promotionLimitMiddleware,
+    logFreeModelUsageMiddleware,
+    providerResolutionMiddleware,
+    requestValidationMiddleware,
+    balanceAndOrgCheckMiddleware,
+    requestTransformMiddleware,
+    proxyHandler
+  );
+}
+
+// Match the Next.js routes exactly so clients need no URL reconfiguration.
+// Both paths get the identical handler chain.
+registerChatCompletions('/api/gateway/chat/completions');
+registerChatCompletions('/api/openrouter/chat/completions');
+
+// Fallback for requests that match no registered route.
+// The reference validates that [...path] is /chat/completions and returns
+// invalidPathResponse() for anything else under /api/gateway or /api/openrouter,
+// so those prefixes get a 400 "Invalid path"; every other path gets a plain 404.
+app.notFound(c => {
+  const { pathname } = new URL(c.req.url);
+  const gatewayPrefixes = ['/api/gateway/', '/api/openrouter/'];
+  const isUnderGateway = gatewayPrefixes.some(prefix => pathname.startsWith(prefix));
+
+  if (!isUnderGateway) {
+    return c.json({ error: 'Not found' }, 404);
+  }
+
+  return c.json(
+    {
+      error: 'Invalid path',
+      message: 'This endpoint only accepts the path `/chat/completions`.',
+    },
+    400
+  );
+});
+
+// Last-resort error handler: log the error, report it via captureException and
+// answer with a generic 500 body (no error details are returned to the client).
+app.onError((error, ctx) => {
+  console.error('[llm-gateway] Unhandled error', error);
+  captureException(error);
+  const body = { error: 'Internal server error' };
+  return ctx.json(body, 500);
+});
+
+// Worker entry point: the Hono app's fetch handler wrapped with Sentry.withSentry.
+// The options callback receives the worker env (unused here) and supplies the DSN
+// from ./lib/sentry.
+// NOTE(review): `sendDefaultPii: true` is passed to Sentry — confirm that sending
+// default PII is intended for this service.
+export default Sentry.withSentry(
+  (_env: Env) => ({
+    dsn: SENTRY_DSN,
+    sendDefaultPii: true,
+  }),
+  { fetch: app.fetch }
+);
diff --git a/llm-gateway/src/lib/abuse-service.ts b/llm-gateway/src/lib/abuse-service.ts
new file mode 100644
index 000000000..0d8a9b2fa
--- /dev/null
+++ b/llm-gateway/src/lib/abuse-service.ts
@@ -0,0 +1,279 @@
+// Abuse detection service client — port of src/lib/abuse-service.ts.
+// Communicates with the Kilo Abuse Detection Service via Cloudflare Access.
+
+import type { OpenRouterChatCompletionRequest } from '../types/request';
+import type { FraudDetectionHeaders } from './extract-headers';
+
+// ─── Public types (mirror the Next.js version for Phase 6 compatibility) ────
+
+// Verdict values a classification response can carry.
+export type Verdict = 'ALLOW' | 'CHALLENGE' | 'SOFT_BLOCK' | 'HARD_BLOCK';
+// Signal names that can appear in a classification response's `signals` list.
+export type AbuseSignal =
+  | 'high_velocity'
+  | 'free_tier_exhausted'
+  | 'premium_harvester'
+  | 'suspicious_fingerprint'
+  | 'datacenter_ip'
+  | 'known_abuser';
+export type ChallengeType = 'turnstile' | 'payment_verification';
+// Optional extra data attached to a verdict; all fields may be absent.
+export type ActionMetadata = {
+  challenge_type?: ChallengeType;
+  model_override?: string;
+  retry_after_seconds?: number;
+};
+// Per-identity context returned alongside the verdict.
+export type ClassificationContext = {
+  identity_key: string;
+  current_spend_1h: number;
+  is_new_user: boolean;
+  requests_per_second: number;
+};
+// Shape of the JSON body parsed from the abuse service's `/api/classify` response.
+export type AbuseClassificationResponse = {
+  verdict: Verdict;
+  risk_score: number;
+  signals: AbuseSignal[];
+  action_metadata: ActionMetadata;
+  context: ClassificationContext;
+  /** 0 indicates classification error */
+  request_id: number;
+};
+
+// JSON body POSTed to the abuse service's `/api/classify` endpoint.
+// Every field is optional; classifyAbuse in this file fills only a subset of
+// them (identity, geo/fingerprint headers, model, prompts and request flags).
+export type UsagePayload = {
+  id?: string;
+  kilo_user_id?: string | null;
+  organization_id?: string | null;
+  project_id?: string | null;
+  message_id?: string | null;
+  cost?: number | null;
+  cache_discount?: number | null;
+  input_tokens?: number | null;
+  output_tokens?: number | null;
+  cache_write_tokens?: number | null;
+  cache_hit_tokens?: number | null;
+  ip_address?: string | null;
+  geo_city?: string | null;
+  geo_country?: string | null;
+  geo_latitude?: number | null;
+  geo_longitude?: number | null;
+  ja4_digest?: string | null;
+  user_agent?: string | null;
+  provider?: string | null;
+  model?: string | null;
+  requested_model?: string | null;
+  inference_provider?: string | null;
+  user_prompt?: string | null;
+  system_prompt?: string | null;
+  max_tokens?: number | null;
+  has_middle_out_transform?: boolean | null;
+  has_tools?: boolean | null;
+  streamed?: boolean | null;
+  status_code?: number | null;
+  upstream_id?: string | null;
+  finish_reason?: string | null;
+  has_error?: boolean | null;
+  cancelled?: boolean | null;
+  created_at?: string | null;
+  latency?: number | null;
+  moderation_latency?: number | null;
+  generation_time?: number | null;
+  is_byok?: boolean | null;
+  is_user_byok?: boolean | null;
+  editor_name?: string | null;
+  abuse_classification?: number | null;
+};
+
+// Shape of the JSON body parsed from the abuse service's `/api/usage/cost` response.
+// Only `success` is always present.
+export type CostUpdateResponse = {
+  success: boolean;
+  identity_key?: string;
+  message_id?: string;
+  do_updated?: boolean;
+  error?: string;
+};
+
+// ─── Secrets bundle needed for CF Access auth ────────────────────────────────
+
+// Credential pair sent to the abuse service as the `CF-Access-Client-Id` and
+// `CF-Access-Client-Secret` request headers.
+export type AbuseServiceSecrets = {
+  cfAccessClientId: string;
+  cfAccessClientSecret: string;
+};
+
+// ─── Internal helpers ────────────────────────────────────────────────────────
+
+// Minimal view of a chat message: content is either a plain string or a list of
+// typed parts, of which only `text` parts are read here.
+type Message = { role: string; content?: string | Array<{ type?: string; text?: string }> };
+
+// Return the textual content of a message.
+// String content is returned as-is; for part arrays the `text` of every part with
+// type 'text' is joined with newlines (a missing `text` counts as ''). Any other
+// content (including none) yields ''.
+function extractMessageTextContent(m: Message): string {
+  const { content } = m;
+  if (typeof content === 'string') return content;
+  if (!Array.isArray(content)) return '';
+
+  const texts: string[] = [];
+  for (const part of content) {
+    if (part.type === 'text') texts.push(part.text ?? '');
+  }
+  return texts.join('\n');
+}
+
+// Collect the prompts sent to the abuse classifier from a chat request.
+//   systemPrompt — text of all 'system' and 'developer' messages joined with
+//                  newlines, or null when that text is empty.
+//   userPrompt   — text of the last 'user' message, or null when there is none.
+function extractFullPrompts(body: OpenRouterChatCompletionRequest): {
+  systemPrompt: string | null;
+  userPrompt: string | null;
+} {
+  const messages = (body.messages as Message[]) ?? [];
+  const systemParts: string[] = [];
+  let userPrompt: string | null = null;
+
+  for (const message of messages) {
+    if (message.role === 'system' || message.role === 'developer') {
+      systemParts.push(extractMessageTextContent(message));
+    } else if (message.role === 'user') {
+      // Later user messages overwrite earlier ones, leaving the last one.
+      userPrompt = extractMessageTextContent(message);
+    }
+  }
+
+  const joinedSystem = systemParts.join('\n');
+  return { systemPrompt: joinedSystem === '' ? null : joinedSystem, userPrompt };
+}
+
+// Build the request headers for abuse-service calls: always JSON content type,
+// plus the two Cloudflare Access credential headers when secrets are available.
+function buildAccessHeaders(secrets: AbuseServiceSecrets | undefined): Record<string, string> {
+  const contentType: Record<string, string> = { 'Content-Type': 'application/json' };
+  if (!secrets) return contentType;
+  return {
+    ...contentType,
+    'CF-Access-Client-Id': secrets.cfAccessClientId,
+    'CF-Access-Client-Secret': secrets.cfAccessClientSecret,
+  };
+}
+
+// ─── Public API ───────────────────────────────────────────────────────────────
+
+// POST `payload` to the abuse service's `/api/classify` endpoint.
+// Never throws: returns null when no service URL is configured, when the service
+// answers with a non-OK status, or when the request/parsing fails (the latter two
+// are logged with console.error).
+export async function classifyRequest(
+  serviceUrl: string,
+  secrets: AbuseServiceSecrets | undefined,
+  payload: UsagePayload
+): Promise<AbuseClassificationResponse | null> {
+  if (!serviceUrl) return null;
+
+  const endpoint = `${serviceUrl}/api/classify`;
+  try {
+    const res = await fetch(endpoint, {
+      method: 'POST',
+      headers: buildAccessHeaders(secrets),
+      body: JSON.stringify(payload),
+    });
+    if (res.ok) {
+      return await res.json<AbuseClassificationResponse>();
+    }
+    const detail = await res.text();
+    console.error(`Abuse service error (${res.status}): ${detail}`);
+    return null;
+  } catch (err) {
+    console.error('Abuse classification failed:', err);
+    return null;
+  }
+}
+
+// Caller-supplied request context for classifyAbuse. Every field is optional;
+// classifyAbuse substitutes null for anything missing.
+export type AbuseClassificationContext = {
+  kiloUserId?: string | null;
+  organizationId?: string | null;
+  projectId?: string | null;
+  provider?: string | null;
+  isByok?: boolean | null;
+};
+
+// Assemble a classification payload from the request body, fraud headers and
+// caller context, then submit it via classifyRequest.
+// Missing context fields are sent as null. Returns whatever classifyRequest
+// returns (null on any failure).
+export async function classifyAbuse(
+  serviceUrl: string,
+  secrets: AbuseServiceSecrets | undefined,
+  fraudHeaders: FraudDetectionHeaders,
+  editorName: string | null,
+  body: OpenRouterChatCompletionRequest,
+  context?: AbuseClassificationContext
+): Promise<AbuseClassificationResponse | null> {
+  const prompts = extractFullPrompts(body);
+  const ctx: AbuseClassificationContext = context ?? {};
+
+  const payload: UsagePayload = {
+    kilo_user_id: ctx.kiloUserId ?? null,
+    organization_id: ctx.organizationId ?? null,
+    project_id: ctx.projectId ?? null,
+    ip_address: fraudHeaders.http_x_forwarded_for,
+    geo_city: fraudHeaders.geo_city,
+    geo_country: fraudHeaders.geo_country,
+    geo_latitude: fraudHeaders.geo_latitude,
+    geo_longitude: fraudHeaders.geo_longitude,
+    // The payload field is named ja4_digest but is filled from the ja3_hash header.
+    ja4_digest: fraudHeaders.ja3_hash,
+    user_agent: fraudHeaders.http_user_agent,
+    provider: ctx.provider ?? null,
+    requested_model: body.model?.toLowerCase() ?? null,
+    user_prompt: prompts.userPrompt,
+    system_prompt: prompts.systemPrompt,
+    max_tokens: body.max_tokens ?? null,
+    has_middle_out_transform: body.transforms?.includes('middle-out') ?? false,
+    has_tools: (body.tools?.length ?? 0) > 0,
+    streamed: body.stream === true,
+    is_user_byok: ctx.isByok ?? null,
+    editor_name: editorName,
+  };
+
+  return classifyRequest(serviceUrl, secrets, payload);
+}
+
+// JSON body POSTed to the abuse service's `/api/usage/cost` endpoint.
+// `request_id`, `message_id` and `cost` are required; the rest is optional.
+type CostUpdatePayload = {
+  kilo_user_id?: string | null;
+  ip_address?: string | null;
+  ja4_digest?: string | null;
+  user_agent?: string | null;
+  request_id: number;
+  message_id: string;
+  cost: number;
+  requested_model?: string | null;
+  input_tokens?: number | null;
+  output_tokens?: number | null;
+  cache_write_tokens?: number | null;
+  cache_hit_tokens?: number | null;
+};
+
+// POST a cost update to the abuse service's `/api/usage/cost` endpoint.
+// Never throws: returns null when no service URL is configured, when the service
+// answers with a non-OK status, or when the request/parsing fails (the latter two
+// are logged with console.error).
+export async function reportCost(
+  serviceUrl: string,
+  secrets: AbuseServiceSecrets | undefined,
+  payload: CostUpdatePayload
+): Promise<CostUpdateResponse | null> {
+  if (!serviceUrl) return null;
+
+  const endpoint = `${serviceUrl}/api/usage/cost`;
+  try {
+    const res = await fetch(endpoint, {
+      method: 'POST',
+      headers: buildAccessHeaders(secrets),
+      body: JSON.stringify(payload),
+    });
+    if (res.ok) {
+      return await res.json<CostUpdateResponse>();
+    }
+    const detail = await res.text();
+    console.error(`[Abuse] Cost update failed (${res.status}): ${detail}`);
+    return null;
+  } catch (err) {
+    console.error('[Abuse] Failed to report cost:', err);
+    return null;
+  }
+}
+
+// Report the final cost and token counts of a request to the abuse service.
+// Returns null without calling the service when there is no abuse request id
+// (undefined or 0) or no message id; otherwise returns reportCost's result.
+export async function reportAbuseCost(
+  serviceUrl: string,
+  secrets: AbuseServiceSecrets | undefined,
+  usageContext: {
+    kiloUserId: string;
+    fraudHeaders: FraudDetectionHeaders;
+    requested_model: string;
+    abuse_request_id?: number;
+  },
+  usageStats: {
+    messageId: string | null;
+    cost_mUsd: number;
+    inputTokens: number;
+    outputTokens: number;
+    cacheWriteTokens: number;
+    cacheHitTokens: number;
+  }
+): Promise<CostUpdateResponse | null> {
+  const { abuse_request_id: requestId, fraudHeaders, kiloUserId, requested_model } = usageContext;
+  const { messageId } = usageStats;
+  if (!requestId || !messageId) return null;
+
+  return reportCost(serviceUrl, secrets, {
+    kilo_user_id: kiloUserId,
+    ip_address: fraudHeaders.http_x_forwarded_for,
+    ja4_digest: fraudHeaders.ja3_hash,
+    user_agent: fraudHeaders.http_user_agent,
+    request_id: requestId,
+    message_id: messageId,
+    cost: usageStats.cost_mUsd,
+    requested_model,
+    input_tokens: usageStats.inputTokens,
+    output_tokens: usageStats.outputTokens,
+    cache_write_tokens: usageStats.cacheWriteTokens,
+    cache_hit_tokens: usageStats.cacheHitTokens,
+  });
+}
diff --git a/llm-gateway/src/lib/anonymous.ts b/llm-gateway/src/lib/anonymous.ts
new file mode 100644
index 000000000..9cce472eb
--- /dev/null
+++ b/llm-gateway/src/lib/anonymous.ts
@@ -0,0 +1,26 @@
+// Port of src/lib/anonymous/anonymous-user.ts + ip-rate-limiter.ts
+
+// Stand-in "user" for unauthenticated requests, identified by IP address.
+// The literal `isAnonymous: true` is the tag isAnonymousContext checks for.
+export type AnonymousUserContext = {
+  isAnonymous: true;
+  ipAddress: string;
+  // Synthetic user-like properties for compatibility with the rest of the chain.
+  id: string; // 'anon:{ipAddress}'
+  microdollars_used: number;
+  is_admin: false;
+};
+
+// Create the anonymous user context for an IP address: id is 'anon:' followed by
+// the address, usage starts at 0 and the context is never an admin.
+export function createAnonymousContext(ipAddress: string): AnonymousUserContext {
+  const context: AnonymousUserContext = {
+    isAnonymous: true,
+    ipAddress,
+    id: 'anon:' + ipAddress,
+    microdollars_used: 0,
+    is_admin: false,
+  };
+  return context;
+}
+
+// Type guard: true only for non-null objects whose `isAnonymous` property is
+// strictly `true`. No other property is inspected.
+export function isAnonymousContext(user: unknown): user is AnonymousUserContext {
+  if (typeof user !== 'object' || user === null) return false;
+  return 'isAnonymous' in user && user.isAnonymous === true;
+}
diff --git a/llm-gateway/src/lib/byok.ts b/llm-gateway/src/lib/byok.ts
new file mode 100644
index 000000000..7a8e2f9c6
--- /dev/null
+++ b/llm-gateway/src/lib/byok.ts
@@ -0,0 +1,194 @@
+// BYOK (Bring Your Own Key) utilities.
+// Ported from src/lib/byok/index.ts + src/lib/byok/encryption.ts.
+// Uses Web Crypto (crypto.subtle) instead of Node.js createDecipheriv.
+
+import type { WorkerDb } from '@kilocode/db/client';
+import { byok_api_keys, modelsByProvider } from '@kilocode/db/schema';
+import { and, eq, inArray, desc } from 'drizzle-orm';
+import * as z from 'zod';
+
+// --- Types ---
+
+// Stored form of an encrypted API key. decryptApiKey decodes all three fields
+// with atob, i.e. they are base64 strings: the IV, the ciphertext and the
+// AES-GCM authentication tag.
+type EncryptedData = {
+  iv: string;
+  data: string;
+  authTag: string;
+};
+
+// Provider ids accepted for BYOK inference keys.
+export const VercelUserByokInferenceProviderIdSchema = z.enum([
+  'anthropic',
+  'bedrock',
+  'google',
+  'openai',
+  'minimax',
+  'mistral',
+  'xai',
+  'zai',
+]);
+
+// Provider ids accepted for BYOK autocomplete keys.
+export const AutocompleteUserByokProviderIdSchema = z.enum(['codestral']);
+
+// Any BYOK provider id: the union of the inference and autocomplete schemas.
+export const UserByokProviderIdSchema = VercelUserByokInferenceProviderIdSchema.or(
+  AutocompleteUserByokProviderIdSchema
+);
+
+export type UserByokProviderId = z.infer<typeof UserByokProviderIdSchema>;
+export type VercelUserByokInferenceProviderId = z.infer<
+  typeof VercelUserByokInferenceProviderIdSchema
+>;
+
+// A decrypted BYOK key together with the provider it belongs to.
+export type BYOKResult = {
+  decryptedAPIKey: string;
+  providerId: UserByokProviderId;
+};
+
+// --- Web Crypto AES-256-GCM decryption ---
+
+// Decrypt an AES-GCM encrypted API key with Web Crypto.
+// All inputs are base64 strings. Web Crypto expects the 128-bit auth tag appended
+// to the ciphertext, so the two are concatenated before decrypting. The plaintext
+// is decoded with TextDecoder's default encoding. Rejects if decoding, key import
+// or decryption fails.
+async function decryptApiKey(encrypted: EncryptedData, keyBase64: string): Promise<string> {
+  const fromBase64 = (value: string): Uint8Array => {
+    const binary = atob(value);
+    const bytes = new Uint8Array(binary.length);
+    for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
+    return bytes;
+  };
+
+  const rawKey = fromBase64(keyBase64);
+  const iv = fromBase64(encrypted.iv);
+  const ciphertext = fromBase64(encrypted.data);
+  const authTag = fromBase64(encrypted.authTag);
+
+  const sealed = new Uint8Array(ciphertext.length + authTag.length);
+  sealed.set(ciphertext, 0);
+  sealed.set(authTag, ciphertext.length);
+
+  const key = await crypto.subtle.importKey('raw', rawKey, 'AES-GCM', false, ['decrypt']);
+  const plaintext = await crypto.subtle.decrypt(
+    { name: 'AES-GCM', iv, tagLength: 128 },
+    key,
+    sealed
+  );
+
+  return new TextDecoder().decode(plaintext);
+}
+
+// Model ids beginning with this prefix are treated as Codestral models.
+const CODESTRAL_MODEL_PREFIX = 'mistralai/codestral';
+
+// True when the model id starts with the Codestral prefix (case-sensitive).
+function isCodestralModel(model: string): boolean {
+  return model.startsWith(CODESTRAL_MODEL_PREFIX);
+}
+
+// --- Provider lookups ---
+
+// Shapes read from the stored `vercel` metadata: a model has endpoints, and each
+// endpoint carries a provider tag.
+type StoredModelEndpoint = { tag: string };
+type StoredModel = { endpoints: StoredModelEndpoint[] };
+
+// List the BYOK provider ids that can serve `model`.
+// Codestral models always map to ['codestral']. Otherwise the row of
+// modelsByProvider with the highest id is loaded and the endpoint tags of the
+// model's Vercel entry are returned, keeping only tags that are valid BYOK
+// provider ids. Returns [] when there is no metadata or no entry for the model.
+export async function getModelUserByokProviders(
+  db: WorkerDb,
+  model: string
+): Promise<UserByokProviderId[]> {
+  if (isCodestralModel(model)) return ['codestral'];
+
+  const [latest] = await db
+    .select({ vercel: modelsByProvider.vercel })
+    .from(modelsByProvider)
+    .orderBy(desc(modelsByProvider.id))
+    .limit(1);
+
+  const vercelMeta = latest?.vercel;
+  if (!vercelMeta) return [];
+
+  const catalog = vercelMeta as Record<string, StoredModel | undefined>;
+  const endpoints = catalog[mapModelIdToVercel(model)]?.endpoints ?? [];
+
+  const providers: UserByokProviderId[] = [];
+  for (const { tag } of endpoints) {
+    const parsed = UserByokProviderIdSchema.safeParse(tag);
+    if (parsed.success) providers.push(parsed.data);
+  }
+  return providers;
+}
+
+// Model-id → Vercel key mapping (mirrors src/lib/providers/vercel/mapModelIdToVercel.ts)
+const vercelModelIdMapping: Record<string, string | undefined> = {
+  'arcee-ai/trinity-large-preview:free': 'arcee-ai/trinity-large-preview',
+  'mistralai/codestral-2508': 'mistral/codestral',
+  'mistralai/devstral-2512': 'mistral/devstral-2',
+};
+
+const modelPrefixToVercelProvider: Record<string, string | undefined> = {
+  anthropic: 'anthropic',
+  google: 'google',
+  openai: 'openai',
+  minimax: 'minimax',
+  mistralai: 'mistral',
+  // qwen → alibaba (no BYOK for alibaba)
+  'x-ai': 'xai',
+  'z-ai': 'zai',
+};
+
+// Look up `key` among the table's OWN properties only. The model id comes from the
+// request, so a plain `table[key]` would also match inherited members such as
+// `constructor`, `toString` or `__proto__` and return a non-string value.
+function lookupOwn(table: Record<string, string | undefined>, key: string): string | undefined {
+  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
+}
+
+// Translate a gateway model id into the key used in the stored Vercel metadata.
+//   1. Exact ids listed in vercelModelIdMapping use their hard-coded key.
+//   2. Otherwise the part before the first '/' is replaced by the mapped Vercel
+//      provider name, except for `openai/gpt-oss*` ids, which are left unchanged.
+//   3. Ids without '/' or with an unmapped prefix are returned unchanged.
+function mapModelIdToVercel(modelId: string): string {
+  const hardcoded = lookupOwn(vercelModelIdMapping, modelId);
+  if (hardcoded) return hardcoded;
+
+  const slashIndex = modelId.indexOf('/');
+  if (slashIndex < 0) return modelId;
+
+  const prefix = modelId.slice(0, slashIndex);
+  const rest = modelId.slice(slashIndex); // keeps the leading '/'
+  const vercelProvider =
+    prefix === 'openai' && modelId.startsWith('openai/gpt-oss')
+      ? undefined
+      : lookupOwn(modelPrefixToVercelProvider, prefix);
+  return vercelProvider ? vercelProvider + rest : modelId;
+}
+
+// Turn a stored key row into a BYOKResult: decrypt the key first, then validate
+// the provider id against UserByokProviderIdSchema (parse throws on unknown ids).
+async function decryptRow(
+  row: { encrypted_api_key: EncryptedData; provider_id: string },
+  encryptionKey: string
+): Promise<BYOKResult> {
+  const decryptedAPIKey = await decryptApiKey(row.encrypted_api_key, encryptionKey);
+  const providerId = UserByokProviderIdSchema.parse(row.provider_id);
+  return { decryptedAPIKey, providerId };
+}
+
+// Load and decrypt the enabled BYOK keys of a user for the given providers.
+//
+// Rows are ordered by `created_at` and the result keeps that order.
+// Returns null when `providerIds` is empty or no enabled key matches; the empty
+// case is answered without querying, since an empty IN list can match nothing.
+// Rejects if the query, decryption or provider-id validation fails.
+export async function getBYOKforUser(
+  db: WorkerDb,
+  userId: string,
+  providerIds: UserByokProviderId[],
+  encryptionKey: string
+): Promise<BYOKResult[] | null> {
+  if (providerIds.length === 0) return null;
+
+  const rows = await db
+    .select({
+      encrypted_api_key: byok_api_keys.encrypted_api_key,
+      provider_id: byok_api_keys.provider_id,
+    })
+    .from(byok_api_keys)
+    .where(
+      and(
+        eq(byok_api_keys.kilo_user_id, userId),
+        eq(byok_api_keys.is_enabled, true),
+        inArray(byok_api_keys.provider_id, providerIds)
+      )
+    )
+    .orderBy(byok_api_keys.created_at);
+
+  if (rows.length === 0) return null;
+  return Promise.all(rows.map(row => decryptRow(row, encryptionKey)));
+}
+
+// Load and decrypt the enabled BYOK keys of an organization for the given providers.
+//
+// Rows are ordered by `created_at` and the result keeps that order.
+// Returns null when `providerIds` is empty or no enabled key matches; the empty
+// case is answered without querying, since an empty IN list can match nothing.
+// Rejects if the query, decryption or provider-id validation fails.
+export async function getBYOKforOrganization(
+  db: WorkerDb,
+  organizationId: string,
+  providerIds: UserByokProviderId[],
+  encryptionKey: string
+): Promise<BYOKResult[] | null> {
+  if (providerIds.length === 0) return null;
+
+  const rows = await db
+    .select({
+      encrypted_api_key: byok_api_keys.encrypted_api_key,
+      provider_id: byok_api_keys.provider_id,
+    })
+    .from(byok_api_keys)
+    .where(
+      and(
+        eq(byok_api_keys.organization_id, organizationId),
+        eq(byok_api_keys.is_enabled, true),
+        inArray(byok_api_keys.provider_id, providerIds)
+      )
+    )
+    .orderBy(byok_api_keys.created_at);
+
+  if (rows.length === 0) return null;
+  return Promise.all(rows.map(row => decryptRow(row, encryptionKey)));
+}
diff --git a/llm-gateway/src/lib/custom-llm/format.ts b/llm-gateway/src/lib/custom-llm/format.ts
new file mode 100644
index 000000000..3fac8c5d5
--- /dev/null
+++ b/llm-gateway/src/lib/custom-llm/format.ts
@@ -0,0 +1,13 @@
+// Port of src/lib/custom-llm/format.ts
+
+/**
+ * String identifiers used in the `format` field of reasoning details.
+ */
+export enum ReasoningFormat {
+  Unknown = 'unknown',
+  OpenAIResponsesV1 = 'openai-responses-v1',
+  XAIResponsesV1 = 'xai-responses-v1',
+  AnthropicClaudeV1 = 'anthropic-claude-v1',
+  GoogleGeminiV1 = 'google-gemini-v1',
+  // Prevents the extension from stripping ids
+  OpenAIResponsesV1_Obscured = 'openai-responses-v1-obscured',
+}
+
+/** Format used as the default; currently the Anthropic Claude format. */
+export const DEFAULT_REASONING_FORMAT = ReasoningFormat.AnthropicClaudeV1;
diff --git a/llm-gateway/src/lib/custom-llm/index.ts b/llm-gateway/src/lib/custom-llm/index.ts
new file mode 100644
index 000000000..373109c57
--- /dev/null
+++ b/llm-gateway/src/lib/custom-llm/index.ts
@@ -0,0 +1,898 @@
+// Custom LLM request handler — port of src/lib/custom-llm/customLlmRequest.ts.
+// Uses Vercel AI SDK for Anthropic and OpenAI-compatible endpoints.
+// Adapted for Cloudflare Workers: no Node.js crypto, no global DB, no Next.js.
+
+import type { OpenRouterChatCompletionRequest } from '../../types/request';
+import { createAnthropic } from '@ai-sdk/anthropic';
+import type { AnthropicProviderOptions } from '@ai-sdk/anthropic';
+import {
+  APICallError,
+  generateText,
+  jsonSchema,
+  streamText,
+  type ModelMessage,
+  type TextStreamPart,
+  type ToolChoice,
+  type ToolSet,
+} from 'ai';
+import type { CustomLlm } from '@kilocode/db/schema';
+import type { OpenAILanguageModelResponsesOptions } from '@ai-sdk/openai';
+import { createOpenAI } from '@ai-sdk/openai';
+import { ReasoningDetailType } from './reasoning-details';
+import type { ReasoningDetailUnion } from './reasoning-details';
+import {
+  reasoningDetailsToAiSdkParts,
+  reasoningOutputToDetails,
+  extractSignature,
+  extractEncryptedData,
+  extractItemId,
+  extractFormat,
+  type AiSdkReasoningPart,
+} from './reasoning-provider-metadata';
+import { ReasoningFormat } from './format';
+import type { WorkerDb } from '@kilocode/db/client';
+import { temp_phase } from '@kilocode/db/schema';
+import { inArray } from 'drizzle-orm';
+import { VerbositySchema, ReasoningEffortSchema } from '@kilocode/db/schema-types';
+
+// ─── Types ───────────────────────────────────────────────────────────────────
+
+/** Cache-control marker that may accompany content parts and user messages. */
+type OpenRouterCacheControl = { type: 'ephemeral' };
+
+/** Plain text content part. */
+type ChatCompletionContentPartText = {
+  type: 'text';
+  text: string;
+  reasoning?: string | null;
+  cache_control?: OpenRouterCacheControl;
+};
+/** Image content part; the image is referenced through `image_url.url`. */
+type ChatCompletionContentPartImage = {
+  type: 'image_url';
+  image_url: { url: string };
+  cache_control?: OpenRouterCacheControl;
+};
+/** File content part; every field of `file` is optional. */
+type ChatCompletionContentPartFile = {
+  type: 'file';
+  file: { filename?: string; file_data?: string; file_id?: string };
+  cache_control?: OpenRouterCacheControl;
+};
+/** Audio content part: the audio payload plus a format identifier such as `wav` or `mp3`. */
+type ChatCompletionContentPartInputAudio = {
+  type: 'input_audio';
+  input_audio: { data: string; format: string };
+  cache_control?: OpenRouterCacheControl;
+};
+
+/** Any content part, discriminated by its `type` field. */
+type ChatCompletionContentPart =
+  | ChatCompletionContentPartText
+  | ChatCompletionContentPartImage
+  | ChatCompletionContentPartFile
+  | ChatCompletionContentPartInputAudio;
+
+/** Tool result message; `tool_call_id` refers to the `id` of an assistant tool call. */
+type ChatCompletionToolMessageParam = {
+  role: 'tool';
+  tool_call_id: string;
+  content: string | Array<ChatCompletionContentPart>;
+};
+
+/** Assistant message with optional text, reasoning (plain or structured) and tool calls. */
+type ChatCompletionAssistantMessageParam = {
+  role: 'assistant';
+  content?: string;
+  reasoning?: string;
+  reasoning_details?: ReasoningDetailUnion[];
+  tool_calls?: Array<{
+    id: string;
+    type: 'function';
+    // `arguments` is a string; convertAssistantContent runs it through JSON.parse.
+    function: { name: string; arguments: string };
+  }>;
+};
+
+/** System message; array content is restricted to text parts. */
+type ChatCompletionSystemMessageParam = {
+  role: 'system';
+  content: string | Array<ChatCompletionContentPartText>;
+};
+
+/** User message; may carry a message-level cache-control marker. */
+type ChatCompletionUserMessageParam = {
+  role: 'user';
+  content: string | Array<ChatCompletionContentPart>;
+  cache_control?: OpenRouterCacheControl;
+};
+
+/** Any request message, discriminated by `role`. */
+type ChatCompletionMessageParam =
+  | ChatCompletionSystemMessageParam
+  | ChatCompletionUserMessageParam
+  | ChatCompletionAssistantMessageParam
+  | ChatCompletionToolMessageParam;
+
+/** The message list of a chat-completions request. */
+type OpenRouterChatCompletionsInput = Array<ChatCompletionMessageParam>;
+
+/**
+ * One choice inside a streamed chunk. `delta` holds the incremental content, reasoning and
+ * tool-call fragments; the index signatures allow additional keys.
+ */
+type ChatCompletionChunkChoice = {
+  delta?: {
+    content?: string | null;
+    reasoning?: string;
+    reasoning_details?: ReasoningDetailUnion[];
+    tool_calls?: Array<{
+      // Position of the tool call this fragment belongs to.
+      index: number;
+      id?: string;
+      type?: 'function';
+      function?: { name?: string; arguments?: string };
+    }>;
+    role?: string | null;
+    [key: string]: unknown;
+  };
+  finish_reason?: string | null;
+  [key: string]: unknown;
+};
+
+/** A streamed chunk: optional response id, model name, choices and optional token usage. */
+type ChatCompletionChunk = {
+  id?: string;
+  model: string;
+  choices: ChatCompletionChunkChoice[];
+  usage?: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+    prompt_tokens_details?: { cached_tokens: number; cache_write_tokens?: number };
+    completion_tokens_details?: { reasoning_tokens: number };
+  };
+  [key: string]: unknown;
+};
+
+// ─── Crypto helpers ───────────────────────────────────────────────────────────
+
+/** Returns the lowercase hexadecimal SHA-256 digest of the UTF-8 encoding of `input`. */
+async function sha256Hex(input: string): Promise<string> {
+  const bytes = new TextEncoder().encode(input);
+  const digest = new Uint8Array(await crypto.subtle.digest('SHA-256', bytes));
+  let hex = '';
+  // Each byte becomes exactly two hex characters.
+  for (const byte of digest) hex += byte.toString(16).padStart(2, '0');
+  return hex;
+}
+
+/**
+ * Derives the key under which a message's phase is stored and looked up: the SHA-256 hex
+ * digest of the user id, the task id and every content string joined with `|`.
+ */
+async function phaseKey(
+  userId: string,
+  taskId: string | undefined,
+  content: string[]
+): Promise<string> {
+  // Array#join renders an undefined taskId as an empty segment.
+  const segments = [userId, taskId].concat(content);
+  return sha256Hex(segments.join('|'));
+}
+
+// ─── Message conversion ───────────────────────────────────────────────────────
+
+/**
+ * Collects the text of a message's content.
+ *
+ * A string is returned as a single-element list. For an array, only object parts whose `type`
+ * is `input_text` or `output_text` and whose `text` is a string contribute. Anything else
+ * yields an empty list.
+ */
+function extractMessageTextParts(content: unknown): string[] {
+  if (typeof content === 'string') return [content];
+  if (!Array.isArray(content)) return [];
+
+  const texts: string[] = [];
+  for (const candidate of content as unknown[]) {
+    if (candidate === null || typeof candidate !== 'object') continue;
+    const { type, text } = candidate as Record<string, unknown>;
+    const isTextPart = type === 'input_text' || type === 'output_text';
+    if (isTextPart && typeof text === 'string') texts.push(text);
+  }
+  return texts;
+}
+
+/** One element of a multi-part tool result: either text or a media payload with its type. */
+type ToolOutputContentPart =
+  | { type: 'text'; text: string }
+  | { type: 'media'; data: string; mediaType: string };
+
+/**
+ * Splits a base64 `data:` URL into its media type and payload.
+ *
+ * Media-type parameters between the type and the `;base64` marker (for example
+ * `;charset=utf-8`) are accepted and left out of the returned media type.
+ *
+ * @param url - Candidate URL.
+ * @returns The media type and the base64 payload, or `null` when `url` is not a base64 data
+ *   URL.
+ */
+function parseDataUrl(url: string): { data: string; mediaType: string } | null {
+  const match = /^data:([^;]+)(?:;[^;,]+)*;base64,(.+)$/.exec(url);
+  if (!match) return null;
+  return { mediaType: match[1], data: match[2] };
+}
+
+/** Media types for the audio `format` identifiers that are recognized. */
+const AUDIO_MEDIA_TYPES: Record<string, string> = {
+  wav: 'audio/wav',
+  mp3: 'audio/mpeg',
+  aiff: 'audio/aiff',
+  aac: 'audio/aac',
+  ogg: 'audio/ogg',
+  flac: 'audio/flac',
+  m4a: 'audio/mp4',
+  pcm16: 'audio/pcm',
+  pcm24: 'audio/pcm',
+};
+
+/**
+ * Resolves an audio format identifier to a media type.
+ *
+ * @param format - Format identifier taken from an `input_audio` part.
+ * @returns The mapped media type, or `application/octet-stream` for unrecognized formats.
+ */
+function audioFormatToMediaType(format: string): string {
+  // Only own keys count: a plain index lookup would also find inherited Object.prototype
+  // members (e.g. "constructor") and hand back a non-string value.
+  const mediaType = Object.prototype.hasOwnProperty.call(AUDIO_MEDIA_TYPES, format)
+    ? AUDIO_MEDIA_TYPES[format]
+    : undefined;
+  return mediaType ?? 'application/octet-stream';
+}
+
+/**
+ * Converts one content part of a tool message into a tool-output part.
+ *
+ * Text stays text. Images and files become media when they are base64 data URLs and otherwise
+ * fall back to text holding the raw URL / file data. Audio always becomes media.
+ */
+function convertToolOutputPart(part: ChatCompletionContentPart): ToolOutputContentPart {
+  switch (part.type) {
+    case 'text': {
+      const { text } = part;
+      return { type: 'text', text };
+    }
+    case 'image_url': {
+      const { url } = part.image_url;
+      const decoded = parseDataUrl(url);
+      return decoded
+        ? { type: 'media', data: decoded.data, mediaType: decoded.mediaType }
+        : { type: 'text', text: url };
+    }
+    case 'file': {
+      const fileData = part.file.file_data;
+      const decoded = fileData ? parseDataUrl(fileData) : null;
+      return decoded
+        ? { type: 'media', data: decoded.data, mediaType: decoded.mediaType }
+        : { type: 'text', text: fileData ?? '' };
+    }
+    case 'input_audio': {
+      const { data, format } = part.input_audio;
+      return { type: 'media', data, mediaType: audioFormatToMediaType(format) };
+    }
+  }
+}
+
+/**
+ * Wraps tool message content as a tool output: string content becomes a `text` output,
+ * part lists become a `content` output with every part converted.
+ */
+function convertToolOutput(content: string | Array<ChatCompletionContentPart>) {
+  if (typeof content !== 'string') {
+    const value: ToolOutputContentPart[] = content.map(convertToolOutputPart);
+    return { type: 'content' as const, value };
+  }
+  return { type: 'text' as const, value: content };
+}
+
+/**
+ * Converts one content part of a user message into the matching AI SDK part.
+ *
+ * A part-level `cache_control` is forwarded as Anthropic `cacheControl` provider options;
+ * without it no `providerOptions` key is emitted.
+ */
+function convertUserContentPart(part: ChatCompletionContentPart) {
+  const cacheControl = part.cache_control;
+  const extras = cacheControl ? { providerOptions: { anthropic: { cacheControl } } } : {};
+
+  switch (part.type) {
+    case 'text':
+      return { type: 'text' as const, text: part.text, ...extras };
+    case 'image_url':
+      return { type: 'image' as const, image: new URL(part.image_url.url), ...extras };
+    case 'file': {
+      const fileData = part.file.file_data ?? '';
+      return {
+        type: 'file' as const,
+        data: fileData,
+        filename: part.file.filename,
+        // The media type is read from the data URL when there is one.
+        mediaType: parseDataUrl(fileData)?.mediaType ?? 'application/octet-stream',
+        ...extras,
+      };
+    }
+    case 'input_audio': {
+      const { data, format } = part.input_audio;
+      return {
+        type: 'file' as const,
+        data,
+        mediaType: audioFormatToMediaType(format),
+        ...extras,
+      };
+    }
+  }
+}
+
+/** A single part of converted assistant content: text, reasoning or a tool call. */
+type AssistantContentPart =
+  | { type: 'text'; text: string }
+  | AiSdkReasoningPart
+  | { type: 'tool-call'; toolCallId: string; toolName: string; input: unknown };
+
+/**
+ * Converts an assistant message into AI SDK assistant content.
+ *
+ * Parts are emitted in the order reasoning, text, tool calls. Structured `reasoning_details`
+ * take precedence over the plain `reasoning` string.
+ *
+ * @param msg - Assistant message from the request.
+ * @returns The bare text when the message consists of text only, the list of parts
+ *   otherwise, or `''` when the message has no content at all.
+ * @throws SyntaxError when a tool call carries non-blank arguments that are not valid JSON.
+ */
+function convertAssistantContent(
+  msg: ChatCompletionAssistantMessageParam
+): string | AssistantContentPart[] {
+  const parts: AssistantContentPart[] = [];
+
+  if (msg.reasoning_details && msg.reasoning_details.length > 0) {
+    for (const p of reasoningDetailsToAiSdkParts(msg.reasoning_details)) parts.push(p);
+  } else if (msg.reasoning) {
+    parts.push({ type: 'reasoning', text: msg.reasoning });
+  }
+
+  if (msg.content) parts.push({ type: 'text', text: msg.content });
+
+  if (msg.tool_calls) {
+    for (const tc of msg.tool_calls) {
+      // A call without parameters may arrive with a blank arguments string, which JSON.parse
+      // rejects; treat it as an empty argument object instead of failing the whole request.
+      const rawArguments = tc.function.arguments;
+      parts.push({
+        type: 'tool-call',
+        toolCallId: tc.id,
+        toolName: tc.function.name,
+        input: rawArguments.trim() === '' ? {} : JSON.parse(rawArguments),
+      });
+    }
+  }
+
+  if (parts.length === 1 && parts[0].type === 'text') return parts[0].text;
+  return parts.length > 0 ? parts : '';
+}
+
+/**
+ * Converts the request's chat-completions messages into AI SDK model messages.
+ *
+ * System messages are flattened to a single string and always marked with an ephemeral
+ * Anthropic cache-control option. Tool results are labelled with the name of the tool whose
+ * call id they answer, or `''` when no assistant message declared that id.
+ */
+function convertMessages(messages: OpenRouterChatCompletionsInput): ModelMessage[] {
+  // Tool results carry only a call id, so collect id -> tool name from assistant tool calls.
+  const toolNames = new Map<string, string>();
+  for (const message of messages) {
+    if (message.role !== 'assistant' || !message.tool_calls) continue;
+    for (const call of message.tool_calls) toolNames.set(call.id, call.function.name);
+  }
+
+  const toModelMessage = (msg: ChatCompletionMessageParam): ModelMessage => {
+    switch (msg.role) {
+      case 'system': {
+        const text =
+          typeof msg.content === 'string' ? msg.content : msg.content.map(p => p.text).join('');
+        return {
+          role: 'system',
+          content: text,
+          providerOptions: { anthropic: { cacheControl: { type: 'ephemeral' } } },
+        };
+      }
+      case 'user': {
+        const cacheControl = msg.cache_control;
+        return {
+          role: 'user',
+          content:
+            typeof msg.content === 'string' ? msg.content : msg.content.map(convertUserContentPart),
+          ...(cacheControl ? { providerOptions: { anthropic: { cacheControl } } } : {}),
+        };
+      }
+      case 'assistant':
+        return { role: 'assistant', content: convertAssistantContent(msg) };
+      case 'tool':
+        return {
+          role: 'tool',
+          content: [
+            {
+              type: 'tool-result',
+              toolCallId: msg.tool_call_id,
+              toolName: toolNames.get(msg.tool_call_id) ?? '',
+              output: convertToolOutput(msg.content),
+            },
+          ],
+        };
+    }
+  };
+
+  return messages.map(msg => toModelMessage(msg));
+}
+
+// ─── Tool conversion ───────────────────────────────────────────────────────────
+
+/**
+ * Converts the request's tool declarations into an AI SDK tool set keyed by function name.
+ *
+ * Entries whose `type` is not `function` are ignored. A tool without `parameters` gets the
+ * schema `{ type: 'object' }`.
+ *
+ * @returns The tool set, or `undefined` when the request declares no tools.
+ */
+function convertTools(tools: OpenRouterChatCompletionRequest['tools']): ToolSet | undefined {
+  if (!tools || tools.length === 0) return undefined;
+
+  type FunctionTool = {
+    type: string;
+    function: { name: string; description?: string; parameters?: unknown; strict?: boolean };
+  };
+
+  const toolSet: ToolSet = {};
+  (tools as FunctionTool[])
+    .filter(tool => tool.type === 'function')
+    .forEach(({ function: fn }) => {
+      toolSet[fn.name] = {
+        description: fn.description,
+        strict: fn.strict ?? undefined,
+        inputSchema: jsonSchema((fn.parameters as Record<string, unknown>) ?? { type: 'object' }),
+      };
+    });
+  return toolSet;
+}
+
+/**
+ * Maps the request's `tool_choice` onto the AI SDK tool choice.
+ *
+ * `none`, `auto` and `required` pass through unchanged; `{ type: 'function', function }`
+ * becomes `{ type: 'tool', toolName }`; everything else yields `undefined`.
+ */
+function convertToolChoice(
+  toolChoice: OpenRouterChatCompletionRequest['tool_choice']
+): ToolChoice<ToolSet> | undefined {
+  if (toolChoice == null) return undefined;
+
+  if (typeof toolChoice === 'string') {
+    const passThrough = ['none', 'auto', 'required'];
+    return passThrough.includes(toolChoice) ? (toolChoice as ToolChoice<ToolSet>) : undefined;
+  }
+
+  const namesFunction =
+    typeof toolChoice === 'object' && 'type' in toolChoice && toolChoice.type === 'function';
+  if (!namesFunction) return undefined;
+
+  const { function: chosen } = toolChoice as { type: 'function'; function: { name: string } };
+  return { type: 'tool', toolName: chosen.name };
+}
+
+// ─── Common params builder ─────────────────────────────────────────────────────
+
+/**
+ * Assembles the parameters shared by the streaming and non-streaming AI SDK calls.
+ *
+ * @param customLlm - Custom LLM record supplying fallback verbosity / reasoning settings.
+ * @param messages - Already converted model messages.
+ * @param request - Incoming chat-completions request.
+ * @param isLegacyExtension - When true, parallel tool calls are disabled for both providers.
+ * @returns Messages, tools, sampling limits, headers and per-provider options.
+ */
+function buildCommonParams(
+  customLlm: CustomLlm,
+  messages: ModelMessage[],
+  request: OpenRouterChatCompletionRequest,
+  isLegacyExtension: boolean
+) {
+  // Request-level settings win over the custom LLM's stored values; only the parsed `.data`
+  // of each schema check is kept.
+  const verbosity = VerbositySchema.safeParse(request.verbosity ?? customLlm.verbosity).data;
+  const requestedEffort = request.reasoning?.effort ?? customLlm.reasoning_effort;
+  const reasoningEffort = ReasoningEffortSchema.safeParse(requestedEffort).data;
+
+  const parallelToolCalls = request['parallel_tool_calls'] as boolean | undefined;
+  const maxCompletionTokens = request['max_completion_tokens'] as number | undefined;
+
+  const anthropic = {
+    thinking: { type: 'adaptive' },
+    effort: verbosity,
+    disableParallelToolUse: parallelToolCalls === false || isLegacyExtension,
+  } satisfies AnthropicProviderOptions;
+
+  const openai = {
+    forceReasoning: (reasoningEffort !== 'none' && customLlm.force_reasoning) || undefined,
+    reasoningSummary: 'auto',
+    // 'max' is passed to OpenAI as 'high'; other values pass through unchanged.
+    textVerbosity: verbosity === 'max' ? 'high' : verbosity,
+    reasoningEffort,
+    include: ['reasoning.encrypted_content'],
+    parallelToolCalls: (parallelToolCalls ?? true) && !isLegacyExtension,
+    store: false,
+    promptCacheKey: request.prompt_cache_key,
+    safetyIdentifier: request.safety_identifier,
+    user: request.user,
+  } satisfies OpenAILanguageModelResponsesOptions;
+
+  return {
+    messages,
+    tools: convertTools(request.tools),
+    toolChoice: convertToolChoice(request.tool_choice),
+    maxOutputTokens: maxCompletionTokens ?? request.max_tokens ?? undefined,
+    temperature: (request.temperature as number | undefined) ?? undefined,
+    headers: { 'anthropic-beta': 'context-1m-2025-08-07' },
+    providerOptions: { anthropic, openai },
+  };
+}
+
+// ─── Non-streaming response converter ────────────────────────────────────────
+
+/**
+ * Shapes a finished `generateText` result as a chat-completion style response body.
+ *
+ * @param result - Completed AI SDK generation.
+ * @param model - Model identifier echoed back in the response.
+ * @returns An object with `id`, `model`, a single assistant choice and token `usage`.
+ */
+function convertGenerateResultToResponse(
+  result: Awaited<ReturnType<typeof generateText>>,
+  model: string
+) {
+  const toolCalls = result.toolCalls.map((call, position) => ({
+    id: call.toolCallId,
+    type: 'function' as const,
+    index: position,
+    function: { name: call.toolName, arguments: JSON.stringify(call.input) },
+  }));
+  const reasoningDetails =
+    result.reasoning.length > 0 ? reasoningOutputToDetails(result.reasoning) : undefined;
+
+  const { usage } = result;
+  const cacheReadTokens = usage.inputTokenDetails.cacheReadTokens;
+  const cacheWriteTokens = usage.inputTokenDetails.cacheWriteTokens;
+  const reasoningTokens = usage.outputTokenDetails.reasoningTokens;
+
+  // Optional fragments are spread in below so that absent values leave no key behind.
+  const promptDetails =
+    cacheReadTokens != null || cacheWriteTokens != null
+      ? {
+          prompt_tokens_details: {
+            cached_tokens: cacheReadTokens ?? 0,
+            ...(cacheWriteTokens != null ? { cache_write_tokens: cacheWriteTokens } : {}),
+          },
+        }
+      : {};
+  const completionDetails =
+    reasoningTokens != null
+      ? { completion_tokens_details: { reasoning_tokens: reasoningTokens } }
+      : {};
+
+  const message = {
+    role: 'assistant' as const,
+    // Empty text is reported as null.
+    content: result.text || null,
+    ...(result.reasoningText ? { reasoning: result.reasoningText } : {}),
+    ...(reasoningDetails ? { reasoning_details: reasoningDetails } : {}),
+    ...(toolCalls.length > 0 ? { tool_calls: toolCalls } : {}),
+  };
+
+  return {
+    id: result.response.id,
+    model,
+    choices: [
+      {
+        message,
+        finish_reason: FINISH_REASON_MAP[result.finishReason] ?? 'stop',
+        index: 0,
+      },
+    ],
+    usage: {
+      prompt_tokens: usage.inputTokens ?? 0,
+      completion_tokens: usage.outputTokens ?? 0,
+      total_tokens: usage.totalTokens ?? 0,
+      ...promptDetails,
+      ...completionDetails,
+    },
+  };
+}
+
+// ─── Streaming chunk converter ────────────────────────────────────────────────
+
+/**
+ * Translates AI SDK finish reasons into the `finish_reason` strings written to responses.
+ * Both converters fall back to `'stop'` for reasons that are not listed here.
+ */
+const FINISH_REASON_MAP: Record<string, string> = {
+  stop: 'stop',
+  length: 'length',
+  'content-filter': 'content_filter',
+  'tool-calls': 'tool_calls',
+  error: 'error',
+  other: 'stop',
+};
+
+/**
+ * Creates a stateful converter that turns AI SDK stream parts into chat-completion chunks.
+ *
+ * The returned function must be fed the parts of a single stream in order: it tracks tool-call
+ * and reasoning indices as well as the response id between calls.
+ *
+ * @param userId - Used when deriving the key under which a message phase is stored.
+ * @param taskId - Used when deriving the key under which a message phase is stored.
+ * @param model - Model name written into every chunk.
+ * @param db - When non-null, message phases found in raw events are written to `temp_phase`.
+ * @returns An async function mapping one stream part to a chunk, or `null` when the part
+ *   produces no output.
+ */
+function createStreamPartConverter(
+  userId: string,
+  taskId: string | undefined,
+  model: string,
+  db: WorkerDb | null
+) {
+  // Tool-call id -> index announced for it in `tool-input-start`.
+  const toolCallIndices = new Map<string, number>();
+  let nextToolIndex = 0;
+  let nextReasoningIndex = 0;
+  // Index of the reasoning text block currently being streamed, once one has started.
+  let currentTextBlockIndex: number | null = null;
+  // Set by `reasoning-start`; makes the next text delta allocate a fresh reasoning index.
+  let inReasoningBlock = false;
+  // Known only after the first `finish-step`; earlier chunks are emitted without an id.
+  let responseId: string | undefined;
+
+  return async function convertStreamPartToChunk(
+    part: TextStreamPart<ToolSet>
+  ): Promise<ChatCompletionChunk | null> {
+    const id = responseId;
+    switch (part.type) {
+      case 'raw': {
+        // Handle phase metadata insertion for OpenAI responses
+        if (db) {
+          type ResponseItemDone = {
+            type: string;
+            item?: {
+              type?: string;
+              phase?: string;
+              content?: Array<{ type: string; text?: string }>;
+            };
+          };
+          const event = part.rawValue as ResponseItemDone;
+          if (event.type === 'response.output_item.done' && event.item) {
+            const item = event.item;
+            const phase = typeof item.phase === 'string' ? item.phase : null;
+            if (item.type === 'message' && phase && Array.isArray(item.content)) {
+              // The key hashes the message's output_text parts together with user and task.
+              const key = await phaseKey(
+                userId,
+                taskId,
+                item.content.filter(c => c.type === 'output_text').map(c => c.text ?? '')
+              );
+              // NOTE(review): this insert is awaited without a try/catch, so a database
+              // failure rejects the converter call — confirm that is intended.
+              await db.insert(temp_phase).values({ key, value: phase }).onConflictDoNothing();
+            }
+          }
+        }
+        // Raw events never produce a chunk of their own.
+        return null;
+      }
+
+      case 'text-delta':
+        return {
+          ...(id !== undefined ? { id } : {}),
+          model,
+          choices: [{ delta: { content: part.text } }],
+        };
+
+      case 'reasoning-start': {
+        // Encrypted reasoning available at block start is emitted right away as its own detail.
+        const encData = extractEncryptedData(part.providerMetadata);
+        if (encData) {
+          const itemId = extractItemId(part.providerMetadata);
+          const format = extractFormat(part.providerMetadata);
+          const index = nextReasoningIndex++;
+          return {
+            ...(id !== undefined ? { id } : {}),
+            model,
+            choices: [
+              {
+                delta: {
+                  reasoning_details: [
+                    {
+                      type: ReasoningDetailType.Encrypted,
+                      data: encData,
+                      index,
+                      ...(itemId ? { id: itemId } : {}),
+                      ...(format ? { format } : {}),
+                    },
+                  ],
+                },
+              },
+            ],
+          };
+        }
+        // Otherwise only remember that a block started; the index is allocated lazily.
+        inReasoningBlock = true;
+        return null;
+      }
+
+      case 'reasoning-delta': {
+        const details: ReasoningDetailUnion[] = [];
+        const signature = extractSignature(part.providerMetadata);
+        const format = extractFormat(part.providerMetadata);
+
+        if (part.text) {
+          // The first text delta after `reasoning-start` opens a new text block index.
+          if (inReasoningBlock) {
+            currentTextBlockIndex = nextReasoningIndex++;
+            inReasoningBlock = false;
+          }
+          const itemId = extractItemId(part.providerMetadata);
+          details.push({
+            type: ReasoningDetailType.Text,
+            text: part.text,
+            index: currentTextBlockIndex ?? 0,
+            ...(signature ? { signature } : {}),
+            ...(itemId ? { id: itemId } : {}),
+            ...(format ? { format } : {}),
+          });
+        } else if (signature) {
+          // Signature-only delta: attach it to the current text block with empty text.
+          details.push({
+            type: ReasoningDetailType.Text,
+            text: '',
+            signature,
+            index: currentTextBlockIndex ?? 0,
+            ...(format ? { format } : {}),
+          });
+        }
+
+        if (details.length === 0) return null;
+        return {
+          ...(id !== undefined ? { id } : {}),
+          model,
+          choices: [{ delta: { reasoning: part.text || '', reasoning_details: details } }],
+        };
+      }
+
+      case 'reasoning-end': {
+        // Only blocks that end with encrypted data and/or a signature produce a chunk.
+        const encData = extractEncryptedData(part.providerMetadata);
+        const signature = extractSignature(part.providerMetadata);
+        if (!encData && !signature) return null;
+
+        const details: ReasoningDetailUnion[] = [];
+        const itemId = extractItemId(part.providerMetadata);
+        const format = extractFormat(part.providerMetadata);
+
+        if (encData) {
+          details.push({
+            type: ReasoningDetailType.Encrypted,
+            data: encData,
+            index: nextReasoningIndex++,
+            ...(itemId ? { id: itemId } : {}),
+            ...(format ? { format } : {}),
+          });
+        }
+        if (signature) {
+          details.push({
+            type: ReasoningDetailType.Text,
+            text: '',
+            signature,
+            index: currentTextBlockIndex ?? 0,
+            ...(itemId ? { id: itemId } : {}),
+            ...(format ? { format } : {}),
+          });
+        }
+        return {
+          ...(id !== undefined ? { id } : {}),
+          model,
+          choices: [{ delta: { reasoning_details: details } }],
+        };
+      }
+
+      case 'tool-input-start': {
+        // Announce the tool call (id and name) and remember its index for later deltas.
+        const index = nextToolIndex++;
+        toolCallIndices.set(part.id, index);
+        return {
+          ...(id !== undefined ? { id } : {}),
+          model,
+          choices: [
+            {
+              delta: {
+                tool_calls: [
+                  {
+                    index,
+                    id: part.id,
+                    type: 'function' as const,
+                    function: { name: part.toolName },
+                  },
+                ],
+              },
+            },
+          ],
+        };
+      }
+
+      case 'tool-input-delta': {
+        // Argument fragments reuse the announced index (0 when the id was never announced).
+        const index = toolCallIndices.get(part.id) ?? 0;
+        return {
+          ...(id !== undefined ? { id } : {}),
+          model,
+          choices: [{ delta: { tool_calls: [{ index, function: { arguments: part.delta } }] } }],
+        };
+      }
+
+      case 'tool-call': {
+        // Already streamed incrementally through tool-input-start/-delta: nothing to add.
+        if (toolCallIndices.has(part.toolCallId)) return null;
+        const index = nextToolIndex++;
+        return {
+          ...(id !== undefined ? { id } : {}),
+          model,
+          choices: [
+            {
+              delta: {
+                tool_calls: [
+                  {
+                    index,
+                    id: part.toolCallId,
+                    type: 'function' as const,
+                    function: { name: part.toolName, arguments: JSON.stringify(part.input) },
+                  },
+                ],
+              },
+            },
+          ],
+        };
+      }
+
+      case 'finish-step': {
+        // Records the response id for subsequent chunks and reports finish reason plus usage.
+        responseId = part.response.id;
+        const cRd = part.usage.inputTokenDetails.cacheReadTokens;
+        const cWr = part.usage.inputTokenDetails.cacheWriteTokens;
+        const rsnTok = part.usage.outputTokenDetails.reasoningTokens;
+        return {
+          id: responseId,
+          model,
+          choices: [{ delta: {}, finish_reason: FINISH_REASON_MAP[part.finishReason] ?? 'stop' }],
+          usage: {
+            prompt_tokens: part.usage.inputTokens ?? 0,
+            completion_tokens: part.usage.outputTokens ?? 0,
+            total_tokens: part.usage.totalTokens ?? 0,
+            ...(cRd != null || cWr != null
+              ? {
+                  prompt_tokens_details: {
+                    cached_tokens: cRd ?? 0,
+                    ...(cWr != null && { cache_write_tokens: cWr }),
+                  },
+                }
+              : {}),
+            ...(rsnTok != null ? { completion_tokens_details: { reasoning_tokens: rsnTok } } : {}),
+          },
+        };
+      }
+
+      // Every other part type is dropped.
+      default:
+        return null;
+    }
+  };
+}
+
+// ─── Legacy extension hack (OpenAIResponsesV1 ↔ OpenAIResponsesV1_Obscured) ──
+
+/**
+ * Rewrites, in place, every assistant reasoning detail tagged with the obscured OpenAI
+ * Responses format back to the regular OpenAI Responses format.
+ */
+function reverseLegacyExtensionHack(messages: OpenRouterChatCompletionsInput) {
+  for (const message of messages) {
+    if (message.role !== 'assistant') continue;
+    const details = message.reasoning_details ?? [];
+    for (const detail of details) {
+      if (detail.format !== ReasoningFormat.OpenAIResponsesV1_Obscured) continue;
+      detail.format = ReasoningFormat.OpenAIResponsesV1;
+    }
+  }
+}
+
+/**
+ * Rewrites, in place, every reasoning detail of the choice's delta that is tagged with the
+ * regular OpenAI Responses format to the obscured variant. A missing choice, delta or detail
+ * list is a no-op.
+ */
+function applyLegacyExtensionHack(choice: ChatCompletionChunkChoice | undefined) {
+  const details = choice?.delta?.reasoning_details ?? [];
+  for (const detail of details) {
+    if (detail.format !== ReasoningFormat.OpenAIResponsesV1) continue;
+    detail.format = ReasoningFormat.OpenAIResponsesV1_Obscured;
+  }
+}
+
+// ─── Model factory ────────────────────────────────────────────────────────────
+
+/**
+ * Builds the AI SDK language model for a custom LLM record.
+ *
+ * For the `openai` provider the request-patching fetch is installed only when a database
+ * handle is available and the base URL is exactly `https://api.openai.com/v1`.
+ *
+ * @throws Error when the record's provider is neither `anthropic` nor `openai`.
+ */
+function createModel(
+  customLlm: CustomLlm,
+  userId: string,
+  taskId: string | undefined,
+  db: WorkerDb | null
+) {
+  const { api_key: apiKey, base_url: baseURL, internal_id: modelName } = customLlm;
+
+  switch (customLlm.provider) {
+    case 'anthropic':
+      return createAnthropic({ apiKey, baseURL })(modelName);
+    case 'openai': {
+      const patchedFetch =
+        baseURL === 'https://api.openai.com/v1' && db
+          ? responseCreateParamsPatchFetch(userId, taskId, db)
+          : undefined;
+      return createOpenAI({ apiKey, baseURL, fetch: patchedFetch })(modelName);
+    }
+    default:
+      throw new Error(`Unknown custom LLM provider: ${customLlm.provider}`);
+  }
+}
+
+/**
+ * Builds a `fetch` replacement that injects `phase` into the assistant messages of an OpenAI
+ * Responses API request before sending it.
+ *
+ * Requests whose body is not a string, whose `input` is not an array, or that contain no
+ * assistant message are forwarded untouched. For each assistant message the phase is looked
+ * up in `temp_phase` under the message's phase key; a missing phase is logged and the message
+ * is left as is.
+ */
+function responseCreateParamsPatchFetch(userId: string, taskId: string | undefined, db: WorkerDb) {
+  type InputItem = { role?: string; content?: unknown; phase?: string };
+  type ResponseCreateParams = { input?: InputItem[] };
+
+  return async function (input: string | URL | Request, init?: RequestInit) {
+    const rawBody = init?.body;
+    if (typeof rawBody !== 'string') return fetch(input, init);
+
+    const payload = JSON.parse(rawBody) as ResponseCreateParams;
+    if (!Array.isArray(payload.input)) return fetch(input, init);
+
+    const assistantItems = payload.input.filter(
+      item => 'role' in item && item.role === 'assistant'
+    );
+    if (assistantItems.length === 0) return fetch(input, init);
+
+    // Hash every assistant message once, then fetch all phases with a single query.
+    const keyByItem = new Map<InputItem, string>();
+    for (const item of assistantItems) {
+      const key = await phaseKey(userId, taskId, extractMessageTextParts(item.content));
+      keyByItem.set(item, key);
+    }
+    const uniqueKeys = Array.from(new Set(keyByItem.values()));
+    const rows = await db
+      .select({ key: temp_phase.key, phase: temp_phase.value })
+      .from(temp_phase)
+      .where(inArray(temp_phase.key, uniqueKeys));
+    const phaseByKey = new Map(rows.map(r => [r.key, r.phase]));
+
+    for (const item of assistantItems) {
+      const phase = phaseByKey.get(keyByItem.get(item) ?? '');
+      if (!phase) {
+        console.error(
+          `[responseCreateParamsPatchFetch] failed to find phase for userId: ${userId}, taskId: ${taskId}`
+        );
+        continue;
+      }
+      Object.assign(item, { phase });
+    }
+
+    // The items were mutated inside `payload`, so re-serialize it as the new body.
+    return fetch(input, { ...init, body: JSON.stringify(payload) });
+  };
+}
+
+// ─── Public API ────────────────────────────────────────────────────────────────
+
+/**
+ * Serves a chat-completions request through a custom LLM via the AI SDK.
+ *
+ * Non-streaming requests are answered with a JSON body; failures become a JSON error whose
+ * HTTP status is the upstream status when the error is an `APICallError`, else 500. Streaming
+ * requests are answered with a `text/event-stream` of `data:` chunks terminated by
+ * `data: [DONE]`; a failure mid-stream is reported as a final error chunk instead.
+ *
+ * @param customLlm - Custom LLM record (provider, credentials, model ids).
+ * @param request - Incoming chat-completions request.
+ * @param userId - Forwarded to model creation and the stream converter.
+ * @param taskId - Forwarded to model creation and the stream converter.
+ * @param isLegacyExtension - Enables the reasoning-format swap on input and output.
+ * @param db - Database handle; `null` disables phase persistence and lookup.
+ * @returns The HTTP response for the request.
+ */
+export async function customLlmRequest(
+  customLlm: CustomLlm,
+  request: OpenRouterChatCompletionRequest,
+  userId: string,
+  taskId: string | undefined,
+  isLegacyExtension: boolean,
+  db: WorkerDb | null
+): Promise<Response> {
+  const messages = request.messages as OpenRouterChatCompletionsInput;
+  // Mutates the request messages in place before they are converted.
+  if (isLegacyExtension) reverseLegacyExtensionHack(messages);
+
+  const model = createModel(customLlm, userId, taskId, db);
+  const commonParams = buildCommonParams(
+    customLlm,
+    convertMessages(messages),
+    request,
+    isLegacyExtension
+  );
+  // Responses report the public model id, not the provider-side one.
+  const modelId = customLlm.public_id;
+
+  if (!request.stream) {
+    try {
+      const result = await generateText({ model, ...commonParams });
+      const converted = convertGenerateResultToResponse(result, modelId);
+      return Response.json(converted);
+    } catch (e) {
+      console.error('Caught exception while processing non-streaming custom LLM request', e);
+      const status = APICallError.isInstance(e) ? (e.statusCode ?? 500) : 500;
+      const msg = e instanceof Error ? e.message : 'Generation failed';
+      return Response.json({ error: { message: msg, code: status, type: 'error' } }, { status });
+    }
+  }
+
+  const result = streamText({ model, ...commonParams, includeRawChunks: true });
+  const convertStreamPartToChunk = createStreamPartConverter(userId, taskId, modelId, db);
+  const encoder = new TextEncoder();
+
+  const stream = new ReadableStream({
+    async start(controller) {
+      try {
+        for await (const chunk of result.fullStream) {
+          const converted = await convertStreamPartToChunk(chunk);
+          if (converted) {
+            if (isLegacyExtension) {
+              applyLegacyExtensionHack(converted.choices[0]);
+            }
+            controller.enqueue(encoder.encode(`data: ${JSON.stringify(converted)}\n\n`));
+          }
+        }
+        controller.enqueue(encoder.encode('data: [DONE]\n\n'));
+      } catch (e) {
+        console.error('Caught exception while processing streaming custom LLM request', e);
+        const errorChunk = {
+          error: {
+            message: e instanceof Error ? e.message : 'Stream error',
+            code: APICallError.isInstance(e) ? (e.statusCode ?? 500) : 500,
+            ...(APICallError.isInstance(e) && e.responseBody
+              ? { metadata: { raw: e.responseBody } }
+              : {}),
+            type: 'error',
+          },
+        };
+        try {
+          controller.enqueue(encoder.encode(`data: ${JSON.stringify(errorChunk)}\n\n`));
+        } catch {
+          // The stream is no longer writable (for example the consumer cancelled it), so
+          // there is nobody left to deliver the error chunk to.
+        }
+      } finally {
+        try {
+          controller.close();
+        } catch {
+          // close() throws once the stream is already closed or cancelled; nothing to release.
+        }
+      }
+    },
+  });
+
+  return new Response(stream, { status: 200, headers: { 'Content-Type': 'text/event-stream' } });
+}
diff --git a/llm-gateway/src/lib/custom-llm/reasoning-details.ts b/llm-gateway/src/lib/custom-llm/reasoning-details.ts
new file mode 100644
index 000000000..03b8d0d69
--- /dev/null
+++ b/llm-gateway/src/lib/custom-llm/reasoning-details.ts
@@ -0,0 +1,40 @@
+// Port of src/lib/custom-llm/reasoning-details.ts
+// Minimal type definitions needed by customLlmRequest.
+
+import type { ReasoningFormat } from './format';
+
+export enum ReasoningDetailType { // discriminator stored in each detail's `type` field
+  Summary = 'reasoning.summary', // tags ReasoningDetailSummary
+  Encrypted = 'reasoning.encrypted', // tags ReasoningDetailEncrypted
+  Text = 'reasoning.text', // tags ReasoningDetailText
+}
+
+export type ReasoningDetailSummary = { // summary-only reasoning entry
+  type: ReasoningDetailType.Summary;
+  summary: string; // becomes the `text` of the converted AI SDK reasoning part
+  id?: string | null;
+  format?: ReasoningFormat | null;
+  index?: number;
+};
+
+export type ReasoningDetailEncrypted = { // encrypted entry; converts to a part with empty text
+  type: ReasoningDetailType.Encrypted;
+  data: string; // forwarded as provider `redactedData` / `reasoningEncryptedContent`
+  id?: string | null; // when set, forwarded as `itemId` for the OpenAI / xAI formats
+  format?: ReasoningFormat | null; // selects the provider namespace used on conversion
+  index?: number;
+};
+
+export type ReasoningDetailText = { // readable reasoning text plus optional provider fields
+  type: ReasoningDetailType.Text;
+  text?: string | null; // null/undefined is treated as '' when converting to an AI SDK part
+  signature?: string | null; // forwarded as Anthropic `signature` / Google `thoughtSignature`
+  id?: string | null; // forwarded as `itemId` for the OpenAI / xAI formats
+  format?: ReasoningFormat | null; // selects which provider namespace receives the fields
+  index?: number;
+};
+
+export type ReasoningDetailUnion = // any reasoning detail; narrow via the `type` field
+  | ReasoningDetailSummary
+  | ReasoningDetailEncrypted
+  | ReasoningDetailText;
diff --git a/llm-gateway/src/lib/custom-llm/reasoning-provider-metadata.ts b/llm-gateway/src/lib/custom-llm/reasoning-provider-metadata.ts
new file mode 100644
index 000000000..37e318e79
--- /dev/null
+++ b/llm-gateway/src/lib/custom-llm/reasoning-provider-metadata.ts
@@ -0,0 +1,214 @@
+// Port of src/lib/custom-llm/reasoning-provider-metadata.ts
+
+import { ReasoningFormat } from './format';
+import { ReasoningDetailType } from './reasoning-details';
+import type {
+  ReasoningDetailUnion,
+  ReasoningDetailText,
+  ReasoningDetailEncrypted,
+} from './reasoning-details';
+
+type JsonValue = string | number | boolean | null | { [key: string]: JsonValue } | JsonValue[];
+type AiSdkProviderOptions = Record<string, Record<string, JsonValue>>; // provider key → options
+
+export type AiSdkReasoningPart = { // shape produced by reasoningDetailsToAiSdkParts
+  type: 'reasoning';
+  text: string; // '' for parts built from encrypted details
+  providerOptions?: AiSdkProviderOptions; // omitted when no provider-specific data applies
+};
+
+function detailToAiSdkPart(detail: ReasoningDetailUnion): AiSdkReasoningPart | null {
+  switch (detail.type) { // providerOptions is attached only when a builder returned something
+    case ReasoningDetailType.Summary:
+      return { type: 'reasoning', text: detail.summary };
+    case ReasoningDetailType.Encrypted: {
+      const providerOptions = buildEncryptedProviderOptions(detail);
+      return { type: 'reasoning', text: '', ...(providerOptions && { providerOptions }) };
+    }
+    case ReasoningDetailType.Text: {
+      const providerOptions = buildTextProviderOptions(detail);
+      const text = detail.text ?? '';
+      return { type: 'reasoning', text, ...(providerOptions && { providerOptions }) };
+    }
+  }
+}
+
+function buildTextProviderOptions(detail: ReasoningDetailText): AiSdkProviderOptions | null {
+  // Each format forwards exactly one field (signature or id); when that field is missing
+  // there is nothing to send, so null is returned. Unknown or absent formats yield null too.
+  const { format, id, signature } = detail;
+  if (format === ReasoningFormat.AnthropicClaudeV1) {
+    return signature ? { anthropic: { signature } } : null;
+  }
+  if (format === ReasoningFormat.OpenAIResponsesV1) {
+    return id ? { openai: { itemId: id } } : null;
+  }
+  if (format === ReasoningFormat.XAIResponsesV1) {
+    return id ? { xai: { itemId: id } } : null;
+  }
+  if (format === ReasoningFormat.GoogleGeminiV1) {
+    return signature ? { google: { thoughtSignature: signature } } : null;
+  }
+  return null;
+}
+
+function buildEncryptedProviderOptions(
+  detail: ReasoningDetailEncrypted
+): AiSdkProviderOptions | null {
+  const { format, data, id } = detail;
+
+  // Anthropic carries the payload as redactedData and never gets an item id.
+  if (format === ReasoningFormat.AnthropicClaudeV1) {
+    return { anthropic: { redactedData: data } };
+  }
+
+  // OpenAI and xAI share one payload shape and differ only in the provider key.
+  const isOpenAi = format === ReasoningFormat.OpenAIResponsesV1;
+  const isXai = format === ReasoningFormat.XAIResponsesV1;
+  if (!isOpenAi && !isXai) return null;
+
+  // itemId is included only when the detail actually has an id.
+  const inner: Record<string, JsonValue> = { reasoningEncryptedContent: data };
+  if (id) inner.itemId = id;
+  return isOpenAi ? { openai: inner } : { xai: inner };
+}
+
+const FORMAT_TO_PROVIDER_KEY: Partial<Record<ReasoningFormat, string>> = {
+  [ReasoningFormat.AnthropicClaudeV1]: 'anthropic',
+  [ReasoningFormat.OpenAIResponsesV1]: 'openai',
+  [ReasoningFormat.XAIResponsesV1]: 'xai',
+  [ReasoningFormat.GoogleGeminiV1]: 'google',
+}; // format → providerOptions key, used when folding encrypted content into a text part
+
+function mergeEncryptedIntoTextParts(details: ReasoningDetailUnion[]): AiSdkReasoningPart[] {
+  // Pass 1: index encrypted payloads by id (a later duplicate id overwrites an earlier one).
+  const payloadById = new Map<string, string>();
+  for (const entry of details) {
+    if (entry.type !== ReasoningDetailType.Encrypted || !entry.id) continue;
+    payloadById.set(entry.id, entry.data);
+  }
+
+  const mergedIds = new Set<string>();
+  const result: AiSdkReasoningPart[] = [];
+
+  // Pass 2: emit non-encrypted details in order, folding a matching encrypted payload
+  // into text parts whose format maps to a known provider key.
+  for (const entry of details) {
+    if (entry.type === ReasoningDetailType.Encrypted) continue;
+    const converted = detailToAiSdkPart(entry);
+    if (!converted) continue;
+    result.push(converted);
+
+    if (entry.type !== ReasoningDetailType.Text || !entry.id) continue;
+    const payload = payloadById.get(entry.id);
+    const providerKey = entry.format ? FORMAT_TO_PROVIDER_KEY[entry.format] : undefined;
+    if (!payload || !providerKey) continue;
+
+    const current = (converted.providerOptions?.[providerKey] ?? {}) satisfies Record<
+      string,
+      JsonValue
+    >;
+    converted.providerOptions = {
+      ...converted.providerOptions,
+      [providerKey]: { ...current, reasoningEncryptedContent: payload },
+    };
+    mergedIds.add(entry.id);
+  }
+
+  // Pass 3: append encrypted details that were not folded into any text part.
+  for (const entry of details) {
+    if (entry.type !== ReasoningDetailType.Encrypted) continue;
+    if (entry.id && mergedIds.has(entry.id)) continue;
+    const converted = detailToAiSdkPart(entry);
+    if (converted) result.push(converted);
+  }
+
+  return result;
+}
+
+export function reasoningDetailsToAiSdkParts(
+  details: ReasoningDetailUnion[]
+): AiSdkReasoningPart[] {
+  // A single detail in the OpenAI or xAI Responses format switches the whole list to the
+  // merging conversion; otherwise every detail is converted independently, in order.
+  const mergeFormats: Array<ReasoningFormat | null | undefined> = [
+    ReasoningFormat.OpenAIResponsesV1,
+    ReasoningFormat.XAIResponsesV1,
+  ];
+  if (details.some(d => mergeFormats.includes(d.format))) {
+    return mergeEncryptedIntoTextParts(details);
+  }
+  return details
+    .map(d => detailToAiSdkPart(d))
+    .filter((p): p is AiSdkReasoningPart => Boolean(p));
+}
+
+type ProviderMetadata = Record<string, Record<string, unknown>> | undefined; // provider → fields
+
+export function extractSignature(meta: ProviderMetadata): string | null {
+  // Candidates in priority order: Anthropic, then Google, then Vertex. First string wins.
+  const candidates: unknown[] = [
+    meta?.anthropic?.signature,
+    meta?.google?.thoughtSignature,
+    meta?.vertex?.thoughtSignature,
+  ];
+  const found = candidates.find(candidate => typeof candidate === 'string');
+  return typeof found === 'string' ? found : null;
+}
+
+export function extractEncryptedData(meta: ProviderMetadata): string | null {
+  // First string wins: Anthropic redacted data, then OpenAI, then xAI encrypted content.
+  const candidates: unknown[] = [
+    meta?.anthropic?.redactedData,
+    meta?.openai?.reasoningEncryptedContent,
+    meta?.xai?.reasoningEncryptedContent,
+  ];
+  const found = candidates.find(candidate => typeof candidate === 'string');
+  return typeof found === 'string' ? found : null;
+}
+
+export function extractItemId(meta: ProviderMetadata): string | null {
+  // OpenAI's item id takes precedence over xAI's; non-string values are ignored.
+  const fromOpenAi = meta?.openai?.itemId;
+  const fromXai = meta?.xai?.itemId;
+  if (typeof fromOpenAi === 'string') return fromOpenAi;
+  if (typeof fromXai === 'string') return fromXai;
+  return null;
+}
+
+export function extractFormat(meta: ProviderMetadata): ReasoningFormat | null {
+  // The first provider namespace present decides; Vertex metadata maps to the Gemini format.
+  if (meta?.anthropic) return ReasoningFormat.AnthropicClaudeV1;
+  if (meta?.openai) return ReasoningFormat.OpenAIResponsesV1;
+  if (meta?.xai) return ReasoningFormat.XAIResponsesV1;
+  const isGoogleFamily = Boolean(meta?.google || meta?.vertex);
+  return isGoogleFamily ? ReasoningFormat.GoogleGeminiV1 : null;
+}
+
+export function reasoningOutputToDetails(
+  reasoning: ReadonlyArray<{ type: 'reasoning'; text: string; providerMetadata?: ProviderMetadata }>
+): ReasoningDetailUnion[] {
+  // A part yields up to two details: a Text detail when it has non-empty text, followed by
+  // an Encrypted detail when its metadata carries encrypted content.
+  const out: ReasoningDetailUnion[] = [];
+  for (const { text, providerMetadata } of reasoning) {
+    const id = extractItemId(providerMetadata);
+    const format = extractFormat(providerMetadata);
+    const signature = extractSignature(providerMetadata);
+    const data = extractEncryptedData(providerMetadata);
+
+    // id and format are shared by both details and omitted entirely when absent.
+    const shared = { ...(id ? { id } : {}), ...(format ? { format } : {}) };
+
+    if (text) {
+      const textDetail: ReasoningDetailText = { type: ReasoningDetailType.Text, text };
+      if (signature) textDetail.signature = signature;
+      out.push({ ...textDetail, ...shared });
+    }
+
+    if (data) {
+      out.push({ type: ReasoningDetailType.Encrypted, data, ...shared });
+    }
+  }
+  return out;
+}
diff --git a/llm-gateway/src/lib/extract-headers.ts b/llm-gateway/src/lib/extract-headers.ts
new file mode 100644
index 000000000..52017a072
--- /dev/null
+++ b/llm-gateway/src/lib/extract-headers.ts
@@ -0,0 +1,107 @@
+// Header extraction helpers — port of src/lib/llm-proxy-helpers.ts and src/lib/utils.ts.
+// Uses the Fetch API Headers interface (compatible with Cloudflare Workers).
+
+export function extractHeaderAndLimitLength(headers: Headers, name: string): string | null {
+  const raw = headers.get(name); // capped at 500 chars, then trimmed; empty results → null
+  return raw == null ? null : raw.slice(0, 500).trim() || null;
+}
+
+export type FraudDetectionHeaders = { // populated by getFraudDetectionHeaders
+  http_x_forwarded_for: string | null; // 'x-forwarded-for' request header
+  geo_city: string | null; // cf.city when it is a string
+  geo_country: string | null; // cf.country when it is a string
+  geo_latitude: number | null; // cf.latitude parsed as a float
+  geo_longitude: number | null; // cf.longitude parsed as a float
+  ja3_hash: string | null; // cf.botManagement.ja3Hash when it is a string
+  http_user_agent: string | null; // 'user-agent' request header
+};
+
+function parseFloatOrNull(value: unknown): number | null {
+  // Only strings are parsed; anything that does not yield a number maps to null.
+  const parsed = typeof value === 'string' ? parseFloat(value) : NaN;
+  return Number.isNaN(parsed) ? null : parsed;
+}
+
+const str = (value: unknown): string | null => (typeof value !== 'string' ? null : value);
+
+// Safe property access on an unknown value: yields obj[key] only when obj is a non-null
+// object for which `key in obj` holds; every other input produces undefined.
+function prop(obj: unknown, key: string): unknown {
+  const readable = typeof obj === 'object' && obj !== null && key in obj;
+  if (!readable) return undefined;
+  return (obj as Record<string, unknown>)[key];
+}
+
+// Reads geo/fingerprint data from Cloudflare's request.cf object plus two request headers.
+// `cf` is `unknown` to avoid fighting the CfProperties union (IncomingRequestCfProperties |
+// RequestInitCfProperties); at runtime it's always the former on incoming requests.
+export function getFraudDetectionHeaders(headers: Headers, cf: unknown): FraudDetectionHeaders {
+  const botManagement = prop(cf, 'botManagement');
+  return {
+    http_x_forwarded_for: headers.get('x-forwarded-for'),
+    geo_city: str(prop(cf, 'city')),
+    geo_country: str(prop(cf, 'country')),
+    geo_latitude: parseFloatOrNull(prop(cf, 'latitude')),
+    geo_longitude: parseFloatOrNull(prop(cf, 'longitude')),
+    ja3_hash: str(prop(botManagement, 'ja3Hash')),
+    http_user_agent: headers.get('user-agent'),
+  };
+}
+
+// Port of src/lib/normalizeProjectId.ts
+function normalizeProjectId(projectId: string | null): string | null {
+  if (!projectId) return null;
+  const truncated = projectId.substring(0, 256);
+
+  // Repository-style ids collapse to their last path segment (a trailing ".git" is dropped).
+  // The HTTP(S) URL form is tried first, then the SSH "git@host:path" form.
+  const repoPatterns = [
+    /^https?:\/\/[^/]+\/([^\s?#]+?)(?:\.git)?$/i,
+    /^git@[^:]+:([^\s]+?)(?:\.git)?$/i,
+  ];
+
+  for (const pattern of repoPatterns) {
+    const match = truncated.match(pattern);
+    if (!match) continue;
+    const segments = match[1].split('/');
+    return segments[segments.length - 1] ?? null;
+  }
+
+  // Anything that is not a recognised repository URL is kept as-is, already capped at
+  // 256 characters by the truncation above.
+  return truncated;
+}
+
+// Port of src/lib/userAgent.ts (getXKiloCodeVersionNumber)
+function getXKiloCodeVersionNumber(userAgent: string | null | undefined): number | undefined {
+  // Accepts a leading "major[.minor[.patch]][-prerelease]" followed by whitespace or the
+  // end of the string. Missing minor/patch count as 0.
+  const versionPrefix = /^(\d+)(?:\.(\d+))?(?:\.(\d+))?(?:-[a-zA-Z0-9.]+)?(?:\s|$)/;
+  const found = userAgent ? versionPrefix.exec(userAgent) : null;
+  if (!found) return undefined;
+  const component = (group: number): number => (found[group] ? Number(found[group]) : 0);
+  const version = component(1) + component(2) / 1000 + component(3) / 1_000_000;
+  return Number.isNaN(version) ? undefined : version;
+}
+
+export type ProjectHeaders = { // result of extractProjectHeaders
+  fraudHeaders: FraudDetectionHeaders;
+  xKiloCodeVersion: string | null; // raw 'X-KiloCode-Version' header
+  projectId: string | null; // 'X-KiloCode-ProjectId' header after normalizeProjectId
+  numericKiloCodeVersion: number; // parsed from xKiloCodeVersion; 0 when it cannot be parsed
+  taskId: string | null; // 'x-kilocode-taskid' header, length-limited
+  editorName: string | null; // 'x-kilocode-editorname' header, length-limited
+  machineId: string | null; // 'x-kilocode-machineid' header, length-limited
+};
+
+export function extractProjectHeaders(headers: Headers, cf: unknown): ProjectHeaders {
+  const xKiloCodeVersion = headers.get('X-KiloCode-Version'); // raw value, not length-limited
+  return {
+    fraudHeaders: getFraudDetectionHeaders(headers, cf),
+    xKiloCodeVersion,
+    projectId: normalizeProjectId(headers.get('X-KiloCode-ProjectId')),
+    numericKiloCodeVersion: getXKiloCodeVersionNumber(xKiloCodeVersion) ?? 0, // 0 = unparsed
+    taskId: extractHeaderAndLimitLength(headers, 'x-kilocode-taskid'),
+    editorName: extractHeaderAndLimitLength(headers, 'x-kilocode-editorname'),
+    machineId: extractHeaderAndLimitLength(headers, 'x-kilocode-machineid'),
+  };
+}
diff --git a/llm-gateway/src/lib/feature-detection.ts b/llm-gateway/src/lib/feature-detection.ts
new file mode 100644
index 000000000..2d5f87907
--- /dev/null
+++ b/llm-gateway/src/lib/feature-detection.ts
@@ -0,0 +1,35 @@
+// Direct port of src/lib/feature-detection.ts.
+import { z } from 'zod';
+
+export const FEATURE_VALUES = [
+  'vscode-extension',
+  'jetbrains-extension',
+  'autocomplete',
+  'parallel-agent',
+  'managed-indexing',
+  'cli',
+  'cloud-agent',
+  'code-review',
+  'auto-triage',
+  'autofix',
+  'app-builder',
+  'agent-manager',
+  'security-agent',
+  'slack',
+  'discord',
+  'webhook',
+  'kilo-claw',
+  'direct-gateway',
+] as const;
+
+const featureSchema = z.enum(FEATURE_VALUES);
+
+export type FeatureValue = z.infer<typeof featureSchema>;
+
+export const FEATURE_HEADER = 'x-kilocode-feature';
+
+export function validateFeatureHeader(headerValue: string | null): FeatureValue | null {
+  const normalized = headerValue ? headerValue.trim().toLowerCase() : null; // absent → null
+  const parsed = normalized === null ? null : featureSchema.safeParse(normalized);
+  return parsed?.success ? parsed.data : null;
+}
diff --git a/llm-gateway/src/lib/jwt.ts b/llm-gateway/src/lib/jwt.ts
new file mode 100644
index 000000000..5d126b372
--- /dev/null
+++ b/llm-gateway/src/lib/jwt.ts
@@ -0,0 +1,36 @@
+import { verifyKiloToken, extractBearerToken, type KiloTokenPayload } from '@kilocode/worker-utils';
+import { timingSafeEqual } from '@kilocode/encryption';
+
+export { extractBearerToken };
+export type { KiloTokenPayload };
+
+export type JWTVerifyResult = // outcome of verifyGatewayJwt
+  | { ok: true; payload: KiloTokenPayload }
+  | { ok: false; reason: 'invalid' | 'expired' | 'version' }; // 'version': a ZodError was thrown
+
+export async function verifyGatewayJwt(token: string, secret: string): Promise<JWTVerifyResult> {
+  // Verification failures never propagate; they are mapped onto a failure reason instead.
+  let payload: KiloTokenPayload;
+  try {
+    payload = await verifyKiloToken(token, secret);
+  } catch (err) {
+    // Thrown values that are not Error instances are always reported as 'invalid'.
+    if (!(err instanceof Error)) return { ok: false, reason: 'invalid' };
+    // jose uses error.code for JWT-specific errors
+    const { code } = err as { code?: string };
+    if (code === 'ERR_JWT_EXPIRED') return { ok: false, reason: 'expired' };
+    return { ok: false, reason: err.name === 'ZodError' ? 'version' : 'invalid' };
+  }
+  return { ok: true, payload };
+}
+
+// Returns true when the JWT pepper matches the DB pepper.
+// If the DB user has no pepper set, any token is accepted.
+export function isPepperValid(
+  jwtPepper: string | null | undefined,
+  dbPepper: string | null
+): boolean {
+  if (!dbPepper) return true; // nothing stored for the user, so nothing to compare against
+  // A stored pepper demands one in the token; the comparison uses timingSafeEqual.
+  return jwtPepper ? timingSafeEqual(jwtPepper, dbPepper) : false;
+}
diff --git a/llm-gateway/src/lib/kilo-auto-model.ts b/llm-gateway/src/lib/kilo-auto-model.ts
new file mode 100644
index 000000000..2e46c9cef
--- /dev/null
+++ b/llm-gateway/src/lib/kilo-auto-model.ts
@@ -0,0 +1,45 @@
+// Direct port of src/lib/kilo-auto-model.ts.
+// "kilo/auto" is a quasi-model id that resolves to a real model based on the
+// x-kilocode-mode header. The rest of the proxy flow then behaves as if the
+// client had requested the resolved model directly.
+
+const CLAUDE_SONNET = 'anthropic/claude-sonnet-4-20250514';
+const CLAUDE_OPUS = 'anthropic/claude-opus-4-20250514';
+const MINIMAX_FREE = 'minimax/minimax-m2.5:free';
+
+export type ResolvedAutoModel = { // result of resolveAutoModel
+  model: string; // the concrete model id that replaces the "kilo/auto*" quasi-model
+  reasoning?: { effort?: string; max_tokens?: number; exclude?: boolean; enabled?: boolean };
+  verbosity?: 'low' | 'medium' | 'high';
+};
+
+const AUTO_MODEL_IDS = ['kilo/auto', 'kilo/auto-free', 'kilo/auto-small'] as const;
+
+export function isKiloAutoModel(model: string): boolean {
+  return AUTO_MODEL_IDS.some(autoId => autoId === model); // exact, case-sensitive match
+}
+
+const CODE_MODEL: ResolvedAutoModel = { // 'code' mode; also the fallback for unknown modes
+  model: CLAUDE_SONNET,
+  reasoning: { enabled: true },
+  verbosity: 'low',
+};
+
+const MODE_TO_MODEL = new Map<string, ResolvedAutoModel>([ // keyed by trimmed, lower-cased mode
+  ['plan', { model: CLAUDE_OPUS, reasoning: { enabled: true }, verbosity: 'high' }],
+  ['general', { model: CLAUDE_OPUS, reasoning: { enabled: true }, verbosity: 'medium' }],
+  ['architect', { model: CLAUDE_OPUS, reasoning: { enabled: true }, verbosity: 'high' }],
+  ['orchestrator', { model: CLAUDE_OPUS, reasoning: { enabled: true }, verbosity: 'high' }],
+  ['ask', { model: CLAUDE_OPUS, reasoning: { enabled: true }, verbosity: 'high' }],
+  ['debug', { model: CLAUDE_OPUS, reasoning: { enabled: true }, verbosity: 'high' }],
+  ['build', { model: CLAUDE_SONNET, reasoning: { enabled: true }, verbosity: 'medium' }],
+  ['explore', { model: CLAUDE_SONNET, reasoning: { enabled: true }, verbosity: 'medium' }],
+  ['code', CODE_MODEL],
+]);
+
+export function resolveAutoModel(model: string, modeHeader: string | null): ResolvedAutoModel {
+  if (model === 'kilo/auto-small') return { model: 'openai/gpt-5-nano' }; // mode is ignored
+  if (model === 'kilo/auto-free') return { model: MINIMAX_FREE }; // mode is ignored
+  const normalizedMode = (modeHeader ?? '').trim().toLowerCase();
+  return MODE_TO_MODEL.get(normalizedMode) ?? CODE_MODEL; // unknown mode → code default
+}
diff --git a/llm-gateway/src/lib/models.ts b/llm-gateway/src/lib/models.ts
new file mode 100644
index 000000000..762d6dde6
--- /dev/null
+++ b/llm-gateway/src/lib/models.ts
@@ -0,0 +1,154 @@
+// Model classification helpers.
+// Direct port of src/lib/models.ts — pure functions, no side effects.
+
+type KiloFreeModel = { // one Kilo-hosted free model entry
+  public_id: string; // compared by exact equality with the requested model id
+  context_length: number;
+  is_enabled: boolean; // false marks a "dead" free model (see isDeadFreeModel)
+  inference_providers: string[]; // includes 'stealth' for stealth-routed models
+};
+
+// Keep in sync with src/lib/providers/*.ts
+const kiloFreeModels: KiloFreeModel[] = [
+  {
+    public_id: 'corethink:free',
+    context_length: 78_000,
+    is_enabled: true,
+    inference_providers: ['corethink'],
+  },
+  {
+    public_id: 'giga-potato',
+    context_length: 256_000,
+    is_enabled: true,
+    inference_providers: ['stealth'],
+  },
+  {
+    public_id: 'giga-potato-thinking',
+    context_length: 256_000,
+    is_enabled: true,
+    inference_providers: ['stealth'],
+  },
+  {
+    public_id: 'moonshotai/kimi-k2.5:free',
+    context_length: 262_144,
+    is_enabled: true,
+    inference_providers: [],
+  },
+  {
+    public_id: 'minimax/minimax-m2.5:free',
+    context_length: 204_800,
+    is_enabled: true,
+    inference_providers: [],
+  },
+  {
+    public_id: 'x-ai/grok-code-fast-1:optimized:free',
+    context_length: 256_000,
+    is_enabled: false,
+    inference_providers: ['stealth'],
+  },
+  {
+    public_id: 'z-ai/glm-5:free',
+    context_length: 202_800,
+    is_enabled: false,
+    inference_providers: [],
+  },
+];
+
+// Models tested and recommended for Vercel AI Gateway routing.
+// Keep in sync with src/lib/models.ts preferredModels.
+export const preferredModels: string[] = [
+  'kilo/auto',
+  'kilo/auto-free',
+  'minimax/minimax-m2.5:free',
+  'moonshotai/kimi-k2.5:free',
+  'giga-potato-thinking',
+  'arcee-ai/trinity-large-preview:free',
+  'anthropic/claude-opus-4.6',
+  'anthropic/claude-sonnet-4.6',
+  'openai/gpt-5.2',
+  'openai/gpt-5.3-codex',
+  'google/gemini-3.1-pro-preview',
+  'z-ai/glm-5',
+  'x-ai/grok-code-fast-1',
+];
+
+// A model is "free" if it's a Kilo-hosted free model, ends in ':free', is the
+// OpenRouter free catch-all, or is an OpenRouter stealth (alpha/beta) model.
+export function isFreeModel(model: string): boolean {
+  // Checked in order: an enabled Kilo-hosted entry, the ':free' suffix, the OpenRouter
+  // free catch-all id, and finally the OpenRouter stealth naming pattern.
+  if (kiloFreeModels.some(m => m.is_enabled && m.public_id === model)) return true;
+  if (model.endsWith(':free')) return true;
+  if (model === 'openrouter/free') return true;
+  return isOpenRouterStealthModel(model);
+}
+
+// Kilo-hosted free models only (not generic :free OpenRouter models).
+export function isKiloFreeModel(model: string): boolean {
+  return kiloFreeModels.filter(m => m.is_enabled).some(m => m.public_id === model);
+}
+
+// A dead free model has been disabled — return a clear error instead of proxying.
+export function isDeadFreeModel(model: string): boolean {
+  return kiloFreeModels.filter(m => !m.is_enabled).some(m => m.public_id === model);
+}
+
+// Models that are so rate-limited upstream that they're effectively unusable.
+const rateLimitedToDeathModelIds: ReadonlySet<string> = new Set([
+  'arcee-ai/trinity-mini:free',
+  'cognitivecomputations/dolphin-mistral-24b-venice-edition:free',
+  'deepseek/deepseek-r1-0528:free',
+  'google/gemma-3-12b-it:free',
+  'google/gemma-3-27b-it:free',
+  'google/gemma-3-4b-it:free',
+  'google/gemma-3n-e2b-it:free',
+  'google/gemma-3n-e4b-it:free',
+  'liquid/lfm-2.5-1.2b-instruct:free',
+  'liquid/lfm-2.5-1.2b-thinking:free',
+  'meta-llama/llama-3.2-3b-instruct:free',
+  'meta-llama/llama-3.3-70b-instruct:free',
+  'mistralai/mistral-small-3.1-24b-instruct:free',
+  'nousresearch/hermes-3-llama-3.1-405b:free',
+  'nvidia/nemotron-3-nano-30b-a3b:free',
+  'nvidia/nemotron-nano-12b-v2-vl:free',
+  'nvidia/nemotron-nano-9b-v2:free',
+  'openai/gpt-oss-120b:free',
+  'openai/gpt-oss-20b:free',
+  'qwen/qwen3-4b:free',
+  'qwen/qwen3-coder:free',
+  'qwen/qwen3-next-80b-a3b-instruct:free',
+  'upstage/solar-pro-3:free',
+  'z-ai/glm-4.5-air:free',
+]);
+
+export function isRateLimitedToDeath(modelId: string): boolean {
+  return rateLimitedToDeathModelIds.has(modelId); // exact id match, suffix included
+}
+
+function isOpenRouterStealthModel(model: string): boolean {
+  return ['-alpha', '-beta'].some(s => model.endsWith(s)) && model.startsWith('openrouter/');
+}
+
+// Data collection is required for Kilo-hosted free models when prompt training
+// is not explicitly allowed by the provider config.
+export function isDataCollectionRequiredOnKiloCodeOnly(model: string): boolean {
+  return isKiloFreeModel(model); // same predicate: an enabled Kilo-hosted free model
+}
+
+// Returns context_length for a Kilo free model, or undefined for other models.
+export function getKiloFreeModelContextLength(model: string): number | undefined {
+  return kiloFreeModels.find(m => m.public_id === model)?.context_length; // ignores is_enabled
+}
+
+// A Kilo free model routed through a stealth inference provider.
+export function isKiloStealthModel(model: string): boolean {
+  // is_enabled is not consulted: a disabled entry still counts if it routes via 'stealth'.
+  const entries = kiloFreeModels.filter(m => m.public_id === model);
+  return entries.some(m => m.inference_providers.includes('stealth'));
+}
+
+// Strip `:free`, `:exacto` etc. suffixes — port of src/lib/model-utils.ts.
+export function normalizeModelId(modelId: string): string {
+  const cut = modelId.indexOf(':'); // everything from the first colon onward is dropped
+  return cut === -1 ? modelId : modelId.slice(0, cut);
+}
diff --git a/llm-gateway/src/lib/org-restrictions.ts b/llm-gateway/src/lib/org-restrictions.ts
new file mode 100644
index 000000000..1d53078fc
--- /dev/null
+++ b/llm-gateway/src/lib/org-restrictions.ts
@@ -0,0 +1,180 @@
+// Organization balance and model restriction checks.
+// Ports checkOrganizationModelRestrictions from src/lib/llm-proxy-helpers.ts and
+// getBalanceForOrganizationUser from src/lib/organizations/organization-usage.ts.
+// Credit expiration and auto-top-up are deferred background tasks (Phase 6).
+
+import type { WorkerDb } from '@kilocode/db/client';
+import type { OrganizationSettings, OrganizationPlan } from '@kilocode/db/schema-types';
+import {
+  organizations,
+  organization_memberships,
+  organization_user_limits,
+  organization_user_usage,
+} from '@kilocode/db/schema';
+import { and, eq, sql, not } from 'drizzle-orm';
+import { normalizeModelId } from './models';
+
+// Inference providers that a Kilo free model REQUIRES (must all be in provider allow list)
+const kiloFreeModelProviders = new Map<string, string[]>([ // Map: no inherited prototype keys
+  ['corethink:free', ['corethink']],
+  ['giga-potato', ['stealth']],
+  ['giga-potato-thinking', ['stealth']],
+  ['moonshotai/kimi-k2.5:free', []],
+  ['minimax/minimax-m2.5:free', []],
+  ['x-ai/grok-code-fast-1:optimized:free', ['stealth']],
+  ['z-ai/glm-5:free', []],
+]);
+
+function extraRequiredProviders(model: string): string[] {
+  return kiloFreeModelProviders.get(model) ?? []; // ids not in the table require nothing
+}
+
+export type OpenRouterProviderConfig = { // built by checkOrganizationModelRestrictions
+  order?: string[];
+  only?: string[]; // set to the org's provider allow list (enterprise plan only)
+  data_collection?: 'allow' | 'deny'; // copied from settings.data_collection when set
+};
+
+export type OrganizationRestrictionResult = {
+  error: { status: 400 | 401 | 402 | 403 | 404; message: string } | null; // null = allowed
+  providerConfig?: OpenRouterProviderConfig; // undefined when no provider restriction applies
+};
+
+// Validates a requested model against an organization's settings and derives the provider
+// config to forward. Without settings nothing is restricted. The model and provider allow
+// lists are enforced for the 'enterprise' plan only; a violation yields a 404 error result.
+export function checkOrganizationModelRestrictions(params: {
+  modelId: string;
+  settings?: OrganizationSettings;
+  organizationPlan?: OrganizationPlan;
+}): OrganizationRestrictionResult {
+  if (!params.settings) return { error: null };
+
+  const normalizedModelId = normalizeModelId(params.modelId);
+
+  // Model allow list only enforced for Enterprise plans
+  if (params.organizationPlan === 'enterprise') {
+    const modelAllowList = params.settings.model_allow_list ?? [];
+    if (modelAllowList.length > 0) {
+      const isExactMatch = modelAllowList.includes(normalizedModelId);
+      const providerSlug = normalizedModelId.split('/')[0];
+      const wildcardEntry = `${providerSlug}/*`;
+      const isWildcardMatch = modelAllowList.includes(wildcardEntry);
+      if (!isExactMatch && !isWildcardMatch) {
+        return { error: { status: 404, message: 'Model not allowed for your team.' } };
+      }
+    }
+  }
+
+  const providerAllowList = params.settings.provider_allow_list ?? [];
+  const dataCollection = params.settings.data_collection;
+  const providerConfig: OpenRouterProviderConfig = {};
+
+  if (params.organizationPlan === 'enterprise' && providerAllowList.length > 0) {
+    // Look up by the raw id: the required-provider table is keyed by full public ids such as
+    // 'corethink:free', which the suffix-stripped normalized id can never match.
+    const requiredProviders = extraRequiredProviders(params.modelId);
+    if (!requiredProviders.every(p => providerAllowList.includes(p))) {
+      return { error: { status: 404, message: 'Model not allowed for your team.' } };
+    }
+    providerConfig.only = providerAllowList;
+  }
+
+  if (dataCollection) providerConfig.data_collection = dataCollection;
+
+  return {
+    error: null,
+    providerConfig: Object.keys(providerConfig).length > 0 ? providerConfig : undefined,
+  };
+}
+
+export type OrgBalanceAndSettings = { // result of getBalanceAndOrgSettings
+  balance: number; // a microdollar difference divided by 1_000_000
+  settings: OrganizationSettings | undefined; // undefined without an org or a matching row
+  plan: OrganizationPlan | undefined;
+};
+
+export async function getBalanceAndOrgSettings(
+  db: WorkerDb,
+  organizationId: string | undefined,
+  user: { total_microdollars_acquired: number; microdollars_used: number; id: string }
+): Promise<OrgBalanceAndSettings> {
+  // Non-org users: balance is on the user object already
+  if (!organizationId) {
+    const balance = (user.total_microdollars_acquired - user.microdollars_used) / 1_000_000;
+    return { balance, settings: undefined, plan: undefined };
+  }
+
+  const [row] = await db // single query: org totals plus this user's optional daily limit/usage
+    .select({
+      total_microdollars_acquired: organizations.total_microdollars_acquired,
+      microdollars_used: organizations.microdollars_used,
+      settings: organizations.settings,
+      plan: organizations.plan,
+      require_seats: organizations.require_seats,
+      microdollar_limit: organization_user_limits.microdollar_limit,
+      microdollar_usage: organization_user_usage.microdollar_usage,
+    })
+    .from(organizations)
+    .innerJoin( // inner join: without a membership row no result row is produced
+      organization_memberships,
+      eq(organization_memberships.organization_id, organizations.id)
+    )
+    .leftJoin( // optional: this user's 'daily' limit in the organization
+      organization_user_limits,
+      and(
+        eq(organization_user_limits.organization_id, organizations.id),
+        eq(organization_user_limits.kilo_user_id, user.id),
+        eq(organization_user_limits.limit_type, 'daily')
+      )
+    )
+    .leftJoin( // optional: this user's 'daily' usage row dated CURRENT_DATE
+      organization_user_usage,
+      and(
+        eq(organization_user_usage.organization_id, organizations.id),
+        eq(organization_user_usage.kilo_user_id, user.id),
+        eq(organization_user_usage.limit_type, 'daily'),
+        eq(organization_user_usage.usage_date, sql`CURRENT_DATE`)
+      )
+    )
+    .where(
+      and(
+        eq(organizations.id, organizationId),
+        eq(organization_memberships.kilo_user_id, user.id),
+        not(eq(organization_memberships.role, 'billing_manager')) // billing managers excluded
+      )
+    )
+    .limit(1);
+
+  if (!row) { // no matching org/membership row: zero balance and no settings
+    return { balance: 0, settings: undefined, plan: undefined };
+  }
+
+  const orgBalance = (row.total_microdollars_acquired - row.microdollars_used) / 1_000_000;
+
+  if (row.require_seats) { // require_seats orgs: the per-user daily limit is not applied
+    return {
+      balance: orgBalance,
+      settings: row.settings ?? undefined,
+      plan: row.plan ?? undefined,
+    };
+  }
+
+  if (row.microdollar_limit == null) { // no daily limit row for this user
+    return {
+      balance: orgBalance,
+      settings: row.settings ?? undefined,
+      plan: row.plan ?? undefined,
+    };
+  }
+
+  const usageAmount = row.microdollar_usage ?? 0; // no usage row today counts as zero usage
+  const remainingAllowance = (row.microdollar_limit - usageAmount) / 1_000_000;
+  const cappedBalance = Math.min(remainingAllowance, orgBalance); // the smaller of the two
+
+  return {
+    balance: cappedBalance,
+    settings: row.settings ?? undefined,
+    plan: row.plan ?? undefined,
+  };
+}
diff --git a/llm-gateway/src/lib/promotions.ts b/llm-gateway/src/lib/promotions.ts
new file mode 100644
index 000000000..97ecd66cf
--- /dev/null
+++ b/llm-gateway/src/lib/promotions.ts
@@ -0,0 +1,23 @@
+// Promotion helpers — direct port of:
+//   src/lib/code-reviews/core/constants.ts  (isActiveReviewPromo)
+//   src/lib/promotions/cloud-agent-promo.ts (isActiveCloudAgentPromo)
+
+const REVIEW_PROMO_MODEL = 'anthropic/claude-sonnet-4.6';
+const REVIEW_PROMO_END = '2026-02-25T14:00:00Z';
+
+export function isActiveReviewPromo(botId: string | undefined, model: string): boolean {
+  // Applies only to the 'reviewer' bot on the promo model, strictly before the end instant.
+  const eligible = botId === 'reviewer' && model === REVIEW_PROMO_MODEL;
+  return eligible && Date.now() < Date.parse(REVIEW_PROMO_END);
+}
+
+const CLOUD_AGENT_PROMO_MODEL = 'anthropic/claude-sonnet-4.6';
+const CLOUD_AGENT_PROMO_START = '2026-02-26T08:00:00Z';
+const CLOUD_AGENT_PROMO_END = '2026-02-28T08:00:00Z';
+
+export function isActiveCloudAgentPromo(tokenSource: string | undefined, model: string): boolean {
+  // Half-open window [start, end): only 'cloud-agent' tokens on the promo model qualify.
+  if (tokenSource !== 'cloud-agent' || model !== CLOUD_AGENT_PROMO_MODEL) return false;
+  const nowMs = Date.now();
+  return Date.parse(CLOUD_AGENT_PROMO_START) <= nowMs && nowMs < Date.parse(CLOUD_AGENT_PROMO_END);
+}
diff --git a/llm-gateway/src/lib/prompt-info.ts b/llm-gateway/src/lib/prompt-info.ts
new file mode 100644
index 000000000..154f0d236
--- /dev/null
+++ b/llm-gateway/src/lib/prompt-info.ts
@@ -0,0 +1,79 @@
+// Prompt info extraction and token estimation.
+// Port of src/lib/processUsage.ts (extractPromptInfo) and
+// src/lib/llm-proxy-helpers.ts (estimateChatTokens).
+
+import type { OpenRouterChatCompletionRequest } from '../types/request';
+
+/** Truncated prompt metadata returned by `extractPromptInfo`. */
+export type PromptInfo = {
+  // First 100 characters of the joined system/developer message text.
+  system_prompt_prefix: string;
+  // Full length of the joined system/developer message text.
+  system_prompt_length: number;
+  // First 100 characters of the last user message's text.
+  user_prompt_prefix: string;
+};
+
+type MessageContent = string | Array<{ type: string; text?: string }> | null | undefined;
+
+/**
+ * Flattens message content to plain text.
+ *
+ * Strings are returned unchanged; arrays yield the concatenation (no separator)
+ * of every `text` part whose `text` is a string; anything else yields ''.
+ */
+function extractMessageTextContent(content: MessageContent): string {
+  if (typeof content === 'string') return content;
+  if (!Array.isArray(content)) return '';
+
+  let combined = '';
+  for (const part of content) {
+    if (part.type === 'text' && typeof part.text === 'string') {
+      combined += part.text;
+    }
+  }
+  return combined;
+}
+
+/**
+ * Derives prompt metadata from a chat request.
+ *
+ * The system prompt is the text of all `system`/`developer` messages joined by
+ * newlines; the user prompt is the text of the last `user` message. Any error
+ * while reading the body yields an all-empty result instead of throwing.
+ */
+export function extractPromptInfo(body: OpenRouterChatCompletionRequest): PromptInfo {
+  try {
+    const allMessages = body.messages ?? [];
+
+    const systemText = allMessages
+      .filter(msg => msg.role === 'system' || msg.role === 'developer')
+      .map(msg => extractMessageTextContent(msg.content as MessageContent))
+      .join('\n');
+
+    // Remember only the final user message; its text is extracted once below.
+    let finalUserMessage: (typeof allMessages)[number] | undefined;
+    for (const msg of allMessages) {
+      if (msg.role === 'user') finalUserMessage = msg;
+    }
+    const userText = finalUserMessage
+      ? extractMessageTextContent(finalUserMessage.content as MessageContent)
+      : '';
+
+    return {
+      system_prompt_prefix: systemText.slice(0, 100),
+      system_prompt_length: systemText.length,
+      user_prompt_prefix: userText.slice(0, 100),
+    };
+  } catch {
+    return { system_prompt_prefix: '', system_prompt_length: 0, user_prompt_prefix: '' };
+  }
+}
+
+/**
+ * Roughly estimates token counts for a chat request from its text length.
+ *
+ * Adds the length of every string message content, plus `text.length + 1` for
+ * each text part of array contents, then divides the total by 4. The same
+ * figure is reported for input and output and may be fractional.
+ *
+ * @param body - Chat completion request; only `messages` is read.
+ * @returns Zero estimates when `messages` is missing or not an array.
+ */
+export function estimateChatTokens(body: OpenRouterChatCompletionRequest): {
+  estimatedInputTokens: number;
+  estimatedOutputTokens: number;
+} {
+  if (!body.messages || !Array.isArray(body.messages)) {
+    return { estimatedInputTokens: 0, estimatedOutputTokens: 0 };
+  }
+  const overallLength = body.messages.reduce((sum, m) => {
+    const content = m.content;
+    if (typeof content === 'string') return sum + content.length;
+    if (!Array.isArray(content)) return sum;
+
+    let textLength = 0;
+    for (const part of content as unknown[]) {
+      if (typeof part !== 'object' || part === null) continue;
+      const { type, text } = part as { type?: unknown; text?: unknown };
+      // Only count parts that really carry a string; a `text` part without
+      // `text` previously crashed on `.length`.
+      if (type === 'text' && typeof text === 'string') {
+        textLength += text.length + 1;
+      }
+    }
+    return sum + textLength;
+  }, 0);
+  return {
+    estimatedInputTokens: overallLength / 4,
+    estimatedOutputTokens: overallLength / 4,
+  };
+}
diff --git a/llm-gateway/src/lib/provider-hash.ts b/llm-gateway/src/lib/provider-hash.ts
new file mode 100644
index 000000000..caad40bbd
--- /dev/null
+++ b/llm-gateway/src/lib/provider-hash.ts
@@ -0,0 +1,36 @@
+// Provider-specific SHA-256 hash — async Web Crypto port of src/lib/providerHash.ts.
+// The original uses Node.js crypto.createHash; here we use crypto.subtle.digest for
+// Cloudflare Workers (no nodejs_compat dependency needed).
+
+import type { Provider } from './providers';
+
+const HASH_SALT = 'd20250815';
+
+/**
+ * Selects the provider-dependent string mixed into the hash input: the API URL
+ * for custom providers, a fixed phrase for OpenRouter, otherwise the provider id.
+ */
+function getPepper(provider: Provider): string {
+  switch (provider.id) {
+    case 'custom':
+      return provider.apiUrl;
+    case 'openrouter':
+      return 'henk is a boss';
+    default:
+      return provider.id;
+  }
+}
+
+/** Returns the base64-encoded SHA-256 digest of the UTF-8 encoding of `input`. */
+async function sha256Base64(input: string): Promise<string> {
+  const digest = await crypto.subtle.digest('SHA-256', new TextEncoder().encode(input));
+  // btoa needs a binary string, so map each digest byte to one character
+  // (avoids Node.js Buffer).
+  const byteChars = Array.from(new Uint8Array(digest), byte => String.fromCharCode(byte));
+  return btoa(byteChars.join(''));
+}
+
+/**
+ * Produces the provider-scoped SHA-256 hash (base64) used for
+ * safety_identifier / prompt_cache_key values.
+ *
+ * The hashed material is salt + provider pepper + payload, in that order.
+ * Async because Web Crypto's subtle.digest returns a Promise.
+ */
+export async function generateProviderSpecificHash(
+  payload: string,
+  provider: Provider
+): Promise<string> {
+  const material = `${HASH_SALT}${getPepper(provider)}${payload}`;
+  return sha256Base64(material);
+}
diff --git a/llm-gateway/src/lib/provider-specific.ts b/llm-gateway/src/lib/provider-specific.ts
new file mode 100644
index 000000000..41df4b119
--- /dev/null
+++ b/llm-gateway/src/lib/provider-specific.ts
@@ -0,0 +1,460 @@
+// Provider-specific request mutations — port of src/lib/providers/index.ts:applyProviderSpecificLogic
+// and associated provider sub-modules.
+
+import type { OpenRouterChatCompletionRequest, ChatMessage } from '../types/request';
+import type { Provider } from './providers';
+import type { BYOKResult, VercelUserByokInferenceProviderId } from './byok';
+import {
+  VercelUserByokInferenceProviderIdSchema,
+  AutocompleteUserByokProviderIdSchema,
+} from './byok';
+import { getKiloFreeModelWithGateway, getPreferredProviderOrder } from './providers';
+import {
+  hasAttemptCompletionTool,
+  normalizeToolCallIds,
+  dropToolStrictProperties,
+} from './tool-calling';
+
+// --- Model predicates ---
+
+/** True for model ids in the `anthropic/` namespace. */
+function isAnthropicModel(model: string): boolean {
+  return model.startsWith('anthropic/');
+}
+
+/** True for Anthropic Claude Haiku model ids. */
+function isHaikuModel(model: string): boolean {
+  return model.startsWith('anthropic/claude-haiku');
+}
+
+/** True for model ids in the `mistralai/` namespace. */
+function isMistralModel(model: string): boolean {
+  return model.startsWith('mistralai/');
+}
+
+/** True for model ids in the `x-ai/` namespace. */
+function isXaiModel(model: string): boolean {
+  return model.startsWith('x-ai/');
+}
+
+/** True for Google Gemini model ids. */
+function isGeminiModel(model: string): boolean {
+  return model.startsWith('google/gemini');
+}
+
+/** True for model ids in the `moonshotai/` namespace. */
+function isMoonshotModel(model: string): boolean {
+  return model.startsWith('moonshotai/');
+}
+
+/** True for model ids in the `qwen/` namespace. */
+function isQwenModel(model: string): boolean {
+  return model.startsWith('qwen/');
+}
+
+/** True for `openai/` model ids, excluding the `openai/gpt-oss*` family. */
+function isOpenAiModel(model: string): boolean {
+  if (model.startsWith('openai/gpt-oss')) return false;
+  return model.startsWith('openai/');
+}
+
+/** True for model ids in the `z-ai/` namespace. */
+function isZaiModel(model: string): boolean {
+  return model.startsWith('z-ai/');
+}
+
+// --- Anthropic ---
+
+/**
+ * Adds `flag` to the comma-separated `x-anthropic-beta` header, creating the
+ * header when it is absent. Empty values never produce a stray comma.
+ */
+function appendAnthropicBetaHeader(headers: Record<string, string>, flag: string) {
+  const existing = headers['x-anthropic-beta'];
+  if (existing && flag) {
+    headers['x-anthropic-beta'] = `${existing},${flag}`;
+  } else {
+    headers['x-anthropic-beta'] = existing || flag || '';
+  }
+}
+
+/**
+ * Tells whether a message already carries a `cache_control` marker, either on
+ * the message itself or on any of its array content parts.
+ */
+function hasCacheControl(msg: ChatMessage): boolean {
+  if ('cache_control' in msg) return true;
+  if (!Array.isArray(msg.content)) return false;
+  for (const part of msg.content as Array<Record<string, unknown>>) {
+    if ('cache_control' in part) return true;
+  }
+  return false;
+}
+
+/**
+ * Marks a message with an ephemeral `cache_control` breakpoint.
+ *
+ * String content is converted to a single text part carrying the marker; for
+ * array content the marker is set on the last part. Other content is untouched.
+ */
+function setCacheControl(msg: ChatMessage) {
+  if (typeof msg.content === 'string') {
+    msg.content = [{ type: 'text', text: msg.content, cache_control: { type: 'ephemeral' } }];
+    return;
+  }
+  if (!Array.isArray(msg.content)) return;
+
+  const parts = msg.content as Array<Record<string, unknown>>;
+  const finalPart = parts[parts.length - 1];
+  if (finalPart) {
+    finalPart.cache_control = { type: 'ephemeral' };
+  }
+}
+
+/**
+ * Adds cache breakpoints to the first `system` message and to the last
+ * `user`/`tool` message. Does nothing when there is no system message or when
+ * it already carries a `cache_control` marker.
+ */
+function addCacheBreakpoints(messages: ChatMessage[]) {
+  const systemMessage = messages.find(m => m.role === 'system');
+  if (!systemMessage) return;
+  if (hasCacheControl(systemMessage)) return;
+
+  setCacheControl(systemMessage);
+
+  // Search a reversed copy so the first hit is the last matching message.
+  const latestTurn = [...messages].reverse().find(m => m.role === 'user' || m.role === 'tool');
+  if (latestTurn) setCacheControl(latestTurn);
+}
+
+/**
+ * Anthropic-model request tweaks: appends the fine-grained tool streaming beta
+ * flag to the headers, adds prompt-cache breakpoints to the messages, and hands
+ * tool call ids containing a '.' to `normalizeToolCallIds` (no length argument).
+ */
+async function applyAnthropicModelSettings(
+  requestToMutate: OpenRouterChatCompletionRequest,
+  extraHeaders: Record<string, string>
+) {
+  const containsDot = (toolCallId: string) => toolCallId.includes('.');
+
+  appendAnthropicBetaHeader(extraHeaders, 'fine-grained-tool-streaming-2025-05-14');
+  addCacheBreakpoints(requestToMutate.messages);
+  await normalizeToolCallIds(requestToMutate, containsDot, undefined);
+}
+
+// --- xAI ---
+
+/**
+ * Sets the xAI conversation and request id headers. The conversation id reuses
+ * `prompt_cache_key` when present, otherwise a random UUID; the request id is
+ * always a fresh UUID.
+ */
+function applyXaiModelSettings(
+  requestToMutate: OpenRouterChatCompletionRequest,
+  extraHeaders: Record<string, string>
+) {
+  const conversationId = requestToMutate.prompt_cache_key || crypto.randomUUID();
+  const requestId = crypto.randomUUID();
+  extraHeaders['x-grok-conv-id'] = conversationId;
+  extraHeaders['x-grok-req-id'] = requestId;
+}
+
+// --- Google ---
+
+/**
+ * Gemini-on-Vercel schema tweak for the `read_file` tool.
+ *
+ * When the tool's `files.items.line_ranges` schema has both `type` and `items`,
+ * it is rewritten in place to `anyOf: [null, array-of-items]` and the original
+ * `type`/`items` keys are removed. No-op for any provider other than `vercel`.
+ */
+function applyGoogleModelSettings(
+  provider: string,
+  requestToMutate: OpenRouterChatCompletionRequest
+) {
+  if (provider !== 'vercel') return;
+
+  type LineRangesSchema = { type?: unknown; items?: unknown; anyOf?: unknown };
+  type ReadFileParams = {
+    properties?: {
+      files?: { items?: { properties?: { line_ranges?: LineRangesSchema } } };
+    };
+  };
+  type ToolLike = { type?: string; function?: { name?: string; parameters?: unknown } };
+
+  const tools = requestToMutate.tools as ToolLike[] | undefined;
+  const readFile = tools?.find(
+    tool => tool.type === 'function' && tool.function?.name === 'read_file'
+  );
+  if (!readFile) return;
+
+  const params = readFile.function?.parameters as ReadFileParams | undefined;
+  const schema = params?.properties?.files?.items?.properties?.line_ranges;
+  if (!schema?.type || !schema.items) return;
+
+  const itemSchema = schema.items;
+  delete schema.type;
+  delete schema.items;
+  schema.anyOf = [{ type: 'null' }, { type: 'array', items: itemSchema }];
+}
+
+// --- Moonshotai ---
+
+/**
+ * Removes `temperature` from the request in place.
+ * NOTE(review): presumably so the upstream default temperature applies to
+ * Moonshot models — confirm against the original implementation.
+ */
+function applyMoonshotProviderSettings(requestToMutate: OpenRouterChatCompletionRequest) {
+  delete requestToMutate.temperature;
+}
+
+// --- Qwen ---
+
+/**
+ * Caps the output token limits of a request at 32768. A falsy `max_tokens`
+ * and a non-numeric `max_completion_tokens` are left as they are.
+ */
+function applyQwenModelSettings(requestToMutate: OpenRouterChatCompletionRequest) {
+  const { max_tokens: maxTokens, max_completion_tokens: maxCompletionTokens } = requestToMutate;
+
+  if (maxTokens) {
+    requestToMutate.max_tokens = Math.min(maxTokens, 32768);
+  }
+  if (typeof maxCompletionTokens === 'number') {
+    requestToMutate.max_completion_tokens = Math.min(maxCompletionTokens, 32768);
+  }
+}
+
+// --- Mistral ---
+
+/**
+ * Mistral-model request tweaks, applied in place:
+ * default `temperature` to 0.2 when unset, hand tool call ids whose length is
+ * not 9 to `normalizeToolCallIds` with a target length of 9, drop tool `strict`
+ * properties, and force `tool_choice: 'required'` when the attempt-completion
+ * tool is present.
+ */
+async function applyMistralModelSettings(requestToMutate: OpenRouterChatCompletionRequest) {
+  const hasWrongLength = (toolCallId: string) => toolCallId.length !== 9;
+
+  if (requestToMutate.temperature === undefined) requestToMutate.temperature = 0.2;
+
+  await normalizeToolCallIds(requestToMutate, hasWrongLength, 9);
+  dropToolStrictProperties(requestToMutate);
+
+  const mustCallTool = hasAttemptCompletionTool(requestToMutate);
+  if (mustCallTool) requestToMutate.tool_choice = 'required';
+}
+
+/**
+ * Request tweaks for calls sent to the Mistral provider, applied in place.
+ *
+ * Copies `prompt_cache_key` into the `x-affinity` header, removes
+ * `reasoning_details` from every message, deletes a fixed set of top-level
+ * request fields, then applies the generic Mistral model settings.
+ */
+async function applyMistralProviderSettings(
+  requestToMutate: OpenRouterChatCompletionRequest,
+  extraHeaders: Record<string, string>
+) {
+  // Read the cache key before it is deleted below.
+  const affinityKey = requestToMutate.prompt_cache_key;
+  if (affinityKey) {
+    extraHeaders['x-affinity'] = affinityKey;
+  }
+
+  for (const message of requestToMutate.messages) {
+    if ('reasoning_details' in message) {
+      delete (message as Record<string, unknown>).reasoning_details;
+    }
+  }
+
+  const strippedFields = [
+    'reasoning',
+    'reasoning_effort',
+    'transforms',
+    'safety_identifier',
+    'prompt_cache_key',
+    'user',
+    'provider',
+  ];
+  const requestFields = requestToMutate as Record<string, unknown>;
+  for (const field of strippedFields) {
+    delete requestFields[field];
+  }
+
+  await applyMistralModelSettings(requestToMutate);
+}
+
+// --- CoreThink ---
+
+/**
+ * Request tweaks for the CoreThink provider, applied in place: deletes a fixed
+ * set of top-level request fields and removes `reasoning` and
+ * `reasoning_details` from every message.
+ */
+function applyCoreThinkProviderSettings(requestToMutate: OpenRouterChatCompletionRequest) {
+  const requestFields = requestToMutate as Record<string, unknown>;
+  for (const field of [
+    'transforms',
+    'prompt_cache_key',
+    'safety_identifier',
+    'description',
+    'usage',
+  ]) {
+    delete requestFields[field];
+  }
+
+  for (const message of requestToMutate.messages) {
+    const messageFields = message as Record<string, unknown>;
+    if ('reasoning' in message) delete messageFields.reasoning;
+    if ('reasoning_details' in message) delete messageFields.reasoning_details;
+  }
+}
+
+// --- GigaPotato ---
+
+/**
+ * Request tweaks for the GigaPotato provider, applied in place.
+ *
+ * Appends a non-disclosure instruction to the first `system` message (creating
+ * one at the front of the conversation if none exists) and sets `thinking` to
+ * enabled only for the `giga-potato-thinking` model.
+ */
+function applyGigaPotatoProviderSettings(
+  requestedModel: string,
+  requestToMutate: OpenRouterChatCompletionRequest
+) {
+  const nonDisclosureRule = {
+    type: 'text' as const,
+    text: 'You are an AI assistant in Kilo. Your name is Giga Potato. Do not reveal your model size, architecture, or any information that could hint at your origin or capabilities.',
+  };
+
+  const existingSystem = requestToMutate.messages.find(m => m.role === 'system');
+  if (!existingSystem) {
+    requestToMutate.messages.unshift({ role: 'system', content: [nonDisclosureRule] });
+  } else if (Array.isArray(existingSystem.content)) {
+    existingSystem.content.push(nonDisclosureRule);
+  } else {
+    // A non-empty string prompt is kept as the first text part; empty or
+    // missing content is replaced by the rule alone.
+    existingSystem.content = existingSystem.content
+      ? [{ type: 'text', text: existingSystem.content }, nonDisclosureRule]
+      : [nonDisclosureRule];
+  }
+
+  const thinkingMode = requestedModel === 'giga-potato-thinking' ? 'enabled' : 'disabled';
+  requestToMutate.thinking = { type: thinkingMode };
+}
+
+// --- Vercel BYOK ---
+
+// One BYOK credential entry: either an API key (optionally with a custom base
+// URL) or a set of AWS credentials.
+type VercelInferenceProviderConfig = { apiKey?: string; baseURL?: string } | AwsCredentials;
+type AwsCredentials = { accessKeyId: string; secretAccessKey: string; region: string };
+
+/**
+ * Parses a JSON string holding AWS credentials.
+ *
+ * @param input - JSON text expected to describe an object with string
+ *   `accessKeyId`, `secretAccessKey` and `region` fields. Extra fields are kept.
+ * @returns The parsed object.
+ * @throws Error('Failed to parse AWS credentials') when the text is not valid
+ *   JSON, is not an object, or any required field is missing or not a string.
+ *   The message never includes the input, which holds secrets.
+ */
+function parseAwsCredentials(input: string): AwsCredentials {
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(input);
+  } catch {
+    throw new Error('Failed to parse AWS credentials');
+  }
+
+  if (typeof parsed === 'object' && parsed !== null) {
+    const candidate = parsed as Record<string, unknown>;
+    const requiredFields = ['accessKeyId', 'secretAccessKey', 'region'];
+    // Check the value types, not just key presence, so the cast below is sound.
+    if (requiredFields.every(field => typeof candidate[field] === 'string')) {
+      return parsed as AwsCredentials;
+    }
+  }
+  throw new Error('Failed to parse AWS credentials');
+}
+
+/**
+ * Converts one BYOK entry into a Vercel gateway provider key plus its
+ * credential list.
+ *
+ * The `codestral` autocomplete provider maps to `mistral`; every other provider
+ * id is validated by the Vercel provider-id schema (which throws on unknown
+ * ids). `zai` gets a fixed base URL, `bedrock` keys are parsed as JSON AWS
+ * credentials, and everything else is passed as a plain API key.
+ */
+function getVercelInferenceProviderConfig(
+  provider: BYOKResult
+): [VercelUserByokInferenceProviderId, VercelInferenceProviderConfig[]] {
+  const isCodestral = provider.providerId === AutocompleteUserByokProviderIdSchema.enum.codestral;
+  const key = isCodestral
+    ? VercelUserByokInferenceProviderIdSchema.enum.mistral
+    : VercelUserByokInferenceProviderIdSchema.parse(provider.providerId);
+
+  switch (key) {
+    case 'zai':
+      return [
+        key,
+        [{ apiKey: provider.decryptedAPIKey, baseURL: 'https://api.z.ai/api/coding/paas/v4' }],
+      ];
+    case 'bedrock':
+      return [key, [parseAwsCredentials(provider.decryptedAPIKey)]];
+    default:
+      return [key, [{ apiKey: provider.decryptedAPIKey }]];
+  }
+}
+
+// OpenRouter provider slugs whose Vercel gateway name differs. A Map is used
+// so lookups never resolve inherited Object.prototype keys.
+const OPENROUTER_TO_VERCEL_PROVIDER_IDS: ReadonlyMap<string, string> = new Map([
+  ['amazon-bedrock', 'bedrock'],
+  ['google-ai-studio', 'google'],
+  ['google-vertex', 'vertex'],
+  ['z-ai', 'zai'],
+]);
+
+/**
+ * Translates an OpenRouter provider id into the Vercel AI Gateway provider id.
+ *
+ * Anything after the first '/' is dropped and the rest is lower-cased; known
+ * slugs are renamed and unknown ones are returned in their normalized form.
+ *
+ * @param providerId - OpenRouter provider id, possibly client-supplied.
+ * @returns Always a string, even for ids such as `constructor` that collide
+ *   with Object.prototype members.
+ */
+function openRouterToVercelProviderId(providerId: string): string {
+  const slashIndex = providerId.indexOf('/');
+  const normalized = (slashIndex >= 0 ? providerId.slice(0, slashIndex) : providerId).toLowerCase();
+  return OPENROUTER_TO_VERCEL_PROVIDER_IDS.get(normalized) ?? normalized;
+}
+
+/**
+ * Rewrites a request in place for the Vercel AI Gateway.
+ *
+ * - Maps the model id to Vercel's naming.
+ * - For Anthropic models, moves `x-anthropic-beta` to `anthropic-beta` and adds
+ *   the 1M-context beta flag.
+ * - With BYOK keys, restricts the gateway to those providers and passes their
+ *   credentials; otherwise translates the OpenRouter `provider` routing hints.
+ * - Copies `verbosity` to `providerOptions.anthropic.effort` when provider
+ *   options were set.
+ * - Always removes the OpenRouter-style `provider` field.
+ *
+ * Throws when `userByok` is an empty array.
+ */
+function applyVercelSettings(
+  requestedModel: string,
+  requestToMutate: OpenRouterChatCompletionRequest,
+  extraHeaders: Record<string, string>,
+  userByok: BYOKResult[] | null
+) {
+  requestToMutate.model = mapModelIdToVercel(requestedModel);
+
+  if (isAnthropicModel(requestedModel)) {
+    const betaFlags = [extraHeaders['x-anthropic-beta'], 'context-1m-2025-08-07'].filter(Boolean);
+    extraHeaders['anthropic-beta'] = betaFlags.join(',');
+    delete extraHeaders['x-anthropic-beta'];
+  }
+
+  if (userByok) {
+    if (userByok.length === 0) throw new Error('Invalid state: userByok is empty');
+
+    // Group credentials by gateway provider key, keeping entry order.
+    const byokProviders: Record<string, VercelInferenceProviderConfig[]> = {};
+    for (const entry of userByok) {
+      const [key, configs] = getVercelInferenceProviderConfig(entry);
+      const bucket = byokProviders[key] ?? [];
+      byokProviders[key] = bucket.concat(configs);
+    }
+    requestToMutate.providerOptions = {
+      gateway: { only: Object.keys(byokProviders), byok: byokProviders },
+    };
+  } else if (requestToMutate.provider) {
+    const routing = requestToMutate.provider;
+    requestToMutate.providerOptions = {
+      gateway: {
+        only: routing.only?.map(openRouterToVercelProviderId),
+        order: routing.order?.map(openRouterToVercelProviderId),
+        zeroDataRetention: routing.zdr,
+      },
+    };
+  }
+
+  if (requestToMutate.providerOptions && requestToMutate.verbosity) {
+    (requestToMutate.providerOptions as Record<string, unknown>).anthropic = {
+      effort: requestToMutate.verbosity,
+    };
+  }
+
+  delete requestToMutate.provider;
+}
+
+// Model ids whose Vercel name cannot be derived from the prefix rule.
+const VERCEL_HARDCODED_MODEL_IDS: ReadonlyMap<string, string> = new Map([
+  ['arcee-ai/trinity-large-preview:free', 'arcee-ai/trinity-large-preview'],
+  ['mistralai/codestral-2508', 'mistral/codestral'],
+  ['mistralai/devstral-2512', 'mistral/devstral-2'],
+]);
+
+// Vendor prefix (text before the first '/') -> Vercel gateway vendor prefix.
+const VERCEL_VENDOR_PREFIXES: ReadonlyMap<string, string> = new Map([
+  ['anthropic', 'anthropic'],
+  ['google', 'google'],
+  ['openai', 'openai'],
+  ['minimax', 'minimax'],
+  ['mistralai', 'mistral'],
+  ['x-ai', 'xai'],
+  ['z-ai', 'zai'],
+]);
+
+/**
+ * Maps a requested model id to the id used by the Vercel AI Gateway.
+ *
+ * Hard-coded ids win. Otherwise an enabled Kilo free model served through
+ * OpenRouter is first replaced by its internal id, and the vendor prefix is
+ * then renamed when a mapping exists. `openai/gpt-oss*` ids, ids without a '/',
+ * and unknown vendors are returned unchanged.
+ *
+ * Lookups use Maps rather than plain objects because the model id can come
+ * from the client: ids such as `constructor` would otherwise resolve to
+ * inherited Object.prototype members and return a non-string.
+ */
+function mapModelIdToVercel(modelId: string): string {
+  const hardcodedId = VERCEL_HARDCODED_MODEL_IDS.get(modelId);
+  if (hardcodedId) return hardcodedId;
+
+  const kiloFree = getKiloFreeModelWithGateway(modelId);
+  const baseId =
+    kiloFree?.is_enabled && kiloFree.gateway === 'OPENROUTER' ? kiloFree.internal_id : modelId;
+
+  const slashIndex = baseId.indexOf('/');
+  if (slashIndex < 0) return baseId;
+
+  const prefix = baseId.slice(0, slashIndex);
+  const isGptOss = baseId.startsWith('openai/gpt-oss');
+  const vercelProvider = isGptOss ? undefined : VERCEL_VENDOR_PREFIXES.get(prefix);
+  return vercelProvider ? vercelProvider + baseId.slice(slashIndex) : baseId;
+}
+
+// --- Kilo free model internal_id mapping ----
+
+/**
+ * For a known Kilo free model, replaces the request's model with the internal
+ * id and, when the model lists inference providers, restricts routing to them.
+ * Requests for any other model are left untouched.
+ */
+function applyKiloFreeModelSettings(
+  requestedModel: string,
+  requestToMutate: OpenRouterChatCompletionRequest
+) {
+  const freeModel = getKiloFreeModelWithGateway(requestedModel);
+  if (!freeModel) return;
+
+  const { internal_id: internalId, inference_providers: inferenceProviders } = freeModel;
+  requestToMutate.model = internalId;
+  if (inferenceProviders.length > 0) {
+    requestToMutate.provider = { only: inferenceProviders };
+  }
+}
+
+// --- Preferred provider (OpenRouter routing hints) ---
+
+/**
+ * Fills in the preferred provider order for the requested model unless the
+ * request already specifies `provider.order`. Does nothing when the model has
+ * no preferred order.
+ */
+function applyPreferredProvider(
+  requestedModel: string,
+  requestToMutate: OpenRouterChatCompletionRequest
+) {
+  const preferredOrder = getPreferredProviderOrder(requestedModel);
+  if (preferredOrder.length === 0) return;
+
+  const routing = requestToMutate.provider;
+  if (!routing) {
+    requestToMutate.provider = { order: preferredOrder };
+    return;
+  }
+  if (!routing.order) {
+    routing.order = preferredOrder;
+  }
+}
+
+// --- tool_choice: required ---
+
+/**
+ * Forces `tool_choice: 'required'` for selected model families when the
+ * request offers the attempt-completion tool.
+ *
+ * xAI, OpenAI (non gpt-oss) and Gemini models always qualify; Haiku models
+ * qualify only when reasoning is not enabled on the request (enabled flag,
+ * an effort other than 'none', or a positive reasoning token budget).
+ */
+async function applyToolChoiceSetting(
+  requestedModel: string,
+  requestToMutate: OpenRouterChatCompletionRequest
+) {
+  if (!hasAttemptCompletionTool(requestToMutate)) return;
+
+  const reasoning = requestToMutate.reasoning;
+  const isReasoningEnabled =
+    reasoning?.enabled === true ||
+    (reasoning?.effort != null && reasoning.effort !== 'none') ||
+    (reasoning?.max_tokens ?? 0) > 0;
+
+  const alwaysRequiresTool =
+    isXaiModel(requestedModel) || isOpenAiModel(requestedModel) || isGeminiModel(requestedModel);
+  const haikuWithoutReasoning = isHaikuModel(requestedModel) && !isReasoningEnabled;
+
+  if (alwaysRequiresTool || haikuWithoutReasoning) {
+    requestToMutate.tool_choice = 'required';
+  }
+}
+
+// --- Main entry point ---
+
+/**
+ * Applies all model- and provider-specific mutations to an outgoing request.
+ *
+ * Both `requestToMutate` and `extraHeaders` are modified in place. The steps
+ * run in a fixed order and later steps read what earlier ones wrote (for
+ * example the Vercel step consumes the `provider` routing hints and the
+ * `x-anthropic-beta` header set earlier), so the order must be preserved.
+ *
+ * @param provider - Resolved upstream provider; its `id` selects provider steps.
+ * @param requestedModel - Model id as requested by the client; selects model steps.
+ * @param requestToMutate - Request body to rewrite in place.
+ * @param extraHeaders - Header bag to add upstream headers to.
+ * @param userByok - BYOK credentials, forwarded to the Vercel step only.
+ */
+export async function applyProviderSpecificLogic(
+  provider: Provider,
+  requestedModel: string,
+  requestToMutate: OpenRouterChatCompletionRequest,
+  extraHeaders: Record<string, string>,
+  userByok: BYOKResult[] | null
+): Promise<void> {
+  // Swap public free-model ids for internal ids before anything else.
+  applyKiloFreeModelSettings(requestedModel, requestToMutate);
+
+  if (isAnthropicModel(requestedModel)) {
+    await applyAnthropicModelSettings(requestToMutate, extraHeaders);
+  }
+
+  await applyToolChoiceSetting(requestedModel, requestToMutate);
+
+  applyPreferredProvider(requestedModel, requestToMutate);
+
+  if (isXaiModel(requestedModel)) {
+    applyXaiModelSettings(requestToMutate, extraHeaders);
+  }
+
+  if (isGeminiModel(requestedModel)) {
+    applyGoogleModelSettings(provider.id, requestToMutate);
+  }
+
+  if (isMoonshotModel(requestedModel)) {
+    applyMoonshotProviderSettings(requestToMutate);
+  }
+
+  if (isQwenModel(requestedModel)) {
+    applyQwenModelSettings(requestToMutate);
+  }
+
+  if (provider.id === 'gigapotato') {
+    applyGigaPotatoProviderSettings(requestedModel, requestToMutate);
+  }
+
+  if (provider.id === 'corethink') {
+    applyCoreThinkProviderSettings(requestToMutate);
+  }
+
+  // The Mistral provider step already includes the Mistral model settings, so
+  // the model-only step runs just for Mistral models served by other providers.
+  if (provider.id === 'mistral') {
+    await applyMistralProviderSettings(requestToMutate, extraHeaders);
+  } else if (isMistralModel(requestedModel)) {
+    await applyMistralModelSettings(requestToMutate);
+  }
+
+  // NOTE(review): intentionally empty placeholder — no Z.AI-specific mutation
+  // is applied here.
+  if (isZaiModel(requestedModel)) {
+    // Z.AI uses specific routing
+  }
+
+  // Runs last: translates the accumulated model id, headers and routing hints.
+  if (provider.id === 'vercel') {
+    applyVercelSettings(requestedModel, requestToMutate, extraHeaders, userByok);
+  }
+}
diff --git a/llm-gateway/src/lib/providers.ts b/llm-gateway/src/lib/providers.ts
new file mode 100644
index 000000000..c04810a68
--- /dev/null
+++ b/llm-gateway/src/lib/providers.ts
@@ -0,0 +1,309 @@
+// Provider routing — port of src/lib/providers/index.ts.
+// API keys come from Secrets Store bindings (resolved asynchronously at request time).
+
+import type { WorkerDb } from '@kilocode/db/client';
+import { custom_llm, organization_memberships } from '@kilocode/db/schema';
+import type { CustomLlm } from '@kilocode/db/schema';
+import { and, eq } from 'drizzle-orm';
+import type { User } from '@kilocode/db';
+import type { BYOKResult } from './byok';
+import { getModelUserByokProviders, getBYOKforUser, getBYOKforOrganization } from './byok';
+import type { OpenRouterChatCompletionRequest } from '../types/request';
+import type { AnonymousUserContext } from './anonymous';
+import { isAnonymousContext } from './anonymous';
+import { isKiloFreeModel } from './models';
+import { shouldRouteToVercel } from './vercel-routing';
+
+/** Identifiers of the upstream providers a request can be routed to. */
+export type ProviderId =
+  | 'openrouter'
+  | 'gigapotato'
+  | 'corethink'
+  | 'martian'
+  | 'mistral'
+  | 'vercel'
+  | 'custom';
+
+/** Connection details for one upstream provider. */
+export type Provider = {
+  id: ProviderId;
+  // Base URL of the provider's API.
+  apiUrl: string;
+  apiKey: string;
+  // NOTE(review): presumably whether the provider exposes a generation-lookup
+  // endpoint — confirm against the code that reads this flag.
+  hasGenerationEndpoint: boolean;
+};
+
+// Resolved secrets bundle — fetched once per request via Promise.all().
+// Holds the upstream API keys (plus the GigaPotato API URL) consumed by
+// buildProviders, and the key passed to the BYOK lookups in getProvider.
+export type SecretsBundle = {
+  openrouterApiKey: string;
+  gigapotatoApiKey: string;
+  gigapotatoApiUrl: string;
+  corethinkApiKey: string;
+  martianApiKey: string;
+  mistralApiKey: string;
+  vercelAiGatewayApiKey: string;
+  byokEncryptionKey: string;
+};
+
+/**
+ * Builds the table of built-in upstream providers from the resolved secrets.
+ *
+ * Keys are gateway names (OPENROUTER, GIGAPOTATO, CORETHINK, MARTIAN, MISTRAL,
+ * VERCEL_AI_GATEWAY). All URLs are fixed except GigaPotato's, which comes from
+ * the secrets bundle.
+ */
+export function buildProviders(secrets: SecretsBundle): Record<string, Provider> {
+  const define = (
+    id: ProviderId,
+    apiUrl: string,
+    apiKey: string,
+    hasGenerationEndpoint: boolean
+  ): Provider => ({ id, apiUrl, apiKey, hasGenerationEndpoint });
+
+  return {
+    OPENROUTER: define(
+      'openrouter',
+      'https://openrouter.ai/api/v1',
+      secrets.openrouterApiKey,
+      true
+    ),
+    GIGAPOTATO: define('gigapotato', secrets.gigapotatoApiUrl, secrets.gigapotatoApiKey, false),
+    CORETHINK: define(
+      'corethink',
+      'https://api.corethink.ai/v1/code',
+      secrets.corethinkApiKey,
+      false
+    ),
+    MARTIAN: define('martian', 'https://api.withmartian.com/v1', secrets.martianApiKey, false),
+    MISTRAL: define('mistral', 'https://api.mistral.ai/v1', secrets.mistralApiKey, false),
+    VERCEL_AI_GATEWAY: define(
+      'vercel',
+      'https://ai-gateway.vercel.sh/v1',
+      secrets.vercelAiGatewayApiKey,
+      true
+    ),
+  };
+}
+
+// Free model definition. `gateway` names a key of the record returned by
+// buildProviders (e.g. 'OPENROUTER'); getProvider uses it to pick the provider.
+type KiloFreeModelWithGateway = {
+  // Model id as requested by clients.
+  public_id: string;
+  // Model id sent upstream in place of public_id.
+  internal_id: string;
+  display_name: string;
+  context_length: number;
+  max_completion_tokens: number;
+  // Disabled models are skipped by getProvider's free-model routing.
+  is_enabled: boolean;
+  // Capability tags such as 'vision', 'reasoning', 'prompt_cache'.
+  flags: string[];
+  gateway: string;
+  // When non-empty, request routing is restricted to these providers.
+  inference_providers: string[];
+};
+
+// Static catalogue of Kilo free models, looked up by public_id via
+// getKiloFreeModelWithGateway. Both Giga Potato entries share one internal_id;
+// they differ in public_id and flags.
+const kiloFreeModelsWithGateway: KiloFreeModelWithGateway[] = [
+  {
+    public_id: 'corethink:free',
+    internal_id: 'corethink',
+    display_name: 'CoreThink (free)',
+    context_length: 78_000,
+    max_completion_tokens: 8192,
+    is_enabled: true,
+    flags: [],
+    gateway: 'CORETHINK',
+    inference_providers: ['corethink'],
+  },
+  {
+    public_id: 'giga-potato',
+    internal_id: 'ep-20260109111813-hztxv',
+    display_name: 'Giga Potato (free)',
+    context_length: 256_000,
+    max_completion_tokens: 32_000,
+    is_enabled: true,
+    flags: ['prompt_cache', 'vision'],
+    gateway: 'GIGAPOTATO',
+    inference_providers: ['stealth'],
+  },
+  {
+    public_id: 'giga-potato-thinking',
+    internal_id: 'ep-20260109111813-hztxv',
+    display_name: 'Giga Potato Thinking (free)',
+    context_length: 256_000,
+    max_completion_tokens: 32_000,
+    is_enabled: true,
+    flags: ['prompt_cache', 'vision', 'reasoning'],
+    gateway: 'GIGAPOTATO',
+    inference_providers: ['stealth'],
+  },
+  {
+    public_id: 'moonshotai/kimi-k2.5:free',
+    internal_id: 'moonshotai/kimi-k2.5',
+    display_name: 'MoonshotAI: Kimi K2.5 (free)',
+    context_length: 262144,
+    max_completion_tokens: 65536,
+    is_enabled: true,
+    flags: ['reasoning', 'prompt_cache', 'vision'],
+    gateway: 'OPENROUTER',
+    inference_providers: [],
+  },
+  {
+    public_id: 'minimax/minimax-m2.5:free',
+    internal_id: 'minimax/minimax-m2.5',
+    display_name: 'MiniMax M2.5 (free)',
+    context_length: 1_000_000,
+    max_completion_tokens: 40960,
+    is_enabled: true,
+    flags: ['reasoning', 'prompt_cache', 'vision'],
+    gateway: 'OPENROUTER',
+    inference_providers: [],
+  },
+  {
+    public_id: 'x-ai/grok-code-fast-1:optimized:free',
+    internal_id: 'x-ai/grok-code-fast-1:optimized',
+    display_name: 'xAI: Grok Code Fast 1 Optimized (experimental, free)',
+    context_length: 256_000,
+    max_completion_tokens: 10_000,
+    is_enabled: false,
+    flags: ['reasoning', 'prompt_cache'],
+    gateway: 'MARTIAN',
+    inference_providers: ['stealth'],
+  },
+  {
+    public_id: 'z-ai/glm-5:free',
+    internal_id: 'z-ai/glm-5',
+    display_name: 'Z.ai: GLM 5 (free)',
+    context_length: 202800,
+    max_completion_tokens: 131072,
+    is_enabled: false,
+    flags: ['reasoning', 'prompt_cache'],
+    gateway: 'OPENROUTER',
+    inference_providers: [],
+  },
+];
+
+/**
+ * Looks up a Kilo free model by its public id (exact match). Returns the first
+ * matching definition — enabled or not — or undefined when none matches.
+ */
+export function getKiloFreeModelWithGateway(
+  publicId: string
+): KiloFreeModelWithGateway | undefined {
+  for (const definition of kiloFreeModelsWithGateway) {
+    if (definition.public_id === publicId) return definition;
+  }
+  return undefined;
+}
+
+/** Outcome of `getProvider`: the chosen upstream plus routing context. */
+export type ProviderResolutionResult = {
+  provider: Provider;
+  // BYOK credentials when the request is served with user/organization keys; otherwise null.
+  userByok: BYOKResult[] | null;
+  // Custom LLM definition when the request is routed to a custom provider; otherwise null.
+  customLlm: CustomLlm | null;
+};
+
+/**
+ * Resolves which upstream provider serves a request. The first matching rule wins:
+ *
+ * 1. BYOK keys (authenticated users only) -> Vercel AI Gateway with those keys.
+ * 2. `kilo-internal/` custom LLM that lists the organization and whose caller
+ *    is a member of it -> custom provider.
+ * 3. Vercel A/B routing decision -> Vercel AI Gateway.
+ * 4. Enabled Kilo free model -> its configured gateway (Martian is wrapped as
+ *    a custom provider with a synthesized custom LLM definition).
+ * 5. Otherwise OpenRouter.
+ *
+ * @param db - Database handle used for the BYOK, custom LLM and membership lookups.
+ * @param requestedModel - Model id as requested by the client.
+ * @param request - Request body, forwarded to the Vercel routing decision.
+ * @param user - Authenticated user or anonymous context.
+ * @param organizationId - Organization id from the request, if any.
+ * @param secrets - Resolved secrets used to build providers and read BYOK keys.
+ * @param randomSeed - Seed forwarded to the Vercel routing decision.
+ */
+export async function getProvider(
+  db: WorkerDb,
+  requestedModel: string,
+  request: OpenRouterChatCompletionRequest,
+  user: User | AnonymousUserContext,
+  organizationId: string | undefined,
+  secrets: SecretsBundle,
+  randomSeed: string
+): Promise<ProviderResolutionResult> {
+  const providers = buildProviders(secrets);
+
+  // 1. BYOK check (authenticated users only)
+  // NOTE(review): unlike the custom-LLM path below, this branch uses
+  // organizationId without a membership check in this function — confirm it is
+  // validated upstream or inside getBYOKforOrganization.
+  if (!isAnonymousContext(user)) {
+    const modelProviders = await getModelUserByokProviders(db, requestedModel);
+    if (modelProviders.length > 0) {
+      const userByok = organizationId
+        ? await getBYOKforOrganization(
+            db,
+            organizationId,
+            modelProviders,
+            secrets.byokEncryptionKey
+          )
+        : await getBYOKforUser(db, user.id, modelProviders, secrets.byokEncryptionKey);
+      if (userByok) {
+        return { provider: providers.VERCEL_AI_GATEWAY, userByok, customLlm: null };
+      }
+    }
+  }
+
+  // 2. Custom LLM check (kilo-internal/ prefix + organizationId + membership)
+  if (requestedModel.startsWith('kilo-internal/') && organizationId && !isAnonymousContext(user)) {
+    const [customLlmRow] = await db
+      .select()
+      .from(custom_llm)
+      .where(eq(custom_llm.public_id, requestedModel));
+    if (customLlmRow && customLlmRow.organization_ids.includes(organizationId)) {
+      // Verify the user actually belongs to this organization — the organizationId
+      // comes from a client-supplied header and is not otherwise validated.
+      const [membership] = await db
+        .select({ id: organization_memberships.id })
+        .from(organization_memberships)
+        .where(
+          and(
+            eq(organization_memberships.organization_id, organizationId),
+            eq(organization_memberships.kilo_user_id, user.id)
+          )
+        )
+        .limit(1);
+      if (membership) {
+        return {
+          provider: {
+            id: 'custom',
+            apiUrl: customLlmRow.base_url,
+            apiKey: customLlmRow.api_key,
+            hasGenerationEndpoint: true,
+          },
+          userByok: null,
+          customLlm: customLlmRow,
+        };
+      }
+    }
+  }
+
+  // 3. Vercel AI Gateway A/B routing (non-BYOK, non-custom-LLM)
+  if (await shouldRouteToVercel(db, requestedModel, request, randomSeed)) {
+    return { provider: providers.VERCEL_AI_GATEWAY, userByok: null, customLlm: null };
+  }
+
+  // 4. Enabled Kilo free model → route to its configured gateway.
+  //    Martian is wrapped as a custom provider; other gateways are used as-is.
+  const kiloFreeModel = getKiloFreeModelWithGateway(requestedModel);
+  if (kiloFreeModel?.is_enabled) {
+    const gatewayProvider = providers[kiloFreeModel.gateway];
+    if (gatewayProvider?.id === 'martian') {
+      return {
+        provider: { ...gatewayProvider, id: 'custom' },
+        userByok: null,
+        customLlm: {
+          public_id: kiloFreeModel.public_id,
+          internal_id: kiloFreeModel.internal_id,
+          display_name: kiloFreeModel.display_name,
+          context_length: kiloFreeModel.context_length,
+          max_completion_tokens: kiloFreeModel.max_completion_tokens,
+          verbosity: null,
+          provider: 'openai', // xai doesn't support preserved reasoning
+          organization_ids: [],
+          base_url: gatewayProvider.apiUrl,
+          api_key: gatewayProvider.apiKey,
+          reasoning_effort: null,
+          included_tools: null,
+          excluded_tools: null,
+          supports_image_input: kiloFreeModel.flags.includes('vision'),
+          force_reasoning: true,
+          opencode_settings: null,
+        },
+      };
+    }
+
+    // Unknown gateway names fall through to the OpenRouter default below.
+    if (gatewayProvider) {
+      return { provider: gatewayProvider, userByok: null, customLlm: null };
+    }
+  }
+
+  // 5. Default to OpenRouter
+  return { provider: providers.OPENROUTER, userByok: null, customLlm: null };
+}
+
+/**
+ * Returns the preferred OpenRouter inference provider order for a model, based
+ * on the model id's vendor prefix. Models without a preference get an empty
+ * list. A fresh array is returned on every call.
+ */
+export function getPreferredProviderOrder(requestedModel: string): string[] {
+  const preferences: Array<[prefix: string, order: string[]]> = [
+    ['anthropic/', ['amazon-bedrock', 'anthropic']],
+    ['minimax/', ['minimax']],
+    ['mistralai/', ['mistral']],
+    ['moonshotai/', ['moonshotai']],
+    ['z-ai/', ['z-ai']],
+  ];
+  const match = preferences.find(([prefix]) => requestedModel.startsWith(prefix));
+  return match ? match[1] : [];
+}
+
+export { isKiloFreeModel };
diff --git a/llm-gateway/src/lib/rate-limit.ts b/llm-gateway/src/lib/rate-limit.ts
new file mode 100644
index 000000000..96c55ac83
--- /dev/null
+++ b/llm-gateway/src/lib/rate-limit.ts
@@ -0,0 +1,28 @@
+// Rate limiting via Durable Object.
+// Each IP gets its own DO instance for strongly-consistent, atomic
+// check-and-increment with no TOCTOU race conditions.
+
+import { getRateLimitDO } from '../dos/RateLimitDO';
+export type { RateLimitResult } from '../dos/RateLimitDO';
+
+// Minimal environment shape needed to reach the rate-limit Durable Object.
+type DOEnv = { RATE_LIMIT_DO: Parameters<typeof getRateLimitDO>[0]['RATE_LIMIT_DO'] };
+
+/** Asks the rate-limit Durable Object selected by `ip` for its free-model check result. */
+export async function checkFreeModelRateLimit(env: DOEnv, ip: string) {
+  return getRateLimitDO(env, ip).checkFreeModel();
+}
+
+/** Asks the rate-limit Durable Object selected by `ip` for its promotion check result. */
+export async function checkPromotionLimit(env: DOEnv, ip: string) {
+  return getRateLimitDO(env, ip).checkPromotion();
+}
+
+/** Calls `incrementFreeModel` on the rate-limit Durable Object selected by `ip` and waits for it. */
+export async function incrementFreeModelUsage(env: DOEnv, ip: string) {
+  await getRateLimitDO(env, ip).incrementFreeModel();
+}
+
+/** Calls `incrementPromotion` on the rate-limit Durable Object selected by `ip` and waits for it. */
+export async function incrementPromotionUsage(env: DOEnv, ip: string) {
+  await getRateLimitDO(env, ip).incrementPromotion();
+}
diff --git a/llm-gateway/src/lib/response-helpers.ts b/llm-gateway/src/lib/response-helpers.ts
new file mode 100644
index 000000000..a651def7a
--- /dev/null
+++ b/llm-gateway/src/lib/response-helpers.ts
@@ -0,0 +1,88 @@
+// Response helpers — port of src/lib/llm-proxy-helpers.ts (response-side utilities).
+// All functions use plain Fetch API constructs (no Next.js dependencies).
+
+import type { OpenRouterChatCompletionRequest } from '../types/request';
+import { getKiloFreeModelContextLength, isKiloStealthModel } from './models';
+
+// Copies only date/content-type/request-id from upstream (when non-empty), then forces
+// Content-Encoding: identity so that no intermediary re-compresses the stream.
+export function getOutputHeaders(response: Response): Headers {
+  const result = new Headers();
+  ['date', 'content-type', 'request-id'].forEach(name => {
+    const upstreamValue = response.headers.get(name);
+    if (upstreamValue) result.set(name, upstreamValue);
+  });
+  result.set('Content-Encoding', 'identity');
+  return result;
+}
+
+// Re-emits the upstream body for the client. Status and status text are kept
+// as-is; headers are reduced to the set produced by getOutputHeaders().
+export function wrapResponse(response: Response): Response {
+  const { body, status, statusText } = response;
+  const headers = getOutputHeaders(response);
+
+  // The body is passed through as-is (it is not read or buffered here).
+  return new Response(body, { status, statusText, headers });
+}
+
+// ─── BYOK error messages ────────────────────────────────────────────────────
+// Replacement messages for BYOK requests, keyed by the upstream HTTP status code.
+const byokErrorMessages: Partial<Record<number, string>> = {
+  401: '[BYOK] Your API key is invalid or has been revoked. Please check your API key configuration.',
+  402: '[BYOK] Your API account has insufficient funds. Please check your billing details with your API provider.',
+  403: '[BYOK] Your API key does not have permission to access this resource. Please check your API key permissions.',
+  429: '[BYOK] Your API key has hit its rate limit. Please try again later or check your rate limit settings with your API provider.',
+};
+
+// Swaps an upstream error for a clearer one when a better explanation is known.
+// Resolves to the replacement Response, or to undefined when the upstream response
+// should be forwarded unchanged (always the case for statuses below 400).
+export async function makeErrorReadable({
+  requestedModel,
+  request,
+  response,
+  isUserByok,
+}: {
+  requestedModel: string;
+  request: OpenRouterChatCompletionRequest;
+  response: Response;
+  isUserByok: boolean;
+}): Promise<Response | undefined> {
+  const { status } = response;
+  if (status < 400) return undefined;
+
+  // Each replacement is logged and sent as { error, message } with the upstream status.
+  const replaceWith = (text: string): Response => {
+    console.warn(`Responding with ${status} ${text}`);
+    return Response.json({ error: text, message: text }, { status });
+  };
+
+  // BYOK: well-known statuses get a message that points at the user's own API key.
+  const byokMessage = isUserByok ? byokErrorMessages[status] : undefined;
+  if (byokMessage) return replaceWith(byokMessage);
+
+  // Upstream sometimes returns generic or nonsensical errors when the context length
+  // is exceeded, so say it explicitly when the estimate does not fit the model's window.
+  const windowSize = getKiloFreeModelContextLength(requestedModel);
+  if (windowSize) {
+    const approxTokens = estimateTokenCount(request);
+    if (approxTokens >= windowSize) {
+      return replaceWith(
+        `The maximum context length is ${windowSize} tokens. However, about ${approxTokens} tokens were requested.`
+      );
+    }
+  }
+
+  // Stealth models: whatever upstream said is replaced by a fixed, opaque message.
+  return isKiloStealthModel(requestedModel)
+    ? replaceWith('Stealth model unable to process request')
+    : undefined;
+}
+
+// Port of the reference estimateTokenCount: JSON length / 4 plus the output token budget.
+function estimateTokenCount(request: OpenRouterChatCompletionRequest): number {
+  const promptEstimate = JSON.stringify(request).length / 4;
+  const outputBudget = Number(request.max_completion_tokens ?? request.max_tokens ?? 0);
+  return Math.round(promptEstimate + outputBudget);
+}
diff --git a/llm-gateway/src/lib/rewrite-free-model-response.ts b/llm-gateway/src/lib/rewrite-free-model-response.ts
new file mode 100644
index 000000000..def4b995a
--- /dev/null
+++ b/llm-gateway/src/lib/rewrite-free-model-response.ts
@@ -0,0 +1,147 @@
+// SSE stream transformer for Kilo free model responses.
+// Port of src/lib/rewriteModelResponse.ts — removes cost fields and normalises
+// reasoning_details so the client receives a consistent OpenRouter-shaped payload.
+
+import { createParser } from 'eventsource-parser';
+import { getOutputHeaders } from './response-helpers';
+
+// ─── Types (subset of processUsage/rewriteModelResponse types) ───────────────
+
+type OpenRouterUsage = {
+  cost?: number; // deleted by removeCostInfo()
+  cost_details?: unknown; // deleted by removeCostInfo()
+  is_byok?: unknown; // deleted by removeCostInfo()
+};
+
+type MessageWithReasoning = {
+  reasoning_content?: string; // folded into `reasoning`/`reasoning_details`, then deleted
+  reasoning?: string;
+  reasoning_details?: Array<{ type: string; text: string }>;
+  role?: string | null; // a null role is removed from streamed deltas
+  [key: string]: unknown; // other fields are carried through as-is
+};
+
+type ChatCompletionChunk = {
+  model?: string; // replaced with the caller-supplied model name when set
+  choices?: Array<{
+    delta?: MessageWithReasoning & { role?: string | null };
+    [key: string]: unknown;
+  }>; // set to [] when the upstream chunk has no `choices`
+  usage?: OpenRouterUsage;
+  [key: string]: unknown;
+};
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
+const ReasoningDetailType = { Text: 'reasoning.text' } as const;
+
+// Folds a non-empty `reasoning_content` into the OpenRouter-style fields and then
+// removes it. `reasoning` and `reasoning_details` are only filled in when they are
+// not already set. Mutates `message` in place.
+function convertReasoningToOpenRouterFormat(message: MessageWithReasoning) {
+  const { reasoning_content: text } = message;
+  if (!text) return;
+  if (!message.reasoning) message.reasoning = text;
+  if (!message.reasoning_details) {
+    message.reasoning_details = [{ type: ReasoningDetailType.Text, text }];
+  }
+  delete message.reasoning_content;
+}
+
+// Deletes `cost`, `cost_details` and `is_byok` from a usage object, in place.
+function removeCostInfo(usage: OpenRouterUsage) {
+  const strippedFields = ['cost', 'cost_details', 'is_byok'] as const;
+  for (const field of strippedFields) delete usage[field];
+}
+
+// ─── Public API ───────────────────────────────────────────────────────────────
+
+// Rewrites a Kilo free-model upstream response (JSON or SSE) for the client: the model
+// name becomes `model`, cost fields are stripped from usage, reasoning_content is
+// normalised. If the client cancels the returned stream, upstream is cancelled too.
+export async function rewriteFreeModelResponse(
+  response: Response,
+  model: string
+): Promise<Response> {
+  const headers = getOutputHeaders(response);
+  const { status, statusText } = response;
+
+  // Non-streaming (application/json)
+  if (headers.get('content-type')?.includes('application/json')) {
+    type JsonCompletion = {
+      model?: string;
+      choices?: Array<{ message?: MessageWithReasoning }>;
+      usage?: OpenRouterUsage;
+    };
+    const json: JsonCompletion = await response.json();
+    if (json.model) json.model = model;
+    const message = json.choices?.[0]?.message;
+    if (message) convertReasoningToOpenRouterFormat(message);
+    if (json.usage) removeCostInfo(json.usage);
+
+    return Response.json(json, { status, statusText, headers });
+  }
+
+  // Streaming (text/event-stream)
+  const encoder = new TextEncoder();
+  const decoder = new TextDecoder();
+  // Shared with cancel(): the upstream reader, and whether the client has gone away.
+  let upstream: ReadableStreamDefaultReader<Uint8Array> | undefined;
+  let cancelled = false;
+
+  const stream = new ReadableStream({
+    async start(controller) {
+      const reader = (response.body as ReadableStream<Uint8Array> | null)?.getReader();
+      if (!reader) {
+        controller.close();
+        return;
+      }
+      upstream = reader;
+
+      const parser = createParser({
+        onEvent(event) {
+          if (event.data === '[DONE]') return;
+          const chunk = JSON.parse(event.data) as ChatCompletionChunk;
+          if (chunk.model) chunk.model = model;
+          const delta = chunk.choices?.[0]?.delta;
+          if (delta) {
+            if (delta.role === null) delete delta.role;
+            convertReasoningToOpenRouterFormat(delta);
+          }
+
+          // Some APIs omit choices on the usage chunk — ensure OpenCode accepts it
+          if (!chunk.choices) chunk.choices = [];
+          if (chunk.usage) removeCostInfo(chunk.usage);
+          controller.enqueue(encoder.encode('data: ' + JSON.stringify(chunk) + '\n\n'));
+        },
+        onComment() {
+          controller.enqueue(encoder.encode(': KILO PROCESSING\n\n'));
+        },
+      });
+
+      try {
+        while (true) {
+          const { done, value } = await reader.read();
+          if (cancelled) return; // the client is gone; nothing left to deliver
+          if (done) {
+            controller.enqueue(encoder.encode('data: [DONE]\n\n'));
+            controller.close();
+            break;
+          }
+          parser.feed(decoder.decode(value, { stream: true }));
+        }
+      } catch (err) {
+        if (cancelled) return; // failures caused by the cancellation itself are expected
+        console.error('[rewriteFreeModelResponse] stream error', err);
+        controller.error(err);
+      }
+    },
+    // Client disconnected: stop pumping and release the upstream connection.
+    cancel(reason) {
+      cancelled = true;
+      return upstream?.cancel(reason);
+    },
+  });
+
+  return new Response(stream, { status, statusText, headers });
+}
diff --git a/llm-gateway/src/lib/sentry.ts b/llm-gateway/src/lib/sentry.ts
new file mode 100644
index 000000000..4e8023bcc
--- /dev/null
+++ b/llm-gateway/src/lib/sentry.ts
@@ -0,0 +1,14 @@
+// Thin wrapper around @sentry/cloudflare for use in middleware and handlers.
+// The Sentry SDK is initialised by withSentry() in src/index.ts — captureException
+// can be called freely from any code that runs after that wrapping.
+
+import * as Sentry from '@sentry/cloudflare';
+
+// Dedicated llm-gateway Sentry project. DSNs are intentionally public (client bundles embed them).
+export const SENTRY_DSN =
+  'https://0f7c4afba6c991a1eb7efd413b3f4f5f@o4509356317474816.ingest.us.sentry.io/4510981962006528';
+
+// Forwards `err` to Sentry; `extra`, when given, is passed along as the event's `extra` data.
+export function captureException(err: unknown, extra?: Record<string, unknown>): void {
+  Sentry.captureException(err, extra ? { extra } : undefined);
+}
diff --git a/llm-gateway/src/lib/tool-calling.ts b/llm-gateway/src/lib/tool-calling.ts
new file mode 100644
index 000000000..f98ce4c88
--- /dev/null
+++ b/llm-gateway/src/lib/tool-calling.ts
@@ -0,0 +1,136 @@
+// Tool-calling utilities — direct port of src/lib/tool-calling.ts.
+// Uses Web Crypto (crypto.subtle) instead of Node.js crypto.hash for CF Workers.
+
+import type { OpenRouterChatCompletionRequest, ChatMessage } from '../types/request';
+
+type ToolCall = { id: string; type: string; function?: { name?: string } };
+type AssistantMessage = ChatMessage & { role: 'assistant'; tool_calls?: ToolCall[] };
+type ToolMessage = ChatMessage & { role: 'tool'; tool_call_id: string };
+// Narrowing helpers: assistant = role only; tool = role plus a string `tool_call_id`.
+function isAssistantMessage(msg: ChatMessage): msg is AssistantMessage {
+  return msg.role === 'assistant';
+}
+
+function isToolMessage(msg: ChatMessage): msg is ToolMessage {
+  return msg.role === 'tool' && typeof Reflect.get(msg, 'tool_call_id') === 'string';
+}
+
+// Lower-case hex SHA-256 of the id; when `maxIdLength` is given, only that many
+// leading hex characters are returned.
+async function hashToolCallId(
+  toolCallId: string,
+  maxIdLength: number | undefined
+): Promise<string> {
+  const digest = await crypto.subtle.digest('SHA-256', new TextEncoder().encode(toolCallId));
+  let hexDigest = '';
+  for (const byte of new Uint8Array(digest)) hexDigest += byte.toString(16).padStart(2, '0');
+  return maxIdLength === undefined ? hexDigest : hexDigest.slice(0, maxIdLength);
+}
+
+// Removes the `strict` property from the `function` part of every tool whose type is
+// 'function'. Mutates the request in place; a missing `tools` list is a no-op.
+export function dropToolStrictProperties(requestToMutate: OpenRouterChatCompletionRequest) {
+  type StrictableTool = { type?: string; function?: { strict?: unknown } };
+  const tools = (requestToMutate.tools ?? []) as StrictableTool[];
+
+  for (const { type, function: fn } of tools) {
+    if (type === 'function' && fn) delete fn.strict;
+  }
+}
+
+// Replaces every tool-call id accepted by `filter` with its hash (see hashToolCallId),
+// on assistant tool_calls and on tool results alike. Mutates the request in place; a
+// non-array `messages` is a no-op, matching repairTools().
+export async function normalizeToolCallIds(
+  requestToMutate: OpenRouterChatCompletionRequest,
+  filter: (toolCallId: string) => boolean,
+  maxIdLength: number | undefined
+): Promise<void> {
+  if (!Array.isArray(requestToMutate.messages)) return;
+  for (const msg of requestToMutate.messages) {
+    for (const toolCall of isAssistantMessage(msg) ? (msg.tool_calls ?? []) : []) {
+      if (filter(toolCall.id)) toolCall.id = await hashToolCallId(toolCall.id, maxIdLength);
+    }
+    if (isToolMessage(msg) && filter(msg.tool_call_id)) {
+      msg.tool_call_id = await hashToolCallId(msg.tool_call_id, maxIdLength);
+    }
+  }
+}
+
+// True when the request declares a function tool named `attempt_completion`.
+export function hasAttemptCompletionTool(request: OpenRouterChatCompletionRequest): boolean {
+  const tools = (request.tools ?? []) as Array<{ type?: string; function?: { name?: string } }>;
+  return tools.some(t => t.type === 'function' && t.function?.name === 'attempt_completion');
+}
+
+// Splits the conversation into groups: each assistant message opens a new group and
+// collects the non-assistant messages that follow it. The first group has no
+// assistant message and holds whatever precedes the first one (possibly nothing).
+function groupByAssistantMessage(messages: ChatMessage[]) {
+  type Group = { assistantMessage?: AssistantMessage; otherMessages: ChatMessage[] };
+  let current: Group = { assistantMessage: undefined, otherMessages: [] };
+  const groups: Group[] = [current];
+  for (const msg of messages) {
+    if (isAssistantMessage(msg)) {
+      current = { assistantMessage: msg, otherMessages: [] };
+      groups.push(current);
+    } else {
+      current.otherMessages.push(msg);
+    }
+  }
+  return groups;
+}
+
+// Keeps the first tool call seen for each id; later calls with the same id are
+// dropped with a warning. Does nothing when the message has no tool_calls.
+function deduplicateToolUses(assistantMessage: AssistantMessage) {
+  if (!assistantMessage.tool_calls) return;
+  const knownIds = new Set<string>();
+  assistantMessage.tool_calls = assistantMessage.tool_calls.filter(({ id }) => {
+    const isRepeat = knownIds.has(id);
+    if (isRepeat) console.warn(`[repairTools] removing duplicate tool call id ${id}`);
+    knownIds.add(id);
+    return !isRepeat;
+  });
+}
+
+export const ENABLE_TOOL_REPAIR = true;
+
+// Makes tool calls and tool results consistent within each group (one assistant
+// message plus the messages that follow it, see groupByAssistantMessage):
+//   - duplicate tool-call ids on the assistant message are removed;
+//   - every call without a result gets a placeholder result, placed first in the group;
+//   - tool results that match no call of the group, or repeat a result, are dropped.
+// Rewrites `messages` in place; does nothing when `messages` is not an array.
+export function repairTools(requestToMutate: OpenRouterChatCompletionRequest) {
+  if (!Array.isArray(requestToMutate.messages)) return;
+  const groups = groupByAssistantMessage(requestToMutate.messages);
+
+  for (const group of groups) {
+    const { assistantMessage } = group;
+    if (assistantMessage) deduplicateToolUses(assistantMessage);
+
+    // Ids still waiting for their (single) result; collected only after deduplication.
+    const unclaimedIds = new Set<string>();
+    const placeholders: ToolMessage[] = [];
+    for (const call of assistantMessage?.tool_calls ?? []) {
+      unclaimedIds.add(call.id);
+      const answered = group.otherMessages.some(m => isToolMessage(m) && m.tool_call_id === call.id);
+      if (answered) continue;
+      const name = call.function?.name ?? 'unknown';
+      console.warn(`[repairTools] inserting missing result for tool ${name} id ${call.id}`);
+      const content = 'Tool execution was interrupted before completion.';
+      placeholders.push({ role: 'tool', tool_call_id: call.id, content });
+    }
+
+    group.otherMessages = [...placeholders, ...group.otherMessages].filter(msg => {
+      if (!isToolMessage(msg) || unclaimedIds.delete(msg.tool_call_id)) return true;
+      console.warn(`[repairTools] deleting orphan tool result for id ${msg.tool_call_id}`);
+      return false;
+    });
+  }
+
+  requestToMutate.messages = groups.flatMap(({ assistantMessage, otherMessages }) =>
+    assistantMessage ? [assistantMessage, ...otherMessages] : otherMessages
+  );
+}
diff --git a/llm-gateway/src/lib/vercel-routing.ts b/llm-gateway/src/lib/vercel-routing.ts
new file mode 100644
index 000000000..ee544f994
--- /dev/null
+++ b/llm-gateway/src/lib/vercel-routing.ts
@@ -0,0 +1,119 @@
+// Vercel AI Gateway A/B routing — port of src/lib/providers/vercel/index.ts (routing decision only).
+// Determines whether a non-BYOK request should be routed to Vercel instead of OpenRouter.
+
+import type { WorkerDb } from '@kilocode/db/client';
+import { sql } from 'drizzle-orm';
+import { isKiloFreeModel, preferredModels } from './models';
+import { getKiloFreeModelWithGateway } from './providers';
+import type { OpenRouterChatCompletionRequest } from '../types/request';
+
+// Emergency switch — routes ALL eligible models to Vercel. Default: off.
+const ENABLE_UNIVERSAL_VERCEL_ROUTING = false;
+// Error-rate fraction used as the cut-off when comparing OpenRouter and Vercel health.
+const ERROR_RATE_THRESHOLD = 0.5;
+
+// Maps a seed to a stable integer in [0, 100): the first four bytes of its SHA-256
+// digest, read as a big-endian unsigned integer, modulo 100. Same seed, same number.
+async function getRandomNumberLessThan100(randomSeed: string): Promise<number> {
+  const digest = await crypto.subtle.digest('SHA-256', new TextEncoder().encode(randomSeed));
+  const leadingWord = new DataView(digest).getUint32(0, false);
+  return leadingWord % 100;
+}
+
+// Recent (last 10 minutes, non-BYOK) error rate per gateway from microdollar_usage_view.
+// 500ms timeout, 60s cache (via the DB view), fail-open to 0/0; rates are always numbers.
+export async function getGatewayErrorRate(
+  db: WorkerDb
+): Promise<{ openrouter: number; vercel: number }> {
+  const fallback = { openrouter: 0, vercel: 0 };
+  try {
+    const result = await Promise.race([
+      db.execute<{ gateway: string; errorRate: number }>(sql`
+        select
+          provider as "gateway",
+          1.0 * count(*) filter(where has_error = true) / count(*) as "errorRate"
+        from microdollar_usage_view
+        where true
+          and created_at >= now() - interval '10 minutes'
+          and is_user_byok = false
+          and provider in ('openrouter', 'vercel')
+        group by provider
+      `),
+      scheduler.wait(500).then(() => 'timeout' as const),
+    ]);
+    if (result === 'timeout') {
+      console.debug('[getGatewayErrorRate] query timeout');
+      return fallback;
+    }
+    // Postgres `numeric` values can arrive as strings; coerce so callers really get numbers.
+    const rows = result.rows as unknown as Array<{ gateway: string; errorRate: unknown }>;
+    const rateOf = (gateway: string): number =>
+      Number(rows.find(r => r.gateway === gateway)?.errorRate ?? 0) || 0; // NaN → 0
+    return { openrouter: rateOf('openrouter'), vercel: rateOf('vercel') };
+  } catch (e) {
+    console.debug('[getGatewayErrorRate] query error', e);
+    return fallback;
+  }
+}
+
+// Share of eligible traffic (in percent) to send to Vercel: 10 normally, 90 while
+// OpenRouter's error rate is above the threshold and Vercel's is below it.
+async function getVercelRoutingPercentage(db: WorkerDb): Promise<number> {
+  const { openrouter, vercel } = await getGatewayErrorRate(db);
+  const openRouterDegraded = openrouter > ERROR_RATE_THRESHOLD;
+  const vercelHealthy = vercel < ERROR_RATE_THRESHOLD;
+  if (!(openRouterDegraded && vercelHealthy)) return 10;
+
+  console.error(`[getVercelRoutingPercentage] OpenRouter error rate is high: ${openrouter}`);
+  return 90;
+}
+
+// Heuristic: false for `openrouter/…` models and for Kilo free models whose gateway
+// is not OpenRouter (e.g. gigapotato, corethink, martian), which are not available
+// on Vercel; true for everything else.
+function isLikelyAvailableOnAllGateways(requestedModel: string): boolean {
+  if (requestedModel.startsWith('openrouter/')) return false;
+  if (!isKiloFreeModel(requestedModel)) return true;
+
+  const freeModel = getKiloFreeModelWithGateway(requestedModel);
+  return !freeModel || freeModel.gateway === 'OPENROUTER';
+}
+
+// Decides whether a request goes to Vercel instead of OpenRouter. Gates run in order,
+// first match wins; what passes them all is assigned by a seeded percentage roll.
+export async function shouldRouteToVercel(
+  db: WorkerDb,
+  requestedModel: string,
+  request: OpenRouterChatCompletionRequest,
+  randomSeed: string
+): Promise<boolean> {
+  const decide = (routeToVercel: boolean, reason: string): boolean => {
+    console.debug(reason);
+    return routeToVercel;
+  };
+
+  if (request.provider?.data_collection === 'deny') {
+    return decide(false, '[shouldRouteToVercel] not routing: data_collection=deny not supported');
+  }
+  if (!isLikelyAvailableOnAllGateways(requestedModel)) {
+    return decide(false, '[shouldRouteToVercel] model not available on all gateways');
+  }
+  if (ENABLE_UNIVERSAL_VERCEL_ROUTING) {
+    return decide(true, '[shouldRouteToVercel] universal Vercel routing enabled');
+  }
+  // Anthropic models excluded pending fine-grained tool streaming support
+  if (requestedModel.startsWith('anthropic/')) {
+    return decide(false, '[shouldRouteToVercel] Anthropic models excluded');
+  }
+  if (!preferredModels.includes(requestedModel)) {
+    return decide(
+      false,
+      '[shouldRouteToVercel] only preferred models are tested for Vercel routing'
+    );
+  }
+
+  console.debug('[shouldRouteToVercel] randomizing to OpenRouter or Vercel');
+  const roll = await getRandomNumberLessThan100('vercel_routing_' + randomSeed);
+  const vercelPercentage = await getVercelRoutingPercentage(db);
+  return roll < vercelPercentage;
+}
diff --git a/llm-gateway/src/middleware/anonymous-gate.ts b/llm-gateway/src/middleware/anonymous-gate.ts
new file mode 100644
index 000000000..1526454a6
--- /dev/null
+++ b/llm-gateway/src/middleware/anonymous-gate.ts
@@ -0,0 +1,35 @@
+import { createMiddleware } from 'hono/factory';
+import type { HonoContext } from '../types/hono';
+import { isFreeModel } from '../lib/models';
+import { createAnonymousContext } from '../lib/anonymous';
+
+const PAID_MODEL_AUTH_REQUIRED = 'PAID_MODEL_AUTH_REQUIRED';
+
+// Sets the shared `user` context variable, or rejects the request:
+//   - `authUser` present             → that user;
+//   - no `authUser`, free model      → an anonymous context built from the client IP;
+//   - no `authUser`, any other model → 401 with code PAID_MODEL_AUTH_REQUIRED.
+// `authUser` is expected to have been set (or left unset) by the auth middleware,
+// and `resolvedModel` / `clientIp` by earlier middleware.
+export const anonymousGateMiddleware = createMiddleware<HonoContext>(async (c, next) => {
+  const authUser = c.get('authUser');
+
+  // Without an authenticated user, only free models may proceed.
+  if (authUser === undefined && !isFreeModel(c.get('resolvedModel'))) {
+    return c.json(
+      {
+        error: {
+          code: PAID_MODEL_AUTH_REQUIRED,
+          message: 'You need to sign in to use this model.',
+        },
+      },
+      401
+    );
+  }
+
+  // Either the authenticated user or, for a free model, an anonymous stand-in.
+  const user = authUser !== undefined ? authUser : createAnonymousContext(c.get('clientIp'));
+  c.set('user', user);
+
+  return next();
+});
diff --git a/llm-gateway/src/middleware/auth.ts b/llm-gateway/src/middleware/auth.ts
new file mode 100644
index 000000000..3d6aef73f
--- /dev/null
+++ b/llm-gateway/src/middleware/auth.ts
@@ -0,0 +1,50 @@
+import { createMiddleware } from 'hono/factory';
+import { eq } from 'drizzle-orm';
+import { getWorkerDb } from '@kilocode/db/client';
+import { kilocode_users } from '@kilocode/db/schema';
+import type { HonoContext } from '../types/hono';
+import { extractBearerToken } from '@kilocode/worker-utils';
+import { verifyGatewayJwt, isPepperValid } from '../lib/jwt';
+
+const ORGANIZATION_ID_HEADER = 'x-kilocode-organizationid';
+
+// Bearer-token authentication.
+//   - No token: continues without setting anything; the anonymous gate decides later.
+//   - Token fails verification, user row missing, or pepper check fails: 401.
+//   - Otherwise sets `authUser`, `organizationId` (from the request header), `botId`
+//     and `tokenSource` (both from the token payload) and continues.
+export const authMiddleware = createMiddleware<HonoContext>(async (c, next) => {
+  const unauthorized = (message: string) => c.json({ error: { message } }, 401);
+
+  const token = extractBearerToken(c.req.header('Authorization'));
+  if (!token) return next();
+
+  // The signing secret is fetched from the NEXTAUTH_SECRET_PROD binding per request.
+  const secret = await c.env.NEXTAUTH_SECRET_PROD.get();
+  const verification = await verifyGatewayJwt(token, secret);
+  if (!verification.ok) return unauthorized('Invalid or expired token');
+
+  const { kiloUserId, apiTokenPepper, botId, tokenSource } = verification.payload;
+
+  // Look the user up by the id carried in the token.
+  const [user] = await getWorkerDb(c.env.HYPERDRIVE.connectionString)
+    .select()
+    .from(kilocode_users)
+    .where(eq(kilocode_users.id, kiloUserId))
+    .limit(1);
+  if (!user) return unauthorized('User not found');
+
+  // A failed pepper check is reported to the client as a revoked token.
+  if (!isPepperValid(apiTokenPepper, user.api_token_pepper)) {
+    return unauthorized('Token has been revoked');
+  }
+
+  c.set('authUser', user);
+  // The organization id is stored exactly as the header carries it; it is not
+  // checked against the user in this middleware.
+  c.set('organizationId', c.req.header(ORGANIZATION_ID_HEADER) ?? undefined);
+  c.set('botId', botId);
+  c.set('tokenSource', tokenSource);
+
+  return next();
+});
diff --git a/llm-gateway/src/middleware/balance-and-org.ts b/llm-gateway/src/middleware/balance-and-org.ts
new file mode 100644
index 000000000..c6c997b81
--- /dev/null
+++ b/llm-gateway/src/middleware/balance-and-org.ts
@@ -0,0 +1,136 @@
+// Balance and organization checks.
+// Skipped for anonymous users (they can only use free models, already rate-limited above).
+// Skipped for custom LLM requests when the org matches.
+//
+// Checks (in order):
+//   1. User/org balance > 0 for paid model requests
+//   2. Org model/provider allow list restrictions
+//   3. Data collection requirement for Kilo free models
+
+import type { MiddlewareHandler } from 'hono';
+import type { HonoContext } from '../types/hono';
+import { isAnonymousContext } from '../lib/anonymous';
+import { isFreeModel, isDataCollectionRequiredOnKiloCodeOnly } from '../lib/models';
+import {
+  getBalanceAndOrgSettings,
+  checkOrganizationModelRestrictions,
+} from '../lib/org-restrictions';
+import { isActiveReviewPromo, isActiveCloudAgentPromo } from '../lib/promotions';
+import { getWorkerDb, type WorkerDb } from '@kilocode/db/client';
+import { and, eq, gt, notExists, sql } from 'drizzle-orm';
+import { credit_transactions, kilo_pass_issuance_items } from '@kilocode/db/schema';
+
+// Mirrors summarizeUserPayments() in src/lib/creditTransactions.ts.
+// True when the user has at least one credit transaction that is non-free, has a
+// positive amount and is not referenced by any kilo_pass_issuance_items row (so
+// KiloPass bonus credits do not count as a paid top-up).
+async function hasUserMadePaidTopup(db: WorkerDb, userId: string): Promise<boolean> {
+  const linkedToKiloPass = db
+    .select({ id: kilo_pass_issuance_items.id })
+    .from(kilo_pass_issuance_items)
+    .where(eq(kilo_pass_issuance_items.credit_transaction_id, credit_transactions.id));
+  const isPaidTopup = and(
+    eq(credit_transactions.kilo_user_id, userId),
+    eq(credit_transactions.is_free, false),
+    gt(credit_transactions.amount_microdollars, 0),
+    notExists(linkedToKiloPass)
+  );
+
+  const [row] = await db
+    .select({ count: sql<number>`count(*)::int` })
+    .from(credit_transactions)
+    .where(isPaidTopup);
+  return (row?.count ?? 0) > 0;
+}
+
+function isFreePromptTrainingAllowed(
+  provider: { data_collection?: 'allow' | 'deny' } | undefined
+): boolean {
+  return !(provider && provider.data_collection === 'deny'); // only an explicit 'deny' forbids it
+}
+
+// Balance and organization gate for authenticated requests. In order: (1) 402 when the
+// balance is not positive and the model has to be paid for; (2) the organization's
+// model/provider restrictions; (3) 400 when a model that requires data collection is
+// requested with it denied. Anonymous users and org-configured custom LLMs skip all three.
+export const balanceAndOrgCheckMiddleware: MiddlewareHandler<HonoContext> = async (c, next) => {
+  const user = c.get('user');
+  const organizationId = c.get('organizationId');
+  const resolvedModel = c.get('resolvedModel');
+  const requestBody = c.get('requestBody');
+
+  // Anonymous users only access free models, already rate-limited in earlier middleware
+  if (isAnonymousContext(user)) {
+    await next();
+    return;
+  }
+
+  // Custom LLM when the org has explicitly configured it — bypass access checks
+  const customLlm = c.get('customLlm');
+  if (customLlm && organizationId && customLlm.organization_ids.includes(organizationId)) {
+    await next();
+    return;
+  }
+
+  const db = getWorkerDb(c.env.HYPERDRIVE.connectionString);
+  const { balance, settings, plan } = await getBalanceAndOrgSettings(db, organizationId, user);
+
+  // 1. Balance. Free models, BYOK requests and active promos never need credits.
+  const mustPay =
+    balance <= 0 &&
+    !isFreeModel(resolvedModel) &&
+    !c.get('userByok') &&
+    !isActiveReviewPromo(c.get('botId'), resolvedModel) &&
+    !isActiveCloudAgentPromo(c.get('tokenSource'), resolvedModel);
+  if (mustPay) {
+    // Mirror usageLimitExceededResponse(): branch on payment history to choose title/message.
+    const isReturningUser = await hasUserMadePaidTopup(db, user.id);
+    // Reference: FIRST_TOPUP_BONUS_AMOUNT() returns 20 (XL promo ended 2025-10-14); keep in sync.
+    const FIRST_TOPUP_BONUS = 20;
+    const error = isReturningUser
+      ? {
+          title: 'Low Credit Warning!',
+          message: 'Add credits to continue, or switch to a free model',
+        }
+      : {
+          title: 'Paid Model - Credits Required',
+          message: `This is a paid model. To use paid models, you need to add credits. Get $${FIRST_TOPUP_BONUS} free on your first topup!`,
+        };
+    return c.json(
+      { error: { ...error, balance, buyCreditsUrl: 'https://app.kilo.ai/profile' } },
+      402
+    );
+  }
+
+  // 2. Organization model and provider restrictions
+  const restriction = checkOrganizationModelRestrictions({
+    modelId: resolvedModel,
+    settings,
+    organizationPlan: plan,
+  });
+  if (restriction.error) {
+    // The reference modelNotAllowedResponse() uses distinct error/message values.
+    return c.json(
+      {
+        error: restriction.error.message,
+        message: 'The requested model is not allowed for your team.',
+      },
+      restriction.error.status
+    );
+  }
+
+  // Apply provider config from org restrictions to the request body before data-collection check
+  if (restriction.providerConfig) requestBody.provider = restriction.providerConfig;
+
+  // 3. Data collection: rejected when the model requires it and the provider config denies it.
+  const dataCollectionDenied =
+    isDataCollectionRequiredOnKiloCodeOnly(resolvedModel) &&
+    !isFreePromptTrainingAllowed(requestBody.provider);
+  if (dataCollectionDenied) {
+    const error =
+      'Data collection is required for this model. Please enable data collection to use this model or choose another model.';
+    return c.json({ error, message: error }, 400);
+  }
+
+  await next();
+};
diff --git a/llm-gateway/src/middleware/extract-ip.ts b/llm-gateway/src/middleware/extract-ip.ts
new file mode 100644
index 000000000..3c08de554
--- /dev/null
+++ b/llm-gateway/src/middleware/extract-ip.ts
@@ -0,0 +1,25 @@
+import { createMiddleware } from 'hono/factory';
+import type { HonoContext } from '../types/hono';
+
+const MAX_HEADER_LENGTH = 500;
+
+// First MAX_HEADER_LENGTH characters of the value, trimmed; null when nothing is left.
+function limitLength(value: string | null | undefined): string | null {
+  return (value ?? '').slice(0, MAX_HEADER_LENGTH).trim() || null;
+}
+
+// Records the caller's IP (`clientIp`) and the length-limited x-kilocode-mode header
+// (`modeHeader`) on the context; answers 400 when no IP can be determined.
+export const extractIpMiddleware = createMiddleware<HonoContext>(async (c, next) => {
+  // CF-Connecting-IP is the authoritative source on Cloudflare Workers; the first
+  // x-forwarded-for entry is only consulted when that header is absent.
+  const forwardedFor = c.req.header('x-forwarded-for');
+  const clientIp = c.req.header('CF-Connecting-IP') ?? forwardedFor?.split(',')[0]?.trim();
+  if (!clientIp) return c.json({ error: 'Unable to determine client IP' }, 400);
+
+  const modeHeader = limitLength(c.req.header('x-kilocode-mode'));
+  c.set('clientIp', clientIp);
+  c.set('modeHeader', modeHeader);
+
+  await next();
+});
diff --git a/llm-gateway/src/middleware/free-model-rate-limit.ts b/llm-gateway/src/middleware/free-model-rate-limit.ts
new file mode 100644
index 000000000..1f94c939b
--- /dev/null
+++ b/llm-gateway/src/middleware/free-model-rate-limit.ts
@@ -0,0 +1,25 @@
+import { createMiddleware } from 'hono/factory';
+import type { HonoContext } from '../types/hono';
+import { isKiloFreeModel } from '../lib/models';
+import { checkFreeModelRateLimit } from '../lib/rate-limit';
+
+// Applies to ALL requests for Kilo-hosted free models (both anonymous and
+// authenticated): the limiter is consulted with the client IP and a 429 is returned
+// when it reports the request as not allowed. Requests for any other model pass
+// straight through without touching the limiter.
+export const freeModelRateLimitMiddleware = createMiddleware<HonoContext>(async (c, next) => {
+  const isRateLimited =
+    isKiloFreeModel(c.get('resolvedModel')) &&
+    !(await checkFreeModelRateLimit(c.env, c.get('clientIp'))).allowed;
+  if (!isRateLimited) return next();
+
+  // Limit reached: tell the client to wait or to move to a paid model.
+  return c.json(
+    {
+      error: 'Rate limit exceeded',
+      message:
+        'Free model usage limit reached. Please try again later or upgrade to a paid model.',
+    },
+    429
+  );
+});
diff --git a/llm-gateway/src/middleware/log-free-model-usage.ts b/llm-gateway/src/middleware/log-free-model-usage.ts
new file mode 100644
index 000000000..b8a4ffcf0
--- /dev/null
+++ b/llm-gateway/src/middleware/log-free-model-usage.ts
@@ -0,0 +1,65 @@
+import { createMiddleware } from 'hono/factory';
+import type { HonoContext } from '../types/hono';
+import { isKiloFreeModel } from '../lib/models';
+import { isAnonymousContext } from '../lib/anonymous';
+import { incrementFreeModelUsage, incrementPromotionUsage } from '../lib/rate-limit';
+import { getWorkerDb } from '@kilocode/db/client';
+import { free_model_usage } from '@kilocode/db/schema';
+
+// Records a request for a Kilo-hosted free model. Runs after rate limit + auth
+// checks pass.
+//
+// The DB insert into free_model_usage is awaited before the upstream request,
+// matching the reference implementation (route.ts:220) where
+// `await logFreeModelRequest(...)` runs before processing, so the rate-limit entry
+// is counted even if the upstream request fails. A failed insert is logged and the
+// request continues.
+//
+// DO increments are non-blocking (handed to waitUntil) — a worker-specific
+// optimization. Their failures are logged and never reach the client.
+export const logFreeModelUsageMiddleware = createMiddleware<HonoContext>(async (c, next) => {
+  const resolvedModel = c.get('resolvedModel');
+
+  // Only log for Kilo-hosted free models, matching the reference implementation.
+  // OpenRouter :free suffix models are not tracked in free_model_usage.
+  if (!isKiloFreeModel(resolvedModel)) return next();
+
+  const ip = c.get('clientIp');
+  const user = c.get('user');
+
+  // DB insert — awaited before processing, matching the reference.
+  try {
+    await getWorkerDb(c.env.HYPERDRIVE.connectionString)
+      .insert(free_model_usage)
+      .values({
+        ip_address: ip,
+        model: resolvedModel,
+        // Anonymous requests have no user id to record.
+        kilo_user_id: (isAnonymousContext(user) ? undefined : user.id) ?? null,
+      });
+  } catch (err) {
+    console.error('[logFreeModelUsageMiddleware] DB insert failed', err);
+  }
+
+  // DO increments — non-blocking, worker-specific optimization.
+  const logFailure = (label: string) => (err: unknown) => {
+    console.error(label, err);
+  };
+
+  c.executionCtx.waitUntil(
+    Promise.all([
+      incrementFreeModelUsage(c.env, ip).catch(
+        logFailure('[logFreeModelUsageMiddleware] DO increment failed')
+      ),
+      // The promotion counter is only incremented for anonymous users.
+      isAnonymousContext(user)
+        ? incrementPromotionUsage(c.env, ip).catch(
+            logFailure('[logFreeModelUsageMiddleware] promotion DO increment failed')
+          )
+        : Promise.resolve(),
+    ])
+  );
+
+  // The increments keep running in the background while the request proceeds.
+  return next();
+});
diff --git a/llm-gateway/src/middleware/parse-body.ts b/llm-gateway/src/middleware/parse-body.ts
new file mode 100644
index 000000000..55d1e0110
--- /dev/null
+++ b/llm-gateway/src/middleware/parse-body.ts
@@ -0,0 +1,43 @@
+import { createMiddleware } from 'hono/factory';
+import type { HonoContext } from '../types/hono';
+import { validateFeatureHeader, FEATURE_HEADER } from '../lib/feature-detection';
+import type { OpenRouterChatCompletionRequest } from '../types/request';
+import { captureException } from '../lib/sentry';
+
+export const parseBodyMiddleware = createMiddleware<HonoContext>(async (c, next) => {
+  // Parsed as `unknown` first: valid JSON can still be null, a primitive or an
+  // array. Those, like unparseable input (`parsed` stays undefined), get a 400.
+  let parsed: unknown;
+  try {
+    parsed = await c.req.json();
+  } catch (err) {
+    captureException(err, { source: 'llm-gateway-parse-body' });
+  }
+  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
+    return c.json(
+      {
+        error: 'Invalid request',
+        message: 'Could not parse request body. Please ensure it is valid JSON.',
+      },
+      400
+    );
+  }
+  const body = parsed as OpenRouterChatCompletionRequest;
+  // OpenRouter-specific field that we do not support
+  delete body.models;
+
+  if (typeof body.model !== 'string' || body.model.trim().length === 0) {
+    return c.json(
+      { error: 'Model not found', message: 'The requested model could not be found.' },
+      404
+    );
+  }
+
+  // Ensure usage is always returned so background accounting can parse it
+  body.stream_options = { ...(body.stream_options ?? {}), include_usage: true };
+
+  c.set('requestBody', body);
+  c.set('resolvedModel', body.model.trim().toLowerCase());
+  c.set('feature', validateFeatureHeader(c.req.header(FEATURE_HEADER) ?? null));
+  await next();
+});
diff --git a/llm-gateway/src/middleware/promotion-limit.ts b/llm-gateway/src/middleware/promotion-limit.ts
new file mode 100644
index 000000000..42d97b64c
--- /dev/null
+++ b/llm-gateway/src/middleware/promotion-limit.ts
@@ -0,0 +1,32 @@
+import { createMiddleware } from 'hono/factory';
+import type { HonoContext } from '../types/hono';
+import { isAnonymousContext } from '../lib/anonymous';
+import { checkPromotionLimit } from '../lib/rate-limit';
+
+const PROMOTION_MODEL_LIMIT_REACHED = 'PROMOTION_MODEL_LIMIT_REACHED';
+
+// Body returned to anonymous callers once the promotion limiter rejects them.
+const promotionLimitBody = {
+  error: {
+    code: PROMOTION_MODEL_LIMIT_REACHED,
+    message:
+      'Sign up for free to continue and explore 500 other models. ' +
+      'Takes 2 minutes, no credit card required. Or come back later.',
+  },
+};
+
+// Anonymous users are limited to PROMOTION_MAX_REQUESTS per 24h window.
+// Authenticated users skip this check entirely. Rejected callers receive a
+// 401 whose body carries the sign-up prompt above.
+export const promotionLimitMiddleware = createMiddleware<HonoContext>(async (c, next) => {
+  const caller = c.get('user');
+  const isAnonymous = isAnonymousContext(caller);
+  if (isAnonymous) {
+    const verdict = await checkPromotionLimit(c.env, c.get('clientIp'));
+    if (!verdict.allowed) {
+      return c.json(promotionLimitBody, 401);
+    }
+  }
+
+  return next();
+});
diff --git a/llm-gateway/src/middleware/provider-resolution.ts b/llm-gateway/src/middleware/provider-resolution.ts
new file mode 100644
index 000000000..234f17a59
--- /dev/null
+++ b/llm-gateway/src/middleware/provider-resolution.ts
@@ -0,0 +1,65 @@
+import { createMiddleware } from 'hono/factory';
+import type { HonoContext } from '../types/hono';
+import { getProvider } from '../lib/providers';
+import type { SecretsBundle } from '../lib/providers';
+import { getWorkerDb } from '@kilocode/db/client';
+
+// Resolves API keys from Secrets Store, then determines which provider to route to.
+// Sets provider, userByok, customLlm, and secrets on the Hono context.
+export const providerResolutionMiddleware = createMiddleware<HonoContext>(async (c, next) => {
+  // Pre-fetch all secrets in parallel to avoid serial Secrets Store round-trips
+  const [
+    openrouterApiKey,
+    gigapotatoApiKey,
+    corethinkApiKey,
+    martianApiKey,
+    mistralApiKey,
+    vercelAiGatewayApiKey,
+    byokEncryptionKey,
+    gigapotatoApiUrl,
+  ] = await Promise.all([
+    c.env.OPENROUTER_API_KEY.get(),
+    c.env.GIGAPOTATO_API_KEY.get(),
+    c.env.CORETHINK_API_KEY.get(),
+    c.env.MARTIAN_API_KEY.get(),
+    c.env.MISTRAL_API_KEY.get(),
+    c.env.VERCEL_AI_GATEWAY_API_KEY.get(),
+    c.env.BYOK_ENCRYPTION_KEY.get(),
+    c.env.GIGAPOTATO_API_URL.get(),
+  ]);
+
+  const secrets: SecretsBundle = {
+    openrouterApiKey,
+    gigapotatoApiKey,
+    gigapotatoApiUrl,
+    corethinkApiKey,
+    martianApiKey,
+    mistralApiKey,
+    vercelAiGatewayApiKey,
+    byokEncryptionKey,
+  };
+
+  const db = getWorkerDb(c.env.HYPERDRIVE.connectionString);
+
+  // Random seed for Vercel A/B routing — same as reference: taskId || user.id.
+  // `||` rather than `??` so an empty task-id header also falls back to the user id.
+  const user = c.get('user');
+  const randomSeed = c.req.header('x-kilocode-taskid') || user.id;
+
+  const { provider, userByok, customLlm } = await getProvider(
+    db,
+    c.get('resolvedModel'),
+    c.get('requestBody'),
+    user,
+    c.get('organizationId'),
+    secrets,
+    randomSeed
+  );
+
+  c.set('provider', provider);
+  c.set('userByok', userByok);
+  c.set('customLlm', customLlm);
+  c.set('secrets', secrets);
+
+  return next();
+});
diff --git a/llm-gateway/src/middleware/request-timing.ts b/llm-gateway/src/middleware/request-timing.ts
new file mode 100644
index 000000000..8fb64f143
--- /dev/null
+++ b/llm-gateway/src/middleware/request-timing.ts
@@ -0,0 +1,7 @@
+import { createMiddleware } from 'hono/factory';
+import type { HonoContext } from '../types/hono';
+
+export const requestTimingMiddleware = createMiddleware<HonoContext>(async (c, next) => {
+  c.set('requestStartedAt', performance.now()); // start mark, stored before anything else runs
+  return next();
+});
diff --git a/llm-gateway/src/middleware/request-transform.ts b/llm-gateway/src/middleware/request-transform.ts
new file mode 100644
index 000000000..e12c8db48
--- /dev/null
+++ b/llm-gateway/src/middleware/request-transform.ts
@@ -0,0 +1,66 @@
+// Request transformation — the final mutation pass before the upstream fetch.
+//
+// Sets:
+//   1. requestBody.safety_identifier + requestBody.user (provider-specific SHA-256 hash)
+//   2. requestBody.prompt_cache_key (if taskId header present)
+//   3. Repairs malformed tool schemas (ENABLE_TOOL_REPAIR flag)
+//   4. Applies provider-specific mutations (Anthropic, xAI, Mistral, etc.)
+//
+// Also extracts per-request header values and stores them on context for
+// background tasks (fraudHeaders, projectId, taskId, etc.).
+
+import type { MiddlewareHandler } from 'hono';
+import type { HonoContext } from '../types/hono';
+import { generateProviderSpecificHash } from '../lib/provider-hash';
+import { ENABLE_TOOL_REPAIR, repairTools } from '../lib/tool-calling';
+import { applyProviderSpecificLogic } from '../lib/provider-specific';
+import { extractProjectHeaders } from '../lib/extract-headers';
+
+export const requestTransformMiddleware: MiddlewareHandler<HonoContext> = async (c, next) => {
+  const body = c.get('requestBody');
+  const upstream = c.get('provider');
+  const userId = c.get('user').id;
+
+  // Header-derived values (plus whatever extractProjectHeaders reads from the
+  // Cloudflare request.cf object) are published on the context one by one so
+  // that later stages can read them.
+  const {
+    fraudHeaders,
+    projectId,
+    taskId,
+    editorName,
+    machineId,
+    xKiloCodeVersion,
+    numericKiloCodeVersion,
+  } = extractProjectHeaders(c.req.raw.headers, c.req.raw.cf);
+  c.set('fraudHeaders', fraudHeaders);
+  c.set('projectId', projectId);
+  c.set('taskId', taskId);
+  c.set('editorName', editorName);
+  c.set('machineId', machineId);
+  c.set('xKiloCodeVersion', xKiloCodeVersion);
+  c.set('numericKiloCodeVersion', numericKiloCodeVersion);
+
+  // One provider-specific hash of the user id fills both safety_identifier and
+  // the deprecated `user` field that OpenRouter still expects.
+  const hashedUserId = await generateProviderSpecificHash(userId, upstream);
+  body.safety_identifier = hashedUserId;
+  body.user = hashedUserId;
+
+  // With a task id present, the cache key is the hash of userId + taskId.
+  if (taskId) {
+    const cacheKeySeed = userId + taskId;
+    body.prompt_cache_key = await generateProviderSpecificHash(cacheKeySeed, upstream);
+  }
+
+  // Optionally repair malformed tool schemas before the body goes upstream.
+  if (ENABLE_TOOL_REPAIR) repairTools(body);
+
+  // Provider-specific mutations may rewrite the body and add upstream headers.
+  const extraHeaders: Record<string, string> = {};
+  const model = c.get('resolvedModel');
+  await applyProviderSpecificLogic(upstream, model, body, extraHeaders, c.get('userByok'));
+  c.set('extraHeaders', extraHeaders);
+
+  await next();
+};
diff --git a/llm-gateway/src/middleware/request-validation.ts b/llm-gateway/src/middleware/request-validation.ts
new file mode 100644
index 000000000..085576646
--- /dev/null
+++ b/llm-gateway/src/middleware/request-validation.ts
@@ -0,0 +1,45 @@
+// Request validation — checks max_tokens, dead free models, and rate-limited-to-death models.
+// These checks happen after provider resolution but before balance/org checks.
+
+import type { MiddlewareHandler } from 'hono';
+import type { HonoContext } from '../types/hono';
+import { isDeadFreeModel, isRateLimitedToDeath } from '../lib/models';
+
+const MAX_TOKENS_LIMIT = 99_999_999_999;
+
+export const requestValidationMiddleware: MiddlewareHandler<HonoContext> = async (c, next) => {
+  const requestBody = c.get('requestBody');
+  const model = c.get('resolvedModel');
+
+  // max_tokens wins when present; max_completion_tokens is only a fallback,
+  // and only when it is actually a number.
+  let tokenBudget = requestBody.max_tokens;
+  if (tokenBudget == null && typeof requestBody.max_completion_tokens === 'number') {
+    tokenBudget = requestBody.max_completion_tokens;
+  }
+  if (tokenBudget && tokenBudget > MAX_TOKENS_LIMIT) {
+    const userId = c.get('user').id;
+    console.warn(`SECURITY: Max tokens limit exceeded: ${userId}`, { maxTokens: tokenBudget });
+    return c.json(
+      {
+        error: 'Service Unavailable',
+        message: 'The service is temporarily unavailable. Please try again later.',
+      },
+      503
+    );
+  }
+
+  if (isDeadFreeModel(model)) {
+    const notice = 'The alpha period for this model has ended.';
+    return c.json({ error: notice, message: notice }, 404);
+  }
+
+  if (isRateLimitedToDeath(model)) {
+    return c.json(
+      { error: 'Model not found', message: 'The requested model could not be found.' },
+      404
+    );
+  }
+
+  await next();
+};
diff --git a/llm-gateway/src/middleware/resolve-auto-model.ts b/llm-gateway/src/middleware/resolve-auto-model.ts
new file mode 100644
index 000000000..be5c83775
--- /dev/null
+++ b/llm-gateway/src/middleware/resolve-auto-model.ts
@@ -0,0 +1,24 @@
+import { createMiddleware } from 'hono/factory';
+import type { HonoContext } from '../types/hono';
+import { isKiloAutoModel, resolveAutoModel } from '../lib/kilo-auto-model';
+
+export const resolveAutoModelMiddleware = createMiddleware<HonoContext>(async (c, next) => {
+  const body = c.get('requestBody');
+  const requestedModel = c.get('resolvedModel');
+
+  // Anything that is not a kilo/auto* id passes through unchanged; autoModel is
+  // still set (to null) so that later readers always find a value.
+  if (!isKiloAutoModel(requestedModel)) {
+    c.set('autoModel', null);
+    return next();
+  }
+
+  // Keep the original auto id, then merge the resolved fields into the request
+  // body and publish the concrete model (lowercased) for downstream stages.
+  const concrete = resolveAutoModel(requestedModel, c.get('modeHeader'));
+  c.set('autoModel', requestedModel);
+  Object.assign(body, concrete);
+  c.set('resolvedModel', concrete.model.toLowerCase());
+
+  return next();
+});
diff --git a/llm-gateway/src/o11y-binding.d.ts b/llm-gateway/src/o11y-binding.d.ts
new file mode 100644
index 000000000..18976f765
--- /dev/null
+++ b/llm-gateway/src/o11y-binding.d.ts
@@ -0,0 +1,5 @@
+import type { ApiMetricsParams } from '@kilocode/worker-utils';
+
+export type O11YBinding = Fetcher & {
+  ingestApiMetrics(params: ApiMetricsParams): Promise<void>; // extra method on top of Fetcher
+};
diff --git a/llm-gateway/src/types/hono.ts b/llm-gateway/src/types/hono.ts
new file mode 100644
index 000000000..8669a375a
--- /dev/null
+++ b/llm-gateway/src/types/hono.ts
@@ -0,0 +1,62 @@
+import type { User } from '@kilocode/db';
+import type { CustomLlm } from '@kilocode/db/schema';
+import type { Env } from '../env';
+import type { AnonymousUserContext } from '../lib/anonymous';
+import type { FeatureValue } from '../lib/feature-detection';
+import type { OpenRouterChatCompletionRequest } from './request';
+import type { Provider, SecretsBundle } from '../lib/providers';
+import type { BYOKResult } from '../lib/byok';
+import type { FraudDetectionHeaders } from '../lib/extract-headers';
+
+// Hono app context — bindings + all middleware variables.
+export type HonoContext = {
+  Bindings: Env;
+  Variables: Variables;
+};
+
+// Values set via c.set() / c.get() across the middleware chain.
+// Each key is grouped under the middleware file that first populates it.
+export type Variables = {
+  // request-timing.ts — performance.now() value taken when the request arrives
+  requestStartedAt: number;
+
+  // parse-body.ts
+  requestBody: OpenRouterChatCompletionRequest;
+  resolvedModel: string; // trimmed + lowercased; resolve-auto-model.ts overwrites it for kilo/auto*
+  feature: FeatureValue | null;
+
+  // extract-ip.ts
+  clientIp: string;
+  modeHeader: string | null;
+
+  // resolve-auto-model.ts
+  autoModel: string | null; // original kilo/auto* id, null when not an auto model
+
+  // auth.ts — set on successful JWT verification + DB lookup; undefined if auth failed/absent.
+  // anonymous-gate.ts reads authUser to decide whether to allow anonymous access or return 401.
+  authUser?: User;
+  organizationId?: string;
+  botId?: string;
+  tokenSource?: string;
+
+  // anonymous-gate.ts — always set once this middleware runs
+  user: User | AnonymousUserContext;
+
+  // provider-resolution.ts — set after secrets are fetched and provider is chosen
+  provider: Provider;
+  userByok: BYOKResult[] | null;
+  customLlm: CustomLlm | null;
+  secrets: SecretsBundle;
+
+  // request-transform.ts — extracted from request headers, stored for background tasks
+  fraudHeaders: FraudDetectionHeaders;
+  projectId: string | null;
+  taskId: string | null;
+  editorName: string | null;
+  machineId: string | null;
+  xKiloCodeVersion: string | null;
+  numericKiloCodeVersion: number;
+
+  // request-transform.ts — extra headers to forward to the upstream provider
+  extraHeaders: Record<string, string>;
+};
diff --git a/llm-gateway/src/types/index.ts b/llm-gateway/src/types/index.ts
new file mode 100644
index 000000000..d9340ec25
--- /dev/null
+++ b/llm-gateway/src/types/index.ts
@@ -0,0 +1,2 @@
+export type { HonoContext, Variables } from './hono';
+export type { OpenRouterChatCompletionRequest, ChatMessage } from './request';
diff --git a/llm-gateway/src/types/request.ts b/llm-gateway/src/types/request.ts
new file mode 100644
index 000000000..b8ad31fbb
--- /dev/null
+++ b/llm-gateway/src/types/request.ts
@@ -0,0 +1,30 @@
+// OpenRouter-compatible chat completion request shape.
+// Intentionally loose — unknown fields are passed through to upstream.
+
+export type OpenRouterChatCompletionRequest = {
+  model: string;
+  messages: ChatMessage[];
+  stream?: boolean;
+  stream_options?: { include_usage?: boolean }; // include_usage is forced to true in parse-body.ts
+  max_tokens?: number;
+  tools?: unknown[];
+  transforms?: string[];
+  provider?: {
+    order?: string[];
+    only?: string[];
+    data_collection?: 'allow' | 'deny';
+    zdr?: boolean;
+  };
+  reasoning?: { effort?: string; max_tokens?: number; exclude?: boolean; enabled?: boolean };
+  verbosity?: string;
+  prompt_cache_key?: string; // set in request-transform.ts when a task id is present
+  safety_identifier?: string; // provider-specific hash of the user id (request-transform.ts)
+  user?: string; // deprecated; receives the same value as safety_identifier
+  [key: string]: unknown; // passthrough for fields not modelled above
+};
+
+export type ChatMessage = {
+  role: string;
+  content: string | unknown[];
+  [key: string]: unknown; // passthrough for fields not modelled above
+};
diff --git a/llm-gateway/test/integration/_setup.ts b/llm-gateway/test/integration/_setup.ts
new file mode 100644
index 000000000..ccfb01223
--- /dev/null
+++ b/llm-gateway/test/integration/_setup.ts
@@ -0,0 +1,157 @@
+// Shared test infrastructure for integration tests.
+// Re-exports helpers from unit tests and adds dispatch + DB mock + fixtures.
+
+export {
+  signToken,
+  makeEnv,
+  fakeExecutionCtx,
+  chatRequest,
+  makeSSEStream,
+  sseChunk,
+  sseDone,
+  readSSEEvents,
+  TEST_SECRET,
+} from '../unit/helpers';
+
+// ── Dispatch helper ───────────────────────────────────────────────────────────
+// Dynamically imports the worker and calls its fetch method.
+
+import { makeEnv, fakeExecutionCtx } from '../unit/helpers';
+
+export async function dispatch(
+  req: Request,
+  envOverrides: Partial<Record<string, unknown>> = {}
+) {
+  const workerModule = await import('../../src/index');
+  const testEnv = makeEnv(envOverrides);
+  return workerModule.default.fetch(req, testEnv, fakeExecutionCtx());
+}
+
+// ── User fixtures ─────────────────────────────────────────────────────────────
+
+export const VALID_USER = {
+  id: 'user-1',
+  google_user_email: 'test@example.com',
+  api_token_pepper: null as string | null, // widened so a test can spread in a string pepper
+  total_microdollars_acquired: 10_000_000, // $10
+  microdollars_used: 0,
+  is_admin: false,
+};
+
+export const VALID_USER_ZERO_BALANCE = {
+  ...VALID_USER,
+  id: 'user-zero',
+  total_microdollars_acquired: 0, // nothing acquired and nothing used
+  microdollars_used: 0,
+};
+
+export const VALID_USER_NEW = {
+  ...VALID_USER_ZERO_BALANCE,
+  id: 'user-new', // same zero balance, distinct id
+};
+
+// ── Drizzle table name helper ─────────────────────────────────────────────────
+// Drizzle table objects store the SQL table name under Symbol.for('drizzle:Name').
+
+const DRIZZLE_NAME = Symbol.for('drizzle:Name');
+
+export function getTableName(table: unknown): string {
+  // Anything that is not an object carrying the drizzle name symbol has no name.
+  if (typeof table !== 'object' || table === null || !(DRIZZLE_NAME in table)) return '';
+  const name = (table as Record<symbol, string | undefined>)[DRIZZLE_NAME];
+  return name ?? '';
+}
+
+// ── DB mock query chain helper ────────────────────────────────────────────────
+// Creates a thenable-proxy that supports arbitrary drizzle method chaining
+// (.where, .limit, .orderBy, .innerJoin, .leftJoin, etc.) and resolves to
+// `result` when awaited.
+
+export function chainResult(result: unknown) {
+  const resolved = Promise.resolve(result);
+  // The target is a private no-op: proxying the global `Function` would break
+  // Proxy invariants for its read-only, non-configurable `prototype` property.
+  const proxy: unknown = new Proxy(() => {}, {
+    get(_target, prop) {
+      // Make the proxy thenable — when awaited, resolve to `result`
+      if (prop === 'then') return resolved.then.bind(resolved);
+      if (prop === 'catch') return resolved.catch.bind(resolved);
+      if (prop === 'finally') return resolved.finally.bind(resolved);
+      // All other method calls return the same chainable proxy
+      return () => proxy;
+    },
+    apply: () => proxy,
+  });
+  return proxy;
+}
+
+// ── Standard module mocks ─────────────────────────────────────────────────────
+// Common mock definitions reused across test files.
+
+export const WORKER_UTILS_MOCK = {
+  userExistsWithCache: async () => true,
+  extractBearerToken: (header: string | undefined) => {
+    const [scheme, token, ...extra] = (header || '').split(' '); // exactly two parts expected
+    const wellFormed = token !== undefined && extra.length === 0;
+    return wellFormed && scheme.toLowerCase() === 'bearer' ? token : null;
+  },
+  verifyKiloToken: async () => {
+    throw new Error('should not be called directly');
+  },
+};
+
+export const ABUSE_SERVICE_MOCK = {
+  classifyAbuse: async () => null, // no-op stand-in: always resolves to null
+  reportAbuseCost: async () => null, // no-op stand-in: always resolves to null
+};
+
+export const ENCRYPTION_MOCK = {
+  timingSafeEqual: (a: string, b: string) => a === b, // plain equality, not constant-time
+};
+
+// ── DO namespace factory ──────────────────────────────────────────────────────
+
+export function makeFakeDONamespace(opts: {
+  freeModelBlocked?: Set<string>;
+  promotionBlocked?: Set<string>;
+} = {}) {
+  const blockedFreeModelIps = opts.freeModelBlocked ?? new Set();
+  const blockedPromotionIps = opts.promotionBlocked ?? new Set();
+
+  // Builds a limiter verdict. Membership is looked up at call time, so a test
+  // may still mutate the sets after the namespace has been created.
+  const verdict = (blocked: { has(ip: string): boolean }, ip: string, blockedCount: number) => {
+    const isBlocked = blocked.has(ip);
+    return { allowed: !isBlocked, requestCount: isBlocked ? blockedCount : 0 };
+  };
+
+  const stubFor = (ip: string) => ({
+    checkFreeModel: async () => verdict(blockedFreeModelIps, ip, 200),
+    checkPromotion: async () => verdict(blockedPromotionIps, ip, 10000),
+    incrementFreeModel: async () => {},
+    incrementPromotion: async () => {},
+  });
+
+  // The fake ids are empty objects that carry no name, so get() falls back to
+  // the name most recently passed to idFromName().
+  let lastNamedIp = '0.0.0.0';
+  const opaqueId = () => ({}) as DurableObjectId;
+
+  return {
+    idFromName(name: string) {
+      lastNamedIp = name;
+      return opaqueId();
+    },
+    newUniqueId: opaqueId,
+    idFromString: opaqueId,
+    getByName(name: string) {
+      return stubFor(name) as unknown as DurableObjectStub;
+    },
+    get() {
+      return stubFor(lastNamedIp) as unknown as DurableObjectStub;
+    },
+    jurisdiction() {
+      return this;
+    },
+  } as unknown as Cloudflare.Env['RATE_LIMIT_DO'];
+}
diff --git a/llm-gateway/test/integration/anonymous-gate.test.ts b/llm-gateway/test/integration/anonymous-gate.test.ts
new file mode 100644
index 000000000..8189eb680
--- /dev/null
+++ b/llm-gateway/test/integration/anonymous-gate.test.ts
@@ -0,0 +1,53 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { dispatch, chatRequest, chainResult } from './_setup';
+
+vi.mock('@kilocode/db/client', () => ({
+  getWorkerDb: () => ({
+    select: () => ({
+      from: () => chainResult([]),
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }),
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({
+  userExistsWithCache: async () => true,
+  extractBearerToken: (header: string | undefined) => {
+    if (!header) return null;
+    const parts = header.split(' ');
+    return parts.length === 2 && parts[0].toLowerCase() === 'bearer' ? parts[1] : null;
+  },
+  verifyKiloToken: async () => {
+    throw new Error('should not be called directly');
+  },
+}));
+
+vi.mock('../../src/lib/abuse-service', () => ({
+  classifyAbuse: async () => null,
+  reportAbuseCost: async () => null,
+}));
+
+const realFetch = globalThis.fetch;
+beforeEach(() => {
+  globalThis.fetch = vi.fn();
+});
+afterEach(() => {
+  globalThis.fetch = realFetch;
+  vi.restoreAllMocks();
+});
+
+describe('anonymousGate', () => {
+  it('returns 401 with PAID_MODEL_AUTH_REQUIRED for paid model without auth', async () => {
+    // No token is attached to this request.
+    const unauthenticated = chatRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(unauthenticated);
+    expect(response.status).toBe(401);
+    const { error }: { error: { code: string; message: string } } = await response.json();
+    expect(error.code).toBe('PAID_MODEL_AUTH_REQUIRED');
+    expect(error.message).toBe('You need to sign in to use this model.');
+  });
+});
diff --git a/llm-gateway/test/integration/auth.test.ts b/llm-gateway/test/integration/auth.test.ts
new file mode 100644
index 000000000..f3d623398
--- /dev/null
+++ b/llm-gateway/test/integration/auth.test.ts
@@ -0,0 +1,114 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import {
+  dispatch,
+  chatRequest,
+  signToken,
+  VALID_USER,
+  TEST_SECRET,
+  getTableName,
+  chainResult,
+} from './_setup';
+
+// ── Configurable DB ────────────────────────────────────────────────────────────
+
+let _userRows: Record<string, unknown>[] = [];
+
+vi.mock('@kilocode/db/client', () => ({
+  getWorkerDb: () => ({
+    select: () => ({
+      from: (table: unknown) => {
+        const name = getTableName(table);
+        if (name === 'kilocode_users') return chainResult(_userRows);
+        return chainResult([]);
+      },
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }),
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({
+  userExistsWithCache: async () => true,
+  extractBearerToken: (header: string | undefined) => {
+    if (!header) return null;
+    const parts = header.split(' ');
+    return parts.length === 2 && parts[0].toLowerCase() === 'bearer' ? parts[1] : null;
+  },
+  verifyKiloToken: async (token: string, secret: string) => {
+    const { jwtVerify } = await import('jose');
+    const { payload } = await jwtVerify(token, new TextEncoder().encode(secret));
+    return payload as Record<string, unknown>;
+  },
+}));
+
+vi.mock('@kilocode/encryption', () => ({
+  timingSafeEqual: (a: string, b: string) => a === b,
+}));
+
+vi.mock('../../src/lib/abuse-service', () => ({
+  classifyAbuse: async () => null,
+  reportAbuseCost: async () => null,
+}));
+
+const realFetch = globalThis.fetch;
+beforeEach(() => {
+  _userRows = [];
+  globalThis.fetch = vi.fn();
+});
+afterEach(() => {
+  globalThis.fetch = realFetch;
+  vi.restoreAllMocks();
+});
+
+describe('auth', () => {
+  // Every case sends the same paid-model chat request; only the bearer token
+  // (and the mocked user rows) differ between them.
+  function paidModelRequest(token: string) {
+    return chatRequest(
+      {
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      },
+      { token }
+    );
+  }
+  type ErrorBody = { error: { message: string } };
+
+  it('returns 401 for expired/malformed token', async () => {
+    // Signed with a zero lifetime, then given a short pause before it is
+    // presented to the gateway.
+    const token = await signToken({}, TEST_SECRET, '0s');
+    await new Promise(resolve => setTimeout(resolve, 10));
+
+    const response = await dispatch(paidModelRequest(token));
+
+    expect(response.status).toBe(401);
+    const { error }: ErrorBody = await response.json();
+    expect(error.message).toBe('Invalid or expired token');
+  });
+
+  it('returns 401 when user is not found in DB', async () => {
+    // With no user rows, the mocked kilocode_users query resolves to an empty
+    // list for the id carried by the token.
+    _userRows = [];
+    const token = await signToken({ kiloUserId: 'user-nonexistent' });
+
+    const response = await dispatch(paidModelRequest(token));
+
+    expect(response.status).toBe(401);
+    const { error }: ErrorBody = await response.json();
+    expect(error.message).toBe('User not found');
+  });
+
+  it('returns 401 when pepper does not match', async () => {
+    // The stored pepper differs from the one embedded in the token.
+    _userRows = [{ ...VALID_USER, api_token_pepper: 'correct-pepper' }];
+    const token = await signToken({ kiloUserId: 'user-1', apiTokenPepper: 'wrong-pepper' });
+
+    const response = await dispatch(paidModelRequest(token));
+
+    expect(response.status).toBe(401);
+    const { error }: ErrorBody = await response.json();
+    expect(error.message).toBe('Token has been revoked');
+  });
+});
diff --git a/llm-gateway/test/integration/auto-model.test.ts b/llm-gateway/test/integration/auto-model.test.ts
new file mode 100644
index 000000000..89ad118f3
--- /dev/null
+++ b/llm-gateway/test/integration/auto-model.test.ts
@@ -0,0 +1,176 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { dispatch, chatRequest, signToken, VALID_USER, getTableName, chainResult } from './_setup';
+
+// ── Configurable DB ────────────────────────────────────────────────────────────
+
+let _userRows: Record<string, unknown>[] = [];
+
+vi.mock('@kilocode/db/client', () => ({
+  getWorkerDb: () => ({
+    select: () => ({
+      from: (table: unknown) => {
+        const name = getTableName(table);
+        if (name === 'kilocode_users') return chainResult(_userRows);
+        if (name === 'credit_transactions') return chainResult([{ count: 1 }]);
+        if (name === 'model_user_byok_providers') return chainResult([]);
+        if (name === 'custom_llm') return chainResult([]);
+        if (name === 'organizations') return chainResult([]);
+        if (name === 'models_by_provider') return chainResult([]);
+        return chainResult([]);
+      },
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }),
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({
+  userExistsWithCache: async () => true,
+  extractBearerToken: (header: string | undefined) => {
+    if (!header) return null;
+    const parts = header.split(' ');
+    return parts.length === 2 && parts[0].toLowerCase() === 'bearer' ? parts[1] : null;
+  },
+  verifyKiloToken: async (token: string, secret: string) => {
+    const { jwtVerify } = await import('jose');
+    const { payload } = await jwtVerify(token, new TextEncoder().encode(secret));
+    return payload as Record<string, unknown>;
+  },
+}));
+
+vi.mock('@kilocode/encryption', () => ({
+  timingSafeEqual: (a: string, b: string) => a === b,
+}));
+
+vi.mock('../../src/lib/abuse-service', () => ({
+  classifyAbuse: async () => null,
+  reportAbuseCost: async () => null,
+}));
+
+// Spy on scheduleBackgroundTasks
+const bgTasksSpy = vi.fn();
+vi.mock('../../src/handler/background-tasks', async (importOriginal) => {
+  const mod = await importOriginal();
+  return {
+    ...(mod as Record<string, unknown>),
+    scheduleBackgroundTasks: (...args: unknown[]) => {
+      bgTasksSpy(...args);
+    },
+  };
+});
+
+// Polyfill scheduler.wait for Node
+if (!(globalThis as Record<string, unknown>).scheduler) {
+  (globalThis as Record<string, unknown>).scheduler = {
+    wait: (ms: number) => new Promise(r => setTimeout(r, ms)),
+  };
+}
+
+const realFetch = globalThis.fetch;
+let fetchMock: ReturnType<typeof vi.fn>;
+
+beforeEach(() => {
+  _userRows = [{ ...VALID_USER }];
+  bgTasksSpy.mockClear();
+  fetchMock = vi.fn();
+  globalThis.fetch = fetchMock;
+});
+afterEach(() => {
+  globalThis.fetch = realFetch;
+  vi.restoreAllMocks();
+});
+
+async function authRequest(
+  body: Record<string, unknown>,
+  opts: { headers?: Record<string, string> } = {}
+) {
+  // The token is listed first, so the caller's options are merged in after it.
+  return chatRequest(body, { token: await signToken({ kiloUserId: 'user-1' }), ...opts });
+}
+
+function mockUpstream200() {
+  const completion = JSON.stringify({ choices: [{ message: { content: 'ok' } }] });
+  const upstreamOk = new Response(completion, {
+    status: 200,
+    headers: { 'content-type': 'application/json' },
+  });
+  fetchMock.mockResolvedValueOnce(upstreamOk); // served to the next mocked fetch() call only
+}
+
+describe('auto-model resolution', () => {
+  // Decodes the JSON body that was handed to the first call of the mocked
+  // global fetch, i.e. what the gateway sent upstream.
+  function firstUpstreamBody() {
+    const [, init] = fetchMock.mock.calls[0] as [string, { body: string }];
+    return JSON.parse(init.body) as Record<string, unknown>;
+  }
+
+  it('kilo/auto without mode resolves to code model (claude-sonnet)', async () => {
+    mockUpstream200();
+    // Authenticated request that carries no x-kilocode-mode header.
+    const request = await authRequest({
+      model: 'kilo/auto',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    await response.text();
+
+    // The upstream URL should go to openrouter (paid model)
+    const upstreamUrl = fetchMock.mock.calls[0][0] as string;
+    expect(upstreamUrl).toContain('chat/completions');
+
+    // The body should have the resolved model (claude-sonnet)
+    expect(firstUpstreamBody().model).toContain('claude-sonnet');
+  });
+
+  it('kilo/auto with x-kilocode-mode: plan resolves to plan model', async () => {
+    mockUpstream200();
+    const request = await authRequest(
+      {
+        model: 'kilo/auto',
+        messages: [{ role: 'user', content: 'hi' }],
+      },
+      { headers: { 'x-kilocode-mode': 'plan' } }
+    );
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    await response.text();
+
+    // plan mode resolves to claude-opus
+    expect(firstUpstreamBody().model).toContain('claude-opus');
+  });
+
+  it('kilo/auto-free resolves to free model', async () => {
+    mockUpstream200();
+    // kilo/auto-free resolves to minimax/minimax-m2.5:free which is a free model
+    // The anonymous path should work too
+    const request = chatRequest({
+      model: 'kilo/auto-free',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    await response.text();
+
+    expect(firstUpstreamBody().model).toContain('minimax');
+  });
+
+  it('kilo/auto sets autoModel in background task params', async () => {
+    mockUpstream200();
+    const request = await authRequest({
+      model: 'kilo/auto',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    await response.text();
+    // Brief pause before the spy is inspected — presumably so that work
+    // scheduled after the response has had time to run.
+    await new Promise(resolve => setTimeout(resolve, 50));
+
+    expect(bgTasksSpy).toHaveBeenCalled();
+    const scheduledParams = bgTasksSpy.mock.calls[0][1] as Record<string, unknown>;
+    expect(scheduledParams.autoModel).toBe('kilo/auto');
+  });
+});
diff --git a/llm-gateway/test/integration/background-tasks.test.ts b/llm-gateway/test/integration/background-tasks.test.ts
new file mode 100644
index 000000000..4b9072c32
--- /dev/null
+++ b/llm-gateway/test/integration/background-tasks.test.ts
@@ -0,0 +1,338 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { dispatch, chatRequest, signToken, VALID_USER, getTableName, chainResult } from './_setup';
+
+// ── DB mock ────────────────────────────────────────────────────────────────────
+
+let _userRows: Record<string, unknown>[] = [];
+
+vi.mock('@kilocode/db/client', () => ({
+  getWorkerDb: () => ({
+    select: () => ({
+      from: (table: unknown) => {
+        switch (getTableName(table)) {
+          case 'kilocode_users':
+            return chainResult(_userRows);
+          case 'credit_transactions':
+            return chainResult([{ count: 1 }]);
+          default: // BYOK providers, custom_llm, organizations, models_by_provider, ...
+            return chainResult([]);
+        }
+      },
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }),
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({
+  userExistsWithCache: async () => true, // every user is reported as existing
+  extractBearerToken: (header: string | undefined) => {
+    const pieces = header ? header.split(' ') : [];
+    const [scheme, credential] = pieces;
+    return pieces.length === 2 && scheme.toLowerCase() === 'bearer' ? credential : null;
+  },
+  verifyKiloToken: async (token: string, secret: string) => {
+    const jose = await import('jose');
+    const verified = await jose.jwtVerify(token, new TextEncoder().encode(secret));
+    return verified.payload as Record<string, unknown>;
+  },
+}));
+// Secrets are compared with plain equality instead of a constant-time check.
+vi.mock('@kilocode/encryption', () => ({
+  timingSafeEqual: (left: string, right: string) => left === right,
+}));
+// Abuse classification and cost reporting are stubbed to resolve to null.
+vi.mock('../../src/lib/abuse-service', () => ({
+  classifyAbuse: () => Promise.resolve(null),
+  reportAbuseCost: () => Promise.resolve(null),
+}));
+
+// Spy that records every scheduleBackgroundTasks call. All other exports of the
+// module are passed through from the original implementation.
+const bgTasksSpy = vi.fn();
+vi.mock('../../src/handler/background-tasks', async (importOriginal) => {
+  const actual = (await importOriginal()) as Record<string, unknown>;
+  // Forwards all arguments to the spy and discards its return value.
+  const scheduleBackgroundTasks = (...args: unknown[]): void => {
+    bgTasksSpy(...args);
+  };
+  return { ...actual, scheduleBackgroundTasks };
+});
+
+// Polyfill for Node: install a setTimeout-backed `scheduler.wait` unless the
+// runtime already exposes a truthy global `scheduler`.
+(globalThis as Record<string, unknown>).scheduler ||= {
+  // Resolves with no value once `ms` milliseconds have elapsed.
+  wait: (ms: number) => new Promise(resolve => setTimeout(resolve, ms)),
+};
+
+const realFetch = globalThis.fetch;
+let fetchMock: ReturnType<typeof vi.fn>;
+// Every test starts with one valid user, a cleared spy and a fresh fetch stub.
+beforeEach(() => {
+  fetchMock = vi.fn();
+  globalThis.fetch = fetchMock;
+  bgTasksSpy.mockClear();
+  _userRows = [{ ...VALID_USER }];
+});
+afterEach(() => {
+  vi.restoreAllMocks();
+  globalThis.fetch = realFetch;
+});
+
+// Builds a chat request carrying a signed token for kiloUserId 'user-1'.
+async function authRequest(body: Record<string, unknown>) {
+  return chatRequest(body, { token: await signToken({ kiloUserId: 'user-1' }) });
+}
+
+describe('background tasks', () => {
+  it('schedules background tasks on 200 success', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify({ choices: [{ message: { content: 'ok' } }] }), {
+        status: 200,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const res = await dispatch(
+      await authRequest({
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      })
+    );
+    expect(res.status).toBe(200);
+    // Consume the body so the stream completes and bg tasks schedule
+    await res.text();
+
+    // Poll rather than sleeping a fixed 50 ms: this resolves as soon as the spy
+    // has been called and fails with the assertion error if it never is.
+    await vi.waitFor(() => expect(bgTasksSpy).toHaveBeenCalled());
+    const params = bgTasksSpy.mock.calls[0][1] as Record<string, unknown>;
+    expect(params.accountingStream).not.toBeNull();
+    expect(params.metricsStream).not.toBeNull();
+    expect(params.loggingStream).not.toBeNull();
+  });
+
+  it('schedules background tasks on 400 error', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify({ error: 'Bad Request' }), {
+        status: 400,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const res = await dispatch(
+      await authRequest({
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      })
+    );
+    expect(res.status).toBe(400);
+    await res.text();
+
+    // Poll instead of sleeping a fixed 50 ms; resolves as soon as the spy is called.
+    await vi.waitFor(() => expect(bgTasksSpy).toHaveBeenCalled());
+  });
+
+  it('schedules background tasks before returning 503 for 402→503 conversion', async () => {
+    const upstream402 = new Response(JSON.stringify({ error: 'Payment Required' }), {
+      status: 402,
+      headers: { 'content-type': 'application/json' },
+    });
+    fetchMock.mockResolvedValueOnce(upstream402);
+
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    // The upstream 402 is surfaced to the client as a 503.
+    expect(response.status).toBe(503);
+
+    // No waiting: the spy must already have been called with the upstream status.
+    expect(bgTasksSpy).toHaveBeenCalled();
+    const scheduled = bgTasksSpy.mock.calls[0][1] as Record<string, unknown>;
+    expect(scheduled.upstreamStatusCode).toBe(402);
+  });
+
+  it('accountingStream is null for anonymous users', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify({ choices: [{ message: { content: 'ok' } }] }), {
+        status: 200,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const res = await dispatch(
+      chatRequest({
+        model: 'corethink:free',
+        messages: [{ role: 'user', content: 'hi' }],
+      })
+    );
+    expect(res.status).toBe(200);
+    await res.text();
+
+    // Poll instead of sleeping a fixed 50 ms; resolves as soon as the spy is called.
+    await vi.waitFor(() => expect(bgTasksSpy).toHaveBeenCalled());
+    const params = bgTasksSpy.mock.calls[0][1] as Record<string, unknown>;
+    expect(params.accountingStream).toBeNull();
+  });
+
+  it('loggingStream is null for anonymous users', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify({ choices: [{ message: { content: 'ok' } }] }), {
+        status: 200,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const res = await dispatch(
+      chatRequest({
+        model: 'corethink:free',
+        messages: [{ role: 'user', content: 'hi' }],
+      })
+    );
+    expect(res.status).toBe(200);
+    await res.text();
+
+    // Poll instead of sleeping a fixed 50 ms; resolves as soon as the spy is called.
+    await vi.waitFor(() => expect(bgTasksSpy).toHaveBeenCalled());
+    const params = bgTasksSpy.mock.calls[0][1] as Record<string, unknown>;
+    expect(params.loggingStream).toBeNull();
+  });
+
+  it('metricsStream is non-null for anonymous users', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify({ choices: [{ message: { content: 'ok' } }] }), {
+        status: 200,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const res = await dispatch(
+      chatRequest({
+        model: 'corethink:free',
+        messages: [{ role: 'user', content: 'hi' }],
+      })
+    );
+    expect(res.status).toBe(200);
+    await res.text();
+
+    // Poll instead of sleeping a fixed 50 ms; resolves as soon as the spy is called.
+    await vi.waitFor(() => expect(bgTasksSpy).toHaveBeenCalled());
+    const params = bgTasksSpy.mock.calls[0][1] as Record<string, unknown>;
+    expect(params.metricsStream).not.toBeNull();
+  });
+
+  it('accountingStream is non-null for authenticated 200', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify({ choices: [{ message: { content: 'ok' } }] }), {
+        status: 200,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const res = await dispatch(
+      await authRequest({
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      })
+    );
+    expect(res.status).toBe(200);
+    await res.text();
+
+    // Poll instead of sleeping a fixed 50 ms; resolves as soon as the spy is called.
+    await vi.waitFor(() => expect(bgTasksSpy).toHaveBeenCalled());
+    const params = bgTasksSpy.mock.calls[0][1] as Record<string, unknown>;
+    expect(params.accountingStream).not.toBeNull();
+  });
+
+  it('loggingStream is non-null for authenticated 200', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify({ choices: [{ message: { content: 'ok' } }] }), {
+        status: 200,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const res = await dispatch(
+      await authRequest({
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      })
+    );
+    expect(res.status).toBe(200);
+    await res.text();
+
+    // Poll instead of sleeping a fixed 50 ms; resolves as soon as the spy is called.
+    await vi.waitFor(() => expect(bgTasksSpy).toHaveBeenCalled());
+    const params = bgTasksSpy.mock.calls[0][1] as Record<string, unknown>;
+    expect(params.loggingStream).not.toBeNull();
+  });
+
+  it('params include correct user, resolved model, and anon/BYOK flags', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify({ choices: [{ message: { content: 'ok' } }] }), {
+        status: 200,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const res = await dispatch(
+      await authRequest({
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      })
+    );
+    expect(res.status).toBe(200);
+    await res.text();
+
+    // Poll instead of sleeping a fixed 50 ms; resolves as soon as the spy is called.
+    await vi.waitFor(() => expect(bgTasksSpy).toHaveBeenCalled());
+    const params = bgTasksSpy.mock.calls[0][1] as Record<string, unknown>;
+    expect((params.user as { id: string }).id).toBe('user-1');
+    expect(params.resolvedModel).toBe('anthropic/claude-sonnet-4-20250514');
+    expect(params.isAnon).toBe(false);
+    expect(params.userByok).toBe(false);
+  });
+
+  it('params include header-sourced fields (modeHeader, feature, etc.)', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify({ choices: [{ message: { content: 'ok' } }] }), {
+        status: 200,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const token = await signToken({ kiloUserId: 'user-1' });
+    const res = await dispatch(
+      chatRequest(
+        {
+          model: 'anthropic/claude-sonnet-4-20250514',
+          messages: [{ role: 'user', content: 'hi' }],
+        },
+        {
+          token,
+          headers: {
+            'x-kilocode-mode': 'code',
+            'x-kilocode-feature': 'vscode-extension',
+            'x-kilocode-taskid': 'task-abc',
+            'x-kilocode-editorname': 'vscode',
+            'x-kilocode-machineid': 'machine-xyz',
+          },
+        }
+      )
+    );
+    expect(res.status).toBe(200);
+    await res.text();
+
+    // Poll instead of sleeping a fixed 50 ms; resolves as soon as the spy is called.
+    await vi.waitFor(() => expect(bgTasksSpy).toHaveBeenCalled());
+    const params = bgTasksSpy.mock.calls[0][1] as Record<string, unknown>;
+    expect(params.modeHeader).toBe('code');
+    expect(params.feature).toBe('vscode-extension');
+    expect(params.sessionId).toBe('task-abc');
+    expect(params.editorName).toBe('vscode');
+    expect(params.machineId).toBe('machine-xyz');
+  });
+});
diff --git a/llm-gateway/test/integration/balance-and-org.test.ts b/llm-gateway/test/integration/balance-and-org.test.ts
new file mode 100644
index 000000000..0dbc86f77
--- /dev/null
+++ b/llm-gateway/test/integration/balance-and-org.test.ts
@@ -0,0 +1,173 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import {
+  dispatch,
+  chatRequest,
+  signToken,
+  VALID_USER,
+  VALID_USER_ZERO_BALANCE,
+  getTableName,
+  chainResult,
+} from './_setup';
+
+// ── Configurable DB ────────────────────────────────────────────────────────────
+
+let _userRows: Record<string, unknown>[] = [];
+let _creditCount = 0;
+let _orgRow: Record<string, unknown> | null = null;
+
+vi.mock('@kilocode/db/client', () => ({
+  getWorkerDb: () => ({
+    select: () => ({
+      from: (table: unknown) => {
+        const tableName = getTableName(table);
+        // These three tables are driven by the per-test state declared above.
+        if (tableName === 'kilocode_users') return chainResult(_userRows);
+        if (tableName === 'credit_transactions') return chainResult([{ count: _creditCount }]);
+        if (tableName === 'organizations') return chainResult(_orgRow ? [_orgRow] : []);
+        // model_user_byok_providers, custom_llm, models_by_provider and any other
+        // table always produce an empty result.
+        return chainResult([]);
+      },
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }),
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({
+  userExistsWithCache: async () => true, // every user is reported as existing
+  extractBearerToken: (header: string | undefined) => {
+    const pieces = header ? header.split(' ') : [];
+    const [scheme, credential] = pieces;
+    return pieces.length === 2 && scheme.toLowerCase() === 'bearer' ? credential : null;
+  },
+  verifyKiloToken: async (token: string, secret: string) => {
+    const jose = await import('jose');
+    const verified = await jose.jwtVerify(token, new TextEncoder().encode(secret));
+    return verified.payload as Record<string, unknown>;
+  },
+}));
+// Secrets are compared with plain equality instead of a constant-time check.
+vi.mock('@kilocode/encryption', () => ({
+  timingSafeEqual: (left: string, right: string) => left === right,
+}));
+// Abuse classification and cost reporting are stubbed to resolve to null.
+vi.mock('../../src/lib/abuse-service', () => ({
+  classifyAbuse: () => Promise.resolve(null),
+  reportAbuseCost: () => Promise.resolve(null),
+}));
+
+const realFetch = globalThis.fetch;
+beforeEach(() => {
+  globalThis.fetch = vi.fn(); // bare stub: no upstream response is configured
+  _orgRow = null;
+  _creditCount = 0;
+  _userRows = [];
+});
+afterEach(() => {
+  vi.restoreAllMocks();
+  globalThis.fetch = realFetch;
+});
+
+describe('balanceAndOrg', () => {
+  it('returns 402 with Low Credit Warning for returning user with zero balance', async () => {
+    // One paid top-up on record marks the zero-balance user as a returning user.
+    _creditCount = 1;
+    _userRows = [{ ...VALID_USER_ZERO_BALANCE }];
+
+    const token = await signToken({ kiloUserId: 'user-zero' });
+    const request = chatRequest(
+      {
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      },
+      { token }
+    );
+    const response = await dispatch(request);
+    expect(response.status).toBe(402);
+    const { error }: { error: { title: string; balance: number } } = await response.json();
+    expect(error.title).toBe('Low Credit Warning!');
+    expect(error.balance).toBe(0);
+  });
+
+  it('returns 402 with Paid Model - Credits Required for new user with zero balance', async () => {
+    // No paid top-up on record marks the zero-balance user as a new user.
+    _creditCount = 0;
+    _userRows = [{ ...VALID_USER_ZERO_BALANCE, id: 'user-new' }];
+
+    const token = await signToken({ kiloUserId: 'user-new' });
+    const request = chatRequest(
+      {
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      },
+      { token }
+    );
+    const response = await dispatch(request);
+    expect(response.status).toBe(402);
+    const { error }: { error: { title: string; message: string } } = await response.json();
+    expect(error.title).toBe('Paid Model - Credits Required');
+    expect(error.message).toContain('$20 free');
+  });
+
+  it('returns 404 for org enterprise model not in allow list', async () => {
+    _userRows = [{ ...VALID_USER }];
+    // Enterprise org whose model allow list contains only openai/gpt-4o.
+    _orgRow = {
+      total_microdollars_acquired: 10_000_000,
+      microdollars_used: 0,
+      settings: {
+        model_allow_list: ['openai/gpt-4o'],
+        provider_allow_list: [],
+      },
+      plan: 'enterprise',
+      require_seats: false,
+      microdollar_limit: null,
+      microdollar_usage: null,
+    };
+
+    const token = await signToken({ kiloUserId: 'user-1' });
+    const request = chatRequest(
+      {
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      },
+      { token, headers: { 'x-kilocode-organizationid': 'org-1' } }
+    );
+    const response = await dispatch(request);
+    expect(response.status).toBe(404);
+    const { error }: { error: string } = await response.json();
+    expect(error).toContain('not allowed');
+  });
+
+  it('returns 400 for Kilo free model with org data_collection=deny', async () => {
+    _userRows = [{ ...VALID_USER }];
+    // Team org with empty allow lists that denies data collection.
+    _orgRow = {
+      total_microdollars_acquired: 10_000_000,
+      microdollars_used: 0,
+      settings: {
+        model_allow_list: [],
+        provider_allow_list: [],
+        data_collection: 'deny',
+      },
+      plan: 'team',
+      require_seats: false,
+      microdollar_limit: null,
+      microdollar_usage: null,
+    };
+
+    const token = await signToken({ kiloUserId: 'user-1' });
+    const request = chatRequest(
+      {
+        model: 'corethink:free',
+        messages: [{ role: 'user', content: 'hi' }],
+      },
+      { token, headers: { 'x-kilocode-organizationid': 'org-1' } }
+    );
+    const response = await dispatch(request);
+    expect(response.status).toBe(400);
+    const { error }: { error: string } = await response.json();
+    expect(error).toContain('Data collection');
+  });
+});
diff --git a/llm-gateway/test/integration/body-mutations.test.ts b/llm-gateway/test/integration/body-mutations.test.ts
new file mode 100644
index 000000000..3fdd101e2
--- /dev/null
+++ b/llm-gateway/test/integration/body-mutations.test.ts
@@ -0,0 +1,221 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import {
+  dispatch,
+  chatRequest,
+  signToken,
+  VALID_USER,
+  getTableName,
+  chainResult,
+} from './_setup';
+
+// ── Configurable DB ────────────────────────────────────────────────────────────
+
+let _userRows: Record<string, unknown>[] = [];
+
+vi.mock('@kilocode/db/client', () => ({
+  getWorkerDb: () => ({
+    select: () => ({
+      from: (table: unknown) => {
+        switch (getTableName(table)) {
+          case 'kilocode_users':
+            return chainResult(_userRows);
+          case 'credit_transactions':
+            return chainResult([{ count: 1 }]);
+          default: // BYOK providers, custom_llm, organizations, models_by_provider, ...
+            return chainResult([]);
+        }
+      },
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }),
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({
+  userExistsWithCache: async () => true, // every user is reported as existing
+  extractBearerToken: (header: string | undefined) => {
+    const pieces = header ? header.split(' ') : [];
+    const [scheme, credential] = pieces;
+    return pieces.length === 2 && scheme.toLowerCase() === 'bearer' ? credential : null;
+  },
+  verifyKiloToken: async (token: string, secret: string) => {
+    const jose = await import('jose');
+    const verified = await jose.jwtVerify(token, new TextEncoder().encode(secret));
+    return verified.payload as Record<string, unknown>;
+  },
+}));
+// Secrets are compared with plain equality instead of a constant-time check.
+vi.mock('@kilocode/encryption', () => ({
+  timingSafeEqual: (left: string, right: string) => left === right,
+}));
+// Abuse classification and cost reporting are stubbed to resolve to null.
+vi.mock('../../src/lib/abuse-service', () => ({
+  classifyAbuse: () => Promise.resolve(null),
+  reportAbuseCost: () => Promise.resolve(null),
+}));
+
+// Polyfill for Node: install a setTimeout-backed `scheduler.wait` unless the
+// runtime already exposes a truthy global `scheduler`.
+(globalThis as Record<string, unknown>).scheduler ||= {
+  // Resolves with no value once `ms` milliseconds have elapsed.
+  wait: (ms: number) => new Promise(resolve => setTimeout(resolve, ms)),
+};
+
+const realFetch = globalThis.fetch;
+let fetchMock: ReturnType<typeof vi.fn>;
+// Every test starts with one valid user and a fresh fetch stub.
+beforeEach(() => {
+  fetchMock = vi.fn();
+  globalThis.fetch = fetchMock;
+  _userRows = [{ ...VALID_USER }];
+});
+afterEach(() => {
+  vi.restoreAllMocks();
+  globalThis.fetch = realFetch;
+});
+
+// Builds a chat request signed for 'user-1'; `opts` is spread after the token.
+async function authRequest(
+  body: Record<string, unknown>,
+  opts: { headers?: Record<string, string> } = {}
+) {
+  return chatRequest(body, { token: await signToken({ kiloUserId: 'user-1' }), ...opts });
+}
+
+/**
+ * Queues one upstream reply for the next fetch call: HTTP 200 with a JSON chat-completion body.
+ */
+function mockUpstream200() {
+  const payload = JSON.stringify({ choices: [{ message: { content: 'ok' } }] });
+  const init = { status: 200, headers: { 'content-type': 'application/json' } };
+  fetchMock.mockResolvedValueOnce(new Response(payload, init));
+}
+
+function getUpstreamBody(): Record<string, unknown> {
+  const firstCallInit = fetchMock.mock.calls[0][1] as { body: string }; // 2nd fetch argument
+  return JSON.parse(firstCallInit.body) as Record<string, unknown>;
+}
+
+describe('body mutations', () => {
+  it('stream_options.include_usage is forced to true', async () => {
+    mockUpstream200();
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    await response.text();
+
+    // The caller sent no stream_options; the forwarded body must still request usage.
+    const forwarded = getUpstreamBody();
+    expect(forwarded.stream_options).toMatchObject({ include_usage: true });
+  });
+
+  it('stream_options.include_usage merges with existing stream_options', async () => {
+    mockUpstream200();
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+      stream_options: { some_custom_option: 'value' },
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    await response.text();
+
+    // The caller's own option must survive next to the forced include_usage flag.
+    const forwarded = getUpstreamBody();
+    expect(forwarded.stream_options).toMatchObject({
+      include_usage: true,
+      some_custom_option: 'value',
+    });
+  });
+
+  it('models field is deleted from upstream body', async () => {
+    mockUpstream200();
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      models: ['model-a', 'model-b'],
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    await response.text();
+
+    // The `models` array sent by the caller must not reach the upstream request.
+    const forwarded = getUpstreamBody();
+    expect(forwarded.models).toBeUndefined();
+  });
+
+  it('model is lowercased and trimmed in resolved context', async () => {
+    mockUpstream200();
+    const request = await authRequest({
+      model: '  Anthropic/Claude-Sonnet-4-20250514  ',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    await response.text();
+
+    // Routing must succeed for the padded, mixed-case id: the request still
+    // reaches an upstream chat/completions URL.
+    const upstreamUrl = fetchMock.mock.calls[0][0] as string;
+    expect(upstreamUrl).toContain('chat/completions');
+    // Only the presence of a model is asserted on the forwarded body; whether it
+    // keeps the caller's original spelling is not checked here.
+    const forwarded = getUpstreamBody();
+    expect(forwarded.model).toBeDefined();
+  });
+
+  it('safety_identifier and user fields set on upstream body', async () => {
+    mockUpstream200();
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    await response.text();
+
+    // Both fields must be present and carry the same string value.
+    const forwarded = getUpstreamBody();
+    expect(forwarded.safety_identifier).toBeDefined();
+    expect(typeof forwarded.safety_identifier).toBe('string');
+    expect(forwarded.user).toBeDefined();
+    expect(forwarded.safety_identifier).toBe(forwarded.user);
+  });
+
+  it('prompt_cache_key set when x-kilocode-taskid present', async () => {
+    mockUpstream200();
+    const request = await authRequest(
+      {
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      },
+      { headers: { 'x-kilocode-taskid': 'task-123' } }
+    );
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    await response.text();
+
+    // Only presence and type are checked, not the key's exact value.
+    const forwarded = getUpstreamBody();
+    expect(forwarded.prompt_cache_key).toBeDefined();
+    expect(typeof forwarded.prompt_cache_key).toBe('string');
+  });
+
+  it('prompt_cache_key absent when no taskid header', async () => {
+    mockUpstream200();
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    await response.text();
+
+    // Without an x-kilocode-taskid header no cache key may be added.
+    const forwarded = getUpstreamBody();
+    expect(forwarded.prompt_cache_key).toBeUndefined();
+  });
+});
diff --git a/llm-gateway/test/integration/byok-errors.test.ts b/llm-gateway/test/integration/byok-errors.test.ts
new file mode 100644
index 000000000..5b72a87b2
--- /dev/null
+++ b/llm-gateway/test/integration/byok-errors.test.ts
@@ -0,0 +1,187 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { dispatch, chatRequest, signToken, VALID_USER, getTableName, chainResult } from './_setup';
+
+// ── Configurable DB ────────────────────────────────────────────────────────────
+
+let _userRows: Record<string, unknown>[] = [];
+
+vi.mock('@kilocode/db/client', () => ({
+  getWorkerDb: () => ({
+    select: () => ({
+      from: (table: unknown) => {
+        switch (getTableName(table)) {
+          case 'kilocode_users':
+            return chainResult(_userRows);
+          case 'credit_transactions':
+            return chainResult([{ count: 1 }]);
+          default: // BYOK providers, custom_llm, organizations, models_by_provider, ...
+            return chainResult([]);
+        }
+      },
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }),
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({
+  userExistsWithCache: async () => true, // every user is reported as existing
+  extractBearerToken: (header: string | undefined) => {
+    const pieces = header ? header.split(' ') : [];
+    const [scheme, credential] = pieces;
+    return pieces.length === 2 && scheme.toLowerCase() === 'bearer' ? credential : null;
+  },
+  verifyKiloToken: async (token: string, secret: string) => {
+    const jose = await import('jose');
+    const verified = await jose.jwtVerify(token, new TextEncoder().encode(secret));
+    return verified.payload as Record<string, unknown>;
+  },
+}));
+// Secrets are compared with plain equality instead of a constant-time check.
+vi.mock('@kilocode/encryption', () => ({
+  timingSafeEqual: (left: string, right: string) => left === right,
+}));
+// Abuse classification and cost reporting are stubbed to resolve to null.
+vi.mock('../../src/lib/abuse-service', () => ({
+  classifyAbuse: () => Promise.resolve(null),
+  reportAbuseCost: () => Promise.resolve(null),
+}));
+
+// Replace the BYOK lookups with canned results so the test user always has an
+// 'anthropic' key. This skips the DB and crypto work while requests still run
+// through the provider-resolution → proxy → makeErrorReadable chain.
+vi.mock('../../src/lib/byok', async (importOriginal) => {
+  const actual = (await importOriginal()) as Record<string, unknown>;
+  return {
+    ...actual,
+    getModelUserByokProviders: () => Promise.resolve(['anthropic']),
+    getBYOKforUser: async () => [{ decryptedAPIKey: 'sk-test-byok', providerId: 'anthropic' }],
+    getBYOKforOrganization: () => Promise.resolve(null),
+  };
+});
+
+// Polyfill for Node: install a setTimeout-backed `scheduler.wait` unless the
+// runtime already exposes a truthy global `scheduler`.
+(globalThis as Record<string, unknown>).scheduler ||= {
+  // Resolves with no value once `ms` milliseconds have elapsed.
+  wait: (ms: number) => new Promise(resolve => setTimeout(resolve, ms)),
+};
+
+const realFetch = globalThis.fetch;
+let fetchMock: ReturnType<typeof vi.fn>;
+// Every test starts with one valid user and a fresh fetch stub.
+beforeEach(() => {
+  fetchMock = vi.fn();
+  globalThis.fetch = fetchMock;
+  _userRows = [{ ...VALID_USER }];
+});
+afterEach(() => {
+  vi.restoreAllMocks();
+  globalThis.fetch = realFetch;
+});
+
+// Builds a chat request carrying a signed token for kiloUserId 'user-1'.
+async function authRequest(body: Record<string, unknown>) {
+  return chatRequest(body, { token: await signToken({ kiloUserId: 'user-1' }) });
+}
+
+describe('BYOK errors', () => {
+  it('BYOK user: upstream 401 → response with [BYOK] invalid key message', async () => {
+    const upstreamReply = new Response(JSON.stringify({ error: 'Unauthorized' }), {
+      status: 401,
+      headers: { 'content-type': 'application/json' },
+    });
+    fetchMock.mockResolvedValueOnce(upstreamReply);
+
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    // The upstream status is kept; the error text is what gets rewritten.
+    expect(response.status).toBe(401);
+    const { error }: { error: string } = await response.json();
+    // The message is tagged as BYOK and mentions that the key is invalid.
+    expect(error).toContain('[BYOK]');
+    expect(error).toContain('invalid');
+  });
+
+  it('BYOK user: upstream 402 → response with [BYOK] insufficient funds', async () => {
+    const upstreamReply = new Response(JSON.stringify({ error: 'Payment Required' }), {
+      status: 402,
+      headers: { 'content-type': 'application/json' },
+    });
+    fetchMock.mockResolvedValueOnce(upstreamReply);
+
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    // The upstream status is kept; the error text is what gets rewritten.
+    expect(response.status).toBe(402);
+    const { error }: { error: string } = await response.json();
+    // The message is tagged as BYOK and mentions insufficient funds.
+    expect(error).toContain('[BYOK]');
+    expect(error).toContain('insufficient funds');
+  });
+
+  it('BYOK user: upstream 403 → response with [BYOK] no permission', async () => {
+    const upstreamReply = new Response(JSON.stringify({ error: 'Forbidden' }), {
+      status: 403,
+      headers: { 'content-type': 'application/json' },
+    });
+    fetchMock.mockResolvedValueOnce(upstreamReply);
+
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    // The upstream status is kept; the error text is what gets rewritten.
+    expect(response.status).toBe(403);
+    const { error }: { error: string } = await response.json();
+    // The message is tagged as BYOK and mentions the missing permission.
+    expect(error).toContain('[BYOK]');
+    expect(error).toContain('permission');
+  });
+
+  it('BYOK user: upstream 429 → response with [BYOK] rate limit', async () => {
+    const upstreamReply = new Response(JSON.stringify({ error: 'Too Many Requests' }), {
+      status: 429,
+      headers: { 'content-type': 'application/json' },
+    });
+    fetchMock.mockResolvedValueOnce(upstreamReply);
+
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    // The upstream status is kept; the error text is what gets rewritten.
+    expect(response.status).toBe(429);
+    const { error }: { error: string } = await response.json();
+    // The message is tagged as BYOK and mentions the rate limit.
+    expect(error).toContain('[BYOK]');
+    expect(error).toContain('rate limit');
+  });
+
+  it('BYOK user: upstream 402 is NOT converted to 503 (only non-BYOK gets 503)', async () => {
+    const upstreamReply = new Response(JSON.stringify({ error: 'Payment Required' }), {
+      status: 402,
+      headers: { 'content-type': 'application/json' },
+    });
+    fetchMock.mockResolvedValueOnce(upstreamReply);
+
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    // With a BYOK key the upstream 402 must be passed through unchanged; the
+    // 402→503 conversion applies only to non-BYOK requests. The second
+    // assertion is implied by the first and just spells out that intent.
+    expect(response.status).toBe(402);
+    expect(response.status).not.toBe(503);
+  });
+});
diff --git a/llm-gateway/test/integration/error-handling.test.ts b/llm-gateway/test/integration/error-handling.test.ts
new file mode 100644
index 000000000..6c51361b5
--- /dev/null
+++ b/llm-gateway/test/integration/error-handling.test.ts
@@ -0,0 +1,173 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { dispatch, chatRequest, signToken, VALID_USER, getTableName, chainResult } from './_setup';
+
+// ── Configurable DB ────────────────────────────────────────────────────────────
+
+let _userRows: Record<string, unknown>[] = [];
+
+vi.mock('@kilocode/db/client', () => ({
+  getWorkerDb: () => ({
+    select: () => ({
+      from: (table: unknown) => {
+        const name = getTableName(table);
+        if (name === 'kilocode_users') return chainResult(_userRows);
+        if (name === 'credit_transactions') return chainResult([{ count: 1 }]);
+        if (name === 'model_user_byok_providers') return chainResult([]);
+        if (name === 'custom_llm') return chainResult([]);
+        if (name === 'organizations') return chainResult([]);
+        if (name === 'models_by_provider') return chainResult([]);
+        return chainResult([]);
+      },
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }),
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({
+  userExistsWithCache: async () => true,
+  extractBearerToken: (header: string | undefined) => {
+    if (!header) return null;
+    const parts = header.split(' ');
+    return parts.length === 2 && parts[0].toLowerCase() === 'bearer' ? parts[1] : null;
+  },
+  verifyKiloToken: async (token: string, secret: string) => {
+    const { jwtVerify } = await import('jose');
+    const { payload } = await jwtVerify(token, new TextEncoder().encode(secret));
+    return payload as Record<string, unknown>;
+  },
+}));
+
+vi.mock('@kilocode/encryption', () => ({
+  timingSafeEqual: (a: string, b: string) => a === b,
+}));
+
+vi.mock('../../src/lib/abuse-service', () => ({
+  classifyAbuse: async () => null,
+  reportAbuseCost: async () => null,
+}));
+
+// Spy for Sentry captureException. The mocked module forwards through an arrow so the spy is read at call time.
+const captureExceptionSpy = vi.fn();
+vi.mock('../../src/lib/sentry', () => ({
+  SENTRY_DSN: 'https://fake@sentry.io/123',
+  captureException: (...args: unknown[]) => captureExceptionSpy(...args) as void,
+}));
+
+// Stub @sentry/cloudflare to prevent real Sentry initialization: withSentry hands back the handler as-is.
+vi.mock('@sentry/cloudflare', () => ({
+  withSentry: (_config: unknown, handler: { fetch: unknown }) => handler,
+  captureException: () => {},
+}));
+
+// Polyfill scheduler.wait for Node: install a setTimeout-backed wait(ms) only if no global scheduler exists
+if (!(globalThis as Record<string, unknown>).scheduler) {
+  (globalThis as Record<string, unknown>).scheduler = {
+    wait: (ms: number) => new Promise(r => setTimeout(r, ms)),
+  };
+}
+
+const realFetch = globalThis.fetch;
+let fetchMock: ReturnType<typeof vi.fn>;
+
+beforeEach(() => {
+  _userRows = [{ ...VALID_USER }];
+  captureExceptionSpy.mockClear();
+  fetchMock = vi.fn();
+  globalThis.fetch = fetchMock;
+});
+afterEach(() => {
+  globalThis.fetch = realFetch;
+  vi.restoreAllMocks();
+});
+
+async function authRequest(body: Record<string, unknown>) {
+  const token = await signToken({ kiloUserId: 'user-1' });
+  return chatRequest(body, { token });
+}
+
+describe('error handling', () => {
+  it('unhandled middleware exception returns 500 Internal server error', async () => {
+    // Make the mocked fetch reject once with an Error so the request fails inside the gateway
+    fetchMock.mockRejectedValueOnce(new Error('network failure'));
+
+    const res = await dispatch(
+      await authRequest({
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      })
+    );
+    expect(res.status).toBe(500);
+    const body: { error: string } = await res.json();
+    expect(body.error).toContain('Internal server error');
+  });
+
+  it('captureException called for upstream 5xx', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify({ error: 'Internal Server Error' }), {
+        status: 500,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const res = await dispatch(
+      await authRequest({
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      })
+    );
+    expect(res.status).toBe(500);
+    await res.text();
+    // Wait 100 ms (a timer tick, not just microtasks) so deferred work (presumably waitUntil tasks) can finish
+    await new Promise(r => setTimeout(r, 100));
+
+    expect(captureExceptionSpy).toHaveBeenCalled();
+    const err = captureExceptionSpy.mock.calls[0][0] as Error;
+    expect(err).toBeInstanceOf(Error);
+    expect(err.message).toContain('500');
+  });
+
+  it('captureException NOT called for upstream 4xx (non-402)', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify({ error: 'Bad Request' }), {
+        status: 400,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const res = await dispatch(
+      await authRequest({
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      })
+    );
+    expect(res.status).toBe(400);
+    await res.text();
+    await new Promise(r => setTimeout(r, 100));
+
+    // After the 100 ms wait the spy must still be untouched: 4xx is not reported (only 5xx and 402→503 are)
+    expect(captureExceptionSpy).not.toHaveBeenCalled();
+  });
+
+  it('captureException called for 402→503 conversion', async () => {
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify({ error: 'Payment Required' }), {
+        status: 402,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const res = await dispatch(
+      await authRequest({
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      })
+    );
+    expect(res.status).toBe(503);
+    // NOTE(review): no settle delay here, unlike the upstream-5xx test — confirm the capture is not deferred
+    expect(captureExceptionSpy).toHaveBeenCalled();
+    const err = captureExceptionSpy.mock.calls[0][0] as Error;
+    expect(err).toBeInstanceOf(Error);
+    expect(err.message).toContain('402');
+  });
+});
diff --git a/llm-gateway/test/integration/extract-ip.test.ts b/llm-gateway/test/integration/extract-ip.test.ts
new file mode 100644
index 000000000..d79af1ae4
--- /dev/null
+++ b/llm-gateway/test/integration/extract-ip.test.ts
@@ -0,0 +1,72 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { dispatch, chainResult } from './_setup';
+
+vi.mock('@kilocode/db/client', () => ({
+  getWorkerDb: () => ({
+    select: () => ({
+      from: () => chainResult([]),
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }),
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({
+  userExistsWithCache: async () => true,
+  extractBearerToken: (header: string | undefined) => {
+    if (!header) return null;
+    const parts = header.split(' ');
+    return parts.length === 2 && parts[0].toLowerCase() === 'bearer' ? parts[1] : null;
+  },
+  verifyKiloToken: async () => {
+    throw new Error('should not be called directly');
+  },
+}));
+
+vi.mock('../../src/lib/abuse-service', () => ({
+  classifyAbuse: async () => null,
+  reportAbuseCost: async () => null,
+}));
+
+const realFetch = globalThis.fetch;
+beforeEach(() => {
+  globalThis.fetch = vi.fn();
+});
+afterEach(() => {
+  globalThis.fetch = realFetch;
+  vi.restoreAllMocks();
+});
+
+describe('extractIp', () => {
+  it('returns 400 when both CF-Connecting-IP and x-forwarded-for are absent', async () => {
+    const req = new Request('http://localhost/api/gateway/chat/completions', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        model: 'corethink:free',
+        messages: [{ role: 'user', content: 'hi' }],
+      }),
+    });
+    const res = await dispatch(req);
+    expect(res.status).toBe(400);
+    const body = await res.json();
+    expect(body).toEqual({ error: 'Unable to determine client IP' });
+  });
+
+  it('proceeds past IP check when only x-forwarded-for is present', async () => {
+    const req = new Request('http://localhost/api/gateway/chat/completions', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'x-forwarded-for': '5.6.7.8, 9.10.11.12',
+      },
+      body: JSON.stringify({
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      }),
+    });
+    const res = await dispatch(req);
+    // Should proceed past IP extraction. Without auth, a paid model → 401
+    expect(res.status).toBe(401);
+  });
+});
diff --git a/llm-gateway/test/integration/free-model-rate-limit.test.ts b/llm-gateway/test/integration/free-model-rate-limit.test.ts
new file mode 100644
index 000000000..62f9b6643
--- /dev/null
+++ b/llm-gateway/test/integration/free-model-rate-limit.test.ts
@@ -0,0 +1,74 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { dispatch, chatRequest, makeFakeDONamespace, chainResult } from './_setup';
+
+vi.mock('@kilocode/db/client', () => ({
+  getWorkerDb: () => ({
+    select: () => ({
+      from: () => chainResult([]),
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }),
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({
+  userExistsWithCache: async () => true,
+  extractBearerToken: (header: string | undefined) => {
+    if (!header) return null;
+    const parts = header.split(' ');
+    return parts.length === 2 && parts[0].toLowerCase() === 'bearer' ? parts[1] : null;
+  },
+  verifyKiloToken: async () => {
+    throw new Error('should not be called directly');
+  },
+}));
+
+vi.mock('../../src/lib/abuse-service', () => ({
+  classifyAbuse: async () => null,
+  reportAbuseCost: async () => null,
+}));
+
+const realFetch = globalThis.fetch;
+beforeEach(() => {
+  globalThis.fetch = vi.fn();
+});
+afterEach(() => {
+  globalThis.fetch = realFetch;
+  vi.restoreAllMocks();
+});
+
+describe('freeModelRateLimit', () => {
+  it('returns 429 for Kilo free model when DO reports blocked', async () => {
+    const doNamespace = makeFakeDONamespace({ freeModelBlocked: new Set(['1.2.3.4']) });
+    const res = await dispatch(
+      chatRequest({
+        model: 'corethink:free',
+        messages: [{ role: 'user', content: 'hi' }],
+      }),
+      { RATE_LIMIT_DO: doNamespace }
+    );
+    expect(res.status).toBe(429);
+    const body = await res.json();
+    expect(body).toEqual({
+      error: 'Rate limit exceeded',
+      message:
+        'Free model usage limit reached. Please try again later or upgrade to a paid model.',
+    });
+  });
+
+  it('skips Kilo-specific rate limit for non-Kilo :free model', async () => {
+    // some-model:free is not a Kilo free model, so freeModelRateLimit should be skipped.
+    // Even if DO would block, the middleware should not check it.
+    const doNamespace = makeFakeDONamespace({ freeModelBlocked: new Set(['1.2.3.4']) });
+    const res = await dispatch(
+      chatRequest({
+        model: 'some-vendor/some-model:free',
+        messages: [{ role: 'user', content: 'hi' }],
+      }),
+      { RATE_LIMIT_DO: doNamespace }
+    );
+    // Non-Kilo :free model without auth → anonymous gate allows (it's a free model)
+    // Then continues down the chain. Should NOT be 429.
+    expect(res.status).not.toBe(429);
+  });
+});
diff --git a/llm-gateway/test/integration/free-model-rewrite.test.ts b/llm-gateway/test/integration/free-model-rewrite.test.ts
new file mode 100644
index 000000000..a35dee8f8
--- /dev/null
+++ b/llm-gateway/test/integration/free-model-rewrite.test.ts
@@ -0,0 +1,350 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import {
+  dispatch,
+  chatRequest,
+  signToken,
+  VALID_USER,
+  sseChunk,
+  sseDone,
+  readSSEEvents,
+  getTableName,
+  chainResult,
+} from './_setup';
+
+// ── Configurable DB ────────────────────────────────────────────────────────────
+
+let _userRows: Record<string, unknown>[] = [];
+
+vi.mock('@kilocode/db/client', () => ({
+  getWorkerDb: () => ({
+    select: () => ({
+      from: (table: unknown) => {
+        const name = getTableName(table);
+        if (name === 'kilocode_users') return chainResult(_userRows);
+        if (name === 'credit_transactions') return chainResult([{ count: 1 }]);
+        if (name === 'model_user_byok_providers') return chainResult([]);
+        if (name === 'custom_llm') return chainResult([]);
+        if (name === 'organizations') return chainResult([]);
+        if (name === 'models_by_provider') return chainResult([]);
+        return chainResult([]);
+      },
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }),
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({
+  userExistsWithCache: async () => true,
+  extractBearerToken: (header: string | undefined) => {
+    if (!header) return null;
+    const parts = header.split(' ');
+    return parts.length === 2 && parts[0].toLowerCase() === 'bearer' ? parts[1] : null;
+  },
+  verifyKiloToken: async (token: string, secret: string) => {
+    const { jwtVerify } = await import('jose');
+    const { payload } = await jwtVerify(token, new TextEncoder().encode(secret));
+    return payload as Record<string, unknown>;
+  },
+}));
+
+vi.mock('@kilocode/encryption', () => ({
+  timingSafeEqual: (a: string, b: string) => a === b,
+}));
+
+vi.mock('../../src/lib/abuse-service', () => ({
+  classifyAbuse: async () => null,
+  reportAbuseCost: async () => null,
+}));
+
+// Polyfill scheduler.wait for Node: install a setTimeout-backed wait(ms) only if no global scheduler exists
+if (!(globalThis as Record<string, unknown>).scheduler) {
+  (globalThis as Record<string, unknown>).scheduler = {
+    wait: (ms: number) => new Promise(r => setTimeout(r, ms)),
+  };
+}
+
+const realFetch = globalThis.fetch;
+let fetchMock: ReturnType<typeof vi.fn>;
+
+beforeEach(() => {
+  _userRows = [{ ...VALID_USER }];
+  fetchMock = vi.fn();
+  globalThis.fetch = fetchMock;
+});
+afterEach(() => {
+  globalThis.fetch = realFetch;
+  vi.restoreAllMocks();
+});
+
+async function authRequest(
+  body: Record<string, unknown>,
+  opts: { headers?: Record<string, string> } = {}
+) {
+  const token = await signToken({ kiloUserId: 'user-1' });
+  return chatRequest(body, { token, ...opts });
+}
+
+describe('free model rewrite', () => {
+  it('auth user + corethink:free JSON: model rewritten, cost stripped', async () => {
+    const upstreamBody = {
+      id: 'chatcmpl-1',
+      model: 'corethink-internal',
+      choices: [{ message: { role: 'assistant', content: 'Hello!' } }],
+      usage: { prompt_tokens: 10, completion_tokens: 5, cost: 0.001 },
+    };
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify(upstreamBody), {
+        status: 200,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const res = await dispatch(
+      await authRequest({
+        model: 'corethink:free',
+        messages: [{ role: 'user', content: 'hi' }],
+      })
+    );
+    expect(res.status).toBe(200);
+    const body: { model: string; usage: { cost?: number } } = await res.json();
+    expect(body.model).toBe('corethink:free');
+    expect(body.usage.cost).toBeUndefined();
+  });
+
+  it('auth user + corethink:free SSE: model rewritten in every chunk', async () => {
+    const sseBody =
+      sseChunk({
+        id: 'chatcmpl-1',
+        model: 'corethink-internal',
+        choices: [{ delta: { content: 'Hi' } }],
+      }) +
+      sseChunk({
+        id: 'chatcmpl-1',
+        model: 'corethink-internal',
+        choices: [],
+        usage: { prompt_tokens: 10, completion_tokens: 5, cost: 0.001 },
+      }) +
+      sseDone();
+
+    fetchMock.mockResolvedValueOnce(
+      new Response(sseBody, {
+        status: 200,
+        headers: { 'content-type': 'text/event-stream' },
+      })
+    );
+
+    const res = await dispatch(
+      await authRequest({
+        model: 'corethink:free',
+        messages: [{ role: 'user', content: 'hi' }],
+        stream: true,
+      })
+    );
+    expect(res.status).toBe(200);
+    const events = await readSSEEvents(res);
+    for (const event of events) {
+      const e = event as { model: string; usage?: { cost?: number } };
+      expect(e.model).toBe('corethink:free');
+      if (e.usage) {
+        expect(e.usage.cost).toBeUndefined();
+      }
+    }
+  });
+
+  it('reasoning_content converted to reasoning + reasoning_details in JSON', async () => {
+    const upstreamBody = {
+      id: 'chatcmpl-1',
+      model: 'corethink-internal',
+      choices: [
+        {
+          message: {
+            role: 'assistant',
+            content: 'Answer here',
+            reasoning_content: 'Let me think step by step...',
+          },
+        },
+      ],
+      usage: { prompt_tokens: 10, completion_tokens: 5 },
+    };
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify(upstreamBody), {
+        status: 200,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const res = await dispatch(
+      chatRequest({
+        model: 'corethink:free',
+        messages: [{ role: 'user', content: 'think about this' }],
+      })
+    );
+    expect(res.status).toBe(200);
+    const body = await res.json();
+    const message = (body as { choices: Array<{ message: Record<string, unknown> }> }).choices[0]
+      .message;
+    expect(message.reasoning).toBe('Let me think step by step...');
+    expect(message.reasoning_details).toEqual([
+      { type: 'reasoning.text', text: 'Let me think step by step...' },
+    ]);
+    expect(message.reasoning_content).toBeUndefined();
+  });
+
+  it('reasoning_content converted to reasoning + reasoning_details in SSE delta', async () => {
+    const sseBody =
+      sseChunk({
+        id: 'chatcmpl-1',
+        model: 'corethink-internal',
+        choices: [
+          {
+            delta: {
+              reasoning_content: 'Step 1: analyze...',
+              content: '',
+            },
+          },
+        ],
+      }) + sseDone();
+
+    fetchMock.mockResolvedValueOnce(
+      new Response(sseBody, {
+        status: 200,
+        headers: { 'content-type': 'text/event-stream' },
+      })
+    );
+
+    const res = await dispatch(
+      chatRequest({
+        model: 'corethink:free',
+        messages: [{ role: 'user', content: 'think' }],
+        stream: true,
+      })
+    );
+    expect(res.status).toBe(200);
+    const events = await readSSEEvents(res);
+    const first = events[0] as {
+      choices: Array<{
+        delta: {
+          reasoning?: string;
+          reasoning_details?: Array<{ type: string; text: string }>;
+          reasoning_content?: string;
+        };
+      }>;
+    };
+    const delta = first.choices[0].delta;
+    expect(delta.reasoning).toBe('Step 1: analyze...');
+    expect(delta.reasoning_details).toEqual([
+      { type: 'reasoning.text', text: 'Step 1: analyze...' },
+    ]);
+    expect(delta.reasoning_content).toBeUndefined();
+  });
+
+  it('cost, cost_details, is_byok stripped from JSON usage', async () => {
+    const upstreamBody = {
+      id: 'chatcmpl-1',
+      model: 'corethink-internal',
+      choices: [{ message: { role: 'assistant', content: 'ok' } }],
+      usage: {
+        prompt_tokens: 10,
+        completion_tokens: 5,
+        cost: 0.001,
+        cost_details: { input: 0.0005, output: 0.0005 },
+        is_byok: false,
+      },
+    };
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify(upstreamBody), {
+        status: 200,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const res = await dispatch(
+      chatRequest({
+        model: 'corethink:free',
+        messages: [{ role: 'user', content: 'hi' }],
+      })
+    );
+    expect(res.status).toBe(200);
+    const body = await res.json();
+    const usage = (body as { usage: Record<string, unknown> }).usage;
+    expect(usage.cost).toBeUndefined();
+    expect(usage.cost_details).toBeUndefined();
+    expect(usage.is_byok).toBeUndefined();
+    // Preserved fields should remain
+    expect(usage.prompt_tokens).toBe(10);
+    expect(usage.completion_tokens).toBe(5);
+  });
+
+  it('cost, cost_details, is_byok stripped from SSE final chunk usage', async () => {
+    const sseBody =
+      sseChunk({
+        id: 'chatcmpl-1',
+        model: 'corethink-internal',
+        choices: [{ delta: { content: 'Hi' } }],
+      }) +
+      sseChunk({
+        id: 'chatcmpl-1',
+        model: 'corethink-internal',
+        choices: [],
+        usage: {
+          prompt_tokens: 10,
+          completion_tokens: 5,
+          cost: 0.001,
+          cost_details: { input: 0.0005 },
+          is_byok: false,
+        },
+      }) +
+      sseDone();
+
+    fetchMock.mockResolvedValueOnce(
+      new Response(sseBody, {
+        status: 200,
+        headers: { 'content-type': 'text/event-stream' },
+      })
+    );
+
+    const res = await dispatch(
+      chatRequest({
+        model: 'corethink:free',
+        messages: [{ role: 'user', content: 'hi' }],
+        stream: true,
+      })
+    );
+    expect(res.status).toBe(200);
+    const events = await readSSEEvents(res);
+    const usageEvent = events.find(
+      e => (e as { usage?: unknown }).usage !== undefined
+    ) as { usage: Record<string, unknown> } | undefined;
+    expect(usageEvent).toBeDefined();
+    expect(usageEvent!.usage.cost).toBeUndefined();
+    expect(usageEvent!.usage.cost_details).toBeUndefined();
+    expect(usageEvent!.usage.is_byok).toBeUndefined();
+    expect(usageEvent!.usage.prompt_tokens).toBe(10);
+  });
+
+  it('giga-potato response model rewritten from internal ep-* to giga-potato', async () => {
+    const upstreamBody = {
+      id: 'chatcmpl-1',
+      model: 'ep-20260109111813-hztxv',
+      choices: [{ message: { role: 'assistant', content: 'ok' } }],
+      usage: { prompt_tokens: 10, completion_tokens: 5, cost: 0.0 },
+    };
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify(upstreamBody), {
+        status: 200,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const res = await dispatch(
+      chatRequest({
+        model: 'giga-potato',
+        messages: [{ role: 'user', content: 'hi' }],
+      })
+    );
+    expect(res.status).toBe(200);
+    const body: { model: string } = await res.json();
+    expect(body.model).toBe('giga-potato');
+  });
+});
diff --git a/llm-gateway/test/integration/happy-path.test.ts b/llm-gateway/test/integration/happy-path.test.ts
new file mode 100644
index 000000000..8d9f9138b
--- /dev/null
+++ b/llm-gateway/test/integration/happy-path.test.ts
@@ -0,0 +1,165 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { dispatch, chatRequest, signToken, VALID_USER, getTableName, chainResult } from './_setup';
+
+// ── DB mock ────────────────────────────────────────────────────────────────────
+
+let _userRows: Record<string, unknown>[] = [];
+
+vi.mock('@kilocode/db/client', () => ({
+  getWorkerDb: () => ({
+    select: () => ({
+      from: (table: unknown) => {
+        const name = getTableName(table);
+        if (name === 'kilocode_users') return chainResult(_userRows);
+        if (name === 'credit_transactions') return chainResult([{ count: 1 }]);
+        if (name === 'model_user_byok_providers') return chainResult([]);
+        if (name === 'custom_llm') return chainResult([]);
+        if (name === 'organizations') return chainResult([]);
+        if (name === 'models_by_provider') return chainResult([]);
+        return chainResult([]);
+      },
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }),
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({
+  userExistsWithCache: async () => true,
+  extractBearerToken: (header: string | undefined) => {
+    if (!header) return null;
+    const parts = header.split(' ');
+    return parts.length === 2 && parts[0].toLowerCase() === 'bearer' ? parts[1] : null;
+  },
+  verifyKiloToken: async (token: string, secret: string) => {
+    const { jwtVerify } = await import('jose');
+    const { payload } = await jwtVerify(token, new TextEncoder().encode(secret));
+    return payload as Record<string, unknown>;
+  },
+}));
+
+vi.mock('@kilocode/encryption', () => ({
+  timingSafeEqual: (a: string, b: string) => a === b,
+}));
+
+vi.mock('../../src/lib/abuse-service', () => ({
+  classifyAbuse: async () => null,
+  reportAbuseCost: async () => null,
+}));
+
+// Polyfill scheduler.wait for Node: install a setTimeout-backed wait(ms) only if no global scheduler exists
+if (!(globalThis as Record<string, unknown>).scheduler) {
+  (globalThis as Record<string, unknown>).scheduler = {
+    wait: (ms: number) => new Promise(r => setTimeout(r, ms)),
+  };
+}
+
+const realFetch = globalThis.fetch;
+let fetchMock: ReturnType<typeof vi.fn>;
+
+beforeEach(() => {
+  _userRows = [{ ...VALID_USER }];
+  fetchMock = vi.fn();
+  globalThis.fetch = fetchMock;
+});
+afterEach(() => {
+  globalThis.fetch = realFetch;
+  vi.restoreAllMocks();
+});
+
+describe('happy path', () => {
+  it('anonymous + corethink:free → 200, model rewritten, upstream URL contains corethink', async () => {
+    const upstreamBody = {
+      id: 'chatcmpl-1',
+      model: 'corethink-internal',
+      choices: [{ message: { role: 'assistant', content: 'Hello from corethink!' } }],
+      usage: { prompt_tokens: 10, completion_tokens: 5, cost: 0.001 },
+    };
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify(upstreamBody), {
+        status: 200,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const res = await dispatch(
+      chatRequest({
+        model: 'corethink:free',
+        messages: [{ role: 'user', content: 'hi' }],
+      })
+    );
+    expect(res.status).toBe(200);
+
+    expect(fetchMock).toHaveBeenCalled();
+    const fetchUrl = fetchMock.mock.calls[0][0] as string;
+    expect(fetchUrl).toContain('corethink');
+
+    const body: { model: string; usage: { cost?: number } } = await res.json();
+    expect(body.model).toBe('corethink:free');
+    expect(body.usage.cost).toBeUndefined();
+  });
+
+  it('authenticated + anthropic/claude-sonnet-4-20250514 → 200, upstream URL contains openrouter', async () => {
+    const upstreamBody = {
+      id: 'chatcmpl-2',
+      model: 'anthropic/claude-sonnet-4-20250514',
+      choices: [{ message: { role: 'assistant', content: 'Hello from Claude!' } }],
+      usage: { prompt_tokens: 10, completion_tokens: 5 },
+    };
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify(upstreamBody), {
+        status: 200,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const token = await signToken({ kiloUserId: 'user-1' });
+    const res = await dispatch(
+      chatRequest(
+        {
+          model: 'anthropic/claude-sonnet-4-20250514',
+          messages: [{ role: 'user', content: 'hi' }],
+        },
+        { token }
+      )
+    );
+    expect(res.status).toBe(200);
+
+    expect(fetchMock).toHaveBeenCalled();
+    const fetchUrl = fetchMock.mock.calls[0][0] as string;
+    expect(fetchUrl).toContain('openrouter.ai');
+
+    const body: { model: string } = await res.json();
+    expect(body.model).toBe('anthropic/claude-sonnet-4-20250514');
+  });
+
+  it('anonymous + giga-potato → 200, upstream URL contains gigapotato', async () => {
+    const upstreamBody = {
+      id: 'chatcmpl-3',
+      model: 'ep-20260109111813-hztxv',
+      choices: [{ message: { role: 'assistant', content: 'Hello from giga-potato!' } }],
+      usage: { prompt_tokens: 10, completion_tokens: 5, cost: 0 },
+    };
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify(upstreamBody), {
+        status: 200,
+        headers: { 'content-type': 'application/json' },
+      })
+    );
+
+    const res = await dispatch(
+      chatRequest({
+        model: 'giga-potato',
+        messages: [{ role: 'user', content: 'hi' }],
+      })
+    );
+    expect(res.status).toBe(200);
+
+    expect(fetchMock).toHaveBeenCalled();
+    const fetchUrl = fetchMock.mock.calls[0][0] as string;
+    expect(fetchUrl).toContain('gigapotato');
+
+    const body: { model: string } = await res.json();
+    expect(body.model).toBe('giga-potato');
+  });
+});
diff --git a/llm-gateway/test/integration/parse-body.test.ts b/llm-gateway/test/integration/parse-body.test.ts
new file mode 100644
index 000000000..d8710a105
--- /dev/null
+++ b/llm-gateway/test/integration/parse-body.test.ts
@@ -0,0 +1,89 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { dispatch, chatRequest, chainResult } from './_setup';
+
+vi.mock('@kilocode/db/client', () => ({
+  getWorkerDb: () => ({
+    select: () => ({
+      from: () => chainResult([]),
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }),
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({
+  userExistsWithCache: async () => true,
+  extractBearerToken: (header: string | undefined) => {
+    if (!header) return null;
+    const parts = header.split(' ');
+    return parts.length === 2 && parts[0].toLowerCase() === 'bearer' ? parts[1] : null;
+  },
+  verifyKiloToken: async () => {
+    throw new Error('should not be called directly');
+  },
+}));
+
+vi.mock('../../src/lib/abuse-service', () => ({
+  classifyAbuse: async () => null,
+  reportAbuseCost: async () => null,
+}));
+
+const realFetch = globalThis.fetch;
+beforeEach(() => {
+  globalThis.fetch = vi.fn();
+});
+afterEach(() => {
+  globalThis.fetch = realFetch;
+  vi.restoreAllMocks();
+});
+
+describe('parseBody', () => {
+  it('returns 400 for non-JSON body', async () => {
+    const req = new Request('http://localhost/api/gateway/chat/completions', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json', 'CF-Connecting-IP': '1.2.3.4' },
+      body: 'not json',
+    });
+    const res = await dispatch(req);
+    expect(res.status).toBe(400);
+    const body = await res.json();
+    expect(body).toEqual({
+      error: 'Invalid request',
+      message: 'Could not parse request body. Please ensure it is valid JSON.',
+    });
+  });
+
+  it('returns 404 for missing model field', async () => {
+    const res = await dispatch(chatRequest({ messages: [{ role: 'user', content: 'hi' }] }));
+    expect(res.status).toBe(404);
+    const body = await res.json();
+    expect(body).toEqual({
+      error: 'Model not found',
+      message: 'The requested model could not be found.',
+    });
+  });
+
+  it('returns 404 for empty string model', async () => {
+    const res = await dispatch(
+      chatRequest({ model: '', messages: [{ role: 'user', content: 'hi' }] })
+    );
+    expect(res.status).toBe(404);
+    const body = await res.json();
+    expect(body).toEqual({
+      error: 'Model not found',
+      message: 'The requested model could not be found.',
+    });
+  });
+
+  it('returns 404 for non-string model', async () => {
+    const res = await dispatch(
+      chatRequest({ model: 123, messages: [{ role: 'user', content: 'hi' }] })
+    );
+    expect(res.status).toBe(404);
+    const body = await res.json();
+    expect(body).toEqual({
+      error: 'Model not found',
+      message: 'The requested model could not be found.',
+    });
+  });
+});
diff --git a/llm-gateway/test/integration/promotion-limit.test.ts b/llm-gateway/test/integration/promotion-limit.test.ts
new file mode 100644
index 000000000..f473e901e
--- /dev/null
+++ b/llm-gateway/test/integration/promotion-limit.test.ts
@@ -0,0 +1,55 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { dispatch, chatRequest, makeFakeDONamespace, chainResult } from './_setup';
+
+vi.mock('@kilocode/db/client', () => ({ // replaces the DB client module for this test file
+  getWorkerDb: () => ({
+    select: () => ({
+      from: () => chainResult([]), // every SELECT goes through chainResult([]) (helper from ./_setup)
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }), // execute() always resolves with zero rows
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({ // only these three exports exist on the mocked module
+  userExistsWithCache: async () => true, // always reports that the user exists
+  extractBearerToken: (header: string | undefined) => { // "<scheme> <token>" -> token when scheme is "bearer" (any case), else null
+    if (!header) return null;
+    const parts = header.split(' ');
+    return parts.length === 2 && parts[0].toLowerCase() === 'bearer' ? parts[1] : null;
+  },
+  verifyKiloToken: async () => { // always rejects with the error below
+    throw new Error('should not be called directly');
+  },
+}));
+
+vi.mock('../../src/lib/abuse-service', () => ({ // abuse-service stubs: both calls resolve to null
+  classifyAbuse: async () => null,
+  reportAbuseCost: async () => null,
+}));
+
+const realFetch = globalThis.fetch; // captured so afterEach can put the original fetch back
+beforeEach(() => {
+  globalThis.fetch = vi.fn(); // fresh mock for every test
+});
+afterEach(() => {
+  globalThis.fetch = realFetch;
+  vi.restoreAllMocks();
+});
+
+describe('promotionLimit', () => {
+  it('returns 401 with PROMOTION_MODEL_LIMIT_REACHED for anonymous + free model when DO promotion blocked', async () => {
+    // Fake Durable Object namespace configured with 1.2.3.4 in its promotionBlocked set.
+    const blockedNamespace = makeFakeDONamespace({ promotionBlocked: new Set(['1.2.3.4']) });
+    const freeModelRequest = chatRequest({
+      model: 'some-vendor/some-model:free',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(freeModelRequest, { RATE_LIMIT_DO: blockedNamespace });
+    expect(response.status).toBe(401);
+    const payload: { error: { code: string; message: string } } = await response.json();
+    const { code, message } = payload.error;
+    expect(code).toBe('PROMOTION_MODEL_LIMIT_REACHED');
+    expect(message).toContain('Sign up for free');
+  });
+});
diff --git a/llm-gateway/test/integration/provider-routing.test.ts b/llm-gateway/test/integration/provider-routing.test.ts
new file mode 100644
index 000000000..e243b2d93
--- /dev/null
+++ b/llm-gateway/test/integration/provider-routing.test.ts
@@ -0,0 +1,224 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import {
+  dispatch,
+  chatRequest,
+  signToken,
+  VALID_USER,
+  getTableName,
+  chainResult,
+} from './_setup';
+
+// ── Configurable DB ────────────────────────────────────────────────────────────
+
+let _userRows: Record<string, unknown>[] = []; // rows served for kilocode_users SELECTs; reassigned in beforeEach
+
+vi.mock('@kilocode/db/client', () => ({ // replaces the DB client module for this test file
+  getWorkerDb: () => ({
+    select: () => ({
+      from: (table: unknown) => { // picks the canned result by table name
+        const name = getTableName(table); // getTableName is a helper imported from ./_setup
+        if (name === 'kilocode_users') return chainResult(_userRows);
+        if (name === 'credit_transactions') return chainResult([{ count: 1 }]); // single row with count 1
+        if (name === 'model_user_byok_providers') return chainResult([]); // same result as the fallback; listed explicitly
+        if (name === 'custom_llm') return chainResult([]);
+        if (name === 'organizations') return chainResult([]);
+        if (name === 'models_by_provider') return chainResult([]);
+        return chainResult([]); // any other table: empty result
+      },
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }), // execute() always resolves with zero rows
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({ // only these three exports exist on the mocked module
+  userExistsWithCache: async () => true, // always reports that the user exists
+  extractBearerToken: (header: string | undefined) => { // "<scheme> <token>" -> token when scheme is "bearer" (any case), else null
+    if (!header) return null;
+    const parts = header.split(' ');
+    return parts.length === 2 && parts[0].toLowerCase() === 'bearer' ? parts[1] : null;
+  },
+  verifyKiloToken: async (token: string, secret: string) => { // verifies the JWT with jose and returns its payload
+    const { jwtVerify } = await import('jose');
+    const { payload } = await jwtVerify(token, new TextEncoder().encode(secret)); // key = UTF-8 bytes of secret
+    return payload as Record<string, unknown>;
+  },
+}));
+
+vi.mock('@kilocode/encryption', () => ({
+  timingSafeEqual: (a: string, b: string) => a === b, // plain equality stand-in (not constant-time)
+}));
+
+vi.mock('../../src/lib/abuse-service', () => ({ // abuse-service stubs: both calls resolve to null
+  classifyAbuse: async () => null,
+  reportAbuseCost: async () => null,
+}));
+
+// Polyfill scheduler.wait for Node: installed only when no global `scheduler` exists yet.
+const schedulerHost = globalThis as Record<string, unknown>;
+if (!schedulerHost.scheduler) {
+  const wait = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
+  schedulerHost.scheduler = { wait };
+}
+
+const realFetch = globalThis.fetch; // captured so afterEach can put the original fetch back
+let fetchMock: ReturnType<typeof vi.fn>; // per-test stand-in for globalThis.fetch
+
+beforeEach(() => {
+  _userRows = [{ ...VALID_USER }]; // shallow copy of the fixture, rebuilt for every test
+  fetchMock = vi.fn();
+  globalThis.fetch = fetchMock;
+});
+afterEach(() => {
+  globalThis.fetch = realFetch;
+  vi.restoreAllMocks();
+});
+
+/** Signs a token for kiloUserId 'user-1' and builds a chat request carrying it plus `opts`. */
+async function authRequest(
+  body: Record<string, unknown>,
+  opts: { headers?: Record<string, string>; path?: string } = {}
+) {
+  return chatRequest(body, { token: await signToken({ kiloUserId: 'user-1' }), ...opts });
+}
+
+/**
+ * Queues one upstream 200 JSON completion as the next result of the mocked fetch.
+ */
+function mockUpstream200() {
+  const init = { status: 200, headers: { 'content-type': 'application/json' } };
+  const payload = JSON.stringify({ choices: [{ message: { content: 'ok' } }] });
+  fetchMock.mockResolvedValueOnce(new Response(payload, init));
+}
+
+function getUpstreamUrl(): string { // URL the gateway passed to the mocked fetch
+  return fetchMock.mock.calls[0][0] as string; // first argument of the first recorded call; cast is unchecked
+}
+
+function getUpstreamInit(): RequestInit & { headers: Headers } { // init object the gateway passed to the mocked fetch
+  return fetchMock.mock.calls[0][1] as RequestInit & { headers: Headers }; // second argument of the first recorded call
+}
+
+describe('provider routing', () => {
+  it('corethink:free routes to api.corethink.ai/v1/code/chat/completions', async () => {
+    // Queue the canned upstream 200, then send the request without a token option.
+    mockUpstream200();
+    const request = chatRequest({
+      model: 'corethink:free',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    // Consume the body before inspecting the recorded fetch call.
+    await response.text();
+
+    expect(getUpstreamUrl()).toContain('api.corethink.ai/v1/code/chat/completions');
+  });
+
+  it('giga-potato routes to gigapotato API URL', async () => {
+    mockUpstream200();
+    const request = chatRequest({
+      model: 'giga-potato',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    await response.text();
+
+    // Host and path are asserted separately, so anything may sit between them.
+    const upstreamUrl = getUpstreamUrl();
+    expect(upstreamUrl).toContain('gigapotato.example.com');
+    expect(upstreamUrl).toContain('/chat/completions');
+  });
+
+  it('generic :free model routes to openrouter.ai/api/v1/chat/completions', async () => {
+    mockUpstream200();
+    // Use a :free model that is NOT rate-limited, NOT a Kilo free model, NOT in preferredModels
+    const request = chatRequest({
+      model: 'deepseek/deepseek-v3-0324:free',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    await response.text();
+
+    // Only the host and the trailing path are checked, not the full URL from the title.
+    const upstreamUrl = getUpstreamUrl();
+    expect(upstreamUrl).toContain('openrouter.ai');
+    expect(upstreamUrl).toContain('/chat/completions');
+  });
+
+  it('paid model routes to openrouter.ai', async () => {
+    mockUpstream200();
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    await response.text();
+
+    // The authenticated request must have been forwarded to an openrouter.ai chat endpoint.
+    const upstreamUrl = getUpstreamUrl();
+    expect(upstreamUrl).toContain('openrouter.ai');
+    expect(upstreamUrl).toContain('/chat/completions');
+  });
+
+  it('upstream gets Authorization, HTTP-Referer, X-Title, Content-Type headers', async () => {
+    mockUpstream200();
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    await response.text();
+
+    // Re-wrap in Headers so lookups are case-insensitive regardless of how the init stored them.
+    const { headers } = getUpstreamInit();
+    const sent = new Headers(headers);
+    expect(sent.get('Authorization')).toMatch(/^Bearer /);
+    expect(sent.get('HTTP-Referer')).toBe('https://kilocode.ai');
+    expect(sent.get('X-Title')).toBe('Kilo Code');
+    expect(sent.get('Content-Type')).toBe('application/json');
+  });
+
+  it('query string preserved in upstream URL', async () => {
+    mockUpstream200();
+    // The query string travels in the request path handed to authRequest.
+    const body = {
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    };
+    const pathWithQuery = '/api/gateway/chat/completions?foo=bar&baz=1';
+    const request = await authRequest(body, { path: pathWithQuery });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    await response.text();
+
+    // The exact query string must reappear in the URL sent upstream.
+    const upstreamUrl = getUpstreamUrl();
+    expect(upstreamUrl).toContain('?foo=bar&baz=1');
+  });
+
+  it('Kilo free model internal_id replaces public_id in upstream body', async () => {
+    mockUpstream200();
+    const request = chatRequest({
+      model: 'corethink:free',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    await response.text();
+
+    // corethink:free has internal_id 'corethink' — the model sent upstream should be 'corethink'
+    // (parseBody lowercases and the provider-specific logic may strip the :free suffix)
+    const [, upstreamInit] = fetchMock.mock.calls[0] as [string, { body: string }];
+    const forwarded = JSON.parse(upstreamInit.body) as Record<string, unknown>;
+    expect(forwarded.model).not.toContain(':free');
+
+    // The upstream URL should be the corethink endpoint, not openrouter
+    const upstreamUrl = getUpstreamUrl();
+    expect(upstreamUrl).toContain('corethink');
+  });
+});
diff --git a/llm-gateway/test/integration/proxy-upstream.test.ts b/llm-gateway/test/integration/proxy-upstream.test.ts
new file mode 100644
index 000000000..a8e425624
--- /dev/null
+++ b/llm-gateway/test/integration/proxy-upstream.test.ts
@@ -0,0 +1,321 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import {
+  dispatch,
+  chatRequest,
+  signToken,
+  VALID_USER,
+  sseChunk,
+  sseDone,
+  readSSEEvents,
+  getTableName,
+  chainResult,
+} from './_setup';
+
+// ── Configurable DB ────────────────────────────────────────────────────────────
+
+let _userRows: Record<string, unknown>[] = []; // rows served for kilocode_users SELECTs; reassigned in beforeEach
+
+vi.mock('@kilocode/db/client', () => ({ // replaces the DB client module for this test file
+  getWorkerDb: () => ({
+    select: () => ({
+      from: (table: unknown) => { // picks the canned result by table name
+        const name = getTableName(table); // getTableName is a helper imported from ./_setup
+        if (name === 'kilocode_users') return chainResult(_userRows);
+        if (name === 'credit_transactions') return chainResult([{ count: 1 }]); // single row with count 1
+        if (name === 'model_user_byok_providers') return chainResult([]); // same result as the fallback; listed explicitly
+        if (name === 'custom_llm') return chainResult([]);
+        if (name === 'organizations') return chainResult([]);
+        if (name === 'models_by_provider') return chainResult([]);
+        return chainResult([]); // any other table: empty result
+      },
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }), // execute() always resolves with zero rows
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({ // only these three exports exist on the mocked module
+  userExistsWithCache: async () => true, // always reports that the user exists
+  extractBearerToken: (header: string | undefined) => { // "<scheme> <token>" -> token when scheme is "bearer" (any case), else null
+    if (!header) return null;
+    const parts = header.split(' ');
+    return parts.length === 2 && parts[0].toLowerCase() === 'bearer' ? parts[1] : null;
+  },
+  verifyKiloToken: async (token: string, secret: string) => { // verifies the JWT with jose and returns its payload
+    const { jwtVerify } = await import('jose');
+    const { payload } = await jwtVerify(token, new TextEncoder().encode(secret)); // key = UTF-8 bytes of secret
+    return payload as Record<string, unknown>;
+  },
+}));
+
+vi.mock('@kilocode/encryption', () => ({
+  timingSafeEqual: (a: string, b: string) => a === b, // plain equality stand-in (not constant-time)
+}));
+
+vi.mock('../../src/lib/abuse-service', () => ({ // abuse-service stubs: both calls resolve to null
+  classifyAbuse: async () => null,
+  reportAbuseCost: async () => null,
+}));
+
+// Polyfill scheduler.wait for Node: installed only when no global `scheduler` exists yet.
+const schedulerHost = globalThis as Record<string, unknown>;
+if (!schedulerHost.scheduler) {
+  const wait = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
+  schedulerHost.scheduler = { wait };
+}
+
+const realFetch = globalThis.fetch; // captured so afterEach can put the original fetch back
+let fetchMock: ReturnType<typeof vi.fn>; // per-test stand-in for globalThis.fetch
+
+beforeEach(() => {
+  _userRows = [{ ...VALID_USER }]; // shallow copy of the fixture, rebuilt for every test
+  fetchMock = vi.fn();
+  globalThis.fetch = fetchMock;
+});
+afterEach(() => {
+  globalThis.fetch = realFetch;
+  vi.restoreAllMocks();
+});
+
+/** Signs a token for kiloUserId 'user-1' and builds a chat request carrying it plus `opts`. */
+async function authRequest(
+  body: Record<string, unknown>,
+  opts: { headers?: Record<string, string> } = {}
+) {
+  return chatRequest(body, { token: await signToken({ kiloUserId: 'user-1' }), ...opts });
+}
+
+describe('proxy upstream', () => {
+  it('returns 200 JSON for paid model (non-streaming)', async () => {
+    const completion = {
+      id: 'chatcmpl-1',
+      model: 'anthropic/claude-sonnet-4-20250514',
+      choices: [{ message: { role: 'assistant', content: 'Hello!' } }],
+      usage: { prompt_tokens: 10, completion_tokens: 5 },
+    };
+    // The upstream reply carries an x-secret header that must not reach the client.
+    const upstream = new Response(JSON.stringify(completion), {
+      status: 200,
+      headers: {
+        'content-type': 'application/json',
+        'x-secret': 'should-be-stripped',
+        date: 'Mon, 03 Mar 2026 00:00:00 GMT',
+      },
+    });
+    fetchMock.mockResolvedValueOnce(upstream);
+
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    expect(response.headers.get('Content-Encoding')).toBe('identity');
+    expect(response.headers.has('x-secret')).toBe(false);
+    // toMatchObject checks only the listed fields of the returned JSON.
+    const payload = await response.json();
+    expect(payload).toMatchObject({ choices: [{ message: { content: 'Hello!' } }] });
+  });
+
+  it('returns 200 SSE for paid model (streaming)', async () => {
+    // Upstream stream: one content chunk followed by sseDone().
+    const contentChunk = sseChunk({
+      id: 'chatcmpl-1',
+      model: 'anthropic/claude-sonnet-4-20250514',
+      choices: [{ delta: { content: 'Hi' } }],
+    });
+    const upstreamStream = contentChunk + sseDone();
+
+    fetchMock.mockResolvedValueOnce(
+      new Response(upstreamStream, {
+        status: 200,
+        headers: { 'content-type': 'text/event-stream' },
+      })
+    );
+
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+      stream: true,
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    // Only the presence of SSE data lines is asserted here.
+    expect(await response.text()).toContain('data:');
+  });
+
+  it('rewrites model field in 200 JSON for free anonymous model', async () => {
+    // Upstream answers with a different model id and a usage.cost field.
+    const internalCompletion = {
+      id: 'chatcmpl-1',
+      model: 'corethink-internal',
+      choices: [{ message: { role: 'assistant', content: 'Hello!' } }],
+      usage: { prompt_tokens: 10, completion_tokens: 5, cost: 0.001 },
+    };
+    const upstream = new Response(JSON.stringify(internalCompletion), {
+      status: 200,
+      headers: { 'content-type': 'application/json' },
+    });
+    fetchMock.mockResolvedValueOnce(upstream);
+
+    const request = chatRequest({
+      model: 'corethink:free',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    const payload: { model: string; usage: { cost?: number } } = await response.json();
+    // The client must see the requested public id and no cost figure.
+    expect(payload.model).toBe('corethink:free');
+    expect(payload.usage.cost).toBeUndefined();
+  });
+
+  it('rewrites model in SSE chunks for free anonymous model', async () => {
+    const sseBody =
+      sseChunk({
+        id: 'chatcmpl-1',
+        model: 'corethink-internal',
+        choices: [{ delta: { content: 'Hi' } }],
+      }) +
+      sseChunk({
+        id: 'chatcmpl-1',
+        model: 'corethink-internal',
+        choices: [],
+        usage: { prompt_tokens: 10, completion_tokens: 5, cost: 0.001 }, // cost must not reach the client
+      }) +
+      sseDone();
+
+    fetchMock.mockResolvedValueOnce(
+      new Response(sseBody, {
+        status: 200,
+        headers: { 'content-type': 'text/event-stream' },
+      })
+    );
+
+    const res = await dispatch(
+      chatRequest({
+        model: 'corethink:free',
+        messages: [{ role: 'user', content: 'hi' }],
+        stream: true,
+      })
+    );
+    expect(res.status).toBe(200);
+    const events = await readSSEEvents(res);
+    // Require at least one parsed event; otherwise the loop below would pass vacuously.
+    expect(events.length).toBeGreaterThan(0);
+    for (const event of events) {
+      const e = event as { model: string; usage?: { cost?: number } };
+      expect(e.model).toBe('corethink:free');
+      expect(e.usage?.cost).toBeUndefined(); // chunks without usage pass trivially
+    }
+  });
+
+  it('converts 402 to 503 for non-BYOK', async () => {
+    // Upstream reports 402 Payment Required for this request.
+    const paymentRequired = new Response(JSON.stringify({ error: 'Payment Required' }), {
+      status: 402,
+      headers: { 'content-type': 'application/json' },
+    });
+    fetchMock.mockResolvedValueOnce(paymentRequired);
+
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    // The client sees a generic 503 instead of the upstream 402.
+    expect(response.status).toBe(503);
+    const payload = await response.json();
+    expect(payload).toEqual({
+      error: 'Service Unavailable',
+      message: 'The service is temporarily unavailable. Please try again later.',
+    });
+  });
+
+  it('passes through 500 from upstream', async () => {
+    // Upstream fails with a 500 and a JSON error body.
+    const upstreamFailure = new Response(JSON.stringify({ error: 'Internal Server Error' }), {
+      status: 500,
+      headers: { 'content-type': 'application/json' },
+    });
+    fetchMock.mockResolvedValueOnce(upstreamFailure);
+
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    // Only the status code is asserted; the body is not inspected.
+    expect(response.status).toBe(500);
+  });
+
+  it('passes through 400 from upstream', async () => {
+    // Upstream rejects the request with a 400 and a JSON error body.
+    const upstreamRejection = new Response(JSON.stringify({ error: 'Bad Request' }), {
+      status: 400,
+      headers: { 'content-type': 'application/json' },
+    });
+    fetchMock.mockResolvedValueOnce(upstreamRejection);
+
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    // Only the status code is asserted; the body is not inspected.
+    expect(response.status).toBe(400);
+  });
+
+  it('returns context_length error for Kilo free model exceeding context', async () => {
+    // corethink:free has context_length: 78_000
+    // Need estimated token count > 78_000: JSON.stringify(request).length / 4 + max_output_tokens
+    // 320_000 characters / 4 = 80_000, which clears that threshold.
+    const oversizedContent = 'x'.repeat(320_000);
+    const upstream = new Response(JSON.stringify({ error: { message: 'some upstream error' } }), {
+      status: 400,
+      headers: { 'content-type': 'application/json' },
+    });
+    fetchMock.mockResolvedValueOnce(upstream);
+
+    const request = chatRequest({
+      model: 'corethink:free',
+      messages: [{ role: 'user', content: oversizedContent }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(400);
+    // The error string must mention both 'context length' and 'tokens'.
+    const { error }: { error: string } = await response.json();
+    expect(error).toContain('context length');
+    expect(error).toContain('tokens');
+  });
+
+  it('returns stealth model error for giga-potato on 4xx', async () => {
+    // Upstream answers 400 with an arbitrary error payload.
+    const upstreamError = new Response(JSON.stringify({ error: 'some error' }), {
+      status: 400,
+      headers: { 'content-type': 'application/json' },
+    });
+    fetchMock.mockResolvedValueOnce(upstreamError);
+
+    const request = chatRequest({
+      model: 'giga-potato',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(400);
+    // The upstream message is replaced by a fixed stealth-model string.
+    const { error }: { error: string } = await response.json();
+    expect(error).toBe('Stealth model unable to process request');
+  });
+
+  describe('BYOK errors', () => {
+    // BYOK detection requires real DB interaction (model_user_byok_providers + user_byok_keys)
+    // and AES-256-GCM decryption. These error messages are tested in unit tests
+    // for makeErrorReadable. Integration-level BYOK tests would need a more complete
+    // DB mock with BYOK key data and encryption stubs; until then they are recorded as todo.
+    it.todo('BYOK 401 → [BYOK] invalid key');
+    it.todo('BYOK 402 → [BYOK] insufficient funds');
+    it.todo('BYOK 403 → [BYOK] permission');
+    it.todo('BYOK 429 → [BYOK] rate limit');
+  });
+});
diff --git a/llm-gateway/test/integration/request-validation.test.ts b/llm-gateway/test/integration/request-validation.test.ts
new file mode 100644
index 000000000..5693d94f3
--- /dev/null
+++ b/llm-gateway/test/integration/request-validation.test.ts
@@ -0,0 +1,94 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { dispatch, chatRequest, chainResult } from './_setup';
+
+vi.mock('@kilocode/db/client', () => ({ // replaces the DB client module for this test file
+  getWorkerDb: () => ({
+    select: () => ({
+      from: () => chainResult([]), // every SELECT goes through chainResult([]) (helper from ./_setup)
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }), // execute() always resolves with zero rows
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({ // only these three exports exist on the mocked module
+  userExistsWithCache: async () => true, // always reports that the user exists
+  extractBearerToken: (header: string | undefined) => { // "<scheme> <token>" -> token when scheme is "bearer" (any case), else null
+    if (!header) return null;
+    const parts = header.split(' ');
+    return parts.length === 2 && parts[0].toLowerCase() === 'bearer' ? parts[1] : null;
+  },
+  verifyKiloToken: async () => { // always rejects with the error below
+    throw new Error('should not be called directly');
+  },
+}));
+
+vi.mock('../../src/lib/abuse-service', () => ({ // abuse-service stubs: both calls resolve to null
+  classifyAbuse: async () => null,
+  reportAbuseCost: async () => null,
+}));
+
+const realFetch = globalThis.fetch; // captured so afterEach can put the original fetch back
+beforeEach(() => {
+  globalThis.fetch = vi.fn(); // fresh mock for every test
+});
+afterEach(() => {
+  globalThis.fetch = realFetch;
+  vi.restoreAllMocks();
+});
+
+describe('requestValidation', () => {
+  it('returns 503 for max_tokens exceeding limit on free model', async () => {
+    // Anonymous + free model to get past auth + anonymous-gate
+    const request = chatRequest({
+      model: 'some-vendor/some-model:free',
+      messages: [{ role: 'user', content: 'hi' }],
+      max_tokens: 100_000_000_000,
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(503);
+    // The whole body is compared, not just the error field.
+    const payload = await response.json();
+    expect(payload).toEqual({
+      error: 'Service Unavailable',
+      message: 'The service is temporarily unavailable. Please try again later.',
+    });
+  });
+
+  it('returns 503 for max_completion_tokens exceeding limit', async () => {
+    // Same oversized value, sent through max_completion_tokens instead of max_tokens.
+    const request = chatRequest({
+      model: 'some-vendor/some-model:free',
+      messages: [{ role: 'user', content: 'hi' }],
+      max_completion_tokens: 100_000_000_000,
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(503);
+    const { error }: { error: string } = await response.json();
+    expect(error).toBe('Service Unavailable');
+  });
+
+  it('returns 404 for dead free model', async () => {
+    // The error text for this model id is expected to mention the 'alpha period'.
+    const request = chatRequest({
+      model: 'x-ai/grok-code-fast-1:optimized:free',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(404);
+    const { error }: { error: string } = await response.json();
+    expect(error).toContain('alpha period');
+  });
+
+  it('returns 404 for rate-limited-to-death model', async () => {
+    // This model id must be answered with the generic 'Model not found' error.
+    const request = chatRequest({
+      model: 'deepseek/deepseek-r1-0528:free',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(404);
+    const { error }: { error: string } = await response.json();
+    expect(error).toBe('Model not found');
+  });
+});
diff --git a/llm-gateway/test/integration/response-headers.test.ts b/llm-gateway/test/integration/response-headers.test.ts
new file mode 100644
index 000000000..19a6b7bca
--- /dev/null
+++ b/llm-gateway/test/integration/response-headers.test.ts
@@ -0,0 +1,240 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import {
+  dispatch,
+  chatRequest,
+  signToken,
+  VALID_USER,
+  sseChunk,
+  sseDone,
+  getTableName,
+  chainResult,
+} from './_setup';
+
+// ── Configurable DB ────────────────────────────────────────────────────────────
+
+let _userRows: Record<string, unknown>[] = []; // rows served for kilocode_users SELECTs; reassigned in beforeEach
+
+vi.mock('@kilocode/db/client', () => ({ // replaces the DB client module for this test file
+  getWorkerDb: () => ({
+    select: () => ({
+      from: (table: unknown) => { // picks the canned result by table name
+        const name = getTableName(table); // getTableName is a helper imported from ./_setup
+        if (name === 'kilocode_users') return chainResult(_userRows);
+        if (name === 'credit_transactions') return chainResult([{ count: 1 }]); // single row with count 1
+        if (name === 'model_user_byok_providers') return chainResult([]); // same result as the fallback; listed explicitly
+        if (name === 'custom_llm') return chainResult([]);
+        if (name === 'organizations') return chainResult([]);
+        if (name === 'models_by_provider') return chainResult([]);
+        return chainResult([]); // any other table: empty result
+      },
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }), // execute() always resolves with zero rows
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({ // only these three exports exist on the mocked module
+  userExistsWithCache: async () => true, // always reports that the user exists
+  extractBearerToken: (header: string | undefined) => { // "<scheme> <token>" -> token when scheme is "bearer" (any case), else null
+    if (!header) return null;
+    const parts = header.split(' ');
+    return parts.length === 2 && parts[0].toLowerCase() === 'bearer' ? parts[1] : null;
+  },
+  verifyKiloToken: async (token: string, secret: string) => { // verifies the JWT with jose and returns its payload
+    const { jwtVerify } = await import('jose');
+    const { payload } = await jwtVerify(token, new TextEncoder().encode(secret)); // key = UTF-8 bytes of secret
+    return payload as Record<string, unknown>;
+  },
+}));
+
+vi.mock('@kilocode/encryption', () => ({
+  timingSafeEqual: (a: string, b: string) => a === b, // plain equality stand-in (not constant-time)
+}));
+
+vi.mock('../../src/lib/abuse-service', () => ({ // abuse-service stubs: both calls resolve to null
+  classifyAbuse: async () => null,
+  reportAbuseCost: async () => null,
+}));
+
+// Polyfill scheduler.wait for Node: installed only when no global `scheduler` exists yet.
+const schedulerHost = globalThis as Record<string, unknown>;
+if (!schedulerHost.scheduler) {
+  const wait = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
+  schedulerHost.scheduler = { wait };
+}
+
+const realFetch = globalThis.fetch; // captured so afterEach can put the original fetch back
+let fetchMock: ReturnType<typeof vi.fn>; // per-test stand-in for globalThis.fetch
+
+beforeEach(() => {
+  _userRows = [{ ...VALID_USER }]; // shallow copy of the fixture, rebuilt for every test
+  fetchMock = vi.fn();
+  globalThis.fetch = fetchMock;
+});
+afterEach(() => {
+  globalThis.fetch = realFetch;
+  vi.restoreAllMocks();
+});
+
+/** Signs a token for kiloUserId 'user-1' and builds a chat request carrying it plus `opts`. */
+async function authRequest(
+  body: Record<string, unknown>,
+  opts: { headers?: Record<string, string> } = {}
+) {
+  return chatRequest(body, { token: await signToken({ kiloUserId: 'user-1' }), ...opts });
+}
+
+describe('response headers', () => {
+  it('Content-Encoding: identity on 200 JSON', async () => {
+    // Upstream returns a plain JSON completion with no Content-Encoding header of its own.
+    const completion = JSON.stringify({ choices: [{ message: { content: 'ok' } }] });
+    const upstream = new Response(completion, {
+      status: 200,
+      headers: { 'content-type': 'application/json' },
+    });
+    fetchMock.mockResolvedValueOnce(upstream);
+
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    expect(response.headers.get('Content-Encoding')).toBe('identity');
+  });
+
+  it('Content-Encoding: identity on 200 SSE', async () => {
+    // Upstream event stream: one content chunk followed by sseDone().
+    const contentChunk = sseChunk({
+      id: 'chatcmpl-1',
+      model: 'anthropic/claude-sonnet-4-20250514',
+      choices: [{ delta: { content: 'Hi' } }],
+    });
+    const upstreamStream = contentChunk + sseDone();
+
+    const upstream = new Response(upstreamStream, {
+      status: 200,
+      headers: { 'content-type': 'text/event-stream' },
+    });
+    fetchMock.mockResolvedValueOnce(upstream);
+
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+      stream: true,
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    // The streamed response must carry the same identity marker as the JSON one.
+    expect(response.headers.get('Content-Encoding')).toBe('identity');
+  });
+
+  it('Content-Encoding: identity on free model rewritten response', async () => {
+    // Upstream body uses a model id different from the one the client requested.
+    const upstreamBody = JSON.stringify({
+      model: 'corethink-internal',
+      choices: [{ message: { content: 'ok' } }],
+      usage: { prompt_tokens: 10, completion_tokens: 5 },
+    });
+    const upstreamInit = { status: 200, headers: { 'content-type': 'application/json' } };
+    fetchMock.mockResolvedValueOnce(new Response(upstreamBody, upstreamInit));
+
+    // Sent via chatRequest without a token option.
+    const request = chatRequest({
+      model: 'corethink:free',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+
+    expect(response.status).toBe(200);
+    // Only the header is checked here; the body is left unread.
+    expect(response.headers.get('Content-Encoding')).toBe('identity');
+  });
+
+  it('upstream date header preserved', async () => {
+    // 3 March 2026 is a Tuesday; a well-formed HTTP-date keeps the fixture valid if the value is ever parsed.
+    const upstreamDate = 'Tue, 03 Mar 2026 12:00:00 GMT';
+    fetchMock.mockResolvedValueOnce(
+      new Response(JSON.stringify({ choices: [{ message: { content: 'ok' } }] }), {
+        status: 200,
+        headers: { 'content-type': 'application/json', date: upstreamDate },
+      })
+    );
+
+    const res = await dispatch(
+      await authRequest({
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      })
+    );
+    expect(res.status).toBe(200);
+    // The client must receive exactly the value the upstream sent.
+    expect(res.headers.get('date')).toBe(upstreamDate);
+  });
+
+  it('upstream content-type header preserved', async () => {
+    // The charset parameter must survive the trip through the gateway unchanged.
+    const contentType = 'application/json; charset=utf-8';
+    const upstream = new Response(JSON.stringify({ choices: [{ message: { content: 'ok' } }] }), {
+      status: 200,
+      headers: { 'content-type': contentType },
+    });
+    fetchMock.mockResolvedValueOnce(upstream);
+
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    expect(response.headers.get('content-type')).toBe(contentType);
+  });
+
+  it('upstream request-id header preserved', async () => {
+    // Upstream attaches a request-id header next to the content type.
+    const upstreamHeaders = {
+      'content-type': 'application/json',
+      'request-id': 'req-abc-123',
+    };
+    const completion = JSON.stringify({ choices: [{ message: { content: 'ok' } }] });
+    fetchMock.mockResolvedValueOnce(
+      new Response(completion, { status: 200, headers: upstreamHeaders })
+    );
+
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    // The id must be forwarded to the client verbatim.
+    expect(response.headers.get('request-id')).toBe('req-abc-123');
+  });
+
+  it('unknown upstream headers (x-ratelimit-remaining, x-custom, etc.) stripped', async () => {
+    // Headers the upstream sends that must not appear on the client response.
+    // They are listed once and reused for both the fixture and the assertions.
+    const strippedHeaders = {
+      'x-ratelimit-remaining': '99',
+      'x-custom': 'secret-value',
+      server: 'openrouter',
+      'x-request-id': 'or-abc',
+    };
+    const upstream = new Response(JSON.stringify({ choices: [{ message: { content: 'ok' } }] }), {
+      status: 200,
+      headers: { 'content-type': 'application/json', ...strippedHeaders },
+    });
+    fetchMock.mockResolvedValueOnce(upstream);
+
+    const request = await authRequest({
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    const response = await dispatch(request);
+    expect(response.status).toBe(200);
+    // The header name is passed as the assertion message so a failure names the offender.
+    for (const name of Object.keys(strippedHeaders)) {
+      expect(response.headers.has(name), name).toBe(false);
+    }
+  });
+});
diff --git a/llm-gateway/test/integration/routing.test.ts b/llm-gateway/test/integration/routing.test.ts
new file mode 100644
index 000000000..2df32cdd8
--- /dev/null
+++ b/llm-gateway/test/integration/routing.test.ts
@@ -0,0 +1,100 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { dispatch, chainResult } from './_setup';
+
+vi.mock('@kilocode/db/client', () => ({ // DB stub: select().from() and insert() return chainResult([]); execute() resolves with no rows
+  getWorkerDb: () => ({
+    select: () => ({
+      from: () => chainResult([]),
+    }),
+    insert: () => chainResult([]),
+    execute: () => Promise.resolve({ rows: [] }),
+  }),
+}));
+
+vi.mock('@kilocode/worker-utils', () => ({ // auth helper stubs used while exercising routing
+  userExistsWithCache: async () => true,
+  extractBearerToken: (header: string | undefined) => {
+    if (!header) return null;
+    const parts = header.split(' ');
+    return parts.length === 2 && parts[0].toLowerCase() === 'bearer' ? parts[1] : null; // accepts exactly `<scheme> <token>` with a case-insensitive `bearer` scheme
+  },
+  verifyKiloToken: async () => {
+    throw new Error('should not be called directly'); // throws if any code path calls this stub
+  },
+}));
+
+vi.mock('../../src/lib/abuse-service', () => ({ // abuse-service stub: both calls resolve to null
+  classifyAbuse: async () => null,
+  reportAbuseCost: async () => null,
+}));
+
+const realFetch = globalThis.fetch; // saved so afterEach can put the real fetch back
+beforeEach(() => {
+  globalThis.fetch = vi.fn(); // bare mock with no configured response
+});
+afterEach(() => {
+  globalThis.fetch = realFetch;
+  vi.restoreAllMocks();
+});
+
+describe('routing', () => { // path/method dispatch: which requests get 400 "Invalid path", 404 "Not found", or reach body parsing
+  it('returns 400 for POST /api/gateway/foo (invalid sub-path)', async () => {
+    const req = new Request('http://localhost/api/gateway/foo', { method: 'POST' });
+    const res = await dispatch(req);
+    expect(res.status).toBe(400);
+    const body = await res.json();
+    expect(body).toEqual({
+      error: 'Invalid path',
+      message: 'This endpoint only accepts the path `/chat/completions`.',
+    });
+  });
+
+  it('returns 400 for POST /api/openrouter/models', async () => { // same "Invalid path" payload under the /api/openrouter prefix
+    const req = new Request('http://localhost/api/openrouter/models', { method: 'POST' });
+    const res = await dispatch(req);
+    expect(res.status).toBe(400);
+    const body = await res.json();
+    expect(body).toEqual({
+      error: 'Invalid path',
+      message: 'This endpoint only accepts the path `/chat/completions`.',
+    });
+  });
+
+  it('returns 404 for POST /completely/unknown', async () => { // a path outside both prefixes gets 404 rather than 400
+    const req = new Request('http://localhost/completely/unknown', { method: 'POST' });
+    const res = await dispatch(req);
+    expect(res.status).toBe(404);
+    const body = await res.json();
+    expect(body).toEqual({ error: 'Not found' });
+  });
+
+  it('returns 400 for GET /api/gateway/chat/completions (wrong method falls to notFound)', async () => {
+    const req = new Request('http://localhost/api/gateway/chat/completions', { method: 'GET' }); // valid path, non-POST method
+    const res = await dispatch(req);
+    expect(res.status).toBe(400);
+    const body = await res.json();
+    expect(body).toEqual({
+      error: 'Invalid path',
+      message: 'This endpoint only accepts the path `/chat/completions`.',
+    });
+  });
+
+  it('both /api/gateway/ and /api/openrouter/ proceed past routing', async () => {
+    const makeReq = (path: string) =>
+      new Request(`http://localhost${path}`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json', 'CF-Connecting-IP': '1.2.3.4' },
+        body: 'not json', // deliberately unparseable so the request fails after routing
+      });
+
+    const res1 = await dispatch(makeReq('/api/gateway/chat/completions'));
+    expect(res1.status).toBe(400);
+    const body1: { error: string } = await res1.json();
+    expect(body1.error).toBe('Invalid request'); // 'Invalid request' (not 'Invalid path') shows routing accepted the path
+
+    const res2 = await dispatch(makeReq('/api/openrouter/chat/completions'));
+    expect(res2.status).toBe(400);
+    const body2: { error: string } = await res2.json();
+    expect(body2.error).toBe('Invalid request');
+  });
+});
diff --git a/llm-gateway/test/unit/abuse-service.test.ts b/llm-gateway/test/unit/abuse-service.test.ts
new file mode 100644
index 000000000..d941d1c0f
--- /dev/null
+++ b/llm-gateway/test/unit/abuse-service.test.ts
@@ -0,0 +1,203 @@
+// Tests for abuse-service: classifyAbuse, reportAbuseCost, classifyRequest, reportCost.
+
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import {
+  classifyAbuse,
+  reportAbuseCost,
+  reportCost,
+  classifyRequest,
+} from '../../src/lib/abuse-service';
+import type { AbuseServiceSecrets, AbuseClassificationResponse } from '../../src/lib/abuse-service';
+import type { FraudDetectionHeaders } from '../../src/lib/extract-headers';
+
+const realFetch = globalThis.fetch; // saved so afterEach can put the real fetch back
+
+beforeEach(() => {
+  globalThis.fetch = vi.fn(); // each test configures the mock's response itself
+});
+
+afterEach(() => {
+  globalThis.fetch = realFetch;
+});
+
+const secrets: AbuseServiceSecrets = { // values the classifyRequest test expects in the CF-Access-Client-* request headers
+  cfAccessClientId: 'test-id',
+  cfAccessClientSecret: 'test-secret',
+};
+
+const emptyFraudHeaders: FraudDetectionHeaders = { // fixture: only the forwarded IP is set, every other field is null
+  http_x_forwarded_for: '1.2.3.4',
+  geo_city: null,
+  geo_country: null,
+  geo_latitude: null,
+  geo_longitude: null,
+  ja3_hash: null,
+  http_user_agent: null,
+};
+
+describe('classifyRequest', () => { // low-level classify call: URL, auth headers, and null-on-failure behaviour
+  it('returns null for empty serviceUrl', async () => {
+    const result = await classifyRequest('', secrets, {});
+    expect(result).toBeNull();
+    expect(globalThis.fetch).not.toHaveBeenCalled(); // no network attempt when the URL is empty
+  });
+
+  it('sends POST to /api/classify with CF Access headers', async () => {
+    const mockResponse: AbuseClassificationResponse = {
+      verdict: 'ALLOW',
+      risk_score: 0.1,
+      signals: [],
+      action_metadata: {},
+      context: {
+        identity_key: 'test',
+        current_spend_1h: 0,
+        is_new_user: false,
+        requests_per_second: 1,
+      },
+      request_id: 42,
+    };
+    vi.mocked(globalThis.fetch).mockResolvedValue(
+      new Response(JSON.stringify(mockResponse), { status: 200 })
+    );
+
+    const result = await classifyRequest('https://abuse.example.com', secrets, {
+      kilo_user_id: 'user-1',
+    });
+    expect(result).toEqual(mockResponse); // the parsed JSON body is returned as-is
+
+    const [url, init] = vi.mocked(globalThis.fetch).mock.calls[0]; // arguments of the first (only) fetch call
+    expect(url).toBe('https://abuse.example.com/api/classify');
+    expect((init?.headers as Record<string, string>)['CF-Access-Client-Id']).toBe('test-id');
+    expect((init?.headers as Record<string, string>)['CF-Access-Client-Secret']).toBe(
+      'test-secret'
+    );
+  });
+
+  it('returns null on fetch failure', async () => { // a rejected fetch is expected to yield null rather than throw
+    vi.mocked(globalThis.fetch).mockRejectedValue(new Error('network error'));
+    const result = await classifyRequest('https://abuse.example.com', secrets, {});
+    expect(result).toBeNull();
+  });
+
+  it('returns null on non-ok response', async () => { // an HTTP 500 is expected to yield null
+    vi.mocked(globalThis.fetch).mockResolvedValue(new Response('error', { status: 500 }));
+    const result = await classifyRequest('https://abuse.example.com', secrets, {});
+    expect(result).toBeNull();
+  });
+});
+
+describe('classifyAbuse', () => { // high-level wrapper: checks the payload it builds from a chat request
+  it('extracts prompts from messages and sends classification', async () => {
+    vi.mocked(globalThis.fetch).mockResolvedValue(
+      new Response(
+        JSON.stringify({
+          verdict: 'ALLOW',
+          risk_score: 0,
+          signals: [],
+          action_metadata: {},
+          context: {
+            identity_key: 'test',
+            current_spend_1h: 0,
+            is_new_user: false,
+            requests_per_second: 0,
+          },
+          request_id: 1,
+        }),
+        { status: 200 }
+      )
+    );
+
+    await classifyAbuse(
+      'https://abuse.example.com',
+      secrets,
+      emptyFraudHeaders,
+      'vscode', // asserted below as body.editor_name
+      {
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [
+          { role: 'system', content: 'You are helpful.' },
+          { role: 'user', content: 'Hello world' },
+        ],
+      },
+      { kiloUserId: 'user-1', organizationId: 'org-1' }
+    );
+
+    const [, init] = vi.mocked(globalThis.fetch).mock.calls[0]; // only the request init of the first fetch call is inspected
+    const body = JSON.parse(init?.body as string) as Record<string, unknown>;
+    expect(body.system_prompt).toBe('You are helpful.');
+    expect(body.user_prompt).toBe('Hello world');
+    expect(body.kilo_user_id).toBe('user-1');
+    expect(body.editor_name).toBe('vscode');
+  });
+});
+
+describe('reportCost', () => { // low-level cost report call: empty-URL short circuit and target endpoint
+  it('returns null for empty serviceUrl', async () => {
+    const result = await reportCost('', secrets, {
+      request_id: 1,
+      message_id: 'msg-1',
+      cost: 100,
+    });
+    expect(result).toBeNull();
+  });
+
+  it('sends POST to /api/usage/cost', async () => { // NOTE(review): only the URL is asserted; the HTTP method is not checked here
+    vi.mocked(globalThis.fetch).mockResolvedValue(
+      new Response(JSON.stringify({ success: true }), { status: 200 })
+    );
+    await reportCost('https://abuse.example.com', secrets, {
+      request_id: 42,
+      message_id: 'msg-1',
+      cost: 500,
+    });
+    const [url] = vi.mocked(globalThis.fetch).mock.calls[0];
+    expect(url).toBe('https://abuse.example.com/api/usage/cost');
+  });
+});
+
+describe('reportAbuseCost', () => { // high-level wrapper: cases where no report is expected
+  it('returns null when abuseRequestId is missing', async () => {
+    const result = await reportAbuseCost(
+      'https://abuse.example.com',
+      secrets,
+      {
+        kiloUserId: 'user-1',
+        fraudHeaders: emptyFraudHeaders,
+        requested_model: 'test',
+        abuse_request_id: undefined, // the missing value under test
+      },
+      {
+        messageId: 'msg-1',
+        cost_mUsd: 100,
+        inputTokens: 10,
+        outputTokens: 20,
+        cacheWriteTokens: 0,
+        cacheHitTokens: 0,
+      }
+    );
+    expect(result).toBeNull();
+    expect(globalThis.fetch).not.toHaveBeenCalled(); // no network attempt without a request id
+  });
+
+  it('returns null when messageId is null', async () => {
+    const result = await reportAbuseCost(
+      'https://abuse.example.com',
+      secrets,
+      {
+        kiloUserId: 'user-1',
+        fraudHeaders: emptyFraudHeaders,
+        requested_model: 'test',
+        abuse_request_id: 42,
+      },
+      {
+        messageId: null, // the missing value under test
+        cost_mUsd: 100,
+        inputTokens: 10,
+        outputTokens: 20,
+        cacheWriteTokens: 0,
+        cacheHitTokens: 0,
+      }
+    );
+    expect(result).toBeNull(); // NOTE(review): unlike the test above, this one does not assert that fetch was not called
+  });
+});
diff --git a/llm-gateway/test/unit/anonymous-gate.test.ts b/llm-gateway/test/unit/anonymous-gate.test.ts
new file mode 100644
index 000000000..2d6a0ecdf
--- /dev/null
+++ b/llm-gateway/test/unit/anonymous-gate.test.ts
@@ -0,0 +1,110 @@
+// Tests for anonymousGateMiddleware — decides between authenticated user,
+// anonymous free model access, and 401 rejection for paid models.
+
+import { describe, it, expect } from 'vitest';
+import { Hono } from 'hono';
+import type { HonoContext } from '../../src/types/hono';
+import { anonymousGateMiddleware } from '../../src/middleware/anonymous-gate';
+import { parseBodyMiddleware } from '../../src/middleware/parse-body';
+import { extractIpMiddleware } from '../../src/middleware/extract-ip';
+import { resolveAutoModelMiddleware } from '../../src/middleware/resolve-auto-model';
+
+function makeApp() { // builds a Hono app whose POST /test runs the middleware chain below and echoes the resulting user
+  const app = new Hono<HonoContext>();
+  app.post(
+    '/test',
+    parseBodyMiddleware,
+    extractIpMiddleware,
+    resolveAutoModelMiddleware,
+    anonymousGateMiddleware, // middleware under test; no auth step precedes it in this app
+    c => {
+      const user = c.get('user');
+      return c.json({ userId: user.id, isAnonymous: 'isAnonymous' in user }); // reports presence of the `isAnonymous` key, not its value
+    }
+  );
+  return app;
+}
+
+function post(app: ReturnType<typeof makeApp>, body: Record<string, unknown>) { // sends `body` as a JSON POST to /test and returns the app's response
+  return app.fetch(
+    new Request('http://x/test', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json', 'CF-Connecting-IP': '9.8.7.6' }, // fixed client IP; the tests expect it inside the anonymous user id
+      body: JSON.stringify(body),
+    })
+  );
+}
+
+describe('anonymousGateMiddleware', () => { // covers anonymous free-model access, the 401 for paid models, and the authenticated pass-through
+  it('allows anonymous access for free models (ending in :free)', async () => {
+    const app = makeApp();
+    const res = await post(app, {
+      model: 'meta-llama/llama-3.1-8b-instruct:free',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as Record<string, unknown>;
+    expect(body.isAnonymous).toBe(true);
+    expect(body.userId).toBe('anon:9.8.7.6'); // synthetic id built from the CF-Connecting-IP sent by post()
+  });
+
+  it('allows anonymous access for Kilo free models', async () => {
+    const app = makeApp();
+    const res = await post(app, {
+      model: 'corethink:free',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as Record<string, unknown>;
+    expect(body.isAnonymous).toBe(true);
+  });
+
+  it('returns 401 for paid models without auth', async () => {
+    const app = makeApp();
+    const res = await post(app, {
+      model: 'anthropic/claude-sonnet-4-20250514', // no `:free` suffix and no authUser set
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    expect(res.status).toBe(401);
+    const body = (await res.json()) as { error: { code: string; message: string } };
+    expect(body.error.code).toBe('PAID_MODEL_AUTH_REQUIRED');
+    expect(body.error.message).toContain('sign in');
+  });
+
+  it('passes through when authUser is set', async () => { // builds its own app so an authUser-setting step can run before the gate
+    const app = new Hono<HonoContext>();
+    app.post(
+      '/test',
+      parseBodyMiddleware,
+      extractIpMiddleware,
+      resolveAutoModelMiddleware,
+      // Simulate auth middleware having set authUser
+      async (c, next) => {
+        c.set('authUser', {
+          id: 'user-42',
+          google_user_email: 'test@example.com',
+        } as HonoContext['Variables']['authUser']); // partial object cast to the full authUser type
+        await next();
+      },
+      anonymousGateMiddleware,
+      c => {
+        const user = c.get('user');
+        return c.json({ userId: user.id, isAnonymous: 'isAnonymous' in user });
+      }
+    );
+    const res = await app.fetch(
+      new Request('http://x/test', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json', 'CF-Connecting-IP': '9.8.7.6' },
+        body: JSON.stringify({
+          model: 'anthropic/claude-sonnet-4-20250514', // same paid model that gets 401 without auth
+          messages: [{ role: 'user', content: 'hi' }],
+        }),
+      })
+    );
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as Record<string, unknown>;
+    expect(body.userId).toBe('user-42');
+    expect(body.isAnonymous).toBe(false); // the resulting user has no `isAnonymous` key
+  });
+});
diff --git a/llm-gateway/test/unit/anonymous.test.ts b/llm-gateway/test/unit/anonymous.test.ts
new file mode 100644
index 000000000..97eff4861
--- /dev/null
+++ b/llm-gateway/test/unit/anonymous.test.ts
@@ -0,0 +1,36 @@
+// Tests for anonymous context utilities.
+
+import { describe, it, expect } from 'vitest';
+import { createAnonymousContext, isAnonymousContext } from '../../src/lib/anonymous';
+
+describe('createAnonymousContext', () => { // checks the fields of the context built for an IP address
+  it('creates context with synthetic user ID', () => {
+    const ctx = createAnonymousContext('1.2.3.4');
+    expect(ctx.id).toBe('anon:1.2.3.4'); // id is the IP prefixed with `anon:`
+    expect(ctx.isAnonymous).toBe(true);
+    expect(ctx.ipAddress).toBe('1.2.3.4');
+    expect(ctx.microdollars_used).toBe(0);
+    expect(ctx.is_admin).toBe(false);
+  });
+});
+
+describe('isAnonymousContext', () => { // type-guard behaviour for anonymous, regular, nullish, and non-object inputs
+  it('returns true for anonymous context', () => {
+    const ctx = createAnonymousContext('1.2.3.4');
+    expect(isAnonymousContext(ctx)).toBe(true);
+  });
+
+  it('returns false for regular user', () => {
+    expect(isAnonymousContext({ id: 'user-1', isAnonymous: false })).toBe(false); // key present but false
+  });
+
+  it('returns false for null/undefined', () => {
+    expect(isAnonymousContext(null)).toBe(false);
+    expect(isAnonymousContext(undefined)).toBe(false);
+  });
+
+  it('returns false for non-object', () => {
+    expect(isAnonymousContext('string')).toBe(false);
+    expect(isAnonymousContext(42)).toBe(false);
+  });
+});
diff --git a/llm-gateway/test/unit/background-tasks.test.ts b/llm-gateway/test/unit/background-tasks.test.ts
new file mode 100644
index 000000000..80581eec6
--- /dev/null
+++ b/llm-gateway/test/unit/background-tasks.test.ts
@@ -0,0 +1,211 @@
+// Tests: background task params passed to runApiMetrics — requestedModel for auto-models (B3) and resolvedModel normalization (B4).
+
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+
+// ── Capture what runApiMetrics receives ──────────────────────────────────────
+
+const apiMetricsCalls: unknown[] = []; // one entry per runApiMetrics call; cleared in beforeEach
+
+vi.mock('../../src/background/api-metrics', () => ({
+  runApiMetrics: async (_o11y: unknown, params: unknown) => {
+    apiMetricsCalls.push(params); // record the params instead of sending metrics
+  },
+  getToolsAvailable: () => [],
+  getToolsUsed: () => [],
+}));
+
+vi.mock('../../src/background/usage-accounting', () => ({ // no-op stub
+  runUsageAccounting: async () => null,
+}));
+
+vi.mock('../../src/background/request-logging', () => ({ // no-op stub
+  runRequestLogging: async () => {},
+}));
+
+vi.mock('../../src/lib/abuse-service', () => ({ // no-op stub
+  reportAbuseCost: async () => {},
+}));
+
+vi.mock('../../src/lib/prompt-info', () => ({ // stub returning empty prompt info and zero token estimates
+  extractPromptInfo: () => ({}),
+  estimateChatTokens: () => ({ estimatedInputTokens: 0, estimatedOutputTokens: 0 }),
+}));
+
+vi.mock('@kilocode/db/client', () => ({ // stub DB client: an empty object
+  getWorkerDb: () => ({}),
+}));
+
+beforeEach(() => {
+  apiMetricsCalls.length = 0; // empty the shared capture array in place
+
+  // scheduler.wait is a Workers-only global — stub it for Node tests.
+  const g = globalThis as Record<string, unknown>;
+  if (g.scheduler === undefined) { // installed once; an existing global scheduler is left alone
+    g.scheduler = { wait: (ms: number) => new Promise(resolve => setTimeout(resolve, ms)) };
+  }
+});
+
+// ── Helpers ──────────────────────────────────────────────────────────────────
+
+function makeStream(): ReadableStream { // returns a stream that emits the UTF-8 bytes of '{}' once and then closes
+  return new ReadableStream({
+    start(ctrl) {
+      ctrl.enqueue(new TextEncoder().encode('{}'));
+      ctrl.close();
+    },
+  });
+}
+
+function baseParams() { // default scheduleBackgroundTasks params; tests spread this and add model and stream fields
+  return {
+    upstreamStatusCode: 200,
+    abuseServiceUrl: '',
+    abuseSecrets: undefined,
+    abuseRequestId: undefined,
+    isStreaming: false,
+    requestStartedAt: performance.now(),
+    provider: 'openrouter',
+    providerApiUrl: 'https://openrouter.example.com/v1',
+    providerApiKey: 'key',
+    providerHasGenerationEndpoint: true,
+    requestBody: {
+      model: 'anthropic/claude-sonnet-4-20250514',
+      messages: [{ role: 'user' as const, content: 'hi' }],
+    },
+    user: { id: 'user-1' },
+    organizationId: undefined,
+    modeHeader: null,
+    fraudHeaders: { cf_connecting_ip: '1.2.3.4' }, // NOTE(review): differs from the FraudDetectionHeaders fixture in abuse-service.test.ts (http_x_forwarded_for, geo_*, ...) — confirm this shape is intended
+    projectId: null,
+    editorName: null,
+    machineId: null,
+    feature: null,
+    botId: undefined,
+    tokenSource: undefined,
+    userByok: false,
+    isAnon: false,
+    sessionId: null,
+    ttfbMs: 100,
+    toolsUsed: [],
+    posthogApiKey: undefined,
+    connectionString: 'postgres://localhost:5432/test',
+    o11y: { ingestApiMetrics: async () => {} }, // no-op stand-in for the o11y binding
+  } as const;
+}
+
+// ── Tests ────────────────────────────────────────────────────────────────────
+
+describe('scheduleBackgroundTasks – requestedModel (B3)', () => { // which model name is reported as requestedModel in the metrics params
+  it('uses autoModel as requestedModel when set (kilo/auto)', async () => {
+    const { scheduleBackgroundTasks } = await import('../../src/handler/background-tasks');
+    const waitUntilPromises: Promise<unknown>[] = [];
+    const ctx = { waitUntil: (p: Promise<unknown>) => waitUntilPromises.push(p) }; // fake execution context that collects scheduled promises
+
+    scheduleBackgroundTasks(ctx, {
+      ...baseParams(),
+      resolvedModel: 'anthropic/claude-sonnet-4-20250514',
+      autoModel: 'kilo/auto',
+      accountingStream: null,
+      metricsStream: makeStream(), // only the metrics stream is supplied
+      loggingStream: null,
+    } as never); // `as never` bypasses type checking of the hand-built params
+
+    // Wait for all background tasks to complete
+    await Promise.all(waitUntilPromises);
+
+    expect(apiMetricsCalls).toHaveLength(1);
+    const params = apiMetricsCalls[0] as Record<string, unknown>;
+    expect(params.requestedModel).toBe('kilo/auto'); // the auto-model name, not the resolved model
+    expect(params.resolvedModel).toBe('anthropic/claude-sonnet-4-20250514');
+  });
+
+  it('uses resolvedModel as requestedModel when autoModel is null', async () => {
+    const { scheduleBackgroundTasks } = await import('../../src/handler/background-tasks');
+    const waitUntilPromises: Promise<unknown>[] = [];
+    const ctx = { waitUntil: (p: Promise<unknown>) => waitUntilPromises.push(p) }; // fake execution context that collects scheduled promises
+
+    scheduleBackgroundTasks(ctx, {
+      ...baseParams(),
+      resolvedModel: 'anthropic/claude-sonnet-4-20250514',
+      autoModel: null,
+      accountingStream: null,
+      metricsStream: makeStream(),
+      loggingStream: null,
+    } as never);
+
+    await Promise.all(waitUntilPromises); // wait for every scheduled background task
+
+    expect(apiMetricsCalls).toHaveLength(1);
+    const params = apiMetricsCalls[0] as Record<string, unknown>;
+    expect(params.requestedModel).toBe('anthropic/claude-sonnet-4-20250514'); // both fields carry the same model here
+    expect(params.resolvedModel).toBe('anthropic/claude-sonnet-4-20250514');
+  });
+});
+
+describe('scheduleBackgroundTasks – resolvedModel normalization (B4)', () => { // colon suffixes are expected to be removed from resolvedModel in the metrics params
+  it('strips :free suffix from resolvedModel in metrics', async () => {
+    const { scheduleBackgroundTasks } = await import('../../src/handler/background-tasks');
+    const waitUntilPromises: Promise<unknown>[] = [];
+    const ctx = { waitUntil: (p: Promise<unknown>) => waitUntilPromises.push(p) }; // fake execution context that collects scheduled promises
+
+    scheduleBackgroundTasks(ctx, {
+      ...baseParams(),
+      resolvedModel: 'corethink:free',
+      autoModel: null,
+      accountingStream: null,
+      metricsStream: makeStream(),
+      loggingStream: null,
+    } as never); // `as never` bypasses type checking of the hand-built params
+
+    await Promise.all(waitUntilPromises);
+
+    expect(apiMetricsCalls).toHaveLength(1);
+    const params = apiMetricsCalls[0] as Record<string, unknown>;
+    // B4: resolvedModel must be normalized — :free stripped
+    expect(params.resolvedModel).toBe('corethink');
+    // requestedModel is NOT normalized (preserves original for tracking)
+    expect(params.requestedModel).toBe('corethink:free');
+  });
+
+  it('strips :exacto suffix from resolvedModel in metrics', async () => {
+    const { scheduleBackgroundTasks } = await import('../../src/handler/background-tasks');
+    const waitUntilPromises: Promise<unknown>[] = [];
+    const ctx = { waitUntil: (p: Promise<unknown>) => waitUntilPromises.push(p) };
+
+    scheduleBackgroundTasks(ctx, {
+      ...baseParams(),
+      resolvedModel: 'some-model:exacto',
+      autoModel: null,
+      accountingStream: null,
+      metricsStream: makeStream(),
+      loggingStream: null,
+    } as never);
+
+    await Promise.all(waitUntilPromises);
+
+    expect(apiMetricsCalls).toHaveLength(1);
+    const params = apiMetricsCalls[0] as Record<string, unknown>;
+    expect(params.resolvedModel).toBe('some-model'); // `:exacto` removed
+  });
+
+  it('leaves models without colon suffix unchanged', async () => {
+    const { scheduleBackgroundTasks } = await import('../../src/handler/background-tasks');
+    const waitUntilPromises: Promise<unknown>[] = [];
+    const ctx = { waitUntil: (p: Promise<unknown>) => waitUntilPromises.push(p) };
+
+    scheduleBackgroundTasks(ctx, {
+      ...baseParams(),
+      resolvedModel: 'anthropic/claude-sonnet-4-20250514',
+      autoModel: null,
+      accountingStream: null,
+      metricsStream: makeStream(),
+      loggingStream: null,
+    } as never);
+
+    await Promise.all(waitUntilPromises);
+
+    expect(apiMetricsCalls).toHaveLength(1);
+    const params = apiMetricsCalls[0] as Record<string, unknown>;
+    expect(params.resolvedModel).toBe('anthropic/claude-sonnet-4-20250514'); // the `/` and `-` in the id are untouched
+  });
+});
diff --git a/llm-gateway/test/unit/extract-headers.test.ts b/llm-gateway/test/unit/extract-headers.test.ts
new file mode 100644
index 000000000..cc2b6387c
--- /dev/null
+++ b/llm-gateway/test/unit/extract-headers.test.ts
@@ -0,0 +1,95 @@
+// Tests for extract-headers: extractProjectHeaders, getFraudDetectionHeaders.
+
+import { describe, it, expect } from 'vitest';
+import { extractProjectHeaders, getFraudDetectionHeaders } from '../../src/lib/extract-headers';
+
+describe('getFraudDetectionHeaders', () => { // maps request headers plus the `cf` object to the fraud-detection fields
+  it('extracts geo data from cf object', () => {
+    const headers = new Headers({
+      'x-forwarded-for': '1.2.3.4',
+      'user-agent': 'Kilo-Code/3.0.0',
+    });
+    const cf = {
+      city: 'San Francisco',
+      country: 'US',
+      latitude: '37.7749', // coordinates are given as strings and expected back as numbers
+      longitude: '-122.4194',
+      botManagement: { ja3Hash: 'abc123' },
+    };
+    const result = getFraudDetectionHeaders(headers, cf);
+    expect(result.http_x_forwarded_for).toBe('1.2.3.4');
+    expect(result.geo_city).toBe('San Francisco');
+    expect(result.geo_country).toBe('US');
+    expect(result.geo_latitude).toBe(37.7749);
+    expect(result.geo_longitude).toBe(-122.4194);
+    expect(result.ja3_hash).toBe('abc123');
+    expect(result.http_user_agent).toBe('Kilo-Code/3.0.0');
+  });
+
+  it('returns null when cf is undefined', () => { // empty headers and no cf object: the checked fields are all null
+    const result = getFraudDetectionHeaders(new Headers(), undefined);
+    expect(result.http_x_forwarded_for).toBeNull();
+    expect(result.geo_city).toBeNull();
+    expect(result.geo_latitude).toBeNull();
+    expect(result.ja3_hash).toBeNull();
+  });
+
+  it('returns null when cf has no botManagement (non-Enterprise)', () => {
+    const cf = { city: 'Austin', country: 'US', latitude: '30.27', longitude: '-97.74' }; // geo fields only, no botManagement
+    const result = getFraudDetectionHeaders(new Headers(), cf);
+    expect(result.geo_city).toBe('Austin');
+    expect(result.ja3_hash).toBeNull();
+  });
+});
+
+describe('extractProjectHeaders', () => { // parsing of the X-KiloCode-* request headers
+  it('extracts all project headers', () => {
+    const headers = new Headers({
+      'X-KiloCode-Version': '3.2.1',
+      'X-KiloCode-ProjectId': 'my-project',
+      'x-kilocode-taskid': 'task-123',
+      'x-kilocode-editorname': 'vscode',
+      'x-kilocode-machineid': 'machine-abc',
+      'x-forwarded-for': '5.6.7.8',
+    });
+    const result = extractProjectHeaders(headers, undefined);
+    expect(result.xKiloCodeVersion).toBe('3.2.1');
+    expect(result.projectId).toBe('my-project');
+    expect(result.taskId).toBe('task-123');
+    expect(result.editorName).toBe('vscode');
+    expect(result.machineId).toBe('machine-abc');
+    expect(result.numericKiloCodeVersion).toBeCloseTo(3.002001, 6); // '3.2.1' → ≈3.002001; looks like major + minor/1e3 + patch/1e6 — TODO confirm against the implementation
+    expect(result.fraudHeaders.http_x_forwarded_for).toBe('5.6.7.8');
+  });
+
+  it('normalizes git HTTPS URLs to repo name', () => {
+    const headers = new Headers({
+      'X-KiloCode-ProjectId': 'https://github.com/org/my-repo.git',
+    });
+    const result = extractProjectHeaders(headers, undefined);
+    expect(result.projectId).toBe('my-repo'); // host, org, and `.git` suffix removed
+  });
+
+  it('normalizes SSH git URLs to repo name', () => {
+    const headers = new Headers({
+      'X-KiloCode-ProjectId': 'git@github.com:org/my-repo.git',
+    });
+    const result = extractProjectHeaders(headers, undefined);
+    expect(result.projectId).toBe('my-repo'); // same result as the HTTPS form
+  });
+
+  it('returns 0 for missing version header', () => {
+    const result = extractProjectHeaders(new Headers(), undefined);
+    expect(result.numericKiloCodeVersion).toBe(0);
+    expect(result.xKiloCodeVersion).toBeNull(); // the raw version is null while the numeric one is 0
+  });
+
+  it('truncates long header values', () => {
+    const longValue = 'x'.repeat(600);
+    const headers = new Headers({
+      'x-kilocode-taskid': longValue,
+    });
+    const result = extractProjectHeaders(headers, undefined);
+    expect(result.taskId).toHaveLength(500); // a 600-character input is expected to come back as 500 characters
+  });
+});
diff --git a/llm-gateway/test/unit/feature-detection.test.ts b/llm-gateway/test/unit/feature-detection.test.ts
new file mode 100644
index 000000000..e646e6335
--- /dev/null
+++ b/llm-gateway/test/unit/feature-detection.test.ts
@@ -0,0 +1,27 @@
+// Tests for feature-detection: validateFeatureHeader.
+
+import { describe, it, expect } from 'vitest';
+import { validateFeatureHeader, FEATURE_VALUES } from '../../src/lib/feature-detection';
+
+describe('validateFeatureHeader', () => { // known feature names are returned unchanged; anything else yields null
+  it('returns valid feature values', () => {
+    expect(validateFeatureHeader('vscode-extension')).toBe('vscode-extension');
+    expect(validateFeatureHeader('jetbrains-extension')).toBe('jetbrains-extension');
+    expect(validateFeatureHeader('autocomplete')).toBe('autocomplete');
+  });
+
+  it('returns null for invalid values', () => {
+    expect(validateFeatureHeader('unknown-tool')).toBeNull();
+    expect(validateFeatureHeader('')).toBeNull(); // the empty string is treated as invalid
+  });
+
+  it('returns null for null input', () => {
+    expect(validateFeatureHeader(null)).toBeNull();
+  });
+
+  it('FEATURE_VALUES contains expected entries', () => { // spot-checks three entries, not the full list
+    expect(FEATURE_VALUES).toContain('vscode-extension');
+    expect(FEATURE_VALUES).toContain('jetbrains-extension');
+    expect(FEATURE_VALUES).toContain('autocomplete');
+  });
+});
diff --git a/llm-gateway/test/unit/free-model-rate-limit.test.ts b/llm-gateway/test/unit/free-model-rate-limit.test.ts
new file mode 100644
index 000000000..bd573b623
--- /dev/null
+++ b/llm-gateway/test/unit/free-model-rate-limit.test.ts
@@ -0,0 +1,88 @@
+// Tests for freeModelRateLimitMiddleware — DO-backed rate limit check for Kilo free models.
+
+import { describe, it, expect } from 'vitest';
+import { Hono } from 'hono';
+import type { HonoContext } from '../../src/types/hono';
+import { freeModelRateLimitMiddleware } from '../../src/middleware/free-model-rate-limit';
+import { parseBodyMiddleware } from '../../src/middleware/parse-body';
+import { extractIpMiddleware } from '../../src/middleware/extract-ip';
+import { resolveAutoModelMiddleware } from '../../src/middleware/resolve-auto-model';
+
+// Fake DO namespace: stubs deny checkFreeModel (allowed: false, requestCount: 200) for names in `blocked` and allow all others.
+function makeFakeDONamespace(blocked = new Set<string>()) {
+  return {
+    idFromName(name: string) {
+      return { name }; // the "id" is just the name, so get() can look it up in `blocked`
+    },
+    get(id: { name: string }) {
+      return {
+        checkFreeModel: async () => ({
+          allowed: !blocked.has(id.name),
+          requestCount: blocked.has(id.name) ? 200 : 0,
+        }),
+        checkPromotion: async () => ({ allowed: true, requestCount: 0 }), // promotion checks always pass
+        incrementFreeModel: async () => {},
+        incrementPromotion: async () => {},
+      };
+    },
+  };
+}
+
+function makeApp() { // builds a Hono app whose POST /test runs the chain below and answers { ok: true } if nothing short-circuits
+  const app = new Hono<HonoContext>();
+  app.post(
+    '/test',
+    parseBodyMiddleware,
+    extractIpMiddleware,
+    resolveAutoModelMiddleware,
+    freeModelRateLimitMiddleware, // middleware under test
+    c => c.json({ ok: true })
+  );
+  return app;
+}
+
+function post(doNamespace: ReturnType<typeof makeFakeDONamespace>, model: string, ip = '1.2.3.4') { // POSTs a one-message chat request for `model` from `ip` to a fresh app
+  const app = makeApp();
+  const env = { RATE_LIMIT_DO: doNamespace } as unknown as Cloudflare.Env; // only the RATE_LIMIT_DO binding is provided
+  return app.fetch(
+    new Request('http://x/test', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json', 'CF-Connecting-IP': ip },
+      body: JSON.stringify({ model, messages: [{ role: 'user', content: 'hi' }] }),
+    }),
+    env // passed as the second argument of app.fetch
+  );
+}
+
+describe('freeModelRateLimitMiddleware', () => { // the fake DO is keyed by name and these tests block by IP — presumably the middleware names the DO by client IP; verify
+  it('allows Kilo free model when under the limit', async () => {
+    const ns = makeFakeDONamespace(); // nothing blocked
+    const res = await post(ns, 'corethink:free');
+    expect(res.status).toBe(200);
+  });
+
+  it('blocks Kilo free model at 200 requests/hour', async () => {
+    const ns = makeFakeDONamespace(new Set(['1.2.3.4'])); // blocks the default IP used by post()
+    const res = await post(ns, 'corethink:free');
+    expect(res.status).toBe(429);
+    const body = (await res.json()) as { error: string; message: string };
+    expect(body.error).toBe('Rate limit exceeded');
+    expect(body.message).toBe(
+      'Free model usage limit reached. Please try again later or upgrade to a paid model.'
+    );
+  });
+
+  it('skips non-Kilo free models', async () => {
+    // Even if the IP is blocked, non-Kilo free models are not rate-limited here
+    const ns = makeFakeDONamespace(new Set(['1.2.3.4']));
+    const res = await post(ns, 'meta-llama/llama-3.1-8b-instruct:free');
+    expect(res.status).toBe(200);
+  });
+
+  it('rate limits per IP', async () => {
+    // Only 5.5.5.5 is blocked; 6.6.6.6 should pass
+    const ns = makeFakeDONamespace(new Set(['5.5.5.5']));
+    const res = await post(ns, 'corethink:free', '6.6.6.6');
+    expect(res.status).toBe(200);
+  });
+});
diff --git a/llm-gateway/test/unit/helpers.ts b/llm-gateway/test/unit/helpers.ts
new file mode 100644
index 000000000..fe6da75fc
--- /dev/null
+++ b/llm-gateway/test/unit/helpers.ts
@@ -0,0 +1,151 @@
+// Shared test helpers for mocking Cloudflare bindings and building requests.
+
+import { SignJWT } from 'jose';
+import type { ExecutionContext } from 'hono';
+
+const TEST_SECRET = 'test-secret-at-least-32-characters-long';
+
+// UTF-8 encode a string; the result is used as the HMAC key handed to SignJWT.sign().
+function encode(s: string) {
+  const utf8 = new TextEncoder();
+  return utf8.encode(s);
+}
+
+// Sign a v3 JWT matching verifyGatewayJwt expectations (HS256; `payload` overrides the default claims).
+export async function signToken(
+  payload: Record<string, unknown> = {},
+  secret = TEST_SECRET,
+  expiresIn = '1h'
+) {
+  const claims = { version: 3, kiloUserId: 'user-1', ...payload };
+  const jwt = new SignJWT(claims).setProtectedHeader({ alg: 'HS256' }).setIssuedAt();
+  return jwt.setExpirationTime(expiresIn).sign(encode(secret));
+}
+
+// Build a minimal mock Env matching worker-configuration.d.ts; `overrides` replace any default binding.
+export function makeEnv(overrides: Partial<Record<string, unknown>> = {}): Cloudflare.Env {
+  function makeSecret(value: string): SecretsStoreSecret {
+    return { get: async () => value }; // get() always resolves to the fixed value
+  }
+
+  // Fake DO namespace that creates stubs returning a fixed result.
+  function makeFakeDONamespace(): Cloudflare.Env['RATE_LIMIT_DO'] {
+    const stub = {
+      checkFreeModel: async () => ({ allowed: true, requestCount: 0 }),
+      checkPromotion: async () => ({ allowed: true, requestCount: 0 }),
+      incrementFreeModel: async () => {},
+      incrementPromotion: async () => {},
+    };
+    return {
+      idFromName() {
+        return {} as DurableObjectId;
+      },
+      newUniqueId() {
+        return {} as DurableObjectId;
+      },
+      idFromString() {
+        return {} as DurableObjectId;
+      },
+      getByName() {
+        return stub as unknown as DurableObjectStub; // every name maps to the same always-allowed stub
+      },
+      get() {
+        return stub as unknown as DurableObjectStub;
+      },
+      jurisdiction() {
+        return this; // the jurisdiction-scoped namespace is the same fake object
+      },
+    } as unknown as Cloudflare.Env['RATE_LIMIT_DO'];
+  }
+
+  return {
+    HYPERDRIVE: { connectionString: 'postgres://localhost:5432/test' } as Hyperdrive,
+    RATE_LIMIT_DO: makeFakeDONamespace(),
+    O11Y: {
+      fetch: async () => new Response(JSON.stringify({ success: true })),
+      ingestApiMetrics: async () => {},
+    } as unknown as Fetcher,
+    NEXTAUTH_SECRET_PROD: makeSecret(TEST_SECRET),
+    OPENROUTER_API_KEY: makeSecret('or-key'),
+    GIGAPOTATO_API_KEY: makeSecret('gp-key'),
+    CORETHINK_API_KEY: makeSecret('ct-key'),
+    MARTIAN_API_KEY: makeSecret('mt-key'),
+    MISTRAL_API_KEY: makeSecret('ms-key'),
+    VERCEL_AI_GATEWAY_API_KEY: makeSecret('vc-key'),
+    BYOK_ENCRYPTION_KEY: makeSecret('byok-key-32-chars-exactly-here!!'), // 32 chars; was 31 despite the name
+    ABUSE_CF_ACCESS_CLIENT_ID: makeSecret('abuse-id'),
+    ABUSE_CF_ACCESS_CLIENT_SECRET: makeSecret('abuse-secret'),
+    GIGAPOTATO_API_URL: makeSecret('https://gigapotato.example.com'),
+    ABUSE_SERVICE_URL: makeSecret('https://abuse.example.com'),
+    POSTHOG_API_KEY: makeSecret('phk-test'),
+    ...overrides, // spread last so caller-supplied bindings win over the defaults above
+  } as Cloudflare.Env;
+}
+
+export { TEST_SECRET };
+
+// No-op ExecutionContext to pass as the third argument of fetch() in tests.
+export function fakeExecutionCtx(): ExecutionContext {
+  // waitUntil discards whatever it is handed, so this fake never awaits background work itself.
+  const noop = () => {};
+  const ctx: ExecutionContext = { waitUntil: noop, passThroughOnException: noop, props: {} };
+  return ctx;
+}
+
+// Build a POST request for the gateway chat endpoint (default path /api/gateway/chat/completions).
+// JSON content type and CF-Connecting-IP 1.2.3.4 are defaults; opts.headers, then opts.token, win.
+export function chatRequest(
+  body: Record<string, unknown>,
+  opts: {
+    headers?: Record<string, string>;
+    token?: string;
+    path?: string;
+  } = {}
+) {
+  const target = `http://localhost${opts.path ?? '/api/gateway/chat/completions'}`;
+  const auth: Record<string, string> = opts.token ? { Authorization: `Bearer ${opts.token}` } : {};
+  const headers: Record<string, string> = {
+    'Content-Type': 'application/json',
+    'CF-Connecting-IP': '1.2.3.4',
+    ...opts.headers,
+    ...auth,
+  };
+  return new Request(target, {
+    method: 'POST',
+    headers,
+    body: JSON.stringify(body),
+  });
+}
+
+// SSE helpers.
+// makeSSEStream: a byte stream that emits each input string as its own UTF-8 chunk, then closes.
+export function makeSSEStream(chunks: string[]): ReadableStream<Uint8Array> {
+  const utf8 = new TextEncoder();
+  return new ReadableStream({
+    // Everything is queued synchronously inside start(); there is no later producer.
+    start(ctrl) {
+      for (const piece of chunks) ctrl.enqueue(utf8.encode(piece));
+      ctrl.close();
+    },
+  });
+}
+
+export function sseChunk(data: Record<string, unknown>): string {
+  return 'data: ' + JSON.stringify(data) + '\n\n'; // one event: a data line plus the blank-line terminator
+}
+
+export function sseDone(): string {
+  return 'data: [DONE]\n\n'; // end-of-stream sentinel; readSSEEvents() skips this line
+}
+
+// Read an SSE response body into the parsed JSON payload of every `data: ` line.
+// The `data: [DONE]` sentinel is skipped; both LF and CRLF line endings are accepted.
+export async function readSSEEvents(response: Response): Promise<unknown[]> {
+  const text = await response.text();
+  // Split on \r?\n: under CRLF framing the sentinel would otherwise read 'data: [DONE]\r',
+  // fail the equality check below and be handed to JSON.parse, which throws on '[DONE]'.
+  return text
+    .split(/\r?\n/)
+    .filter(line => line.startsWith('data: ') && line !== 'data: [DONE]')
+    .map(line => JSON.parse(line.slice(6)) as unknown);
+}
diff --git a/llm-gateway/test/unit/index.test.ts b/llm-gateway/test/unit/index.test.ts
new file mode 100644
index 000000000..6410bf097
--- /dev/null
+++ b/llm-gateway/test/unit/index.test.ts
@@ -0,0 +1,9 @@
+import { describe, it, expect } from 'vitest';
+
+// Phase 1 scaffolding smoke test: the entry module imports and its default export has a fetch function.
+describe('llm-gateway scaffold', () => {
+  it('module loads without error', async () => {
+    const { default: worker } = await import('../../src/index');
+    expect(typeof worker.fetch).toBe('function');
+  });
+});
diff --git a/llm-gateway/test/unit/jwt.test.ts b/llm-gateway/test/unit/jwt.test.ts
new file mode 100644
index 000000000..30e795254
--- /dev/null
+++ b/llm-gateway/test/unit/jwt.test.ts
@@ -0,0 +1,83 @@
+import { describe, it, expect } from 'vitest';
+import { verifyGatewayJwt, isPepperValid } from '../../src/lib/jwt';
+import { SignJWT } from 'jose';
+
+const SECRET = 'test-secret-at-least-32-characters-long';
+
+// UTF-8 bytes of `s`; used below as the HMAC key handed to SignJWT.sign().
+function encode(s: string) {
+  return new TextEncoder().encode(s);
+}
+
+// Sign `payload` verbatim as an HS256 JWT with iat set and expiry `expiresIn`; no default claims.
+async function sign(payload: Record<string, unknown>, secret = SECRET, expiresIn = '1h') {
+  const key = encode(secret);
+  const unsigned = new SignJWT(payload).setProtectedHeader({ alg: 'HS256' }).setIssuedAt();
+  return unsigned.setExpirationTime(expiresIn).sign(key);
+}
+
+describe('verifyGatewayJwt', () => {
+  it('returns ok for a valid v3 token', async () => {
+    const token = await sign({ version: 3, kiloUserId: 'user-1' });
+    const result = await verifyGatewayJwt(token, SECRET);
+    expect(result).toMatchObject({ ok: true, payload: { kiloUserId: 'user-1', version: 3 } }); // subset match
+  });
+
+  it('preserves extra payload fields', async () => {
+    const token = await sign({
+      version: 3,
+      kiloUserId: 'user-2',
+      apiTokenPepper: 'abc',
+      botId: 'bot-x',
+      tokenSource: 'cloud-agent',
+      organizationId: 'org-1',
+    });
+    const result = await verifyGatewayJwt(token, SECRET);
+    expect(result.ok).toBe(true);
+    if (!result.ok) return; // type narrowing only; the expect above already fails when ok is false
+    expect(result.payload.apiTokenPepper).toBe('abc');
+    expect(result.payload.botId).toBe('bot-x');
+    expect(result.payload.tokenSource).toBe('cloud-agent');
+    expect(result.payload.organizationId).toBe('org-1');
+  });
+
+  it('returns version reason for wrong version', async () => {
+    const token = await sign({ version: 2, kiloUserId: 'user-1' }); // validly signed, only the version is off
+    const result = await verifyGatewayJwt(token, SECRET);
+    expect(result).toEqual({ ok: false, reason: 'version' });
+  });
+
+  it('returns expired reason for expired token', async () => {
+    const token = await sign({ version: 3, kiloUserId: 'user-1' }, SECRET, '0s'); // presumably exp == iat
+    const result = await verifyGatewayJwt(token, SECRET);
+    expect(result).toEqual({ ok: false, reason: 'expired' });
+  });
+
+  it('returns invalid reason for wrong secret', async () => {
+    const token = await sign({ version: 3, kiloUserId: 'user-1' }); // signed with SECRET
+    const result = await verifyGatewayJwt(token, 'wrong-secret-at-least-32-chars!!');
+    expect(result).toEqual({ ok: false, reason: 'invalid' });
+  });
+
+  it('returns invalid reason for garbage token', async () => {
+    const result = await verifyGatewayJwt('not.a.jwt', SECRET); // three dot-separated parts, none decodable
+    expect(result).toEqual({ ok: false, reason: 'invalid' });
+  });
+});
+
+describe('isPepperValid', () => {
+  it('passes when DB has no pepper', () => {
+    expect(isPepperValid('any', null)).toBe(true); // going by the titles: (JWT pepper, DB pepper) — confirm
+    expect(isPepperValid(undefined, null)).toBe(true);
+  });
+
+  it('passes when JWT and DB peppers match', () => {
+    expect(isPepperValid('p1', 'p1')).toBe(true);
+  });
+
+  it('fails when peppers differ', () => {
+    expect(isPepperValid('p1', 'p2')).toBe(false);
+    expect(isPepperValid(undefined, 'p2')).toBe(false); // a missing first argument does not match a set pepper
+    expect(isPepperValid(null, 'p2')).toBe(false);
+  });
+});
diff --git a/llm-gateway/test/unit/kilo-auto-model.test.ts b/llm-gateway/test/unit/kilo-auto-model.test.ts
new file mode 100644
index 000000000..2a1d0e24c
--- /dev/null
+++ b/llm-gateway/test/unit/kilo-auto-model.test.ts
@@ -0,0 +1,47 @@
+import { describe, it, expect } from 'vitest';
+import { isKiloAutoModel, resolveAutoModel } from '../../src/lib/kilo-auto-model';
+
+describe('isKiloAutoModel', () => {
+  it('recognises kilo/auto variants', () => {
+    expect(isKiloAutoModel('kilo/auto')).toBe(true); // the same three ids are resolved in the suite below
+    expect(isKiloAutoModel('kilo/auto-free')).toBe(true);
+    expect(isKiloAutoModel('kilo/auto-small')).toBe(true);
+  });
+
+  it('returns false for real models', () => {
+    expect(isKiloAutoModel('anthropic/claude-sonnet-4-20250514')).toBe(false);
+    expect(isKiloAutoModel('openai/gpt-4o')).toBe(false);
+  });
+});
+
+describe('resolveAutoModel', () => {
+  it('resolves kilo/auto-free to minimax free model', () => {
+    const result = resolveAutoModel('kilo/auto-free', null); // 2nd arg: mode header per the last test title
+    expect(result.model).toBe('minimax/minimax-m2.5:free');
+  });
+
+  it('resolves kilo/auto-small to gpt-5-nano', () => {
+    const result = resolveAutoModel('kilo/auto-small', null);
+    expect(result.model).toBe('openai/gpt-5-nano');
+  });
+
+  it('resolves kilo/auto with plan mode to Claude Opus', () => {
+    const result = resolveAutoModel('kilo/auto', 'plan');
+    expect(result.model).toBe('anthropic/claude-opus-4-20250514');
+  });
+
+  it('resolves kilo/auto with code mode to Claude Sonnet', () => {
+    const result = resolveAutoModel('kilo/auto', 'code');
+    expect(result.model).toBe('anthropic/claude-sonnet-4-20250514');
+  });
+
+  it('falls back to code model for unknown mode', () => {
+    const result = resolveAutoModel('kilo/auto', 'unknown-mode');
+    expect(result.model).toBe('anthropic/claude-sonnet-4-20250514'); // same id the 'code' mode test expects
+  });
+
+  it('falls back to code model when modeHeader is null', () => {
+    const result = resolveAutoModel('kilo/auto', null);
+    expect(result.model).toBe('anthropic/claude-sonnet-4-20250514'); // same id the 'code' mode test expects
+  });
+});
diff --git a/llm-gateway/test/unit/log-free-model-usage.test.ts b/llm-gateway/test/unit/log-free-model-usage.test.ts
new file mode 100644
index 000000000..871ae184c
--- /dev/null
+++ b/llm-gateway/test/unit/log-free-model-usage.test.ts
@@ -0,0 +1,159 @@
+// Test: logFreeModelUsageMiddleware DB insert timing (B5).
+// The DB insert must be awaited BEFORE next() so the rate-limit entry
+// is counted even if the upstream request fails.
+
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { Hono } from 'hono';
+import type { HonoContext } from '../../src/types/hono';
+import { fakeExecutionCtx } from './helpers';
+
+// ── Track DB insert timing relative to next() ──────────────────────────────
+
+const timeline: string[] = []; // ordered event log written by the mocks and the route handlers below
+
+vi.mock('@kilocode/db/client', () => ({
+  getWorkerDb: () => ({
+    insert: () => ({
+      values: () => {
+        timeline.push('db-insert'); // recorded synchronously when values() is called
+        return Promise.resolve(); // the mocked insert always succeeds
+      },
+    }),
+  }),
+}));
+
+vi.mock('../../src/lib/rate-limit', () => ({
+  incrementFreeModelUsage: async () => {
+    timeline.push('do-increment');
+  },
+  incrementPromotionUsage: async () => {
+    timeline.push('promo-increment');
+  },
+}));
+
+beforeEach(() => {
+  timeline.length = 0; // emptied in place so the mock closures keep writing to the same array
+});
+
+// ── Tests ────────────────────────────────────────────────────────────────────
+
+describe('logFreeModelUsageMiddleware', () => {
+  it('awaits DB insert before calling next()', async () => {
+    const { logFreeModelUsageMiddleware } =
+      await import('../../src/middleware/log-free-model-usage');
+
+    const app = new Hono<HonoContext>();
+
+    // Stub context variables
+    app.use('*', async (c, next) => {
+      c.set('resolvedModel', 'corethink:free');
+      c.set('clientIp', '1.2.3.4');
+      c.set('user', { id: 'user-1' } as never);
+      await next();
+    });
+
+    app.use('*', logFreeModelUsageMiddleware);
+
+    app.post('*', c => {
+      timeline.push('handler'); // marks the point at which next() reached the route handler
+      return c.json({ ok: true });
+    });
+
+    const req = new Request('http://localhost/api/gateway/chat/completions', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: '{}',
+    });
+
+    const env = {
+      HYPERDRIVE: { connectionString: 'postgres://localhost:5432/test' },
+      RATE_LIMIT_DO: {},
+    };
+
+    await app.fetch(req, env as never, fakeExecutionCtx());
+
+    // DB insert must happen BEFORE the handler (next())
+    const dbIndex = timeline.indexOf('db-insert');
+    const handlerIndex = timeline.indexOf('handler');
+    expect(dbIndex).toBeGreaterThanOrEqual(0); // both events must have been recorded at all
+    expect(handlerIndex).toBeGreaterThanOrEqual(0);
+    expect(dbIndex).toBeLessThan(handlerIndex);
+  });
+
+  it('still calls next() even if DB insert fails', async () => {
+    // NOTE(review): the DB mock is never overridden here — insert still resolves, so no failure is simulated
+    const { logFreeModelUsageMiddleware } =
+      await import('../../src/middleware/log-free-model-usage');
+
+    const app = new Hono<HonoContext>();
+
+    app.use('*', async (c, next) => {
+      c.set('resolvedModel', 'corethink:free');
+      c.set('clientIp', '1.2.3.4');
+      c.set('user', { id: 'user-1' } as never);
+      await next();
+    });
+
+    app.use('*', logFreeModelUsageMiddleware);
+
+    app.post('*', c => {
+      timeline.push('handler');
+      return c.json({ ok: true });
+    });
+
+    const req = new Request('http://localhost/api/gateway/chat/completions', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: '{}',
+    });
+
+    const env = {
+      HYPERDRIVE: { connectionString: 'postgres://localhost:5432/test' },
+      RATE_LIMIT_DO: {},
+    };
+
+    const res = await app.fetch(req, env as never, fakeExecutionCtx());
+
+    // Handler should still run despite any DB issues
+    expect(res.status).toBe(200);
+    expect(timeline).toContain('handler');
+  });
+
+  it('skips for non-free models', async () => {
+    const { logFreeModelUsageMiddleware } =
+      await import('../../src/middleware/log-free-model-usage');
+
+    const app = new Hono<HonoContext>();
+
+    app.use('*', async (c, next) => {
+      c.set('resolvedModel', 'anthropic/claude-sonnet-4-20250514'); // the only setup difference from above
+      c.set('clientIp', '1.2.3.4');
+      c.set('user', { id: 'user-1' } as never);
+      await next();
+    });
+
+    app.use('*', logFreeModelUsageMiddleware);
+
+    app.post('*', c => {
+      timeline.push('handler');
+      return c.json({ ok: true });
+    });
+
+    const req = new Request('http://localhost/api/gateway/chat/completions', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: '{}',
+    });
+
+    const env = {
+      HYPERDRIVE: { connectionString: 'postgres://localhost:5432/test' },
+      RATE_LIMIT_DO: {},
+    };
+
+    await app.fetch(req, env as never, fakeExecutionCtx());
+
+    // No DB insert for paid models
+    expect(timeline).not.toContain('db-insert');
+    expect(timeline).toContain('handler');
+  });
+});
diff --git a/llm-gateway/test/unit/middleware-chain.test.ts b/llm-gateway/test/unit/middleware-chain.test.ts
new file mode 100644
index 000000000..673d840e8
--- /dev/null
+++ b/llm-gateway/test/unit/middleware-chain.test.ts
@@ -0,0 +1,169 @@
+// Integration test: full middleware chain exercised end-to-end.
+// All external dependencies (DB, KV, fetch) are mocked; the test runs through
+// every middleware from parseBody to proxyHandler, confirming the correct
+// response for several representative scenarios.
+
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { makeEnv, chatRequest, fakeExecutionCtx } from './helpers';
+
+// ── Module mocks ───────────────────────────────────────────────────────────────
+
+// Mock @kilocode/db/client so we never hit a real Postgres
+vi.mock('@kilocode/db/client', () => ({
+  getWorkerDb: () => ({
+    select: () => ({
+      from: () => ({
+        where: () => ({
+          limit: () => Promise.resolve([]), // every select ... limit query yields no rows
+        }),
+      }),
+    }),
+    insert: () => ({
+      values: () => ({
+        returning: () => Promise.resolve([]), // only the .returning() form is supported by this mock
+      }),
+    }),
+  }),
+}));
+
+// Mock @kilocode/worker-utils to bypass KV cache and provide extractBearerToken
+vi.mock('@kilocode/worker-utils', () => ({
+  userExistsWithCache: async () => true,
+  extractBearerToken: (header: string | undefined) => {
+    if (!header) return null;
+    const parts = header.split(' ');
+    return parts.length === 2 && parts[0].toLowerCase() === 'bearer' ? parts[1] : null; // scheme is case-insensitive
+  },
+  verifyKiloToken: async () => {
+    throw new Error('should not be called directly — verifyGatewayJwt wraps this');
+  },
+}));
+
+// Keep a reference to the real globalThis.fetch
+const realFetch = globalThis.fetch;
+let fetchMock: ReturnType<typeof vi.fn>;
+
+beforeEach(() => {
+  fetchMock = vi.fn(); // fresh mock per test, with no implementation configured
+  globalThis.fetch = fetchMock;
+});
+
+afterEach(() => {
+  globalThis.fetch = realFetch; // put the original fetch back before the next test
+  vi.restoreAllMocks();
+});
+
+// ── Helpers ────────────────────────────────────────────────────────────────────
+
+// Dynamically import the worker entry and run `req` through fetch() with a makeEnv() env and a no-op ctx.
+async function dispatch(req: Request, envOverrides: Partial<Record<string, unknown>> = {}) {
+  const entry = await import('../../src/index');
+  return entry.default.fetch(req, makeEnv(envOverrides), fakeExecutionCtx());
+}
+
+// ── Tests ──────────────────────────────────────────────────────────────────────
+
+describe('middleware chain – health check', () => {
+  it('GET /health returns 404 (removed)', async () => {
+    const res = await dispatch(new Request('http://localhost/health')); // no init given, so a GET
+    expect(res.status).toBe(404);
+  });
+});
+
+describe('middleware chain – 404', () => {
+  it('unknown path returns 404', async () => {
+    const res = await dispatch(new Request('http://localhost/unknown')); // GET outside both /api prefixes
+    expect(res.status).toBe(404);
+  });
+});
+
+describe('middleware chain – invalid path', () => {
+  it('returns 400 for /api/gateway/other (matches reference invalidPathResponse)', async () => {
+    const req = new Request('http://localhost/api/gateway/other', { method: 'POST' }); // no body or headers
+    const res = await dispatch(req);
+    expect(res.status).toBe(400);
+    const body = (await res.json()) as Record<string, unknown>;
+    expect(body).toEqual({
+      error: 'Invalid path',
+      message: 'This endpoint only accepts the path `/chat/completions`.',
+    });
+  });
+
+  it('returns 400 for /api/openrouter/v1/models (matches reference invalidPathResponse)', async () => {
+    const req = new Request('http://localhost/api/openrouter/v1/models', { method: 'GET' });
+    const res = await dispatch(req);
+    expect(res.status).toBe(400); // the GET gets the same 400 body as the POST case above
+    const body = (await res.json()) as Record<string, unknown>;
+    expect(body).toEqual({
+      error: 'Invalid path',
+      message: 'This endpoint only accepts the path `/chat/completions`.',
+    });
+  });
+});
+
+describe('middleware chain – body validation', () => {
+  it('returns 404 for missing model (matches reference modelDoesNotExistResponse)', async () => {
+    const res = await dispatch(chatRequest({ messages: [] })); // valid JSON, but no `model` key
+    expect(res.status).toBe(404);
+    const body = (await res.json()) as Record<string, unknown>;
+    expect(body).toEqual({
+      error: 'Model not found',
+      message: 'The requested model could not be found.',
+    });
+  });
+
+  it('returns 400 for invalid JSON', async () => {
+    const req = new Request('http://localhost/api/gateway/chat/completions', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json', 'CF-Connecting-IP': '1.2.3.4' },
+      body: 'not json', // raw string on purpose; chatRequest() would JSON.stringify it
+    });
+    const res = await dispatch(req);
+    expect(res.status).toBe(400);
+  });
+});
+
+describe('middleware chain – anonymous gate', () => {
+  it('returns 401 for paid model without token', async () => {
+    const res = await dispatch(
+      chatRequest({
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      }) // no opts.token, so chatRequest() adds no Authorization header
+    );
+    expect(res.status).toBe(401);
+    const body = (await res.json()) as { error: { code: string } };
+    expect(body.error.code).toBe('PAID_MODEL_AUTH_REQUIRED');
+  });
+});
+
+describe('middleware chain – route parity', () => {
+  it('/api/openrouter/chat/completions works the same as /api/gateway/', async () => {
+    const res = await dispatch(
+      chatRequest(
+        {
+          model: 'anthropic/claude-sonnet-4-20250514',
+          messages: [{ role: 'user', content: 'hi' }],
+        },
+        { path: '/api/openrouter/chat/completions' }
+      )
+    );
+    // Should still hit anonymous-gate → 401 for paid model
+    expect(res.status).toBe(401); // only the status is compared; the error body is not asserted here
+  });
+});
+
+describe('middleware chain – missing IP', () => {
+  it('returns 400 when CF-Connecting-IP and x-forwarded-for are both absent', async () => {
+    const req = new Request('http://localhost/api/gateway/chat/completions', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' }, // built by hand: chatRequest() always adds an IP header
+      body: JSON.stringify({
+        model: 'corethink:free',
+        messages: [{ role: 'user', content: 'hi' }],
+      }),
+    });
+    const res = await dispatch(req);
+    expect(res.status).toBe(400);
+  });
+});
diff --git a/llm-gateway/test/unit/models.test.ts b/llm-gateway/test/unit/models.test.ts
new file mode 100644
index 000000000..f976b0713
--- /dev/null
+++ b/llm-gateway/test/unit/models.test.ts
@@ -0,0 +1,78 @@
+import { describe, it, expect } from 'vitest';
+import {
+  isFreeModel,
+  isKiloFreeModel,
+  isDeadFreeModel,
+  isRateLimitedToDeath,
+} from '../../src/lib/models';
+
+describe('isFreeModel', () => {
+  it('recognises enabled Kilo-hosted free models', () => {
+    expect(isFreeModel('giga-potato')).toBe(true); // no :free suffix, yet still free
+    expect(isFreeModel('corethink:free')).toBe(true);
+    expect(isFreeModel('minimax/minimax-m2.5:free')).toBe(true);
+  });
+
+  it('recognises generic :free suffix models', () => {
+    expect(isFreeModel('meta-llama/llama-3.3-70b-instruct:free')).toBe(true);
+    expect(isFreeModel('openai/gpt-4o:free')).toBe(true); // the bare 'openai/gpt-4o' is asserted paid below
+  });
+
+  it('recognises openrouter/free', () => {
+    expect(isFreeModel('openrouter/free')).toBe(true);
+  });
+
+  it('recognises OpenRouter stealth models', () => {
+    expect(isFreeModel('openrouter/some-model-alpha')).toBe(true); // presumably matched on the -alpha/-beta suffix — confirm
+    expect(isFreeModel('openrouter/some-model-beta')).toBe(true);
+  });
+
+  it('returns false for paid models', () => {
+    expect(isFreeModel('anthropic/claude-3-5-sonnet')).toBe(false);
+    expect(isFreeModel('openai/gpt-4o')).toBe(false);
+  });
+
+  // Disabled Kilo free models still match the generic :free suffix rule
+  it('still returns true for disabled Kilo free models (they end in :free)', () => {
+    expect(isFreeModel('x-ai/grok-code-fast-1:optimized:free')).toBe(true);
+  });
+});
+
+describe('isKiloFreeModel', () => {
+  it('returns true only for enabled Kilo-hosted free models', () => {
+    expect(isKiloFreeModel('giga-potato')).toBe(true);
+    expect(isKiloFreeModel('corethink:free')).toBe(true);
+  });
+
+  it('returns false for generic :free models', () => {
+    expect(isKiloFreeModel('meta-llama/llama-3.3-70b-instruct:free')).toBe(false); // isFreeModel is true for this id
+  });
+
+  it('returns false for disabled Kilo free models', () => {
+    expect(isKiloFreeModel('x-ai/grok-code-fast-1:optimized:free')).toBe(false); // isDeadFreeModel is true for it
+  });
+});
+
+describe('isDeadFreeModel', () => {
+  it('returns true for disabled Kilo free models', () => {
+    expect(isDeadFreeModel('x-ai/grok-code-fast-1:optimized:free')).toBe(true);
+    expect(isDeadFreeModel('z-ai/glm-5:free')).toBe(true);
+  });
+
+  it('returns false for enabled models', () => {
+    expect(isDeadFreeModel('giga-potato')).toBe(false);
+    expect(isDeadFreeModel('anthropic/claude-3-5-sonnet')).toBe(false); // a paid model is not "dead" either
+  });
+});
+
+describe('isRateLimitedToDeath', () => {
+  it('returns true for known rate-limited models', () => {
+    expect(isRateLimitedToDeath('meta-llama/llama-3.3-70b-instruct:free')).toBe(true); // also isFreeModel above
+    expect(isRateLimitedToDeath('deepseek/deepseek-r1-0528:free')).toBe(true);
+  });
+
+  it('returns false for models not in the list', () => {
+    expect(isRateLimitedToDeath('anthropic/claude-3-5-sonnet')).toBe(false);
+    expect(isRateLimitedToDeath('giga-potato')).toBe(false);
+  });
+});
diff --git a/llm-gateway/test/unit/org-restrictions.test.ts b/llm-gateway/test/unit/org-restrictions.test.ts
new file mode 100644
index 000000000..2bf1bce2c
--- /dev/null
+++ b/llm-gateway/test/unit/org-restrictions.test.ts
@@ -0,0 +1,117 @@
+import { describe, it, expect } from 'vitest';
+import { checkOrganizationModelRestrictions } from '../../src/lib/org-restrictions';
+
+describe('checkOrganizationModelRestrictions', () => {
+  it('allows everything when no settings', () => {
+    const result = checkOrganizationModelRestrictions({ modelId: 'anthropic/claude-3-opus' }); // no settings, no plan
+    expect(result.error).toBeNull();
+    expect(result.providerConfig).toBeUndefined();
+  });
+
+  it('allows everything when settings is empty', () => {
+    const result = checkOrganizationModelRestrictions({
+      modelId: 'anthropic/claude-3-opus',
+      settings: {},
+      organizationPlan: 'teams',
+    });
+    expect(result.error).toBeNull();
+  });
+
+  it('skips model allow list for teams plan', () => {
+    const result = checkOrganizationModelRestrictions({
+      modelId: 'anthropic/claude-3-opus',
+      settings: { model_allow_list: ['openai/gpt-4'] },
+      organizationPlan: 'teams',
+    });
+    expect(result.error).toBeNull(); // same input is rejected under 'enterprise' in the next test
+  });
+
+  it('blocks model not in allow list for enterprise plan', () => {
+    const result = checkOrganizationModelRestrictions({
+      modelId: 'anthropic/claude-3-opus',
+      settings: { model_allow_list: ['openai/gpt-4'] },
+      organizationPlan: 'enterprise',
+    });
+    expect(result.error).not.toBeNull();
+    expect(result.error?.status).toBe(404);
+  });
+
+  it('allows model in allow list for enterprise plan (exact match)', () => {
+    const result = checkOrganizationModelRestrictions({
+      modelId: 'anthropic/claude-3-opus',
+      settings: { model_allow_list: ['anthropic/claude-3-opus'] },
+      organizationPlan: 'enterprise',
+    });
+    expect(result.error).toBeNull();
+  });
+
+  it('allows model via wildcard in allow list for enterprise plan', () => {
+    const result = checkOrganizationModelRestrictions({
+      modelId: 'anthropic/claude-3-opus',
+      settings: { model_allow_list: ['anthropic/*'] },
+      organizationPlan: 'enterprise',
+    });
+    expect(result.error).toBeNull();
+  });
+
+  it('strips :free suffix before matching', () => {
+    const result = checkOrganizationModelRestrictions({
+      modelId: 'anthropic/claude-3-haiku:free',
+      settings: { model_allow_list: ['anthropic/*'] },
+      organizationPlan: 'enterprise',
+    });
+    expect(result.error).toBeNull(); // NOTE(review): a wildcard entry may match with the suffix intact too — confirm
+  });
+
+  it('sets provider config only when from enterprise plan', () => {
+    const result = checkOrganizationModelRestrictions({
+      modelId: 'anthropic/claude-3-opus',
+      settings: { provider_allow_list: ['anthropic', 'openai'] },
+      organizationPlan: 'enterprise',
+    });
+    expect(result.error).toBeNull();
+    expect(result.providerConfig?.only).toEqual(['anthropic', 'openai']);
+  });
+
+  it('does not set provider allow list for teams plan', () => {
+    const result = checkOrganizationModelRestrictions({
+      modelId: 'anthropic/claude-3-opus',
+      settings: { provider_allow_list: ['anthropic'] },
+      organizationPlan: 'teams',
+    });
+    expect(result.error).toBeNull();
+    expect(result.providerConfig?.only).toBeUndefined(); // also passes when providerConfig itself is undefined
+  });
+
+  it('sets data_collection from settings regardless of plan', () => {
+    const result = checkOrganizationModelRestrictions({
+      modelId: 'anthropic/claude-3-opus',
+      settings: { data_collection: 'deny' },
+      organizationPlan: 'teams',
+    });
+    expect(result.error).toBeNull();
+    expect(result.providerConfig?.data_collection).toBe('deny');
+  });
+
+  it('blocks kilo free model when its required provider is not in allow list', () => {
+    const result = checkOrganizationModelRestrictions({
+      modelId: 'giga-potato',
+      settings: { provider_allow_list: ['anthropic'] },
+      organizationPlan: 'enterprise',
+    });
+    expect(result.error).not.toBeNull();
+    expect(result.error?.status).toBe(404);
+  });
+
+  it('restriction error message matches reference modelNotAllowedResponse error field', () => {
+    const result = checkOrganizationModelRestrictions({
+      modelId: 'anthropic/claude-3-opus',
+      settings: { model_allow_list: ['openai/gpt-4'] },
+      organizationPlan: 'enterprise',
+    });
+    // The middleware uses restrictionError.message as the `error` field and
+    // 'The requested model is not allowed for your team.' as the `message` field,
+    // matching the reference modelNotAllowedResponse() which has distinct values.
+    expect(result.error?.message).toBe('Model not allowed for your team.');
+  });
+});
diff --git a/llm-gateway/test/unit/parse-body.test.ts b/llm-gateway/test/unit/parse-body.test.ts
new file mode 100644
index 000000000..19376319d
--- /dev/null
+++ b/llm-gateway/test/unit/parse-body.test.ts
@@ -0,0 +1,119 @@
+import { describe, it, expect } from 'vitest';
+import { Hono } from 'hono';
+import type { HonoContext } from '../../src/types/hono';
+import { parseBodyMiddleware } from '../../src/middleware/parse-body';
+
+// Hono app whose only route runs parseBodyMiddleware and echoes the context values it set.
+function makeApp() {
+  const app = new Hono<HonoContext>();
+  app.post('/test', parseBodyMiddleware, c => {
+    const parsed = c.get('requestBody');
+    const resolvedModel = c.get('resolvedModel');
+    const feature = c.get('feature');
+    const { model, stream_options } = parsed;
+    return c.json({ model, resolvedModel, feature, stream_options });
+  });
+  return app;
+}
+
+// POST `body` as JSON to /test on `app`; `headers` are merged over the default Content-Type.
+async function post(
+  app: ReturnType<typeof makeApp>,
+  body: unknown,
+  headers?: Record<string, string>
+) {
+  const request = new Request('http://x/test', {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json', ...headers },
+    body: JSON.stringify(body),
+  });
+  return app.fetch(request);
+}
+
+type JsonData = Record<string, unknown>;
+
+describe('parseBodyMiddleware', () => {
+  it('sets requestBody, resolvedModel, and stream_options', async () => {
+    const app = makeApp();
+    const res = await post(app, { model: 'anthropic/claude-3-5-sonnet', messages: [] });
+    expect(res.status).toBe(200);
+    const data = (await res.json()) as JsonData;
+    expect(data.model).toBe('anthropic/claude-3-5-sonnet');
+    expect(data.resolvedModel).toBe('anthropic/claude-3-5-sonnet');
+    expect(data.stream_options).toEqual({ include_usage: true }); // the request sent no stream_options at all
+  });
+
+  it('lowercases resolvedModel', async () => {
+    const app = makeApp();
+    const res = await post(app, { model: 'Anthropic/Claude-3-5-Sonnet', messages: [] });
+    const data = (await res.json()) as JsonData;
+    expect(data.resolvedModel).toBe('anthropic/claude-3-5-sonnet');
+  });
+
+  it('merges stream_options, preserving caller fields', async () => {
+    const app = makeApp();
+    const res = await post(app, {
+      model: 'gpt-4',
+      messages: [],
+      stream_options: { include_usage: false }, // NOTE(review): no second field, so "preserving" is not observed
+    });
+    const data = (await res.json()) as JsonData;
+    expect(data.stream_options).toEqual({ include_usage: true }); // the caller's false is replaced by true
+  });
+
+  it('returns 404 for missing model (matches reference modelDoesNotExistResponse)', async () => {
+    const app = makeApp();
+    const res = await post(app, { messages: [] });
+    expect(res.status).toBe(404);
+    const data = (await res.json()) as JsonData;
+    expect(data).toEqual({
+      error: 'Model not found',
+      message: 'The requested model could not be found.',
+    });
+  });
+
+  it('returns 404 for empty model (matches reference modelDoesNotExistResponse)', async () => {
+    const app = makeApp();
+    const res = await post(app, { model: '  ', messages: [] }); // whitespace-only counts as empty
+    expect(res.status).toBe(404);
+    const data = (await res.json()) as JsonData;
+    expect(data).toEqual({
+      error: 'Model not found',
+      message: 'The requested model could not be found.',
+    });
+  });
+
+  it('returns 400 for invalid JSON', async () => {
+    const app = makeApp();
+    const res = await app.fetch(
+      new Request('http://x/test', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: 'not json', // bypasses post(), which would JSON.stringify the body
+      })
+    );
+    expect(res.status).toBe(400);
+  });
+
+  it('validates x-kilocode-feature header', async () => {
+    const app = makeApp();
+    const res = await post(
+      app,
+      { model: 'gpt-4', messages: [] },
+      { 'x-kilocode-feature': 'vscode-extension' }
+    );
+    const data = (await res.json()) as JsonData;
+    expect(data.feature).toBe('vscode-extension');
+  });
+
+  it('sets feature to null for unknown header value', async () => {
+    const app = makeApp();
+    const res = await post(
+      app,
+      { model: 'gpt-4', messages: [] },
+      { 'x-kilocode-feature': 'unknown-tool' }
+    );
+    const data = (await res.json()) as JsonData;
+    expect(data.feature).toBeNull(); // the request is not rejected; only the feature value is dropped
+  });
+});
diff --git a/llm-gateway/test/unit/promotions.test.ts b/llm-gateway/test/unit/promotions.test.ts
new file mode 100644
index 000000000..3e375d369
--- /dev/null
+++ b/llm-gateway/test/unit/promotions.test.ts
@@ -0,0 +1,68 @@
+// Tests for promotions: isActiveReviewPromo, isActiveCloudAgentPromo.
+
+import { describe, it, expect, vi, afterEach } from 'vitest';
+import { isActiveReviewPromo, isActiveCloudAgentPromo } from '../../src/lib/promotions';
+
+// Reset timers as well: a failing expect() skips the in-test vi.useRealTimers()
+// call, which would leave the clock faked for every later test in this file.
+afterEach(() => void vi.useRealTimers().restoreAllMocks());
+
+describe('isActiveReviewPromo', () => {
+  const promoModel = 'anthropic/claude-sonnet-4.6';
+
+  it('returns false for non-reviewer botId', () => {
+    expect(isActiveReviewPromo('other', promoModel)).toBe(false);
+  });
+
+  it('returns false for wrong model', () => {
+    expect(isActiveReviewPromo('reviewer', 'anthropic/claude-sonnet-4-20250514')).toBe(false);
+  });
+
+  it('returns false for undefined botId', () => {
+    expect(isActiveReviewPromo(undefined, promoModel)).toBe(false);
+  });
+
+  it('returns false when promo has ended', () => {
+    // Freeze the clock past the promo's end (2026-02-25T14:00:00Z).
+    vi.useFakeTimers().setSystemTime(new Date('2026-03-01T00:00:00Z'));
+    expect(isActiveReviewPromo('reviewer', promoModel)).toBe(false);
+    vi.useRealTimers();
+  });
+
+  it('returns true when promo is active', () => {
+    vi.useFakeTimers().setSystemTime(new Date('2026-02-20T00:00:00Z'));
+    expect(isActiveReviewPromo('reviewer', promoModel)).toBe(true);
+    vi.useRealTimers();
+  });
+});
+
+describe('isActiveCloudAgentPromo', () => {
+  // The three clock-based cases pin dates before, inside and after the promo window.
+  const promoModel = 'anthropic/claude-sonnet-4.6';
+
+  it('returns false for non-cloud-agent tokenSource', () => {
+    expect(isActiveCloudAgentPromo('other', promoModel)).toBe(false);
+  });
+
+  it('returns false for wrong model', () => {
+    expect(isActiveCloudAgentPromo('cloud-agent', 'anthropic/claude-3-5-sonnet')).toBe(false);
+  });
+
+  it('returns false before promo start', () => {
+    vi.useFakeTimers().setSystemTime(new Date('2026-02-25T00:00:00Z'));
+    expect(isActiveCloudAgentPromo('cloud-agent', promoModel)).toBe(false);
+    vi.useRealTimers();
+  });
+
+  it('returns true during promo window', () => {
+    vi.useFakeTimers().setSystemTime(new Date('2026-02-27T00:00:00Z'));
+    expect(isActiveCloudAgentPromo('cloud-agent', promoModel)).toBe(true);
+    vi.useRealTimers();
+  });
+
+  it('returns false after promo end', () => {
+    vi.useFakeTimers().setSystemTime(new Date('2026-03-01T00:00:00Z'));
+    expect(isActiveCloudAgentPromo('cloud-agent', promoModel)).toBe(false);
+    vi.useRealTimers();
+  });
+});
diff --git a/llm-gateway/test/unit/prompt-info.test.ts b/llm-gateway/test/unit/prompt-info.test.ts
new file mode 100644
index 000000000..7e6382eaf
--- /dev/null
+++ b/llm-gateway/test/unit/prompt-info.test.ts
@@ -0,0 +1,116 @@
+// Tests for prompt-info: extractPromptInfo, estimateChatTokens.
+
+import { describe, it, expect } from 'vitest';
+import { extractPromptInfo, estimateChatTokens } from '../../src/lib/prompt-info';
+
+describe('extractPromptInfo', () => {
+  it('extracts system and user prompt prefixes', () => {
+    const info = extractPromptInfo({
+      model: 'test',
+      messages: [
+        { role: 'system', content: 'You are a helpful assistant.' },
+        { role: 'user', content: 'What is the meaning of life?' },
+      ],
+    });
+    expect(info.system_prompt_prefix).toBe('You are a helpful assistant.');
+    expect(info.system_prompt_length).toBe(28);
+    expect(info.user_prompt_prefix).toBe('What is the meaning of life?');
+  });
+
+  it('uses last user message for user_prompt_prefix', () => {
+    const info = extractPromptInfo({
+      model: 'test',
+      messages: [
+        { role: 'user', content: 'first message' },
+        { role: 'assistant', content: 'ok' },
+        { role: 'user', content: 'second message' },
+      ],
+    });
+    expect(info.user_prompt_prefix).toBe('second message');
+  });
+
+  it('handles multipart content arrays', () => {
+    const info = extractPromptInfo({
+      model: 'test',
+      messages: [
+        {
+          role: 'system',
+          content: [
+            { type: 'text', text: 'System part 1' },
+            { type: 'text', text: 'System part 2' },
+          ],
+        },
+      ],
+    });
+    expect(info.system_prompt_prefix).toBe('System part 1System part 2');
+  });
+
+  it('truncates at 100 characters', () => {
+    // 200-char prompt: the prefix is capped while the reported length is not.
+    const info = extractPromptInfo({
+      model: 'test',
+      messages: [{ role: 'system', content: 'a'.repeat(200) }],
+    });
+    expect(info.system_prompt_prefix).toHaveLength(100);
+    expect(info.system_prompt_length).toBe(200);
+  });
+
+  it('handles empty messages gracefully', () => {
+    const info = extractPromptInfo({ model: 'test', messages: [] });
+    expect(info.system_prompt_prefix).toBe('');
+    expect(info.user_prompt_prefix).toBe('');
+  });
+
+  it('handles developer role as system', () => {
+    const info = extractPromptInfo({
+      model: 'test',
+      messages: [{ role: 'developer', content: 'dev instructions' }],
+    });
+    expect(info.system_prompt_prefix).toBe('dev instructions');
+  });
+});
+
+describe('estimateChatTokens', () => {
+  it('estimates tokens at ~length/4', () => {
+    // A 40-character prompt should come out at roughly 10 tokens.
+    const estimate = estimateChatTokens({
+      model: 'test',
+      messages: [{ role: 'user', content: 'a'.repeat(40) }],
+    });
+    expect(estimate.estimatedInputTokens).toBe(10);
+    expect(estimate.estimatedOutputTokens).toBe(10);
+  });
+
+  it('sums across multiple messages', () => {
+    const estimate = estimateChatTokens({
+      model: 'test',
+      messages: [
+        { role: 'system', content: 'a'.repeat(100) },
+        { role: 'user', content: 'b'.repeat(100) },
+      ],
+    });
+    expect(estimate.estimatedInputTokens).toBe(50);
+  });
+
+  it('handles missing messages', () => {
+    const estimate = estimateChatTokens({ model: 'test', messages: undefined as never });
+    expect(estimate.estimatedInputTokens).toBe(0);
+  });
+
+  it('handles multipart content arrays', () => {
+    const estimate = estimateChatTokens({
+      model: 'test',
+      messages: [
+        {
+          role: 'user',
+          content: [
+            { type: 'text', text: 'a'.repeat(40) },
+            { type: 'image_url', image_url: { url: 'data:...' } },
+          ],
+        },
+      ],
+    });
+    // Only text parts count: 40 chars + 1 (join separator) = 41, and 41 / 4 = 10.25
+    expect(estimate.estimatedInputTokens).toBeCloseTo(10.25, 1);
+  });
+});
diff --git a/llm-gateway/test/unit/provider-hash.test.ts b/llm-gateway/test/unit/provider-hash.test.ts
new file mode 100644
index 000000000..28f96e462
--- /dev/null
+++ b/llm-gateway/test/unit/provider-hash.test.ts
@@ -0,0 +1,66 @@
+import { describe, it, expect } from 'vitest';
+import { generateProviderSpecificHash } from '../../src/lib/provider-hash';
+import type { Provider } from '../../src/lib/providers';
+
+const baseProvider = { apiKey: 'test-key', hasGenerationEndpoint: true }; // shared defaults
+
+const openrouterProvider: Provider = {
+  id: 'openrouter',
+  apiUrl: 'https://openrouter.ai/api/v1',
+  ...baseProvider,
+};
+
+const gigapotatoProvider: Provider = {
+  id: 'gigapotato',
+  apiUrl: 'https://giga.potato.ai/v1',
+  ...baseProvider,
+  hasGenerationEndpoint: false,
+};
+
+const customProvider: Provider = {
+  id: 'custom',
+  apiUrl: 'https://custom.example.com/v1',
+  ...baseProvider,
+};
+
+describe('generateProviderSpecificHash', () => {
+  it('returns a base64 string for openrouter provider', async () => {
+    const digest = await generateProviderSpecificHash('user123', openrouterProvider);
+    expect(typeof digest).toBe('string');
+    expect(digest.length).toBeGreaterThan(0);
+    // Nothing outside the standard base64 alphabet (plus padding) is allowed.
+    expect(digest).toMatch(/^[A-Za-z0-9+/=]+$/);
+  });
+
+  it('returns different hashes for different providers', async () => {
+    const viaOpenrouter = await generateProviderSpecificHash('user123', openrouterProvider);
+    const viaGigapotato = await generateProviderSpecificHash('user123', gigapotatoProvider);
+    expect(viaOpenrouter).not.toBe(viaGigapotato);
+  });
+
+  it('returns different hashes for different payloads', async () => {
+    const forUser1 = await generateProviderSpecificHash('user1', openrouterProvider);
+    const forUser2 = await generateProviderSpecificHash('user2', openrouterProvider);
+    expect(forUser1).not.toBe(forUser2);
+  });
+
+  it('is deterministic — same inputs produce same output', async () => {
+    const first = await generateProviderSpecificHash('user123', openrouterProvider);
+    const second = await generateProviderSpecificHash('user123', openrouterProvider);
+    expect(first).toBe(second);
+  });
+
+  it('uses apiUrl as pepper for custom provider', async () => {
+    const atHostA: Provider = { ...customProvider, apiUrl: 'https://a.example.com' };
+    const atHostB: Provider = { ...customProvider, apiUrl: 'https://b.example.com' };
+    const digestA = await generateProviderSpecificHash('user123', atHostA);
+    const digestB = await generateProviderSpecificHash('user123', atHostB);
+    expect(digestA).not.toBe(digestB);
+  });
+
+  it('produces a 44-character base64 string (SHA-256 = 32 bytes)', async () => {
+    const digest = await generateProviderSpecificHash('user123', openrouterProvider);
+    // A 32-byte SHA-256 digest encodes to ceil(32/3)*4 = 44 base64 characters.
+    expect(digest.length).toBe(44);
+  });
+});
diff --git a/llm-gateway/test/unit/providers.test.ts b/llm-gateway/test/unit/providers.test.ts
new file mode 100644
index 000000000..d90b11d22
--- /dev/null
+++ b/llm-gateway/test/unit/providers.test.ts
@@ -0,0 +1,59 @@
+import { describe, it, expect } from 'vitest';
+import { getPreferredProviderOrder, buildProviders } from '../../src/lib/providers';
+import type { SecretsBundle } from '../../src/lib/providers';
+
+const testSecrets: SecretsBundle = {
+  openrouterApiKey: 'or-key', // asserted as OPENROUTER.apiKey below
+  gigapotatoApiKey: 'gp-key',
+  gigapotatoApiUrl: 'https://gp.example.com/v1', // asserted as GIGAPOTATO.apiUrl below
+  corethinkApiKey: 'ct-key',
+  martianApiKey: 'mt-key',
+  mistralApiKey: 'ms-key',
+  vercelAiGatewayApiKey: 'vg-key',
+  byokEncryptionKey: 'bk-key',
+};
+
+describe('buildProviders', () => {
+  it('returns correct URLs and keys for OPENROUTER', () => {
+    const { OPENROUTER } = buildProviders(testSecrets);
+    expect(OPENROUTER.apiUrl).toBe('https://openrouter.ai/api/v1');
+    expect(OPENROUTER.apiKey).toBe('or-key');
+    expect(OPENROUTER.hasGenerationEndpoint).toBe(true);
+  });
+
+  it('uses provided GIGAPOTATO_API_URL', () => {
+    const { GIGAPOTATO } = buildProviders(testSecrets);
+    expect(GIGAPOTATO.apiUrl).toBe('https://gp.example.com/v1');
+    expect(GIGAPOTATO.hasGenerationEndpoint).toBe(false);
+  });
+
+  it('VERCEL_AI_GATEWAY has generation endpoint', () => {
+    const { VERCEL_AI_GATEWAY } = buildProviders(testSecrets);
+    expect(VERCEL_AI_GATEWAY.hasGenerationEndpoint).toBe(true);
+  });
+});
+
+describe('getPreferredProviderOrder', () => {
+  // Each case passes a `vendor/model` id and checks the returned provider-id list.
+
+  it('routes anthropic models to bedrock first', () => {
+    const order = getPreferredProviderOrder('anthropic/claude-sonnet-4');
+    expect(order).toEqual(['amazon-bedrock', 'anthropic']);
+  });
+
+  it('routes minimax models to minimax', () => {
+    expect(getPreferredProviderOrder('minimax/minimax-m2.5')).toEqual(['minimax']);
+  });
+
+  it('routes mistralai models to mistral', () => {
+    expect(getPreferredProviderOrder('mistralai/devstral')).toEqual(['mistral']);
+  });
+
+  it('returns empty for openai models', () => {
+    expect(getPreferredProviderOrder('openai/gpt-4o')).toEqual([]);
+  });
+
+  it('returns empty for unknown models', () => {
+    expect(getPreferredProviderOrder('unknown/model')).toEqual([]);
+  });
+});
diff --git a/llm-gateway/test/unit/proxy-402.test.ts b/llm-gateway/test/unit/proxy-402.test.ts
new file mode 100644
index 000000000..96f42d246
--- /dev/null
+++ b/llm-gateway/test/unit/proxy-402.test.ts
@@ -0,0 +1,346 @@
+// Tests for proxyHandler background task scheduling:
+// - B1: 402 upstream responses still emit background tasks
+// - B2: Free model responses include accounting and logging streams
+
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { Hono } from 'hono';
+import type { HonoContext } from '../../src/types/hono';
+import type { ProviderId } from '../../src/lib/providers';
+import { fakeExecutionCtx } from './helpers';
+
+// ── Track scheduleBackgroundTasks calls ──────────────────────────────────────
+
+const scheduledCalls: unknown[] = []; // params of every scheduleBackgroundTasks call
+
+vi.mock('../../src/handler/background-tasks', () => ({
+  scheduleBackgroundTasks(_ctx: unknown, params: unknown) {
+    scheduledCalls.push(params);
+  },
+}));
+
+vi.mock('../../src/lib/abuse-service', () => ({
+  classifyAbuse: () => Promise.resolve(null),
+}));
+
+vi.mock('@kilocode/db/client', () => ({
+  getWorkerDb: () => ({}),
+}));
+
+// ── Restore real fetch after each test ───────────────────────────────────────
+
+const realFetch = globalThis.fetch;
+let fetchMock: ReturnType<typeof vi.fn>;
+
+beforeEach(() => {
+  scheduledCalls.length = 0;
+  globalThis.fetch = fetchMock = vi.fn();
+
+  // `scheduler` is a Workers-only global; under Node, back wait() with setTimeout.
+  const globals = globalThis as Record<string, unknown>;
+  if (globals.scheduler === undefined) {
+    const wait = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
+    globals.scheduler = { wait };
+  }
+});
+
+afterEach(() => {
+  globalThis.fetch = realFetch;
+  vi.restoreAllMocks();
+});
+
+// ── Helpers ──────────────────────────────────────────────────────────────────
+
+// Stand-in for an env secret: an object whose get() resolves asynchronously
+// to the wrapped value.
+const makeSecret = (value: string) => ({ get: () => Promise.resolve(value) });
+
+const testEnv = { // stub bindings; handed to app.fetch as `never` in dispatch()
+  HYPERDRIVE: { connectionString: 'postgres://localhost:5432/test' },
+  POSTHOG_API_KEY: makeSecret('ph-key'),
+  ABUSE_SERVICE_URL: makeSecret('https://abuse.example.com'),
+  ABUSE_CF_ACCESS_CLIENT_ID: makeSecret('abuse-id'),
+  ABUSE_CF_ACCESS_CLIENT_SECRET: makeSecret('abuse-secret'),
+  O11Y: { ingestApiMetrics: async () => {} }, // no-op
+};
+
+type ContextOverrides = { // the only context values buildApp() lets a test change
+  model?: string; // buildApp default: 'anthropic/claude-sonnet-4-20250514'
+  providerId?: ProviderId; // buildApp default: 'openrouter'
+};
+
+function buildApp(overrides: ContextOverrides = {}) {
+  const providerId = overrides.providerId ?? 'openrouter';
+  const model = overrides.model ?? 'anthropic/claude-sonnet-4-20250514';
+  const app = new Hono<HonoContext>();
+
+  // Seed every context variable that upstream middleware would normally have set.
+  app.use('*', async (ctx, next) => {
+    ctx.set('requestStartedAt', performance.now());
+    ctx.set('requestBody', {
+      model,
+      messages: [{ role: 'user' as const, content: 'hi' }],
+      stream: false,
+    });
+    ctx.set('resolvedModel', model);
+    ctx.set('provider', {
+      id: providerId,
+      apiUrl: 'https://openrouter.example.com/v1',
+      apiKey: 'test-key',
+      hasGenerationEndpoint: true,
+    });
+    ctx.set('userByok', null);
+    ctx.set('customLlm', null);
+    ctx.set('user', {
+      id: 'user-1',
+      total_microdollars_acquired: 10_000_000,
+      microdollars_used: 0,
+    } as never);
+    ctx.set('organizationId', undefined);
+    ctx.set('projectId', null);
+    ctx.set('extraHeaders', {});
+    ctx.set('fraudHeaders', { cf_connecting_ip: '1.2.3.4' } as never);
+    ctx.set('editorName', null);
+    ctx.set('machineId', null);
+    ctx.set('taskId', null);
+    ctx.set('botId', undefined);
+    ctx.set('tokenSource', undefined);
+    ctx.set('feature', null);
+    ctx.set('autoModel', null);
+    ctx.set('modeHeader', null);
+    await next();
+  });
+
+  return app;
+}
+
+// Sends `req` through `app` with the stub env bindings and a fake ExecutionContext.
+const dispatch = (app: Hono<HonoContext>, req: Request) =>
+  app.fetch(req, testEnv as never, fakeExecutionCtx());
+
+// ── Tests ────────────────────────────────────────────────────────────────────
+
+describe('proxy handler – 402 upstream', () => {
+  it('schedules background tasks before returning 503', async () => {
+    // Upstream returns 402 Payment Required
+    fetchMock.mockResolvedValue(
+      new Response(JSON.stringify({ error: 'Payment Required' }), {
+        status: 402,
+        headers: { 'Content-Type': 'application/json' },
+      })
+    );
+
+    const { proxyHandler } = await import('../../src/handler/proxy');
+    const app = buildApp();
+    app.post('/api/gateway/chat/completions', proxyHandler);
+
+    const req = new Request('http://localhost/api/gateway/chat/completions', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      }),
+    });
+
+    const res = await dispatch(app, req);
+
+    // Should convert 402 → 503
+    expect(res.status).toBe(503);
+    const body = (await res.json()) as Record<string, string>;
+    expect(body.error).toBe('Service Unavailable');
+
+    // Background tasks MUST have been scheduled (the whole point of B1)
+    expect(scheduledCalls).toHaveLength(1);
+    const params = scheduledCalls[0] as Record<string, unknown>;
+    expect(params.upstreamStatusCode).toBe(402);
+    // metricsStream should be provided (non-null)
+    expect(params.metricsStream).not.toBeNull();
+  });
+
+  it('does NOT convert 402 to 503 when userByok is set', async () => {
+    // Upstream returns 402 with BYOK — should pass through as-is
+    fetchMock.mockResolvedValue(
+      new Response(JSON.stringify({ error: { message: 'Insufficient credits' } }), {
+        status: 402,
+        headers: { 'Content-Type': 'application/json' },
+      })
+    );
+
+    const { proxyHandler } = await import('../../src/handler/proxy');
+    const app = buildApp();
+
+    // Override userByok in this test's middleware
+    app.use('/byok/*', async (c, next) => {
+      c.set('userByok', [{ provider_id: 'anthropic', encrypted_api_key: 'enc-key' }] as never);
+      await next();
+    });
+    app.post('/byok/chat/completions', proxyHandler);
+
+    const req = new Request('http://localhost/byok/chat/completions', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      }),
+    });
+
+    const res = await dispatch(app, req);
+
+    // BYOK 402 must NOT become 503; it goes through makeErrorReadable instead.
+    expect(res.status).not.toBe(503);
+    // Background tasks should still be scheduled
+    expect(scheduledCalls).toHaveLength(1);
+    expect((scheduledCalls[0] as Record<string, unknown>).upstreamStatusCode).toBe(402);
+  });
+
+  it('schedules background tasks for non-402 errors too', async () => {
+    // Upstream returns 500 — verify background tasks still run
+    fetchMock.mockResolvedValue(
+      new Response(JSON.stringify({ error: 'Internal Server Error' }), {
+        status: 500,
+        headers: { 'Content-Type': 'application/json' },
+      })
+    );
+
+    const { proxyHandler } = await import('../../src/handler/proxy');
+    const app = buildApp();
+    app.post('/api/gateway/chat/completions', proxyHandler);
+
+    const req = new Request('http://localhost/api/gateway/chat/completions', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        model: 'anthropic/claude-sonnet-4-20250514',
+        messages: [{ role: 'user', content: 'hi' }],
+      }),
+    });
+
+    const res = await dispatch(app, req);
+
+    // 500 should pass through (no conversion)
+    expect(res.status).toBe(500);
+
+    // Background tasks should be scheduled
+    expect(scheduledCalls).toHaveLength(1);
+    expect((scheduledCalls[0] as Record<string, unknown>).upstreamStatusCode).toBe(500);
+  });
+});
+
+// ── B2: Free model responses include accounting and logging ───────────────────
+
+describe('proxy handler – free model background tasks', () => {
+  it('provides accountingStream and loggingStream for free model responses', async () => {
+    // Upstream answers 200 OK with a small non-streaming completion for a free model.
+    const upstreamBody = JSON.stringify({
+      id: 'chatcmpl-1',
+      choices: [{ message: { role: 'assistant', content: 'hi' } }],
+      usage: { prompt_tokens: 10, completion_tokens: 5 },
+    });
+    fetchMock.mockResolvedValue(
+      new Response(upstreamBody, {
+        status: 200,
+        headers: { 'Content-Type': 'application/json' },
+      })
+    );
+
+    const { proxyHandler } = await import('../../src/handler/proxy');
+    const app = buildApp({ model: 'corethink:free', providerId: 'corethink' });
+    app.post('/api/gateway/chat/completions', proxyHandler);
+
+    const req = new Request('http://localhost/api/gateway/chat/completions', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        model: 'corethink:free',
+        messages: [{ role: 'user', content: 'hi' }],
+      }),
+    });
+
+    const res = await dispatch(app, req);
+    expect(res.status).toBe(200);
+
+    // Drain the response body so the pipe promise can complete.
+    await res.text();
+    // Give microtasks / waitUntil promises 50 ms to settle before asserting.
+    await new Promise(resolve => setTimeout(resolve, 50));
+
+    // Background tasks MUST include accounting and logging streams (B2 fix).
+    expect(scheduledCalls).toHaveLength(1);
+    const params = scheduledCalls[0] as Record<string, unknown>;
+    expect(params.accountingStream).not.toBeNull();
+    expect(params.metricsStream).not.toBeNull();
+    expect(params.loggingStream).not.toBeNull();
+  });
+
+  it('skips accountingStream and loggingStream for anonymous free model requests', async () => {
+    const upstreamBody = JSON.stringify({
+      id: 'chatcmpl-1',
+      choices: [{ message: { role: 'assistant', content: 'hi' } }],
+      usage: { prompt_tokens: 10, completion_tokens: 5 },
+    });
+    fetchMock.mockResolvedValue(
+      new Response(upstreamBody, {
+        status: 200,
+        headers: { 'Content-Type': 'application/json' },
+      })
+    );
+
+    const { proxyHandler } = await import('../../src/handler/proxy');
+    // buildApp() has no `user` override, so wire the context by hand (anonymous id 'anon:…').
+    const app = new Hono<HonoContext>();
+    app.use('*', async (c, next) => {
+      c.set('requestStartedAt', performance.now());
+      c.set('requestBody', {
+        model: 'corethink:free',
+        messages: [{ role: 'user' as const, content: 'hi' }],
+        stream: false,
+      });
+      c.set('resolvedModel', 'corethink:free');
+      c.set('provider', {
+        id: 'corethink' as const,
+        apiUrl: 'https://corethink.example.com/v1',
+        apiKey: 'test-key',
+        hasGenerationEndpoint: true,
+      });
+      c.set('userByok', null);
+      c.set('customLlm', null);
+      c.set('user', { id: 'anon:1.2.3.4', isAnonymous: true } as never);
+      c.set('organizationId', undefined);
+      c.set('projectId', null);
+      c.set('extraHeaders', {});
+      c.set('fraudHeaders', { cf_connecting_ip: '1.2.3.4' } as never);
+      c.set('editorName', null);
+      c.set('machineId', null);
+      c.set('taskId', null);
+      c.set('botId', undefined);
+      c.set('tokenSource', undefined);
+      c.set('feature', null);
+      c.set('autoModel', null);
+      c.set('modeHeader', null);
+      await next();
+    });
+    app.post('/api/gateway/chat/completions', proxyHandler);
+
+    const req = new Request('http://localhost/api/gateway/chat/completions', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        model: 'corethink:free',
+        messages: [{ role: 'user', content: 'hi' }],
+      }),
+    });
+
+    const res = await dispatch(app, req);
+    expect(res.status).toBe(200);
+    await res.text();
+    await new Promise(resolve => setTimeout(resolve, 50));
+
+    // Anonymous users: accounting and logging streams are null; metrics is still provided.
+    expect(scheduledCalls).toHaveLength(1);
+    const params = scheduledCalls[0] as Record<string, unknown>;
+    expect(params.accountingStream).toBeNull();
+    expect(params.metricsStream).not.toBeNull();
+    expect(params.loggingStream).toBeNull();
+  });
+});
diff --git a/llm-gateway/test/unit/rate-limit.test.ts b/llm-gateway/test/unit/rate-limit.test.ts
new file mode 100644
index 000000000..815c53027
--- /dev/null
+++ b/llm-gateway/test/unit/rate-limit.test.ts
@@ -0,0 +1,136 @@
+import { describe, it, expect } from 'vitest';
+
+// We test the DO logic directly by simulating what the DO class does.
+// The actual DO class extends DurableObject (which requires the Workers runtime),
+// so we replicate its core check-and-increment logic here.
+// The rate-limit.ts module is a thin wrapper that just calls the DO stub methods.
+
+const FREE_MODEL_WINDOW_MS = 60 * 60 * 1000; // 1 hour, in milliseconds
+const FREE_MODEL_MAX_REQUESTS = 200; // cap passed as maxRequests for the 'free' key
+const PROMOTION_WINDOW_MS = 24 * 60 * 60 * 1000; // 24 hours, in milliseconds
+const PROMOTION_MAX_REQUESTS = 10_000; // cap passed as maxRequests for the 'promo' key
+
+// In-memory stand-in for the Durable Object's storage: each key maps to the
+// array of request timestamps (ms since epoch) recorded under it.
+function makeStorage() {
+  const entries = new Map<string, number[]>();
+
+  const get = (key: string): number[] | undefined => entries.get(key);
+  const put = (key: string, value: number[]): void => {
+    entries.set(key, value);
+  };
+  return { get, put };
+}
+
+// Sliding-window limiter: drops timestamps older than `windowMs`, then records the
+// current request only if fewer than `maxRequests` remain in the window.
+function checkAndIncrement(
+  storage: ReturnType<typeof makeStorage>,
+  key: string,
+  windowMs: number,
+  maxRequests: number
+) {
+  const now = Date.now();
+  const recent = (storage.get(key) ?? []).filter(stamp => stamp >= now - windowMs);
+  const allowed = recent.length < maxRequests;
+  if (allowed) {
+    recent.push(now);
+    storage.put(key, recent);
+  }
+  return { allowed, requestCount: recent.length };
+}
+
+describe('RateLimitDO: checkFreeModel', () => {
+  it('allows when no prior requests', () => {
+    const store = makeStorage();
+    const outcome = checkAndIncrement(
+      store,
+      'free',
+      FREE_MODEL_WINDOW_MS,
+      FREE_MODEL_MAX_REQUESTS
+    );
+    expect(outcome.allowed).toBe(true);
+    expect(outcome.requestCount).toBe(1);
+  });
+
+  it('allows when under the 200 request limit', () => {
+    const store = makeStorage();
+    for (let n = 0; n < 199; n++) {
+      checkAndIncrement(store, 'free', FREE_MODEL_WINDOW_MS, FREE_MODEL_MAX_REQUESTS);
+    }
+    const outcome = checkAndIncrement(
+      store,
+      'free',
+      FREE_MODEL_WINDOW_MS,
+      FREE_MODEL_MAX_REQUESTS
+    );
+    expect(outcome.allowed).toBe(true);
+    expect(outcome.requestCount).toBe(200);
+  });
+
+  it('blocks when at the 200 request limit', () => {
+    const store = makeStorage();
+    for (let n = 0; n < 200; n++) {
+      checkAndIncrement(store, 'free', FREE_MODEL_WINDOW_MS, FREE_MODEL_MAX_REQUESTS);
+    }
+    const outcome = checkAndIncrement(
+      store,
+      'free',
+      FREE_MODEL_WINDOW_MS,
+      FREE_MODEL_MAX_REQUESTS
+    );
+    expect(outcome.allowed).toBe(false);
+    expect(outcome.requestCount).toBe(200);
+  });
+
+  it('ignores timestamps outside the 1-hour window', () => {
+    const store = makeStorage();
+    const now = Date.now();
+    const twoHoursAgo = now - 2 * 60 * 60 * 1000;
+    // Seed 200 stale timestamps (2 h old) followed by a single one from 1 s ago.
+    store.put('free', [...Array.from({ length: 200 }, () => twoHoursAgo), now - 1000]);
+    const outcome = checkAndIncrement(
+      store,
+      'free',
+      FREE_MODEL_WINDOW_MS,
+      FREE_MODEL_MAX_REQUESTS
+    );
+    expect(outcome.allowed).toBe(true);
+    // Only the recent seed survives pruning; adding this request makes 2.
+    expect(outcome.requestCount).toBe(2);
+  });
+});
+
+describe('RateLimitDO: checkPromotion', () => {
+  it('allows when under 10000 requests per 24h', () => {
+    const store = makeStorage();
+    const outcome = checkAndIncrement(store, 'promo', PROMOTION_WINDOW_MS, PROMOTION_MAX_REQUESTS);
+    expect(outcome.allowed).toBe(true);
+  });
+
+  it('blocks at 10000', () => {
+    const store = makeStorage();
+    for (let n = 0; n < 10_000; n++) {
+      checkAndIncrement(store, 'promo', PROMOTION_WINDOW_MS, PROMOTION_MAX_REQUESTS);
+    }
+    const outcome = checkAndIncrement(store, 'promo', PROMOTION_WINDOW_MS, PROMOTION_MAX_REQUESTS);
+    expect(outcome.allowed).toBe(false);
+    expect(outcome.requestCount).toBe(10_000);
+  });
+});
+
+describe('RateLimitDO: atomicity', () => {
+  it('check and increment happen atomically (no TOCTOU)', () => {
+    const store = makeStorage();
+    // Record 199 requests, leaving exactly one free slot.
+    for (let n = 0; n < 199; n++) {
+      checkAndIncrement(store, 'free', FREE_MODEL_WINDOW_MS, FREE_MODEL_MAX_REQUESTS);
+    }
+    // Two back-to-back calls: the first claims the 200th (last) slot, so the second
+    // must see a full window — check and increment share one synchronous call.
+    const first = checkAndIncrement(store, 'free', FREE_MODEL_WINDOW_MS, FREE_MODEL_MAX_REQUESTS);
+    const second = checkAndIncrement(store, 'free', FREE_MODEL_WINDOW_MS, FREE_MODEL_MAX_REQUESTS);
+    expect(first.allowed).toBe(true);
+    expect(second.allowed).toBe(false);
+  });
+});
diff --git a/llm-gateway/test/unit/request-logging.test.ts b/llm-gateway/test/unit/request-logging.test.ts
new file mode 100644
index 000000000..60302716d
--- /dev/null
+++ b/llm-gateway/test/unit/request-logging.test.ts
@@ -0,0 +1,115 @@
+// Tests for background/request-logging: isKiloEmployee guard and DB insert.
+
+import { describe, it, expect, vi } from 'vitest';
+import { runRequestLogging, KILO_ORGANIZATION_ID } from '../../src/background/request-logging';
+
+function makeDb( // fake WorkerDb that exposes only `insert`
+  insertMock = vi.fn().mockReturnValue({ // default chain: insert() → values() → returning()
+    values: vi.fn().mockReturnValue({
+      returning: vi.fn().mockResolvedValue([{ id: 'log-1' }]), // resolves to one row
+    }),
+  })
+) {
+  return { insert: insertMock } as unknown as import('@kilocode/db/client').WorkerDb;
+}
+
+// NOTE: despite its name this stream is not empty — it emits one UTF-8 chunk
+// ('test response') and then closes.
+function emptyStream() {
+  const chunk = new TextEncoder().encode('test response');
+  return new ReadableStream({
+    start: ctrl => { ctrl.enqueue(chunk); ctrl.close(); },
+  });
+}
+
+describe('runRequestLogging', () => {
+  it('skips non-Kilo employees', async () => {
+    const insertMock = vi.fn();
+    const db = makeDb(insertMock);
+    await runRequestLogging({
+      db,
+      responseStream: emptyStream(),
+      statusCode: 200,
+      user: { id: 'user-1', google_user_email: 'user@gmail.com' },
+      organizationId: null,
+      provider: 'openrouter',
+      model: 'test',
+      request: { model: 'test', messages: [] },
+    });
+    expect(insertMock).not.toHaveBeenCalled();
+  });
+
+  it('logs for @kilo.ai employees', async () => {
+    const returningMock = vi.fn().mockResolvedValue([{ id: 'log-1' }]);
+    const valuesMock = vi.fn().mockReturnValue({ returning: returningMock });
+    const insertMock = vi.fn().mockReturnValue({ values: valuesMock });
+    const db = makeDb(insertMock);
+    await runRequestLogging({
+      db,
+      responseStream: emptyStream(),
+      statusCode: 200,
+      user: { id: 'user-1', google_user_email: 'dev@kilo.ai' },
+      organizationId: null,
+      provider: 'openrouter',
+      model: 'test-model',
+      request: { model: 'test-model', messages: [] },
+    });
+    expect(insertMock).toHaveBeenCalled();
+  });
+
+  it('logs for @kilocode.ai employees', async () => {
+    const returningMock = vi.fn().mockResolvedValue([{ id: 'log-1' }]);
+    const valuesMock = vi.fn().mockReturnValue({ returning: returningMock });
+    const insertMock = vi.fn().mockReturnValue({ values: valuesMock });
+    const db = makeDb(insertMock);
+    await runRequestLogging({
+      db,
+      responseStream: emptyStream(),
+      statusCode: 200,
+      user: { id: 'user-1', google_user_email: 'dev@kilocode.ai' },
+      organizationId: null,
+      provider: 'openrouter',
+      model: 'test-model',
+      request: { model: 'test-model', messages: [] },
+    });
+    expect(insertMock).toHaveBeenCalled();
+  });
+
+  it('logs for Kilo organization ID', async () => {
+    const returningMock = vi.fn().mockResolvedValue([{ id: 'log-1' }]);
+    const valuesMock = vi.fn().mockReturnValue({ returning: returningMock });
+    const insertMock = vi.fn().mockReturnValue({ values: valuesMock });
+    const db = makeDb(insertMock);
+    await runRequestLogging({
+      db,
+      responseStream: emptyStream(),
+      statusCode: 200,
+      user: { id: 'user-1', google_user_email: 'user@random.com' },
+      organizationId: KILO_ORGANIZATION_ID,
+      provider: 'openrouter',
+      model: 'test-model',
+      request: { model: 'test-model', messages: [] },
+    });
+    expect(insertMock).toHaveBeenCalled();
+  });
+
+  it('handles DB insert failure gracefully', async () => {
+    const returningMock = vi.fn().mockRejectedValue(new Error('DB error'));
+    const valuesMock = vi.fn().mockReturnValue({ returning: returningMock });
+    const insertMock = vi.fn().mockReturnValue({ values: valuesMock });
+    const db = makeDb(insertMock);
+    // Must resolve (not reject) even though the insert chain fails.
+    await runRequestLogging({
+      db,
+      responseStream: emptyStream(),
+      statusCode: 200,
+      user: { id: 'user-1', google_user_email: 'dev@kilo.ai' },
+      organizationId: null,
+      provider: 'openrouter',
+      model: 'test-model',
+      request: { model: 'test-model', messages: [] },
+    });
+    // Guard against a vacuous pass: the failing insert must actually have been attempted.
+    expect(insertMock).toHaveBeenCalled();
+  });
+});
diff --git a/llm-gateway/test/unit/request-validation.test.ts b/llm-gateway/test/unit/request-validation.test.ts
new file mode 100644
index 000000000..2121606eb
--- /dev/null
+++ b/llm-gateway/test/unit/request-validation.test.ts
@@ -0,0 +1,90 @@
+// Tests for requestValidationMiddleware — max_tokens, dead models, rate-limited-to-death models.
+
+import { describe, it, expect } from 'vitest';
+import { Hono } from 'hono';
+import type { HonoContext } from '../../src/types/hono';
+import { requestValidationMiddleware } from '../../src/middleware/request-validation';
+import { parseBodyMiddleware } from '../../src/middleware/parse-body';
+import { extractIpMiddleware } from '../../src/middleware/extract-ip';
+import { resolveAutoModelMiddleware } from '../../src/middleware/resolve-auto-model';
+import { anonymousGateMiddleware } from '../../src/middleware/anonymous-gate';
+
+function makeApp() {
+  const app = new Hono<HonoContext>(); // a new app instance on every call
+  app.post(
+    '/test',
+    parseBodyMiddleware,
+    extractIpMiddleware,
+    resolveAutoModelMiddleware,
+    anonymousGateMiddleware,
+    requestValidationMiddleware, // the middleware under test, registered last in the chain
+    c => c.json({ ok: true }) // final handler, reached when no middleware responds first
+  );
+  return app;
+}
+
+function post(app: ReturnType<typeof makeApp>, body: Record<string, unknown>) {
+  // Sends `body` as a JSON POST to /test and returns whatever app.fetch returns.
+  // The CF-Connecting-IP header carries a fixed address (1.2.3.4); it is presumably
+  // what extractIpMiddleware reads — confirm against that middleware.
+  const headers = { 'Content-Type': 'application/json', 'CF-Connecting-IP': '1.2.3.4' };
+  const payload = JSON.stringify(body);
+  const request = new Request('http://x/test', { method: 'POST', headers, body: payload });
+  return app.fetch(request);
+}
+
+describe('requestValidationMiddleware', () => {
+  it('allows valid free model requests', async () => {
+    // Baseline: no max_tokens in the body; the request should reach the final handler.
+    const response = await post(makeApp(), {
+      model: 'meta-llama/llama-3.1-8b-instruct:free',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    expect(response.status).toBe(200);
+  });
+
+  it('returns 503 for absurdly large max_tokens', async () => {
+    // 100 billion requested tokens must be answered with a generic 503 body.
+    const response = await post(makeApp(), {
+      model: 'meta-llama/llama-3.1-8b-instruct:free',
+      messages: [{ role: 'user', content: 'hi' }],
+      max_tokens: 100_000_000_000,
+    });
+    expect(response.status).toBe(503);
+    const payload = (await response.json()) as { error: string };
+    expect(payload.error).toBe('Service Unavailable');
+  });
+
+  it('allows normal max_tokens values', async () => {
+    // Same request as above with max_tokens: 4096, which must be let through.
+    const response = await post(makeApp(), {
+      model: 'meta-llama/llama-3.1-8b-instruct:free',
+      messages: [{ role: 'user', content: 'hi' }],
+      max_tokens: 4096,
+    });
+    expect(response.status).toBe(200);
+  });
+
+  it('returns 404 for dead free models', async () => {
+    // x-ai/grok-code-fast-1:optimized:free is disabled in the models list.
+    // The 404 body is expected to mention the 'alpha period'.
+    const response = await post(makeApp(), {
+      model: 'x-ai/grok-code-fast-1:optimized:free',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    expect(response.status).toBe(404);
+    const payload = (await response.json()) as { error: string };
+    expect(payload.error).toContain('alpha period');
+  });
+
+  it('returns 404 for rate-limited-to-death models', async () => {
+    // This model id is expected to be answered with a 'not found' 404.
+    const response = await post(makeApp(), {
+      model: 'deepseek/deepseek-r1-0528:free',
+      messages: [{ role: 'user', content: 'hi' }],
+    });
+    expect(response.status).toBe(404);
+    const payload = (await response.json()) as { error: string };
+    expect(payload.error).toContain('not found');
+  });
+});
diff --git a/llm-gateway/test/unit/response-helpers.test.ts b/llm-gateway/test/unit/response-helpers.test.ts
new file mode 100644
index 000000000..4faf840b2
--- /dev/null
+++ b/llm-gateway/test/unit/response-helpers.test.ts
@@ -0,0 +1,221 @@
+// Tests for response-helpers: getOutputHeaders, wrapResponse, makeErrorReadable.
+
+import { describe, it, expect } from 'vitest';
+import { getOutputHeaders, wrapResponse, makeErrorReadable } from '../../src/lib/response-helpers';
+
+describe('getOutputHeaders', () => {
+  it('whitelists date, content-type, request-id', () => {
+    // Headers expected to be copied to the output unchanged.
+    const allowed = {
+      date: 'Mon, 01 Jan 2026 00:00:00 GMT',
+      'content-type': 'text/event-stream',
+      'request-id': 'req-123',
+    };
+    // Headers expected to be absent from the output.
+    const blocked = {
+      'x-secret-header': 'should-be-stripped',
+      'set-cookie': 'should-be-stripped',
+    };
+    const upstream = new Response('body', { headers: { ...allowed, ...blocked } });
+    const out = getOutputHeaders(upstream);
+    for (const [name, value] of Object.entries(allowed)) expect(out.get(name)).toBe(value);
+    for (const name of Object.keys(blocked)) expect(out.get(name)).toBeNull();
+  });
+
+  it('sets Content-Encoding: identity', () => {
+    // The upstream response sets no Content-Encoding itself; the output must carry 'identity'.
+    const out = getOutputHeaders(new Response('body'));
+    expect(out.get('Content-Encoding')).toBe('identity');
+  });
+});
+
+describe('wrapResponse', () => {
+  it('preserves status and body', async () => {
+    // Status and body come from upstream; Content-Encoding is expected to be 'identity'.
+    const result = wrapResponse(new Response('hello', { status: 201, statusText: 'Created' }));
+    expect(result.status).toBe(201);
+    expect(await result.text()).toBe('hello');
+    expect(result.headers.get('Content-Encoding')).toBe('identity');
+  });
+});
+
+describe('makeErrorReadable', () => {
+  it('returns undefined for successful responses', async () => {
+    // A 200 upstream response is not an error, so no replacement is expected.
+    const readable = await makeErrorReadable({
+      requestedModel: 'test',
+      request: { model: 'test', messages: [] },
+      response: new Response('ok', { status: 200 }),
+      isUserByok: false,
+    });
+    expect(readable).toBeUndefined();
+  });
+
+  it('returns BYOK message for 401', async () => {
+    // BYOK + 401: the status is kept and the body carries a '[BYOK]' key explanation.
+    const readable = await makeErrorReadable({
+      requestedModel: 'test',
+      request: { model: 'test', messages: [] },
+      response: new Response('Unauthorized', { status: 401 }),
+      isUserByok: true,
+    });
+    expect(readable).toBeDefined();
+    expect(readable!.status).toBe(401);
+    const payload = (await readable!.json()) as { error: string };
+    expect(payload.error).toContain('[BYOK]');
+    expect(payload.error).toContain('invalid or has been revoked');
+  });
+
+  it('returns BYOK message for 402', async () => {
+    // BYOK + 402: the status is kept and the body mentions insufficient funds.
+    const readable = await makeErrorReadable({
+      requestedModel: 'test',
+      request: { model: 'test', messages: [] },
+      response: new Response('Payment Required', { status: 402 }),
+      isUserByok: true,
+    });
+    expect(readable).toBeDefined();
+    expect(readable!.status).toBe(402);
+    const payload = (await readable!.json()) as { error: string };
+    expect(payload.error).toContain('insufficient funds');
+  });
+
+  it('returns BYOK message for 429', async () => {
+    // BYOK + 429: the status is kept and the body mentions the rate limit.
+    const readable = await makeErrorReadable({
+      requestedModel: 'test',
+      request: { model: 'test', messages: [] },
+      response: new Response('Rate Limited', { status: 429 }),
+      isUserByok: true,
+    });
+    expect(readable).toBeDefined();
+    expect(readable!.status).toBe(429);
+    const payload = (await readable!.json()) as { error: string };
+    expect(payload.error).toContain('rate limit');
+  });
+
+  it('returns undefined for non-BYOK errors', async () => {
+    // Non-BYOK 500 for a model named 'test': no replacement is expected.
+    const readable = await makeErrorReadable({
+      requestedModel: 'test',
+      request: { model: 'test', messages: [] },
+      response: new Response('Server Error', { status: 500 }),
+      isUserByok: false,
+    });
+    expect(readable).toBeUndefined();
+  });
+
+  it('returns undefined for BYOK with non-mapped status codes', async () => {
+    // BYOK, but 500 is not one of the statuses (401/402/429) covered by the tests above.
+    const readable = await makeErrorReadable({
+      requestedModel: 'test',
+      request: { model: 'test', messages: [] },
+      response: new Response('Server Error', { status: 500 }),
+      isUserByok: true,
+    });
+    expect(readable).toBeUndefined();
+  });
+
+  it('returns context-length error for Kilo free model when estimated tokens exceed limit', async () => {
+    // corethink:free has context_length 78_000. The prompt below is sized so that its
+    // JSON serialization / 4 exceeds that threshold; the upstream status (500) is kept
+    // and the same text is expected in both `error` and `message`.
+    const longContent = 'x'.repeat(78_000 * 4);
+    const readable = await makeErrorReadable({
+      requestedModel: 'corethink:free',
+      request: { model: 'corethink:free', messages: [{ role: 'user', content: longContent }] },
+      response: new Response('Internal Server Error', { status: 500 }),
+      isUserByok: false,
+    });
+    expect(readable).toBeDefined();
+    expect(readable!.status).toBe(500);
+    const payload = (await readable!.json()) as { error: string; message: string };
+    expect(payload.error).toContain('The maximum context length is 78000 tokens');
+    expect(payload.error).toContain('tokens were requested');
+    expect(payload.error).toBe(payload.message);
+  });
+
+  it('returns undefined for Kilo free model when estimated tokens are within limit', async () => {
+    // Same model, but a two-character prompt: no context-length message is expected.
+    const readable = await makeErrorReadable({
+      requestedModel: 'corethink:free',
+      request: { model: 'corethink:free', messages: [{ role: 'user', content: 'hi' }] },
+      response: new Response('Bad Request', { status: 400 }),
+      isUserByok: false,
+    });
+    expect(readable).toBeUndefined();
+  });
+
+  it('accounts for max_completion_tokens in context-length estimate', async () => {
+    // corethink:free context_length is 78_000. The prompt itself is tiny, but
+    // max_completion_tokens (100_000) alone is larger than the limit, so the
+    // context-length message is expected.
+    const readable = await makeErrorReadable({
+      requestedModel: 'corethink:free',
+      request: {
+        model: 'corethink:free',
+        messages: [{ role: 'user', content: 'hi' }],
+        max_completion_tokens: 100_000,
+      },
+      response: new Response('Error', { status: 500 }),
+      isUserByok: false,
+    });
+    expect(readable).toBeDefined();
+    const payload = (await readable!.json()) as { error: string };
+    expect(payload.error).toContain('The maximum context length is 78000 tokens');
+  });
+
+  it('skips context-length check for non-Kilo models', async () => {
+    // A one-million-character prompt for openai/gpt-4 must not produce a replacement.
+    const readable = await makeErrorReadable({
+      requestedModel: 'openai/gpt-4',
+      request: {
+        model: 'openai/gpt-4',
+        messages: [{ role: 'user', content: 'x'.repeat(1_000_000) }],
+      },
+      response: new Response('Error', { status: 500 }),
+      isUserByok: false,
+    });
+    expect(readable).toBeUndefined();
+  });
+
+  it('returns stealth model error for Kilo stealth models', async () => {
+    // giga-potato is a stealth model (inference_providers includes 'stealth').
+    // `error` and `message` are both expected to carry the same fixed sentence.
+    const readable = await makeErrorReadable({
+      requestedModel: 'giga-potato',
+      request: { model: 'giga-potato', messages: [{ role: 'user', content: 'hi' }] },
+      response: new Response('Internal Server Error', { status: 500 }),
+      isUserByok: false,
+    });
+    expect(readable).toBeDefined();
+    expect(readable!.status).toBe(500);
+    const payload = (await readable!.json()) as { error: string; message: string };
+    expect(payload.error).toBe('Stealth model unable to process request');
+    expect(payload.message).toBe('Stealth model unable to process request');
+  });
+
+  it('preserves upstream status code for stealth model errors', async () => {
+    // The upstream 502 must be passed on unchanged in the replacement response.
+    const readable = await makeErrorReadable({
+      requestedModel: 'giga-potato-thinking',
+      request: { model: 'giga-potato-thinking', messages: [{ role: 'user', content: 'hi' }] },
+      response: new Response('Bad Gateway', { status: 502 }),
+      isUserByok: false,
+    });
+    expect(readable).toBeDefined();
+    expect(readable!.status).toBe(502);
+  });
+
+  it('does not return stealth error for non-stealth Kilo free models', async () => {
+    // corethink:free is not a stealth model, and the request is short, so neither the
+    // stealth nor the context-length replacement applies: undefined is expected.
+    const readable = await makeErrorReadable({
+      requestedModel: 'corethink:free',
+      request: { model: 'corethink:free', messages: [{ role: 'user', content: 'hi' }] },
+      response: new Response('Error', { status: 500 }),
+      isUserByok: false,
+    });
+    expect(readable).toBeUndefined();
+  });
+});
diff --git a/llm-gateway/test/unit/rewrite-free-model-response.test.ts b/llm-gateway/test/unit/rewrite-free-model-response.test.ts
new file mode 100644
index 000000000..1e2d63a62
--- /dev/null
+++ b/llm-gateway/test/unit/rewrite-free-model-response.test.ts
@@ -0,0 +1,193 @@
+// Tests for rewriteFreeModelResponse — SSE stream transformer for free model responses.
+// Verifies cost stripping, model replacement, and reasoning_content → reasoning conversion.
+
+import { describe, it, expect } from 'vitest';
+import { rewriteFreeModelResponse } from '../../src/lib/rewrite-free-model-response';
+
+function sseChunk(data: Record<string, unknown>): string {
+  return 'data: ' + JSON.stringify(data) + '\n\n'; // one SSE event: data line + blank line
+}
+
+function makeSSEResponse(chunks: string[], status = 200): Response {
+  // Builds a text/event-stream response from pre-formatted SSE text chunks.
+  // Every chunk is encoded and enqueued inside start(), and the stream is
+  // closed right after, so the whole body is queued before anyone reads it.
+  // `status` defaults to 200.
+  const utf8 = new TextEncoder();
+  const body = new ReadableStream({
+    start(controller) {
+      chunks.forEach(text => controller.enqueue(utf8.encode(text)));
+      controller.close();
+    },
+  });
+  const headers = { 'content-type': 'text/event-stream' };
+  return new Response(body, { status, headers });
+}
+
+function makeJsonResponse(body: Record<string, unknown>, status = 200): Response {
+  // Non-streaming counterpart: `body` is serialized once and served as application/json.
+  const headers = { 'content-type': 'application/json' };
+  const serialized = JSON.stringify(body);
+  return new Response(serialized, { status, headers });
+}
+
+async function readSSEEvents(response: Response): Promise<unknown[]> {
+  // Reads the whole body, then JSON-parses the payload of every line that starts
+  // with `data: `, skipping the `data: [DONE]` sentinel. All other lines (such as
+  // the blank separators between events) are ignored.
+  const prefix = 'data: ';
+  const lines = (await response.text()).split('\n');
+  return lines
+    .filter(line => line.startsWith(prefix) && line !== 'data: [DONE]')
+    .map(line => JSON.parse(line.slice(prefix.length)));
+}
+
+describe('rewriteFreeModelResponse — SSE streaming', () => {
+  // Minimal shape of a streamed chunk, as far as the delta assertions below need it.
+  type DeltaEvent = { choices: Array<{ delta: Record<string, unknown> }> };
+  // Pipes `upstream` through the rewriter and parses the SSE events it emits.
+  const rewriteEvents = async (upstream: Response, model: string) =>
+    readSSEEvents(await rewriteFreeModelResponse(upstream, model));
+
+  it('replaces model name in SSE chunks', async () => {
+    const upstream = makeSSEResponse([
+      sseChunk({
+        model: 'actual-provider-model-id',
+        choices: [{ delta: { content: 'hello' } }],
+      }),
+      'data: [DONE]\n\n',
+    ]);
+    const events = await rewriteEvents(upstream, 'corethink:free');
+    expect(events).toHaveLength(1);
+    expect((events[0] as Record<string, unknown>).model).toBe('corethink:free');
+  });
+
+  it('strips cost from usage chunks', async () => {
+    const upstream = makeSSEResponse([
+      sseChunk({
+        model: 'internal-model',
+        choices: [],
+        usage: {
+          prompt_tokens: 10,
+          completion_tokens: 20,
+          total_tokens: 30,
+          cost: 0.0001,
+          cost_details: { upstream_inference_cost: 0.0001 },
+          is_byok: false,
+        },
+      }),
+      'data: [DONE]\n\n',
+    ]);
+    const events = await rewriteEvents(upstream, 'corethink:free');
+    const { usage } = events[0] as { usage: Record<string, unknown> };
+    expect(usage.cost).toBeUndefined();
+    expect(usage.cost_details).toBeUndefined();
+    expect(usage.is_byok).toBeUndefined();
+    expect(usage.prompt_tokens).toBe(10);
+  });
+
+  it('converts reasoning_content to reasoning + reasoning_details', async () => {
+    const upstream = makeSSEResponse([
+      sseChunk({
+        model: 'internal-model',
+        choices: [
+          {
+            delta: {
+              reasoning_content: 'Let me think...',
+              content: 'The answer is 42.',
+            },
+          },
+        ],
+      }),
+      'data: [DONE]\n\n',
+    ]);
+    const events = await rewriteEvents(upstream, 'giga-potato-thinking');
+    const { delta } = (events[0] as DeltaEvent).choices[0];
+    expect(delta.reasoning).toBe('Let me think...');
+    expect(delta.reasoning_details).toEqual([{ type: 'reasoning.text', text: 'Let me think...' }]);
+    expect(delta.reasoning_content).toBeUndefined();
+  });
+
+  it('removes null role from delta', async () => {
+    const upstream = makeSSEResponse([
+      sseChunk({
+        model: 'internal-model',
+        choices: [{ delta: { role: null, content: 'hi' } }],
+      }),
+      'data: [DONE]\n\n',
+    ]);
+    const events = await rewriteEvents(upstream, 'corethink:free');
+    const { delta } = (events[0] as DeltaEvent).choices[0];
+    expect(delta.role).toBeUndefined();
+  });
+
+  it('emits [DONE] sentinel at end', async () => {
+    const upstream = makeSSEResponse([
+      sseChunk({ model: 'x', choices: [{ delta: { content: 'a' } }] }),
+      'data: [DONE]\n\n',
+    ]);
+    const rewritten = await rewriteFreeModelResponse(upstream, 'corethink:free');
+    const text = await rewritten.text();
+    expect(text).toContain('data: [DONE]');
+  });
+
+  it('sets Content-Encoding: identity', async () => {
+    const upstream = makeSSEResponse([sseChunk({ model: 'x', choices: [] }), 'data: [DONE]\n\n']);
+    const rewritten = await rewriteFreeModelResponse(upstream, 'corethink:free');
+    expect(rewritten.headers.get('Content-Encoding')).toBe('identity');
+  });
+});
+
+describe('rewriteFreeModelResponse — JSON (non-streaming)', () => {
+  // Minimal shape of a non-streaming completion, as far as the message assertions need it.
+  type MessageBody = { choices: Array<{ message: Record<string, unknown> }> };
+
+  it('replaces model name in JSON response', async () => {
+    const upstream = makeJsonResponse({
+      model: 'internal-model-id',
+      choices: [{ message: { content: 'hello' } }],
+    });
+    const rewritten = await rewriteFreeModelResponse(upstream, 'corethink:free');
+    const payload = (await rewritten.json()) as Record<string, unknown>;
+    expect(payload.model).toBe('corethink:free');
+  });
+
+  it('strips cost from JSON usage', async () => {
+    const upstream = makeJsonResponse({
+      model: 'internal-model',
+      choices: [{ message: { content: 'ok' } }],
+      usage: {
+        prompt_tokens: 5,
+        completion_tokens: 10,
+        cost: 0.05,
+        cost_details: {},
+        is_byok: true,
+      },
+    });
+    const rewritten = await rewriteFreeModelResponse(upstream, 'corethink:free');
+    const { usage } = (await rewritten.json()) as { usage: Record<string, unknown> };
+    expect(usage.cost).toBeUndefined();
+    expect(usage.cost_details).toBeUndefined();
+    expect(usage.is_byok).toBeUndefined();
+    expect(usage.prompt_tokens).toBe(5);
+  });
+
+  it('converts reasoning_content in JSON message', async () => {
+    const upstream = makeJsonResponse({
+      model: 'internal',
+      choices: [
+        {
+          message: {
+            reasoning_content: 'thinking...',
+            content: 'done',
+          },
+        },
+      ],
+    });
+    const rewritten = await rewriteFreeModelResponse(upstream, 'giga-potato-thinking');
+    const { message } = ((await rewritten.json()) as MessageBody).choices[0];
+    expect(message.reasoning).toBe('thinking...');
+    expect(message.reasoning_details).toEqual([{ type: 'reasoning.text', text: 'thinking...' }]);
+    expect(message.reasoning_content).toBeUndefined();
+  });
+});
diff --git a/llm-gateway/test/unit/stubs/cloudflare-workers.ts b/llm-gateway/test/unit/stubs/cloudflare-workers.ts
new file mode 100644
index 000000000..f87f0f622
--- /dev/null
+++ b/llm-gateway/test/unit/stubs/cloudflare-workers.ts
@@ -0,0 +1,11 @@
+// Minimal stub for cloudflare:workers in unit tests.
+// Only provides the DurableObject base class needed by RateLimitDO.
+
+// Test double for the runtime base class: it stores both constructor arguments on the
+// instance (`this.ctx`, `this.env`) and does nothing else.
+export class DurableObject {
+  constructor(
+    protected ctx: unknown,
+    protected env: unknown
+  ) {}
+}
diff --git a/llm-gateway/test/unit/stubs/sentry-cloudflare.ts b/llm-gateway/test/unit/stubs/sentry-cloudflare.ts
new file mode 100644
index 000000000..1f831de09
--- /dev/null
+++ b/llm-gateway/test/unit/stubs/sentry-cloudflare.ts
@@ -0,0 +1,11 @@
+// Minimal stub for @sentry/cloudflare in unit tests.
+// Provides no-op implementations of the Sentry APIs used in src/.
+
+export function captureException(_err: unknown, _opts?: unknown): void {} // no-op: both arguments are ignored
+
+type FetchHandler = { fetch: (...args: unknown[]) => unknown };
+// Identity stand-in for the Sentry wrapper: the first argument is ignored and the
+// handler is returned as-is, so no instrumentation is added in unit tests.
+export function withSentry(_optsOrFn: unknown, handler: FetchHandler): FetchHandler {
+  return handler;
+}
diff --git a/llm-gateway/test/unit/tool-calling.test.ts b/llm-gateway/test/unit/tool-calling.test.ts
new file mode 100644
index 000000000..72dc35584
--- /dev/null
+++ b/llm-gateway/test/unit/tool-calling.test.ts
@@ -0,0 +1,156 @@
+// Tests for tool-calling utilities: repairTools, dropToolStrictProperties,
+// normalizeToolCallIds, hasAttemptCompletionTool.
+
+import { describe, it, expect } from 'vitest';
+import {
+  repairTools,
+  dropToolStrictProperties,
+  normalizeToolCallIds,
+  hasAttemptCompletionTool,
+} from '../../src/lib/tool-calling';
+import type { OpenRouterChatCompletionRequest } from '../../src/types/request';
+
+type LooseList = Array<Record<string, unknown>>;
+// Loosely-typed request factory: the double cast lets tests pass partial message/tool
+// objects. When `tools` is omitted, the resulting request has `tools: undefined`.
+function makeRequest(messages: LooseList, tools?: LooseList): OpenRouterChatCompletionRequest {
+  return { model: 'test', messages, tools } as unknown as OpenRouterChatCompletionRequest;
+}
+
+describe('repairTools', () => {
+  it('deduplicates tool calls with same id', () => {
+    // 'tc-1' is listed twice in the assistant turn; two distinct ids must remain, in order.
+    const req = makeRequest([
+      { role: 'user', content: 'hi' },
+      {
+        role: 'assistant',
+        tool_calls: [
+          { id: 'tc-1', type: 'function', function: { name: 'foo' } },
+          { id: 'tc-1', type: 'function', function: { name: 'foo' } },
+          { id: 'tc-2', type: 'function', function: { name: 'bar' } },
+        ],
+      },
+      { role: 'tool', tool_call_id: 'tc-1', content: 'result1' },
+      { role: 'tool', tool_call_id: 'tc-2', content: 'result2' },
+    ]);
+    repairTools(req);
+    const assistant = req.messages.find(m => m.role === 'assistant') as Record<string, unknown>;
+    const calls = assistant.tool_calls as Array<{ id: string }>;
+    expect(calls).toHaveLength(2);
+    expect(calls.map(call => call.id)).toEqual(['tc-1', 'tc-2']);
+  });
+
+  it('inserts missing tool results', () => {
+    const req = makeRequest([
+      { role: 'user', content: 'hi' },
+      {
+        role: 'assistant',
+        tool_calls: [
+          { id: 'tc-1', type: 'function', function: { name: 'foo' } },
+          { id: 'tc-2', type: 'function', function: { name: 'bar' } },
+        ],
+      },
+      // Only tc-1 has a result; the one for tc-2 is expected to be synthesized.
+      { role: 'tool', tool_call_id: 'tc-1', content: 'ok' },
+    ]);
+    repairTools(req);
+    const toolMsgs = req.messages.filter(m => m.role === 'tool') as Array<Record<string, unknown>>;
+    expect(toolMsgs).toHaveLength(2);
+    const synthesized = toolMsgs.find(m => m.tool_call_id === 'tc-2') as Record<string, unknown>;
+    expect(synthesized).toBeDefined();
+    expect(synthesized.content).toContain('interrupted');
+  });
+
+  it('removes orphan tool results', () => {
+    const req = makeRequest([
+      { role: 'user', content: 'hi' },
+      {
+        role: 'assistant',
+        tool_calls: [{ id: 'tc-1', type: 'function', function: { name: 'foo' } }],
+      },
+      { role: 'tool', tool_call_id: 'tc-1', content: 'ok' },
+      // Orphan: no tool_call with id 'tc-999' exists in the assistant turn.
+      { role: 'tool', tool_call_id: 'tc-999', content: 'orphan' },
+    ]);
+    repairTools(req);
+    const toolMsgs = req.messages.filter(m => m.role === 'tool') as Array<Record<string, unknown>>;
+    expect(toolMsgs).toHaveLength(1);
+    expect(toolMsgs[0].tool_call_id).toBe('tc-1');
+  });
+
+  it('handles empty messages gracefully', () => {
+    // With no messages there is nothing to repair; the list must stay empty.
+    const req = makeRequest([]);
+    repairTools(req);
+    expect(req.messages).toEqual([]);
+  });
+});
+
+describe('dropToolStrictProperties', () => {
+  it('removes strict from function tool definitions', () => {
+    // Both `strict: true` and `strict: false` must be removed, not just truthy values.
+    const definitions = [
+      { type: 'function', function: { name: 'foo', strict: true, parameters: {} } },
+      { type: 'function', function: { name: 'bar', strict: false, parameters: {} } },
+    ];
+    const req = makeRequest([{ role: 'user', content: 'hi' }], definitions);
+    dropToolStrictProperties(req);
+    // Read the tools back from the request that was passed in.
+    const remaining = req.tools as Array<{ function?: { strict?: unknown } }>;
+    expect(remaining[0].function?.strict).toBeUndefined();
+    expect(remaining[1].function?.strict).toBeUndefined();
+  });
+});
+
+describe('normalizeToolCallIds', () => {
+  it('hashes tool call IDs matching the filter', async () => {
+    const longId = 'long-id-that-needs-hashing';
+    const req = makeRequest([
+      { role: 'user', content: 'hi' },
+      {
+        role: 'assistant',
+        tool_calls: [
+          { id: longId, type: 'function', function: { name: 'foo' } },
+          { id: 'short', type: 'function', function: { name: 'bar' } },
+        ],
+      },
+      { role: 'tool', tool_call_id: longId, content: 'ok' },
+      { role: 'tool', tool_call_id: 'short', content: 'ok' },
+    ]);
+    // Only ids longer than 10 characters match the filter; 24 is passed as third argument.
+    await normalizeToolCallIds(req, id => id.length > 10, 24);
+    const assistant = req.messages.find(m => m.role === 'assistant') as Record<string, unknown>;
+    const [hashed, untouched] = assistant.tool_calls as Array<{ id: string }>;
+    // The long id is replaced by a 24-character value; the short one is left alone.
+    expect(hashed.id).toHaveLength(24);
+    expect(hashed.id).not.toBe(longId);
+    expect(untouched.id).toBe('short');
+    // The tool results must reference the same (possibly rewritten) ids.
+    const toolMsgs = req.messages.filter(m => m.role === 'tool') as Array<Record<string, unknown>>;
+    expect(toolMsgs[0].tool_call_id).toBe(hashed.id);
+    expect(toolMsgs[1].tool_call_id).toBe('short');
+  });
+});
+
+describe('hasAttemptCompletionTool', () => {
+  const userTurn = () => [{ role: 'user', content: 'hi' }];
+  // Request holding one user turn plus a single function tool called `name`.
+  const requestWithTool = (name: string) =>
+    makeRequest(userTurn(), [{ type: 'function', function: { name } }]);
+
+  it('returns true when attempt_completion tool is present', () => {
+    const req = requestWithTool('attempt_completion');
+    expect(hasAttemptCompletionTool(req)).toBe(true);
+  });
+
+  it('returns false when attempt_completion tool is absent', () => {
+    const req = requestWithTool('other_tool');
+    expect(hasAttemptCompletionTool(req)).toBe(false);
+  });
+
+  it('returns false when no tools at all', () => {
+    // makeRequest is called without a tools argument, so req.tools is undefined.
+    const req = makeRequest(userTurn());
+    expect(hasAttemptCompletionTool(req)).toBe(false);
+  });
+});
diff --git a/llm-gateway/test/unit/vercel-routing.test.ts b/llm-gateway/test/unit/vercel-routing.test.ts
new file mode 100644
index 000000000..d5c7562fb
--- /dev/null
+++ b/llm-gateway/test/unit/vercel-routing.test.ts
@@ -0,0 +1,151 @@
+// Tests for Vercel AI Gateway A/B routing logic.
+
+import { describe, it, expect, vi, beforeAll, afterAll } from 'vitest';
+import type { OpenRouterChatCompletionRequest } from '../../src/types/request';
+
+// Stub scheduler.wait globally — it's a Workers runtime global that Node does not provide.
+// vi.stubGlobal records the previous state, so vi.unstubAllGlobals() puts it back
+// instead of leaving `scheduler` defined as undefined after the suite.
+beforeAll(() => {
+  vi.stubGlobal('scheduler', { wait: (ms: number) => new Promise(r => setTimeout(r, ms)) });
+});
+afterAll(() => {
+  vi.unstubAllGlobals();
+});
+
+// Mock the DB module: getWorkerDb() returns an empty object, so no real Postgres connection is made.
+vi.mock('@kilocode/db/client', () => ({
+  getWorkerDb: () => ({}),
+}));
+
+// Import dynamically, after the vi.mock call above, so vercel-routing picks up the mocked module.
+const { shouldRouteToVercel, getGatewayErrorRate } = await import('../../src/lib/vercel-routing');
+
+type Req = OpenRouterChatCompletionRequest;
+// Default request (openai/gpt-5.2, one user turn); fields in `overrides` replace the defaults.
+function makeRequest(overrides: Partial<Req> = {}): Req {
+  return { model: 'openai/gpt-5.2', messages: [{ role: 'user', content: 'hi' }], ...overrides };
+}
+
+// Fake WorkerDb: execute() resolves to one row per gateway ('openrouter', 'vercel')
+// carrying the error rate passed in. A fresh rows array is built on every call.
+// The `as never` cast makes the fake acceptable wherever the real db type is expected.
+// Both rates default to 0.
+function fakeDb(openrouter = 0, vercel = 0) {
+  const rates = { openrouter, vercel };
+  const execute = async () => ({
+    rows: Object.entries(rates).map(([gateway, errorRate]) => ({ gateway, errorRate })),
+  });
+  return { execute } as never;
+}
+
+describe('shouldRouteToVercel', () => {
+  // Counts how many of the 100 seeds `${seedPrefix}0` … `${seedPrefix}99` go to Vercel.
+  const countVercel = async (db: ReturnType<typeof fakeDb>, seedPrefix: string) => {
+    const decisions = await Promise.all(
+      Array.from({ length: 100 }, (_, i) =>
+        shouldRouteToVercel(db, 'openai/gpt-5.2', makeRequest(), `${seedPrefix}${i}`)
+      )
+    );
+    return decisions.filter(Boolean).length;
+  };
+
+  it('returns false when data_collection=deny', async () => {
+    // Same model as the routing tests below, but the request carries data_collection: 'deny'.
+    const req = makeRequest({ provider: { data_collection: 'deny' } });
+    const routed = await shouldRouteToVercel(fakeDb(), 'openai/gpt-5.2', req, 'seed-1');
+    expect(routed).toBe(false);
+  });
+
+  it('returns false for openrouter/* models', async () => {
+    const routed = await shouldRouteToVercel(
+      fakeDb(),
+      'openrouter/free',
+      makeRequest({ model: 'openrouter/free' }),
+      'seed-1'
+    );
+    expect(routed).toBe(false);
+  });
+
+  it('returns false for Anthropic models', async () => {
+    const routed = await shouldRouteToVercel(
+      fakeDb(),
+      'anthropic/claude-sonnet-4.6',
+      makeRequest({ model: 'anthropic/claude-sonnet-4.6' }),
+      'seed-1'
+    );
+    expect(routed).toBe(false);
+  });
+
+  it('returns false for models not in preferredModels', async () => {
+    const routed = await shouldRouteToVercel(
+      fakeDb(),
+      'meta-llama/llama-3.3-70b-instruct',
+      makeRequest({ model: 'meta-llama/llama-3.3-70b-instruct' }),
+      'seed-1'
+    );
+    expect(routed).toBe(false);
+  });
+
+  it('returns false for Kilo free models with non-openrouter gateway (e.g. corethink)', async () => {
+    const routed = await shouldRouteToVercel(
+      fakeDb(),
+      'corethink:free',
+      makeRequest({ model: 'corethink:free' }),
+      'seed-1'
+    );
+    expect(routed).toBe(false);
+  });
+
+  it('returns false for Kilo free models with non-openrouter gateway (e.g. giga-potato)', async () => {
+    const routed = await shouldRouteToVercel(
+      fakeDb(),
+      'giga-potato',
+      makeRequest({ model: 'giga-potato' }),
+      'seed-1'
+    );
+    expect(routed).toBe(false);
+  });
+
+  it('routes preferred model deterministically based on seed', async () => {
+    // Same db, model and seed on both calls, so the two decisions must agree.
+    const db = fakeDb();
+    const first = await shouldRouteToVercel(db, 'openai/gpt-5.2', makeRequest(), 'stable-seed');
+    const second = await shouldRouteToVercel(db, 'openai/gpt-5.2', makeRequest(), 'stable-seed');
+    expect(first).toBe(second);
+  });
+
+  it('can route to Vercel for eligible preferred models', async () => {
+    // Try many seeds; at 10% routing at least one should hit Vercel
+    const trueCount = await countVercel(fakeDb(), 'seed-');
+    // With 10% routing, we expect ~10 out of 100: at least 1 and fewer than 30
+    expect(trueCount).toBeGreaterThan(0);
+    expect(trueCount).toBeLessThan(30);
+  });
+
+  it('routes ~90% to Vercel when OpenRouter error rate is high', async () => {
+    // OpenRouter error rate > 50%, Vercel < 50% → 90% to Vercel
+    const trueCount = await countVercel(fakeDb(0.7, 0.1), 'failover-seed-');
+    // With 90% routing, we expect ~90 out of 100
+    expect(trueCount).toBeGreaterThan(70);
+  });
+});
+
+describe('getGatewayErrorRate', () => {
+  it('returns error rates from DB', async () => {
+    // The two rows produced by fakeDb are expected back as `openrouter` / `vercel` fields.
+    const rates = await getGatewayErrorRate(fakeDb(0.05, 0.02));
+    expect(rates.openrouter).toBe(0.05);
+    expect(rates.vercel).toBe(0.02);
+  });
+
+  it('returns 0/0 on DB error', async () => {
+    // execute() rejects here; getGatewayErrorRate is expected to resolve anyway,
+    // reporting zero for both gateways.
+    const brokenDb = {
+      execute: () => Promise.reject(new Error('connection failed')),
+    } as never;
+    const rates = await getGatewayErrorRate(brokenDb);
+    expect(rates).toEqual({ openrouter: 0, vercel: 0 });
+  });
+});
diff --git a/llm-gateway/tsconfig.json b/llm-gateway/tsconfig.json
new file mode 100644
index 000000000..2a1edb25a
--- /dev/null
+++ b/llm-gateway/tsconfig.json
@@ -0,0 +1,24 @@
+{
+  "compilerOptions": {
+    "target": "esnext",
+    "lib": ["esnext"],
+    "module": "esnext",
+    "moduleResolution": "bundler",
+    "types": ["@types/node", "./worker-configuration.d.ts"],
+    "esModuleInterop": true,
+    "forceConsistentCasingInFileNames": true,
+    "strict": true,
+    "skipLibCheck": true,
+    "noEmit": true,
+    "experimentalDecorators": true,
+    "resolveJsonModule": true,
+    "allowJs": true
+  },
+  "include": [
+    "worker-configuration.d.ts",
+    "src/**/*.ts",
+    "test/**/*.ts",
+    "vitest.config.ts",
+    "vitest.workers.config.ts"
+  ]
+}
diff --git a/llm-gateway/vitest.config.ts b/llm-gateway/vitest.config.ts
new file mode 100644
index 000000000..8175c1006
--- /dev/null
+++ b/llm-gateway/vitest.config.ts
@@ -0,0 +1,29 @@
+import { defineConfig } from 'vitest/config';
+import path from 'node:path';
+
+// Unit-test project: executes in plain Node (fast, supports vi.mock and global mocking).
+const stubsDir = path.resolve(__dirname, 'test/unit/stubs');
+
+export default defineConfig({
+  resolve: {
+    alias: {
+      // Neither module exists outside the Workers runtime; alias each to a local stub.
+      // cloudflare:workers -> minimal stub (DurableObject users, e.g. RateLimitDO).
+      'cloudflare:workers': path.join(stubsDir, 'cloudflare-workers.ts'),
+      // @sentry/cloudflare -> no-op stub (Sentry users, e.g. sentry.ts, index.ts).
+      '@sentry/cloudflare': path.join(stubsDir, 'sentry-cloudflare.ts'),
+    },
+  },
+  test: {
+    name: 'unit',
+    globals: true,
+    environment: 'node',
+    // NOTE(review): test/integration is also matched by vitest.workers.config.ts — confirm both runs are intended.
+    include: ['src/**/*.test.ts', 'test/unit/**/*.test.ts', 'test/integration/**/*.test.ts'],
+    coverage: {
+      provider: 'v8',
+      reporter: ['text', 'json', 'html'],
+      exclude: ['node_modules/', 'dist/', '**/*.test.ts'],
+    },
+  },
+});
diff --git a/llm-gateway/vitest.workers.config.ts b/llm-gateway/vitest.workers.config.ts
new file mode 100644
index 000000000..5b0dbd0e1
--- /dev/null
+++ b/llm-gateway/vitest.workers.config.ts
@@ -0,0 +1,18 @@
+import { defineWorkersProject } from '@cloudflare/vitest-pool-workers/config';
+
+// Integration-test project: executes in the Cloudflare Workers runtime via Miniflare.
+// Pool settings are kept in one constant so the project definition stays flat.
+const workersPool = {
+  singleWorker: true,
+  // The pool is pointed at this worker's own wrangler file.
+  wrangler: { configPath: './wrangler.jsonc' },
+};
+
+export default defineWorkersProject({
+  test: {
+    name: 'integration',
+    globals: true,
+    include: ['test/integration/**/*.test.ts'],
+    poolOptions: { workers: workersPool },
+  },
+});
diff --git a/llm-gateway/worker-configuration.d.ts b/llm-gateway/worker-configuration.d.ts
new file mode 100644
index 000000000..4a17e008d
--- /dev/null
+++ b/llm-gateway/worker-configuration.d.ts
@@ -0,0 +1,11231 @@
+/* eslint-disable */
+// Generated by Wrangler by running `wrangler types` (hash: 24227e11db859c7abdab73d38606f08e)
+// Runtime types generated with workerd@1.20260302.0 2026-02-01 nodejs_compat
+declare namespace Cloudflare {
+	interface GlobalProps {
+		mainModule: typeof import("./src/index");
+		durableNamespaces: "RateLimitDO";
+	}
+	interface Env {
+		HYPERDRIVE: Hyperdrive;
+		NEXTAUTH_SECRET_PROD: SecretsStoreSecret;
+		OPENROUTER_API_KEY: SecretsStoreSecret;
+		GIGAPOTATO_API_KEY: SecretsStoreSecret;
+		CORETHINK_API_KEY: SecretsStoreSecret;
+		MARTIAN_API_KEY: SecretsStoreSecret;
+		MISTRAL_API_KEY: SecretsStoreSecret;
+		VERCEL_AI_GATEWAY_API_KEY: SecretsStoreSecret;
+		BYOK_ENCRYPTION_KEY: SecretsStoreSecret;
+		ABUSE_CF_ACCESS_CLIENT_ID: SecretsStoreSecret;
+		ABUSE_CF_ACCESS_CLIENT_SECRET: SecretsStoreSecret;
+		GIGAPOTATO_API_URL: SecretsStoreSecret;
+		ABUSE_SERVICE_URL: SecretsStoreSecret;
+		POSTHOG_API_KEY: SecretsStoreSecret;
+		RATE_LIMIT_DO: DurableObjectNamespace<import("./src/index").RateLimitDO>;
+		O11Y: Fetcher /* o11y */;
+	}
+}
+interface Env extends Cloudflare.Env {}
+
+// Begin runtime types
+/*! *****************************************************************************
+Copyright (c) Cloudflare. All rights reserved.
+Copyright (c) Microsoft Corporation. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use
+this file except in compliance with the License. You may obtain a copy of the
+License at http://www.apache.org/licenses/LICENSE-2.0
+THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+MERCHANTABLITY OR NON-INFRINGEMENT.
+See the Apache Version 2.0 License for specific language governing permissions
+and limitations under the License.
+***************************************************************************** */
+/* eslint-disable */
+// noinspection JSUnusedGlobalSymbols
+declare var onmessage: never;
+/**
+ * The **`DOMException`** interface represents an abnormal event (called an **exception**) that occurs as a result of calling a method or accessing a property of a web API.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/DOMException)
+ */
+declare class DOMException extends Error {
+    constructor(message?: string, name?: string);
+    /**
+     * The **`message`** read-only property of the DOMException interface returns a message or description associated with the given error name.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/DOMException/message)
+     */
+    readonly message: string;
+    /**
+     * The **`name`** read-only property of the DOMException interface returns one of the strings associated with an error name.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/DOMException/name)
+     */
+    readonly name: string;
+    /**
+     * The **`code`** read-only property of the DOMException interface returns one of the legacy error code constants, or `0` if none match.
+     * @deprecated
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/DOMException/code)
+     */
+    readonly code: number;
+    static readonly INDEX_SIZE_ERR: number;
+    static readonly DOMSTRING_SIZE_ERR: number;
+    static readonly HIERARCHY_REQUEST_ERR: number;
+    static readonly WRONG_DOCUMENT_ERR: number;
+    static readonly INVALID_CHARACTER_ERR: number;
+    static readonly NO_DATA_ALLOWED_ERR: number;
+    static readonly NO_MODIFICATION_ALLOWED_ERR: number;
+    static readonly NOT_FOUND_ERR: number;
+    static readonly NOT_SUPPORTED_ERR: number;
+    static readonly INUSE_ATTRIBUTE_ERR: number;
+    static readonly INVALID_STATE_ERR: number;
+    static readonly SYNTAX_ERR: number;
+    static readonly INVALID_MODIFICATION_ERR: number;
+    static readonly NAMESPACE_ERR: number;
+    static readonly INVALID_ACCESS_ERR: number;
+    static readonly VALIDATION_ERR: number;
+    static readonly TYPE_MISMATCH_ERR: number;
+    static readonly SECURITY_ERR: number;
+    static readonly NETWORK_ERR: number;
+    static readonly ABORT_ERR: number;
+    static readonly URL_MISMATCH_ERR: number;
+    static readonly QUOTA_EXCEEDED_ERR: number;
+    static readonly TIMEOUT_ERR: number;
+    static readonly INVALID_NODE_TYPE_ERR: number;
+    static readonly DATA_CLONE_ERR: number;
+    get stack(): any;
+    set stack(value: any);
+}
+type WorkerGlobalScopeEventMap = {
+    fetch: FetchEvent;
+    scheduled: ScheduledEvent;
+    queue: QueueEvent;
+    unhandledrejection: PromiseRejectionEvent;
+    rejectionhandled: PromiseRejectionEvent;
+};
+declare abstract class WorkerGlobalScope extends EventTarget<WorkerGlobalScopeEventMap> {
+    EventTarget: typeof EventTarget;
+}
+/**
+ * The **`console`** object provides access to the debugging console (e.g., the Web console in Firefox).
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console)
+ */
+interface Console {
+    "assert"(condition?: boolean, ...data: any[]): void;
+    /**
+     * The **`console.clear()`** static method clears the console if possible.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console/clear_static)
+     */
+    clear(): void;
+    /**
+     * The **`console.count()`** static method logs the number of times that this particular call to `count()` has been called.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console/count_static)
+     */
+    count(label?: string): void;
+    /**
+     * The **`console.countReset()`** static method resets the counter used with `console.count()`.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console/countReset_static)
+     */
+    countReset(label?: string): void;
+    /**
+     * The **`console.debug()`** static method outputs a message to the console at the 'debug' log level.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console/debug_static)
+     */
+    debug(...data: any[]): void;
+    /**
+     * The **`console.dir()`** static method displays a list of the properties of the specified JavaScript object.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console/dir_static)
+     */
+    dir(item?: any, options?: any): void;
+    /**
+     * The **`console.dirxml()`** static method displays an interactive tree of the descendant elements of the specified XML/HTML element.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console/dirxml_static)
+     */
+    dirxml(...data: any[]): void;
+    /**
+     * The **`console.error()`** static method outputs a message to the console at the 'error' log level.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console/error_static)
+     */
+    error(...data: any[]): void;
+    /**
+     * The **`console.group()`** static method creates a new inline group in the Web console log, causing any subsequent console messages to be indented by an additional level, until `console.groupEnd()` is called.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console/group_static)
+     */
+    group(...data: any[]): void;
+    /**
+     * The **`console.groupCollapsed()`** static method creates a new inline group in the console.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console/groupCollapsed_static)
+     */
+    groupCollapsed(...data: any[]): void;
+    /**
+     * The **`console.groupEnd()`** static method exits the current inline group in the console.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console/groupEnd_static)
+     */
+    groupEnd(): void;
+    /**
+     * The **`console.info()`** static method outputs a message to the console at the 'info' log level.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console/info_static)
+     */
+    info(...data: any[]): void;
+    /**
+     * The **`console.log()`** static method outputs a message to the console.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console/log_static)
+     */
+    log(...data: any[]): void;
+    /**
+     * The **`console.table()`** static method displays tabular data as a table.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console/table_static)
+     */
+    table(tabularData?: any, properties?: string[]): void;
+    /**
+     * The **`console.time()`** static method starts a timer you can use to track how long an operation takes.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console/time_static)
+     */
+    time(label?: string): void;
+    /**
+     * The **`console.timeEnd()`** static method stops a timer that was previously started by calling `console.time()`.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console/timeEnd_static)
+     */
+    timeEnd(label?: string): void;
+    /**
+     * The **`console.timeLog()`** static method logs the current value of a timer that was previously started by calling `console.time()`.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console/timeLog_static)
+     */
+    timeLog(label?: string, ...data: any[]): void;
+    timeStamp(label?: string): void;
+    /**
+     * The **`console.trace()`** static method outputs a stack trace to the console.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console/trace_static)
+     */
+    trace(...data: any[]): void;
+    /**
+     * The **`console.warn()`** static method outputs a warning message to the console at the 'warning' log level.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/console/warn_static)
+     */
+    warn(...data: any[]): void;
+}
+declare const console: Console;
+type BufferSource = ArrayBufferView | ArrayBuffer;
+type TypedArray = Int8Array | Uint8Array | Uint8ClampedArray | Int16Array | Uint16Array | Int32Array | Uint32Array | Float32Array | Float64Array | BigInt64Array | BigUint64Array;
+declare namespace WebAssembly {
+    class CompileError extends Error {
+        constructor(message?: string);
+    }
+    class RuntimeError extends Error {
+        constructor(message?: string);
+    }
+    type ValueType = "anyfunc" | "externref" | "f32" | "f64" | "i32" | "i64" | "v128";
+    interface GlobalDescriptor {
+        value: ValueType;
+        mutable?: boolean;
+    }
+    class Global {
+        constructor(descriptor: GlobalDescriptor, value?: any);
+        value: any;
+        valueOf(): any;
+    }
+    type ImportValue = ExportValue | number;
+    type ModuleImports = Record<string, ImportValue>;
+    type Imports = Record<string, ModuleImports>;
+    type ExportValue = Function | Global | Memory | Table;
+    type Exports = Record<string, ExportValue>;
+    class Instance {
+        constructor(module: Module, imports?: Imports);
+        readonly exports: Exports;
+    }
+    interface MemoryDescriptor {
+        initial: number;
+        maximum?: number;
+        shared?: boolean;
+    }
+    class Memory {
+        constructor(descriptor: MemoryDescriptor);
+        readonly buffer: ArrayBuffer;
+        grow(delta: number): number;
+    }
+    type ImportExportKind = "function" | "global" | "memory" | "table";
+    interface ModuleExportDescriptor {
+        kind: ImportExportKind;
+        name: string;
+    }
+    interface ModuleImportDescriptor {
+        kind: ImportExportKind;
+        module: string;
+        name: string;
+    }
+    abstract class Module {
+        static customSections(module: Module, sectionName: string): ArrayBuffer[];
+        static exports(module: Module): ModuleExportDescriptor[];
+        static imports(module: Module): ModuleImportDescriptor[];
+    }
+    type TableKind = "anyfunc" | "externref";
+    interface TableDescriptor {
+        element: TableKind;
+        initial: number;
+        maximum?: number;
+    }
+    class Table {
+        constructor(descriptor: TableDescriptor, value?: any);
+        readonly length: number;
+        get(index: number): any;
+        grow(delta: number, value?: any): number;
+        set(index: number, value?: any): void;
+    }
+    function instantiate(module: Module, imports?: Imports): Promise<Instance>;
+    function validate(bytes: BufferSource): boolean;
+}
+/**
+ * The **`ServiceWorkerGlobalScope`** interface of the Service Worker API represents the global execution context of a service worker.
+ * Available only in secure contexts.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ServiceWorkerGlobalScope)
+ */
+interface ServiceWorkerGlobalScope extends WorkerGlobalScope {
+    DOMException: typeof DOMException;
+    WorkerGlobalScope: typeof WorkerGlobalScope;
+    btoa(data: string): string;
+    atob(data: string): string;
+    setTimeout(callback: (...args: any[]) => void, msDelay?: number): number;
+    setTimeout<Args extends any[]>(callback: (...args: Args) => void, msDelay?: number, ...args: Args): number;
+    clearTimeout(timeoutId: number | null): void;
+    setInterval(callback: (...args: any[]) => void, msDelay?: number): number;
+    setInterval<Args extends any[]>(callback: (...args: Args) => void, msDelay?: number, ...args: Args): number;
+    clearInterval(timeoutId: number | null): void;
+    queueMicrotask(task: Function): void;
+    structuredClone<T>(value: T, options?: StructuredSerializeOptions): T;
+    reportError(error: any): void;
+    fetch(input: RequestInfo | URL, init?: RequestInit<RequestInitCfProperties>): Promise<Response>;
+    self: ServiceWorkerGlobalScope;
+    crypto: Crypto;
+    caches: CacheStorage;
+    scheduler: Scheduler;
+    performance: Performance;
+    Cloudflare: Cloudflare;
+    readonly origin: string;
+    Event: typeof Event;
+    ExtendableEvent: typeof ExtendableEvent;
+    CustomEvent: typeof CustomEvent;
+    PromiseRejectionEvent: typeof PromiseRejectionEvent;
+    FetchEvent: typeof FetchEvent;
+    TailEvent: typeof TailEvent;
+    TraceEvent: typeof TailEvent;
+    ScheduledEvent: typeof ScheduledEvent;
+    MessageEvent: typeof MessageEvent;
+    CloseEvent: typeof CloseEvent;
+    ReadableStreamDefaultReader: typeof ReadableStreamDefaultReader;
+    ReadableStreamBYOBReader: typeof ReadableStreamBYOBReader;
+    ReadableStream: typeof ReadableStream;
+    WritableStream: typeof WritableStream;
+    WritableStreamDefaultWriter: typeof WritableStreamDefaultWriter;
+    TransformStream: typeof TransformStream;
+    ByteLengthQueuingStrategy: typeof ByteLengthQueuingStrategy;
+    CountQueuingStrategy: typeof CountQueuingStrategy;
+    ErrorEvent: typeof ErrorEvent;
+    MessageChannel: typeof MessageChannel;
+    MessagePort: typeof MessagePort;
+    EventSource: typeof EventSource;
+    ReadableStreamBYOBRequest: typeof ReadableStreamBYOBRequest;
+    ReadableStreamDefaultController: typeof ReadableStreamDefaultController;
+    ReadableByteStreamController: typeof ReadableByteStreamController;
+    WritableStreamDefaultController: typeof WritableStreamDefaultController;
+    TransformStreamDefaultController: typeof TransformStreamDefaultController;
+    CompressionStream: typeof CompressionStream;
+    DecompressionStream: typeof DecompressionStream;
+    TextEncoderStream: typeof TextEncoderStream;
+    TextDecoderStream: typeof TextDecoderStream;
+    Headers: typeof Headers;
+    Body: typeof Body;
+    Request: typeof Request;
+    Response: typeof Response;
+    WebSocket: typeof WebSocket;
+    WebSocketPair: typeof WebSocketPair;
+    WebSocketRequestResponsePair: typeof WebSocketRequestResponsePair;
+    AbortController: typeof AbortController;
+    AbortSignal: typeof AbortSignal;
+    TextDecoder: typeof TextDecoder;
+    TextEncoder: typeof TextEncoder;
+    navigator: Navigator;
+    Navigator: typeof Navigator;
+    URL: typeof URL;
+    URLSearchParams: typeof URLSearchParams;
+    URLPattern: typeof URLPattern;
+    Blob: typeof Blob;
+    File: typeof File;
+    FormData: typeof FormData;
+    Crypto: typeof Crypto;
+    SubtleCrypto: typeof SubtleCrypto;
+    CryptoKey: typeof CryptoKey;
+    CacheStorage: typeof CacheStorage;
+    Cache: typeof Cache;
+    FixedLengthStream: typeof FixedLengthStream;
+    IdentityTransformStream: typeof IdentityTransformStream;
+    HTMLRewriter: typeof HTMLRewriter;
+}
+declare function addEventListener<Type extends keyof WorkerGlobalScopeEventMap>(type: Type, handler: EventListenerOrEventListenerObject<WorkerGlobalScopeEventMap[Type]>, options?: EventTargetAddEventListenerOptions | boolean): void;
+declare function removeEventListener<Type extends keyof WorkerGlobalScopeEventMap>(type: Type, handler: EventListenerOrEventListenerObject<WorkerGlobalScopeEventMap[Type]>, options?: EventTargetEventListenerOptions | boolean): void;
+/**
+ * The **`dispatchEvent()`** method of the EventTarget sends an Event to the object, (synchronously) invoking the affected event listeners in the appropriate order.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/EventTarget/dispatchEvent)
+ */
+declare function dispatchEvent(event: WorkerGlobalScopeEventMap[keyof WorkerGlobalScopeEventMap]): boolean;
+/* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Window/btoa) */
+declare function btoa(data: string): string;
+/* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Window/atob) */
+declare function atob(data: string): string;
+/* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Window/setTimeout) */
+declare function setTimeout(callback: (...args: any[]) => void, msDelay?: number): number;
+/* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Window/setTimeout) */
+declare function setTimeout<Args extends any[]>(callback: (...args: Args) => void, msDelay?: number, ...args: Args): number;
+/* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Window/clearTimeout) */
+declare function clearTimeout(timeoutId: number | null): void;
+/* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Window/setInterval) */
+declare function setInterval(callback: (...args: any[]) => void, msDelay?: number): number;
+/* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Window/setInterval) */
+declare function setInterval<Args extends any[]>(callback: (...args: Args) => void, msDelay?: number, ...args: Args): number;
+/* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Window/clearInterval) */
+declare function clearInterval(timeoutId: number | null): void;
+/* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Window/queueMicrotask) */
+declare function queueMicrotask(task: Function): void;
+/* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Window/structuredClone) */
+declare function structuredClone<T>(value: T, options?: StructuredSerializeOptions): T;
+/* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Window/reportError) */
+declare function reportError(error: any): void;
+/* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Window/fetch) */
+declare function fetch(input: RequestInfo | URL, init?: RequestInit<RequestInitCfProperties>): Promise<Response>;
+declare const self: ServiceWorkerGlobalScope;
+/**
+* The Web Crypto API provides a set of low-level functions for common cryptographic tasks.
+* The Workers runtime implements the full surface of this API, but with some differences in
+* the [supported algorithms](https://developers.cloudflare.com/workers/runtime-apis/web-crypto/#supported-algorithms)
+* compared to those implemented in most browsers.
+*
+* [Cloudflare Docs Reference](https://developers.cloudflare.com/workers/runtime-apis/web-crypto/)
+*/
+declare const crypto: Crypto;
+/**
+* The Cache API allows fine grained control of reading and writing from the Cloudflare global network cache.
+*
+* [Cloudflare Docs Reference](https://developers.cloudflare.com/workers/runtime-apis/cache/)
+*/
+declare const caches: CacheStorage;
+declare const scheduler: Scheduler;
+/**
+* The Workers runtime supports a subset of the Performance API, used to measure timing and performance,
+* as well as timing of subrequests and other operations.
+*
+* [Cloudflare Docs Reference](https://developers.cloudflare.com/workers/runtime-apis/performance/)
+*/
+declare const performance: Performance;
+declare const Cloudflare: Cloudflare;
+declare const origin: string;
+declare const navigator: Navigator;
+interface TestController {
+}
+interface ExecutionContext<Props = unknown> {
+    waitUntil(promise: Promise<any>): void;
+    passThroughOnException(): void;
+    readonly exports: Cloudflare.Exports;
+    readonly props: Props;
+}
+type ExportedHandlerFetchHandler<Env = unknown, CfHostMetadata = unknown> = (request: Request<CfHostMetadata, IncomingRequestCfProperties<CfHostMetadata>>, env: Env, ctx: ExecutionContext) => Response | Promise<Response>;
+type ExportedHandlerTailHandler<Env = unknown> = (events: TraceItem[], env: Env, ctx: ExecutionContext) => void | Promise<void>;
+type ExportedHandlerTraceHandler<Env = unknown> = (traces: TraceItem[], env: Env, ctx: ExecutionContext) => void | Promise<void>;
+type ExportedHandlerTailStreamHandler<Env = unknown> = (event: TailStream.TailEvent<TailStream.Onset>, env: Env, ctx: ExecutionContext) => TailStream.TailEventHandlerType | Promise<TailStream.TailEventHandlerType>;
+type ExportedHandlerScheduledHandler<Env = unknown> = (controller: ScheduledController, env: Env, ctx: ExecutionContext) => void | Promise<void>;
+type ExportedHandlerQueueHandler<Env = unknown, Message = unknown> = (batch: MessageBatch<Message>, env: Env, ctx: ExecutionContext) => void | Promise<void>;
+type ExportedHandlerTestHandler<Env = unknown> = (controller: TestController, env: Env, ctx: ExecutionContext) => void | Promise<void>;
+interface ExportedHandler<Env = unknown, QueueHandlerMessage = unknown, CfHostMetadata = unknown> {
+    fetch?: ExportedHandlerFetchHandler<Env, CfHostMetadata>;
+    tail?: ExportedHandlerTailHandler<Env>;
+    trace?: ExportedHandlerTraceHandler<Env>;
+    tailStream?: ExportedHandlerTailStreamHandler<Env>;
+    scheduled?: ExportedHandlerScheduledHandler<Env>;
+    test?: ExportedHandlerTestHandler<Env>;
+    email?: EmailExportedHandler<Env>;
+    queue?: ExportedHandlerQueueHandler<Env, QueueHandlerMessage>;
+}
+interface StructuredSerializeOptions {
+    transfer?: any[];
+}
+declare abstract class Navigator {
+    sendBeacon(url: string, body?: BodyInit): boolean;
+    readonly userAgent: string;
+    readonly hardwareConcurrency: number;
+    readonly language: string;
+    readonly languages: string[];
+}
+interface AlarmInvocationInfo {
+    readonly isRetry: boolean;
+    readonly retryCount: number;
+}
+interface Cloudflare {
+    readonly compatibilityFlags: Record<string, boolean>;
+}
+interface DurableObject {
+    fetch(request: Request): Response | Promise<Response>;
+    alarm?(alarmInfo?: AlarmInvocationInfo): void | Promise<void>;
+    webSocketMessage?(ws: WebSocket, message: string | ArrayBuffer): void | Promise<void>;
+    webSocketClose?(ws: WebSocket, code: number, reason: string, wasClean: boolean): void | Promise<void>;
+    webSocketError?(ws: WebSocket, error: unknown): void | Promise<void>;
+}
+type DurableObjectStub<T extends Rpc.DurableObjectBranded | undefined = undefined> = Fetcher<T, "alarm" | "webSocketMessage" | "webSocketClose" | "webSocketError"> & {
+    readonly id: DurableObjectId;
+    readonly name?: string;
+};
+interface DurableObjectId {
+    toString(): string;
+    equals(other: DurableObjectId): boolean;
+    readonly name?: string;
+}
+declare abstract class DurableObjectNamespace<T extends Rpc.DurableObjectBranded | undefined = undefined> {
+    newUniqueId(options?: DurableObjectNamespaceNewUniqueIdOptions): DurableObjectId;
+    idFromName(name: string): DurableObjectId;
+    idFromString(id: string): DurableObjectId;
+    get(id: DurableObjectId, options?: DurableObjectNamespaceGetDurableObjectOptions): DurableObjectStub<T>;
+    getByName(name: string, options?: DurableObjectNamespaceGetDurableObjectOptions): DurableObjectStub<T>;
+    jurisdiction(jurisdiction: DurableObjectJurisdiction): DurableObjectNamespace<T>;
+}
+type DurableObjectJurisdiction = "eu" | "fedramp" | "fedramp-high";
+interface DurableObjectNamespaceNewUniqueIdOptions {
+    jurisdiction?: DurableObjectJurisdiction;
+}
+type DurableObjectLocationHint = "wnam" | "enam" | "sam" | "weur" | "eeur" | "apac" | "oc" | "afr" | "me";
+type DurableObjectRoutingMode = "primary-only";
+interface DurableObjectNamespaceGetDurableObjectOptions {
+    locationHint?: DurableObjectLocationHint;
+    routingMode?: DurableObjectRoutingMode;
+}
+interface DurableObjectClass<_T extends Rpc.DurableObjectBranded | undefined = undefined> {
+}
+interface DurableObjectState<Props = unknown> {
+    waitUntil(promise: Promise<any>): void;
+    readonly exports: Cloudflare.Exports;
+    readonly props: Props;
+    readonly id: DurableObjectId;
+    readonly storage: DurableObjectStorage;
+    container?: Container;
+    blockConcurrencyWhile<T>(callback: () => Promise<T>): Promise<T>;
+    acceptWebSocket(ws: WebSocket, tags?: string[]): void;
+    getWebSockets(tag?: string): WebSocket[];
+    setWebSocketAutoResponse(maybeReqResp?: WebSocketRequestResponsePair): void;
+    getWebSocketAutoResponse(): WebSocketRequestResponsePair | null;
+    getWebSocketAutoResponseTimestamp(ws: WebSocket): Date | null;
+    setHibernatableWebSocketEventTimeout(timeoutMs?: number): void;
+    getHibernatableWebSocketEventTimeout(): number | null;
+    getTags(ws: WebSocket): string[];
+    abort(reason?: string): void;
+}
+interface DurableObjectTransaction {
+    get<T = unknown>(key: string, options?: DurableObjectGetOptions): Promise<T | undefined>;
+    get<T = unknown>(keys: string[], options?: DurableObjectGetOptions): Promise<Map<string, T>>;
+    list<T = unknown>(options?: DurableObjectListOptions): Promise<Map<string, T>>;
+    put<T>(key: string, value: T, options?: DurableObjectPutOptions): Promise<void>;
+    put<T>(entries: Record<string, T>, options?: DurableObjectPutOptions): Promise<void>;
+    delete(key: string, options?: DurableObjectPutOptions): Promise<boolean>;
+    delete(keys: string[], options?: DurableObjectPutOptions): Promise<number>;
+    rollback(): void;
+    getAlarm(options?: DurableObjectGetAlarmOptions): Promise<number | null>;
+    setAlarm(scheduledTime: number | Date, options?: DurableObjectSetAlarmOptions): Promise<void>;
+    deleteAlarm(options?: DurableObjectSetAlarmOptions): Promise<void>;
+}
+interface DurableObjectStorage {
+    get<T = unknown>(key: string, options?: DurableObjectGetOptions): Promise<T | undefined>;
+    get<T = unknown>(keys: string[], options?: DurableObjectGetOptions): Promise<Map<string, T>>;
+    list<T = unknown>(options?: DurableObjectListOptions): Promise<Map<string, T>>;
+    put<T>(key: string, value: T, options?: DurableObjectPutOptions): Promise<void>;
+    put<T>(entries: Record<string, T>, options?: DurableObjectPutOptions): Promise<void>;
+    delete(key: string, options?: DurableObjectPutOptions): Promise<boolean>;
+    delete(keys: string[], options?: DurableObjectPutOptions): Promise<number>;
+    deleteAll(options?: DurableObjectPutOptions): Promise<void>;
+    transaction<T>(closure: (txn: DurableObjectTransaction) => Promise<T>): Promise<T>;
+    getAlarm(options?: DurableObjectGetAlarmOptions): Promise<number | null>;
+    setAlarm(scheduledTime: number | Date, options?: DurableObjectSetAlarmOptions): Promise<void>;
+    deleteAlarm(options?: DurableObjectSetAlarmOptions): Promise<void>;
+    sync(): Promise<void>;
+    sql: SqlStorage;
+    kv: SyncKvStorage;
+    transactionSync<T>(closure: () => T): T;
+    getCurrentBookmark(): Promise<string>;
+    getBookmarkForTime(timestamp: number | Date): Promise<string>;
+    onNextSessionRestoreBookmark(bookmark: string): Promise<string>;
+}
+interface DurableObjectListOptions {
+    start?: string;
+    startAfter?: string;
+    end?: string;
+    prefix?: string;
+    reverse?: boolean;
+    limit?: number;
+    allowConcurrency?: boolean;
+    noCache?: boolean;
+}
+interface DurableObjectGetOptions {
+    allowConcurrency?: boolean;
+    noCache?: boolean;
+}
+interface DurableObjectGetAlarmOptions {
+    allowConcurrency?: boolean;
+}
+interface DurableObjectPutOptions {
+    allowConcurrency?: boolean;
+    allowUnconfirmed?: boolean;
+    noCache?: boolean;
+}
+interface DurableObjectSetAlarmOptions {
+    allowConcurrency?: boolean;
+    allowUnconfirmed?: boolean;
+}
+declare class WebSocketRequestResponsePair {
+    constructor(request: string, response: string);
+    get request(): string;
+    get response(): string;
+}
+interface AnalyticsEngineDataset {
+    writeDataPoint(event?: AnalyticsEngineDataPoint): void;
+}
+interface AnalyticsEngineDataPoint {
+    indexes?: ((ArrayBuffer | string) | null)[];
+    doubles?: number[];
+    blobs?: ((ArrayBuffer | string) | null)[];
+}
+/**
+ * The **`Event`** interface represents an event which takes place on an `EventTarget`.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event)
+ */
+declare class Event {
+    constructor(type: string, init?: EventInit);
+    /**
+     * The **`type`** read-only property of the Event interface returns a string containing the event's type.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event/type)
+     */
+    get type(): string;
+    /**
+     * The **`eventPhase`** read-only property of the Event interface indicates which phase of the event flow is currently being evaluated.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event/eventPhase)
+     */
+    get eventPhase(): number;
+    /**
+     * The read-only **`composed`** property of the Event interface returns a boolean value which indicates whether or not the event will propagate across the shadow DOM boundary into the standard DOM.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event/composed)
+     */
+    get composed(): boolean;
+    /**
+     * The **`bubbles`** read-only property of the Event interface indicates whether the event bubbles up through the DOM tree or not.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event/bubbles)
+     */
+    get bubbles(): boolean;
+    /**
+     * The **`cancelable`** read-only property of the Event interface indicates whether the event can be canceled, and therefore prevented as if the event never happened.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event/cancelable)
+     */
+    get cancelable(): boolean;
+    /**
+     * The **`defaultPrevented`** read-only property of the Event interface returns a boolean value indicating whether or not the call to Event.preventDefault() canceled the event.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event/defaultPrevented)
+     */
+    get defaultPrevented(): boolean;
+    /**
+     * The Event property **`returnValue`** indicates whether the default action for this event has been prevented or not.
+     * @deprecated
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event/returnValue)
+     */
+    get returnValue(): boolean;
+    /**
+     * The **`currentTarget`** read-only property of the Event interface identifies the element to which the event handler has been attached.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event/currentTarget)
+     */
+    get currentTarget(): EventTarget | undefined;
+    /**
+     * The read-only **`target`** property of the Event interface is a reference to the object onto which the event was dispatched.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event/target)
+     */
+    get target(): EventTarget | undefined;
+    /**
+     * The deprecated **`Event.srcElement`** is an alias for the Event.target property.
+     * @deprecated
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event/srcElement)
+     */
+    get srcElement(): EventTarget | undefined;
+    /**
+     * The **`timeStamp`** read-only property of the Event interface returns the time (in milliseconds) at which the event was created.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event/timeStamp)
+     */
+    get timeStamp(): number;
+    /**
+     * The **`isTrusted`** read-only property of the Event interface is a boolean value that is `true` when the event was generated by the user agent (including via user actions and programmatic methods such as HTMLElement.focus()), and `false` when the event was dispatched via EventTarget.dispatchEvent(). The only exception is the `click` event, which initializes the `isTrusted` property to `false` in user agents.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event/isTrusted)
+     */
+    get isTrusted(): boolean;
+    /**
+     * The **`cancelBubble`** property of the Event interface is deprecated.
+     * @deprecated
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event/cancelBubble)
+     */
+    get cancelBubble(): boolean;
+    /**
+     * The **`cancelBubble`** property of the Event interface is deprecated.
+     * @deprecated
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event/cancelBubble)
+     */
+    set cancelBubble(value: boolean);
+    /**
+     * The **`stopImmediatePropagation()`** method of the Event interface prevents other listeners of the same event from being called. If several listeners are attached to the same element for the same event type, they are called in the order in which they were added.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event/stopImmediatePropagation)
+     */
+    stopImmediatePropagation(): void;
+    /**
+     * The **`preventDefault()`** method of the Event interface tells the user agent that if the event does not get explicitly handled, its default action should not be taken as it normally would be.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event/preventDefault)
+     */
+    preventDefault(): void;
+    /**
+     * The **`stopPropagation()`** method of the Event interface prevents further propagation of the current event in the capturing and bubbling phases.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event/stopPropagation)
+     */
+    stopPropagation(): void;
+    /**
+     * The **`composedPath()`** method of the Event interface returns the event's path which is an array of the objects on which listeners will be invoked.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Event/composedPath)
+     */
+    composedPath(): EventTarget[];
+    static readonly NONE: number;
+    static readonly CAPTURING_PHASE: number;
+    static readonly AT_TARGET: number;
+    static readonly BUBBLING_PHASE: number;
+}
+interface EventInit {
+    bubbles?: boolean;
+    cancelable?: boolean;
+    composed?: boolean;
+}
+type EventListener<EventType extends Event = Event> = (event: EventType) => void;
+interface EventListenerObject<EventType extends Event = Event> {
+    handleEvent(event: EventType): void;
+}
+type EventListenerOrEventListenerObject<EventType extends Event = Event> = EventListener<EventType> | EventListenerObject<EventType>;
+/**
+ * The **`EventTarget`** interface is implemented by objects that can receive events and may have listeners for them.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/EventTarget)
+ */
+declare class EventTarget<EventMap extends Record<string, Event> = Record<string, Event>> {
+    constructor();
+    /**
+     * The **`addEventListener()`** method of the EventTarget interface sets up a function that will be called whenever the specified event is delivered to the target.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/EventTarget/addEventListener)
+     */
+    addEventListener<Type extends keyof EventMap>(type: Type, handler: EventListenerOrEventListenerObject<EventMap[Type]>, options?: EventTargetAddEventListenerOptions | boolean): void;
+    /**
+     * The **`removeEventListener()`** method of the EventTarget interface removes an event listener previously registered with EventTarget.addEventListener() from the target.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/EventTarget/removeEventListener)
+     */
+    removeEventListener<Type extends keyof EventMap>(type: Type, handler: EventListenerOrEventListenerObject<EventMap[Type]>, options?: EventTargetEventListenerOptions | boolean): void;
+    /**
+     * The **`dispatchEvent()`** method of the EventTarget sends an Event to the object, (synchronously) invoking the affected event listeners in the appropriate order.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/EventTarget/dispatchEvent)
+     */
+    dispatchEvent(event: EventMap[keyof EventMap]): boolean;
+}
+interface EventTargetEventListenerOptions {
+    capture?: boolean;
+}
+interface EventTargetAddEventListenerOptions {
+    capture?: boolean;
+    passive?: boolean;
+    once?: boolean;
+    signal?: AbortSignal;
+}
+interface EventTargetHandlerObject {
+    handleEvent: (event: Event) => any | undefined;
+}
+/**
+ * The **`AbortController`** interface represents a controller object that allows you to abort one or more Web requests as and when desired.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/AbortController)
+ */
+declare class AbortController {
+    constructor();
+    /**
+     * The **`signal`** read-only property of the AbortController interface returns an AbortSignal object instance, which can be used to communicate with/abort an asynchronous operation as desired.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/AbortController/signal)
+     */
+    get signal(): AbortSignal;
+    /**
+     * The **`abort()`** method of the AbortController interface aborts an asynchronous operation before it has completed.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/AbortController/abort)
+     */
+    abort(reason?: any): void;
+}
+/**
+ * The **`AbortSignal`** interface represents a signal object that allows you to communicate with an asynchronous operation (such as a fetch request) and abort it if required via an AbortController object.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/AbortSignal)
+ */
+declare abstract class AbortSignal extends EventTarget {
+    /**
+     * The **`AbortSignal.abort()`** static method returns an AbortSignal that is already set as aborted (and which does not trigger an AbortSignal/abort_event event).
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/AbortSignal/abort_static)
+     */
+    static abort(reason?: any): AbortSignal;
+    /**
+     * The **`AbortSignal.timeout()`** static method returns an AbortSignal that will automatically abort after a specified time.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/AbortSignal/timeout_static)
+     */
+    static timeout(delay: number): AbortSignal;
+    /**
+     * The **`AbortSignal.any()`** static method takes an iterable of abort signals and returns an AbortSignal.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/AbortSignal/any_static)
+     */
+    static any(signals: AbortSignal[]): AbortSignal;
+    /**
+     * The **`aborted`** read-only property returns a value that indicates whether the asynchronous operations the signal is communicating with are aborted (`true`) or not (`false`).
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/AbortSignal/aborted)
+     */
+    get aborted(): boolean;
+    /**
+     * The **`reason`** read-only property returns a JavaScript value that indicates the abort reason.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/AbortSignal/reason)
+     */
+    get reason(): any;
+    /* [MDN Reference](https://developer.mozilla.org/docs/Web/API/AbortSignal/abort_event) */
+    get onabort(): any | null;
+    /* [MDN Reference](https://developer.mozilla.org/docs/Web/API/AbortSignal/abort_event) */
+    set onabort(value: any | null);
+    /**
+     * The **`throwIfAborted()`** method throws the signal's abort reason (AbortSignal.reason) if the signal has been aborted; otherwise it does nothing.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/AbortSignal/throwIfAborted)
+     */
+    throwIfAborted(): void;
+}
+interface Scheduler {
+    wait(delay: number, maybeOptions?: SchedulerWaitOptions): Promise<void>;
+}
+interface SchedulerWaitOptions {
+    signal?: AbortSignal;
+}
+/**
+ * The **`ExtendableEvent`** interface extends the lifetime of the `install` and `activate` events dispatched on the global scope as part of the service worker lifecycle.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ExtendableEvent)
+ */
+declare abstract class ExtendableEvent extends Event {
+    /**
+     * The **`ExtendableEvent.waitUntil()`** method tells the event dispatcher that work is ongoing.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ExtendableEvent/waitUntil)
+     */
+    waitUntil(promise: Promise<any>): void;
+}
+/**
+ * The **`CustomEvent`** interface represents events initialized by an application for any purpose.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/CustomEvent)
+ */
+declare class CustomEvent<T = any> extends Event {
+    constructor(type: string, init?: CustomEventCustomEventInit);
+    /**
+     * The read-only **`detail`** property of the CustomEvent interface returns any data passed when initializing the event.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/CustomEvent/detail)
+     */
+    get detail(): T;
+}
+interface CustomEventCustomEventInit {
+    bubbles?: boolean;
+    cancelable?: boolean;
+    composed?: boolean;
+    detail?: any;
+}
+/**
+ * The **`Blob`** interface represents a blob, which is a file-like object of immutable, raw data; they can be read as text or binary data, or converted into a ReadableStream so its methods can be used for processing the data.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Blob)
+ */
+declare class Blob {
+    constructor(type?: ((ArrayBuffer | ArrayBufferView) | string | Blob)[], options?: BlobOptions);
+    /**
+     * The **`size`** read-only property of the Blob interface returns the size of the Blob or File in bytes.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Blob/size)
+     */
+    get size(): number;
+    /**
+     * The **`type`** read-only property of the Blob interface returns the MIME type of the file.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Blob/type)
+     */
+    get type(): string;
+    /**
+     * The **`slice()`** method of the Blob interface creates and returns a new `Blob` object which contains data from a subset of the blob on which it's called.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Blob/slice)
+     */
+    slice(start?: number, end?: number, type?: string): Blob;
+    /**
+     * The **`arrayBuffer()`** method of the Blob interface returns a Promise that resolves with the contents of the blob as binary data contained in an ArrayBuffer.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Blob/arrayBuffer)
+     */
+    arrayBuffer(): Promise<ArrayBuffer>;
+    /**
+     * The **`bytes()`** method of the Blob interface returns a Promise that resolves with a Uint8Array containing the contents of the blob as an array of bytes.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Blob/bytes)
+     */
+    bytes(): Promise<Uint8Array>;
+    /**
+     * The **`text()`** method of the Blob interface returns a Promise that resolves with a string containing the contents of the blob, interpreted as UTF-8.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Blob/text)
+     */
+    text(): Promise<string>;
+    /**
+     * The **`stream()`** method of the Blob interface returns a ReadableStream which upon reading returns the data contained within the `Blob`.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Blob/stream)
+     */
+    stream(): ReadableStream;
+}
+interface BlobOptions {
+    type?: string;
+}
+/**
+ * The **`File`** interface provides information about files and allows JavaScript in a web page to access their content.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/File)
+ */
+declare class File extends Blob {
+    constructor(bits: ((ArrayBuffer | ArrayBufferView) | string | Blob)[] | undefined, name: string, options?: FileOptions);
+    /**
+     * The **`name`** read-only property of the File interface returns the name of the file represented by a File object.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/File/name)
+     */
+    get name(): string;
+    /**
+     * The **`lastModified`** read-only property of the File interface provides the last modified date of the file as the number of milliseconds since the Unix epoch (January 1, 1970 at midnight).
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/File/lastModified)
+     */
+    get lastModified(): number;
+}
+interface FileOptions {
+    type?: string;
+    lastModified?: number;
+}
+/**
+* The Cache API allows fine grained control of reading and writing from the Cloudflare global network cache.
+*
+* [Cloudflare Docs Reference](https://developers.cloudflare.com/workers/runtime-apis/cache/)
+*/
+declare abstract class CacheStorage {
+    /**
+     * The **`open()`** method of the CacheStorage interface returns a Promise that resolves to the Cache object matching the `cacheName`.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/CacheStorage/open)
+     */
+    open(cacheName: string): Promise<Cache>;
+    readonly default: Cache;
+}
+/**
+* The Cache API allows fine grained control of reading and writing from the Cloudflare global network cache.
+*
+* [Cloudflare Docs Reference](https://developers.cloudflare.com/workers/runtime-apis/cache/)
+*/
+declare abstract class Cache {
+    /* [Cloudflare Docs Reference](https://developers.cloudflare.com/workers/runtime-apis/cache/#delete) */
+    delete(request: RequestInfo | URL, options?: CacheQueryOptions): Promise<boolean>;
+    /* [Cloudflare Docs Reference](https://developers.cloudflare.com/workers/runtime-apis/cache/#match) */
+    match(request: RequestInfo | URL, options?: CacheQueryOptions): Promise<Response | undefined>;
+    /* [Cloudflare Docs Reference](https://developers.cloudflare.com/workers/runtime-apis/cache/#put) */
+    put(request: RequestInfo | URL, response: Response): Promise<void>;
+}
+interface CacheQueryOptions {
+    ignoreMethod?: boolean;
+}
+/**
+* The Web Crypto API provides a set of low-level functions for common cryptographic tasks.
+* The Workers runtime implements the full surface of this API, but with some differences in
+* the [supported algorithms](https://developers.cloudflare.com/workers/runtime-apis/web-crypto/#supported-algorithms)
+* compared to those implemented in most browsers.
+*
+* [Cloudflare Docs Reference](https://developers.cloudflare.com/workers/runtime-apis/web-crypto/)
+*/
+declare abstract class Crypto {
+    /**
+     * The **`Crypto.subtle`** read-only property returns a SubtleCrypto which can then be used to perform low-level cryptographic operations.
+     * Available only in secure contexts.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Crypto/subtle)
+     */
+    get subtle(): SubtleCrypto;
+    /**
+     * The **`Crypto.getRandomValues()`** method lets you get cryptographically strong random values.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Crypto/getRandomValues)
+     */
+    getRandomValues<T extends Int8Array | Uint8Array | Int16Array | Uint16Array | Int32Array | Uint32Array | BigInt64Array | BigUint64Array>(buffer: T): T;
+    /**
+     * The **`randomUUID()`** method of the Crypto interface is used to generate a v4 UUID using a cryptographically secure random number generator.
+     * Available only in secure contexts.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Crypto/randomUUID)
+     */
+    randomUUID(): string;
+    DigestStream: typeof DigestStream;
+}
+/**
+ * The **`SubtleCrypto`** interface of the Web Crypto API provides a number of low-level cryptographic functions.
+ * Available only in secure contexts.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/SubtleCrypto)
+ */
+declare abstract class SubtleCrypto {
+    /**
+     * The **`encrypt()`** method of the SubtleCrypto interface encrypts data.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/SubtleCrypto/encrypt)
+     */
+    encrypt(algorithm: string | SubtleCryptoEncryptAlgorithm, key: CryptoKey, plainText: ArrayBuffer | ArrayBufferView): Promise<ArrayBuffer>;
+    /**
+     * The **`decrypt()`** method of the SubtleCrypto interface decrypts some encrypted data.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/SubtleCrypto/decrypt)
+     */
+    decrypt(algorithm: string | SubtleCryptoEncryptAlgorithm, key: CryptoKey, cipherText: ArrayBuffer | ArrayBufferView): Promise<ArrayBuffer>;
+    /**
+     * The **`sign()`** method of the SubtleCrypto interface generates a digital signature.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/SubtleCrypto/sign)
+     */
+    sign(algorithm: string | SubtleCryptoSignAlgorithm, key: CryptoKey, data: ArrayBuffer | ArrayBufferView): Promise<ArrayBuffer>;
+    /**
+     * The **`verify()`** method of the SubtleCrypto interface verifies a digital signature.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/SubtleCrypto/verify)
+     */
+    verify(algorithm: string | SubtleCryptoSignAlgorithm, key: CryptoKey, signature: ArrayBuffer | ArrayBufferView, data: ArrayBuffer | ArrayBufferView): Promise<boolean>;
+    /**
+     * The **`digest()`** method of the SubtleCrypto interface generates a _digest_ of the given data, using the specified hash function.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/SubtleCrypto/digest)
+     */
+    digest(algorithm: string | SubtleCryptoHashAlgorithm, data: ArrayBuffer | ArrayBufferView): Promise<ArrayBuffer>;
+    /**
+     * The **`generateKey()`** method of the SubtleCrypto interface is used to generate a new key (for symmetric algorithms) or key pair (for public-key algorithms).
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/SubtleCrypto/generateKey)
+     */
+    generateKey(algorithm: string | SubtleCryptoGenerateKeyAlgorithm, extractable: boolean, keyUsages: string[]): Promise<CryptoKey | CryptoKeyPair>;
+    /**
+     * The **`deriveKey()`** method of the SubtleCrypto interface can be used to derive a secret key from a master key.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/SubtleCrypto/deriveKey)
+     */
+    deriveKey(algorithm: string | SubtleCryptoDeriveKeyAlgorithm, baseKey: CryptoKey, derivedKeyAlgorithm: string | SubtleCryptoImportKeyAlgorithm, extractable: boolean, keyUsages: string[]): Promise<CryptoKey>;
+    /**
+     * The **`deriveBits()`** method of the SubtleCrypto interface can be used to derive an array of bits from a base key.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/SubtleCrypto/deriveBits)
+     */
+    deriveBits(algorithm: string | SubtleCryptoDeriveKeyAlgorithm, baseKey: CryptoKey, length?: number | null): Promise<ArrayBuffer>;
+    /**
+     * The **`importKey()`** method of the SubtleCrypto interface imports a key: that is, it takes as input a key in an external, portable format and gives you a CryptoKey object that you can use in the Web Crypto API.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/SubtleCrypto/importKey)
+     */
+    importKey(format: string, keyData: (ArrayBuffer | ArrayBufferView) | JsonWebKey, algorithm: string | SubtleCryptoImportKeyAlgorithm, extractable: boolean, keyUsages: string[]): Promise<CryptoKey>;
+    /**
+     * The **`exportKey()`** method of the SubtleCrypto interface exports a key: that is, it takes as input a CryptoKey object and gives you the key in an external, portable format.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/SubtleCrypto/exportKey)
+     */
+    exportKey(format: string, key: CryptoKey): Promise<ArrayBuffer | JsonWebKey>;
+    /**
+     * The **`wrapKey()`** method of the SubtleCrypto interface 'wraps' a key.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/SubtleCrypto/wrapKey)
+     */
+    wrapKey(format: string, key: CryptoKey, wrappingKey: CryptoKey, wrapAlgorithm: string | SubtleCryptoEncryptAlgorithm): Promise<ArrayBuffer>;
+    /**
+     * The **`unwrapKey()`** method of the SubtleCrypto interface 'unwraps' a key.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/SubtleCrypto/unwrapKey)
+     */
+    unwrapKey(format: string, wrappedKey: ArrayBuffer | ArrayBufferView, unwrappingKey: CryptoKey, unwrapAlgorithm: string | SubtleCryptoEncryptAlgorithm, unwrappedKeyAlgorithm: string | SubtleCryptoImportKeyAlgorithm, extractable: boolean, keyUsages: string[]): Promise<CryptoKey>;
+    timingSafeEqual(a: ArrayBuffer | ArrayBufferView, b: ArrayBuffer | ArrayBufferView): boolean;
+}
+/**
+ * The **`CryptoKey`** interface of the Web Crypto API represents a cryptographic key obtained from one of the SubtleCrypto methods SubtleCrypto.generateKey, SubtleCrypto.deriveKey, SubtleCrypto.importKey, or SubtleCrypto.unwrapKey.
+ * Available only in secure contexts.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/CryptoKey)
+ */
+declare abstract class CryptoKey {
+    /**
+     * The read-only **`type`** property of the CryptoKey interface indicates which kind of key is represented by the object.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/CryptoKey/type)
+     */
+    readonly type: string;
+    /**
+     * The read-only **`extractable`** property of the CryptoKey interface indicates whether or not the key may be extracted using `SubtleCrypto.exportKey()` or `SubtleCrypto.wrapKey()`.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/CryptoKey/extractable)
+     */
+    readonly extractable: boolean;
+    /**
+     * The read-only **`algorithm`** property of the CryptoKey interface returns an object describing the algorithm for which this key can be used, and any associated extra parameters.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/CryptoKey/algorithm)
+     */
+    readonly algorithm: CryptoKeyKeyAlgorithm | CryptoKeyAesKeyAlgorithm | CryptoKeyHmacKeyAlgorithm | CryptoKeyRsaKeyAlgorithm | CryptoKeyEllipticKeyAlgorithm | CryptoKeyArbitraryKeyAlgorithm;
+    /**
+     * The read-only **`usages`** property of the CryptoKey interface indicates what can be done with the key.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/CryptoKey/usages)
+     */
+    readonly usages: string[];
+}
+interface CryptoKeyPair {
+    publicKey: CryptoKey;
+    privateKey: CryptoKey;
+}
+interface JsonWebKey {
+    kty: string;
+    use?: string;
+    key_ops?: string[];
+    alg?: string;
+    ext?: boolean;
+    crv?: string;
+    x?: string;
+    y?: string;
+    d?: string;
+    n?: string;
+    e?: string;
+    p?: string;
+    q?: string;
+    dp?: string;
+    dq?: string;
+    qi?: string;
+    oth?: RsaOtherPrimesInfo[];
+    k?: string;
+}
+interface RsaOtherPrimesInfo {
+    r?: string;
+    d?: string;
+    t?: string;
+}
+interface SubtleCryptoDeriveKeyAlgorithm {
+    name: string;
+    salt?: (ArrayBuffer | ArrayBufferView);
+    iterations?: number;
+    hash?: (string | SubtleCryptoHashAlgorithm);
+    $public?: CryptoKey;
+    info?: (ArrayBuffer | ArrayBufferView);
+}
+interface SubtleCryptoEncryptAlgorithm {
+    name: string;
+    iv?: (ArrayBuffer | ArrayBufferView);
+    additionalData?: (ArrayBuffer | ArrayBufferView);
+    tagLength?: number;
+    counter?: (ArrayBuffer | ArrayBufferView);
+    length?: number;
+    label?: (ArrayBuffer | ArrayBufferView);
+}
+interface SubtleCryptoGenerateKeyAlgorithm {
+    name: string;
+    hash?: (string | SubtleCryptoHashAlgorithm);
+    modulusLength?: number;
+    publicExponent?: (ArrayBuffer | ArrayBufferView);
+    length?: number;
+    namedCurve?: string;
+}
+interface SubtleCryptoHashAlgorithm {
+    name: string;
+}
+interface SubtleCryptoImportKeyAlgorithm {
+    name: string;
+    hash?: (string | SubtleCryptoHashAlgorithm);
+    length?: number;
+    namedCurve?: string;
+    compressed?: boolean;
+}
+interface SubtleCryptoSignAlgorithm {
+    name: string;
+    hash?: (string | SubtleCryptoHashAlgorithm);
+    dataLength?: number;
+    saltLength?: number;
+}
+interface CryptoKeyKeyAlgorithm {
+    name: string;
+}
+interface CryptoKeyAesKeyAlgorithm {
+    name: string;
+    length: number;
+}
+interface CryptoKeyHmacKeyAlgorithm {
+    name: string;
+    hash: CryptoKeyKeyAlgorithm;
+    length: number;
+}
+interface CryptoKeyRsaKeyAlgorithm {
+    name: string;
+    modulusLength: number;
+    publicExponent: ArrayBuffer | ArrayBufferView;
+    hash?: CryptoKeyKeyAlgorithm;
+}
+interface CryptoKeyEllipticKeyAlgorithm {
+    name: string;
+    namedCurve: string;
+}
+interface CryptoKeyArbitraryKeyAlgorithm {
+    name: string;
+    hash?: CryptoKeyKeyAlgorithm;
+    namedCurve?: string;
+    length?: number;
+}
+declare class DigestStream extends WritableStream<ArrayBuffer | ArrayBufferView> {
+    constructor(algorithm: string | SubtleCryptoHashAlgorithm);
+    readonly digest: Promise<ArrayBuffer>;
+    get bytesWritten(): number | bigint;
+}
+/**
+ * The **`TextDecoder`** interface represents a decoder for a specific text encoding, such as `UTF-8`, `ISO-8859-2`, `KOI8-R`, `GBK`, etc.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/TextDecoder)
+ */
+declare class TextDecoder {
+    constructor(label?: string, options?: TextDecoderConstructorOptions);
+    /**
+     * The **`TextDecoder.decode()`** method returns a string containing text decoded from the buffer passed as a parameter.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/TextDecoder/decode)
+     */
+    decode(input?: (ArrayBuffer | ArrayBufferView), options?: TextDecoderDecodeOptions): string;
+    get encoding(): string;
+    get fatal(): boolean;
+    get ignoreBOM(): boolean;
+}
+/**
+ * The **`TextEncoder`** interface takes a stream of code points as input and emits a stream of UTF-8 bytes.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/TextEncoder)
+ */
+declare class TextEncoder {
+    constructor();
+    /**
+     * The **`TextEncoder.encode()`** method takes a string as input, and returns a Uint8Array containing the text given in parameters encoded with the specific method for that TextEncoder object.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/TextEncoder/encode)
+     */
+    encode(input?: string): Uint8Array;
+    /**
+     * The **`TextEncoder.encodeInto()`** method takes a string to encode and a destination Uint8Array to put resulting UTF-8 encoded text into, and returns a dictionary object indicating the progress of the encoding.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/TextEncoder/encodeInto)
+     */
+    encodeInto(input: string, buffer: Uint8Array): TextEncoderEncodeIntoResult;
+    get encoding(): string;
+}
+interface TextDecoderConstructorOptions {
+    fatal: boolean;
+    ignoreBOM: boolean;
+}
+interface TextDecoderDecodeOptions {
+    stream: boolean;
+}
+interface TextEncoderEncodeIntoResult {
+    read: number;
+    written: number;
+}
+/**
+ * The **`ErrorEvent`** interface represents events providing information related to errors in scripts or in files.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ErrorEvent)
+ */
+declare class ErrorEvent extends Event {
+    constructor(type: string, init?: ErrorEventErrorEventInit);
+    /**
+     * The **`filename`** read-only property of the ErrorEvent interface returns a string containing the name of the script file in which the error occurred.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ErrorEvent/filename)
+     */
+    get filename(): string;
+    /**
+     * The **`message`** read-only property of the ErrorEvent interface returns a string containing a human-readable error message describing the problem.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ErrorEvent/message)
+     */
+    get message(): string;
+    /**
+     * The **`lineno`** read-only property of the ErrorEvent interface returns an integer containing the line number of the script file on which the error occurred.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ErrorEvent/lineno)
+     */
+    get lineno(): number;
+    /**
+     * The **`colno`** read-only property of the ErrorEvent interface returns an integer containing the column number of the script file on which the error occurred.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ErrorEvent/colno)
+     */
+    get colno(): number;
+    /**
+     * The **`error`** read-only property of the ErrorEvent interface returns a JavaScript value, such as an Error or DOMException, representing the error associated with this event.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ErrorEvent/error)
+     */
+    get error(): any;
+}
+interface ErrorEventErrorEventInit {
+    message?: string;
+    filename?: string;
+    lineno?: number;
+    colno?: number;
+    error?: any;
+}
+/**
+ * The **`MessageEvent`** interface represents a message received by a target object.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/MessageEvent)
+ */
+declare class MessageEvent extends Event {
+    constructor(type: string, initializer: MessageEventInit);
+    /**
+     * The **`data`** read-only property of the MessageEvent interface represents the data sent by the message emitter; this can be any data type, depending on what originated this event.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/MessageEvent/data)
+     */
+    readonly data: any;
+    /**
+     * The **`origin`** read-only property of the MessageEvent interface represents the origin of the message emitter.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/MessageEvent/origin)
+     */
+    readonly origin: string | null;
+    /**
+     * The **`lastEventId`** read-only property of the MessageEvent interface is a string representing a unique ID for the event.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/MessageEvent/lastEventId)
+     */
+    readonly lastEventId: string;
+    /**
+     * The **`source`** read-only property of the MessageEvent interface is a `MessageEventSource` (which can be a WindowProxy, MessagePort, or ServiceWorker object) representing the message emitter.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/MessageEvent/source)
+     */
+    readonly source: MessagePort | null;
+    /**
+     * The **`ports`** read-only property of the MessageEvent interface is an array containing all MessagePort objects sent with the message, in order.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/MessageEvent/ports)
+     */
+    readonly ports: MessagePort[];
+}
+interface MessageEventInit {
+    data: ArrayBuffer | string;
+}
+/**
+ * The **`PromiseRejectionEvent`** interface represents events which are sent to the global script context when JavaScript Promises are rejected.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/PromiseRejectionEvent)
+ */
+declare abstract class PromiseRejectionEvent extends Event {
+    /**
+     * The PromiseRejectionEvent interface's **`promise`** read-only property indicates the JavaScript Promise which was rejected.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/PromiseRejectionEvent/promise)
+     */
+    readonly promise: Promise<any>;
+    /**
+     * The PromiseRejectionEvent **`reason`** read-only property is any JavaScript value or Object which provides the reason passed into Promise.reject().
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/PromiseRejectionEvent/reason)
+     */
+    readonly reason: any;
+}
+/**
+ * The **`FormData`** interface provides a way to construct a set of key/value pairs representing form fields and their values, which can be sent using the fetch(), XMLHttpRequest.send() or navigator.sendBeacon() methods.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/FormData)
+ */
+declare class FormData {
+    constructor();
+    /**
+     * The **`append()`** method of the FormData interface appends a new value onto an existing key inside a `FormData` object, or adds the key if it does not already exist.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/FormData/append)
+     */
+    append(name: string, value: string | Blob): void;
+    /**
+     * The **`append()`** method of the FormData interface appends a new value onto an existing key inside a `FormData` object, or adds the key if it does not already exist.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/FormData/append)
+     */
+    append(name: string, value: string): void;
+    /**
+     * The **`append()`** method of the FormData interface appends a new value onto an existing key inside a `FormData` object, or adds the key if it does not already exist.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/FormData/append)
+     */
+    append(name: string, value: Blob, filename?: string): void;
+    /**
+     * The **`delete()`** method of the FormData interface deletes a key and its value(s) from a `FormData` object.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/FormData/delete)
+     */
+    delete(name: string): void;
+    /**
+     * The **`get()`** method of the FormData interface returns the first value associated with a given key from within a `FormData` object.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/FormData/get)
+     */
+    get(name: string): (File | string) | null;
+    /**
+     * The **`getAll()`** method of the FormData interface returns all the values associated with a given key from within a `FormData` object.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/FormData/getAll)
+     */
+    getAll(name: string): (File | string)[];
+    /**
+     * The **`has()`** method of the FormData interface returns whether a `FormData` object contains a certain key.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/FormData/has)
+     */
+    has(name: string): boolean;
+    /**
+     * The **`set()`** method of the FormData interface sets a new value for an existing key inside a `FormData` object, or adds the key/value if it does not already exist.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/FormData/set)
+     */
+    set(name: string, value: string | Blob): void;
+    /**
+     * The **`set()`** method of the FormData interface sets a new value for an existing key inside a `FormData` object, or adds the key/value if it does not already exist.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/FormData/set)
+     */
+    set(name: string, value: string): void;
+    /**
+     * The **`set()`** method of the FormData interface sets a new value for an existing key inside a `FormData` object, or adds the key/value if it does not already exist.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/FormData/set)
+     */
+    set(name: string, value: Blob, filename?: string): void;
+    /* Returns an array of key, value pairs for every entry in the list. */
+    entries(): IterableIterator<[
+        key: string,
+        value: File | string
+    ]>;
+    /* Returns a list of keys in the list. */
+    keys(): IterableIterator<string>;
+    /* Returns a list of values in the list. */
+    values(): IterableIterator<(File | string)>;
+    forEach<This = unknown>(callback: (this: This, value: File | string, key: string, parent: FormData) => void, thisArg?: This): void;
+    [Symbol.iterator](): IterableIterator<[
+        key: string,
+        value: File | string
+    ]>;
+}
+interface ContentOptions {
+    html?: boolean;
+}
+declare class HTMLRewriter {
+    constructor();
+    on(selector: string, handlers: HTMLRewriterElementContentHandlers): HTMLRewriter;
+    onDocument(handlers: HTMLRewriterDocumentContentHandlers): HTMLRewriter;
+    transform(response: Response): Response;
+}
+interface HTMLRewriterElementContentHandlers {
+    element?(element: Element): void | Promise<void>;
+    comments?(comment: Comment): void | Promise<void>;
+    text?(element: Text): void | Promise<void>;
+}
+interface HTMLRewriterDocumentContentHandlers {
+    doctype?(doctype: Doctype): void | Promise<void>;
+    comments?(comment: Comment): void | Promise<void>;
+    text?(text: Text): void | Promise<void>;
+    end?(end: DocumentEnd): void | Promise<void>;
+}
+interface Doctype {
+    readonly name: string | null;
+    readonly publicId: string | null;
+    readonly systemId: string | null;
+}
+interface Element {
+    tagName: string;
+    readonly attributes: IterableIterator<string[]>;
+    readonly removed: boolean;
+    readonly namespaceURI: string;
+    getAttribute(name: string): string | null;
+    hasAttribute(name: string): boolean;
+    setAttribute(name: string, value: string): Element;
+    removeAttribute(name: string): Element;
+    before(content: string | ReadableStream | Response, options?: ContentOptions): Element;
+    after(content: string | ReadableStream | Response, options?: ContentOptions): Element;
+    prepend(content: string | ReadableStream | Response, options?: ContentOptions): Element;
+    append(content: string | ReadableStream | Response, options?: ContentOptions): Element;
+    replace(content: string | ReadableStream | Response, options?: ContentOptions): Element;
+    remove(): Element;
+    removeAndKeepContent(): Element;
+    setInnerContent(content: string | ReadableStream | Response, options?: ContentOptions): Element;
+    onEndTag(handler: (tag: EndTag) => void | Promise<void>): void;
+}
+interface EndTag {
+    name: string;
+    before(content: string | ReadableStream | Response, options?: ContentOptions): EndTag;
+    after(content: string | ReadableStream | Response, options?: ContentOptions): EndTag;
+    remove(): EndTag;
+}
+interface Comment {
+    text: string;
+    readonly removed: boolean;
+    before(content: string, options?: ContentOptions): Comment;
+    after(content: string, options?: ContentOptions): Comment;
+    replace(content: string, options?: ContentOptions): Comment;
+    remove(): Comment;
+}
+interface Text {
+    readonly text: string;
+    readonly lastInTextNode: boolean;
+    readonly removed: boolean;
+    before(content: string | ReadableStream | Response, options?: ContentOptions): Text;
+    after(content: string | ReadableStream | Response, options?: ContentOptions): Text;
+    replace(content: string | ReadableStream | Response, options?: ContentOptions): Text;
+    remove(): Text;
+}
+interface DocumentEnd {
+    append(content: string, options?: ContentOptions): DocumentEnd;
+}
+/**
+ * This is the event type for `fetch` events dispatched on the ServiceWorkerGlobalScope.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/FetchEvent)
+ */
+declare abstract class FetchEvent extends ExtendableEvent {
+    /**
+     * The **`request`** read-only property of the FetchEvent interface returns the Request that triggered the event handler.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/FetchEvent/request)
+     */
+    readonly request: Request;
+    /**
+     * The **`respondWith()`** method of the FetchEvent interface allows you to provide a Response, or a promise for a Response, yourself.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/FetchEvent/respondWith)
+     */
+    respondWith(promise: Response | Promise<Response>): void;
+    passThroughOnException(): void;
+}
+type HeadersInit = Headers | Iterable<Iterable<string>> | Record<string, string>;
+/**
+ * The **`Headers`** interface of the Fetch API allows you to perform various actions on HTTP request and response headers.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Headers)
+ */
+declare class Headers {
+    constructor(init?: HeadersInit);
+    /**
+     * The **`get()`** method of the Headers interface returns a byte string of all the values of a header within a `Headers` object with a given name.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Headers/get)
+     */
+    get(name: string): string | null;
+    getAll(name: string): string[];
+    /**
+     * The **`getSetCookie()`** method of the Headers interface returns an array containing the values of all Set-Cookie headers associated with a response.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Headers/getSetCookie)
+     */
+    getSetCookie(): string[];
+    /**
+     * The **`has()`** method of the Headers interface returns a boolean stating whether a `Headers` object contains a certain header.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Headers/has)
+     */
+    has(name: string): boolean;
+    /**
+     * The **`set()`** method of the Headers interface sets a new value for an existing header inside a `Headers` object, or adds the header if it does not already exist.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Headers/set)
+     */
+    set(name: string, value: string): void;
+    /**
+     * The **`append()`** method of the Headers interface appends a new value onto an existing header inside a `Headers` object, or adds the header if it does not already exist.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Headers/append)
+     */
+    append(name: string, value: string): void;
+    /**
+     * The **`delete()`** method of the Headers interface deletes a header from the current `Headers` object.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Headers/delete)
+     */
+    delete(name: string): void;
+    forEach<This = unknown>(callback: (this: This, value: string, key: string, parent: Headers) => void, thisArg?: This): void;
+    /* Returns an iterator allowing to go through all key/value pairs contained in this object. */
+    entries(): IterableIterator<[
+        key: string,
+        value: string
+    ]>;
+    /* Returns an iterator allowing to go through all keys of the key/value pairs contained in this object. */
+    keys(): IterableIterator<string>;
+    /* Returns an iterator allowing to go through all values of the key/value pairs contained in this object. */
+    values(): IterableIterator<string>;
+    [Symbol.iterator](): IterableIterator<[
+        key: string,
+        value: string
+    ]>;
+}
+type BodyInit = ReadableStream<Uint8Array> | string | ArrayBuffer | ArrayBufferView | Blob | URLSearchParams | FormData;
+declare abstract class Body {
+    /* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request/body) */
+    get body(): ReadableStream | null;
+    /* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request/bodyUsed) */
+    get bodyUsed(): boolean;
+    /* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request/arrayBuffer) */
+    arrayBuffer(): Promise<ArrayBuffer>;
+    /* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request/bytes) */
+    bytes(): Promise<Uint8Array>;
+    /* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request/text) */
+    text(): Promise<string>;
+    /* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request/json) */
+    json<T>(): Promise<T>;
+    /* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request/formData) */
+    formData(): Promise<FormData>;
+    /* [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request/blob) */
+    blob(): Promise<Blob>;
+}
+/**
+ * The **`Response`** interface of the Fetch API represents the response to a request.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Response)
+ */
+declare var Response: {
+    prototype: Response;
+    new (body?: BodyInit | null, init?: ResponseInit): Response;
+    error(): Response;
+    redirect(url: string, status?: number): Response;
+    json(any: any, maybeInit?: (ResponseInit | Response)): Response;
+};
+/**
+ * The **`Response`** interface of the Fetch API represents the response to a request.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Response)
+ */
+interface Response extends Body {
+    /**
+     * The **`clone()`** method of the Response interface creates a clone of a response object, identical in every way, but stored in a different variable.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Response/clone)
+     */
+    clone(): Response;
+    /**
+     * The **`status`** read-only property of the Response interface contains the HTTP status codes of the response.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Response/status)
+     */
+    status: number;
+    /**
+     * The **`statusText`** read-only property of the Response interface contains the status message corresponding to the HTTP status code in Response.status.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Response/statusText)
+     */
+    statusText: string;
+    /**
+     * The **`headers`** read-only property of the Response interface contains the Headers object associated with the response.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Response/headers)
+     */
+    headers: Headers;
+    /**
+     * The **`ok`** read-only property of the Response interface contains a Boolean stating whether the response was successful (status in the range 200-299) or not.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Response/ok)
+     */
+    ok: boolean;
+    /**
+     * The **`redirected`** read-only property of the Response interface indicates whether or not the response is the result of a request you made which was redirected.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Response/redirected)
+     */
+    redirected: boolean;
+    /**
+     * The **`url`** read-only property of the Response interface contains the URL of the response.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Response/url)
+     */
+    url: string;
+    webSocket: WebSocket | null;
+    cf: any | undefined;
+    /**
+     * The **`type`** read-only property of the Response interface contains the type of the response.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Response/type)
+     */
+    type: "default" | "error";
+}
+interface ResponseInit {
+    status?: number;
+    statusText?: string;
+    headers?: HeadersInit;
+    cf?: any;
+    webSocket?: (WebSocket | null);
+    encodeBody?: "automatic" | "manual";
+}
+type RequestInfo<CfHostMetadata = unknown, Cf = CfProperties<CfHostMetadata>> = Request<CfHostMetadata, Cf> | string;
+/**
+ * The **`Request`** interface of the Fetch API represents a resource request.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request)
+ */
+declare var Request: {
+    prototype: Request;
+    new <CfHostMetadata = unknown, Cf = CfProperties<CfHostMetadata>>(input: RequestInfo<CfProperties> | URL, init?: RequestInit<Cf>): Request<CfHostMetadata, Cf>;
+};
+/**
+ * The **`Request`** interface of the Fetch API represents a resource request.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request)
+ */
+interface Request<CfHostMetadata = unknown, Cf = CfProperties<CfHostMetadata>> extends Body {
+    /**
+     * The **`clone()`** method of the Request interface creates a copy of the current `Request` object.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request/clone)
+     */
+    clone(): Request<CfHostMetadata, Cf>;
+    /**
+     * The **`method`** read-only property of the Request interface contains the request's method (`GET`, `POST`, etc.) as a string.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request/method)
+     */
+    method: string;
+    /**
+     * The **`url`** read-only property of the Request interface contains the URL of the request.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request/url)
+     */
+    url: string;
+    /**
+     * The **`headers`** read-only property of the Request interface contains the Headers object associated with the request.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request/headers)
+     */
+    headers: Headers;
+    /**
+     * The **`redirect`** read-only property of the Request interface contains the mode for how redirects are handled.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request/redirect)
+     */
+    redirect: string;
+    fetcher: Fetcher | null;
+    /**
+     * The read-only **`signal`** property of the Request interface returns the AbortSignal associated with the request.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request/signal)
+     */
+    signal: AbortSignal;
+    cf?: Cf;
+    /**
+     * The **`integrity`** read-only property of the Request interface contains the subresource integrity value of the request.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request/integrity)
+     */
+    integrity: string;
+    /**
+     * The **`keepalive`** read-only property of the Request interface contains the request's `keepalive` setting (`true` or `false`), which indicates whether the browser will keep the associated request alive if the page that initiated it is unloaded before the request is complete.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request/keepalive)
+     */
+    keepalive: boolean;
+    /**
+     * The **`cache`** read-only property of the Request interface contains the cache mode of the request.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/Request/cache)
+     */
+    cache?: "no-store" | "no-cache";
+}
+interface RequestInit<Cf = CfProperties> {
+    /* A string to set request's method. */
+    method?: string;
+    /* A Headers object, an object literal, or an array of two-item arrays to set request's headers. */
+    headers?: HeadersInit;
+    /* A BodyInit object or null to set request's body. */
+    body?: BodyInit | null;
+    /* A string indicating whether request follows redirects, results in an error upon encountering a redirect, or returns the redirect (in an opaque fashion). Sets request's redirect. */
+    redirect?: string;
+    fetcher?: (Fetcher | null);
+    cf?: Cf;
+    /* A string indicating how the request will interact with the browser's cache to set request's cache. */
+    cache?: "no-store" | "no-cache";
+    /* A cryptographic hash of the resource to be fetched by request. Sets request's integrity. */
+    integrity?: string;
+    /* An AbortSignal to set request's signal. */
+    signal?: (AbortSignal | null);
+    encodeResponseBody?: "automatic" | "manual";
+}
+type Service<T extends (new (...args: any[]) => Rpc.WorkerEntrypointBranded) | Rpc.WorkerEntrypointBranded | ExportedHandler<any, any, any> | undefined = undefined> = T extends new (...args: any[]) => Rpc.WorkerEntrypointBranded ? Fetcher<InstanceType<T>> : T extends Rpc.WorkerEntrypointBranded ? Fetcher<T> : T extends Exclude<Rpc.EntrypointBranded, Rpc.WorkerEntrypointBranded> ? never : Fetcher<undefined>;
+type Fetcher<T extends Rpc.EntrypointBranded | undefined = undefined, Reserved extends string = never> = (T extends Rpc.EntrypointBranded ? Rpc.Provider<T, Reserved | "fetch" | "connect"> : unknown) & {
+    fetch(input: RequestInfo | URL, init?: RequestInit): Promise<Response>;
+    connect(address: SocketAddress | string, options?: SocketOptions): Socket;
+};
+interface KVNamespaceListKey<Metadata, Key extends string = string> {
+    name: Key;
+    expiration?: number;
+    metadata?: Metadata;
+}
+type KVNamespaceListResult<Metadata, Key extends string = string> = {
+    list_complete: false;
+    keys: KVNamespaceListKey<Metadata, Key>[];
+    cursor: string;
+    cacheStatus: string | null;
+} | {
+    list_complete: true;
+    keys: KVNamespaceListKey<Metadata, Key>[];
+    cacheStatus: string | null;
+};
+interface KVNamespace<Key extends string = string> {
+    get(key: Key, options?: Partial<KVNamespaceGetOptions<undefined>>): Promise<string | null>;
+    get(key: Key, type: "text"): Promise<string | null>;
+    get<ExpectedValue = unknown>(key: Key, type: "json"): Promise<ExpectedValue | null>;
+    get(key: Key, type: "arrayBuffer"): Promise<ArrayBuffer | null>;
+    get(key: Key, type: "stream"): Promise<ReadableStream | null>;
+    get(key: Key, options?: KVNamespaceGetOptions<"text">): Promise<string | null>;
+    get<ExpectedValue = unknown>(key: Key, options?: KVNamespaceGetOptions<"json">): Promise<ExpectedValue | null>;
+    get(key: Key, options?: KVNamespaceGetOptions<"arrayBuffer">): Promise<ArrayBuffer | null>;
+    get(key: Key, options?: KVNamespaceGetOptions<"stream">): Promise<ReadableStream | null>;
+    get(key: Array<Key>, type: "text"): Promise<Map<string, string | null>>;
+    get<ExpectedValue = unknown>(key: Array<Key>, type: "json"): Promise<Map<string, ExpectedValue | null>>;
+    get(key: Array<Key>, options?: Partial<KVNamespaceGetOptions<undefined>>): Promise<Map<string, string | null>>;
+    get(key: Array<Key>, options?: KVNamespaceGetOptions<"text">): Promise<Map<string, string | null>>;
+    get<ExpectedValue = unknown>(key: Array<Key>, options?: KVNamespaceGetOptions<"json">): Promise<Map<string, ExpectedValue | null>>;
+    list<Metadata = unknown>(options?: KVNamespaceListOptions): Promise<KVNamespaceListResult<Metadata, Key>>;
+    put(key: Key, value: string | ArrayBuffer | ArrayBufferView | ReadableStream, options?: KVNamespacePutOptions): Promise<void>;
+    getWithMetadata<Metadata = unknown>(key: Key, options?: Partial<KVNamespaceGetOptions<undefined>>): Promise<KVNamespaceGetWithMetadataResult<string, Metadata>>;
+    getWithMetadata<Metadata = unknown>(key: Key, type: "text"): Promise<KVNamespaceGetWithMetadataResult<string, Metadata>>;
+    getWithMetadata<ExpectedValue = unknown, Metadata = unknown>(key: Key, type: "json"): Promise<KVNamespaceGetWithMetadataResult<ExpectedValue, Metadata>>;
+    getWithMetadata<Metadata = unknown>(key: Key, type: "arrayBuffer"): Promise<KVNamespaceGetWithMetadataResult<ArrayBuffer, Metadata>>;
+    getWithMetadata<Metadata = unknown>(key: Key, type: "stream"): Promise<KVNamespaceGetWithMetadataResult<ReadableStream, Metadata>>;
+    getWithMetadata<Metadata = unknown>(key: Key, options: KVNamespaceGetOptions<"text">): Promise<KVNamespaceGetWithMetadataResult<string, Metadata>>;
+    getWithMetadata<ExpectedValue = unknown, Metadata = unknown>(key: Key, options: KVNamespaceGetOptions<"json">): Promise<KVNamespaceGetWithMetadataResult<ExpectedValue, Metadata>>;
+    getWithMetadata<Metadata = unknown>(key: Key, options: KVNamespaceGetOptions<"arrayBuffer">): Promise<KVNamespaceGetWithMetadataResult<ArrayBuffer, Metadata>>;
+    getWithMetadata<Metadata = unknown>(key: Key, options: KVNamespaceGetOptions<"stream">): Promise<KVNamespaceGetWithMetadataResult<ReadableStream, Metadata>>;
+    getWithMetadata<Metadata = unknown>(key: Array<Key>, type: "text"): Promise<Map<string, KVNamespaceGetWithMetadataResult<string, Metadata>>>;
+    getWithMetadata<ExpectedValue = unknown, Metadata = unknown>(key: Array<Key>, type: "json"): Promise<Map<string, KVNamespaceGetWithMetadataResult<ExpectedValue, Metadata>>>;
+    getWithMetadata<Metadata = unknown>(key: Array<Key>, options?: Partial<KVNamespaceGetOptions<undefined>>): Promise<Map<string, KVNamespaceGetWithMetadataResult<string, Metadata>>>;
+    getWithMetadata<Metadata = unknown>(key: Array<Key>, options?: KVNamespaceGetOptions<"text">): Promise<Map<string, KVNamespaceGetWithMetadataResult<string, Metadata>>>;
+    getWithMetadata<ExpectedValue = unknown, Metadata = unknown>(key: Array<Key>, options?: KVNamespaceGetOptions<"json">): Promise<Map<string, KVNamespaceGetWithMetadataResult<ExpectedValue, Metadata>>>;
+    delete(key: Key): Promise<void>;
+}
+interface KVNamespaceListOptions {
+    limit?: number;
+    prefix?: (string | null);
+    cursor?: (string | null);
+}
+interface KVNamespaceGetOptions<Type> {
+    type: Type;
+    cacheTtl?: number;
+}
+interface KVNamespacePutOptions {
+    expiration?: number;
+    expirationTtl?: number;
+    metadata?: (any | null);
+}
+interface KVNamespaceGetWithMetadataResult<Value, Metadata> {
+    value: Value | null;
+    metadata: Metadata | null;
+    cacheStatus: string | null;
+}
+type QueueContentType = "text" | "bytes" | "json" | "v8";
+interface Queue<Body = unknown> {
+    send(message: Body, options?: QueueSendOptions): Promise<void>;
+    sendBatch(messages: Iterable<MessageSendRequest<Body>>, options?: QueueSendBatchOptions): Promise<void>;
+}
+interface QueueSendOptions {
+    contentType?: QueueContentType;
+    delaySeconds?: number;
+}
+interface QueueSendBatchOptions {
+    delaySeconds?: number;
+}
+interface MessageSendRequest<Body = unknown> {
+    body: Body;
+    contentType?: QueueContentType;
+    delaySeconds?: number;
+}
+interface QueueRetryOptions {
+    delaySeconds?: number;
+}
+interface Message<Body = unknown> {
+    readonly id: string;
+    readonly timestamp: Date;
+    readonly body: Body;
+    readonly attempts: number;
+    retry(options?: QueueRetryOptions): void;
+    ack(): void;
+}
+interface QueueEvent<Body = unknown> extends ExtendableEvent {
+    readonly messages: readonly Message<Body>[];
+    readonly queue: string;
+    retryAll(options?: QueueRetryOptions): void;
+    ackAll(): void;
+}
+interface MessageBatch<Body = unknown> {
+    readonly messages: readonly Message<Body>[];
+    readonly queue: string;
+    retryAll(options?: QueueRetryOptions): void;
+    ackAll(): void;
+}
+interface R2Error extends Error {
+    readonly name: string;
+    readonly code: number;
+    readonly message: string;
+    readonly action: string;
+    readonly stack: any;
+}
+interface R2ListOptions {
+    limit?: number;
+    prefix?: string;
+    cursor?: string;
+    delimiter?: string;
+    startAfter?: string;
+    include?: ("httpMetadata" | "customMetadata")[];
+}
+declare abstract class R2Bucket {
+    head(key: string): Promise<R2Object | null>;
+    get(key: string, options: R2GetOptions & {
+        onlyIf: R2Conditional | Headers;
+    }): Promise<R2ObjectBody | R2Object | null>;
+    get(key: string, options?: R2GetOptions): Promise<R2ObjectBody | null>;
+    put(key: string, value: ReadableStream | ArrayBuffer | ArrayBufferView | string | null | Blob, options?: R2PutOptions & {
+        onlyIf: R2Conditional | Headers;
+    }): Promise<R2Object | null>;
+    put(key: string, value: ReadableStream | ArrayBuffer | ArrayBufferView | string | null | Blob, options?: R2PutOptions): Promise<R2Object>;
+    createMultipartUpload(key: string, options?: R2MultipartOptions): Promise<R2MultipartUpload>;
+    resumeMultipartUpload(key: string, uploadId: string): R2MultipartUpload;
+    delete(keys: string | string[]): Promise<void>;
+    list(options?: R2ListOptions): Promise<R2Objects>;
+}
+interface R2MultipartUpload {
+    readonly key: string;
+    readonly uploadId: string;
+    uploadPart(partNumber: number, value: ReadableStream | (ArrayBuffer | ArrayBufferView) | string | Blob, options?: R2UploadPartOptions): Promise<R2UploadedPart>;
+    abort(): Promise<void>;
+    complete(uploadedParts: R2UploadedPart[]): Promise<R2Object>;
+}
+interface R2UploadedPart {
+    partNumber: number;
+    etag: string;
+}
+declare abstract class R2Object {
+    readonly key: string;
+    readonly version: string;
+    readonly size: number;
+    readonly etag: string;
+    readonly httpEtag: string;
+    readonly checksums: R2Checksums;
+    readonly uploaded: Date;
+    readonly httpMetadata?: R2HTTPMetadata;
+    readonly customMetadata?: Record<string, string>;
+    readonly range?: R2Range;
+    readonly storageClass: string;
+    readonly ssecKeyMd5?: string;
+    writeHttpMetadata(headers: Headers): void;
+}
+interface R2ObjectBody extends R2Object {
+    get body(): ReadableStream;
+    get bodyUsed(): boolean;
+    arrayBuffer(): Promise<ArrayBuffer>;
+    bytes(): Promise<Uint8Array>;
+    text(): Promise<string>;
+    json<T>(): Promise<T>;
+    blob(): Promise<Blob>;
+}
+type R2Range = {
+    offset: number;
+    length?: number;
+} | {
+    offset?: number;
+    length: number;
+} | {
+    suffix: number;
+};
+interface R2Conditional {
+    etagMatches?: string;
+    etagDoesNotMatch?: string;
+    uploadedBefore?: Date;
+    uploadedAfter?: Date;
+    secondsGranularity?: boolean;
+}
+interface R2GetOptions {
+    onlyIf?: (R2Conditional | Headers);
+    range?: (R2Range | Headers);
+    ssecKey?: (ArrayBuffer | string);
+}
+interface R2PutOptions {
+    onlyIf?: (R2Conditional | Headers);
+    httpMetadata?: (R2HTTPMetadata | Headers);
+    customMetadata?: Record<string, string>;
+    md5?: ((ArrayBuffer | ArrayBufferView) | string);
+    sha1?: ((ArrayBuffer | ArrayBufferView) | string);
+    sha256?: ((ArrayBuffer | ArrayBufferView) | string);
+    sha384?: ((ArrayBuffer | ArrayBufferView) | string);
+    sha512?: ((ArrayBuffer | ArrayBufferView) | string);
+    storageClass?: string;
+    ssecKey?: (ArrayBuffer | string);
+}
+interface R2MultipartOptions {
+    httpMetadata?: (R2HTTPMetadata | Headers);
+    customMetadata?: Record<string, string>;
+    storageClass?: string;
+    ssecKey?: (ArrayBuffer | string);
+}
+interface R2Checksums {
+    readonly md5?: ArrayBuffer;
+    readonly sha1?: ArrayBuffer;
+    readonly sha256?: ArrayBuffer;
+    readonly sha384?: ArrayBuffer;
+    readonly sha512?: ArrayBuffer;
+    toJSON(): R2StringChecksums;
+}
+interface R2StringChecksums {
+    md5?: string;
+    sha1?: string;
+    sha256?: string;
+    sha384?: string;
+    sha512?: string;
+}
+interface R2HTTPMetadata {
+    contentType?: string;
+    contentLanguage?: string;
+    contentDisposition?: string;
+    contentEncoding?: string;
+    cacheControl?: string;
+    cacheExpiry?: Date;
+}
+type R2Objects = {
+    objects: R2Object[];
+    delimitedPrefixes: string[];
+} & ({
+    truncated: true;
+    cursor: string;
+} | {
+    truncated: false;
+});
+interface R2UploadPartOptions {
+    ssecKey?: (ArrayBuffer | string);
+}
+declare abstract class ScheduledEvent extends ExtendableEvent {
+    readonly scheduledTime: number;
+    readonly cron: string;
+    noRetry(): void;
+}
+interface ScheduledController {
+    readonly scheduledTime: number;
+    readonly cron: string;
+    noRetry(): void;
+}
+interface QueuingStrategy<T = any> {
+    highWaterMark?: (number | bigint);
+    size?: (chunk: T) => number | bigint;
+}
+interface UnderlyingSink<W = any> {
+    type?: string;
+    start?: (controller: WritableStreamDefaultController) => void | Promise<void>;
+    write?: (chunk: W, controller: WritableStreamDefaultController) => void | Promise<void>;
+    abort?: (reason: any) => void | Promise<void>;
+    close?: () => void | Promise<void>;
+}
+interface UnderlyingByteSource {
+    type: "bytes";
+    autoAllocateChunkSize?: number;
+    start?: (controller: ReadableByteStreamController) => void | Promise<void>;
+    pull?: (controller: ReadableByteStreamController) => void | Promise<void>;
+    cancel?: (reason: any) => void | Promise<void>;
+}
+interface UnderlyingSource<R = any> {
+    type?: "" | undefined;
+    start?: (controller: ReadableStreamDefaultController<R>) => void | Promise<void>;
+    pull?: (controller: ReadableStreamDefaultController<R>) => void | Promise<void>;
+    cancel?: (reason: any) => void | Promise<void>;
+    expectedLength?: (number | bigint);
+}
+interface Transformer<I = any, O = any> {
+    readableType?: string;
+    writableType?: string;
+    start?: (controller: TransformStreamDefaultController<O>) => void | Promise<void>;
+    transform?: (chunk: I, controller: TransformStreamDefaultController<O>) => void | Promise<void>;
+    flush?: (controller: TransformStreamDefaultController<O>) => void | Promise<void>;
+    cancel?: (reason: any) => void | Promise<void>;
+    expectedLength?: number;
+}
+interface StreamPipeOptions {
+    preventAbort?: boolean;
+    preventCancel?: boolean;
+    /**
+     * Pipes this readable stream to a given writable stream destination. The way in which the piping process behaves under various error conditions can be customized with a number of passed options. It returns a promise that fulfills when the piping process completes successfully, or rejects if any errors were encountered.
+     *
+     * Piping a stream will lock it for the duration of the pipe, preventing any other consumer from acquiring a reader.
+     *
+     * Errors and closures of the source and destination streams propagate as follows:
+     *
+     * An error in this source readable stream will abort destination, unless preventAbort is truthy. The returned promise will be rejected with the source's error, or with any error that occurs during aborting the destination.
+     *
+     * An error in destination will cancel this source readable stream, unless preventCancel is truthy. The returned promise will be rejected with the destination's error, or with any error that occurs during canceling the source.
+     *
+     * When this source readable stream closes, destination will be closed, unless preventClose is truthy. The returned promise will be fulfilled once this process completes, unless an error is encountered while closing the destination, in which case it will be rejected with that error.
+     *
+     * If destination starts out closed or closing, this source readable stream will be canceled, unless preventCancel is true. The returned promise will be rejected with an error indicating piping to a closed stream failed, or with any error that occurs during canceling the source.
+     *
+     * The signal option can be set to an AbortSignal to allow aborting an ongoing pipe operation via the corresponding AbortController. In this case, this source readable stream will be canceled, and destination aborted, unless the respective options preventCancel or preventAbort are set.
+     */
+    preventClose?: boolean;
+    signal?: AbortSignal;
+}
+type ReadableStreamReadResult<R = any> = {
+    done: false;
+    value: R;
+} | {
+    done: true;
+    value?: undefined;
+};
+/**
+ * The `ReadableStream` interface of the Streams API represents a readable stream of byte data.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStream)
+ */
+interface ReadableStream<R = any> {
+    /**
+     * The **`locked`** read-only property of the ReadableStream interface returns whether or not the readable stream is locked to a reader.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStream/locked)
+     */
+    get locked(): boolean;
+    /**
+     * The **`cancel()`** method of the ReadableStream interface returns a Promise that resolves when the stream is canceled.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStream/cancel)
+     */
+    cancel(reason?: any): Promise<void>;
+    /**
+     * The **`getReader()`** method of the ReadableStream interface creates a reader and locks the stream to it.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStream/getReader)
+     */
+    getReader(): ReadableStreamDefaultReader<R>;
+    /**
+     * The **`getReader()`** method of the ReadableStream interface creates a reader and locks the stream to it.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStream/getReader)
+     */
+    getReader(options: ReadableStreamGetReaderOptions): ReadableStreamBYOBReader;
+    /**
+     * The **`pipeThrough()`** method of the ReadableStream interface provides a chainable way of piping the current stream through a transform stream or any other writable/readable pair.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStream/pipeThrough)
+     */
+    pipeThrough<T>(transform: ReadableWritablePair<T, R>, options?: StreamPipeOptions): ReadableStream<T>;
+    /**
+     * The **`pipeTo()`** method of the ReadableStream interface pipes the current `ReadableStream` to a given WritableStream and returns a Promise that fulfills when the piping process completes successfully, or rejects if any errors were encountered.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStream/pipeTo)
+     */
+    pipeTo(destination: WritableStream<R>, options?: StreamPipeOptions): Promise<void>;
+    /**
+     * The **`tee()`** method of the ReadableStream interface tees the current readable stream, returning a two-element array containing the two resulting branches as new ReadableStream instances.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStream/tee)
+     */
+    tee(): [
+        ReadableStream<R>,
+        ReadableStream<R>
+    ];
+    values(options?: ReadableStreamValuesOptions): AsyncIterableIterator<R>;
+    [Symbol.asyncIterator](options?: ReadableStreamValuesOptions): AsyncIterableIterator<R>;
+}
+/**
+ * The `ReadableStream` interface of the Streams API represents a readable stream of byte data.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStream)
+ */
+declare const ReadableStream: {
+    prototype: ReadableStream;
+    new (underlyingSource: UnderlyingByteSource, strategy?: QueuingStrategy<Uint8Array>): ReadableStream<Uint8Array>;
+    new <R = any>(underlyingSource?: UnderlyingSource<R>, strategy?: QueuingStrategy<R>): ReadableStream<R>;
+};
+/**
+ * The **`ReadableStreamDefaultReader`** interface of the Streams API represents a default reader that can be used to read stream data supplied from a network (such as a fetch request).
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStreamDefaultReader)
+ */
+declare class ReadableStreamDefaultReader<R = any> {
+    constructor(stream: ReadableStream);
+    get closed(): Promise<void>;
+    cancel(reason?: any): Promise<void>;
+    /**
+     * The **`read()`** method of the ReadableStreamDefaultReader interface returns a Promise providing access to the next chunk in the stream's internal queue.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStreamDefaultReader/read)
+     */
+    read(): Promise<ReadableStreamReadResult<R>>;
+    /**
+     * The **`releaseLock()`** method of the ReadableStreamDefaultReader interface releases the reader's lock on the stream.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStreamDefaultReader/releaseLock)
+     */
+    releaseLock(): void;
+}
+/**
+ * The `ReadableStreamBYOBReader` interface of the Streams API defines a reader for a ReadableStream that supports zero-copy reading from an underlying byte source.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStreamBYOBReader)
+ */
+declare class ReadableStreamBYOBReader {
+    constructor(stream: ReadableStream);
+    get closed(): Promise<void>;
+    cancel(reason?: any): Promise<void>;
+    /**
+     * The **`read()`** method of the ReadableStreamBYOBReader interface is used to read data into a view on a user-supplied buffer from an associated readable byte stream.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStreamBYOBReader/read)
+     */
+    read<T extends ArrayBufferView>(view: T): Promise<ReadableStreamReadResult<T>>;
+    /**
+     * The **`releaseLock()`** method of the ReadableStreamBYOBReader interface releases the reader's lock on the stream.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStreamBYOBReader/releaseLock)
+     */
+    releaseLock(): void;
+    readAtLeast<T extends ArrayBufferView>(minElements: number, view: T): Promise<ReadableStreamReadResult<T>>;
+}
+interface ReadableStreamBYOBReaderReadableStreamBYOBReaderReadOptions {
+    min?: number;
+}
+interface ReadableStreamGetReaderOptions {
+    /**
+     * Creates a ReadableStreamBYOBReader and locks the stream to the new reader.
+     *
+     * This call behaves the same way as the no-argument variant, except that it only works on readable byte streams, i.e. streams which were constructed specifically with the ability to handle "bring your own buffer" reading. The returned BYOB reader provides the ability to directly read individual chunks from the stream via its read() method, into developer-supplied buffers, allowing more precise control over allocation.
+     */
+    mode: "byob";
+}
+/**
+ * The **`ReadableStreamBYOBRequest`** interface of the Streams API represents a 'pull request' for data from an underlying source that will be made as a zero-copy transfer to a consumer (bypassing the stream's internal queues).
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStreamBYOBRequest)
+ */
+declare abstract class ReadableStreamBYOBRequest {
+    /**
+     * The **`view`** getter property of the ReadableStreamBYOBRequest interface returns the current view.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStreamBYOBRequest/view)
+     */
+    get view(): Uint8Array | null;
+    /**
+     * The **`respond()`** method of the ReadableStreamBYOBRequest interface is used to signal to the associated readable byte stream that the specified number of bytes were written into the ReadableStreamBYOBRequest.view.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStreamBYOBRequest/respond)
+     */
+    respond(bytesWritten: number): void;
+    /**
+     * The **`respondWithNewView()`** method of the ReadableStreamBYOBRequest interface specifies a new view that the consumer of the associated readable byte stream should write to instead of ReadableStreamBYOBRequest.view.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStreamBYOBRequest/respondWithNewView)
+     */
+    respondWithNewView(view: ArrayBuffer | ArrayBufferView): void;
+    get atLeast(): number | null;
+}
+/**
+ * The **`ReadableStreamDefaultController`** interface of the Streams API represents a controller allowing control of a ReadableStream's state and internal queue.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStreamDefaultController)
+ */
+declare abstract class ReadableStreamDefaultController<R = any> {
+    /**
+     * The **`desiredSize`** read-only property of the ReadableStreamDefaultController interface returns the desired size required to fill the stream's internal queue.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStreamDefaultController/desiredSize)
+     */
+    get desiredSize(): number | null;
+    /**
+     * The **`close()`** method of the ReadableStreamDefaultController interface closes the associated stream.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStreamDefaultController/close)
+     */
+    close(): void;
+    /**
+     * The **`enqueue()`** method of the ReadableStreamDefaultController interface enqueues a given chunk in the associated stream. Syntax: `enqueue(chunk)`, where `chunk` is the chunk to enqueue.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStreamDefaultController/enqueue)
+     */
+    enqueue(chunk?: R): void;
+    /**
+     * The **`error()`** method of the ReadableStreamDefaultController interface causes any future interactions with the associated stream to error.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableStreamDefaultController/error)
+     */
+    error(reason: any): void;
+}
+/**
+ * The **`ReadableByteStreamController`** interface of the Streams API represents a controller for a readable byte stream.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableByteStreamController)
+ */
+declare abstract class ReadableByteStreamController {
+    /**
+     * The **`byobRequest`** read-only property of the ReadableByteStreamController interface returns the current BYOB request, or `null` if there are no pending requests.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableByteStreamController/byobRequest)
+     */
+    get byobRequest(): ReadableStreamBYOBRequest | null;
+    /**
+     * The **`desiredSize`** read-only property of the ReadableByteStreamController interface returns the number of bytes required to fill the stream's internal queue to its 'desired size'.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableByteStreamController/desiredSize)
+     */
+    get desiredSize(): number | null;
+    /**
+     * The **`close()`** method of the ReadableByteStreamController interface closes the associated stream.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableByteStreamController/close)
+     */
+    close(): void;
+    /**
+     * The **`enqueue()`** method of the ReadableByteStreamController interface enqueues a given chunk on the associated readable byte stream (the chunk is copied into the stream's internal queues).
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableByteStreamController/enqueue)
+     */
+    enqueue(chunk: ArrayBuffer | ArrayBufferView): void;
+    /**
+     * The **`error()`** method of the ReadableByteStreamController interface causes any future interactions with the associated stream to error with the specified reason.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ReadableByteStreamController/error)
+     */
+    error(reason: any): void;
+}
+/**
+ * The **`WritableStreamDefaultController`** interface of the Streams API represents a controller allowing control of a WritableStream's state.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WritableStreamDefaultController)
+ */
+declare abstract class WritableStreamDefaultController {
+    /**
+     * The read-only **`signal`** property of the WritableStreamDefaultController interface returns the AbortSignal associated with the controller.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WritableStreamDefaultController/signal)
+     */
+    get signal(): AbortSignal;
+    /**
+     * The **`error()`** method of the WritableStreamDefaultController interface causes any future interactions with the associated stream to error.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WritableStreamDefaultController/error)
+     */
+    error(reason?: any): void;
+}
+/**
+ * The **`TransformStreamDefaultController`** interface of the Streams API provides methods to manipulate the associated ReadableStream and WritableStream.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/TransformStreamDefaultController)
+ */
+declare abstract class TransformStreamDefaultController<O = any> {
+    /**
+     * The **`desiredSize`** read-only property of the TransformStreamDefaultController interface returns the desired size to fill the queue of the associated ReadableStream.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/TransformStreamDefaultController/desiredSize)
+     */
+    get desiredSize(): number | null;
+    /**
+     * The **`enqueue()`** method of the TransformStreamDefaultController interface enqueues the given chunk in the readable side of the stream.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/TransformStreamDefaultController/enqueue)
+     */
+    enqueue(chunk?: O): void;
+    /**
+     * The **`error()`** method of the TransformStreamDefaultController interface errors both sides of the stream.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/TransformStreamDefaultController/error)
+     */
+    error(reason: any): void;
+    /**
+     * The **`terminate()`** method of the TransformStreamDefaultController interface closes the readable side and errors the writable side of the stream.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/TransformStreamDefaultController/terminate)
+     */
+    terminate(): void;
+}
+interface ReadableWritablePair<R = any, W = any> {
+    readable: ReadableStream<R>;
+    /**
+     * Provides a convenient, chainable way of piping this readable stream through a transform stream (or any other { writable, readable } pair). It simply pipes the stream into the writable side of the supplied pair, and returns the readable side for further use.
+     *
+     * Piping a stream will lock it for the duration of the pipe, preventing any other consumer from acquiring a reader.
+     */
+    writable: WritableStream<W>;
+}
+/**
+ * The **`WritableStream`** interface of the Streams API provides a standard abstraction for writing streaming data to a destination, known as a sink.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WritableStream)
+ */
+declare class WritableStream<W = any> {
+    constructor(underlyingSink?: UnderlyingSink, queuingStrategy?: QueuingStrategy);
+    /**
+     * The **`locked`** read-only property of the WritableStream interface returns a boolean indicating whether the `WritableStream` is locked to a writer.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WritableStream/locked)
+     */
+    get locked(): boolean;
+    /**
+     * The **`abort()`** method of the WritableStream interface aborts the stream, signaling that the producer can no longer successfully write to the stream and it is to be immediately moved to an error state, with any queued writes discarded.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WritableStream/abort)
+     */
+    abort(reason?: any): Promise<void>;
+    /**
+     * The **`close()`** method of the WritableStream interface closes the associated stream.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WritableStream/close)
+     */
+    close(): Promise<void>;
+    /**
+     * The **`getWriter()`** method of the WritableStream interface returns a new instance of WritableStreamDefaultWriter and locks the stream to that instance.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WritableStream/getWriter)
+     */
+    getWriter(): WritableStreamDefaultWriter<W>;
+}
+/**
+ * The **`WritableStreamDefaultWriter`** interface of the Streams API is the object returned by WritableStream.getWriter() and once created locks the writer to the `WritableStream` ensuring that no other streams can write to the underlying sink.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WritableStreamDefaultWriter)
+ */
+declare class WritableStreamDefaultWriter<W = any> {
+    constructor(stream: WritableStream);
+    /**
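+     * The **`closed`** read-only property of the WritableStreamDefaultWriter interface returns a Promise that fulfills if the stream becomes closed, or rejects if the stream errors or the writer's lock is released.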
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WritableStreamDefaultWriter/closed)
+     */
+    get closed(): Promise<void>;
+    /**
+     * The **`ready`** read-only property of the WritableStreamDefaultWriter interface returns a Promise that resolves when the desired size of the stream's internal queue transitions from non-positive to positive, signaling that it is no longer applying backpressure.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WritableStreamDefaultWriter/ready)
+     */
+    get ready(): Promise<void>;
+    /**
+     * The **`desiredSize`** read-only property of the WritableStreamDefaultWriter interface returns the desired size required to fill the stream's internal queue.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WritableStreamDefaultWriter/desiredSize)
+     */
+    get desiredSize(): number | null;
+    /**
+     * The **`abort()`** method of the WritableStreamDefaultWriter interface aborts the stream, signaling that the producer can no longer successfully write to the stream and it is to be immediately moved to an error state, with any queued writes discarded.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WritableStreamDefaultWriter/abort)
+     */
+    abort(reason?: any): Promise<void>;
+    /**
+     * The **`close()`** method of the WritableStreamDefaultWriter interface closes the associated writable stream.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WritableStreamDefaultWriter/close)
+     */
+    close(): Promise<void>;
+    /**
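+     * The **`write()`** method of the WritableStreamDefaultWriter interface writes a passed chunk of data to a WritableStream and its underlying sink, then returns a Promise that resolves to indicate the success or failure of the write operation.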
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WritableStreamDefaultWriter/write)
+     */
+    write(chunk?: W): Promise<void>;
+    /**
+     * The **`releaseLock()`** method of the WritableStreamDefaultWriter interface releases the writer's lock on the corresponding stream.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WritableStreamDefaultWriter/releaseLock)
+     */
+    releaseLock(): void;
+}
+/**
+ * The **`TransformStream`** interface of the Streams API represents a concrete implementation of the pipe chain _transform stream_ concept.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/TransformStream)
+ */
+declare class TransformStream<I = any, O = any> {
+    constructor(transformer?: Transformer<I, O>, writableStrategy?: QueuingStrategy<I>, readableStrategy?: QueuingStrategy<O>);
+    /**
+     * The **`readable`** read-only property of the TransformStream interface returns the ReadableStream instance controlled by this `TransformStream`.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/TransformStream/readable)
+     */
+    get readable(): ReadableStream<O>;
+    /**
+     * The **`writable`** read-only property of the TransformStream interface returns the WritableStream instance controlled by this `TransformStream`.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/TransformStream/writable)
+     */
+    get writable(): WritableStream<I>;
+}
+declare class FixedLengthStream extends IdentityTransformStream {
+    constructor(expectedLength: number | bigint, queuingStrategy?: IdentityTransformStreamQueuingStrategy);
+}
+declare class IdentityTransformStream extends TransformStream<ArrayBuffer | ArrayBufferView, Uint8Array> {
+    constructor(queuingStrategy?: IdentityTransformStreamQueuingStrategy);
+}
+interface IdentityTransformStreamQueuingStrategy {
+    highWaterMark?: (number | bigint);
+}
+interface ReadableStreamValuesOptions {
+    preventCancel?: boolean;
+}
+/**
+ * The **`CompressionStream`** interface of the Compression Streams API is an API for compressing a stream of data.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/CompressionStream)
+ */
+declare class CompressionStream extends TransformStream<ArrayBuffer | ArrayBufferView, Uint8Array> {
+    constructor(format: "gzip" | "deflate" | "deflate-raw");
+}
+/**
+ * The **`DecompressionStream`** interface of the Compression Streams API is an API for decompressing a stream of data.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/DecompressionStream)
+ */
+declare class DecompressionStream extends TransformStream<ArrayBuffer | ArrayBufferView, Uint8Array> {
+    constructor(format: "gzip" | "deflate" | "deflate-raw");
+}
+/**
+ * The **`TextEncoderStream`** interface of the Encoding API converts a stream of strings into bytes in the UTF-8 encoding.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/TextEncoderStream)
+ */
+declare class TextEncoderStream extends TransformStream<string, Uint8Array> {
+    constructor();
+    get encoding(): string;
+}
+/**
+ * The **`TextDecoderStream`** interface of the Encoding API converts a stream of text in a binary encoding, such as UTF-8 etc., to a stream of strings.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/TextDecoderStream)
+ */
+declare class TextDecoderStream extends TransformStream<ArrayBuffer | ArrayBufferView, string> {
+    constructor(label?: string, options?: TextDecoderStreamTextDecoderStreamInit);
+    get encoding(): string;
+    get fatal(): boolean;
+    get ignoreBOM(): boolean;
+}
+interface TextDecoderStreamTextDecoderStreamInit {
+    fatal?: boolean;
+    ignoreBOM?: boolean;
+}
+/**
+ * The **`ByteLengthQueuingStrategy`** interface of the Streams API provides a built-in byte length queuing strategy that can be used when constructing streams.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ByteLengthQueuingStrategy)
+ */
+declare class ByteLengthQueuingStrategy implements QueuingStrategy<ArrayBufferView> {
+    constructor(init: QueuingStrategyInit);
+    /**
+     * The read-only **`ByteLengthQueuingStrategy.highWaterMark`** property returns the total number of bytes that can be contained in the internal queue before backpressure is applied.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/ByteLengthQueuingStrategy/highWaterMark)
+     */
+    get highWaterMark(): number;
+    /* [MDN Reference](https://developer.mozilla.org/docs/Web/API/ByteLengthQueuingStrategy/size) */
+    get size(): (chunk?: any) => number;
+}
+/**
+ * The **`CountQueuingStrategy`** interface of the Streams API provides a built-in chunk counting queuing strategy that can be used when constructing streams.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/CountQueuingStrategy)
+ */
+declare class CountQueuingStrategy implements QueuingStrategy {
+    constructor(init: QueuingStrategyInit);
+    /**
+     * The read-only **`CountQueuingStrategy.highWaterMark`** property returns the total number of chunks that can be contained in the internal queue before backpressure is applied.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/CountQueuingStrategy/highWaterMark)
+     */
+    get highWaterMark(): number;
+    /* [MDN Reference](https://developer.mozilla.org/docs/Web/API/CountQueuingStrategy/size) */
+    get size(): (chunk?: any) => number;
+}
+interface QueuingStrategyInit {
+    /**
+     * Creates a new ByteLengthQueuingStrategy with the provided high water mark.
+     *
+     * Note that the provided high water mark will not be validated ahead of time. Instead, if it is negative, NaN, or not a number, the resulting ByteLengthQueuingStrategy will cause the corresponding stream constructor to throw.
+     */
+    highWaterMark: number;
+}
+interface ScriptVersion {
+    id?: string;
+    tag?: string;
+    message?: string;
+}
+declare abstract class TailEvent extends ExtendableEvent {
+    readonly events: TraceItem[];
+    readonly traces: TraceItem[];
+}
+interface TraceItem {
+    readonly event: (TraceItemFetchEventInfo | TraceItemJsRpcEventInfo | TraceItemScheduledEventInfo | TraceItemAlarmEventInfo | TraceItemQueueEventInfo | TraceItemEmailEventInfo | TraceItemTailEventInfo | TraceItemCustomEventInfo | TraceItemHibernatableWebSocketEventInfo) | null;
+    readonly eventTimestamp: number | null;
+    readonly logs: TraceLog[];
+    readonly exceptions: TraceException[];
+    readonly diagnosticsChannelEvents: TraceDiagnosticChannelEvent[];
+    readonly scriptName: string | null;
+    readonly entrypoint?: string;
+    readonly scriptVersion?: ScriptVersion;
+    readonly dispatchNamespace?: string;
+    readonly scriptTags?: string[];
+    readonly durableObjectId?: string;
+    readonly outcome: string;
+    readonly executionModel: string;
+    readonly truncated: boolean;
+    readonly cpuTime: number;
+    readonly wallTime: number;
+}
+interface TraceItemAlarmEventInfo {
+    readonly scheduledTime: Date;
+}
+interface TraceItemCustomEventInfo {
+}
+interface TraceItemScheduledEventInfo {
+    readonly scheduledTime: number;
+    readonly cron: string;
+}
+interface TraceItemQueueEventInfo {
+    readonly queue: string;
+    readonly batchSize: number;
+}
+interface TraceItemEmailEventInfo {
+    readonly mailFrom: string;
+    readonly rcptTo: string;
+    readonly rawSize: number;
+}
+interface TraceItemTailEventInfo {
+    readonly consumedEvents: TraceItemTailEventInfoTailItem[];
+}
+interface TraceItemTailEventInfoTailItem {
+    readonly scriptName: string | null;
+}
+interface TraceItemFetchEventInfo {
+    readonly response?: TraceItemFetchEventInfoResponse;
+    readonly request: TraceItemFetchEventInfoRequest;
+}
+interface TraceItemFetchEventInfoRequest {
+    readonly cf?: any;
+    readonly headers: Record<string, string>;
+    readonly method: string;
+    readonly url: string;
+    getUnredacted(): TraceItemFetchEventInfoRequest;
+}
+interface TraceItemFetchEventInfoResponse { // Type of TraceItemFetchEventInfo.response.
+    readonly status: number; // the only response field exposed
+}
+interface TraceItemJsRpcEventInfo { // One of the TraceItem.event variants; holds only the rpcMethod string.
+    readonly rpcMethod: string;
+}
+interface TraceItemHibernatableWebSocketEventInfo { // One of the TraceItem.event variants.
+    readonly getWebSocketEvent: TraceItemHibernatableWebSocketEventInfoMessage | TraceItemHibernatableWebSocketEventInfoClose | TraceItemHibernatableWebSocketEventInfoError; // a property despite the `get` prefix, not a method; holds one of three shapes
+}
+interface TraceItemHibernatableWebSocketEventInfoMessage { // Message member of the getWebSocketEvent union.
+    readonly webSocketEventType: string; // typed as string, not a literal, so it does not discriminate the union at the type level
+}
+interface TraceItemHibernatableWebSocketEventInfoClose { // Close member of the getWebSocketEvent union; the only one with extra fields.
+    readonly webSocketEventType: string;
+    readonly code: number;
+    readonly wasClean: boolean;
+}
+interface TraceItemHibernatableWebSocketEventInfoError { // Error member of the getWebSocketEvent union; structurally identical to the Message member.
+    readonly webSocketEventType: string;
+}
+interface TraceLog { // Entry type of TraceItem.logs.
+    readonly timestamp: number;
+    readonly level: string; // plain string, not a literal union
+    readonly message: any; // payload left untyped by this declaration
+}
+interface TraceException { // Entry type of TraceItem.exceptions.
+    readonly timestamp: number;
+    readonly message: string;
+    readonly name: string;
+    readonly stack?: string; // the only optional field
+}
+interface TraceDiagnosticChannelEvent { // Entry type of TraceItem.diagnosticsChannelEvents.
+    readonly timestamp: number;
+    readonly channel: string;
+    readonly message: any; // payload left untyped by this declaration
+}
+interface TraceMetrics { // Return type of UnsafeTraceMetrics.fromTrace; the same two timing fields also appear on TraceItem.
+    readonly cpuTime: number; // NOTE(review): unit not stated in this declaration
+    readonly wallTime: number; // NOTE(review): unit not stated in this declaration
+}
+interface UnsafeTraceMetrics { // Single-method interface.
+    fromTrace(item: TraceItem): TraceMetrics; // maps one TraceItem to its TraceMetrics
+}
+/**
+ * The **`URL`** interface is used to parse, construct, normalize, and encode URLs.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL)
+ */
+declare class URL {
+    constructor(url: string | URL, base?: string | URL);
+    /**
+     * The **`origin`** read-only property of the URL interface returns a string containing the Unicode serialization of the origin of the represented URL.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/origin)
+     */
+    get origin(): string;
+    /**
+     * The **`href`** property of the URL interface is a string containing the whole URL.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/href)
+     */
+    get href(): string;
+    /**
+     * The **`href`** property of the URL interface is a string containing the whole URL.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/href)
+     */
+    set href(value: string);
+    /**
+     * The **`protocol`** property of the URL interface is a string containing the protocol or scheme of the URL, including the final `':'`.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/protocol)
+     */
+    get protocol(): string;
+    /**
+     * The **`protocol`** property of the URL interface is a string containing the protocol or scheme of the URL, including the final `':'`.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/protocol)
+     */
+    set protocol(value: string);
+    /**
+     * The **`username`** property of the URL interface is a string containing the username component of the URL.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/username)
+     */
+    get username(): string;
+    /**
+     * The **`username`** property of the URL interface is a string containing the username component of the URL.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/username)
+     */
+    set username(value: string);
+    /**
+     * The **`password`** property of the URL interface is a string containing the password component of the URL.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/password)
+     */
+    get password(): string;
+    /**
+     * The **`password`** property of the URL interface is a string containing the password component of the URL.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/password)
+     */
+    set password(value: string);
+    /**
+     * The **`host`** property of the URL interface is a string containing the host, which is the URL.hostname, and then, if the port of the URL is nonempty, a `':'`, followed by the URL.port of the URL.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/host)
+     */
+    get host(): string;
+    /**
+     * The **`host`** property of the URL interface is a string containing the host, which is the URL.hostname, and then, if the port of the URL is nonempty, a `':'`, followed by the URL.port of the URL.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/host)
+     */
+    set host(value: string);
+    /**
+     * The **`hostname`** property of the URL interface is a string containing either the domain name or IP address of the URL.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/hostname)
+     */
+    get hostname(): string;
+    /**
+     * The **`hostname`** property of the URL interface is a string containing either the domain name or IP address of the URL.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/hostname)
+     */
+    set hostname(value: string);
+    /**
+     * The **`port`** property of the URL interface is a string containing the port number of the URL.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/port)
+     */
+    get port(): string;
+    /**
+     * The **`port`** property of the URL interface is a string containing the port number of the URL.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/port)
+     */
+    set port(value: string);
+    /**
+     * The **`pathname`** property of the URL interface represents a location in a hierarchical structure.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/pathname)
+     */
+    get pathname(): string;
+    /**
+     * The **`pathname`** property of the URL interface represents a location in a hierarchical structure.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/pathname)
+     */
+    set pathname(value: string);
+    /**
+     * The **`search`** property of the URL interface is a search string, also called a _query string_, that is a string containing a `'?'` followed by the parameters of the URL.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/search)
+     */
+    get search(): string;
+    /**
+     * The **`search`** property of the URL interface is a search string, also called a _query string_, that is a string containing a `'?'` followed by the parameters of the URL.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/search)
+     */
+    set search(value: string);
+    /**
+     * The **`hash`** property of the URL interface is a string containing a `'#'` followed by the fragment identifier of the URL.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/hash)
+     */
+    get hash(): string;
+    /**
+     * The **`hash`** property of the URL interface is a string containing a `'#'` followed by the fragment identifier of the URL.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/hash)
+     */
+    set hash(value: string);
+    /**
+     * The **`searchParams`** read-only property of the URL interface returns a URLSearchParams object allowing access to the decoded query arguments contained in the URL.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/searchParams)
+     */
+    get searchParams(): URLSearchParams;
+    /**
+     * The **`toJSON()`** method of the URL interface returns a string containing a serialized version of the URL, although in practice it seems to have the same effect as `URL.toString()`.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/toJSON)
+     */
+    toJSON(): string;
+    /* Returns the string form of the URL. */
+    toString(): string;
+    /**
+     * The **`URL.canParse()`** static method of the URL interface returns a boolean indicating whether or not an absolute URL, or a relative URL combined with a base URL, are parsable and valid.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/canParse_static)
+     */
+    static canParse(url: string, base?: string): boolean;
+    /**
+     * The **`URL.parse()`** static method of the URL interface returns a newly created URL object representing the URL defined by the parameters.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/parse_static)
+     */
+    static parse(url: string, base?: string): URL | null;
+    /**
+     * The **`createObjectURL()`** static method of the URL interface creates a string containing a URL representing the object given in the parameter.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/createObjectURL_static)
+     */
+    static createObjectURL(object: File | Blob): string;
+    /**
+     * The **`revokeObjectURL()`** static method of the URL interface releases an existing object URL which was previously created by calling `URL.createObjectURL()`. Call this method when you've finished using an object URL to let the browser know not to keep the reference to the file any longer.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URL/revokeObjectURL_static)
+     */
+    static revokeObjectURL(object_url: string): void;
+}
+/**
+ * The **`URLSearchParams`** interface defines utility methods to work with the query string of a URL.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URLSearchParams)
+ */
+declare class URLSearchParams {
+    constructor(init?: (Iterable<Iterable<string>> | Record<string, string> | string)); // init may be an iterable of string sequences, a string-to-string record, or a string
+    /**
+     * The **`size`** read-only property of the URLSearchParams interface indicates the total number of search parameter entries.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URLSearchParams/size)
+     */
+    get size(): number;
+    /**
+     * The **`append()`** method of the URLSearchParams interface appends a specified key/value pair as a new search parameter.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URLSearchParams/append)
+     */
+    append(name: string, value: string): void;
+    /**
+     * The **`delete()`** method of the URLSearchParams interface deletes specified parameters and their associated value(s) from the list of all search parameters.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URLSearchParams/delete)
+     */
+    delete(name: string, value?: string): void;
+    /**
+     * The **`get()`** method of the URLSearchParams interface returns the first value associated to the given search parameter.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URLSearchParams/get)
+     */
+    get(name: string): string | null;
+    /**
+     * The **`getAll()`** method of the URLSearchParams interface returns all the values associated with a given search parameter as an array.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URLSearchParams/getAll)
+     */
+    getAll(name: string): string[];
+    /**
+     * The **`has()`** method of the URLSearchParams interface returns a boolean value that indicates whether the specified parameter is in the search parameters.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URLSearchParams/has)
+     */
+    has(name: string, value?: string): boolean;
+    /**
+     * The **`set()`** method of the URLSearchParams interface sets the value associated with a given search parameter to the given value.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URLSearchParams/set)
+     */
+    set(name: string, value: string): void;
+    /**
+     * The **`URLSearchParams.sort()`** method sorts all key/value pairs contained in this object in place and returns `undefined`.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/URLSearchParams/sort)
+     */
+    sort(): void;
+    /* Returns an iterator of [key, value] pairs for every entry in the search params. */
+    entries(): IterableIterator<[
+        key: string,
+        value: string
+    ]>;
+    /* Returns an iterator over the keys in the search params. */
+    keys(): IterableIterator<string>;
+    /* Returns an iterator over the values in the search params. */
+    values(): IterableIterator<string>;
+    forEach<This = unknown>(callback: (this: This, value: string, key: string, parent: URLSearchParams) => void, thisArg?: This): void; // callback receives value before key
+    /* Returns the string form of the search params. */
+    toString(): string;
+    [Symbol.iterator](): IterableIterator<[
+        key: string,
+        value: string
+    ]>;
+}
+declare class URLPattern { // Exposes one read-only string getter per URL component plus test() and exec().
+    constructor(input?: (string | URLPatternInit), baseURL?: (string | URLPatternOptions), patternOptions?: URLPatternOptions); // second argument accepts either a base URL string or an options object
+    get protocol(): string;
+    get username(): string;
+    get password(): string;
+    get hostname(): string;
+    get port(): string;
+    get pathname(): string;
+    get search(): string;
+    get hash(): string;
+    get hasRegExpGroups(): boolean;
+    test(input?: (string | URLPatternInit), baseURL?: string): boolean; // same inputs as exec(), boolean result only
+    exec(input?: (string | URLPatternInit), baseURL?: string): URLPatternResult | null; // result may be null
+}
+interface URLPatternInit { // Object form accepted by the URLPattern constructor, test() and exec(); every field is optional.
+    protocol?: string;
+    username?: string;
+    password?: string;
+    hostname?: string;
+    port?: string;
+    pathname?: string;
+    search?: string;
+    hash?: string;
+    baseURL?: string;
+}
+interface URLPatternComponentResult { // Per-component entry type used by URLPatternResult.
+    input: string;
+    groups: Record<string, string>; // string-to-string record
+}
+interface URLPatternResult { // Non-null return type of URLPattern.exec: the inputs plus one component result per URL part.
+    inputs: (string | URLPatternInit)[];
+    protocol: URLPatternComponentResult;
+    username: URLPatternComponentResult;
+    password: URLPatternComponentResult;
+    hostname: URLPatternComponentResult;
+    port: URLPatternComponentResult;
+    pathname: URLPatternComponentResult;
+    search: URLPatternComponentResult;
+    hash: URLPatternComponentResult;
+}
+interface URLPatternOptions { // Options object accepted by the URLPattern constructor.
+    ignoreCase?: boolean; // the only option declared
+}
+/**
+ * A `CloseEvent` is sent to clients using WebSockets when the connection is closed.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/CloseEvent)
+ */
+declare class CloseEvent extends Event {
+    constructor(type: string, initializer?: CloseEventInit); // initializer is optional, and so is each of its fields
+    /**
+     * The **`code`** read-only property of the CloseEvent interface returns a WebSocket connection close code indicating the reason the connection was closed.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/CloseEvent/code)
+     */
+    readonly code: number;
+    /**
+     * The **`reason`** read-only property of the CloseEvent interface returns the WebSocket connection close reason the server gave for closing the connection; that is, a concise human-readable prose explanation for the closure.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/CloseEvent/reason)
+     */
+    readonly reason: string;
+    /**
+     * The **`wasClean`** read-only property of the CloseEvent interface returns `true` if the connection closed cleanly.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/CloseEvent/wasClean)
+     */
+    readonly wasClean: boolean;
+}
+interface CloseEventInit { // Optional initializer for the CloseEvent constructor; field names match CloseEvent's read-only properties.
+    code?: number;
+    reason?: string;
+    wasClean?: boolean;
+}
+type WebSocketEventMap = { // Event-name to event-type map passed as the type argument to EventTarget by the WebSocket interface.
+    close: CloseEvent;
+    message: MessageEvent;
+    open: Event;
+    error: ErrorEvent;
+};
+/**
+ * The `WebSocket` object provides the API for creating and managing a WebSocket connection to a server, as well as for sending and receiving data on the connection.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WebSocket)
+ */
+declare var WebSocket: {
+    prototype: WebSocket;
+    new (url: string, protocols?: (string[] | string)): WebSocket; // protocols may be a single string or an array of strings
+    readonly READY_STATE_CONNECTING: number; // each READY_STATE_* constant is declared next to an unprefixed counterpart
+    readonly CONNECTING: number;
+    readonly READY_STATE_OPEN: number;
+    readonly OPEN: number;
+    readonly READY_STATE_CLOSING: number;
+    readonly CLOSING: number;
+    readonly READY_STATE_CLOSED: number;
+    readonly CLOSED: number;
+};
+/**
+ * The `WebSocket` object provides the API for creating and managing a WebSocket connection to a server, as well as for sending and receiving data on the connection.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WebSocket)
+ */
+interface WebSocket extends EventTarget<WebSocketEventMap> {
+    accept(): void; // NOTE(review): no MDN link here — presumably a Workers-specific extension, confirm against the runtime docs
+    /**
+     * The **`WebSocket.send()`** method enqueues the specified data to be transmitted to the server over the WebSocket connection, increasing the value of `bufferedAmount` by the number of bytes needed to contain the data.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WebSocket/send)
+     */
+    send(message: (ArrayBuffer | ArrayBufferView) | string): void;
+    /**
+     * The **`WebSocket.close()`** method closes the WebSocket connection or connection attempt, if any. If the connection is already `CLOSED`, this method does nothing.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WebSocket/close)
+     */
+    close(code?: number, reason?: string): void;
+    serializeAttachment(attachment: any): void; // attachment is untyped; pairs with deserializeAttachment()
+    deserializeAttachment(): any | null;
+    /**
+     * The **`WebSocket.readyState`** read-only property returns the current state of the WebSocket connection.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WebSocket/readyState)
+     */
+    readyState: number; // NOTE(review): described as read-only above but not declared `readonly` here
+    /**
+     * The **`WebSocket.url`** read-only property returns the absolute URL of the WebSocket as resolved by the constructor.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WebSocket/url)
+     */
+    url: string | null; // nullable in this declaration
+    /**
+     * The **`WebSocket.protocol`** read-only property returns the name of the sub-protocol the server selected; this will be one of the strings specified in the `protocols` parameter when creating the WebSocket object, or the empty string if no connection is established.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WebSocket/protocol)
+     */
+    protocol: string | null; // nullable in this declaration
+    /**
+     * The **`WebSocket.extensions`** read-only property returns the extensions selected by the server.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/WebSocket/extensions)
+     */
+    extensions: string | null; // nullable in this declaration
+}
+declare const WebSocketPair: { // Zero-argument constructor.
+    new (): { // the constructed object holds two WebSocket members under keys 0 and 1
+        0: WebSocket;
+        1: WebSocket;
+    };
+};
+interface SqlStorage { // SQL entry point: exec() plus a size getter and references to the cursor/statement classes.
+    exec<T extends Record<string, SqlStorageValue>>(query: string, ...bindings: any[]): SqlStorageCursor<T>; // T is the row shape; bindings are untyped rest arguments
+    get databaseSize(): number; // NOTE(review): unit not stated here — presumably bytes, confirm
+    Cursor: typeof SqlStorageCursor;
+    Statement: typeof SqlStorageStatement;
+}
+declare abstract class SqlStorageStatement { // Abstract class with no declared members; referenced by SqlStorage.Statement.
+}
+type SqlStorageValue = ArrayBuffer | string | number | null; // column value types allowed by the row constraint on SqlStorage.exec and SqlStorageCursor
+declare abstract class SqlStorageCursor<T extends Record<string, SqlStorageValue>> { // Return type of SqlStorage.exec; iterable over rows of shape T.
+    next(): { // iterator-result-shaped union: a row with done false/absent, or done true with no value
+        done?: false;
+        value: T;
+    } | {
+        done: true;
+        value?: never;
+    };
+    toArray(): T[];
+    one(): T; // NOTE(review): behaviour when the result is not exactly one row is not visible here — confirm
+    raw<U extends SqlStorageValue[]>(): IterableIterator<U>; // rows as value arrays rather than keyed records
+    columnNames: string[];
+    get rowsRead(): number;
+    get rowsWritten(): number;
+    [Symbol.iterator](): IterableIterator<T>;
+}
+interface Socket { // Stream pair plus lifecycle promises; startTls() returns a Socket.
+    get readable(): ReadableStream;
+    get writable(): WritableStream;
+    get closed(): Promise<void>;
+    get opened(): Promise<SocketInfo>;
+    get upgraded(): boolean;
+    get secureTransport(): "on" | "off" | "starttls"; // literal union here, whereas SocketOptions.secureTransport is a plain string
+    close(): Promise<void>;
+    startTls(options?: TlsOptions): Socket;
+}
+interface SocketOptions { // allowHalfOpen is the only required field.
+    secureTransport?: string; // plain string here; the Socket.secureTransport getter narrows to "on" | "off" | "starttls"
+    allowHalfOpen: boolean;
+    highWaterMark?: (number | bigint);
+}
+interface SocketAddress { // Hostname/port pair; both fields are required.
+    hostname: string;
+    port: number;
+}
+interface TlsOptions { // Optional argument to Socket.startTls.
+    expectedServerHostname?: string; // the only option declared
+}
+interface SocketInfo { // Resolved value of Socket.opened; both addresses are optional strings.
+    remoteAddress?: string;
+    localAddress?: string;
+}
+/**
+ * The **`EventSource`** interface is web content's interface to server-sent events.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/EventSource)
+ */
+declare class EventSource extends EventTarget {
+    constructor(url: string, init?: EventSourceEventSourceInit);
+    /**
+     * The **`close()`** method of the EventSource interface closes the connection, if one is made, and sets the `readyState` property to the closed state.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/EventSource/close)
+     */
+    close(): void;
+    /**
+     * The **`url`** read-only property of the EventSource interface returns a string representing the URL of the source.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/EventSource/url)
+     */
+    get url(): string;
+    /**
+     * The **`withCredentials`** read-only property of the EventSource interface returns a boolean value indicating whether the `EventSource` object was instantiated with CORS credentials set.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/EventSource/withCredentials)
+     */
+    get withCredentials(): boolean;
+    /**
+     * The **`readyState`** read-only property of the EventSource interface returns a number representing the state of the connection.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/EventSource/readyState)
+     */
+    get readyState(): number;
+    /* [MDN Reference](https://developer.mozilla.org/docs/Web/API/EventSource/open_event) */
+    get onopen(): any | null;
+    /* [MDN Reference](https://developer.mozilla.org/docs/Web/API/EventSource/open_event) */
+    set onopen(value: any | null);
+    /* [MDN Reference](https://developer.mozilla.org/docs/Web/API/EventSource/message_event) */
+    get onmessage(): any | null;
+    /* [MDN Reference](https://developer.mozilla.org/docs/Web/API/EventSource/message_event) */
+    set onmessage(value: any | null);
+    /* [MDN Reference](https://developer.mozilla.org/docs/Web/API/EventSource/error_event) */
+    get onerror(): any | null;
+    /* [MDN Reference](https://developer.mozilla.org/docs/Web/API/EventSource/error_event) */
+    set onerror(value: any | null);
+    static readonly CONNECTING: number;
+    static readonly OPEN: number;
+    static readonly CLOSED: number;
+    static from(stream: ReadableStream): EventSource; // builds an EventSource from a ReadableStream instead of a URL
+}
+interface EventSourceEventSourceInit { // Optional second argument to the EventSource constructor.
+    withCredentials?: boolean;
+    fetcher?: Fetcher; // NOTE(review): presumably the Fetcher used to open the connection — confirm against the runtime docs
+}
+interface Container { // Lifecycle controls: start() and signal() are synchronous; monitor(), destroy() and setInactivityTimeout() return promises.
+    get running(): boolean;
+    start(options?: ContainerStartupOptions): void;
+    monitor(): Promise<void>;
+    destroy(error?: any): Promise<void>; // error is optional and untyped
+    signal(signo: number): void;
+    getTcpPort(port: number): Fetcher; // returns a Fetcher for the given port number
+    setInactivityTimeout(durationMs: number | bigint): Promise<void>; // duration in milliseconds, per the parameter name
+}
+interface ContainerStartupOptions { // Optional argument to Container.start; enableInternet is the only required field.
+    entrypoint?: string[];
+    enableInternet: boolean;
+    env?: Record<string, string>;
+    hardTimeout?: (number | bigint); // NOTE(review): unit not stated here — confirm
+}
+/**
+ * The **`MessagePort`** interface of the Channel Messaging API represents one of the two ports of a MessageChannel, allowing messages to be sent from one port and listening out for them arriving at the other.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/MessagePort)
+ */
+declare abstract class MessagePort extends EventTarget {
+    /**
+     * The **`postMessage()`** method of the MessagePort interface sends a message from the port, and optionally transfers ownership of objects to other browsing contexts.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/MessagePort/postMessage)
+     */
+    postMessage(data?: any, options?: (any[] | MessagePortPostMessageOptions)): void; // options is either a bare array or an options object
+    /**
+     * The **`close()`** method of the MessagePort interface disconnects the port, so it is no longer active.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/MessagePort/close)
+     */
+    close(): void;
+    /**
+     * The **`start()`** method of the MessagePort interface starts the sending of messages queued on the port.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/MessagePort/start)
+     */
+    start(): void;
+    get onmessage(): any | null;
+    set onmessage(value: any | null);
+}
+/**
+ * The **`MessageChannel`** interface of the Channel Messaging API allows us to create a new message channel and send data through it via its two MessagePort properties.
+ *
+ * [MDN Reference](https://developer.mozilla.org/docs/Web/API/MessageChannel)
+ */
+declare class MessageChannel {
+    constructor();
+    /**
+     * The **`port1`** read-only property of the MessageChannel interface returns the first port of the message channel — the port attached to the context that originated the channel.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/MessageChannel/port1)
+     */
+    readonly port1: MessagePort;
+    /**
+     * The **`port2`** read-only property of the MessageChannel interface returns the second port of the message channel — the port attached to the context at the other end of the channel, which the message is initially sent to.
+     *
+     * [MDN Reference](https://developer.mozilla.org/docs/Web/API/MessageChannel/port2)
+     */
+    readonly port2: MessagePort;
+}
+interface MessagePortPostMessageOptions { // Object form of the options argument to MessagePort.postMessage.
+    transfer?: any[];
+}
+type LoopbackForExport<T extends (new (...args: any[]) => Rpc.EntrypointBranded) | ExportedHandler<any, any, any> | undefined = undefined> = T extends new (...args: any[]) => Rpc.WorkerEntrypointBranded ? LoopbackServiceStub<InstanceType<T>> : T extends new (...args: any[]) => Rpc.DurableObjectBranded ? LoopbackDurableObjectClass<InstanceType<T>> : T extends ExportedHandler<any, any, any> ? LoopbackServiceStub<undefined> : undefined; // WorkerEntrypointBranded constructor -> LoopbackServiceStub; DurableObjectBranded constructor -> LoopbackDurableObjectClass; ExportedHandler -> LoopbackServiceStub<undefined>; anything else -> undefined
+type LoopbackServiceStub<T extends Rpc.WorkerEntrypointBranded | undefined = undefined> = Fetcher<T> & (T extends CloudflareWorkersModule.WorkerEntrypoint<any, infer Props> ? (opts: { // a Fetcher<T> that is also callable with { props } to get another Fetcher<T>
+    props?: Props; // typed from the entrypoint's inferred Props
+}) => Fetcher<T> : (opts: {
+    props?: any; // fallback when Props cannot be inferred
+}) => Fetcher<T>);
+type LoopbackDurableObjectClass<T extends Rpc.DurableObjectBranded | undefined = undefined> = DurableObjectClass<T> & (T extends CloudflareWorkersModule.DurableObject<any, infer Props> ? (opts: { // a DurableObjectClass<T> that is also callable with { props } to get another DurableObjectClass<T>
+    props?: Props; // typed from the Durable Object's inferred Props
+}) => DurableObjectClass<T> : (opts: {
+    props?: any; // fallback when Props cannot be inferred
+}) => DurableObjectClass<T>);
+interface SyncKvStorage { // Key-value operations that return values directly rather than promises.
+    get<T = unknown>(key: string): T | undefined; // undefined is part of the return type
+    list<T = unknown>(options?: SyncKvListOptions): Iterable<[ // yields [key, value] tuples
+        string,
+        T
+    ]>;
+    put<T>(key: string, value: T): void;
+    delete(key: string): boolean; // NOTE(review): presumably true when a key was removed — confirm
+}
+interface SyncKvListOptions { // Optional argument to SyncKvStorage.list; every field is optional.
+    start?: string;
+    startAfter?: string;
+    end?: string;
+    prefix?: string;
+    reverse?: boolean;
+    limit?: number;
+}
+interface WorkerStub { // Return type of WorkerLoader.get.
+    getEntrypoint<T extends Rpc.WorkerEntrypointBranded | undefined>(name?: string, options?: WorkerStubEntrypointOptions): Fetcher<T>; // both arguments are optional; result is a Fetcher<T>
+}
+interface WorkerStubEntrypointOptions { // Optional second argument to WorkerStub.getEntrypoint.
+    props?: any; // untyped
+}
+interface WorkerLoader { // Single-method interface producing WorkerStub objects.
+    get(name: string | null, getCode: () => WorkerLoaderWorkerCode | Promise<WorkerLoaderWorkerCode>): WorkerStub; // name may be null; getCode may return the code directly or as a promise
+}
+interface WorkerLoaderModule { // Object form of a WorkerLoaderWorkerCode.modules entry; every field is optional.
+    js?: string;
+    cjs?: string;
+    text?: string;
+    data?: ArrayBuffer;
+    json?: any;
+    py?: string;
+    wasm?: ArrayBuffer;
+}
+interface WorkerLoaderWorkerCode { // Value produced by the getCode callback of WorkerLoader.get; compatibilityDate, mainModule and modules are required.
+    compatibilityDate: string;
+    compatibilityFlags?: string[];
+    allowExperimental?: boolean;
+    mainModule: string;
+    modules: Record<string, WorkerLoaderModule | string>; // each entry is either a module object or a bare string
+    env?: any;
+    globalOutbound?: (Fetcher | null); // null is allowed in addition to being omitted
+    tails?: Fetcher[];
+    streamingTails?: Fetcher[];
+}
+/**
+* The Workers runtime supports a subset of the Performance API, used to measure timing and performance,
+* as well as timing of subrequests and other operations.
+*
+* [Cloudflare Docs Reference](https://developers.cloudflare.com/workers/runtime-apis/performance/)
+*/
+declare abstract class Performance { // only timeOrigin and now() are declared in this class
+    /* [Cloudflare Docs Reference](https://developers.cloudflare.com/workers/runtime-apis/performance/#performancetimeorigin) */
+    get timeOrigin(): number;
+    /* [Cloudflare Docs Reference](https://developers.cloudflare.com/workers/runtime-apis/performance/#performancenow) */
+    now(): number;
+}
+// AI Search V2 API Error Interfaces
+interface AiSearchInternalError extends Error { // Extends Error without adding members.
+}
+interface AiSearchNotFoundError extends Error { // Extends Error without adding members.
+}
+interface AiSearchNameNotSetError extends Error { // Extends Error without adding members.
+}
+// Filter types (shared with AutoRAG for compatibility)
+type ComparisonFilter = { // Single key/operator/value filter; usable on its own or inside CompoundFilter.filters.
+    key: string;
+    type: 'eq' | 'ne' | 'gt' | 'gte' | 'lt' | 'lte'; // comparison operator
+    value: string | number | boolean;
+};
+type CompoundFilter = { // Combines comparison filters with 'and' or 'or'.
+    type: 'and' | 'or';
+    filters: ComparisonFilter[]; // comparison filters only, so compound filters cannot be nested
+};
+// AI Search V2 Request Types
+type AiSearchSearchRequest = { // Parameter type of AiSearchInstanceService.search; only messages is required.
+    messages: Array<{
+        role: 'system' | 'developer' | 'user' | 'assistant' | 'tool';
+        content: string | null;
+    }>;
+    ai_search_options?: { // each options level also carries an index signature, so extra keys are accepted
+        retrieval?: {
+            retrieval_type?: 'vector' | 'keyword' | 'hybrid';
+            /** Match threshold (0-1, default 0.4) */
+            match_threshold?: number;
+            /** Maximum number of results (1-50, default 10) */
+            max_num_results?: number;
+            filters?: CompoundFilter | ComparisonFilter;
+            /** Context expansion (0-3, default 0) */
+            context_expansion?: number;
+            [key: string]: unknown;
+        };
+        query_rewrite?: {
+            enabled?: boolean;
+            model?: string;
+            rewrite_prompt?: string;
+            [key: string]: unknown;
+        };
+        reranking?: {
+            /** Enable reranking (default false) */
+            enabled?: boolean;
+            model?: '@cf/baai/bge-reranker-base' | ''; // one named model or the empty string
+            /** Match threshold (0-1, default 0.4) */
+            match_threshold?: number;
+            [key: string]: unknown;
+        };
+        [key: string]: unknown;
+    };
+};
+type AiSearchChatCompletionsRequest = { // Parameter type of AiSearchInstanceService.chatCompletions; same options as AiSearchSearchRequest plus model, stream and a top-level index signature.
+    messages: Array<{
+        role: 'system' | 'developer' | 'user' | 'assistant' | 'tool';
+        content: string | null;
+    }>;
+    model?: string;
+    stream?: boolean;
+    ai_search_options?: {
+        retrieval?: { // the ranges and defaults documented on AiSearchSearchRequest are not repeated on these fields
+            retrieval_type?: 'vector' | 'keyword' | 'hybrid';
+            match_threshold?: number;
+            max_num_results?: number;
+            filters?: CompoundFilter | ComparisonFilter;
+            context_expansion?: number;
+            [key: string]: unknown;
+        };
+        query_rewrite?: {
+            enabled?: boolean;
+            model?: string;
+            rewrite_prompt?: string;
+            [key: string]: unknown;
+        };
+        reranking?: {
+            enabled?: boolean;
+            model?: '@cf/baai/bge-reranker-base' | ''; // one named model or the empty string
+            match_threshold?: number;
+            [key: string]: unknown;
+        };
+        [key: string]: unknown;
+    };
+    [key: string]: unknown; // extra top-level keys are accepted, unlike AiSearchSearchRequest
+};
+// AI Search V2 Response Types
+type AiSearchSearchResponse = { // Resolved value of AiSearchInstanceService.search.
+    search_query: string;
+    chunks: Array<{
+        id: string;
+        type: string;
+        /** Match score (0-1) */
+        score: number;
+        text: string;
+        item: { // key is the only required field
+            timestamp?: number;
+            key: string;
+            metadata?: Record<string, unknown>;
+        };
+        scoring_details?: { // optional, and so is each score inside it
+            /** Keyword match score (0-1) */
+            keyword_score?: number;
+            /** Vector similarity score (0-1) */
+            vector_score?: number;
+        };
+    }>;
+};
+type AiSearchListResponse = Array<{ // Resolved value of AiSearchAccountService.list; id is the only required field per element.
+    id: string;
+    internal_id?: string;
+    account_id?: string;
+    account_tag?: string;
+    /** Whether the instance is enabled (default true) */
+    enable?: boolean;
+    type?: 'r2' | 'web-crawler';
+    source?: string;
+    [key: string]: unknown; // additional keys are accepted
+}>;
+type AiSearchConfig = { // Parameter type of AiSearchAccountService.create; id, type and source are required.
+    /** Instance ID (1-32 chars, pattern: ^[a-z0-9_]+(?:-[a-z0-9_]+)*$) */
+    id: string;
+    type: 'r2' | 'web-crawler';
+    source: string;
+    source_params?: object; // shape not specified by this declaration
+    /** Token ID (UUID format) */
+    token_id?: string;
+    ai_gateway_id?: string;
+    /** Enable query rewriting (default false) */
+    rewrite_query?: boolean;
+    /** Enable reranking (default false) */
+    reranking?: boolean;
+    embedding_model?: string;
+    ai_search_model?: string;
+};
+/**
+ * Description of a single AI Search instance: a required `id`, optional
+ * `enable` / `type` / `source`, and an open index signature for any other keys.
+ */
+type AiSearchInstance = {
+    id: string;
+    enable?: boolean;
+    type?: 'r2' | 'web-crawler';
+    source?: string;
+    [key: string]: unknown;
+};
+// AI Search Instance Service - Instance-level operations
+/**
+ * Ambient declaration of the operations available on one AI Search instance:
+ * `search`, `chatCompletions` and `delete`. All three return promises.
+ */
+declare abstract class AiSearchInstanceService {
+    /**
+     * Search the AI Search instance for relevant chunks.
+     * @param params Search request with messages and AI search options
+     * @returns Search response with matching chunks
+     */
+    search(params: AiSearchSearchRequest): Promise<AiSearchSearchResponse>;
+    /**
+     * Generate chat completions with AI Search context.
+     * @param params Chat completions request with optional streaming
+     * @returns Response object (if streaming) or chat completion result
+     */
+    chatCompletions(params: AiSearchChatCompletionsRequest): Promise<Response | object>;
+    /**
+     * Delete this AI Search instance.
+     * @returns Promise that resolves with no value
+     */
+    delete(): Promise<void>;
+}
+// AI Search Account Service - Account-level operations
+/**
+ * Ambient declaration of the account-level AI Search operations.
+ * Note that `get` returns an `AiSearchInstanceService` synchronously, while
+ * `list` and `create` return promises.
+ */
+declare abstract class AiSearchAccountService {
+    /**
+     * List all AI Search instances in the account.
+     * @returns Array of AI Search instances
+     */
+    list(): Promise<AiSearchListResponse>;
+    /**
+     * Get an AI Search instance by ID.
+     * @param name Instance ID
+     * @returns Instance service for performing operations
+     */
+    get(name: string): AiSearchInstanceService;
+    /**
+     * Create a new AI Search instance.
+     * @param config Instance configuration
+     * @returns Instance service for performing operations
+     */
+    create(config: AiSearchConfig): Promise<AiSearchInstanceService>;
+}
+/** Type of `BaseAiImageClassification.inputs`: the image as an array of numbers. */
+type AiImageClassificationInput = {
+    image: number[];
+};
+/** Type of `BaseAiImageClassification.postProcessedOutputs`: an array of optional score/label pairs. */
+type AiImageClassificationOutput = {
+    score?: number;
+    label?: string;
+}[];
+/** Pairs the image classification input and output types. */
+declare abstract class BaseAiImageClassification {
+    inputs: AiImageClassificationInput;
+    postProcessedOutputs: AiImageClassificationOutput;
+}
+/**
+ * Type of `BaseAiImageToText.inputs`. Only `image` (an array of numbers) is required;
+ * the prompt, generation parameters and `messages` are optional.
+ */
+type AiImageToTextInput = {
+    image: number[];
+    prompt?: string;
+    max_tokens?: number;
+    temperature?: number;
+    top_p?: number;
+    top_k?: number;
+    seed?: number;
+    repetition_penalty?: number;
+    frequency_penalty?: number;
+    presence_penalty?: number;
+    raw?: boolean;
+    messages?: RoleScopedChatInput[];
+};
+/** Type of `BaseAiImageToText.postProcessedOutputs`: a single `description` string. */
+type AiImageToTextOutput = {
+    description: string;
+};
+/** Pairs the image-to-text input and output types. */
+declare abstract class BaseAiImageToText {
+    inputs: AiImageToTextInput;
+    postProcessedOutputs: AiImageToTextOutput;
+}
+/**
+ * Type of `BaseAiImageTextToText.inputs`. Unlike `AiImageToTextInput`, `image` is a
+ * `string` here and an extra optional `ignore_eos` flag is declared.
+ */
+type AiImageTextToTextInput = {
+    image: string;
+    prompt?: string;
+    max_tokens?: number;
+    temperature?: number;
+    ignore_eos?: boolean;
+    top_p?: number;
+    top_k?: number;
+    seed?: number;
+    repetition_penalty?: number;
+    frequency_penalty?: number;
+    presence_penalty?: number;
+    raw?: boolean;
+    messages?: RoleScopedChatInput[];
+};
+/** Type of `BaseAiImageTextToText.postProcessedOutputs`: a single `description` string. */
+type AiImageTextToTextOutput = {
+    description: string;
+};
+/** Pairs the image-text-to-text input and output types. */
+declare abstract class BaseAiImageTextToText {
+    inputs: AiImageTextToTextInput;
+    postProcessedOutputs: AiImageTextToTextOutput;
+}
+/** Multimodal embeddings input: one `image` string plus an array of `text` strings. */
+type AiMultimodalEmbeddingsInput = {
+    image: string;
+    text: string[];
+};
+/**
+ * Multimodal embeddings output: a two-dimensional `data` array and a `shape` array.
+ * The doubled `I` in the name is how the type is declared; it is kept so existing references resolve.
+ */
+type AiIMultimodalEmbeddingsOutput = {
+    data: number[][];
+    shape: number[];
+};
+/**
+ * Pairs the multimodal embeddings input and output types
+ * (`AiMultimodalEmbeddingsInput` / `AiIMultimodalEmbeddingsOutput`).
+ */
+declare abstract class BaseAiMultimodalEmbeddings {
+    inputs: AiMultimodalEmbeddingsInput;
+    postProcessedOutputs: AiIMultimodalEmbeddingsOutput;
+}
+/** Type of `BaseAiObjectDetection.inputs`: the image as an array of numbers. */
+type AiObjectDetectionInput = {
+    image: number[];
+};
+/** Type of `BaseAiObjectDetection.postProcessedOutputs`: an array of optional score/label pairs. */
+type AiObjectDetectionOutput = {
+    score?: number;
+    label?: string;
+}[];
+/** Pairs the object detection input and output types. */
+declare abstract class BaseAiObjectDetection {
+    inputs: AiObjectDetectionInput;
+    postProcessedOutputs: AiObjectDetectionOutput;
+}
+/** Type of `BaseAiSentenceSimilarity.inputs`: one `source` string and an array of `sentences`. */
+type AiSentenceSimilarityInput = {
+    source: string;
+    sentences: string[];
+};
+/** Type of `BaseAiSentenceSimilarity.postProcessedOutputs`: a flat array of numbers. */
+type AiSentenceSimilarityOutput = number[];
+/** Pairs the sentence similarity input and output types. */
+declare abstract class BaseAiSentenceSimilarity {
+    inputs: AiSentenceSimilarityInput;
+    postProcessedOutputs: AiSentenceSimilarityOutput;
+}
+/** Type of `BaseAiAutomaticSpeechRecognition.inputs`: the audio as an array of numbers. */
+type AiAutomaticSpeechRecognitionInput = {
+    audio: number[];
+};
+/**
+ * Type of `BaseAiAutomaticSpeechRecognition.postProcessedOutputs`.
+ * Every top-level field is optional; entries of `words` require `word`, `start` and `end`.
+ */
+type AiAutomaticSpeechRecognitionOutput = {
+    text?: string;
+    words?: {
+        word: string;
+        start: number;
+        end: number;
+    }[];
+    vtt?: string;
+};
+/** Pairs the automatic speech recognition input and output types. */
+declare abstract class BaseAiAutomaticSpeechRecognition {
+    inputs: AiAutomaticSpeechRecognitionInput;
+    postProcessedOutputs: AiAutomaticSpeechRecognitionOutput;
+}
+/** Type of `BaseAiSummarization.inputs`: required `input_text` and an optional `max_length`. */
+type AiSummarizationInput = {
+    input_text: string;
+    max_length?: number;
+};
+/** Type of `BaseAiSummarization.postProcessedOutputs`: a single `summary` string. */
+type AiSummarizationOutput = {
+    summary: string;
+};
+/** Pairs the summarization input and output types. */
+declare abstract class BaseAiSummarization {
+    inputs: AiSummarizationInput;
+    postProcessedOutputs: AiSummarizationOutput;
+}
+/** Type of `BaseAiTextClassification.inputs`: a single `text` string. */
+type AiTextClassificationInput = {
+    text: string;
+};
+/** Type of `BaseAiTextClassification.postProcessedOutputs`: an array of optional score/label pairs. */
+type AiTextClassificationOutput = {
+    score?: number;
+    label?: string;
+}[];
+/** Pairs the text classification input and output types. */
+declare abstract class BaseAiTextClassification {
+    inputs: AiTextClassificationInput;
+    postProcessedOutputs: AiTextClassificationOutput;
+}
+/** Type of `BaseAiTextEmbeddings.inputs`: `text` is either one string or an array of strings. */
+type AiTextEmbeddingsInput = {
+    text: string | string[];
+};
+/** Type of `BaseAiTextEmbeddings.postProcessedOutputs`: a `shape` array and a two-dimensional `data` array. */
+type AiTextEmbeddingsOutput = {
+    shape: number[];
+    data: number[][];
+};
+/** Pairs the text embeddings input and output types. */
+declare abstract class BaseAiTextEmbeddings {
+    inputs: AiTextEmbeddingsInput;
+    postProcessedOutputs: AiTextEmbeddingsOutput;
+}
+/**
+ * A chat message with a role, string content and an optional name.
+ * `role` lists four known literals but, via `string & NonNullable<unknown>`, still accepts any string.
+ */
+type RoleScopedChatInput = {
+    role: "user" | "assistant" | "system" | "tool" | (string & NonNullable<unknown>);
+    content: string;
+    name?: string;
+};
+/**
+ * Legacy (flat) tool definition: `name`, `description` and optional `parameters` sit at the top level.
+ * When `parameters` is present, `type`, `properties` and `required` must all be supplied.
+ */
+type AiTextGenerationToolLegacyInput = {
+    name: string;
+    description: string;
+    parameters?: {
+        type: "object" | (string & NonNullable<unknown>);
+        properties: {
+            [key: string]: {
+                type: string;
+                description?: string;
+            };
+        };
+        required: string[];
+    };
+};
+/**
+ * Tool definition in which the name, description and parameters are nested under `function`,
+ * alongside a top-level `type` ("function" or any other string).
+ */
+type AiTextGenerationToolInput = {
+    type: "function" | (string & NonNullable<unknown>);
+    function: {
+        name: string;
+        description: string;
+        parameters?: {
+            type: "object" | (string & NonNullable<unknown>);
+            properties: {
+                [key: string]: {
+                    type: string;
+                    description?: string;
+                };
+            };
+            required: string[];
+        };
+    };
+};
+/** Element type of `AiTextGenerationInput.functions`: a `name` and a `code` string. */
+type AiTextGenerationFunctionsInput = {
+    name: string;
+    code: string;
+};
+/** Type of `AiTextGenerationInput.response_format`; `json_schema` is typed `any`, so its contents are unchecked. */
+type AiTextGenerationResponseFormat = {
+    type: string;
+    json_schema?: any;
+};
+/**
+ * Type of `BaseAiTextGeneration.inputs`. Every field is optional, including both
+ * `prompt` and `messages`.
+ */
+type AiTextGenerationInput = {
+    prompt?: string;
+    raw?: boolean;
+    stream?: boolean;
+    max_tokens?: number;
+    temperature?: number;
+    top_p?: number;
+    top_k?: number;
+    seed?: number;
+    repetition_penalty?: number;
+    frequency_penalty?: number;
+    presence_penalty?: number;
+    messages?: RoleScopedChatInput[];
+    response_format?: AiTextGenerationResponseFormat;
+    /** Either tool-definition array form, or any other object (the last union member accepts any object). */
+    tools?: AiTextGenerationToolInput[] | AiTextGenerationToolLegacyInput[] | (object & NonNullable<unknown>);
+    functions?: AiTextGenerationFunctionsInput[];
+};
+/** Legacy tool-call shape: `name` plus `arguments` typed as `unknown`. */
+type AiTextGenerationToolLegacyOutput = {
+    name: string;
+    arguments: unknown;
+};
+/** Tool-call shape with an `id`, a fixed `type` of "function", and `function.arguments` as a string. */
+type AiTextGenerationToolOutput = {
+    id: string;
+    type: "function";
+    function: {
+        name: string;
+        arguments: string;
+    };
+};
+/** Token counts; used as the type of `AiTextGenerationOutput.usage`. */
+type UsageTags = {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+};
+/** Type of `BaseAiTextGeneration.postProcessedOutputs`; all fields are optional. */
+type AiTextGenerationOutput = {
+    response?: string;
+    // NOTE(review): this is an intersection (`&`) of the two array types, not a union (`|`),
+    // so each element is typed as having both shapes at once — confirm this is intended.
+    tool_calls?: AiTextGenerationToolLegacyOutput[] & AiTextGenerationToolOutput[];
+    usage?: UsageTags;
+};
+/** Pairs the text generation input and output types. */
+declare abstract class BaseAiTextGeneration {
+    inputs: AiTextGenerationInput;
+    postProcessedOutputs: AiTextGenerationOutput;
+}
+/** Type of `BaseAiTextToSpeech.inputs`: required `prompt` and an optional `lang`. */
+type AiTextToSpeechInput = {
+    prompt: string;
+    lang?: string;
+};
+/** Type of `BaseAiTextToSpeech.postProcessedOutputs`: either a `Uint8Array` or an object with an `audio` string. */
+type AiTextToSpeechOutput = Uint8Array | {
+    audio: string;
+};
+/** Pairs the text-to-speech input and output types. */
+declare abstract class BaseAiTextToSpeech {
+    inputs: AiTextToSpeechInput;
+    postProcessedOutputs: AiTextToSpeechOutput;
+}
+/**
+ * Type of `BaseAiTextToImage.inputs`. Only `prompt` is required. An input image can be
+ * given either as `image` (array of numbers) or `image_b64` (string).
+ */
+type AiTextToImageInput = {
+    prompt: string;
+    negative_prompt?: string;
+    height?: number;
+    width?: number;
+    image?: number[];
+    image_b64?: string;
+    mask?: number[];
+    num_steps?: number;
+    strength?: number;
+    guidance?: number;
+    seed?: number;
+};
+/** Type of `BaseAiTextToImage.postProcessedOutputs`: a readable stream of byte chunks. */
+type AiTextToImageOutput = ReadableStream<Uint8Array>;
+/** Pairs the text-to-image input and output types. */
+declare abstract class BaseAiTextToImage {
+    inputs: AiTextToImageInput;
+    postProcessedOutputs: AiTextToImageOutput;
+}
+/** Type of `BaseAiTranslation.inputs`: `text` and `target_lang` are required, `source_lang` is optional. */
+type AiTranslationInput = {
+    text: string;
+    target_lang: string;
+    source_lang?: string;
+};
+/** Type of `BaseAiTranslation.postProcessedOutputs`; `translated_text` is optional. */
+type AiTranslationOutput = {
+    translated_text?: string;
+};
+/** Pairs the translation input and output types. */
+declare abstract class BaseAiTranslation {
+    inputs: AiTranslationInput;
+    postProcessedOutputs: AiTranslationOutput;
+}
+/**
+ * Workers AI support for OpenAI's Responses API
+ * Reference: https://github.com/openai/openai-node/blob/master/src/resources/responses/responses.ts
+ *
+ * It's a stripped down version from its source.
+ * It currently supports basic function calling, json mode and accepts images as input.
+ *
+ * It does not include types for WebSearch, CodeInterpreter, FileInputs, MCP, CustomTools.
+ * We plan to add those incrementally as model + platform capabilities evolve.
+ *
+ * `ResponsesInput` is the request shape: every field is optional, and many also accept `null`.
+ */
+type ResponsesInput = {
+    background?: boolean | null;
+    conversation?: string | ResponseConversationParam | null;
+    include?: Array<ResponseIncludable> | null;
+    /** Either a plain string or an array of input items. */
+    input?: string | ResponseInput;
+    instructions?: string | null;
+    max_output_tokens?: number | null;
+    parallel_tool_calls?: boolean | null;
+    previous_response_id?: string | null;
+    prompt_cache_key?: string;
+    reasoning?: Reasoning | null;
+    safety_identifier?: string;
+    service_tier?: "auto" | "default" | "flex" | "scale" | "priority" | null;
+    stream?: boolean | null;
+    stream_options?: StreamOptions | null;
+    temperature?: number | null;
+    text?: ResponseTextConfig;
+    tool_choice?: ToolChoiceOptions | ToolChoiceFunction;
+    tools?: Array<Tool>;
+    top_p?: number | null;
+    truncation?: "auto" | "disabled" | null;
+};
+/**
+ * Response-side counterpart of `ResponsesInput`. Every field is optional, so consumers
+ * must check for presence before reading any of them.
+ */
+type ResponsesOutput = {
+    id?: string;
+    created_at?: number;
+    output_text?: string;
+    error?: ResponseError | null;
+    incomplete_details?: ResponseIncompleteDetails | null;
+    instructions?: string | Array<ResponseInputItem> | null;
+    /** When present, always the literal "response". */
+    object?: "response";
+    output?: Array<ResponseOutputItem>;
+    parallel_tool_calls?: boolean;
+    temperature?: number | null;
+    tool_choice?: ToolChoiceOptions | ToolChoiceFunction;
+    tools?: Array<Tool>;
+    top_p?: number | null;
+    max_output_tokens?: number | null;
+    previous_response_id?: string | null;
+    prompt?: ResponsePrompt | null;
+    reasoning?: Reasoning | null;
+    safety_identifier?: string;
+    service_tier?: "auto" | "default" | "flex" | "scale" | "priority" | null;
+    status?: ResponseStatus;
+    text?: ResponseTextConfig;
+    truncation?: "auto" | "disabled" | null;
+    usage?: ResponseUsage;
+};
+/**
+ * Input message whose `content` may be a plain string or a content list.
+ * Unlike `ResponseInputItemMessage`, the "assistant" role is allowed here.
+ */
+type EasyInputMessage = {
+    content: string | ResponseInputMessageContentList;
+    role: "user" | "assistant" | "system" | "developer";
+    type?: "message";
+};
+/**
+ * Function tool definition; aliased as `Tool`.
+ * `parameters` and `strict` are required keys but may be `null`.
+ */
+type ResponsesFunctionTool = {
+    name: string;
+    parameters: {
+        [key: string]: unknown;
+    } | null;
+    strict: boolean | null;
+    type: "function";
+    description?: string | null;
+};
+/** Type of `ResponsesOutput.incomplete_details`: an optional `reason` with two allowed literals. */
+type ResponseIncompleteDetails = {
+    reason?: "max_output_tokens" | "content_filter";
+};
+/** Type of `ResponsesOutput.prompt`: a required `id` with optional `variables` and `version`. */
+type ResponsePrompt = {
+    id: string;
+    variables?: {
+        [key: string]: string | ResponseInputText | ResponseInputImage;
+    } | null;
+    version?: string | null;
+};
+/**
+ * Type of the `reasoning` field on `ResponsesInput` and `ResponsesOutput`.
+ * `generate_summary` and `summary` accept the same set of values.
+ */
+type Reasoning = {
+    effort?: ReasoningEffort | null;
+    generate_summary?: "auto" | "concise" | "detailed" | null;
+    summary?: "auto" | "concise" | "detailed" | null;
+};
+/** Union of every content-part type: input text/image, output text/refusal, and reasoning text. */
+type ResponseContent = ResponseInputText | ResponseInputImage | ResponseOutputText | ResponseOutputRefusal | ResponseContentReasoningText;
+/** Content part tagged "reasoning_text". */
+type ResponseContentReasoningText = {
+    text: string;
+    type: "reasoning_text";
+};
+/** Object form of `ResponsesInput.conversation`, carrying only an `id`. */
+type ResponseConversationParam = {
+    id: string;
+};
+/**
+ * Stream event tagged "response.created".
+ * NOTE(review): `Response` is not declared among these Responses API types; confirm it
+ * resolves to the intended response shape rather than the global Fetch API `Response`.
+ */
+type ResponseCreatedEvent = {
+    response: Response;
+    sequence_number: number;
+    type: "response.created";
+};
+/** Item tagged "custom_tool_call_output"; `output` is a string or an array of input text/image parts. */
+type ResponseCustomToolCallOutput = {
+    call_id: string;
+    output: string | Array<ResponseInputText | ResponseInputImage>;
+    type: "custom_tool_call_output";
+    id?: string;
+};
+/** Type of `ResponsesOutput.error`: a `code` restricted to a fixed set of literals, plus a `message`. */
+type ResponseError = {
+    code: "server_error" | "rate_limit_exceeded" | "invalid_prompt" | "vector_store_timeout" | "invalid_image" | "invalid_image_format" | "invalid_base64_image" | "invalid_image_url" | "image_too_large" | "image_too_small" | "image_parse_error" | "image_content_policy_violation" | "invalid_image_mode" | "image_file_too_large" | "unsupported_image_media_type" | "empty_image_file" | "failed_to_download_image" | "image_file_not_found";
+    message: string;
+};
+/** Stream event tagged "error". Unlike `ResponseError`, `code` is a free-form string and may be `null`. */
+type ResponseErrorEvent = {
+    code: string | null;
+    message: string;
+    param: string | null;
+    sequence_number: number;
+    type: "error";
+};
+/**
+ * Stream event tagged "response.failed".
+ * NOTE(review): `Response` is not declared among these Responses API types; confirm it
+ * resolves to the intended response shape rather than the global Fetch API `Response`.
+ */
+type ResponseFailedEvent = {
+    response: Response;
+    sequence_number: number;
+    type: "response.failed";
+};
+/** Format variant tagged "text". */
+type ResponseFormatText = {
+    type: "text";
+};
+/** Format variant tagged "json_object". */
+type ResponseFormatJSONObject = {
+    type: "json_object";
+};
+/** Union of the three format variants, discriminated by `type`; used by `ResponseTextConfig.format`. */
+type ResponseFormatTextConfig = ResponseFormatText | ResponseFormatTextJSONSchemaConfig | ResponseFormatJSONObject;
+/** Format variant tagged "json_schema"; `name` and `schema` are required. */
+type ResponseFormatTextJSONSchemaConfig = {
+    name: string;
+    schema: {
+        [key: string]: unknown;
+    };
+    type: "json_schema";
+    description?: string;
+    strict?: boolean | null;
+};
+/** Stream event tagged "response.function_call_arguments.delta", carrying a `delta` string. */
+type ResponseFunctionCallArgumentsDeltaEvent = {
+    delta: string;
+    item_id: string;
+    output_index: number;
+    sequence_number: number;
+    type: "response.function_call_arguments.delta";
+};
+/** Stream event tagged "response.function_call_arguments.done", carrying the `arguments` string and function `name`. */
+type ResponseFunctionCallArgumentsDoneEvent = {
+    arguments: string;
+    item_id: string;
+    name: string;
+    output_index: number;
+    sequence_number: number;
+    type: "response.function_call_arguments.done";
+};
+/** One element of a function-call output list: a text or image content part. */
+type ResponseFunctionCallOutputItem = ResponseInputTextContent | ResponseInputImageContent;
+/** Array form used by `ResponseInputItemFunctionCallOutput.output`. */
+type ResponseFunctionCallOutputItemList = Array<ResponseFunctionCallOutputItem>;
+/** Item tagged "function_call"; `arguments` is a string and `id` is optional. */
+type ResponseFunctionToolCall = {
+    arguments: string;
+    call_id: string;
+    name: string;
+    type: "function_call";
+    id?: string;
+    status?: "in_progress" | "completed" | "incomplete";
+};
+/** Same as `ResponseFunctionToolCall`, but with `id` made required. */
+interface ResponseFunctionToolCallItem extends ResponseFunctionToolCall {
+    id: string;
+}
+/** Item tagged "function_call_output" with a required `id`. */
+type ResponseFunctionToolCallOutputItem = {
+    id: string;
+    call_id: string;
+    output: string | Array<ResponseInputText | ResponseInputImage>;
+    type: "function_call_output";
+    status?: "in_progress" | "completed" | "incomplete";
+};
+/** Allowed element values of `ResponsesInput.include`. */
+type ResponseIncludable = "message.input_image.image_url" | "message.output_text.logprobs";
+/**
+ * Stream event tagged "response.incomplete".
+ * NOTE(review): `Response` is not declared among these Responses API types; confirm it
+ * resolves to the intended response shape rather than the global Fetch API `Response`.
+ */
+type ResponseIncompleteEvent = {
+    response: Response;
+    sequence_number: number;
+    type: "response.incomplete";
+};
+/** Array form of `ResponsesInput.input`. */
+type ResponseInput = Array<ResponseInputItem>;
+/** A content part of an input message: text or image. */
+type ResponseInputContent = ResponseInputText | ResponseInputImage;
+/** Image content part tagged "input_image"; `detail` is required here. */
+type ResponseInputImage = {
+    detail: "low" | "high" | "auto";
+    type: "input_image";
+    /**
+     * Base64 encoded image
+     */
+    image_url?: string | null;
+};
+/** Variant of `ResponseInputImage` in which `detail` is optional and may be `null`. */
+type ResponseInputImageContent = {
+    type: "input_image";
+    detail?: "low" | "high" | "auto" | null;
+    /**
+     * Base64 encoded image
+     */
+    image_url?: string | null;
+};
+/** Union of every item type accepted in a `ResponseInput` array. */
+type ResponseInputItem = EasyInputMessage | ResponseInputItemMessage | ResponseOutputMessage | ResponseFunctionToolCall | ResponseInputItemFunctionCallOutput | ResponseReasoningItem;
+/** Input item tagged "function_call_output"; `id` and `status` are optional and may be `null`. */
+type ResponseInputItemFunctionCallOutput = {
+    call_id: string;
+    output: string | ResponseFunctionCallOutputItemList;
+    type: "function_call_output";
+    id?: string | null;
+    status?: "in_progress" | "completed" | "incomplete" | null;
+};
+/** Input message whose `content` must be a content list; the role excludes "assistant". */
+type ResponseInputItemMessage = {
+    content: ResponseInputMessageContentList;
+    role: "user" | "system" | "developer";
+    status?: "in_progress" | "completed" | "incomplete";
+    type?: "message";
+};
+/** List of text/image content parts. */
+type ResponseInputMessageContentList = Array<ResponseInputContent>;
+/** Same fields as `ResponseInputItemMessage` plus a required `id`. */
+type ResponseInputMessageItem = {
+    id: string;
+    content: ResponseInputMessageContentList;
+    role: "user" | "system" | "developer";
+    status?: "in_progress" | "completed" | "incomplete";
+    type?: "message";
+};
+/** Text content part tagged "input_text". */
+type ResponseInputText = {
+    text: string;
+    type: "input_text";
+};
+/** Structurally identical to `ResponseInputText`; used by `ResponseFunctionCallOutputItem`. */
+type ResponseInputTextContent = {
+    text: string;
+    type: "input_text";
+};
+/** Union of the item types that carry a required `id`. */
+type ResponseItem = ResponseInputMessageItem | ResponseOutputMessage | ResponseFunctionToolCallItem | ResponseFunctionToolCallOutputItem;
+/** Element type of `ResponsesOutput.output`: an assistant message, a function call, or a reasoning item. */
+type ResponseOutputItem = ResponseOutputMessage | ResponseFunctionToolCall | ResponseReasoningItem;
+/** Stream event tagged "response.output_item.added". */
+type ResponseOutputItemAddedEvent = {
+    item: ResponseOutputItem;
+    output_index: number;
+    sequence_number: number;
+    type: "response.output_item.added";
+};
+/** Stream event tagged "response.output_item.done". */
+type ResponseOutputItemDoneEvent = {
+    item: ResponseOutputItem;
+    output_index: number;
+    sequence_number: number;
+    type: "response.output_item.done";
+};
+/** Message with the fixed role "assistant"; `content` holds output text and/or refusal parts. */
+type ResponseOutputMessage = {
+    id: string;
+    content: Array<ResponseOutputText | ResponseOutputRefusal>;
+    role: "assistant";
+    status: "in_progress" | "completed" | "incomplete";
+    type: "message";
+};
+/** Content part tagged "refusal". */
+type ResponseOutputRefusal = {
+    refusal: string;
+    type: "refusal";
+};
+/** Content part tagged "output_text", with optional `logprobs`. */
+type ResponseOutputText = {
+    text: string;
+    type: "output_text";
+    logprobs?: Array<Logprob>;
+};
+/** Item tagged "reasoning"; `summary` is required, `content` and `encrypted_content` are optional. */
+type ResponseReasoningItem = {
+    id: string;
+    summary: Array<ResponseReasoningSummaryItem>;
+    type: "reasoning";
+    content?: Array<ResponseReasoningContentItem>;
+    encrypted_content?: string | null;
+    status?: "in_progress" | "completed" | "incomplete";
+};
+/** Element of `ResponseReasoningItem.summary`, tagged "summary_text". */
+type ResponseReasoningSummaryItem = {
+    text: string;
+    type: "summary_text";
+};
+/** Element of `ResponseReasoningItem.content`, tagged "reasoning_text". */
+type ResponseReasoningContentItem = {
+    text: string;
+    type: "reasoning_text";
+};
+/** Stream event tagged "response.reasoning_text.delta", carrying a `delta` string. */
+type ResponseReasoningTextDeltaEvent = {
+    content_index: number;
+    delta: string;
+    item_id: string;
+    output_index: number;
+    sequence_number: number;
+    type: "response.reasoning_text.delta";
+};
+/** Stream event tagged "response.reasoning_text.done", carrying the `text` string. */
+type ResponseReasoningTextDoneEvent = {
+    content_index: number;
+    item_id: string;
+    output_index: number;
+    sequence_number: number;
+    text: string;
+    type: "response.reasoning_text.done";
+};
+/** Stream event tagged "response.refusal.delta", carrying a `delta` string. */
+type ResponseRefusalDeltaEvent = {
+    content_index: number;
+    delta: string;
+    item_id: string;
+    output_index: number;
+    sequence_number: number;
+    type: "response.refusal.delta";
+};
+/** Stream event tagged "response.refusal.done", carrying the `refusal` string. */
+type ResponseRefusalDoneEvent = {
+    content_index: number;
+    item_id: string;
+    output_index: number;
+    refusal: string;
+    sequence_number: number;
+    type: "response.refusal.done";
+};
+/** Allowed values of `ResponsesOutput.status`. */
+type ResponseStatus = "completed" | "failed" | "in_progress" | "cancelled" | "queued" | "incomplete";
+/** Union of all stream event types declared here; each member has a distinct literal `type` tag. */
+type ResponseStreamEvent = ResponseCompletedEvent | ResponseCreatedEvent | ResponseErrorEvent | ResponseFunctionCallArgumentsDeltaEvent | ResponseFunctionCallArgumentsDoneEvent | ResponseFailedEvent | ResponseIncompleteEvent | ResponseOutputItemAddedEvent | ResponseOutputItemDoneEvent | ResponseReasoningTextDeltaEvent | ResponseReasoningTextDoneEvent | ResponseRefusalDeltaEvent | ResponseRefusalDoneEvent | ResponseTextDeltaEvent | ResponseTextDoneEvent;
+/**
+ * Stream event tagged "response.completed".
+ * NOTE(review): `Response` is not declared among these Responses API types; confirm it
+ * resolves to the intended response shape rather than the global Fetch API `Response`.
+ */
+type ResponseCompletedEvent = {
+    response: Response;
+    sequence_number: number;
+    type: "response.completed";
+};
+/** Type of the `text` field on `ResponsesInput` and `ResponsesOutput`; both members are optional. */
+type ResponseTextConfig = {
+    format?: ResponseFormatTextConfig;
+    verbosity?: "low" | "medium" | "high" | null;
+};
+/** Stream event tagged "response.output_text.delta"; `logprobs` is a required array. */
+type ResponseTextDeltaEvent = {
+    content_index: number;
+    delta: string;
+    item_id: string;
+    logprobs: Array<Logprob>;
+    output_index: number;
+    sequence_number: number;
+    type: "response.output_text.delta";
+};
+/** Stream event tagged "response.output_text.done", carrying the `text` string. */
+type ResponseTextDoneEvent = {
+    content_index: number;
+    item_id: string;
+    logprobs: Array<Logprob>;
+    output_index: number;
+    sequence_number: number;
+    text: string;
+    type: "response.output_text.done";
+};
+/** A token with its log probability and an optional list of alternatives. */
+type Logprob = {
+    token: string;
+    logprob: number;
+    top_logprobs?: Array<TopLogprob>;
+};
+/** Element of `Logprob.top_logprobs`; both fields are optional here. */
+type TopLogprob = {
+    token?: string;
+    logprob?: number;
+};
+/** Type of `ResponsesOutput.usage`: three required token counts. */
+type ResponseUsage = {
+    input_tokens: number;
+    output_tokens: number;
+    total_tokens: number;
+};
+/** The only tool type declared here is the function tool. */
+type Tool = ResponsesFunctionTool;
+/** Object form of `tool_choice`, naming a specific function. */
+type ToolChoiceFunction = {
+    name: string;
+    type: "function";
+};
+/** String form of `tool_choice`; "none" is the only literal declared. */
+type ToolChoiceOptions = "none";
+/** Allowed values of `Reasoning.effort`; the union already includes `null`. */
+type ReasoningEffort = "minimal" | "low" | "medium" | "high" | null;
+/** Type of `ResponsesInput.stream_options`. */
+type StreamOptions = {
+    include_obfuscation?: boolean;
+};
+/**
+ * Input is one of two shapes: a single request (`text` plus optional `pooling`),
+ * or a batch object whose `requests` array holds entries of that same shape.
+ */
+type Ai_Cf_Baai_Bge_Base_En_V1_5_Input = {
+    text: string | string[];
+    /**
+     * The pooling method used in the embedding process. `cls` pooling will generate more accurate embeddings on larger inputs - however, embeddings created with cls pooling are not compatible with embeddings generated with mean pooling. The default pooling method is `mean` in order for this to not be a breaking change, but we highly suggest using the new `cls` pooling for better accuracy.
+     */
+    pooling?: "mean" | "cls";
+} | {
+    /**
+     * Batch of the embeddings requests to run using async-queue
+     */
+    requests: {
+        text: string | string[];
+        /**
+         * The pooling method used in the embedding process. `cls` pooling will generate more accurate embeddings on larger inputs - however, embeddings created with cls pooling are not compatible with embeddings generated with mean pooling. The default pooling method is `mean` in order for this to not be a breaking change, but we highly suggest using the new `cls` pooling for better accuracy.
+         */
+        pooling?: "mean" | "cls";
+    }[];
+};
+/** Output is either the embeddings result (all fields optional) or an async response carrying a request id. */
+type Ai_Cf_Baai_Bge_Base_En_V1_5_Output = {
+    shape?: number[];
+    /**
+     * Embeddings of the requested text values
+     */
+    data?: number[][];
+    /**
+     * The pooling method used in the embedding process.
+     */
+    pooling?: "mean" | "cls";
+} | Ai_Cf_Baai_Bge_Base_En_V1_5_AsyncResponse;
+/** Async variant of the output; `request_id` is optional. */
+interface Ai_Cf_Baai_Bge_Base_En_V1_5_AsyncResponse {
+    /**
+     * The async request id that can be used to obtain the results.
+     */
+    request_id?: string;
+}
+/** Pairs the input and output types declared above. */
+declare abstract class Base_Ai_Cf_Baai_Bge_Base_En_V1_5 {
+    inputs: Ai_Cf_Baai_Bge_Base_En_V1_5_Input;
+    postProcessedOutputs: Ai_Cf_Baai_Bge_Base_En_V1_5_Output;
+}
+/** Input is either a plain string or an object holding the `audio` samples. */
+type Ai_Cf_Openai_Whisper_Input = string | {
+    /**
+     * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+     */
+    audio: number[];
+};
+/** Transcription result; only `text` is required. */
+interface Ai_Cf_Openai_Whisper_Output {
+    /**
+     * The transcription
+     */
+    text: string;
+    word_count?: number;
+    words?: {
+        word?: string;
+        /**
+         * The second this word begins in the recording
+         */
+        start?: number;
+        /**
+         * The ending second when the word completes
+         */
+        end?: number;
+    }[];
+    vtt?: string;
+}
+/** Pairs the input and output types declared above. */
+declare abstract class Base_Ai_Cf_Openai_Whisper {
+    inputs: Ai_Cf_Openai_Whisper_Input;
+    postProcessedOutputs: Ai_Cf_Openai_Whisper_Output;
+}
+/**
+ * Input is one of two shapes: a single translation request, or a batch object whose
+ * `requests` array holds entries of that same shape.
+ */
+type Ai_Cf_Meta_M2M100_1_2B_Input = {
+    /**
+     * The text to be translated
+     */
+    text: string;
+    /**
+     * The language code of the source text (e.g., 'en' for English). Defaults to 'en' if not specified
+     */
+    source_lang?: string;
+    /**
+     * The language code to translate the text into (e.g., 'es' for Spanish)
+     */
+    target_lang: string;
+} | {
+    /**
+     * Batch of translation requests to run using async-queue
+     */
+    requests: {
+        /**
+         * The text to be translated
+         */
+        text: string;
+        /**
+         * The language code of the source text (e.g., 'en' for English). Defaults to 'en' if not specified
+         */
+        source_lang?: string;
+        /**
+         * The language code to translate the text into (e.g., 'es' for Spanish)
+         */
+        target_lang: string;
+    }[];
+};
+/** Output is either the translation result or an async response carrying a request id. */
+type Ai_Cf_Meta_M2M100_1_2B_Output = {
+    /**
+     * The translated text in the target language
+     */
+    translated_text?: string;
+} | Ai_Cf_Meta_M2M100_1_2B_AsyncResponse;
+/** Async variant of the output; `request_id` is optional. */
+interface Ai_Cf_Meta_M2M100_1_2B_AsyncResponse {
+    /**
+     * The async request id that can be used to obtain the results.
+     */
+    request_id?: string;
+}
+/** Pairs the input and output types declared above. */
+declare abstract class Base_Ai_Cf_Meta_M2M100_1_2B {
+    inputs: Ai_Cf_Meta_M2M100_1_2B_Input;
+    postProcessedOutputs: Ai_Cf_Meta_M2M100_1_2B_Output;
+}
+/**
+ * Input is one of two shapes: a single request (`text` plus optional `pooling`),
+ * or a batch object whose `requests` array holds entries of that same shape.
+ */
+type Ai_Cf_Baai_Bge_Small_En_V1_5_Input = {
+    text: string | string[];
+    /**
+     * The pooling method used in the embedding process. `cls` pooling will generate more accurate embeddings on larger inputs - however, embeddings created with cls pooling are not compatible with embeddings generated with mean pooling. The default pooling method is `mean` in order for this to not be a breaking change, but we highly suggest using the new `cls` pooling for better accuracy.
+     */
+    pooling?: "mean" | "cls";
+} | {
+    /**
+     * Batch of the embeddings requests to run using async-queue
+     */
+    requests: {
+        text: string | string[];
+        /**
+         * The pooling method used in the embedding process. `cls` pooling will generate more accurate embeddings on larger inputs - however, embeddings created with cls pooling are not compatible with embeddings generated with mean pooling. The default pooling method is `mean` in order for this to not be a breaking change, but we highly suggest using the new `cls` pooling for better accuracy.
+         */
+        pooling?: "mean" | "cls";
+    }[];
+};
+/** Output is either the embeddings result (all fields optional) or an async response carrying a request id. */
+type Ai_Cf_Baai_Bge_Small_En_V1_5_Output = {
+    shape?: number[];
+    /**
+     * Embeddings of the requested text values
+     */
+    data?: number[][];
+    /**
+     * The pooling method used in the embedding process.
+     */
+    pooling?: "mean" | "cls";
+} | Ai_Cf_Baai_Bge_Small_En_V1_5_AsyncResponse;
+/** Async variant of the output; `request_id` is optional. */
+interface Ai_Cf_Baai_Bge_Small_En_V1_5_AsyncResponse {
+    /**
+     * The async request id that can be used to obtain the results.
+     */
+    request_id?: string;
+}
+/** Pairs the input and output types declared above. */
+declare abstract class Base_Ai_Cf_Baai_Bge_Small_En_V1_5 {
+    inputs: Ai_Cf_Baai_Bge_Small_En_V1_5_Input;
+    postProcessedOutputs: Ai_Cf_Baai_Bge_Small_En_V1_5_Output;
+}
+/**
+ * Input is one of two shapes: a single request (`text` plus optional `pooling`),
+ * or a batch object whose `requests` array holds entries of that same shape.
+ */
+type Ai_Cf_Baai_Bge_Large_En_V1_5_Input = {
+    text: string | string[];
+    /**
+     * The pooling method used in the embedding process. `cls` pooling will generate more accurate embeddings on larger inputs - however, embeddings created with cls pooling are not compatible with embeddings generated with mean pooling. The default pooling method is `mean` in order for this to not be a breaking change, but we highly suggest using the new `cls` pooling for better accuracy.
+     */
+    pooling?: "mean" | "cls";
+} | {
+    /**
+     * Batch of the embeddings requests to run using async-queue
+     */
+    requests: {
+        text: string | string[];
+        /**
+         * The pooling method used in the embedding process. `cls` pooling will generate more accurate embeddings on larger inputs - however, embeddings created with cls pooling are not compatible with embeddings generated with mean pooling. The default pooling method is `mean` in order for this to not be a breaking change, but we highly suggest using the new `cls` pooling for better accuracy.
+         */
+        pooling?: "mean" | "cls";
+    }[];
+};
+/** Output is either the embeddings result (all fields optional) or an async response carrying a request id. */
+type Ai_Cf_Baai_Bge_Large_En_V1_5_Output = {
+    shape?: number[];
+    /**
+     * Embeddings of the requested text values
+     */
+    data?: number[][];
+    /**
+     * The pooling method used in the embedding process.
+     */
+    pooling?: "mean" | "cls";
+} | Ai_Cf_Baai_Bge_Large_En_V1_5_AsyncResponse;
+/** Async variant of the output; `request_id` is optional. */
+interface Ai_Cf_Baai_Bge_Large_En_V1_5_AsyncResponse {
+    /**
+     * The async request id that can be used to obtain the results.
+     */
+    request_id?: string;
+}
+/** Pairs the input and output types declared above. */
+declare abstract class Base_Ai_Cf_Baai_Bge_Large_En_V1_5 {
+    inputs: Ai_Cf_Baai_Bge_Large_En_V1_5_Input;
+    postProcessedOutputs: Ai_Cf_Baai_Bge_Large_En_V1_5_Output;
+}
+/**
+ * Input is either a plain string or an options object.
+ * In the object form `image` is the only required field.
+ */
+type Ai_Cf_Unum_Uform_Gen2_Qwen_500M_Input = string | {
+    /**
+     * The input text prompt for the model to generate a response.
+     */
+    prompt?: string;
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+    /** The image, given either as an array of numbers or as a string. */
+    image: number[] | (string & NonNullable<unknown>);
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+};
+/** Output with a single optional `description` string. */
+interface Ai_Cf_Unum_Uform_Gen2_Qwen_500M_Output {
+    description?: string;
+}
+/** Pairs the input and output types declared above. */
+declare abstract class Base_Ai_Cf_Unum_Uform_Gen2_Qwen_500M {
+    inputs: Ai_Cf_Unum_Uform_Gen2_Qwen_500M_Input;
+    postProcessedOutputs: Ai_Cf_Unum_Uform_Gen2_Qwen_500M_Output;
+}
+/** Input is either a plain string or an object holding the `audio` samples. */
+type Ai_Cf_Openai_Whisper_Tiny_En_Input = string | {
+    /**
+     * An array of integers that represent the audio data constrained to 8-bit unsigned integer values
+     */
+    audio: number[];
+};
+/** Transcription result; only `text` is required. */
+interface Ai_Cf_Openai_Whisper_Tiny_En_Output {
+    /**
+     * The transcription
+     */
+    text: string;
+    word_count?: number;
+    words?: {
+        word?: string;
+        /**
+         * The second this word begins in the recording
+         */
+        start?: number;
+        /**
+         * The ending second when the word completes
+         */
+        end?: number;
+    }[];
+    vtt?: string;
+}
+/** Pairs the input and output types declared above. */
+declare abstract class Base_Ai_Cf_Openai_Whisper_Tiny_En {
+    inputs: Ai_Cf_Openai_Whisper_Tiny_En_Input;
+    postProcessedOutputs: Ai_Cf_Openai_Whisper_Tiny_En_Output;
+}
+/** Input options; only `audio` (a base64 string, unlike the number arrays of the other Whisper inputs) is required. */
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input {
+    /**
+     * Base64 encoded value of the audio data.
+     */
+    audio: string;
+    /**
+     * Supported tasks are 'translate' or 'transcribe'.
+     */
+    task?: string;
+    /**
+     * The language of the audio being transcribed or translated.
+     */
+    language?: string;
+    /**
+     * Preprocess the audio with a voice activity detection model.
+     */
+    vad_filter?: boolean;
+    /**
+     * A text prompt to help provide context to the model on the contents of the audio.
+     */
+    initial_prompt?: string;
+    /**
+     * The prefix is prepended to the beginning of the transcription output and can guide the transcription result.
+     */
+    prefix?: string;
+}
+interface Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output { // result shape: `text` is the only required field; metadata, segments and VTT are optional
+    transcription_info?: { // optional metadata about the detected language and audio duration
+        /**
+         * The language of the audio being transcribed or translated.
+         */
+        language?: string;
+        /**
+         * The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1.
+         */
+        language_probability?: number;
+        /**
+         * The total duration of the original audio file, in seconds.
+         */
+        duration?: number;
+        /**
+         * The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds.
+         */
+        duration_after_vad?: number;
+    };
+    /**
+     * The complete transcription of the audio.
+     */
+    text: string;
+    /**
+     * The total number of words in the transcription.
+     */
+    word_count?: number;
+    segments?: { // optional list of timed segments; every member of a segment is itself optional
+        /**
+         * The starting time of the segment within the audio, in seconds.
+         */
+        start?: number;
+        /**
+         * The ending time of the segment within the audio, in seconds.
+         */
+        end?: number;
+        /**
+         * The transcription of the segment.
+         */
+        text?: string;
+        /**
+         * The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs.
+         */
+        temperature?: number;
+        /**
+         * The average log probability of the predictions for the words in this segment, indicating overall confidence.
+         */
+        avg_logprob?: number;
+        /**
+         * The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process.
+         */
+        compression_ratio?: number;
+        /**
+         * The probability that the segment contains no speech, represented as a decimal between 0 and 1.
+         */
+        no_speech_prob?: number;
+        words?: { // optional word-level timings within this segment
+            /**
+             * The individual word transcribed from the audio.
+             */
+            word?: string;
+            /**
+             * The starting time of the word within the audio, in seconds.
+             */
+            start?: number;
+            /**
+             * The ending time of the word within the audio, in seconds.
+             */
+            end?: number;
+        }[];
+    }[];
+    /**
+     * The transcription in WebVTT format, which includes timing and text information for use in subtitles.
+     */
+    vtt?: string;
+}
+declare abstract class Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo { // ambient (type-only) class pairing the input and output shapes declared above
+    inputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Input;
+    postProcessedOutputs: Ai_Cf_Openai_Whisper_Large_V3_Turbo_Output;
+}
+type Ai_Cf_Baai_Bge_M3_Input = Ai_Cf_Baai_Bge_M3_Input_QueryAnd_Contexts | Ai_Cf_Baai_Bge_M3_Input_Embedding | { // a single request of either kind, or a batch wrapper
+    /**
+     * Batch of embedding requests to run using async-queue. Each entry is either a query-and-contexts request or an embedding request.
+     */
+    requests: (Ai_Cf_Baai_Bge_M3_Input_QueryAnd_Contexts_1 | Ai_Cf_Baai_Bge_M3_Input_Embedding_1)[];
+};
+interface Ai_Cf_Baai_Bge_M3_Input_QueryAnd_Contexts { // request shape: optional `query` plus a required `contexts` list
+    /**
+     * A query you wish to perform against the provided contexts. If no query is provided, the model will respond with embeddings for the contexts.
+     */
+    query?: string;
+    /**
+     * List of provided contexts. Note that the index in this array is important, as the response will refer to it.
+     */
+    contexts: {
+        /**
+         * The text content of one provided context.
+         */
+        text?: string;
+    }[];
+    /**
+     * When the provided context is too long, should the model truncate it to fit instead of erroring out?
+     */
+    truncate_inputs?: boolean;
+}
+interface Ai_Cf_Baai_Bge_M3_Input_Embedding { // request shape: one string or a list of strings in `text`
+    text: string | string[];
+    /**
+     * When the provided context is too long, should the model truncate it to fit instead of erroring out?
+     */
+    truncate_inputs?: boolean;
+}
+interface Ai_Cf_Baai_Bge_M3_Input_QueryAnd_Contexts_1 { // same members as Ai_Cf_Baai_Bge_M3_Input_QueryAnd_Contexts; used for entries of the batch `requests` array
+    /**
+     * A query you wish to perform against the provided contexts. If no query is provided, the model will respond with embeddings for the contexts.
+     */
+    query?: string;
+    /**
+     * List of provided contexts. Note that the index in this array is important, as the response will refer to it.
+     */
+    contexts: {
+        /**
+         * The text content of one provided context.
+         */
+        text?: string;
+    }[];
+    /**
+     * When the provided context is too long, should the model truncate it to fit instead of erroring out?
+     */
+    truncate_inputs?: boolean;
+}
+interface Ai_Cf_Baai_Bge_M3_Input_Embedding_1 { // same members as Ai_Cf_Baai_Bge_M3_Input_Embedding; used for entries of the batch `requests` array
+    text: string | string[];
+    /**
+     * When the provided context is too long, should the model truncate it to fit instead of erroring out?
+     */
+    truncate_inputs?: boolean;
+}
+type Ai_Cf_Baai_Bge_M3_Output = Ai_Cf_Baai_Bge_M3_Ouput_Query | Ai_Cf_Baai_Bge_M3_Output_EmbeddingFor_Contexts | Ai_Cf_Baai_Bge_M3_Ouput_Embedding | Ai_Cf_Baai_Bge_M3_AsyncResponse; // one of: per-context scores, context embeddings, text embeddings, or an async request handle
+interface Ai_Cf_Baai_Bge_M3_Ouput_Query { // result shape: optional list of (context index, score) entries
+    response?: {
+        /**
+         * Index of the context in the request's `contexts` array.
+         */
+        id?: number;
+        /**
+         * Score of the context at that index.
+         */
+        score?: number;
+    }[];
+}
+interface Ai_Cf_Baai_Bge_M3_Output_EmbeddingFor_Contexts { // result shape: embeddings returned under `response` (all fields optional)
+    response?: number[][]; // presumably one embedding vector per context — confirm
+    shape?: number[]; // presumably the dimensions of `response` — confirm
+    /**
+     * The pooling method used in the embedding process.
+     */
+    pooling?: "mean" | "cls";
+}
+interface Ai_Cf_Baai_Bge_M3_Ouput_Embedding { // result shape: embeddings returned under `data` (all fields optional)
+    shape?: number[]; // presumably the dimensions of `data` — confirm
+    /**
+     * Embeddings of the requested text values.
+     */
+    data?: number[][];
+    /**
+     * The pooling method used in the embedding process.
+     */
+    pooling?: "mean" | "cls";
+}
+interface Ai_Cf_Baai_Bge_M3_AsyncResponse { // result shape carrying only an optional request id
+    /**
+     * The async request id that can be used to obtain the results.
+     */
+    request_id?: string;
+}
+declare abstract class Base_Ai_Cf_Baai_Bge_M3 { // ambient (type-only) class pairing the input and output unions declared above
+    inputs: Ai_Cf_Baai_Bge_M3_Input;
+    postProcessedOutputs: Ai_Cf_Baai_Bge_M3_Output;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input { // request shape: required `prompt`, optional `steps`
+    /**
+     * A text description of the image you want to generate.
+     */
+    prompt: string;
+    /**
+     * The number of diffusion steps; higher values can improve quality but take longer. No bounds are expressed in this type.
+     */
+    steps?: number;
+}
+interface Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output { // result shape: a single optional `image` string
+    /**
+     * The generated image in Base64 format.
+     */
+    image?: string;
+}
+declare abstract class Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell { // ambient (type-only) class pairing the input and output shapes declared above
+    inputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Input;
+    postProcessedOutputs: Ai_Cf_Black_Forest_Labs_Flux_1_Schnell_Output;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input = Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Prompt | Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Messages; // either a prompt-style or a messages-style request
+interface Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Prompt { // prompt-style request: required `prompt`, optional image and sampling controls
+    /**
+     * The input text prompt for the model to generate a response.
+     */
+    prompt: string;
+    image?: number[] | (string & NonNullable<unknown>); // optional image as a number array or a string; the expected encoding is not stated here
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using SSE (Server-Sent Events).
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+    /**
+     * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+     */
+    lora?: string;
+}
+interface Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Messages { // messages-style request: required `messages`, optional image, tools and sampling controls
+    /**
+     * An array of message objects representing the conversation history.
+     */
+    messages: {
+        /**
+         * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+         */
+        role?: string;
+        /**
+         * The tool call id. Must be supplied for tool calls for Mistral-3. If you don't know what to put here you can fall back to 000000001.
+         */
+        tool_call_id?: string;
+        content?: string | { // plain text, a list of typed parts, or a single typed part
+            /**
+             * Type of the content provided.
+             */
+            type?: string;
+            text?: string;
+            image_url?: {
+                /**
+                 * Image URI with inline data (e.g. data:image/jpeg;base64,/9j/...). HTTP URLs will not be accepted.
+                 */
+                url?: string;
+            };
+        }[] | { // single-part form; same members as the array element above
+            /**
+             * Type of the content provided.
+             */
+            type?: string;
+            text?: string;
+            image_url?: {
+                /**
+                 * Image URI with inline data (e.g. data:image/jpeg;base64,/9j/...). HTTP URLs will not be accepted.
+                 */
+                url?: string;
+            };
+        };
+    }[];
+    image?: number[] | (string & NonNullable<unknown>); // optional image as a number array or a string; the expected encoding is not stated here
+    functions?: { // optional list of named code snippets; how they are used is not stated here
+        name: string;
+        code: string;
+    }[];
+    /**
+     * A list of tools available for the assistant to use. Each entry is either a flat tool description or a `{ type, function }` wrapper.
+     */
+    tools?: ({
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter, keyed by parameter name.
+             */
+            properties: {
+                [k: string]: {
+                    /**
+                     * The data type of the parameter.
+                     */
+                    type: string;
+                    /**
+                     * A description of the expected parameter.
+                     */
+                    description: string;
+                };
+            };
+        };
+    } | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+            /**
+             * The name of the function.
+             */
+            name: string;
+            /**
+             * A brief description of what the function does.
+             */
+            description: string;
+            /**
+             * Schema defining the parameters accepted by the function.
+             */
+            parameters: {
+                /**
+                 * The type of the parameters object (usually 'object').
+                 */
+                type: string;
+                /**
+                 * List of required parameter names.
+                 */
+                required?: string[];
+                /**
+                 * Definitions of each parameter, keyed by parameter name.
+                 */
+                properties: {
+                    [k: string]: {
+                        /**
+                         * The data type of the parameter.
+                         */
+                        type: string;
+                        /**
+                         * A description of the expected parameter.
+                         */
+                        description: string;
+                    };
+                };
+            };
+        };
+    })[];
+    /**
+     * If true, the response will be streamed back incrementally.
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+type Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output = { // result shape: optional text `response` and optional `tool_calls`
+    /**
+     * The generated text response from the model.
+     */
+    response?: string;
+    /**
+     * An array of tool call requests made during the response generation.
+     */
+    tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request.
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called.
+         */
+        name?: string;
+    }[];
+};
+declare abstract class Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct { // ambient (type-only) class pairing the input union and output shape declared above
+    inputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Input;
+    postProcessedOutputs: Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct_Output;
+}
+type Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast_Input = Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast_Prompt | Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast_Messages | Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast_Async_Batch; // a prompt-style request, a messages-style request, or an async batch of requests
+interface Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast_Prompt { // prompt-style request: required `prompt`, optional response format and sampling controls
+    /**
+     * The input text prompt for the model to generate a response.
+     */
+    prompt: string;
+    /**
+     * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+     */
+    lora?: string;
+    response_format?: Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast_JSON_Mode; // optional JSON-mode selector (`type` plus optional `json_schema`)
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using SSE (Server-Sent Events).
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+interface Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast_JSON_Mode { // type of `response_format` on the prompt-style request
+    type?: "json_object" | "json_schema"; // which of the two JSON output modes is requested
+    json_schema?: unknown; // untyped here; presumably the schema used with "json_schema" — confirm
+}
+interface Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast_Messages { // messages-style request: required `messages`, optional tools, response format and sampling controls
+    /**
+     * An array of message objects representing the conversation history.
+     */
+    messages: {
+        /**
+         * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+         */
+        role: string;
+        /**
+         * The content of the message as a string.
+         */
+        content: string;
+    }[];
+    functions?: { // optional list of named code snippets; how they are used is not stated here
+        name: string;
+        code: string;
+    }[];
+    /**
+     * A list of tools available for the assistant to use. Each entry is either a flat tool description or a `{ type, function }` wrapper.
+     */
+    tools?: ({
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter, keyed by parameter name.
+             */
+            properties: {
+                [k: string]: {
+                    /**
+                     * The data type of the parameter.
+                     */
+                    type: string;
+                    /**
+                     * A description of the expected parameter.
+                     */
+                    description: string;
+                };
+            };
+        };
+    } | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+            /**
+             * The name of the function.
+             */
+            name: string;
+            /**
+             * A brief description of what the function does.
+             */
+            description: string;
+            /**
+             * Schema defining the parameters accepted by the function.
+             */
+            parameters: {
+                /**
+                 * The type of the parameters object (usually 'object').
+                 */
+                type: string;
+                /**
+                 * List of required parameter names.
+                 */
+                required?: string[];
+                /**
+                 * Definitions of each parameter, keyed by parameter name.
+                 */
+                properties: {
+                    [k: string]: {
+                        /**
+                         * The data type of the parameter.
+                         */
+                        type: string;
+                        /**
+                         * A description of the expected parameter.
+                         */
+                        description: string;
+                    };
+                };
+            };
+        };
+    })[];
+    response_format?: Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast_JSON_Mode_1; // optional JSON-mode selector (`type` plus optional `json_schema`)
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using SSE (Server-Sent Events).
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+interface Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast_JSON_Mode_1 { // type of `response_format` on the messages-style request; same members as ..._JSON_Mode
+    type?: "json_object" | "json_schema"; // which of the two JSON output modes is requested
+    json_schema?: unknown; // untyped here; presumably the schema used with "json_schema" — confirm
+}
+interface Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast_Async_Batch { // batch request: optional list of prompt-style entries, each with its own options
+    requests?: {
+        /**
+         * User-supplied reference. This field will also be present in the response, so it can be used to match a request to its response. It's NOT validated to be unique.
+         */
+        external_reference?: string;
+        /**
+         * Prompt for the text generation model.
+         */
+        prompt?: string;
+        /**
+         * If true, the response will be streamed back incrementally using SSE (Server-Sent Events).
+         */
+        stream?: boolean;
+        /**
+         * The maximum number of tokens to generate in the response.
+         */
+        max_tokens?: number;
+        /**
+         * Controls the randomness of the output; higher values produce more random results.
+         */
+        temperature?: number;
+        /**
+         * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+         */
+        top_p?: number;
+        /**
+         * Random seed for reproducibility of the generation.
+         */
+        seed?: number;
+        /**
+         * Penalty for repeated tokens; higher values discourage repetition.
+         */
+        repetition_penalty?: number;
+        /**
+         * Decreases the likelihood of the model repeating the same lines verbatim.
+         */
+        frequency_penalty?: number;
+        /**
+         * Increases the likelihood of the model introducing new topics.
+         */
+        presence_penalty?: number;
+        response_format?: Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast_JSON_Mode_2; // optional JSON-mode selector (`type` plus optional `json_schema`)
+    }[];
+}
+interface Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast_JSON_Mode_2 { // type of `response_format` on async-batch entries; same members as ..._JSON_Mode
+    type?: "json_object" | "json_schema"; // which of the two JSON output modes is requested
+    json_schema?: unknown; // untyped here; presumably the schema used with "json_schema" — confirm
+}
+type Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast_Output = { // structured result; the union also admits a bare string or an async request handle (see closing line)
+    /**
+     * The generated text response from the model.
+     */
+    response: string;
+    /**
+     * Usage statistics for the inference request.
+     */
+    usage?: {
+        /**
+         * Total number of tokens in input.
+         */
+        prompt_tokens?: number;
+        /**
+         * Total number of tokens in output.
+         */
+        completion_tokens?: number;
+        /**
+         * Total number of input and output tokens.
+         */
+        total_tokens?: number;
+    };
+    /**
+     * An array of tool call requests made during the response generation.
+     */
+    tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request.
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called.
+         */
+        name?: string;
+    }[];
+} | string | Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast_AsyncResponse;
+interface Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast_AsyncResponse { // result shape carrying only an optional request id
+    /**
+     * The async request id that can be used to obtain the results.
+     */
+    request_id?: string;
+}
+declare abstract class Base_Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast { // ambient (type-only) class pairing the input and output unions declared above
+    inputs: Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast_Input;
+    postProcessedOutputs: Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast_Output;
+}
+interface Ai_Cf_Meta_Llama_Guard_3_8B_Input { // request shape: required `messages`, optional token limit, temperature and response format
+    /**
+     * An array of message objects representing the conversation history.
+     */
+    messages: {
+        /**
+         * The role of the message sender; must alternate between 'user' and 'assistant'. The type restricts the value to those two literals but cannot enforce the alternation.
+         */
+        role: "user" | "assistant";
+        /**
+         * The content of the message as a string.
+         */
+        content: string;
+    }[];
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Dictate the output format of the generated response.
+     */
+    response_format?: {
+        /**
+         * Set to json_object to process and output generated text as JSON. Typed as a plain string, so the compiler does not enforce this value.
+         */
+        type?: string;
+    };
+}
+interface Ai_Cf_Meta_Llama_Guard_3_8B_Output { // result shape: optional `response` and optional token `usage`
+    response?: string | { // either a plain string or a structured verdict object
+        /**
+         * Whether the conversation is safe or not.
+         */
+        safe?: boolean;
+        /**
+         * A list of the hazard categories predicted for the conversation, if the conversation is deemed unsafe.
+         */
+        categories?: string[];
+    };
+    /**
+     * Usage statistics for the inference request.
+     */
+    usage?: {
+        /**
+         * Total number of tokens in input.
+         */
+        prompt_tokens?: number;
+        /**
+         * Total number of tokens in output.
+         */
+        completion_tokens?: number;
+        /**
+         * Total number of input and output tokens.
+         */
+        total_tokens?: number;
+    };
+}
+declare abstract class Base_Ai_Cf_Meta_Llama_Guard_3_8B { // ambient (type-only) class pairing the input and output shapes declared above
+    inputs: Ai_Cf_Meta_Llama_Guard_3_8B_Input;
+    postProcessedOutputs: Ai_Cf_Meta_Llama_Guard_3_8B_Output;
+}
+interface Ai_Cf_Baai_Bge_Reranker_Base_Input { // request shape as declared: optional `top_k` and required `contexts` (no `query` member is declared)
+    /**
+     * A query you wish to perform against the provided contexts. NOTE(review): no `query` property follows this comment, so it documents nothing — confirm against the generator's schema whether a `query` field was dropped.
+     */
+    /**
+     * Number of returned results starting with the best score.
+     */
+    top_k?: number;
+    /**
+     * List of provided contexts. Note that the index in this array is important, as the response will refer to it.
+     */
+    contexts: {
+        /**
+         * The text content of one provided context.
+         */
+        text?: string;
+    }[];
+}
+interface Ai_Cf_Baai_Bge_Reranker_Base_Output { // result shape: optional list of (context index, score) entries
+    response?: {
+        /**
+         * Index of the context in the request's `contexts` array.
+         */
+        id?: number;
+        /**
+         * Score of the context at that index.
+         */
+        score?: number;
+    }[];
+}
+declare abstract class Base_Ai_Cf_Baai_Bge_Reranker_Base { // ambient (type-only) class pairing the input and output shapes declared above
+    inputs: Ai_Cf_Baai_Bge_Reranker_Base_Input;
+    postProcessedOutputs: Ai_Cf_Baai_Bge_Reranker_Base_Output;
+}
+type Ai_Cf_Qwen_Qwen2_5_Coder_32B_Instruct_Input = Ai_Cf_Qwen_Qwen2_5_Coder_32B_Instruct_Prompt | Ai_Cf_Qwen_Qwen2_5_Coder_32B_Instruct_Messages; // either a prompt-style or a messages-style request
+interface Ai_Cf_Qwen_Qwen2_5_Coder_32B_Instruct_Prompt { // prompt-style request: required `prompt`, optional response format and sampling controls
+    /**
+     * The input text prompt for the model to generate a response.
+     */
+    prompt: string;
+    /**
+     * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+     */
+    lora?: string;
+    response_format?: Ai_Cf_Qwen_Qwen2_5_Coder_32B_Instruct_JSON_Mode; // optional JSON-mode selector (`type` plus optional `json_schema`)
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using SSE (Server-Sent Events).
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+interface Ai_Cf_Qwen_Qwen2_5_Coder_32B_Instruct_JSON_Mode { // type of `response_format` on the prompt-style request
+    type?: "json_object" | "json_schema"; // which of the two JSON output modes is requested
+    json_schema?: unknown; // untyped here; presumably the schema used with "json_schema" — confirm
+}
+interface Ai_Cf_Qwen_Qwen2_5_Coder_32B_Instruct_Messages {
+    /**
+     * An array of message objects representing the conversation history.
+     */
+    messages: {
+        /**
+         * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+         */
+        role: string;
+        /**
+         * The content of the message as a string.
+         */
+        content: string;
+    }[];
+    functions?: {
+        name: string;
+        code: string;
+    }[];
+    /**
+     * A list of tools available for the assistant to use.
+     */
+    tools?: ({
+        /**
+         * The name of the tool. More descriptive the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+                [k: string]: {
+                    /**
+                     * The data type of the parameter.
+                     */
+                    type: string;
+                    /**
+                     * A description of the expected parameter.
+                     */
+                    description: string;
+                };
+            };
+        };
+    } | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+            /**
+             * The name of the function.
+             */
+            name: string;
+            /**
+             * A brief description of what the function does.
+             */
+            description: string;
+            /**
+             * Schema defining the parameters accepted by the function.
+             */
+            parameters: {
+                /**
+                 * The type of the parameters object (usually 'object').
+                 */
+                type: string;
+                /**
+                 * List of required parameter names.
+                 */
+                required?: string[];
+                /**
+                 * Definitions of each parameter.
+                 */
+                properties: {
+                    [k: string]: {
+                        /**
+                         * The data type of the parameter.
+                         */
+                        type: string;
+                        /**
+                         * A description of the expected parameter.
+                         */
+                        description: string;
+                    };
+                };
+            };
+        };
+    })[];
+    response_format?: Ai_Cf_Qwen_Qwen2_5_Coder_32B_Instruct_JSON_Mode_1;
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+interface Ai_Cf_Qwen_Qwen2_5_Coder_32B_Instruct_JSON_Mode_1 { // Shape of the optional `response_format` request field.
+    type?: "json_object" | "json_schema"; // Only these two literal values are accepted.
+    json_schema?: unknown; // Untyped here; presumably the schema used when type is "json_schema" — TODO confirm.
+}
+type Ai_Cf_Qwen_Qwen2_5_Coder_32B_Instruct_Output = {
+    /**
+     * The generated text response from the model.
+     */
+    response: string;
+    /**
+     * Usage statistics for the inference request.
+     */
+    usage?: {
+        /**
+         * Total number of tokens in the input.
+         */
+        prompt_tokens?: number;
+        /**
+         * Total number of tokens in the output.
+         */
+        completion_tokens?: number;
+        /**
+         * Total number of input and output tokens.
+         */
+        total_tokens?: number;
+    };
+    /**
+     * An array of tool call requests made during response generation.
+     */
+    tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request.
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called.
+         */
+        name?: string;
+    }[];
+};
+declare abstract class Base_Ai_Cf_Qwen_Qwen2_5_Coder_32B_Instruct { // Associates this model's request type with its post-processed response type.
+    inputs: Ai_Cf_Qwen_Qwen2_5_Coder_32B_Instruct_Input;
+    postProcessedOutputs: Ai_Cf_Qwen_Qwen2_5_Coder_32B_Instruct_Output;
+}
+type Ai_Cf_Qwen_Qwq_32B_Input = Ai_Cf_Qwen_Qwq_32B_Prompt | Ai_Cf_Qwen_Qwq_32B_Messages; // Accepts either the single-prompt form or the chat-messages form.
+interface Ai_Cf_Qwen_Qwq_32B_Prompt { // Request shape driven by a single text prompt.
+    /**
+     * The input text prompt for the model to generate a response.
+     */
+    prompt: string;
+    /**
+     * JSON schema that should be fulfilled for the response.
+     */
+    guided_json?: object;
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using Server-Sent Events (SSE).
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+interface Ai_Cf_Qwen_Qwq_32B_Messages { // Request shape driven by a chat-style message list.
+    /**
+     * An array of message objects representing the conversation history.
+     */
+    messages: {
+        /**
+         * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+         */
+        role?: string;
+        /**
+         * The tool call ID. Must be supplied for tool calls for Mistral-3. If you don't know what to put here, you can fall back to 000000001. NOTE(review): the Mistral-3 wording appears in this Qwq type as well — confirm it applies.
+         */
+        tool_call_id?: string;
+        content?: string | { // Plain string, an array of content parts, or a single content part.
+            /**
+             * Type of the content provided
+             */
+            type?: string;
+            text?: string;
+            image_url?: {
+                /**
+                 * Image URI with data (e.g. data:image/jpeg;base64,/9j/...). HTTP URLs will not be accepted.
+                 */
+                url?: string;
+            };
+        }[] | {
+            /**
+             * Type of the content provided
+             */
+            type?: string;
+            text?: string;
+            image_url?: {
+                /**
+                 * Image URI with data (e.g. data:image/jpeg;base64,/9j/...). HTTP URLs will not be accepted.
+                 */
+                url?: string;
+            };
+        };
+    }[];
+    functions?: {
+        name: string;
+        code: string;
+    }[];
+    /**
+     * A list of tools available for the assistant to use.
+     */
+    tools?: ({
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+                [k: string]: {
+                    /**
+                     * The data type of the parameter.
+                     */
+                    type: string;
+                    /**
+                     * A description of the expected parameter.
+                     */
+                    description: string;
+                };
+            };
+        };
+    } | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+            /**
+             * The name of the function.
+             */
+            name: string;
+            /**
+             * A brief description of what the function does.
+             */
+            description: string;
+            /**
+             * Schema defining the parameters accepted by the function.
+             */
+            parameters: {
+                /**
+                 * The type of the parameters object (usually 'object').
+                 */
+                type: string;
+                /**
+                 * List of required parameter names.
+                 */
+                required?: string[];
+                /**
+                 * Definitions of each parameter.
+                 */
+                properties: {
+                    [k: string]: {
+                        /**
+                         * The data type of the parameter.
+                         */
+                        type: string;
+                        /**
+                         * A description of the expected parameter.
+                         */
+                        description: string;
+                    };
+                };
+            };
+        };
+    })[];
+    /**
+     * JSON schema that should be fulfilled for the response.
+     */
+    guided_json?: object;
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using Server-Sent Events (SSE).
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+type Ai_Cf_Qwen_Qwq_32B_Output = {
+    /**
+     * The generated text response from the model.
+     */
+    response: string;
+    /**
+     * Usage statistics for the inference request.
+     */
+    usage?: {
+        /**
+         * Total number of tokens in the input.
+         */
+        prompt_tokens?: number;
+        /**
+         * Total number of tokens in the output.
+         */
+        completion_tokens?: number;
+        /**
+         * Total number of input and output tokens.
+         */
+        total_tokens?: number;
+    };
+    /**
+     * An array of tool call requests made during response generation.
+     */
+    tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request.
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called.
+         */
+        name?: string;
+    }[];
+};
+declare abstract class Base_Ai_Cf_Qwen_Qwq_32B { // Associates this model's request type with its post-processed response type.
+    inputs: Ai_Cf_Qwen_Qwq_32B_Input;
+    postProcessedOutputs: Ai_Cf_Qwen_Qwq_32B_Output;
+}
+type Ai_Cf_Mistralai_Mistral_Small_3_1_24B_Instruct_Input = Ai_Cf_Mistralai_Mistral_Small_3_1_24B_Instruct_Prompt | Ai_Cf_Mistralai_Mistral_Small_3_1_24B_Instruct_Messages; // Accepts either the single-prompt form or the chat-messages form.
+interface Ai_Cf_Mistralai_Mistral_Small_3_1_24B_Instruct_Prompt { // Request shape driven by a single text prompt.
+    /**
+     * The input text prompt for the model to generate a response.
+     */
+    prompt: string;
+    /**
+     * JSON schema that should be fulfilled for the response.
+     */
+    guided_json?: object;
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using Server-Sent Events (SSE).
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+interface Ai_Cf_Mistralai_Mistral_Small_3_1_24B_Instruct_Messages { // Request shape driven by a chat-style message list.
+    /**
+     * An array of message objects representing the conversation history.
+     */
+    messages: {
+        /**
+         * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+         */
+        role?: string;
+        /**
+         * The tool call ID. Must be supplied for tool calls for Mistral-3. If you don't know what to put here, you can fall back to 000000001.
+         */
+        tool_call_id?: string;
+        content?: string | { // Plain string, an array of content parts, or a single content part.
+            /**
+             * Type of the content provided
+             */
+            type?: string;
+            text?: string;
+            image_url?: {
+                /**
+                 * Image URI with data (e.g. data:image/jpeg;base64,/9j/...). HTTP URLs will not be accepted.
+                 */
+                url?: string;
+            };
+        }[] | {
+            /**
+             * Type of the content provided
+             */
+            type?: string;
+            text?: string;
+            image_url?: {
+                /**
+                 * Image URI with data (e.g. data:image/jpeg;base64,/9j/...). HTTP URLs will not be accepted.
+                 */
+                url?: string;
+            };
+        };
+    }[];
+    functions?: {
+        name: string;
+        code: string;
+    }[];
+    /**
+     * A list of tools available for the assistant to use.
+     */
+    tools?: ({
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+                [k: string]: {
+                    /**
+                     * The data type of the parameter.
+                     */
+                    type: string;
+                    /**
+                     * A description of the expected parameter.
+                     */
+                    description: string;
+                };
+            };
+        };
+    } | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+            /**
+             * The name of the function.
+             */
+            name: string;
+            /**
+             * A brief description of what the function does.
+             */
+            description: string;
+            /**
+             * Schema defining the parameters accepted by the function.
+             */
+            parameters: {
+                /**
+                 * The type of the parameters object (usually 'object').
+                 */
+                type: string;
+                /**
+                 * List of required parameter names.
+                 */
+                required?: string[];
+                /**
+                 * Definitions of each parameter.
+                 */
+                properties: {
+                    [k: string]: {
+                        /**
+                         * The data type of the parameter.
+                         */
+                        type: string;
+                        /**
+                         * A description of the expected parameter.
+                         */
+                        description: string;
+                    };
+                };
+            };
+        };
+    })[];
+    /**
+     * JSON schema that should be fulfilled for the response.
+     */
+    guided_json?: object;
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using Server-Sent Events (SSE).
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+type Ai_Cf_Mistralai_Mistral_Small_3_1_24B_Instruct_Output = {
+    /**
+     * The generated text response from the model.
+     */
+    response: string;
+    /**
+     * Usage statistics for the inference request.
+     */
+    usage?: {
+        /**
+         * Total number of tokens in the input.
+         */
+        prompt_tokens?: number;
+        /**
+         * Total number of tokens in the output.
+         */
+        completion_tokens?: number;
+        /**
+         * Total number of input and output tokens.
+         */
+        total_tokens?: number;
+    };
+    /**
+     * An array of tool call requests made during response generation.
+     */
+    tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request.
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called.
+         */
+        name?: string;
+    }[];
+};
+declare abstract class Base_Ai_Cf_Mistralai_Mistral_Small_3_1_24B_Instruct { // Associates this model's request type with its post-processed response type.
+    inputs: Ai_Cf_Mistralai_Mistral_Small_3_1_24B_Instruct_Input;
+    postProcessedOutputs: Ai_Cf_Mistralai_Mistral_Small_3_1_24B_Instruct_Output;
+}
+type Ai_Cf_Google_Gemma_3_12B_It_Input = Ai_Cf_Google_Gemma_3_12B_It_Prompt | Ai_Cf_Google_Gemma_3_12B_It_Messages; // Accepts either the single-prompt form or the chat-messages form.
+interface Ai_Cf_Google_Gemma_3_12B_It_Prompt { // Request shape driven by a single text prompt.
+    /**
+     * The input text prompt for the model to generate a response.
+     */
+    prompt: string;
+    /**
+     * JSON schema that should be fulfilled for the response.
+     */
+    guided_json?: object;
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using Server-Sent Events (SSE).
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+interface Ai_Cf_Google_Gemma_3_12B_It_Messages { // Request shape driven by a chat-style message list.
+    /**
+     * An array of message objects representing the conversation history.
+     */
+    messages: {
+        /**
+         * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+         */
+        role?: string;
+        content?: string | { // Plain string or an array of content parts.
+            /**
+             * Type of the content provided
+             */
+            type?: string;
+            text?: string;
+            image_url?: {
+                /**
+                 * Image URI with data (e.g. data:image/jpeg;base64,/9j/...). HTTP URLs will not be accepted.
+                 */
+                url?: string;
+            };
+        }[];
+    }[];
+    functions?: {
+        name: string;
+        code: string;
+    }[];
+    /**
+     * A list of tools available for the assistant to use.
+     */
+    tools?: ({
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+                [k: string]: {
+                    /**
+                     * The data type of the parameter.
+                     */
+                    type: string;
+                    /**
+                     * A description of the expected parameter.
+                     */
+                    description: string;
+                };
+            };
+        };
+    } | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+            /**
+             * The name of the function.
+             */
+            name: string;
+            /**
+             * A brief description of what the function does.
+             */
+            description: string;
+            /**
+             * Schema defining the parameters accepted by the function.
+             */
+            parameters: {
+                /**
+                 * The type of the parameters object (usually 'object').
+                 */
+                type: string;
+                /**
+                 * List of required parameter names.
+                 */
+                required?: string[];
+                /**
+                 * Definitions of each parameter.
+                 */
+                properties: {
+                    [k: string]: {
+                        /**
+                         * The data type of the parameter.
+                         */
+                        type: string;
+                        /**
+                         * A description of the expected parameter.
+                         */
+                        description: string;
+                    };
+                };
+            };
+        };
+    })[];
+    /**
+     * JSON schema that should be fulfilled for the response.
+     */
+    guided_json?: object;
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using Server-Sent Events (SSE).
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+type Ai_Cf_Google_Gemma_3_12B_It_Output = {
+    /**
+     * The generated text response from the model.
+     */
+    response: string;
+    /**
+     * Usage statistics for the inference request.
+     */
+    usage?: {
+        /**
+         * Total number of tokens in the input.
+         */
+        prompt_tokens?: number;
+        /**
+         * Total number of tokens in the output.
+         */
+        completion_tokens?: number;
+        /**
+         * Total number of input and output tokens.
+         */
+        total_tokens?: number;
+    };
+    /**
+     * An array of tool call requests made during response generation.
+     */
+    tool_calls?: {
+        /**
+         * The arguments to be passed to the tool call request.
+         */
+        arguments?: object;
+        /**
+         * The name of the tool to be called.
+         */
+        name?: string;
+    }[];
+};
+declare abstract class Base_Ai_Cf_Google_Gemma_3_12B_It { // Associates this model's request type with its post-processed response type.
+    inputs: Ai_Cf_Google_Gemma_3_12B_It_Input;
+    postProcessedOutputs: Ai_Cf_Google_Gemma_3_12B_It_Output;
+}
+type Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_Input = Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_Prompt | Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_Messages | Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_Async_Batch; // Accepts the single-prompt form, the chat-messages form, or the Async_Batch form.
+interface Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_Prompt { // Request shape driven by a single text prompt.
+    /**
+     * The input text prompt for the model to generate a response.
+     */
+    prompt: string;
+    /**
+     * JSON schema that should be fulfilled for the response.
+     */
+    guided_json?: object;
+    response_format?: Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_JSON_Mode; // Optional; carries a `type` of "json_object" | "json_schema" and an untyped `json_schema`.
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using Server-Sent Events (SSE).
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+interface Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_JSON_Mode { // Shape of the optional `response_format` request field.
+    type?: "json_object" | "json_schema"; // Only these two literal values are accepted.
+    json_schema?: unknown; // Untyped here; presumably the schema used when type is "json_schema" — TODO confirm.
+}
+interface Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_Messages { // Chat-style request: conversation history plus optional tools, output constraints and sampling options.
+    /**
+     * An array of message objects representing the conversation history.
+     */
+    messages: {
+        /**
+         * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+         */
+        role?: string;
+        /**
+         * The tool call id. If you don't know what to put here you can fall back to 000000001
+         */
+        tool_call_id?: string;
+        content?: string | { // Plain string, an array of content parts, or a single content part.
+            /**
+             * Type of the content provided
+             */
+            type?: string;
+            text?: string;
+            image_url?: {
+                /**
+                 * Image URI with inline data (e.g. data:image/jpeg;base64,/9j/...). HTTP URLs are not accepted.
+                 */
+                url?: string;
+            };
+        }[] | {
+            /**
+             * Type of the content provided
+             */
+            type?: string;
+            text?: string;
+            image_url?: {
+                /**
+                 * Image URI with inline data (e.g. data:image/jpeg;base64,/9j/...). HTTP URLs are not accepted.
+                 */
+                url?: string;
+            };
+        };
+    }[];
+    functions?: { // NOTE(review): undocumented in this declaration; each entry carries a `name` and a `code` string.
+        name: string;
+        code: string;
+    }[];
+    /**
+     * A list of tools available for the assistant to use.
+     */
+    tools?: ({ // Two accepted shapes: a flat tool definition, or a `{ type, function }` wrapper.
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+                [k: string]: {
+                    /**
+                     * The data type of the parameter.
+                     */
+                    type: string;
+                    /**
+                     * A description of the expected parameter.
+                     */
+                    description: string;
+                };
+            };
+        };
+    } | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+            /**
+             * The name of the function.
+             */
+            name: string;
+            /**
+             * A brief description of what the function does.
+             */
+            description: string;
+            /**
+             * Schema defining the parameters accepted by the function.
+             */
+            parameters: {
+                /**
+                 * The type of the parameters object (usually 'object').
+                 */
+                type: string;
+                /**
+                 * List of required parameter names.
+                 */
+                required?: string[];
+                /**
+                 * Definitions of each parameter.
+                 */
+                properties: {
+                    [k: string]: {
+                        /**
+                         * The data type of the parameter.
+                         */
+                        type: string;
+                        /**
+                         * A description of the expected parameter.
+                         */
+                        description: string;
+                    };
+                };
+            };
+        };
+    })[];
+    response_format?: Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_JSON_Mode;
+    /**
+     * JSON schema that should be fulfilled for the response.
+     */
+    guided_json?: object;
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+interface Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_Async_Batch { // Batch request wrapping several prompt-style and/or message-style requests.
+    requests: (Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_Prompt_Inner | Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_Messages_Inner)[]; // Entries may mix both request shapes.
+}
+interface Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_Prompt_Inner { // Prompt-style request; one of the two entry shapes allowed in the async batch `requests` array.
+    /**
+     * The input text prompt for the model to generate a response.
+     */
+    prompt: string;
+    /**
+     * JSON schema that should be fulfilled for the response.
+     */
+    guided_json?: object;
+    response_format?: Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_JSON_Mode;
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+interface Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_Messages_Inner { // Message-style request; one of the two entry shapes allowed in the async batch `requests` array.
+    /**
+     * An array of message objects representing the conversation history.
+     */
+    messages: {
+        /**
+         * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+         */
+        role?: string;
+        /**
+         * The tool call id. If you don't know what to put here you can fall back to 000000001
+         */
+        tool_call_id?: string;
+        content?: string | { // Plain string, an array of content parts, or a single content part.
+            /**
+             * Type of the content provided
+             */
+            type?: string;
+            text?: string;
+            image_url?: {
+                /**
+                 * Image URI with inline data (e.g. data:image/jpeg;base64,/9j/...). HTTP URLs are not accepted.
+                 */
+                url?: string;
+            };
+        }[] | {
+            /**
+             * Type of the content provided
+             */
+            type?: string;
+            text?: string;
+            image_url?: {
+                /**
+                 * Image URI with inline data (e.g. data:image/jpeg;base64,/9j/...). HTTP URLs are not accepted.
+                 */
+                url?: string;
+            };
+        };
+    }[];
+    functions?: { // NOTE(review): undocumented in this declaration; each entry carries a `name` and a `code` string.
+        name: string;
+        code: string;
+    }[];
+    /**
+     * A list of tools available for the assistant to use.
+     */
+    tools?: ({ // Two accepted shapes: a flat tool definition, or a `{ type, function }` wrapper.
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+                [k: string]: {
+                    /**
+                     * The data type of the parameter.
+                     */
+                    type: string;
+                    /**
+                     * A description of the expected parameter.
+                     */
+                    description: string;
+                };
+            };
+        };
+    } | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+            /**
+             * The name of the function.
+             */
+            name: string;
+            /**
+             * A brief description of what the function does.
+             */
+            description: string;
+            /**
+             * Schema defining the parameters accepted by the function.
+             */
+            parameters: {
+                /**
+                 * The type of the parameters object (usually 'object').
+                 */
+                type: string;
+                /**
+                 * List of required parameter names.
+                 */
+                required?: string[];
+                /**
+                 * Definitions of each parameter.
+                 */
+                properties: {
+                    [k: string]: {
+                        /**
+                         * The data type of the parameter.
+                         */
+                        type: string;
+                        /**
+                         * A description of the expected parameter.
+                         */
+                        description: string;
+                    };
+                };
+            };
+        };
+    })[];
+    response_format?: Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_JSON_Mode;
+    /**
+     * JSON schema that should be fulfilled for the response.
+     */
+    guided_json?: object;
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+type Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_Output = { // Result shape: required `response` text plus optional usage counters and tool calls.
+    /**
+     * The generated text response from the model
+     */
+    response: string;
+    /**
+     * Usage statistics for the inference request
+     */
+    usage?: {
+        /**
+         * Total number of tokens in input
+         */
+        prompt_tokens?: number;
+        /**
+         * Total number of tokens in output
+         */
+        completion_tokens?: number;
+        /**
+         * Total number of input and output tokens
+         */
+        total_tokens?: number;
+    };
+    /**
+     * An array of tool call requests made during response generation
+     */
+    tool_calls?: {
+        /**
+         * The tool call id.
+         */
+        id?: string;
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type?: string;
+        /**
+         * Details of the function tool.
+         */
+        function?: {
+            /**
+             * The name of the tool to be called
+             */
+            name?: string;
+            /**
+             * The arguments to be passed to the tool call request
+             */
+            arguments?: object;
+        };
+    }[];
+};
+declare abstract class Base_Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct { // Ambient declaration pairing this model's input type with its output type.
+    inputs: Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_Input; // Accepted request shapes.
+    postProcessedOutputs: Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct_Output; // Result shape, as named "post-processed" by this declaration.
+}
+type Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_Input = Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_Prompt | Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_Messages | Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_Async_Batch; // Accepted request shapes: a single prompt, a chat-message request, or a batch of requests.
+interface Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_Prompt { // Prompt-style request: required `prompt` text plus optional LoRA, output-format and sampling options.
+    /**
+     * The input text prompt for the model to generate a response.
+     */
+    prompt: string;
+    /**
+     * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+     */
+    lora?: string;
+    response_format?: Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_JSON_Mode;
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+interface Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_JSON_Mode { // Type of `response_format` on the prompt-style request; same members as the `_JSON_Mode_1`..`_3` variants.
+    type?: "json_object" | "json_schema"; // The only two literals accepted by this declaration.
+    json_schema?: unknown; // Left untyped (`unknown`) here; narrow before reading.
+}
+interface Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_Messages { // Chat-style request: conversation history plus optional tools, output-format and sampling options.
+    /**
+     * An array of message objects representing the conversation history.
+     */
+    messages: { // Both `role` and `content` are required, and `content` is a plain string.
+        /**
+         * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+         */
+        role: string;
+        /**
+         * The content of the message as a string.
+         */
+        content: string;
+    }[];
+    functions?: { // NOTE(review): undocumented in this declaration; each entry carries a `name` and a `code` string.
+        name: string;
+        code: string;
+    }[];
+    /**
+     * A list of tools available for the assistant to use.
+     */
+    tools?: ({ // Two accepted shapes: a flat tool definition, or a `{ type, function }` wrapper.
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+                [k: string]: {
+                    /**
+                     * The data type of the parameter.
+                     */
+                    type: string;
+                    /**
+                     * A description of the expected parameter.
+                     */
+                    description: string;
+                };
+            };
+        };
+    } | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+            /**
+             * The name of the function.
+             */
+            name: string;
+            /**
+             * A brief description of what the function does.
+             */
+            description: string;
+            /**
+             * Schema defining the parameters accepted by the function.
+             */
+            parameters: {
+                /**
+                 * The type of the parameters object (usually 'object').
+                 */
+                type: string;
+                /**
+                 * List of required parameter names.
+                 */
+                required?: string[];
+                /**
+                 * Definitions of each parameter.
+                 */
+                properties: {
+                    [k: string]: {
+                        /**
+                         * The data type of the parameter.
+                         */
+                        type: string;
+                        /**
+                         * A description of the expected parameter.
+                         */
+                        description: string;
+                    };
+                };
+            };
+        };
+    })[];
+    response_format?: Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_JSON_Mode_1;
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+interface Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_JSON_Mode_1 { // Type of `response_format` on the chat-message request; same members as the other `_JSON_Mode*` variants.
+    type?: "json_object" | "json_schema"; // The only two literals accepted by this declaration.
+    json_schema?: unknown; // Left untyped (`unknown`) here; narrow before reading.
+}
+interface Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_Async_Batch { // Batch request wrapping several prompt-style and/or message-style requests.
+    requests: (Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_Prompt_1 | Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_Messages_1)[]; // Entries may mix both request shapes.
+}
+interface Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_Prompt_1 { // Prompt-style request; one of the two entry shapes allowed in the async batch `requests` array.
+    /**
+     * The input text prompt for the model to generate a response.
+     */
+    prompt: string;
+    /**
+     * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+     */
+    lora?: string;
+    response_format?: Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_JSON_Mode_2;
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+interface Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_JSON_Mode_2 { // Type of `response_format` on the batch prompt entry; same members as the other `_JSON_Mode*` variants.
+    type?: "json_object" | "json_schema"; // The only two literals accepted by this declaration.
+    json_schema?: unknown; // Left untyped (`unknown`) here; narrow before reading.
+}
+interface Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_Messages_1 { // Message-style request; one of the two entry shapes allowed in the async batch `requests` array.
+    /**
+     * An array of message objects representing the conversation history.
+     */
+    messages: { // Both `role` and `content` are required, and `content` is a plain string.
+        /**
+         * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+         */
+        role: string;
+        /**
+         * The content of the message as a string.
+         */
+        content: string;
+    }[];
+    functions?: { // NOTE(review): undocumented in this declaration; each entry carries a `name` and a `code` string.
+        name: string;
+        code: string;
+    }[];
+    /**
+     * A list of tools available for the assistant to use.
+     */
+    tools?: ({ // Two accepted shapes: a flat tool definition, or a `{ type, function }` wrapper.
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+                [k: string]: {
+                    /**
+                     * The data type of the parameter.
+                     */
+                    type: string;
+                    /**
+                     * A description of the expected parameter.
+                     */
+                    description: string;
+                };
+            };
+        };
+    } | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+            /**
+             * The name of the function.
+             */
+            name: string;
+            /**
+             * A brief description of what the function does.
+             */
+            description: string;
+            /**
+             * Schema defining the parameters accepted by the function.
+             */
+            parameters: {
+                /**
+                 * The type of the parameters object (usually 'object').
+                 */
+                type: string;
+                /**
+                 * List of required parameter names.
+                 */
+                required?: string[];
+                /**
+                 * Definitions of each parameter.
+                 */
+                properties: {
+                    [k: string]: {
+                        /**
+                         * The data type of the parameter.
+                         */
+                        type: string;
+                        /**
+                         * A description of the expected parameter.
+                         */
+                        description: string;
+                    };
+                };
+            };
+        };
+    })[];
+    response_format?: Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_JSON_Mode_3;
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using SSE, Server Sent Events.
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+interface Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_JSON_Mode_3 { // Type of `response_format` on the batch message entry; same members as the other `_JSON_Mode*` variants.
+    type?: "json_object" | "json_schema"; // The only two literals accepted by this declaration.
+    json_schema?: unknown; // Left untyped (`unknown`) here; narrow before reading.
+}
+type Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_Output = Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_Chat_Completion_Response | Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_Text_Completion_Response | string | Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_AsyncResponse; // Possible results: chat completion, text completion, a bare string, or an async request handle.
+interface Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_Chat_Completion_Response { // Chat completion result, identifiable by `object: "chat.completion"`; every top-level field is optional.
+    /**
+     * Unique identifier for the completion
+     */
+    id?: string;
+    /**
+     * Object type identifier
+     */
+    object?: "chat.completion";
+    /**
+     * Unix timestamp of when the completion was created
+     */
+    created?: number;
+    /**
+     * Model used for the completion
+     */
+    model?: string;
+    /**
+     * List of completion choices
+     */
+    choices?: {
+        /**
+         * Index of the choice in the list
+         */
+        index?: number;
+        /**
+         * The message generated by the model
+         */
+        message?: {
+            /**
+             * Role of the message author
+             */
+            role: string;
+            /**
+             * The content of the message
+             */
+            content: string;
+            /**
+             * Internal reasoning content (if available)
+             */
+            reasoning_content?: string;
+            /**
+             * Tool calls made by the assistant
+             */
+            tool_calls?: {
+                /**
+                 * Unique identifier for the tool call
+                 */
+                id: string;
+                /**
+                 * Type of tool call
+                 */
+                type: "function";
+                function: {
+                    /**
+                     * Name of the function to call
+                     */
+                    name: string;
+                    /**
+                     * JSON string of arguments for the function
+                     */
+                    arguments: string;
+                };
+            }[];
+        };
+        /**
+         * Reason why the model stopped generating
+         */
+        finish_reason?: string;
+        /**
+         * Stop reason (may be null)
+         */
+        stop_reason?: string | null;
+        /**
+         * Log probabilities (if requested)
+         */
+        logprobs?: {} | null; // Structure is not described by this declaration.
+    }[];
+    /**
+     * Usage statistics for the inference request
+     */
+    usage?: {
+        /**
+         * Total number of tokens in input
+         */
+        prompt_tokens?: number;
+        /**
+         * Total number of tokens in output
+         */
+        completion_tokens?: number;
+        /**
+         * Total number of input and output tokens
+         */
+        total_tokens?: number;
+    };
+    /**
+     * Log probabilities for the prompt (if requested)
+     */
+    prompt_logprobs?: {} | null; // Structure is not described by this declaration.
+}
+interface Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_Text_Completion_Response { // Text completion result, identifiable by `object: "text_completion"`; every top-level field is optional.
+    /**
+     * Unique identifier for the completion
+     */
+    id?: string;
+    /**
+     * Object type identifier
+     */
+    object?: "text_completion";
+    /**
+     * Unix timestamp of when the completion was created
+     */
+    created?: number;
+    /**
+     * Model used for the completion
+     */
+    model?: string;
+    /**
+     * List of completion choices
+     */
+    choices?: { // Within each choice, `index`, `text` and `finish_reason` are required.
+        /**
+         * Index of the choice in the list
+         */
+        index: number;
+        /**
+         * The generated text completion
+         */
+        text: string;
+        /**
+         * Reason why the model stopped generating
+         */
+        finish_reason: string;
+        /**
+         * Stop reason (may be null)
+         */
+        stop_reason?: string | null;
+        /**
+         * Log probabilities (if requested)
+         */
+        logprobs?: {} | null; // Structure is not described by this declaration.
+        /**
+         * Log probabilities for the prompt (if requested)
+         */
+        prompt_logprobs?: {} | null; // Declared per choice here rather than at the top level.
+    }[];
+    /**
+     * Usage statistics for the inference request
+     */
+    usage?: {
+        /**
+         * Total number of tokens in input
+         */
+        prompt_tokens?: number;
+        /**
+         * Total number of tokens in output
+         */
+        completion_tokens?: number;
+        /**
+         * Total number of input and output tokens
+         */
+        total_tokens?: number;
+    };
+}
+interface Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_AsyncResponse { // Handle for an asynchronous request; carries only an optional `request_id`.
+    /**
+     * The async request id that can be used to obtain the results.
+     */
+    request_id?: string;
+}
+declare abstract class Base_Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8 { // Ambient declaration pairing this model's input type with its output type.
+    inputs: Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_Input; // Accepted request shapes.
+    postProcessedOutputs: Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8_Output; // Result shapes, as named "post-processed" by this declaration.
+}
+interface Ai_Cf_Deepgram_Nova_3_Input { // Request options for this model; only `audio` is required.
+    audio: { // Audio payload together with its content type.
+        body: object;
+        contentType: string;
+    };
+    /**
+     * Sets how the model will interpret strings submitted to the custom_topic param. When strict, the model will only return topics submitted using the custom_topic param. When extended, the model will return its own detected topics in addition to those submitted using the custom_topic param.
+     */
+    custom_topic_mode?: "extended" | "strict";
+    /**
+     * Custom topics you want the model to detect within your input audio or text, if present. Submit up to 100.
+     */
+    custom_topic?: string;
+    /**
+     * Sets how the model will interpret intents submitted to the custom_intent param. When strict, the model will only return intents submitted using the custom_intent param. When extended, the model will return its own detected intents in addition to those submitted using the custom_intent param.
+     */
+    custom_intent_mode?: "extended" | "strict";
+    /**
+     * Custom intents you want the model to detect within your input audio, if present.
+     */
+    custom_intent?: string;
+    /**
+     * Identifies and extracts key entities from content in submitted audio.
+     */
+    detect_entities?: boolean;
+    /**
+     * Identifies the dominant language spoken in submitted audio.
+     */
+    detect_language?: boolean;
+    /**
+     * Recognizes speaker changes. Each word in the transcript will be assigned a speaker number starting at 0.
+     */
+    diarize?: boolean;
+    /**
+     * NOTE(review): the generated description here duplicated detect_entities. Presumably this enables a dictation mode that controls formatting of dictated speech - confirm against the Deepgram docs.
+     */
+    dictation?: boolean;
+    /**
+     * Specifies the expected encoding of your submitted audio.
+     */
+    encoding?: "linear16" | "flac" | "mulaw" | "amr-nb" | "amr-wb" | "opus" | "speex" | "g729";
+    /**
+     * Arbitrary key-value pairs that are attached to the API response for usage in downstream processing.
+     */
+    extra?: string;
+    /**
+     * Filler Words can help transcribe interruptions in your audio, like 'uh' and 'um'.
+     */
+    filler_words?: boolean;
+    /**
+     * Key term prompting can boost or suppress specialized terminology and brands.
+     */
+    keyterm?: string;
+    /**
+     * Keywords can boost or suppress specialized terminology and brands.
+     */
+    keywords?: string;
+    /**
+     * The BCP-47 language tag that hints at the primary spoken language. Depending on the model and API endpoint you choose, only certain languages are available.
+     */
+    language?: string;
+    /**
+     * Spoken measurements will be converted to their corresponding abbreviations.
+     */
+    measurements?: boolean;
+    /**
+     * Opts out requests from the Deepgram Model Improvement Program. Refer to our Docs for pricing impacts before setting this to true. https://dpgr.am/deepgram-mip.
+     */
+    mip_opt_out?: boolean;
+    /**
+     * Mode of operation for the model, representing the broad topic area that will be talked about in the supplied audio.
+     */
+    mode?: "general" | "medical" | "finance";
+    /**
+     * Transcribe each audio channel independently.
+     */
+    multichannel?: boolean;
+    /**
+     * Numerals converts numbers from written format to numerical format.
+     */
+    numerals?: boolean;
+    /**
+     * Splits audio into paragraphs to improve transcript readability.
+     */
+    paragraphs?: boolean;
+    /**
+     * Profanity Filter looks for recognized profanity and converts it to the nearest recognized non-profane word or removes it from the transcript completely.
+     */
+    profanity_filter?: boolean;
+    /**
+     * Add punctuation and capitalization to the transcript.
+     */
+    punctuate?: boolean;
+    /**
+     * Redaction removes sensitive information from your transcripts.
+     */
+    redact?: string;
+    /**
+     * Searches for terms or phrases in submitted audio and replaces them.
+     */
+    replace?: string;
+    /**
+     * Searches for terms or phrases in submitted audio.
+     */
+    search?: string;
+    /**
+     * Recognizes the sentiment throughout a transcript or text.
+     */
+    sentiment?: boolean;
+    /**
+     * Apply formatting to transcript output. When set to true, additional formatting will be applied to transcripts to improve readability.
+     */
+    smart_format?: boolean;
+    /**
+     * Detect topics throughout a transcript or text.
+     */
+    topics?: boolean;
+    /**
+     * Segments speech into meaningful semantic units.
+     */
+    utterances?: boolean;
+    /**
+     * Seconds to wait before detecting a pause between words in submitted audio.
+     */
+    utt_split?: number;
+    /**
+     * The number of channels in the submitted audio.
+     */
+    channels?: number;
+    /**
+     * Specifies whether the streaming endpoint should provide ongoing transcription updates as more audio is received. When set to true, the endpoint sends continuous updates, meaning transcription results may evolve over time. Note: Supported only for WebSockets.
+     */
+    interim_results?: boolean;
+    /**
+     * Indicates how long the model will wait to detect whether a speaker has finished speaking or pauses for a significant period of time. When set to a value, the streaming endpoint immediately finalizes the transcription for the processed time range and returns the transcript with a speech_final parameter set to true. Can also be set to false to disable endpointing.
+     */
+    endpointing?: string;
+    /**
+     * Indicates that speech has started. You'll begin receiving Speech Started messages upon speech starting. Note: Supported only for WebSockets.
+     */
+    vad_events?: boolean;
+    /**
+     * Indicates how long the model will wait to send an UtteranceEnd message after a word has been transcribed. Use with interim_results. Note: Supported only for WebSockets. NOTE(review): described as a duration but typed as boolean - confirm the intended type upstream.
+     */
+    utterance_end_ms?: boolean;
+}
+interface Ai_Cf_Deepgram_Nova_3_Output { // Result shape; every field at every level is optional.
+    results?: {
+        channels?: { // Array of channel results, each holding an array of alternatives.
+            alternatives?: {
+                confidence?: number;
+                transcript?: string;
+                words?: { // Per-word entries: confidence, start/end positions, and the word text.
+                    confidence?: number;
+                    end?: number;
+                    start?: number;
+                    word?: string;
+                }[];
+            }[];
+        }[];
+        summary?: {
+            result?: string;
+            short?: string;
+        };
+        sentiments?: { // Per-segment sentiment entries plus an overall average.
+            segments?: {
+                text?: string;
+                start_word?: number;
+                end_word?: number;
+                sentiment?: string;
+                sentiment_score?: number;
+            }[];
+            average?: {
+                sentiment?: string;
+                sentiment_score?: number;
+            };
+        };
+    };
+}
+declare abstract class Base_Ai_Cf_Deepgram_Nova_3 { // Declaration-only pairing of this model's input and output types.
+    inputs: Ai_Cf_Deepgram_Nova_3_Input; // Typed as the model's _Input declaration.
+    postProcessedOutputs: Ai_Cf_Deepgram_Nova_3_Output; // Typed as the model's _Output declaration.
+}
+interface Ai_Cf_Qwen_Qwen3_Embedding_0_6B_Input { // All fields are optional.
+    queries?: string | string[]; // A single string or an array of strings.
+    /**
+     * Optional instruction for the task.
+     */
+    instruction?: string;
+    documents?: string | string[]; // A single string or an array of strings.
+    text?: string | string[]; // A single string or an array of strings.
+}
+interface Ai_Cf_Qwen_Qwen3_Embedding_0_6B_Output { // Both fields are optional.
+    data?: number[][]; // Two-dimensional numeric array (presumably one embedding vector per input - TODO confirm).
+    shape?: number[]; // Presumably the dimensions of `data` - TODO confirm.
+}
+declare abstract class Base_Ai_Cf_Qwen_Qwen3_Embedding_0_6B { // Declaration-only pairing of this model's input and output types.
+    inputs: Ai_Cf_Qwen_Qwen3_Embedding_0_6B_Input; // Typed as the model's _Input declaration.
+    postProcessedOutputs: Ai_Cf_Qwen_Qwen3_Embedding_0_6B_Output; // Typed as the model's _Output declaration.
+}
+type Ai_Cf_Pipecat_Ai_Smart_Turn_V2_Input = { // Union of two shapes that differ only in how `audio` is supplied.
+    /**
+     * Readable stream with audio data, plus the content type specified for that data.
+     */
+    audio: {
+        body: object;
+        contentType: string;
+    };
+    /**
+     * Data type of the PCM samples that are sent to the inference server as a raw array.
+     */
+    dtype?: "uint8" | "float32" | "float64";
+} | {
+    /**
+     * Base64-encoded audio data.
+     */
+    audio: string;
+    /**
+     * Data type of the PCM samples that are sent to the inference server as a raw array.
+     */
+    dtype?: "uint8" | "float32" | "float64";
+};
+interface Ai_Cf_Pipecat_Ai_Smart_Turn_V2_Output { // Both fields are optional.
+    /**
+     * If true, an end-of-turn was detected.
+     */
+    is_complete?: boolean;
+    /**
+     * Probability of the end-of-turn detection.
+     */
+    probability?: number;
+}
+declare abstract class Base_Ai_Cf_Pipecat_Ai_Smart_Turn_V2 { // Declaration-only pairing of this model's input and output types.
+    inputs: Ai_Cf_Pipecat_Ai_Smart_Turn_V2_Input; // Typed as the model's _Input declaration.
+    postProcessedOutputs: Ai_Cf_Pipecat_Ai_Smart_Turn_V2_Output; // Typed as the model's _Output declaration.
+}
+declare abstract class Base_Ai_Cf_Openai_Gpt_Oss_120B { // Declaration-only; uses the shared ResponsesInput/ResponsesOutput types rather than model-specific ones.
+    inputs: ResponsesInput; // Same input type as Base_Ai_Cf_Openai_Gpt_Oss_20B.
+    postProcessedOutputs: ResponsesOutput; // Same output type as Base_Ai_Cf_Openai_Gpt_Oss_20B.
+}
+declare abstract class Base_Ai_Cf_Openai_Gpt_Oss_20B { // Declaration-only; uses the shared ResponsesInput/ResponsesOutput types rather than model-specific ones.
+    inputs: ResponsesInput; // Same input type as Base_Ai_Cf_Openai_Gpt_Oss_120B.
+    postProcessedOutputs: ResponsesOutput; // Same output type as Base_Ai_Cf_Openai_Gpt_Oss_120B.
+}
+interface Ai_Cf_Leonardo_Phoenix_1_0_Input { // Image-generation request; only `prompt` is required.
+    /**
+     * A text description of the image you want to generate.
+     */
+    prompt: string;
+    /**
+     * Controls how closely the generated image should adhere to the prompt; higher values make the image more aligned with the prompt.
+     */
+    guidance?: number;
+    /**
+     * Random seed for reproducibility of the image generation.
+     */
+    seed?: number;
+    /**
+     * The height of the generated image in pixels.
+     */
+    height?: number;
+    /**
+     * The width of the generated image in pixels.
+     */
+    width?: number;
+    /**
+     * The number of diffusion steps; higher values can improve quality but take longer.
+     */
+    num_steps?: number;
+    /**
+     * Specify what to exclude from the generated images.
+     */
+    negative_prompt?: string;
+}
+/**
+ * The generated image in JPEG format. NOTE(review): typed as a plain string; how the image bytes are encoded in it is not stated here - confirm.
+ */
+type Ai_Cf_Leonardo_Phoenix_1_0_Output = string;
+declare abstract class Base_Ai_Cf_Leonardo_Phoenix_1_0 { // Declaration-only pairing of this model's input and output types.
+    inputs: Ai_Cf_Leonardo_Phoenix_1_0_Input; // Typed as the model's _Input declaration.
+    postProcessedOutputs: Ai_Cf_Leonardo_Phoenix_1_0_Output; // Typed as the model's _Output declaration (a string alias).
+}
+interface Ai_Cf_Leonardo_Lucid_Origin_Input { // Image-generation request; only `prompt` is required.
+    /**
+     * A text description of the image you want to generate.
+     */
+    prompt: string;
+    /**
+     * Controls how closely the generated image should adhere to the prompt; higher values make the image more aligned with the prompt.
+     */
+    guidance?: number;
+    /**
+     * Random seed for reproducibility of the image generation.
+     */
+    seed?: number;
+    /**
+     * The height of the generated image in pixels.
+     */
+    height?: number;
+    /**
+     * The width of the generated image in pixels.
+     */
+    width?: number;
+    /**
+     * The number of diffusion steps; higher values can improve quality but take longer.
+     */
+    num_steps?: number;
+    /**
+     * The number of diffusion steps; higher values can improve quality but take longer. NOTE(review): same description as num_steps, presumably an alias - confirm which field takes precedence.
+     */
+    steps?: number;
+}
+interface Ai_Cf_Leonardo_Lucid_Origin_Output { // Single optional field.
+    /**
+     * The generated image, Base64-encoded.
+     */
+    image?: string;
+}
+declare abstract class Base_Ai_Cf_Leonardo_Lucid_Origin { // Declaration-only pairing of this model's input and output types.
+    inputs: Ai_Cf_Leonardo_Lucid_Origin_Input; // Typed as the model's _Input declaration.
+    postProcessedOutputs: Ai_Cf_Leonardo_Lucid_Origin_Output; // Typed as the model's _Output declaration.
+}
+interface Ai_Cf_Deepgram_Aura_1_Input { // Text-to-speech request; only `text` is required.
+    /**
+     * Speaker used to produce the audio.
+     */
+    speaker?: "angus" | "asteria" | "arcas" | "orion" | "orpheus" | "athena" | "luna" | "zeus" | "perseus" | "helios" | "hera" | "stella";
+    /**
+     * Encoding of the output audio.
+     */
+    encoding?: "linear16" | "flac" | "mulaw" | "alaw" | "mp3" | "opus" | "aac";
+    /**
+     * Container specifies the file format wrapper for the output audio. The available options depend on the encoding type.
+     */
+    container?: "none" | "wav" | "ogg";
+    /**
+     * The text content to be converted to speech.
+     */
+    text: string;
+    /**
+     * Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable.
+     */
+    sample_rate?: number;
+    /**
+     * The bitrate of the audio in bits per second. Choose from predefined ranges or specific values based on the encoding type.
+     */
+    bit_rate?: number;
+}
+/**
+ * The generated audio in MP3 format. NOTE(review): Ai_Cf_Deepgram_Aura_1_Input.encoding also allows non-MP3 encodings, so the actual format presumably follows the request - confirm.
+ */
+type Ai_Cf_Deepgram_Aura_1_Output = string;
+declare abstract class Base_Ai_Cf_Deepgram_Aura_1 { // Declaration-only pairing of this model's input and output types.
+    inputs: Ai_Cf_Deepgram_Aura_1_Input; // Typed as the model's _Input declaration.
+    postProcessedOutputs: Ai_Cf_Deepgram_Aura_1_Output; // Typed as the model's _Output declaration (a string alias).
+}
+interface Ai_Cf_Ai4Bharat_Indictrans2_En_Indic_1B_Input { // Translation request; both fields are required.
+    /**
+     * Input text to translate. Can be a single string or a list of strings.
+     */
+    text: string | string[];
+    /**
+     * Target language to translate to, restricted to the literal codes listed in the union below.
+     */
+    target_language: "asm_Beng" | "awa_Deva" | "ben_Beng" | "bho_Deva" | "brx_Deva" | "doi_Deva" | "eng_Latn" | "gom_Deva" | "gon_Deva" | "guj_Gujr" | "hin_Deva" | "hne_Deva" | "kan_Knda" | "kas_Arab" | "kas_Deva" | "kha_Latn" | "lus_Latn" | "mag_Deva" | "mai_Deva" | "mal_Mlym" | "mar_Deva" | "mni_Beng" | "mni_Mtei" | "npi_Deva" | "ory_Orya" | "pan_Guru" | "san_Deva" | "sat_Olck" | "snd_Arab" | "snd_Deva" | "tam_Taml" | "tel_Telu" | "urd_Arab" | "unr_Deva";
+}
+interface Ai_Cf_Ai4Bharat_Indictrans2_En_Indic_1B_Output { // Single required field.
+    /**
+     * Translated texts, always typed as an array of strings.
+     */
+    translations: string[];
+}
+declare abstract class Base_Ai_Cf_Ai4Bharat_Indictrans2_En_Indic_1B { // Declaration-only pairing of this model's input and output types.
+    inputs: Ai_Cf_Ai4Bharat_Indictrans2_En_Indic_1B_Input; // Typed as the model's _Input declaration.
+    postProcessedOutputs: Ai_Cf_Ai4Bharat_Indictrans2_En_Indic_1B_Output; // Typed as the model's _Output declaration.
+}
+type Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_Input = Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_Prompt | Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_Messages | Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_Async_Batch; // One of: a prompt request, a messages request, or a batch of requests.
+interface Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_Prompt { // Prompt-style request; only `prompt` is required.
+    /**
+     * The input text prompt for the model to generate a response.
+     */
+    prompt: string;
+    /**
+     * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+     */
+    lora?: string;
+    response_format?: Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_JSON_Mode; // Optional JSON-mode settings.
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using SSE (Server-Sent Events).
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+interface Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_JSON_Mode { // Both fields are optional.
+    type?: "json_object" | "json_schema"; // Only these two literal values are accepted.
+    json_schema?: unknown; // Deliberately untyped; presumably the schema used when type is "json_schema" - TODO confirm.
+}
+interface Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_Messages { // Messages-style request; only `messages` is required.
+    /**
+     * An array of message objects representing the conversation history.
+     */
+    messages: {
+        /**
+         * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+         */
+        role: string;
+        /**
+         * The content of the message as a string.
+         */
+        content: string;
+    }[];
+    functions?: { // Optional list of name/code pairs; their semantics are not documented here.
+        name: string;
+        code: string;
+    }[];
+    /**
+     * A list of tools available for the assistant to use. Each entry takes one of two shapes: a flat tool definition, or a { type, function } wrapper.
+     */
+    tools?: ({
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+                [k: string]: {
+                    /**
+                     * The data type of the parameter.
+                     */
+                    type: string;
+                    /**
+                     * A description of the expected parameter.
+                     */
+                    description: string;
+                };
+            };
+        };
+    } | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+            /**
+             * The name of the function.
+             */
+            name: string;
+            /**
+             * A brief description of what the function does.
+             */
+            description: string;
+            /**
+             * Schema defining the parameters accepted by the function.
+             */
+            parameters: {
+                /**
+                 * The type of the parameters object (usually 'object').
+                 */
+                type: string;
+                /**
+                 * List of required parameter names.
+                 */
+                required?: string[];
+                /**
+                 * Definitions of each parameter.
+                 */
+                properties: {
+                    [k: string]: {
+                        /**
+                         * The data type of the parameter.
+                         */
+                        type: string;
+                        /**
+                         * A description of the expected parameter.
+                         */
+                        description: string;
+                    };
+                };
+            };
+        };
+    })[];
+    response_format?: Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_JSON_Mode_1; // Optional JSON-mode settings.
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using SSE (Server-Sent Events).
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+interface Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_JSON_Mode_1 { // Both fields are optional.
+    type?: "json_object" | "json_schema"; // Only these two literal values are accepted.
+    json_schema?: unknown; // Deliberately untyped; presumably the schema used when type is "json_schema" - TODO confirm.
+}
+interface Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_Async_Batch { // Batch request: a required array mixing prompt-style and messages-style entries.
+    requests: (Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_Prompt_1 | Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_Messages_1)[]; // Each element is either a Prompt_1 or a Messages_1.
+}
+interface Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_Prompt_1 { // Prompt-style entry used inside Async_Batch.requests; only `prompt` is required.
+    /**
+     * The input text prompt for the model to generate a response.
+     */
+    prompt: string;
+    /**
+     * Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model.
+     */
+    lora?: string;
+    response_format?: Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_JSON_Mode_2; // Optional JSON-mode settings.
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using SSE (Server-Sent Events).
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+interface Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_JSON_Mode_2 { // Both fields are optional.
+    type?: "json_object" | "json_schema"; // Only these two literal values are accepted.
+    json_schema?: unknown; // Deliberately untyped; presumably the schema used when type is "json_schema" - TODO confirm.
+}
+interface Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_Messages_1 { // Messages-style entry used inside Async_Batch.requests; only `messages` is required.
+    /**
+     * An array of message objects representing the conversation history.
+     */
+    messages: {
+        /**
+         * The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool').
+         */
+        role: string;
+        /**
+         * The content of the message as a string.
+         */
+        content: string;
+    }[];
+    functions?: { // Optional list of name/code pairs; their semantics are not documented here.
+        name: string;
+        code: string;
+    }[];
+    /**
+     * A list of tools available for the assistant to use. Each entry takes one of two shapes: a flat tool definition, or a { type, function } wrapper.
+     */
+    tools?: ({
+        /**
+         * The name of the tool. The more descriptive, the better.
+         */
+        name: string;
+        /**
+         * A brief description of what the tool does.
+         */
+        description: string;
+        /**
+         * Schema defining the parameters accepted by the tool.
+         */
+        parameters: {
+            /**
+             * The type of the parameters object (usually 'object').
+             */
+            type: string;
+            /**
+             * List of required parameter names.
+             */
+            required?: string[];
+            /**
+             * Definitions of each parameter.
+             */
+            properties: {
+                [k: string]: {
+                    /**
+                     * The data type of the parameter.
+                     */
+                    type: string;
+                    /**
+                     * A description of the expected parameter.
+                     */
+                    description: string;
+                };
+            };
+        };
+    } | {
+        /**
+         * Specifies the type of tool (e.g., 'function').
+         */
+        type: string;
+        /**
+         * Details of the function tool.
+         */
+        function: {
+            /**
+             * The name of the function.
+             */
+            name: string;
+            /**
+             * A brief description of what the function does.
+             */
+            description: string;
+            /**
+             * Schema defining the parameters accepted by the function.
+             */
+            parameters: {
+                /**
+                 * The type of the parameters object (usually 'object').
+                 */
+                type: string;
+                /**
+                 * List of required parameter names.
+                 */
+                required?: string[];
+                /**
+                 * Definitions of each parameter.
+                 */
+                properties: {
+                    [k: string]: {
+                        /**
+                         * The data type of the parameter.
+                         */
+                        type: string;
+                        /**
+                         * A description of the expected parameter.
+                         */
+                        description: string;
+                    };
+                };
+            };
+        };
+    })[];
+    response_format?: Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_JSON_Mode_3; // Optional JSON-mode settings.
+    /**
+     * If true, a chat template is not applied and you must adhere to the specific model's expected formatting.
+     */
+    raw?: boolean;
+    /**
+     * If true, the response will be streamed back incrementally using SSE (Server-Sent Events).
+     */
+    stream?: boolean;
+    /**
+     * The maximum number of tokens to generate in the response.
+     */
+    max_tokens?: number;
+    /**
+     * Controls the randomness of the output; higher values produce more random results.
+     */
+    temperature?: number;
+    /**
+     * Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses.
+     */
+    top_p?: number;
+    /**
+     * Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises.
+     */
+    top_k?: number;
+    /**
+     * Random seed for reproducibility of the generation.
+     */
+    seed?: number;
+    /**
+     * Penalty for repeated tokens; higher values discourage repetition.
+     */
+    repetition_penalty?: number;
+    /**
+     * Decreases the likelihood of the model repeating the same lines verbatim.
+     */
+    frequency_penalty?: number;
+    /**
+     * Increases the likelihood of the model introducing new topics.
+     */
+    presence_penalty?: number;
+}
+interface Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_JSON_Mode_3 { // Both fields are optional.
+    type?: "json_object" | "json_schema"; // Only these two literal values are accepted.
+    json_schema?: unknown; // Deliberately untyped; presumably the schema used when type is "json_schema" - TODO confirm.
+}
+type Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_Output = Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_Chat_Completion_Response | Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_Text_Completion_Response | string | Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_AsyncResponse; // One of: chat completion, text completion, a plain string, or an async response.
+interface Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_Chat_Completion_Response { // Chat-completion result; all top-level fields are optional.
+    /**
+     * Unique identifier for the completion.
+     */
+    id?: string;
+    /**
+     * Object type identifier; when present it is always the literal "chat.completion".
+     */
+    object?: "chat.completion";
+    /**
+     * Unix timestamp of when the completion was created.
+     */
+    created?: number;
+    /**
+     * Model used for the completion.
+     */
+    model?: string;
+    /**
+     * List of completion choices.
+     */
+    choices?: {
+        /**
+         * Index of the choice in the list.
+         */
+        index?: number;
+        /**
+         * The message generated by the model. When present, `role` and `content` are required.
+         */
+        message?: {
+            /**
+             * Role of the message author.
+             */
+            role: string;
+            /**
+             * The content of the message.
+             */
+            content: string;
+            /**
+             * Internal reasoning content (if available).
+             */
+            reasoning_content?: string;
+            /**
+             * Tool calls made by the assistant.
+             */
+            tool_calls?: {
+                /**
+                 * Unique identifier for the tool call.
+                 */
+                id: string;
+                /**
+                 * Type of tool call; always the literal "function".
+                 */
+                type: "function";
+                function: {
+                    /**
+                     * Name of the function to call.
+                     */
+                    name: string;
+                    /**
+                     * JSON string of arguments for the function.
+                     */
+                    arguments: string;
+                };
+            }[];
+        };
+        /**
+         * Reason why the model stopped generating.
+         */
+        finish_reason?: string;
+        /**
+         * Stop reason (may be null).
+         */
+        stop_reason?: string | null;
+        /**
+         * Log probabilities (if requested); the object's shape is not specified here.
+         */
+        logprobs?: {} | null;
+    }[];
+    /**
+     * Usage statistics for the inference request.
+     */
+    usage?: {
+        /**
+         * Total number of tokens in the input.
+         */
+        prompt_tokens?: number;
+        /**
+         * Total number of tokens in the output.
+         */
+        completion_tokens?: number;
+        /**
+         * Total number of input and output tokens.
+         */
+        total_tokens?: number;
+    };
+    /**
+     * Log probabilities for the prompt (if requested); the object's shape is not specified here.
+     */
+    prompt_logprobs?: {} | null;
+}
+interface Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_Text_Completion_Response { // Text-completion result; top-level fields are optional, but each choice requires index, text, and finish_reason.
+    /**
+     * Unique identifier for the completion.
+     */
+    id?: string;
+    /**
+     * Object type identifier; when present it is always the literal "text_completion".
+     */
+    object?: "text_completion";
+    /**
+     * Unix timestamp of when the completion was created.
+     */
+    created?: number;
+    /**
+     * Model used for the completion.
+     */
+    model?: string;
+    /**
+     * List of completion choices.
+     */
+    choices?: {
+        /**
+         * Index of the choice in the list.
+         */
+        index: number;
+        /**
+         * The generated text completion.
+         */
+        text: string;
+        /**
+         * Reason why the model stopped generating.
+         */
+        finish_reason: string;
+        /**
+         * Stop reason (may be null).
+         */
+        stop_reason?: string | null;
+        /**
+         * Log probabilities (if requested); the object's shape is not specified here.
+         */
+        logprobs?: {} | null;
+        /**
+         * Log probabilities for the prompt (if requested); the object's shape is not specified here.
+         */
+        prompt_logprobs?: {} | null;
+    }[];
+    /**
+     * Usage statistics for the inference request.
+     */
+    usage?: {
+        /**
+         * Total number of tokens in the input.
+         */
+        prompt_tokens?: number;
+        /**
+         * Total number of tokens in the output.
+         */
+        completion_tokens?: number;
+        /**
+         * Total number of input and output tokens.
+         */
+        total_tokens?: number;
+    };
+}
+interface Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_AsyncResponse { // Member of the _Output union that carries only an optional request id.
+    /**
+     * The async request ID that can be used to obtain the results.
+     */
+    request_id?: string;
+}
+declare abstract class Base_Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It { // Declaration-only pairing of this model's input and output types.
+    inputs: Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_Input; // Typed as the model's _Input union.
+    postProcessedOutputs: Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It_Output; // Typed as the model's _Output union.
+}
+interface Ai_Cf_Pfnet_Plamo_Embedding_1B_Input { // Embedding request with a single required field.
+    /**
+     * Input text to embed. Can be a single string or a list of strings.
+     */
+    text: string | string[];
+}
+interface Ai_Cf_Pfnet_Plamo_Embedding_1B_Output { // Embedding result; both fields are required.
+    /**
+     * Embedding vectors, where each vector is a list of floats.
+     */
+    data: number[][];
+    /**
+     * Shape of the embedding data as a two-element tuple: [number_of_embeddings, embedding_dimension].
+     *
+     * @minItems 2
+     * @maxItems 2
+     */
+    shape: [
+        number,
+        number
+    ];
+}
+declare abstract class Base_Ai_Cf_Pfnet_Plamo_Embedding_1B { // ambient pairing of this model's input and output types
+    inputs: Ai_Cf_Pfnet_Plamo_Embedding_1B_Input; // Ai.run constrains its `inputs` argument to this type
+    postProcessedOutputs: Ai_Cf_Pfnet_Plamo_Embedding_1B_Output; // Ai.run resolves with this unless raw, websocket or stream mode is selected
+}
+interface Ai_Cf_Deepgram_Flux_Input { // request options for the "@cf/deepgram/flux" entry of AiModels
+    /**
+     * Encoding of the audio stream. Currently only supports raw signed little-endian 16-bit PCM.
+     */
+    encoding: "linear16"; // required; single allowed literal
+    /**
+     * Sample rate of the audio stream in Hz.
+     */
+    sample_rate: string; // required; the numeric value is typed as a string
+    /**
+     * End-of-turn confidence required to fire an eager end-of-turn event. When set, enables EagerEndOfTurn and TurnResumed events. Valid values: 0.3 - 0.9.
+     */
+    eager_eot_threshold?: string; // typed as a string although the documented values are numeric
+    /**
+     * End-of-turn confidence required to finish a turn. Valid values: 0.5 - 0.9.
+     */
+    eot_threshold?: string; // typed as a string although the documented values are numeric
+    /**
+     * A turn will be finished when this much time has passed after speech, regardless of EOT confidence.
+     */
+    eot_timeout_ms?: string; // typed as a string; the name indicates milliseconds
+    /**
+     * Keyterm prompting can improve recognition of specialized terminology. Pass multiple keyterm query parameters to boost multiple keyterms.
+     */
+    keyterm?: string;
+    /**
+     * Opts out requests from the Deepgram Model Improvement Program. Refer to Deepgram Docs for pricing impacts before setting this to true. https://dpgr.am/deepgram-mip
+     */
+    mip_opt_out?: "true" | "false"; // string literals, not booleans
+    /**
+     * Label your requests for the purpose of identification during usage reporting
+     */
+    tag?: string;
+}
+/**
+ * Output will be returned as websocket messages. Every field below is optional in this typing.
+ */
+interface Ai_Cf_Deepgram_Flux_Output {
+    /**
+     * The unique identifier of the request (uuid)
+     */
+    request_id?: string;
+    /**
+     * Starts at 0 and increments for each message the server sends to the client.
+     */
+    sequence_id?: number;
+    /**
+     * The type of event being reported.
+     */
+    event?: "Update" | "StartOfTurn" | "EagerEndOfTurn" | "TurnResumed" | "EndOfTurn";
+    /**
+     * The index of the current turn
+     */
+    turn_index?: number;
+    /**
+     * Start time in seconds of the audio range that was transcribed
+     */
+    audio_window_start?: number;
+    /**
+     * End time in seconds of the audio range that was transcribed
+     */
+    audio_window_end?: number;
+    /**
+     * Text that was said over the course of the current turn
+     */
+    transcript?: string;
+    /**
+     * The words in the transcript
+     */
+    words?: {
+        /**
+         * The individual punctuated, properly-cased word from the transcript
+         */
+        word: string; // required on each entry when `words` is present
+        /**
+         * Confidence that this word was transcribed correctly
+         */
+        confidence: number; // required on each entry when `words` is present
+    }[];
+    /**
+     * Confidence that no more speech is coming in this turn
+     */
+    end_of_turn_confidence?: number;
+}
+declare abstract class Base_Ai_Cf_Deepgram_Flux { // ambient pairing of this model's input and output types
+    inputs: Ai_Cf_Deepgram_Flux_Input; // Ai.run constrains its `inputs` argument to this type
+    postProcessedOutputs: Ai_Cf_Deepgram_Flux_Output; // Ai.run resolves with this unless raw, websocket or stream mode is selected
+}
+interface Ai_Cf_Deepgram_Aura_2_En_Input { // request payload for the "@cf/deepgram/aura-2-en" entry of AiModels
+    /**
+     * Speaker used to produce the audio.
+     */
+    speaker?: "amalthea" | "andromeda" | "apollo" | "arcas" | "aries" | "asteria" | "athena" | "atlas" | "aurora" | "callista" | "cora" | "cordelia" | "delia" | "draco" | "electra" | "harmonia" | "helena" | "hera" | "hermes" | "hyperion" | "iris" | "janus" | "juno" | "jupiter" | "luna" | "mars" | "minerva" | "neptune" | "odysseus" | "ophelia" | "orion" | "orpheus" | "pandora" | "phoebe" | "pluto" | "saturn" | "thalia" | "theia" | "vesta" | "zeus";
+    /**
+     * Encoding of the output audio.
+     */
+    encoding?: "linear16" | "flac" | "mulaw" | "alaw" | "mp3" | "opus" | "aac";
+    /**
+     * Container specifies the file format wrapper for the output audio. The available options depend on the encoding type.
+     */
+    container?: "none" | "wav" | "ogg";
+    /**
+     * The text content to be converted to speech
+     */
+    text: string; // the only required field
+    /**
+     * Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable.
+     */
+    sample_rate?: number;
+    /**
+     * The bitrate of the audio in bits per second. Choose from predefined ranges or specific values based on the encoding type.
+     */
+    bit_rate?: number;
+}
+/**
+ * The generated audio, described as MP3 format. NOTE(review): the input's `encoding` option lists other formats too — confirm.
+ */
+type Ai_Cf_Deepgram_Aura_2_En_Output = string;
+declare abstract class Base_Ai_Cf_Deepgram_Aura_2_En { // ambient pairing of this model's input and output types
+    inputs: Ai_Cf_Deepgram_Aura_2_En_Input; // Ai.run constrains its `inputs` argument to this type
+    postProcessedOutputs: Ai_Cf_Deepgram_Aura_2_En_Output; // Ai.run resolves with this unless raw, websocket or stream mode is selected
+}
+interface Ai_Cf_Deepgram_Aura_2_Es_Input { // request payload for the "@cf/deepgram/aura-2-es" entry of AiModels
+    /**
+     * Speaker used to produce the audio.
+     */
+    speaker?: "sirio" | "nestor" | "carina" | "celeste" | "alvaro" | "diana" | "aquila" | "selena" | "estrella" | "javier";
+    /**
+     * Encoding of the output audio.
+     */
+    encoding?: "linear16" | "flac" | "mulaw" | "alaw" | "mp3" | "opus" | "aac";
+    /**
+     * Container specifies the file format wrapper for the output audio. The available options depend on the encoding type.
+     */
+    container?: "none" | "wav" | "ogg";
+    /**
+     * The text content to be converted to speech
+     */
+    text: string; // the only required field
+    /**
+     * Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable.
+     */
+    sample_rate?: number;
+    /**
+     * The bitrate of the audio in bits per second. Choose from predefined ranges or specific values based on the encoding type.
+     */
+    bit_rate?: number;
+}
+/**
+ * The generated audio, described as MP3 format. NOTE(review): the input's `encoding` option lists other formats too — confirm.
+ */
+type Ai_Cf_Deepgram_Aura_2_Es_Output = string;
+declare abstract class Base_Ai_Cf_Deepgram_Aura_2_Es { // ambient pairing of this model's input and output types
+    inputs: Ai_Cf_Deepgram_Aura_2_Es_Input; // Ai.run constrains its `inputs` argument to this type
+    postProcessedOutputs: Ai_Cf_Deepgram_Aura_2_Es_Output; // Ai.run resolves with this unless raw, websocket or stream mode is selected
+}
+interface AiModels { // default model map of the Ai class: model id -> class whose `inputs`/`postProcessedOutputs` type Ai.run
+    "@cf/huggingface/distilbert-sst-2-int8": BaseAiTextClassification;
+    "@cf/stabilityai/stable-diffusion-xl-base-1.0": BaseAiTextToImage;
+    "@cf/runwayml/stable-diffusion-v1-5-inpainting": BaseAiTextToImage;
+    "@cf/runwayml/stable-diffusion-v1-5-img2img": BaseAiTextToImage;
+    "@cf/lykon/dreamshaper-8-lcm": BaseAiTextToImage;
+    "@cf/bytedance/stable-diffusion-xl-lightning": BaseAiTextToImage;
+    "@cf/myshell-ai/melotts": BaseAiTextToSpeech;
+    "@cf/google/embeddinggemma-300m": BaseAiTextEmbeddings;
+    "@cf/microsoft/resnet-50": BaseAiImageClassification;
+    "@cf/meta/llama-2-7b-chat-int8": BaseAiTextGeneration;
+    "@cf/mistral/mistral-7b-instruct-v0.1": BaseAiTextGeneration;
+    "@cf/meta/llama-2-7b-chat-fp16": BaseAiTextGeneration;
+    "@hf/thebloke/llama-2-13b-chat-awq": BaseAiTextGeneration;
+    "@hf/thebloke/mistral-7b-instruct-v0.1-awq": BaseAiTextGeneration;
+    "@hf/thebloke/zephyr-7b-beta-awq": BaseAiTextGeneration;
+    "@hf/thebloke/openhermes-2.5-mistral-7b-awq": BaseAiTextGeneration;
+    "@hf/thebloke/neural-chat-7b-v3-1-awq": BaseAiTextGeneration;
+    "@hf/thebloke/llamaguard-7b-awq": BaseAiTextGeneration;
+    "@hf/thebloke/deepseek-coder-6.7b-base-awq": BaseAiTextGeneration;
+    "@hf/thebloke/deepseek-coder-6.7b-instruct-awq": BaseAiTextGeneration;
+    "@cf/deepseek-ai/deepseek-math-7b-instruct": BaseAiTextGeneration;
+    "@cf/defog/sqlcoder-7b-2": BaseAiTextGeneration;
+    "@cf/openchat/openchat-3.5-0106": BaseAiTextGeneration;
+    "@cf/tiiuae/falcon-7b-instruct": BaseAiTextGeneration;
+    "@cf/thebloke/discolm-german-7b-v1-awq": BaseAiTextGeneration;
+    "@cf/qwen/qwen1.5-0.5b-chat": BaseAiTextGeneration;
+    "@cf/qwen/qwen1.5-7b-chat-awq": BaseAiTextGeneration;
+    "@cf/qwen/qwen1.5-14b-chat-awq": BaseAiTextGeneration;
+    "@cf/tinyllama/tinyllama-1.1b-chat-v1.0": BaseAiTextGeneration;
+    "@cf/microsoft/phi-2": BaseAiTextGeneration;
+    "@cf/qwen/qwen1.5-1.8b-chat": BaseAiTextGeneration;
+    "@cf/mistral/mistral-7b-instruct-v0.2-lora": BaseAiTextGeneration;
+    "@hf/nousresearch/hermes-2-pro-mistral-7b": BaseAiTextGeneration;
+    "@hf/nexusflow/starling-lm-7b-beta": BaseAiTextGeneration;
+    "@hf/google/gemma-7b-it": BaseAiTextGeneration;
+    "@cf/meta-llama/llama-2-7b-chat-hf-lora": BaseAiTextGeneration;
+    "@cf/google/gemma-2b-it-lora": BaseAiTextGeneration;
+    "@cf/google/gemma-7b-it-lora": BaseAiTextGeneration;
+    "@hf/mistral/mistral-7b-instruct-v0.2": BaseAiTextGeneration;
+    "@cf/meta/llama-3-8b-instruct": BaseAiTextGeneration;
+    "@cf/fblgit/una-cybertron-7b-v2-bf16": BaseAiTextGeneration;
+    "@cf/meta/llama-3-8b-instruct-awq": BaseAiTextGeneration;
+    "@cf/meta/llama-3.1-8b-instruct-fp8": BaseAiTextGeneration;
+    "@cf/meta/llama-3.1-8b-instruct-awq": BaseAiTextGeneration;
+    "@cf/meta/llama-3.2-3b-instruct": BaseAiTextGeneration;
+    "@cf/meta/llama-3.2-1b-instruct": BaseAiTextGeneration;
+    "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b": BaseAiTextGeneration;
+    "@cf/ibm-granite/granite-4.0-h-micro": BaseAiTextGeneration;
+    "@cf/facebook/bart-large-cnn": BaseAiSummarization;
+    "@cf/llava-hf/llava-1.5-7b-hf": BaseAiImageToText;
+    "@cf/baai/bge-base-en-v1.5": Base_Ai_Cf_Baai_Bge_Base_En_V1_5;
+    "@cf/openai/whisper": Base_Ai_Cf_Openai_Whisper;
+    "@cf/meta/m2m100-1.2b": Base_Ai_Cf_Meta_M2M100_1_2B;
+    "@cf/baai/bge-small-en-v1.5": Base_Ai_Cf_Baai_Bge_Small_En_V1_5;
+    "@cf/baai/bge-large-en-v1.5": Base_Ai_Cf_Baai_Bge_Large_En_V1_5;
+    "@cf/unum/uform-gen2-qwen-500m": Base_Ai_Cf_Unum_Uform_Gen2_Qwen_500M;
+    "@cf/openai/whisper-tiny-en": Base_Ai_Cf_Openai_Whisper_Tiny_En;
+    "@cf/openai/whisper-large-v3-turbo": Base_Ai_Cf_Openai_Whisper_Large_V3_Turbo;
+    "@cf/baai/bge-m3": Base_Ai_Cf_Baai_Bge_M3;
+    "@cf/black-forest-labs/flux-1-schnell": Base_Ai_Cf_Black_Forest_Labs_Flux_1_Schnell;
+    "@cf/meta/llama-3.2-11b-vision-instruct": Base_Ai_Cf_Meta_Llama_3_2_11B_Vision_Instruct;
+    "@cf/meta/llama-3.3-70b-instruct-fp8-fast": Base_Ai_Cf_Meta_Llama_3_3_70B_Instruct_Fp8_Fast;
+    "@cf/meta/llama-guard-3-8b": Base_Ai_Cf_Meta_Llama_Guard_3_8B;
+    "@cf/baai/bge-reranker-base": Base_Ai_Cf_Baai_Bge_Reranker_Base;
+    "@cf/qwen/qwen2.5-coder-32b-instruct": Base_Ai_Cf_Qwen_Qwen2_5_Coder_32B_Instruct;
+    "@cf/qwen/qwq-32b": Base_Ai_Cf_Qwen_Qwq_32B;
+    "@cf/mistralai/mistral-small-3.1-24b-instruct": Base_Ai_Cf_Mistralai_Mistral_Small_3_1_24B_Instruct;
+    "@cf/google/gemma-3-12b-it": Base_Ai_Cf_Google_Gemma_3_12B_It;
+    "@cf/meta/llama-4-scout-17b-16e-instruct": Base_Ai_Cf_Meta_Llama_4_Scout_17B_16E_Instruct;
+    "@cf/qwen/qwen3-30b-a3b-fp8": Base_Ai_Cf_Qwen_Qwen3_30B_A3B_Fp8;
+    "@cf/deepgram/nova-3": Base_Ai_Cf_Deepgram_Nova_3;
+    "@cf/qwen/qwen3-embedding-0.6b": Base_Ai_Cf_Qwen_Qwen3_Embedding_0_6B;
+    "@cf/pipecat-ai/smart-turn-v2": Base_Ai_Cf_Pipecat_Ai_Smart_Turn_V2;
+    "@cf/openai/gpt-oss-120b": Base_Ai_Cf_Openai_Gpt_Oss_120B;
+    "@cf/openai/gpt-oss-20b": Base_Ai_Cf_Openai_Gpt_Oss_20B;
+    "@cf/leonardo/phoenix-1.0": Base_Ai_Cf_Leonardo_Phoenix_1_0;
+    "@cf/leonardo/lucid-origin": Base_Ai_Cf_Leonardo_Lucid_Origin;
+    "@cf/deepgram/aura-1": Base_Ai_Cf_Deepgram_Aura_1;
+    "@cf/ai4bharat/indictrans2-en-indic-1B": Base_Ai_Cf_Ai4Bharat_Indictrans2_En_Indic_1B;
+    "@cf/aisingapore/gemma-sea-lion-v4-27b-it": Base_Ai_Cf_Aisingapore_Gemma_Sea_Lion_V4_27B_It;
+    "@cf/pfnet/plamo-embedding-1b": Base_Ai_Cf_Pfnet_Plamo_Embedding_1B;
+    "@cf/deepgram/flux": Base_Ai_Cf_Deepgram_Flux;
+    "@cf/deepgram/aura-2-en": Base_Ai_Cf_Deepgram_Aura_2_En;
+    "@cf/deepgram/aura-2-es": Base_Ai_Cf_Deepgram_Aura_2_Es;
+}
+type AiOptions = { // per-call options; Ai.run takes these as its optional third argument
+    /**
+     * Send requests as an asynchronous batch job, only works for supported models
+     * https://developers.cloudflare.com/workers-ai/features/batch-api
+     */
+    queueRequest?: boolean;
+    /**
+     * Establish websocket connections, only works for supported models
+     */
+    websocket?: boolean; // when typed as the literal `true`, Ai.run's result type is Response
+    /**
+     * Tag your requests to group and view them in Cloudflare dashboard.
+     *
+     * Rules:
+     * Tags must only contain letters, numbers, and the symbols: : - . / @
+     * Each tag can have maximum 50 characters.
+     * Maximum 5 tags are allowed each request.
+     * Duplicate tags will be removed.
+     */
+    tags?: string[];
+    gateway?: GatewayOptions;
+    returnRawResponse?: boolean; // when typed as the literal `true`, Ai.run's result type is Response
+    prefix?: string;
+    extraHeaders?: object;
+};
+type AiModelsSearchParams = { // optional argument of Ai.models(); every field is itself optional
+    author?: string;
+    hide_experimental?: boolean;
+    page?: number;
+    per_page?: number;
+    search?: string;
+    source?: number;
+    task?: string; // a plain string here, whereas AiModelsSearchObject.task is an object
+};
+type AiModelsSearchObject = { // element type of the array that Ai.models() resolves with
+    id: string;
+    source: number;
+    name: string;
+    description: string;
+    task: { // nested record with its own id, name and description
+        id: string;
+        name: string;
+        description: string;
+    };
+    tags: string[];
+    properties: { // list of property_id/value string pairs
+        property_id: string;
+        value: string;
+    }[];
+};
+interface InferenceUpstreamError extends Error { // named error type; declares no members beyond Error
+}
+interface AiInternalError extends Error { // named error type; declares no members beyond Error
+}
+type AiModelListType = Record<string, any>; // upper bound for the Ai class's AiModelList type parameter
+declare abstract class Ai<AiModelList extends AiModelListType = AiModels> { // AiModelList maps model names to input/output descriptor types; defaults to AiModels
+    aiGatewayLogId: string | null;
+    gateway(gatewayId: string): AiGateway;
+    /**
+     * Access the AI Search API for managing AI-powered search instances.
+     *
+     * This is the new API that replaces AutoRAG with better namespace separation:
+     * - Account-level operations: `list()`, `create()`
+     * - Instance-level operations: `get(id).search()`, `get(id).chatCompletions()`, `get(id).delete()`
+     *
+     * @example
+     * ```typescript
+     * // List all AI Search instances
+     * const instances = await env.AI.aiSearch.list();
+     *
+     * // Search an instance
+     * const results = await env.AI.aiSearch.get('my-search').search({
+     *   messages: [{ role: 'user', content: 'What is the policy?' }],
+     *   ai_search_options: {
+     *     retrieval: { max_num_results: 10 }
+     *   }
+     * });
+     *
+     * // Generate chat completions with AI Search context
+     * const response = await env.AI.aiSearch.get('my-search').chatCompletions({
+     *   messages: [{ role: 'user', content: 'What is the policy?' }],
+     *   model: '@cf/meta/llama-3.3-70b-instruct-fp8-fast'
+     * });
+     * ```
+     */
+    aiSearch: AiSearchAccountService;
+    /**
+     * @deprecated AutoRAG has been replaced by AI Search.
+     * Use `env.AI.aiSearch` instead for better API design and new features.
+     *
+     * Migration guide:
+     * - `env.AI.autorag().list()` → `env.AI.aiSearch.list()`
+     * - `env.AI.autorag('id').search({ query: '...' })` → `env.AI.aiSearch.get('id').search({ messages: [{ role: 'user', content: '...' }] })`
+     * - `env.AI.autorag('id').aiSearch(...)` → `env.AI.aiSearch.get('id').chatCompletions(...)`
+     *
+     * Note: The old API continues to work for backwards compatibility, but new projects should use AI Search.
+     *
+     * @see AiSearchAccountService
+     * @param autoragId Instance ID. NOTE(review): documented as optional (omit for account-level operations), but this signature requires it — confirm.
+     */
+    autorag(autoragId: string): AutoRAG;
+    run<Name extends keyof AiModelList, Options extends AiOptions, InputOptions extends AiModelList[Name]["inputs"]>(model: Name, inputs: InputOptions, options?: Options): Promise<Options extends { // result type is picked from the option/input literal types below
+        returnRawResponse: true;
+    } | {
+        websocket: true;
+    } ? Response : InputOptions extends { // raw or websocket mode -> Response
+        stream: true;
+    } ? ReadableStream : AiModelList[Name]["postProcessedOutputs"]>; // stream: true -> ReadableStream; otherwise the model's declared output
+    models(params?: AiModelsSearchParams): Promise<AiModelsSearchObject[]>;
+    toMarkdown(): ToMarkdownService; // overload: no arguments returns the service object
+    toMarkdown(files: MarkdownDocument[], options?: ConversionRequestOptions): Promise<ConversionResponse[]>; // overload: array in, array out
+    toMarkdown(files: MarkdownDocument, options?: ConversionRequestOptions): Promise<ConversionResponse>; // overload: single document in, single response out
+}
+type GatewayRetries = { // retry settings referenced by GatewayOptions.retries; all fields optional
+    maxAttempts?: 1 | 2 | 3 | 4 | 5; // restricted to these five literals
+    retryDelayMs?: number; // name indicates milliseconds
+    backoff?: 'constant' | 'linear' | 'exponential';
+};
+type GatewayOptions = { // gateway settings carried by AiOptions.gateway
+    id: string; // the only required field
+    cacheKey?: string;
+    cacheTtl?: number;
+    skipCache?: boolean;
+    metadata?: Record<string, number | string | boolean | null | bigint>; // flat map of primitive values
+    collectLog?: boolean;
+    eventId?: string;
+    requestTimeoutMs?: number; // name indicates milliseconds
+    retries?: GatewayRetries;
+};
+type UniversalGatewayOptions = Omit<GatewayOptions, 'id'> & { // Omit drops the required `id` key; Exclude only filters union members, so it left `id` required
+    /**
+     * @deprecated
+     */
+    id?: string;
+};
+type AiGatewayPatchLog = { // payload accepted by AiGateway.patchLog(); every field is optional and nullable
+    score?: number | null;
+    feedback?: -1 | 1 | null; // only the literals -1 and 1 (or null) are accepted
+    metadata?: Record<string, number | string | boolean | null | bigint> | null;
+};
+type AiGatewayLog = { // record that AiGateway.getLog() resolves with
+    id: string;
+    provider: string;
+    model: string;
+    model_type?: string;
+    path: string;
+    duration: number;
+    request_type?: string;
+    request_content_type?: string;
+    status_code: number;
+    response_content_type?: string;
+    success: boolean;
+    cached: boolean;
+    tokens_in?: number;
+    tokens_out?: number;
+    metadata?: Record<string, number | string | boolean | null | bigint>;
+    step?: number;
+    cost?: number;
+    custom_cost?: boolean;
+    request_size: number;
+    request_head?: string;
+    request_head_complete: boolean;
+    response_size: number;
+    response_head?: string;
+    response_head_complete: boolean;
+    created_at: Date; // typed as a Date object rather than a string
+};
+type AIGatewayProviders = 'workers-ai' | 'anthropic' | 'aws-bedrock' | 'azure-openai' | 'google-vertex-ai' | 'huggingface' | 'openai' | 'perplexity-ai' | 'replicate' | 'groq' | 'cohere' | 'google-ai-studio' | 'mistral' | 'grok' | 'openrouter' | 'deepseek' | 'cerebras' | 'cartesia' | 'elevenlabs' | 'adobe-firefly'; // provider ids named by the typings; the APIs that use this also accept any string
+type AIGatewayHeaders = { // header map used (as a Partial) by AIGatewayUniversalRequest.headers
+    'cf-aig-metadata': Record<string, number | string | boolean | null | bigint> | string; // structured map or a string
+    'cf-aig-custom-cost': { // per-token pricing, a total cost, or a string
+        per_token_in?: number;
+        per_token_out?: number;
+    } | {
+        total_cost?: number;
+    } | string;
+    'cf-aig-cache-ttl': number | string;
+    'cf-aig-skip-cache': boolean | string;
+    'cf-aig-cache-key': string;
+    'cf-aig-event-id': string;
+    'cf-aig-request-timeout': number | string;
+    'cf-aig-max-attempts': number | string;
+    'cf-aig-retry-delay': number | string;
+    'cf-aig-backoff': string;
+    'cf-aig-collect-log': boolean | string;
+    Authorization: string;
+    'Content-Type': string;
+    [key: string]: string | number | boolean | object; // index signature: any other header name is accepted
+};
+type AIGatewayUniversalRequest = { // one request entry accepted by AiGateway.run(); all four fields are required
+    provider: AIGatewayProviders | string; // eslint-disable-line
+    endpoint: string;
+    headers: Partial<AIGatewayHeaders>; // Partial: none of the named headers is mandatory
+    query: unknown; // untyped here; callers supply the provider-specific body
+};
+interface AiGatewayInternalError extends Error { // named error type; declares no members beyond Error
+}
+interface AiGatewayLogNotFound extends Error { // named error type; declares no members beyond Error
+}
+declare abstract class AiGateway { // type of the object returned by Ai.gateway(gatewayId)
+    patchLog(logId: string, data: AiGatewayPatchLog): Promise<void>;
+    getLog(logId: string): Promise<AiGatewayLog>;
+    run(data: AIGatewayUniversalRequest | AIGatewayUniversalRequest[], options?: { // accepts one request or an array of them
+        gateway?: UniversalGatewayOptions;
+        extraHeaders?: object;
+    }): Promise<Response>;
+    getUrl(provider?: AIGatewayProviders | string): Promise<string>; // eslint-disable-line
+}
+/**
+ * @deprecated AutoRAG has been replaced by AI Search. Use AiSearchInternalError instead.
+ * @see AiSearchInternalError
+ */
+interface AutoRAGInternalError extends Error { // declares no members beyond Error
+}
+/**
+ * @deprecated AutoRAG has been replaced by AI Search. Use AiSearchNotFoundError instead.
+ * @see AiSearchNotFoundError
+ */
+interface AutoRAGNotFoundError extends Error { // declares no members beyond Error
+}
+/**
+ * @deprecated This error type is no longer used in the AI Search API. No replacement type is named here.
+ */
+interface AutoRAGUnauthorizedError extends Error { // declares no members beyond Error
+}
+/**
+ * @deprecated AutoRAG has been replaced by AI Search. Use AiSearchNameNotSetError instead.
+ * @see AiSearchNameNotSetError
+ */
+interface AutoRAGNameNotSetError extends Error { // declares no members beyond Error
+}
+/**
+ * @deprecated AutoRAG has been replaced by AI Search.
+ * Use AiSearchSearchRequest with the new API instead.
+ * @see AiSearchSearchRequest
+ */
+type AutoRagSearchRequest = { // parameter type of AutoRAG.search()
+    query: string; // the only required field
+    filters?: CompoundFilter | ComparisonFilter;
+    max_num_results?: number;
+    ranking_options?: {
+        ranker?: string;
+        score_threshold?: number;
+    };
+    reranking?: {
+        enabled?: boolean;
+        model?: string;
+    };
+    rewrite_query?: boolean;
+};
+/**
+ * @deprecated AutoRAG has been replaced by AI Search.
+ * Use AiSearchChatCompletionsRequest with the new API instead.
+ * @see AiSearchChatCompletionsRequest
+ */
+type AutoRagAiSearchRequest = AutoRagSearchRequest & { // search request plus two optional fields
+    stream?: boolean;
+    system_prompt?: string;
+};
+/**
+ * @deprecated AutoRAG has been replaced by AI Search.
+ * Use AiSearchChatCompletionsRequest with stream: true instead.
+ * @see AiSearchChatCompletionsRequest
+ */
+type AutoRagAiSearchRequestStreaming = Omit<AutoRagAiSearchRequest, 'stream'> & { // same request with `stream` re-declared below
+    stream: true; // required literal true; selects the AutoRAG.aiSearch overload that resolves with Response
+};
+/**
+ * @deprecated AutoRAG has been replaced by AI Search.
+ * Use AiSearchSearchResponse with the new API instead.
+ * @see AiSearchSearchResponse
+ */
+type AutoRagSearchResponse = { // resolved value of AutoRAG.search()
+    object: 'vector_store.search_results.page'; // fixed literal tag
+    search_query: string;
+    data: { // one entry per result
+        file_id: string;
+        filename: string;
+        score: number;
+        attributes: Record<string, string | number | boolean | null>;
+        content: { // list of text parts
+            type: 'text';
+            text: string;
+        }[];
+    }[];
+    has_more: boolean;
+    next_page: string | null;
+};
+/**
+ * @deprecated AutoRAG has been replaced by AI Search.
+ * Use AiSearchListResponse with the new API instead.
+ * @see AiSearchListResponse
+ */
+type AutoRagListResponse = { // resolved value of AutoRAG.list()
+    id: string;
+    enable: boolean;
+    type: string;
+    source: string;
+    vectorize_name: string;
+    paused: boolean;
+    status: string;
+}[]; // note: the alias is an array of these records, not a single record
+/**
+ * @deprecated AutoRAG has been replaced by AI Search.
+ * The new API returns different response formats for chat completions.
+ */
+type AutoRagAiSearchResponse = AutoRagSearchResponse & { // search response plus one extra field
+    response: string;
+};
+/**
+ * @deprecated AutoRAG has been replaced by AI Search.
+ * Use the new AI Search API instead: `env.AI.aiSearch`
+ *
+ * Migration guide:
+ * - `env.AI.autorag().list()` → `env.AI.aiSearch.list()`
+ * - `env.AI.autorag('id').search(...)` → `env.AI.aiSearch.get('id').search(...)`
+ * - `env.AI.autorag('id').aiSearch(...)` → `env.AI.aiSearch.get('id').chatCompletions(...)`
+ *
+ * @see AiSearchAccountService
+ * @see AiSearchInstanceService
+ */
+declare abstract class AutoRAG { // type of the object returned by Ai.autorag(autoragId)
+    /**
+     * @deprecated Use `env.AI.aiSearch.list()` instead.
+     * @see AiSearchAccountService.list
+     */
+    list(): Promise<AutoRagListResponse>;
+    /**
+     * @deprecated Use `env.AI.aiSearch.get(id).search(...)` instead.
+     * Note: The new API uses a messages array instead of a query string.
+     * @see AiSearchInstanceService.search
+     */
+    search(params: AutoRagSearchRequest): Promise<AutoRagSearchResponse>;
+    /**
+     * @deprecated Use `env.AI.aiSearch.get(id).chatCompletions(...)` instead. Overload for `stream: true`.
+     * @see AiSearchInstanceService.chatCompletions
+     */
+    aiSearch(params: AutoRagAiSearchRequestStreaming): Promise<Response>;
+    /**
+     * @deprecated Use `env.AI.aiSearch.get(id).chatCompletions(...)` instead. Overload for the general request type.
+     * @see AiSearchInstanceService.chatCompletions
+     */
+    aiSearch(params: AutoRagAiSearchRequest): Promise<AutoRagAiSearchResponse>;
+    /**
+     * @deprecated Use `env.AI.aiSearch.get(id).chatCompletions(...)` instead. Widest overload: either result type.
+     * @see AiSearchInstanceService.chatCompletions
+     */
+    aiSearch(params: AutoRagAiSearchRequest): Promise<AutoRagAiSearchResponse | Response>;
+}
+interface BasicImageTransformations { // shared base extended by RequestInitCfPropertiesImage and RequestInitCfPropertiesImageDraw
+    /**
+     * Maximum width in image pixels. The value must be an integer.
+     */
+    width?: number;
+    /**
+     * Maximum height in image pixels. The value must be an integer.
+     */
+    height?: number;
+    /**
+     * Resizing mode as a string. It affects interpretation of width and height
+     * options:
+     *  - scale-down: Similar to contain, but the image is never enlarged. If
+     *    the image is larger than given width or height, it will be resized.
+     *    Otherwise its original size will be kept.
+     *  - contain: Resizes to maximum size that fits within the given width and
+     *    height. If only a single dimension is given (e.g. only width), the
+     *    image will be shrunk or enlarged to exactly match that dimension.
+     *    Aspect ratio is always preserved.
+     *  - cover: Resizes (shrinks or enlarges) to fill the entire area of width
+     *    and height. If the image has an aspect ratio different from the ratio
+     *    of width and height, it will be cropped to fit.
+     *  - crop: The image will be shrunk and cropped to fit within the area
+     *    specified by width and height. The image will not be enlarged. For images
+     *    smaller than the given dimensions it's the same as scale-down. For
+     *    images larger than the given dimensions, it's the same as cover.
+     *    See also trim.
+     *  - pad: Resizes to the maximum size that fits within the given width and
+     *    height, and then fills the remaining area with a background color
+     *    (white by default). Use of this mode is not recommended, as the same
+     *    effect can be more efficiently achieved with the contain mode and the
+     *    CSS object-fit: contain property.
+     *  - squeeze: Stretches and deforms to the width and height given, even if it
+     *    breaks aspect ratio
+     */
+    fit?: "scale-down" | "contain" | "cover" | "crop" | "pad" | "squeeze";
+    /**
+     * Image segmentation using artificial intelligence models. Sets pixels not
+     * within selected segment area to transparent e.g "foreground" sets every
+     * background pixel as transparent.
+     */
+    segment?: "foreground";
+    /**
+     * When cropping with fit: "cover", this defines the side or point that should
+     * be left uncropped. The value is either a string
+     * "left", "right", "top", "bottom", "auto", or "center" (the default),
+     * or an object {x, y} containing focal point coordinates in the original
+     * image expressed as fractions ranging from 0.0 (top or left) to 1.0
+     * (bottom or right), 0.5 being the center. {fit: "cover", gravity: "top"} will
+     * crop bottom or left and right sides as necessary, but won’t crop anything
+     * from the top. {fit: "cover", gravity: {x:0.5, y:0.2}} will crop each side to
+     * preserve as much as possible around a point at 20% of the height of the
+     * source image. The type additionally accepts 'face' and 'entropy'.
+     */
+    gravity?: 'face' | 'left' | 'right' | 'top' | 'bottom' | 'center' | 'auto' | 'entropy' | BasicImageTransformationsGravityCoordinates;
+    /**
+     * Background color to add underneath the image. Applies only to images with
+     * transparency (such as PNG). Accepts any CSS color (#RRGGBB, rgba(…),
+     * hsl(…), etc.)
+     */
+    background?: string;
+    /**
+     * Number of degrees (90, 180, 270) to rotate the image by. width and height
+     * options refer to axes after rotation. The type also admits 0 and 360.
+     */
+    rotate?: 0 | 90 | 180 | 270 | 360;
+}
+interface BasicImageTransformationsGravityCoordinates { // object form of BasicImageTransformations.gravity
+    x?: number; // per the gravity docs: fraction from 0.0 (left) to 1.0 (right)
+    y?: number; // per the gravity docs: fraction from 0.0 (top) to 1.0 (bottom)
+    mode?: 'remainder' | 'box-center';
+}
+/**
+ * In addition to the properties you can set in the RequestInit dict
+ * that you pass as an argument to the Request constructor, you can
+ * set certain properties of a `cf` object to control how Cloudflare
+ * features are applied to that new Request.
+ *
+ * Note: Currently, these properties cannot be tested in the
+ * playground.
+ */
+interface RequestInitCfProperties extends Record<string, unknown> { // the Record base also admits keys not listed below
+    cacheEverything?: boolean;
+    /**
+     * A request's cache key is what determines if two requests are
+     * "the same" for caching purposes. If a request has the same cache key
+     * as some previous request, then we can serve the same cached response for
+     * both. (e.g. 'some-key')
+     *
+     * Only available for Enterprise customers.
+     */
+    cacheKey?: string;
+    /**
+     * This allows you to append additional Cache-Tag response headers
+     * to the origin response without modifications to the origin server.
+     * This will allow for greater control over the Purge by Cache Tag feature
+     * utilizing changes only in the Workers process.
+     *
+     * Only available for Enterprise customers.
+     */
+    cacheTags?: string[];
+    /**
+     * Force response to be cached for a given number of seconds. (e.g. 300)
+     */
+    cacheTtl?: number;
+    /**
+     * Force response to be cached for a given number of seconds based on the Origin status code.
+     * (e.g. { '200-299': 86400, '404': 1, '500-599': 0 })
+     */
+    cacheTtlByStatus?: Record<string, number>;
+    scrapeShield?: boolean;
+    apps?: boolean;
+    image?: RequestInitCfPropertiesImage;
+    minify?: RequestInitCfPropertiesImageMinify;
+    mirage?: boolean;
+    polish?: "lossy" | "lossless" | "off";
+    r2?: RequestInitCfPropertiesR2;
+    /**
+     * Redirects the request to an alternate origin server. You can use this,
+     * for example, to implement load balancing across several origins.
+     * (e.g. us-east.example.com)
+     *
+     * Note - For security reasons, the hostname set in resolveOverride must
+     * be proxied on the same Cloudflare zone of the incoming request.
+     * Otherwise, the setting is ignored. CNAME hosts are allowed, so to
+     * resolve to a host under a different domain or a DNS only domain first
+     * declare a CNAME record within your own zone’s DNS mapping to the
+     * external hostname, set proxy on Cloudflare, then set resolveOverride
+     * to point to that CNAME record.
+     */
+    resolveOverride?: string;
+}
+interface RequestInitCfPropertiesImageDraw extends BasicImageTransformations { // overlay description; also inherits the base resize options
+    /**
+     * Absolute URL of the image file to use for the drawing. It can be any of
+     * the supported file formats. For drawing of watermarks or non-rectangular
+     * overlays we recommend using PNG or WebP images.
+     */
+    url: string; // the only required field
+    /**
+     * Floating-point number between 0 (transparent) and 1 (opaque).
+     * For example, opacity: 0.5 makes overlay semitransparent.
+     */
+    opacity?: number;
+    /**
+     * - If set to true, the overlay image will be tiled to cover the entire
+     *   area. This is useful for stock-photo-like watermarks.
+     * - If set to "x", the overlay image will be tiled horizontally only
+     *   (form a line).
+     * - If set to "y", the overlay image will be tiled vertically only
+     *   (form a line).
+     */
+    repeat?: true | "x" | "y"; // `false` is not part of the type; omit the field for no tiling
+    /**
+     * Position of the overlay image relative to a given edge. Each property is
+     * an offset in pixels. 0 aligns exactly to the edge. For example, left: 10
+     * positions left side of the overlay 10 pixels from the left edge of the
+     * image it's drawn over. bottom: 0 aligns bottom of the overlay with bottom
+     * of the background image.
+     *
+     * Setting both left & right, or both top & bottom is an error.
+     *
+     * If no position is specified, the image will be centered.
+     */
+    top?: number;
+    left?: number; // offset in pixels; same rules as described above `top`
+    bottom?: number; // offset in pixels; same rules as described above `top`
+    right?: number; // offset in pixels; same rules as described above `top`
+}
+interface RequestInitCfPropertiesImage extends BasicImageTransformations {
+    /**
+     * Device Pixel Ratio. Default 1. Multiplier for width/height that makes it
+     * easier to specify higher-DPI sizes in <img srcset>.
+     */
+    dpr?: number;
+    /**
+     * Allows you to trim your image. Takes dpr into account and is performed before
+     * resizing or rotation.
+     *
+     * It can be used as:
+     * - left, top, right, bottom - it will specify the number of pixels to cut
+     *   off each side
+     * - width, height - the width/height you'd like to end up with - can be used
+     *   in combination with the properties above
+     * - border - this will automatically trim the surroundings of an image based on
+     *   its color. It consists of three properties:
+     *    - color: rgb or hex representation of the color you wish to trim (todo: verify the rgba bit)
+     *    - tolerance: difference from color to treat as color
+     *    - keep: the number of pixels of border to keep
+     */
+    trim?: "border" | {
+        top?: number;
+        bottom?: number;
+        left?: number;
+        right?: number;
+        width?: number;
+        height?: number;
+        border?: boolean | {
+            color?: string;
+            tolerance?: number;
+            keep?: number;
+        };
+    };
+    /**
+     * Quality setting from 1-100 (useful values are in 60-90 range). Lower values
+     * make images look worse, but load faster. The default is 85. It applies only
+     * to JPEG and WebP images. It doesn’t have any effect on PNG.
+     */
+    quality?: number | "low" | "medium-low" | "medium-high" | "high";
+    /**
+     * Output format to generate. It can be:
+     *  - avif: generate images in AVIF format.
+     *  - webp: generate images in Google WebP format. Set quality to 100 to get
+     *    the WebP-lossless format.
+     *  - json: instead of generating an image, outputs information about the
+     *    image, in JSON format. The JSON object will contain image size
+     *    (before and after resizing), source image’s MIME type, file size, etc.
+     *  - jpeg: generate images in JPEG format.
+     *  - png: generate images in PNG format.
+     */
+    format?: "avif" | "webp" | "json" | "jpeg" | "png" | "baseline-jpeg" | "png-force" | "svg";
+    /**
+     * Whether to preserve animation frames from input files. Default is true.
+     * Setting it to false reduces animations to still images. This setting is
+     * recommended when enlarging images or processing arbitrary user content,
+     * because large GIF animations can weigh tens or even hundreds of megabytes.
+     * It is also useful to set anim:false when using format:"json" to get the
+     * response quicker without the number of frames.
+     */
+    anim?: boolean;
+    /**
+     * What EXIF data should be preserved in the output image. Note that EXIF
+     * rotation and embedded color profiles are always applied ("baked in" into
+     * the image), and aren't affected by this option. Note that if the Polish
+     * feature is enabled, all metadata may have been removed already and this
+     * option may have no effect.
+     *  - keep: Preserve most of EXIF metadata, including GPS location if there's
+     *    any.
+     *  - copyright: Only keep the copyright tag, and discard everything else.
+     *    This is the default behavior for JPEG files.
+     *  - none: Discard all invisible EXIF metadata. Currently WebP and PNG
+     *    output formats always discard metadata.
+     */
+    metadata?: "keep" | "copyright" | "none";
+    /**
+     * Strength of sharpening filter to apply to the image. Floating-point
+     * number between 0 (no sharpening, default) and 10 (maximum). 1.0 is a
+     * recommended value for downscaled images.
+     */
+    sharpen?: number;
+    /**
+     * Radius of a blur filter (approximate gaussian). Maximum supported radius
+     * is 250.
+     */
+    blur?: number;
+    /**
+     * Overlays are drawn in the order they appear in the array (last array
+     * entry is the topmost layer).
+     */
+    draw?: RequestInitCfPropertiesImageDraw[];
+    /**
+     * Fetching image from authenticated origin. Setting this property will
+     * pass authentication headers (Authorization, Cookie, etc.) through to
+     * the origin.
+     */
+    "origin-auth"?: "share-publicly";
+    /**
+     * Adds a border around the image. The border is added after resizing. Border
+     * width takes dpr into account, and can be specified either using a single
+     * width property, or individually for each side.
+     */
+    border?: {
+        color: string;
+        width: number;
+    } | {
+        color: string;
+        top: number;
+        right: number;
+        bottom: number;
+        left: number;
+    };
+    /**
+     * Increase brightness by a factor. A value of 1.0 equals no change, a value
+     * of 0.5 equals half brightness, and a value of 2.0 equals twice as bright.
+     * 0 is ignored.
+     */
+    brightness?: number;
+    /**
+     * Increase contrast by a factor. A value of 1.0 equals no change, a value of
+     * 0.5 equals low contrast, and a value of 2.0 equals high contrast. 0 is
+     * ignored.
+     */
+    contrast?: number;
+    /**
+     * Increase exposure by a factor. A value of 1.0 equals no change, a value of
+     * 0.5 darkens the image, and a value of 2.0 lightens the image. 0 is ignored.
+     */
+    gamma?: number;
+    /**
+     * Increase saturation by a factor. A value of 1.0 equals no change.
+     * NOTE(review): the previous text was a copy of the `contrast` description;
+     * confirm the exact scale (e.g. the effect of 0) against the Cloudflare docs.
+     */
+    saturation?: number;
+    /**
+     * Flips the images horizontally, vertically, or both. Flipping is applied before
+     * rotation, so if you apply flip=h,rotate=90 then the image will be flipped
+     * horizontally, then rotated by 90 degrees.
+     */
+    flip?: 'h' | 'v' | 'hv';
+    /**
+     * Slightly reduces latency on a cache miss by selecting a
+     * quickest-to-compress file format, at a cost of increased file size and
+     * lower image quality. It will usually override the format option and choose
+     * JPEG over WebP or AVIF. We do not recommend using this option, except in
+     * unusual circumstances like resizing uncacheable dynamically-generated
+     * images.
+     */
+    compression?: "fast";
+}
+interface RequestInitCfPropertiesImageMinify {
+    javascript?: boolean;
+    css?: boolean;
+    html?: boolean;
+}
+interface RequestInitCfPropertiesR2 {
+    /**
+     * Colo id of bucket that an object is stored in
+     */
+    bucketColoId?: number;
+}
+/**
+ * Request metadata provided by Cloudflare's edge.
+ */
+type IncomingRequestCfProperties<HostMetadata = unknown> = IncomingRequestCfPropertiesBase & IncomingRequestCfPropertiesBotManagementEnterprise & IncomingRequestCfPropertiesCloudflareForSaaSEnterprise<HostMetadata> & IncomingRequestCfPropertiesGeographicInformation & IncomingRequestCfPropertiesCloudflareAccessOrApiShield;
+interface IncomingRequestCfPropertiesBase extends Record<string, unknown> {
+    /**
+     * [ASN](https://www.iana.org/assignments/as-numbers/as-numbers.xhtml) of the incoming request.
+     *
+     * @example 395747
+     */
+    asn?: number;
+    /**
+     * The organization which owns the ASN of the incoming request.
+     *
+     * @example "Google Cloud"
+     */
+    asOrganization?: string;
+    /**
+     * The original value of the `Accept-Encoding` header if Cloudflare modified it.
+     *
+     * @example "gzip, deflate, br"
+     */
+    clientAcceptEncoding?: string;
+    /**
+     * The number of milliseconds it took for the request to reach your worker.
+     *
+     * @example 22
+     */
+    clientTcpRtt?: number;
+    /**
+     * The three-letter [IATA](https://en.wikipedia.org/wiki/IATA_airport_code)
+     * airport code of the data center that the request hit.
+     *
+     * @example "DFW"
+     */
+    colo: string;
+    /**
+     * Represents the upstream's response to a
+     * [TCP `keepalive` message](https://tldp.org/HOWTO/TCP-Keepalive-HOWTO/overview.html)
+     * from cloudflare.
+     *
+     * For workers with no upstream, this will always be `1`.
+     *
+     * @example 3
+     */
+    edgeRequestKeepAliveStatus: IncomingRequestCfPropertiesEdgeRequestKeepAliveStatus;
+    /**
+     * The HTTP Protocol the request used.
+     *
+     * @example "HTTP/2"
+     */
+    httpProtocol: string;
+    /**
+     * The browser-requested prioritization information in the request object.
+     *
+     * If no information was set, defaults to the empty string `""`
+     *
+     * @example "weight=192;exclusive=0;group=3;group-weight=127"
+     * @default ""
+     */
+    requestPriority: string;
+    /**
+     * The TLS version of the connection to Cloudflare.
+     * In requests served over plaintext (without TLS), this property is the empty string `""`.
+     *
+     * @example "TLSv1.3"
+     */
+    tlsVersion: string;
+    /**
+     * The cipher for the connection to Cloudflare.
+     * In requests served over plaintext (without TLS), this property is the empty string `""`.
+     *
+     * @example "AEAD-AES128-GCM-SHA256"
+     */
+    tlsCipher: string;
+    /**
+     * Metadata containing the [`HELLO`](https://www.rfc-editor.org/rfc/rfc5246#section-7.4.1.2) and [`FINISHED`](https://www.rfc-editor.org/rfc/rfc5246#section-7.4.9) messages from this request's TLS handshake.
+     *
+     * If the incoming request was served over plaintext (without TLS) this field is undefined.
+     */
+    tlsExportedAuthenticator?: IncomingRequestCfPropertiesExportedAuthenticatorMetadata;
+}
+interface IncomingRequestCfPropertiesBotManagementBase {
+    /**
+     * Cloudflare’s [level of certainty](https://developers.cloudflare.com/bots/concepts/bot-score/) that a request comes from a bot,
+     * represented as an integer percentage between `1` (almost certainly a bot) and `99` (almost certainly human).
+     *
+     * @example 54
+     */
+    score: number;
+    /**
+     * A boolean value that is true if the request comes from a good bot, like Google or Bing.
+     * Most customers choose to allow this traffic. For more details, see [Traffic from known bots](https://developers.cloudflare.com/firewall/known-issues-and-faq/#how-does-firewall-rules-handle-traffic-from-known-bots).
+     */
+    verifiedBot: boolean;
+    /**
+     * A boolean value that is true if the request originates from a
+     * Cloudflare-verified proxy service.
+     */
+    corporateProxy: boolean;
+    /**
+     * A boolean value that's true if the request matches [file extensions](https://developers.cloudflare.com/bots/reference/static-resources/) for many types of static resources.
+     */
+    staticResource: boolean;
+    /**
+     * List of IDs that correlate to the Bot Management heuristic detections made on a request (you can have multiple heuristic detections on the same request).
+     */
+    detectionIds: number[];
+}
+interface IncomingRequestCfPropertiesBotManagement {
+    /**
+     * Results of Cloudflare's Bot Management analysis
+     */
+    botManagement: IncomingRequestCfPropertiesBotManagementBase;
+    /**
+     * Duplicate of `botManagement.score`.
+     *
+     * @deprecated
+     */
+    clientTrustScore: number;
+}
+interface IncomingRequestCfPropertiesBotManagementEnterprise extends IncomingRequestCfPropertiesBotManagement {
+    /**
+     * Results of Cloudflare's Bot Management analysis
+     */
+    botManagement: IncomingRequestCfPropertiesBotManagementBase & {
+        /**
+         * A [JA3 Fingerprint](https://developers.cloudflare.com/bots/concepts/ja3-fingerprint/) to help profile specific SSL/TLS clients
+         * across different destination IPs, Ports, and X509 certificates.
+         */
+        ja3Hash: string;
+    };
+}
+interface IncomingRequestCfPropertiesCloudflareForSaaSEnterprise<HostMetadata> {
+    /**
+     * Custom metadata set per-host in [Cloudflare for SaaS](https://developers.cloudflare.com/cloudflare-for-platforms/cloudflare-for-saas/).
+     *
+     * This field is only present if you have Cloudflare for SaaS enabled on your account
+     * and you have followed the [required steps to enable it](https://developers.cloudflare.com/cloudflare-for-platforms/cloudflare-for-saas/domain-support/custom-metadata/).
+     */
+    hostMetadata?: HostMetadata;
+}
+interface IncomingRequestCfPropertiesCloudflareAccessOrApiShield {
+    /**
+     * Information about the client certificate presented to Cloudflare.
+     *
+     * This is populated when the incoming request is served over TLS using
+     * either Cloudflare Access or API Shield (mTLS)
+     * and the presented SSL certificate has a valid
+     * [Certificate Serial Number](https://ldapwiki.com/wiki/Certificate%20Serial%20Number)
+     * (i.e., not `null` or `""`).
+     *
+     * Otherwise, a set of placeholder values is used.
+     *
+     * The property `certPresented` will be set to `"1"` when
+     * the object is populated (i.e. the above conditions were met).
+     */
+    tlsClientAuth: IncomingRequestCfPropertiesTLSClientAuth | IncomingRequestCfPropertiesTLSClientAuthPlaceholder;
+}
+/**
+ * Metadata about the request's TLS handshake
+ */
+interface IncomingRequestCfPropertiesExportedAuthenticatorMetadata {
+    /**
+     * The client's [`HELLO` message](https://www.rfc-editor.org/rfc/rfc5246#section-7.4.1.2), encoded in hexadecimal
+     *
+     * @example "44372ba35fa1270921d318f34c12f155dc87b682cf36a790cfaa3ba8737a1b5d"
+     */
+    clientHandshake: string;
+    /**
+     * The server's [`HELLO` message](https://www.rfc-editor.org/rfc/rfc5246#section-7.4.1.2), encoded in hexadecimal
+     *
+     * @example "44372ba35fa1270921d318f34c12f155dc87b682cf36a790cfaa3ba8737a1b5d"
+     */
+    serverHandshake: string;
+    /**
+     * The client's [`FINISHED` message](https://www.rfc-editor.org/rfc/rfc5246#section-7.4.9), encoded in hexadecimal
+     *
+     * @example "084ee802fe1348f688220e2a6040a05b2199a761f33cf753abb1b006792d3f8b"
+     */
+    clientFinished: string;
+    /**
+     * The server's [`FINISHED` message](https://www.rfc-editor.org/rfc/rfc5246#section-7.4.9), encoded in hexadecimal
+     *
+     * @example "084ee802fe1348f688220e2a6040a05b2199a761f33cf753abb1b006792d3f8b"
+     */
+    serverFinished: string;
+}
+/**
+ * Geographic data about the request's origin.
+ */
+interface IncomingRequestCfPropertiesGeographicInformation {
+    /**
+     * The [ISO 3166-1 Alpha 2](https://www.iso.org/iso-3166-country-codes.html) country code the request originated from.
+     *
+     * If your worker is [configured to accept TOR connections](https://support.cloudflare.com/hc/en-us/articles/203306930-Understanding-Cloudflare-Tor-support-and-Onion-Routing), this may also be `"T1"`, indicating a request that originated over TOR.
+     *
+     * If Cloudflare is unable to determine where the request originated this property is omitted.
+     *
+     * The country code `"T1"` is used for requests originating on TOR.
+     *
+     * @example "GB"
+     */
+    country?: Iso3166Alpha2Code | "T1";
+    /**
+     * If present, this property indicates that the request originated in the EU
+     *
+     * @example "1"
+     */
+    isEUCountry?: "1";
+    /**
+     * A two-letter code indicating the continent the request originated from.
+     *
+     * @example "AN"
+     */
+    continent?: ContinentCode;
+    /**
+     * The city the request originated from
+     *
+     * @example "Austin"
+     */
+    city?: string;
+    /**
+     * Postal code of the incoming request
+     *
+     * @example "78701"
+     */
+    postalCode?: string;
+    /**
+     * Latitude of the incoming request
+     *
+     * @example "30.27130"
+     */
+    latitude?: string;
+    /**
+     * Longitude of the incoming request
+     *
+     * @example "-97.74260"
+     */
+    longitude?: string;
+    /**
+     * Timezone of the incoming request
+     *
+     * @example "America/Chicago"
+     */
+    timezone?: string;
+    /**
+     * If known, the ISO 3166-2 name for the first level region associated with
+     * the IP address of the incoming request
+     *
+     * @example "Texas"
+     */
+    region?: string;
+    /**
+     * If known, the ISO 3166-2 code for the first-level region associated with
+     * the IP address of the incoming request
+     *
+     * @example "TX"
+     */
+    regionCode?: string;
+    /**
+     * Metro code (DMA) of the incoming request
+     *
+     * @example "635"
+     */
+    metroCode?: string;
+}
+/** Data about the incoming request's TLS certificate */
+interface IncomingRequestCfPropertiesTLSClientAuth {
+    /** Always `"1"`, indicating that the certificate was presented */
+    certPresented: "1";
+    /**
+     * Result of certificate verification.
+     *
+     * @example "FAILED:self signed certificate"
+     */
+    certVerified: Exclude<CertVerificationStatus, "NONE">;
+    /** The presented certificate's revocation status.
+     *
+     * - A value of `"1"` indicates the certificate has been revoked
+     * - A value of `"0"` indicates the certificate has not been revoked
+     */
+    certRevoked: "1" | "0";
+    /**
+     * The certificate issuer's [distinguished name](https://knowledge.digicert.com/generalinformation/INFO1745.html)
+     *
+     * @example "CN=cloudflareaccess.com, C=US, ST=Texas, L=Austin, O=Cloudflare"
+     */
+    certIssuerDN: string;
+    /**
+     * The certificate subject's [distinguished name](https://knowledge.digicert.com/generalinformation/INFO1745.html)
+     *
+     * @example "CN=*.cloudflareaccess.com, C=US, ST=Texas, L=Austin, O=Cloudflare"
+     */
+    certSubjectDN: string;
+    /**
+     * The certificate issuer's [distinguished name](https://knowledge.digicert.com/generalinformation/INFO1745.html) ([RFC 2253](https://www.rfc-editor.org/rfc/rfc2253.html) formatted)
+     *
+     * @example "CN=cloudflareaccess.com, C=US, ST=Texas, L=Austin, O=Cloudflare"
+     */
+    certIssuerDNRFC2253: string;
+    /**
+     * The certificate subject's [distinguished name](https://knowledge.digicert.com/generalinformation/INFO1745.html) ([RFC 2253](https://www.rfc-editor.org/rfc/rfc2253.html) formatted)
+     *
+     * @example "CN=*.cloudflareaccess.com, C=US, ST=Texas, L=Austin, O=Cloudflare"
+     */
+    certSubjectDNRFC2253: string;
+    /** The certificate issuer's distinguished name (legacy policies) */
+    certIssuerDNLegacy: string;
+    /** The certificate subject's distinguished name (legacy policies) */
+    certSubjectDNLegacy: string;
+    /**
+     * The certificate's serial number
+     *
+     * @example "00936EACBE07F201DF"
+     */
+    certSerial: string;
+    /**
+     * The certificate issuer's serial number
+     *
+     * @example "2489002934BDFEA34"
+     */
+    certIssuerSerial: string;
+    /**
+     * The certificate's Subject Key Identifier
+     *
+     * @example "BB:AF:7E:02:3D:FA:A6:F1:3C:84:8E:AD:EE:38:98:EC:D9:32:32:D4"
+     */
+    certSKI: string;
+    /**
+     * The certificate issuer's Subject Key Identifier
+     *
+     * @example "BB:AF:7E:02:3D:FA:A6:F1:3C:84:8E:AD:EE:38:98:EC:D9:32:32:D4"
+     */
+    certIssuerSKI: string;
+    /**
+     * The certificate's SHA-1 fingerprint
+     *
+     * @example "6b9109f323999e52259cda7373ff0b4d26bd232e"
+     */
+    certFingerprintSHA1: string;
+    /**
+     * The certificate's SHA-256 fingerprint
+     *
+     * @example "acf77cf37b4156a2708e34c4eb755f9b5dbbe5ebb55adfec8f11493438d19e6ad3f157f81fa3b98278453d5652b0c1fd1d71e5695ae4d709803a4d3f39de9dea"
+     */
+    certFingerprintSHA256: string;
+    /**
+     * The effective starting date of the certificate
+     *
+     * @example "Dec 22 19:39:00 2018 GMT"
+     */
+    certNotBefore: string;
+    /**
+     * The effective expiration date of the certificate
+     *
+     * @example "Dec 22 19:39:00 2018 GMT"
+     */
+    certNotAfter: string;
+}
+/** Placeholder values for TLS Client Authorization */
+interface IncomingRequestCfPropertiesTLSClientAuthPlaceholder {
+    certPresented: "0";
+    certVerified: "NONE";
+    certRevoked: "0";
+    certIssuerDN: "";
+    certSubjectDN: "";
+    certIssuerDNRFC2253: "";
+    certSubjectDNRFC2253: "";
+    certIssuerDNLegacy: "";
+    certSubjectDNLegacy: "";
+    certSerial: "";
+    certIssuerSerial: "";
+    certSKI: "";
+    certIssuerSKI: "";
+    certFingerprintSHA1: "";
+    certFingerprintSHA256: "";
+    certNotBefore: "";
+    certNotAfter: "";
+}
+/** Possible outcomes of TLS verification */
+declare type CertVerificationStatus = 
+/** Authentication succeeded */
+"SUCCESS"
+/** No certificate was presented */
+ | "NONE"
+/** Failed because the certificate was self-signed */
+ | "FAILED:self signed certificate"
+/** Failed because the certificate failed a trust chain check */
+ | "FAILED:unable to verify the first certificate"
+/** Failed because the certificate is not yet valid */
+ | "FAILED:certificate is not yet valid"
+/** Failed because the certificate is expired */
+ | "FAILED:certificate has expired"
+/** Failed for another unspecified reason */
+ | "FAILED";
+/**
+ * An upstream endpoint's response to a TCP `keepalive` message from Cloudflare.
+ */
+declare type IncomingRequestCfPropertiesEdgeRequestKeepAliveStatus = 0 /** Unknown */ | 1 /** no keepalives (not found) */ | 2 /** no connection re-use, opening keepalive connection failed */ | 3 /** no connection re-use, keepalive accepted and saved */ | 4 /** connection re-use, refused by the origin server (`TCP FIN`) */ | 5; /** connection re-use, accepted by the origin server */
+/** ISO 3166-1 Alpha-2 codes */
+declare type Iso3166Alpha2Code = "AD" | "AE" | "AF" | "AG" | "AI" | "AL" | "AM" | "AO" | "AQ" | "AR" | "AS" | "AT" | "AU" | "AW" | "AX" | "AZ" | "BA" | "BB" | "BD" | "BE" | "BF" | "BG" | "BH" | "BI" | "BJ" | "BL" | "BM" | "BN" | "BO" | "BQ" | "BR" | "BS" | "BT" | "BV" | "BW" | "BY" | "BZ" | "CA" | "CC" | "CD" | "CF" | "CG" | "CH" | "CI" | "CK" | "CL" | "CM" | "CN" | "CO" | "CR" | "CU" | "CV" | "CW" | "CX" | "CY" | "CZ" | "DE" | "DJ" | "DK" | "DM" | "DO" | "DZ" | "EC" | "EE" | "EG" | "EH" | "ER" | "ES" | "ET" | "FI" | "FJ" | "FK" | "FM" | "FO" | "FR" | "GA" | "GB" | "GD" | "GE" | "GF" | "GG" | "GH" | "GI" | "GL" | "GM" | "GN" | "GP" | "GQ" | "GR" | "GS" | "GT" | "GU" | "GW" | "GY" | "HK" | "HM" | "HN" | "HR" | "HT" | "HU" | "ID" | "IE" | "IL" | "IM" | "IN" | "IO" | "IQ" | "IR" | "IS" | "IT" | "JE" | "JM" | "JO" | "JP" | "KE" | "KG" | "KH" | "KI" | "KM" | "KN" | "KP" | "KR" | "KW" | "KY" | "KZ" | "LA" | "LB" | "LC" | "LI" | "LK" | "LR" | "LS" | "LT" | "LU" | "LV" | "LY" | "MA" | "MC" | "MD" | "ME" | "MF" | "MG" | "MH" | "MK" | "ML" | "MM" | "MN" | "MO" | "MP" | "MQ" | "MR" | "MS" | "MT" | "MU" | "MV" | "MW" | "MX" | "MY" | "MZ" | "NA" | "NC" | "NE" | "NF" | "NG" | "NI" | "NL" | "NO" | "NP" | "NR" | "NU" | "NZ" | "OM" | "PA" | "PE" | "PF" | "PG" | "PH" | "PK" | "PL" | "PM" | "PN" | "PR" | "PS" | "PT" | "PW" | "PY" | "QA" | "RE" | "RO" | "RS" | "RU" | "RW" | "SA" | "SB" | "SC" | "SD" | "SE" | "SG" | "SH" | "SI" | "SJ" | "SK" | "SL" | "SM" | "SN" | "SO" | "SR" | "SS" | "ST" | "SV" | "SX" | "SY" | "SZ" | "TC" | "TD" | "TF" | "TG" | "TH" | "TJ" | "TK" | "TL" | "TM" | "TN" | "TO" | "TR" | "TT" | "TV" | "TW" | "TZ" | "UA" | "UG" | "UM" | "US" | "UY" | "UZ" | "VA" | "VC" | "VE" | "VG" | "VI" | "VN" | "VU" | "WF" | "WS" | "YE" | "YT" | "ZA" | "ZM" | "ZW";
+/** The 2-letter continent codes Cloudflare uses */
+declare type ContinentCode = "AF" | "AN" | "AS" | "EU" | "NA" | "OC" | "SA";
+type CfProperties<HostMetadata = unknown> = IncomingRequestCfProperties<HostMetadata> | RequestInitCfProperties;
+interface D1Meta {
+    duration: number;
+    size_after: number;
+    rows_read: number;
+    rows_written: number;
+    last_row_id: number;
+    changed_db: boolean;
+    changes: number;
+    /**
+     * The region of the database instance that executed the query.
+     */
+    served_by_region?: string;
+    /**
+     * The three-letter airport code of the colo that executed the query.
+     */
+    served_by_colo?: string;
+    /**
+     * True if-and-only-if the database instance that executed the query was the primary.
+     */
+    served_by_primary?: boolean;
+    timings?: {
+        /**
+         * The duration of the SQL query execution by the database instance. It doesn't include any network time.
+         */
+        sql_duration_ms: number;
+    };
+    /**
+     * Number of total attempts to execute the query, due to automatic retries.
+     * Note: All other fields in the response like `timings` only apply to the last attempt.
+     */
+    total_attempts?: number;
+}
+interface D1Response {
+    success: true;
+    meta: D1Meta & Record<string, unknown>;
+    error?: never;
+}
+type D1Result<T = unknown> = D1Response & {
+    results: T[];
+};
+interface D1ExecResult {
+    count: number;
+    duration: number;
+}
+type D1SessionConstraint = 
+// Indicates that the first query should go to the primary, and the remaining queries
+// using the same D1DatabaseSession will go to any replica that is consistent with
+// the bookmark maintained by the session (returned by the first query).
+'first-primary'
+// Indicates that the first query can go anywhere (primary or replica), and the remaining queries
+// using the same D1DatabaseSession will go to any replica that is consistent with
+// the bookmark maintained by the session (returned by the first query).
+ | 'first-unconstrained';
+type D1SessionBookmark = string;
+declare abstract class D1Database {
+    prepare(query: string): D1PreparedStatement;
+    batch<T = unknown>(statements: D1PreparedStatement[]): Promise<D1Result<T>[]>;
+    exec(query: string): Promise<D1ExecResult>;
+    /**
+     * Creates a new D1 Session anchored at the given constraint or the bookmark.
+     * All queries executed using the created session will have sequential consistency,
+     * meaning that all writes done through the session will be visible in subsequent reads.
+     *
+     * @param constraintOrBookmark Either the session constraint or the explicit bookmark to anchor the created session.
+     */
+    withSession(constraintOrBookmark?: D1SessionBookmark | D1SessionConstraint): D1DatabaseSession;
+    /**
+     * @deprecated dump() will be removed soon, only applies to deprecated alpha v1 databases.
+     */
+    dump(): Promise<ArrayBuffer>;
+}
+declare abstract class D1DatabaseSession {
+    prepare(query: string): D1PreparedStatement;
+    batch<T = unknown>(statements: D1PreparedStatement[]): Promise<D1Result<T>[]>;
+    /**
+     * @returns The latest session bookmark across all executed queries on the session.
+     *          If no query has been executed yet, `null` is returned.
+     */
+    getBookmark(): D1SessionBookmark | null;
+}
+declare abstract class D1PreparedStatement {
+    bind(...values: unknown[]): D1PreparedStatement;
+    first<T = unknown>(colName: string): Promise<T | null>;
+    first<T = Record<string, unknown>>(): Promise<T | null>;
+    run<T = Record<string, unknown>>(): Promise<D1Result<T>>;
+    all<T = Record<string, unknown>>(): Promise<D1Result<T>>;
+    raw<T = unknown[]>(options: {
+        columnNames: true;
+    }): Promise<[
+        string[],
+        ...T[]
+    ]>;
+    raw<T = unknown[]>(options?: {
+        columnNames?: false;
+    }): Promise<T[]>;
+}
+// `Disposable` was added to TypeScript's standard lib types in version 5.2.
+// To support older TypeScript versions, define an empty `Disposable` interface.
+// Users won't be able to use `using`/`Symbol.dispose` without upgrading to 5.2,
+// but this will ensure type checking on older versions still passes.
+// TypeScript's interface merging will ensure our empty interface is effectively
+// ignored when `Disposable` is included in the standard lib.
+interface Disposable {
+}
+/**
+ * The returned data after sending an email
+ */
+interface EmailSendResult {
+    /**
+     * The Email Message ID
+     */
+    messageId: string;
+}
+/**
+ * An email message that can be sent from a Worker.
+ */
+interface EmailMessage {
+    /**
+     * Envelope From attribute of the email message.
+     */
+    readonly from: string;
+    /**
+     * Envelope To attribute of the email message.
+     */
+    readonly to: string;
+}
+/**
+ * An email message that is sent to a consumer Worker and can be rejected/forwarded.
+ */
+interface ForwardableEmailMessage extends EmailMessage {
+    /**
+     * Stream of the email message content.
+     */
+    readonly raw: ReadableStream<Uint8Array>;
+    /**
+     * A [Headers object](https://developer.mozilla.org/en-US/docs/Web/API/Headers).
+     */
+    readonly headers: Headers;
+    /**
+     * Size of the email message content.
+     */
+    readonly rawSize: number;
+    /**
+     * Reject this email message by returning a permanent SMTP error back to the connecting client including the given reason.
+     * @param reason The reject reason.
+     * @returns void
+     */
+    setReject(reason: string): void;
+    /**
+     * Forward this email message to a verified destination address of the account.
+     * @param rcptTo Verified destination address.
+     * @param headers A [Headers object](https://developer.mozilla.org/en-US/docs/Web/API/Headers).
+     * @returns A promise that resolves when the email message is forwarded.
+     */
+    forward(rcptTo: string, headers?: Headers): Promise<EmailSendResult>;
+    /**
+     * Reply to the sender of this email message with a new EmailMessage object.
+     * @param message The reply message.
+     * @returns A promise that resolves when the email message is replied.
+     */
+    reply(message: EmailMessage): Promise<EmailSendResult>;
+}
+/** A file attachment for an email message */
+type EmailAttachment = {
+    disposition: 'inline';
+    contentId: string;
+    filename: string;
+    type: string;
+    content: string | ArrayBuffer | ArrayBufferView;
+} | {
+    disposition: 'attachment';
+    contentId?: undefined;
+    filename: string;
+    type: string;
+    content: string | ArrayBuffer | ArrayBufferView;
+};
+/** An Email Address */
+interface EmailAddress {
+    name: string;
+    email: string;
+}
+/**
+ * A binding that allows a Worker to send email messages.
+ */
+interface SendEmail {
+    send(message: EmailMessage): Promise<EmailSendResult>;
+    send(builder: {
+        from: string | EmailAddress;
+        to: string | string[];
+        subject: string;
+        replyTo?: string | EmailAddress;
+        cc?: string | string[];
+        bcc?: string | string[];
+        headers?: Record<string, string>;
+        text?: string;
+        html?: string;
+        attachments?: EmailAttachment[];
+    }): Promise<EmailSendResult>;
+}
+declare abstract class EmailEvent extends ExtendableEvent {
+    readonly message: ForwardableEmailMessage;
+}
+declare type EmailExportedHandler<Env = unknown> = (message: ForwardableEmailMessage, env: Env, ctx: ExecutionContext) => void | Promise<void>;
+declare module "cloudflare:email" {
+    let _EmailMessage: {
+        prototype: EmailMessage;
+        new (from: string, to: string, raw: ReadableStream | string): EmailMessage;
+    };
+    export { _EmailMessage as EmailMessage };
+}
+/**
+ * Hello World binding to serve as an explanatory example. DO NOT USE
+ */
+interface HelloWorldBinding {
+    /**
+     * Retrieve the current stored value
+     */
+    get(): Promise<{
+        value: string;
+        ms?: number;
+    }>;
+    /**
+     * Set a new stored value
+     */
+    set(value: string): Promise<void>;
+}
+interface Hyperdrive {
+    /**
+     * Connect directly to Hyperdrive as if it's your database, returning a TCP socket.
+     *
+     * Calling this method returns an identical socket to if you call
+     * `connect("host:port")` using the `host` and `port` fields from this object.
+     * Pick whichever approach works better with your preferred DB client library.
+     *
+     * Note that this socket is not yet authenticated -- it's expected that your
+     * code (or preferably, the client library of your choice) will authenticate
+     * using the information in this class's readonly fields.
+     */
+    connect(): Socket;
+    /**
+     * A valid DB connection string that can be passed straight into the typical
+     * client library/driver/ORM. This will typically be the easiest way to use
+     * Hyperdrive.
+     */
+    readonly connectionString: string;
+    /**
+     * A randomly generated hostname that is only valid within the context of the
+     * currently running Worker which, when passed into `connect()` function from
+     * the "cloudflare:sockets" module, will connect to the Hyperdrive instance
+     * for your database.
+     */
+    readonly host: string;
+    /**
+     * The port that must be paired with the host field when connecting.
+     */
+    readonly port: number;
+    /**
+     * The username to use when authenticating to your database via Hyperdrive.
+     * Unlike the host and password, this will be the same every time
+     */
+    readonly user: string;
+    /**
+     * The randomly generated password to use when authenticating to your
+     * database via Hyperdrive. Like the host field, this password is only valid
+     * within the context of the currently running Worker instance from which
+     * it's read.
+     */
+    readonly password: string;
+    /**
+     * The name of the database to connect to.
+     */
+    readonly database: string;
+}
+// Copyright (c) 2024 Cloudflare, Inc.
+// Licensed under the Apache 2.0 license found in the LICENSE file or at:
+//     https://opensource.org/licenses/Apache-2.0
+/**
+ * Result of `ImagesBinding.info`: either only the `'image/svg+xml'` format, or a
+ * format string together with `fileSize`, `width` and `height`.
+ */
+type ImageInfoResponse = {
+    format: 'image/svg+xml';
+} | {
+    format: string;
+    fileSize: number;
+    width: number;
+    height: number;
+};
+/**
+ * Transformation parameters accepted by `ImageTransformer.transform`.
+ * Every field is optional.
+ */
+type ImageTransform = {
+    width?: number;
+    height?: number;
+    background?: string;
+    blur?: number;
+    // Either a colour/width pair or per-side numbers.
+    border?: {
+        color?: string;
+        width?: number;
+    } | {
+        top?: number;
+        bottom?: number;
+        left?: number;
+        right?: number;
+    };
+    brightness?: number;
+    contrast?: number;
+    fit?: 'scale-down' | 'contain' | 'pad' | 'squeeze' | 'cover' | 'crop';
+    flip?: 'h' | 'v' | 'hv';
+    gamma?: number;
+    segment?: 'foreground';
+    // Either a named gravity or an object with optional coordinates and a required `mode`.
+    gravity?: 'face' | 'left' | 'right' | 'top' | 'bottom' | 'center' | 'auto' | 'entropy' | {
+        x?: number;
+        y?: number;
+        mode: 'remainder' | 'box-center';
+    };
+    rotate?: 0 | 90 | 180 | 270;
+    saturation?: number;
+    sharpen?: number;
+    // Either the literal `'border'` or an object of optional trim settings.
+    trim?: 'border' | {
+        top?: number;
+        bottom?: number;
+        left?: number;
+        right?: number;
+        width?: number;
+        height?: number;
+        border?: boolean | {
+            color?: string;
+            tolerance?: number;
+            keep?: number;
+        };
+    };
+};
+/**
+ * Options accepted by `ImageTransformer.draw`. Every field is optional.
+ */
+type ImageDrawOptions = {
+    opacity?: number;
+    repeat?: boolean | string;
+    top?: number;
+    left?: number;
+    bottom?: number;
+    right?: number;
+};
+/**
+ * Options accepted by `ImagesBinding.info` and `ImagesBinding.input`.
+ * The only declared `encoding` value is `'base64'`.
+ */
+type ImageInputOptions = {
+    encoding?: 'base64';
+};
+/**
+ * Options accepted by `ImageTransformer.output`. `format` is required; the
+ * remaining fields are optional.
+ */
+type ImageOutputOptions = {
+    format: 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp' | 'image/avif' | 'rgb' | 'rgba';
+    quality?: number;
+    background?: string;
+    anim?: boolean;
+};
+/**
+ * Images binding: inspects image metadata or starts a chain of transformations.
+ */
+interface ImagesBinding {
+    /**
+     * Get image metadata (type, width and height)
+     * @throws {@link ImagesError} with code 9412 if input is not an image
+     * @param stream The image bytes
+     * @param options Optional input options (see `ImageInputOptions`)
+     * @returns A promise resolving to an `ImageInfoResponse`
+     */
+    info(stream: ReadableStream<Uint8Array>, options?: ImageInputOptions): Promise<ImageInfoResponse>;
+    /**
+     * Begin applying a series of transformations to an image
+     * @param stream The image bytes
+     * @param options Optional input options (see `ImageInputOptions`)
+     * @returns A transform handle
+     */
+    input(stream: ReadableStream<Uint8Array>, options?: ImageInputOptions): ImageTransformer;
+}
+/**
+ * Transform handle returned by `ImagesBinding.input`. `transform` and `draw`
+ * return an `ImageTransformer` again, so calls can be chained before `output`.
+ */
+interface ImageTransformer {
+    /**
+     * Apply transform next, returning a transform handle.
+     * You can then apply more transformations, draw, or retrieve the output.
+     * @param transform The transformation parameters to apply
+     * @returns A transform handle
+     */
+    transform(transform: ImageTransform): ImageTransformer;
+    /**
+     * Draw an image on this transformer, returning a transform handle.
+     * You can then apply more transformations, draw, or retrieve the output.
+     * @param image The image (or transformer that will give the image) to draw
+     * @param options The options configuring how to draw the image
+     * @returns A transform handle
+     */
+    draw(image: ReadableStream<Uint8Array> | ImageTransformer, options?: ImageDrawOptions): ImageTransformer;
+    /**
+     * Retrieve the image that results from applying the transforms to the
+     * provided input
+     * @param options Options that apply to the output e.g. output format
+     * @returns A promise resolving to an `ImageTransformationResult`
+     */
+    output(options: ImageOutputOptions): Promise<ImageTransformationResult>;
+}
+/**
+ * Options accepted by `ImageTransformationResult.image`.
+ * The only declared `encoding` value is `'base64'`.
+ */
+type ImageTransformationOutputOptions = {
+    encoding?: 'base64';
+};
+/**
+ * Result of `ImageTransformer.output`, offering the transformed image as a
+ * `Response`, as a byte stream, and its content type. All accessors are synchronous.
+ */
+interface ImageTransformationResult {
+    /**
+     * The image as a response, ready to store in cache or return to users
+     */
+    response(): Response;
+    /**
+     * The content type of the returned image
+     */
+    contentType(): string;
+    /**
+     * The bytes of the response
+     * @param options Optional output options (see `ImageTransformationOutputOptions`)
+     */
+    image(options?: ImageTransformationOutputOptions): ReadableStream<Uint8Array>;
+}
+/**
+ * Error shape for the Images binding: an `Error` with an additional numeric
+ * `code` (for example, `ImagesBinding.info` documents code 9412).
+ */
+interface ImagesError extends Error {
+    readonly code: number;
+    readonly message: string;
+    readonly stack?: string;
+}
+/**
+ * Media binding for transforming media streams.
+ * Provides the entry point for media transformation operations.
+ */
+interface MediaBinding {
+    /**
+     * Creates a media transformer from an input stream.
+     * @param media - The input media bytes, as a stream of `Uint8Array` chunks
+     * @returns A MediaTransformer instance for applying transformations
+     */
+    input(media: ReadableStream<Uint8Array>): MediaTransformer;
+}
+/**
+ * Media transformer for applying transformation operations to media content.
+ * Handles sizing, fitting, and other input transformation parameters.
+ * `output` can be called directly, or after `transform` via the returned generator.
+ */
+interface MediaTransformer {
+    /**
+     * Applies transformation options to the media content.
+     * @param transform - Configuration for how the media should be transformed (may be omitted)
+     * @returns A generator for producing the transformed media output
+     */
+    transform(transform?: MediaTransformationInputOptions): MediaTransformationGenerator;
+    /**
+     * Generates the final media output with specified options.
+     * @param output - Configuration for the output format and parameters (may be omitted)
+     * @returns The final transformation result containing the transformed media
+     */
+    output(output?: MediaTransformationOutputOptions): MediaTransformationResult;
+}
+/**
+ * Generator for producing media transformation results.
+ * Configures the output format and parameters for the transformed media.
+ * Returned by `MediaTransformer.transform`.
+ */
+interface MediaTransformationGenerator {
+    /**
+     * Generates the final media output with specified options.
+     * @param output - Configuration for the output format and parameters (may be omitted)
+     * @returns The final transformation result containing the transformed media
+     */
+    output(output?: MediaTransformationOutputOptions): MediaTransformationResult;
+}
+/**
+ * Result of a media transformation operation.
+ * Provides multiple ways to access the transformed media content.
+ * Every accessor returns a promise.
+ */
+interface MediaTransformationResult {
+    /**
+     * Returns the transformed media as a readable stream of bytes.
+     * @returns A promise containing a readable stream with the transformed media
+     */
+    media(): Promise<ReadableStream<Uint8Array>>;
+    /**
+     * Returns the transformed media as an HTTP response object.
+     * @returns The transformed media as a Promise<Response>, ready to store in cache or return to users
+     */
+    response(): Promise<Response>;
+    /**
+     * Returns the MIME type of the transformed media.
+     * @returns A promise containing the content type string (e.g., 'image/jpeg', 'video/mp4')
+     */
+    contentType(): Promise<string>;
+}
+/**
+ * Configuration options for transforming media input.
+ * Controls how the media should be resized and fitted.
+ * Every field is optional.
+ */
+type MediaTransformationInputOptions = {
+    /** How the media should be resized to fit the specified dimensions */
+    fit?: 'contain' | 'cover' | 'scale-down';
+    /** Target width in pixels */
+    width?: number;
+    /** Target height in pixels */
+    height?: number;
+};
+/**
+ * Configuration options for Media Transformations output.
+ * Controls the format, timing, and type of the generated output.
+ * Every field is optional.
+ */
+type MediaTransformationOutputOptions = {
+    /**
+     * Output mode determining the type of media to generate
+     */
+    mode?: 'video' | 'spritesheet' | 'frame' | 'audio';
+    /** Whether to include audio in the output */
+    audio?: boolean;
+    /**
+     * Starting timestamp for frame extraction or start time for clips. (e.g. '2s').
+     */
+    time?: string;
+    /**
+     * Duration for video clips, audio extraction, and spritesheet generation (e.g. '5s').
+     */
+    duration?: string;
+    /**
+     * Number of frames in the spritesheet.
+     */
+    imageCount?: number;
+    /**
+     * Output format for the generated media.
+     */
+    format?: 'jpg' | 'png' | 'm4a';
+};
+/**
+ * Error object for media transformation operations.
+ * Extends the standard Error interface with a numeric `code`.
+ */
+interface MediaError extends Error {
+    readonly code: number;
+    readonly message: string;
+    readonly stack?: string;
+}
+/**
+ * Module exporting `httpServerHandler`, which returns an `ExportedHandler` given
+ * a port number, an options object containing a `port`, or a server object.
+ */
+declare module 'cloudflare:node' {
+    // Minimal server shape accepted by `httpServerHandler`: `listen` returns the
+    // server itself and `address()` returns an object with an optional `port`.
+    interface NodeStyleServer {
+        listen(...args: unknown[]): this;
+        address(): {
+            port?: number | null | undefined;
+        };
+    }
+    export function httpServerHandler(port: number): ExportedHandler;
+    export function httpServerHandler(options: {
+        port: number;
+    }): ExportedHandler;
+    export function httpServerHandler(server: NodeStyleServer): ExportedHandler;
+}
+/**
+ * Map from parameter name to its value: a single string or an array of strings.
+ */
+type Params<P extends string = any> = Record<P, string | string[]>;
+/**
+ * Context object passed to a `PagesFunction`.
+ * `env` is the caller-supplied `Env` extended with an `ASSETS` object exposing `fetch`.
+ */
+type EventContext<Env, P extends string, Data> = {
+    request: Request<unknown, IncomingRequestCfProperties<unknown>>;
+    functionPath: string;
+    waitUntil: (promise: Promise<any>) => void;
+    passThroughOnException: () => void;
+    // Both arguments are optional; resolves to a `Response`.
+    next: (input?: Request | string, init?: RequestInit) => Promise<Response>;
+    env: Env & {
+        ASSETS: {
+            fetch: typeof fetch;
+        };
+    };
+    params: Params<P>;
+    data: Data;
+};
+/**
+ * Function taking an `EventContext` and returning a `Response`, either directly
+ * or through a promise.
+ */
+type PagesFunction<Env = unknown, Params extends string = any, Data extends Record<string, unknown> = Record<string, unknown>> = (context: EventContext<Env, Params, Data>) => Response | Promise<Response>;
+/**
+ * Context object passed to a `PagesPluginFunction`. It declares the same fields
+ * as `EventContext` plus `pluginArgs`.
+ */
+type EventPluginContext<Env, P extends string, Data, PluginArgs> = {
+    request: Request<unknown, IncomingRequestCfProperties<unknown>>;
+    functionPath: string;
+    waitUntil: (promise: Promise<any>) => void;
+    passThroughOnException: () => void;
+    // Both arguments are optional; resolves to a `Response`.
+    next: (input?: Request | string, init?: RequestInit) => Promise<Response>;
+    env: Env & {
+        ASSETS: {
+            fetch: typeof fetch;
+        };
+    };
+    params: Params<P>;
+    data: Data;
+    pluginArgs: PluginArgs;
+};
+/**
+ * Function taking an `EventPluginContext` and returning a `Response`, either
+ * directly or through a promise.
+ */
+type PagesPluginFunction<Env = unknown, Params extends string = any, Data extends Record<string, unknown> = Record<string, unknown>, PluginArgs = unknown> = (context: EventPluginContext<Env, Params, Data, PluginArgs>) => Response | Promise<Response>;
+/**
+ * Wildcard declaration: any module specifier matching `assets:*` exports an
+ * `onRequest` value typed as a `PagesFunction`.
+ */
+declare module "assets:*" {
+    export const onRequest: PagesFunction;
+}
+// Copyright (c) 2022-2023 Cloudflare, Inc.
+// Licensed under the Apache 2.0 license found in the LICENSE file or at:
+//     https://opensource.org/licenses/Apache-2.0
+declare module "cloudflare:pipelines" {
+    /**
+     * Base class for a pipeline transformation entrypoint. Declares protected
+     * `env` and `ctx` fields, a constructor taking both, and a public `run` method.
+     * `I` and `O` are the input and output record types.
+     */
+    export abstract class PipelineTransformationEntrypoint<Env = unknown, I extends PipelineRecord = PipelineRecord, O extends PipelineRecord = PipelineRecord> {
+        protected env: Env;
+        protected ctx: ExecutionContext;
+        constructor(ctx: ExecutionContext, env: Env);
+        /**
+         * run receives an array of PipelineRecord which can be
+         * transformed and returned to the pipeline
+         * @param records Incoming records from the pipeline to be transformed
+         * @param metadata Information about the specific pipeline calling the transformation entrypoint
+         * @returns A promise containing the transformed PipelineRecord array
+         */
+        public run(records: I[], metadata: PipelineBatchMetadata): Promise<O[]>;
+    }
+    /** A single record: an object with string keys and values of unknown type. */
+    export type PipelineRecord = Record<string, unknown>;
+    /** Identifies the pipeline invoking `run`, by id and by name. */
+    export type PipelineBatchMetadata = {
+        pipelineId: string;
+        pipelineName: string;
+    };
+    /**
+     * The Pipeline interface represents the type of a binding to a Pipeline
+     */
+    export interface Pipeline<T extends PipelineRecord = PipelineRecord> {
+        /**
+         * Send records to the pipeline.
+         *
+         * @param records The records to send to the pipeline
+         */
+        send(records: T[]): Promise<void>;
+    }
+}
+// PubSubMessage represents an incoming PubSub message.
+// The message includes metadata about the broker, the client, and the payload
+// itself.
+// https://developers.cloudflare.com/pub-sub/
+interface PubSubMessage {
+    // Message ID
+    readonly mid: number;
+    // MQTT broker FQDN in the form mqtts://BROKER.NAMESPACE.cloudflarepubsub.com:PORT
+    readonly broker: string;
+    // The MQTT topic the message was sent on.
+    readonly topic: string;
+    // The client ID of the client that published this message.
+    readonly clientId: string;
+    // The unique identifier (JWT ID) used by the client to authenticate, if token
+    // auth was used.
+    readonly jti?: string;
+    // A Unix timestamp (seconds from Jan 1, 1970), set when the Pub/Sub Broker
+    // received the message from the client.
+    readonly receivedAt: number;
+    // An (optional) string with the MIME type of the payload, if set by the
+    // client.
+    // NOTE(review): described as optional but declared as a required `string` — confirm.
+    readonly contentType: string;
+    // Set to 1 when the payload is a UTF-8 string
+    // https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901063
+    readonly payloadFormatIndicator: number;
+    // Pub/Sub (MQTT) payloads can be UTF-8 strings, or byte arrays.
+    // You can use payloadFormatIndicator to inspect this before decoding.
+    // Unlike the other fields, `payload` is not declared `readonly`.
+    payload: string | Uint8Array;
+}
+// JsonWebKey extended with a required, read-only `kid` parameter
+interface JsonWebKeyWithKid extends JsonWebKey {
+    // Key Identifier of the JWK
+    readonly kid: string;
+}
+/**
+ * Options passed to `RateLimit.limit`: a single string `key`.
+ */
+interface RateLimitOptions {
+    key: string;
+}
+/**
+ * Outcome resolved by `RateLimit.limit`: a single boolean `success` flag.
+ */
+interface RateLimitOutcome {
+    success: boolean;
+}
+/**
+ * Rate limit binding exposing a single `limit` method.
+ */
+interface RateLimit {
+    /**
+     * Rate limit a request based on the provided options.
+     * @see https://developers.cloudflare.com/workers/runtime-apis/bindings/rate-limit/
+     * @param options The rate limit options, containing the `key`
+     * @returns A promise that resolves with the outcome of the rate limit.
+     */
+    limit(options: RateLimitOptions): Promise<RateLimitOutcome>;
+}
+// Namespace for RPC utility types. Unfortunately, we can't use a `module` here as these types need
+// to be referenced by `Fetcher`. This is included in the "importable" version of the types which
+// strips all `module` blocks.
+declare namespace Rpc {
+    // Branded types for identifying `WorkerEntrypoint`/`DurableObject`/`Target`s.
+    // TypeScript uses *structural* typing meaning anything with the same shape as type `T` is a `T`.
+    // For the classes exported by `cloudflare:workers` we want *nominal* typing (i.e. we only want to
+    // accept `WorkerEntrypoint` from `cloudflare:workers`, not any other class with the same shape)
+    export const __RPC_STUB_BRAND: '__RPC_STUB_BRAND';
+    export const __RPC_TARGET_BRAND: '__RPC_TARGET_BRAND';
+    export const __WORKER_ENTRYPOINT_BRAND: '__WORKER_ENTRYPOINT_BRAND';
+    export const __DURABLE_OBJECT_BRAND: '__DURABLE_OBJECT_BRAND';
+    export const __WORKFLOW_ENTRYPOINT_BRAND: '__WORKFLOW_ENTRYPOINT_BRAND';
+    export interface RpcTargetBranded {
+        [__RPC_TARGET_BRAND]: never;
+    }
+    export interface WorkerEntrypointBranded {
+        [__WORKER_ENTRYPOINT_BRAND]: never;
+    }
+    export interface DurableObjectBranded {
+        [__DURABLE_OBJECT_BRAND]: never;
+    }
+    export interface WorkflowEntrypointBranded {
+        [__WORKFLOW_ENTRYPOINT_BRAND]: never;
+    }
+    export type EntrypointBranded = WorkerEntrypointBranded | DurableObjectBranded | WorkflowEntrypointBranded;
+    // Types that can be used through `Stub`s
+    export type Stubable = RpcTargetBranded | ((...args: any[]) => any);
+    // Types that can be passed over RPC
+    // The reason for using a generic type here is to build a serializable subset of structured
+    //   cloneable composite types. This allows types defined with the "interface" keyword to pass the
+    //   serializable check as well. Otherwise, only types defined with the "type" keyword would pass.
+    type Serializable<T> = 
+    // Structured cloneables
+    BaseType
+    // Structured cloneable composites
+     | Map<T extends Map<infer U, unknown> ? Serializable<U> : never, T extends Map<unknown, infer U> ? Serializable<U> : never> | Set<T extends Set<infer U> ? Serializable<U> : never> | ReadonlyArray<T extends ReadonlyArray<infer U> ? Serializable<U> : never> | {
+        [K in keyof T]: K extends number | string ? Serializable<T[K]> : never;
+    }
+    // Special types
+     | Stub<Stubable>
+    // Serialized as stubs, see `Stubify`
+     | Stubable;
+    // Base type for all RPC stubs, including common memory management methods.
+    // `T` is used as a marker type for unwrapping `Stub`s later.
+    interface StubBase<T extends Stubable> extends Disposable {
+        [__RPC_STUB_BRAND]: T;
+        dup(): this;
+    }
+    export type Stub<T extends Stubable> = Provider<T> & StubBase<T>;
+    // This represents all the types that can be sent as-is over an RPC boundary
+    type BaseType = void | undefined | null | boolean | number | bigint | string | TypedArray | ArrayBuffer | DataView | Date | Error | RegExp | ReadableStream<Uint8Array> | WritableStream<Uint8Array> | Request | Response | Headers;
+    // Recursively rewrite all `Stubable` types with `Stub`s
+    // prettier-ignore
+    type Stubify<T> = T extends Stubable ? Stub<T> : T extends Map<infer K, infer V> ? Map<Stubify<K>, Stubify<V>> : T extends Set<infer V> ? Set<Stubify<V>> : T extends Array<infer V> ? Array<Stubify<V>> : T extends ReadonlyArray<infer V> ? ReadonlyArray<Stubify<V>> : T extends BaseType ? T : T extends {
+        [key: string | number]: any;
+    } ? {
+        [K in keyof T]: Stubify<T[K]>;
+    } : T;
+    // Recursively rewrite all `Stub<T>`s with the corresponding `T`s.
+    // Note we use `StubBase` instead of `Stub` here to avoid circular dependencies:
+    // `Stub` depends on `Provider`, which depends on `Unstubify`, which would depend on `Stub`.
+    // prettier-ignore
+    type Unstubify<T> = T extends StubBase<infer V> ? V : T extends Map<infer K, infer V> ? Map<Unstubify<K>, Unstubify<V>> : T extends Set<infer V> ? Set<Unstubify<V>> : T extends Array<infer V> ? Array<Unstubify<V>> : T extends ReadonlyArray<infer V> ? ReadonlyArray<Unstubify<V>> : T extends BaseType ? T : T extends {
+        [key: string | number]: unknown;
+    } ? {
+        [K in keyof T]: Unstubify<T[K]>;
+    } : T;
+    // Applies `Unstubify` to every element of an argument tuple.
+    type UnstubifyAll<A extends any[]> = {
+        [I in keyof A]: Unstubify<A[I]>;
+    };
+    // Utility type for adding `Provider`/`Disposable`s to `object` types only.
+    // Note `unknown & T` is equivalent to `T`.
+    type MaybeProvider<T> = T extends object ? Provider<T> : unknown;
+    type MaybeDisposable<T> = T extends object ? Disposable : unknown;
+    // Type for method return or property on an RPC interface.
+    // - Stubable types are replaced by stubs.
+    // - Serializable types are passed by value, with stubable types replaced by stubs
+    //   and a top-level `Disposer`.
+    // Everything else can't be passed over RPC.
+    // Technically, we use custom thenables here, but they quack like `Promise`s.
+    // Intersecting with `(Maybe)Provider` allows pipelining.
+    // prettier-ignore
+    type Result<R> = R extends Stubable ? Promise<Stub<R>> & Provider<R> : R extends Serializable<R> ? Promise<Stubify<R> & MaybeDisposable<R>> & MaybeProvider<R> : never;
+    // Type for method or property on an RPC interface.
+    // For methods, unwrap `Stub`s in parameters, and rewrite returns to be `Result`s.
+    // Unwrapping `Stub`s allows calling with `Stubable` arguments.
+    // For properties, rewrite types to be `Result`s.
+    // In each case, unwrap `Promise`s.
+    type MethodOrProperty<V> = V extends (...args: infer P) => infer R ? (...args: UnstubifyAll<P>) => Result<Awaited<R>> : Result<Awaited<V>>;
+    // Type for the callable part of a `Provider` if `T` is callable.
+    // This is intersected with methods/properties.
+    type MaybeCallableProvider<T> = T extends (...args: any[]) => any ? MethodOrProperty<T> : unknown;
+    // Base type for all other types providing RPC-like interfaces.
+    // Rewrites all methods/properties to be `MethodOrProperty`s, while preserving callable types.
+    // `Reserved` names (e.g. stub method names like `dup()`) and symbols can't be accessed over RPC.
+    export type Provider<T extends object, Reserved extends string = never> = MaybeCallableProvider<T> & Pick<{
+        [K in keyof T]: MethodOrProperty<T[K]>;
+    }, Exclude<keyof T, Reserved | symbol | keyof StubBase<never>>>;
+}
+// Namespace holding the project-extensible `Env` and `GlobalProps` interfaces and the
+// types derived from them (`GlobalProp`, `MainModule`, `Exports`).
+declare namespace Cloudflare {
+    // Type of `env`.
+    //
+    // The specific project can extend `Env` by redeclaring it in project-specific files. Typescript
+    // will merge all declarations.
+    //
+    // You can use `wrangler types` to generate the `Env` type automatically.
+    interface Env {
+    }
+    // Project-specific parameters used to inform types.
+    //
+    // This interface is, again, intended to be declared in project-specific files, and then that
+    // declaration will be merged with this one.
+    //
+    // A project should have a declaration like this:
+    //
+    //     interface GlobalProps {
+    //       // Declares the main module's exports. Used to populate Cloudflare.Exports aka the type
+    //       // of `ctx.exports`.
+    //       mainModule: typeof import("my-main-module");
+    //
+    //       // Declares which of the main module's exports are configured with durable storage, and
+    //       // thus should behave as Durable Object namespace bindings.
+    //       durableNamespaces: "MyDurableObject" | "AnotherDurableObject";
+    //     }
+    //
+    // You can use `wrangler types` to generate `GlobalProps` automatically.
+    interface GlobalProps {
+    }
+    // Evaluates to the type of a property in GlobalProps, defaulting to `Default` if it is not
+    // present.
+    type GlobalProp<K extends string, Default> = K extends keyof GlobalProps ? GlobalProps[K] : Default;
+    // The type of the program's main module exports, if known. Requires `GlobalProps` to declare the
+    // `mainModule` property.
+    type MainModule = GlobalProp<"mainModule", {}>;
+    // The type of ctx.exports, which contains loopback bindings for all top-level exports.
+    type Exports = {
+        [K in keyof MainModule]: LoopbackForExport<MainModule[K]>
+        // If the export is listed in `durableNamespaces`, then it is also a
+        // DurableObjectNamespace.
+         & (K extends GlobalProp<"durableNamespaces", never> ? MainModule[K] extends new (...args: any[]) => infer DoInstance ? DoInstance extends Rpc.DurableObjectBranded ? DurableObjectNamespace<DoInstance> : DurableObjectNamespace<undefined> : DurableObjectNamespace<undefined> : {});
+    };
+}
+// Declarations that make up the `cloudflare:workers` module, which re-exports this
+// namespace with `export =`.
+declare namespace CloudflareWorkersModule {
+    // `RpcStub<T>` is an alias of `Rpc.Stub<T>`; the constructor takes a stubable value
+    // and yields its stub type.
+    export type RpcStub<T extends Rpc.Stubable> = Rpc.Stub<T>;
+    export const RpcStub: {
+        new <T extends Rpc.Stubable>(value: T): Rpc.Stub<T>;
+    };
+    // Base class carrying the RPC target brand.
+    export abstract class RpcTarget implements Rpc.RpcTargetBranded {
+        [Rpc.__RPC_TARGET_BRAND]: never;
+    }
+    // `protected` fields don't appear in `keyof`s, so can't be accessed over RPC
+    // Every handler method below is declared optional.
+    export abstract class WorkerEntrypoint<Env = Cloudflare.Env, Props = {}> implements Rpc.WorkerEntrypointBranded {
+        [Rpc.__WORKER_ENTRYPOINT_BRAND]: never;
+        protected ctx: ExecutionContext<Props>;
+        protected env: Env;
+        constructor(ctx: ExecutionContext, env: Env);
+        email?(message: ForwardableEmailMessage): void | Promise<void>;
+        fetch?(request: Request): Response | Promise<Response>;
+        queue?(batch: MessageBatch<unknown>): void | Promise<void>;
+        scheduled?(controller: ScheduledController): void | Promise<void>;
+        tail?(events: TraceItem[]): void | Promise<void>;
+        tailStream?(event: TailStream.TailEvent<TailStream.Onset>): TailStream.TailEventHandlerType | Promise<TailStream.TailEventHandlerType>;
+        test?(controller: TestController): void | Promise<void>;
+        trace?(traces: TraceItem[]): void | Promise<void>;
+    }
+    // Base class carrying the Durable Object brand; `ctx` is a `DurableObjectState`.
+    // Every handler method below is declared optional.
+    export abstract class DurableObject<Env = Cloudflare.Env, Props = {}> implements Rpc.DurableObjectBranded {
+        [Rpc.__DURABLE_OBJECT_BRAND]: never;
+        protected ctx: DurableObjectState<Props>;
+        protected env: Env;
+        constructor(ctx: DurableObjectState, env: Env);
+        alarm?(alarmInfo?: AlarmInvocationInfo): void | Promise<void>;
+        fetch?(request: Request): Response | Promise<Response>;
+        webSocketMessage?(ws: WebSocket, message: string | ArrayBuffer): void | Promise<void>;
+        webSocketClose?(ws: WebSocket, code: number, reason: string, wasClean: boolean): void | Promise<void>;
+        webSocketError?(ws: WebSocket, error: unknown): void | Promise<void>;
+    }
+    export type WorkflowDurationLabel = 'second' | 'minute' | 'hour' | 'day' | 'week' | 'month' | 'year';
+    // Either a plain number or a string of the form `<number> <label>` with an optional
+    // trailing `s` (NOTE(review): the unit of the plain-number form is not stated here).
+    export type WorkflowSleepDuration = `${number} ${WorkflowDurationLabel}${'s' | ''}` | number;
+    // The three aliases below are all identical to `WorkflowSleepDuration`.
+    export type WorkflowDelayDuration = WorkflowSleepDuration;
+    export type WorkflowTimeoutDuration = WorkflowSleepDuration;
+    export type WorkflowRetentionDuration = WorkflowSleepDuration;
+    export type WorkflowBackoff = 'constant' | 'linear' | 'exponential';
+    // Optional per-step settings: a retry policy (`limit` and `delay` required,
+    // `backoff` optional) and a timeout.
+    export type WorkflowStepConfig = {
+        retries?: {
+            limit: number;
+            delay: WorkflowDelayDuration | number;
+            backoff?: WorkflowBackoff;
+        };
+        timeout?: WorkflowTimeoutDuration | number;
+    };
+    // Event passed to `WorkflowEntrypoint.run`.
+    export type WorkflowEvent<T> = {
+        payload: Readonly<T>;
+        timestamp: Date;
+        instanceId: string;
+    };
+    // Event resolved by `WorkflowStep.waitForEvent`.
+    export type WorkflowStepEvent<T> = {
+        payload: Readonly<T>;
+        timestamp: Date;
+        type: string;
+    };
+    // Step API handed to `WorkflowEntrypoint.run`. `do` has two overloads: with and
+    // without a `WorkflowStepConfig`; both resolve to the callback's result type.
+    export abstract class WorkflowStep {
+        do<T extends Rpc.Serializable<T>>(name: string, callback: () => Promise<T>): Promise<T>;
+        do<T extends Rpc.Serializable<T>>(name: string, config: WorkflowStepConfig, callback: () => Promise<T>): Promise<T>;
+        sleep: (name: string, duration: WorkflowSleepDuration) => Promise<void>;
+        sleepUntil: (name: string, timestamp: Date | number) => Promise<void>;
+        waitForEvent<T extends Rpc.Serializable<T>>(name: string, options: {
+            type: string;
+            timeout?: WorkflowTimeoutDuration | number;
+        }): Promise<WorkflowStepEvent<T>>;
+    }
+    export type WorkflowInstanceStatus = 'queued' | 'running' | 'paused' | 'errored' | 'terminated' | 'complete' | 'waiting' | 'waitingForPause' | 'unknown';
+    // Base class carrying the workflow entrypoint brand; unlike the other entrypoints,
+    // `Env` defaults to `unknown` and `run` is not optional.
+    export abstract class WorkflowEntrypoint<Env = unknown, T extends Rpc.Serializable<T> | unknown = unknown> implements Rpc.WorkflowEntrypointBranded {
+        [Rpc.__WORKFLOW_ENTRYPOINT_BRAND]: never;
+        protected ctx: ExecutionContext;
+        protected env: Env;
+        constructor(ctx: ExecutionContext, env: Env);
+        run(event: Readonly<WorkflowEvent<T>>, step: WorkflowStep): Promise<unknown>;
+    }
+    export function waitUntil(promise: Promise<unknown>): void;
+    // Each `with*` helper takes replacement value(s) and a callback, and returns `unknown`.
+    export function withEnv(newEnv: unknown, fn: () => unknown): unknown;
+    export function withExports(newExports: unknown, fn: () => unknown): unknown;
+    export function withEnvAndExports(newEnv: unknown, newExports: unknown, fn: () => unknown): unknown;
+    // Module-level values typed as `Cloudflare.Env` and `Cloudflare.Exports`.
+    export const env: Cloudflare.Env;
+    export const exports: Cloudflare.Exports;
+}
+// The `cloudflare:workers` module is exactly the `CloudflareWorkersModule` namespace.
+declare module 'cloudflare:workers' {
+    export = CloudflareWorkersModule;
+}
+/**
+ * Binding to a secret held in the Secrets Store.
+ */
+interface SecretsStoreSecret {
+    /**
+     * Get a secret from the Secrets Store. Returns the secret value as a string
+     * if it exists, or throws an error if it does not exist.
+     * @returns A promise resolving to the secret value
+     */
+    get(): Promise<string>;
+}
+/**
+ * Module exporting `connect`, which takes an address (string or `SocketAddress`)
+ * and optional `SocketOptions` and returns a `Socket`.
+ */
+declare module "cloudflare:sockets" {
+    // Declared under a private name and re-exported as `connect`.
+    function _connect(address: string | SocketAddress, options?: SocketOptions): Socket;
+    export { _connect as connect };
+}
+/**
+ * Input document for `ToMarkdownService.transform`: a name and its content as a `Blob`.
+ */
+type MarkdownDocument = {
+    name: string;
+    blob: Blob;
+};
+/**
+ * Outcome of converting one document, discriminated by `format`: a `'markdown'`
+ * result carries `tokens` and `data`; an `'error'` result carries an `error` string.
+ * Both variants include `id`, `name` and `mimeType`.
+ */
+type ConversionResponse = {
+    id: string;
+    name: string;
+    mimeType: string;
+    format: 'markdown';
+    tokens: number;
+    data: string;
+} | {
+    id: string;
+    name: string;
+    mimeType: string;
+    format: 'error';
+    error: string;
+};
+/**
+ * Image conversion options: an optional description language chosen from a
+ * fixed set of two-letter codes.
+ */
+type ImageConversionOptions = {
+    descriptionLanguage?: 'en' | 'es' | 'fr' | 'it' | 'pt' | 'de';
+};
+/**
+ * `ImageConversionOptions` extended with an optional `convert` flag and an
+ * optional `maxConvertedImages` number.
+ */
+type EmbeddedImageConversionOptions = ImageConversionOptions & {
+    convert?: boolean;
+    maxConvertedImages?: number;
+};
+/**
+ * Per-format conversion options, keyed by `html`, `docx`, `image` and `pdf`.
+ * Every key and every nested field is optional.
+ */
+type ConversionOptions = {
+    html?: {
+        images?: EmbeddedImageConversionOptions & {
+            convertOGImage?: boolean;
+        };
+        hostname?: string;
+    };
+    docx?: {
+        images?: EmbeddedImageConversionOptions;
+    };
+    image?: ImageConversionOptions;
+    pdf?: {
+        images?: EmbeddedImageConversionOptions;
+        metadata?: boolean;
+    };
+};
+/**
+ * Options accepted by `ToMarkdownService.transform`: optional gateway options,
+ * extra headers and conversion options.
+ */
+type ConversionRequestOptions = {
+    gateway?: GatewayOptions;
+    extraHeaders?: object;
+    conversionOptions?: ConversionOptions;
+};
+/**
+ * One entry of the list resolved by `ToMarkdownService.supported`: a MIME type
+ * and a file extension.
+ */
+type SupportedFileFormat = {
+    mimeType: string;
+    extension: string;
+};
+/**
+ * Service converting documents into `ConversionResponse`s.
+ * `transform` resolves to an array when given an array of documents and to a
+ * single response when given a single document; `supported` resolves to the
+ * list of `SupportedFileFormat` entries.
+ */
+declare abstract class ToMarkdownService {
+    transform(files: MarkdownDocument[], options?: ConversionRequestOptions): Promise<ConversionResponse[]>;
+    transform(files: MarkdownDocument, options?: ConversionRequestOptions): Promise<ConversionResponse>;
+    supported(): Promise<SupportedFileFormat[]>;
+}
+declare namespace TailStream {
+    interface Header {
+        readonly name: string;
+        readonly value: string;
+    }
+    interface FetchEventInfo {
+        readonly type: "fetch";
+        readonly method: string;
+        readonly url: string;
+        readonly cfJson?: object;
+        readonly headers: Header[];
+    }
+    interface JsRpcEventInfo {
+        readonly type: "jsrpc";
+    }
+    interface ScheduledEventInfo {
+        readonly type: "scheduled";
+        readonly scheduledTime: Date;
+        readonly cron: string;
+    }
+    interface AlarmEventInfo {
+        readonly type: "alarm";
+        readonly scheduledTime: Date;
+    }
+    interface QueueEventInfo {
+        readonly type: "queue";
+        readonly queueName: string;
+        readonly batchSize: number;
+    }
+    interface EmailEventInfo {
+        readonly type: "email";
+        readonly mailFrom: string;
+        readonly rcptTo: string;
+        readonly rawSize: number;
+    }
+    interface TraceEventInfo {
+        readonly type: "trace";
+        readonly traces: (string | null)[];
+    }
+    interface HibernatableWebSocketEventInfoMessage {
+        readonly type: "message";
+    }
+    interface HibernatableWebSocketEventInfoError {
+        readonly type: "error";
+    }
+    interface HibernatableWebSocketEventInfoClose {
+        readonly type: "close";
+        readonly code: number;
+        readonly wasClean: boolean;
+    }
+    interface HibernatableWebSocketEventInfo {
+        readonly type: "hibernatableWebSocket";
+        readonly info: HibernatableWebSocketEventInfoClose | HibernatableWebSocketEventInfoError | HibernatableWebSocketEventInfoMessage;
+    }
+    interface CustomEventInfo {
+        readonly type: "custom";
+    }
+    interface FetchResponseInfo {
+        readonly type: "fetch";
+        readonly statusCode: number;
+    }
+    type EventOutcome = "ok" | "canceled" | "exception" | "unknown" | "killSwitch" | "daemonDown" | "exceededCpu" | "exceededMemory" | "loadShed" | "responseStreamDisconnected" | "scriptNotFound";
+    interface ScriptVersion {
+        readonly id: string;
+        readonly tag?: string;
+        readonly message?: string;
+    }
+    interface Onset {
+        readonly type: "onset";
+        readonly attributes: Attribute[];
+        // id for the span being opened by this Onset event.
+        readonly spanId: string;
+        readonly dispatchNamespace?: string;
+        readonly entrypoint?: string;
+        readonly executionModel: string;
+        readonly scriptName?: string;
+        readonly scriptTags?: string[];
+        readonly scriptVersion?: ScriptVersion;
+        readonly info: FetchEventInfo | JsRpcEventInfo | ScheduledEventInfo | AlarmEventInfo | QueueEventInfo | EmailEventInfo | TraceEventInfo | HibernatableWebSocketEventInfo | CustomEventInfo;
+    }
+    interface Outcome {
+        readonly type: "outcome";
+        readonly outcome: EventOutcome;
+        readonly cpuTime: number;
+        readonly wallTime: number;
+    }
+    interface SpanOpen {
+        readonly type: "spanOpen";
+        readonly name: string;
+        // id for the span being opened by this SpanOpen event.
+        readonly spanId: string;
+        readonly info?: FetchEventInfo | JsRpcEventInfo | Attributes;
+    }
+    interface SpanClose {
+        readonly type: "spanClose";
+        readonly outcome: EventOutcome;
+    }
+    interface DiagnosticChannelEvent {
+        readonly type: "diagnosticChannel";
+        readonly channel: string;
+        readonly message: any;
+    }
+    interface Exception {
+        readonly type: "exception";
+        readonly name: string;
+        readonly message: string;
+        readonly stack?: string;
+    }
+    interface Log {
+        readonly type: "log";
+        readonly level: "debug" | "error" | "info" | "log" | "warn";
+        readonly message: object;
+    }
+    interface DroppedEventsDiagnostic {
+        readonly diagnosticsType: "droppedEvents";
+        readonly count: number;
+    }
+    interface StreamDiagnostic {
+        readonly type: 'streamDiagnostic';
+        // To add new diagnostic types, define a new interface and add it to this union type.
+        readonly diagnostic: DroppedEventsDiagnostic;
+    }
+    // This marks the worker handler return information.
+    // This is separate from Outcome because the worker invocation can live for a long time after
+    // returning. For example - Websockets that return an http upgrade response but then continue
+    // streaming information or SSE http connections.
+    interface Return {
+        readonly type: "return";
+        readonly info?: FetchResponseInfo;
+    }
+    interface Attribute {
+        readonly name: string;
+        readonly value: string | string[] | boolean | boolean[] | number | number[] | bigint | bigint[];
+    }
+    interface Attributes {
+        readonly type: "attributes";
+        readonly info: Attribute[];
+    }
+    type EventType = Onset | Outcome | SpanOpen | SpanClose | DiagnosticChannelEvent | Exception | Log | StreamDiagnostic | Return | Attributes;
+    // Context in which this trace event lives.
+    interface SpanContext {
+        // Single id for the entire top-level invocation
+        // This should be a new traceId for the first worker stage invoked in the eyeball request and then
+        // same-account service-bindings should reuse the same traceId but cross-account service-bindings
+        // should use a new traceId.
+        readonly traceId: string;
+        // spanId in which this event is handled
+        // for Onset and SpanOpen events this would be the parent span id
+        // for Outcome and SpanClose events this would be the span id of the opening Onset and SpanOpen events
+        // For Hibernate and Mark this would be the span under which they were emitted.
+        // spanId is not set ONLY if:
+        //  1. This is an Onset event
+        //  2. We are not inheriting any SpanContext. (e.g. this is a cross-account service binding or a new top-level invocation)
+        readonly spanId?: string;
+    }
+    interface TailEvent<Event extends EventType> {
+        // invocation id of the currently invoked worker stage.
+        // invocation id will always be unique to every Onset event and will be the same until the Outcome event.
+        readonly invocationId: string;
+        // Inherited spanContext for this event.
+        readonly spanContext: SpanContext;
+        readonly timestamp: Date;
+        readonly sequence: number;
+        readonly event: Event;
+    }
+    type TailEventHandler<Event extends EventType = EventType> = (event: TailEvent<Event>) => void | Promise<void>;
+    type TailEventHandlerObject = {
+        outcome?: TailEventHandler<Outcome>;
+        spanOpen?: TailEventHandler<SpanOpen>;
+        spanClose?: TailEventHandler<SpanClose>;
+        diagnosticChannel?: TailEventHandler<DiagnosticChannelEvent>;
+        exception?: TailEventHandler<Exception>;
+        log?: TailEventHandler<Log>;
+        return?: TailEventHandler<Return>;
+        attributes?: TailEventHandler<Attributes>;
+    };
+    type TailEventHandlerType = TailEventHandler | TailEventHandlerObject;
+}
+// Copyright (c) 2022-2023 Cloudflare, Inc.
+// Licensed under the Apache 2.0 license found in the LICENSE file or at:
+//     https://opensource.org/licenses/Apache-2.0
+/**
+ * Data types supported for holding vector metadata.
+ */
+type VectorizeVectorMetadataValue = string | number | boolean | string[];
+/**
+ * Additional information to associate with a vector.
+ */
+type VectorizeVectorMetadata = VectorizeVectorMetadataValue | Record<string, VectorizeVectorMetadataValue>;
+type VectorFloatArray = Float32Array | Float64Array;
+interface VectorizeError {
+    code?: number;
+    error: string;
+}
+/**
+ * Comparison logic/operation to use for metadata filtering.
+ *
+ * This list is expected to grow as support for more operations is released.
+ */
+type VectorizeVectorMetadataFilterOp = '$eq' | '$ne' | '$lt' | '$lte' | '$gt' | '$gte';
+type VectorizeVectorMetadataFilterCollectionOp = '$in' | '$nin';
+/**
+ * Filter criteria for vector metadata used to limit the retrieved query result set.
+ */
+type VectorizeVectorMetadataFilter = {
+    [field: string]: Exclude<VectorizeVectorMetadataValue, string[]> | null | {
+        [Op in VectorizeVectorMetadataFilterOp]?: Exclude<VectorizeVectorMetadataValue, string[]> | null;
+    } | {
+        [Op in VectorizeVectorMetadataFilterCollectionOp]?: Exclude<VectorizeVectorMetadataValue, string[]>[];
+    };
+};
+/**
+ * Supported distance metrics for an index.
+ * Distance metrics determine how other "similar" vectors are determined.
+ */
+type VectorizeDistanceMetric = "euclidean" | "cosine" | "dot-product";
+/**
+ * Metadata return levels for a Vectorize query.
+ *
+ * Default to "none".
+ *
+ * @property all      Full metadata for the vector return set, including all fields (including those un-indexed) without truncation. This is a more expensive retrieval, as it requires additional fetching & reading of un-indexed data.
+ * @property indexed  Return all metadata fields configured for indexing in the vector return set. This level of retrieval is "free" in that no additional overhead is incurred returning this data. However, note that indexed metadata is subject to truncation (especially for larger strings).
+ * @property none     No indexed metadata will be returned.
+ */
+type VectorizeMetadataRetrievalLevel = "all" | "indexed" | "none";
+interface VectorizeQueryOptions {
+    topK?: number;
+    namespace?: string;
+    returnValues?: boolean;
+    returnMetadata?: boolean | VectorizeMetadataRetrievalLevel;
+    filter?: VectorizeVectorMetadataFilter;
+}
+/**
+ * Information about the configuration of an index.
+ */
+type VectorizeIndexConfig = {
+    dimensions: number;
+    metric: VectorizeDistanceMetric;
+} | {
+    preset: string; // keep this generic, as we'll be adding more presets in the future and this is only in a read capacity
+};
+/**
+ * Metadata about an existing index.
+ *
+ * This type is exclusively for the Vectorize **beta** and will be deprecated once Vectorize RC is released.
+ * See {@link VectorizeIndexInfo} for its post-beta equivalent.
+ */
+interface VectorizeIndexDetails {
+    /** The unique ID of the index */
+    readonly id: string;
+    /** The name of the index. */
+    name: string;
+    /** (optional) A human readable description for the index. */
+    description?: string;
+    /** The index configuration, including the dimension size and distance metric. */
+    config: VectorizeIndexConfig;
+    /** The number of records containing vectors within the index. */
+    vectorsCount: number;
+}
+/**
+ * Metadata about an existing index.
+ */
+interface VectorizeIndexInfo {
+    /** The number of records containing vectors within the index. */
+    vectorCount: number;
+    /** Number of dimensions the index has been configured for. */
+    dimensions: number;
+    /** ISO 8601 datetime of the last processed mutation on the index. All changes before this mutation will be reflected in the index state. NOTE(review): typed as `number` although described as an ISO 8601 datetime — confirm the runtime type. */
+    processedUpToDatetime: number;
+    /** UUIDv4 of the last mutation processed by the index. All changes before this mutation will be reflected in the index state. NOTE(review): typed as `number` although described as a UUIDv4 — confirm the runtime type. */
+    processedUpToMutation: number;
+}
+/**
+ * Represents a single vector value set along with its associated metadata.
+ */
+interface VectorizeVector {
+    /** The ID for the vector. This can be user-defined, and must be unique. It should uniquely identify the object, and is best set based on the ID of what the vector represents. */
+    id: string;
+    /** The vector values */
+    values: VectorFloatArray | number[];
+    /** The namespace this vector belongs to. */
+    namespace?: string;
+    /** Metadata associated with the vector. Includes the values of other fields and potentially additional details. */
+    metadata?: Record<string, VectorizeVectorMetadata>;
+}
+/**
+ * Represents a matched vector for a query along with its score and (if specified) the matching vector information.
+ */
+type VectorizeMatch = Pick<Partial<VectorizeVector>, "values"> & Omit<VectorizeVector, "values"> & {
+    /** The score or rank for similarity, when returned as a result */
+    score: number;
+};
+/**
+ * A set of matching {@link VectorizeMatch} for a particular query.
+ */
+interface VectorizeMatches {
+    matches: VectorizeMatch[];
+    count: number;
+}
+/**
+ * Results of an operation that performed a mutation on a set of vectors.
+ * Here, `ids` is a list of vectors that were successfully processed.
+ *
+ * This type is exclusively for the Vectorize **beta** and will be deprecated once Vectorize RC is released.
+ * See {@link VectorizeAsyncMutation} for its post-beta equivalent.
+ */
+interface VectorizeVectorMutation {
+    /** List of ids of vectors that were successfully processed. */
+    ids: string[];
+    /** Total count of the number of processed vectors. */
+    count: number;
+}
+/**
+ * Result type indicating a mutation on the Vectorize Index.
+ * Actual mutations are processed async where the `mutationId` is the unique identifier for the operation.
+ */
+interface VectorizeAsyncMutation {
+    /** The unique identifier for the async mutation operation containing the changeset. */
+    mutationId: string;
+}
+/**
+ * A Vectorize Vector Search Index for querying vectors/embeddings.
+ *
+ * This type is exclusively for the Vectorize **beta** and will be deprecated once Vectorize RC is released.
+ * See {@link Vectorize} for its new implementation.
+ */
+declare abstract class VectorizeIndex {
+    /**
+     * Get information about the currently bound index.
+     * @returns A promise that resolves with information about the current index.
+     */
+    public describe(): Promise<VectorizeIndexDetails>;
+    /**
+     * Use the provided vector to perform a similarity search across the index.
+     * @param vector Input vector that will be used to drive the similarity search.
+     * @param options Configuration options to massage the returned data.
+     * @returns A promise that resolves with matched and scored vectors.
+     */
+    public query(vector: VectorFloatArray | number[], options?: VectorizeQueryOptions): Promise<VectorizeMatches>;
+    /**
+     * Insert a list of vectors into the index dataset. If a provided id exists, an error will be thrown.
+     * @param vectors List of vectors that will be inserted.
+     * @returns A promise that resolves with the ids & count of records that were successfully processed.
+     */
+    public insert(vectors: VectorizeVector[]): Promise<VectorizeVectorMutation>;
+    /**
+     * Upsert a list of vectors into the index dataset. If a provided id exists, it will be replaced with the new values.
+     * @param vectors List of vectors that will be upserted.
+     * @returns A promise that resolves with the ids & count of records that were successfully processed.
+     */
+    public upsert(vectors: VectorizeVector[]): Promise<VectorizeVectorMutation>;
+    /**
+     * Delete a list of vectors with a matching id.
+     * @param ids List of vector ids that should be deleted.
+     * @returns A promise that resolves with the ids & count of records that were successfully processed (and thus deleted).
+     */
+    public deleteByIds(ids: string[]): Promise<VectorizeVectorMutation>;
+    /**
+     * Get a list of vectors with a matching id.
+     * @param ids List of vector ids that should be returned.
+     * @returns A promise that resolves with the raw unscored vectors matching the id set.
+     */
+    public getByIds(ids: string[]): Promise<VectorizeVector[]>;
+}
+/**
+ * A Vectorize Vector Search Index for querying vectors/embeddings.
+ *
+ * Mutations in this version are async, returning a mutation id.
+ */
+declare abstract class Vectorize {
+    /**
+     * Get information about the currently bound index.
+     * @returns A promise that resolves with information about the current index.
+     */
+    public describe(): Promise<VectorizeIndexInfo>;
+    /**
+     * Use the provided vector to perform a similarity search across the index.
+     * @param vector Input vector that will be used to drive the similarity search.
+     * @param options Configuration options to massage the returned data.
+     * @returns A promise that resolves with matched and scored vectors.
+     */
+    public query(vector: VectorFloatArray | number[], options?: VectorizeQueryOptions): Promise<VectorizeMatches>;
+    /**
+     * Use the provided vector-id to perform a similarity search across the index.
+     * @param vectorId Id for a vector in the index against which the index should be queried.
+     * @param options Configuration options to massage the returned data.
+     * @returns A promise that resolves with matched and scored vectors.
+     */
+    public queryById(vectorId: string, options?: VectorizeQueryOptions): Promise<VectorizeMatches>;
+    /**
+     * Insert a list of vectors into the index dataset. If a provided id exists, an error will be thrown.
+     * @param vectors List of vectors that will be inserted.
+     * @returns A promise that resolves with a unique identifier of a mutation containing the insert changeset.
+     */
+    public insert(vectors: VectorizeVector[]): Promise<VectorizeAsyncMutation>;
+    /**
+     * Upsert a list of vectors into the index dataset. If a provided id exists, it will be replaced with the new values.
+     * @param vectors List of vectors that will be upserted.
+     * @returns A promise that resolves with a unique identifier of a mutation containing the upsert changeset.
+     */
+    public upsert(vectors: VectorizeVector[]): Promise<VectorizeAsyncMutation>;
+    /**
+     * Delete a list of vectors with a matching id.
+     * @param ids List of vector ids that should be deleted.
+     * @returns A promise that resolves with a unique identifier of a mutation containing the delete changeset.
+     */
+    public deleteByIds(ids: string[]): Promise<VectorizeAsyncMutation>;
+    /**
+     * Get a list of vectors with a matching id.
+     * @param ids List of vector ids that should be returned.
+     * @returns A promise that resolves with the raw unscored vectors matching the id set.
+     */
+    public getByIds(ids: string[]): Promise<VectorizeVector[]>;
+}
+/**
+ * The interface for "version_metadata" binding
+ * providing metadata about the Worker Version using this binding.
+ */
+type WorkerVersionMetadata = {
+    /** The ID of the Worker Version using this binding */
+    id: string;
+    /** The tag of the Worker Version using this binding */
+    tag: string;
+    /** The timestamp of when the Worker Version was uploaded */
+    timestamp: string;
+};
+interface DynamicDispatchLimits {
+    /**
+     * Limit CPU time in milliseconds.
+     */
+    cpuMs?: number;
+    /**
+     * Limit number of subrequests.
+     */
+    subRequests?: number;
+}
+interface DynamicDispatchOptions {
+    /**
+     * Limit resources of invoked Worker script.
+     */
+    limits?: DynamicDispatchLimits;
+    /**
+     * Arguments for outbound Worker script, if configured.
+     */
+    outbound?: {
+        [key: string]: any;
+    };
+}
+interface DispatchNamespace {
+    /**
+    * @param name Name of the Worker script.
+    * @param args Arguments to Worker script.
+    * @param options Options for Dynamic Dispatch invocation.
+    * @returns A Fetcher object that allows you to send requests to the Worker script.
+    * @throws If the Worker script does not exist in this dispatch namespace, an error will be thrown.
+    */
+    get(name: string, args?: {
+        [key: string]: any;
+    }, options?: DynamicDispatchOptions): Fetcher;
+}
+declare module 'cloudflare:workflows' {
+    /**
+     * NonRetryableError allows for a user to throw a fatal error
+     * that makes a Workflow instance fail immediately without triggering a retry
+     */
+    export class NonRetryableError extends Error {
+        public constructor(message: string, name?: string);
+    }
+}
+declare abstract class Workflow<PARAMS = unknown> {
+    /**
+     * Get a handle to an existing instance of the Workflow.
+     * @param id Id for the instance of this Workflow
+     * @returns A promise that resolves with a handle for the Instance
+     */
+    public get(id: string): Promise<WorkflowInstance>;
+    /**
+     * Create a new instance and return a handle to it. If a provided id exists, an error will be thrown.
+     * @param options Options when creating an instance including id and params
+     * @returns A promise that resolves with a handle for the Instance
+     */
+    public create(options?: WorkflowInstanceCreateOptions<PARAMS>): Promise<WorkflowInstance>;
+    /**
+     * Create a batch of instances and return handles for all of them. If a provided id exists, an error will be thrown.
+     * `createBatch` is limited to 100 instances at a time, or fewer if the RPC limit for the batch (1MiB) is reached first.
+     * @param batch List of Options when creating an instance including name and params
+     * @returns A promise that resolves with a list of handles for the created instances.
+     */
+    public createBatch(batch: WorkflowInstanceCreateOptions<PARAMS>[]): Promise<WorkflowInstance[]>;
+}
+type WorkflowDurationLabel = 'second' | 'minute' | 'hour' | 'day' | 'week' | 'month' | 'year';
+type WorkflowSleepDuration = `${number} ${WorkflowDurationLabel}${'s' | ''}` | number;
+type WorkflowRetentionDuration = WorkflowSleepDuration;
+interface WorkflowInstanceCreateOptions<PARAMS = unknown> {
+    /**
+     * An id for your Workflow instance. Must be unique within the Workflow.
+     */
+    id?: string;
+    /**
+     * The event payload the Workflow instance is triggered with
+     */
+    params?: PARAMS;
+    /**
+     * The retention policy for Workflow instance.
+     * Defaults to the maximum retention period available for the owner's account.
+     */
+    retention?: {
+        successRetention?: WorkflowRetentionDuration;
+        errorRetention?: WorkflowRetentionDuration;
+    };
+}
+type InstanceStatus = {
+    status: 'queued' // means that instance is waiting to be started (see concurrency limits)
+     | 'running' | 'paused' | 'errored' | 'terminated' // user terminated the instance while it was running
+     | 'complete' | 'waiting' // instance is hibernating and waiting for sleep or event to finish
+     | 'waitingForPause' // instance is finishing the current work to pause
+     | 'unknown';
+    error?: {
+        name: string;
+        message: string;
+    };
+    output?: unknown;
+};
+interface WorkflowError {
+    code?: number;
+    message: string;
+}
+declare abstract class WorkflowInstance {
+    public id: string;
+    /**
+     * Pause the instance.
+     */
+    public pause(): Promise<void>;
+    /**
+     * Resume the instance. If it is already running, an error will be thrown.
+     */
+    public resume(): Promise<void>;
+    /**
+     * Terminate the instance. If it is errored, terminated or complete, an error will be thrown.
+     */
+    public terminate(): Promise<void>;
+    /**
+     * Restart the instance.
+     */
+    public restart(): Promise<void>;
+    /**
+     * Returns the current status of the instance.
+     */
+    public status(): Promise<InstanceStatus>;
+    /**
+     * Send an event to this instance.
+     */
+    public sendEvent({ type, payload, }: {
+        type: string;
+        payload: unknown;
+    }): Promise<void>;
+}
diff --git a/llm-gateway/wrangler.jsonc b/llm-gateway/wrangler.jsonc
new file mode 100644
index 000000000..c531c77e8
--- /dev/null
+++ b/llm-gateway/wrangler.jsonc
@@ -0,0 +1,115 @@
+{
+  "$schema": "node_modules/wrangler/config-schema.json",
+  "name": "llm-gateway",
+  "account_id": "e115e769bcdd4c3d66af59d3332cb394",
+  "main": "src/index.ts",
+  "compatibility_date": "2026-02-01",
+  "compatibility_flags": ["nodejs_compat"],
+  "observability": {
+    "enabled": true,
+  },
+  "logpush": true,
+  "placement": {
+    "mode": "smart",
+  },
+  "routes": [
+    {
+      "pattern": "llm-gateway.kiloapps.io",
+      "custom_domain": true,
+    },
+  ],
+  "hyperdrive": [
+    {
+      "binding": "HYPERDRIVE",
+      "id": "624ec80650dd414199349f4e217ddb10",
+      "localConnectionString": "postgres://postgres:postgres@localhost:5432/postgres",
+    },
+  ],
+  "durable_objects": {
+    "bindings": [
+      {
+        "name": "RATE_LIMIT_DO",
+        "class_name": "RateLimitDO",
+      },
+    ],
+  },
+  "migrations": [
+    {
+      "tag": "v1",
+      "new_classes": ["RateLimitDO"],
+    },
+  ],
+  "services": [
+    {
+      "binding": "O11Y",
+      "service": "o11y",
+    },
+  ],
+  "secrets_store_secrets": [
+    {
+      "binding": "NEXTAUTH_SECRET_PROD",
+      "store_id": "342a86d9e3a94da698e82d0c6e2a36f0",
+      "secret_name": "NEXTAUTH_SECRET_PROD",
+    },
+    {
+      "binding": "OPENROUTER_API_KEY",
+      "store_id": "342a86d9e3a94da698e82d0c6e2a36f0",
+      "secret_name": "OPENROUTER_API_KEY",
+    },
+    {
+      "binding": "GIGAPOTATO_API_KEY",
+      "store_id": "342a86d9e3a94da698e82d0c6e2a36f0",
+      "secret_name": "GIGAPOTATO_API_KEY",
+    },
+    {
+      "binding": "CORETHINK_API_KEY",
+      "store_id": "342a86d9e3a94da698e82d0c6e2a36f0",
+      "secret_name": "CORETHINK_API_KEY",
+    },
+    {
+      "binding": "MARTIAN_API_KEY",
+      "store_id": "342a86d9e3a94da698e82d0c6e2a36f0",
+      "secret_name": "MARTIAN_API_KEY",
+    },
+    {
+      "binding": "MISTRAL_API_KEY",
+      "store_id": "342a86d9e3a94da698e82d0c6e2a36f0",
+      "secret_name": "MISTRAL_API_KEY",
+    },
+    {
+      "binding": "VERCEL_AI_GATEWAY_API_KEY",
+      "store_id": "342a86d9e3a94da698e82d0c6e2a36f0",
+      "secret_name": "VERCEL_AI_GATEWAY_API_KEY",
+    },
+    {
+      "binding": "BYOK_ENCRYPTION_KEY",
+      "store_id": "342a86d9e3a94da698e82d0c6e2a36f0",
+      "secret_name": "BYOK_ENCRYPTION_KEY",
+    },
+    {
+      "binding": "ABUSE_CF_ACCESS_CLIENT_ID",
+      "store_id": "342a86d9e3a94da698e82d0c6e2a36f0",
+      "secret_name": "ABUSE_CF_ACCESS_CLIENT_ID",
+    },
+    {
+      "binding": "ABUSE_CF_ACCESS_CLIENT_SECRET",
+      "store_id": "342a86d9e3a94da698e82d0c6e2a36f0",
+      "secret_name": "ABUSE_CF_ACCESS_CLIENT_SECRET",
+    },
+    {
+      "binding": "GIGAPOTATO_API_URL",
+      "store_id": "342a86d9e3a94da698e82d0c6e2a36f0",
+      "secret_name": "GIGAPOTATO_API_URL",
+    },
+    {
+      "binding": "ABUSE_SERVICE_URL",
+      "store_id": "342a86d9e3a94da698e82d0c6e2a36f0",
+      "secret_name": "ABUSE_SERVICE_URL",
+    },
+    {
+      "binding": "POSTHOG_API_KEY",
+      "store_id": "342a86d9e3a94da698e82d0c6e2a36f0",
+      "secret_name": "POSTHOG_API_KEY",
+    },
+  ],
+}
diff --git a/packages/worker-utils/package.json b/packages/worker-utils/package.json
index 878a1897e..57ac1da13 100644
--- a/packages/worker-utils/package.json
+++ b/packages/worker-utils/package.json
@@ -13,6 +13,7 @@
     "lint": "eslint --config eslint.config.mjs --cache 'src/**/*.ts'"
   },
   "dependencies": {
+    "@kilocode/db": "workspace:*",
     "aws4fetch": "catalog:",
     "hono": "catalog:",
     "jose": "catalog:",
diff --git a/packages/worker-utils/src/index.ts b/packages/worker-utils/src/index.ts
index 8a4a06fc7..6ea86b382 100644
--- a/packages/worker-utils/src/index.ts
+++ b/packages/worker-utils/src/index.ts
@@ -25,3 +25,17 @@ export type { Owner, MCPServerConfig } from './types.js';
 
 export { verifyKiloToken, kiloTokenPayload } from './kilo-token.js';
 export type { KiloTokenPayload } from './kilo-token.js';
+
+export { userExistsWithCache } from './user-exists-cache.js';
+
+export {
+  ApiMetricsParamsSchema,
+  SessionMetricsParamsSchema,
+  TerminationReasons,
+} from './o11y-schemas.js';
+export type {
+  ApiMetricsParams,
+  ApiMetricsParamsParsed,
+  SessionMetricsParams,
+  SessionMetricsParamsParsed,
+} from './o11y-schemas.js';
diff --git a/packages/worker-utils/src/o11y-schemas.ts b/packages/worker-utils/src/o11y-schemas.ts
new file mode 100644
index 000000000..6de03c4e6
--- /dev/null
+++ b/packages/worker-utils/src/o11y-schemas.ts
@@ -0,0 +1,86 @@
+import { z } from 'zod';
+
+// ─── API metrics (llm-gateway → o11y) ────────────────────────────────────────
+
+export const ApiMetricsParamsSchema = z.object({
+  kiloUserId: z.string().min(1),
+  organizationId: z.string().min(1).optional(),
+  isAnonymous: z.boolean(),
+  isStreaming: z.boolean(),
+  userByok: z.boolean(),
+  mode: z.string().min(1).optional(),
+  provider: z.string().min(1),
+  inferenceProvider: z.string().optional().default(''),
+  requestedModel: z.string().min(1),
+  resolvedModel: z.string().min(1),
+  toolsAvailable: z.array(z.string().min(1)),
+  toolsUsed: z.array(z.string().min(1)),
+  ttfbMs: z.number().int().nonnegative(),
+  completeRequestMs: z.number().int().nonnegative(),
+  statusCode: z.number().int().min(100).max(599),
+  tokens: z
+    .object({
+      inputTokens: z.number().int().nonnegative().optional(),
+      outputTokens: z.number().int().nonnegative().optional(),
+      cacheWriteTokens: z.number().int().nonnegative().optional(),
+      cacheHitTokens: z.number().int().nonnegative().optional(),
+      totalTokens: z.number().int().nonnegative().optional(),
+    })
+    .optional(),
+});
+
+// Input type: callers can pass undefined for fields with .default().
+export type ApiMetricsParams = z.input<typeof ApiMetricsParamsSchema>;
+// Output type: after .parse(), defaults are applied — fields with .default() are always present (plain .optional() fields may still be undefined).
+export type ApiMetricsParamsParsed = z.infer<typeof ApiMetricsParamsSchema>;
+
+// ─── Session metrics (session-ingest → o11y) ─────────────────────────────────
+
+export const TerminationReasons = [
+  'completed',
+  'error',
+  'interrupted',
+  'abandoned',
+  'unknown',
+] as const;
+
+export const SessionMetricsParamsSchema = z.object({
+  kiloUserId: z.string().min(1),
+  organizationId: z.string().optional().default(''),
+  sessionId: z.string().min(1),
+  platform: z.string().min(1),
+
+  sessionDurationMs: z.number().int().nonnegative(),
+  timeToFirstResponseMs: z.number().int().nonnegative().optional(),
+
+  totalTurns: z.number().int().nonnegative(),
+  totalSteps: z.number().int().nonnegative(),
+
+  toolCallsByType: z.record(z.string(), z.number().int().nonnegative()),
+  toolErrorsByType: z.record(z.string(), z.number().int().nonnegative()),
+
+  totalErrors: z.number().int().nonnegative(),
+  errorsByType: z.record(z.string(), z.number().int().nonnegative()),
+  stuckToolCallCount: z.number().int().nonnegative(),
+
+  totalTokens: z.object({
+    input: z.number().int().nonnegative(),
+    output: z.number().int().nonnegative(),
+    reasoning: z.number().int().nonnegative(),
+    cacheRead: z.number().int().nonnegative(),
+    cacheWrite: z.number().int().nonnegative(),
+  }),
+  totalCost: z.number().nonnegative(),
+
+  compactionCount: z.number().int().nonnegative(),
+  autoCompactionCount: z.number().int().nonnegative(),
+
+  terminationReason: z.enum(TerminationReasons),
+
+  model: z.string().optional().default(''),
+
+  ingestVersion: z.number().int().nonnegative().default(0),
+});
+
+export type SessionMetricsParams = z.input<typeof SessionMetricsParamsSchema>;
+export type SessionMetricsParamsParsed = z.infer<typeof SessionMetricsParamsSchema>;
diff --git a/packages/worker-utils/src/user-exists-cache.ts b/packages/worker-utils/src/user-exists-cache.ts
new file mode 100644
index 000000000..f4e3a1941
--- /dev/null
+++ b/packages/worker-utils/src/user-exists-cache.ts
@@ -0,0 +1,46 @@
+import { eq } from 'drizzle-orm';
+import { kilocode_users } from '@kilocode/db/schema';
+import type { WorkerDb } from '@kilocode/db';
+
+type KVLike = {
+  get(key: string): Promise<string | null>;
+  put(key: string, value: string, options?: { expirationTtl?: number }): Promise<void>;
+};
+
+const TTL_EXISTS_SECONDS = 24 * 60 * 60; // 24h positive cache
+const TTL_NOT_FOUND_SECONDS = 5 * 60; // 5m negative cache — rate-limits DB hits from deleted users
+
+function cacheKey(userId: string) {
+  return `user-exists:${userId}`;
+}
+
+/**
+ * Check whether a user exists using a KV existence cache in front of Postgres.
+ *
+ * - Positive cache ('1'): returns true immediately, no DB query.
+ * - Negative cache ('0'): returns false immediately, no DB query.
+ * - Cache miss: queries the DB, then updates the cache (fire-and-forget; write failures are ignored).
+ */
+export async function userExistsWithCache(
+  cache: KVLike,
+  db: WorkerDb,
+  userId: string
+): Promise<boolean> {
+  const key = cacheKey(userId);
+  const cached = await cache.get(key);
+  if (cached === '1') return true;
+  if (cached === '0') return false;
+
+  const rows = await db
+    .select({ id: kilocode_users.id })
+    .from(kilocode_users)
+    .where(eq(kilocode_users.id, userId))
+    .limit(1);
+  const exists = rows[0] !== undefined;
+
+  // Not awaited, so the rejection is handled here to avoid an unhandled promise rejection.
+  // NOTE(review): without ctx.waitUntil the write may not finish after the response — confirm.
+  const ttl = exists ? TTL_EXISTS_SECONDS : TTL_NOT_FOUND_SECONDS;
+  void cache.put(key, exists ? '1' : '0', { expirationTtl: ttl }).catch(() => undefined);
+  return exists;
+}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 79eb482d6..02ba0830b 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -1207,7 +1207,7 @@ importers:
         version: 5.9.3
       wrangler:
         specifier: ^4.61.0
-        version: 4.61.1(@cloudflare/workers-types@4.20260130.0)
+        version: 4.68.1(@cloudflare/workers-types@4.20260130.0)
 
   cloudflare-session-ingest:
     dependencies:
@@ -1365,6 +1365,82 @@ importers:
         specifier: 'catalog:'
         version: 4.68.1(@cloudflare/workers-types@4.20260130.0)
 
+  llm-gateway:
+    dependencies:
+      '@ai-sdk/anthropic':
+        specifier: ^3.0.41
+        version: 3.0.41(zod@4.3.6)
+      '@ai-sdk/openai':
+        specifier: ^3.0.27
+        version: 3.0.27(zod@4.3.6)
+      '@kilocode/db':
+        specifier: workspace:*
+        version: link:../packages/db
+      '@kilocode/encryption':
+        specifier: workspace:*
+        version: link:../packages/encryption
+      '@kilocode/worker-utils':
+        specifier: workspace:*
+        version: link:../packages/worker-utils
+      '@sentry/cloudflare':
+        specifier: ^10.25.0
+        version: 10.25.0(@cloudflare/workers-types@4.20260130.0)
+      ai:
+        specifier: ^6.0.78
+        version: 6.0.78(zod@4.3.6)
+      drizzle-orm:
+        specifier: 'catalog:'
+        version: 0.45.1(@cloudflare/workers-types@4.20260130.0)(@opentelemetry/api@1.9.0)(@types/pg@8.16.0)(bun-types@1.3.9)(pg@8.18.0)
+      eventsource-parser:
+        specifier: ^3.0.6
+        version: 3.0.6
+      hono:
+        specifier: 'catalog:'
+        version: 4.12.2
+      workers-tagged-logger:
+        specifier: 'catalog:'
+        version: 1.0.0
+      zod:
+        specifier: 'catalog:'
+        version: 4.3.6
+    devDependencies:
+      '@cloudflare/vitest-pool-workers':
+        specifier: ^0.12.8
+        version: 0.12.8(@cloudflare/workers-types@4.20260130.0)(@vitest/runner@4.0.18)(@vitest/snapshot@4.0.18)(vitest@3.2.4)
+      '@kilocode/eslint-config':
+        specifier: workspace:*
+        version: link:../packages/eslint-config
+      '@types/node':
+        specifier: ^22
+        version: 22.19.1
+      '@typescript/native-preview':
+        specifier: 7.0.0-dev.20251019.1
+        version: 7.0.0-dev.20251019.1
+      '@vitest/ui':
+        specifier: ^3.2.4
+        version: 3.2.4(vitest@3.2.4)
+      drizzle-kit:
+        specifier: 'catalog:'
+        version: 0.31.9
+      eslint:
+        specifier: 'catalog:'
+        version: 9.39.3(jiti@2.6.1)
+      jose:
+        specifier: 'catalog:'
+        version: 6.1.3
+      prettier:
+        specifier: 'catalog:'
+        version: 3.8.1
+      typescript:
+        specifier: 'catalog:'
+        version: 5.9.3
+      vitest:
+        specifier: ^3.2.4
+        version: 3.2.4(@types/debug@4.1.12)(@types/node@22.19.1)(@vitest/ui@3.2.4)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.1)
+      wrangler:
+        specifier: 'catalog:'
+        version: 4.68.1(@cloudflare/workers-types@4.20260130.0)
+
   packages/db:
     dependencies:
       drizzle-orm:
@@ -1425,6 +1501,9 @@ importers:
 
   packages/worker-utils:
     dependencies:
+      '@kilocode/db':
+        specifier: workspace:*
+        version: link:../db
       aws4fetch:
         specifier: 'catalog:'
         version: 1.0.20
@@ -8440,10 +8519,6 @@ packages:
       unstorage:
         optional: true
 
-  hono@4.11.7:
-    resolution: {integrity: sha512-l7qMiNee7t82bH3SeyUCt9UF15EVmaBvsppY2zQtrbIhl/yzBTny+YUxsVjSjQ6gaqaeVtZmGocom8TzBlA4Yw==}
-    engines: {node: '>=16.9.0'}
-
   hono@4.12.2:
     resolution: {integrity: sha512-gJnaDHXKDayjt8ue0n8Gs0A007yKXj4Xzb8+cNjZeYsSzzwKc0Lr+OZgYwVfB0pHfUs17EPoLvrOsEaJ9mj+Tg==}
     engines: {node: '>=16.9.0'}
@@ -10104,6 +10179,9 @@ packages:
     peerDependencies:
       pg: '>=8.0'
 
+  pg-protocol@1.10.3:
+    resolution: {integrity: sha512-6DIBgBQaTKDJyxnXaLiLR8wBpQQcGWuAESkRBX/t6OwA8YsqP+iVSiond2EDy6Y/dsGk8rh/jtax3js5NeV7JQ==}
+
   pg-protocol@1.11.0:
     resolution: {integrity: sha512-pfsxk2M9M3BuGgDOfuy37VNRRX3jmKgMjcvAcWqNDpZSf4cUmv8HSOl5ViRQFsfARFn0KuUQTgLxVMbNq5NW3g==}
 
@@ -14055,9 +14133,9 @@ snapshots:
     dependencies:
       '@hapi/hoek': 9.3.0
 
-  '@hono/node-server@1.19.9(hono@4.11.7)':
+  '@hono/node-server@1.19.9(hono@4.12.2)':
     dependencies:
-      hono: 4.11.7
+      hono: 4.12.2
 
   '@hono/trpc-server@0.4.2(@trpc/server@11.9.0(typescript@5.9.3))(hono@4.12.2)':
     dependencies:
@@ -14653,7 +14731,7 @@ snapshots:
 
   '@modelcontextprotocol/sdk@1.27.0(zod@4.3.6)':
     dependencies:
-      '@hono/node-server': 1.19.9(hono@4.11.7)
+      '@hono/node-server': 1.19.9(hono@4.12.2)
       ajv: 8.17.1
       ajv-formats: 3.0.1
       content-type: 1.0.5
@@ -14663,7 +14741,7 @@ snapshots:
       eventsource-parser: 3.0.6
       express: 5.2.1
       express-rate-limit: 8.2.1(express@5.2.1)
-      hono: 4.11.7
+      hono: 4.12.2
       jose: 6.1.3
       json-schema-typed: 8.0.2
       pkce-challenge: 5.0.1
@@ -17551,7 +17629,7 @@ snapshots:
   '@types/pg@8.15.6':
     dependencies:
       '@types/node': 22.19.1
-      pg-protocol: 1.11.0
+      pg-protocol: 1.10.3
       pg-types: 2.2.0
 
   '@types/pg@8.16.0':
@@ -20239,8 +20317,6 @@ snapshots:
     dependencies:
       hono: 4.12.2
 
-  hono@4.11.7: {}
-
   hono@4.12.2: {}
 
   html-entities@2.6.0: {}
@@ -22646,6 +22722,8 @@ snapshots:
     dependencies:
       pg: 8.18.0
 
+  pg-protocol@1.10.3: {}
+
   pg-protocol@1.11.0: {}
 
   pg-types@2.2.0:
@@ -24715,7 +24793,7 @@ snapshots:
     dependencies:
       zod: 4.3.6
     optionalDependencies:
-      hono: 4.11.7
+      hono: 4.12.2
 
   wrangler@4.61.1(@cloudflare/workers-types@4.20260130.0):
     dependencies:
diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml
index e31c311f1..f2a4b6a06 100644
--- a/pnpm-workspace.yaml
+++ b/pnpm-workspace.yaml
@@ -44,6 +44,7 @@ packages:
   - 'kiloclaw'
   - 'cloudflare-gastown'
   - 'cloudflare-gastown/container'
+  - 'llm-gateway'
 
 ignoredBuiltDependencies:
   - '@sentry/cli'