Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
82 commits
Select commit Hold shift + click to select a range
5a6b119
feat(llm-gateway): phase 1 scaffolding
iscekic Mar 2, 2026
d11d11e
chore(llm-gateway): strip wrangler.jsonc to bare minimum
iscekic Mar 2, 2026
55ad075
chore(llm-gateway): tidy wrangler.jsonc bindings
iscekic Mar 2, 2026
775f840
feat(llm-gateway): phase 2 — request parsing, auth, anonymous gate
iscekic Mar 2, 2026
7ce0844
feat(llm-gateway): Phase 3 — rate limiting + provider resolution
iscekic Mar 2, 2026
faeacf1
Phase 4: balance/org checks, request validation, request transform
iscekic Mar 2, 2026
84202a8
feat(llm-gateway): Phase 5 — upstream proxy + response handling
iscekic Mar 2, 2026
6121e81
feat(llm-gateway): Phase 6 — background tasks (usage accounting, api …
iscekic Mar 2, 2026
b31334a
feat(llm-gateway): Phase 7 — testing + parity verification (168 tests…
iscekic Mar 2, 2026
b07e629
refactor(llm-gateway): replace setTimeout with scheduler.wait
iscekic Mar 2, 2026
b5eaf47
refactor(llm-gateway): use O11Y service binding RPC instead of HTTP f…
iscekic Mar 2, 2026
c10a8a0
chore(llm-gateway): configure custom domain and remove dev settings
iscekic Mar 3, 2026
1e14ac4
refactor(llm-gateway): move vars to Secrets Store bindings
iscekic Mar 3, 2026
ada05a2
fix(llm-gateway): eliminate .tee() backpressure stalling client stream
iscekic Mar 3, 2026
75a1a56
chore(llm-gateway): fix all lint errors across source files
iscekic Mar 3, 2026
fec8eb9
chore(llm-gateway): use dedicated KV namespace for RATE_LIMIT_KV
iscekic Mar 3, 2026
d226c64
refactor: extract O11Y schemas to @kilocode/worker-utils
iscekic Mar 3, 2026
6a88d63
fix: use z.input for O11Y schema types, add Parsed variants for consu…
iscekic Mar 3, 2026
0af8fac
Merge branch 'main' into feat/llm-gateway
iscekic Mar 3, 2026
8108f0a
Revert unnecessary changes to SessionIngestDO.ts
iscekic Mar 3, 2026
f197802
Regenerate llm-gateway/worker-configuration.d.ts via wrangler types
iscekic Mar 3, 2026
d632dad
Remove redundant comment and empty vars from llm-gateway wrangler.jsonc
iscekic Mar 3, 2026
9a54652
Remove dead llm-gateway/src/types.ts (superseded by src/types/)
iscekic Mar 3, 2026
7daef78
Stop leaking err.message to clients in onError handler
iscekic Mar 3, 2026
cd488ee
Remove unused logger.ts singleton (console.* is intercepted by worker…
iscekic Mar 3, 2026
b3ab5b8
Remove unused /health endpoint from llm-gateway
iscekic Mar 3, 2026
76592e9
Tighten auth middleware: reject invalid tokens, remove redundant user…
iscekic Mar 3, 2026
a4ecbc2
Remove USER_EXISTS_CACHE KV binding from llm-gateway (no longer used …
iscekic Mar 3, 2026
8e735ff
Remove outdated promotion-limit comment from anonymous-gate
iscekic Mar 3, 2026
6c547ea
Remove 'as' cast in balance-and-org by narrowing status type at the s…
iscekic Mar 3, 2026
275ad5b
Remove phase references from request-transform comments
iscekic Mar 3, 2026
e20d58c
Refactor proxy.ts: extract background tasks, fix error logging, remov…
iscekic Mar 3, 2026
2c45058
Remove dead abuse-cost.ts (logic is inline in background-tasks.ts)
iscekic Mar 3, 2026
ab0449d
Clean up api-metrics.ts: reuse getToolsAvailable, remove casts, widen…
iscekic Mar 3, 2026
18af05d
Remove stale cross-project path reference from request-logging comment
iscekic Mar 3, 2026
893e936
Remove redundant casts from usage-accounting.ts
iscekic Mar 3, 2026
f8b3f17
Replace Vercel platform headers with Cloudflare request.cf geo data
iscekic Mar 3, 2026
f456dec
Address remaining bot review comments
iscekic Mar 3, 2026
939711a
Remove stale @ts-expect-error directives for workers-tagged-logger
iscekic Mar 3, 2026
c367f99
Use constant-time comparison for JWT pepper validation
iscekic Mar 3, 2026
212b13b
fix(llm-gateway): fix zai double push and wrap parseAwsCredentials in…
iscekic Mar 3, 2026
176291e
fix(llm-gateway): remove as cast in isAnonymousContext
iscekic Mar 3, 2026
8bb7bd3
fix(llm-gateway): validate max_completion_tokens in addition to max_t…
iscekic Mar 3, 2026
ff7bacd
fix(llm-gateway): verify org membership before granting custom LLM ac…
iscekic Mar 3, 2026
a1b7336
fix(llm-gateway): replace KV rate limiter with Durable Object
iscekic Mar 3, 2026
4911edc
fix(llm-gateway): fix eslint errors
iscekic Mar 3, 2026
f05c021
fix(llm-gateway): fix rate limit double-counting in Durable Object
iscekic Mar 3, 2026
71862d2
fix(llm-gateway): background tasks, TTFB, toolsUsed, query params, cl…
iscekic Mar 3, 2026
e5228c5
fix(llm-gateway): match rate limit error codes and messages to reference
iscekic Mar 3, 2026
ef728fe
fix(llm-gateway): use app.kilo.ai/profile for buyCreditsUrl in 402 re…
iscekic Mar 3, 2026
72f8e57
fix(llm-gateway): scope free_model_usage logging to Kilo-hosted model…
iscekic Mar 3, 2026
bd94d1d
fix: move freeModelRateLimitMiddleware before authMiddleware
iscekic Mar 3, 2026
f9ff2a1
feat: add generation endpoint refetch for accurate cost/token data
iscekic Mar 3, 2026
b0d82a2
feat: add KiloPass threshold check and bonus credit issuance
iscekic Mar 3, 2026
8879ea1
feat: add PostHog first_usage and first_microdollar_usage event tracking
iscekic Mar 3, 2026
0ab25db
fix(llm-gateway): match invalid JSON error shape to reference
iscekic Mar 3, 2026
9363967
fix(llm-gateway): match 402 balance error title/message to reference
iscekic Mar 3, 2026
3ef5600
fix(llm-gateway): add Sentry error observability
iscekic Mar 3, 2026
2964a89
fix(llm-gateway): return 404 for missing/empty model to match reference
iscekic Mar 3, 2026
cce57ac
fix(llm-gateway): fix all pre-existing test failures (169/169 passing)
iscekic Mar 3, 2026
8b2a8bf
fix(llm-gateway): return 400 invalid-path for sub-routes under /api/g…
iscekic Mar 3, 2026
5bc3dec
fix(llm-gateway): use distinct error/message in model-not-allowed res…
iscekic Mar 3, 2026
a440305
fix(llm-gateway): include first-topup bonus amount in 402 message for…
iscekic Mar 3, 2026
c4ff5be
fix(llm-gateway): add context-length exceeded error translation for K…
iscekic Mar 3, 2026
4ce8d26
fix(llm-gateway): add stealth model error handling in makeErrorReadable
iscekic Mar 3, 2026
8897f4c
feat(llm-gateway): add Vercel AI Gateway A/B routing
iscekic Mar 3, 2026
0ec7518
fix(B1): emit background tasks for 402 upstream responses
iscekic Mar 3, 2026
dd56377
fix(B2): emit accounting and logging for free model responses
iscekic Mar 3, 2026
e1d1fc3
fix(B3): use original model id as requestedModel in API metrics for a…
iscekic Mar 3, 2026
5480ac6
fix(B4): normalize resolvedModel in API metrics to strip :free/:exact…
iscekic Mar 3, 2026
e9d7c37
fix(B5): await free_model_usage DB insert before upstream request
iscekic Mar 3, 2026
397173e
fix(B8): await POSTHOG_API_KEY fetch to eliminate race condition
iscekic Mar 3, 2026
6b54ee4
fix(B9): default has_middle_out_transform to false instead of null
iscekic Mar 3, 2026
da262c6
fix(B10): exclude KiloPass credits from paid top-up check
iscekic Mar 3, 2026
8592163
fix: resolve typecheck errors for scheduler stub in test files
iscekic Mar 3, 2026
cddfbae
chore: use dedicated Sentry project for llm-gateway worker
iscekic Mar 3, 2026
614af83
chore: remove tracesSampleRate from Sentry config
iscekic Mar 3, 2026
4086045
chore: simplify deploy script to single env-less command
iscekic Mar 3, 2026
faf9a50
test(vitest): run integration tests and add PostHog test key
iscekic Mar 3, 2026
406ce37
fix: resolve eslint errors in llm-gateway
iscekic Mar 3, 2026
80fd849
test(llm-gateway): add request integration tests
iscekic Mar 3, 2026
f633a6b
test(llm-gateway): tighten typings and mocks in integration tests
iscekic Mar 3, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .claude/worktrees/llm-gateway-fixes
Submodule llm-gateway-fixes added at d6dc4f
1 change: 0 additions & 1 deletion cloudflare-ai-attribution/src/ai-attribution.worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ export type HonoContext = {

const app = new Hono<HonoContext>();

// @ts-expect-error workers-tagged-logger returns Handler typed against an older hono; incompatible with hono 4.12+
app.use('*', useWorkersLogger('ai-attribution'));

// Health check endpoint (no auth required)
Expand Down
30 changes: 1 addition & 29 deletions cloudflare-o11y/src/api-metrics-routes.ts
Original file line number Diff line number Diff line change
@@ -1,36 +1,8 @@
import type { Hono } from 'hono';
import { z } from 'zod';
import { zodJsonValidator } from '@kilocode/worker-utils';
import { zodJsonValidator, ApiMetricsParamsSchema } from '@kilocode/worker-utils';
import { writeApiMetricsDataPoint } from './o11y-analytics';
import { requireAdmin } from './admin-middleware';

// Shared validators reused by several fields of the schema below.
const requiredText = z.string().min(1);
const nonNegativeInt = z.number().int().nonnegative();

/**
 * Zod schema for the JSON body validated on `POST /ingest/api-metrics`.
 *
 * - Identifier-like strings must be non-empty when present.
 * - `inferenceProvider` may be omitted and then defaults to the empty string.
 * - Timings and token counts are non-negative integers.
 * - `statusCode` must be an integer between 100 and 599 inclusive.
 * - `tokens` is optional as a whole, and so is each counter inside it.
 */
export const ApiMetricsParamsSchema = z.object({
  kiloUserId: requiredText,
  organizationId: requiredText.optional(),
  isAnonymous: z.boolean(),
  isStreaming: z.boolean(),
  userByok: z.boolean(),
  mode: requiredText.optional(),
  provider: requiredText,
  inferenceProvider: z.string().optional().default(''),
  requestedModel: requiredText,
  resolvedModel: requiredText,
  toolsAvailable: z.array(requiredText),
  toolsUsed: z.array(requiredText),
  ttfbMs: nonNegativeInt,
  completeRequestMs: nonNegativeInt,
  statusCode: z.number().int().min(100).max(599),
  tokens: z
    .object({
      inputTokens: nonNegativeInt.optional(),
      outputTokens: nonNegativeInt.optional(),
      cacheWriteTokens: nonNegativeInt.optional(),
      cacheHitTokens: nonNegativeInt.optional(),
      totalTokens: nonNegativeInt.optional(),
    })
    .optional(),
});

export function registerApiMetricsRoutes(app: Hono<{ Bindings: Env }>): void {
app.post('/ingest/api-metrics', requireAdmin, zodJsonValidator(ApiMetricsParamsSchema), async (c) => {
const params = c.req.valid('json');
Expand Down
11 changes: 9 additions & 2 deletions cloudflare-o11y/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import { WorkerEntrypoint } from 'cloudflare:workers';
import { Hono } from 'hono';
import { ApiMetricsParamsSchema, SessionMetricsParamsSchema } from '@kilocode/worker-utils';
import type { ApiMetricsParams, SessionMetricsParams } from '@kilocode/worker-utils';
import { registerApiMetricsRoutes } from './api-metrics-routes';
import { evaluateAlerts } from './alerting/evaluate';
import { registerAlertingConfigRoutes } from './alerting/config-routes';
import { SessionMetricsParamsSchema } from './session-metrics-schema';
import type { SessionMetricsParams } from './session-metrics-schema';
import { writeSessionMetricsDataPoint } from './session-metrics-analytics';
import { writeApiMetricsDataPoint } from './o11y-analytics';

export { AlertConfigDO } from './alerting/AlertConfigDO';

Expand All @@ -28,4 +29,10 @@ export default class extends WorkerEntrypoint<Env> {
const parsed = SessionMetricsParamsSchema.parse(params);
await writeSessionMetricsDataPoint(parsed, this.env);
}

/** RPC method called by llm-gateway via service binding. */
async ingestApiMetrics(params: ApiMetricsParams): Promise<void> {
const parsed = ApiMetricsParamsSchema.parse(params);
writeApiMetricsDataPoint(parsed, 'kilo-gateway', this.env, (p) => this.ctx.waitUntil(p));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[WARNING]: writeApiMetricsDataPoint is not awaited, unlike the sibling ingestSessionMetrics method (line 30) which awaits writeSessionMetricsDataPoint.

writeApiMetricsDataPoint is synchronous for the Analytics Engine write but calls waitUntil for the Stream send. Since this is an RPC method called via service binding, the caller (sendApiMetrics in api-metrics.ts) awaits the result. The method will return before the Stream send completes, which is likely fine since waitUntil extends the execution context. However, the inconsistency with ingestSessionMetrics is worth noting — if writeApiMetricsDataPoint ever becomes async (e.g., for error handling), the missing await would silently swallow errors.

}
}
5 changes: 1 addition & 4 deletions cloudflare-o11y/src/o11y-analytics.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
import type { z } from 'zod';
import type { ApiMetricsParamsSchema } from './api-metrics-routes';

type ApiMetricsParams = z.infer<typeof ApiMetricsParamsSchema>;
import type { ApiMetricsParamsParsed as ApiMetricsParams } from '@kilocode/worker-utils';

/**
* Write an API metrics data point to Analytics Engine for alerting queries,
Expand Down
2 changes: 1 addition & 1 deletion cloudflare-o11y/src/session-metrics-analytics.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { SessionMetricsParams } from './session-metrics-schema';
import type { SessionMetricsParamsParsed as SessionMetricsParams } from '@kilocode/worker-utils';

/**
* Write a session metrics data point to Analytics Engine,
Expand Down
43 changes: 0 additions & 43 deletions cloudflare-o11y/src/session-metrics-schema.ts

This file was deleted.

2 changes: 2 additions & 0 deletions cloudflare-session-ingest/src/env.ts
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
import type { O11YBinding } from './o11y-binding';

export type Env = Omit<Cloudflare.Env, 'O11Y'> & { O11Y: O11YBinding };
40 changes: 3 additions & 37 deletions cloudflare-session-ingest/src/middleware/kilo-jwt-auth.ts
Original file line number Diff line number Diff line change
@@ -1,46 +1,12 @@
import { createMiddleware } from 'hono/factory';
import { verifyKiloToken, extractBearerToken } from '@kilocode/worker-utils';
import { eq } from 'drizzle-orm';
import { verifyKiloToken, extractBearerToken, userExistsWithCache } from '@kilocode/worker-utils';
import { getWorkerDb } from '@kilocode/db/client';
import { kilocode_users } from '@kilocode/db/schema';

import type { Env } from '../env';

const USER_EXISTS_TTL_SECONDS = 24 * 60 * 60; // 24h
const USER_NOT_FOUND_TTL_SECONDS = 5 * 60; // 5m

/**
* Check whether a user exists, using KV as a cache in front of Postgres.
* Positive results are cached for 24h. Negative results are cached for 5m
* to rate-limit DB hits from deleted/nonexistent users with valid tokens.
*/
async function userExists(env: Env, userId: string): Promise<boolean> {
const cacheKey = `user-exists:${userId}`;

const cached = await env.USER_EXISTS_CACHE.get(cacheKey);
if (cached === '1') {
return true;
}
if (cached === '0') {
return false;
}

function userExists(env: Env, userId: string): Promise<boolean> {
const db = getWorkerDb(env.HYPERDRIVE.connectionString);
const rows = await db
.select({ id: kilocode_users.id })
.from(kilocode_users)
.where(eq(kilocode_users.id, userId))
.limit(1);

const row = rows[0];

if (!row) {
void env.USER_EXISTS_CACHE.put(cacheKey, '0', { expirationTtl: USER_NOT_FOUND_TTL_SECONDS });
return false;
}

void env.USER_EXISTS_CACHE.put(cacheKey, '1', { expirationTtl: USER_EXISTS_TTL_SECONDS });
return true;
return userExistsWithCache(env.USER_EXISTS_CACHE, db, userId);
}

export const kiloJwtAuthMiddleware = createMiddleware<{
Expand Down
42 changes: 3 additions & 39 deletions cloudflare-session-ingest/src/o11y-binding.d.ts
Original file line number Diff line number Diff line change
@@ -1,41 +1,5 @@
/**
* Augment the wrangler-generated Env to give the O11Y service binding its RPC
* method types. `wrangler types` only sees `Fetcher` for service bindings;
* the actual RPC shape comes from the o11y worker's WorkerEntrypoint and is
* declared here so the generated file can be freely regenerated.
*
* Keep in sync with: cloudflare-o11y/src/session-metrics-schema.ts
*/
import type { SessionMetricsParams } from '@kilocode/worker-utils';

/**
 * Parameter object for the O11Y binding's `ingestSessionMetrics` RPC method.
 */
type O11YSessionMetricsParams = {
  kiloUserId: string;
  organizationId?: string;
  sessionId: string;
  platform: string;
  sessionDurationMs: number;
  timeToFirstResponseMs?: number;
  totalTurns: number;
  totalSteps: number;
  // Per-type counters: arbitrary string key -> count.
  toolCallsByType: { [toolType: string]: number };
  toolErrorsByType: { [toolType: string]: number };
  totalErrors: number;
  errorsByType: { [errorType: string]: number };
  stuckToolCallCount: number;
  // One numeric total per token category; all five keys are required.
  totalTokens: Record<'input' | 'output' | 'reasoning' | 'cacheRead' | 'cacheWrite', number>;
  totalCost: number;
  compactionCount: number;
  autoCompactionCount: number;
  terminationReason: 'completed' | 'error' | 'interrupted' | 'abandoned' | 'unknown';
  model?: string;
  ingestVersion: number;
};

type O11YBinding = Fetcher & {
ingestSessionMetrics(params: O11YSessionMetricsParams): Promise<void>;
export type O11YBinding = Fetcher & {
ingestSessionMetrics(params: SessionMetricsParams): Promise<void>;
};
1 change: 0 additions & 1 deletion cloudflare-webhook-agent-ingest/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ export type HonoContext = {

const app = new Hono<HonoContext>();

// @ts-expect-error workers-tagged-logger returns Handler typed against an older hono; incompatible with hono 4.12+
app.use('*', useWorkersLogger('webhook-agent'));

app.get('/health', c => {
Expand Down
16 changes: 16 additions & 0 deletions llm-gateway/eslint.config.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import { dirname } from 'path';
import { fileURLToPath } from 'url';
import { defineConfig } from 'eslint/config';
import baseConfig from '@kilocode/eslint-config';

// Directory that contains this config file, derived from the module URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Package-local override layered on top of the base config for all TypeScript files.
const typescriptOverrides = {
  files: ['**/*.ts'],
  rules: {
    '@typescript-eslint/restrict-template-expressions': 'off',
  },
};

export default defineConfig([...baseConfig(__dirname), typescriptOverrides]);
51 changes: 51 additions & 0 deletions llm-gateway/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
"name": "llm-gateway",
"version": "1.0.0",
"type": "module",
"private": true,
"description": "LLM Gateway Cloudflare Worker — transparent drop-in replacement for /api/openrouter",
"scripts": {
"preinstall": "npx only-allow pnpm",
"deploy": "wrangler deploy",
"dev": "wrangler dev",
"start": "wrangler dev",
"types": "wrangler types",
"lint": "eslint --config eslint.config.mjs --cache 'src/**/*.ts'",
"lint:fix": "eslint --config eslint.config.mjs --cache --fix 'src/**/*.ts'",
"format": "prettier --write 'src/**/*.ts'",
"format:check": "prettier --check 'src/**/*.ts'",
"test": "vitest run",
"test:watch": "vitest",
"test:integration": "vitest run --config vitest.workers.config.ts",
"test:integration:watch": "vitest --config vitest.workers.config.ts",
"typecheck": "tsgo --noEmit --incremental false"
},
"dependencies": {
"@sentry/cloudflare": "^10.25.0",
"@ai-sdk/anthropic": "^3.0.41",
"@ai-sdk/openai": "^3.0.27",
"@kilocode/db": "workspace:*",
"@kilocode/encryption": "workspace:*",
"@kilocode/worker-utils": "workspace:*",
"ai": "^6.0.78",
"drizzle-orm": "catalog:",
"eventsource-parser": "^3.0.6",
"hono": "catalog:",
"workers-tagged-logger": "catalog:",
"zod": "catalog:"
},
"devDependencies": {
"@cloudflare/vitest-pool-workers": "^0.12.8",
"jose": "catalog:",
"@kilocode/eslint-config": "workspace:*",
"@types/node": "^22",
"@typescript/native-preview": "7.0.0-dev.20251019.1",
"@vitest/ui": "^3.2.4",
"drizzle-kit": "catalog:",
"eslint": "catalog:",
"prettier": "catalog:",
"typescript": "catalog:",
"vitest": "^3.2.4",
"wrangler": "catalog:"
}
}
Loading
Loading