Kilo-Org · lambertjosh · Jun 3, 2026 · Jun 5, 2026 · Jun 5, 2026
diff --git a/apps/web/src/lib/ai-gateway/providers/openrouter/index.ts b/apps/web/src/lib/ai-gateway/providers/openrouter/index.ts
@@ -24,6 +24,7 @@ import { ATTRIBUTION_HEADERS } from '@/lib/ai-gateway/providers/openrouter/attri
 import { getOpenRouterModelsMetadata } from '@/lib/ai-gateway/providers/gateway-models-cache';
 import { getPreferredProviderOrder } from '@/lib/ai-gateway/providers/apply-provider-specific-logic';
 import { normalizeInferenceProviderId } from '@/lib/ai-gateway/providers/openrouter/inference-provider-id';
+import { getTerminalBenchSummaries, terminalBenchFor } from '@/lib/model-stats/terminal-bench';
 import { isFreeNemotronModel, NVIDIA_TRIAL_TOS } from '@/lib/ai-gateway/providers/nvidia';
 
 // Re-export from shared module for backwards compatibility
@@ -119,6 +120,7 @@ export function shouldSuppressOpenRouterModel(model: KiloExclusiveModel): boolea
 async function enhancedModelList(models: OpenRouterModel[]) {
   const autoModels = buildAutoModels();
   const endpointsMetadata = await getOpenRouterModelsMetadata();
+  const summaries = await getTerminalBenchSummaries();
   const enhancedModels = await Promise.all(
     models
       .filter(
@@ -139,7 +141,12 @@ async function enhancedModelList(models: OpenRouterModel[]) {
                 normalizeInferenceProviderId(preferredProvider)
             )?.pricing);
         const pricing = rawPricing ? undoPricingDiscount(rawPricing) : rawPricing;
-        return pricing ? { ...model, pricing } : model;
+        const terminalBench = terminalBenchFor(summaries, model.id);
+        return {
+          ...model,
+          ...(pricing && { pricing }),
+          ...(terminalBench && { terminalBench }),
+        };
       })
       .concat(
         kiloExclusiveModels

diff --git a/apps/web/src/lib/model-stats/terminal-bench.test.ts b/apps/web/src/lib/model-stats/terminal-bench.test.ts
@@ -0,0 +1,70 @@
+import { describe, expect, it } from '@jest/globals';
+import { summarizeTerminalBench, terminalBenchFor } from './terminal-bench';
+
+const summary = { overallScore: 0.551, avgAttemptCostUsd: 53.37 }; // summary the default fixture row is expected to publish
+
+// Test fixture: a benchmarks payload with one terminal-bench eval; `overrides` are spread last.
+function benchmarks(
+  overrides: Partial<{ nAttempts: number | null; avgAttemptCostUsd: number | null }> = {}
+) {
+  const terminalBench = {
+    taskSource: 'terminal-bench',
+    overallScore: summary.overallScore,
+    totalScore: 2.755,
+    avgCostUsd: 1,
+    avgInputTokens: 1,
+    avgOutputTokens: 1,
+    avgCacheReadTokens: 1,
+    avgExecutionMs: 1,
+    nTotalTrials: 5,
+    nAttempts: 5,
+    avgAttemptCostUsd: summary.avgAttemptCostUsd,
+    avgAttemptInputTokens: 1,
+    avgAttemptOutputTokens: 1,
+    avgAttemptCacheReadTokens: 1,
+    nErrored: 0,
+    lastPromotedAt: '2026-06-03T00:00:00.000Z',
+    ...overrides,
+  };
+  return {
+    kiloBench: {
+      overallScore: 0.4,
+      evals: { 'terminal-bench': terminalBench },
+    },
+  };
+}
+
+// Test fixture: an active, non-stealth row whose fields `overrides` may replace.
+function row(overrides: Partial<Parameters<typeof summarizeTerminalBench>[0][number]> = {}) {
+  const defaults = {
+    openrouterId: 'openai/model',
+    isActive: true,
+    isStealth: false,
+  };
+  return { ...defaults, benchmarks: benchmarks(), ...overrides };
+}
+
+describe('summarizeTerminalBench', () => {
+  it('publishes only eligible summaries', () => {
+    // Only the default row is expected to end up in the resulting map.
+    const rows = [
+      row(),
+      row({ isActive: false }),
+      row({ isStealth: true }),
+      row({ benchmarks: benchmarks({ nAttempts: 4 }) }),
+      row({ benchmarks: benchmarks({ avgAttemptCostUsd: null }) }),
+      row({ benchmarks: { kiloBench: { overallScore: 0.4, evals: {} } } }),
+      row({ benchmarks: { kiloBench: { overallScore: 'invalid' } } }),
+    ];
+    expect(summarizeTerminalBench(rows)).toEqual(new Map([['openai/model', summary]]));
+  });
+});
+
+describe('terminalBenchFor', () => {
+  it('matches only safe canonical IDs', () => {
+    const known = new Map([['openai/model', summary]]);
+    const lookup = (id: string) => terminalBenchFor(known, id);
+    expect(lookup('kilo/openai/model')).toEqual(summary);
+    expect(lookup('kilo/special-model')).toBeUndefined();
+  });
+});
diff --git a/apps/web/src/lib/model-stats/terminal-bench.ts b/apps/web/src/lib/model-stats/terminal-bench.ts
@@ -0,0 +1,83 @@
+import { createCachedFetch } from '@/lib/cached-fetch';
+import { readDb } from '@/lib/drizzle';
+import { ModelStatsBenchmarksSchema, modelStats } from '@kilocode/db/schema';
+import { unprefixKiloGatewayModelId } from '@kilocode/worker-utils/kilo-model-id';
+import { and, eq } from 'drizzle-orm';
+
+const TTL = process.env.NODE_ENV === 'test' ? 0 : 5 * 60 * 1000; // TTL for createCachedFetch; presumably ms — confirm
+
+export type TerminalBenchSummary = {
+  overallScore: number; // copied from the terminal-bench eval's overallScore
+  avgAttemptCostUsd: number; // copied from the eval's avgAttemptCostUsd; null/undefined rows are skipped
+};
+
+export type TerminalBenchSummaries = ReadonlyMap<string, TerminalBenchSummary>; // keyed by openrouterId
+
+type Row = {
+  openrouterId: string; // used as the key of the summaries map
+  isActive: boolean | null; // null is skipped like false by summarizeTerminalBench
+  isStealth: boolean; // stealth rows are never summarized
+  benchmarks: unknown; // validated with ModelStatsBenchmarksSchema before use
+};
+
+/**
+ * Collects terminal-bench summaries keyed by `openrouterId`.
+ * Rows that are inactive, stealth, fail schema validation, report fewer than
+ * five attempts (missing counts as zero), or lack an attempt cost are left out.
+ */
+export function summarizeTerminalBench(rows: readonly Row[]): TerminalBenchSummaries {
+  const published = new Map<string, TerminalBenchSummary>();
+
+  for (const { openrouterId, isActive, isStealth, benchmarks } of rows) {
+    if (isStealth || !isActive) continue;
+
+    const parsed = ModelStatsBenchmarksSchema.safeParse(benchmarks);
+    const bench = parsed.success ? parsed.data?.kiloBench?.evals['terminal-bench'] : undefined;
+    if (!bench) continue;
+
+    const { overallScore, avgAttemptCostUsd, nAttempts } = bench;
+    const tooFewAttempts = (nAttempts ?? 0) < 5;
+    if (tooFewAttempts || avgAttemptCostUsd == null) continue;
+
+    published.set(openrouterId, { overallScore, avgAttemptCostUsd });
+  }
+
+  return published;
+}
+
+/** Looks up `id` as given; on a miss, retries with the result of unprefixKiloGatewayModelId. */
+export function terminalBenchFor(
+  summaries: TerminalBenchSummaries,
+  id: string
+): TerminalBenchSummary | undefined {
+  const direct = summaries.get(id);
+  const bareId = direct ? undefined : unprefixKiloGatewayModelId(id);
+  return direct ?? (bareId ? summaries.get(bareId) : undefined);
+}
+
+/**
+ * Reads the active, non-stealth `modelStats` rows and summarizes them.
+ */
+async function loadTerminalBench(): Promise<TerminalBenchSummaries> {
+  const { openrouterId, isActive, isStealth, benchmarks } = modelStats;
+  const publishable = and(eq(isActive, true), eq(isStealth, false));
+  const rows = await readDb
+    .select({ openrouterId, isActive, isStealth, benchmarks })
+    .from(modelStats)
+    .where(publishable);
+  return summarizeTerminalBench(rows);
+}
+
+// Builds a cached summaries loader via createCachedFetch. Load failures are
+// logged and rethrown; an empty Map is handed over as the third argument.
+function createTerminalBenchFetch(load = loadTerminalBench) {
+  const logAndRethrow = (err: unknown): never => {
+    console.error('[terminal-bench] Failed to load model summaries:', err);
+    throw err;
+  };
+  const emptySummaries = new Map<string, TerminalBenchSummary>();
+  return createCachedFetch(() => load().catch(logAndRethrow), TTL, emptySummaries);
+}
+
+// Module-level instance built with the default loader.
+export const getTerminalBenchSummaries = createTerminalBenchFetch();
diff --git a/apps/web/src/lib/organizations/organization-types.ts b/apps/web/src/lib/organizations/organization-types.ts
@@ -199,6 +199,12 @@ const OpenRouterModelSchema = z.object({
   // kilocode additions:
   preferredIndex: z.number().optional(),
   isFree: z.boolean().optional(),
+  terminalBench: z
+    .object({
+      overallScore: z.number(),
+      avgAttemptCostUsd: z.number(),
+    })
+    .optional(),
   opencode: OpenCodeSettingsSchema.optional(),
 
   id: z.string(),

diff --git a/apps/web/src/tests/openrouter-models.test.ts b/apps/web/src/tests/openrouter-models.test.ts
@@ -1,15 +1,22 @@
-import { test, expect, describe, afterEach, jest, beforeEach } from '@jest/globals';
+import { test, expect, describe, afterEach, beforeEach } from '@jest/globals';
 import { mockOpenRouterModels, createMockResponse } from './helpers/openrouter-models.helper';
 import { GET } from '../app/api/openrouter/models/route';
 import { NextRequest } from 'next/server';
 
 jest.mock('@/lib/user/server', () => ({
-  getUserByAuthorizationHeader: jest.fn().mockImplementation(async () => ({
+  getUserFromAuth: jest.fn(async () => ({
     user: { id: 'test-user-id' },
-    authFailedResponse: null,
+    organizationId: null,
   })),
 }));
 
+jest.mock('@/lib/model-stats/terminal-bench', () => {
+  const entry = () => ({ overallScore: 0.551, avgAttemptCostUsd: 53.37 });
+  const getTerminalBenchSummaries = jest.fn(async () => new Map([['some-other-model', entry()]]));
+  const terminalBenchFor = jest.fn((all: Map<string, unknown>, id: string) => all.get(id));
+  return { getTerminalBenchSummaries, terminalBenchFor };
+});
+
 function createTestRequest(path: string) {
   return new NextRequest(new URL(path, 'http://localhost:3000'), {
     method: 'GET',
@@ -18,8 +25,7 @@ function createTestRequest(path: string) {
 
 describe('GET /api/openrouter/models', () => {
   beforeEach(() => {
-    // Reset all mocks before each test
-    jest.resetAllMocks();
+    jest.clearAllMocks();
   });
 
   test('should handle OpenRouter API errors', async () => {
@@ -86,6 +92,28 @@ describe('GET /api/openrouter/models', () => {
     expect(responseData.data).toBeDefined();
     expect(Array.isArray(responseData.data)).toBe(true);
   });
+
+  test('should include publishable Terminal Bench summaries for canonical models', async () => {
+    const request = createTestRequest('/api/openrouter/models');
+
+    // Every stubbed fetch call resolves to a fresh 200 response with the mock model list.
+    const okResponse = () =>
+      createMockResponse({
+        ok: true,
+        status: 200,
+        statusText: 'OK',
+        jsonData: mockOpenRouterModels,
+      });
+    global.fetch = jest.fn(() => Promise.resolve(okResponse())) as unknown as typeof fetch;
+
+    const response = await GET(request);
+    const { data } = await response.json();
+    const isTarget = (item: { id: string }) => item.id === 'some-other-model';
+    const model = data.find(isTarget);
+
+    expect(response.status).toBe(200);
+    expect(model.terminalBench).toEqual({ overallScore: 0.551, avgAttemptCostUsd: 53.37 });
+  });
 });
 
 afterEach(() => {