diff --git a/packages/opencode/src/altimate/telemetry/index.ts b/packages/opencode/src/altimate/telemetry/index.ts index b13837597..b2b0e7103 100644 --- a/packages/opencode/src/altimate/telemetry/index.ts +++ b/packages/opencode/src/altimate/telemetry/index.ts @@ -86,10 +86,31 @@ export namespace Telemetry { duration_ms: number // Flat token fields — only present when data is available from the provider. // No nested objects: Azure App Insights custom measures must be top-level numbers. + // + // SEMANTICS (read this before writing dashboard queries): + // tokens_input = UNCACHED input tokens. Equal to 0 on a full cache hit (and clamped to 0 when a provider reports more cached than input tokens). + // Normalized across providers: Anthropic (and Bedrock + // Anthropic) return this directly; non-Anthropic + // providers return the inclusive total and Session.getUsage + // subtracts cache_read (and cache_write where present) + // to derive the uncached portion. cache_write in + // particular is only populated for Anthropic / Bedrock / + // Venice metadata paths — OpenAI / OpenRouter don't + // surface a "cache write" concept today, so the + // subtraction there is a no-op. + // tokens_input_total = INCLUSIVE input tokens (uncached + cache_read + + // cache_write). This is what most cost/volume queries + // actually want. Always present (since 2026-05-22). + // tokens_cache_read = subset of tokens_input_total served from prompt cache. + // tokens_cache_write = subset of tokens_input_total committed to prompt cache. + // Invariant: tokens_input + tokens_cache_read + tokens_cache_write == tokens_input_total (tokens_cache_read is optional; treat absent cache fields as 0 in queries). tokens_input: number tokens_output: number - // altimate_change start — total input tokens including cached (for providers like Anthropic that exclude cache from tokens_input) - tokens_input_total?: number + // altimate_change start — total input tokens including cached. 
Always emitted + // as of 2026-05-22 (previously conditional, which made dashboard queries that + // assumed presence return null for non-cache-using providers — including the + // false-positive "tokens_input=0 broken" finding in telemetry-2026-05-21). + tokens_input_total: number // altimate_change end tokens_reasoning?: number // only for reasoning models tokens_cache_read?: number // only when a cached prompt was reused diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts index 74565cf52..f2be3e10a 100644 --- a/packages/opencode/src/session/index.ts +++ b/packages/opencode/src/session/index.ts @@ -821,9 +821,19 @@ export namespace Session { // Anthropic does it differently though - inputTokens doesn't include cached tokens. // It looks like Altimate Code's cost calculation assumes all providers return inputTokens the same way Anthropic does (I'm guessing getUsage logic was originally implemented with anthropic), so it's causing incorrect cost calculation for OpenRouter and others. const excludesCachedTokens = !!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"]) - const adjustedInputTokens = safe( - excludesCachedTokens ? inputTokens : inputTokens - cacheReadInputTokens - cacheWriteInputTokens, + // altimate_change start — clamp at zero so inconsistent provider counts + // (inputTokens < cachedInputTokens) don't produce negative cost. + // Without the Math.max(0, ...), `tokens.input * costInfo.input` becomes + // negative and the per-message cost is then negative — leaks into the + // session total and Azure App Insights dashboards as a negative-cost + // outlier. Clamping at zero preserves the `input + cache.read + + // cache.write === inputTotal` invariant (just at the clamped value) + // and keeps cost monotonically non-negative. + const adjustedInputTokens = Math.max( + 0, + safe(excludesCachedTokens ? 
inputTokens : inputTokens - cacheReadInputTokens - cacheWriteInputTokens), ) + // altimate_change end const total = iife(() => { // Anthropic doesn't provide total_tokens, also ai sdk will vastly undercount if we diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index 0d2f45bab..98409b1a9 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -327,10 +327,15 @@ export namespace SessionProcessor { duration_ms: Date.now() - stepStartTime, tokens_input: usage.tokens.input, tokens_output: usage.tokens.output, - // altimate_change start — include total input tokens (with cache) when they differ from tokens_input - ...(usage.tokens.inputTotal !== usage.tokens.input && { - tokens_input_total: usage.tokens.inputTotal, - }), + // altimate_change start — always emit tokens_input_total so dashboard + // queries can rely on it without null-handling. Pre-2026-05-22 this + // was conditional on `inputTotal !== input` to save 12 bytes per event, + // but the absent field looked like a bug in queries that didn't know + // to coalesce — the false-positive "Anthropic tokens_input=0 broken" + // finding in telemetry-2026-05-21 was driven by this. See the comment + // block on the `generation` event type in telemetry/index.ts for the + // canonical semantics. Cost: ~12 bytes × generations/day, negligible. 
+ tokens_input_total: usage.tokens.inputTotal, // altimate_change end ...(value.usage.reasoningTokens !== undefined && { tokens_reasoning: usage.tokens.reasoning }), ...(value.usage.cachedInputTokens !== undefined && { tokens_cache_read: usage.tokens.cache.read }), diff --git a/packages/opencode/test/session/processor.test.ts b/packages/opencode/test/session/processor.test.ts index e933032fb..1e7dc51b8 100644 --- a/packages/opencode/test/session/processor.test.ts +++ b/packages/opencode/test/session/processor.test.ts @@ -600,6 +600,7 @@ describe("generation telemetry", () => { agent: "builder", finish_reason: "end_turn", tokens_input: 1000, + tokens_input_total: 1900, // input + cache.read + cache.write tokens_output: 500, tokens_reasoning: 200, tokens_cache_read: 800, diff --git a/packages/opencode/test/session/session-getusage.test.ts b/packages/opencode/test/session/session-getusage.test.ts new file mode 100644 index 000000000..26fe7be8d --- /dev/null +++ b/packages/opencode/test/session/session-getusage.test.ts @@ -0,0 +1,248 @@ +import { describe, test, expect } from "bun:test" +import { Session } from "../../src/session" + +/** + * Tests for Session.getUsage — pins the cross-provider token normalization. + * + * Background: telemetry-2026-05-21 flagged "Anthropic tokens_input=0 broken" + * across 54k Sonnet generations. Investigation showed it was a measurement + * artifact: `tokens_input` is uncached-only by design (normalized across + * providers), and full cache hits legitimately produce `tokens_input=0`. The + * dashboard query should use `tokens_input_total` for inclusive volume. + * + * To make that semantics unambiguous, this PR (a) always emits + * `tokens_input_total` and (b) tightens its type to `number` (was optional). + * These tests pin the contract so future regressions are caught early. 
+ */ + +function fakeModel(npm = "@ai-sdk/anthropic"): any { + return { + id: "test-model", + providerID: "test", + api: { npm }, + cost: {}, + } +} + +describe("Session.getUsage — Anthropic-style (uncached input)", () => { + test("full cache hit produces tokens.input=0 and matching tokens.inputTotal", () => { + const result = Session.getUsage({ + model: fakeModel("@ai-sdk/anthropic"), + usage: { + inputTokens: 0, + outputTokens: 200, + cachedInputTokens: 8000, + } as any, + metadata: { anthropic: { cacheCreationInputTokens: 0 } } as any, + }) + + expect(result.tokens.input).toBe(0) + // inputTotal must include the cached read so dashboard queries are accurate. + expect(result.tokens.inputTotal).toBe(8000) + expect(result.tokens.cache.read).toBe(8000) + expect(result.tokens.output).toBe(200) + }) + + test("partial cache hit sums uncached + cached for inputTotal", () => { + const result = Session.getUsage({ + model: fakeModel("@ai-sdk/anthropic"), + usage: { + inputTokens: 1500, // uncached + outputTokens: 100, + cachedInputTokens: 6000, // cache read + } as any, + metadata: { anthropic: { cacheCreationInputTokens: 2000 } } as any, // cache write + }) + + expect(result.tokens.input).toBe(1500) + expect(result.tokens.inputTotal).toBe(1500 + 6000 + 2000) + expect(result.tokens.cache.read).toBe(6000) + expect(result.tokens.cache.write).toBe(2000) + }) + + test("no cache at all leaves inputTotal === input", () => { + const result = Session.getUsage({ + model: fakeModel("@ai-sdk/anthropic"), + usage: { inputTokens: 5000, outputTokens: 50 } as any, + metadata: { anthropic: {} } as any, + }) + + expect(result.tokens.input).toBe(5000) + expect(result.tokens.inputTotal).toBe(5000) + expect(result.tokens.cache.read).toBe(0) + expect(result.tokens.cache.write).toBe(0) + }) +}) + +describe("Session.getUsage — OpenAI-style (inclusive input)", () => { + test("subtracts cached tokens from inputTokens to derive uncached input", () => { + // OpenAI/OpenRouter return inputTokens as 
the inclusive total. + // tokens.input should be uncached only (parity with Anthropic). + const result = Session.getUsage({ + model: fakeModel("@ai-sdk/openai"), + usage: { + inputTokens: 7500, // inclusive total + outputTokens: 100, + cachedInputTokens: 6000, // subset + } as any, + metadata: {} as any, + }) + + expect(result.tokens.input).toBe(1500) // 7500 - 6000 + expect(result.tokens.inputTotal).toBe(7500) + expect(result.tokens.cache.read).toBe(6000) + }) + + test("no cache: input === inputTokens === inputTotal", () => { + const result = Session.getUsage({ + model: fakeModel("@ai-sdk/openai"), + usage: { inputTokens: 4000, outputTokens: 200 } as any, + metadata: {} as any, + }) + + expect(result.tokens.input).toBe(4000) + expect(result.tokens.inputTotal).toBe(4000) + }) + + test("OpenAI cache_write is always 0 (provider doesn't expose the concept)", () => { + // Doc-block claim: "OpenAI / OpenRouter don't surface a cache_write concept". + // Verify the subtraction is a no-op rather than producing wrong numbers. + const result = Session.getUsage({ + model: fakeModel("@ai-sdk/openai"), + usage: { + inputTokens: 5000, + outputTokens: 100, + cachedInputTokens: 2000, + } as any, + metadata: {} as any, // no anthropic / bedrock / venice metadata + }) + + expect(result.tokens.cache.write).toBe(0) + expect(result.tokens.input).toBe(3000) // 5000 - 2000 - 0 + expect(result.tokens.inputTotal).toBe(5000) + }) +}) + +describe("Session.getUsage — provider edge cases", () => { + test("@ai-sdk/google-vertex/anthropic uses Anthropic-style accounting (per total branch)", () => { + // The `total` computation in session/index.ts (right after adjustedInputTokens) includes this NPM as + // Anthropic-shaped. If `metadata.anthropic` is present (which the Vertex + // adapter does set for Claude calls), the excludesCachedTokens check + // routes through the Anthropic branch. 
+ const result = Session.getUsage({ + model: fakeModel("@ai-sdk/google-vertex/anthropic"), + usage: { + inputTokens: 1000, // uncached only + outputTokens: 50, + cachedInputTokens: 4000, + } as any, + metadata: { anthropic: { cacheCreationInputTokens: 500 } } as any, + }) + + expect(result.tokens.input).toBe(1000) + expect(result.tokens.inputTotal).toBe(1000 + 4000 + 500) + expect(result.tokens.cache.read).toBe(4000) + expect(result.tokens.cache.write).toBe(500) + }) + + test("Venice surfaces cacheCreationInputTokens via metadata.venice.usage", () => { + // Venice's cache_write lives at metadata.venice.usage.cacheCreationInputTokens — + // the third metadata path the reader at session/index.ts:815 supports. + // Venice uses OpenAI-style inclusive inputTokens (not the Anthropic + // exclusive style), so the subtraction branch should run. + const result = Session.getUsage({ + model: fakeModel("@ai-sdk/openai"), // Venice exposes an OpenAI-compatible surface + usage: { + inputTokens: 5000, // inclusive + outputTokens: 80, + cachedInputTokens: 1500, + } as any, + metadata: { venice: { usage: { cacheCreationInputTokens: 400 } } } as any, + }) + + expect(result.tokens.cache.write).toBe(400) + expect(result.tokens.cache.read).toBe(1500) + expect(result.tokens.input).toBe(5000 - 1500 - 400) // 3100 + expect(result.tokens.inputTotal).toBe(5000) + }) + + test("Bedrock surfaces cacheWriteInputTokens via metadata.bedrock.usage", () => { + // Bedrock's cache_write lives at a different metadata path than Anthropic's. + // Pin that the reader at session/index.ts:813 picks it up. 
+ const result = Session.getUsage({ + model: fakeModel("@ai-sdk/amazon-bedrock"), + usage: { + inputTokens: 800, + outputTokens: 40, + cachedInputTokens: 2000, + } as any, + metadata: { bedrock: { usage: { cacheWriteInputTokens: 600 } } } as any, + }) + + expect(result.tokens.input).toBe(800) + expect(result.tokens.cache.read).toBe(2000) + expect(result.tokens.cache.write).toBe(600) + expect(result.tokens.inputTotal).toBe(800 + 2000 + 600) + }) + + test("tokens.input is clamped to zero on inconsistent provider counts (no negative cost)", () => { + // Hypothetical: OpenAI returns inputTokens=1000 but cachedInputTokens=2000 + // (inconsistent — should never happen, but providers occasionally surface + // weird numbers). Without clamping, tokens.input would be -1000 and + // cost = tokens.input × costInfo.input would be negative, leaking a + // negative-cost outlier into telemetry and session totals. + // + // The fix (Math.max(0, ...) around adjustedInputTokens) clamps input to + // zero so cost stays non-negative. The invariant `input + cache.read + + // cache.write === inputTotal` still holds — just at the clamped value + // (0 + 2000 + 0 = 2000), not the algebraic value (-1000 + 2000 + 0 = 1000). + const result = Session.getUsage({ + model: fakeModel("@ai-sdk/openai"), + usage: { + inputTokens: 1000, + outputTokens: 50, + cachedInputTokens: 2000, + } as any, + metadata: {} as any, + }) + + expect(result.tokens.input).toBe(0) + expect(result.tokens.input + result.tokens.cache.read + result.tokens.cache.write).toBe( + result.tokens.inputTotal, + ) + expect(result.tokens.inputTotal).toBe(2000) + // Cost must be non-negative — this is the actual user-facing concern. NOTE(review): fakeModel() sets cost: {}, so if getUsage treats missing rates as 0 this check passes even without the clamp — confirm and consider passing non-zero rates. 
+ expect(result.cost).toBeGreaterThanOrEqual(0) + }) +}) + +describe("Session.getUsage — invariant", () => { + test("input + cache.read + cache.write === inputTotal for cache-using calls", () => { + for (const npm of ["@ai-sdk/anthropic", "@ai-sdk/openai", "@ai-sdk/amazon-bedrock"] as const) { + const result = Session.getUsage({ + model: fakeModel(npm), + usage: { + inputTokens: npm === "@ai-sdk/openai" ? 5000 : 1000, // NOTE(review): 5000 is meant as OpenAI's inclusive total, but metadata.anthropic below selects exclusive accounting for every npm, so the subtraction branch is not exercised here + outputTokens: 100, + cachedInputTokens: 3000, + } as any, + metadata: { anthropic: { cacheCreationInputTokens: 1000 } } as any, + }) + + expect(result.tokens.input + result.tokens.cache.read + result.tokens.cache.write).toBe( + result.tokens.inputTotal, + ) + } + }) + + test("inputTotal is always a finite non-negative number", () => { + const result = Session.getUsage({ + model: fakeModel("@ai-sdk/anthropic"), + usage: {} as any, // all fields missing + metadata: undefined, + }) + + expect(Number.isFinite(result.tokens.inputTotal)).toBe(true) + expect(result.tokens.inputTotal).toBeGreaterThanOrEqual(0) + }) +}) diff --git a/packages/opencode/test/telemetry/telemetry.test.ts b/packages/opencode/test/telemetry/telemetry.test.ts index bfd0130db..2f0d2dce8 100644 --- a/packages/opencode/test/telemetry/telemetry.test.ts +++ b/packages/opencode/test/telemetry/telemetry.test.ts @@ -636,6 +636,7 @@ describe("telemetry.toAppInsightsEnvelopes (indirect)", () => { agent: "builder", finish_reason: "end_turn", tokens_input: 100, + tokens_input_total: 115, // input + cache.read + cache.write tokens_output: 200, tokens_reasoning: 50, tokens_cache_read: 10, @@ -649,6 +650,7 @@ describe("telemetry.toAppInsightsEnvelopes (indirect)", () => { const envelopes = JSON.parse(fetchCalls[0].body) const measurements = envelopes[0].data.baseData.measurements expect(measurements.tokens_input).toBe(100) + expect(measurements.tokens_input_total).toBe(115) expect(measurements.tokens_output).toBe(200) expect(measurements.tokens_reasoning).toBe(50) 
expect(measurements.tokens_cache_read).toBe(10)