27 changes: 13 additions & 14 deletions src/browser/components/Settings/sections/ProvidersSection.tsx
@@ -1,7 +1,12 @@
import React, { useState, useCallback } from "react";
import { ChevronDown, ChevronRight, Check, X, Eye, EyeOff } from "lucide-react";
import { createEditKeyHandler } from "@/browser/utils/ui/keybinds";
import { SUPPORTED_PROVIDERS } from "@/common/constants/providers";
import {
SUPPORTED_PROVIDERS,
OPENAI_SERVICE_TIERS,
OPENAI_DEFAULT_SERVICE_TIER,
isValidOpenAIServiceTier,
} from "@/common/constants/providers";
import type { ProviderName } from "@/common/constants/providers";
import { ProviderWithIcon } from "@/browser/components/ProviderIcon";
import { useAPI } from "@/browser/contexts/API";
@@ -389,17 +394,10 @@ export function ProvidersSection() {
</TooltipProvider>
</div>
<Select
value={config?.openai?.serviceTier ?? "auto"}
value={config?.openai?.serviceTier ?? OPENAI_DEFAULT_SERVICE_TIER}
onValueChange={(next) => {
if (!api) return;
if (
next !== "auto" &&
next !== "default" &&
next !== "flex" &&
next !== "priority"
) {
return;
}
if (!isValidOpenAIServiceTier(next)) return;

updateOptimistically("openai", { serviceTier: next });
void api.providers.setProviderConfig({
@@ -413,10 +411,11 @@
<SelectValue />
</SelectTrigger>
<SelectContent>
<SelectItem value="auto">auto</SelectItem>
<SelectItem value="default">default</SelectItem>
<SelectItem value="flex">flex</SelectItem>
<SelectItem value="priority">priority</SelectItem>
{OPENAI_SERVICE_TIERS.map((tier) => (
<SelectItem key={tier} value={tier}>
{tier}
</SelectItem>
))}
</SelectContent>
</Select>
</div>
33 changes: 33 additions & 0 deletions src/common/constants/providers.ts
@@ -85,6 +85,39 @@ export const PROVIDER_DEFINITIONS = {
/**
* Union type of all supported provider names
*/
/**
* OpenAI service tier options for API requests.
* - priority: Low-latency responses (~2x cost)
* - flex: 50% cheaper, higher latency (o3, o4-mini, gpt-5)
* - auto: Let OpenAI select the appropriate tier (default)
* - default: Standard processing
*/
export const OPENAI_SERVICE_TIERS = ["auto", "default", "flex", "priority"] as const;
export type OpenAIServiceTier = (typeof OPENAI_SERVICE_TIERS)[number];

/** Default service tier for OpenAI requests */
export const OPENAI_DEFAULT_SERVICE_TIER: OpenAIServiceTier = "auto";

/**
* Type guard to check if a string is a valid OpenAI service tier
*/
export function isValidOpenAIServiceTier(tier: unknown): tier is OpenAIServiceTier {
return typeof tier === "string" && OPENAI_SERVICE_TIERS.includes(tier as OpenAIServiceTier);
}

/**
* Cost multipliers for OpenAI service tiers relative to standard pricing.
* - flex: 50% cheaper (0.5x)
* - priority: 2x premium
* - default/auto: standard pricing (1x)
*/
export const OPENAI_SERVICE_TIER_COST_MULTIPLIERS: Record<OpenAIServiceTier, number> = {
flex: 0.5,
priority: 2.0,
default: 1.0,
auto: 1.0,
};

export type ProviderName = keyof typeof PROVIDER_DEFINITIONS;

/**
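For reference, a minimal sketch (not part of this diff) of how the new exports compose; `estimateTierCost` and the token figures below are illustrative assumptions, not code from the PR:

```ts
import {
  OPENAI_SERVICE_TIER_COST_MULTIPLIERS,
  OPENAI_DEFAULT_SERVICE_TIER,
  isValidOpenAIServiceTier,
  type OpenAIServiceTier,
} from "@/common/constants/providers";

// Hypothetical helper: scale a standard-tier USD cost by the configured tier.
function estimateTierCost(standardCostUsd: number, rawTier: unknown): number {
  const tier: OpenAIServiceTier = isValidOpenAIServiceTier(rawTier)
    ? rawTier
    : OPENAI_DEFAULT_SERVICE_TIER; // unknown input falls back to "auto" (1x)
  return standardCostUsd * OPENAI_SERVICE_TIER_COST_MULTIPLIERS[tier];
}

estimateTierCost(1.25, "flex");     // 0.625 — ~50% cheaper
estimateTierCost(1.25, "priority"); // 2.5   — 2x premium
estimateTierCost(1.25, "bogus");    // 1.25  — falls back to standard pricing
```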
3 changes: 2 additions & 1 deletion src/common/orpc/schemas/api.ts
@@ -1,5 +1,6 @@
import { eventIterator } from "@orpc/server";
import { z } from "zod";
import { OPENAI_SERVICE_TIERS } from "@/common/constants/providers";
import { ChatStatsSchema, SessionUsageFileSchema } from "./chatStats";
import { SendMessageErrorSchema } from "./errors";
import { BranchListResultSchema, ImagePartSchema, MuxMessageSchema } from "./message";
@@ -71,7 +72,7 @@ export const ProviderConfigInfoSchema = z.object({
baseUrl: z.string().optional(),
models: z.array(z.string()).optional(),
/** OpenAI-specific fields */
serviceTier: z.enum(["auto", "default", "flex", "priority"]).optional(),
serviceTier: z.enum(OPENAI_SERVICE_TIERS).optional(),
/** AWS-specific fields (only present for bedrock provider) */
aws: AWSCredentialStatusSchema.optional(),
/** Mux Gateway-specific fields */
1 change: 1 addition & 0 deletions src/common/orpc/schemas/message.ts
@@ -74,6 +74,7 @@ export const MuxMessageSchema = z.object({
historySequence: z.number().optional(),
timestamp: z.number().optional(),
model: z.string().optional(),
historicalUsage: z.any().optional(),
usage: z.any().optional(),
contextUsage: z.any().optional(),
providerMetadata: z.record(z.string(), z.unknown()).optional(),
3 changes: 2 additions & 1 deletion src/common/orpc/schemas/providerOptions.ts
@@ -1,4 +1,5 @@
import { z } from "zod";
import { OPENAI_SERVICE_TIERS } from "@/common/constants/providers";

export const MuxProviderOptionsSchema = z.object({
anthropic: z
@@ -10,7 +11,7 @@ export const MuxProviderOptionsSchema = z.object({
.optional(),
openai: z
.object({
serviceTier: z.enum(["auto", "default", "flex", "priority"]).optional().meta({
serviceTier: z.enum(OPENAI_SERVICE_TIERS).optional().meta({
description:
"OpenAI service tier: priority (low-latency), flex (50% cheaper, higher latency), auto/default (standard)",
}),
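Both schema changes above lean on the same pattern: deriving the Zod enum from the shared `as const` tuple so the runtime schema, the type guard, and the inferred type can't drift apart. A small sketch (assumed usage, not from the diff):

```ts
import { z } from "zod";
import { OPENAI_SERVICE_TIERS } from "@/common/constants/providers";

// z.enum accepts the readonly tuple, so the schema tracks the constant automatically.
const ServiceTierSchema = z.enum(OPENAI_SERVICE_TIERS);
type ServiceTier = z.infer<typeof ServiceTierSchema>; // "auto" | "default" | "flex" | "priority"

ServiceTierSchema.parse("flex");                 // ok
ServiceTierSchema.safeParse("bogus").success;    // false — rejected at the schema boundary
```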
8 changes: 8 additions & 0 deletions src/common/types/message.ts
@@ -1,4 +1,5 @@
import type { UIMessage } from "ai";
import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
import type { LanguageModelV2Usage } from "@ai-sdk/provider";
import type { StreamErrorType } from "./errors";
import type { ToolPolicy } from "@/common/utils/tools/toolPolicy";
@@ -101,6 +102,13 @@ export interface MuxMetadata {
usage?: LanguageModelV2Usage;
// Last step's usage only (for context window display - inputTokens = current context size)
contextUsage?: LanguageModelV2Usage;
/**
* Snapshot of cumulative costs/tokens from before a compaction.
*
* This is only set on compaction summary messages so we can rebuild session usage
* from chat.jsonl if session-usage.json is missing/corrupted.
*/
historicalUsage?: ChatUsageDisplay;
// Aggregated provider metadata across all steps (for cost calculation)
providerMetadata?: Record<string, unknown>;
// Last step's provider metadata (for context window cache display)
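A heavily hedged sketch of the recovery path the `historicalUsage` comment describes — the helper name and the shape of the replayed history are assumptions; only the `MuxMetadata.historicalUsage` field itself comes from this diff:

```ts
import type { MuxMetadata } from "@/common/types/message";
import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";

// Hypothetical recovery pass: given the metadata of every message replayed from
// chat.jsonl, collect the pre-compaction snapshots stored on compaction summary
// messages so cumulative session usage can be re-derived when session-usage.json
// is missing or corrupted. Aggregation itself would reuse whatever combinator
// usageAggregator exposes; this sketch does not assume its API.
function collectHistoricalUsage(history: MuxMetadata[]): ChatUsageDisplay[] {
  return history
    .map((meta) => meta.historicalUsage)
    .filter((u): u is ChatUsageDisplay => u !== undefined);
}
```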
3 changes: 2 additions & 1 deletion src/common/utils/ai/providerOptions.ts
@@ -18,6 +18,7 @@ import {
OPENROUTER_REASONING_EFFORT,
} from "@/common/types/thinking";
import { log } from "@/node/services/log";
import { OPENAI_DEFAULT_SERVICE_TIER } from "@/common/constants/providers";
import type { MuxMessage } from "@/common/types/message";
import { enforceThinkingPolicy } from "@/browser/utils/thinking/policy";
import { normalizeGatewayModel } from "./models";
@@ -217,7 +218,7 @@ export function buildProviderOptions(
disableAutoTruncation,
});

const serviceTier = muxProviderOptions?.openai?.serviceTier ?? "auto";
const serviceTier = muxProviderOptions?.openai?.serviceTier ?? OPENAI_DEFAULT_SERVICE_TIER;

const options: ProviderOptions = {
openai: {
86 changes: 86 additions & 0 deletions src/common/utils/tokens/displayUsage.test.ts
@@ -195,4 +195,90 @@ describe("createDisplayUsage", () => {
expect(result!.cacheCreate.tokens).toBe(1500);
});
});

describe("OpenAI service tier cost adjustments", () => {
// gpt-5 has tier-specific pricing in models.json:
// - standard: input $1.25/M, output $10/M
// - flex: input $0.625/M, output $5/M (~50% cheaper)
// - priority: input $2.50/M, output $20/M (~2x)
const usage: LanguageModelV2Usage = {
inputTokens: 1000000, // 1M tokens for easy math
outputTokens: 100000, // 100K tokens
totalTokens: 1100000,
};

test("applies standard pricing when serviceTier is undefined", () => {
const result = createDisplayUsage(usage, "openai:gpt-5");

expect(result).toBeDefined();
// Standard: $1.25/M input = $1.25 for 1M tokens
expect(result!.input.cost_usd).toBeCloseTo(1.25, 2);
// Standard: $10/M output = $1.00 for 100K tokens
expect(result!.output.cost_usd).toBeCloseTo(1.0, 2);
});

test("applies standard pricing when serviceTier is 'default'", () => {
const result = createDisplayUsage(usage, "openai:gpt-5", {
openai: { serviceTier: "default" },
});

expect(result).toBeDefined();
expect(result!.input.cost_usd).toBeCloseTo(1.25, 2);
expect(result!.output.cost_usd).toBeCloseTo(1.0, 2);
});

test("applies flex pricing when serviceTier is 'flex'", () => {
const result = createDisplayUsage(usage, "openai:gpt-5", {
openai: { serviceTier: "flex" },
});

expect(result).toBeDefined();
// Flex: $0.625/M input = $0.625 for 1M tokens
expect(result!.input.cost_usd).toBeCloseTo(0.625, 3);
// Flex: $5/M output = $0.50 for 100K tokens
expect(result!.output.cost_usd).toBeCloseTo(0.5, 2);
});

test("applies priority pricing when serviceTier is 'priority'", () => {
const result = createDisplayUsage(usage, "openai:gpt-5", {
openai: { serviceTier: "priority" },
});

expect(result).toBeDefined();
// Priority: $2.50/M input = $2.50 for 1M tokens
expect(result!.input.cost_usd).toBeCloseTo(2.5, 2);
// Priority: $20/M output = $2.00 for 100K tokens
expect(result!.output.cost_usd).toBeCloseTo(2.0, 2);
});

test("ignores serviceTier for non-OpenAI models", () => {
// Even if serviceTier is present, non-OpenAI models should use standard pricing
const result = createDisplayUsage(usage, "anthropic:claude-sonnet-4-5", {
openai: { serviceTier: "flex" }, // Should be ignored
});

expect(result).toBeDefined();
// Anthropic pricing shouldn't change based on OpenAI serviceTier
// Just verify tokens are correct (pricing varies by model)
expect(result!.input.tokens).toBe(1000000);
expect(result!.output.tokens).toBe(100000);
});

test("applies flex pricing to cached tokens", () => {
const usageWithCache: LanguageModelV2Usage = {
inputTokens: 1000000, // Includes cached
outputTokens: 100000,
totalTokens: 1100000,
cachedInputTokens: 500000, // 500K cached
};

const result = createDisplayUsage(usageWithCache, "openai:gpt-5", {
openai: { serviceTier: "flex" },
});

expect(result).toBeDefined();
// Flex cache: $0.0625/M = $0.03125 for 500K tokens
expect(result!.cached.cost_usd).toBeCloseTo(0.03125, 4);
});
});
});
32 changes: 28 additions & 4 deletions src/common/utils/tokens/displayUsage.ts
Expand Up @@ -9,6 +9,21 @@ import type { LanguageModelV2Usage } from "@ai-sdk/provider";
import { getModelStats } from "./modelStats";
import type { ChatUsageDisplay } from "./usageAggregator";
import { normalizeGatewayModel } from "../ai/models";
import {
OPENAI_SERVICE_TIER_COST_MULTIPLIERS,
isValidOpenAIServiceTier,
} from "@/common/constants/providers";

/**
* Get cost multiplier for OpenAI service tier.
* Returns 1.0 for unknown tiers (standard pricing).
*/
function getServiceTierMultiplier(serviceTier: string | undefined): number {
if (serviceTier && isValidOpenAIServiceTier(serviceTier)) {
return OPENAI_SERVICE_TIER_COST_MULTIPLIERS[serviceTier];
}
return 1.0;
}

/**
* Create a display-friendly usage object from AI SDK usage
@@ -60,6 +75,12 @@ export function createDisplayUsage(
// Get model stats for cost calculation
const modelStats = getModelStats(model);

// Extract OpenAI service tier from response metadata (actual tier used, not requested)
// AI SDK returns serviceTier in providerMetadata.openai.serviceTier
const serviceTier = isOpenAI
? (providerMetadata?.openai as { serviceTier?: string } | undefined)?.serviceTier
: undefined;

// Calculate costs based on model stats (undefined if model unknown)
let inputCost: number | undefined;
let cachedCost: number | undefined;
@@ -68,11 +89,14 @@
let reasoningCost: number | undefined;

if (modelStats) {
inputCost = inputTokens * modelStats.input_cost_per_token;
cachedCost = cachedTokens * (modelStats.cache_read_input_token_cost ?? 0);
// Get tier multiplier (flex ~50% cheaper, priority ~2x)
const tierMultiplier = getServiceTierMultiplier(serviceTier);

inputCost = inputTokens * modelStats.input_cost_per_token * tierMultiplier;
cachedCost = cachedTokens * (modelStats.cache_read_input_token_cost ?? 0) * tierMultiplier;
cacheCreateCost = cacheCreateTokens * (modelStats.cache_creation_input_token_cost ?? 0);
outputCost = outputWithoutReasoning * modelStats.output_cost_per_token;
reasoningCost = reasoningTokens * modelStats.output_cost_per_token;
outputCost = outputWithoutReasoning * modelStats.output_cost_per_token * tierMultiplier;
reasoningCost = reasoningTokens * modelStats.output_cost_per_token * tierMultiplier;
}

return {
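To spell out the arithmetic the multiplier path produces: effective rate = standard per-token rate from `modelStats` × tier multiplier. The gpt-5 rates below are the ones quoted in the test comments above (assumed for illustration, not read from models.json):

```ts
const inputPerTok = 1.25 / 1e6;  // $1.25 per 1M input tokens (standard)
const outputPerTok = 10 / 1e6;   // $10 per 1M output tokens (standard)

console.log(1_000_000 * inputPerTok * 0.5);  // flex input:      0.625
console.log(100_000 * outputPerTok * 0.5);   // flex output:     0.5
console.log(1_000_000 * inputPerTok * 2.0);  // priority input:  2.5
console.log(100_000 * outputPerTok * 2.0);   // priority output: 2.0
```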
4 changes: 4 additions & 0 deletions src/node/services/agentSession.ts
@@ -28,6 +28,7 @@ import { prepareUserMessageForSend } from "@/common/types/message";
import { createRuntime } from "@/node/runtime/runtimeFactory";
import { MessageQueue } from "./messageQueue";
import type { StreamEndEvent } from "@/common/types/stream";
import type { SessionUsageService } from "./sessionUsageService";
import { CompactionHandler } from "./compactionHandler";
import type { BackgroundProcessManager } from "./backgroundProcessManager";
import { computeDiff } from "@/node/utils/diff";
@@ -85,6 +86,7 @@ interface AgentSessionOptions {
config: Config;
historyService: HistoryService;
partialService: PartialService;
sessionUsageService?: SessionUsageService;
aiService: AIService;
initStateManager: InitStateManager;
backgroundProcessManager: BackgroundProcessManager;
@@ -134,6 +136,7 @@ export class AgentSession {
workspaceId,
config,
historyService,
sessionUsageService,
partialService,
aiService,
initStateManager,
@@ -156,6 +159,7 @@

this.compactionHandler = new CompactionHandler({
workspaceId: this.workspaceId,
sessionUsageService,
historyService: this.historyService,
partialService: this.partialService,
emitter: this.emitter,
6 changes: 3 additions & 3 deletions src/node/services/aiService.ts
@@ -12,6 +12,7 @@ import type { WorkspaceMetadata } from "@/common/types/workspace";
import {
PROVIDER_REGISTRY,
PROVIDER_DEFINITIONS,
isValidOpenAIServiceTier,
type ProviderName,
} from "@/common/constants/providers";

@@ -502,11 +503,10 @@
}

// Extract serviceTier from config to pass through to buildProviderOptions
const configServiceTier = providerConfig.serviceTier as string | undefined;
if (configServiceTier && muxProviderOptions) {
if (isValidOpenAIServiceTier(providerConfig.serviceTier) && muxProviderOptions) {
muxProviderOptions.openai = {
...muxProviderOptions.openai,
serviceTier: configServiceTier as "auto" | "default" | "flex" | "priority",
serviceTier: providerConfig.serviceTier,
};
}

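Why the `as "auto" | "default" | "flex" | "priority"` cast could be dropped here: the type guard narrows the config value in place. A minimal sketch of that narrowing (the `raw` variable stands in for `providerConfig.serviceTier`; it is not code from the PR):

```ts
import { isValidOpenAIServiceTier, type OpenAIServiceTier } from "@/common/constants/providers";

declare const raw: unknown; // e.g. providerConfig.serviceTier as read from persisted config

if (isValidOpenAIServiceTier(raw)) {
  // Inside this branch `raw` is narrowed to "auto" | "default" | "flex" | "priority",
  // so it can be assigned to openai.serviceTier directly — no `as` cast needed.
  const tier: OpenAIServiceTier = raw;
  void tier;
}
```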