diff --git a/README.md b/README.md
index 595a1ff305..3680483993 100644
--- a/README.md
+++ b/README.md
@@ -795,6 +795,40 @@ When agents thrive, you thrive. But I want to help you directly too.
 - **Thinking Block Validator**: Validates thinking blocks to ensure proper formatting and prevent API errors from malformed thinking content.
 - **Claude Code Hooks**: Executes hooks from Claude Code's settings.json - this is the compatibility layer that runs PreToolUse/PostToolUse/UserPromptSubmit/Stop hooks.
 
+## Model Config Optimizer
+
+Automatically generate optimal model configurations based on your available models.
+
+```bash
+# Show optimal config based on your available models
+bunx oh-my-opencode model-config
+
+# Show with full rankings for each category
+bunx oh-my-opencode model-config --verbose
+
+# Auto-apply config to your oh-my-opencode.json
+bunx oh-my-opencode model-config --apply
+```
+
+**How it works:**
+1. Detects your available models via `opencode models`
+2. Matches them against static rankings by category (orchestrator, reasoning, fast, coding, etc.)
+3. Generates optimal agent→model and category→model configuration
+4. Outputs JSON config or writes directly with `--apply`
+
+**No API calls - completely free!**
+
+### Contributing Rankings
+
+The model rankings are community-driven. If you have experience with AI models and want to help improve the rankings:
+
+1. Edit `src/cli/model-optimizer/rankings.ts`
+2. Add or reorder models in the appropriate category
+3. Test with `bunx oh-my-opencode model-config --verbose`
+4. Submit a PR
+
+See `src/cli/model-optimizer/AGENTS.md` for detailed contribution guidelines.
+
 ## Configuration
 
 Highly opinionated, but adjustable to taste.
diff --git a/bun.lock b/bun.lock
index 3f14292df5..81a2b06737 100644
--- a/bun.lock
+++ b/bun.lock
@@ -1,5 +1,6 @@
 {
   "lockfileVersion": 1,
+  "configVersion": 0,
   "workspaces": {
     "": {
       "name": "oh-my-opencode",
diff --git a/src/cli/index.ts b/src/cli/index.ts
index 40100a9aa5..963e43bbb8 100644
--- a/src/cli/index.ts
+++ b/src/cli/index.ts
@@ -4,10 +4,12 @@ import { install } from "./install"
 import { run } from "./run"
 import { getLocalVersion } from "./get-local-version"
 import { doctor } from "./doctor"
+import { testModels } from "./model-optimizer/test-command"
 import type { InstallArgs } from "./types"
 import type { RunOptions } from "./run"
 import type { GetLocalVersionOptions } from "./get-local-version/types"
 import type { DoctorOptions } from "./doctor"
+import type { TestModelsOptions } from "./model-optimizer/test-command"
 import packageJson from "../../package.json" with { type: "json" }
 
 const VERSION = packageJson.version
@@ -38,6 +40,8 @@ Model Providers:
   Claude   Required for Sisyphus (main orchestrator) and Librarian agents
   ChatGPT  Powers the Oracle agent for debugging and architecture
   Gemini   Powers frontend, documentation, and multimodal agents
+
+Note: Run 'model-config --apply' after install to auto-configure optimal models
 `)
   .action(async (options) => {
     const args: InstallArgs = {
@@ -136,6 +140,33 @@ Categories:
     process.exit(exitCode)
   })
 
+program
+  .command("model-config")
+  .description("Optimize model configuration based on your available models (no API calls)")
+  .option("--verbose", "Show full rankings for each category")
+  .option("--apply", "Write optimal config to oh-my-opencode.json")
+  .addHelpText("after", `
+Examples:
+  $ bunx oh-my-opencode model-config
+  $ bunx oh-my-opencode model-config --verbose
+  $ bunx oh-my-opencode model-config --apply
+
+This command:
+  1. Detects your available models via 'opencode models'
+  2. Shows the best model for each agent based on pre-defined rankings
+  3. Outputs recommended config (or writes it with --apply)
+
+No API calls - completely free!
+`)
+  .action(async (options) => {
+    const testOptions: TestModelsOptions = {
+      verbose: options.verbose ?? false,
+      apply: options.apply ?? false,
+    }
+    const exitCode = await testModels(testOptions)
+    process.exit(exitCode)
+  })
+
 program
   .command("version")
   .description("Show version information")
diff --git a/src/cli/model-optimizer/AGENTS.md b/src/cli/model-optimizer/AGENTS.md
new file mode 100644
index 0000000000..af95c9c86a
--- /dev/null
+++ b/src/cli/model-optimizer/AGENTS.md
@@ -0,0 +1,97 @@
+# MODEL OPTIMIZER KNOWLEDGE BASE
+
+## OVERVIEW
+Static model optimizer that uses pre-defined rankings to select optimal models for each agent. Entry: `bunx oh-my-opencode model-config` or `bunx oh-my-opencode model-config --apply`.
+
+**No API calls - completely free!**
+
+## STRUCTURE
+```
+model-optimizer/
+├── types.ts           # Type definitions and Zod schemas (ModelTier, ModelInfo)
+├── model-detector.ts  # Parse `opencode models`, tier classification
+├── rankings.ts        # Static model rankings by category
+└── test-command.ts    # CLI command for showing/applying optimal config
+```
+
+## HOW IT WORKS
+1. Detects available models via `opencode models`
+2. Matches available models against static rankings by category
+3. Generates optimal agent→model and category→model configuration
+4. Outputs recommended config (or writes it with `--apply`)
+
+## RANKING CATEGORIES
+| Category | Purpose | Used By |
+|----------|---------|---------|
+| orchestrator | Best overall models for complex tasks | Sisyphus, build, plan |
+| reasoning | Best reasoning/debugging models | oracle, Metis, Momus |
+| fast | Fast + capable models for quick lookups | explore |
+| coding | Best coding models | frontend-ui-ux-engineer |
+| instruction | Best instruction-following models | librarian, document-writer |
+| multimodal | Vision-capable models | multimodal-looker |
+| creative | Creative/artistic models | artistry category |
+| free | Free/cheap fallback models | Fallback for any agent |
+
+## CLI USAGE
+```bash
+# Show optimal config
+bunx oh-my-opencode model-config
+
+# Show with full rankings
+bunx oh-my-opencode model-config --verbose
+
+# Auto-apply config to ~/.config/opencode/oh-my-opencode.json
+bunx oh-my-opencode model-config --apply
+```
+
+## OUTPUT
+The command outputs:
+1. List of your available models
+2. Optimal agent → model mapping with ranking position
+3. Optimal category → model mapping with temperature/variant
+4. Recommended config JSON (or confirmation if --apply used)
+
+## HOW TO CONTRIBUTE RANKINGS
+
+### Adding a New Model
+1. Find the model ID by running `opencode models`
+2. Edit `rankings.ts`
+3. Add the model ID to the appropriate category in `MODEL_RANKINGS`
+4. Position matters - place it where it belongs relative to other models
+5. Test: `bunx oh-my-opencode model-config --verbose`
+
+### Reordering Models
+1. Edit `rankings.ts`
+2. Move the model ID up (higher priority) or down (lower priority) within its category
+3. Test to verify the change
+
+### Adding a New Agent
+1. Add entry to `AGENT_RANKING_MAP` mapping agent name → category
+2. Optionally set variant in `generateOptimalConfig()` function
+
+### Adding a New Task Category
+1. Add entry to `CATEGORY_RANKING_MAP` mapping category → ranking category
+2. Add entry to `CATEGORY_TEMPERATURES` (0.1-0.9)
+3.
Add entry to `CATEGORY_VARIANTS` ("low", "medium", "high", "max") + +### Testing Changes +```bash +bun test src/cli/model-optimizer/ # Run unit tests +bunx oh-my-opencode model-config # See generated config +bunx oh-my-opencode model-config --verbose # See full rankings with your models +``` + +## TYPE REFERENCE +Key types in `types.ts`: +- `ModelTier`: "flagship" | "standard" | "lite" +- `ModelInfo`: Model info parsed from `opencode models` + +Key types in `rankings.ts`: +- `RankingCategory`: The 8 ranking categories +- `AGENT_RANKING_MAP`: Maps agent names to ranking categories +- `CATEGORY_RANKING_MAP`: Maps task categories to ranking categories + +## ANTI-PATTERNS +- Adding models without testing `model-config` command +- Putting paid models before free models in the `free` category +- Skipping TDD (test first!) diff --git a/src/cli/model-optimizer/model-detector.test.ts b/src/cli/model-optimizer/model-detector.test.ts new file mode 100644 index 0000000000..313021f105 --- /dev/null +++ b/src/cli/model-optimizer/model-detector.test.ts @@ -0,0 +1,327 @@ +import { describe, expect, it, mock, spyOn, beforeEach, afterEach } from "bun:test" +import { + parseModelsOutput, + classifyTier, + extractProvider, + detectAvailableModels, +} from "./model-detector" +import type { ModelInfo, ModelTier } from "./types" + +describe("model-detector", () => { + describe("extractProvider", () => { + it("#given a standard model ID #when extracting provider #then returns provider name", () => { + expect(extractProvider("anthropic/claude-opus-4-5")).toBe("anthropic") + }) + + it("#given openai model ID #when extracting provider #then returns openai", () => { + expect(extractProvider("openai/gpt-5.2")).toBe("openai") + }) + + it("#given google model ID #when extracting provider #then returns google", () => { + expect(extractProvider("google/gemini-3-pro-preview")).toBe("google") + }) + + it("#given opencode model ID #when extracting provider #then returns opencode", () => { + expect(extractProvider("opencode/grok-code")).toBe("opencode") + }) + + it("#given model ID without slash #when extracting provider #then returns empty string", () => { + expect(extractProvider("claude-opus-4-5")).toBe("") + }) + + it("#given empty string #when extracting provider #then returns empty string", () => { + expect(extractProvider("")).toBe("") + }) + + it("#given model ID with multiple slashes #when extracting provider #then returns first part", () => { + expect(extractProvider("provider/model/version")).toBe("provider") + }) + }) + + describe("classifyTier", () => { + describe("flagship tier", () => { + it("#given opus model #when classifying tier #then returns flagship", () => { + expect(classifyTier("claude-opus-4-5")).toBe("flagship") + }) + + it("#given gpt-5 model #when classifying tier #then returns flagship", () => { + expect(classifyTier("gpt-5.2")).toBe("flagship") + }) + + it("#given gpt-4 model #when classifying tier #then returns flagship", () => { + expect(classifyTier("gpt-4")).toBe("flagship") + }) + + it("#given pro model #when classifying tier #then returns flagship", () => { + expect(classifyTier("gemini-3-pro-preview")).toBe("flagship") + }) + + it("#given o1 model #when classifying tier #then returns flagship", () => { + expect(classifyTier("o1-preview")).toBe("flagship") + }) + + it("#given o3 model #when classifying tier #then returns flagship", () => { + expect(classifyTier("o3-mini")).toBe("flagship") + }) + }) + + describe("standard tier", () => { + it("#given sonnet model #when classifying tier #then 
returns standard", () => { + expect(classifyTier("claude-sonnet-4-5")).toBe("standard") + }) + + it("#given flash model (not flash-lite) #when classifying tier #then returns standard", () => { + expect(classifyTier("gemini-3-flash")).toBe("standard") + }) + + it("#given gpt-4o-mini model #when classifying tier #then returns standard", () => { + expect(classifyTier("gpt-4o-mini")).toBe("standard") + }) + + it("#given turbo model #when classifying tier #then returns standard", () => { + expect(classifyTier("gpt-4-turbo")).toBe("standard") + }) + + it("#given mistral-large model #when classifying tier #then returns standard", () => { + expect(classifyTier("mistral-large")).toBe("standard") + }) + }) + + describe("lite tier", () => { + it("#given haiku model #when classifying tier #then returns lite", () => { + expect(classifyTier("claude-haiku-4-5")).toBe("lite") + }) + + it("#given nano model #when classifying tier #then returns lite", () => { + expect(classifyTier("gemini-nano")).toBe("lite") + }) + + it("#given mini model (not gpt-4o-mini) #when classifying tier #then returns lite", () => { + expect(classifyTier("some-mini-model")).toBe("lite") + }) + + it("#given flash-lite model #when classifying tier #then returns lite", () => { + expect(classifyTier("gemini-flash-lite")).toBe("lite") + }) + + it("#given gpt-3.5 model #when classifying tier #then returns lite", () => { + expect(classifyTier("gpt-3.5-turbo")).toBe("lite") + }) + }) + + describe("default tier", () => { + it("#given unknown model #when classifying tier #then returns standard as default", () => { + expect(classifyTier("unknown-model-xyz")).toBe("standard") + }) + + it("#given empty string #when classifying tier #then returns standard as default", () => { + expect(classifyTier("")).toBe("standard") + }) + }) + }) + + describe("parseModelsOutput", () => { + it("#given empty output #when parsing #then returns empty array", () => { + expect(parseModelsOutput("")).toEqual([]) + }) + + it("#given whitespace-only output #when parsing #then returns empty array", () => { + expect(parseModelsOutput(" \n \n ")).toEqual([]) + }) + + it("#given single model #when parsing #then returns array with one ModelInfo", () => { + const output = "anthropic/claude-opus-4-5" + const result = parseModelsOutput(output) + + expect(result).toHaveLength(1) + expect(result[0]).toEqual({ + id: "anthropic/claude-opus-4-5", + provider: "anthropic", + name: "claude-opus-4-5", + tier: "flagship", + }) + }) + + it("#given multiple models #when parsing #then returns correct ModelInfo array", () => { + const output = `anthropic/claude-opus-4-5 +anthropic/claude-sonnet-4-5 +anthropic/claude-haiku-4-5 +openai/gpt-5.2` + const result = parseModelsOutput(output) + + expect(result).toHaveLength(4) + expect(result[0].tier).toBe("flagship") + expect(result[1].tier).toBe("standard") + expect(result[2].tier).toBe("lite") + expect(result[3].tier).toBe("flagship") + }) + + it("#given output with empty lines #when parsing #then skips empty lines", () => { + const output = `anthropic/claude-opus-4-5 + +openai/gpt-5.2 + +` + const result = parseModelsOutput(output) + expect(result).toHaveLength(2) + }) + + it("#given output with whitespace around model IDs #when parsing #then trims whitespace", () => { + const output = " anthropic/claude-opus-4-5 \n openai/gpt-5.2 " + const result = parseModelsOutput(output) + + expect(result).toHaveLength(2) + expect(result[0].id).toBe("anthropic/claude-opus-4-5") + expect(result[1].id).toBe("openai/gpt-5.2") + }) + + it("#given malformed 
lines without slash #when parsing #then skips those lines", () => { + const output = `anthropic/claude-opus-4-5 +malformed-no-slash +openai/gpt-5.2` + const result = parseModelsOutput(output) + + expect(result).toHaveLength(2) + expect(result[0].id).toBe("anthropic/claude-opus-4-5") + expect(result[1].id).toBe("openai/gpt-5.2") + }) + + it("#given 50+ models #when parsing #then handles efficiently", () => { + const models = Array.from( + { length: 60 }, + (_, i) => `provider-${i}/model-${i}` + ) + const output = models.join("\n") + const result = parseModelsOutput(output) + + expect(result).toHaveLength(60) + expect(result[0].id).toBe("provider-0/model-0") + expect(result[59].id).toBe("provider-59/model-59") + }) + + it("#given real-world output format #when parsing #then correctly processes all models", () => { + const output = `anthropic/claude-opus-4-5 +anthropic/claude-sonnet-4-5 +anthropic/claude-haiku-4-5 +openai/gpt-5.2 +openai/gpt-4o +google/gemini-3-pro-preview +google/gemini-3-flash +opencode/grok-code +opencode/glm-4.7-free` + const result = parseModelsOutput(output) + + expect(result).toHaveLength(9) + + const opus = result.find((m) => m.id === "anthropic/claude-opus-4-5") + expect(opus).toBeDefined() + expect(opus?.tier).toBe("flagship") + expect(opus?.provider).toBe("anthropic") + expect(opus?.name).toBe("claude-opus-4-5") + + const flash = result.find((m) => m.id === "google/gemini-3-flash") + expect(flash).toBeDefined() + expect(flash?.tier).toBe("standard") + + const haiku = result.find((m) => m.id === "anthropic/claude-haiku-4-5") + expect(haiku).toBeDefined() + expect(haiku?.tier).toBe("lite") + }) + + it("#given output with tabs and special whitespace #when parsing #then handles correctly", () => { + const output = "\tanthropic/claude-opus-4-5\t\n\topenai/gpt-5.2\t" + const result = parseModelsOutput(output) + + expect(result).toHaveLength(2) + expect(result[0].id).toBe("anthropic/claude-opus-4-5") + }) + }) + + describe("detectAvailableModels", () => { + let originalSpawn: typeof Bun.spawn + + beforeEach(() => { + originalSpawn = Bun.spawn + }) + + afterEach(() => { + Bun.spawn = originalSpawn + }) + + it("#given successful command execution #when detecting models #then returns parsed models", async () => { + const mockOutput = `anthropic/claude-opus-4-5 +openai/gpt-5.2` + // @ts-expect-error - mocking Bun.spawn + Bun.spawn = mock(() => ({ + stdout: new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode(mockOutput)) + controller.close() + }, + }), + stderr: new ReadableStream({ + start(controller) { + controller.close() + }, + }), + exited: Promise.resolve(0), + })) + + const result = await detectAvailableModels() + + expect(result).toHaveLength(2) + expect(result[0].id).toBe("anthropic/claude-opus-4-5") + expect(result[1].id).toBe("openai/gpt-5.2") + }) + + it("#given command returns empty output #when detecting models #then returns empty array", async () => { + // @ts-expect-error - mocking Bun.spawn + Bun.spawn = mock(() => ({ + stdout: new ReadableStream({ + start(controller) { + controller.close() + }, + }), + stderr: new ReadableStream({ + start(controller) { + controller.close() + }, + }), + exited: Promise.resolve(0), + })) + + const result = await detectAvailableModels() + expect(result).toEqual([]) + }) + + it("#given command fails with non-zero exit #when detecting models #then returns empty array", async () => { + // @ts-expect-error - mocking Bun.spawn + Bun.spawn = mock(() => ({ + stdout: new ReadableStream({ + 
start(controller) { + controller.close() + }, + }), + stderr: new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode("Command not found")) + controller.close() + }, + }), + exited: Promise.resolve(1), + })) + + const result = await detectAvailableModels() + expect(result).toEqual([]) + }) + + it("#given command throws error #when detecting models #then returns empty array", async () => { + Bun.spawn = mock(() => { + throw new Error("Spawn failed") + }) + + const result = await detectAvailableModels() + expect(result).toEqual([]) + }) + }) +}) diff --git a/src/cli/model-optimizer/model-detector.ts b/src/cli/model-optimizer/model-detector.ts new file mode 100644 index 0000000000..aca0cee173 --- /dev/null +++ b/src/cli/model-optimizer/model-detector.ts @@ -0,0 +1,110 @@ +import type { ModelInfo, ModelTier } from "./types" + +const TIER_PATTERNS: Record = { + flagship: [ + /opus/i, + /gpt-5/i, + /gpt-4(?!o-mini)(?!-turbo)/i, // gpt-4 but not gpt-4o-mini or gpt-4-turbo + /pro(?!mpt)/i, // pro but not "prompt" + /\bo1\b/i, // o1 as word + /\bo3\b/i, // o3 as word + ], + standard: [ + /sonnet/i, + /flash(?!-lite)/i, // flash but not flash-lite + /gpt-4o-mini/i, + /turbo/i, + /mistral-large/i, + ], + lite: [/haiku/i, /nano/i, /flash-lite/i, /gpt-3\.5/i, /\bmini\b(? { + try { + const proc = Bun.spawn(["opencode", "models"], { + stdout: "pipe", + stderr: "pipe", + }) + + const output = await new Response(proc.stdout).text() + const exitCode = await proc.exited + + if (exitCode !== 0) { + return [] + } + + return parseModelsOutput(output) + } catch { + return [] + } +} diff --git a/src/cli/model-optimizer/rankings.ts b/src/cli/model-optimizer/rankings.ts new file mode 100644 index 0000000000..b26b9d2302 --- /dev/null +++ b/src/cli/model-optimizer/rankings.ts @@ -0,0 +1,316 @@ +/** + * Static model rankings by category. + * + * HELP WANTED: We need help ranking AI models! + * If you have experience with these models and can help improve the rankings, + * please contribute: https://github.com/code-yeongyu/oh-my-opencode/issues + * + * HOW TO MODIFY RANKINGS: + * ======================== + * + * 1. MODEL_RANKINGS - Add/reorder models within categories: + * - Each category is an array ordered from BEST to WORST + * - Higher position = higher preference when user runs `model-config` + * - Model IDs must match exactly what `opencode models` outputs + * - Example: "anthropic/claude-opus-4-5", "openai/gpt-5.2" + * + * 2. AGENT_RANKING_MAP - Map agents to ranking categories: + * - Key: agent name (e.g., "oracle", "Sisyphus") + * - Value: which MODEL_RANKINGS category to use + * - When user runs `model-config`, agent gets best available model from its category + * + * 3. CATEGORY_RANKING_MAP - Map task categories to ranking categories: + * - Key: task category from delegate_task (e.g., "quick", "visual-engineering") + * - Value: which MODEL_RANKINGS category to use + * + * 4. 
CATEGORY_TEMPERATURES / CATEGORY_VARIANTS - Per-category defaults: + * - Temperature: 0.1 (deterministic) to 0.9 (creative) + * - Variant: "low", "medium", "high", "max" (thinking effort) + * + * TESTING YOUR CHANGES: + * bun test src/cli/model-optimizer/ # Run tests + * bunx oh-my-opencode model-config # See generated config + * bunx oh-my-opencode model-config --verbose # See full rankings + */ +export const MODEL_RANKINGS = { + // Best overall (orchestrator, complex tasks) + orchestrator: [ + "anthropic/claude-opus-4-5", + "google/antigravity-claude-opus-4-5-thinking", + "openai/gpt-5.2", + "openai/gpt-5.2-codex", + "openai/gpt-5.1-codex-max", + "anthropic/claude-sonnet-4-5", + "google/antigravity-claude-sonnet-4-5-thinking", + "google/antigravity-claude-sonnet-4-5", + "google/gemini-3-pro-preview", + "google/antigravity-gemini-3-pro", + "google/gemini-2.5-pro", + "google/gemini-2.5-pro-preview-06-05", + "google/gemini-2.5-pro-preview-05-06", + "openai/gpt-5.1-codex-mini", + "google/gemini-3-flash-preview", + "google/antigravity-gemini-3-flash", + "google/gemini-2.5-flash", + "google/gemini-2.5-flash-preview-05-20", + "zai-coding-plan/glm-4.7", + "zai-coding-plan/glm-4.6", + "zai-coding-plan/glm-4.6v", + "opencode/glm-4.7-free", + "opencode/grok-code", + "opencode/gpt-5-nano", + ], + + // Best reasoning (oracle, debugging, architecture) + reasoning: [ + "openai/gpt-5.2", + "openai/gpt-5.2-codex", + "anthropic/claude-opus-4-5", + "google/antigravity-claude-opus-4-5-thinking", + "openai/gpt-5.1-codex-max", + "anthropic/claude-sonnet-4-5", + "google/antigravity-claude-sonnet-4-5-thinking", + "google/gemini-3-pro-preview", + "google/antigravity-gemini-3-pro", + "google/gemini-2.5-pro", + "google/gemini-2.5-pro-preview-06-05", + "google/antigravity-claude-sonnet-4-5", + "openai/gpt-5.1-codex-mini", + "google/gemini-3-flash-preview", + "google/antigravity-gemini-3-flash", + "google/gemini-2.5-flash", + "zai-coding-plan/glm-4.7", + "zai-coding-plan/glm-4.6", + "opencode/glm-4.7-free", + "opencode/grok-code", + ], + + // Fast + capable (explore, quick lookups) + fast: [ + "google/gemini-3-flash-preview", + "google/antigravity-gemini-3-flash", + "google/gemini-2.5-flash", + "google/gemini-2.5-flash-preview-05-20", + "google/gemini-2.5-flash-lite", + "google/gemini-2.0-flash", + "google/gemini-2.0-flash-lite", + "google/gemini-1.5-flash", + "google/gemini-1.5-flash-8b", + "openai/gpt-5.1-codex-mini", + "opencode/grok-code", + "opencode/gpt-5-nano", + "zai-coding-plan/glm-4.5-flash", + "zai-coding-plan/glm-4.5-air", + "anthropic/claude-sonnet-4-5", + "google/antigravity-claude-sonnet-4-5", + "google/antigravity-gemini-3-pro", + "zai-coding-plan/glm-4.7", + "opencode/glm-4.7-free", + ], + + // Best coding (frontend, implementation) + coding: [ + "anthropic/claude-sonnet-4-5", + "google/antigravity-claude-sonnet-4-5", + "google/antigravity-claude-sonnet-4-5-thinking", + "anthropic/claude-opus-4-5", + "google/antigravity-claude-opus-4-5-thinking", + "openai/gpt-5.2-codex", + "openai/gpt-5.1-codex-max", + "openai/gpt-5.2", + "google/gemini-3-pro-preview", + "google/antigravity-gemini-3-pro", + "google/gemini-2.5-pro", + "openai/gpt-5.1-codex-mini", + "google/gemini-3-flash-preview", + "google/antigravity-gemini-3-flash", + "google/gemini-2.5-flash", + "opencode/grok-code", + "zai-coding-plan/glm-4.7", + "zai-coding-plan/glm-4.6", + "opencode/glm-4.7-free", + ], + + // Best instruction-following (librarian, document-writer) + instruction: [ + "anthropic/claude-sonnet-4-5", + 
"google/antigravity-claude-sonnet-4-5", + "anthropic/claude-opus-4-5", + "google/antigravity-claude-opus-4-5-thinking", + "openai/gpt-5.2", + "openai/gpt-5.2-codex", + "google/gemini-3-pro-preview", + "google/antigravity-gemini-3-pro", + "google/gemini-2.5-pro", + "openai/gpt-5.1-codex-max", + "google/gemini-3-flash-preview", + "google/antigravity-gemini-3-flash", + "google/gemini-2.5-flash", + "openai/gpt-5.1-codex-mini", + "zai-coding-plan/glm-4.7", + "zai-coding-plan/glm-4.6", + "opencode/glm-4.7-free", + ], + + // Multimodal / vision capable + multimodal: [ + "google/gemini-3-pro-preview", + "google/antigravity-gemini-3-pro", + "google/gemini-2.5-pro", + "google/gemini-3-flash-preview", + "google/antigravity-gemini-3-flash", + "google/gemini-2.5-flash", + "google/gemini-2.5-flash-image", + "google/gemini-2.5-flash-image-preview", + "google/gemini-2.0-flash", + "google/gemini-1.5-pro", + "google/gemini-1.5-flash", + "anthropic/claude-opus-4-5", + "google/antigravity-claude-opus-4-5-thinking", + "anthropic/claude-sonnet-4-5", + "google/antigravity-claude-sonnet-4-5", + "openai/gpt-5.2", + "zai-coding-plan/glm-4.6v", + "zai-coding-plan/glm-4.5v", + ], + + // Creative / artistic + creative: [ + "google/antigravity-gemini-3-pro", + "google/gemini-3-pro-preview", + "google/gemini-2.5-pro", + "anthropic/claude-opus-4-5", + "google/antigravity-claude-opus-4-5-thinking", + "anthropic/claude-sonnet-4-5", + "google/antigravity-claude-sonnet-4-5", + "openai/gpt-5.2", + "google/antigravity-gemini-3-flash", + "google/gemini-3-flash-preview", + "google/gemini-2.5-flash", + "zai-coding-plan/glm-4.7", + "opencode/glm-4.7-free", + ], + + // Free/cheap fallback + free: [ + "opencode/glm-4.7-free", + "opencode/grok-code", + "opencode/gpt-5-nano", + "opencode/minimax-m2.1-free", + "opencode/big-pickle", + "zai-coding-plan/glm-4.5-flash", + "zai-coding-plan/glm-4.5-air", + "google/gemini-2.5-flash-lite", + "google/gemini-2.0-flash-lite", + "google/gemini-1.5-flash-8b", + ], +} as const + +export type RankingCategory = keyof typeof MODEL_RANKINGS + +export const AGENT_RANKING_MAP: Record = { + "Sisyphus": "orchestrator", + "Sisyphus-Junior": "orchestrator", + "orchestrator-sisyphus": "orchestrator", + "oracle": "reasoning", + "explore": "fast", + "librarian": "instruction", + "frontend-ui-ux-engineer": "coding", + "document-writer": "instruction", + "multimodal-looker": "multimodal", + "Metis (Plan Consultant)": "reasoning", + "Momus (Plan Reviewer)": "reasoning", + "Prometheus (Planner)": "orchestrator", + "build": "orchestrator", + "plan": "orchestrator", + "OpenCode-Builder": "coding", + "general": "orchestrator", +} + +export const CATEGORY_RANKING_MAP: Record = { + "quick": "fast", + "general": "orchestrator", + "visual-engineering": "coding", + "ultrabrain": "reasoning", + "most-capable": "orchestrator", + "writing": "instruction", + "artistry": "creative", +} + +export const CATEGORY_TEMPERATURES: Record = { + "quick": 0.3, + "general": 0.3, + "visual-engineering": 0.7, + "ultrabrain": 0.1, + "most-capable": 0.1, + "writing": 0.5, + "artistry": 0.9, +} + +export const CATEGORY_VARIANTS: Record = { + "quick": "low", + "general": "low", + "visual-engineering": "high", + "ultrabrain": "max", + "most-capable": "max", + "writing": "low", + "artistry": "medium", +} + +export function findBestModel( + availableModelIds: string[], + category: RankingCategory +): string | null { + const ranking = MODEL_RANKINGS[category] + const availableSet = new Set(availableModelIds) + + for (const modelId of ranking) { + 
if (availableSet.has(modelId)) { + return modelId + } + } + + for (const modelId of MODEL_RANKINGS.free) { + if (availableSet.has(modelId)) { + return modelId + } + } + + return availableModelIds[0] ?? null +} + +export function generateOptimalConfig( + availableModelIds: string[] +): { agents: Record; categories: Record } { + const agents: Record = {} + const categories: Record = {} + + for (const [agent, rankingCategory] of Object.entries(AGENT_RANKING_MAP)) { + const model = findBestModel(availableModelIds, rankingCategory) + if (model) { + agents[agent] = { model } + if (agent === "Sisyphus" || agent === "orchestrator-sisyphus" || agent === "Prometheus (Planner)" || agent === "build" || agent === "plan") { + agents[agent].variant = "max" + } else if (agent === "Sisyphus-Junior") { + agents[agent].variant = "high" + } else if (agent === "frontend-ui-ux-engineer") { + agents[agent].variant = "high" + } else if (agent === "librarian" || agent === "explore" || agent === "document-writer" || agent === "multimodal-looker" || agent === "general") { + agents[agent].variant = "low" + } + } + } + + for (const [category, rankingCategory] of Object.entries(CATEGORY_RANKING_MAP)) { + const model = findBestModel(availableModelIds, rankingCategory) + if (model) { + categories[category] = { + model, + variant: CATEGORY_VARIANTS[category] ?? "low", + temperature: CATEGORY_TEMPERATURES[category] ?? 0.3, + } + } + } + + return { agents, categories } +} diff --git a/src/cli/model-optimizer/test-command.ts b/src/cli/model-optimizer/test-command.ts new file mode 100644 index 0000000000..586167af3a --- /dev/null +++ b/src/cli/model-optimizer/test-command.ts @@ -0,0 +1,179 @@ +import color from "picocolors" +import { existsSync, readFileSync, writeFileSync, mkdirSync } from "node:fs" +import { dirname } from "node:path" +import { detectAvailableModels } from "./model-detector" +import { MODEL_RANKINGS, AGENT_RANKING_MAP, CATEGORY_RANKING_MAP, generateOptimalConfig } from "./rankings" +import type { RankingCategory } from "./rankings" +import { parseJsonc, getOpenCodeConfigPaths } from "../../shared" + +export interface TestModelsOptions { + verbose?: boolean + apply?: boolean +} + +function printHeader(title: string): void { + console.log() + console.log(color.bgCyan(color.black(` ${title} `))) + console.log() +} + +function printSection(title: string): void { + console.log() + console.log(color.bold(color.white(title))) + console.log(color.dim("─".repeat(60))) +} + +export async function testModels(options: TestModelsOptions): Promise { + printHeader("Model Config Optimizer") + + console.log("Detecting available models...") + const allModels = await detectAvailableModels() + + if (allModels.length === 0) { + console.log(color.red("No models detected. Run 'opencode models' to verify.")) + return 1 + } + + const availableIds = allModels.map(m => m.id) + console.log(`Found ${color.cyan(allModels.length.toString())} models`) + + printSection("YOUR AVAILABLE MODELS") + for (const model of allModels) { + const tierColor = model.tier === "flagship" ? color.green : model.tier === "standard" ? color.yellow : color.dim + console.log(` ${tierColor("●")} ${model.id} ${color.dim(`(${model.tier})`)}`) + } + + const config = generateOptimalConfig(availableIds) + + printSection("OPTIMAL AGENT → MODEL MAPPING") + console.log() + + for (const [agent, agentConfig] of Object.entries(config.agents)) { + const category = AGENT_RANKING_MAP[agent] + const ranking = category ? 
MODEL_RANKINGS[category] : [] + const rank = agentConfig.model ? (ranking as readonly string[]).indexOf(agentConfig.model) + 1 : -1 + + const rankStr = rank > 0 ? `#${rank}` : "" + const variantStr = agentConfig.variant ? color.dim(` (${agentConfig.variant})`) : "" + console.log(` ${color.cyan(agent.padEnd(28))} → ${color.green(agentConfig.model)}${variantStr} ${color.dim(rankStr)}`) + } + + printSection("OPTIMAL CATEGORY → MODEL MAPPING") + console.log() + + for (const [category, catConfig] of Object.entries(config.categories)) { + const rankingCategory = CATEGORY_RANKING_MAP[category] + const ranking = rankingCategory ? MODEL_RANKINGS[rankingCategory] : [] + const rank = catConfig.model ? (ranking as readonly string[]).indexOf(catConfig.model) + 1 : -1 + + const rankStr = rank > 0 ? `#${rank}` : "" + const detailStr = color.dim(` (${catConfig.variant}, temp=${catConfig.temperature})`) + console.log(` ${color.cyan(category.padEnd(20))} → ${color.green(catConfig.model)}${detailStr} ${color.dim(rankStr)}`) + } + + if (options.verbose) { + printSection("FULL RANKINGS BY CATEGORY") + + for (const [category, ranking] of Object.entries(MODEL_RANKINGS)) { + console.log() + console.log(color.bold(category.toUpperCase())) + + for (let i = 0; i < ranking.length; i++) { + const modelId = ranking[i] + const available = availableIds.includes(modelId) + const marker = available ? color.green("✓") : color.dim("○") + const text = available ? color.white(modelId) : color.dim(modelId) + console.log(` ${(i + 1).toString().padStart(2)}. ${marker} ${text}`) + } + } + } + + if (options.apply) { + const result = applyConfig(config) + if (result.success) { + printSection("CONFIG APPLIED") + console.log() + console.log(color.green(`✓ Config written to: ${result.path}`)) + if (result.merged) { + console.log(color.dim(" (merged with existing config)")) + } + } else { + printSection("CONFIG APPLY FAILED") + console.log() + console.log(color.red(`✗ ${result.error}`)) + return 1 + } + } else { + printSection("RECOMMENDED CONFIG") + console.log() + console.log(color.dim("Add to your oh-my-opencode.json:")) + console.log() + console.log(color.cyan(JSON.stringify(config, null, 2))) + console.log() + console.log(color.dim("Use --apply to write this config automatically")) + } + + console.log() + console.log(color.dim("Use --verbose to see full rankings")) + + return 0 +} + +interface ApplyResult { + success: boolean + path?: string + merged?: boolean + error?: string +} + +function applyConfig(config: { agents: Record; categories: Record }): ApplyResult { + // Use shared config path resolution (respects OPENCODE_CONFIG_DIR, XDG, APPDATA) + const paths = getOpenCodeConfigPaths({ binary: "opencode", version: null }) + const configPath = paths.omoConfig + const configDir = dirname(configPath) + + try { + if (!existsSync(configDir)) { + mkdirSync(configDir, { recursive: true }) + } + + let existingConfig: Record = {} + let merged = false + + if (existsSync(configPath)) { + try { + const content = readFileSync(configPath, "utf-8") + if (content.trim()) { + const parsed = parseJsonc>(content) + if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) { + existingConfig = parsed + merged = true + } + } + } catch { + // If parsing fails, we'll overwrite with new config + } + } + + const newConfig = { + ...existingConfig, + agents: { + ...(existingConfig.agents as Record ?? {}), + ...config.agents, + }, + categories: { + ...(existingConfig.categories as Record ?? 
{}),
+        ...config.categories,
+      },
+    }
+
+    writeFileSync(configPath, JSON.stringify(newConfig, null, 2) + "\n")
+
+    return { success: true, path: configPath, merged }
+  } catch (err) {
+    return {
+      success: false,
+      error: err instanceof Error ? err.message : String(err),
+    }
+  }
+}
diff --git a/src/cli/model-optimizer/types.test.ts b/src/cli/model-optimizer/types.test.ts
new file mode 100644
index 0000000000..8d2da29164
--- /dev/null
+++ b/src/cli/model-optimizer/types.test.ts
@@ -0,0 +1,96 @@
+import { describe, expect, it } from "bun:test"
+import {
+  ModelInfoSchema,
+  ModelTierSchema,
+} from "./types"
+
+describe("ModelTierSchema", () => {
+  it("should accept valid tiers", () => {
+    // #given valid tier values
+    const validTiers = ["flagship", "standard", "lite"]
+
+    // #when parsing each tier
+    // #then all should succeed
+    for (const tier of validTiers) {
+      expect(ModelTierSchema.safeParse(tier).success).toBe(true)
+    }
+  })
+
+  it("should reject invalid tiers", () => {
+    // #given an invalid tier value
+    const invalidTier = "premium"
+
+    // #when parsing
+    const result = ModelTierSchema.safeParse(invalidTier)
+
+    // #then it should fail
+    expect(result.success).toBe(false)
+  })
+})
+
+describe("ModelInfoSchema", () => {
+  it("should accept valid model info", () => {
+    // #given a valid model info object
+    const validModelInfo = {
+      id: "anthropic/claude-opus-4-5",
+      provider: "anthropic",
+      name: "claude-opus-4-5",
+      family: "claude",
+      version: "4.5",
+      tier: "flagship",
+    }
+
+    // #when parsing
+    const result = ModelInfoSchema.safeParse(validModelInfo)
+
+    // #then it should succeed
+    expect(result.success).toBe(true)
+  })
+
+  it("should accept model info without optional fields", () => {
+    // #given model info with only required fields
+    const minimalModelInfo = {
+      id: "openai/gpt-4o",
+      provider: "openai",
+      name: "gpt-4o",
+      tier: "standard",
+    }
+
+    // #when parsing
+    const result = ModelInfoSchema.safeParse(minimalModelInfo)
+
+    // #then it should succeed
+    expect(result.success).toBe(true)
+  })
+
+  it("should reject model info with invalid tier", () => {
+    // #given model info with invalid tier
+    const invalidModelInfo = {
+      id: "anthropic/claude-opus-4-5",
+      provider: "anthropic",
+      name: "claude-opus-4-5",
+      tier: "ultra",
+    }
+
+    // #when parsing
+    const result = ModelInfoSchema.safeParse(invalidModelInfo)
+
+    // #then it should fail
+    expect(result.success).toBe(false)
+  })
+
+  it("should reject model info missing required fields", () => {
+    // #given model info missing id
+    const incompleteModelInfo = {
+      provider: "anthropic",
+      name: "claude-opus-4-5",
+      tier: "flagship",
+    }
+
+    // #when parsing
+    const result = ModelInfoSchema.safeParse(incompleteModelInfo)
+
+    // #then it should fail
+    expect(result.success).toBe(false)
+  })
+})
diff --git a/src/cli/model-optimizer/types.ts b/src/cli/model-optimizer/types.ts
new file mode 100644
index 0000000000..506036f995
--- /dev/null
+++ b/src/cli/model-optimizer/types.ts
@@ -0,0 +1,23 @@
+import { z } from "zod"
+
+/**
+ * Model performance tier classification
+ * - flagship: Top-tier models for complex tasks (e.g., claude-opus, gpt-4)
+ * - standard: Balanced performance/cost (e.g., claude-sonnet, gpt-4o)
+ * - lite: Fast, low-cost models (e.g., claude-haiku, gpt-4o-mini)
+ */
+export const ModelTierSchema = z.enum(["flagship", "standard", "lite"])
+export type ModelTier = z.infer<typeof ModelTierSchema>
+
+/**
+ * Model information from `opencode models` output
+ */
+export const ModelInfoSchema = z.object({
+  id: z.string(),
+  provider: z.string(),
+  name: z.string(),
+  family: z.string().optional(),
+  version: z.string().optional(),
+  tier: ModelTierSchema,
+})
+export type ModelInfo = z.infer<typeof ModelInfoSchema>
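
Reviewer note: a minimal usage sketch of how the new pieces compose, mirroring what `test-command.ts` does internally. The exports (`detectAvailableModels`, `findBestModel`, `generateOptimalConfig`) are taken from the diff above; the import paths and script framing are illustrative assumptions, not part of the change.

```typescript
// Illustrative sketch only — not part of this diff.
import { detectAvailableModels } from "./src/cli/model-optimizer/model-detector"
import { findBestModel, generateOptimalConfig } from "./src/cli/model-optimizer/rankings"

async function previewOptimalConfig(): Promise<void> {
  // 1. Parse `opencode models` into ModelInfo objects (returns [] on failure)
  const models = await detectAvailableModels()
  const ids = models.map((m) => m.id)

  // 2. Best available model for a single ranking category, e.g. "reasoning"
  console.log(findBestModel(ids, "reasoning"))

  // 3. Full agent→model and category→model mapping, as printed by `model-config`
  const config = generateOptimalConfig(ids)
  console.log(JSON.stringify(config, null, 2))
}

await previewOptimalConfig()
```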