From 62c4df592216f4239c8c3b26c853a4c8eaef59ab Mon Sep 17 00:00:00 2001 From: "V. Feitoza" Date: Mon, 25 May 2026 05:52:54 -0300 Subject: [PATCH] feat(routing): add canonical model pools and admin controls Introduce canonical model pool routing with priority failover, weighted distribution, session affinity, runtime-aware candidate filtering, admin state controls, and user-facing documentation. --- config/config.example.yaml | 66 ++++ config/config.go | 17 + config/config_example_test.go | 16 +- config/config_test.go | 78 ++++ config/routing.go | 152 ++++++++ docs/features/canonical-routing.mdx | 333 ++++++++++++++++++ .../admin/dashboard/static/js/dashboard.js | 6 + .../dashboard/static/js/modules/aliases.js | 148 +++++++- .../static/js/modules/aliases.test.cjs | 35 ++ .../dashboard/templates/model-table-body.html | 24 ++ internal/admin/handler.go | 18 + internal/admin/handler_providers.go | 11 +- internal/admin/handler_routing_pools.go | 159 +++++++++ internal/admin/handler_routing_pools_test.go | 91 +++++ internal/admin/handler_routing_state.go | 104 ++++++ internal/admin/handler_routing_state_test.go | 88 +++++ internal/admin/handler_test.go | 2 +- internal/admin/routes.go | 4 + internal/admin/routes_test.go | 4 + internal/app/app.go | 43 ++- internal/core/canonical_routing.go | 27 ++ internal/core/request_model_resolution.go | 20 +- internal/fallback/resolver.go | 3 + internal/fallback/resolver_test.go | 23 ++ internal/gateway/fallback.go | 45 ++- internal/gateway/inference_orchestrator.go | 4 + internal/gateway/request_model_resolution.go | 49 ++- internal/providers/config_test.go | 21 ++ internal/routing/composed_resolver.go | 81 +++++ internal/routing/exposed_models.go | 116 ++++++ internal/routing/exposed_models_test.go | 98 ++++++ internal/routing/failover_policy.go | 62 ++++ internal/routing/pool_evaluator.go | 178 ++++++++++ internal/routing/resolver.go | 164 +++++++++ internal/routing/resolver_test.go | 325 +++++++++++++++++ internal/routing/runtime.go 
| 51 +++ internal/routing/session_affinity.go | 85 +++++ internal/routing/strategy.go | 67 ++++ internal/routing/types.go | 41 +++ internal/routingstate/factory.go | 104 ++++++ internal/routingstate/service.go | 191 ++++++++++ internal/routingstate/service_test.go | 47 +++ internal/routingstate/store.go | 57 +++ internal/routingstate/store_mongodb.go | 123 +++++++ internal/routingstate/store_postgresql.go | 121 +++++++ internal/routingstate/store_sqlite.go | 146 ++++++++ internal/routingstate/types.go | 75 ++++ internal/server/fallback_test.go | 10 + internal/server/handlers.go | 3 + internal/server/http.go | 3 + .../internal_chat_completion_executor.go | 3 + .../server/request_model_resolution_test.go | 42 +++ .../server/translated_inference_service.go | 3 + 53 files changed, 3759 insertions(+), 28 deletions(-) create mode 100644 config/routing.go create mode 100644 docs/features/canonical-routing.mdx create mode 100644 internal/admin/handler_routing_pools.go create mode 100644 internal/admin/handler_routing_pools_test.go create mode 100644 internal/admin/handler_routing_state.go create mode 100644 internal/admin/handler_routing_state_test.go create mode 100644 internal/core/canonical_routing.go create mode 100644 internal/routing/composed_resolver.go create mode 100644 internal/routing/exposed_models.go create mode 100644 internal/routing/exposed_models_test.go create mode 100644 internal/routing/failover_policy.go create mode 100644 internal/routing/pool_evaluator.go create mode 100644 internal/routing/resolver.go create mode 100644 internal/routing/resolver_test.go create mode 100644 internal/routing/runtime.go create mode 100644 internal/routing/session_affinity.go create mode 100644 internal/routing/strategy.go create mode 100644 internal/routing/types.go create mode 100644 internal/routingstate/factory.go create mode 100644 internal/routingstate/service.go create mode 100644 internal/routingstate/service_test.go create mode 100644 
internal/routingstate/store.go create mode 100644 internal/routingstate/store_mongodb.go create mode 100644 internal/routingstate/store_postgresql.go create mode 100644 internal/routingstate/store_sqlite.go create mode 100644 internal/routingstate/types.go diff --git a/config/config.example.yaml b/config/config.example.yaml index 24ea3bed..ef2044c4 100644 --- a/config/config.example.yaml +++ b/config/config.example.yaml @@ -185,6 +185,72 @@ fallback: "claude-sonnet-4": mode: "off" # disable fallback just for this model +routing: + # Canonical model pool routing. + # + # Use this section when you want clients to call a stable public model name + # (for example: "claude-sonnet-4-6") while the gateway maps that name to one + # or more exact provider/model candidates. + # + # Typical use cases: + # - multiple named accounts expose the same logical model with different IDs; + # - one provider/account should be primary and another should be a standby; + # - traffic should be distributed across equivalent candidates; + # - operators need manual enable/disable control per provider, canonical model, + # or pool candidate through the admin API/dashboard. 
+ defaults: + strategy: "priority_failover" # "priority_failover" = always prefer the lowest priority candidate; "weighted_round_robin" = distribute requests by weight + session_affinity: true # parsed sticky-routing setting for canonical pools; keep enabled if you want future/runtime affinity support to pin repeated requests to the same candidate + session_affinity_ttl: 30m # parsed TTL for session affinity bindings + failover: + enabled: true # when true, eligible errors can move the request to the next candidate in the same canonical pool + max_attempts: 3 # total attempts across pool candidates, including the first candidate + retry_on_statuses: [429, 500, 502, 503, 504] # provider statuses that qualify for retry/failover + retry_on_model_errors: true # also retry/fail over on model-unavailable / model-not-found / model-unsupported style errors + + # model_pools map one public canonical model name to one or more exact provider + # candidates. The canonical key is what clients send in requests to the gateway. + # + # Example A: primary/backup routing. + # - Use strategy: "priority_failover" + # - Lower priority number wins during normal routing + # - Other candidates are used only when failover is triggered + # + # model_pools: + # claude-sonnet-4-6: + # candidates: + # - provider: anthropic_primary + # model: claude-sonnet-4-6 + # priority: 1 + # - provider: anthropic_backup + # model: claude-sonnet-4-6-20250929 + # priority: 2 + # + # Example B: weighted distribution across equivalent candidates. + # - Use strategy: "weighted_round_robin" + # - Higher weight receives more traffic + # - priority is still useful as a deterministic tie-breaker + # + # model_pools: + # claude-opus-4-7: + # candidates: + # - provider: anthropic_primary + # model: claude-opus-4-7 + # weight: 10 + # priority: 1 + # - provider: anthropic_backup + # model: claude-opus-4-7 + # weight: 6 + # priority: 2 + # + # Notes: + # - provider must match the named provider key under `providers:`. 
+ # - model must be the exact provider-facing model ID. + # - canonical names are declared explicitly; the gateway does not infer that + # dated and non-dated model IDs are equivalent. + # - if a canonical model has no pool entry, normal alias/provider resolution + # continues to apply. + providers: openai: type: openai diff --git a/config/config.go b/config/config.go index c19071ee..dc7cf9dc 100644 --- a/config/config.go +++ b/config/config.go @@ -25,6 +25,7 @@ type Config struct { Admin AdminConfig `yaml:"admin"` Guardrails GuardrailsConfig `yaml:"guardrails"` Fallback FallbackConfig `yaml:"fallback"` + Routing RoutingConfig `yaml:"routing"` Workflows WorkflowsConfig `yaml:"workflows"` Resilience ResilienceConfig `yaml:"resilience"` } @@ -115,6 +116,19 @@ func buildDefaultConfig() *Config { Fallback: FallbackConfig{ DefaultMode: FallbackModeManual, }, + Routing: RoutingConfig{ + Defaults: RoutingDefaultsConfig{ + Strategy: RoutingStrategyPriorityFailover, + SessionAffinity: true, + SessionAffinityTTL: 30 * time.Minute, + Failover: RoutingFailoverConfig{ + Enabled: true, + MaxAttempts: 3, + RetryOnStatuses: []int{429, 500, 502, 503, 504}, + RetryOnModelErrors: true, + }, + }, + }, Workflows: WorkflowsConfig{ RefreshInterval: time.Minute, }, @@ -180,6 +194,9 @@ func Load() (*LoadResult, error) { if err := loadFallbackConfig(&cfg.Fallback); err != nil { return nil, err } + if err := loadRoutingConfig(&cfg.Routing); err != nil { + return nil, err + } // When no model cache backend was specified at all, default to local. 
if cfg.Cache.Model.Local == nil && cfg.Cache.Model.Redis == nil { diff --git a/config/config_example_test.go b/config/config_example_test.go index 0a659423..8abcecaf 100644 --- a/config/config_example_test.go +++ b/config/config_example_test.go @@ -11,12 +11,14 @@ func TestLoad_FromEnvironment(t *testing.T) { _ = os.Unsetenv("PORT") }() - result, err := Load() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } + withTempDir(t, func(string) { + result, err := Load() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } - if result.Config.Server.Port != "9090" { - t.Errorf("expected port 9090, got %s", result.Config.Server.Port) - } + if result.Config.Server.Port != "9090" { + t.Errorf("expected port 9090, got %s", result.Config.Server.Port) + } + }) } diff --git a/config/config_test.go b/config/config_test.go index 60b53852..17eb8474 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -133,6 +133,9 @@ func TestBuildDefaultConfig(t *testing.T) { if cfg.Storage.Type != "sqlite" { t.Errorf("expected Storage.Type=sqlite, got %s", cfg.Storage.Type) } + if cfg.Routing.Defaults.Strategy != RoutingStrategyPriorityFailover { + t.Errorf("expected Routing.Defaults.Strategy=%q, got %q", RoutingStrategyPriorityFailover, cfg.Routing.Defaults.Strategy) + } if cfg.Storage.SQLite.Path != "data/gomodel.db" { t.Errorf("expected Storage.SQLite.Path=data/gomodel.db, got %s", cfg.Storage.SQLite.Path) } @@ -714,6 +717,81 @@ fallback: }) } +func TestLoad_RoutingConfigYAML(t *testing.T) { + clearAllConfigEnvVars(t) + + withTempDir(t, func(dir string) { + yaml := ` +routing: + defaults: + strategy: weighted_round_robin + session_affinity: false + session_affinity_ttl: 45m + failover: + enabled: true + max_attempts: 5 + retry_on_statuses: [429, 503] + retry_on_model_errors: false + model_pools: + claude-sonnet-4-6: + candidates: + - provider: anthropic_b + model: claude-sonnet-4-6 + weight: 10 + priority: 1 + - provider: anthropic_a + model: 
claude-sonnet-4-6-20250929 + weight: 8 + priority: 2 +` + if err := os.WriteFile(filepath.Join(dir, "config.yaml"), []byte(yaml), 0644); err != nil { + t.Fatalf("Failed to write config.yaml: %v", err) + } + + result, err := Load() + if err != nil { + t.Fatalf("Load() failed: %v", err) + } + cfg := result.Config + if cfg.Routing.Defaults.Strategy != RoutingStrategyWeightedRoundRobin { + t.Fatalf("Routing.Defaults.Strategy = %q, want %q", cfg.Routing.Defaults.Strategy, RoutingStrategyWeightedRoundRobin) + } + if cfg.Routing.Defaults.SessionAffinity { + t.Fatal("expected SessionAffinity=false from YAML") + } + if cfg.Routing.Defaults.SessionAffinityTTL != 45*time.Minute { + t.Fatalf("SessionAffinityTTL = %s, want 45m", cfg.Routing.Defaults.SessionAffinityTTL) + } + pool := cfg.Routing.ModelPools["claude-sonnet-4-6"] + if len(pool.Candidates) != 2 { + t.Fatalf("len(pool.Candidates) = %d, want 2", len(pool.Candidates)) + } + }) +} + +func TestLoad_InvalidRoutingStrategy(t *testing.T) { + clearAllConfigEnvVars(t) + + withTempDir(t, func(dir string) { + yaml := ` +routing: + defaults: + strategy: invalid +` + if err := os.WriteFile(filepath.Join(dir, "config.yaml"), []byte(yaml), 0644); err != nil { + t.Fatalf("Failed to write config.yaml: %v", err) + } + + _, err := Load() + if err == nil { + t.Fatal("expected Load() to fail for invalid routing strategy") + } + if !strings.Contains(err.Error(), "routing.defaults.strategy must be one of") { + t.Fatalf("Load() error = %v, want routing strategy validation error", err) + } + }) +} + func TestLoad_InvalidConfiguredProviderModelsMode(t *testing.T) { clearAllConfigEnvVars(t) diff --git a/config/routing.go b/config/routing.go new file mode 100644 index 00000000..5ad65572 --- /dev/null +++ b/config/routing.go @@ -0,0 +1,152 @@ +package config + +import ( + "fmt" + "sort" + "strings" + "time" +) + +type RoutingStrategy string + +const ( + RoutingStrategyPriorityFailover RoutingStrategy = "priority_failover" + 
RoutingStrategyWeightedRoundRobin RoutingStrategy = "weighted_round_robin" +) + +func normalizeRoutingStrategy(strategy RoutingStrategy) RoutingStrategy { + return RoutingStrategy(strings.ToLower(strings.TrimSpace(string(strategy)))) +} + +func ResolveRoutingStrategy(strategy RoutingStrategy) RoutingStrategy { + strategy = normalizeRoutingStrategy(strategy) + if strategy == "" { + return RoutingStrategyPriorityFailover + } + return strategy +} + +func (s RoutingStrategy) Valid() bool { + switch normalizeRoutingStrategy(s) { + case RoutingStrategyPriorityFailover, RoutingStrategyWeightedRoundRobin: + return true + default: + return false + } +} + +// RoutingConfig holds canonical model pool routing configuration. +type RoutingConfig struct { + Defaults RoutingDefaultsConfig `yaml:"defaults"` + ModelPools map[string]ModelPoolConfig `yaml:"model_pools"` +} + +// RoutingDefaultsConfig holds default routing behavior for canonical pools. +type RoutingDefaultsConfig struct { + Strategy RoutingStrategy `yaml:"strategy"` + SessionAffinity bool `yaml:"session_affinity"` + SessionAffinityTTL time.Duration `yaml:"session_affinity_ttl"` + Failover RoutingFailoverConfig `yaml:"failover"` +} + +// RoutingFailoverConfig controls fallback between candidates within the same pool. +type RoutingFailoverConfig struct { + Enabled bool `yaml:"enabled"` + MaxAttempts int `yaml:"max_attempts"` + RetryOnStatuses []int `yaml:"retry_on_statuses"` + RetryOnModelErrors bool `yaml:"retry_on_model_errors"` +} + +// ModelPoolConfig maps one public canonical model name to concrete provider candidates. +type ModelPoolConfig struct { + Candidates []ModelPoolCandidateConfig `yaml:"candidates"` +} + +// ModelPoolCandidateConfig defines one concrete provider/model candidate. 
+type ModelPoolCandidateConfig struct { + Provider string `yaml:"provider"` + Model string `yaml:"model"` + Priority int `yaml:"priority"` + Weight int `yaml:"weight"` +} + +func loadRoutingConfig(cfg *RoutingConfig) error { + if cfg == nil { + return nil + } + + cfg.Defaults.Strategy = ResolveRoutingStrategy(cfg.Defaults.Strategy) + if !cfg.Defaults.Strategy.Valid() { + return fmt.Errorf("routing.defaults.strategy must be one of: priority_failover, weighted_round_robin") + } + if cfg.Defaults.SessionAffinityTTL <= 0 { + cfg.Defaults.SessionAffinityTTL = 30 * time.Minute + } + if cfg.Defaults.Failover.MaxAttempts <= 0 { + cfg.Defaults.Failover.MaxAttempts = 3 + } + if len(cfg.Defaults.Failover.RetryOnStatuses) == 0 { + cfg.Defaults.Failover.RetryOnStatuses = []int{429, 500, 502, 503, 504} + } + + if len(cfg.ModelPools) == 0 { + cfg.ModelPools = nil + return nil + } + + normalized := make(map[string]ModelPoolConfig, len(cfg.ModelPools)) + keys := make([]string, 0, len(cfg.ModelPools)) + for key := range cfg.ModelPools { + keys = append(keys, key) + } + sort.Strings(keys) + + for _, key := range keys { + trimmedKey := strings.TrimSpace(key) + if trimmedKey == "" { + return fmt.Errorf("routing.model_pools: model key cannot be empty") + } + if _, exists := normalized[trimmedKey]; exists { + return fmt.Errorf("routing.model_pools: duplicate model key after trimming: %q", trimmedKey) + } + pool := cfg.ModelPools[key] + if len(pool.Candidates) == 0 { + return fmt.Errorf("routing.model_pools[%q]: at least one candidate is required", trimmedKey) + } + + seenCandidates := make(map[string]struct{}, len(pool.Candidates)) + normalizedCandidates := make([]ModelPoolCandidateConfig, 0, len(pool.Candidates)) + for idx, candidate := range pool.Candidates { + candidate.Provider = strings.TrimSpace(candidate.Provider) + candidate.Model = strings.TrimSpace(candidate.Model) + if candidate.Provider == "" { + return fmt.Errorf("routing.model_pools[%q].candidates[%d].provider is required", 
trimmedKey, idx) + } + if candidate.Model == "" { + return fmt.Errorf("routing.model_pools[%q].candidates[%d].model is required", trimmedKey, idx) + } + candidateKey := candidate.Provider + "/" + candidate.Model + if _, exists := seenCandidates[candidateKey]; exists { + return fmt.Errorf("routing.model_pools[%q]: duplicate candidate %q", trimmedKey, candidateKey) + } + seenCandidates[candidateKey] = struct{}{} + + switch cfg.Defaults.Strategy { + case RoutingStrategyPriorityFailover: + if candidate.Priority <= 0 { + return fmt.Errorf("routing.model_pools[%q].candidates[%d].priority must be > 0 for priority_failover", trimmedKey, idx) + } + case RoutingStrategyWeightedRoundRobin: + if candidate.Weight <= 0 { + return fmt.Errorf("routing.model_pools[%q].candidates[%d].weight must be > 0 for weighted_round_robin", trimmedKey, idx) + } + } + + normalizedCandidates = append(normalizedCandidates, candidate) + } + normalized[trimmedKey] = ModelPoolConfig{Candidates: normalizedCandidates} + } + + cfg.ModelPools = normalized + return nil +} diff --git a/docs/features/canonical-routing.mdx b/docs/features/canonical-routing.mdx new file mode 100644 index 00000000..ab688c49 --- /dev/null +++ b/docs/features/canonical-routing.mdx @@ -0,0 +1,333 @@ +--- +title: "Canonical Routing" +description: "Route one public model name to multiple concrete provider candidates with priority failover, weighted distribution, session affinity, and admin controls." +icon: "git-branch" +keywords: ["routing", "failover", "session affinity", "canonical models", "model pools"] +--- + +## Overview + +GoModel can now expose one **stable public model name** while routing requests to +one or more **exact provider/model candidates** behind the scenes. 
+ +This is useful when: + +- multiple named provider accounts expose the same logical model under slightly different IDs; +- one provider should be the normal primary and another should be a standby; +- traffic should be distributed across equivalent backends; +- operators need to disable a provider, a canonical model, or a single candidate without changing client requests. + +## What This Feature Adds + +Canonical routing introduces a new `routing` config block with: + +- a routing strategy (`routing.defaults.strategy`) applied to every canonical pool; +- provider/model candidate lists; +- intra-pool failover rules; +- session affinity for repeated requests; +- runtime-aware candidate filtering; +- admin endpoints and dashboard controls for provider, canonical model, and candidate state. + +Clients keep sending the canonical model name. GoModel resolves it to the best +currently eligible provider/model candidate. + +## Configuration + +### Top-level routing block + +```yaml +routing: + defaults: + strategy: "priority_failover" + session_affinity: true + session_affinity_ttl: 30m + failover: + enabled: true + max_attempts: 3 + retry_on_statuses: [429, 500, 502, 503, 504] + retry_on_model_errors: true +``` + +### Available defaults + +#### `routing.defaults.strategy` + +Controls how GoModel chooses the initial candidate inside each canonical pool. + +Supported values: + +- `priority_failover` +- `weighted_round_robin` + +`priority_failover` always prefers the candidate with the lowest `priority` +value. `weighted_round_robin` distributes requests by `weight`. + +#### `routing.defaults.session_affinity` + +When enabled, repeated requests for the same canonical model and the same +request scope stay pinned to the same candidate while that candidate remains +eligible. + +GoModel currently derives the affinity key from: + +1. `user_path`, when present; +2. request ID as a fallback when no `user_path` is available. + +#### `routing.defaults.session_affinity_ttl` + +How long an affinity binding stays valid.
+ +If the pinned candidate becomes unavailable, unhealthy, or manually disabled, +GoModel reselects a new candidate and refreshes the binding. + +#### `routing.defaults.failover.enabled` + +Enables failover to the next candidate in the same canonical pool. + +#### `routing.defaults.failover.max_attempts` + +Total number of attempts allowed within one canonical pool, including the first +candidate. + +#### `routing.defaults.failover.retry_on_statuses` + +HTTP statuses that qualify for retry/failover inside the same pool. + +#### `routing.defaults.failover.retry_on_model_errors` + +Also fail over on model-unavailable, model-not-found, and model-unsupported +style errors. + +## Model pools + +Canonical names are declared explicitly in `routing.model_pools`. + +```yaml +routing: + defaults: + strategy: "priority_failover" + session_affinity: true + session_affinity_ttl: 30m + failover: + enabled: true + max_attempts: 3 + retry_on_statuses: [429, 500, 502, 503, 504] + retry_on_model_errors: true + + model_pools: + claude-sonnet-4-6: + candidates: + - provider: anthropic_primary + model: claude-sonnet-4-6 + priority: 1 + - provider: anthropic_backup + model: claude-sonnet-4-6-20250929 + priority: 2 +``` + +### Candidate fields + +Each candidate supports: + +- `provider`: configured provider instance name from `providers:` +- `model`: exact provider-facing model ID +- `priority`: required for `priority_failover` +- `weight`: required for `weighted_round_robin` + +## Weighted distribution example + +```yaml +routing: + defaults: + strategy: "weighted_round_robin" + session_affinity: true + session_affinity_ttl: 30m + + model_pools: + claude-opus-4-7: + candidates: + - provider: anthropic_primary + model: claude-opus-4-7 + weight: 10 + priority: 1 + - provider: anthropic_backup + model: claude-opus-4-7 + weight: 6 + priority: 2 +``` + +Higher `weight` gets more traffic. `priority` still acts as a deterministic +secondary ordering input. 
+ +## Provider inventories and configured model lists + +Canonical routing works best when provider inventories are explicit and stable. + +`models.configured_provider_models_mode` still matters with canonical pools: + +- `fallback`: use configured provider model lists only when provider `/models` is unavailable or empty; +- `allowlist`: expose only configured models and skip provider `/models` for providers that declare a list. + +This matters because pool candidates must reference exact provider model IDs +that exist either in the live provider inventory or in `providers.<name>.models`. + +## Runtime eligibility rules + +GoModel now evaluates candidates using both **manual state** and **runtime +health**. + +### Manual state + +A candidate may become ineligible because: + +- its provider was disabled manually; +- the canonical model was disabled manually; +- the candidate itself was disabled manually. + +### Runtime state + +GoModel classifies provider runtime as: + +- `healthy` +- `degraded` +- `unhealthy` + +Routing behavior: + +- `healthy`: eligible +- `degraded`: still eligible, but marked degraded +- `unhealthy`: removed from effective selection + +## `/v1/models` behavior + +`GET /v1/models` now reflects the same effective routing policy. + +For each canonical pool, GoModel exposes the model only when at least one +candidate is effectively eligible. + +The model entry is derived from the **effective selected candidate**, not simply +from the first configured candidate. + +## Admin API and dashboard + +This change adds operational state and routing visibility to the admin surface.
+ +### Admin endpoints + +- `GET /admin/routing-state` +- `PUT /admin/routing-state` +- `DELETE /admin/routing-state` +- `GET /admin/routing/model-pools` + +### What the dashboard now shows + +For each candidate, the dashboard distinguishes: + +- **Config Primary**: the candidate preferred by pool configuration; +- **Effective Candidate**: the candidate currently selected by the live routing decision. + +It also shows: + +- canonical model status; +- candidate status; +- runtime degradation; +- blocked candidates and their reasons; +- provider/canonical/candidate enable-disable controls. + +## Request observability + +Canonical routing now records structured routing metadata during request +resolution and failover. + +Available fields include: + +- requested model +- canonical model +- routing strategy +- config primary candidate +- effective candidate +- selected provider name +- selected exact model +- blocked candidates +- failover usage +- fallback target + +When a failover happens inside a canonical pool, GoModel updates the request +resolution so downstream logging and diagnostics can tell which candidate was +actually used. + +## Relationship to legacy fallback config + +The legacy `fallback` block still exists and remains useful for model-level +fallback outside canonical pools. + +Canonical routing is different: + +- it is driven by explicit `routing.model_pools`; +- it selects from candidates inside one canonical pool; +- it applies pool-aware failover and session affinity; +- it exposes pool state through the admin API and dashboard. + +## Recommended rollout + +1. Define named providers under `providers:`. +2. Declare exact model IDs for each provider when needed. +3. Create one canonical pool per public model name. +4. Start with `priority_failover` for predictable behavior. +5. Enable `session_affinity` if repeated scoped requests should stay pinned. +6. Use the admin dashboard to verify effective candidates and blocked reasons. 
+ +## Example complete config + +```yaml +models: + configured_provider_models_mode: "allowlist" + +routing: + defaults: + strategy: "priority_failover" + session_affinity: true + session_affinity_ttl: 30m + failover: + enabled: true + max_attempts: 3 + retry_on_statuses: [429, 500, 502, 503, 504] + retry_on_model_errors: true + + model_pools: + claude-sonnet-4-6: + candidates: + - provider: anthropic_primary + model: claude-sonnet-4-6 + priority: 1 + - provider: anthropic_backup + model: claude-sonnet-4-6-20250929 + priority: 2 + +providers: + anthropic_primary: + type: anthropic + api_key: "${ANTHROPIC_PRIMARY_API_KEY}" + models: + - claude-sonnet-4-6 + + anthropic_backup: + type: anthropic + api_key: "${ANTHROPIC_BACKUP_API_KEY}" + models: + - claude-sonnet-4-6-20250929 +``` + +## Summary + +Canonical model routing is a complete operator-facing +feature with: + +- explicit canonical pools; +- priority or weighted candidate selection; +- session affinity; +- runtime-aware routing eligibility; +- intra-pool failover; +- `/v1/models` alignment with effective routing; +- admin/dashboard controls and visibility; +- structured routing and failover metadata.
diff --git a/internal/admin/dashboard/static/js/dashboard.js b/internal/admin/dashboard/static/js/dashboard.js index 3fe33724..937d0b9f 100644 --- a/internal/admin/dashboard/static/js/dashboard.js +++ b/internal/admin/dashboard/static/js/dashboard.js @@ -605,6 +605,12 @@ function dashboard() { if (typeof this.fetchModelOverrides === "function") { requests.push(this.fetchModelOverrides()); } + if (typeof this.fetchRoutingState === "function") { + requests.push(this.fetchRoutingState()); + } + if (typeof this.fetchRoutingPools === "function") { + requests.push(this.fetchRoutingPools()); + } if (typeof this.fetchModelPricingOverrides === "function") { requests.push(this.fetchModelPricingOverrides()); } diff --git a/internal/admin/dashboard/static/js/modules/aliases.js b/internal/admin/dashboard/static/js/modules/aliases.js index b0b13840..6ebd8bd9 100644 --- a/internal/admin/dashboard/static/js/modules/aliases.js +++ b/internal/admin/dashboard/static/js/modules/aliases.js @@ -5,6 +5,9 @@ aliasesAvailable: true, modelOverridesAvailable: true, modelOverrideViews: [], + routingStateViews: [], + routingPools: [], + routingStateAvailable: true, displayModels: [], aliasLoading: false, aliasError: '', @@ -36,6 +39,16 @@ }, buildDisplayModels() { + const routingCandidates = new Map(); + for (const pool of this.routingPools || []) { + const canonical = String(pool && pool.canonical_model || '').trim(); + const candidates = Array.isArray(pool && pool.candidates) ? 
pool.candidates : []; + for (const candidate of candidates) { + const key = String(candidate && candidate.provider_name || '').trim() + '/' + String(candidate && candidate.model || '').trim(); + if (!key || !canonical) continue; + routingCandidates.set(key, { canonical_model: canonical, routing_state: candidate, pool }); + } + } const rows = this.models.map((model) => ({ key: 'model:' + this.qualifiedModelName(model), display_name: this.qualifiedModelName(model), @@ -49,7 +62,20 @@ kind_badge: '', masking_alias: null, alias_state_class: '', - alias_state_text: '' + alias_state_text: '', + canonical_model: (routingCandidates.get(this.qualifiedModelName(model)) || {}).canonical_model || '', + routing_state: (routingCandidates.get(this.qualifiedModelName(model)) || {}).routing_state || null, + routing_pool: (routingCandidates.get(this.qualifiedModelName(model)) || {}).pool || null, + canonical_enabled: (routingCandidates.get(this.qualifiedModelName(model)) || {}).pool ? ((routingCandidates.get(this.qualifiedModelName(model)) || {}).pool.enabled !== false) : true, + canonical_status: ((routingCandidates.get(this.qualifiedModelName(model)) || {}).pool || {}).status || '', + canonical_reason: ((routingCandidates.get(this.qualifiedModelName(model)) || {}).pool || {}).status_reason || '', + routing_strategy: ((routingCandidates.get(this.qualifiedModelName(model)) || {}).pool || {}).strategy || '', + candidate_priority: (((routingCandidates.get(this.qualifiedModelName(model)) || {}).routing_state || {}).priority ?? null), + candidate_weight: (((routingCandidates.get(this.qualifiedModelName(model)) || {}).routing_state || {}).weight ?? 
null), + is_config_primary: Boolean(((routingCandidates.get(this.qualifiedModelName(model)) || {}).routing_state || {}).is_config_primary), + is_effective_candidate: Boolean(((routingCandidates.get(this.qualifiedModelName(model)) || {}).routing_state || {}).is_effective_candidate), + effective_candidate: (((routingCandidates.get(this.qualifiedModelName(model)) || {}).pool || {}).effective_candidate || ''), + config_primary_candidate: (((routingCandidates.get(this.qualifiedModelName(model)) || {}).pool || {}).config_primary_candidate || '') })); if (!this.aliasesAvailable) { @@ -187,6 +213,58 @@ } }, + async fetchRoutingState() { + try { + const request = this.adminRequestOptions(); + const res = await fetch('/admin/routing-state', request); + if (res.status === 503) { + this.routingStateAvailable = false; + this.routingStateViews = []; + this.routingPools = []; + this.syncDisplayModels(); + return; + } + const handled = this.handleFetchResponse(res, 'routing state', request); + if (typeof this.isStaleAuthFetchResult === 'function' && this.isStaleAuthFetchResult(handled)) { + return; + } + if (!handled) { + this.routingStateViews = []; + this.routingPools = []; + this.syncDisplayModels(); + return; + } + this.routingStateAvailable = true; + const payload = await res.json(); + this.routingStateViews = Array.isArray(payload) ? payload : []; + } catch (e) { + console.error('Failed to fetch routing state:', e); + this.routingStateViews = []; + } + }, + + async fetchRoutingPools() { + try { + const request = this.adminRequestOptions(); + const res = await fetch('/admin/routing/model-pools', request); + const handled = this.handleFetchResponse(res, 'routing model pools', request); + if (typeof this.isStaleAuthFetchResult === 'function' && this.isStaleAuthFetchResult(handled)) { + return; + } + if (!handled) { + this.routingPools = []; + this.syncDisplayModels(); + return; + } + const payload = await res.json(); + this.routingPools = Array.isArray(payload) ? 
payload : []; + this.syncDisplayModels(); + } catch (e) { + console.error('Failed to fetch routing pools:', e); + this.routingPools = []; + } + }, + async fetchModelOverrides() { this.modelOverrideError = ''; try { @@ -263,9 +341,23 @@ return Array.from(groups.values()) .map((group) => { const access = this.providerGroupAccess(group.provider_name, group.provider_type, overridesBySelector); + const providerRoutingEnabled = this.providerRoutingEnabled(group.provider_name); + const seenCanonicals = new Set(); + group.rows = group.rows.map((row) => { + const canonical = String(row && row.canonical_model || '').trim(); + const showCanonicalControls = canonical && !seenCanonicals.has(canonical); + if (canonical) { + seenCanonicals.add(canonical); + } + return { + ...row, + show_canonical_controls: Boolean(showCanonicalControls) + }; + }); return { ...group, access, + provider_routing_enabled: providerRoutingEnabled, access_summary: this.modelAccessSummary(access), item_count_label: this.providerGroupItemCountLabel(group.rows) }; @@ -383,6 +475,60 @@ }; }, + providerRoutingEnabled(providerName) { + const normalized = String(providerName || '').trim(); + if (!normalized) return true; + for (const entry of this.routingStateViews || []) { + if (String(entry && entry.kind || '').trim() === 'provider' && String(entry && entry.provider_name || '').trim() === normalized) { + return entry.enabled !== false; + } + } + return true; + }, + + async submitRoutingStateChange(payload) { + const request = this.adminRequestOptions({ method: 'PUT', body: JSON.stringify(payload) }); + const res = await fetch('/admin/routing-state', request); + const handled = this.handleFetchResponse(res, 'routing state update', request); + if (typeof this.isStaleAuthFetchResult === 'function' && this.isStaleAuthFetchResult(handled)) { + return false; + } + if (!handled) { + return false; + } + await Promise.all([this.fetchRoutingState(), this.fetchRoutingPools()]); + return true; + }, + + async 
toggleProviderEnabled(group) { + if (!group || !group.provider_name) return; + await this.submitRoutingStateChange({ + kind: 'provider', + provider_name: group.provider_name, + enabled: !group.provider_routing_enabled + }); + }, + + async togglePoolCandidateEnabled(row) { + if (!row || !row.provider_name || !row.model || !row.model.id) return; + const enabled = !(row.routing_state && row.routing_state.candidate_enabled === false); + await this.submitRoutingStateChange({ + kind: 'pool_candidate', + provider_name: row.provider_name, + model: row.model.id, + enabled: !enabled + }); + }, + + async toggleCanonicalModelEnabled(row) { + if (!row || !row.canonical_model) return; + await this.submitRoutingStateChange({ + kind: 'canonical_model', + canonical_model: row.canonical_model, + enabled: !(row.canonical_enabled === false) + }); + }, + providerGroupItemCountLabel(rows) { const safeRows = Array.isArray(rows) ? rows : []; const modelCount = safeRows.filter((row) => row && !row.is_alias).length; diff --git a/internal/admin/dashboard/static/js/modules/aliases.test.cjs b/internal/admin/dashboard/static/js/modules/aliases.test.cjs index ac4f017c..2cb13a82 100644 --- a/internal/admin/dashboard/static/js/modules/aliases.test.cjs +++ b/internal/admin/dashboard/static/js/modules/aliases.test.cjs @@ -201,6 +201,41 @@ test('alias mutations send alias name in JSON body', async() => { }); }); +test('buildDisplayModels marks config primary and effective candidate from routing pools', () => { + const module = createAliasesModule(); + module.models = [{ + provider_name: 'anthropic_b', + provider_type: 'anthropic', + model: { + id: 'claude-sonnet-4-6', + object: 'model', + owned_by: 'anthropic', + metadata: { modes: ['chat'], categories: ['text_generation'] } + } + }]; + module.routingPools = [{ + canonical_model: 'claude-sonnet-4-6', + strategy: 'priority_failover', + effective_candidate: 'anthropic_b/claude-sonnet-4-6', + config_primary_candidate: 'anthropic_b/claude-sonnet-4-6', + 
candidates: [{ + provider_name: 'anthropic_b', + model: 'claude-sonnet-4-6', + priority: 1, + is_config_primary: true, + is_effective_candidate: true + }] + }]; + module.aliases = []; + module.aliasesAvailable = true; + module.syncDisplayModels(); /* the assertions below read module.displayModels after this call */ + + assert.equal(module.displayModels.length, 1); /* one fixture model, no aliases: exactly one row expected */ + assert.equal(module.displayModels[0].is_config_primary, true); /* flag copied from the matching pool candidate */ + assert.equal(module.displayModels[0].is_effective_candidate, true); + assert.equal(module.displayModels[0].effective_candidate, 'anthropic_b/claude-sonnet-4-6'); /* pool-level field surfaced on the row */ +}); + test('filteredDisplayModelGroups groups rows by provider_name and applies provider-wide overrides', () => { const module = createAliasesModule(); module.models = [ diff --git a/internal/admin/dashboard/templates/model-table-body.html b/internal/admin/dashboard/templates/model-table-body.html index e3b80304..463dae8f 100644 --- a/internal/admin/dashboard/templates/model-table-body.html +++ b/internal/admin/dashboard/templates/model-table-body.html @@ -22,6 +22,10 @@ @click="openProviderPricingOverrideEdit(group)"> {{template "dollar-icon"}} +