Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions config/config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,72 @@ fallback:
"claude-sonnet-4":
mode: "off" # disable fallback just for this model

routing:
# Canonical model pool routing.
#
# Use this section when you want clients to call a stable public model name
# (for example: "claude-sonnet-4-6") while the gateway maps that name to one
# or more exact provider/model candidates.
#
# Typical use cases:
# - multiple named accounts expose the same logical model with different IDs;
# - one provider/account should be primary and another should be a standby;
# - traffic should be distributed across equivalent candidates;
# - operators need manual enable/disable control per provider, canonical model,
# or pool candidate through the admin API/dashboard.
defaults:
strategy: "priority_failover" # "priority_failover" = always prefer the candidate with the lowest priority number; "weighted_round_robin" = distribute requests in proportion to candidate weight
session_affinity: true # parsed sticky-routing setting for canonical pools; keep enabled if you want future/runtime affinity support to pin repeated requests to the same candidate
session_affinity_ttl: 30m # parsed TTL for session affinity bindings
failover:
enabled: true # when true, eligible errors can move the request to the next candidate in the same canonical pool
max_attempts: 3 # total attempts across pool candidates, including the first candidate
retry_on_statuses: [429, 500, 502, 503, 504] # provider statuses that qualify for retry/failover
retry_on_model_errors: true # also retry/fail over on model-unavailable / model-not-found / model-unsupported style errors

# model_pools map one public canonical model name to one or more exact provider
# candidates. The canonical key is what clients send in requests to the gateway.
#
# Example A: primary/backup routing.
# - Use strategy: "priority_failover"
# - Lower priority number wins during normal routing
# - Other candidates are used only when failover is triggered
#
# model_pools:
# claude-sonnet-4-6:
# candidates:
# - provider: anthropic_primary
# model: claude-sonnet-4-6
# priority: 1
# - provider: anthropic_backup
# model: claude-sonnet-4-6-20250929
# priority: 2
#
# Example B: weighted distribution across equivalent candidates.
# - Use strategy: "weighted_round_robin"
# - Higher weight receives more traffic
# - priority is still useful as a deterministic tie-breaker
#
# model_pools:
# claude-opus-4-7:
# candidates:
# - provider: anthropic_primary
# model: claude-opus-4-7
# weight: 10
# priority: 1
# - provider: anthropic_backup
# model: claude-opus-4-7
# weight: 6
# priority: 2
#
# Notes:
# - provider must match the named provider key under `providers:`.
# - model must be the exact provider-facing model ID.
# - canonical names are declared explicitly; the gateway does not infer that
# dated and non-dated model IDs are equivalent.
# - if a canonical model has no pool entry, normal alias/provider resolution
# continues to apply.

providers:
openai:
type: openai
Expand Down
17 changes: 17 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ type Config struct {
Admin AdminConfig `yaml:"admin"`
Guardrails GuardrailsConfig `yaml:"guardrails"`
Fallback FallbackConfig `yaml:"fallback"`
Routing RoutingConfig `yaml:"routing"`
Workflows WorkflowsConfig `yaml:"workflows"`
Resilience ResilienceConfig `yaml:"resilience"`
}
Expand Down Expand Up @@ -115,6 +116,19 @@ func buildDefaultConfig() *Config {
Fallback: FallbackConfig{
DefaultMode: FallbackModeManual,
},
Routing: RoutingConfig{
Defaults: RoutingDefaultsConfig{
Strategy: RoutingStrategyPriorityFailover,
SessionAffinity: true,
SessionAffinityTTL: 30 * time.Minute,
Failover: RoutingFailoverConfig{
Enabled: true,
MaxAttempts: 3,
RetryOnStatuses: []int{429, 500, 502, 503, 504},
RetryOnModelErrors: true,
},
},
},
Workflows: WorkflowsConfig{
RefreshInterval: time.Minute,
},
Expand Down Expand Up @@ -180,6 +194,9 @@ func Load() (*LoadResult, error) {
if err := loadFallbackConfig(&cfg.Fallback); err != nil {
return nil, err
}
if err := loadRoutingConfig(&cfg.Routing); err != nil {
return nil, err
}

// When no model cache backend was specified at all, default to local.
if cfg.Cache.Model.Local == nil && cfg.Cache.Model.Redis == nil {
Expand Down
16 changes: 9 additions & 7 deletions config/config_example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@ func TestLoad_FromEnvironment(t *testing.T) {
_ = os.Unsetenv("PORT")
}()

result, err := Load()
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
withTempDir(t, func(string) {
result, err := Load()
if err != nil {
t.Fatalf("unexpected error: %v", err)
}

if result.Config.Server.Port != "9090" {
t.Errorf("expected port 9090, got %s", result.Config.Server.Port)
}
if result.Config.Server.Port != "9090" {
t.Errorf("expected port 9090, got %s", result.Config.Server.Port)
}
})
}
78 changes: 78 additions & 0 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ func TestBuildDefaultConfig(t *testing.T) {
if cfg.Storage.Type != "sqlite" {
t.Errorf("expected Storage.Type=sqlite, got %s", cfg.Storage.Type)
}
if cfg.Routing.Defaults.Strategy != RoutingStrategyPriorityFailover {
t.Errorf("expected Routing.Defaults.Strategy=%q, got %q", RoutingStrategyPriorityFailover, cfg.Routing.Defaults.Strategy)
}
if cfg.Storage.SQLite.Path != "data/gomodel.db" {
t.Errorf("expected Storage.SQLite.Path=data/gomodel.db, got %s", cfg.Storage.SQLite.Path)
}
Expand Down Expand Up @@ -714,6 +717,81 @@ fallback:
})
}

func TestLoad_RoutingConfigYAML(t *testing.T) {
clearAllConfigEnvVars(t)

withTempDir(t, func(dir string) {
yaml := `
routing:
defaults:
strategy: weighted_round_robin
session_affinity: false
session_affinity_ttl: 45m
failover:
enabled: true
max_attempts: 5
retry_on_statuses: [429, 503]
retry_on_model_errors: false
model_pools:
claude-sonnet-4-6:
candidates:
- provider: anthropic_b
model: claude-sonnet-4-6
weight: 10
priority: 1
- provider: anthropic_a
model: claude-sonnet-4-6-20250929
weight: 8
priority: 2
`
if err := os.WriteFile(filepath.Join(dir, "config.yaml"), []byte(yaml), 0644); err != nil {
t.Fatalf("Failed to write config.yaml: %v", err)
}

result, err := Load()
if err != nil {
t.Fatalf("Load() failed: %v", err)
}
cfg := result.Config
if cfg.Routing.Defaults.Strategy != RoutingStrategyWeightedRoundRobin {
t.Fatalf("Routing.Defaults.Strategy = %q, want %q", cfg.Routing.Defaults.Strategy, RoutingStrategyWeightedRoundRobin)
}
if cfg.Routing.Defaults.SessionAffinity {
t.Fatal("expected SessionAffinity=false from YAML")
}
if cfg.Routing.Defaults.SessionAffinityTTL != 45*time.Minute {
t.Fatalf("SessionAffinityTTL = %s, want 45m", cfg.Routing.Defaults.SessionAffinityTTL)
}
pool := cfg.Routing.ModelPools["claude-sonnet-4-6"]
if len(pool.Candidates) != 2 {
t.Fatalf("len(pool.Candidates) = %d, want 2", len(pool.Candidates))
}
})
Comment on lines +720 to +769
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Assert loaded failover fields in the routing YAML test.

Line 730 through Line 735 sets failover inputs, but the test only validates strategy, affinity, TTL, and candidate count. A regression in failover parsing could pass unnoticed.

✅ Suggested test assertions
 		if cfg.Routing.Defaults.SessionAffinityTTL != 45*time.Minute {
 			t.Fatalf("SessionAffinityTTL = %s, want 45m", cfg.Routing.Defaults.SessionAffinityTTL)
 		}
+		failover := cfg.Routing.Defaults.Failover
+		if !failover.Enabled {
+			t.Fatal("expected Failover.Enabled=true from YAML")
+		}
+		if failover.MaxAttempts != 5 {
+			t.Fatalf("Failover.MaxAttempts = %d, want 5", failover.MaxAttempts)
+		}
+		if !reflect.DeepEqual(failover.RetryOnStatuses, []int{429, 503}) {
+			t.Fatalf("Failover.RetryOnStatuses = %v, want [429 503]", failover.RetryOnStatuses)
+		}
+		if failover.RetryOnModelErrors {
+			t.Fatal("expected Failover.RetryOnModelErrors=false from YAML")
+		}
 		pool := cfg.Routing.ModelPools["claude-sonnet-4-6"]

As per coding guidelines: “Add or update tests for behavior changes to cover request translation, response normalization, error handling, default configuration, and provider-specific parameter mapping.”

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@config/config_test.go` around lines 720 - 769, The test
TestLoad_RoutingConfigYAML is missing assertions for the failover block parsed
into cfg.Routing.Defaults.Failover; add assertions that
cfg.Routing.Defaults.Failover.Enabled == true,
cfg.Routing.Defaults.Failover.MaxAttempts == 5,
cfg.Routing.Defaults.Failover.RetryOnStatuses contains 429 and 503 (and has
expected length), and cfg.Routing.Defaults.Failover.RetryOnModelErrors == false
so regressions in failover parsing are caught.

}

// TestLoad_InvalidRoutingStrategy checks that Load rejects a config.yaml whose
// routing.defaults.strategy is not a supported strategy, and that the error
// names the offending setting.
//
// NOTE(review): withTempDir and clearAllConfigEnvVars are defined elsewhere in
// this package; presumably the callback runs with dir as the working directory
// so Load picks up the file written below — confirm against the helper.
func TestLoad_InvalidRoutingStrategy(t *testing.T) {
	clearAllConfigEnvVars(t)

	withTempDir(t, func(dir string) {
		// strategy must be nested under routing.defaults; as a top-level key
		// it would never reach the routing strategy validation.
		yaml := `
routing:
  defaults:
    strategy: invalid
`
		if err := os.WriteFile(filepath.Join(dir, "config.yaml"), []byte(yaml), 0644); err != nil {
			t.Fatalf("Failed to write config.yaml: %v", err)
		}

		_, err := Load()
		if err == nil {
			t.Fatal("expected Load() to fail for invalid routing strategy")
		}
		if !strings.Contains(err.Error(), "routing.defaults.strategy must be one of") {
			t.Fatalf("Load() error = %v, want routing strategy validation error", err)
		}
	})
}

func TestLoad_InvalidConfiguredProviderModelsMode(t *testing.T) {
clearAllConfigEnvVars(t)

Expand Down
152 changes: 152 additions & 0 deletions config/routing.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
package config

import (
"fmt"
"sort"
"strings"
"time"
)

// RoutingStrategy names the algorithm used to choose a candidate from a
// canonical model pool.
type RoutingStrategy string

// Supported routing strategies.
const (
	// RoutingStrategyPriorityFailover prefers the candidate with the lowest
	// priority number.
	RoutingStrategyPriorityFailover RoutingStrategy = "priority_failover"
	// RoutingStrategyWeightedRoundRobin distributes requests by candidate weight.
	RoutingStrategyWeightedRoundRobin RoutingStrategy = "weighted_round_robin"
)

// normalizeRoutingStrategy strips surrounding whitespace and lower-cases the
// value so that comparisons ignore padding and letter case.
func normalizeRoutingStrategy(strategy RoutingStrategy) RoutingStrategy {
	trimmed := strings.TrimSpace(string(strategy))
	return RoutingStrategy(strings.ToLower(trimmed))
}

// ResolveRoutingStrategy returns the normalized form of strategy, substituting
// RoutingStrategyPriorityFailover when the input is empty or whitespace only.
// Unknown non-empty values are returned normalized but unvalidated; use Valid
// to check them.
func ResolveRoutingStrategy(strategy RoutingStrategy) RoutingStrategy {
	if normalized := normalizeRoutingStrategy(strategy); normalized != "" {
		return normalized
	}
	return RoutingStrategyPriorityFailover
}

// Valid reports whether s, once normalized, is one of the supported strategies.
// The empty strategy is not valid.
func (s RoutingStrategy) Valid() bool {
	normalized := normalizeRoutingStrategy(s)
	return normalized == RoutingStrategyPriorityFailover ||
		normalized == RoutingStrategyWeightedRoundRobin
}

// RoutingConfig holds canonical model pool routing configuration.
type RoutingConfig struct {
	// Defaults holds the routing behavior read from the `defaults` key.
	Defaults RoutingDefaultsConfig `yaml:"defaults"`
	// ModelPools maps a canonical model name to its pool of provider
	// candidates, read from the `model_pools` key.
	ModelPools map[string]ModelPoolConfig `yaml:"model_pools"`
}

// RoutingDefaultsConfig holds default routing behavior for canonical pools.
type RoutingDefaultsConfig struct {
	// Strategy selects how a candidate is chosen; loadRoutingConfig resolves
	// an empty value to priority_failover and rejects unknown values.
	Strategy RoutingStrategy `yaml:"strategy"`
	// SessionAffinity is the sticky-routing toggle for canonical pools.
	SessionAffinity bool `yaml:"session_affinity"`
	// SessionAffinityTTL is the lifetime of a session affinity binding;
	// loadRoutingConfig replaces a non-positive value with 30 minutes.
	SessionAffinityTTL time.Duration `yaml:"session_affinity_ttl"`
	// Failover holds the retry/failover settings applied within a pool.
	Failover RoutingFailoverConfig `yaml:"failover"`
}

// RoutingFailoverConfig controls fallback between candidates within the same pool.
type RoutingFailoverConfig struct {
	// Enabled turns failover to the next pool candidate on or off.
	Enabled bool `yaml:"enabled"`
	// MaxAttempts is the attempt budget across pool candidates;
	// loadRoutingConfig replaces a non-positive value with 3.
	MaxAttempts int `yaml:"max_attempts"`
	// RetryOnStatuses lists the provider statuses that qualify for
	// retry/failover; loadRoutingConfig replaces an empty list with
	// 429, 500, 502, 503, 504.
	RetryOnStatuses []int `yaml:"retry_on_statuses"`
	// RetryOnModelErrors additionally enables retry/failover on
	// model-unavailable style errors.
	RetryOnModelErrors bool `yaml:"retry_on_model_errors"`
}

// ModelPoolConfig maps one public canonical model name to concrete provider candidates.
type ModelPoolConfig struct {
	// Candidates lists the provider/model pairs that can serve the canonical
	// model; loadRoutingConfig requires at least one entry.
	Candidates []ModelPoolCandidateConfig `yaml:"candidates"`
}

// ModelPoolCandidateConfig defines one concrete provider/model candidate.
type ModelPoolCandidateConfig struct {
	// Provider is the provider name for this candidate; required, and
	// trimmed of surrounding whitespace by loadRoutingConfig.
	Provider string `yaml:"provider"`
	// Model is the model ID to use with Provider; required, and trimmed of
	// surrounding whitespace by loadRoutingConfig.
	Model string `yaml:"model"`
	// Priority must be > 0 when the default strategy is priority_failover.
	Priority int `yaml:"priority"`
	// Weight must be > 0 when the default strategy is weighted_round_robin.
	Weight int `yaml:"weight"`
}

func loadRoutingConfig(cfg *RoutingConfig) error {
if cfg == nil {
return nil
}

cfg.Defaults.Strategy = ResolveRoutingStrategy(cfg.Defaults.Strategy)
if !cfg.Defaults.Strategy.Valid() {
return fmt.Errorf("routing.defaults.strategy must be one of: priority_failover, weighted_round_robin")
}
if cfg.Defaults.SessionAffinityTTL <= 0 {
cfg.Defaults.SessionAffinityTTL = 30 * time.Minute
}
Comment on lines +75 to +84
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Partial defaults disable failover

The loader restores durations, attempts, and retry statuses when they are omitted, but it does not restore boolean defaults. If a config file includes a partial routing.defaults block, YAML zero values can turn session_affinity, failover.enabled, or retry_on_model_errors from their documented defaults of true into false, so pool failover or affinity can be silently disabled by an otherwise unrelated routing setting.

Context Used: CLAUDE.md (source)

if cfg.Defaults.Failover.MaxAttempts <= 0 {
cfg.Defaults.Failover.MaxAttempts = 3
}
if len(cfg.Defaults.Failover.RetryOnStatuses) == 0 {
cfg.Defaults.Failover.RetryOnStatuses = []int{429, 500, 502, 503, 504}
}
Comment on lines +88 to +90
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Validate retry_on_statuses values before accepting config.

Invalid HTTP codes (e.g., 0, 700, negative values) are currently accepted and can silently break failover matching at runtime. Add range validation (100..599) during config load.

Proposed fix
 	if len(cfg.Defaults.Failover.RetryOnStatuses) == 0 {
 		cfg.Defaults.Failover.RetryOnStatuses = []int{429, 500, 502, 503, 504}
 	}
+	for idx, status := range cfg.Defaults.Failover.RetryOnStatuses {
+		if status < 100 || status > 599 {
+			return fmt.Errorf("routing.defaults.failover.retry_on_statuses[%d] must be a valid HTTP status code (100-599)", idx)
+		}
+	}
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@config/routing.go` around lines 88 - 90, Validate and reject or sanitize
invalid HTTP status codes in cfg.Defaults.Failover.RetryOnStatuses during config
load: in the routing config initialization (the code that sets
cfg.Defaults.Failover.RetryOnStatuses in config/routing.go) iterate the slice
and ensure each status is within the HTTP range 100..599, either filtering out
invalid values or returning a config validation error; update the
default-setting block that currently sets []int{429,500,502,503,504} to perform
this validation (refer to cfg.Defaults.Failover.RetryOnStatuses) so runtime
failover matching cannot receive values like 0, negative, or >599.


if len(cfg.ModelPools) == 0 {
cfg.ModelPools = nil
return nil
}

normalized := make(map[string]ModelPoolConfig, len(cfg.ModelPools))
keys := make([]string, 0, len(cfg.ModelPools))
for key := range cfg.ModelPools {
keys = append(keys, key)
}
sort.Strings(keys)

for _, key := range keys {
trimmedKey := strings.TrimSpace(key)
if trimmedKey == "" {
return fmt.Errorf("routing.model_pools: model key cannot be empty")
}
if _, exists := normalized[trimmedKey]; exists {
return fmt.Errorf("routing.model_pools: duplicate model key after trimming: %q", trimmedKey)
}
pool := cfg.ModelPools[key]
if len(pool.Candidates) == 0 {
return fmt.Errorf("routing.model_pools[%q]: at least one candidate is required", trimmedKey)
}

seenCandidates := make(map[string]struct{}, len(pool.Candidates))
normalizedCandidates := make([]ModelPoolCandidateConfig, 0, len(pool.Candidates))
for idx, candidate := range pool.Candidates {
candidate.Provider = strings.TrimSpace(candidate.Provider)
candidate.Model = strings.TrimSpace(candidate.Model)
if candidate.Provider == "" {
return fmt.Errorf("routing.model_pools[%q].candidates[%d].provider is required", trimmedKey, idx)
}
if candidate.Model == "" {
return fmt.Errorf("routing.model_pools[%q].candidates[%d].model is required", trimmedKey, idx)
}
candidateKey := candidate.Provider + "/" + candidate.Model
if _, exists := seenCandidates[candidateKey]; exists {
return fmt.Errorf("routing.model_pools[%q]: duplicate candidate %q", trimmedKey, candidateKey)
}
seenCandidates[candidateKey] = struct{}{}

switch cfg.Defaults.Strategy {
case RoutingStrategyPriorityFailover:
if candidate.Priority <= 0 {
return fmt.Errorf("routing.model_pools[%q].candidates[%d].priority must be > 0 for priority_failover", trimmedKey, idx)
}
case RoutingStrategyWeightedRoundRobin:
if candidate.Weight <= 0 {
return fmt.Errorf("routing.model_pools[%q].candidates[%d].weight must be > 0 for weighted_round_robin", trimmedKey, idx)
}
}

normalizedCandidates = append(normalizedCandidates, candidate)
}
normalized[trimmedKey] = ModelPoolConfig{Candidates: normalizedCandidates}
}

cfg.ModelPools = normalized
return nil
}
Loading