From e11ec028c477b887274ebe2accf2f02f66992868 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Sat, 4 Apr 2026 23:30:05 -0400 Subject: [PATCH 1/5] Add regex-based instance limit overrides --- DEVELOPMENT.md | 1 + cmd/api/config/config.go | 40 +++++++++++-- cmd/api/config/config_test.go | 84 ++++++++++++++++++++++++++++ cmd/api/config/limits_patterns.go | 53 ++++++++++++++++++ config.example.darwin.yaml | 6 ++ config.example.yaml | 6 ++ lib/instances/create.go | 12 +--- lib/instances/fork.go | 3 + lib/instances/manager.go | 3 +- lib/instances/name_limits.go | 77 +++++++++++++++++++++++++ lib/instances/name_limits_test.go | 93 +++++++++++++++++++++++++++++++ lib/instances/snapshot.go | 3 + lib/providers/instance_limits.go | 89 +++++++++++++++++++++++++++++ lib/providers/providers.go | 24 +------- 14 files changed, 456 insertions(+), 38 deletions(-) create mode 100644 cmd/api/config/limits_patterns.go create mode 100644 lib/instances/name_limits.go create mode 100644 lib/instances/name_limits_test.go create mode 100644 lib/providers/instance_limits.go diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 9247aa27..0ba45de0 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -122,6 +122,7 @@ Common settings: | `metrics.resource_refresh_interval` | Refresh interval for cached resource capacity metrics | `120s` | | `limits.max_concurrent_builds` | Max concurrent image builds | `1` | | `limits.max_overlay_size` | Max overlay filesystem size | `100GB` | +| `limits.name_patterns` | Ordered regex overrides for per-instance CPU/memory/overlay limits | _(empty)_ | | `acme.email` | Email for ACME certificate registration | _(empty)_ | | `acme.dns_provider` | DNS provider for ACME challenges | _(empty)_ | | `acme.cloudflare_api_token` | Cloudflare API token | _(empty)_ | diff --git a/cmd/api/config/config.go b/cmd/api/config/config.go index ce7c90a0..4215fa39 100644 --- a/cmd/api/config/config.go +++ b/cmd/api/config/config.go @@ -153,12 +153,13 @@ type RegistryConfig struct { // LimitsConfig holds per-instance and aggregate resource limits. type LimitsConfig struct { - MaxVcpusPerInstance int `koanf:"max_vcpus_per_instance"` - MaxMemoryPerInstance string `koanf:"max_memory_per_instance"` - MaxTotalVolumeStorage string `koanf:"max_total_volume_storage"` - MaxConcurrentBuilds int `koanf:"max_concurrent_builds"` - MaxOverlaySize string `koanf:"max_overlay_size"` - MaxImageStorage float64 `koanf:"max_image_storage"` + MaxVcpusPerInstance int `koanf:"max_vcpus_per_instance"` + MaxMemoryPerInstance string `koanf:"max_memory_per_instance"` + MaxTotalVolumeStorage string `koanf:"max_total_volume_storage"` + MaxConcurrentBuilds int `koanf:"max_concurrent_builds"` + MaxOverlaySize string `koanf:"max_overlay_size"` + MaxImageStorage float64 `koanf:"max_image_storage"` + NamePatterns []NamePatternLimitsConfig `koanf:"name_patterns"` } // OversubscriptionConfig holds oversubscription ratios (1.0 = no oversubscription). @@ -361,6 +362,7 @@ func defaultConfig() *Config { MaxConcurrentBuilds: 1, MaxOverlaySize: "100GB", MaxImageStorage: 0.2, + NamePatterns: nil, }, Oversubscription: OversubscriptionConfig{ @@ -532,6 +534,24 @@ func (c *Config) Validate() error { if c.Build.Timeout <= 0 { return fmt.Errorf("build.timeout must be positive, got %d", c.Build.Timeout) } + if c.Limits.MaxVcpusPerInstance < 0 { + return fmt.Errorf("limits.max_vcpus_per_instance must be >= 0, got %d", c.Limits.MaxVcpusPerInstance) + } + if err := validateOptionalByteSize("limits.max_memory_per_instance", c.Limits.MaxMemoryPerInstance); err != nil { + return err + } + if err := validateByteSize("limits.max_overlay_size", c.Limits.MaxOverlaySize); err != nil { + return err + } + if c.Limits.MaxConcurrentBuilds <= 0 { + return fmt.Errorf("limits.max_concurrent_builds must be positive, got %d", c.Limits.MaxConcurrentBuilds) + } + if c.Limits.MaxImageStorage < 0 { + return fmt.Errorf("limits.max_image_storage must be >= 0, got %v", c.Limits.MaxImageStorage) + } + if err := validateNamePatternLimits(c.Limits.NamePatterns); err != nil { + return err + } if err := validateDuration("images.auto_delete.unused_for", c.Images.AutoDelete.UnusedFor); err != nil { return err } @@ -606,6 +626,14 @@ func validateByteSize(field string, value string) error { return nil } +func validateOptionalByteSize(field string, value string) error { + value = strings.TrimSpace(value) + if value == "" { + return nil + } + return validateByteSize(field, value) +} + func validateDuration(field string, value string) error { if strings.TrimSpace(value) == "" { return fmt.Errorf("%s must not be empty", field) diff --git a/cmd/api/config/config_test.go b/cmd/api/config/config_test.go index bb451957..a48aca00 100644 --- a/cmd/api/config/config_test.go +++ b/cmd/api/config/config_test.go @@ -288,3 +288,87 @@ func TestValidateAllowsDisabledSnapshotCompressionDefaultWithoutValidAlgorithm(t t.Fatalf("expected disabled snapshot compression default to ignore algorithm/level, got %v", err) } } + +func TestLoadParsesNamePatternLimits(t *testing.T) { + tmp := t.TempDir() + cfgPath := filepath.Join(tmp, "config.yaml") + configYAML := ` +limits: + name_patterns: + - pattern: '^prod-' + max_vcpus_per_instance: 8 + max_memory_per_instance: 64GB + - pattern: '^tiny-' + max_overlay_size: 5GB +` + if err := os.WriteFile(cfgPath, []byte(configYAML), 0600); err != nil { + t.Fatalf("write temp config: %v", err) + } + + cfg, err := Load(cfgPath) + if err != nil { + t.Fatalf("load config: %v", err) + } + + if len(cfg.Limits.NamePatterns) != 2 { + t.Fatalf("expected 2 name pattern limit entries, got %d", len(cfg.Limits.NamePatterns)) + } + if cfg.Limits.NamePatterns[0].Pattern != "^prod-" { + t.Fatalf("expected first pattern to load, got %q", cfg.Limits.NamePatterns[0].Pattern) + } + if cfg.Limits.NamePatterns[0].MaxVcpusPerInstance == nil || *cfg.Limits.NamePatterns[0].MaxVcpusPerInstance != 8 { + t.Fatalf("expected first max_vcpus_per_instance to load, got %#v", cfg.Limits.NamePatterns[0].MaxVcpusPerInstance) + } + if cfg.Limits.NamePatterns[0].MaxMemoryPerInstance == nil || *cfg.Limits.NamePatterns[0].MaxMemoryPerInstance != "64GB" { + t.Fatalf("expected first max_memory_per_instance to load, got %#v", cfg.Limits.NamePatterns[0].MaxMemoryPerInstance) + } + if cfg.Limits.NamePatterns[1].MaxOverlaySize == nil || *cfg.Limits.NamePatterns[1].MaxOverlaySize != "5GB" { + t.Fatalf("expected second max_overlay_size to load, got %#v", cfg.Limits.NamePatterns[1].MaxOverlaySize) + } +} + +func TestValidateRejectsInvalidNamePatternLimitRegex(t *testing.T) { + cfg := defaultConfig() + cfg.Limits.NamePatterns = []NamePatternLimitsConfig{ + {Pattern: "["}, + } + + err := cfg.Validate() + if err == nil || !strings.Contains(err.Error(), "limits.name_patterns[0].pattern") { + t.Fatalf("expected invalid regex validation error, got %v", err) + } +} + +func TestValidateRejectsInvalidNamePatternLimitSize(t *testing.T) { + cfg := defaultConfig() + cfg.Limits.NamePatterns = []NamePatternLimitsConfig{ + { + Pattern: "^prod-", + MaxMemoryPerInstance: strPtr("definitely-not-a-size"), + }, + } + + err := cfg.Validate() + if err == nil || !strings.Contains(err.Error(), "limits.name_patterns[0].max_memory_per_instance") { + t.Fatalf("expected invalid size validation error, got %v", err) + } +} + +func TestValidateAllowsUnlimitedNamePatternLimitSize(t *testing.T) { + cfg := defaultConfig() + cfg.Limits.NamePatterns = []NamePatternLimitsConfig{ + { + Pattern: "^prod-", + MaxMemoryPerInstance: strPtr("0"), + MaxOverlaySize: strPtr("0"), + }, + } + + if err := cfg.Validate(); err != nil { + t.Fatalf("expected zero-valued size overrides to validate, got %v", err) + } +} + +func strPtr(v string) *string { + return &v +} diff --git a/cmd/api/config/limits_patterns.go b/cmd/api/config/limits_patterns.go new file mode 100644 index 00000000..dfa926b1 --- /dev/null +++ b/cmd/api/config/limits_patterns.go @@ -0,0 +1,53 @@ +package config + +import ( + "fmt" + "regexp" + "strings" +) + +// NamePatternLimitsConfig holds per-name regex resource limit overrides. +// The first matching pattern wins. Omitted fields fall back to the global limits block. +type NamePatternLimitsConfig struct { + Pattern string `koanf:"pattern"` + MaxVcpusPerInstance *int `koanf:"max_vcpus_per_instance"` + MaxMemoryPerInstance *string `koanf:"max_memory_per_instance"` + MaxOverlaySize *string `koanf:"max_overlay_size"` +} + +func validateNamePatternLimits(patterns []NamePatternLimitsConfig) error { + for i := range patterns { + cfg := &patterns[i] + cfg.Pattern = strings.TrimSpace(cfg.Pattern) + if cfg.Pattern == "" { + return fmt.Errorf("limits.name_patterns[%d].pattern must not be empty", i) + } + if _, err := regexp.Compile(cfg.Pattern); err != nil { + return fmt.Errorf("limits.name_patterns[%d].pattern must be a valid regex, got %q: %w", i, cfg.Pattern, err) + } + if cfg.MaxVcpusPerInstance != nil && *cfg.MaxVcpusPerInstance < 0 { + return fmt.Errorf("limits.name_patterns[%d].max_vcpus_per_instance must be >= 0, got %d", i, *cfg.MaxVcpusPerInstance) + } + if cfg.MaxMemoryPerInstance != nil { + value := strings.TrimSpace(*cfg.MaxMemoryPerInstance) + if value == "" { + return fmt.Errorf("limits.name_patterns[%d].max_memory_per_instance must not be empty", i) + } + *cfg.MaxMemoryPerInstance = value + if err := validateOptionalByteSize(fmt.Sprintf("limits.name_patterns[%d].max_memory_per_instance", i), value); err != nil { + return err + } + } + if cfg.MaxOverlaySize != nil { + value := strings.TrimSpace(*cfg.MaxOverlaySize) + if value == "" { + return fmt.Errorf("limits.name_patterns[%d].max_overlay_size must not be empty", i) + } + *cfg.MaxOverlaySize = value + if err := validateOptionalByteSize(fmt.Sprintf("limits.name_patterns[%d].max_overlay_size", i), value); err != nil { + return err + } + } + } + return nil +} diff --git a/config.example.darwin.yaml b/config.example.darwin.yaml index 5b16e672..5b2ec6fc 100644 --- a/config.example.darwin.yaml +++ b/config.example.darwin.yaml @@ -115,6 +115,12 @@ limits: max_vcpus_per_instance: 4 max_memory_per_instance: 8GB # max_total_volume_storage: "" # 0 or empty = unlimited + # name_patterns: + # - pattern: '^build-' + # max_vcpus_per_instance: 8 + # - pattern: '^tiny-' + # max_memory_per_instance: 2GB + # max_overlay_size: 5GB # ============================================================================= # OpenTelemetry (optional, same as Linux) diff --git a/config.example.yaml b/config.example.yaml index 34e59d14..a76a8380 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -146,3 +146,9 @@ data_dir: /var/lib/hypeman # max_total_volume_storage: "" # 0 or empty = unlimited # max_concurrent_builds: 1 # max_overlay_size: 100GB +# name_patterns: +# - pattern: '^prod-' +# max_vcpus_per_instance: 32 +# max_memory_per_instance: 64GB +# - pattern: '^sandbox-' +# max_overlay_size: 20GB diff --git a/lib/instances/create.go b/lib/instances/create.go index 1abd4c21..4cb09cf8 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -177,22 +177,14 @@ func (m *manager) createInstance( if overlaySize == 0 { overlaySize = 10 * 1024 * 1024 * 1024 // 10GB default } - // Validate overlay size against max - if overlaySize > m.limits.MaxOverlaySize { - return nil, fmt.Errorf("overlay size %d exceeds maximum allowed size %d", overlaySize, m.limits.MaxOverlaySize) - } vcpus := req.Vcpus if vcpus == 0 { vcpus = 2 } - // Validate per-instance resource limits - if m.limits.MaxVcpusPerInstance > 0 && vcpus > m.limits.MaxVcpusPerInstance { - return nil, fmt.Errorf("vcpus %d exceeds maximum allowed %d per instance", vcpus, m.limits.MaxVcpusPerInstance) - } totalMemory := size + hotplugSize - if m.limits.MaxMemoryPerInstance > 0 && totalMemory > m.limits.MaxMemoryPerInstance { - return nil, fmt.Errorf("total memory %d (size + hotplug_size) exceeds maximum allowed %d per instance", totalMemory, m.limits.MaxMemoryPerInstance) + if err := validateResourceLimitsForName(req.Name, m.limits, overlaySize, vcpus, totalMemory); err != nil { + return nil, err } // Validate aggregate resource limits via ResourceValidator (if configured) diff --git a/lib/instances/fork.go b/lib/instances/fork.go index 2c75c94e..c9e2603b 100644 --- a/lib/instances/fork.go +++ b/lib/instances/fork.go @@ -297,6 +297,9 @@ func (m *manager) forkInstanceFromStoppedOrStandby(ctx context.Context, id strin forkMeta.IP = "" forkMeta.MAC = "" } + if err := validateResourceLimitsForName(req.Name, m.limits, forkMeta.OverlaySize, forkMeta.Vcpus, forkMeta.Size+forkMeta.HotplugSize); err != nil { + return nil, err + } if source.State == StateStandby { snapshotConfigPath := m.paths.InstanceSnapshotConfig(forkID) diff --git a/lib/instances/manager.go b/lib/instances/manager.go index 4ef8d9bf..90f5ede7 100644 --- a/lib/instances/manager.go +++ b/lib/instances/manager.go @@ -74,9 +74,10 @@ type ImageUsageRecorderSetter interface { // ResourceLimits contains configurable resource limits for instances type ResourceLimits struct { - MaxOverlaySize int64 // Maximum overlay disk size in bytes per instance + MaxOverlaySize int64 // Maximum overlay disk size in bytes per instance (0 = unlimited) MaxVcpusPerInstance int // Maximum vCPUs per instance (0 = unlimited) MaxMemoryPerInstance int64 // Maximum memory in bytes per instance (0 = unlimited) + NamePatterns []NamedResourceLimit } // ResourceValidator validates if resources can be allocated diff --git a/lib/instances/name_limits.go b/lib/instances/name_limits.go new file mode 100644 index 00000000..4c4e756b --- /dev/null +++ b/lib/instances/name_limits.go @@ -0,0 +1,77 @@ +package instances + +import ( + "fmt" + "regexp" +) + +// NamedResourceLimit applies per-instance limits to names matching Pattern. +// The first matching pattern wins, and omitted fields fall back to global limits. +type NamedResourceLimit struct { + Pattern string + re *regexp.Regexp + MaxVcpusPerInstance *int + MaxMemoryPerInstance *int64 + MaxOverlaySize *int64 +} + +func NewNamedResourceLimit(pattern string, maxVcpus *int, maxMemory *int64, maxOverlay *int64) (NamedResourceLimit, error) { + re, err := regexp.Compile(pattern) + if err != nil { + return NamedResourceLimit{}, fmt.Errorf("compile name limit regex %q: %w", pattern, err) + } + + return NamedResourceLimit{ + Pattern: pattern, + re: re, + MaxVcpusPerInstance: maxVcpus, + MaxMemoryPerInstance: maxMemory, + MaxOverlaySize: maxOverlay, + }, nil +} + +func (l NamedResourceLimit) matches(name string) bool { + return l.re != nil && l.re.MatchString(name) +} + +func (l ResourceLimits) ForName(name string) ResourceLimits { + resolved := ResourceLimits{ + MaxOverlaySize: l.MaxOverlaySize, + MaxVcpusPerInstance: l.MaxVcpusPerInstance, + MaxMemoryPerInstance: l.MaxMemoryPerInstance, + } + + for _, pattern := range l.NamePatterns { + if !pattern.matches(name) { + continue + } + if pattern.MaxOverlaySize != nil { + resolved.MaxOverlaySize = *pattern.MaxOverlaySize + } + if pattern.MaxVcpusPerInstance != nil { + resolved.MaxVcpusPerInstance = *pattern.MaxVcpusPerInstance + } + if pattern.MaxMemoryPerInstance != nil { + resolved.MaxMemoryPerInstance = *pattern.MaxMemoryPerInstance + } + break + } + + return resolved +} + +func validateResourceLimitsForName(name string, limits ResourceLimits, overlaySize int64, vcpus int, totalMemory int64) error { + effective := limits.ForName(name) + + if effective.MaxOverlaySize > 0 && overlaySize > effective.MaxOverlaySize { + return fmt.Errorf("overlay size %d exceeds maximum allowed size %d", overlaySize, effective.MaxOverlaySize) + } + if effective.MaxVcpusPerInstance > 0 && vcpus > effective.MaxVcpusPerInstance { + return fmt.Errorf("vcpus %d exceeds maximum allowed %d per instance", vcpus, effective.MaxVcpusPerInstance) + } + if effective.MaxMemoryPerInstance > 0 && totalMemory > effective.MaxMemoryPerInstance { + return fmt.Errorf("total memory %d (size + hotplug_size) exceeds maximum allowed %d per instance", totalMemory, effective.MaxMemoryPerInstance) + } + + return nil +} diff --git a/lib/instances/name_limits_test.go b/lib/instances/name_limits_test.go new file mode 100644 index 00000000..b7f2de45 --- /dev/null +++ b/lib/instances/name_limits_test.go @@ -0,0 +1,93 @@ +package instances + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestResourceLimitsForName_FirstMatchWins(t *testing.T) { + t.Parallel() + + eight := 8 + four := 4 + sixtyFourGiB := int64(64 * 1024 * 1024 * 1024) + thirtyTwoGiB := int64(32 * 1024 * 1024 * 1024) + twentyGiB := int64(20 * 1024 * 1024 * 1024) + + first, err := NewNamedResourceLimit("^prod-.*", &eight, &sixtyFourGiB, nil) + require.NoError(t, err) + second, err := NewNamedResourceLimit("^prod-api-.*", &four, &thirtyTwoGiB, &twentyGiB) + require.NoError(t, err) + + limits := ResourceLimits{ + MaxOverlaySize: 10 * 1024 * 1024 * 1024, + MaxVcpusPerInstance: 2, + MaxMemoryPerInstance: 8 * 1024 * 1024 * 1024, + NamePatterns: []NamedResourceLimit{first, second}, + } + + resolved := limits.ForName("prod-api-1") + assert.Equal(t, 8, resolved.MaxVcpusPerInstance) + assert.Equal(t, sixtyFourGiB, resolved.MaxMemoryPerInstance) + assert.Equal(t, int64(10*1024*1024*1024), resolved.MaxOverlaySize) +} + +func TestResourceLimitsForName_FallsBackWhenFieldOmitted(t *testing.T) { + t.Parallel() + + twentyGiB := int64(20 * 1024 * 1024 * 1024) + override, err := NewNamedResourceLimit("^small-.*", nil, nil, &twentyGiB) + require.NoError(t, err) + + limits := ResourceLimits{ + MaxOverlaySize: 100 * 1024 * 1024 * 1024, + MaxVcpusPerInstance: 16, + MaxMemoryPerInstance: 32 * 1024 * 1024 * 1024, + NamePatterns: []NamedResourceLimit{override}, + } + + resolved := limits.ForName("small-worker") + assert.Equal(t, int64(20*1024*1024*1024), resolved.MaxOverlaySize) + assert.Equal(t, 16, resolved.MaxVcpusPerInstance) + assert.Equal(t, int64(32*1024*1024*1024), resolved.MaxMemoryPerInstance) +} + +func TestValidateResourceLimitsForName_ZeroOverrideMeansUnlimited(t *testing.T) { + t.Parallel() + + zeroInt := 0 + zeroBytes := int64(0) + override, err := NewNamedResourceLimit("^burst-.*", &zeroInt, &zeroBytes, &zeroBytes) + require.NoError(t, err) + + limits := ResourceLimits{ + MaxOverlaySize: 5 * 1024 * 1024 * 1024, + MaxVcpusPerInstance: 2, + MaxMemoryPerInstance: 4 * 1024 * 1024 * 1024, + NamePatterns: []NamedResourceLimit{override}, + } + + err = validateResourceLimitsForName("burst-worker", limits, 50*1024*1024*1024, 32, 128*1024*1024*1024) + require.NoError(t, err) +} + +func TestValidateResourceLimitsForName_RejectsWhenResolvedLimitExceeded(t *testing.T) { + t.Parallel() + + four := 4 + override, err := NewNamedResourceLimit("^db-.*", &four, nil, nil) + require.NoError(t, err) + + limits := ResourceLimits{ + MaxOverlaySize: 100 * 1024 * 1024 * 1024, + MaxVcpusPerInstance: 16, + MaxMemoryPerInstance: 64 * 1024 * 1024 * 1024, + NamePatterns: []NamedResourceLimit{override}, + } + + err = validateResourceLimitsForName("db-primary", limits, 10*1024*1024*1024, 8, 16*1024*1024*1024) + require.Error(t, err) + assert.Contains(t, err.Error(), "vcpus 8 exceeds maximum allowed 4 per instance") +} diff --git a/lib/instances/snapshot.go b/lib/instances/snapshot.go index be982f74..75505e0e 100644 --- a/lib/instances/snapshot.go +++ b/lib/instances/snapshot.go @@ -450,6 +450,9 @@ func (m *manager) forkSnapshot(ctx context.Context, snapshotID string, req ForkS forkMeta.IP = "" forkMeta.MAC = "" } + if err := validateResourceLimitsForName(req.Name, m.limits, forkMeta.OverlaySize, forkMeta.Vcpus, forkMeta.Size+forkMeta.HotplugSize); err != nil { + return nil, err + } if rec.Snapshot.Kind == SnapshotKindStandby { netCfg := (*hypervisor.ForkNetworkConfig)(nil) diff --git a/lib/providers/instance_limits.go b/lib/providers/instance_limits.go new file mode 100644 index 00000000..1411875b --- /dev/null +++ b/lib/providers/instance_limits.go @@ -0,0 +1,89 @@ +package providers + +import ( + "fmt" + "strings" + + "github.com/c2h5oh/datasize" + "github.com/kernel/hypeman/cmd/api/config" + "github.com/kernel/hypeman/lib/instances" +) + +func parseInstanceLimits(cfg *config.Config) (instances.ResourceLimits, error) { + maxOverlaySize, err := parseByteSizeLimit("limits.max_overlay_size", cfg.Limits.MaxOverlaySize) + if err != nil { + return instances.ResourceLimits{}, err + } + + maxMemoryPerInstance, err := parseOptionalByteSizeLimit("limits.max_memory_per_instance", cfg.Limits.MaxMemoryPerInstance) + if err != nil { + return instances.ResourceLimits{}, err + } + + namePatterns := make([]instances.NamedResourceLimit, 0, len(cfg.Limits.NamePatterns)) + for i, patternCfg := range cfg.Limits.NamePatterns { + var maxVcpus *int + if patternCfg.MaxVcpusPerInstance != nil { + value := *patternCfg.MaxVcpusPerInstance + maxVcpus = &value + } + + maxMemory, err := parseOptionalByteSizePtr(fmt.Sprintf("limits.name_patterns[%d].max_memory_per_instance", i), patternCfg.MaxMemoryPerInstance) + if err != nil { + return instances.ResourceLimits{}, err + } + maxOverlay, err := parseOptionalByteSizePtr(fmt.Sprintf("limits.name_patterns[%d].max_overlay_size", i), patternCfg.MaxOverlaySize) + if err != nil { + return instances.ResourceLimits{}, err + } + + pattern, err := instances.NewNamedResourceLimit(patternCfg.Pattern, maxVcpus, maxMemory, maxOverlay) + if err != nil { + return instances.ResourceLimits{}, fmt.Errorf("parse limits.name_patterns[%d]: %w", i, err) + } + namePatterns = append(namePatterns, pattern) + } + + return instances.ResourceLimits{ + MaxOverlaySize: maxOverlaySize, + MaxVcpusPerInstance: cfg.Limits.MaxVcpusPerInstance, + MaxMemoryPerInstance: maxMemoryPerInstance, + NamePatterns: namePatterns, + }, nil +} + +func parseOptionalByteSizePtr(field string, value *string) (*int64, error) { + if value == nil { + return nil, nil + } + + parsed, err := parseOptionalByteSizeLimit(field, *value) + if err != nil { + return nil, err + } + + return &parsed, nil +} + +func parseOptionalByteSizeLimit(field string, value string) (int64, error) { + value = strings.TrimSpace(value) + if value == "" { + return 0, nil + } + + return parseByteSizeLimit(field, value) +} + +func parseByteSizeLimit(field string, value string) (int64, error) { + value = strings.TrimSpace(value) + if value == "" { + return 0, fmt.Errorf("%s must not be empty", field) + } + + var size datasize.ByteSize + if err := size.UnmarshalText([]byte(value)); err != nil { + return 0, fmt.Errorf("%s must be a valid byte size, got %q: %w", field, value, err) + } + + return int64(size), nil +} diff --git a/lib/providers/providers.go b/lib/providers/providers.go index 4c112915..04f9bc54 100644 --- a/lib/providers/providers.go +++ b/lib/providers/providers.go @@ -9,7 +9,6 @@ import ( "strings" "time" - "github.com/c2h5oh/datasize" "github.com/kernel/hypeman/cmd/api/config" "github.com/kernel/hypeman/lib/builds" "github.com/kernel/hypeman/lib/devices" @@ -100,30 +99,13 @@ func ProvideDeviceManager(p *paths.Paths) devices.Manager { func ProvideInstanceManager(p *paths.Paths, cfg *config.Config, imageManager images.Manager, systemManager system.Manager, networkManager network.Manager, deviceManager devices.Manager, volumeManager volumes.Manager) (instances.Manager, error) { firecracker.SetCustomBinaryPath(cfg.Hypervisor.FirecrackerBinaryPath) - // Parse max overlay size from config - var maxOverlaySize datasize.ByteSize - if err := maxOverlaySize.UnmarshalText([]byte(cfg.Limits.MaxOverlaySize)); err != nil { - return nil, fmt.Errorf("failed to parse MAX_OVERLAY_SIZE '%s': %w (expected format like '100GB', '50G', '10GiB')", cfg.Limits.MaxOverlaySize, err) - } - - // Parse max memory per instance (empty or "0" means unlimited) - var maxMemoryPerInstance int64 - if cfg.Limits.MaxMemoryPerInstance != "" && cfg.Limits.MaxMemoryPerInstance != "0" { - var memSize datasize.ByteSize - if err := memSize.UnmarshalText([]byte(cfg.Limits.MaxMemoryPerInstance)); err != nil { - return nil, fmt.Errorf("failed to parse MAX_MEMORY_PER_INSTANCE '%s': %w", cfg.Limits.MaxMemoryPerInstance, err) - } - maxMemoryPerInstance = int64(memSize) + limits, err := parseInstanceLimits(cfg) + if err != nil { + return nil, err } // Note: Aggregate CPU/memory limits are now handled via oversubscription ratios // in the ResourceManager, wired up via SetResourceValidator after initialization. - limits := instances.ResourceLimits{ - MaxOverlaySize: int64(maxOverlaySize), - MaxVcpusPerInstance: cfg.Limits.MaxVcpusPerInstance, - MaxMemoryPerInstance: maxMemoryPerInstance, - } - meter := otel.GetMeterProvider().Meter("hypeman") tracer := otel.GetTracerProvider().Tracer("hypeman/instances") defaultHypervisor := hypervisor.Type(cfg.Hypervisor.Default) From 958777e74d30433ab99e844bfa50f6cb228394b8 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Sun, 5 Apr 2026 02:04:42 -0400 Subject: [PATCH 2/5] Fix regex limit providers build --- lib/providers/providers.go | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/providers/providers.go b/lib/providers/providers.go index 04f9bc54..5ca341eb 100644 --- a/lib/providers/providers.go +++ b/lib/providers/providers.go @@ -9,6 +9,7 @@ import ( "strings" "time" + "github.com/c2h5oh/datasize" "github.com/kernel/hypeman/cmd/api/config" "github.com/kernel/hypeman/lib/builds" "github.com/kernel/hypeman/lib/devices" From abded19c42cd10fc710795836f6ea9a28839146a Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Sun, 5 Apr 2026 02:16:08 -0400 Subject: [PATCH 3/5] Add per-pattern provisioned resource caps --- DEVELOPMENT.md | 2 +- cmd/api/config/config_test.go | 27 +++++ cmd/api/config/limits_patterns.go | 109 +++++++++++++++++-- config.example.darwin.yaml | 3 + config.example.yaml | 5 + lib/instances/create.go | 3 + lib/instances/fork.go | 3 + lib/instances/name_limits.go | 74 ++++++++++--- lib/instances/name_limits_test.go | 94 ++++++++++++++++- lib/instances/name_provisioned_limits.go | 129 +++++++++++++++++++++++ lib/instances/snapshot.go | 3 + lib/providers/instance_limits.go | 51 ++++++++- lib/providers/providers_test.go | 70 ++++++++++++ 13 files changed, 544 insertions(+), 29 deletions(-) create mode 100644 lib/instances/name_provisioned_limits.go diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 0ba45de0..185ec08d 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -122,7 +122,7 @@ Common settings: | `metrics.resource_refresh_interval` | Refresh interval for cached resource capacity metrics | `120s` | | `limits.max_concurrent_builds` | Max concurrent image builds | `1` | | `limits.max_overlay_size` | Max overlay filesystem size | `100GB` | -| `limits.name_patterns` | Ordered regex overrides for per-instance CPU/memory/overlay limits | _(empty)_ | +| `limits.name_patterns` | Ordered regex overrides for per-instance limits plus per-pattern provisioned CPU/memory/disk/network/disk I/O caps | _(empty)_ | | `acme.email` | Email for ACME certificate registration | _(empty)_ | | `acme.dns_provider` | DNS provider for ACME challenges | _(empty)_ | | `acme.cloudflare_api_token` | Cloudflare API token | _(empty)_ | diff --git a/cmd/api/config/config_test.go b/cmd/api/config/config_test.go index a48aca00..d6996a8a 100644 --- a/cmd/api/config/config_test.go +++ b/cmd/api/config/config_test.go @@ -298,8 +298,11 @@ limits: - pattern: '^prod-' max_vcpus_per_instance: 8 max_memory_per_instance: 64GB + max_total_vcpus: 32 + max_total_memory: 256GB - pattern: '^tiny-' max_overlay_size: 5GB + max_total_network_bandwidth: 1Gbps ` if err := os.WriteFile(cfgPath, []byte(configYAML), 0600); err != nil { t.Fatalf("write temp config: %v", err) @@ -322,9 +325,18 @@ limits: if cfg.Limits.NamePatterns[0].MaxMemoryPerInstance == nil || *cfg.Limits.NamePatterns[0].MaxMemoryPerInstance != "64GB" { t.Fatalf("expected first max_memory_per_instance to load, got %#v", cfg.Limits.NamePatterns[0].MaxMemoryPerInstance) } + if cfg.Limits.NamePatterns[0].MaxTotalVcpus == nil || *cfg.Limits.NamePatterns[0].MaxTotalVcpus != 32 { + t.Fatalf("expected first max_total_vcpus to load, got %#v", cfg.Limits.NamePatterns[0].MaxTotalVcpus) + } + if cfg.Limits.NamePatterns[0].MaxTotalMemory == nil || *cfg.Limits.NamePatterns[0].MaxTotalMemory != "256GB" { + t.Fatalf("expected first max_total_memory to load, got %#v", cfg.Limits.NamePatterns[0].MaxTotalMemory) + } if cfg.Limits.NamePatterns[1].MaxOverlaySize == nil || *cfg.Limits.NamePatterns[1].MaxOverlaySize != "5GB" { t.Fatalf("expected second max_overlay_size to load, got %#v", cfg.Limits.NamePatterns[1].MaxOverlaySize) } + if cfg.Limits.NamePatterns[1].MaxTotalNetworkBandwidth == nil || *cfg.Limits.NamePatterns[1].MaxTotalNetworkBandwidth != "1Gbps" { + t.Fatalf("expected second max_total_network_bandwidth to load, got %#v", cfg.Limits.NamePatterns[1].MaxTotalNetworkBandwidth) + } } func TestValidateRejectsInvalidNamePatternLimitRegex(t *testing.T) { @@ -369,6 +381,21 @@ func TestValidateAllowsUnlimitedNamePatternLimitSize(t *testing.T) { } } +func TestValidateRejectsInvalidNamePatternAggregateBandwidth(t *testing.T) { + cfg := defaultConfig() + cfg.Limits.NamePatterns = []NamePatternLimitsConfig{ + { + Pattern: "^prod-", + MaxTotalNetworkBandwidth: strPtr("definitely-not-bandwidth"), + }, + } + + err := cfg.Validate() + if err == nil || !strings.Contains(err.Error(), "limits.name_patterns[0].max_total_network_bandwidth") { + t.Fatalf("expected invalid aggregate bandwidth validation error, got %v", err) + } +} + func strPtr(v string) *string { return &v } diff --git a/cmd/api/config/limits_patterns.go b/cmd/api/config/limits_patterns.go index dfa926b1..2331ba48 100644 --- a/cmd/api/config/limits_patterns.go +++ b/cmd/api/config/limits_patterns.go @@ -3,16 +3,24 @@ package config import ( "fmt" "regexp" + "strconv" "strings" + + "github.com/c2h5oh/datasize" ) // NamePatternLimitsConfig holds per-name regex resource limit overrides. // The first matching pattern wins. Omitted fields fall back to the global limits block. type NamePatternLimitsConfig struct { - Pattern string `koanf:"pattern"` - MaxVcpusPerInstance *int `koanf:"max_vcpus_per_instance"` - MaxMemoryPerInstance *string `koanf:"max_memory_per_instance"` - MaxOverlaySize *string `koanf:"max_overlay_size"` + Pattern string `koanf:"pattern"` + MaxVcpusPerInstance *int `koanf:"max_vcpus_per_instance"` + MaxMemoryPerInstance *string `koanf:"max_memory_per_instance"` + MaxOverlaySize *string `koanf:"max_overlay_size"` + MaxTotalVcpus *int `koanf:"max_total_vcpus"` + MaxTotalMemory *string `koanf:"max_total_memory"` + MaxTotalDisk *string `koanf:"max_total_disk"` + MaxTotalNetworkBandwidth *string `koanf:"max_total_network_bandwidth"` + MaxTotalDiskIO *string `koanf:"max_total_disk_io"` } func validateNamePatternLimits(patterns []NamePatternLimitsConfig) error { @@ -25,8 +33,8 @@ func validateNamePatternLimits(patterns []NamePatternLimitsConfig) error { if _, err := regexp.Compile(cfg.Pattern); err != nil { return fmt.Errorf("limits.name_patterns[%d].pattern must be a valid regex, got %q: %w", i, cfg.Pattern, err) } - if cfg.MaxVcpusPerInstance != nil && *cfg.MaxVcpusPerInstance < 0 { - return fmt.Errorf("limits.name_patterns[%d].max_vcpus_per_instance must be >= 0, got %d", i, *cfg.MaxVcpusPerInstance) + if err := validateOptionalNonNegativeInt(fmt.Sprintf("limits.name_patterns[%d].max_vcpus_per_instance", i), cfg.MaxVcpusPerInstance); err != nil { + return err } if cfg.MaxMemoryPerInstance != nil { value := strings.TrimSpace(*cfg.MaxMemoryPerInstance) @@ -48,6 +56,95 @@ func validateNamePatternLimits(patterns []NamePatternLimitsConfig) error { return err } } + if err := validateOptionalNonNegativeInt(fmt.Sprintf("limits.name_patterns[%d].max_total_vcpus", i), cfg.MaxTotalVcpus); err != nil { + return err + } + if err := validateOptionalByteSizePtr(fmt.Sprintf("limits.name_patterns[%d].max_total_memory", i), cfg.MaxTotalMemory); err != nil { + return err + } + if err := validateOptionalByteSizePtr(fmt.Sprintf("limits.name_patterns[%d].max_total_disk", i), cfg.MaxTotalDisk); err != nil { + return err + } + if err := validateOptionalBandwidthPtr(fmt.Sprintf("limits.name_patterns[%d].max_total_network_bandwidth", i), cfg.MaxTotalNetworkBandwidth); err != nil { + return err + } + if err := validateOptionalBandwidthPtr(fmt.Sprintf("limits.name_patterns[%d].max_total_disk_io", i), cfg.MaxTotalDiskIO); err != nil { + return err + } } return nil } + +func validateOptionalNonNegativeInt(field string, value *int) error { + if value == nil { + return nil + } + if *value < 0 { + return fmt.Errorf("%s must be >= 0, got %d", field, *value) + } + return nil +} + +func validateOptionalByteSizePtr(field string, value *string) error { + if value == nil { + return nil + } + trimmed := strings.TrimSpace(*value) + if trimmed == "" { + return fmt.Errorf("%s must not be empty", field) + } + *value = trimmed + return validateOptionalByteSize(field, trimmed) +} + +func validateOptionalBandwidthPtr(field string, value *string) error { + if value == nil { + return nil + } + trimmed := strings.TrimSpace(*value) + if trimmed == "" { + return fmt.Errorf("%s must not be empty", field) + } + *value = trimmed + if _, err := parseBandwidth(trimmed); err != nil { + return fmt.Errorf("%s must be a valid bandwidth, got %q: %w", field, trimmed, err) + } + return nil +} + +func parseBandwidth(limit string) (int64, error) { + limit = strings.TrimSpace(strings.ToLower(limit)) + + if strings.HasSuffix(limit, "bps") { + numPart := strings.TrimSpace(strings.TrimSuffix(limit, "bps")) + + var multiplier int64 = 1 + switch { + case strings.HasSuffix(numPart, "g"): + multiplier = 1000 * 1000 * 1000 + numPart = strings.TrimSuffix(numPart, "g") + case strings.HasSuffix(numPart, "m"): + multiplier = 1000 * 1000 + numPart = strings.TrimSuffix(numPart, "m") + case strings.HasSuffix(numPart, "k"): + multiplier = 1000 + numPart = strings.TrimSuffix(numPart, "k") + } + + bits, err := strconv.ParseInt(strings.TrimSpace(numPart), 10, 64) + if err != nil { + return 0, fmt.Errorf("invalid number: %s", numPart) + } + return (bits * multiplier) / 8, nil + } + + limit = strings.TrimSuffix(limit, "/s") + limit = strings.TrimSuffix(limit, "ps") + + var size datasize.ByteSize + if err := size.UnmarshalText([]byte(limit)); err != nil { + return 0, fmt.Errorf("parse as bytes: %w", err) + } + + return int64(size), nil +} diff --git a/config.example.darwin.yaml b/config.example.darwin.yaml index 5b2ec6fc..d7b14cbb 100644 --- a/config.example.darwin.yaml +++ b/config.example.darwin.yaml @@ -118,9 +118,12 @@ limits: # name_patterns: # - pattern: '^build-' # max_vcpus_per_instance: 8 + # max_total_vcpus: 64 + # max_total_memory: 128GB # - pattern: '^tiny-' # max_memory_per_instance: 2GB # max_overlay_size: 5GB + # max_total_disk: 200GB # ============================================================================= # OpenTelemetry (optional, same as Linux) diff --git a/config.example.yaml b/config.example.yaml index a76a8380..2c2c915a 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -150,5 +150,10 @@ data_dir: /var/lib/hypeman # - pattern: '^prod-' # max_vcpus_per_instance: 32 # max_memory_per_instance: 64GB +# max_total_vcpus: 256 +# max_total_memory: 1TB +# max_total_network_bandwidth: 40Gbps # - pattern: '^sandbox-' # max_overlay_size: 20GB +# max_total_disk: 2TB +# max_total_disk_io: 4GB/s diff --git a/lib/instances/create.go b/lib/instances/create.go index 4cb09cf8..fbd37a8c 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -186,6 +186,9 @@ func (m *manager) createInstance( if err := validateResourceLimitsForName(req.Name, m.limits, overlaySize, vcpus, totalMemory); err != nil { return nil, err } + if err := m.validateProvisionedResourceLimitsForName(ctx, req.Name, overlaySize, vcpus, totalMemory, req.NetworkBandwidthDownload, req.NetworkBandwidthUpload, req.DiskIOBps, req.Volumes); err != nil { + return nil, err + } // Validate aggregate resource limits via ResourceValidator (if configured) if m.resourceValidator != nil { diff --git a/lib/instances/fork.go b/lib/instances/fork.go index c9e2603b..ab17cab3 100644 --- a/lib/instances/fork.go +++ b/lib/instances/fork.go @@ -300,6 +300,9 @@ func (m *manager) forkInstanceFromStoppedOrStandby(ctx context.Context, id strin if err := validateResourceLimitsForName(req.Name, m.limits, forkMeta.OverlaySize, forkMeta.Vcpus, forkMeta.Size+forkMeta.HotplugSize); err != nil { return nil, err } + if err := m.validateProvisionedResourceLimitsForName(ctx, req.Name, forkMeta.OverlaySize, forkMeta.Vcpus, forkMeta.Size+forkMeta.HotplugSize, forkMeta.NetworkBandwidthDownload, forkMeta.NetworkBandwidthUpload, forkMeta.DiskIOBps, forkMeta.Volumes); err != nil { + return nil, err + } if source.State == StateStandby { snapshotConfigPath := m.paths.InstanceSnapshotConfig(forkID) diff --git a/lib/instances/name_limits.go b/lib/instances/name_limits.go index 4c4e756b..99e4b183 100644 --- a/lib/instances/name_limits.go +++ b/lib/instances/name_limits.go @@ -5,28 +5,49 @@ import ( "regexp" ) +type NamedResourceLimitConfig struct { + MaxVcpusPerInstance *int + MaxMemoryPerInstance *int64 + MaxOverlaySize *int64 + MaxTotalVcpus *int + MaxTotalMemory *int64 + MaxTotalDisk *int64 + MaxTotalNetworkBandwidth *int64 + MaxTotalDiskIO *int64 +} + // NamedResourceLimit applies per-instance limits to names matching Pattern. // The first matching pattern wins, and omitted fields fall back to global limits. type NamedResourceLimit struct { - Pattern string - re *regexp.Regexp - MaxVcpusPerInstance *int - MaxMemoryPerInstance *int64 - MaxOverlaySize *int64 + Pattern string + re *regexp.Regexp + MaxVcpusPerInstance *int + MaxMemoryPerInstance *int64 + MaxOverlaySize *int64 + MaxTotalVcpus *int + MaxTotalMemory *int64 + MaxTotalDisk *int64 + MaxTotalNetworkBandwidth *int64 + MaxTotalDiskIO *int64 } -func NewNamedResourceLimit(pattern string, maxVcpus *int, maxMemory *int64, maxOverlay *int64) (NamedResourceLimit, error) { +func NewNamedResourceLimit(pattern string, cfg NamedResourceLimitConfig) (NamedResourceLimit, error) { re, err := regexp.Compile(pattern) if err != nil { return NamedResourceLimit{}, fmt.Errorf("compile name limit regex %q: %w", pattern, err) } return NamedResourceLimit{ - Pattern: pattern, - re: re, - MaxVcpusPerInstance: maxVcpus, - MaxMemoryPerInstance: maxMemory, - MaxOverlaySize: maxOverlay, + Pattern: pattern, + re: re, + MaxVcpusPerInstance: cfg.MaxVcpusPerInstance, + MaxMemoryPerInstance: cfg.MaxMemoryPerInstance, + MaxOverlaySize: cfg.MaxOverlaySize, + MaxTotalVcpus: cfg.MaxTotalVcpus, + MaxTotalMemory: cfg.MaxTotalMemory, + MaxTotalDisk: cfg.MaxTotalDisk, + MaxTotalNetworkBandwidth: cfg.MaxTotalNetworkBandwidth, + MaxTotalDiskIO: cfg.MaxTotalDiskIO, }, nil } @@ -34,6 +55,31 @@ func (l NamedResourceLimit) matches(name string) bool { return l.re != nil && l.re.MatchString(name) } +func (l NamedResourceLimit) hasAggregateProvisionedLimits() bool { + return l.MaxTotalVcpus != nil || + l.MaxTotalMemory != nil || + l.MaxTotalDisk != nil || + l.MaxTotalNetworkBandwidth != nil || + l.MaxTotalDiskIO != nil +} + +func (l ResourceLimits) matchingPatternIndex(name string) int { + for i := range l.NamePatterns { + if l.NamePatterns[i].matches(name) { + return i + } + } + return -1 +} + +func (l ResourceLimits) matchingPattern(name string) *NamedResourceLimit { + index := l.matchingPatternIndex(name) + if index < 0 { + return nil + } + return &l.NamePatterns[index] +} + func (l ResourceLimits) ForName(name string) ResourceLimits { resolved := ResourceLimits{ MaxOverlaySize: l.MaxOverlaySize, @@ -41,10 +87,7 @@ func (l ResourceLimits) ForName(name string) ResourceLimits { MaxMemoryPerInstance: l.MaxMemoryPerInstance, } - for _, pattern := range l.NamePatterns { - if !pattern.matches(name) { - continue - } + if pattern := l.matchingPattern(name); pattern != nil { if pattern.MaxOverlaySize != nil { resolved.MaxOverlaySize = *pattern.MaxOverlaySize } @@ -54,7 +97,6 @@ func (l ResourceLimits) ForName(name string) ResourceLimits { if pattern.MaxMemoryPerInstance != nil { resolved.MaxMemoryPerInstance = *pattern.MaxMemoryPerInstance } - break } return resolved diff --git a/lib/instances/name_limits_test.go b/lib/instances/name_limits_test.go index b7f2de45..cb81290d 100644 --- a/lib/instances/name_limits_test.go +++ b/lib/instances/name_limits_test.go @@ -3,6 +3,7 @@ package instances import ( "testing" + "github.com/kernel/hypeman/lib/resources" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -16,9 +17,16 @@ func TestResourceLimitsForName_FirstMatchWins(t *testing.T) { thirtyTwoGiB := int64(32 * 1024 * 1024 * 1024) twentyGiB := int64(20 * 1024 * 1024 * 1024) - first, err := NewNamedResourceLimit("^prod-.*", &eight, &sixtyFourGiB, nil) + first, err := NewNamedResourceLimit("^prod-.*", NamedResourceLimitConfig{ + MaxVcpusPerInstance: &eight, + MaxMemoryPerInstance: &sixtyFourGiB, + }) require.NoError(t, err) - second, err := NewNamedResourceLimit("^prod-api-.*", &four, &thirtyTwoGiB, &twentyGiB) + second, err := NewNamedResourceLimit("^prod-api-.*", NamedResourceLimitConfig{ + MaxVcpusPerInstance: &four, + MaxMemoryPerInstance: &thirtyTwoGiB, + MaxOverlaySize: &twentyGiB, + }) require.NoError(t, err) limits := ResourceLimits{ @@ -38,7 +46,9 @@ func TestResourceLimitsForName_FallsBackWhenFieldOmitted(t *testing.T) { t.Parallel() twentyGiB := int64(20 * 1024 * 1024 * 1024) - override, err := NewNamedResourceLimit("^small-.*", nil, nil, &twentyGiB) + override, err := NewNamedResourceLimit("^small-.*", NamedResourceLimitConfig{ + MaxOverlaySize: &twentyGiB, + }) require.NoError(t, err) limits := ResourceLimits{ @@ -59,7 +69,11 @@ func TestValidateResourceLimitsForName_ZeroOverrideMeansUnlimited(t *testing.T) zeroInt := 0 zeroBytes := int64(0) - override, err := NewNamedResourceLimit("^burst-.*", &zeroInt, &zeroBytes, &zeroBytes) + override, err := NewNamedResourceLimit("^burst-.*", NamedResourceLimitConfig{ + MaxVcpusPerInstance: &zeroInt, + MaxMemoryPerInstance: &zeroBytes, + MaxOverlaySize: &zeroBytes, + }) require.NoError(t, err) limits := ResourceLimits{ @@ -77,7 +91,9 @@ func TestValidateResourceLimitsForName_RejectsWhenResolvedLimitExceeded(t *testi t.Parallel() four := 4 - override, err := NewNamedResourceLimit("^db-.*", &four, nil, nil) + override, err := NewNamedResourceLimit("^db-.*", NamedResourceLimitConfig{ + MaxVcpusPerInstance: &four, + }) require.NoError(t, err) limits := ResourceLimits{ @@ -91,3 +107,71 @@ func TestValidateResourceLimitsForName_RejectsWhenResolvedLimitExceeded(t *testi require.Error(t, err) assert.Contains(t, err.Error(), "vcpus 8 exceeds maximum allowed 4 per instance") } + +func TestValidateProvisionedResourceLimitsForName_RejectsProjectedTotal(t *testing.T) { + t.Parallel() + + eight := 8 + oneHundredTwentyEightGiB := int64(128 * 1024 * 1024 * 1024) + oneTiB := int64(1024 * 1024 * 1024 * 1024) + twoGbps := int64(2 * 1000 * 1000 * 1000 / 8) + oneGiBps := int64(1024 * 1024 * 1024) + + pattern, err := NewNamedResourceLimit("^team-a-", NamedResourceLimitConfig{ + MaxTotalVcpus: &eight, + MaxTotalMemory: &oneHundredTwentyEightGiB, + MaxTotalDisk: &oneTiB, + MaxTotalNetworkBandwidth: &twoGbps, + MaxTotalDiskIO: &oneGiBps, + }) + require.NoError(t, err) + + limits := ResourceLimits{NamePatterns: []NamedResourceLimit{pattern}} + existing := []resources.InstanceAllocation{ + { + Name: "team-a-api-1", + Vcpus: 6, + MemoryBytes: 96 * 1024 * 1024 * 1024, + OverlayBytes: 300 * 1024 * 1024 * 1024, + VolumeBytes: 500 * 1024 * 1024 * 1024, + VolumeOverlayBytes: 50 * 1024 * 1024 * 1024, + NetworkDownloadBps: 100 * 1024 * 1024, + NetworkUploadBps: 200 * 1024 * 1024, + DiskIOBps: 400 * 1024 * 1024, + }, + } + + err = validateProvisionedResourceLimitsForName("team-a-worker-1", limits, existing, provisionedResources{ + Vcpus: 4, + MemoryBytes: 40 * 1024 * 1024 * 1024, + DiskBytes: 300 * 1024 * 1024 * 1024, + NetworkBps: 100 * 1024 * 1024, + DiskIOBps: 200 * 1024 * 1024, + }) + require.Error(t, err) + assert.Contains(t, err.Error(), "total provisioned cpu 10") +} + +func TestValidateProvisionedResourceLimitsForName_UsesFirstMatchingPattern(t *testing.T) { + t.Parallel() + + four := 4 + eight := 8 + first, err := NewNamedResourceLimit("^prod-", NamedResourceLimitConfig{ + MaxTotalVcpus: &four, + }) + require.NoError(t, err) + second, err := NewNamedResourceLimit("^prod-api-", NamedResourceLimitConfig{ + MaxTotalVcpus: &eight, + }) + require.NoError(t, err) + + limits := ResourceLimits{NamePatterns: []NamedResourceLimit{first, second}} + existing := []resources.InstanceAllocation{ + {Name: "prod-api-1", Vcpus: 3}, + } + + err = validateProvisionedResourceLimitsForName("prod-api-2", limits, existing, provisionedResources{Vcpus: 2}) + require.Error(t, err) + assert.Contains(t, err.Error(), `pattern "^prod-"`) +} diff --git a/lib/instances/name_provisioned_limits.go b/lib/instances/name_provisioned_limits.go new file mode 100644 index 00000000..b7382ec7 --- /dev/null +++ b/lib/instances/name_provisioned_limits.go @@ -0,0 +1,129 @@ +package instances + +import ( + "context" + "fmt" + + "github.com/c2h5oh/datasize" + "github.com/kernel/hypeman/lib/resources" +) + +type provisionedResources struct { + Vcpus int + MemoryBytes int64 + DiskBytes int64 + NetworkBps int64 + DiskIOBps int64 +} + +func provisionedResourcesFromAllocation(alloc resources.InstanceAllocation) provisionedResources { + networkBps := alloc.NetworkDownloadBps + if alloc.NetworkUploadBps > networkBps { + networkBps = alloc.NetworkUploadBps + } + + return provisionedResources{ + Vcpus: alloc.Vcpus, + MemoryBytes: alloc.MemoryBytes, + DiskBytes: alloc.OverlayBytes + alloc.VolumeOverlayBytes + alloc.VolumeBytes, + NetworkBps: networkBps, + DiskIOBps: alloc.DiskIOBps, + } +} + +func (r *provisionedResources) add(other provisionedResources) { + r.Vcpus += other.Vcpus + r.MemoryBytes += other.MemoryBytes + r.DiskBytes += other.DiskBytes + r.NetworkBps += other.NetworkBps + r.DiskIOBps += other.DiskIOBps +} + +func validateProvisionedResourceLimitsForName(name string, limits ResourceLimits, existing []resources.InstanceAllocation, requested provisionedResources) error { + patternIndex := limits.matchingPatternIndex(name) + if patternIndex < 0 { + return nil + } + pattern := limits.NamePatterns[patternIndex] + if !pattern.hasAggregateProvisionedLimits() { + return nil + } + + var current provisionedResources + for _, alloc := range existing { + if limits.matchingPatternIndex(alloc.Name) != patternIndex { + continue + } + current.add(provisionedResourcesFromAllocation(alloc)) + } + + projected := current + projected.add(requested) + + if pattern.MaxTotalVcpus != nil && *pattern.MaxTotalVcpus > 0 && projected.Vcpus > *pattern.MaxTotalVcpus { + return fmt.Errorf("total provisioned cpu %d for pattern %q exceeds maximum allowed %d", projected.Vcpus, pattern.Pattern, *pattern.MaxTotalVcpus) + } + if pattern.MaxTotalMemory != nil && *pattern.MaxTotalMemory > 0 && projected.MemoryBytes > *pattern.MaxTotalMemory { + return fmt.Errorf("total provisioned memory %s for pattern %q exceeds maximum allowed %s", datasize.ByteSize(projected.MemoryBytes).HR(), pattern.Pattern, datasize.ByteSize(*pattern.MaxTotalMemory).HR()) + } + if pattern.MaxTotalDisk != nil && *pattern.MaxTotalDisk > 0 && projected.DiskBytes > *pattern.MaxTotalDisk { + return fmt.Errorf("total provisioned disk %s for pattern %q exceeds maximum allowed %s", datasize.ByteSize(projected.DiskBytes).HR(), pattern.Pattern, datasize.ByteSize(*pattern.MaxTotalDisk).HR()) + } + if pattern.MaxTotalNetworkBandwidth != nil && *pattern.MaxTotalNetworkBandwidth > 0 && projected.NetworkBps > *pattern.MaxTotalNetworkBandwidth { + return fmt.Errorf("total provisioned network bandwidth %s/s for pattern %q exceeds maximum allowed %s/s", datasize.ByteSize(projected.NetworkBps).HR(), pattern.Pattern, datasize.ByteSize(*pattern.MaxTotalNetworkBandwidth).HR()) + } + if pattern.MaxTotalDiskIO != nil && *pattern.MaxTotalDiskIO > 0 && projected.DiskIOBps > *pattern.MaxTotalDiskIO { + return fmt.Errorf("total provisioned disk I/O %s/s for pattern %q exceeds maximum allowed %s/s", datasize.ByteSize(projected.DiskIOBps).HR(), pattern.Pattern, datasize.ByteSize(*pattern.MaxTotalDiskIO).HR()) + } + + return nil +} + +func (m *manager) requestedProvisionedResources(ctx context.Context, overlaySize int64, vcpus int, totalMemory int64, networkDownloadBps int64, networkUploadBps int64, diskIOBps int64, volumes []VolumeAttachment) (provisionedResources, error) { + diskBytes := overlaySize + for _, attachment := range volumes { + if attachment.Overlay { + diskBytes += attachment.OverlaySize + } + if m.volumeManager == nil { + continue + } + volume, err := m.volumeManager.GetVolume(ctx, attachment.VolumeID) + if err != nil { + return provisionedResources{}, fmt.Errorf("get volume %s: %w", attachment.VolumeID, err) + } + diskBytes += int64(volume.SizeGb) * 1024 * 1024 * 1024 + } + + networkBps := networkDownloadBps + if networkUploadBps > networkBps { + networkBps = networkUploadBps + } + + return provisionedResources{ + Vcpus: vcpus, + MemoryBytes: totalMemory, + DiskBytes: diskBytes, + NetworkBps: networkBps, + DiskIOBps: diskIOBps, + }, nil +} + +func (m *manager) validateProvisionedResourceLimitsForName(ctx context.Context, name string, overlaySize int64, vcpus int, totalMemory int64, networkDownloadBps int64, networkUploadBps int64, diskIOBps int64, volumes []VolumeAttachment) error { + pattern := m.limits.matchingPattern(name) + if pattern == nil || !pattern.hasAggregateProvisionedLimits() { + return nil + } + + requested, err := m.requestedProvisionedResources(ctx, overlaySize, vcpus, totalMemory, networkDownloadBps, networkUploadBps, diskIOBps, volumes) + if err != nil { + return err + } + + existing, err := m.ListInstanceAllocations(ctx) + if err != nil { + return fmt.Errorf("list existing instance allocations: %w", err) + } + + return validateProvisionedResourceLimitsForName(name, m.limits, existing, requested) +} diff --git a/lib/instances/snapshot.go b/lib/instances/snapshot.go index 75505e0e..c32e48a8 100644 --- a/lib/instances/snapshot.go +++ b/lib/instances/snapshot.go @@ -453,6 +453,9 @@ func (m *manager) forkSnapshot(ctx context.Context, snapshotID string, req ForkS if err := validateResourceLimitsForName(req.Name, m.limits, forkMeta.OverlaySize, forkMeta.Vcpus, forkMeta.Size+forkMeta.HotplugSize); err != nil { return nil, err } + if err := m.validateProvisionedResourceLimitsForName(ctx, req.Name, forkMeta.OverlaySize, forkMeta.Vcpus, forkMeta.Size+forkMeta.HotplugSize, forkMeta.NetworkBandwidthDownload, forkMeta.NetworkBandwidthUpload, forkMeta.DiskIOBps, forkMeta.Volumes); err != nil { + return nil, err + } if rec.Snapshot.Kind == SnapshotKindStandby { netCfg := (*hypervisor.ForkNetworkConfig)(nil) diff --git a/lib/providers/instance_limits.go b/lib/providers/instance_limits.go index 1411875b..bf533461 100644 --- a/lib/providers/instance_limits.go +++ b/lib/providers/instance_limits.go @@ -7,6 +7,7 @@ import ( "github.com/c2h5oh/datasize" "github.com/kernel/hypeman/cmd/api/config" "github.com/kernel/hypeman/lib/instances" + "github.com/kernel/hypeman/lib/resources" ) func parseInstanceLimits(cfg *config.Config) (instances.ResourceLimits, error) { @@ -27,6 +28,11 @@ func parseInstanceLimits(cfg *config.Config) (instances.ResourceLimits, error) { value := *patternCfg.MaxVcpusPerInstance maxVcpus = &value } + var maxTotalVcpus *int + if patternCfg.MaxTotalVcpus != nil { + value := *patternCfg.MaxTotalVcpus + maxTotalVcpus = &value + } maxMemory, err := parseOptionalByteSizePtr(fmt.Sprintf("limits.name_patterns[%d].max_memory_per_instance", i), patternCfg.MaxMemoryPerInstance) if err != nil { @@ -36,8 +42,33 @@ func parseInstanceLimits(cfg *config.Config) (instances.ResourceLimits, error) { if err != nil { return instances.ResourceLimits{}, err } + maxTotalMemory, err := parseOptionalByteSizePtr(fmt.Sprintf("limits.name_patterns[%d].max_total_memory", i), patternCfg.MaxTotalMemory) + if err != nil { + return instances.ResourceLimits{}, err + } + maxTotalDisk, err := parseOptionalByteSizePtr(fmt.Sprintf("limits.name_patterns[%d].max_total_disk", i), patternCfg.MaxTotalDisk) + if err != nil { + return instances.ResourceLimits{}, err + } + maxTotalNetwork, err := parseOptionalBandwidthPtr(fmt.Sprintf("limits.name_patterns[%d].max_total_network_bandwidth", i), patternCfg.MaxTotalNetworkBandwidth) + if err != nil { + return instances.ResourceLimits{}, err + } + maxTotalDiskIO, err := parseOptionalBandwidthPtr(fmt.Sprintf("limits.name_patterns[%d].max_total_disk_io", i), patternCfg.MaxTotalDiskIO) + if err != nil { + return instances.ResourceLimits{}, err + } - pattern, err := instances.NewNamedResourceLimit(patternCfg.Pattern, maxVcpus, maxMemory, maxOverlay) + pattern, err := instances.NewNamedResourceLimit(patternCfg.Pattern, instances.NamedResourceLimitConfig{ + MaxVcpusPerInstance: maxVcpus, + MaxMemoryPerInstance: maxMemory, + MaxOverlaySize: maxOverlay, + MaxTotalVcpus: maxTotalVcpus, + MaxTotalMemory: maxTotalMemory, + MaxTotalDisk: maxTotalDisk, + MaxTotalNetworkBandwidth: maxTotalNetwork, + MaxTotalDiskIO: maxTotalDiskIO, + }) if err != nil { return instances.ResourceLimits{}, fmt.Errorf("parse limits.name_patterns[%d]: %w", i, err) } @@ -65,6 +96,24 @@ func parseOptionalByteSizePtr(field string, value *string) (*int64, error) { return &parsed, nil } +func parseOptionalBandwidthPtr(field string, value *string) (*int64, error) { + if value == nil { + return nil, nil + } + + trimmed := strings.TrimSpace(*value) + if trimmed == "" { + return nil, nil + } + + parsed, err := resources.ParseBandwidth(trimmed) + if err != nil { + return nil, fmt.Errorf("%s must be a valid bandwidth, got %q: %w", field, trimmed, err) + } + + return &parsed, nil +} + func parseOptionalByteSizeLimit(field string, value string) (int64, error) { value = strings.TrimSpace(value) if value == "" { diff --git a/lib/providers/providers_test.go b/lib/providers/providers_test.go index 5ea519ba..18413f4d 100644 --- a/lib/providers/providers_test.go +++ b/lib/providers/providers_test.go @@ -66,6 +66,76 @@ func TestSnapshotDefaultsFromConfigKeepsZstdLevel(t *testing.T) { assert.Equal(t, 5, *defaults.Compression.Level) } +func TestParseInstanceLimitsParsesAggregateNamePatternLimits(t *testing.T) { + t.Parallel() + + cfg := &config.Config{ + Limits: config.LimitsConfig{ + MaxVcpusPerInstance: 4, + MaxMemoryPerInstance: "8GB", + MaxOverlaySize: "20GB", + NamePatterns: []config.NamePatternLimitsConfig{ + { + Pattern: "^team-a-", + MaxTotalVcpus: intPtr(32), + MaxTotalMemory: strPtr("256GB"), + MaxTotalDisk: strPtr("1TB"), + MaxTotalNetworkBandwidth: strPtr("10Gbps"), + MaxTotalDiskIO: strPtr("1GB/s"), + }, + }, + }, + } + + limits, err := parseInstanceLimits(cfg) + require.NoError(t, err) + require.Len(t, limits.NamePatterns, 1) + + pattern := limits.NamePatterns[0] + require.NotNil(t, pattern.MaxTotalVcpus) + assert.Equal(t, 32, *pattern.MaxTotalVcpus) + require.NotNil(t, pattern.MaxTotalMemory) + assert.Equal(t, int64(256*1024*1024*1024), *pattern.MaxTotalMemory) + require.NotNil(t, pattern.MaxTotalDisk) + assert.Equal(t, int64(1024*1024*1024*1024), *pattern.MaxTotalDisk) + require.NotNil(t, pattern.MaxTotalNetworkBandwidth) + assert.Equal(t, int64(10*1000*1000*1000/8), *pattern.MaxTotalNetworkBandwidth) + require.NotNil(t, pattern.MaxTotalDiskIO) + assert.Equal(t, int64(1024*1024*1024), *pattern.MaxTotalDiskIO) +} + +func TestParseInstanceLimitsPreservesPerInstanceFields(t *testing.T) { + t.Parallel() + + cfg := &config.Config{ + Limits: config.LimitsConfig{ + MaxVcpusPerInstance: 4, + MaxMemoryPerInstance: "8GB", + MaxOverlaySize: "20GB", + NamePatterns: []config.NamePatternLimitsConfig{ + { + Pattern: "^small-", + MaxVcpusPerInstance: intPtr(2), + MaxMemoryPerInstance: strPtr("4GB"), + MaxOverlaySize: strPtr("10GB"), + }, + }, + }, + } + + limits, err := parseInstanceLimits(cfg) + require.NoError(t, err) + + resolved := limits.ForName("small-worker") + assert.Equal(t, 2, resolved.MaxVcpusPerInstance) + assert.Equal(t, int64(4*1024*1024*1024), resolved.MaxMemoryPerInstance) + assert.Equal(t, int64(10*1024*1024*1024), resolved.MaxOverlaySize) +} + func intPtr(v int) *int { return &v } + +func strPtr(v string) *string { + return &v +} From 5309a671715d672391e7bd6ecc17103f34d53bce Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Sun, 5 Apr 2026 02:25:09 -0400 Subject: [PATCH 4/5] Simplify regex limit enforcement --- lib/instances/create.go | 5 +- lib/instances/fork.go | 5 +- lib/instances/name_limits.go | 16 +--- lib/instances/name_provisioned_limits.go | 16 ++-- lib/instances/snapshot.go | 5 +- lib/providers/instance_limits.go | 98 +++++++++++++----------- 6 files changed, 66 insertions(+), 79 deletions(-) diff --git a/lib/instances/create.go b/lib/instances/create.go index fbd37a8c..0ef76e74 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -183,10 +183,7 @@ func (m *manager) createInstance( } totalMemory := size + hotplugSize - if err := validateResourceLimitsForName(req.Name, m.limits, overlaySize, vcpus, totalMemory); err != nil { - return nil, err - } - if err := m.validateProvisionedResourceLimitsForName(ctx, req.Name, overlaySize, vcpus, totalMemory, req.NetworkBandwidthDownload, req.NetworkBandwidthUpload, req.DiskIOBps, req.Volumes); err != nil { + if err := m.validateNamedResourceLimits(ctx, req.Name, overlaySize, vcpus, totalMemory, req.NetworkBandwidthDownload, req.NetworkBandwidthUpload, req.DiskIOBps, req.Volumes); err != nil { return nil, err } diff --git a/lib/instances/fork.go b/lib/instances/fork.go index ab17cab3..a48383b3 100644 --- a/lib/instances/fork.go +++ b/lib/instances/fork.go @@ -297,10 +297,7 @@ func (m *manager) forkInstanceFromStoppedOrStandby(ctx context.Context, id strin forkMeta.IP = "" forkMeta.MAC = "" } - if err := validateResourceLimitsForName(req.Name, m.limits, forkMeta.OverlaySize, forkMeta.Vcpus, forkMeta.Size+forkMeta.HotplugSize); err != nil { - return nil, err - } - if err := m.validateProvisionedResourceLimitsForName(ctx, req.Name, forkMeta.OverlaySize, forkMeta.Vcpus, forkMeta.Size+forkMeta.HotplugSize, forkMeta.NetworkBandwidthDownload, forkMeta.NetworkBandwidthUpload, forkMeta.DiskIOBps, forkMeta.Volumes); err != nil { + if err := m.validateNamedResourceLimits(ctx, req.Name, forkMeta.OverlaySize, forkMeta.Vcpus, forkMeta.Size+forkMeta.HotplugSize, forkMeta.NetworkBandwidthDownload, forkMeta.NetworkBandwidthUpload, forkMeta.DiskIOBps, forkMeta.Volumes); err != nil { return nil, err } diff --git a/lib/instances/name_limits.go b/lib/instances/name_limits.go index 99e4b183..0bcbef85 100644 --- a/lib/instances/name_limits.go +++ b/lib/instances/name_limits.go @@ -63,21 +63,13 @@ func (l NamedResourceLimit) hasAggregateProvisionedLimits() bool { l.MaxTotalDiskIO != nil } -func (l ResourceLimits) matchingPatternIndex(name string) int { +func (l ResourceLimits) matchingPattern(name string) (int, *NamedResourceLimit) { for i := range l.NamePatterns { if l.NamePatterns[i].matches(name) { - return i + return i, &l.NamePatterns[i] } } - return -1 -} - -func (l ResourceLimits) matchingPattern(name string) *NamedResourceLimit { - index := l.matchingPatternIndex(name) - if index < 0 { - return nil - } - return &l.NamePatterns[index] + return -1, nil } func (l ResourceLimits) ForName(name string) ResourceLimits { @@ -87,7 +79,7 @@ func (l ResourceLimits) ForName(name string) ResourceLimits { MaxMemoryPerInstance: l.MaxMemoryPerInstance, } - if pattern := l.matchingPattern(name); pattern != nil { + if _, pattern := l.matchingPattern(name); pattern != nil { if pattern.MaxOverlaySize != nil { resolved.MaxOverlaySize = *pattern.MaxOverlaySize } diff --git a/lib/instances/name_provisioned_limits.go b/lib/instances/name_provisioned_limits.go index b7382ec7..db882bb8 100644 --- a/lib/instances/name_provisioned_limits.go +++ b/lib/instances/name_provisioned_limits.go @@ -40,18 +40,18 @@ func (r *provisionedResources) add(other provisionedResources) { } func validateProvisionedResourceLimitsForName(name string, limits ResourceLimits, existing []resources.InstanceAllocation, requested provisionedResources) error { - patternIndex := limits.matchingPatternIndex(name) - if patternIndex < 0 { + patternIndex, pattern := limits.matchingPattern(name) + if pattern == nil { return nil } - pattern := limits.NamePatterns[patternIndex] if !pattern.hasAggregateProvisionedLimits() { return nil } var current provisionedResources for _, alloc := range existing { - if limits.matchingPatternIndex(alloc.Name) != patternIndex { + matchedIndex, _ := limits.matchingPattern(alloc.Name) + if matchedIndex != patternIndex { continue } current.add(provisionedResourcesFromAllocation(alloc)) @@ -109,12 +109,10 @@ func (m *manager) requestedProvisionedResources(ctx context.Context, overlaySize }, nil } -func (m *manager) validateProvisionedResourceLimitsForName(ctx context.Context, name string, overlaySize int64, vcpus int, totalMemory int64, networkDownloadBps int64, networkUploadBps int64, diskIOBps int64, volumes []VolumeAttachment) error { - pattern := m.limits.matchingPattern(name) - if pattern == nil || !pattern.hasAggregateProvisionedLimits() { - return nil +func (m *manager) validateNamedResourceLimits(ctx context.Context, name string, overlaySize int64, vcpus int, totalMemory int64, networkDownloadBps int64, networkUploadBps int64, diskIOBps int64, volumes []VolumeAttachment) error { + if err := validateResourceLimitsForName(name, m.limits, overlaySize, vcpus, totalMemory); err != nil { + return err } - requested, err := m.requestedProvisionedResources(ctx, overlaySize, vcpus, totalMemory, networkDownloadBps, networkUploadBps, diskIOBps, volumes) if err != nil { return err diff --git a/lib/instances/snapshot.go b/lib/instances/snapshot.go index c32e48a8..478244af 100644 --- a/lib/instances/snapshot.go +++ b/lib/instances/snapshot.go @@ -450,10 +450,7 @@ func (m *manager) forkSnapshot(ctx context.Context, snapshotID string, req ForkS forkMeta.IP = "" forkMeta.MAC = "" } - if err := validateResourceLimitsForName(req.Name, m.limits, forkMeta.OverlaySize, forkMeta.Vcpus, forkMeta.Size+forkMeta.HotplugSize); err != nil { - return nil, err - } - if err := m.validateProvisionedResourceLimitsForName(ctx, req.Name, forkMeta.OverlaySize, forkMeta.Vcpus, forkMeta.Size+forkMeta.HotplugSize, forkMeta.NetworkBandwidthDownload, forkMeta.NetworkBandwidthUpload, forkMeta.DiskIOBps, forkMeta.Volumes); err != nil { + if err := m.validateNamedResourceLimits(ctx, req.Name, forkMeta.OverlaySize, forkMeta.Vcpus, forkMeta.Size+forkMeta.HotplugSize, forkMeta.NetworkBandwidthDownload, forkMeta.NetworkBandwidthUpload, forkMeta.DiskIOBps, forkMeta.Volumes); err != nil { return nil, err } diff --git a/lib/providers/instance_limits.go b/lib/providers/instance_limits.go index bf533461..14aa77d1 100644 --- a/lib/providers/instance_limits.go +++ b/lib/providers/instance_limits.go @@ -23,52 +23,7 @@ func parseInstanceLimits(cfg *config.Config) (instances.ResourceLimits, error) { namePatterns := make([]instances.NamedResourceLimit, 0, len(cfg.Limits.NamePatterns)) for i, patternCfg := range cfg.Limits.NamePatterns { - var maxVcpus *int - if patternCfg.MaxVcpusPerInstance != nil { - value := *patternCfg.MaxVcpusPerInstance - maxVcpus = &value - } - var maxTotalVcpus *int - if patternCfg.MaxTotalVcpus != nil { - value := *patternCfg.MaxTotalVcpus - maxTotalVcpus = &value - } - - maxMemory, err := parseOptionalByteSizePtr(fmt.Sprintf("limits.name_patterns[%d].max_memory_per_instance", i), patternCfg.MaxMemoryPerInstance) - if err != nil { - return instances.ResourceLimits{}, err - } - maxOverlay, err := parseOptionalByteSizePtr(fmt.Sprintf("limits.name_patterns[%d].max_overlay_size", i), patternCfg.MaxOverlaySize) - if err != nil { - return instances.ResourceLimits{}, err - } - maxTotalMemory, err := parseOptionalByteSizePtr(fmt.Sprintf("limits.name_patterns[%d].max_total_memory", i), patternCfg.MaxTotalMemory) - if err != nil { - return instances.ResourceLimits{}, err - } - maxTotalDisk, err := parseOptionalByteSizePtr(fmt.Sprintf("limits.name_patterns[%d].max_total_disk", i), patternCfg.MaxTotalDisk) - if err != nil { - return instances.ResourceLimits{}, err - } - maxTotalNetwork, err := parseOptionalBandwidthPtr(fmt.Sprintf("limits.name_patterns[%d].max_total_network_bandwidth", i), patternCfg.MaxTotalNetworkBandwidth) - if err != nil { - return instances.ResourceLimits{}, err - } - maxTotalDiskIO, err := parseOptionalBandwidthPtr(fmt.Sprintf("limits.name_patterns[%d].max_total_disk_io", i), patternCfg.MaxTotalDiskIO) - if err != nil { - return instances.ResourceLimits{}, err - } - - pattern, err := instances.NewNamedResourceLimit(patternCfg.Pattern, instances.NamedResourceLimitConfig{ - MaxVcpusPerInstance: maxVcpus, - MaxMemoryPerInstance: maxMemory, - MaxOverlaySize: maxOverlay, - MaxTotalVcpus: maxTotalVcpus, - MaxTotalMemory: maxTotalMemory, - MaxTotalDisk: maxTotalDisk, - MaxTotalNetworkBandwidth: maxTotalNetwork, - MaxTotalDiskIO: maxTotalDiskIO, - }) + pattern, err := parseNamedResourceLimit(i, patternCfg) if err != nil { return instances.ResourceLimits{}, fmt.Errorf("parse limits.name_patterns[%d]: %w", i, err) } @@ -83,6 +38,57 @@ func parseInstanceLimits(cfg *config.Config) (instances.ResourceLimits, error) { }, nil } +func parseNamedResourceLimit(i int, cfg config.NamePatternLimitsConfig) (instances.NamedResourceLimit, error) { + parsed := instances.NamedResourceLimitConfig{ + MaxVcpusPerInstance: parseOptionalIntPtr(cfg.MaxVcpusPerInstance), + MaxTotalVcpus: parseOptionalIntPtr(cfg.MaxTotalVcpus), + } + + byteFields := []struct { + field string + src *string + dst **int64 + }{ + {field: "max_memory_per_instance", src: cfg.MaxMemoryPerInstance, dst: &parsed.MaxMemoryPerInstance}, + {field: "max_overlay_size", src: cfg.MaxOverlaySize, dst: &parsed.MaxOverlaySize}, + {field: "max_total_memory", src: cfg.MaxTotalMemory, dst: &parsed.MaxTotalMemory}, + {field: "max_total_disk", src: cfg.MaxTotalDisk, dst: &parsed.MaxTotalDisk}, + } + for _, field := range byteFields { + value, err := parseOptionalByteSizePtr(fmt.Sprintf("limits.name_patterns[%d].%s", i, field.field), field.src) + if err != nil { + return instances.NamedResourceLimit{}, err + } + *field.dst = value + } + + bandwidthFields := []struct { + field string + src *string + dst **int64 + }{ + {field: "max_total_network_bandwidth", src: cfg.MaxTotalNetworkBandwidth, dst: &parsed.MaxTotalNetworkBandwidth}, + {field: "max_total_disk_io", src: cfg.MaxTotalDiskIO, dst: &parsed.MaxTotalDiskIO}, + } + for _, field := range bandwidthFields { + value, err := parseOptionalBandwidthPtr(fmt.Sprintf("limits.name_patterns[%d].%s", i, field.field), field.src) + if err != nil { + return instances.NamedResourceLimit{}, err + } + *field.dst = value + } + + return instances.NewNamedResourceLimit(cfg.Pattern, parsed) +} + +func parseOptionalIntPtr(value *int) *int { + if value == nil { + return nil + } + parsed := *value + return &parsed +} + func parseOptionalByteSizePtr(field string, value *string) (*int64, error) { if value == nil { return nil, nil From 4f6b17a6533be019ca159b84cc834bda5e5dd7d1 Mon Sep 17 00:00:00 2001 From: Steven Miller Date: Mon, 6 Apr 2026 11:23:32 -0400 Subject: [PATCH 5/5] Restore strict overlay limit semantics --- lib/instances/manager.go | 2 +- lib/instances/name_limits.go | 2 +- lib/instances/name_limits_test.go | 45 +++++++++++++++++++++++++++++-- 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/lib/instances/manager.go b/lib/instances/manager.go index 90f5ede7..cc1eb2f9 100644 --- a/lib/instances/manager.go +++ b/lib/instances/manager.go @@ -74,7 +74,7 @@ type ImageUsageRecorderSetter interface { // ResourceLimits contains configurable resource limits for instances type ResourceLimits struct { - MaxOverlaySize int64 // Maximum overlay disk size in bytes per instance (0 = unlimited) + MaxOverlaySize int64 // Maximum overlay disk size in bytes per instance MaxVcpusPerInstance int // Maximum vCPUs per instance (0 = unlimited) MaxMemoryPerInstance int64 // Maximum memory in bytes per instance (0 = unlimited) NamePatterns []NamedResourceLimit diff --git a/lib/instances/name_limits.go b/lib/instances/name_limits.go index 0bcbef85..7e6cbfa7 100644 --- a/lib/instances/name_limits.go +++ b/lib/instances/name_limits.go @@ -97,7 +97,7 @@ func (l ResourceLimits) ForName(name string) ResourceLimits { func validateResourceLimitsForName(name string, limits ResourceLimits, overlaySize int64, vcpus int, totalMemory int64) error { effective := limits.ForName(name) - if effective.MaxOverlaySize > 0 && overlaySize > effective.MaxOverlaySize { + if overlaySize > effective.MaxOverlaySize { return fmt.Errorf("overlay size %d exceeds maximum allowed size %d", overlaySize, effective.MaxOverlaySize) } if effective.MaxVcpusPerInstance > 0 && vcpus > effective.MaxVcpusPerInstance { diff --git a/lib/instances/name_limits_test.go b/lib/instances/name_limits_test.go index cb81290d..aece9f03 100644 --- a/lib/instances/name_limits_test.go +++ b/lib/instances/name_limits_test.go @@ -64,7 +64,7 @@ func TestResourceLimitsForName_FallsBackWhenFieldOmitted(t *testing.T) { assert.Equal(t, int64(32*1024*1024*1024), resolved.MaxMemoryPerInstance) } -func TestValidateResourceLimitsForName_ZeroOverrideMeansUnlimited(t *testing.T) { +func TestValidateResourceLimitsForName_ZeroCpuAndMemoryOverrideMeansUnlimited(t *testing.T) { t.Parallel() zeroInt := 0 @@ -72,7 +72,6 @@ func TestValidateResourceLimitsForName_ZeroOverrideMeansUnlimited(t *testing.T) override, err := NewNamedResourceLimit("^burst-.*", NamedResourceLimitConfig{ MaxVcpusPerInstance: &zeroInt, MaxMemoryPerInstance: &zeroBytes, - MaxOverlaySize: &zeroBytes, }) require.NoError(t, err) @@ -87,6 +86,27 @@ func TestValidateResourceLimitsForName_ZeroOverrideMeansUnlimited(t *testing.T) require.NoError(t, err) } +func TestValidateResourceLimitsForName_ZeroOverlayOverrideRejectsPositiveOverlay(t *testing.T) { + t.Parallel() + + zeroBytes := int64(0) + override, err := NewNamedResourceLimit("^burst-.*", NamedResourceLimitConfig{ + MaxOverlaySize: &zeroBytes, + }) + require.NoError(t, err) + + limits := ResourceLimits{ + MaxOverlaySize: 5 * 1024 * 1024 * 1024, + MaxVcpusPerInstance: 2, + MaxMemoryPerInstance: 4 * 1024 * 1024 * 1024, + NamePatterns: []NamedResourceLimit{override}, + } + + err = validateResourceLimitsForName("burst-worker", limits, 1, 2, 4*1024*1024*1024) + require.Error(t, err) + assert.Contains(t, err.Error(), "overlay size 1 exceeds maximum allowed size 0") +} + func TestValidateResourceLimitsForName_RejectsWhenResolvedLimitExceeded(t *testing.T) { t.Parallel() @@ -108,6 +128,27 @@ func TestValidateResourceLimitsForName_RejectsWhenResolvedLimitExceeded(t *testi assert.Contains(t, err.Error(), "vcpus 8 exceeds maximum allowed 4 per instance") } +func TestValidateResourceLimitsForName_GlobalOverlayLimitAppliesWhenNoPatternMatches(t *testing.T) { + t.Parallel() + + four := 4 + override, err := NewNamedResourceLimit("^db-.*", NamedResourceLimitConfig{ + MaxVcpusPerInstance: &four, + }) + require.NoError(t, err) + + limits := ResourceLimits{ + MaxOverlaySize: 10 * 1024 * 1024 * 1024, + MaxVcpusPerInstance: 16, + MaxMemoryPerInstance: 64 * 1024 * 1024 * 1024, + NamePatterns: []NamedResourceLimit{override}, + } + + err = validateResourceLimitsForName("cache-primary", limits, 20*1024*1024*1024, 8, 16*1024*1024*1024) + require.Error(t, err) + assert.Contains(t, err.Error(), "overlay size 21474836480 exceeds maximum allowed size 10737418240") +} + func TestValidateProvisionedResourceLimitsForName_RejectsProjectedTotal(t *testing.T) { t.Parallel()