diff --git a/Makefile b/Makefile index 330328a6d..44fd8ae0e 100644 --- a/Makefile +++ b/Makefile @@ -224,8 +224,10 @@ install-addlicense: FORCE prepare-static-check: FORCE install-goimports install-golangci-lint install-shellcheck install-typos install-go-licence-detector install-addlicense +CONTROLLER_GEN_VERSION ?= v0.21.0 + install-controller-gen: FORCE - @if ! hash controller-gen 2>/dev/null; then printf "\e[1;36m>> Installing controller-gen (this may take a while)...\e[0m\n"; go install sigs.k8s.io/controller-tools/cmd/controller-gen@latest; fi + @if ! hash controller-gen 2>/dev/null || [ "$$(controller-gen --version 2>/dev/null | awk '{print $$2}')" != "$(CONTROLLER_GEN_VERSION)" ]; then printf "\e[1;36m>> Installing controller-gen $(CONTROLLER_GEN_VERSION) (this may take a while)...\e[0m\n"; go install sigs.k8s.io/controller-tools/cmd/controller-gen@$(CONTROLLER_GEN_VERSION); fi install-setup-envtest: FORCE @if ! hash setup-envtest 2>/dev/null; then printf "\e[1;36m>> Installing setup-envtest (this may take a while)...\e[0m\n"; go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest; fi diff --git a/PROJECT b/PROJECT index 24face925..b8ed63fc1 100644 --- a/PROJECT +++ b/PROJECT @@ -289,4 +289,12 @@ resources: kind: DHCPRelay path: github.com/ironcore-dev/network-operator/api/core/v1alpha1 version: v1alpha1 +- api: + crdVersion: v1 + namespaced: true + controller: true + domain: networking.metal.ironcore.dev + kind: ConfigBackup + path: github.com/ironcore-dev/network-operator/api/core/v1alpha1 + version: v1alpha1 version: "3" diff --git a/api/core/v1alpha1/configbackup_types.go b/api/core/v1alpha1/configbackup_types.go new file mode 100644 index 000000000..ec671920d --- /dev/null +++ b/api/core/v1alpha1/configbackup_types.go @@ -0,0 +1,211 @@ +// SPDX-FileCopyrightText: 2026 SAP SE or an SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +package v1alpha1 + +import ( + metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// ConfigBackupType defines how the device should persist a configuration backup. +// +kubebuilder:validation:Enum=Local;Startup +type ConfigBackupType string + +const ( + // ConfigBackupTypeLocal stores the running configuration in a device-local file path. + ConfigBackupTypeLocal ConfigBackupType = "Local" + // ConfigBackupTypeStartup stores the running configuration as the device startup configuration. + ConfigBackupTypeStartup ConfigBackupType = "Startup" +) + +// ConfigBackupSpec defines the desired state of ConfigBackup. +// +kubebuilder:validation:XValidation:rule="self.type != 'Startup' || (!has(self.path) || size(self.path) == 0)",message="path must be omitted for Startup backups" +// +kubebuilder:validation:XValidation:rule="self.type != 'Local' || (has(self.path) && size(self.path) > 0)",message="path must be set for Local backups" +// +kubebuilder:validation:XValidation:rule="self.type == 'Local' || !has(self.retention)",message="retention must only be specified for Local backups" +// +kubebuilder:validation:XValidation:rule="self.type == 'Local' || !has(self.storageThreshold)",message="storageThreshold must only be specified for Local backups" +type ConfigBackupSpec struct { + // DeviceRef is a reference to the Device this object belongs to. The Device object must exist in the same namespace. + // Immutable. + // +required + // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="DeviceRef is immutable" + DeviceRef LocalObjectReference `json:"deviceRef"` + + // Schedule is an optional cron expression. If omitted, the controller performs a one-shot backup per generation. + // +optional + Schedule string `json:"schedule,omitempty"` + + // Type determines whether the backup is saved as a local file or as startup-config. + // +required + Type ConfigBackupType `json:"type"` + + // Path is the device-local destination path for Local backups. 
+ // Different providers may accept different path formats, such as "bootflash:///backups/". + // +optional + Path string `json:"path,omitempty"` + + // Retention configures automatic cleanup of older backups for Local backups. + // +optional + Retention *ConfigBackupRetention `json:"retention,omitempty"` + + // StorageThreshold defines the minimum free space that must remain before creating a new Local backup. + // +optional + StorageThreshold *ConfigBackupStorageThreshold `json:"storageThreshold,omitempty"` +} + +// ConfigBackupRetention defines how many historical backups are kept on the device. +type ConfigBackupRetention struct { + // KeepLast is the number of most recent backups to keep for Local backups. + // Startup backups always keep a single copy. + // +optional + // +kubebuilder:default=1 + // +kubebuilder:validation:Minimum=1 + KeepLast int32 `json:"keepLast,omitempty"` +} + +// ConfigBackupStorageThreshold defines when the controller must stop writing additional backups. +// +kubebuilder:validation:XValidation:rule="has(self.minFreeBytes) || has(self.minFreePercent)",message="at least one threshold must be specified" +type ConfigBackupStorageThreshold struct { + // MinFreeBytes is the minimum number of free bytes required before a new backup can be written. + // +optional + // +kubebuilder:validation:Minimum=0 + MinFreeBytes *int64 `json:"minFreeBytes,omitempty"` + + // MinFreePercent is the minimum percentage of free storage required before a new backup can be written. + // +optional + // +kubebuilder:validation:Minimum=0 + // +kubebuilder:validation:Maximum=100 + MinFreePercent *int32 `json:"minFreePercent,omitempty"` +} + +// ConfigBackupStatus defines the observed state of ConfigBackup. +type ConfigBackupStatus struct { + // Conditions represent the current state of the ConfigBackup resource. 
+ // +listType=map + // +listMapKey=type + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty"` + + // LastBackup contains details about the most recent successful backup operation. + // +optional + LastBackup *ConfigBackupRunStatus `json:"lastBackup,omitempty"` + + // LastAttemptTime is the timestamp of the most recent backup attempt, regardless of outcome. + // +optional + LastAttemptTime *metav1.Time `json:"lastAttemptTime,omitempty"` + + // NextScheduledBackup is the next time at which the controller intends to trigger a backup. + // +optional + NextScheduledBackup *metav1.Time `json:"nextScheduledBackup,omitempty"` + + // TotalBackups is the number of backups currently discovered on the device for this ConfigBackup policy. + // +optional + TotalBackups int32 `json:"totalBackups,omitempty"` + + // TotalSizeBytes is the total size in bytes of the discovered backups, if known. + // +optional + TotalSizeBytes *int64 `json:"totalSizeBytes,omitempty"` + + // OldestBackupTimestamp is the timestamp of the oldest discovered backup, if known. + // +optional + OldestBackupTimestamp *metav1.Time `json:"oldestBackupTimestamp,omitempty"` + + // Storage contains device-local storage statistics for the configured backup target, if known. + // +optional + Storage *ConfigBackupStorageStatus `json:"storage,omitempty"` +} + +// ConfigBackupRunStatus contains the result of a single successful backup run. +type ConfigBackupRunStatus struct { + // Timestamp is the time at which the backup was created on the device. + // +required + Timestamp metav1.Time `json:"timestamp"` + + // Duration is the duration of the backup operation. + // +required + Duration metav1.Duration `json:"duration"` + + // SizeBytes is the size in bytes of the backup artifact, if known. + // +optional + SizeBytes *int64 `json:"sizeBytes,omitempty"` + + // Checksum is the integrity checksum of the backup artifact, if known. 
+ // +optional + Checksum string `json:"checksum,omitempty"` + + // Location is the device-local backup location or the logical target name. + // +optional + Location string `json:"location,omitempty"` + + // Generation is the metadata generation that produced this backup. + // +optional + Generation int64 `json:"generation,omitempty"` +} + +// ConfigBackupStorageStatus contains storage utilization for the configured backup target. +type ConfigBackupStorageStatus struct { + // TotalBytes is the total storage capacity in bytes, if known. + // +optional + TotalBytes *int64 `json:"totalBytes,omitempty"` + + // UsedBytes is the used storage in bytes, if known. + // +optional + UsedBytes *int64 `json:"usedBytes,omitempty"` + + // FreeBytes is the free storage in bytes, if known. + // +optional + FreeBytes *int64 `json:"freeBytes,omitempty"` + + // FreePercent is the free storage percentage, if known. + // +optional + FreePercent *int32 `json:"freePercent,omitempty"` + + // ThresholdBreached indicates whether the configured threshold currently blocks new backups. + // +optional + ThresholdBreached bool `json:"thresholdBreached,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:resource:path=configbackups +// +kubebuilder:resource:singular=configbackup +// +kubebuilder:printcolumn:name="Device",type=string,JSONPath=`.spec.deviceRef.name` +// +kubebuilder:printcolumn:name="Type",type=string,JSONPath=`.spec.type` +// +kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.conditions[?(@.type=="Ready")].status` +// +kubebuilder:printcolumn:name="Last Backup",type=date,JSONPath=`.status.lastBackup.timestamp`,priority=1 +// +kubebuilder:printcolumn:name="Next Backup",type=date,JSONPath=`.status.nextScheduledBackup`,priority=1 +// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" + +// ConfigBackup is the Schema for the configbackups API. 
+type ConfigBackup struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitzero"` + + // +required + Spec ConfigBackupSpec `json:"spec"` + + // +optional + Status ConfigBackupStatus `json:"status,omitzero"` +} + +// GetConditions implements conditions.Getter by returning the conditions stored in the ConfigBackup status. +func (c *ConfigBackup) GetConditions() []metav1.Condition { + return c.Status.Conditions +} + +// SetConditions implements conditions.Setter by replacing the conditions stored in the ConfigBackup status. +func (c *ConfigBackup) SetConditions(conditions []metav1.Condition) { + c.Status.Conditions = conditions +} + +// +kubebuilder:object:root=true + +// ConfigBackupList contains a list of ConfigBackup. +type ConfigBackupList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitzero"` + Items []ConfigBackup `json:"items"` +} + +func init() { + SchemeBuilder.Register(&ConfigBackup{}, &ConfigBackupList{}) +} diff --git a/api/core/v1alpha1/groupversion_info.go b/api/core/v1alpha1/groupversion_info.go index 3fdaf4800..ba3287058 100644 --- a/api/core/v1alpha1/groupversion_info.go +++ b/api/core/v1alpha1/groupversion_info.go @@ -187,6 +187,24 @@ const ( DuplicateResourceOnDevice = "DuplicateResourceOnDevice" ) +// Reasons that are specific to [ConfigBackup] objects. +const ( + // BackupSuccessfulReason indicates that the latest backup operation completed successfully. + BackupSuccessfulReason = "BackupSuccessful" + + // BackupPendingReason indicates that the controller is waiting for the next scheduled backup time. + BackupPendingReason = "BackupPending" + + // BackupFailedReason indicates that a backup operation failed. + BackupFailedReason = "BackupFailed" + + // ScheduleInvalidReason indicates that the configured cron schedule is invalid. + ScheduleInvalidReason = "ScheduleInvalid" + + // StorageThresholdExceededReason indicates that a configured storage safety threshold was exceeded. + StorageThresholdExceededReason = "StorageThresholdExceeded" +) + // Reasons that are specific to [Interface] objects.
const ( // InterfaceNotFoundReason indicates that a referenced interface was not found. diff --git a/api/core/v1alpha1/zz_generated.deepcopy.go b/api/core/v1alpha1/zz_generated.deepcopy.go index 3571a97c0..495810ef8 100644 --- a/api/core/v1alpha1/zz_generated.deepcopy.go +++ b/api/core/v1alpha1/zz_generated.deepcopy.go @@ -962,6 +962,237 @@ func (in *CertificateStatus) DeepCopy() *CertificateStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ConfigBackup) DeepCopyInto(out *ConfigBackup) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ConfigBackup. +func (in *ConfigBackup) DeepCopy() *ConfigBackup { + if in == nil { + return nil + } + out := new(ConfigBackup) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *ConfigBackup) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ConfigBackupList) DeepCopyInto(out *ConfigBackupList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]ConfigBackup, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ConfigBackupList. 
+func (in *ConfigBackupList) DeepCopy() *ConfigBackupList { + if in == nil { + return nil + } + out := new(ConfigBackupList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *ConfigBackupList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ConfigBackupRetention) DeepCopyInto(out *ConfigBackupRetention) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ConfigBackupRetention. +func (in *ConfigBackupRetention) DeepCopy() *ConfigBackupRetention { + if in == nil { + return nil + } + out := new(ConfigBackupRetention) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ConfigBackupRunStatus) DeepCopyInto(out *ConfigBackupRunStatus) { + *out = *in + in.Timestamp.DeepCopyInto(&out.Timestamp) + out.Duration = in.Duration + if in.SizeBytes != nil { + in, out := &in.SizeBytes, &out.SizeBytes + *out = new(int64) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ConfigBackupRunStatus. +func (in *ConfigBackupRunStatus) DeepCopy() *ConfigBackupRunStatus { + if in == nil { + return nil + } + out := new(ConfigBackupRunStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *ConfigBackupSpec) DeepCopyInto(out *ConfigBackupSpec) { + *out = *in + out.DeviceRef = in.DeviceRef + if in.Retention != nil { + in, out := &in.Retention, &out.Retention + *out = new(ConfigBackupRetention) + **out = **in + } + if in.StorageThreshold != nil { + in, out := &in.StorageThreshold, &out.StorageThreshold + *out = new(ConfigBackupStorageThreshold) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ConfigBackupSpec. +func (in *ConfigBackupSpec) DeepCopy() *ConfigBackupSpec { + if in == nil { + return nil + } + out := new(ConfigBackupSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ConfigBackupStatus) DeepCopyInto(out *ConfigBackupStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.LastBackup != nil { + in, out := &in.LastBackup, &out.LastBackup + *out = new(ConfigBackupRunStatus) + (*in).DeepCopyInto(*out) + } + if in.LastAttemptTime != nil { + in, out := &in.LastAttemptTime, &out.LastAttemptTime + *out = (*in).DeepCopy() + } + if in.NextScheduledBackup != nil { + in, out := &in.NextScheduledBackup, &out.NextScheduledBackup + *out = (*in).DeepCopy() + } + if in.TotalSizeBytes != nil { + in, out := &in.TotalSizeBytes, &out.TotalSizeBytes + *out = new(int64) + **out = **in + } + if in.OldestBackupTimestamp != nil { + in, out := &in.OldestBackupTimestamp, &out.OldestBackupTimestamp + *out = (*in).DeepCopy() + } + if in.Storage != nil { + in, out := &in.Storage, &out.Storage + *out = new(ConfigBackupStorageStatus) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ConfigBackupStatus. 
+func (in *ConfigBackupStatus) DeepCopy() *ConfigBackupStatus { + if in == nil { + return nil + } + out := new(ConfigBackupStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ConfigBackupStorageStatus) DeepCopyInto(out *ConfigBackupStorageStatus) { + *out = *in + if in.TotalBytes != nil { + in, out := &in.TotalBytes, &out.TotalBytes + *out = new(int64) + **out = **in + } + if in.UsedBytes != nil { + in, out := &in.UsedBytes, &out.UsedBytes + *out = new(int64) + **out = **in + } + if in.FreeBytes != nil { + in, out := &in.FreeBytes, &out.FreeBytes + *out = new(int64) + **out = **in + } + if in.FreePercent != nil { + in, out := &in.FreePercent, &out.FreePercent + *out = new(int32) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ConfigBackupStorageStatus. +func (in *ConfigBackupStorageStatus) DeepCopy() *ConfigBackupStorageStatus { + if in == nil { + return nil + } + out := new(ConfigBackupStorageStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ConfigBackupStorageThreshold) DeepCopyInto(out *ConfigBackupStorageThreshold) { + *out = *in + if in.MinFreeBytes != nil { + in, out := &in.MinFreeBytes, &out.MinFreeBytes + *out = new(int64) + **out = **in + } + if in.MinFreePercent != nil { + in, out := &in.MinFreePercent, &out.MinFreePercent + *out = new(int32) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ConfigBackupStorageThreshold. 
+func (in *ConfigBackupStorageThreshold) DeepCopy() *ConfigBackupStorageThreshold { + if in == nil { + return nil + } + out := new(ConfigBackupStorageThreshold) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ConfigMapKeySelector) DeepCopyInto(out *ConfigMapKeySelector) { *out = *in diff --git a/charts/network-operator/templates/crd/configbackups.networking.metal.ironcore.dev.yaml b/charts/network-operator/templates/crd/configbackups.networking.metal.ironcore.dev.yaml new file mode 100644 index 000000000..a81298381 --- /dev/null +++ b/charts/network-operator/templates/crd/configbackups.networking.metal.ironcore.dev.yaml @@ -0,0 +1,307 @@ +{{- if .Values.crd.enable }} +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + {{- if .Values.crd.keep }} + "helm.sh/resource-policy": keep + {{- end }} + controller-gen.kubebuilder.io/version: v0.21.0 + name: configbackups.networking.metal.ironcore.dev +spec: + group: networking.metal.ironcore.dev + names: + kind: ConfigBackup + listKind: ConfigBackupList + plural: configbackups + singular: configbackup + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.deviceRef.name + name: Device + type: string + - jsonPath: .spec.type + name: Type + type: string + - jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + - jsonPath: .status.lastBackup.timestamp + name: Last Backup + priority: 1 + type: date + - jsonPath: .status.nextScheduledBackup + name: Next Backup + priority: 1 + type: date + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: ConfigBackup is the Schema for the configbackups API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. 
+ Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ConfigBackupSpec defines the desired state of ConfigBackup. + properties: + deviceRef: + description: |- + DeviceRef is a reference to the Device this object belongs to. The Device object must exist in the same namespace. + Immutable. + properties: + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + maxLength: 63 + minLength: 1 + type: string + required: + - name + type: object + x-kubernetes-map-type: atomic + x-kubernetes-validations: + - message: DeviceRef is immutable + rule: self == oldSelf + path: + description: |- + Path is the device-local destination path for Local backups. + Different providers may accept different path formats, such as "bootflash:///backups/". + type: string + retention: + description: Retention configures automatic cleanup of older backups + for Local backups. + properties: + keepLast: + default: 1 + description: |- + KeepLast is the number of most recent backups to keep for Local backups. + Startup backups always keep a single copy. + format: int32 + minimum: 1 + type: integer + type: object + schedule: + description: Schedule is an optional cron expression. If omitted, + the controller performs a one-shot backup per generation. 
+ type: string + storageThreshold: + description: StorageThreshold defines the minimum free space that + must remain before creating a new Local backup. + properties: + minFreeBytes: + description: MinFreeBytes is the minimum number of free bytes + required before a new backup can be written. + format: int64 + minimum: 0 + type: integer + minFreePercent: + description: MinFreePercent is the minimum percentage of free + storage required before a new backup can be written. + format: int32 + maximum: 100 + minimum: 0 + type: integer + type: object + x-kubernetes-validations: + - message: at least one threshold must be specified + rule: has(self.minFreeBytes) || has(self.minFreePercent) + type: + description: Type determines whether the backup is saved as a local + file or as startup-config. + enum: + - Local + - Startup + type: string + required: + - deviceRef + - type + type: object + x-kubernetes-validations: + - message: path must be omitted for Startup backups + rule: self.type != 'Startup' || (!has(self.path) || size(self.path) + == 0) + - message: path must be set for Local backups + rule: self.type != 'Local' || (has(self.path) && size(self.path) > 0) + - message: retention must only be specified for Local backups + rule: self.type == 'Local' || !has(self.retention) + - message: storageThreshold must only be specified for Local backups + rule: self.type == 'Local' || !has(self.storageThreshold) + status: + description: ConfigBackupStatus defines the observed state of ConfigBackup. + properties: + conditions: + description: Conditions represent the current state of the ConfigBackup + resource. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. 
If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + lastAttemptTime: + description: LastAttemptTime is the timestamp of the most recent backup + attempt, regardless of outcome. + format: date-time + type: string + lastBackup: + description: LastBackup contains details about the most recent successful + backup operation. 
+ properties: + checksum: + description: Checksum is the integrity checksum of the backup + artifact, if known. + type: string + duration: + description: Duration is the duration of the backup operation. + type: string + generation: + description: Generation is the metadata generation that produced + this backup. + format: int64 + type: integer + location: + description: Location is the device-local backup location or the + logical target name. + type: string + sizeBytes: + description: SizeBytes is the size in bytes of the backup artifact, + if known. + format: int64 + type: integer + timestamp: + description: Timestamp is the time at which the backup was created + on the device. + format: date-time + type: string + required: + - duration + - timestamp + type: object + nextScheduledBackup: + description: NextScheduledBackup is the next time at which the controller + intends to trigger a backup. + format: date-time + type: string + oldestBackupTimestamp: + description: OldestBackupTimestamp is the timestamp of the oldest + discovered backup, if known. + format: date-time + type: string + storage: + description: Storage contains device-local storage statistics for + the configured backup target, if known. + properties: + freeBytes: + description: FreeBytes is the free storage in bytes, if known. + format: int64 + type: integer + freePercent: + description: FreePercent is the free storage percentage, if known. + format: int32 + type: integer + thresholdBreached: + description: ThresholdBreached indicates whether the configured + threshold currently blocks new backups. + type: boolean + totalBytes: + description: TotalBytes is the total storage capacity in bytes, + if known. + format: int64 + type: integer + usedBytes: + description: UsedBytes is the used storage in bytes, if known. + format: int64 + type: integer + type: object + totalBackups: + description: TotalBackups is the number of backups currently discovered + on the device for this ConfigBackup policy. 
+ format: int32 + type: integer + totalSizeBytes: + description: TotalSizeBytes is the total size in bytes of the discovered + backups, if known. + format: int64 + type: integer + type: object + required: + - metadata + - spec + type: object + served: true + storage: true + subresources: + status: {} +{{- end }} diff --git a/charts/network-operator/templates/rbac/configbackup-admin-role.yaml b/charts/network-operator/templates/rbac/configbackup-admin-role.yaml new file mode 100644 index 000000000..133f8bad6 --- /dev/null +++ b/charts/network-operator/templates/rbac/configbackup-admin-role.yaml @@ -0,0 +1,31 @@ +{{- if .Values.rbac.helpers.enable }} +apiVersion: rbac.authorization.k8s.io/v1 +{{- if .Values.rbac.namespaced }} +kind: Role +{{- else }} +kind: ClusterRole +{{- end }} +metadata: +{{- if .Values.rbac.namespaced }} + namespace: {{ .Release.Namespace }} +{{- end }} + labels: + app.kubernetes.io/managed-by: {{ .Release.Service }} + app.kubernetes.io/name: {{ include "network-operator.name" . 
}} + helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + app.kubernetes.io/instance: {{ .Release.Name }} + name: {{ include "network-operator.resourceName" (dict "suffix" "configbackup-admin-role" "context" $) }} +rules: +- apiGroups: + - networking.metal.ironcore.dev + resources: + - configbackups + verbs: + - '*' +- apiGroups: + - networking.metal.ironcore.dev + resources: + - configbackups/status + verbs: + - get +{{- end }} diff --git a/charts/network-operator/templates/rbac/configbackup-editor-role.yaml b/charts/network-operator/templates/rbac/configbackup-editor-role.yaml new file mode 100644 index 000000000..c7c27c613 --- /dev/null +++ b/charts/network-operator/templates/rbac/configbackup-editor-role.yaml @@ -0,0 +1,37 @@ +{{- if .Values.rbac.helpers.enable }} +apiVersion: rbac.authorization.k8s.io/v1 +{{- if .Values.rbac.namespaced }} +kind: Role +{{- else }} +kind: ClusterRole +{{- end }} +metadata: +{{- if .Values.rbac.namespaced }} + namespace: {{ .Release.Namespace }} +{{- end }} + labels: + app.kubernetes.io/managed-by: {{ .Release.Service }} + app.kubernetes.io/name: {{ include "network-operator.name" . 
}} + helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + app.kubernetes.io/instance: {{ .Release.Name }} + name: {{ include "network-operator.resourceName" (dict "suffix" "configbackup-editor-role" "context" $) }} +rules: +- apiGroups: + - networking.metal.ironcore.dev + resources: + - configbackups + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - networking.metal.ironcore.dev + resources: + - configbackups/status + verbs: + - get +{{- end }} diff --git a/charts/network-operator/templates/rbac/configbackup-viewer-role.yaml b/charts/network-operator/templates/rbac/configbackup-viewer-role.yaml new file mode 100644 index 000000000..7ae681dd9 --- /dev/null +++ b/charts/network-operator/templates/rbac/configbackup-viewer-role.yaml @@ -0,0 +1,33 @@ +{{- if .Values.rbac.helpers.enable }} +apiVersion: rbac.authorization.k8s.io/v1 +{{- if .Values.rbac.namespaced }} +kind: Role +{{- else }} +kind: ClusterRole +{{- end }} +metadata: +{{- if .Values.rbac.namespaced }} + namespace: {{ .Release.Namespace }} +{{- end }} + labels: + app.kubernetes.io/managed-by: {{ .Release.Service }} + app.kubernetes.io/name: {{ include "network-operator.name" . 
}} + helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + app.kubernetes.io/instance: {{ .Release.Name }} + name: {{ include "network-operator.resourceName" (dict "suffix" "configbackup-viewer-role" "context" $) }} +rules: +- apiGroups: + - networking.metal.ironcore.dev + resources: + - configbackups + verbs: + - get + - list + - watch +- apiGroups: + - networking.metal.ironcore.dev + resources: + - configbackups/status + verbs: + - get +{{- end }} diff --git a/charts/network-operator/templates/rbac/manager-role.yaml b/charts/network-operator/templates/rbac/manager-role.yaml index 580e6b8bd..550aab614 100644 --- a/charts/network-operator/templates/rbac/manager-role.yaml +++ b/charts/network-operator/templates/rbac/manager-role.yaml @@ -27,6 +27,14 @@ rules: - list - update - watch +- apiGroups: + - "" + - events.k8s.io + resources: + - events + verbs: + - create + - patch - apiGroups: - coordination.k8s.io resources: @@ -38,13 +46,6 @@ rules: - list - update - watch -- apiGroups: - - events.k8s.io - resources: - - events - verbs: - - create - - patch - apiGroups: - networking.metal.ironcore.dev resources: @@ -53,6 +54,7 @@ rules: - bgp - bgppeers - certificates + - configbackups - devices - dhcprelays - dns @@ -117,6 +119,7 @@ rules: - bgp/status - bgppeers/status - certificates/status + - configbackups/status - devices/status - dhcprelays/status - dns/status diff --git a/cmd/main.go b/cmd/main.go index 82de236ce..86c342b76 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -625,6 +625,18 @@ func main() { //nolint:gocyclo os.Exit(1) } + if err := (&corecontroller.ConfigBackupReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorder("configbackup-controller"), + WatchFilterValue: watchFilterValue, + Provider: prov, + Locker: locker, + }).SetupWithManager(ctx, mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "ConfigBackup") + os.Exit(1) + } + if os.Getenv("ENABLE_WEBHOOKS") 
!= "false" { if err := webhookv1alpha1.SetupVRFWebhookWithManager(mgr); err != nil { setupLog.Error(err, "unable to create webhook", "webhook", "VRF") diff --git a/config/crd/bases/networking.metal.ironcore.dev_configbackups.yaml b/config/crd/bases/networking.metal.ironcore.dev_configbackups.yaml new file mode 100644 index 000000000..b72b4cfd8 --- /dev/null +++ b/config/crd/bases/networking.metal.ironcore.dev_configbackups.yaml @@ -0,0 +1,303 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.21.0 + name: configbackups.networking.metal.ironcore.dev +spec: + group: networking.metal.ironcore.dev + names: + kind: ConfigBackup + listKind: ConfigBackupList + plural: configbackups + singular: configbackup + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.deviceRef.name + name: Device + type: string + - jsonPath: .spec.type + name: Type + type: string + - jsonPath: .status.conditions[?(@.type=="Ready")].status + name: Ready + type: string + - jsonPath: .status.lastBackup.timestamp + name: Last Backup + priority: 1 + type: date + - jsonPath: .status.nextScheduledBackup + name: Next Backup + priority: 1 + type: date + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: ConfigBackup is the Schema for the configbackups API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. 
+ In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ConfigBackupSpec defines the desired state of ConfigBackup. + properties: + deviceRef: + description: |- + DeviceRef is a reference to the Device this object belongs to. The Device object must exist in the same namespace. + Immutable. + properties: + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + maxLength: 63 + minLength: 1 + type: string + required: + - name + type: object + x-kubernetes-map-type: atomic + x-kubernetes-validations: + - message: DeviceRef is immutable + rule: self == oldSelf + path: + description: |- + Path is the device-local destination path for Local backups. + Different providers may accept different path formats, such as "bootflash:///backups/". + type: string + retention: + description: Retention configures automatic cleanup of older backups + for Local backups. + properties: + keepLast: + default: 1 + description: |- + KeepLast is the number of most recent backups to keep for Local backups. + Startup backups always keep a single copy. + format: int32 + minimum: 1 + type: integer + type: object + schedule: + description: Schedule is an optional cron expression. If omitted, + the controller performs a one-shot backup per generation. + type: string + storageThreshold: + description: StorageThreshold defines the minimum free space that + must remain before creating a new Local backup. + properties: + minFreeBytes: + description: MinFreeBytes is the minimum number of free bytes + required before a new backup can be written. + format: int64 + minimum: 0 + type: integer + minFreePercent: + description: MinFreePercent is the minimum percentage of free + storage required before a new backup can be written. 
+ format: int32 + maximum: 100 + minimum: 0 + type: integer + type: object + x-kubernetes-validations: + - message: at least one threshold must be specified + rule: has(self.minFreeBytes) || has(self.minFreePercent) + type: + description: Type determines whether the backup is saved as a local + file or as startup-config. + enum: + - Local + - Startup + type: string + required: + - deviceRef + - type + type: object + x-kubernetes-validations: + - message: path must be omitted for Startup backups + rule: self.type != 'Startup' || (!has(self.path) || size(self.path) + == 0) + - message: path must be set for Local backups + rule: self.type != 'Local' || (has(self.path) && size(self.path) > 0) + - message: retention must only be specified for Local backups + rule: self.type == 'Local' || !has(self.retention) + - message: storageThreshold must only be specified for Local backups + rule: self.type == 'Local' || !has(self.storageThreshold) + status: + description: ConfigBackupStatus defines the observed state of ConfigBackup. + properties: + conditions: + description: Conditions represent the current state of the ConfigBackup + resource. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. 
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + lastAttemptTime: + description: LastAttemptTime is the timestamp of the most recent backup + attempt, regardless of outcome. + format: date-time + type: string + lastBackup: + description: LastBackup contains details about the most recent successful + backup operation. + properties: + checksum: + description: Checksum is the integrity checksum of the backup + artifact, if known. + type: string + duration: + description: Duration is the duration of the backup operation. + type: string + generation: + description: Generation is the metadata generation that produced + this backup. + format: int64 + type: integer + location: + description: Location is the device-local backup location or the + logical target name. 
+ type: string + sizeBytes: + description: SizeBytes is the size in bytes of the backup artifact, + if known. + format: int64 + type: integer + timestamp: + description: Timestamp is the time at which the backup was created + on the device. + format: date-time + type: string + required: + - duration + - timestamp + type: object + nextScheduledBackup: + description: NextScheduledBackup is the next time at which the controller + intends to trigger a backup. + format: date-time + type: string + oldestBackupTimestamp: + description: OldestBackupTimestamp is the timestamp of the oldest + discovered backup, if known. + format: date-time + type: string + storage: + description: Storage contains device-local storage statistics for + the configured backup target, if known. + properties: + freeBytes: + description: FreeBytes is the free storage in bytes, if known. + format: int64 + type: integer + freePercent: + description: FreePercent is the free storage percentage, if known. + format: int32 + type: integer + thresholdBreached: + description: ThresholdBreached indicates whether the configured + threshold currently blocks new backups. + type: boolean + totalBytes: + description: TotalBytes is the total storage capacity in bytes, + if known. + format: int64 + type: integer + usedBytes: + description: UsedBytes is the used storage in bytes, if known. + format: int64 + type: integer + type: object + totalBackups: + description: TotalBackups is the number of backups currently discovered + on the device for this ConfigBackup policy. + format: int32 + type: integer + totalSizeBytes: + description: TotalSizeBytes is the total size in bytes of the discovered + backups, if known. 
+ format: int64 + type: integer + type: object + required: + - metadata + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 182f057f4..2fde877ac 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -26,6 +26,7 @@ resources: - bases/networking.metal.ironcore.dev_vlans.yaml - bases/networking.metal.ironcore.dev_vrfs.yaml - bases/networking.metal.ironcore.dev_lldps.yaml +- bases/networking.metal.ironcore.dev_configbackups.yaml - bases/nx.cisco.networking.metal.ironcore.dev_bordergateways.yaml - bases/nx.cisco.networking.metal.ironcore.dev_managementaccessconfigs.yaml - bases/nx.cisco.networking.metal.ironcore.dev_networkvirtualizationedgeconfigs.yaml diff --git a/config/rbac/configbackup_admin_role.yaml b/config/rbac/configbackup_admin_role.yaml new file mode 100644 index 000000000..02b016544 --- /dev/null +++ b/config/rbac/configbackup_admin_role.yaml @@ -0,0 +1,27 @@ +# This rule is not used by the project network-operator itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants full permissions ('*') over networking.metal.ironcore.dev. +# This role is intended for users authorized to modify roles and bindings within the cluster, +# enabling them to delegate specific permissions to other users or groups as needed. 
+ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: network-operator + app.kubernetes.io/managed-by: kustomize + name: configbackup-admin-role +rules: + - apiGroups: + - networking.metal.ironcore.dev + resources: + - configbackups + verbs: + - "*" + - apiGroups: + - networking.metal.ironcore.dev + resources: + - configbackups/status + verbs: + - get diff --git a/config/rbac/configbackup_editor_role.yaml b/config/rbac/configbackup_editor_role.yaml new file mode 100644 index 000000000..40dbaa235 --- /dev/null +++ b/config/rbac/configbackup_editor_role.yaml @@ -0,0 +1,33 @@ +# This rule is not used by the project network-operator itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants permissions to create, update, and delete resources within the networking.metal.ironcore.dev. +# This role is intended for users who need to manage these resources +# but should not control RBAC or manage permissions for others. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: network-operator + app.kubernetes.io/managed-by: kustomize + name: configbackup-editor-role +rules: + - apiGroups: + - networking.metal.ironcore.dev + resources: + - configbackups + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - networking.metal.ironcore.dev + resources: + - configbackups/status + verbs: + - get diff --git a/config/rbac/configbackup_viewer_role.yaml b/config/rbac/configbackup_viewer_role.yaml new file mode 100644 index 000000000..499ff80d7 --- /dev/null +++ b/config/rbac/configbackup_viewer_role.yaml @@ -0,0 +1,29 @@ +# This rule is not used by the project network-operator itself. +# It is provided to allow the cluster admin to help manage permissions for users. +# +# Grants read-only access to networking.metal.ironcore.dev resources. 
+# This role is intended for users who need visibility into these resources +# without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing. + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: network-operator + app.kubernetes.io/managed-by: kustomize + name: configbackup-viewer-role +rules: + - apiGroups: + - networking.metal.ironcore.dev + resources: + - configbackups + verbs: + - get + - list + - watch + - apiGroups: + - networking.metal.ironcore.dev + resources: + - configbackups/status + verbs: + - get diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml index f6aab1b99..496449b07 100644 --- a/config/rbac/kustomization.yaml +++ b/config/rbac/kustomization.yaml @@ -37,6 +37,9 @@ resources: - certificate_admin_role.yaml - certificate_editor_role.yaml - certificate_viewer_role.yaml +- configbackup_admin_role.yaml +- configbackup_editor_role.yaml +- configbackup_viewer_role.yaml - device_admin_role.yaml - device_editor_role.yaml - device_viewer_role.yaml diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index e9757ff33..5291062b5 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -21,6 +21,14 @@ rules: - list - update - watch +- apiGroups: + - "" + - events.k8s.io + resources: + - events + verbs: + - create + - patch - apiGroups: - coordination.k8s.io resources: @@ -32,13 +40,6 @@ rules: - list - update - watch -- apiGroups: - - events.k8s.io - resources: - - events - verbs: - - create - - patch - apiGroups: - networking.metal.ironcore.dev resources: @@ -47,6 +48,7 @@ rules: - bgp - bgppeers - certificates + - configbackups - devices - dhcprelays - dns @@ -111,6 +113,7 @@ rules: - bgp/status - bgppeers/status - certificates/status + - configbackups/status - devices/status - dhcprelays/status - dns/status diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml index ac22ba582..bfa78d555 100644 --- 
a/config/samples/kustomization.yaml +++ b/config/samples/kustomization.yaml @@ -1,6 +1,7 @@ ## Append samples of your project ## resources: - v1alpha1_device.yaml +- v1alpha1_configbackup.yaml - v1alpha1_dhcprelay.yaml - v1alpha1_interface.yaml - v1alpha1_lldp.yaml diff --git a/config/samples/v1alpha1_configbackup.yaml b/config/samples/v1alpha1_configbackup.yaml new file mode 100644 index 000000000..ec749789b --- /dev/null +++ b/config/samples/v1alpha1_configbackup.yaml @@ -0,0 +1,23 @@ +apiVersion: networking.metal.ironcore.dev/v1alpha1 +kind: ConfigBackup +metadata: + name: leaf-1-local-backup +spec: + deviceRef: + name: leaf-1 + schedule: "0 2 * * *" + type: Local + path: "bootflash:///backups/" + retention: + keepLast: 5 + storageThreshold: + minFreePercent: 10 +--- +apiVersion: networking.metal.ironcore.dev/v1alpha1 +kind: ConfigBackup +metadata: + name: leaf-1-startup-backup +spec: + deviceRef: + name: leaf-1 + type: Startup diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts index 9f159b373..9ae1e063f 100644 --- a/docs/.vitepress/config.mts +++ b/docs/.vitepress/config.mts @@ -78,6 +78,7 @@ export default withMermaid({ text: 'Concepts', items: [ { text: 'Index', link: '/concepts/' }, + { text: 'Config Backups', link: '/concepts/config-backup' }, { text: 'Pausing Reconciliation', link: '/concepts/pausing' }, ], }, diff --git a/docs/api-reference/index.md b/docs/api-reference/index.md index b333d37e9..bd99295e6 100644 --- a/docs/api-reference/index.md +++ b/docs/api-reference/index.md @@ -19,6 +19,7 @@ SPDX-License-Identifier: Apache-2.0 - [BGPPeer](#bgppeer) - [Banner](#banner) - [Certificate](#certificate) +- [ConfigBackup](#configbackup) - [DHCPRelay](#dhcprelay) - [DNS](#dns) - [Device](#device) @@ -804,6 +805,161 @@ _Appears in:_ | `MD5` | | +#### ConfigBackup + + + +ConfigBackup is the Schema for the configbackups API. 
+ + + + + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `apiVersion` _string_ | `networking.metal.ironcore.dev/v1alpha1` | | | +| `kind` _string_ | `ConfigBackup` | | | +| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | | +| `spec` _[ConfigBackupSpec](#configbackupspec)_ | | | Required: \{\}
| +| `status` _[ConfigBackupStatus](#configbackupstatus)_ | | | Optional: \{\}
| + + +#### ConfigBackupRetention + + + +ConfigBackupRetention defines how many historical backups are kept on the device. + + + +_Appears in:_ +- [ConfigBackupSpec](#configbackupspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `keepLast` _integer_ | KeepLast is the number of most recent backups to keep for Local backups.
Startup backups always keep a single copy. | 1 | Minimum: 1
Optional: \{\}
| + + +#### ConfigBackupRunStatus + + + +ConfigBackupRunStatus contains the result of a single successful backup run. + + + +_Appears in:_ +- [ConfigBackupStatus](#configbackupstatus) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `timestamp` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#time-v1-meta)_ | Timestamp is the time at which the backup was created on the device. | | Required: \{\}
| +| `duration` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#duration-v1-meta)_ | Duration is the duration of the backup operation. | | Required: \{\}
| +| `sizeBytes` _integer_ | SizeBytes is the size in bytes of the backup artifact, if known. | | Optional: \{\}
| +| `checksum` _string_ | Checksum is the integrity checksum of the backup artifact, if known. | | Optional: \{\}
| +| `location` _string_ | Location is the device-local backup location or the logical target name. | | Optional: \{\}
| +| `generation` _integer_ | Generation is the metadata generation that produced this backup. | | Optional: \{\}
| + + +#### ConfigBackupSpec + + + +ConfigBackupSpec defines the desired state of ConfigBackup. + + + +_Appears in:_ +- [ConfigBackup](#configbackup) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `deviceRef` _[LocalObjectReference](#localobjectreference)_ | DeviceRef is a reference to the Device this object belongs to. The Device object must exist in the same namespace.
Immutable. | | Required: \{\}
| +| `schedule` _string_ | Schedule is an optional cron expression. If omitted, the controller performs a one-shot backup per generation. | | Optional: \{\}
| +| `type` _[ConfigBackupType](#configbackuptype)_ | Type determines whether the backup is saved as a local file or as startup-config. | | Enum: [Local Startup]
Required: \{\}
| +| `path` _string_ | Path is the device-local destination path for Local backups.
Different providers may accept different path formats, such as "bootflash:///backups/". | | Optional: \{\}
| +| `retention` _[ConfigBackupRetention](#configbackupretention)_ | Retention configures automatic cleanup of older backups for Local backups. | | Optional: \{\}
| +| `storageThreshold` _[ConfigBackupStorageThreshold](#configbackupstoragethreshold)_ | StorageThreshold defines the minimum free space that must remain before creating a new Local backup. | | Optional: \{\}
| + + +#### ConfigBackupStatus + + + +ConfigBackupStatus defines the observed state of ConfigBackup. + + + +_Appears in:_ +- [ConfigBackup](#configbackup) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#condition-v1-meta) array_ | Conditions represent the current state of the ConfigBackup resource. | | Optional: \{\}
| +| `lastBackup` _[ConfigBackupRunStatus](#configbackuprunstatus)_ | LastBackup contains details about the most recent successful backup operation. | | Optional: \{\}
| +| `lastAttemptTime` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#time-v1-meta)_ | LastAttemptTime is the timestamp of the most recent backup attempt, regardless of outcome. | | Optional: \{\}
| +| `nextScheduledBackup` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#time-v1-meta)_ | NextScheduledBackup is the next time at which the controller intends to trigger a backup. | | Optional: \{\}
| +| `totalBackups` _integer_ | TotalBackups is the number of backups currently discovered on the device for this ConfigBackup policy. | | Optional: \{\}
| +| `totalSizeBytes` _integer_ | TotalSizeBytes is the total size in bytes of the discovered backups, if known. | | Optional: \{\}
| +| `oldestBackupTimestamp` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#time-v1-meta)_ | OldestBackupTimestamp is the timestamp of the oldest discovered backup, if known. | | Optional: \{\}
| +| `storage` _[ConfigBackupStorageStatus](#configbackupstoragestatus)_ | Storage contains device-local storage statistics for the configured backup target, if known. | | Optional: \{\}
| + + +#### ConfigBackupStorageStatus + + + +ConfigBackupStorageStatus contains storage utilization for the configured backup target. + + + +_Appears in:_ +- [ConfigBackupStatus](#configbackupstatus) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `totalBytes` _integer_ | TotalBytes is the total storage capacity in bytes, if known. | | Optional: \{\}
| +| `usedBytes` _integer_ | UsedBytes is the used storage in bytes, if known. | | Optional: \{\}
| +| `freeBytes` _integer_ | FreeBytes is the free storage in bytes, if known. | | Optional: \{\}
| +| `freePercent` _integer_ | FreePercent is the free storage percentage, if known. | | Optional: \{\}
| +| `thresholdBreached` _boolean_ | ThresholdBreached indicates whether the configured threshold currently blocks new backups. | | Optional: \{\}
| + + +#### ConfigBackupStorageThreshold + + + +ConfigBackupStorageThreshold defines when the controller must stop writing additional backups. + + + +_Appears in:_ +- [ConfigBackupSpec](#configbackupspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `minFreeBytes` _integer_ | MinFreeBytes is the minimum number of free bytes required before a new backup can be written. | | Minimum: 0
Optional: \{\}
| +| `minFreePercent` _integer_ | MinFreePercent is the minimum percentage of free storage required before a new backup can be written. | | Maximum: 100
Minimum: 0
Optional: \{\}
| + + +#### ConfigBackupType + +_Underlying type:_ _string_ + +ConfigBackupType defines how the device should persist a configuration backup. + +_Validation:_ +- Enum: [Local Startup] + +_Appears in:_ +- [ConfigBackupSpec](#configbackupspec) + +| Field | Description | +| --- | --- | +| `Local` | ConfigBackupTypeLocal stores the running configuration in a device-local file path.
| +| `Startup` | ConfigBackupTypeStartup stores the running configuration as the device startup configuration.
| + + #### ConfigMapKeySelector @@ -1665,6 +1821,7 @@ _Appears in:_ - [BannerSpec](#bannerspec) - [BorderGatewaySpec](#bordergatewayspec) - [CertificateSpec](#certificatespec) +- [ConfigBackupSpec](#configbackupspec) - [DHCPRelaySpec](#dhcprelayspec) - [DNSSpec](#dnsspec) - [DevicePort](#deviceport) diff --git a/docs/concepts/config-backup.md b/docs/concepts/config-backup.md new file mode 100644 index 000000000..7553378a5 --- /dev/null +++ b/docs/concepts/config-backup.md @@ -0,0 +1,58 @@ +# Config Backups + +`ConfigBackup` defines an on-device configuration backup policy for a `Device`. + +The controller can either: + +- write timestamped backups to a device-local filesystem path, or +- persist the running configuration as the device startup configuration + +This resource is intended for fast local restore workflows and for auditing recent configuration history directly on the device. + +## Key Behaviors + +- Optional cron-based scheduling for recurring backups +- One-shot backups when `spec.schedule` is omitted +- Automatic rotation of old local backup files +- Device storage threshold checks before writing new backups +- Status reporting for last backup result, next scheduled backup, and discovered backup inventory +- Best-effort checksum reporting when the implementation can retrieve it + +## Local Backup Example + +```yaml +apiVersion: networking.metal.ironcore.dev/v1alpha1 +kind: ConfigBackup +metadata: + name: leaf-1-local +spec: + deviceRef: + name: leaf-switch-1 + schedule: "0 2 * * *" + type: Local + path: "" + retention: + keepLast: 5 + storageThreshold: + minFreePercent: 10 +``` + +## Startup Backup Example + +```yaml +apiVersion: networking.metal.ironcore.dev/v1alpha1 +kind: ConfigBackup +metadata: + name: leaf-1-startup +spec: + deviceRef: + name: leaf-switch-1 + type: Startup +``` + +## Notes + +- `Startup` backups always keep a single logical copy. +- Local backup rotation only applies to `type: Local`. 
+- `spec.path` is interpreted by the backing implementation and may use provider-specific device-local path formats. +- `checksum` and `sizeBytes` are optional status fields and depend on what the implementation can retrieve from the device. diff --git a/docs/overview/index.md b/docs/overview/index.md index 109aaba82..38281d035 100644 --- a/docs/overview/index.md +++ b/docs/overview/index.md @@ -45,6 +45,7 @@ graph TD ACL[AccessControlList] BGP[BGP] Banner[Banner] + Backup[ConfigBackup] Cert[Certificate] DNS[DNS] EVI[EVPNInstance] @@ -69,6 +70,7 @@ graph TD ACL -- spec.deviceRef --> D BGP -- spec.deviceRef --> D Banner -- spec.deviceRef --> D + Backup -- spec.deviceRef --> D Cert -- spec.deviceRef --> D DNS -- spec.deviceRef --> D EVI -- spec.deviceRef --> D diff --git a/go.mod b/go.mod index 73d6182c3..49746aeed 100644 --- a/go.mod +++ b/go.mod @@ -15,6 +15,7 @@ require ( github.com/openconfig/ygnmi v0.14.0 github.com/openconfig/ygot v0.34.0 github.com/pin/tftp/v3 v3.2.0 + github.com/robfig/cron/v3 v3.0.1 github.com/sapcc/go-api-declarations v1.22.0 github.com/stretchr/testify v1.11.1 github.com/tidwall/gjson v1.18.0 diff --git a/go.sum b/go.sum index 3db612497..30dce04cf 100644 --- a/go.sum +++ b/go.sum @@ -170,6 +170,8 @@ github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9Z github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= +github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= +github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/russross/blackfriday/v2 v2.1.0/go.mod 
h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= diff --git a/internal/controller/core/configbackup_controller.go b/internal/controller/core/configbackup_controller.go new file mode 100644 index 000000000..ea637e6d3 --- /dev/null +++ b/internal/controller/core/configbackup_controller.go @@ -0,0 +1,559 @@ +// SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +package core + +import ( + "context" + "errors" + "fmt" + "time" + + "github.com/robfig/cron/v3" + "k8s.io/apimachinery/pkg/api/equality" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + kerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/client-go/tools/events" + "k8s.io/klog/v2" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/predicate" + + "github.com/ironcore-dev/network-operator/api/core/v1alpha1" + "github.com/ironcore-dev/network-operator/internal/conditions" + "github.com/ironcore-dev/network-operator/internal/deviceutil" + "github.com/ironcore-dev/network-operator/internal/paused" + "github.com/ironcore-dev/network-operator/internal/provider" + "github.com/ironcore-dev/network-operator/internal/resourcelock" +) + +// ConfigBackupReconciler reconciles a ConfigBackup object. +type ConfigBackupReconciler struct { + client.Client + Scheme *runtime.Scheme + + // WatchFilterValue is the label value used to filter events prior to reconciliation. + WatchFilterValue string + + // Recorder is used to record events for the controller. 
+ // More info: https://book.kubebuilder.io/reference/raising-events + Recorder events.EventRecorder + + // Provider is the driver that will be used to create & delete the config backup. + Provider provider.ProviderFunc + + // Locker is used to synchronize operations on resources targeting the same device. + Locker *resourcelock.ResourceLocker + + // Now is a function that returns the current time. It can be overridden in tests to control time-dependent behavior. + Now func() time.Time +} + +// +kubebuilder:rbac:groups=networking.metal.ironcore.dev,resources=configbackups,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=networking.metal.ironcore.dev,resources=configbackups/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch +// +kubebuilder:rbac:groups=events.k8s.io,resources=events,verbs=create;patch + +func (r *ConfigBackupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) { + log := ctrl.LoggerFrom(ctx) + log.V(3).Info("Reconciling resource") + + obj := new(v1alpha1.ConfigBackup) + if err := r.Get(ctx, req.NamespacedName, obj); err != nil { + if apierrors.IsNotFound(err) { + log.V(3).Info("Resource not found. 
Ignoring reconciliation since object must be deleted") + return ctrl.Result{}, nil + } + log.Error(err, "Failed to get resource") + return ctrl.Result{}, err + } + + prov, ok := r.Provider().(provider.ConfigBackupProvider) + if !ok { + if meta.SetStatusCondition(&obj.Status.Conditions, metav1.Condition{ + Type: v1alpha1.ReadyCondition, + Status: metav1.ConditionFalse, + Reason: v1alpha1.NotImplementedReason, + Message: "Provider does not implement provider.ConfigBackupProvider", + }) { + return ctrl.Result{}, r.Status().Update(ctx, obj) + } + return ctrl.Result{}, nil + } + + device, err := deviceutil.GetDeviceByName(ctx, r, obj.Namespace, obj.Spec.DeviceRef.Name) + if err != nil { + return ctrl.Result{}, err + } + + if isPaused, requeue, err := paused.EnsureCondition(ctx, r.Client, device, obj); isPaused || requeue || err != nil { + return ctrl.Result{Requeue: requeue}, err + } + + if err := r.Locker.AcquireLock(ctx, device.Name, "configbackup-controller"); err != nil { + if errors.Is(err, resourcelock.ErrLockAlreadyHeld) { + log.V(3).Info("Device is already locked, requeuing reconciliation") + return ctrl.Result{RequeueAfter: Jitter(time.Second), Priority: new(LockWaitPriorityDefault)}, nil + } + log.Error(err, "Failed to acquire device lock") + return ctrl.Result{}, err + } + defer func() { + if err := r.Locker.ReleaseLock(ctx, device.Name, "configbackup-controller"); err != nil { + log.Error(err, "Failed to release device lock") + reterr = kerrors.NewAggregate([]error{reterr, err}) + } + }() + + conn, err := deviceutil.GetDeviceConnection(ctx, r, device) + if err != nil { + return ctrl.Result{}, err + } + + orig := obj.DeepCopy() + if conditions.InitializeConditions(obj, v1alpha1.ReadyCondition, v1alpha1.ConfiguredCondition, v1alpha1.OperationalCondition) { + log.V(1).Info("Initializing status conditions") + return ctrl.Result{}, r.Status().Update(ctx, obj) + } + + defer func() { + if !equality.Semantic.DeepEqual(orig.ObjectMeta, obj.ObjectMeta) { + // Pass 
obj.DeepCopy() to avoid Patch() modifying obj and interfering with status update below + if err := r.Patch(ctx, obj.DeepCopy(), client.MergeFrom(orig)); err != nil { + log.Error(err, "Failed to update resource metadata") + reterr = kerrors.NewAggregate([]error{reterr, err}) + } + } + if !equality.Semantic.DeepEqual(orig.Status, obj.Status) { + if err := r.Status().Patch(ctx, obj, client.MergeFrom(orig)); err != nil { + log.Error(err, "Failed to update status") + reterr = kerrors.NewAggregate([]error{reterr, err}) + } + } + }() + + s := &configBackupScope{ + Device: device, + ConfigBackup: obj, + Connection: conn, + Provider: prov, + } + + res, err := r.reconcile(ctx, s) + if err != nil { + log.Error(err, "Failed to reconcile resource") + return ctrl.Result{}, err + } + return res, nil +} + +type configBackupScope struct { + Device *v1alpha1.Device + ConfigBackup *v1alpha1.ConfigBackup + Connection *deviceutil.Connection + Provider provider.ConfigBackupProvider +} + +func (r *ConfigBackupReconciler) reconcile(ctx context.Context, s *configBackupScope) (_ ctrl.Result, reterr error) { + if s.ConfigBackup.Labels == nil { + s.ConfigBackup.Labels = make(map[string]string) + } + s.ConfigBackup.Labels[v1alpha1.DeviceLabel] = s.Device.Name + + if !controllerutil.HasControllerReference(s.ConfigBackup) { + if err := controllerutil.SetOwnerReference(s.Device, s.ConfigBackup, r.Scheme, controllerutil.WithBlockOwnerDeletion(true)); err != nil { + return ctrl.Result{}, err + } + } + + defer func() { + conditions.RecomputeReady(s.ConfigBackup) + }() + + now := time.Now().UTC() + if r.Now != nil { + now = r.Now().UTC() + } + + var schedule cron.Schedule + if s.ConfigBackup.Spec.Schedule != "" { + parsedSchedule, err := cron.ParseStandard(s.ConfigBackup.Spec.Schedule) + if err != nil { + conditions.Set(s.ConfigBackup, metav1.Condition{ + Type: v1alpha1.ConfiguredCondition, + Status: metav1.ConditionFalse, + Reason: v1alpha1.ScheduleInvalidReason, + Message: err.Error(), + }) + 
conditions.Set(s.ConfigBackup, metav1.Condition{ + Type: v1alpha1.OperationalCondition, + Status: metav1.ConditionFalse, + Reason: v1alpha1.ScheduleInvalidReason, + Message: "Backup schedule is invalid", + }) + return ctrl.Result{}, nil + } + schedule = parsedSchedule + } + + if err := s.Provider.Connect(ctx, s.Connection); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to connect to provider: %w", err) + } + defer func() { + if err := s.Provider.Disconnect(ctx, s.Connection); err != nil { + reterr = kerrors.NewAggregate([]error{reterr, err}) + } + }() + + request := configBackupRequestFor(s.ConfigBackup) + inventory, err := s.Provider.ListConfigBackups(ctx, request) + if err != nil { + observeConfigBackupFailure(s.ConfigBackup.Spec.Type, 0) + return ctrl.Result{}, fmt.Errorf("failed to list config backups: %w", err) + } + + thresholdBreached := applyInventoryStatus(s.ConfigBackup, inventory, s.ConfigBackup.Spec.StorageThreshold) + if thresholdBreached { + conditions.Set(s.ConfigBackup, metav1.Condition{ + Type: v1alpha1.OperationalCondition, + Status: metav1.ConditionFalse, + Reason: v1alpha1.StorageThresholdExceededReason, + Message: "Configured storage threshold prevents creating a new backup", + }) + } else { + conditions.Set(s.ConfigBackup, metav1.Condition{ + Type: v1alpha1.OperationalCondition, + Status: metav1.ConditionTrue, + Reason: v1alpha1.OperationalReason, + Message: "Backup target storage is healthy", + }) + } + + due, nextScheduled := backupDue(now, s.ConfigBackup, schedule) + if nextScheduled != nil { + ts := metav1.NewTime(nextScheduled.UTC()) + s.ConfigBackup.Status.NextScheduledBackup = &ts + } else { + s.ConfigBackup.Status.NextScheduledBackup = nil + } + + if !due { + setConfigBackupPendingCondition(s.ConfigBackup) + if nextScheduled == nil { + return ctrl.Result{}, nil + } + until := nextScheduled.UTC().Sub(now.UTC()) + if until <= 0 { + until = time.Second + } + return ctrl.Result{RequeueAfter: until}, nil + } + + lastAttempt := 
metav1.NewTime(now) + s.ConfigBackup.Status.LastAttemptTime = &lastAttempt + + if thresholdBreached { + if schedule != nil { + next := metav1.NewTime(schedule.Next(now.UTC()).UTC()) + s.ConfigBackup.Status.NextScheduledBackup = &next + } else { + s.ConfigBackup.Status.NextScheduledBackup = nil + } + conditions.Set(s.ConfigBackup, metav1.Condition{ + Type: v1alpha1.ConfiguredCondition, + Status: metav1.ConditionFalse, + Reason: v1alpha1.StorageThresholdExceededReason, + Message: "Backup skipped because storage threshold is exceeded", + }) + r.Recorder.Eventf(s.ConfigBackup, nil, "Warning", "BackupSkipped", "Reconcile", "Backup skipped because storage threshold is exceeded") + if nextScheduled == nil { + return ctrl.Result{}, nil + } + until := nextScheduled.UTC().Sub(now.UTC()) + if until <= 0 { + until = time.Second + } + return ctrl.Result{RequeueAfter: until}, nil + } + + result, err := s.Provider.CreateConfigBackup(ctx, request) + if err != nil { + if schedule != nil { + next := metav1.NewTime(schedule.Next(now.UTC()).UTC()) + s.ConfigBackup.Status.NextScheduledBackup = &next + } else { + s.ConfigBackup.Status.NextScheduledBackup = nil + } + conditions.Set(s.ConfigBackup, metav1.Condition{ + Type: v1alpha1.ConfiguredCondition, + Status: metav1.ConditionFalse, + Reason: v1alpha1.BackupFailedReason, + Message: err.Error(), + }) + observeConfigBackupFailure(s.ConfigBackup.Spec.Type, 0) + r.Recorder.Eventf(s.ConfigBackup, nil, "Warning", "BackupFailed", "Reconcile", "%v", err) + if nextScheduled == nil { + return ctrl.Result{}, err + } + until := nextScheduled.UTC().Sub(now.UTC()) + if until <= 0 { + until = time.Second + } + return ctrl.Result{RequeueAfter: until}, err + } + + observeConfigBackupSuccess(s.ConfigBackup.Spec.Type, result.Duration, result.SizeBytes) + r.Recorder.Eventf(s.ConfigBackup, nil, "Normal", "BackupCompleted", "Reconcile", "Backup stored at %s", result.Path) + + postInventory, err := s.Provider.ListConfigBackups(ctx, request) + if err != nil { 
+ return ctrl.Result{}, fmt.Errorf("failed to refresh backup inventory: %w", err) + } + + if s.ConfigBackup.Spec.Type == v1alpha1.ConfigBackupTypeLocal { + beforeRotation := len(postInventory.Backups) + if err := s.Provider.DeleteConfigBackups(ctx, &provider.DeleteConfigBackupsRequest{ + ConfigBackup: s.ConfigBackup, + }); err != nil { + return ctrl.Result{}, fmt.Errorf("failed to rotate old backups: %w", err) + } + postInventory, err = s.Provider.ListConfigBackups(ctx, request) + if err != nil { + return ctrl.Result{}, fmt.Errorf("failed to refresh backup inventory after rotation: %w", err) + } + if deleted := beforeRotation - len(postInventory.Backups); deleted > 0 { + r.Recorder.Eventf(s.ConfigBackup, nil, "Normal", "BackupRotation", "Reconcile", "Removed %d old backup(s)", deleted) + } + } + + applyInventoryStatus(s.ConfigBackup, postInventory, s.ConfigBackup.Spec.StorageThreshold) + s.ConfigBackup.Status.LastBackup = &v1alpha1.ConfigBackupRunStatus{ + Timestamp: metav1.NewTime(result.CreatedAt.UTC()), + Duration: metav1.Duration{Duration: result.Duration}, + SizeBytes: cloneInt64Ptr(result.SizeBytes), + Checksum: result.Checksum, + Location: result.Path, + Generation: s.ConfigBackup.Generation, + } + conditions.Set(s.ConfigBackup, metav1.Condition{ + Type: v1alpha1.ConfiguredCondition, + Status: metav1.ConditionTrue, + Reason: v1alpha1.BackupSuccessfulReason, + Message: "Last backup completed successfully", + }) + conditions.Set(s.ConfigBackup, metav1.Condition{ + Type: v1alpha1.OperationalCondition, + Status: metav1.ConditionTrue, + Reason: v1alpha1.OperationalReason, + Message: "Backup target storage is healthy", + }) + + if schedule != nil { + next := schedule.Next(result.CreatedAt.UTC()) + ts := metav1.NewTime(next) + s.ConfigBackup.Status.NextScheduledBackup = &ts + until := next.UTC().Sub(now.UTC()) + if until <= 0 { + until = time.Second + } + return ctrl.Result{RequeueAfter: until}, nil + } + + s.ConfigBackup.Status.NextScheduledBackup = nil + return 
ctrl.Result{}, nil +} + +func (r *ConfigBackupReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error { + labelSelector := metav1.LabelSelector{} + if r.WatchFilterValue != "" { + labelSelector.MatchLabels = map[string]string{v1alpha1.WatchLabel: r.WatchFilterValue} + } + + filter, err := predicate.LabelSelectorPredicate(labelSelector) + if err != nil { + return fmt.Errorf("failed to create label selector predicate: %w", err) + } + + bldr := ctrl.NewControllerManagedBy(mgr). + For(&v1alpha1.ConfigBackup{}). + Named("configbackup"). + WithEventFilter(filter) + + return bldr. + Watches( + &v1alpha1.Device{}, + handler.EnqueueRequestsFromMapFunc(r.deviceToConfigBackups), + builder.WithPredicates(predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + return paused.DevicePausedChanged(e.ObjectOld, e.ObjectNew) + }, + GenericFunc: func(e event.GenericEvent) bool { + return false + }, + }), + ). + Complete(r) +} + +func (r *ConfigBackupReconciler) deviceToConfigBackups(ctx context.Context, obj client.Object) []ctrl.Request { + device, ok := obj.(*v1alpha1.Device) + if !ok { + panic(fmt.Sprintf("expected a Device but got a %T", obj)) + } + + log := ctrl.LoggerFrom(ctx, "Device", klog.KObj(device)) + list := new(v1alpha1.ConfigBackupList) + if err := r.List(ctx, list, client.InNamespace(device.Namespace), client.MatchingLabels{v1alpha1.DeviceLabel: device.Name}); err != nil { + log.Error(err, "Failed to list ConfigBackups") + return nil + } + + reqs := make([]ctrl.Request, len(list.Items)) + for i, item := range list.Items { + reqs[i] = ctrl.Request{NamespacedName: client.ObjectKeyFromObject(&item)} + } + return reqs +} + +func backupDue(now time.Time, obj *v1alpha1.ConfigBackup, schedule cron.Schedule) (bool, *time.Time) { + if schedule == nil { + if obj.Status.LastBackup == nil || obj.Status.LastBackup.Generation != obj.Generation { + return true, nil + } + return false, nil + } + + reference := obj.CreationTimestamp.UTC() + if 
obj.Status.LastBackup != nil { + reference = obj.Status.LastBackup.Timestamp.UTC() + } + next := schedule.Next(reference) + if next.IsZero() { + return false, nil + } + if !next.After(now) { + return true, &next + } + return false, &next +} + +func setConfigBackupPendingCondition(obj *v1alpha1.ConfigBackup) { + if obj.Status.LastBackup != nil { + conditions.Set(obj, metav1.Condition{ + Type: v1alpha1.ConfiguredCondition, + Status: metav1.ConditionTrue, + Reason: v1alpha1.BackupSuccessfulReason, + Message: "Waiting for the next scheduled backup", + }) + return + } + conditions.Set(obj, metav1.Condition{ + Type: v1alpha1.ConfiguredCondition, + Status: metav1.ConditionFalse, + Reason: v1alpha1.BackupPendingReason, + Message: "Waiting for the first scheduled backup", + }) +} + +func configBackupRequestFor(obj *v1alpha1.ConfigBackup) *provider.ConfigBackupRequest { + return &provider.ConfigBackupRequest{ConfigBackup: obj} +} + +const maxInt32 = int(^uint32(0) >> 1) + +func applyInventoryStatus(obj *v1alpha1.ConfigBackup, inventory *provider.ConfigBackupInventory, threshold *v1alpha1.ConfigBackupStorageThreshold) bool { + obj.Status.TotalBackups = 0 + obj.Status.TotalSizeBytes = nil + obj.Status.OldestBackupTimestamp = nil + obj.Status.Storage = nil + if inventory == nil { + return false + } + + if len(inventory.Backups) > maxInt32 { + obj.Status.TotalBackups = int32(maxInt32) + } else { + obj.Status.TotalBackups = int32(len(inventory.Backups)) //nolint:gosec // status is intentionally capped to int32 + } + var totalSize int64 + var oldest *metav1.Time + for _, backup := range inventory.Backups { + if backup.SizeBytes != nil { + totalSize += *backup.SizeBytes + } + if !backup.CreatedAt.IsZero() { + ts := metav1.NewTime(backup.CreatedAt.UTC()) + if oldest == nil || ts.Before(oldest) { + oldest = &ts + } + } + } + if len(inventory.Backups) > 0 { + obj.Status.TotalSizeBytes = &totalSize + } + obj.Status.OldestBackupTimestamp = oldest + if obj.Spec.Type == 
v1alpha1.ConfigBackupTypeStartup && obj.Status.OldestBackupTimestamp == nil && obj.Status.LastBackup != nil { + ts := obj.Status.LastBackup.Timestamp + obj.Status.OldestBackupTimestamp = &ts + } + + storage := &v1alpha1.ConfigBackupStorageStatus{ + TotalBytes: cloneInt64Ptr(inventory.TotalBytes), + UsedBytes: cloneInt64Ptr(inventory.UsedBytes), + FreeBytes: cloneInt64Ptr(inventory.FreeBytes), + } + if inventory.TotalBytes != nil && inventory.FreeBytes != nil && *inventory.TotalBytes > 0 { + freePercent := int32((*inventory.FreeBytes * 100) / *inventory.TotalBytes) //nolint:gosec // freePercent is bounded to 0..100 + storage.FreePercent = &freePercent + } + storage.ThresholdBreached = storageThresholdBreached(storage, threshold) + obj.Status.Storage = storage + return storage.ThresholdBreached +} + +func storageThresholdBreached(storage *v1alpha1.ConfigBackupStorageStatus, threshold *v1alpha1.ConfigBackupStorageThreshold) bool { + if storage == nil || threshold == nil { + return false + } + if threshold.MinFreeBytes != nil && storage.FreeBytes != nil && *storage.FreeBytes < *threshold.MinFreeBytes { + return true + } + if threshold.MinFreePercent != nil && storage.FreePercent != nil && *storage.FreePercent < *threshold.MinFreePercent { + return true + } + return false +} + +func cloneInt64Ptr(value *int64) *int64 { + if value == nil { + return nil + } + v := *value + return &v +} + +func observeConfigBackupSuccess(backupType v1alpha1.ConfigBackupType, duration time.Duration, sizeBytes *int64) { + labels := prometheusLabels(backupType, "success") + configBackupOperationsTotal.WithLabelValues(labels...).Inc() + configBackupDurationSeconds.WithLabelValues(labels...).Observe(duration.Seconds()) + if sizeBytes != nil { + configBackupSizeBytes.WithLabelValues(string(backupType)).Observe(float64(*sizeBytes)) + } +} + +func observeConfigBackupFailure(backupType v1alpha1.ConfigBackupType, duration time.Duration) { + labels := prometheusLabels(backupType, "failure") + 
configBackupOperationsTotal.WithLabelValues(labels...).Inc() + configBackupDurationSeconds.WithLabelValues(labels...).Observe(duration.Seconds()) +} + +func prometheusLabels(backupType v1alpha1.ConfigBackupType, result string) []string { + return []string{result, string(backupType)} +} diff --git a/internal/controller/core/configbackup_controller_test.go b/internal/controller/core/configbackup_controller_test.go new file mode 100644 index 000000000..52a280981 --- /dev/null +++ b/internal/controller/core/configbackup_controller_test.go @@ -0,0 +1,283 @@ +// SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +package core + +import ( + "path" + "strings" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/robfig/cron/v3" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/ironcore-dev/network-operator/api/core/v1alpha1" + "github.com/ironcore-dev/network-operator/internal/provider" +) + +var _ = Describe("ConfigBackup Controller", func() { + Context("When reconciling a resource", func() { + var ( + device *v1alpha1.Device + deviceKey client.ObjectKey + backup *v1alpha1.ConfigBackup + backupKey client.ObjectKey + deviceName string + backupName string + ) + + BeforeEach(func() { + device = &v1alpha1.Device{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "testconfigbackup-device-", + Namespace: metav1.NamespaceDefault, + }, + Spec: v1alpha1.DeviceSpec{ + Endpoint: v1alpha1.Endpoint{Address: "192.168.10.2:9339"}, + }, + } + Expect(k8sClient.Create(ctx, device)).To(Succeed()) + + orig := device.DeepCopy() + device.Status.Phase = v1alpha1.DevicePhaseRunning + meta.SetStatusCondition(&device.Status.Conditions, metav1.Condition{ + Type: v1alpha1.ReachableCondition, + Status: metav1.ConditionTrue, + Reason: 
v1alpha1.ReachableReason, + Message: "Device is reachable", + }) + Expect(k8sClient.Status().Patch(ctx, device, client.MergeFrom(orig))).To(Succeed()) + Eventually(func(g Gomega) { + current := &v1alpha1.Device{} + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(device), current)).To(Succeed()) + g.Expect(current.Status.Phase).To(Equal(v1alpha1.DevicePhaseRunning)) + cond := meta.FindStatusCondition(current.Status.Conditions, v1alpha1.ReachableCondition) + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Status).To(Equal(metav1.ConditionTrue)) + }).Should(Succeed()) + + deviceName = device.Name + deviceKey = client.ObjectKeyFromObject(device) + + testProvider.Lock() + testProvider.ConfigBackups = nil + storageTotal := int64(1024 * 1024 * 100) + testProvider.StorageTotal = &storageTotal + testProvider.Unlock() + }) + + AfterEach(func() { + if backup != nil { + err := k8sClient.Get(ctx, backupKey, backup) + if err == nil { + Expect(k8sClient.Delete(ctx, backup)).To(Succeed()) + Eventually(func(g Gomega) { + err := k8sClient.Get(ctx, backupKey, &v1alpha1.ConfigBackup{}) + g.Expect(errors.IsNotFound(err)).To(BeTrue()) + }).Should(Succeed()) + } + } + + err := k8sClient.Get(ctx, deviceKey, device) + if err == nil { + Expect(k8sClient.Delete(ctx, device, client.PropagationPolicy(metav1.DeletePropagationForeground))).To(Succeed()) + } + + testProvider.Lock() + testProvider.ConfigBackups = nil + testProvider.Unlock() + }) + + It("creates a one-shot local backup and updates status", func() { + backupName = deviceName + "-local" + backupKey = client.ObjectKey{Name: backupName, Namespace: metav1.NamespaceDefault} + backup = &v1alpha1.ConfigBackup{ + ObjectMeta: metav1.ObjectMeta{ + Name: backupName, + Namespace: metav1.NamespaceDefault, + }, + Spec: v1alpha1.ConfigBackupSpec{ + DeviceRef: v1alpha1.LocalObjectReference{Name: deviceName}, + Type: v1alpha1.ConfigBackupTypeLocal, + Path: "bootflash:///backups/", + }, + } + Expect(k8sClient.Create(ctx, backup)).To(Succeed()) + 
+ Eventually(func(g Gomega) { + current := &v1alpha1.ConfigBackup{} + g.Expect(k8sClient.Get(ctx, backupKey, current)).To(Succeed()) + g.Expect(current.Labels).To(HaveKeyWithValue(v1alpha1.DeviceLabel, deviceName)) + g.Expect(current.Status.LastBackup).NotTo(BeNil()) + g.Expect(current.Status.LastBackup.Checksum).To(Equal("sha256:test-checksum")) + g.Expect(current.Status.LastBackup.Location).To(ContainSubstring("bootflash:///backups/")) + g.Expect(current.Status.TotalBackups).To(Equal(int32(1))) + cond := meta.FindStatusCondition(current.Status.Conditions, v1alpha1.ReadyCondition) + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Status).To(Equal(metav1.ConditionTrue)) + }).Should(Succeed()) + }) + + It("creates a startup-config backup and updates status", func() { + backupName = deviceName + "-startup" + backupKey = client.ObjectKey{Name: backupName, Namespace: metav1.NamespaceDefault} + backup = &v1alpha1.ConfigBackup{ + ObjectMeta: metav1.ObjectMeta{ + Name: backupName, + Namespace: metav1.NamespaceDefault, + }, + Spec: v1alpha1.ConfigBackupSpec{ + DeviceRef: v1alpha1.LocalObjectReference{Name: deviceName}, + Type: v1alpha1.ConfigBackupTypeStartup, + }, + } + Expect(k8sClient.Create(ctx, backup)).To(Succeed()) + + Eventually(func(g Gomega) { + current := &v1alpha1.ConfigBackup{} + g.Expect(k8sClient.Get(ctx, backupKey, current)).To(Succeed()) + g.Expect(current.Status.LastBackup).NotTo(BeNil()) + g.Expect(current.Status.LastBackup.Location).To(Equal("startup-config")) + g.Expect(current.Status.LastBackup.Checksum).To(Equal("sha256:test-checksum")) + g.Expect(current.Status.TotalBackups).To(Equal(int32(1))) + g.Expect(current.Status.OldestBackupTimestamp).NotTo(BeNil()) + g.Expect(current.Status.LastBackup).NotTo(BeNil()) + g.Expect(current.Status.OldestBackupTimestamp.Equal(¤t.Status.LastBackup.Timestamp)).To(BeTrue()) + cond := meta.FindStatusCondition(current.Status.Conditions, v1alpha1.ReadyCondition) + g.Expect(cond).NotTo(BeNil()) + 
g.Expect(cond.Status).To(Equal(metav1.ConditionTrue)) + }).Should(Succeed()) + }) + + It("rotates old local backups according to retention", func() { + backupName = deviceName + "-rotated" + backupKey = client.ObjectKey{Name: backupName, Namespace: metav1.NamespaceDefault} + prefix := "configbackup-default-" + backupName + "-" + + testProvider.Lock() + size := int64(1024) + testProvider.ConfigBackups = []provider.ConfigBackupFile{ + {Path: "bootflash:///backups/" + prefix + "20260410T020000Z-g1", SizeBytes: &size, CreatedAt: time.Date(2026, time.April, 10, 2, 0, 0, 0, time.UTC)}, + {Path: "bootflash:///backups/" + prefix + "20260411T020000Z-g1", SizeBytes: &size, CreatedAt: time.Date(2026, time.April, 11, 2, 0, 0, 0, time.UTC)}, + {Path: "bootflash:///backups/" + prefix + "20260412T020000Z-g1", SizeBytes: &size, CreatedAt: time.Date(2026, time.April, 12, 2, 0, 0, 0, time.UTC)}, + } + testProvider.Unlock() + + backup = &v1alpha1.ConfigBackup{ + ObjectMeta: metav1.ObjectMeta{ + Name: backupName, + Namespace: metav1.NamespaceDefault, + }, + Spec: v1alpha1.ConfigBackupSpec{ + DeviceRef: v1alpha1.LocalObjectReference{Name: deviceName}, + Type: v1alpha1.ConfigBackupTypeLocal, + Path: "bootflash:///backups/", + Retention: &v1alpha1.ConfigBackupRetention{KeepLast: 2}, + }, + } + Expect(k8sClient.Create(ctx, backup)).To(Succeed()) + + Eventually(func(g Gomega) { + testProvider.Lock() + defer testProvider.Unlock() + var matching []provider.ConfigBackupFile + for _, file := range testProvider.ConfigBackups { + if strings.HasPrefix(path.Base(file.Path), prefix) { + matching = append(matching, file) + } + } + g.Expect(matching).To(HaveLen(2)) + }).Should(Succeed()) + + Eventually(func(g Gomega) { + current := &v1alpha1.ConfigBackup{} + g.Expect(k8sClient.Get(ctx, backupKey, current)).To(Succeed()) + g.Expect(current.Status.TotalBackups).To(Equal(int32(2))) + }).Should(Succeed()) + }) + + It("skips backup creation when the storage threshold is exceeded", func() { + backupName = 
deviceName + "-threshold" + backupKey = client.ObjectKey{Name: backupName, Namespace: metav1.NamespaceDefault} + + testProvider.Lock() + size := int64(95) + storageTotal := int64(100) + testProvider.StorageTotal = &storageTotal + testProvider.ConfigBackups = []provider.ConfigBackupFile{{ + Path: "bootflash:///backups/existing-unrelated-backup", + SizeBytes: &size, + CreatedAt: time.Date(2026, time.April, 10, 2, 0, 0, 0, time.UTC), + }} + testProvider.Unlock() + + minFreeBytes := int64(10) + backup = &v1alpha1.ConfigBackup{ + ObjectMeta: metav1.ObjectMeta{ + Name: backupName, + Namespace: metav1.NamespaceDefault, + }, + Spec: v1alpha1.ConfigBackupSpec{ + DeviceRef: v1alpha1.LocalObjectReference{Name: deviceName}, + Type: v1alpha1.ConfigBackupTypeLocal, + Path: "bootflash:///backups/", + StorageThreshold: &v1alpha1.ConfigBackupStorageThreshold{ + MinFreeBytes: &minFreeBytes, + }, + }, + } + Expect(k8sClient.Create(ctx, backup)).To(Succeed()) + + Eventually(func(g Gomega) { + current := &v1alpha1.ConfigBackup{} + g.Expect(k8sClient.Get(ctx, backupKey, current)).To(Succeed()) + g.Expect(current.Status.LastBackup).To(BeNil()) + g.Expect(current.Status.Storage).NotTo(BeNil()) + g.Expect(current.Status.Storage.ThresholdBreached).To(BeTrue()) + cond := meta.FindStatusCondition(current.Status.Conditions, v1alpha1.ConfiguredCondition) + g.Expect(cond).NotTo(BeNil()) + g.Expect(cond.Reason).To(Equal(v1alpha1.StorageThresholdExceededReason)) + }).Should(Succeed()) + }) + }) +}) + +func TestBackupDue(t *testing.T) { + t.Run("one-shot backup runs once per generation", func(t *testing.T) { + obj := &v1alpha1.ConfigBackup{ObjectMeta: metav1.ObjectMeta{Generation: 1}} + due, next := backupDue(time.Now().UTC(), obj, nil) + if !due || next != nil { + t.Fatalf("expected one-shot backup to be due on first reconcile") + } + + now := metav1.Now() + obj.Status.LastBackup = &v1alpha1.ConfigBackupRunStatus{Timestamp: now, Generation: 1} + due, next = backupDue(time.Now().UTC(), obj, nil) + 
if due || next != nil { + t.Fatalf("expected one-shot backup to stop running after successful generation") + } + }) + + t.Run("scheduled backup computes next run", func(t *testing.T) { + schedule, err := cron.ParseStandard("0 2 * * *") + if err != nil { + t.Fatalf("cron.ParseStandard() error = %v", err) + } + + obj := &v1alpha1.ConfigBackup{ObjectMeta: metav1.ObjectMeta{CreationTimestamp: metav1.NewTime(time.Date(2026, time.April, 10, 0, 0, 0, 0, time.UTC))}} + now := time.Date(2026, time.April, 10, 1, 0, 0, 0, time.UTC) + due, next := backupDue(now, obj, schedule) + if due { + t.Fatalf("expected scheduled backup to wait until the first cron boundary") + } + if next == nil || !next.Equal(time.Date(2026, time.April, 10, 2, 0, 0, 0, time.UTC)) { + t.Fatalf("unexpected next scheduled backup: %v", next) + } + }) +} diff --git a/internal/controller/core/configbackup_metrics.go b/internal/controller/core/configbackup_metrics.go new file mode 100644 index 000000000..a9cd38c47 --- /dev/null +++ b/internal/controller/core/configbackup_metrics.go @@ -0,0 +1,41 @@ +// SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +package core + +import ( + "github.com/prometheus/client_golang/prometheus" + ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +var ( + configBackupOperationsTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "network_operator_configbackup_operations_total", + Help: "Total number of config backup operations by result and backup type.", + }, + []string{"result", "type"}, + ) + + configBackupDurationSeconds = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "network_operator_configbackup_duration_seconds", + Help: "Duration of config backup operations.", + Buckets: prometheus.DefBuckets, + }, + []string{"result", "type"}, + ) + + configBackupSizeBytes = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: 
"network_operator_configbackup_size_bytes", + Help: "Observed size of successful config backups.", + Buckets: []float64{1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864}, + }, + []string{"type"}, + ) +) + +func init() { + ctrlmetrics.Registry.MustRegister(configBackupOperationsTotal, configBackupDurationSeconds, configBackupSizeBytes) +} diff --git a/internal/controller/core/suite_test.go b/internal/controller/core/suite_test.go index 81065d523..e0365c0e1 100644 --- a/internal/controller/core/suite_test.go +++ b/internal/controller/core/suite_test.go @@ -6,9 +6,13 @@ package core import ( "context" "errors" + "fmt" "os" + "path" "path/filepath" + "slices" "strconv" + "strings" "sync" "testing" "time" @@ -339,6 +343,15 @@ var _ = BeforeSuite(func() { }).SetupWithManager(ctx, k8sManager) Expect(err).NotTo(HaveOccurred()) + err = (&ConfigBackupReconciler{ + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + Recorder: recorder, + Provider: prov, + Locker: testLocker, + }).SetupWithManager(ctx, k8sManager) + Expect(err).NotTo(HaveOccurred()) + go func() { defer GinkgoRecover() err = k8sManager.Start(ctx) @@ -408,6 +421,7 @@ var ( _ provider.NVEProvider = (*Provider)(nil) _ provider.LLDPProvider = (*Provider)(nil) _ provider.DHCPRelayProvider = (*Provider)(nil) + _ provider.ConfigBackupProvider = (*Provider)(nil) ) // Provider is a simple in-memory provider for testing purposes only. 
@@ -444,9 +458,13 @@ type Provider struct { LLDPOperStatus bool LLDPNeighbors map[string]*provider.LLDPAdjacency DHCPRelay *v1alpha1.DHCPRelay + ConfigBackups []provider.ConfigBackupFile + StorageTotal *int64 } func NewProvider() *Provider { + storageTotal := int64(1024 * 1024 * 100) + return &Provider{ LastRebootTime: lastRebootTime, Ports: sets.New[string](), @@ -463,6 +481,7 @@ func NewProvider() *Provider { RoutingPolicies: sets.New[string](), LLDPOperStatus: true, LLDPNeighbors: make(map[string]*provider.LLDPAdjacency), + StorageTotal: &storageTotal, } } @@ -905,6 +924,150 @@ func (p *Provider) GetNVEStatus(_ context.Context, _ *provider.NVERequest) (prov return status, nil } +func (p *Provider) CreateConfigBackup(_ context.Context, req *provider.ConfigBackupRequest) (*provider.ConfigBackupResult, error) { + p.Lock() + defer p.Unlock() + + createdAt := time.Now().UTC() + targetName := "startup-config" + if req.ConfigBackup.Spec.Type == v1alpha1.ConfigBackupTypeLocal { + targetName = fmt.Sprintf("%s%s-g%d", provider.ConfigBackupManagedPrefix(req.ConfigBackup), createdAt.Format("20060102T150405Z"), req.ConfigBackup.Generation) + } + path := targetName + if req.ConfigBackup.Spec.Type == v1alpha1.ConfigBackupTypeLocal { + path = req.ConfigBackup.Spec.Path + targetName + } + + size := int64(2048) + file := provider.ConfigBackupFile{ + Path: path, + SizeBytes: &size, + CreatedAt: createdAt, + Checksum: "sha256:test-checksum", + } + + if req.ConfigBackup.Spec.Type == v1alpha1.ConfigBackupTypeStartup { + replaced := false + for i := range p.ConfigBackups { + if p.ConfigBackups[i].Path == "startup-config" { + p.ConfigBackups[i] = file + replaced = true + break + } + } + if !replaced { + p.ConfigBackups = append(p.ConfigBackups, file) + } + } else { + p.ConfigBackups = append(p.ConfigBackups, file) + } + return &provider.ConfigBackupResult{ + Path: file.Path, + SizeBytes: &size, + Checksum: file.Checksum, + CreatedAt: createdAt, + Duration: 2 * time.Second, + }, nil +} 
+
+// ListConfigBackups returns the fake device's backups that belong to the
+// requested ConfigBackup together with the simulated storage statistics.
+// Startup backups are matched by the fixed "startup-config" path, Local backups
+// by the managed filename prefix of the ConfigBackup.
+func (p *Provider) ListConfigBackups(_ context.Context, req *provider.ConfigBackupRequest) (*provider.ConfigBackupInventory, error) {
+	p.Lock()
+	defer p.Unlock()
+
+	var backups []provider.ConfigBackupFile
+	usedBytes := p.usedStorageBytes()
+	freeBytes := p.freeStorageBytes()
+	managedPrefix := provider.ConfigBackupManagedPrefix(req.ConfigBackup)
+	for _, file := range p.ConfigBackups {
+		if req.ConfigBackup.Spec.Type == v1alpha1.ConfigBackupTypeStartup {
+			if file.Path == "startup-config" {
+				backups = append(backups, file)
+			}
+			continue
+		}
+		if strings.HasPrefix(path.Base(file.Path), managedPrefix) {
+			backups = append(backups, file)
+		}
+	}
+
+	return &provider.ConfigBackupInventory{
+		Backups:    append([]provider.ConfigBackupFile(nil), backups...),
+		TotalBytes: providerCloneInt64Ptr(p.StorageTotal),
+		UsedBytes:  usedBytes,
+		FreeBytes:  freeBytes,
+	}, nil
+}
+
+// DeleteConfigBackups prunes the fake device's backups according to the
+// retention settings of the requested ConfigBackup.
+//
+// Only entries owned by that ConfigBackup are retention candidates, using the
+// same selection rule as ListConfigBackups. Without this scoping, a Local
+// policy would also count (and delete) backups of other objects and the
+// "startup-config" entry.
+func (p *Provider) DeleteConfigBackups(_ context.Context, req *provider.DeleteConfigBackupsRequest) error {
+	p.Lock()
+	defer p.Unlock()
+
+	// A nil ConfigBackup yields an empty prefix, which matches every entry.
+	startup := req.ConfigBackup != nil && req.ConfigBackup.Spec.Type == v1alpha1.ConfigBackupTypeStartup
+	managedPrefix := provider.ConfigBackupManagedPrefix(req.ConfigBackup)
+	var owned []provider.ConfigBackupFile
+	for _, file := range p.ConfigBackups {
+		if startup {
+			if file.Path == "startup-config" {
+				owned = append(owned, file)
+			}
+			continue
+		}
+		if strings.HasPrefix(path.Base(file.Path), managedPrefix) {
+			owned = append(owned, file)
+		}
+	}
+
+	deletes := fakeConfigBackupsToDelete(owned, req.ConfigBackup)
+	if len(deletes) == 0 {
+		return nil
+	}
+
+	remove := sets.New[string]()
+	for _, backup := range deletes {
+		remove.Insert(backup.Path)
+	}
+
+	// DeleteFunc filters in place and clears the freed tail elements.
+	p.ConfigBackups = slices.DeleteFunc(p.ConfigBackups, func(file provider.ConfigBackupFile) bool {
+		return remove.Has(file.Path)
+	})
+	return nil
+}
+
+// fakeConfigBackupsToDelete returns the backups that exceed the retention
+// count: everything after the newest keep entries when ordered by CreatedAt.
+// keep is 1 unless a Local ConfigBackup sets a positive Retention.KeepLast.
+func fakeConfigBackupsToDelete(backups []provider.ConfigBackupFile, obj *v1alpha1.ConfigBackup) []provider.ConfigBackupFile {
+	keep := 1
+	if obj != nil && obj.Spec.Type != v1alpha1.ConfigBackupTypeStartup && obj.Spec.Retention != nil && obj.Spec.Retention.KeepLast > 0 {
+		keep = int(obj.Spec.Retention.KeepLast)
+	}
+	if len(backups) <= keep {
+		return nil
+	}
+	sorted := append([]provider.ConfigBackupFile(nil), backups...)
+ slices.SortFunc(sorted, func(a, b provider.ConfigBackupFile) int { + return b.CreatedAt.Compare(a.CreatedAt) + }) + return sorted[keep:] +} + +func (p *Provider) usedStorageBytes() *int64 { + var used int64 + for _, file := range p.ConfigBackups { + if file.SizeBytes != nil { + used += *file.SizeBytes + } + } + return &used +} + +func (p *Provider) freeStorageBytes() *int64 { + if p.StorageTotal == nil { + return nil + } + used := p.usedStorageBytes() + free := *p.StorageTotal + if used != nil { + free = *p.StorageTotal - *used + } + free = max(0, free) + return &free +} + +func providerCloneInt64Ptr(value *int64) *int64 { + if value == nil { + return nil + } + v := *value + return &v +} + func (p *Provider) EnsureLLDP(_ context.Context, req *provider.LLDPRequest) error { p.Lock() defer p.Unlock() diff --git a/internal/provider/cisco/nxos/bgp.go b/internal/provider/cisco/nxos/bgp.go index 53ed61d88..3d67934cd 100644 --- a/internal/provider/cisco/nxos/bgp.go +++ b/internal/provider/cisco/nxos/bgp.go @@ -151,8 +151,8 @@ func (af *BGPDomAfItem) Key() AddressFamily { return af.Type } func NewInterLeakPDirect(rtMap string) *InterLeakP { return &InterLeakP{ InterLeakPKey: InterLeakPKey{ - Asn: "none", - Inst: "none", + Asn: interLeakProtocolDefault, + Inst: interLeakProtocolDefault, Proto: RtLeakProtoDirect, }, RtMap: rtMap, diff --git a/internal/provider/cisco/nxos/configbackup.go b/internal/provider/cisco/nxos/configbackup.go new file mode 100644 index 000000000..abaaf020f --- /dev/null +++ b/internal/provider/cisco/nxos/configbackup.go @@ -0,0 +1,365 @@ +// SPDX-FileCopyrightText: 2026 SAP SE or an SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +package nxos + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "path" + "slices" + "strconv" + "strings" + "time" + + "github.com/ironcore-dev/network-operator/api/core/v1alpha1" + "github.com/ironcore-dev/network-operator/internal/provider" + 
"github.com/ironcore-dev/network-operator/internal/transport/nxapi"
+)
+
+var _ provider.ConfigBackupProvider = (*Provider)(nil)
+
+// configBackupStartupConfig is the copy target and reported path used for
+// Startup backups.
+const configBackupStartupConfig = "startup-config"
+
+// CreateConfigBackup copies the running configuration to the target derived
+// from the ConfigBackup (startup-config, or a timestamped file below
+// Spec.Path) and reports where it was written.
+//
+// For Local backups the directory is listed afterwards to fill in size,
+// checksum and creation time of the new file. If that listing fails, the copy
+// has already succeeded, so the partially populated result is returned
+// together with the listing error.
+func (p *Provider) CreateConfigBackup(ctx context.Context, req *provider.ConfigBackupRequest) (*provider.ConfigBackupResult, error) {
+	// Keep the raw clock reading for measuring the duration: UTC() strips the
+	// monotonic reading, and time.Since would bypass an injected test clock.
+	begin := p.currentTime()
+	started := begin.UTC()
+	targetName := configBackupTargetName(req.ConfigBackup, started)
+	targetPath := configBackupTargetPath(req.ConfigBackup, targetName)
+	if _, err := p.runNXAPIWithRequest(ctx, configBackupCopyCommand(req.ConfigBackup, targetPath)); err != nil {
+		return nil, fmt.Errorf("failed to create config backup: %w", err)
+	}
+
+	res := &provider.ConfigBackupResult{
+		Path:      targetPath,
+		CreatedAt: started,
+		Duration:  p.currentTime().Sub(begin),
+	}
+
+	if req.ConfigBackup.Spec.Type != v1alpha1.ConfigBackupTypeLocal {
+		return res, nil
+	}
+
+	inventory, err := p.ListConfigBackups(ctx, req)
+	if err != nil {
+		return res, err
+	}
+
+	for _, backup := range inventory.Backups {
+		if backup.Path != targetPath {
+			continue
+		}
+		res.SizeBytes = cloneInt64PtrNXOS(backup.SizeBytes)
+		res.Checksum = backup.Checksum
+		// Prefer the timestamp reported by the listing when one was parsed.
+		if !backup.CreatedAt.IsZero() {
+			res.CreatedAt = backup.CreatedAt.UTC()
+		}
+		break
+	}
+
+	return res, nil
+}
+
+// ListConfigBackups lists the backups belonging to the ConfigBackup.
+//
+// Startup backups need no device call: a single synthetic startup-config entry
+// is returned. For Local backups "dir <Spec.Path>" is executed and the first
+// result is parsed; an empty result set yields an empty inventory.
+func (p *Provider) ListConfigBackups(ctx context.Context, req *provider.ConfigBackupRequest) (*provider.ConfigBackupInventory, error) {
+	if req.ConfigBackup.Spec.Type == v1alpha1.ConfigBackupTypeStartup {
+		return &provider.ConfigBackupInventory{
+			Backups: []provider.ConfigBackupFile{{
+				Path: configBackupStartupConfig,
+			}},
+		}, nil
+	}
+
+	// NOTE(review): Spec.Path is concatenated into the CLI command unescaped;
+	// confirm it is validated before it reaches the provider.
+	results, err := p.runNXAPIWithRequest(ctx, "dir "+req.ConfigBackup.Spec.Path)
+	if err != nil {
+		return nil, fmt.Errorf("failed to list config backups: %w", err)
+	}
+	if len(results) == 0 {
+		return &provider.ConfigBackupInventory{}, nil
+	}
+
+	return parseConfigBackupInventory(results[0], req)
+}
+
+// DeleteConfigBackups lists the backups of the ConfigBackup and deletes those
+// that exceed its retention count.
+func (p *Provider) DeleteConfigBackups(ctx context.Context, req
*provider.DeleteConfigBackupsRequest) error {
+	request := &provider.ConfigBackupRequest{
+		ConfigBackup:   req.ConfigBackup,
+		ProviderConfig: req.ProviderConfig,
+	}
+	inventory, err := p.ListConfigBackups(ctx, request)
+	if err != nil {
+		return fmt.Errorf("failed to determine config backups to delete: %w", err)
+	}
+
+	deletes := configBackupsToDelete(inventory.Backups, req.ConfigBackup)
+	if len(deletes) == 0 {
+		return nil
+	}
+
+	cmds := make([]string, 0, len(deletes))
+	for _, backup := range deletes {
+		// Never issue a delete without a target, and never delete startup-config.
+		if backup.Path == "" || backup.Path == configBackupStartupConfig {
+			continue
+		}
+		cmds = append(cmds, fmt.Sprintf("delete %s no-prompt", backup.Path))
+	}
+	if len(cmds) == 0 {
+		return nil
+	}
+
+	if _, err := p.runNXAPIWithRequest(ctx, cmds...); err != nil {
+		return fmt.Errorf("failed to delete config backups: %w", err)
+	}
+	return nil
+}
+
+// runNXAPIWithRequest sends cmds as a single NX-API request built with the
+// nxapi.Stop rollback option and returns the raw results. Calling it without
+// commands is a no-op; it fails if the NX-API client has not been set.
+func (p *Provider) runNXAPIWithRequest(ctx context.Context, cmds ...string) ([]json.RawMessage, error) {
+	if len(cmds) == 0 {
+		return nil, nil
+	}
+	if p.nxapi == nil {
+		return nil, errors.New("nxapi client is not connected")
+	}
+	return p.nxapi.Do(ctx, nxapi.NewRequest(cmds...).WithRollback(nxapi.Stop))
+}
+
+// configBackupCopyCommand returns the CLI command that copies the running
+// configuration to startup-config (Startup backups) or to targetPath (Local
+// backups). A nil obj is treated as a Startup backup, matching
+// configBackupTargetName and configBackupTargetPath.
+func configBackupCopyCommand(obj *v1alpha1.ConfigBackup, targetPath string) string {
+	if obj == nil || obj.Spec.Type == v1alpha1.ConfigBackupTypeStartup {
+		return "copy running-config " + configBackupStartupConfig
+	}
+	return "copy running-config " + targetPath
+}
+
+// configBackupTargetName returns the file name for a new backup. Startup
+// backups (and a nil obj) always use startup-config; Local backups use the
+// managed prefix followed by the UTC timestamp of now and the object generation.
+func configBackupTargetName(obj *v1alpha1.ConfigBackup, now time.Time) string {
+	if obj == nil || obj.Spec.Type == v1alpha1.ConfigBackupTypeStartup {
+		return configBackupStartupConfig
+	}
+	return fmt.Sprintf("%s%s-g%d", provider.ConfigBackupManagedPrefix(obj), now.UTC().Format("20060102T150405Z"), obj.Generation)
+}
+
+// configBackupTargetPath returns the full destination of a new backup:
+// startup-config for Startup backups (and a nil obj), otherwise targetName
+// below Spec.Path, inserting a "/" unless the path already ends with one.
+func configBackupTargetPath(obj *v1alpha1.ConfigBackup, targetName string) string {
+	if obj == nil || obj.Spec.Type == v1alpha1.ConfigBackupTypeStartup {
+		return configBackupStartupConfig
+	}
+	base := obj.Spec.Path
+	if
strings.HasSuffix(base, "/") {
+		return base + targetName
+	}
+	return base + "/" + targetName
+}
+
+// currentTime returns the provider's injected clock reading when one is set
+// and time.Now otherwise. It is safe to call on a nil Provider.
+func (p *Provider) currentTime() time.Time {
+	if p != nil && p.now != nil {
+		return p.now()
+	}
+	return time.Now()
+}
+
+// parseConfigBackupInventory decodes one raw "dir" result into an inventory.
+// Storage statistics are taken from the response as-is; only files whose base
+// name carries the managed prefix of the ConfigBackup are kept. Backups are
+// returned newest first.
+func parseConfigBackupInventory(raw json.RawMessage, req *provider.ConfigBackupRequest) (*provider.ConfigBackupInventory, error) {
+	var decoded nxosDirResponse
+	if err := json.Unmarshal(raw, &decoded); err != nil {
+		return nil, fmt.Errorf("failed to decode config backup inventory: %w", err)
+	}
+
+	inventory := &provider.ConfigBackupInventory{
+		TotalBytes: decoded.totalBytes(),
+		UsedBytes:  decoded.usedBytes(),
+		FreeBytes:  decoded.freeBytes(),
+	}
+
+	managedPrefix := provider.ConfigBackupManagedPrefix(req.ConfigBackup)
+	for _, entry := range decoded.fileEntries() {
+		if !strings.HasPrefix(configBackupPathBase(entry.Path), managedPrefix) {
+			continue
+		}
+		inventory.Backups = append(inventory.Backups, entry)
+	}
+
+	slices.SortFunc(inventory.Backups, func(a, b provider.ConfigBackupFile) int {
+		if c := b.CreatedAt.Compare(a.CreatedAt); c != 0 {
+			return c
+		}
+		// SortFunc is not stable: break ties (including entries whose timestamp
+		// could not be parsed) by path, descending. Managed file names embed a
+		// sortable UTC timestamp, so this still puts the newest first.
+		return strings.Compare(b.Path, a.Path)
+	})
+	return inventory, nil
+}
+
+// nxosDirResponse is the decoded shape of a "dir" result. Storage statistics
+// are accepted either nested under "storage" or as top-level fields.
+type nxosDirResponse struct {
+	Storage    *nxosStorageStats `json:"storage"`
+	TotalBytes *nxosInt64        `json:"total_bytes"`
+	UsedBytes  *nxosInt64        `json:"used_bytes"`
+	FreeBytes  *nxosInt64        `json:"free_bytes"`
+	TableFiles *nxosFileTable    `json:"TABLE_files"`
+}
+
+// totalBytes returns the total storage size, preferring the nested "storage"
+// value over the top-level one, or nil when neither is present.
+func (r nxosDirResponse) totalBytes() *int64 {
+	if r.Storage != nil && r.Storage.TotalBytes != nil {
+		return r.Storage.TotalBytes.ptr()
+	}
+	if r.TotalBytes != nil {
+		return r.TotalBytes.ptr()
+	}
+	return nil
+}
+
+// usedBytes returns the used storage size, preferring the nested "storage"
+// value over the top-level one, or nil when neither is present.
+func (r nxosDirResponse) usedBytes() *int64 {
+	if r.Storage != nil && r.Storage.UsedBytes != nil {
+		return r.Storage.UsedBytes.ptr()
+	}
+	if r.UsedBytes != nil {
+		return r.UsedBytes.ptr()
+	}
+	return nil
+}
+
+// freeBytes returns the free storage size, preferring the nested "storage"
+// value over the top-level one, or nil when neither is present.
+func (r nxosDirResponse) freeBytes() *int64 {
+	if r.Storage != nil && r.Storage.FreeBytes != nil {
+		return r.Storage.FreeBytes.ptr()
+	}
+	if r.FreeBytes != nil {
+		return r.FreeBytes.ptr()
+	}
+	return nil
+}
+
+// fileEntries converts the decoded file table into provider files. Rows
+// without a path are dropped, and only the first row for each path is kept.
+func (r nxosDirResponse) fileEntries() []provider.ConfigBackupFile {
+	if r.TableFiles == nil {
+		return nil
+	}
+
+	entries := make([]provider.ConfigBackupFile, 0, len(r.TableFiles.Rows))
+	seen := make(map[string]struct{}, len(r.TableFiles.Rows))
+	for _, row := range r.TableFiles.Rows {
+		entry := row.toProviderFile()
+		key := entry.Path
+		if key == "" {
+			continue
+		}
+		if _, ok := seen[key]; ok {
+			continue
+		}
+		seen[key] = struct{}{}
+		entries = append(entries, entry)
+	}
+	return entries
+}
+
+// nxosStorageStats holds the storage statistics nested under "storage".
+type nxosStorageStats struct {
+	TotalBytes *nxosInt64 `json:"total_bytes"`
+	UsedBytes  *nxosInt64 `json:"used_bytes"`
+	FreeBytes  *nxosInt64 `json:"free_bytes"`
+}
+
+// nxosFileTable wraps the "ROW_files" rows of a "TABLE_files" object.
+type nxosFileTable struct {
+	Rows nxosFileRows `json:"ROW_files"`
+}
+
+// nxosFileRows is a list of file rows that decodes from either a JSON array
+// or a single JSON object.
+type nxosFileRows []nxosFileEntry
+
+// UnmarshalJSON implements json.Unmarshaler. Empty input and null reset the
+// list to nil, an array is decoded element-wise, and any other value is
+// decoded as one row and wrapped in a single-element list.
+func (r *nxosFileRows) UnmarshalJSON(data []byte) error {
+	trimmed := strings.TrimSpace(string(data))
+	if trimmed == "" || trimmed == "null" {
+		*r = nil
+		return nil
+	}
+	if trimmed[0] == '[' {
+		var rows []nxosFileEntry
+		if err := json.Unmarshal(data, &rows); err != nil {
+			return err
+		}
+		*r = rows
+		return nil
+	}
+	var row nxosFileEntry
+	if err := json.Unmarshal(data, &row); err != nil {
+		return err
+	}
+	*r = []nxosFileEntry{row}
+	return nil
+}
+
+// nxosFileEntry is one decoded file row.
+type nxosFileEntry struct {
+	Name      string     `json:"name"`
+	Path      string     `json:"path"`
+	SizeBytes *nxosInt64 `json:"size_bytes"`
+	CreatedAt string     `json:"created_at"`
+	Checksum  string     `json:"checksum"`
+}
+
+// toProviderFile converts the row into a provider.ConfigBackupFile. Path falls
+// back to Name when empty. CreatedAt is parsed as RFC 3339 and converted to
+// UTC; it stays the zero time when the value cannot be parsed.
+func (e nxosFileEntry) toProviderFile() provider.ConfigBackupFile {
+	entry := provider.ConfigBackupFile{
+		Path:      e.Path,
+		Checksum:  e.Checksum,
+		SizeBytes: nil,
+	}
+	if entry.Path == "" {
+		entry.Path = e.Name
+	}
+	if e.SizeBytes != nil {
+		entry.SizeBytes = e.SizeBytes.ptr()
+	}
+	if parsed, err := time.Parse(time.RFC3339, strings.TrimSpace(e.CreatedAt)); err == nil {
+		entry.CreatedAt = parsed.UTC()
+	}
+	return entry
+}
+
+// configBackupPathBase returns the last element of backupPath, or "" for an
+// empty path (path.Base alone would return ".").
+func configBackupPathBase(backupPath string) string {
+	if backupPath == "" {
+
return ""
+	}
+	return path.Base(backupPath)
+}
+
+// configBackupsToDelete returns the backups that exceed the retention count
+// of obj: everything after the newest keep entries. The input slice is not
+// modified. It returns nil when nothing has to be deleted.
+func configBackupsToDelete(backups []provider.ConfigBackupFile, obj *v1alpha1.ConfigBackup) []provider.ConfigBackupFile {
+	keep := configBackupKeepLast(obj)
+	if keep < 1 || len(backups) <= keep {
+		return nil
+	}
+	sorted := slices.Clone(backups)
+	slices.SortFunc(sorted, func(a, b provider.ConfigBackupFile) int {
+		if c := b.CreatedAt.Compare(a.CreatedAt); c != 0 {
+			return c
+		}
+		// SortFunc is not stable. Without a tie-break, entries with equal or
+		// unparsed (zero) timestamps are ordered arbitrarily and the newest
+		// backup could be the one deleted. Managed file names embed a sortable
+		// UTC timestamp, so descending path order keeps the newest first.
+		return strings.Compare(b.Path, a.Path)
+	})
+	return sorted[keep:]
+}
+
+// configBackupKeepLast returns how many backups to retain: Retention.KeepLast
+// for Local backups that set a positive value, and 1 in every other case
+// (nil obj, Startup backups, missing or non-positive retention).
+func configBackupKeepLast(obj *v1alpha1.ConfigBackup) int {
+	if obj == nil || obj.Spec.Type == v1alpha1.ConfigBackupTypeStartup {
+		return 1
+	}
+	if obj.Spec.Retention == nil || obj.Spec.Retention.KeepLast <= 0 {
+		return 1
+	}
+	return int(obj.Spec.Retention.KeepLast)
+}
+
+// nxosInt64 is an int64 that decodes from a JSON number or from a quoted
+// decimal string.
+type nxosInt64 int64
+
+// UnmarshalJSON implements json.Unmarshaler. Empty input and null leave the
+// value unchanged; one pair of surrounding quotes is removed before the value
+// is parsed as a base-10 integer.
+func (v *nxosInt64) UnmarshalJSON(data []byte) error {
+	trimmed := strings.TrimSpace(string(data))
+	if trimmed == "" || trimmed == "null" {
+		return nil
+	}
+	if len(trimmed) >= 2 && trimmed[0] == '"' && trimmed[len(trimmed)-1] == '"' {
+		trimmed = trimmed[1 : len(trimmed)-1]
+	}
+	parsed, err := strconv.ParseInt(strings.TrimSpace(trimmed), 10, 64)
+	if err != nil {
+		return err
+	}
+	*v = nxosInt64(parsed)
+	return nil
+}
+
+// ptr returns a pointer to a copy of the value as int64, or nil for a nil
+// receiver.
+func (v *nxosInt64) ptr() *int64 {
+	if v == nil {
+		return nil
+	}
+	value := int64(*v)
+	return &value
+}
+
+// cloneInt64PtrNXOS returns a pointer to a copy of *value, or nil if value is
+// nil, so the caller does not share storage with the source.
+func cloneInt64PtrNXOS(value *int64) *int64 {
+	if value == nil {
+		return nil
+	}
+	v := *value
+	return &v
+}
diff --git a/internal/provider/cisco/nxos/configbackup_test.go b/internal/provider/cisco/nxos/configbackup_test.go
new file mode 100644
index 000000000..17af6a686
--- /dev/null
+++ b/internal/provider/cisco/nxos/configbackup_test.go
@@ -0,0 +1,261 @@
+// SPDX-FileCopyrightText: 2026 SAP SE or an SAP affiliate company and IronCore contributors
+// SPDX-License-Identifier: Apache-2.0
+
+package nxos
+
+import (
+	"context"
+	"encoding/json"
+	"reflect"
+	"testing"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+
"github.com/ironcore-dev/network-operator/api/core/v1alpha1" + + "github.com/ironcore-dev/network-operator/internal/provider" + "github.com/ironcore-dev/network-operator/internal/transport/nxapi" +) + +func TestCreateConfigBackupStartup(t *testing.T) { + stub := &stubNXAPIDoer{ + t: t, + responses: []stubNXAPIResponse{{ + expectCmds: []string{"copy running-config startup-config"}, + results: []json.RawMessage{json.RawMessage(`null`)}, + }}, + } + p := &Provider{nxapi: stub} + result, err := p.CreateConfigBackup(t.Context(), &provider.ConfigBackupRequest{ + ConfigBackup: &v1alpha1.ConfigBackup{Spec: v1alpha1.ConfigBackupSpec{Type: v1alpha1.ConfigBackupTypeStartup}}, + }) + if err != nil { + t.Fatalf("CreateConfigBackup() error = %v", err) + } + if result.Path != configBackupStartupConfig { + t.Fatalf("result.Path = %q, want %q", result.Path, configBackupStartupConfig) + } + if result.SizeBytes != nil { + t.Fatalf("result.SizeBytes = %v, want nil", *result.SizeBytes) + } + stub.assertExhausted(t) +} + +func TestCreateConfigBackupLocal(t *testing.T) { + fixedNow := time.Date(2026, time.April, 10, 2, 0, 0, 0, time.UTC) + targetName := "configbackup-default-leaf-1-20260410T020000Z-g7" + stub := &stubNXAPIDoer{ + t: t, + responses: []stubNXAPIResponse{ + { + expectCmds: []string{"copy running-config bootflash:///backups/" + targetName}, + results: []json.RawMessage{json.RawMessage(`null`)}, + }, + { + expectCmds: []string{"dir bootflash:///backups/"}, + results: []json.RawMessage{json.RawMessage(`{ + "storage": {"total_bytes": "1000", "used_bytes": "400", "free_bytes": "600"}, + "TABLE_files": {"ROW_files": [ + { + "name": "` + targetName + `", + "path": "bootflash:///backups/` + targetName + `", + "size_bytes": "2048", + "created_at": "2026-04-10T02:00:00Z", + "checksum": "sha256:abc123" + } + ]} + }`)}, + }, + }, + } + p := &Provider{nxapi: stub, now: func() time.Time { return fixedNow }} + result, err := p.CreateConfigBackup(t.Context(), &provider.ConfigBackupRequest{ 
+ ConfigBackup: &v1alpha1.ConfigBackup{ + ObjectMeta: metav1.ObjectMeta{Namespace: corev1.NamespaceDefault, Name: "leaf-1", Generation: 7}, + Spec: v1alpha1.ConfigBackupSpec{Type: v1alpha1.ConfigBackupTypeLocal, Path: "bootflash:///backups/"}, + }, + }) + if err != nil { + t.Fatalf("CreateConfigBackup() error = %v", err) + } + if result.Path != "bootflash:///backups/"+targetName { + t.Fatalf("result.Path = %q", result.Path) + } + if result.SizeBytes == nil || *result.SizeBytes != 2048 { + t.Fatalf("result.SizeBytes = %v, want 2048", result.SizeBytes) + } + if result.Checksum != "sha256:abc123" { + t.Fatalf("result.Checksum = %q, want %q", result.Checksum, "sha256:abc123") + } + wantCreated := time.Date(2026, time.April, 10, 2, 0, 0, 0, time.UTC) + if !result.CreatedAt.Equal(wantCreated) { + t.Fatalf("result.CreatedAt = %v, want %v", result.CreatedAt, wantCreated) + } + stub.assertExhausted(t) +} + +func TestListConfigBackupsLocal(t *testing.T) { + stub := &stubNXAPIDoer{ + t: t, + responses: []stubNXAPIResponse{{ + expectCmds: []string{"dir bootflash:///backups/"}, + results: []json.RawMessage{json.RawMessage(`{ + "total_bytes": "1000", + "used_bytes": "400", + "free_bytes": "600", + "TABLE_files": {"ROW_files": [ + { + "name": "configbackup-default-leaf-1-local-20260410", + "path": "bootflash:///backups/configbackup-default-leaf-1-local-20260410", + "size_bytes": "2048", + "created_at": "2026-04-10T02:00:00Z", + "checksum": "sha256:abc123" + }, + { + "name": "orphan.txt", + "path": "bootflash:///backups/orphan.txt", + "size_bytes": "20", + "created_at": "2026-04-09T02:00:00Z" + } + ]} + }`)}, + }}, + } + p := &Provider{nxapi: stub} + inventory, err := p.ListConfigBackups(t.Context(), &provider.ConfigBackupRequest{ + ConfigBackup: &v1alpha1.ConfigBackup{ + ObjectMeta: metav1.ObjectMeta{Namespace: corev1.NamespaceDefault, Name: "leaf-1"}, + Spec: v1alpha1.ConfigBackupSpec{Type: v1alpha1.ConfigBackupTypeLocal, Path: "bootflash:///backups/"}, + }, + }) + if err != nil 
{ + t.Fatalf("ListConfigBackups() error = %v", err) + } + if len(inventory.Backups) != 1 { + t.Fatalf("len(inventory.Backups) = %d, want 1", len(inventory.Backups)) + } + if inventory.Backups[0].Path != "bootflash:///backups/configbackup-default-leaf-1-local-20260410" { + t.Fatalf("inventory.Backups[0].Path = %q", inventory.Backups[0].Path) + } + if inventory.TotalBytes == nil || *inventory.TotalBytes != 1000 { + t.Fatalf("inventory.TotalBytes = %v, want 1000", inventory.TotalBytes) + } + if inventory.UsedBytes == nil || *inventory.UsedBytes != 400 { + t.Fatalf("inventory.UsedBytes = %v, want 400", inventory.UsedBytes) + } + if inventory.FreeBytes == nil || *inventory.FreeBytes != 600 { + t.Fatalf("inventory.FreeBytes = %v, want 600", inventory.FreeBytes) + } + stub.assertExhausted(t) +} + +func TestDeleteConfigBackups(t *testing.T) { + stub := &stubNXAPIDoer{ + t: t, + responses: []stubNXAPIResponse{ + { + expectCmds: []string{"dir bootflash:///backups/"}, + results: []json.RawMessage{json.RawMessage(`{ + "TABLE_files": {"ROW_files": [ + { + "name": "configbackup-default-leaf-1-20260412", + "path": "bootflash:///backups/configbackup-default-leaf-1-20260412", + "created_at": "2026-04-12T02:00:00Z" + }, + { + "name": "configbackup-default-leaf-1-20260411", + "path": "bootflash:///backups/configbackup-default-leaf-1-20260411", + "created_at": "2026-04-11T02:00:00Z" + }, + { + "name": "configbackup-default-leaf-1-20260410", + "path": "bootflash:///backups/configbackup-default-leaf-1-20260410", + "created_at": "2026-04-10T02:00:00Z" + }, + { + "name": "orphan.txt", + "path": "bootflash:///backups/orphan.txt", + "created_at": "2026-04-09T02:00:00Z" + } + ]} + }`)}, + }, + { + expectCmds: []string{ + "delete bootflash:///backups/configbackup-default-leaf-1-20260410 no-prompt", + }, + results: []json.RawMessage{json.RawMessage(`null`)}, + }, + }, + } + p := &Provider{nxapi: stub} + err := p.DeleteConfigBackups(t.Context(), &provider.DeleteConfigBackupsRequest{ + 
ConfigBackup: &v1alpha1.ConfigBackup{ + ObjectMeta: metav1.ObjectMeta{Namespace: corev1.NamespaceDefault, Name: "leaf-1"}, + Spec: v1alpha1.ConfigBackupSpec{ + Type: v1alpha1.ConfigBackupTypeLocal, + Path: "bootflash:///backups/", + Retention: &v1alpha1.ConfigBackupRetention{KeepLast: 2}, + }, + }, + }) + if err != nil { + t.Fatalf("DeleteConfigBackups() error = %v", err) + } + stub.assertExhausted(t) +} + +type stubNXAPIResponse struct { + expectCmds []string + results []json.RawMessage + err error +} + +type stubNXAPIDoer struct { + t *testing.T + responses []stubNXAPIResponse + callIndex int +} + +func (s *stubNXAPIDoer) Do(_ context.Context, req nxapi.Request) ([]json.RawMessage, error) { + if s.callIndex >= len(s.responses) { + s.t.Fatalf("unexpected NX-API call %d", s.callIndex+1) + } + current := s.responses[s.callIndex] + s.callIndex++ + gotCmds := decodeNXAPIRequest(s.t, req) + if !reflect.DeepEqual(gotCmds, current.expectCmds) { + s.t.Fatalf("NX-API commands = %#v, want %#v", gotCmds, current.expectCmds) + } + return current.results, current.err +} + +func (s *stubNXAPIDoer) assertExhausted(t *testing.T) { + t.Helper() + if s.callIndex != len(s.responses) { + t.Fatalf("NX-API call count = %d, want %d", s.callIndex, len(s.responses)) + } +} + +func decodeNXAPIRequest(t *testing.T, req nxapi.Request) []string { + t.Helper() + b, err := req.Encode() + if err != nil { + t.Fatalf("req.Encode() error = %v", err) + } + var decoded []struct { + Params struct { + Cmd string `json:"cmd"` + } `json:"params"` + } + if err := json.Unmarshal(b, &decoded); err != nil { + t.Fatalf("json.Unmarshal() error = %v", err) + } + cmds := make([]string, len(decoded)) + for i := range decoded { + cmds[i] = decoded[i].Params.Cmd + } + return cmds +} diff --git a/internal/provider/cisco/nxos/name.go b/internal/provider/cisco/nxos/name.go index 4b14da331..91b97d9e6 100644 --- a/internal/provider/cisco/nxos/name.go +++ b/internal/provider/cisco/nxos/name.go @@ -9,8 +9,10 @@ import ( 
"regexp" ) +const managementInterfaceName = "mgmt0" + var ( - mgmtRe = regexp.MustCompile(`(?i)^mgmt0$`) + mgmtRe = regexp.MustCompile(`(?i)^` + managementInterfaceName + `$`) ethernetRe = regexp.MustCompile(`(?i)^(ethernet|eth)(\d+/\d+)$`) loopbackRe = regexp.MustCompile(`(?i)^(loopback|lo)(\d+)$`) portchannelRe = regexp.MustCompile(`(?i)^(port-channel|po)(\d+)$`) @@ -38,7 +40,7 @@ func ShortName(name string) (string, error) { return "vlan" + matches[2], nil } if mgmtRe.MatchString(name) { - return "mgmt0", nil + return managementInterfaceName, nil } if matches := encapRoutedRe.FindStringSubmatch(name); matches != nil { return "eth" + matches[2] + "." + matches[3], nil diff --git a/internal/provider/cisco/nxos/ospf.go b/internal/provider/cisco/nxos/ospf.go index 6c276355b..778c09fd1 100644 --- a/internal/provider/cisco/nxos/ospf.go +++ b/internal/provider/cisco/nxos/ospf.go @@ -66,6 +66,8 @@ type InterLeakPKey struct { Proto RtLeakProto `json:"proto"` } +const interLeakProtocolDefault = "none" + type InterLeakP struct { InterLeakPKey diff --git a/internal/provider/cisco/nxos/provider.go b/internal/provider/cisco/nxos/provider.go index d270637cb..249ce600b 100644 --- a/internal/provider/cisco/nxos/provider.go +++ b/internal/provider/cisco/nxos/provider.go @@ -9,6 +9,7 @@ import ( "context" "crypto/rand" "crypto/rsa" + "encoding/json" "errors" "fmt" "maps" @@ -62,10 +63,16 @@ var ( _ provider.DHCPRelayProvider = (*Provider)(nil) ) +type nxapiClient interface { + Do(context.Context, nxapi.Request) ([]json.RawMessage, error) +} + type Provider struct { - conn *grpc.ClientConn - client gnmiext.Client - nxapi *nxapi.Client + baseConn *deviceutil.Connection + conn *grpc.ClientConn + client gnmiext.Client + nxapi nxapiClient + now func() time.Time } func NewProvider() provider.Provider { @@ -75,6 +82,8 @@ func NewProvider() provider.Provider { func (p *Provider) Connect(ctx context.Context, conn *deviceutil.Connection) (err error) { // timeout is the default timeout for 
all HTTP/gRPC requests made by the provider. const timeout = 30 * time.Second + baseConn := *conn + p.baseConn = &baseConn p.conn, err = grpcext.NewClient(conn, grpcext.WithDefaultTimeout(timeout)) if err != nil { return fmt.Errorf("failed to create grpc connection: %w", err) @@ -98,6 +107,7 @@ func (p *Provider) Connect(ctx context.Context, conn *deviceutil.Connection) (er } func (p *Provider) Disconnect(_ context.Context, _ *deviceutil.Connection) error { + p.baseConn = nil return p.conn.Close() } @@ -882,8 +892,8 @@ func (p *Provider) EnsureInterface(ctx context.Context, req *provider.EnsureInte if req.IPv4 != nil || (req.AggregateParent != nil && req.AggregateParent.Spec.IPv4 != nil) { p.Layer = Layer3 p.RtvrfMbrItems = NewVrfMember(name, vrf) - p.AccessVlan = "unknown" - p.NativeVlan = "unknown" + p.AccessVlan = string(AdjOperStUnknown) + p.NativeVlan = string(AdjOperStUnknown) } if isPointToPoint(req.Interface.Spec.IPv4) || (req.AggregateParent != nil && isPointToPoint(req.AggregateParent.Spec.IPv4)) { @@ -1488,7 +1498,7 @@ func (p *Provider) EnsureISIS(ctx context.Context, req *provider.EnsureISISReque case v1alpha1.OverloadBitAlways: case v1alpha1.OverloadBitOnStartup: dom.OverloadItems.AdminSt = "bootup" - dom.OverloadItems.BgpAsNumStr = "none" + dom.OverloadItems.BgpAsNumStr = string(AdjChangeLogLevelNone) dom.OverloadItems.StartupTime = 61 // seconds dom.OverloadItems.Suppress = "" } @@ -1527,14 +1537,14 @@ func (p *Provider) EnsureISIS(ctx context.Context, req *provider.EnsureISISReque intf.V4Enable = true intf.V4Bfd = "inheritVrf" if iface.Spec.BFD != nil && iface.Spec.BFD.Enabled { - intf.V4Bfd = "enabled" + intf.V4Bfd = string(PassiveControlEnabled) } } if ipv6 { intf.V6Enable = true intf.V6Bfd = "inheritVrf" if iface.Spec.BFD != nil && iface.Spec.BFD.Enabled { - intf.V6Bfd = "enabled" + intf.V6Bfd = string(PassiveControlEnabled) } } dom.IfItems.IfList.Set(intf) @@ -1812,8 +1822,8 @@ func (p *Provider) EnsureOSPF(ctx context.Context, req 
*provider.EnsureOSPFReque } rd := new(InterLeakP) rd.Proto = rc.Protocol - rd.Asn = "none" - rd.Inst = "none" + rd.Asn = interLeakProtocolDefault + rd.Inst = interLeakProtocolDefault rd.RtMap = rc.RouteMapName dom.InterleakItems.InterLeakPList.Set(rd) } @@ -2318,7 +2328,7 @@ type SyslogConfig struct { func (p *Provider) EnsureSyslog(ctx context.Context, req *provider.EnsureSyslogRequest) error { var cfg SyslogConfig cfg.OriginID = req.Syslog.Name - cfg.SourceInterfaceName = "mgmt0" + cfg.SourceInterfaceName = managementInterfaceName cfg.HistorySize = 500 if req.ProviderConfig != nil { if err := req.ProviderConfig.Into(&cfg); err != nil { diff --git a/internal/provider/provider.go b/internal/provider/provider.go index f1845132e..0ce8fc980 100644 --- a/internal/provider/provider.go +++ b/internal/provider/provider.go @@ -53,6 +53,58 @@ type ProvisioningProvider interface { VerifyProvisioned(context.Context, *deviceutil.Connection, *v1alpha1.Device) bool } +// ConfigBackupProvider performs on-device configuration backup operations. +type ConfigBackupProvider interface { + Provider + + // CreateConfigBackup writes a new configuration backup to the device. + CreateConfigBackup(context.Context, *ConfigBackupRequest) (*ConfigBackupResult, error) + // ListConfigBackups lists the backups currently discovered for the ConfigBackup policy. + ListConfigBackups(context.Context, *ConfigBackupRequest) (*ConfigBackupInventory, error) + // DeleteConfigBackups removes the provided backups from the device. 
+ DeleteConfigBackups(context.Context, *DeleteConfigBackupsRequest) error +} + +type ConfigBackupRequest struct { + ConfigBackup *v1alpha1.ConfigBackup + ProviderConfig *ProviderConfig +} + +type DeleteConfigBackupsRequest struct { + ConfigBackup *v1alpha1.ConfigBackup + ProviderConfig *ProviderConfig +} + +type ConfigBackupResult struct { + Path string + SizeBytes *int64 + Checksum string + CreatedAt time.Time + Duration time.Duration +} + +type ConfigBackupInventory struct { + Backups []ConfigBackupFile + TotalBytes *int64 + UsedBytes *int64 + FreeBytes *int64 +} + +type ConfigBackupFile struct { + Path string + SizeBytes *int64 + CreatedAt time.Time + Checksum string +} + +// ConfigBackupManagedPrefix returns the filename prefix used for backups managed by a ConfigBackup. +func ConfigBackupManagedPrefix(obj *v1alpha1.ConfigBackup) string { + if obj == nil { + return "" + } + return fmt.Sprintf("configbackup-%s-%s-", obj.Namespace, obj.Name) +} + type DevicePort struct { // ID is the unique identifier of the port on the device. ID string diff --git a/internal/transport/gnmiext/empty.go b/internal/transport/gnmiext/empty.go index f8ecc2eaa..d627bbe42 100644 --- a/internal/transport/gnmiext/empty.go +++ b/internal/transport/gnmiext/empty.go @@ -9,6 +9,8 @@ import ( "regexp" ) +const jsonNullLiteral = "null" + // NOTE: Use json.Marshaler and json.Unmarshaler interfaces instead of the // Marshaler interface for types that only need to customize their JSON // representation and do not need to consider the capabilities of the target @@ -32,14 +34,14 @@ type Empty bool // MarshalJSON implements json.Marshaler for Empty. func (e Empty) MarshalJSON() ([]byte, error) { if !e { - return []byte("null"), nil + return []byte(jsonNullLiteral), nil } return []byte("[null]"), nil } // UnmarshalJSON implements json.Unmarshaler for Empty. 
func (e *Empty) UnmarshalJSON(b []byte) error { - if len(b) == 0 || string(b) == "null" { + if len(b) == 0 || string(b) == jsonNullLiteral { *e = false return nil } diff --git a/internal/transport/gnmiext/list.go b/internal/transport/gnmiext/list.go index 9d02dbd04..7c6d3c69e 100644 --- a/internal/transport/gnmiext/list.go +++ b/internal/transport/gnmiext/list.go @@ -70,7 +70,7 @@ func (l List[K, V]) MarshalJSON() ([]byte, error) { // to determine the map key. func (l *List[K, V]) UnmarshalJSON(data []byte) error { // Handle null - if string(data) == "null" { + if string(data) == jsonNullLiteral { *l = nil return nil }