diff --git a/pkg/controllers/cloud_config_sync_controller_test.go b/pkg/controllers/cloud_config_sync_controller_test.go index ba217895f..a9e370ef9 100644 --- a/pkg/controllers/cloud_config_sync_controller_test.go +++ b/pkg/controllers/cloud_config_sync_controller_test.go @@ -2,6 +2,7 @@ package controllers import ( "context" + "fmt" "time" . "github.com/onsi/ginkgo/v2" @@ -23,6 +24,7 @@ import ( metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "github.com/openshift/library-go/pkg/config/clusteroperator/v1helpers" "github.com/openshift/library-go/pkg/operator/configobserver/featuregates" ) @@ -259,19 +261,7 @@ var _ = Describe("Cloud config sync controller", func() { mgrCtxCancel() Eventually(mgrStopped, timeout).Should(BeClosed()) - co := &configv1.ClusterOperator{} - err := cl.Get(context.Background(), client.ObjectKey{Name: clusterOperatorName}, co) - if err == nil || !apierrors.IsNotFound(err) { - Eventually(func() error { - return cl.Delete(context.Background(), co) - }).Should(SatisfyAny( - Not(HaveOccurred()), - MatchError(apierrors.IsNotFound, "IsNotFound"), - )) - } - Eventually(func() error { - return cl.Get(context.Background(), client.ObjectKey{Name: clusterOperatorName}, co) - }).Should(MatchError(apierrors.IsNotFound, "IsNotFound")) + deleteClusterOperator(context.Background(), cl) By("Cleanup resources") deleteOptions := &client.DeleteOptions{ @@ -457,19 +447,7 @@ var _ = Describe("Cloud config sync reconciler", func() { GracePeriodSeconds: ptr.To[int64](0), } - co := &configv1.ClusterOperator{} - err := cl.Get(context.Background(), client.ObjectKey{Name: clusterOperatorName}, co) - if err == nil || !apierrors.IsNotFound(err) { - Eventually(func() error { - return cl.Delete(context.Background(), co) - }).Should(SatisfyAny( - Not(HaveOccurred()), - MatchError(apierrors.IsNotFound, "IsNotFound"), - )) - } - Eventually(func() error { - return cl.Get(context.Background(), client.ObjectKey{Name: clusterOperatorName}, co) - }).Should(MatchError(apierrors.IsNotFound, "IsNotFound")) + deleteClusterOperator(context.Background(), cl) infra := &configv1.Infrastructure{ ObjectMeta: metav1.ObjectMeta{ @@ -867,3 +845,118 @@ var _ = Describe("vSphere managed config sync", func() { }) }) }) + +var _ = Describe("CloudConfigReconciler error handling", func() { + ctx := context.Background() + + AfterEach(func() { + deleteClusterOperator(ctx, cl) + }) + + It("transient error should not set cloudConfigControllerDegraded before threshold", func() { + fakeClock := clocktesting.NewFakeClock(time.Now()) + getErr := fmt.Errorf("connection refused") + faultyClient := errorInjectingClient{Client: cl, getErr: &getErr, failType: &configv1.Infrastructure{}} + + co := &configv1.ClusterOperator{ObjectMeta: metav1.ObjectMeta{Name: clusterOperatorName}} + Expect(cl.Create(ctx, co)).To(Succeed()) + + reconciler := &CloudConfigReconciler{ + ClusterOperatorStatusClient: ClusterOperatorStatusClient{ + Client: &faultyClient, + Clock: fakeClock, + ManagedNamespace: testManagedNamespace, + }, + Scheme: scheme.Scheme, + FeatureGateAccess: featuregates.NewHardcodedFeatureGateAccessForTesting(nil, nil, nil, nil), + } + + stepSize := transientDegradedThreshold / 5 + for range 4 { + _, err := reconciler.Reconcile(ctx, reconcile.Request{}) + Expect(err).To(HaveOccurred(), "transient error should be returned for controller-runtime to requeue") + fakeClock.Step(stepSize) + } + + Expect(cl.Get(ctx, client.ObjectKey{Name: clusterOperatorName}, co)).To(Succeed()) + Expect(v1helpers.IsStatusConditionTrue(co.Status.Conditions, cloudConfigControllerDegradedCondition)).To(BeFalse(), + "cloudConfigControllerDegraded should not be True before the transient threshold elapses") + }) + + It("transient error should set cloudConfigControllerDegraded after threshold is crossed", func() { + fakeClock := clocktesting.NewFakeClock(time.Now()) + getErr := fmt.Errorf("connection refused") + faultyClient := errorInjectingClient{Client: cl, getErr: &getErr, failType: &configv1.Infrastructure{}} + + co := &configv1.ClusterOperator{ObjectMeta: metav1.ObjectMeta{Name: clusterOperatorName}} + Expect(cl.Create(ctx, co)).To(Succeed()) + + reconciler := &CloudConfigReconciler{ + ClusterOperatorStatusClient: ClusterOperatorStatusClient{ + Client: &faultyClient, + Clock: fakeClock, + ManagedNamespace: testManagedNamespace, + }, + Scheme: scheme.Scheme, + FeatureGateAccess: featuregates.NewHardcodedFeatureGateAccessForTesting(nil, nil, nil, nil), + } + + _, err := reconciler.Reconcile(ctx, reconcile.Request{}) + Expect(err).To(HaveOccurred()) + + fakeClock.Step(transientDegradedThreshold + time.Second) + + _, err = reconciler.Reconcile(ctx, reconcile.Request{}) + Expect(err).To(HaveOccurred()) + + Expect(cl.Get(ctx, client.ObjectKey{Name: clusterOperatorName}, co)).To(Succeed()) + var degradedCond *configv1.ClusterOperatorStatusCondition + for i := range co.Status.Conditions { + if co.Status.Conditions[i].Type == cloudConfigControllerDegradedCondition { + degradedCond = &co.Status.Conditions[i] + break + } + } + Expect(degradedCond).NotTo(BeNil(), "cloudConfigControllerDegraded condition should exist after threshold is crossed") + Expect(degradedCond.Status).To(Equal(configv1.ConditionTrue), + "cloudConfigControllerDegraded should be True after threshold is crossed") + }) + + It("successful reconcile resets the failure window so transient errors must start fresh", func() { + fakeClock := clocktesting.NewFakeClock(time.Now()) + getErr := fmt.Errorf("connection refused") + faultyClient := errorInjectingClient{Client: cl, getErr: &getErr, failType: &configv1.Infrastructure{}} + + co := &configv1.ClusterOperator{ObjectMeta: metav1.ObjectMeta{Name: clusterOperatorName}} + Expect(cl.Create(ctx, co)).To(Succeed()) + + reconciler := &CloudConfigReconciler{ + ClusterOperatorStatusClient: ClusterOperatorStatusClient{ + Client: &faultyClient, + Clock: fakeClock, + ManagedNamespace: testManagedNamespace, + }, + Scheme: scheme.Scheme, + FeatureGateAccess: featuregates.NewHardcodedFeatureGateAccessForTesting(nil, nil, nil, nil), + } + + By("opening the failure window with a transient error") + _, err := reconciler.Reconcile(ctx, reconcile.Request{}) + Expect(err).To(HaveOccurred()) + + By("clearing the fault so Reconcile succeeds via the Infrastructure-not-found path") + getErr = nil + _, err = reconciler.Reconcile(ctx, reconcile.Request{}) + Expect(err).NotTo(HaveOccurred()) + + By("advancing past the original threshold and re-injecting the fault") + fakeClock.Step(transientDegradedThreshold + time.Second) + getErr = fmt.Errorf("connection refused") + _, err = reconciler.Reconcile(ctx, reconcile.Request{}) + Expect(err).To(HaveOccurred()) + + Expect(cl.Get(ctx, client.ObjectKey{Name: clusterOperatorName}, co)).To(Succeed()) + Expect(v1helpers.IsStatusConditionTrue(co.Status.Conditions, cloudConfigControllerDegradedCondition)).To(BeFalse(), + "cloudConfigControllerDegraded should not be True because the failure window was reset by the successful reconcile") + }) +}) diff --git a/pkg/controllers/clusteroperator_controller.go b/pkg/controllers/clusteroperator_controller.go index a5fefa1f3..94f7f1950 100644 --- a/pkg/controllers/clusteroperator_controller.go +++ b/pkg/controllers/clusteroperator_controller.go @@ -121,11 +121,16 @@ func (r *CloudOperatorReconciler) Reconcile(ctx context.Context, _ ctrl.Request) return ctrl.Result{}, reconcile.TerminalError(fmt.Errorf("failed to build operator config: %w", err)) } - if err := r.sync(ctx, operatorConfig, conditionOverrides); err != nil { + progressing, err := r.sync(ctx, operatorConfig, conditionOverrides) + if err != nil { klog.Errorf("Unable to sync operands: %s", err) return ctrl.Result{}, fmt.Errorf("failed to sync operands: %w", err) } + if progressing { + return ctrl.Result{}, nil + } + if err := r.setStatusAvailable(ctx, conditionOverrides); err != nil { klog.Errorf("Unable to sync cluster operator status: %s", err) return ctrl.Result{}, fmt.Errorf("error syncing ClusterOperatorStatus: %w", err) @@ -143,33 +148,35 @@ func (r *CloudOperatorReconciler) clearFailureWindow() { r.failures.clear() } -func (r *CloudOperatorReconciler) sync(ctx context.Context, config config.OperatorConfig, conditionOverrides []configv1.ClusterOperatorStatusCondition) error { - // Deploy resources for platform +func (r *CloudOperatorReconciler) sync(ctx context.Context, config config.OperatorConfig, conditionOverrides []configv1.ClusterOperatorStatusCondition) (bool, error) { resources, err := cloud.GetResources(config) if err != nil { - return err + return false, err } updated, err := r.applyResources(ctx, resources) if err != nil { - return err + return false, err } if updated { - return r.setStatusProgressing(ctx, conditionOverrides) + if err := r.setStatusProgressing(ctx, conditionOverrides); err != nil { + return false, err + } + return true, nil } - return nil + return false, nil } // applyResources will apply all resources as-is to the cluster, allowing adding of custom annotations and lables func (r *CloudOperatorReconciler) applyResources(ctx context.Context, resources []client.Object) (bool, error) { - updated := false - var err error + anyUpdated := false for _, resource := range resources { - updated, err = resourceapply.ApplyResource(ctx, r.Client, r.Recorder, resource) + updated, err := resourceapply.ApplyResource(ctx, r.Client, r.Recorder, resource) if err != nil { return false, err } + anyUpdated = anyUpdated || updated if err := r.watcher.Watch(ctx, resource); err != nil { klog.Errorf("Unable to establish watch on object %s '%s': %+v", resource.GetObjectKind().GroupVersionKind(), resource.GetName(), err) @@ -182,7 +189,7 @@ func (r *CloudOperatorReconciler) applyResources(ctx context.Context, resources klog.V(2).Info("Resources applied successfully.") } - return updated, nil + return anyUpdated, nil } // SetupWithManager sets up the controller with the Manager. diff --git a/pkg/controllers/clusteroperator_controller_test.go b/pkg/controllers/clusteroperator_controller_test.go index ae93f1513..a062971bb 100644 --- a/pkg/controllers/clusteroperator_controller_test.go +++ b/pkg/controllers/clusteroperator_controller_test.go @@ -52,18 +52,7 @@ var _ = Describe("Cluster Operator status controller", func() { }) AfterEach(func() { - co := &configv1.ClusterOperator{} - err := cl.Get(context.Background(), client.ObjectKey{Name: clusterOperatorName}, co) - if err == nil || !apierrors.IsNotFound(err) { - Eventually(func() error { - err := cl.Delete(context.Background(), operator) - if err == nil || apierrors.IsNotFound(err) { - return nil - } - return err - }).Should(Succeed()) - } - Expect(cl.Get(context.Background(), client.ObjectKey{Name: clusterOperatorName}, co)).To(MatchError(apierrors.IsNotFound, "ClusterOperator should have been deleted")) + deleteClusterOperator(context.Background(), cl) }) type testCase struct { @@ -605,19 +594,77 @@ var _ = Describe("Apply resources should", func() { Expect(dep.Labels[common.CloudControllerManagerProviderLabel]).To(Equal("AWS")) }) - AfterEach(func() { - co := &configv1.ClusterOperator{} - err := cl.Get(context.Background(), client.ObjectKey{Name: clusterOperatorName}, co) - if err == nil || !apierrors.IsNotFound(err) { - Eventually(func() error { - err := cl.Delete(context.Background(), co) - if err == nil || apierrors.IsNotFound(err) { - return nil - } - return err - }, timeout).Should(Succeed()) + It("reports updated=true when only a non-final resource changed", func() { + operatorConfig := getConfigForPlatform(&configv1.PlatformStatus{Type: configv1.AWSPlatformType}) + awsResources, err := cloud.GetResources(operatorConfig) + Expect(err).To(Succeed()) + Expect(len(awsResources)).To(BeNumerically(">=", 2)) + + resources = append(resources, awsResources...) + + updated, err := reconciler.applyResources(context.TODO(), resources) + Expect(err).ShouldNot(HaveOccurred()) + Expect(updated).To(BeTrue()) + + // Drain recorder events from initial creation + for len(recorder.Events) > 0 { + <-recorder.Events + } + + // Modify only the deployment so it gets updated, while later resources remain unchanged + for i, res := range resources { + if dep, ok := res.(*appsv1.Deployment); ok { + dep.Spec.Replicas = ptr.To[int32](99) + resources[i] = dep + break + } } - Expect(cl.Get(context.Background(), client.ObjectKey{Name: clusterOperatorName}, co)).To(MatchError(apierrors.IsNotFound, "ClusterOperator should have been deleted")) + + updated, err = reconciler.applyResources(context.TODO(), resources) + Expect(err).ShouldNot(HaveOccurred()) + Expect(updated).To(BeTrue(), "should report updated even when only a non-final resource changed") + }) + + It("should set Progressing=True on first sync and not signal progressing when resources are stable", func() { + reconciler.ManagedNamespace = DefaultManagedNamespace + + co := &configv1.ClusterOperator{} + co.SetName(clusterOperatorName) + Expect(cl.Create(context.TODO(), co)).To(Succeed()) + + operatorConfig := getConfigForPlatform(&configv1.PlatformStatus{Type: configv1.AWSPlatformType}) + awsResources, err := cloud.GetResources(operatorConfig) + Expect(err).To(Succeed()) + resources = append(resources, awsResources...) + + // First sync: resources do not yet exist, so applyResources reports updated=true. + progressing, err := reconciler.sync(context.TODO(), operatorConfig, nil) + Expect(err).To(Succeed()) + Expect(progressing).To(BeTrue(), "sync should report progressing when resources are newly applied") + + Expect(cl.Get(context.TODO(), client.ObjectKey{Name: clusterOperatorName}, co)).To(Succeed()) + progressingCond := v1helpers.FindStatusCondition(co.Status.Conditions, configv1.OperatorProgressing) + Expect(progressingCond).NotTo(BeNil(), "Progressing condition should exist after first sync") + Expect(progressingCond.Status).To( + Equal(configv1.ConditionTrue), "Progressing should be True after resources are first applied", + ) + + // Second sync: resources exist and are unchanged, so applyResources reports updated=false. + progressing, err = reconciler.sync(context.TODO(), operatorConfig, nil) + Expect(err).To(Succeed()) + Expect(progressing).To(BeFalse(), "sync should not report progressing when resources are already up to date") + + // Progressing remains True because sync() does not clear it; only setStatusAvailable() does. + Expect(cl.Get(context.TODO(), client.ObjectKey{Name: clusterOperatorName}, co)).To(Succeed()) + progressingCond = v1helpers.FindStatusCondition(co.Status.Conditions, configv1.OperatorProgressing) + Expect(progressingCond).NotTo(BeNil(), "Progressing condition should still exist after second sync") + Expect(progressingCond.Status).To( + Equal(configv1.ConditionTrue), "Progressing should remain True until setStatusAvailable is called", + ) + }) + + AfterEach(func() { + deleteClusterOperator(context.Background(), cl) for _, operand := range resources { Expect(cl.Delete(context.Background(), operand)).To(Succeed()) @@ -634,24 +681,13 @@ var _ = Describe("Apply resources should", func() { }, timeout).Should(Succeed()) } }) - }) var _ = Describe("CloudOperatorReconciler error handling", func() { ctx := context.Background() AfterEach(func() { - co := &configv1.ClusterOperator{} - if err := cl.Get(ctx, client.ObjectKey{Name: clusterOperatorName}, co); err == nil { - Eventually(func() error { - err := cl.Delete(ctx, co) - if err == nil || apierrors.IsNotFound(err) { - return nil - } - return err - }).Should(Succeed()) - } - Expect(cl.Get(ctx, client.ObjectKey{Name: clusterOperatorName}, co)).To(MatchError(apierrors.IsNotFound, "ClusterOperator should have been deleted")) + deleteClusterOperator(ctx, cl) infra := &configv1.Infrastructure{ObjectMeta: metav1.ObjectMeta{Name: infrastructureResourceName}} cl.Delete(ctx, infra) //nolint:errcheck @@ -748,7 +784,113 @@ var _ = Describe("CloudOperatorReconciler error handling", func() { Expect(v1helpers.FindStatusCondition(co.Status.Conditions, configv1.OperatorDegraded).Status).To(Equal(configv1.ConditionTrue)) }) - It("successful reconcile clears the failure window so subsequent transient errors start fresh", func() { + It("transient error should not set OperatorDegraded before threshold", func() { + fakeClock := clocktesting.NewFakeClock(time.Now()) + getErr := fmt.Errorf("connection refused") + faultyClient := errorInjectingClient{Client: cl, getErr: &getErr, failType: &configv1.Infrastructure{}} + + co := &configv1.ClusterOperator{ObjectMeta: metav1.ObjectMeta{Name: clusterOperatorName}} + Expect(cl.Create(ctx, co)).To(Succeed()) + + reconciler := &CloudOperatorReconciler{ + ClusterOperatorStatusClient: ClusterOperatorStatusClient{ + Client: &faultyClient, + Clock: fakeClock, + ManagedNamespace: defaultManagementNamespace, + Recorder: record.NewFakeRecorder(32), + }, + Scheme: scheme.Scheme, + } + + // Reconcile several times, advancing the clock each iteration but + // staying under the threshold. OperatorDegraded must never be set. + stepSize := aggregatedTransientDegradedThreshold / 5 + for range 4 { + _, err := reconciler.Reconcile(ctx, reconcile.Request{}) + Expect(err).To(HaveOccurred(), "transient error should be returned for controller-runtime to requeue") + fakeClock.Step(stepSize) + } + + Expect(cl.Get(ctx, client.ObjectKey{Name: clusterOperatorName}, co)).To(Succeed()) + Expect(v1helpers.IsStatusConditionTrue(co.Status.Conditions, configv1.OperatorDegraded)).To(BeFalse(), + "OperatorDegraded should not be True before the transient threshold elapses") + }) + + It("transient error should set OperatorDegraded after threshold is crossed", func() { + fakeClock := clocktesting.NewFakeClock(time.Now()) + getErr := fmt.Errorf("connection refused") + faultyClient := errorInjectingClient{Client: cl, getErr: &getErr, failType: &configv1.Infrastructure{}} + + co := &configv1.ClusterOperator{ObjectMeta: metav1.ObjectMeta{Name: clusterOperatorName}} + Expect(cl.Create(ctx, co)).To(Succeed()) + + reconciler := &CloudOperatorReconciler{ + ClusterOperatorStatusClient: ClusterOperatorStatusClient{ + Client: &faultyClient, + Clock: fakeClock, + ManagedNamespace: defaultManagementNamespace, + Recorder: record.NewFakeRecorder(32), + }, + Scheme: scheme.Scheme, + } + + // First Reconcile opens the failure window. + _, err := reconciler.Reconcile(ctx, reconcile.Request{}) + Expect(err).To(HaveOccurred()) + + // Advance past the threshold. + fakeClock.Step(aggregatedTransientDegradedThreshold + time.Second) + + // Next Reconcile crosses the threshold and sets OperatorDegraded. + _, err = reconciler.Reconcile(ctx, reconcile.Request{}) + Expect(err).To(HaveOccurred()) + + Expect(cl.Get(ctx, client.ObjectKey{Name: clusterOperatorName}, co)).To(Succeed()) + degradedCond := v1helpers.FindStatusCondition(co.Status.Conditions, configv1.OperatorDegraded) + Expect(degradedCond).NotTo(BeNil(), "OperatorDegraded condition should exist after threshold is crossed") + Expect(degradedCond.Status).To(Equal(configv1.ConditionTrue), + "OperatorDegraded should be True after threshold is crossed") + }) + + It("successful reconcile resets the failure window so transient errors must start fresh", func() { + fakeClock := clocktesting.NewFakeClock(time.Now()) + getErr := fmt.Errorf("connection refused") + faultyClient := errorInjectingClient{Client: cl, getErr: &getErr, failType: &configv1.Infrastructure{}} + + co := &configv1.ClusterOperator{ObjectMeta: metav1.ObjectMeta{Name: clusterOperatorName}} + Expect(cl.Create(ctx, co)).To(Succeed()) + + reconciler := &CloudOperatorReconciler{ + ClusterOperatorStatusClient: ClusterOperatorStatusClient{ + Client: &faultyClient, + Clock: fakeClock, + ManagedNamespace: defaultManagementNamespace, + Recorder: record.NewFakeRecorder(32), + }, + Scheme: scheme.Scheme, + } + + By("opening the failure window with a transient error") + _, err := reconciler.Reconcile(ctx, reconcile.Request{}) + Expect(err).To(HaveOccurred()) + + By("clearing the fault so Reconcile succeeds via the Infrastructure-not-found path") + getErr = nil + _, err = reconciler.Reconcile(ctx, reconcile.Request{}) + Expect(err).NotTo(HaveOccurred()) + + By("advancing past the original threshold and re-injecting the fault") + fakeClock.Step(aggregatedTransientDegradedThreshold + time.Second) + getErr = fmt.Errorf("connection refused") + _, err = reconciler.Reconcile(ctx, reconcile.Request{}) + Expect(err).To(HaveOccurred()) + + Expect(cl.Get(ctx, client.ObjectKey{Name: clusterOperatorName}, co)).To(Succeed()) + Expect(v1helpers.IsStatusConditionTrue(co.Status.Conditions, configv1.OperatorDegraded)).To(BeFalse(), + "OperatorDegraded should not be True because the failure window was reset by the successful reconcile") + }) + + It("successful finalizeReconcile clears the failure window so subsequent transient errors start fresh", func() { fakeClock := clocktesting.NewFakeClock(time.Now()) reconciler := &CloudOperatorReconciler{ ClusterOperatorStatusClient: ClusterOperatorStatusClient{ @@ -799,3 +941,121 @@ var _ = Describe("CloudOperatorReconciler error handling", func() { "should not be degraded because the failure window was reset by the successful reconcile") }) }) + +var _ = Describe("Reconcile progressing flow", func() { + ctx := context.Background() + var reconciler *CloudOperatorReconciler + var operandResources []client.Object + + BeforeEach(func() { + c, err := cache.New(cfg, cache.Options{}) + Expect(err).To(Succeed()) + + w, err := NewObjectWatcher(WatcherOptions{Cache: c}) + Expect(err).To(Succeed()) + + reconciler = &CloudOperatorReconciler{ + ClusterOperatorStatusClient: ClusterOperatorStatusClient{ + Client: cl, + Clock: clocktesting.NewFakePassiveClock(time.Now()), + ManagedNamespace: DefaultManagedNamespace, + Recorder: record.NewFakeRecorder(32), + }, + Scheme: scheme.Scheme, + watcher: w, + ImagesFile: testImagesFilePath, + } + + infra := &configv1.Infrastructure{ + ObjectMeta: metav1.ObjectMeta{Name: infrastructureResourceName}, + } + Expect(cl.Create(ctx, infra)).To(Succeed()) + + infra.Status = configv1.InfrastructureStatus{ + PlatformStatus: &configv1.PlatformStatus{Type: configv1.AWSPlatformType}, + Platform: configv1.AWSPlatformType, + InfrastructureTopology: configv1.HighlyAvailableTopologyMode, + ControlPlaneTopology: configv1.HighlyAvailableTopologyMode, + } + Expect(cl.Status().Update(ctx, infra)).To(Succeed()) + + co := &configv1.ClusterOperator{ObjectMeta: metav1.ObjectMeta{Name: clusterOperatorName}} + Expect(cl.Create(ctx, co)).To(Succeed()) + + co.Status.Conditions = []configv1.ClusterOperatorStatusCondition{ + {Type: cloudConfigControllerAvailableCondition, Status: configv1.ConditionTrue, LastTransitionTime: metav1.Now()}, + {Type: cloudConfigControllerDegradedCondition, Status: configv1.ConditionFalse, LastTransitionTime: metav1.Now()}, + {Type: trustedCABundleControllerAvailableCondition, Status: configv1.ConditionTrue, LastTransitionTime: metav1.Now()}, + {Type: trustedCABundleControllerDegradedCondition, Status: configv1.ConditionFalse, LastTransitionTime: metav1.Now()}, + } + Expect(cl.Status().Update(ctx, co)).To(Succeed()) + }) + + AfterEach(func() { + deleteClusterOperator(ctx, cl) + + infra := &configv1.Infrastructure{ObjectMeta: metav1.ObjectMeta{Name: infrastructureResourceName}} + cl.Delete(ctx, infra) //nolint:errcheck + + for _, res := range operandResources { + cl.Delete(ctx, res) //nolint:errcheck + Eventually(func() error { + err := cl.Get(ctx, client.ObjectKeyFromObject(res), res) + if apierrors.IsNotFound(err) { + return nil + } + if err != nil { + return err + } + return fmt.Errorf("expected operand %s to be deleted", res.GetName()) + }, timeout).Should(Succeed()) + } + }) + + It("sets Progressing=True on first reconcile, then Available on second", func() { + // First Reconcile: resources are created, Progressing=True, Available not set. + _, err := reconciler.Reconcile(ctx, reconcile.Request{}) + Expect(err).NotTo(HaveOccurred()) + + // Track created resources for cleanup + operatorConfig := config.OperatorConfig{ + ManagedNamespace: DefaultManagedNamespace, + ImagesReference: config.ImagesReference{ + CloudControllerManagerOperator: "registry.ci.openshift.org/openshift:cluster-cloud-controller-manager-operator", + CloudControllerManagerAWS: "registry.ci.openshift.org/openshift:aws-cloud-controller-manager", + }, + PlatformStatus: &configv1.PlatformStatus{Type: configv1.AWSPlatformType}, + } + operandResources, err = cloud.GetResources(operatorConfig) + Expect(err).NotTo(HaveOccurred()) + + co := &configv1.ClusterOperator{} + Expect(cl.Get(ctx, client.ObjectKey{Name: clusterOperatorName}, co)).To(Succeed()) + progressingCond := v1helpers.FindStatusCondition(co.Status.Conditions, configv1.OperatorProgressing) + Expect(progressingCond).NotTo(BeNil(), "Progressing condition should exist after first reconcile") + Expect(progressingCond.Status).To( + Equal(configv1.ConditionTrue), "Progressing should be True after first reconcile creates resources", + ) + availCond := v1helpers.FindStatusCondition(co.Status.Conditions, configv1.OperatorAvailable) + if availCond != nil { + Expect(availCond.Status).NotTo(Equal(configv1.ConditionTrue), + "Available should not be True while progressing") + } + + // Second Reconcile: nothing changed, Progressing=False, Available=True. + _, err = reconciler.Reconcile(ctx, reconcile.Request{}) + Expect(err).NotTo(HaveOccurred()) + + Expect(cl.Get(ctx, client.ObjectKey{Name: clusterOperatorName}, co)).To(Succeed()) + progressingCond = v1helpers.FindStatusCondition(co.Status.Conditions, configv1.OperatorProgressing) + Expect(progressingCond).NotTo(BeNil(), "Progressing condition should exist after second reconcile") + Expect(progressingCond.Status).To( + Equal(configv1.ConditionFalse), "Progressing should be False after second reconcile with no changes", + ) + availCond = v1helpers.FindStatusCondition(co.Status.Conditions, configv1.OperatorAvailable) + Expect(availCond).NotTo(BeNil(), "Available condition should exist after second reconcile") + Expect(availCond.Status).To( + Equal(configv1.ConditionTrue), "Available should be True after second reconcile", + ) + }) +}) diff --git a/pkg/controllers/reconcile_dispatch_test.go b/pkg/controllers/reconcile_dispatch_test.go new file mode 100644 index 000000000..0c257c20b --- /dev/null +++ b/pkg/controllers/reconcile_dispatch_test.go @@ -0,0 +1,44 @@ +package controllers + +import ( + "context" + "reflect" + + . "github.com/onsi/gomega" + configv1 "github.com/openshift/api/config/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// errorInjectingClient wraps a real client and injects errors on Get calls +// for a specific object type. This simulates transient API server failures +// (e.g. network blips) without depending on any internal controller code path. +// The getErr pointer allows tests to toggle faults between reconcile calls. +type errorInjectingClient struct { + client.Client + getErr *error + failType client.Object +} + +func (c *errorInjectingClient) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if c.getErr != nil && *c.getErr != nil && reflect.TypeOf(obj) == reflect.TypeOf(c.failType) { + return *c.getErr + } + return c.Client.Get(ctx, key, obj, opts...) +} + +func deleteClusterOperator(ctx context.Context, cl client.Client) { + co := &configv1.ClusterOperator{} + if err := cl.Get(ctx, client.ObjectKey{Name: clusterOperatorName}, co); err == nil { + Eventually(func() error { + err := cl.Delete(ctx, co) + if err == nil || apierrors.IsNotFound(err) { + return nil + } + return err + }).Should(Succeed()) + } + Eventually(func() error { + return cl.Get(ctx, client.ObjectKey{Name: clusterOperatorName}, co) + }).Should(MatchError(apierrors.IsNotFound, "IsNotFound")) +} diff --git a/pkg/controllers/trusted_ca_bundle_controller_test.go b/pkg/controllers/trusted_ca_bundle_controller_test.go index f3dd6cbed..2aefc2d27 100644 --- a/pkg/controllers/trusted_ca_bundle_controller_test.go +++ b/pkg/controllers/trusted_ca_bundle_controller_test.go @@ -162,20 +162,7 @@ var _ = Describe("Trusted CA bundle sync controller", func() { GracePeriodSeconds: ptr.To[int64](0), } - co := &v1.ClusterOperator{} - err := cl.Get(context.Background(), client.ObjectKey{Name: clusterOperatorName}, co) - if err == nil || !apierrors.IsNotFound(err) { - Eventually(func() error { - err := cl.Delete(context.Background(), co) - if err == nil || apierrors.IsNotFound(err) { - return nil - } - return err - }).Should(Succeed()) - } - Eventually(func() bool { - return apierrors.IsNotFound(cl.Get(context.Background(), client.ObjectKey{Name: clusterOperatorName}, co)) - }).Should(BeTrue()) + deleteClusterOperator(context.Background(), cl) if proxyResource != nil { Expect(cl.Delete(ctx, proxyResource, deleteOptions)).To(Succeed()) @@ -326,20 +313,7 @@ var _ = Describe("Trusted CA bundle reconciler unit tests", func() { ctx := context.Background() AfterEach(func() { - co := &v1.ClusterOperator{} - if err := cl.Get(ctx, client.ObjectKey{Name: clusterOperatorName}, co); err == nil { - Expect(cl.Delete(ctx, co)).To(Succeed()) - Eventually(func() error { - err := cl.Get(ctx, client.ObjectKey{Name: clusterOperatorName}, co) - if apierrors.IsNotFound(err) { - return nil - } - if err != nil { - return err - } - return fmt.Errorf("expected ClusterOperator to be deleted") - }).Should(Succeed()) - } + deleteClusterOperator(ctx, cl) }) It("reconcile should succeed and be available if no proxy resource found", func() {