diff --git a/controllers/chartmanager/chartmanager.go b/controllers/chartmanager/chartmanager.go index 3d59d7a8..0a89bb69 100644 --- a/controllers/chartmanager/chartmanager.go +++ b/controllers/chartmanager/chartmanager.go @@ -433,6 +433,9 @@ func (m *instance) GetManagerForChart(clusterNamespace, clusterName string, clusterKey := m.getClusterKey(clusterNamespace, clusterName, clusterType) releaseKey := m.GetReleaseKey(chart.ReleaseNamespace, chart.ReleaseName) + m.chartMux.Lock() + defer m.chartMux.Unlock() + if _, ok := m.perClusterChartMap[clusterKey]; !ok { return "", fmt.Errorf("no ClusterSummary manging helm chart %s (release %s)", chart.ChartName, chart.ReleaseName) diff --git a/controllers/clustersummary_deployer.go b/controllers/clustersummary_deployer.go index f6edfc2d..f8413986 100644 --- a/controllers/clustersummary_deployer.go +++ b/controllers/clustersummary_deployer.go @@ -77,7 +77,7 @@ func (r *ClusterSummaryReconciler) deployFeature(ctx context.Context, clusterSum clusterSummary := clusterSummaryScope.ClusterSummary logger = logger.WithValues("clusternamespace", clusterSummary.Spec.ClusterNamespace, - "clustername", clusterSummary.Spec.ClusterNamespace, + "clustername", clusterSummary.Spec.ClusterName, "applicant", clusterSummary.Name, "feature", string(f.id)) logger.V(logs.LogDebug).Info("request to deploy") @@ -414,7 +414,7 @@ func (r *ClusterSummaryReconciler) undeployFeature(ctx context.Context, clusterS clusterSummary := clusterSummaryScope.ClusterSummary logger = logger.WithValues("clusternamespace", clusterSummary.Spec.ClusterNamespace, - "clustername", clusterSummary.Spec.ClusterNamespace, + "clustername", clusterSummary.Spec.ClusterName, "applicant", clusterSummary.Name, "feature", string(f.id)) logger.V(logs.LogDebug).Info("request to un-deploy") @@ -732,15 +732,20 @@ func (r *ClusterSummaryReconciler) proceesAgentDeploymentStatus(ctx context.Cont provisioning := libsveltosv1beta1.FeatureStatusProvisioning return &provisioning, true } - errorMsg := err.Error() - clusterSummaryScope.SetFailureMessage(f.id, &errorMsg) - } else if pullmode.IsActionNotSetToDeploy(err) { - _ = pullmode.TerminateDeploymentTracking(ctx, r.Client, clusterSummary.Spec.ClusterNamespace, - clusterSummary.Spec.ClusterName, clusterSummary.Kind, clusterSummary.Name, string(f.id), logger) - } else if status.FailureMessage != nil { + if pullmode.IsActionNotSetToDeploy(err) { + _ = pullmode.TerminateDeploymentTracking(ctx, r.Client, clusterSummary.Spec.ClusterNamespace, + clusterSummary.Spec.ClusterName, clusterSummary.Kind, clusterSummary.Name, string(f.id), logger) + } else { + errorMsg := err.Error() + clusterSummaryScope.SetFailureMessage(f.id, &errorMsg) + } + } else if status != nil && status.FailureMessage != nil { clusterSummaryScope.SetFailureMessage(f.id, status.FailureMessage) } + if status == nil { + return nil, false + } return status.DeploymentStatus, false } diff --git a/controllers/dependencymanager/manager.go b/controllers/dependencymanager/manager.go index cf5d5e16..43cd8f04 100644 --- a/controllers/dependencymanager/manager.go +++ b/controllers/dependencymanager/manager.go @@ -489,10 +489,22 @@ func (m *instance) updateClusterProfile(ctx context.Context, c client.Client, cl func (m *instance) updateProfiles(ctx context.Context, c client.Client, logger logr.Logger) { const interval = 30 * time.Second for { + // Snapshot the dirty set and their cluster deployments under the lock. m.chartMux.Lock() + toUpdate := make([]corev1.ObjectReference, 0, len(m.profileToBeUpdated)) + for profile := range m.profileToBeUpdated { + toUpdate = append(toUpdate, profile) + } + clustersByProfile := make(map[corev1.ObjectReference]ClusterDeployments, len(toUpdate)) + for i := range toUpdate { + clustersByProfile[toUpdate[i]] = m.profileClusterRequests.getClusterDeployments(&toUpdate[i]) + } + m.chartMux.Unlock() + // Make API calls without holding the lock. + succeeded := make([]corev1.ObjectReference, 0, len(toUpdate)) canceled := false - for profile := range m.profileToBeUpdated { + for i := range toUpdate { select { case <-ctx.Done(): canceled = true @@ -501,15 +513,22 @@ func (m *instance) updateProfiles(ctx context.Context, c client.Client, logger l if canceled { break } - clusters := m.profileClusterRequests.getClusterDeployments(&profile) + profile := toUpdate[i] + clusters := clustersByProfile[profile] logger.V(logs.LogDebug).Info(fmt.Sprintf("updating prerequestite profile %s/%s", profile.Namespace, profile.Name)) - err := m.updateProfileInstance(ctx, c, &profile, clusters) - if err == nil { - delete(m.profileToBeUpdated, profile) + if err := m.updateProfileInstance(ctx, c, &profile, clusters); err == nil { + succeeded = append(succeeded, profile) } } - m.chartMux.Unlock() + // Remove successfully updated entries under the lock. + if len(succeeded) > 0 { + m.chartMux.Lock() + for i := range succeeded { + delete(m.profileToBeUpdated, succeeded[i]) + } + m.chartMux.Unlock() + } if canceled { return @@ -524,11 +543,22 @@ func (m *instance) updateProfiles(ctx context.Context, c client.Client, logger l } func (m *instance) rebuildState(ctx context.Context, c client.Client, logger logr.Logger) { + const retryInterval = 5 * time.Second for { if err := m.rebuildStateWithProfiles(ctx, c, logger); err != nil { + select { + case <-ctx.Done(): + return + case <-time.After(retryInterval): + } continue } if err := m.rebuildStateWithClusterProfiles(ctx, c, logger); err != nil { + select { + case <-ctx.Done(): + return + case <-time.After(retryInterval): + } continue } break diff --git a/test/pullmode-sveltosapplier.yaml b/test/pullmode-sveltosapplier.yaml index 462d4a0c..a3c952e6 100644 --- a/test/pullmode-sveltosapplier.yaml +++ b/test/pullmode-sveltosapplier.yaml @@ -99,7 +99,7 @@ spec: valueFrom: fieldRef: fieldPath: metadata.namespace - image: docker.io/projectsveltos/sveltos-applier@sha256:8262f5598d29079afe5d71c221d9db2e89a752b9798fa16ab03a3496abc631c2 + image: docker.io/projectsveltos/sveltos-applier@sha256:d9009aaba4eea6fce0a94c17f9ed93fb495bc1bedee829bad0cf7c7a37725491 livenessProbe: failureThreshold: 3 httpGet: