From 2f8153e43e6e3069fb6b759b50d76c2f9a9d7df9 Mon Sep 17 00:00:00 2001 From: Gianluca Mardente Date: Sat, 4 Apr 2026 14:06:29 +0200 Subject: [PATCH] (bug) Fix stale Helm repository cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a ClusterProfile is updated to reference a newer Helm chart version, addon-controller reports that the version does not exist even though it is present in the repository. A controller restart is required to deploy successfully. Root cause: repoAddOrUpdate skips re-downloading the repository index when the repo is already registered in the in-memory storage variable (which persists for the lifetime of the process). As a result, LocateChart reads a stale on-disk index that does not contain the newly published version. A reactive recovery function (handleLocateChartError) existed but had two flaws: 1. It returned the error immediately after refreshing the cache, forcing the caller to wait for the next reconciliation cycle to succeed. 2. The cache refresh was gated on the error string "no chart version found for", which is only produced by non-OCI repositories — OCI repos emit a different error message and were never healed. Fix In both the install (locateLoadAndValidateChart) and upgrade (upgradeRelease) paths, when LocateChart fails, the cache is cleared via removeCachedData and the call is retried once within the same reconciliation. If the retry also fails (e.g., the version genuinely does not exist), the error is returned as before. This approach: - Remains fully reactive — the index is never re-downloaded unless LocateChart actually fails, so deployments to N clusters where the version is already cached are unaffected. - Covers both OCI and non-OCI repositories, since the fix is unconditional on the error message. - Eliminates the wasted reconciliation cycle that required a controller restart to recover. --- controllers/handlers_helm.go | 41 +++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/controllers/handlers_helm.go b/controllers/handlers_helm.go index 42879eea..2d5db80c 100644 --- a/controllers/handlers_helm.go +++ b/controllers/handlers_helm.go @@ -1659,13 +1659,33 @@ func installRelease(ctx context.Context, clusterSummary *configv1beta1.ClusterSu return rel, nil } +// locateChartWithCacheRetry calls locateFn to resolve the chart path. If the call fails (e.g. +// because the local repository index is stale and does not yet contain a newly published version), +// the cache is cleared and the call is retried once so that the current reconciliation can succeed +// without waiting for the next cycle. +func locateChartWithCacheRetry( + locateFn func(string, *cli.EnvSettings) (string, error), + chartName string, settings *cli.EnvSettings, + requestedChart *configv1beta1.HelmChart, registryOptions *registryClientOptions, + logger logr.Logger) (string, error) { + + cp, err := locateFn(chartName, settings) + if err != nil { + logger.V(logs.LogInfo).Info(fmt.Sprintf("LocateChart failed: %v; refreshing cache and retrying", err)) + removeCachedData(settings, requestedChart.RepositoryName, requestedChart.RepositoryURL, + registryOptions, logger) + cp, err = locateFn(chartName, settings) + } + return cp, err +} + func locateLoadAndValidateChart(chartName string, settings *cli.EnvSettings, requestedChart *configv1beta1.HelmChart, registryOptions *registryClientOptions, installClient *action.Install, logger logr.Logger, ) (*chart.Chart, error) { - cp, err := installClient.LocateChart(chartName, settings) + cp, err := locateChartWithCacheRetry(installClient.LocateChart, chartName, settings, + requestedChart, registryOptions, logger) if err != nil { - handleLocateChartError(settings, requestedChart, registryOptions, err, logger) return nil, err } @@ -1838,9 +1858,9 @@ func upgradeRelease(ctx context.Context, clusterSummary *configv1beta1.ClusterSu upgradeClient := getHelmUpgradeClient(requestedChart, actionConfig, registryOptions, patches) - cp, err := upgradeClient.LocateChart(chartName, settings) + cp, err := locateChartWithCacheRetry(upgradeClient.LocateChart, chartName, settings, + requestedChart, registryOptions, logger) if err != nil { - handleLocateChartError(settings, requestedChart, registryOptions, err, logger) return nil, err } @@ -4941,19 +4961,6 @@ func getHelmActionInPullMode(ctx context.Context, clusterSummary *configv1beta1. } } -func handleLocateChartError(settings *cli.EnvSettings, requestedChart *configv1beta1.HelmChart, - registryOptions *registryClientOptions, err error, logger logr.Logger) { - - logger.V(logs.LogDebug).Info(fmt.Sprintf("LocateChart failed %v", err)) - - if strings.Contains(err.Error(), "no chart version found for") { - logger.V(logs.LogInfo).Info("Chart version not found, cleaning repository cache", - "repo", requestedChart.RepositoryName) - removeCachedData(settings, requestedChart.RepositoryName, requestedChart.RepositoryURL, - registryOptions, logger) - } -} - func removeCachedData(settings *cli.EnvSettings, name, repoURL string, registryOptions *registryClientOptions, logger logr.Logger) {