From f8cefd398c5fe881127394135d1d92622fc53e51 Mon Sep 17 00:00:00 2001 From: Brian DeHamer Date: Mon, 23 Mar 2026 13:29:30 -0700 Subject: [PATCH 1/2] add deployment informer in place of direct API call Signed-off-by: Brian DeHamer --- README.md | 2 ++ .../templates/clusterrole.yaml | 2 ++ deploy/manifest.yaml | 2 +- internal/controller/controller.go | 30 +++++++++++-------- .../controller/controller_integration_test.go | 2 +- 5 files changed, 24 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 0f86d4e..f9d8693 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,8 @@ The controller requires the following minimum permissions: | API Group | Resource | Verbs | |-----------|----------|-------| | `""` (core) | `pods` | `get`, `list`, `watch` | +| `apps` | `deployments` | `get`, `list`, `watch` | +| `apps` | `replicasets` | `get` | If you only need to monitor a single namespace, you can modify the manifest to use a `Role` and `RoleBinding` instead of `ClusterRole` and `ClusterRoleBinding` for more restricted permissions. 
diff --git a/deploy/charts/deployment-tracker/templates/clusterrole.yaml b/deploy/charts/deployment-tracker/templates/clusterrole.yaml index 090d4cf..fa69c59 100644 --- a/deploy/charts/deployment-tracker/templates/clusterrole.yaml +++ b/deploy/charts/deployment-tracker/templates/clusterrole.yaml @@ -19,6 +19,8 @@ rules: - deployments verbs: - get + - list + - watch - apiGroups: - apps resources: diff --git a/deploy/manifest.yaml b/deploy/manifest.yaml index f5b8e72..8df9ceb 100644 --- a/deploy/manifest.yaml +++ b/deploy/manifest.yaml @@ -19,7 +19,7 @@ rules: verbs: ["get", "list", "watch"] - apiGroups: ["apps"] resources: ["deployments"] - verbs: ["get"] + verbs: ["get", "list", "watch"] - apiGroups: ["apps"] resources: ["replicasets"] verbs: ["get"] diff --git a/internal/controller/controller.go b/internal/controller/controller.go index 22c2d22..abc0367 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -22,6 +22,7 @@ import ( "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes" + appslisters "k8s.io/client-go/listers/apps/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" ) @@ -64,6 +65,8 @@ type Controller struct { clientset kubernetes.Interface metadataAggregator podMetadataAggregator podInformer cache.SharedIndexInformer + deploymentInformer cache.SharedIndexInformer + deploymentLister appslisters.DeploymentLister workqueue workqueue.TypedRateLimitingInterface[PodEvent] apiClient deploymentRecordPoster cfg *Config @@ -82,6 +85,8 @@ func New(clientset kubernetes.Interface, metadataAggregator podMetadataAggregato factory := createInformerFactory(clientset, namespace, excludeNamespaces) podInformer := factory.Core().V1().Pods().Informer() + deploymentInformer := factory.Apps().V1().Deployments().Informer() + deploymentLister := factory.Apps().V1().Deployments().Lister() // Create work queue with rate limiting queue := workqueue.NewTypedRateLimitingQueue( @@ -117,6 
+122,8 @@ func New(clientset kubernetes.Interface, metadataAggregator podMetadataAggregato clientset: clientset, metadataAggregator: metadataAggregator, podInformer: podInformer, + deploymentInformer: deploymentInformer, + deploymentLister: deploymentLister, workqueue: queue, apiClient: apiClient, cfg: cfg, @@ -237,14 +244,15 @@ func (c *Controller) Run(ctx context.Context, workers int) error { defer runtime.HandleCrash() defer c.workqueue.ShutDown() - slog.Info("Starting pod informer") + slog.Info("Starting informers") - // Start the informer + // Start the informers go c.podInformer.Run(ctx.Done()) + go c.deploymentInformer.Run(ctx.Done()) - // Wait for the cache to be synced - slog.Info("Waiting for informer cache to sync") - if !cache.WaitForCacheSync(ctx.Done(), c.podInformer.HasSynced) { + // Wait for the caches to be synced + slog.Info("Waiting for informer caches to sync") + if !cache.WaitForCacheSync(ctx.Done(), c.podInformer.HasSynced, c.deploymentInformer.HasSynced) { return errors.New("timed out waiting for caches to sync") } @@ -327,7 +335,7 @@ func (c *Controller) processEvent(ctx context.Context, event PodEvent) error { // the referenced image digest to the newly observed (via // the create event). deploymentName := getDeploymentName(pod) - if deploymentName != "" && c.deploymentExists(ctx, pod.Namespace, deploymentName) { + if deploymentName != "" && c.deploymentExists(pod.Namespace, deploymentName) { slog.Debug("Deployment still exists, skipping pod delete (scale down)", "namespace", pod.Namespace, "deployment", deploymentName, @@ -390,16 +398,14 @@ func (c *Controller) processEvent(ctx context.Context, event PodEvent) error { return lastErr } -// deploymentExists checks if a deployment exists in the cluster. 
-func (c *Controller) deploymentExists(ctx context.Context, namespace, name string) bool { - _, err := c.clientset.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{}) +// deploymentExists checks if a deployment exists in the local informer cache. +func (c *Controller) deploymentExists(namespace, name string) bool { + _, err := c.deploymentLister.Deployments(namespace).Get(name) if err != nil { if k8serrors.IsNotFound(err) { return false } - // On error, assume it exists to be safe - // (avoid false decommissions) - slog.Warn("Failed to check if deployment exists, assuming it does", + slog.Warn("Failed to check if deployment exists in cache, assuming it does", "namespace", namespace, "deployment", name, "error", err, diff --git a/internal/controller/controller_integration_test.go b/internal/controller/controller_integration_test.go index 3263c97..75e6265 100644 --- a/internal/controller/controller_integration_test.go +++ b/internal/controller/controller_integration_test.go @@ -119,7 +119,7 @@ func setup(t *testing.T, onlyNamespace string, excludeNamespaces string) (*kuber go func() { _ = ctrl.Run(ctx, 1) }() - if !cache.WaitForCacheSync(ctx.Done(), ctrl.podInformer.HasSynced) { + if !cache.WaitForCacheSync(ctx.Done(), ctrl.podInformer.HasSynced, ctrl.deploymentInformer.HasSynced) { t.Fatal("timed out waiting for informer cache to sync") } From 9c4f628a400aca6bfd870b23363e58df72c29681 Mon Sep 17 00:00:00 2001 From: Brian DeHamer Date: Mon, 23 Mar 2026 13:38:44 -0700 Subject: [PATCH 2/2] clarify ClusterRole permission reqs in README Signed-off-by: Brian DeHamer --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f9d8693..986dd6c 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,7 @@ which includes: - **Namespace**: `deployment-tracker` - **ServiceAccount**: Identity for the controller pod -- **ClusterRole**: Minimal permissions (`get`, `list`, `watch` on pods; `get` on other supported objects) 
+- **ClusterRole**: Minimal permissions (`get`, `list`, `watch` on pods and deployments; `get` on other supported objects) - **ClusterRoleBinding**: Binds the ServiceAccount to the ClusterRole - **Deployment**: Runs the controller with security hardening