diff --git a/cmd/catalogd/main.go b/cmd/catalogd/main.go index af2463e2c6..c60de16077 100644 --- a/cmd/catalogd/main.go +++ b/cmd/catalogd/main.go @@ -425,7 +425,7 @@ func run(ctx context.Context) error { } gc := &garbagecollection.GarbageCollector{ - CachePath: unpackCacheBasePath, + CachePaths: []string{unpackCacheBasePath, storeDir}, Logger: ctrl.Log.WithName("garbage-collector"), MetadataClient: metaClient, Interval: cfg.gcInterval, diff --git a/internal/catalogd/garbagecollection/garbage_collector.go b/internal/catalogd/garbagecollection/garbage_collector.go index f13abdad21..8c97c98545 100644 --- a/internal/catalogd/garbagecollection/garbage_collector.go +++ b/internal/catalogd/garbagecollection/garbage_collector.go @@ -25,7 +25,11 @@ var _ manager.Runnable = (*GarbageCollector)(nil) // that no longer exist. This should only clean up cache entries that // were missed by the handling of a DELETE event on a Catalog resource. type GarbageCollector struct { - CachePath string + // CachePaths is the list of directories to garbage-collect. Each directory + // is expected to contain only subdirectories named after existing ClusterCatalogs; + // any entry whose name does not match a known ClusterCatalog — including orphaned + // temporary directories left by interrupted operations — is removed. + CachePaths []string Logger logr.Logger MetadataClient metadata.Interface Interval time.Duration @@ -37,13 +41,7 @@ type GarbageCollector struct { // supplied garbage collection interval. func (gc *GarbageCollector) Start(ctx context.Context) error { // Run once on startup - removed, err := runGarbageCollection(ctx, gc.CachePath, gc.MetadataClient) - if err != nil { - gc.Logger.Error(err, "running garbage collection") - } - if len(removed) > 0 { - gc.Logger.Info("removed stale cache entries", "removed entries", removed) - } + gc.runAndLog(ctx) // Loop until context is canceled, running garbage collection // at the configured interval @@ -52,13 +50,19 @@ func (gc *GarbageCollector) Start(ctx context.Context) error { case <-ctx.Done(): return ctx.Err() case <-time.After(gc.Interval): - removed, err := runGarbageCollection(ctx, gc.CachePath, gc.MetadataClient) - if err != nil { - gc.Logger.Error(err, "running garbage collection") - } - if len(removed) > 0 { - gc.Logger.Info("removed stale cache entries", "removed entries", removed) - } + gc.runAndLog(ctx) + } + } +} + +func (gc *GarbageCollector) runAndLog(ctx context.Context) { + for _, path := range gc.CachePaths { + removed, err := runGarbageCollection(ctx, path, gc.MetadataClient) + if err != nil { + gc.Logger.Error(err, "running garbage collection", "path", path) + } + if len(removed) > 0 { + gc.Logger.Info("removed stale cache entries", "path", path, "removed entries", removed) } } } diff --git a/internal/catalogd/garbagecollection/garbage_collector_test.go b/internal/catalogd/garbagecollection/garbage_collector_test.go index 5c5fbc6cdf..1ac81bbf42 100644 --- a/internal/catalogd/garbagecollection/garbage_collector_test.go +++ b/internal/catalogd/garbagecollection/garbage_collector_test.go @@ -6,6 +6,7 @@ import ( "path/filepath" "testing" + "github.com/go-logr/logr" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -15,6 +16,50 @@ import ( ocv1 "github.com/operator-framework/operator-controller/api/v1" ) +// TestGarbageCollectorStoragePath verifies that the GarbageCollector cleans up +// orphaned temporary directories in the StoragePath (e.g. /var/cache/catalogs). +// These dirs are created by LocalDirV1.Store during catalog unpacking and normally +// removed by a deferred RemoveAll, but can persist if the process is killed. +func TestGarbageCollectorStoragePath(t *testing.T) { + ctx := context.Background() + scheme := runtime.NewScheme() + require.NoError(t, metav1.AddMetaToScheme(scheme)) + + storagePath := t.TempDir() + + // Known catalog — its directory must be preserved. + existingCatalog := &metav1.PartialObjectMetadata{ + TypeMeta: metav1.TypeMeta{Kind: "ClusterCatalog", APIVersion: ocv1.GroupVersion.String()}, + ObjectMeta: metav1.ObjectMeta{Name: "openshift-redhat-operators"}, + } + require.NoError(t, os.MkdirAll(filepath.Join(storagePath, existingCatalog.Name), 0700)) + + // Orphaned temp dirs left by a previously interrupted Store — must be removed. + for _, orphan := range []string{ + ".openshift-redhat-operators-4015104162", + ".openshift-redhat-operators-3615668944", + } { + require.NoError(t, os.MkdirAll(filepath.Join(storagePath, orphan), 0700)) + } + + metaClient := fake.NewSimpleMetadataClient(scheme, existingCatalog) + + gc := &GarbageCollector{ + CachePaths: []string{t.TempDir(), storagePath}, + Logger: logr.Discard(), + MetadataClient: metaClient, + Interval: 0, + } + gc.runAndLog(ctx) + + entries, err := os.ReadDir(storagePath) + require.NoError(t, err) + + // Only the real catalog dir should remain. + require.Len(t, entries, 1) + assert.Equal(t, existingCatalog.Name, entries[0].Name()) +} + func TestRunGarbageCollection(t *testing.T) { for _, tt := range []struct { name string diff --git a/internal/catalogd/storage/localdir.go b/internal/catalogd/storage/localdir.go index bbe708ca26..572ffbf363 100644 --- a/internal/catalogd/storage/localdir.go +++ b/internal/catalogd/storage/localdir.go @@ -11,6 +11,7 @@ import ( "net/url" "os" "path/filepath" + "strings" "sync" "golang.org/x/sync/errgroup" @@ -53,6 +54,13 @@ func (s *LocalDirV1) Store(ctx context.Context, catalog string, fsys fs.FS) erro if err := os.MkdirAll(s.RootDir, 0700); err != nil { return err } + + // Remove any orphaned temporary directories left by previously interrupted Store + // operations (e.g. after a process crash where deferred cleanup did not run). + if err := s.removeOrphanedTempDirs(catalog); err != nil { + return fmt.Errorf("error removing orphaned temp directories: %w", err) + } + tmpCatalogDir, err := os.MkdirTemp(s.RootDir, fmt.Sprintf(".%s-*", catalog)) if err != nil { return err @@ -107,6 +115,30 @@ func (s *LocalDirV1) Store(ctx context.Context, catalog string, fsys fs.FS) erro ) } +// removeOrphanedTempDirs removes temporary staging directories that were created by a +// previous Store call for the given catalog but were not cleaned up because the process +// was interrupted (e.g. killed by the OOM killer) before the deferred RemoveAll could run. +// Temp dirs use the prefix ".{catalog}-" as created by os.MkdirTemp. +// This method must be called while the write lock is held. +func (s *LocalDirV1) removeOrphanedTempDirs(catalog string) error { + entries, err := os.ReadDir(s.RootDir) + if os.IsNotExist(err) { + return nil + } + if err != nil { + return fmt.Errorf("error reading storage directory: %w", err) + } + prefix := fmt.Sprintf(".%s-", catalog) + for _, entry := range entries { + if strings.HasPrefix(entry.Name(), prefix) { + if err := os.RemoveAll(filepath.Join(s.RootDir, entry.Name())); err != nil { + return fmt.Errorf("error removing orphaned temp directory %q: %w", entry.Name(), err) + } + } + } + return nil +} + func (s *LocalDirV1) Delete(catalog string) error { s.m.Lock() defer s.m.Unlock() diff --git a/internal/catalogd/storage/localdir_test.go b/internal/catalogd/storage/localdir_test.go index 72aafba1c7..44387cb845 100644 --- a/internal/catalogd/storage/localdir_test.go +++ b/internal/catalogd/storage/localdir_test.go @@ -11,6 +11,7 @@ import ( "net/http/httptest" "net/url" "os" + "path/filepath" "strings" "sync" "testing" @@ -138,6 +139,71 @@ func TestLocalDirStoraget(t *testing.T) { } }, }, + { + name: "orphaned temp dirs from a previous interrupted store are cleaned up", + setup: func(t *testing.T) (*LocalDirV1, fs.FS) { + rootDir := t.TempDir() + s := &LocalDirV1{RootDir: rootDir} + + // Simulate temp dirs left behind by a previous crashed Store run. + for _, orphan := range []string{ + ".test-catalog-1234567890", + ".test-catalog-9876543210", + } { + if err := os.MkdirAll(filepath.Join(rootDir, orphan), 0700); err != nil { + t.Fatal(err) + } + } + // A dir for a different catalog must not be removed. + if err := os.MkdirAll(filepath.Join(rootDir, ".other-catalog-1111111111"), 0700); err != nil { + t.Fatal(err) + } + return s, createTestFS(t) + }, + test: func(t *testing.T, s *LocalDirV1, fsys fs.FS) { + const catalog = "test-catalog" + + if err := s.Store(context.Background(), catalog, fsys); err != nil { + t.Fatalf("Store failed: %v", err) + } + + entries, err := os.ReadDir(s.RootDir) + if err != nil { + t.Fatal(err) + } + + names := make([]string, 0, len(entries)) + for _, e := range entries { + names = append(names, e.Name()) + } + + // Orphaned dirs for "test-catalog" must be gone. + for _, orphan := range []string{".test-catalog-1234567890", ".test-catalog-9876543210"} { + for _, name := range names { + if name == orphan { + t.Errorf("expected orphaned temp dir %q to be removed, but it still exists", orphan) + } + } + } + + // The catalog dir itself must exist. + if !s.ContentExists(catalog) { + t.Error("catalog content should exist after store") + } + + // The unrelated catalog temp dir must still be present. + found := false + for _, name := range names { + if name == ".other-catalog-1111111111" { + found = true + break + } + } + if !found { + t.Error("temp dir for a different catalog should not have been removed") + } + }, + }, { name: "store with invalid permissions", setup: func(t *testing.T) (*LocalDirV1, fs.FS) {