From ad6f61e2eb46790c65b3baf4d545595b0d440932 Mon Sep 17 00:00:00 2001 From: Rob Nester Date: Wed, 1 Apr 2026 13:23:43 -0400 Subject: [PATCH] Remove filesystem image layer cache The image layer cache was not thread-safe and caused random redhat_manifests.redhat_manifests_missing violations when validating multiple images sharing layers (conforma/cli#1109). All Tekton tasks already disabled it via EC_CACHE=false, and benchmarking showed no measurable performance gain from caching. Remove the cache entirely rather than fixing it: - Remove imgCache/initCache and cache.Image wrapping from OCI client - Remove EC_CACHE env var from Tekton task definitions - Remove EC_CACHE=false from benchmark setup - Remove related test (TestCacheInit) and simplify TestImage - Remove stale caching TODO in Layer() Ref: EC-1706 Ref: EC-1669 Signed-off-by: Rob Nester Assisted-by: Cursor Made-with: Cursor --- benchmark/simple/simple.go | 3 - internal/utils/oci/client.go | 40 +--------- internal/utils/oci/client_test.go | 80 +++++++------------ .../0.1/verify-conforma-konflux-ta.yaml | 5 -- .../0.1/verify-conforma-konflux-vsa-ta.yaml | 8 -- .../0.1/verify-enterprise-contract.yaml | 5 -- 6 files changed, 28 insertions(+), 113 deletions(-) diff --git a/benchmark/simple/simple.go b/benchmark/simple/simple.go index d4c2ca201..a682155d6 100644 --- a/benchmark/simple/simple.go +++ b/benchmark/simple/simple.go @@ -97,9 +97,6 @@ func ec(dir string) func() { }`, dir) return func() { - - os.Setenv("EC_CACHE", "false") - if err := suite.Execute([]string{ "validate", "image", diff --git a/internal/utils/oci/client.go b/internal/utils/oci/client.go index 5e8a7b7d0..0fbbe377c 100644 --- a/internal/utils/oci/client.go +++ b/internal/utils/oci/client.go @@ -20,16 +20,11 @@ import ( "context" "fmt" "net/http" - "os" - "path" "runtime/trace" - "strconv" - "sync" "github.com/google/go-containerregistry/pkg/authn" "github.com/google/go-containerregistry/pkg/name" v1 "github.com/google/go-containerregistry/pkg/v1" - "github.com/google/go-containerregistry/pkg/v1/cache" "github.com/google/go-containerregistry/pkg/v1/remote" "github.com/sigstore/cosign/v3/pkg/cosign" "github.com/sigstore/cosign/v3/pkg/oci" @@ -43,28 +38,6 @@ type contextKey string const clientContextKey contextKey = "ec.oci.client" -var imgCache = sync.OnceValue(initCache) - -func initCache() cache.Cache { - // if a value was set and it is parsed as false, turn the cache off - if v, err := strconv.ParseBool(os.Getenv("EC_CACHE")); err == nil && !v { - return nil - } - - if userCache, err := os.UserCacheDir(); err != nil { - log.Debug("unable to find user cache directory") - return nil - } else { - imgCacheDir := path.Join(userCache, "ec", "images") - if err := os.MkdirAll(imgCacheDir, 0700); err != nil { - log.Debugf("unable to create temporary directory for image cache in %q: %v", imgCacheDir, err) - return nil - } - log.Debugf("using %q directory to store image cache", imgCacheDir) - return cache.NewFilesystemCache(imgCacheDir) - } -} - func CreateRemoteOptions(ctx context.Context) []remote.Option { backoff := remote.Backoff{ Duration: echttp.DefaultBackoff.Duration, @@ -217,16 +190,7 @@ func (c *defaultClient) Image(ref name.Reference) (v1.Image, error) { trace.Logf(c.ctx, "", "image=%q", ref) } - img, err := remote.Image(ref, c.opts...) - if err != nil { - return nil, err - } - - if c := imgCache(); c != nil { - img = cache.Image(img, c) - } - - return img, nil + return remote.Image(ref, c.opts...) } func (c *defaultClient) Layer(ref name.Digest) (v1.Layer, error) { @@ -236,8 +200,6 @@ func (c *defaultClient) Layer(ref name.Digest) (v1.Layer, error) { trace.Logf(c.ctx, "", "image=%q", ref) } - // TODO: Caching a layer directly is difficult and may not be possible, see: - // https://github.com/google/go-containerregistry/issues/1821 layer, err := remote.Layer(ref, c.opts...) if err != nil { return nil, fmt.Errorf("fetching layer: %w", err) diff --git a/internal/utils/oci/client_test.go b/internal/utils/oci/client_test.go index 4e954c736..068d251a3 100644 --- a/internal/utils/oci/client_test.go +++ b/internal/utils/oci/client_test.go @@ -19,7 +19,6 @@ package oci import ( - "bytes" "context" "fmt" "io" @@ -27,12 +26,12 @@ import ( "net/http" "net/http/httptest" "net/url" - "strings" "testing" "github.com/google/go-containerregistry/pkg/authn" "github.com/google/go-containerregistry/pkg/name" "github.com/google/go-containerregistry/pkg/registry" + v1 "github.com/google/go-containerregistry/pkg/v1" "github.com/google/go-containerregistry/pkg/v1/random" "github.com/google/go-containerregistry/pkg/v1/remote" "github.com/google/go-containerregistry/pkg/v1/types" @@ -101,25 +100,21 @@ func TestCreateRemoteOptions(t *testing.T) { } } -func TestCacheInit(t *testing.T) { - // by default the cache should be on - assert.NotNil(t, initCache()) - - t.Setenv("EC_CACHE", "false") - assert.Nil(t, initCache()) - - t.Cleanup(func() { - t.Setenv("EC_CACHE", "true") - assert.NotNil(t, initCache()) - }) +func readAndVerifyLayer(t *testing.T, l v1.Layer) { + t.Helper() + r, err := l.Uncompressed() + require.NoError(t, err) + defer r.Close() + _, err = io.ReadAll(r) + require.NoError(t, err) } func TestImage(t *testing.T) { - img, err := random.Image(4096, 2) + const numLayers = 2 + img, err := random.Image(4096, numLayers) require.NoError(t, err) - l := &bytes.Buffer{} - registry := httptest.NewServer(registry.New(registry.Logger(log.New(l, "", 0)))) + registry := httptest.NewServer(registry.New(registry.Logger(log.New(io.Discard, "", 0)))) t.Cleanup(registry.Close) u, err := url.Parse(registry.URL) @@ -130,35 +125,24 @@ func TestImage(t *testing.T) { require.NoError(t, remote.Push(ref, img)) - fetchFully := func() { - client := defaultClient{} - - img, err := client.Image(ref) - require.NoError(t, err) - layers, err := img.Layers() - require.NoError(t, err) - for _, l := range layers { - r, err := l.Uncompressed() - require.NoError(t, err) - _, err = io.ReadAll(r) - require.NoError(t, err) - } - } + client := defaultClient{ctx: context.Background()} + fetched, err := client.Image(ref) + require.NoError(t, err) - fetchFully() - fetchFully() - fetchFully() + layers, err := fetched.Layers() + require.NoError(t, err) + assert.Len(t, layers, numLayers) - blobDownloadCount := strings.Count(l.String(), "GET /v2/repository/image/blobs/sha256:") - assert.Equal(t, 5, blobDownloadCount) // three configs fetched each time and two layers fetched only once + for _, l := range layers { + readAndVerifyLayer(t, l) + } } func TestLayer(t *testing.T) { layer, err := random.Layer(1024, types.OCIUncompressedLayer) require.NoError(t, err) - l := &bytes.Buffer{} - registry := httptest.NewServer(registry.New(registry.Logger(log.New(l, "", 0)))) + registry := httptest.NewServer(registry.New(registry.Logger(log.New(io.Discard, "", 0)))) t.Cleanup(registry.Close) u, err := url.Parse(registry.URL) @@ -174,24 +158,14 @@ func TestLayer(t *testing.T) { require.NoError(t, err) layerURI := fmt.Sprintf("%s@%s", uri, digest) - fetchCount := 5 - for i := 0; i < fetchCount; i++ { - d, err := name.NewDigest(layerURI) - require.NoError(t, err) - - client := defaultClient{} - l, err := client.Layer(d) + d, err := name.NewDigest(layerURI) + require.NoError(t, err) - require.NoError(t, err) - r, err := l.Uncompressed() - require.NoError(t, err) - _, err = io.ReadAll(r) - require.NoError(t, err) - } + client := defaultClient{ctx: context.Background()} + fetched, err := client.Layer(d) + require.NoError(t, err) - msg := fmt.Sprintf("GET /v2/repository/image/blobs/%s", digest) - blobDownloadCount := strings.Count(l.String(), msg) - assert.Equal(t, fetchCount, blobDownloadCount) + readAndVerifyLayer(t, fetched) } func TestScopedAuth(t *testing.T) { diff --git a/tasks/verify-conforma-konflux-ta/0.1/verify-conforma-konflux-ta.yaml b/tasks/verify-conforma-konflux-ta/0.1/verify-conforma-konflux-ta.yaml index 6ea237378..506cc073c 100644 --- a/tasks/verify-conforma-konflux-ta/0.1/verify-conforma-konflux-ta.yaml +++ b/tasks/verify-conforma-konflux-ta/0.1/verify-conforma-konflux-ta.yaml @@ -505,11 +505,6 @@ spec: # value expected by Tekton Pipelines. NOTE: If params.SSL_CERT_DIR is empty, the value # will contain a trailing ":" - this is ok. value: "/tekton-custom-certs:/etc/ssl/certs:/etc/pki/tls/certs:/system/etc/security/cacerts:$(params.SSL_CERT_DIR)" - # The EC cache is used to avoid fetching the same image layers from the registry more than - # once. However, this is not thread safe. This results in inconsistencies when extracting - # files from an image, see https://github.com/conforma/cli/issues/1109 - - name: EC_CACHE - value: "false" computeResources: requests: cpu: 250m diff --git a/tasks/verify-conforma-konflux-vsa-ta/0.1/verify-conforma-konflux-vsa-ta.yaml b/tasks/verify-conforma-konflux-vsa-ta/0.1/verify-conforma-konflux-vsa-ta.yaml index 6f3e70c09..a4a45da82 100644 --- a/tasks/verify-conforma-konflux-vsa-ta/0.1/verify-conforma-konflux-vsa-ta.yaml +++ b/tasks/verify-conforma-konflux-vsa-ta/0.1/verify-conforma-konflux-vsa-ta.yaml @@ -290,8 +290,6 @@ spec: - name: SSL_CERT_DIR value: >- /tekton-custom-certs:/etc/ssl/certs:/etc/pki/tls/certs:/system/etc/security/cacerts:$(params.SSL_CERT_DIR) - - name: EC_CACHE - value: "false" computeResources: requests: cpu: 100m @@ -371,12 +369,6 @@ spec: # trailing ":" - this is ok. value: >- /tekton-custom-certs:/etc/ssl/certs:/etc/pki/tls/certs:/system/etc/security/cacerts:$(params.SSL_CERT_DIR) - # The EC cache is used to avoid fetching the same image layers - # from the registry more than once. However, this is not thread - # safe. This results in inconsistencies when extracting files from - # an image, see https://github.com/conforma/cli/issues/1109 - - name: EC_CACHE - value: "false" computeResources: requests: cpu: 100m diff --git a/tasks/verify-enterprise-contract/0.1/verify-enterprise-contract.yaml b/tasks/verify-enterprise-contract/0.1/verify-enterprise-contract.yaml index c4edae2f7..723ae159d 100644 --- a/tasks/verify-enterprise-contract/0.1/verify-enterprise-contract.yaml +++ b/tasks/verify-enterprise-contract/0.1/verify-enterprise-contract.yaml @@ -396,11 +396,6 @@ spec: # value expected by Tekton Pipelines. NOTE: If params.SSL_CERT_DIR is empty, the value # will contain a trailing ":" - this is ok. value: "/tekton-custom-certs:/etc/ssl/certs:/etc/pki/tls/certs:/system/etc/security/cacerts:$(params.SSL_CERT_DIR)" - # The EC cache is used to avoid fetching the same image layers from the registry more than - # once. However, this is not thread safe. This results in inconsistencies when extracting - # files from an image, see https://github.com/conforma/cli/issues/1109 - - name: EC_CACHE - value: "false" computeResources: requests: cpu: 1800m