Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -243,9 +243,10 @@ verify-crd-compatibility: $(CRD_DIFF) manifests
.PHONY: test
test: manifests generate fmt lint test-unit test-e2e test-regression #HELP Run all tests.

E2E_TIMEOUT ?= 10m
.PHONY: e2e
e2e: #EXHELP Run the e2e tests.
go test -count=1 -v ./test/e2e/features_test.go
go test -count=1 -v ./test/e2e/features_test.go -timeout=$(E2E_TIMEOUT)

E2E_REGISTRY_NAME := docker-registry
E2E_REGISTRY_NAMESPACE := operator-controller-e2e
Expand Down Expand Up @@ -317,6 +318,7 @@ test-experimental-e2e: GO_BUILD_EXTRA_FLAGS := -cover
test-experimental-e2e: COVERAGE_NAME := experimental-e2e
test-experimental-e2e: export MANIFEST := $(EXPERIMENTAL_RELEASE_MANIFEST)
test-experimental-e2e: PROMETHEUS_VALUES := helm/prom_experimental.yaml
test-experimental-e2e: E2E_TIMEOUT := 15m
test-experimental-e2e: run-internal image-registry prometheus e2e e2e-coverage kind-clean #HELP Run experimental e2e test suite on local kind cluster

.PHONY: prometheus
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,10 @@ type Sourcerer interface {
}

func (c *ClusterExtensionRevisionReconciler) SetupWithManager(mgr ctrl.Manager) error {
// Initialize probes once at setup time
if err := initializeProbes(); err != nil {
return err
}
Comment on lines 323 to +327
Copy link

Copilot AI Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

initializeProbes() is only invoked from SetupWithManager, but unit tests (and any direct calls to Reconcile without calling SetupWithManager) construct the reconciler and call Reconcile directly. In that case namespaceActiveProbe/pvcBoundProbe remain zero values, so the new readiness probes are effectively disabled (and could diverge from production behavior). Consider initializing these probes via a sync.Once guard that’s called from toBoxcutterRevision/Reconcile (and optionally from SetupWithManager), so probes are always available when reconciliation runs.

Copilot uses AI. Check for mistakes.
skipProgressDeadlineExceededPredicate := predicate.Funcs{
UpdateFunc: func(e event.UpdateEvent) bool {
rev, ok := e.ObjectNew.(*ocv1.ClusterExtensionRevision)
Expand Down Expand Up @@ -465,7 +469,7 @@ func (c *ClusterExtensionRevisionReconciler) toBoxcutterRevision(ctx context.Con
opts := []boxcutter.RevisionReconcileOption{
boxcutter.WithPreviousOwners(previousObjs),
boxcutter.WithProbe(boxcutter.ProgressProbeType, probing.And{
deploymentProbe, statefulSetProbe, crdProbe, issuerProbe, certProbe,
&namespaceActiveProbe, deploymentProbe, statefulSetProbe, crdProbe, issuerProbe, certProbe, &pvcBoundProbe,
}),
}

Expand Down Expand Up @@ -511,6 +515,28 @@ func EffectiveCollisionProtection(cp ...ocv1.CollisionProtection) ocv1.Collision
return ecp
}

// initializeProbes is used to initialize CEL probes at startup time, so we don't recreate them on every reconcile
func initializeProbes() error {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would actually wrap this function into sync.OnceValue and then call it from reconcile loop - it looks cleaner from workflow point of view.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would wrap this into sync.OnceValue and then call it from reconcile loop, it feels more natural and matches the flow.

nsCEL, err := probing.NewCELProbe(namespaceActiveCEL, `namespace phase must be "Active"`)
if err != nil {
return fmt.Errorf("constructing namespace CEL probe: %w", err)
}
pvcCEL, err := probing.NewCELProbe(pvcBoundCEL, `persistentvolumeclaim phase must be "Bound"`)
if err != nil {
return fmt.Errorf("constructing PVC CEL probe: %w", err)
}
namespaceActiveProbe = probing.GroupKindSelector{
GroupKind: schema.GroupKind{Group: corev1.GroupName, Kind: "Namespace"},
Prober: nsCEL,
}
pvcBoundProbe = probing.GroupKindSelector{
GroupKind: schema.GroupKind{Group: corev1.GroupName, Kind: "PersistentVolumeClaim"},
Prober: pvcCEL,
}

return nil
}

var (
deploymentProbe = &probing.GroupKindSelector{
GroupKind: schema.GroupKind{Group: appsv1.GroupName, Kind: "Deployment"},
Expand Down Expand Up @@ -542,6 +568,14 @@ var (
},
}

// namespaceActiveCEL is a CEL rule which asserts that the namespace is in "Active" phase
namespaceActiveCEL = `self.status.phase == "Active"`
namespaceActiveProbe probing.GroupKindSelector

// pvcBoundCEL is a CEL rule which asserts that the PVC is in "Bound" phase
pvcBoundCEL = `self.status.phase == "Bound"`
pvcBoundProbe probing.GroupKindSelector

	// deplStatefulSetProbe probes Deployment, StatefulSet objects.
deplStatefulSetProbe = &probing.ObservedGenerationProbe{
Prober: probing.And{
Expand Down
1 change: 1 addition & 0 deletions test/e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ Leverage existing steps for common operations:
Use these variables in YAML templates:

- `${NAME}`: Scenario-specific ClusterExtension name (e.g., `ce-123`)
- `${CER_NAME}`: Scenario-specific ClusterExtensionRevision name (e.g., `cer-123`; for applying CERs directly)
- `${TEST_NAMESPACE}`: Scenario-specific namespace (e.g., `ns-123`)
- `${CATALOG_IMG}`: Catalog image reference (defaults to in-cluster registry, overridable via `CATALOG_IMG` env var)

Expand Down
204 changes: 204 additions & 0 deletions test/e2e/features/revision.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
Feature: Install ClusterExtensionRevision

As an OLM user I would like to install a cluster extension revision.

Background:
Given OLM is available
And ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE}

@BoxcutterRuntime
Scenario: Install simple revision
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how is this scenario related to the logic introduced in PR? We have plenty of scenarios under install.feature that install exactly these resources.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO it's an easy way to prove that CERs can be used to install content without a ClusterExtension owner. I thought this was necessary since we had discussed the possibility that users might (but probably won't) use the API directly - plus its runtime is only about 1.5 seconds. I can remove it if you think it's unnecessary.

When ClusterExtensionRevision is applied
"""
apiVersion: olm.operatorframework.io/v1
kind: ClusterExtensionRevision
metadata:
annotations:
olm.operatorframework.io/service-account-name: olm-sa
olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE}
name: ${CER_NAME}
spec:
lifecycleState: Active
collisionProtection: Prevent
phases:
- name: policies
objects:
- object:
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: test-operator-network-policy
namespace: ${TEST_NAMESPACE}
spec:
podSelector: {}
policyTypes:
- Ingress
- name: deploy
objects:
- object:
apiVersion: v1
data:
httpd.sh: |
#!/bin/sh
echo "Version 1.2.0"
echo true > /var/www/started
echo true > /var/www/ready
echo true > /var/www/live
exec httpd -f -h /var/www -p 80
kind: ConfigMap
metadata:
name: httpd-script
namespace: ${TEST_NAMESPACE}
- object:
apiVersion: v1
data:
name: test-configmap
version: v1.2.0
kind: ConfigMap
metadata:
annotations:
shouldNotTemplate: |
The namespace is {{ $labels.namespace }}. The templated $labels.namespace is NOT expected to be processed by OLM's rendering engine for registry+v1 bundles.
name: test-configmap
namespace: ${TEST_NAMESPACE}
revision: 1
"""

And ClusterExtensionRevision "${CER_NAME}" reports Progressing as True with Reason Succeeded
And ClusterExtensionRevision "${CER_NAME}" reports Available as True with Reason ProbesSucceeded
And resource "networkpolicy/test-operator-network-policy" is installed
And resource "configmap/test-configmap" is installed

@BoxcutterRuntime
Scenario: Probe failure for PersistentVolumeClaim halts phase progression
When ClusterExtensionRevision is applied
"""
apiVersion: olm.operatorframework.io/v1
kind: ClusterExtensionRevision
metadata:
annotations:
olm.operatorframework.io/service-account-name: olm-sa
olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE}
name: ${CER_NAME}
spec:
lifecycleState: Active
collisionProtection: Prevent
phases:
- name: pvc
objects:
- object:
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: test-pvc
namespace: ${TEST_NAMESPACE}
spec:
accessModes:
- ReadWriteOnce
storageClassName: ""
volumeName: test-pv
resources:
requests:
storage: 1Mi
- name: configmap
objects:
- object:
apiVersion: v1
kind: ConfigMap
metadata:
annotations:
shouldNotTemplate: |
The namespace is {{ $labels.namespace }}. The templated $labels.namespace is NOT expected to be processed by OLM's rendering engine for registry+v1 bundles.
name: test-configmap
namespace: ${TEST_NAMESPACE}
data:
name: test-configmap
version: v1.2.0
revision: 1
"""

And resource "persistentvolumeclaim/test-pvc" is installed
And ClusterExtensionRevision "${CER_NAME}" reports Available as False with Reason ProbeFailure
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there any message we could assert?

And resource "configmap/test-configmap" is not installed

@BoxcutterRuntime
Scenario: Phases progress when PersistentVolumeClaim becomes "Bound"
When ClusterExtensionRevision is applied
"""
apiVersion: olm.operatorframework.io/v1
kind: ClusterExtensionRevision
metadata:
annotations:
olm.operatorframework.io/service-account-name: olm-sa
olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE}
name: ${CER_NAME}
spec:
lifecycleState: Active
collisionProtection: Prevent
phases:
- name: pvc
objects:
- object:
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: test-pvc
namespace: ${TEST_NAMESPACE}
spec:
accessModes:
- ReadWriteOnce
storageClassName: ""
volumeName: test-pv
resources:
requests:
storage: 1Mi
- object:
apiVersion: v1
kind: PersistentVolume
metadata:
name: test-pv
spec:
accessModes:
- ReadWriteOnce
capacity:
storage: 1Mi
claimRef:
apiVersion: v1
kind: PersistentVolumeClaim
name: test-pvc
namespace: ${TEST_NAMESPACE}
persistentVolumeReclaimPolicy: Delete
storageClassName: ""
volumeMode: Filesystem
local:
path: /tmp/persistent-volume
nodeAffinity:
required:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- a-node-name-that-should-not-exist
- name: configmap
objects:
- object:
apiVersion: v1
kind: ConfigMap
metadata:
annotations:
shouldNotTemplate: |
The namespace is {{ $labels.namespace }}. The templated $labels.namespace is NOT expected to be processed by OLM's rendering engine for registry+v1 bundles.
name: test-configmap
namespace: ${TEST_NAMESPACE}
data:
name: test-configmap
version: v1.2.0
revision: 1
"""

And ClusterExtensionRevision "${CER_NAME}" reports Progressing as True with Reason Succeeded
And ClusterExtensionRevision "${CER_NAME}" reports Available as True with Reason ProbesSucceeded
And resource "persistentvolume/test-pv" is installed
And resource "persistentvolumeclaim/test-pvc" is installed
And resource "configmap/test-configmap" is installed
35 changes: 20 additions & 15 deletions test/e2e/steps/hooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,13 @@ type resource struct {
}

type scenarioContext struct {
id string
namespace string
clusterExtensionName string
removedResources []unstructured.Unstructured
backGroundCmds []*exec.Cmd
metricsResponse map[string]string
id string
namespace string
clusterExtensionName string
clusterExtensionRevisionName string
removedResources []unstructured.Unstructured
backGroundCmds []*exec.Cmd
metricsResponse map[string]string

extensionObjects []client.Object
}
Expand Down Expand Up @@ -142,9 +143,10 @@ func CheckFeatureTags(ctx context.Context, sc *godog.Scenario) (context.Context,

func CreateScenarioContext(ctx context.Context, sc *godog.Scenario) (context.Context, error) {
scCtx := &scenarioContext{
id: sc.Id,
namespace: fmt.Sprintf("ns-%s", sc.Id),
clusterExtensionName: fmt.Sprintf("ce-%s", sc.Id),
id: sc.Id,
namespace: fmt.Sprintf("ns-%s", sc.Id),
clusterExtensionName: fmt.Sprintf("ce-%s", sc.Id),
clusterExtensionRevisionName: fmt.Sprintf("cer-%s", sc.Id),
}
return context.WithValue(ctx, scenarioContextKey, scCtx), nil
}
Expand Down Expand Up @@ -176,13 +178,16 @@ func ScenarioCleanup(ctx context.Context, _ *godog.Scenario, err error) (context
if sc.clusterExtensionName != "" {
forDeletion = append(forDeletion, resource{name: sc.clusterExtensionName, kind: "clusterextension"})
}
if sc.clusterExtensionRevisionName != "" {
forDeletion = append(forDeletion, resource{name: sc.clusterExtensionRevisionName, kind: "clusterextensionrevision"})
}
forDeletion = append(forDeletion, resource{name: sc.namespace, kind: "namespace"})
go func() {
for _, r := range forDeletion {
if _, err := k8sClient("delete", r.kind, r.name, "--ignore-not-found=true"); err != nil {
logger.Info("Error deleting resource", "name", r.name, "namespace", sc.namespace, "stderr", stderrOutput(err))
for _, r := range forDeletion {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how is this change related to the PR?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Before the change, in my test runs, the after hook would only delete a single item in forDeletion (the clusterextension) and would leave remaining resources (namespaces and owner-less clusterextensionrevisions) on the cluster. This fixed that and also made the deletion run more quickly by executing them asynchronously.

go func(res resource) {
if _, err := k8sClient("delete", res.kind, res.name, "--ignore-not-found=true"); err != nil {
logger.Info("Error deleting resource", "name", res.name, "namespace", sc.namespace, "stderr", stderrOutput(err))
}
}
}()
}(r)
}
return ctx, nil
}
Loading
Loading