From 944188b18176fdf0dc105918a7b13eda09a83d5f Mon Sep 17 00:00:00 2001 From: Daniel Franz Date: Tue, 3 Mar 2026 23:50:59 +0900 Subject: [PATCH] Namespace and PVC probes Adds readiness probing via CEL for namespaces and PVCs, to prevent subsequent phases from installing until their readiness checks have passed. Also adds e2e coverage via direct CER creation. Increased e2e timeout to 15m for experimental target. Signed-off-by: Daniel Franz --- Makefile | 4 +- .../clusterextensionrevision_controller.go | 36 ++++- test/e2e/README.md | 1 + test/e2e/features/revision.feature | 144 ++++++++++++++++++ test/e2e/steps/hooks.go | 35 +++-- test/e2e/steps/steps.go | 39 +++-- test/e2e/steps/testdata/rbac-template.yaml | 2 + 7 files changed, 232 insertions(+), 29 deletions(-) create mode 100644 test/e2e/features/revision.feature diff --git a/Makefile b/Makefile index 15d9b1319..646facd6b 100644 --- a/Makefile +++ b/Makefile @@ -243,9 +243,10 @@ verify-crd-compatibility: $(CRD_DIFF) manifests .PHONY: test test: manifests generate fmt lint test-unit test-e2e test-regression #HELP Run all tests. +E2E_TIMEOUT ?= 10m .PHONY: e2e e2e: #EXHELP Run the e2e tests. - go test -count=1 -v ./test/e2e/features_test.go + go test -count=1 -v ./test/e2e/features_test.go -timeout=$(E2E_TIMEOUT) E2E_REGISTRY_NAME := docker-registry E2E_REGISTRY_NAMESPACE := operator-controller-e2e @@ -317,6 +318,7 @@ test-experimental-e2e: GO_BUILD_EXTRA_FLAGS := -cover test-experimental-e2e: COVERAGE_NAME := experimental-e2e test-experimental-e2e: export MANIFEST := $(EXPERIMENTAL_RELEASE_MANIFEST) test-experimental-e2e: PROMETHEUS_VALUES := helm/prom_experimental.yaml +test-experimental-e2e: E2E_TIMEOUT := 15m test-experimental-e2e: run-internal image-registry prometheus e2e e2e-coverage kind-clean #HELP Run experimental e2e test suite on local kind cluster .PHONY: prometheus diff --git a/internal/operator-controller/controllers/clusterextensionrevision_controller.go b/internal/operator-controller/controllers/clusterextensionrevision_controller.go index 221d583fa..aa757b7f9 100644 --- a/internal/operator-controller/controllers/clusterextensionrevision_controller.go +++ b/internal/operator-controller/controllers/clusterextensionrevision_controller.go @@ -8,6 +8,7 @@ import ( "errors" "fmt" "strings" + "sync" "time" appsv1 "k8s.io/api/apps/v1" @@ -462,10 +463,13 @@ func (c *ClusterExtensionRevisionReconciler) toBoxcutterRevision(ctx context.Con previousObjs[i] = rev } + if err = initializeProbes(); err != nil { + return nil, nil, err + } opts := []boxcutter.RevisionReconcileOption{ boxcutter.WithPreviousOwners(previousObjs), boxcutter.WithProbe(boxcutter.ProgressProbeType, probing.And{ - deploymentProbe, statefulSetProbe, crdProbe, issuerProbe, certProbe, + &namespaceActiveProbe, deploymentProbe, statefulSetProbe, crdProbe, issuerProbe, certProbe, &pvcBoundProbe, }), } @@ -511,6 +515,28 @@ func EffectiveCollisionProtection(cp ...ocv1.CollisionProtection) ocv1.Collision return ecp } +// initializeProbes is used to initialize CEL probes once, so we don't recreate them on every reconcile +var initializeProbes = sync.OnceValue(func() error { + nsCEL, err := probing.NewCELProbe(namespaceActiveCEL, `namespace phase must be "Active"`) + if err != nil { + return fmt.Errorf("initializing namespace CEL probe: %w", err) + } + pvcCEL, err := probing.NewCELProbe(pvcBoundCEL, `persistentvolumeclaim phase must be "Bound"`) + if err != nil { + return fmt.Errorf("initializing PVC CEL probe: %w", err) + } + namespaceActiveProbe = probing.GroupKindSelector{ + GroupKind: schema.GroupKind{Group: corev1.GroupName, Kind: "Namespace"}, + Prober: nsCEL, + } + pvcBoundProbe = probing.GroupKindSelector{ + GroupKind: schema.GroupKind{Group: corev1.GroupName, Kind: "PersistentVolumeClaim"}, + Prober: pvcCEL, + } + + return nil +}) + var ( deploymentProbe = &probing.GroupKindSelector{ GroupKind: schema.GroupKind{Group: appsv1.GroupName, Kind: "Deployment"}, @@ -542,6 +568,14 @@ var ( }, } + // namespaceActiveCEL is a CEL rule which asserts that the namespace is in "Active" phase + namespaceActiveCEL = `self.status.phase == "Active"` + namespaceActiveProbe probing.GroupKindSelector + + // pvcBoundCEL is a CEL rule which asserts that the PVC is in "Bound" phase + pvcBoundCEL = `self.status.phase == "Bound"` + pvcBoundProbe probing.GroupKindSelector + // deplStaefulSetProbe probes Deployment, StatefulSet objects. deplStatefulSetProbe = &probing.ObservedGenerationProbe{ Prober: probing.And{ diff --git a/test/e2e/README.md b/test/e2e/README.md index 5f9957c5a..f41b612fd 100644 --- a/test/e2e/README.md +++ b/test/e2e/README.md @@ -199,6 +199,7 @@ Leverage existing steps for common operations: Use these variables in YAML templates: - `${NAME}`: Scenario-specific ClusterExtension name (e.g., `ce-123`) +- `${CER_NAME}`: Scenario-specific ClusterExtensionRevision name (e.g., `cer-123`; for applying CERs directly) - `${TEST_NAMESPACE}`: Scenario-specific namespace (e.g., `ns-123`) - `${CATALOG_IMG}`: Catalog image reference (defaults to in-cluster registry, overridable via `CATALOG_IMG` env var) diff --git a/test/e2e/features/revision.feature b/test/e2e/features/revision.feature new file mode 100644 index 000000000..352659861 --- /dev/null +++ b/test/e2e/features/revision.feature @@ -0,0 +1,144 @@ +Feature: Install ClusterExtensionRevision + + As an OLM user I would like to install a cluster extension revision. + + Background: + Given OLM is available + And ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE} + + @BoxcutterRuntime + Scenario: Probe failure for PersistentVolumeClaim halts phase progression + When ClusterExtensionRevision is applied + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtensionRevision + metadata: + annotations: + olm.operatorframework.io/service-account-name: olm-sa + olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE} + name: ${CER_NAME} + spec: + lifecycleState: Active + collisionProtection: Prevent + phases: + - name: pvc + objects: + - object: + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: test-pvc + namespace: ${TEST_NAMESPACE} + spec: + accessModes: + - ReadWriteOnce + storageClassName: "" + volumeName: test-pv + resources: + requests: + storage: 1Mi + - name: configmap + objects: + - object: + apiVersion: v1 + kind: ConfigMap + metadata: + annotations: + shouldNotTemplate: | + The namespace is {{ $labels.namespace }}. The templated $labels.namespace is NOT expected to be processed by OLM's rendering engine for registry+v1 bundles. + name: test-configmap + namespace: ${TEST_NAMESPACE} + data: + name: test-configmap + version: v1.2.0 + revision: 1 + """ + + And resource "persistentvolumeclaim/test-pvc" is installed + And ClusterExtensionRevision "${CER_NAME}" reports Available as False with Reason ProbeFailure and Message includes: + """ + persistentvolumeclaim phase must be "Bound" + """ + And resource "configmap/test-configmap" is not installed + + @BoxcutterRuntime + Scenario: Phases progress when PersistentVolumeClaim becomes "Bound" + When ClusterExtensionRevision is applied + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtensionRevision + metadata: + annotations: + olm.operatorframework.io/service-account-name: olm-sa + olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE} + name: ${CER_NAME} + spec: + lifecycleState: Active + collisionProtection: Prevent + phases: + - name: pvc + objects: + - object: + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: test-pvc + namespace: ${TEST_NAMESPACE} + spec: + accessModes: + - ReadWriteOnce + storageClassName: "" + volumeName: test-pv + resources: + requests: + storage: 1Mi + - object: + apiVersion: v1 + kind: PersistentVolume + metadata: + name: test-pv + spec: + accessModes: + - ReadWriteOnce + capacity: + storage: 1Mi + claimRef: + apiVersion: v1 + kind: PersistentVolumeClaim + name: test-pvc + namespace: ${TEST_NAMESPACE} + persistentVolumeReclaimPolicy: Delete + storageClassName: "" + volumeMode: Filesystem + local: + path: /tmp/persistent-volume + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: NotIn + values: + - a-node-name-that-should-not-exist + - name: configmap + objects: + - object: + apiVersion: v1 + kind: ConfigMap + metadata: + annotations: + shouldNotTemplate: | + The namespace is {{ $labels.namespace }}. The templated $labels.namespace is NOT expected to be processed by OLM's rendering engine for registry+v1 bundles. + name: test-configmap + namespace: ${TEST_NAMESPACE} + data: + name: test-configmap + version: v1.2.0 + revision: 1 + """ + + And ClusterExtensionRevision "${CER_NAME}" reports Progressing as True with Reason Succeeded + And ClusterExtensionRevision "${CER_NAME}" reports Available as True with Reason ProbesSucceeded + And resource "persistentvolume/test-pv" is installed + And resource "persistentvolumeclaim/test-pvc" is installed + And resource "configmap/test-configmap" is installed diff --git a/test/e2e/steps/hooks.go b/test/e2e/steps/hooks.go index 929138046..a4f518e4f 100644 --- a/test/e2e/steps/hooks.go +++ b/test/e2e/steps/hooks.go @@ -27,12 +27,13 @@ type resource struct { } type scenarioContext struct { - id string - namespace string - clusterExtensionName string - removedResources []unstructured.Unstructured - backGroundCmds []*exec.Cmd - metricsResponse map[string]string + id string + namespace string + clusterExtensionName string + clusterExtensionRevisionName string + removedResources []unstructured.Unstructured + backGroundCmds []*exec.Cmd + metricsResponse map[string]string extensionObjects []client.Object } @@ -142,9 +143,10 @@ func CheckFeatureTags(ctx context.Context, sc *godog.Scenario) (context.Context, func CreateScenarioContext(ctx context.Context, sc *godog.Scenario) (context.Context, error) { scCtx := &scenarioContext{ - id: sc.Id, - namespace: fmt.Sprintf("ns-%s", sc.Id), - clusterExtensionName: fmt.Sprintf("ce-%s", sc.Id), + id: sc.Id, + namespace: fmt.Sprintf("ns-%s", sc.Id), + clusterExtensionName: fmt.Sprintf("ce-%s", sc.Id), + clusterExtensionRevisionName: fmt.Sprintf("cer-%s", sc.Id), } return context.WithValue(ctx, scenarioContextKey, scCtx), nil } @@ -176,13 +178,16 @@ func ScenarioCleanup(ctx context.Context, _ *godog.Scenario, err error) (context if sc.clusterExtensionName != "" { forDeletion = append(forDeletion, resource{name: sc.clusterExtensionName, kind: "clusterextension"}) } + if sc.clusterExtensionRevisionName != "" { + forDeletion = append(forDeletion, resource{name: sc.clusterExtensionRevisionName, kind: "clusterextensionrevision"}) + } forDeletion = append(forDeletion, resource{name: sc.namespace, kind: "namespace"}) - go func() { - for _, r := range forDeletion { - if _, err := k8sClient("delete", r.kind, r.name, "--ignore-not-found=true"); err != nil { - logger.Info("Error deleting resource", "name", r.name, "namespace", sc.namespace, "stderr", stderrOutput(err)) + for _, r := range forDeletion { + go func(res resource) { + if _, err := k8sClient("delete", res.kind, res.name, "--ignore-not-found=true"); err != nil { + logger.Info("Error deleting resource", "name", res.name, "namespace", sc.namespace, "stderr", stderrOutput(err)) } - } - }() + }(r) + } return ctx, nil } diff --git a/test/e2e/steps/steps.go b/test/e2e/steps/steps.go index abea3fcb2..1082fbd91 100644 --- a/test/e2e/steps/steps.go +++ b/test/e2e/steps/steps.go @@ -71,7 +71,9 @@ func RegisterSteps(sc *godog.ScenarioContext) { sc.Step(`^(?i)ClusterExtension reports ([[:alnum:]]+) as ([[:alnum:]]+) with Reason ([[:alnum:]]+)$`, ClusterExtensionReportsConditionWithoutMsg) sc.Step(`^(?i)ClusterExtension reports ([[:alnum:]]+) as ([[:alnum:]]+)$`, ClusterExtensionReportsConditionWithoutReason) sc.Step(`^(?i)ClusterExtensionRevision "([^"]+)" reports ([[:alnum:]]+) as ([[:alnum:]]+) with Reason ([[:alnum:]]+)$`, ClusterExtensionRevisionReportsConditionWithoutMsg) + sc.Step(`^(?i)ClusterExtensionRevision "([^"]+)" reports ([[:alnum:]]+) as ([[:alnum:]]+) with Reason ([[:alnum:]]+) and Message includes:$`, ClusterExtensionRevisionReportsConditionWithMsg) sc.Step(`^(?i)ClusterExtension reports ([[:alnum:]]+) transition between (\d+) and (\d+) minutes since its creation$`, ClusterExtensionReportsConditionTransitionTime) + sc.Step(`^(?i)ClusterExtensionRevision is applied(?:\s+.*)?$`, ResourceIsApplied) sc.Step(`^(?i)ClusterExtensionRevision "([^"]+)" is archived$`, ClusterExtensionRevisionIsArchived) sc.Step(`^(?i)ClusterExtensionRevision "([^"]+)" contains annotation "([^"]+)" with value$`, ClusterExtensionRevisionHasAnnotationWithValue) sc.Step(`^(?i)ClusterExtensionRevision "([^"]+)" has label "([^"]+)" with value "([^"]+)"$`, ClusterExtensionRevisionHasLabelWithValue) @@ -80,7 +82,7 @@ func RegisterSteps(sc *godog.ScenarioContext) { sc.Step(`^(?i)resource "([^"]+)" is installed$`, ResourceAvailable) sc.Step(`^(?i)resource "([^"]+)" is available$`, ResourceAvailable) sc.Step(`^(?i)resource "([^"]+)" is removed$`, ResourceRemoved) - sc.Step(`^(?i)resource "([^"]+)" is eventually not found$`, ResourceEventuallyNotFound) + sc.Step(`^(?i)resource "([^"]+)" is (?:eventually not found|not installed)$`, ResourceEventuallyNotFound) sc.Step(`^(?i)resource "([^"]+)" exists$`, ResourceAvailable) sc.Step(`^(?i)resource is applied$`, ResourceIsApplied) sc.Step(`^(?i)resource "deployment/test-operator" reports as (not ready|ready)$`, MarkTestOperatorNotReady) @@ -186,6 +188,7 @@ func substituteScenarioVars(content string, sc *scenarioContext) string { vars := map[string]string{ "TEST_NAMESPACE": sc.namespace, "NAME": sc.clusterExtensionName, + "CER_NAME": sc.clusterExtensionRevisionName, "CATALOG_IMG": "docker-registry.operator-controller-e2e.svc.cluster.local:5000/e2e/test-catalog:v1", } if v, found := os.LookupEnv("CATALOG_IMG"); found { @@ -235,8 +238,8 @@ func ClusterExtensionVersionUpdate(ctx context.Context, version string) error { return err } -// ResourceIsApplied applies the provided YAML resource to the cluster and in case of ClusterExtension it captures its name in the test context -// so that it can be referred to in later steps with ${NAME} +// ResourceIsApplied applies the provided YAML resource to the cluster and in case of ClusterExtension or ClusterExtensionRevision it captures +// its name in the test context so that it can be referred to in later steps with ${NAME} or ${CER_NAME}, respectively func ResourceIsApplied(ctx context.Context, yamlTemplate *godog.DocString) error { sc := scenarioCtx(ctx) yamlContent := substituteScenarioVars(yamlTemplate.Content, sc) @@ -246,10 +249,12 @@ func ResourceIsApplied(ctx context.Context, yamlTemplate *godog.DocString) error } out, err := k8scliWithInput(yamlContent, "apply", "-f", "-") if err != nil { - return fmt.Errorf("failed to apply resource %v %w", out, err) + return fmt.Errorf("failed to apply resource %v; err: %w; stderr: %s", out, err, stderrOutput(err)) } if res.GetKind() == "ClusterExtension" { sc.clusterExtensionName = res.GetName() + } else if res.GetKind() == "ClusterExtensionRevision" { + sc.clusterExtensionRevisionName = res.GetName() } return nil } @@ -378,6 +383,17 @@ type msgMatchFn func(string) bool func alwaysMatch(_ string) bool { return true } +func messageFragmentComparison(ctx context.Context, msgFragment *godog.DocString) msgMatchFn { + msgCmp := alwaysMatch + if msgFragment != nil { + expectedMsgFragment := substituteScenarioVars(strings.Join(strings.Fields(msgFragment.Content), " "), scenarioCtx(ctx)) + msgCmp = func(actualMsg string) bool { + return strings.Contains(actualMsg, expectedMsgFragment) + } + } + return msgCmp +} + func waitForCondition(ctx context.Context, resourceType, resourceName, conditionType, conditionStatus string, conditionReason *string, msgCmp msgMatchFn) error { require.Eventually(godog.T(ctx), func() bool { v, err := k8sClient("get", resourceType, resourceName, "-o", fmt.Sprintf("jsonpath={.status.conditions[?(@.type==\"%s\")]}", conditionType)) @@ -425,14 +441,7 @@ func ClusterExtensionReportsCondition(ctx context.Context, conditionType, condit // ClusterExtensionReportsConditionWithMessageFragment waits for the ClusterExtension to have a condition matching // type, status, and reason, with a message containing the specified fragment. Polls with timeout. func ClusterExtensionReportsConditionWithMessageFragment(ctx context.Context, conditionType, conditionStatus, conditionReason string, msgFragment *godog.DocString) error { - msgCmp := alwaysMatch - if msgFragment != nil { - expectedMsgFragment := substituteScenarioVars(strings.Join(strings.Fields(msgFragment.Content), " "), scenarioCtx(ctx)) - msgCmp = func(actualMsg string) bool { - return strings.Contains(actualMsg, expectedMsgFragment) - } - } - return waitForExtensionCondition(ctx, conditionType, conditionStatus, &conditionReason, msgCmp) + return waitForExtensionCondition(ctx, conditionType, conditionStatus, &conditionReason, messageFragmentComparison(ctx, msgFragment)) } // ClusterExtensionReportsConditionWithoutMsg waits for the ClusterExtension to have a condition matching type, @@ -512,6 +521,12 @@ func ClusterExtensionRevisionReportsConditionWithoutMsg(ctx context.Context, rev return waitForCondition(ctx, "clusterextensionrevision", substituteScenarioVars(revisionName, scenarioCtx(ctx)), conditionType, conditionStatus, &conditionReason, nil) } +// ClusterExtensionRevisionReportsConditionWithMsg waits for the named ClusterExtensionRevision to have a condition +// matching type, status, reason, and message. Polls with timeout. +func ClusterExtensionRevisionReportsConditionWithMsg(ctx context.Context, revisionName, conditionType, conditionStatus, conditionReason string, msgFragment *godog.DocString) error { + return waitForCondition(ctx, "clusterextensionrevision", substituteScenarioVars(revisionName, scenarioCtx(ctx)), conditionType, conditionStatus, &conditionReason, messageFragmentComparison(ctx, msgFragment)) +} + // ClusterExtensionRevisionIsArchived waits for the named ClusterExtensionRevision to have Progressing=False // with reason Archived. Polls with timeout. func ClusterExtensionRevisionIsArchived(ctx context.Context, revisionName string) error { diff --git a/test/e2e/steps/testdata/rbac-template.yaml b/test/e2e/steps/testdata/rbac-template.yaml index 90138b9c6..2342f7b82 100644 --- a/test/e2e/steps/testdata/rbac-template.yaml +++ b/test/e2e/steps/testdata/rbac-template.yaml @@ -28,6 +28,8 @@ rules: - serviceaccounts - events - namespaces + - persistentvolumes + - persistentvolumeclaims verbs: [update, create, list, watch, get, delete, patch] - apiGroups: ["apps"] resources: