Skip to content

Commit 1f0a99f

Browse files
committed
Namespace and PVC probes
Adds readiness probing via CEL for namespaces and PVCs, to prevent subsequent phases from installing until their readiness checks have passed. Also adds e2e coverage via direct CER creation. Increased e2e timeout to 15m for experimental target. Signed-off-by: Daniel Franz <dfranz@redhat.com>
1 parent 55473d8 commit 1f0a99f

File tree

7 files changed

+273
-21
lines changed

7 files changed

+273
-21
lines changed

Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,9 +243,10 @@ verify-crd-compatibility: $(CRD_DIFF) manifests
243243
.PHONY: test
244244
test: manifests generate fmt lint test-unit test-e2e test-regression #HELP Run all tests.
245245

246+
E2E_TIMEOUT ?= 10m
246247
.PHONY: e2e
247248
e2e: #EXHELP Run the e2e tests.
248-
go test -count=1 -v ./test/e2e/features_test.go
249+
go test -count=1 -v ./test/e2e/features_test.go -timeout=$(E2E_TIMEOUT)
249250

250251
E2E_REGISTRY_NAME := docker-registry
251252
E2E_REGISTRY_NAMESPACE := operator-controller-e2e
@@ -317,6 +318,7 @@ test-experimental-e2e: GO_BUILD_EXTRA_FLAGS := -cover
317318
test-experimental-e2e: COVERAGE_NAME := experimental-e2e
318319
test-experimental-e2e: export MANIFEST := $(EXPERIMENTAL_RELEASE_MANIFEST)
319320
test-experimental-e2e: PROMETHEUS_VALUES := helm/prom_experimental.yaml
321+
test-experimental-e2e: E2E_TIMEOUT := 15m
320322
test-experimental-e2e: run-internal image-registry prometheus e2e e2e-coverage kind-clean #HELP Run experimental e2e test suite on local kind cluster
321323

322324
.PHONY: prometheus

internal/operator-controller/controllers/clusterextensionrevision_controller.go

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,10 @@ type Sourcerer interface {
321321
}
322322

323323
func (c *ClusterExtensionRevisionReconciler) SetupWithManager(mgr ctrl.Manager) error {
324+
// Initialize probes once at setup time
325+
if err := initializeProbes(); err != nil {
326+
return err
327+
}
324328
skipProgressDeadlineExceededPredicate := predicate.Funcs{
325329
UpdateFunc: func(e event.UpdateEvent) bool {
326330
rev, ok := e.ObjectNew.(*ocv1.ClusterExtensionRevision)
@@ -465,7 +469,7 @@ func (c *ClusterExtensionRevisionReconciler) toBoxcutterRevision(ctx context.Con
465469
opts := []boxcutter.RevisionReconcileOption{
466470
boxcutter.WithPreviousOwners(previousObjs),
467471
boxcutter.WithProbe(boxcutter.ProgressProbeType, probing.And{
468-
deploymentProbe, statefulSetProbe, crdProbe, issuerProbe, certProbe,
472+
&namespaceActiveProbe, deploymentProbe, statefulSetProbe, crdProbe, issuerProbe, certProbe, &pvcBoundProbe,
469473
}),
470474
}
471475

@@ -511,6 +515,28 @@ func EffectiveCollisionProtection(cp ...ocv1.CollisionProtection) ocv1.Collision
511515
return ecp
512516
}
513517

518+
// initializeProbes is used to initialize CEL probes at startup time, so we don't recreate them on every reconcile
519+
func initializeProbes() error {
520+
nsCEL, err := probing.NewCELProbe(namespaceActiveCEL, `namespace phase must be "Active"`)
521+
if err != nil {
522+
return fmt.Errorf("constructing namespace CEL probe: %w", err)
523+
}
524+
pvcCEL, err := probing.NewCELProbe(pvcBoundCEL, `persistentvolumeclaim phase must be "Bound"`)
525+
if err != nil {
526+
return fmt.Errorf("constructing PVC CEL probe: %w", err)
527+
}
528+
namespaceActiveProbe = probing.GroupKindSelector{
529+
GroupKind: schema.GroupKind{Group: corev1.GroupName, Kind: "Namespace"},
530+
Prober: nsCEL,
531+
}
532+
pvcBoundProbe = probing.GroupKindSelector{
533+
GroupKind: schema.GroupKind{Group: corev1.GroupName, Kind: "PersistentVolumeClaim"},
534+
Prober: pvcCEL,
535+
}
536+
537+
return nil
538+
}
539+
514540
var (
515541
deploymentProbe = &probing.GroupKindSelector{
516542
GroupKind: schema.GroupKind{Group: appsv1.GroupName, Kind: "Deployment"},
@@ -542,6 +568,14 @@ var (
542568
},
543569
}
544570

571+
// namespaceActiveCEL is a CEL rule which asserts that the namespace is in "Active" phase
572+
namespaceActiveCEL = `self.status.phase == "Active"`
573+
namespaceActiveProbe probing.GroupKindSelector
574+
575+
// pvcBoundCEL is a CEL rule which asserts that the PVC is in "Bound" phase
576+
pvcBoundCEL = `self.status.phase == "Bound"`
577+
pvcBoundProbe probing.GroupKindSelector
578+
545579
// deplStatefulSetProbe probes Deployment, StatefulSet objects.
546580
deplStatefulSetProbe = &probing.ObservedGenerationProbe{
547581
Prober: probing.And{

test/e2e/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ Leverage existing steps for common operations:
199199
Use these variables in YAML templates:
200200

201201
- `${NAME}`: Scenario-specific ClusterExtension name (e.g., `ce-123`)
202+
- `${CER_NAME}`: Scenario-specific ClusterExtensionRevision name (e.g., `cer-123`; for applying CERs directly)
202203
- `${TEST_NAMESPACE}`: Scenario-specific namespace (e.g., `ns-123`)
203204
- `${CATALOG_IMG}`: Catalog image reference (defaults to in-cluster registry, overridable via `CATALOG_IMG` env var)
204205

test/e2e/features/revision.feature

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
Feature: Install ClusterExtensionRevision
2+
3+
As an OLM user I would like to install a cluster extension revision.
4+
5+
Background:
6+
Given OLM is available
7+
And ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE}
8+
9+
@BoxcutterRuntime
10+
Scenario: Install simple revision
11+
When ClusterExtensionRevision is applied
12+
"""
13+
apiVersion: olm.operatorframework.io/v1
14+
kind: ClusterExtensionRevision
15+
metadata:
16+
annotations:
17+
olm.operatorframework.io/service-account-name: olm-sa
18+
olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE}
19+
name: ${CER_NAME}
20+
spec:
21+
lifecycleState: Active
22+
collisionProtection: Prevent
23+
phases:
24+
- name: policies
25+
objects:
26+
- object:
27+
apiVersion: networking.k8s.io/v1
28+
kind: NetworkPolicy
29+
metadata:
30+
name: test-operator-network-policy
31+
namespace: ${TEST_NAMESPACE}
32+
spec:
33+
podSelector: {}
34+
policyTypes:
35+
- Ingress
36+
- name: deploy
37+
objects:
38+
- object:
39+
apiVersion: v1
40+
data:
41+
httpd.sh: |
42+
#!/bin/sh
43+
echo "Version 1.2.0"
44+
echo true > /var/www/started
45+
echo true > /var/www/ready
46+
echo true > /var/www/live
47+
exec httpd -f -h /var/www -p 80
48+
kind: ConfigMap
49+
metadata:
50+
name: httpd-script
51+
namespace: ${TEST_NAMESPACE}
52+
- object:
53+
apiVersion: v1
54+
data:
55+
name: test-configmap
56+
version: v1.2.0
57+
kind: ConfigMap
58+
metadata:
59+
annotations:
60+
shouldNotTemplate: |
61+
The namespace is {{ $labels.namespace }}. The templated $labels.namespace is NOT expected to be processed by OLM's rendering engine for registry+v1 bundles.
62+
name: test-configmap
63+
namespace: ${TEST_NAMESPACE}
64+
revision: 1
65+
"""
66+
67+
And ClusterExtensionRevision "${CER_NAME}" reports Progressing as True with Reason Succeeded
68+
And ClusterExtensionRevision "${CER_NAME}" reports Available as True with Reason ProbesSucceeded
69+
And resource "networkpolicy/test-operator-network-policy" is installed
70+
And resource "configmap/test-configmap" is installed
71+
72+
@BoxcutterRuntime
73+
Scenario: Probe failure for PersistentVolumeClaim halts phase progression
74+
When ClusterExtensionRevision is applied
75+
"""
76+
apiVersion: olm.operatorframework.io/v1
77+
kind: ClusterExtensionRevision
78+
metadata:
79+
annotations:
80+
olm.operatorframework.io/service-account-name: olm-sa
81+
olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE}
82+
name: ${CER_NAME}
83+
spec:
84+
lifecycleState: Active
85+
collisionProtection: Prevent
86+
phases:
87+
- name: pvc
88+
objects:
89+
- object:
90+
apiVersion: v1
91+
kind: PersistentVolumeClaim
92+
metadata:
93+
name: test-pvc
94+
namespace: ${TEST_NAMESPACE}
95+
spec:
96+
accessModes:
97+
- ReadWriteOnce
98+
storageClassName: ""
99+
volumeName: test-pv
100+
resources:
101+
requests:
102+
storage: 1Mi
103+
- name: configmap
104+
objects:
105+
- object:
106+
apiVersion: v1
107+
kind: ConfigMap
108+
metadata:
109+
annotations:
110+
shouldNotTemplate: |
111+
The namespace is {{ $labels.namespace }}. The templated $labels.namespace is NOT expected to be processed by OLM's rendering engine for registry+v1 bundles.
112+
name: test-configmap
113+
namespace: ${TEST_NAMESPACE}
114+
data:
115+
name: test-configmap
116+
version: v1.2.0
117+
revision: 1
118+
"""
119+
120+
And resource "persistentvolumeclaim/test-pvc" is installed
121+
And ClusterExtensionRevision "${CER_NAME}" reports Available as False with Reason ProbeFailure
122+
And resource "configmap/test-configmap" is not installed
123+
124+
@BoxcutterRuntime
125+
Scenario: Phases progress when PersistentVolumeClaim becomes "Bound"
126+
When ClusterExtensionRevision is applied
127+
"""
128+
apiVersion: olm.operatorframework.io/v1
129+
kind: ClusterExtensionRevision
130+
metadata:
131+
annotations:
132+
olm.operatorframework.io/service-account-name: olm-sa
133+
olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE}
134+
name: ${CER_NAME}
135+
spec:
136+
lifecycleState: Active
137+
collisionProtection: Prevent
138+
phases:
139+
- name: pvc
140+
objects:
141+
- object:
142+
apiVersion: v1
143+
kind: PersistentVolumeClaim
144+
metadata:
145+
name: test-pvc
146+
namespace: ${TEST_NAMESPACE}
147+
spec:
148+
accessModes:
149+
- ReadWriteOnce
150+
storageClassName: ""
151+
volumeName: test-pv
152+
resources:
153+
requests:
154+
storage: 1Mi
155+
- object:
156+
apiVersion: v1
157+
kind: PersistentVolume
158+
metadata:
159+
name: test-pv
160+
spec:
161+
accessModes:
162+
- ReadWriteOnce
163+
capacity:
164+
storage: 1Mi
165+
claimRef:
166+
apiVersion: v1
167+
kind: PersistentVolumeClaim
168+
name: test-pvc
169+
namespace: ${TEST_NAMESPACE}
170+
persistentVolumeReclaimPolicy: Delete
171+
storageClassName: ""
172+
volumeMode: Filesystem
173+
local:
174+
path: /tmp/persistent-volume
175+
nodeAffinity:
176+
required:
177+
nodeSelectorTerms:
178+
- matchExpressions:
179+
- key: kubernetes.io/hostname
180+
operator: NotIn
181+
values:
182+
- a-node-name-that-should-not-exist
183+
- name: configmap
184+
objects:
185+
- object:
186+
apiVersion: v1
187+
kind: ConfigMap
188+
metadata:
189+
annotations:
190+
shouldNotTemplate: |
191+
The namespace is {{ $labels.namespace }}. The templated $labels.namespace is NOT expected to be processed by OLM's rendering engine for registry+v1 bundles.
192+
name: test-configmap
193+
namespace: ${TEST_NAMESPACE}
194+
data:
195+
name: test-configmap
196+
version: v1.2.0
197+
revision: 1
198+
"""
199+
200+
And ClusterExtensionRevision "${CER_NAME}" reports Progressing as True with Reason Succeeded
201+
And ClusterExtensionRevision "${CER_NAME}" reports Available as True with Reason ProbesSucceeded
202+
And resource "persistentvolume/test-pv" is installed
203+
And resource "persistentvolumeclaim/test-pvc" is installed
204+
And resource "configmap/test-configmap" is installed

test/e2e/steps/hooks.go

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,13 @@ type resource struct {
2727
}
2828

2929
type scenarioContext struct {
30-
id string
31-
namespace string
32-
clusterExtensionName string
33-
removedResources []unstructured.Unstructured
34-
backGroundCmds []*exec.Cmd
35-
metricsResponse map[string]string
30+
id string
31+
namespace string
32+
clusterExtensionName string
33+
clusterExtensionRevisionName string
34+
removedResources []unstructured.Unstructured
35+
backGroundCmds []*exec.Cmd
36+
metricsResponse map[string]string
3637

3738
extensionObjects []client.Object
3839
}
@@ -142,9 +143,10 @@ func CheckFeatureTags(ctx context.Context, sc *godog.Scenario) (context.Context,
142143

143144
func CreateScenarioContext(ctx context.Context, sc *godog.Scenario) (context.Context, error) {
144145
scCtx := &scenarioContext{
145-
id: sc.Id,
146-
namespace: fmt.Sprintf("ns-%s", sc.Id),
147-
clusterExtensionName: fmt.Sprintf("ce-%s", sc.Id),
146+
id: sc.Id,
147+
namespace: fmt.Sprintf("ns-%s", sc.Id),
148+
clusterExtensionName: fmt.Sprintf("ce-%s", sc.Id),
149+
clusterExtensionRevisionName: fmt.Sprintf("cer-%s", sc.Id),
148150
}
149151
return context.WithValue(ctx, scenarioContextKey, scCtx), nil
150152
}
@@ -176,13 +178,16 @@ func ScenarioCleanup(ctx context.Context, _ *godog.Scenario, err error) (context
176178
if sc.clusterExtensionName != "" {
177179
forDeletion = append(forDeletion, resource{name: sc.clusterExtensionName, kind: "clusterextension"})
178180
}
181+
if sc.clusterExtensionRevisionName != "" {
182+
forDeletion = append(forDeletion, resource{name: sc.clusterExtensionRevisionName, kind: "clusterextensionrevision"})
183+
}
179184
forDeletion = append(forDeletion, resource{name: sc.namespace, kind: "namespace"})
180-
go func() {
181-
for _, r := range forDeletion {
182-
if _, err := k8sClient("delete", r.kind, r.name, "--ignore-not-found=true"); err != nil {
183-
logger.Info("Error deleting resource", "name", r.name, "namespace", sc.namespace, "stderr", stderrOutput(err))
185+
for _, r := range forDeletion {
186+
go func(res resource) {
187+
if _, err := k8sClient("delete", res.kind, res.name, "--ignore-not-found=true"); err != nil {
188+
logger.Info("Error deleting resource", "name", res.name, "namespace", sc.namespace, "stderr", stderrOutput(err))
184189
}
185-
}
186-
}()
190+
}(r)
191+
}
187192
return ctx, nil
188193
}

0 commit comments

Comments
 (0)