-
Notifications
You must be signed in to change notification settings - Fork 71
✨Namespace and PVC probes #2535
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -321,6 +321,10 @@ type Sourcerer interface { | |
| } | ||
|
|
||
| func (c *ClusterExtensionRevisionReconciler) SetupWithManager(mgr ctrl.Manager) error { | ||
| // Initialize probes once at setup time | ||
| if err := initializeProbes(); err != nil { | ||
| return err | ||
| } | ||
| skipProgressDeadlineExceededPredicate := predicate.Funcs{ | ||
| UpdateFunc: func(e event.UpdateEvent) bool { | ||
| rev, ok := e.ObjectNew.(*ocv1.ClusterExtensionRevision) | ||
|
|
@@ -465,7 +469,7 @@ func (c *ClusterExtensionRevisionReconciler) toBoxcutterRevision(ctx context.Con | |
| opts := []boxcutter.RevisionReconcileOption{ | ||
| boxcutter.WithPreviousOwners(previousObjs), | ||
| boxcutter.WithProbe(boxcutter.ProgressProbeType, probing.And{ | ||
| deploymentProbe, statefulSetProbe, crdProbe, issuerProbe, certProbe, | ||
| &namespaceActiveProbe, deploymentProbe, statefulSetProbe, crdProbe, issuerProbe, certProbe, &pvcBoundProbe, | ||
| }), | ||
| } | ||
|
|
||
|
|
@@ -511,6 +515,28 @@ func EffectiveCollisionProtection(cp ...ocv1.CollisionProtection) ocv1.Collision | |
| return ecp | ||
| } | ||
|
|
||
| // initializeProbes is used to initialize CEL probes at startup time, so we don't recreate them on every reconcile | ||
| func initializeProbes() error { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would actually wrap this function into
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would wrap this into |
||
| nsCEL, err := probing.NewCELProbe(namespaceActiveCEL, `namespace phase must be "Active"`) | ||
| if err != nil { | ||
| return fmt.Errorf("constructing namespace CEL probe: %w", err) | ||
| } | ||
| pvcCEL, err := probing.NewCELProbe(pvcBoundCEL, `persistentvolumeclaim phase must be "Bound"`) | ||
| if err != nil { | ||
| return fmt.Errorf("constructing PVC CEL probe: %w", err) | ||
| } | ||
| namespaceActiveProbe = probing.GroupKindSelector{ | ||
| GroupKind: schema.GroupKind{Group: corev1.GroupName, Kind: "Namespace"}, | ||
| Prober: nsCEL, | ||
| } | ||
| pvcBoundProbe = probing.GroupKindSelector{ | ||
| GroupKind: schema.GroupKind{Group: corev1.GroupName, Kind: "PersistentVolumeClaim"}, | ||
| Prober: pvcCEL, | ||
| } | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| var ( | ||
| deploymentProbe = &probing.GroupKindSelector{ | ||
| GroupKind: schema.GroupKind{Group: appsv1.GroupName, Kind: "Deployment"}, | ||
|
|
@@ -542,6 +568,14 @@ var ( | |
| }, | ||
| } | ||
|
|
||
| // namespaceActiveCEL is a CEL rule which asserts that the namespace is in "Active" phase | ||
| namespaceActiveCEL = `self.status.phase == "Active"` | ||
| namespaceActiveProbe probing.GroupKindSelector | ||
|
|
||
| // pvcBoundCEL is a CEL rule which asserts that the PVC is in "Bound" phase | ||
| pvcBoundCEL = `self.status.phase == "Bound"` | ||
| pvcBoundProbe probing.GroupKindSelector | ||
|
|
||
| // deplStaefulSetProbe probes Deployment, StatefulSet objects. | ||
| deplStatefulSetProbe = &probing.ObservedGenerationProbe{ | ||
| Prober: probing.And{ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,204 @@ | ||
| Feature: Install ClusterExtensionRevision | ||
|
|
||
| As an OLM user I would like to install a cluster extension revision. | ||
|
|
||
| Background: | ||
| Given OLM is available | ||
| And ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE} | ||
|
|
||
| @BoxcutterRuntime | ||
| Scenario: Install simple revision | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how is this scenario related to the logic introduced in PR? We have plenty of scenarios under
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IMO it's an easy way to prove that CERs can be used to install content without a ClusterExtension owner. I thought this was necessary since we had discussed the possibility that users might (but probably won't) use the API directly - plus its runtime is only about 1.5 seconds. I can remove it if you think it's unnecessary. |
||
| When ClusterExtensionRevision is applied | ||
| """ | ||
| apiVersion: olm.operatorframework.io/v1 | ||
| kind: ClusterExtensionRevision | ||
| metadata: | ||
| annotations: | ||
| olm.operatorframework.io/service-account-name: olm-sa | ||
| olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE} | ||
| name: ${CER_NAME} | ||
| spec: | ||
| lifecycleState: Active | ||
| collisionProtection: Prevent | ||
| phases: | ||
| - name: policies | ||
| objects: | ||
| - object: | ||
| apiVersion: networking.k8s.io/v1 | ||
| kind: NetworkPolicy | ||
| metadata: | ||
| name: test-operator-network-policy | ||
| namespace: ${TEST_NAMESPACE} | ||
| spec: | ||
| podSelector: {} | ||
| policyTypes: | ||
| - Ingress | ||
| - name: deploy | ||
| objects: | ||
| - object: | ||
| apiVersion: v1 | ||
| data: | ||
| httpd.sh: | | ||
| #!/bin/sh | ||
| echo "Version 1.2.0" | ||
| echo true > /var/www/started | ||
| echo true > /var/www/ready | ||
| echo true > /var/www/live | ||
| exec httpd -f -h /var/www -p 80 | ||
| kind: ConfigMap | ||
| metadata: | ||
| name: httpd-script | ||
| namespace: ${TEST_NAMESPACE} | ||
| - object: | ||
| apiVersion: v1 | ||
| data: | ||
| name: test-configmap | ||
| version: v1.2.0 | ||
| kind: ConfigMap | ||
| metadata: | ||
| annotations: | ||
| shouldNotTemplate: | | ||
| The namespace is {{ $labels.namespace }}. The templated $labels.namespace is NOT expected to be processed by OLM's rendering engine for registry+v1 bundles. | ||
| name: test-configmap | ||
| namespace: ${TEST_NAMESPACE} | ||
| revision: 1 | ||
| """ | ||
|
|
||
| And ClusterExtensionRevision "${CER_NAME}" reports Progressing as True with Reason Succeeded | ||
| And ClusterExtensionRevision "${CER_NAME}" reports Available as True with Reason ProbesSucceeded | ||
| And resource "networkpolicy/test-operator-network-policy" is installed | ||
| And resource "configmap/test-configmap" is installed | ||
|
|
||
| @BoxcutterRuntime | ||
| Scenario: Probe failure for PersistentVolumeClaim halts phase progression | ||
| When ClusterExtensionRevision is applied | ||
| """ | ||
| apiVersion: olm.operatorframework.io/v1 | ||
| kind: ClusterExtensionRevision | ||
| metadata: | ||
| annotations: | ||
| olm.operatorframework.io/service-account-name: olm-sa | ||
| olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE} | ||
| name: ${CER_NAME} | ||
| spec: | ||
| lifecycleState: Active | ||
| collisionProtection: Prevent | ||
| phases: | ||
| - name: pvc | ||
| objects: | ||
| - object: | ||
| apiVersion: v1 | ||
| kind: PersistentVolumeClaim | ||
| metadata: | ||
| name: test-pvc | ||
| namespace: ${TEST_NAMESPACE} | ||
| spec: | ||
| accessModes: | ||
| - ReadWriteOnce | ||
| storageClassName: "" | ||
| volumeName: test-pv | ||
| resources: | ||
| requests: | ||
| storage: 1Mi | ||
| - name: configmap | ||
| objects: | ||
| - object: | ||
| apiVersion: v1 | ||
| kind: ConfigMap | ||
| metadata: | ||
| annotations: | ||
| shouldNotTemplate: | | ||
| The namespace is {{ $labels.namespace }}. The templated $labels.namespace is NOT expected to be processed by OLM's rendering engine for registry+v1 bundles. | ||
| name: test-configmap | ||
| namespace: ${TEST_NAMESPACE} | ||
| data: | ||
| name: test-configmap | ||
| version: v1.2.0 | ||
| revision: 1 | ||
| """ | ||
|
|
||
| And resource "persistentvolumeclaim/test-pvc" is installed | ||
| And ClusterExtensionRevision "${CER_NAME}" reports Available as False with Reason ProbeFailure | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is there any message we could assert? |
||
| And resource "configmap/test-configmap" is not installed | ||
|
|
||
| @BoxcutterRuntime | ||
| Scenario: Phases progress when PersistentVolumeClaim becomes "Bound" | ||
| When ClusterExtensionRevision is applied | ||
| """ | ||
| apiVersion: olm.operatorframework.io/v1 | ||
| kind: ClusterExtensionRevision | ||
| metadata: | ||
| annotations: | ||
| olm.operatorframework.io/service-account-name: olm-sa | ||
| olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE} | ||
| name: ${CER_NAME} | ||
| spec: | ||
| lifecycleState: Active | ||
| collisionProtection: Prevent | ||
| phases: | ||
| - name: pvc | ||
| objects: | ||
| - object: | ||
| apiVersion: v1 | ||
| kind: PersistentVolumeClaim | ||
| metadata: | ||
| name: test-pvc | ||
| namespace: ${TEST_NAMESPACE} | ||
| spec: | ||
| accessModes: | ||
| - ReadWriteOnce | ||
| storageClassName: "" | ||
| volumeName: test-pv | ||
| resources: | ||
| requests: | ||
| storage: 1Mi | ||
| - object: | ||
| apiVersion: v1 | ||
| kind: PersistentVolume | ||
| metadata: | ||
| name: test-pv | ||
| spec: | ||
| accessModes: | ||
| - ReadWriteOnce | ||
| capacity: | ||
| storage: 1Mi | ||
| claimRef: | ||
| apiVersion: v1 | ||
| kind: PersistentVolumeClaim | ||
| name: test-pvc | ||
| namespace: ${TEST_NAMESPACE} | ||
| persistentVolumeReclaimPolicy: Delete | ||
| storageClassName: "" | ||
| volumeMode: Filesystem | ||
| local: | ||
| path: /tmp/persistent-volume | ||
| nodeAffinity: | ||
| required: | ||
| nodeSelectorTerms: | ||
| - matchExpressions: | ||
| - key: kubernetes.io/hostname | ||
| operator: NotIn | ||
| values: | ||
| - a-node-name-that-should-not-exist | ||
| - name: configmap | ||
| objects: | ||
| - object: | ||
| apiVersion: v1 | ||
| kind: ConfigMap | ||
| metadata: | ||
| annotations: | ||
| shouldNotTemplate: | | ||
| The namespace is {{ $labels.namespace }}. The templated $labels.namespace is NOT expected to be processed by OLM's rendering engine for registry+v1 bundles. | ||
| name: test-configmap | ||
| namespace: ${TEST_NAMESPACE} | ||
| data: | ||
| name: test-configmap | ||
| version: v1.2.0 | ||
| revision: 1 | ||
| """ | ||
|
|
||
| And ClusterExtensionRevision "${CER_NAME}" reports Progressing as True with Reason Succeeded | ||
| And ClusterExtensionRevision "${CER_NAME}" reports Available as True with Reason ProbesSucceeded | ||
| And resource "persistentvolume/test-pv" is installed | ||
| And resource "persistentvolumeclaim/test-pvc" is installed | ||
| And resource "configmap/test-configmap" is installed | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,12 +27,13 @@ type resource struct { | |
| } | ||
|
|
||
| type scenarioContext struct { | ||
| id string | ||
| namespace string | ||
| clusterExtensionName string | ||
| removedResources []unstructured.Unstructured | ||
| backGroundCmds []*exec.Cmd | ||
| metricsResponse map[string]string | ||
| id string | ||
| namespace string | ||
| clusterExtensionName string | ||
| clusterExtensionRevisionName string | ||
| removedResources []unstructured.Unstructured | ||
| backGroundCmds []*exec.Cmd | ||
| metricsResponse map[string]string | ||
|
|
||
| extensionObjects []client.Object | ||
| } | ||
|
|
@@ -142,9 +143,10 @@ func CheckFeatureTags(ctx context.Context, sc *godog.Scenario) (context.Context, | |
|
|
||
| func CreateScenarioContext(ctx context.Context, sc *godog.Scenario) (context.Context, error) { | ||
| scCtx := &scenarioContext{ | ||
| id: sc.Id, | ||
| namespace: fmt.Sprintf("ns-%s", sc.Id), | ||
| clusterExtensionName: fmt.Sprintf("ce-%s", sc.Id), | ||
| id: sc.Id, | ||
| namespace: fmt.Sprintf("ns-%s", sc.Id), | ||
| clusterExtensionName: fmt.Sprintf("ce-%s", sc.Id), | ||
| clusterExtensionRevisionName: fmt.Sprintf("cer-%s", sc.Id), | ||
| } | ||
| return context.WithValue(ctx, scenarioContextKey, scCtx), nil | ||
| } | ||
|
|
@@ -176,13 +178,16 @@ func ScenarioCleanup(ctx context.Context, _ *godog.Scenario, err error) (context | |
| if sc.clusterExtensionName != "" { | ||
| forDeletion = append(forDeletion, resource{name: sc.clusterExtensionName, kind: "clusterextension"}) | ||
| } | ||
| if sc.clusterExtensionRevisionName != "" { | ||
| forDeletion = append(forDeletion, resource{name: sc.clusterExtensionRevisionName, kind: "clusterextensionrevision"}) | ||
| } | ||
| forDeletion = append(forDeletion, resource{name: sc.namespace, kind: "namespace"}) | ||
| go func() { | ||
| for _, r := range forDeletion { | ||
| if _, err := k8sClient("delete", r.kind, r.name, "--ignore-not-found=true"); err != nil { | ||
| logger.Info("Error deleting resource", "name", r.name, "namespace", sc.namespace, "stderr", stderrOutput(err)) | ||
| for _, r := range forDeletion { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how is this change related to the PR?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Before the change, in my test runs, the after hook would only delete a single item in |
||
| go func(res resource) { | ||
| if _, err := k8sClient("delete", res.kind, res.name, "--ignore-not-found=true"); err != nil { | ||
| logger.Info("Error deleting resource", "name", res.name, "namespace", sc.namespace, "stderr", stderrOutput(err)) | ||
| } | ||
| } | ||
| }() | ||
| }(r) | ||
| } | ||
| return ctx, nil | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
initializeProbes()is only invoked fromSetupWithManager, but unit tests (and any direct calls toReconcilewithout callingSetupWithManager) construct the reconciler and callReconciledirectly. In that casenamespaceActiveProbe/pvcBoundProberemain zero values, so the new readiness probes are effectively disabled (and could diverge from production behavior). Consider initializing these probes via async.Onceguard that’s called fromtoBoxcutterRevision/Reconcile(and optionally fromSetupWithManager), so probes are always available when reconciliation runs.