From 77b6e9d330ebd42e97f9eedbabcde6f063a101ea Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 8 Nov 2024 13:43:17 +0100 Subject: [PATCH 1/7] WIP, untested --- .../nifi/pages/usage_guide/monitoring.adoc | 2 ++ rust/operator-binary/src/controller.rs | 25 +++++++------- .../operator-binary/src/reporting_task/mod.rs | 34 ++++++++++++------- .../{60-assert.yaml => 60-assert.yaml.j2} | 2 ++ 4 files changed, 38 insertions(+), 25 deletions(-) rename tests/templates/kuttl/smoke/{60-assert.yaml => 60-assert.yaml.j2} (82%) diff --git a/docs/modules/nifi/pages/usage_guide/monitoring.adoc b/docs/modules/nifi/pages/usage_guide/monitoring.adoc index 82f1e688..4a0400a0 100644 --- a/docs/modules/nifi/pages/usage_guide/monitoring.adoc +++ b/docs/modules/nifi/pages/usage_guide/monitoring.adoc @@ -3,6 +3,8 @@ :k8s-job: https://kubernetes.io/docs/concepts/workloads/controllers/job/ :k8s-network-policies: https://kubernetes.io/docs/concepts/services-networking/network-policies/ +TODO: Update docs + The operator automatically configures NiFi to export Prometheus metrics. This is done by creating a {k8s-job}[Job] that connects to NiFi and configures a reporting task. diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index df4282d6..c6e3111a 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -85,7 +85,7 @@ use crate::{ }, operations::{graceful_shutdown::add_graceful_shutdown_config, pdb::add_pdbs}, product_logging::{extend_role_group_config_map, resolve_vector_aggregator_address}, - reporting_task::{self, build_reporting_task, build_reporting_task_service_name}, + reporting_task::{self, build_maybe_reporting_task, build_reporting_task_service_name}, security::{ authentication::{ NifiAuthenticationConfig, AUTHORIZERS_XML_FILE_NAME, @@ -616,24 +616,25 @@ pub async fn reconcile_nifi( // Only add the reporting task in case it is enabled. 
if nifi.spec.cluster_config.create_reporting_task_job.enabled { - let (reporting_task_job, reporting_task_service) = build_reporting_task( + if let Some((reporting_task_job, reporting_task_service)) = build_maybe_reporting_task( nifi, &resolved_product_image, &client.kubernetes_cluster_info, &nifi_authentication_config, &rbac_sa.name_any(), ) - .context(ReportingTaskSnafu)?; - - cluster_resources - .add(client, reporting_task_service) - .await - .context(ApplyCreateReportingTaskServiceSnafu)?; + .context(ReportingTaskSnafu)? + { + cluster_resources + .add(client, reporting_task_service) + .await + .context(ApplyCreateReportingTaskServiceSnafu)?; - cluster_resources - .add(client, reporting_task_job) - .await - .context(ApplyCreateReportingTaskJobSnafu)?; + cluster_resources + .add(client, reporting_task_job) + .await + .context(ApplyCreateReportingTaskJobSnafu)?; + } } // Remove any orphaned resources that still exist in k8s, but have not been added to diff --git a/rust/operator-binary/src/reporting_task/mod.rs b/rust/operator-binary/src/reporting_task/mod.rs index b0a6b59e..cf32756a 100644 --- a/rust/operator-binary/src/reporting_task/mod.rs +++ b/rust/operator-binary/src/reporting_task/mod.rs @@ -112,7 +112,8 @@ pub enum Error { type Result = std::result::Result; -/// Build required resources to create the reporting task in NiFi. +/// Build required resources to create the reporting task in NiFi versions 1.x. +/// /// This will return /// * a Job that creates and runs the reporting task via the NiFi Rest API. /// * a Service that contains of one single NiFi node. @@ -122,23 +123,30 @@ type Result = std::result::Result; /// from SingleUserLoginIdentityProvider to the FQDN of the pod. /// The NiFi role service will randomly delegate to different NiFi nodes which will /// then fail requests to other nodes. -pub fn build_reporting_task( +/// +/// NiFi 2.x and above automatically serve Prometheus metrics via the API, but as of 2024-11-08 +/// require authentication.
+pub fn build_maybe_reporting_task( nifi: &NifiCluster, resolved_product_image: &ResolvedProductImage, cluster_info: &KubernetesClusterInfo, nifi_auth_config: &NifiAuthenticationConfig, sa_name: &str, -) -> Result<(Job, Service)> { - Ok(( - build_reporting_task_job( - nifi, - resolved_product_image, - cluster_info, - nifi_auth_config, - sa_name, - )?, - build_reporting_task_service(nifi, resolved_product_image)?, - )) +) -> Result<Option<(Job, Service)>> { + if resolved_product_image.product_version.starts_with("1.") { + Ok(Some(( + build_reporting_task_job( + nifi, + resolved_product_image, + cluster_info, + nifi_auth_config, + sa_name, + )?, + build_reporting_task_service(nifi, resolved_product_image)?, + ))) + } else { + Ok(None) + } } /// Return the name of the reporting task. diff --git a/tests/templates/kuttl/smoke/60-assert.yaml b/tests/templates/kuttl/smoke/60-assert.yaml.j2 similarity index 82% rename from tests/templates/kuttl/smoke/60-assert.yaml rename to tests/templates/kuttl/smoke/60-assert.yaml.j2 index 1d531af0..d3240ee8 100644 --- a/tests/templates/kuttl/smoke/60-assert.yaml +++ b/tests/templates/kuttl/smoke/60-assert.yaml.j2 @@ -4,4 +4,6 @@ kind: TestAssert timeout: 300 commands: - script: kubectl exec -n $NAMESPACE test-nifi-0 -- python /tmp/test_nifi.py -u admin -p 'passwordWithSpecialCharacter\@<&>"'"'" -n $NAMESPACE -c 3 +{% if test_scenario['values']['nifi'].startswith('1.') %} - script: kubectl exec -n $NAMESPACE test-nifi-0 -- python /tmp/test_nifi_metrics.py -n $NAMESPACE +{% endif %} From a660934d87642325295f07ca2fd55a3a430b291f Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 11 Nov 2024 10:48:22 +0100 Subject: [PATCH 2/7] add 2.0.0 metric endpoint docs --- .../nifi/pages/usage_guide/monitoring.adoc | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/docs/modules/nifi/pages/usage_guide/monitoring.adoc b/docs/modules/nifi/pages/usage_guide/monitoring.adoc index 4a0400a0..226ceed7 100644 ---
a/docs/modules/nifi/pages/usage_guide/monitoring.adoc +++ b/docs/modules/nifi/pages/usage_guide/monitoring.adoc @@ -3,10 +3,14 @@ :k8s-job: https://kubernetes.io/docs/concepts/workloads/controllers/job/ :k8s-network-policies: https://kubernetes.io/docs/concepts/services-networking/network-policies/ -TODO: Update docs +In November 2024, Apache NiFi released a new major version https://cwiki.apache.org/confluence/display/NIFI/Release+Notes#ReleaseNotes-Version2.0.0[`2.0.0`]. -The operator automatically configures NiFi to export Prometheus metrics. -This is done by creating a {k8s-job}[Job] that connects to NiFi and configures a reporting task. +The NiFi `2.0.0` release changed the way of exposing Prometheus metrics significantly. The following steps explain on how to expose Metrics in NiFi versions `1.x.x` and `2.x.x`. + +== Configure metrics in NiFi `1.x.x` + +For NiFi versions `1.x.x`, the operator automatically configures NiFi to export Prometheus metrics. +This is done by creating a {k8s-job}[Job] that connects to NiFi and configures a https://nifi.apache.org/docs/nifi-docs/components/org.apache.nifi/nifi-prometheus-nar/1.26.0/org.apache.nifi.reporting.prometheus.PrometheusReportingTask/index.html[Prometheus Reporting Task]. IMPORTANT: Network access from the Job to NiFi is required. If you are running a Kubernetes with restrictive {k8s-network-policies}[NetworkPolicies], make sure to allow access from the Job to NiFi. @@ -26,3 +30,24 @@ spec: createReportingTaskJob: enabled: false ---- + +== Configure metrics in NiFi `2.x.x` + +The Prometheus Reporting Task was removed in NiFi `2.x.x`. Metrics now are exposed automatically and can be scraped using the NiFi pod FQDN and the route `/nifi-api/flow/metrics/prometheus`. 
+This means automatic deployment of the PrometheusReportingTask should be disabled: + +[source,yaml] +---- +spec: + clusterConfig: + createReportingTaskJob: + enabled: false +---- + +For a deployed single node NiFi cluster called `simple-nifi`, containing a rolegroup called `default`, the metrics endpoint is reachable under: + +``` +https://simple-nifi-node-default-0.simple-nifi-node-default.<namespace>.svc.cluster.local:8443/nifi-api/flow/metrics/prometheus +``` + +IMPORTANT: If NiFi is configured to use any Authentication, requests to the metric endpoint must be authenticated and authorized as well. From 184fb81527731f01a052fd1126cfd8a318ec6f5c Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 11 Nov 2024 10:53:29 +0100 Subject: [PATCH 3/7] review feedback --- docs/modules/nifi/pages/usage_guide/monitoring.adoc | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/docs/modules/nifi/pages/usage_guide/monitoring.adoc b/docs/modules/nifi/pages/usage_guide/monitoring.adoc index 226ceed7..58dd018b 100644 --- a/docs/modules/nifi/pages/usage_guide/monitoring.adoc +++ b/docs/modules/nifi/pages/usage_guide/monitoring.adoc @@ -33,16 +33,7 @@ spec: == Configure metrics in NiFi `2.x.x` -The Prometheus Reporting Task was removed in NiFi `2.x.x`. Metrics now are exposed automatically and can be scraped using the NiFi pod FQDN and the route `/nifi-api/flow/metrics/prometheus`. -This means automatic deployment of the PrometheusReportingTask should be disabled: - -[source,yaml] ----- -spec: - clusterConfig: - createReportingTaskJob: - enabled: false ----- +The Prometheus Reporting Task was removed in NiFi `2.x.x`. Metrics are now exposed automatically and can be scraped using the NiFi pod FQDN and the route `/nifi-api/flow/metrics/prometheus`.
For a deployed single node NiFi cluster called `simple-nifi`, containing a rolegroup called `default`, the metrics endpoint is reachable under: @@ -50,4 +41,4 @@ For a deployed single node NiFi cluster called `simple-nifi`, containing a roleg https://simple-nifi-node-default-0.simple-nifi-node-default.<namespace>.svc.cluster.local:8443/nifi-api/flow/metrics/prometheus ``` -IMPORTANT: If NiFi is configured to use any Authentication, requests to the metric endpoint must be authenticated and authorized as well. +IMPORTANT: If NiFi is configured to use any Authentication, requests to the metric endpoint must be authenticated and authorized. From 2054682afa43a2812948970f4da5bfb57df2157a Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 11 Nov 2024 10:59:01 +0100 Subject: [PATCH 4/7] adapted changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5de1c31c..2385e2fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ All notable changes to this project will be documented in this file. - `extraVolumes` - Increase `log` Volume size from 33 MiB to 500 MiB ([#671]). - Replaced experimental NiFi `2.0.0-M4` with `2.0.0` ([#702]). +- Remove the automatic deployment of the PrometheusReportingTask for NiFi versions `2.x.x` and up ([#708]). ### Fixed @@ -46,6 +47,7 @@ All notable changes to this project will be documented in this file.
[#694]: https://github.com/stackabletech/nifi-operator/pull/694 [#698]: https://github.com/stackabletech/nifi-operator/pull/698 [#702]: https://github.com/stackabletech/nifi-operator/pull/702 +[#708]: https://github.com/stackabletech/nifi-operator/pull/708 ## [24.7.0] - 2024-07-24 From a06fc4b4c0fafb92f2f113787041cd4b9f917a4b Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 11 Nov 2024 11:03:54 +0100 Subject: [PATCH 5/7] Update CHANGELOG.md Co-authored-by: Sebastian Bernauer --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2385e2fb..cf9255a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,7 @@ All notable changes to this project will be documented in this file. - `extraVolumes` - Increase `log` Volume size from 33 MiB to 500 MiB ([#671]). - Replaced experimental NiFi `2.0.0-M4` with `2.0.0` ([#702]). -- Remove the automatic deployment of the PrometheusReportingTask for NiFi versions `2.x.x` and up ([#708]). +- Don't deploy the `PrometheusReportingTask` Job for NiFi versions `2.x.x` and up ([#708]). 
### Fixed From aea3d8c7a0859182702184ed34c79965c0b529bf Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Mon, 11 Nov 2024 12:24:25 +0100 Subject: [PATCH 6/7] added condition to upgrade test for testing nifi metrics --- .../kuttl/upgrade/{04-assert.yaml => 04-assert.yaml.j2} | 2 ++ .../kuttl/upgrade/{07-assert.yaml => 07-assert.yaml.j2} | 2 ++ 2 files changed, 4 insertions(+) rename tests/templates/kuttl/upgrade/{04-assert.yaml => 04-assert.yaml.j2} (87%) rename tests/templates/kuttl/upgrade/{07-assert.yaml => 07-assert.yaml.j2} (90%) diff --git a/tests/templates/kuttl/upgrade/04-assert.yaml b/tests/templates/kuttl/upgrade/04-assert.yaml.j2 similarity index 87% rename from tests/templates/kuttl/upgrade/04-assert.yaml rename to tests/templates/kuttl/upgrade/04-assert.yaml.j2 index d6a4856d..9f90b3f1 100644 --- a/tests/templates/kuttl/upgrade/04-assert.yaml +++ b/tests/templates/kuttl/upgrade/04-assert.yaml.j2 @@ -4,5 +4,7 @@ kind: TestAssert timeout: 300 commands: - script: kubectl exec -n $NAMESPACE test-nifi-0 -- python /tmp/test_nifi.py -u admin -p supersecretpassword -n $NAMESPACE -c 3 +{% if test_scenario['values']['nifi_old'].startswith('1.') %} - script: kubectl exec -n $NAMESPACE test-nifi-0 -- python /tmp/test_nifi_metrics.py -n $NAMESPACE +{% endif %} - script: kubectl exec -n $NAMESPACE test-nifi-0 -- sh -c "python /tmp/flow.py -e https://test-nifi-node-default-0.test-nifi-node-default.$NAMESPACE.svc.cluster.local:8443 run -t /tmp/generate-and-log-flowfiles.xml > /tmp/old_input" diff --git a/tests/templates/kuttl/upgrade/07-assert.yaml b/tests/templates/kuttl/upgrade/07-assert.yaml.j2 similarity index 90% rename from tests/templates/kuttl/upgrade/07-assert.yaml rename to tests/templates/kuttl/upgrade/07-assert.yaml.j2 index eda1e9fc..24abfbc8 100644 --- a/tests/templates/kuttl/upgrade/07-assert.yaml +++ b/tests/templates/kuttl/upgrade/07-assert.yaml.j2 @@ -6,7 +6,9 @@ metadata: timeout: 300 commands: - script: kubectl exec -n $NAMESPACE test-nifi-0 -- 
python /tmp/test_nifi.py -u admin -p supersecretpassword -n $NAMESPACE -c 3 +{% if test_scenario['values']['nifi_new'].startswith('1.') %} - script: kubectl exec -n $NAMESPACE test-nifi-0 -- python /tmp/test_nifi_metrics.py -n $NAMESPACE +{% endif %} - script: kubectl exec -n $NAMESPACE test-nifi-0 -- sh -c "python /tmp/flow.py -e https://test-nifi-node-default-0.test-nifi-node-default.$NAMESPACE.svc.cluster.local:8443 query > /tmp/new_input" # This tests that the number of input records stays the same after the upgrade. - script: kubectl exec -n $NAMESPACE test-nifi-0 -- diff /tmp/old_input /tmp/new_input From 34a01e47e102a498066f10600dc6bc5393717864 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 12 Nov 2024 10:54:19 +0100 Subject: [PATCH 7/7] Apply suggestions from code review Co-authored-by: Sebastian Bernauer --- docs/modules/nifi/pages/usage_guide/monitoring.adoc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/modules/nifi/pages/usage_guide/monitoring.adoc b/docs/modules/nifi/pages/usage_guide/monitoring.adoc index 58dd018b..98d92b10 100644 --- a/docs/modules/nifi/pages/usage_guide/monitoring.adoc +++ b/docs/modules/nifi/pages/usage_guide/monitoring.adoc @@ -5,7 +5,8 @@ In November 2024, Apache NiFi released a new major version https://cwiki.apache.org/confluence/display/NIFI/Release+Notes#ReleaseNotes-Version2.0.0[`2.0.0`]. -The NiFi `2.0.0` release changed the way of exposing Prometheus metrics significantly. The following steps explain on how to expose Metrics in NiFi versions `1.x.x` and `2.x.x`. +The NiFi `2.0.0` release changed the way of exposing Prometheus metrics significantly. +The following steps explain how to expose metrics in NiFi versions `1.x.x` and `2.x.x`. == Configure metrics in NiFi `1.x.x` @@ -33,7 +34,8 @@ spec: == Configure metrics in NiFi `2.x.x` -The Prometheus Reporting Task was removed in NiFi `2.x.x`.
Metrics are now exposed automatically and can be scraped using the NiFi pod FQDN and the route `/nifi-api/flow/metrics/prometheus`. +The Prometheus Reporting Task was removed in NiFi `2.x.x` in https://issues.apache.org/jira/browse/NIFI-13507[NIFI-13507]. +Metrics are now always exposed and can be scraped using the NiFi Pod FQDN and the HTTP path `/nifi-api/flow/metrics/prometheus`. For a deployed single node NiFi cluster called `simple-nifi`, containing a rolegroup called `default`, the metrics endpoint is reachable under: @@ -41,4 +43,4 @@ For a deployed single node NiFi cluster called `simple-nifi`, containing a roleg https://simple-nifi-node-default-0.simple-nifi-node-default.<namespace>.svc.cluster.local:8443/nifi-api/flow/metrics/prometheus ``` -IMPORTANT: If NiFi is configured to use any Authentication, requests to the metric endpoint must be authenticated and authorized. +IMPORTANT: If NiFi is configured to do any user authentication, requests to the metric endpoint must be authenticated and authorized.