From 232279099cf2b63a04158fb873b00262d2cac48f Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Wed, 27 Aug 2025 14:54:18 +0200 Subject: [PATCH 1/6] feat: Add Prometheus labels and annotations and add an integration test with Prometheus --- .../src/controller/build/node_config.rs | 65 +++++- .../src/controller/build/role_builder.rs | 2 +- .../controller/build/role_group_builder.rs | 95 +++++++-- rust/operator-binary/src/crd/mod.rs | 2 +- .../templates/kuttl/metrics/00-patch-ns.yaml | 15 ++ tests/templates/kuttl/metrics/01-rbac.yaml | 37 ++++ tests/templates/kuttl/metrics/10-assert.yaml | 12 ++ .../kuttl/metrics/10-install-prometheus.yaml | 17 ++ .../10_kube-prometheus-stack-values.yaml | 17 ++ .../metrics/11-create-service-monitor.yaml | 47 +++++ .../templates/kuttl/metrics/20-assert.yaml.j2 | 12 ++ .../metrics/20-install-opensearch.yaml.j2 | 189 ++++++++++++++++++ tests/templates/kuttl/metrics/30-assert.yaml | 11 + .../kuttl/metrics/30-check-metrics.yaml | 21 ++ tests/test-definition.yaml | 5 + 15 files changed, 522 insertions(+), 25 deletions(-) create mode 100644 tests/templates/kuttl/metrics/00-patch-ns.yaml create mode 100644 tests/templates/kuttl/metrics/01-rbac.yaml create mode 100644 tests/templates/kuttl/metrics/10-assert.yaml create mode 100644 tests/templates/kuttl/metrics/10-install-prometheus.yaml create mode 100644 tests/templates/kuttl/metrics/10_kube-prometheus-stack-values.yaml create mode 100644 tests/templates/kuttl/metrics/11-create-service-monitor.yaml create mode 100644 tests/templates/kuttl/metrics/20-assert.yaml.j2 create mode 100644 tests/templates/kuttl/metrics/20-install-opensearch.yaml.j2 create mode 100644 tests/templates/kuttl/metrics/30-assert.yaml create mode 100644 tests/templates/kuttl/metrics/30-check-metrics.yaml diff --git a/rust/operator-binary/src/controller/build/node_config.rs b/rust/operator-binary/src/controller/build/node_config.rs index 6eb9ba6..ab81bc4 100644 --- 
a/rust/operator-binary/src/controller/build/node_config.rs +++ b/rust/operator-binary/src/controller/build/node_config.rs @@ -1,3 +1,5 @@ +use std::str::FromStr; + use serde_json::{Value, json}; use stackable_operator::builder::pod::container::FieldPathEnvVar; @@ -88,7 +90,12 @@ impl NodeConfig { } /// static for the cluster - pub fn static_opensearch_config(&self) -> String { + pub fn static_opensearch_config_file(&self) -> String { + Self::to_yaml(self.static_opensearch_config()) + } + + /// static for the cluster + pub fn static_opensearch_config(&self) -> serde_json::Map { let mut config = serde_json::Map::new(); config.insert( @@ -124,7 +131,24 @@ impl NodeConfig { // Ensure a deterministic result config.sort_keys(); - Self::to_yaml(config) + config + } + + pub fn tls_on_http_port_enabled(&self) -> bool { + self.static_opensearch_config() + .get("plugins.security.ssl.http.enabled") + .and_then(Self::value_as_bool) + == Some(true) + } + + pub fn value_as_bool(value: &Value) -> Option { + value.as_bool().or( + // OpenSearch parses the strings "true" and "false" as boolean, see + // https://github.com/opensearch-project/OpenSearch/blob/3.1.0/libs/common/src/main/java/org/opensearch/common/Booleans.java#L45-L84 + value + .as_str() + .and_then(|value| FromStr::from_str(value).ok()), + ) } /// different for every node @@ -262,6 +286,43 @@ mod tests { framework::{ClusterName, ProductVersion, role_utils::GenericProductSpecificCommonConfig}, }; + #[test] + pub fn test_value_as_bool() { + // boolean + assert_eq!(Some(true), NodeConfig::value_as_bool(&Value::Bool(true))); + assert_eq!(Some(false), NodeConfig::value_as_bool(&Value::Bool(false))); + + // valid strings + assert_eq!( + Some(true), + NodeConfig::value_as_bool(&Value::String("true".to_owned())) + ); + assert_eq!( + Some(false), + NodeConfig::value_as_bool(&Value::String("false".to_owned())) + ); + + // invalid strings + assert_eq!( + None, + NodeConfig::value_as_bool(&Value::String("True".to_owned())) + ); + + 
// invalid types + assert_eq!(None, NodeConfig::value_as_bool(&Value::Null)); + assert_eq!( + None, + NodeConfig::value_as_bool(&Value::Number( + serde_json::Number::from_i128(1).expect("should be a valid number") + )) + ); + assert_eq!(None, NodeConfig::value_as_bool(&Value::Array(vec![]))); + assert_eq!( + None, + NodeConfig::value_as_bool(&Value::Object(serde_json::Map::new())) + ); + } + #[test] pub fn test_environment_variables() { let image: ProductImage = serde_json::from_str(r#"{"productVersion": "3.0.0"}"#) diff --git a/rust/operator-binary/src/controller/build/role_builder.rs b/rust/operator-binary/src/controller/build/role_builder.rs index 6f3b44d..fe3c3db 100644 --- a/rust/operator-binary/src/controller/build/role_builder.rs +++ b/rust/operator-binary/src/controller/build/role_builder.rs @@ -51,7 +51,7 @@ impl<'a> RoleBuilder<'a> { // TODO Only one builder function which calls the other ones? - pub fn role_group_builders(&self) -> Vec { + pub fn role_group_builders(&self) -> Vec> { self.cluster .role_group_configs .iter() diff --git a/rust/operator-binary/src/controller/build/role_group_builder.rs b/rust/operator-binary/src/controller/build/role_group_builder.rs index 49d5f22..a7ed83f 100644 --- a/rust/operator-binary/src/controller/build/role_group_builder.rs +++ b/rust/operator-binary/src/controller/build/role_group_builder.rs @@ -13,8 +13,7 @@ use stackable_operator::{ }, apimachinery::pkg::{apis::meta::v1::LabelSelector, util::intstr::IntOrString}, }, - kube::api::ObjectMeta, - kvp::{Label, Labels}, + kvp::{Annotations, Label, Labels}, }; use super::node_config::{CONFIGURATION_FILE_OPENSEARCH_YML, NodeConfig}; @@ -82,12 +81,13 @@ impl<'a> RoleGroupBuilder<'a> { } pub fn build_config_map(&self) -> ConfigMap { - let metadata = - self.common_metadata(self.resource_names.role_group_config_map(), Labels::new()); + let metadata = self + .common_metadata(self.resource_names.role_group_config_map()) + .build(); let data = [( 
CONFIGURATION_FILE_OPENSEARCH_YML.to_owned(), - self.node_config.static_opensearch_config(), + self.node_config.static_opensearch_config_file(), )] .into(); @@ -99,7 +99,9 @@ impl<'a> RoleGroupBuilder<'a> { } pub fn build_stateful_set(&self) -> StatefulSet { - let metadata = self.common_metadata(self.resource_names.stateful_set_name(), Labels::new()); + let metadata = self + .common_metadata(self.resource_names.stateful_set_name()) + .build(); let template = self.build_pod_template(); @@ -349,17 +351,53 @@ impl<'a> RoleGroupBuilder<'a> { self.build_role_group_service( self.resource_names.headless_service_name(), ports, - Labels::new(), + Self::prometheus_labels(), + Self::prometheus_annotations(self.node_config.tls_on_http_port_enabled()), ) } + /// Common labels for Prometheus + fn prometheus_labels() -> Labels { + Labels::try_from([("prometheus.io/scrape", "true")]).expect("should be a valid label") + } + + /// Common annotations for Prometheus + /// + /// These annotations can be used in a ServiceMonitor. 
+ /// + /// see also https://github.com/prometheus-community/helm-charts/blob/prometheus-27.32.0/charts/prometheus/values.yaml#L983-L1036 + fn prometheus_annotations(tls_on_http_port_enabled: bool) -> Annotations { + Annotations::try_from([ + ( + "prometheus.io/path".to_owned(), + "/_prometheus/metrics".to_owned(), + ), + ("prometheus.io/port".to_owned(), HTTP_PORT.to_string()), + ( + "prometheus.io/scheme".to_owned(), + if tls_on_http_port_enabled { + "https".to_owned() + } else { + "http".to_owned() + }, + ), + ("prometheus.io/scrape".to_owned(), "true".to_owned()), + ]) + .expect("should be valid annotations") + } + fn build_role_group_service( &self, service_name: impl Into, ports: Vec, extra_labels: Labels, + extra_annotations: Annotations, ) -> Service { - let metadata = self.common_metadata(service_name, extra_labels); + let metadata = self + .common_metadata(service_name) + .with_labels(extra_labels) + .with_annotations(extra_annotations) + .build(); let service_spec = ServiceSpec { // Internal communication does not need to be exposed @@ -379,8 +417,9 @@ impl<'a> RoleGroupBuilder<'a> { } pub fn build_listener(&self) -> listener::v1alpha1::Listener { - let metadata = - self.common_metadata(self.resource_names.listener_service_name(), Labels::new()); + let metadata = self + .common_metadata(self.resource_names.listener_service_name()) + .build(); let listener_class = self.role_group_config.config.listener_class.to_owned(); @@ -395,8 +434,6 @@ impl<'a> RoleGroupBuilder<'a> { } } - /// We only use the http port here and intentionally omit - /// the metrics one. 
fn listener_ports(&self) -> Vec { vec![listener::v1alpha1::ListenerPort { name: HTTP_PORT_NAME.to_string(), @@ -405,12 +442,10 @@ impl<'a> RoleGroupBuilder<'a> { }] } - fn common_metadata( - &self, - resource_name: impl Into, - extra_labels: Labels, - ) -> ObjectMeta { - ObjectMetaBuilder::new() + fn common_metadata(&self, resource_name: impl Into) -> ObjectMetaBuilder { + let mut builder = ObjectMetaBuilder::new(); + + builder .name(resource_name) .namespace(&self.cluster.namespace) .ownerreference(ownerreference_from_resource( @@ -418,9 +453,9 @@ impl<'a> RoleGroupBuilder<'a> { None, Some(true), )) - .with_labels(self.recommended_labels()) - .with_labels(extra_labels) - .build() + .with_labels(self.recommended_labels()); + + builder } fn recommended_labels(&self) -> Labels { @@ -444,3 +479,21 @@ impl<'a> RoleGroupBuilder<'a> { ) } } + +#[cfg(test)] +mod tests { + use super::RoleGroupBuilder; + + #[test] + pub fn test_prometheus_labels() { + // Test that the function does not panic + RoleGroupBuilder::prometheus_labels(); + } + + #[test] + pub fn test_prometheus_annotations() { + // Test that the function does not panic on all possible execution pathes + RoleGroupBuilder::prometheus_annotations(false); + RoleGroupBuilder::prometheus_annotations(true); + } +} diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 84a6efa..48cbde7 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -251,7 +251,7 @@ impl NodeRoles { self.0.contains(node_role) } - pub fn iter(&self) -> slice::Iter { + pub fn iter(&self) -> slice::Iter<'_, v1alpha1::NodeRole> { self.0.iter() } } diff --git a/tests/templates/kuttl/metrics/00-patch-ns.yaml b/tests/templates/kuttl/metrics/00-patch-ns.yaml new file mode 100644 index 0000000..d4f91fa --- /dev/null +++ b/tests/templates/kuttl/metrics/00-patch-ns.yaml @@ -0,0 +1,15 @@ +# see https://github.com/stackabletech/issues/issues/566 +--- +apiVersion: kuttl.dev/v1beta1 
+kind: TestStep +commands: + - script: | + kubectl patch namespace $NAMESPACE --patch=' + { + "metadata": { + "labels": { + "pod-security.kubernetes.io/enforce": "privileged" + } + } + }' + timeout: 120 diff --git a/tests/templates/kuttl/metrics/01-rbac.yaml b/tests/templates/kuttl/metrics/01-rbac.yaml new file mode 100644 index 0000000..d1a7ebc --- /dev/null +++ b/tests/templates/kuttl/metrics/01-rbac.yaml @@ -0,0 +1,37 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: test-service-account +--- +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: test-role +rules: + - apiGroups: + - security.openshift.io + resources: + - securitycontextconstraints + resourceNames: + - privileged + verbs: + - use +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: test-role-binding +subjects: + - kind: ServiceAccount + name: test-service-account + - kind: ServiceAccount + name: prometheus-stack-kube-prom-admission + - kind: ServiceAccount + name: prometheus-stack-kube-prom-operator + - kind: ServiceAccount + name: prometheus-stack-kube-prom-prometheus +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: test-role diff --git a/tests/templates/kuttl/metrics/10-assert.yaml b/tests/templates/kuttl/metrics/10-assert.yaml new file mode 100644 index 0000000..6c215fd --- /dev/null +++ b/tests/templates/kuttl/metrics/10-assert.yaml @@ -0,0 +1,12 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: prometheus-prometheus-stack-kube-prom-prometheus +status: + readyReplicas: 1 + replicas: 1 diff --git a/tests/templates/kuttl/metrics/10-install-prometheus.yaml b/tests/templates/kuttl/metrics/10-install-prometheus.yaml new file mode 100644 index 0000000..33d83bf --- /dev/null +++ b/tests/templates/kuttl/metrics/10-install-prometheus.yaml @@ -0,0 +1,17 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + # The 
kube-prometheus-stack Helm chart installs cluster wide + # resources, e.g. ClusterRoles. If all test cases use the same Helm + # chart version, then their contents are equal. Therefore, it is safe + # to take ownership instead of failing if the resources already exist. + - script: > + helm install prometheus-stack + --namespace $NAMESPACE + --version 77.0.1 + --values 10_kube-prometheus-stack-values.yaml + --take-ownership + --wait + oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack + timeout: 600 diff --git a/tests/templates/kuttl/metrics/10_kube-prometheus-stack-values.yaml b/tests/templates/kuttl/metrics/10_kube-prometheus-stack-values.yaml new file mode 100644 index 0000000..465b088 --- /dev/null +++ b/tests/templates/kuttl/metrics/10_kube-prometheus-stack-values.yaml @@ -0,0 +1,17 @@ +--- +alertmanager: + enabled: false +grafana: + enabled: false +kubeStateMetrics: + enabled: false +kubernetesServiceMonitors: + enabled: false +nodeExporter: + enabled: false +prometheus: + serviceMonitor: + selfMonitor: false +prometheusOperator: + serviceMonitor: + selfMonitor: false diff --git a/tests/templates/kuttl/metrics/11-create-service-monitor.yaml b/tests/templates/kuttl/metrics/11-create-service-monitor.yaml new file mode 100644 index 0000000..9579f0a --- /dev/null +++ b/tests/templates/kuttl/metrics/11-create-service-monitor.yaml @@ -0,0 +1,47 @@ +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: stackable-opensearch + labels: + release: prometheus-stack +spec: + selector: + matchLabels: + prometheus.io/scrape: "true" + endpoints: + - relabelings: + - sourceLabels: + - __meta_kubernetes_service_annotation_prometheus_io_scheme + action: replace + targetLabel: __scheme__ + regex: (https?) 
+ - sourceLabels: + - __meta_kubernetes_service_annotation_prometheus_io_path + action: replace + targetLabel: __metrics_path__ + regex: (.+) + # Use the FQDN instead of the IP address because the IP address + # is not contained in the certificate. + - sourceLabels: + - __meta_kubernetes_pod_name + - __meta_kubernetes_service_name + - __meta_kubernetes_namespace + - __meta_kubernetes_service_annotation_prometheus_io_port + action: replace + targetLabel: __address__ + regex: (.+);(.+);(.+);(\d+) + replacement: $1.$2.$3.svc.cluster.local:$4 + tlsConfig: + ca: + configMap: + name: truststore + key: ca.crt +--- +apiVersion: secrets.stackable.tech/v1alpha1 +kind: TrustStore +metadata: + name: truststore +spec: + secretClassName: tls + format: tls-pem diff --git a/tests/templates/kuttl/metrics/20-assert.yaml.j2 b/tests/templates/kuttl/metrics/20-assert.yaml.j2 new file mode 100644 index 0000000..65425bd --- /dev/null +++ b/tests/templates/kuttl/metrics/20-assert.yaml.j2 @@ -0,0 +1,12 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: opensearch-nodes-default +status: + readyReplicas: 3 + replicas: 3 diff --git a/tests/templates/kuttl/metrics/20-install-opensearch.yaml.j2 b/tests/templates/kuttl/metrics/20-install-opensearch.yaml.j2 new file mode 100644 index 0000000..9bc9797 --- /dev/null +++ b/tests/templates/kuttl/metrics/20-install-opensearch.yaml.j2 @@ -0,0 +1,189 @@ +--- +apiVersion: opensearch.stackable.tech/v1alpha1 +kind: OpenSearchCluster +metadata: + name: opensearch +spec: + image: +{% if test_scenario['values']['opensearch'].find(",") > 0 %} + custom: "{{ test_scenario['values']['opensearch'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['opensearch'].split(',')[0] }}" +{% else %} + productVersion: "{{ test_scenario['values']['opensearch'] }}" +{% endif %} + pullPolicy: IfNotPresent + nodes: + roleGroups: + default: + config: + resources: + storage: + data: + 
capacity: 100Mi + listenerClass: external-stable + replicas: 3 + envOverrides: + OPENSEARCH_HOME: {{ test_scenario['values']['opensearch_home'] }} + configOverrides: + opensearch.yml: + # Disable memory mapping in this test; If memory mapping were activated, the kernel setting + # vm.max_map_count would have to be increased to 262144 on the node. + node.store.allow_mmap: "false" + plugins.security.allow_default_init_securityindex: "true" + plugins.security.ssl.transport.enabled: "true" + plugins.security.ssl.transport.pemcert_filepath: {{ test_scenario['values']['opensearch_home'] }}/config/tls/tls.crt + plugins.security.ssl.transport.pemkey_filepath: {{ test_scenario['values']['opensearch_home'] }}/config/tls/tls.key + plugins.security.ssl.transport.pemtrustedcas_filepath: {{ test_scenario['values']['opensearch_home'] }}/config/tls/ca.crt + plugins.security.ssl.http.enabled: "true" + plugins.security.ssl.http.pemcert_filepath: {{ test_scenario['values']['opensearch_home'] }}/config/tls/tls.crt + plugins.security.ssl.http.pemkey_filepath: {{ test_scenario['values']['opensearch_home'] }}/config/tls/tls.key + plugins.security.ssl.http.pemtrustedcas_filepath: {{ test_scenario['values']['opensearch_home'] }}/config/tls/ca.crt + plugins.security.authcz.admin_dn: "CN=generated certificate for pod" + podOverrides: + spec: + containers: + - name: opensearch + volumeMounts: + - name: security-config + mountPath: {{ test_scenario['values']['opensearch_home'] }}/config/opensearch-security + readOnly: true + - name: tls + mountPath: {{ test_scenario['values']['opensearch_home'] }}/config/tls + readOnly: true + securityContext: + fsGroup: 1000 + volumes: + - name: security-config + secret: + secretName: opensearch-security-config + - name: tls + ephemeral: + volumeClaimTemplate: + metadata: + annotations: + secrets.stackable.tech/class: tls + secrets.stackable.tech/scope: node,pod,service=opensearch,service=opensearch-nodes-default-headless + spec: + storageClassName: 
secrets.stackable.tech + accessModes: + - ReadWriteOnce + resources: + requests: + storage: "1" +--- +apiVersion: v1 +kind: Secret +metadata: + name: opensearch-security-config +stringData: + action_groups.yml: | + --- + _meta: + type: actiongroups + config_version: 2 + allowlist.yml: | + --- + _meta: + type: allowlist + config_version: 2 + + config: + enabled: false + audit.yml: | + --- + _meta: + type: audit + config_version: 2 + + config: + enabled: false + config.yml: | + --- + _meta: + type: config + config_version: 2 + + config: + dynamic: + authc: + basic_internal_auth_domain: + description: Authenticate via HTTP Basic against internal users database + http_enabled: true + transport_enabled: true + order: 1 + http_authenticator: + type: basic + challenge: false + authentication_backend: + type: intern + authz: {} + http: + anonymous_auth_enabled: true + internal_users.yml: | + --- + # The hash value is a bcrypt hash and can be generated with plugin/tools/hash.sh + + _meta: + type: internalusers + config_version: 2 + + admin: + hash: $2y$10$xRtHZFJ9QhG9GcYhRpAGpufCZYsk//nxsuel5URh0GWEBgmiI4Q/e + reserved: true + backend_roles: + - admin + description: OpenSearch admin user + + kibanaserver: + hash: $2y$10$vPgQ/6ilKDM5utawBqxoR.7euhVQ0qeGl8mPTeKhmFT475WUDrfQS + reserved: true + description: OpenSearch Dashboards user + nodes_dn.yml: | + --- + _meta: + type: nodesdn + config_version: 2 + roles.yml: | + --- + _meta: + type: roles + config_version: 2 + + monitoring: + reserved: true + cluster_permissions: + - cluster:monitor/health + - cluster:monitor/nodes/info + - cluster:monitor/nodes/stats + - cluster:monitor/prometheus/metrics + - cluster:monitor/state + index_permissions: + - index_patterns: + - "*" + allowed_actions: + - indices:monitor/health + - indices:monitor/stats + roles_mapping.yml: | + --- + _meta: + type: rolesmapping + config_version: 2 + + all_access: + reserved: false + backend_roles: + - admin + + kibana_server: + reserved: true + users: + - 
kibanaserver + + monitoring: + backend_roles: + - opendistro_security_anonymous_backendrole + tenants.yml: | + --- + _meta: + type: tenants + config_version: 2 diff --git a/tests/templates/kuttl/metrics/30-assert.yaml b/tests/templates/kuttl/metrics/30-assert.yaml new file mode 100644 index 0000000..b928342 --- /dev/null +++ b/tests/templates/kuttl/metrics/30-assert.yaml @@ -0,0 +1,11 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: check-metrics +status: + succeeded: 1 diff --git a/tests/templates/kuttl/metrics/30-check-metrics.yaml b/tests/templates/kuttl/metrics/30-check-metrics.yaml new file mode 100644 index 0000000..0aa4f4d --- /dev/null +++ b/tests/templates/kuttl/metrics/30-check-metrics.yaml @@ -0,0 +1,21 @@ +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: check-metrics +spec: + template: + spec: + containers: + - name: check-metrics + image: oci.stackable.tech/sdp/testing-tools:0.2.0-stackable0.0.0-dev + command: + - /bin/bash + - -euo + - pipefail + - -c + - > + curl http://prometheus-operated:9090/api/v1/query?query=opensearch_cluster_nodes_number%7Bpod%3D%22opensearch-nodes-default-0%22%7D | + jq --exit-status '.data.result[0].value[1] == "3"' + serviceAccountName: test-service-account + restartPolicy: OnFailure diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 161a87d..a48bbbf 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -25,6 +25,11 @@ tests: - opensearch - openshift - opensearch_home + - name: metrics + dimensions: + - opensearch + - openshift + - opensearch_home suites: - name: nightly patch: From f2772deb6834cda0f2190ce58b1270534ff876ea Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Wed, 27 Aug 2025 16:50:49 +0200 Subject: [PATCH 2/6] doc: Document monitoring with Prometheus --- .../pages/usage-guide/monitoring.adoc | 149 ++++++++++++++++++ docs/modules/opensearch/partials/nav.adoc | 1 + 2 files 
changed, 150 insertions(+) create mode 100644 docs/modules/opensearch/pages/usage-guide/monitoring.adoc diff --git a/docs/modules/opensearch/pages/usage-guide/monitoring.adoc b/docs/modules/opensearch/pages/usage-guide/monitoring.adoc new file mode 100644 index 0000000..a525440 --- /dev/null +++ b/docs/modules/opensearch/pages/usage-guide/monitoring.adoc @@ -0,0 +1,149 @@ += Monitoring +:description: Use Prometheus to monitor OpenSearch + +OpenSearch clusters can be monitored with Prometheus, see also the general xref:operators:monitoring.adoc[] page. +The Prometheus metrics are exposed on the HTTP port 9200 at the path `/_prometheus/metrics`. + +The role group services contain the corresponding labels and annotations: + +[source,yaml] +---- +--- +apiVersion: v1 +kind: Service +metadata: + name: opensearch-nodes-default-headless + labels: + prometheus.io/scrape: "true" + annotations: + prometheus.io/path: /_prometheus/metrics + prometheus.io/port: "9200" + prometheus.io/scheme: https + prometheus.io/scrape: "true" +---- + +If authentication is enabled in the OpenSearch security plugin, then the metrics endpoint is also secured. 
+To make the metrics accessible for all users, especially Prometheus, anonymous authentication can be enabled and access to the monitoring statistics can be allowed for the role of the anonymous user: + +[source,yaml] +---- +--- +apiVersion: v1 +kind: Secret +metadata: + name: opensearch-security-config +stringData: + config.yml: | + --- + _meta: + type: config + config_version: 2 + config: + dynamic: + authc: + basic_internal_auth_domain: + description: Authenticate via HTTP Basic against internal users database + http_enabled: true + transport_enabled: true + order: 1 + http_authenticator: + type: basic + challenge: false # <1> + authentication_backend: + type: intern + authz: {} + http: + anonymous_auth_enabled: true # <2> + roles.yml: | + --- + _meta: + type: roles + config_version: 2 + monitoring: # <3> + reserved: true + cluster_permissions: + - cluster:monitor/health + - cluster:monitor/nodes/info + - cluster:monitor/nodes/stats + - cluster:monitor/prometheus/metrics + - cluster:monitor/state + index_permissions: + - index_patterns: + - "*" + allowed_actions: + - indices:monitor/health + - indices:monitor/stats + roles_mapping.yml: | + --- + _meta: + type: rolesmapping + config_version: 2 + monitoring: # <4> + backend_roles: + - opendistro_security_anonymous_backendrole +---- +<1> If anonymous authentication is enabled, then all defined HTTP authenticators are non-challenging. 
+<2> Enable https://docs.opensearch.org/latest/security/access-control/anonymous-authentication/[anonymous authentication] +<3> Create a role "monitoring" with the required permissions for the Prometheus endpoint +<4> Map the role "monitoring" to the backend role "opendistro_security_anonymous_backendrole" that is assigned to the anonymous user + +If you use the https://prometheus-operator.dev/[Prometheus Operator] to install Prometheus, then you can define a https://prometheus-operator.dev/docs/api-reference/api/#monitoring.coreos.com/v1.ServiceMonitor[ServiceMonitor] to collect the metrics: + +[source,yaml] +---- +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: stackable-opensearch + labels: + release: prometheus-stack # <1> +spec: + selector: + matchLabels: # <2> + prometheus.io/scrape: "true" + endpoints: + - relabelings: + - sourceLabels: # <3> + - __meta_kubernetes_service_annotation_prometheus_io_scheme + action: replace + targetLabel: __scheme__ + regex: (https?) + - sourceLabels: # <4> + - __meta_kubernetes_service_annotation_prometheus_io_path + action: replace + targetLabel: __metrics_path__ + regex: (.+) + - sourceLabels: # <5> + - __meta_kubernetes_pod_name + - __meta_kubernetes_service_name + - __meta_kubernetes_namespace + - __meta_kubernetes_service_annotation_prometheus_io_port + action: replace + targetLabel: __address__ + regex: (.+);(.+);(.+);(\d+) + replacement: $1.$2.$3.svc.cluster.local:$4 + tlsConfig: # <6> + ca: + configMap: + name: truststore + key: ca.crt +--- +apiVersion: secrets.stackable.tech/v1alpha1 +kind: TrustStore +metadata: + name: truststore +spec: + secretClassName: tls + format: tls-pem +---- +<1> The `release` label must match the Helm release name. + This Helm release was installed with `helm install prometheus-stack oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack ...`. +<2> Label selector to select the Kubernetes `Endpoints` objects to scrape metrics from. 
+ The Endpoints inherit the labels from their Service. +<3> Use the scheme (`http` or `https`) from the Service annotation `prometheus.io/scheme` +<4> Use the path (`/_prometheus/metrics`) from the Service annotation `prometheus.io/path`. + These values could also be hard-coded in the ServiceMonitor but it is better to use the ones provided by the operator if they change in the future. +<5> Use the FQDN instead of the IP address because the IP address is not contained in the certificate. + The FQDN is constructed from the pod name, service name, namespace and the HTTP port provided in the Service annotation `prometheus.io/port`, e.g. `opensearch-nodes-default-0.opensearch-nodes-default-headless.my-namespace.svc.cluster.local:9200`. +<6> If TLS is used and the CA is not already provided to Prometheus in another way, then it can be taken from a xref:secret-operator:truststore.adoc[] ConfigMap. diff --git a/docs/modules/opensearch/partials/nav.adoc b/docs/modules/opensearch/partials/nav.adoc index 3ddf9b3..42649b5 100644 --- a/docs/modules/opensearch/partials/nav.adoc +++ b/docs/modules/opensearch/partials/nav.adoc @@ -6,6 +6,7 @@ ** xref:opensearch:usage-guide/listenerclass.adoc[] ** xref:opensearch:usage-guide/storage-resource-configuration.adoc[] ** xref:opensearch:usage-guide/configuration-environment-overrides.adoc[] +** xref:opensearch:usage-guide/monitoring.adoc[] ** xref:opensearch:usage-guide/operations/index.adoc[] *** xref:opensearch:usage-guide/operations/cluster-operations.adoc[] *** xref:opensearch:usage-guide/operations/pod-placement.adoc[] From 2f401ca5e61b5584d49d4d0dd01ca56eebbb2483 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Wed, 27 Aug 2025 16:56:21 +0200 Subject: [PATCH 3/6] chore: Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 811eb55..3ed8168 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,7 +19,9 @@ All notable changes to this project will be documented in 
this file. - Add Listener support ([#17]). - Make the environment variables `OPENSEARCH_HOME` and `OPENSEARCH_PATH_CONF` overridable, so that images can be used which have a different directory structure than the Stackable image ([#18]). +- Add Prometheus labels and annotations to role-group services ([#26]). [#10]: https://github.com/stackabletech/opensearch-operator/pull/10 [#17]: https://github.com/stackabletech/opensearch-operator/pull/17 [#18]: https://github.com/stackabletech/opensearch-operator/pull/18 +[#26]: https://github.com/stackabletech/opensearch-operator/pull/26 From 0ce6f320f4063ba5b5c515098a5160664af4869d Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Wed, 27 Aug 2025 17:01:31 +0200 Subject: [PATCH 4/6] chore: Fix typo in code comment --- rust/operator-binary/src/controller/build/role_group_builder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/operator-binary/src/controller/build/role_group_builder.rs b/rust/operator-binary/src/controller/build/role_group_builder.rs index a7ed83f..dc08a9c 100644 --- a/rust/operator-binary/src/controller/build/role_group_builder.rs +++ b/rust/operator-binary/src/controller/build/role_group_builder.rs @@ -492,7 +492,7 @@ mod tests { #[test] pub fn test_prometheus_annotations() { - // Test that the function does not panic on all possible execution pathes + // Test that the function does not panic on all possible execution paths RoleGroupBuilder::prometheus_annotations(false); RoleGroupBuilder::prometheus_annotations(true); } From 253747557031651aafd5e485e74261fb8db93188 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Wed, 27 Aug 2025 17:06:36 +0200 Subject: [PATCH 5/6] chore: Fix rustdoc warning --- rust/operator-binary/src/controller/build/role_group_builder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/operator-binary/src/controller/build/role_group_builder.rs b/rust/operator-binary/src/controller/build/role_group_builder.rs index dc08a9c..8d8a6a4 
100644 --- a/rust/operator-binary/src/controller/build/role_group_builder.rs +++ b/rust/operator-binary/src/controller/build/role_group_builder.rs @@ -365,7 +365,7 @@ impl<'a> RoleGroupBuilder<'a> { /// /// These annotations can be used in a ServiceMonitor. /// - /// see also https://github.com/prometheus-community/helm-charts/blob/prometheus-27.32.0/charts/prometheus/values.yaml#L983-L1036 + /// see also <https://github.com/prometheus-community/helm-charts/blob/prometheus-27.32.0/charts/prometheus/values.yaml#L983-L1036> fn prometheus_annotations(tls_on_http_port_enabled: bool) -> Annotations { Annotations::try_from([ ( From 051d69e1bbddb968cd364d6bc6cb539d1e49001b Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Thu, 28 Aug 2025 11:20:22 +0200 Subject: [PATCH 6/6] doc: Mention CA rotation in the monitoring docs --- docs/modules/opensearch/pages/usage-guide/monitoring.adoc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/modules/opensearch/pages/usage-guide/monitoring.adoc b/docs/modules/opensearch/pages/usage-guide/monitoring.adoc index a525440..5d2ee46 100644 --- a/docs/modules/opensearch/pages/usage-guide/monitoring.adoc +++ b/docs/modules/opensearch/pages/usage-guide/monitoring.adoc @@ -147,3 +147,5 @@ spec: <5> Use the FQDN instead of the IP address because the IP address is not contained in the certificate. The FQDN is constructed from the pod name, service name, namespace and the HTTP port provided in the Service annotation `prometheus.io/port`, e.g. `opensearch-nodes-default-0.opensearch-nodes-default-headless.my-namespace.svc.cluster.local:9200`. <6> If TLS is used and the CA is not already provided to Prometheus in another way, then it can be taken from a xref:secret-operator:truststore.adoc[] ConfigMap. + The TrustStore ConfigMap is updated whenever the CA is rotated. + In this case, Prometheus takes over the new certificate.