2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file.
- `EOS_DISABLED` (`--eos-disabled`) to disable the EoS checker completely.
- Helm: Allow Pod `priorityClassName` to be configured ([#974]).
- Add support for `3.9.4` ([#977]).
- Add `prometheus.io/path|port|scheme` annotations to metrics service (for native metrics) ([#978]).

### Changed

@@ -20,6 +21,7 @@ All notable changes to this project will be documented in this file.
[#974]: https://github.com/stackabletech/zookeeper-operator/pull/974
[#976]: https://github.com/stackabletech/zookeeper-operator/pull/976
[#977]: https://github.com/stackabletech/zookeeper-operator/pull/977
[#978]: https://github.com/stackabletech/zookeeper-operator/pull/978

## [25.7.0] - 2025-07-23

6 changes: 2 additions & 4 deletions rust/operator-binary/src/crd/mod.rs
@@ -36,7 +36,6 @@ use strum::{Display, EnumIter, EnumString, IntoEnumIterator};

use crate::{
crd::{affinity::get_affinity, v1alpha1::ZookeeperServerRoleConfig},
discovery::build_role_group_headless_service_name,
listener::role_listener_name,
};

@@ -626,9 +625,8 @@ impl v1alpha1::ZookeeperCluster {
for i in 0..rolegroup.replicas.unwrap_or(1) {
pod_refs.push(ZookeeperPodRef {
namespace: ns.clone(),
role_group_headless_service_name: build_role_group_headless_service_name(
rolegroup_ref.object_name(),
),
role_group_headless_service_name: rolegroup_ref
.rolegroup_headless_service_name(),
pod_name: format!("{role_group}-{i}", role_group = rolegroup_ref.object_name()),
zookeeper_myid: i + myid_offset,
});
12 changes: 0 additions & 12 deletions rust/operator-binary/src/discovery.rs
@@ -168,15 +168,3 @@ fn listener_addresses(
false => Ok(address_port_pairs),
}
}

// TODO (@NickLarsenNZ): Implement this directly on RoleGroupRef, ie:
// RoleGroupRef<K: Resource>::metrics_service_name(&self) to restrict what _name_ can be.
pub fn build_role_group_headless_service_name(name: String) -> String {
format!("{name}-headless")
}

// TODO (@NickLarsenNZ): Implement this directly on RoleGroupRef, ie:
// RoleGroupRef<K: Resource>::metrics_service_name(&self) to restrict what _name_ can be.
pub fn build_role_group_metrics_service_name(name: String) -> String {
format!("{name}-metrics")
}
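
For context, the deleted helpers are superseded by `RoleGroupRef::rolegroup_headless_service_name()` and `RoleGroupRef::rolegroup_metrics_service_name()`, which the call sites in `crd/mod.rs` and `service.rs` now use. Below is a minimal sketch of the naming convention the removed code encoded, assuming the upstream methods keep the same `-headless`/`-metrics` suffixes (the example object name is hypothetical):

```rust
// Sketch only: mirrors the format strings of the removed helpers.
// Assumes stackable-operator's RoleGroupRef methods produce the same suffixes.
fn headless_service_name(object_name: &str) -> String {
    format!("{object_name}-headless")
}

fn metrics_service_name(object_name: &str) -> String {
    format!("{object_name}-metrics")
}

fn main() {
    // Hypothetical rolegroup object name "<cluster>-<role>-<rolegroup>".
    let object_name = "simple-zk-server-default";
    assert_eq!(headless_service_name(object_name), "simple-zk-server-default-headless");
    assert_eq!(metrics_service_name(object_name), "simple-zk-server-default-metrics");
}
```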
1 change: 1 addition & 0 deletions rust/operator-binary/src/main.rs
@@ -41,6 +41,7 @@ mod discovery;
mod listener;
mod operations;
mod product_logging;
mod service;
mod utils;
mod zk_controller;
mod znode_controller;
15 changes: 0 additions & 15 deletions rust/operator-binary/src/product_logging.rs
@@ -16,21 +16,6 @@ use crate::crd::{

#[derive(Snafu, Debug)]
pub enum Error {
#[snafu(display("object has no namespace"))]
ObjectHasNoNamespace,

#[snafu(display("failed to retrieve the ConfigMap {cm_name}"))]
ConfigMapNotFound {
source: stackable_operator::client::Error,
cm_name: String,
},

#[snafu(display("failed to retrieve the entry {entry} for ConfigMap {cm_name}"))]
MissingConfigMapEntry {
entry: &'static str,
cm_name: String,
},

#[snafu(display("crd validation failure"))]
CrdValidationFailure { source: crate::crd::Error },
}
210 changes: 210 additions & 0 deletions rust/operator-binary/src/service.rs
@@ -0,0 +1,210 @@
use std::{
collections::{BTreeMap, HashMap},
str::FromStr,
};

use product_config::types::PropertyNameKind;
use snafu::{OptionExt, ResultExt, Snafu};
use stackable_operator::{
builder::meta::ObjectMetaBuilder,
commons::product_image_selection::ResolvedProductImage,
k8s_openapi::api::core::v1::{Service, ServicePort, ServiceSpec},
kvp::{Annotations, Labels},
role_utils::RoleGroupRef,
};

use crate::{
crd::{
APP_NAME, JMX_METRICS_PORT, JMX_METRICS_PORT_NAME, METRICS_PROVIDER_HTTP_PORT,
METRICS_PROVIDER_HTTP_PORT_KEY, METRICS_PROVIDER_HTTP_PORT_NAME, ZOOKEEPER_ELECTION_PORT,
ZOOKEEPER_ELECTION_PORT_NAME, ZOOKEEPER_LEADER_PORT, ZOOKEEPER_LEADER_PORT_NAME,
ZOOKEEPER_PROPERTIES_FILE, v1alpha1,
},
utils::build_recommended_labels,
zk_controller::ZK_CONTROLLER_NAME,
};

#[derive(Snafu, Debug)]
pub enum Error {
#[snafu(display("object is missing metadata to build owner reference"))]
ObjectMissingMetadataForOwnerRef {
source: stackable_operator::builder::meta::Error,
},

#[snafu(display("failed to build Metadata"))]
BuildMetadata {
source: stackable_operator::builder::meta::Error,
},

#[snafu(display("failed to build Labels"))]
BuildLabel {
source: stackable_operator::kvp::LabelError,
},

#[snafu(display("missing zookeeper properties file {ZOOKEEPER_PROPERTIES_FILE} in config"))]
MissingPropertiesFile,

#[snafu(display("missing provider http port key {METRICS_PROVIDER_HTTP_PORT_KEY} in config"))]
MissingProviderHttpPortKey,
}

/// The rolegroup [`Service`] is a headless service that allows internal access to the instances of a certain rolegroup
///
/// This is mostly useful for internal communication between peers, or for clients that perform client-side load balancing.
pub(crate) fn build_server_rolegroup_headless_service(
zk: &v1alpha1::ZookeeperCluster,
rolegroup: &RoleGroupRef<v1alpha1::ZookeeperCluster>,
resolved_product_image: &ResolvedProductImage,
) -> Result<Service, Error> {
let metadata = ObjectMetaBuilder::new()
.name_and_namespace(zk)
.name(rolegroup.rolegroup_headless_service_name())
.ownerreference_from_resource(zk, None, Some(true))
.context(ObjectMissingMetadataForOwnerRefSnafu)?
.with_recommended_labels(build_recommended_labels(
zk,
ZK_CONTROLLER_NAME,
&resolved_product_image.app_version_label_value,
&rolegroup.role,
&rolegroup.role_group,
))
.context(BuildMetadataSnafu)?
.build();

let service_selector_labels =
Labels::role_group_selector(zk, APP_NAME, &rolegroup.role, &rolegroup.role_group)
.context(BuildLabelSnafu)?;

let service_spec = ServiceSpec {
// Internal communication does not need to be exposed
type_: Some("ClusterIP".to_string()),
cluster_ip: Some("None".to_string()),
ports: Some(vec![
ServicePort {
name: Some(ZOOKEEPER_LEADER_PORT_NAME.to_string()),
port: ZOOKEEPER_LEADER_PORT.into(),
protocol: Some("TCP".to_string()),
..ServicePort::default()
},
ServicePort {
name: Some(ZOOKEEPER_ELECTION_PORT_NAME.to_string()),
port: ZOOKEEPER_ELECTION_PORT.into(),
protocol: Some("TCP".to_string()),
..ServicePort::default()
},
]),
selector: Some(service_selector_labels.into()),
publish_not_ready_addresses: Some(true),
..ServiceSpec::default()
};

Ok(Service {
metadata,
spec: Some(service_spec),
status: None,
})
}

/// The rolegroup [`Service`] for exposing metrics
pub(crate) fn build_server_rolegroup_metrics_service(
zk: &v1alpha1::ZookeeperCluster,
rolegroup: &RoleGroupRef<v1alpha1::ZookeeperCluster>,
resolved_product_image: &ResolvedProductImage,
rolegroup_config: &HashMap<PropertyNameKind, BTreeMap<String, String>>,
) -> Result<Service, Error> {
let metrics_port = metrics_port_from_rolegroup_config(rolegroup_config)?;

let metadata = ObjectMetaBuilder::new()
.name_and_namespace(zk)
.name(rolegroup.rolegroup_metrics_service_name())
.ownerreference_from_resource(zk, None, Some(true))
.context(ObjectMissingMetadataForOwnerRefSnafu)?
.with_recommended_labels(build_recommended_labels(
zk,
ZK_CONTROLLER_NAME,
&resolved_product_image.app_version_label_value,
&rolegroup.role,
&rolegroup.role_group,
))
.context(BuildMetadataSnafu)?
.with_labels(prometheus_labels())
.with_annotations(prometheus_annotations(metrics_port))
.build();

let service_selector_labels =
Labels::role_group_selector(zk, APP_NAME, &rolegroup.role, &rolegroup.role_group)
.context(BuildLabelSnafu)?;

let service_spec = ServiceSpec {
// Internal communication does not need to be exposed
type_: Some("ClusterIP".to_string()),
cluster_ip: Some("None".to_string()),
ports: Some(vec![
// We keep this for legacy compatibility
ServicePort {
name: Some(JMX_METRICS_PORT_NAME.to_string()),
port: JMX_METRICS_PORT.into(),
protocol: Some("TCP".to_string()),
..ServicePort::default()
},
ServicePort {
name: Some(METRICS_PROVIDER_HTTP_PORT_NAME.to_string()),
port: metrics_port.into(),
protocol: Some("TCP".to_string()),
..ServicePort::default()
},
]),
selector: Some(service_selector_labels.into()),
publish_not_ready_addresses: Some(true),
..ServiceSpec::default()
};

Ok(Service {
metadata,
spec: Some(service_spec),
status: None,
})
}

pub(crate) fn metrics_port_from_rolegroup_config(
rolegroup_config: &HashMap<PropertyNameKind, BTreeMap<String, String>>,
) -> Result<u16, Error> {
let metrics_port = rolegroup_config
.get(&PropertyNameKind::File(
ZOOKEEPER_PROPERTIES_FILE.to_string(),
))
.context(MissingPropertiesFileSnafu)?
.get(METRICS_PROVIDER_HTTP_PORT_KEY)
.context(MissingProviderHttpPortKeySnafu)?;

let port = match u16::from_str(metrics_port) {
Ok(port) => port,
Err(err) => {
tracing::error!("{err}");
tracing::info!("Defaulting to using {METRICS_PROVIDER_HTTP_PORT} as metrics port.");
METRICS_PROVIDER_HTTP_PORT
}
};

Ok(port)
}

/// Common labels for Prometheus
fn prometheus_labels() -> Labels {
Labels::try_from([("prometheus.io/scrape", "true")]).expect("should be a valid label")
}

/// Common annotations for Prometheus
///
/// These annotations can be used in a ServiceMonitor.
///
/// see also <https://github.com/prometheus-community/helm-charts/blob/prometheus-27.32.0/charts/prometheus/values.yaml#L983-L1036>
fn prometheus_annotations(metrics_port: u16) -> Annotations {
Annotations::try_from([
("prometheus.io/path".to_owned(), "/metrics".to_owned()),
("prometheus.io/port".to_owned(), metrics_port.to_string()),
("prometheus.io/scheme".to_owned(), "http".to_owned()),
("prometheus.io/scrape".to_owned(), "true".to_owned()),
])
.expect("should be valid annotations")
}
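
A side note on the port handling above: `metrics_port_from_rolegroup_config` tolerates an unparsable configured port by logging and falling back to the built-in default instead of failing reconciliation. A self-contained sketch of that fallback behaviour, with a hypothetical default value standing in for `METRICS_PROVIDER_HTTP_PORT`:

```rust
use std::str::FromStr;

// Hypothetical stand-in for METRICS_PROVIDER_HTTP_PORT (defined in crd/mod.rs).
const DEFAULT_METRICS_PORT: u16 = 7000;

/// Parse a configured metrics port, falling back to the default on invalid input,
/// mirroring the error-tolerant match in `metrics_port_from_rolegroup_config`.
fn parse_metrics_port(configured: &str) -> u16 {
    match u16::from_str(configured) {
        Ok(port) => port,
        Err(err) => {
            eprintln!("invalid metrics port {configured:?} ({err}); defaulting to {DEFAULT_METRICS_PORT}");
            DEFAULT_METRICS_PORT
        }
    }
}

fn main() {
    assert_eq!(parse_metrics_port("9505"), 9505);
    assert_eq!(parse_metrics_port("not-a-port"), DEFAULT_METRICS_PORT);
}
```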