From eee69fe46fc77a5657d1b530af1242b03fa07279 Mon Sep 17 00:00:00 2001 From: Benedikt Labrenz Date: Wed, 11 Jun 2025 15:44:47 +0200 Subject: [PATCH 01/18] add integration test --- deploy/helm/druid-operator/crds/crds.yaml | 53 +++-- rust/operator-binary/src/crd/mod.rs | 143 +++++++++---- rust/operator-binary/src/crd/security.rs | 31 +-- rust/operator-binary/src/druid_controller.rs | 193 +++++++++++------- .../kuttl/external-access/00-patch-ns.yaml.j2 | 9 + .../kuttl/external-access/10-assert.yaml.j2 | 10 + ...tor-aggregator-discovery-configmap.yaml.j2 | 9 + .../kuttl/external-access/20-assert.yaml | 12 ++ .../external-access/20-install-postgres.yaml | 10 + .../20_helm-bitnami-postgresql-values.yaml.j2 | 28 +++ .../kuttl/external-access/30-assert.yaml | 17 ++ .../external-access/30-install-zk.yaml.j2 | 36 ++++ .../kuttl/external-access/40-assert.yaml | 28 +++ .../external-access/40-install-hdfs.yaml.j2 | 42 ++++ .../45-create-listener-classes.yaml | 6 + .../external-access/45_listener-classes.yaml | 21 ++ .../kuttl/external-access/50-assert.yaml | 91 +++++++++ .../external-access/50-install-druid.yaml | 7 + .../kuttl/external-access/50_druid.yaml.j2 | 97 +++++++++ 19 files changed, 708 insertions(+), 135 deletions(-) create mode 100644 tests/templates/kuttl/external-access/00-patch-ns.yaml.j2 create mode 100644 tests/templates/kuttl/external-access/10-assert.yaml.j2 create mode 100644 tests/templates/kuttl/external-access/10-install-vector-aggregator-discovery-configmap.yaml.j2 create mode 100644 tests/templates/kuttl/external-access/20-assert.yaml create mode 100644 tests/templates/kuttl/external-access/20-install-postgres.yaml create mode 100644 tests/templates/kuttl/external-access/20_helm-bitnami-postgresql-values.yaml.j2 create mode 100644 tests/templates/kuttl/external-access/30-assert.yaml create mode 100644 tests/templates/kuttl/external-access/30-install-zk.yaml.j2 create mode 100644 tests/templates/kuttl/external-access/40-assert.yaml create mode 100644 tests/templates/kuttl/external-access/40-install-hdfs.yaml.j2 create mode 100644 tests/templates/kuttl/external-access/45-create-listener-classes.yaml create mode 100644 tests/templates/kuttl/external-access/45_listener-classes.yaml create mode 100644 tests/templates/kuttl/external-access/50-assert.yaml create mode 100644 tests/templates/kuttl/external-access/50-install-druid.yaml create mode 100644 tests/templates/kuttl/external-access/50_druid.yaml.j2 diff --git a/deploy/helm/druid-operator/crds/crds.yaml b/deploy/helm/druid-operator/crds/crds.yaml index 935e575e..0ecede04 100644 --- a/deploy/helm/druid-operator/crds/crds.yaml +++ b/deploy/helm/druid-operator/crds/crds.yaml @@ -70,6 +70,10 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). nullable: true type: string + listenerClass: + description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the brokers. + nullable: true + type: string logging: default: containers: {} @@ -318,6 +322,10 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). 
nullable: true type: string + listenerClass: + description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the brokers. + nullable: true + type: string logging: default: containers: {} @@ -879,19 +887,6 @@ spec: type: string type: object type: object - listenerClass: - default: cluster-internal - description: |- - This field controls which type of Service the Operator creates for this DruidCluster: - - * `cluster-internal`: Use a ClusterIP service * `external-unstable`: Use a NodePort service * `external-stable`: Use a LoadBalancer service - - This is a temporary solution with the goal to keep yaml manifests forward compatible. In the future, this setting will control which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) will be used to expose the service, and ListenerClass names will stay the same, allowing for a non-breaking change. - enum: - - cluster-internal - - external-unstable - - external-stable - type: string metadataStorageDatabase: description: Druid requires an SQL database to store metadata into. Specify connection information here. properties: @@ -1007,6 +1002,10 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). nullable: true type: string + listenerClass: + description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the coordinators. + nullable: true + type: string logging: default: containers: {} @@ -1255,6 +1254,10 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). nullable: true type: string + listenerClass: + description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the coordinators. + nullable: true + type: string logging: default: containers: {} @@ -1483,6 +1486,10 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). nullable: true type: string + listenerClass: + description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the historicals. + nullable: true + type: string logging: default: containers: {} @@ -1762,6 +1769,10 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). nullable: true type: string + listenerClass: + description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the historicals. 
+ nullable: true + type: string logging: default: containers: {} @@ -2069,6 +2080,10 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). nullable: true type: string + listenerClass: + description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the middle managers. + nullable: true + type: string logging: default: containers: {} @@ -2317,6 +2332,10 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). nullable: true type: string + listenerClass: + description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the middle managers. + nullable: true + type: string logging: default: containers: {} @@ -2545,6 +2564,10 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). nullable: true type: string + listenerClass: + description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the routers. + nullable: true + type: string logging: default: containers: {} @@ -2793,6 +2816,10 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). nullable: true type: string + listenerClass: + description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the routers. 
+ nullable: true + type: string logging: default: containers: {} diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 406990bf..89e76edd 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -5,6 +5,7 @@ use product_config::types::PropertyNameKind; use serde::{Deserialize, Serialize}; use snafu::{OptionExt, ResultExt, Snafu}; use stackable_operator::{ + builder::pod::volume::ListenerOperatorVolumeSourceBuilderError, client::Client, commons::{ affinity::StackableAffinity, @@ -27,7 +28,9 @@ use stackable_operator::{ framework::{create_vector_shutdown_file_command, remove_vector_shutdown_file_command}, spec::Logging, }, - role_utils::{CommonConfiguration, GenericRoleConfig, JavaCommonConfig, Role, RoleGroup}, + role_utils::{ + CommonConfiguration, GenericRoleConfig, JavaCommonConfig, Role, RoleGroup, RoleGroupRef, + }, schemars::{self, JsonSchema}, status::condition::{ClusterCondition, HasStatusCondition}, time::Duration, @@ -126,6 +129,7 @@ pub const MAX_DRUID_LOG_FILES_SIZE: MemoryQuantity = MemoryQuantity { }; // metrics pub const PROMETHEUS_PORT: &str = "druid.emitter.prometheus.port"; +pub const METRICS_PORT_NAME: &str = "metrics"; pub const METRICS_PORT: u16 = 9090; pub const COOKIE_PASSPHRASE_ENV: &str = "OIDC_COOKIE_PASSPHRASE"; @@ -149,6 +153,10 @@ const DEFAULT_MIDDLE_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(1 const DEFAULT_ROUTER_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(1); const DEFAULT_HISTORICAL_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(1); +// listener +pub const LISTENER_VOLUME_NAME: &str = "listener"; +pub const LISTENER_VOLUME_DIR: &str = "/stackable/listener"; + #[derive(Snafu, Debug, EnumDiscriminants)] #[strum_discriminants(derive(IntoStaticStr))] #[allow(clippy::enum_variant_names)] @@ -177,6 +185,21 @@ pub enum Error { #[snafu(display("fragment validation failure"))] FragmentValidationFailure { source: ValidationError }, + + #[snafu(display("failed to build Labels"))] + LabelBuild { + source: stackable_operator::kvp::LabelError, + }, + + #[snafu(display("failed to build listener volume"))] + BuildListenerVolume { + source: ListenerOperatorVolumeSourceBuilderError, + }, + + #[snafu(display("failed to apply group listener"))] + ApplyGroupListener { + source: stackable_operator::cluster_resources::Error, + }, } #[versioned(version(name = "v1alpha1"))] @@ -287,19 +310,6 @@ pub mod versioned { #[serde(default, skip_serializing_if = "Vec::is_empty")] #[schemars(schema_with = "raw_object_list_schema")] pub extra_volumes: Vec, - - /// This field controls which type of Service the Operator creates for this DruidCluster: - /// - /// * `cluster-internal`: Use a ClusterIP service - /// * `external-unstable`: Use a NodePort service - /// * `external-stable`: Use a LoadBalancer service - /// - /// This is a temporary solution with the goal to keep yaml manifests forward compatible. - /// In the future, this setting will control which - /// [ListenerClass](DOCS_BASE_URL_PLACEHOLDER/listener-operator/listenerclass.html) - /// will be used to expose the service, and ListenerClass names will stay the same, allowing for a non-breaking change. - #[serde(default)] - pub listener_class: CurrentlySupportedListenerClasses, } } @@ -440,6 +450,22 @@ impl v1alpha1::DruidCluster { .collect() } + /// The name of the group-listener provided for a specific role-group. 
+ /// Webservers will use this group listener so that only one load balancer + /// is needed (per role group). + pub fn group_listener_name( + &self, + role: &DruidRole, + rolegroup: &RoleGroupRef, + ) -> Option { + match role { + DruidRole::Coordinator | DruidRole::Broker | DruidRole::Router => { + Some(rolegroup.object_name()) + } + DruidRole::Historical | DruidRole::MiddleManager => None, + } + } + /// The name of the role-level load-balanced Kubernetes `Service` pub fn role_service_name(&self, role: &DruidRole) -> Option { Some(format!("{}-{}", self.metadata.name.clone()?, role)) @@ -583,6 +609,47 @@ impl v1alpha1::DruidCluster { }) } + pub fn merged_listener_class( + &self, + role: &DruidRole, + rolegroup_name: &String, + ) -> Result, Error> { + let merged_config = self.merged_config()?; + match role { + &DruidRole::Broker => Ok(Some( + merged_config + .brokers + .get(rolegroup_name) + .context(CannotRetrieveRoleGroupSnafu { rolegroup_name })? + .config + .config + .listener_class + .to_owned(), + )), + &DruidRole::Coordinator => Ok(Some( + merged_config + .coordinators + .get(rolegroup_name) + .context(CannotRetrieveRoleGroupSnafu { rolegroup_name })? + .config + .config + .listener_class + .to_owned(), + )), + &DruidRole::Router => Ok(Some( + merged_config + .routers + .get(rolegroup_name) + .context(CannotRetrieveRoleGroupSnafu { rolegroup_name })? + .config + .config + .listener_class + .to_owned(), + )), + &DruidRole::Historical | DruidRole::MiddleManager => Ok(None), + } + } + /// Merges and validates the role groups of the given role with the given default configuration fn merged_role( role: &Role, @@ -754,29 +821,6 @@ pub enum Container { Vector, } -// TODO: Temporary solution until listener-operator is finished -#[derive(Clone, Debug, Default, Display, Deserialize, Eq, JsonSchema, PartialEq, Serialize)] -#[serde(rename_all = "PascalCase")] -pub enum CurrentlySupportedListenerClasses { - #[default] - #[serde(rename = "cluster-internal")] - ClusterInternal, - #[serde(rename = "external-unstable")] - ExternalUnstable, - #[serde(rename = "external-stable")] - ExternalStable, -} - -impl CurrentlySupportedListenerClasses { - pub fn k8s_service_type(&self) -> String { - match self { - CurrentlySupportedListenerClasses::ClusterInternal => "ClusterIP".to_string(), - CurrentlySupportedListenerClasses::ExternalUnstable => "NodePort".to_string(), - CurrentlySupportedListenerClasses::ExternalStable => "LoadBalancer".to_string(), - } - } -} - /// Common configuration for all role groups pub struct CommonRoleGroupConfig { pub resources: RoleResource, @@ -1173,6 +1217,10 @@ pub struct BrokerConfig { /// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. #[fragment_attrs(serde(default))] pub requested_secret_lifetime: Option, + + /// This field controls which [ListenerClass](DOCS_BASE_URL_PLACEHOLDER/listener-operator/listenerclass.html) is used to expose the brokers. + #[serde(default)] + pub listener_class: String, } impl BrokerConfig { @@ -1187,6 +1235,7 @@ impl BrokerConfig { affinity: get_affinity(cluster_name, role, deep_storage), graceful_shutdown_timeout: Some(role.default_graceful_shutdown_timeout()), requested_secret_lifetime: Some(DEFAULT_BROKER_SECRET_LIFETIME), + listener_class: Some("cluster-internal".to_owned()), } } } @@ -1222,6 +1271,10 @@ pub struct CoordinatorConfig { /// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. 
#[fragment_attrs(serde(default))] pub requested_secret_lifetime: Option, + + /// This field controls which [ListenerClass](DOCS_BASE_URL_PLACEHOLDER/listener-operator/listenerclass.html) is used to expose the coordinators. + #[serde(default)] + pub listener_class: String, } impl CoordinatorConfig { @@ -1236,6 +1289,7 @@ impl CoordinatorConfig { affinity: get_affinity(cluster_name, role, deep_storage), graceful_shutdown_timeout: Some(role.default_graceful_shutdown_timeout()), requested_secret_lifetime: Some(DEFAULT_COORDINATOR_SECRET_LIFETIME), + listener_class: Some("cluster-internal".to_owned()), } } } @@ -1271,6 +1325,10 @@ pub struct MiddleManagerConfig { /// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. #[fragment_attrs(serde(default))] pub requested_secret_lifetime: Option, + + /// This field controls which [ListenerClass](DOCS_BASE_URL_PLACEHOLDER/listener-operator/listenerclass.html) is used to expose the middle managers. + #[serde(default)] + pub listener_class: String, } impl MiddleManagerConfig { @@ -1285,6 +1343,7 @@ impl MiddleManagerConfig { affinity: get_affinity(cluster_name, role, deep_storage), graceful_shutdown_timeout: Some(role.default_graceful_shutdown_timeout()), requested_secret_lifetime: Some(DEFAULT_MIDDLE_SECRET_LIFETIME), + listener_class: Some("cluster-internal".to_owned()), } } } @@ -1320,6 +1379,10 @@ pub struct RouterConfig { /// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. #[fragment_attrs(serde(default))] pub requested_secret_lifetime: Option, + + /// This field controls which [ListenerClass](DOCS_BASE_URL_PLACEHOLDER/listener-operator/listenerclass.html) is used to expose the routers. + #[serde(default)] + pub listener_class: String, } impl RouterConfig { @@ -1334,6 +1397,7 @@ impl RouterConfig { affinity: get_affinity(cluster_name, role, deep_storage), graceful_shutdown_timeout: Some(role.default_graceful_shutdown_timeout()), requested_secret_lifetime: Some(DEFAULT_ROUTER_SECRET_LIFETIME), + listener_class: Some("cluster-internal".to_owned()), } } } @@ -1369,6 +1433,10 @@ pub struct HistoricalConfig { /// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. #[fragment_attrs(serde(default))] pub requested_secret_lifetime: Option, + + /// This field controls which [ListenerClass](DOCS_BASE_URL_PLACEHOLDER/listener-operator/listenerclass.html) is used to expose the historicals. 
+ #[serde(default)] + pub listener_class: String, } impl HistoricalConfig { @@ -1383,6 +1451,7 @@ impl HistoricalConfig { affinity: get_affinity(cluster_name, role, deep_storage), graceful_shutdown_timeout: Some(role.default_graceful_shutdown_timeout()), requested_secret_lifetime: Some(DEFAULT_HISTORICAL_SECRET_LIFETIME), + listener_class: Some("cluster-internal".to_owned()), } } } diff --git a/rust/operator-binary/src/crd/security.rs b/rust/operator-binary/src/crd/security.rs index aedc6a7f..6f751b3d 100644 --- a/rust/operator-binary/src/crd/security.rs +++ b/rust/operator-binary/src/crd/security.rs @@ -13,15 +13,16 @@ use stackable_operator::{ }, }, }, + crd::listener, k8s_openapi::{ - api::core::v1::{ContainerPort, Probe, ServicePort, TCPSocketAction}, + api::core::v1::{ContainerPort, Probe, TCPSocketAction}, apimachinery::pkg::util::intstr::IntOrString, }, time::Duration, }; use crate::crd::{ - DruidRole, METRICS_PORT, STACKABLE_TRUST_STORE, STACKABLE_TRUST_STORE_PASSWORD, + DruidRole, STACKABLE_TRUST_STORE, STACKABLE_TRUST_STORE_PASSWORD, authentication::{self, AuthenticationClassesResolved}, v1alpha1, }; @@ -59,7 +60,6 @@ const TLS_PORT: &str = "druid.tlsPort"; // Port names const PLAINTEXT_PORT_NAME: &str = "http"; const TLS_PORT_NAME: &str = "https"; -const METRICS_PORT_NAME: &str = "metrics"; // Client side (Druid) TLS const CLIENT_HTTPS_KEY_STORE_PATH: &str = "druid.client.https.keyStorePath"; const CLIENT_HTTPS_KEY_STORE_TYPE: &str = "druid.client.https.keyStoreType"; @@ -162,28 +162,29 @@ impl DruidTlsSecurity { .collect() } - pub fn service_ports(&self, role: &DruidRole) -> Vec { - self.exposed_ports(role) + pub fn listener_ports( + &self, + role: &DruidRole, + ) -> Option> { + let listener_ports = self + .exposed_ports(role) .into_iter() - .map(|(name, val)| ServicePort { - name: Some(name), + .map(|(name, val)| listener::v1alpha1::ListenerPort { + name, port: val.into(), protocol: Some("TCP".to_string()), - ..ServicePort::default() }) - .collect() + .collect(); + + Some(listener_ports) } fn exposed_ports(&self, role: &DruidRole) -> Vec<(String, u16)> { - let mut ports = vec![(METRICS_PORT_NAME.to_string(), METRICS_PORT)]; - if self.tls_enabled() { - ports.push((TLS_PORT_NAME.to_string(), role.get_https_port())); + vec![(TLS_PORT_NAME.to_string(), role.get_https_port())] } else { - ports.push((PLAINTEXT_PORT_NAME.to_string(), role.get_http_port())); + vec![(PLAINTEXT_PORT_NAME.to_string(), role.get_http_port())] } - - ports } /// Adds required tls volume mounts to image and product container builders diff --git a/rust/operator-binary/src/druid_controller.rs b/rust/operator-binary/src/druid_controller.rs index d00ac0e0..a8fd0362 100644 --- a/rust/operator-binary/src/druid_controller.rs +++ b/rust/operator-binary/src/druid_controller.rs @@ -20,8 +20,14 @@ use stackable_operator::{ configmap::ConfigMapBuilder, meta::ObjectMetaBuilder, pod::{ - PodBuilder, container::ContainerBuilder, resources::ResourceRequirementsBuilder, - security::PodSecurityContextBuilder, volume::VolumeBuilder, + PodBuilder, + container::ContainerBuilder, + resources::ResourceRequirementsBuilder, + security::PodSecurityContextBuilder, + volume::{ + ListenerOperatorVolumeSourceBuilder, ListenerOperatorVolumeSourceBuilderError, + ListenerReference, VolumeBuilder, + }, }, }, cluster_resources::{ClusterResourceApplyStrategy, ClusterResources}, @@ -29,12 +35,15 @@ use stackable_operator::{ opa::OpaApiVersion, product_image_selection::ResolvedProductImage, rbac::build_rbac_resources, 
tls_verification::TlsClientDetailsError, }, - crd::s3, + crd::{listener, s3}, k8s_openapi::{ DeepMerge, api::{ apps::v1::{StatefulSet, StatefulSetSpec}, - core::v1::{ConfigMap, EnvVar, Service, ServiceAccount, ServiceSpec}, + core::v1::{ + ConfigMap, EnvVar, PersistentVolumeClaim, Service, ServiceAccount, ServicePort, + ServiceSpec, + }, }, apimachinery::pkg::apis::meta::v1::LabelSelector, }, @@ -70,7 +79,8 @@ use crate::{ APP_NAME, AUTH_AUTHORIZER_OPA_URI, CREDENTIALS_SECRET_PROPERTY, CommonRoleGroupConfig, Container, DB_PASSWORD_ENV, DB_USERNAME_ENV, DRUID_CONFIG_DIRECTORY, DS_BUCKET, DeepStorageSpec, DruidClusterStatus, DruidRole, EXTENSIONS_LOADLIST, HDFS_CONFIG_DIRECTORY, - JVM_CONFIG, JVM_SECURITY_PROPERTIES_FILE, LOG_CONFIG_DIRECTORY, MAX_DRUID_LOG_FILES_SIZE, + JVM_CONFIG, JVM_SECURITY_PROPERTIES_FILE, LISTENER_VOLUME_DIR, LISTENER_VOLUME_NAME, + LOG_CONFIG_DIRECTORY, MAX_DRUID_LOG_FILES_SIZE, METRICS_PORT, METRICS_PORT_NAME, OPERATOR_NAME, RUNTIME_PROPS, RW_CONFIG_DIRECTORY, S3_ACCESS_KEY, S3_ENDPOINT_URL, S3_PATH_STYLE_ACCESS, S3_SECRET_KEY, STACKABLE_LOG_DIR, ZOOKEEPER_CONNECTION_STRING, authentication::AuthenticationClassesResolved, authorization::DruidAuthorization, @@ -359,6 +369,16 @@ pub enum Error { InvalidDruidCluster { source: error_boundary::InvalidObject, }, + + #[snafu(display("failed to build listener volume"))] + BuildListenerVolume { + source: ListenerOperatorVolumeSourceBuilderError, + }, + + #[snafu(display("failed to apply group listener"))] + ApplyGroupListener { + source: stackable_operator::cluster_resources::Error, + }, } type Result = std::result::Result; @@ -496,17 +516,6 @@ pub async fn reconcile_druid( role: role_name.to_string(), })?; - let role_service = build_role_service( - druid, - &resolved_product_image, - &druid_role, - &druid_tls_security, - )?; - cluster_resources - .add(client, role_service) - .await - .context(ApplyRoleServiceSnafu)?; - create_shared_internal_secret(druid, client, DRUID_CONTROLLER_NAME) .await .context(FailedInternalSecretCreationSnafu)?; @@ -522,12 +531,7 @@ pub async fn reconcile_druid( .common_config(&druid_role, rolegroup_name) .context(FailedToResolveConfigSnafu)?; - let rg_service = build_rolegroup_services( - druid, - &resolved_product_image, - &rolegroup, - &druid_tls_security, - )?; + let rg_service = build_rolegroup_service(druid, &resolved_product_image, &rolegroup)?; let rg_configmap = build_rolegroup_config_map( druid, &resolved_product_image, @@ -553,6 +557,28 @@ pub async fn reconcile_druid( &druid_auth_config, &rbac_sa, )?; + if let Some(listener_class) = druid + .merged_listener_class(&druid_role, &rolegroup.role_group) + .context(FailedToResolveConfigSnafu)? + { + if let Some(listener_group_name) = + druid.group_listener_name(&druid_role, &rolegroup) + { + let rg_group_listener = build_group_listener( + druid, + &resolved_product_image, + &druid_role, + &rolegroup, + listener_class, + listener_group_name, + &druid_tls_security, + )?; + cluster_resources + .add(client, rg_group_listener) + .await + .context(ApplyGroupListenerSnafu)?; + } + } cluster_resources .add(client, rg_service) .await @@ -624,49 +650,6 @@ pub async fn reconcile_druid( Ok(Action::await_change()) } -/// The server-role service is the primary endpoint that should be used by clients that do not perform internal load balancing, -/// including targets outside of the cluster. 
-pub fn build_role_service( - druid: &v1alpha1::DruidCluster, - resolved_product_image: &ResolvedProductImage, - role: &DruidRole, - druid_tls_security: &DruidTlsSecurity, -) -> Result { - let role_name = role.to_string(); - let role_svc_name = format!( - "{}-{}", - druid.metadata.name.as_ref().unwrap_or(&"druid".to_string()), - role_name - ); - Ok(Service { - metadata: ObjectMetaBuilder::new() - .name_and_namespace(druid) - .name(&role_svc_name) - .ownerreference_from_resource(druid, None, Some(true)) - .context(ObjectMissingMetadataForOwnerRefSnafu)? - .with_recommended_labels(build_recommended_labels( - druid, - DRUID_CONTROLLER_NAME, - &resolved_product_image.app_version_label, - &role_name, - "global", - )) - .context(MetadataBuildSnafu)? - .build(), - spec: Some(ServiceSpec { - type_: Some(druid.spec.cluster_config.listener_class.k8s_service_type()), - ports: Some(druid_tls_security.service_ports(role)), - selector: Some( - Labels::role_selector(druid, APP_NAME, &role_name) - .context(LabelBuildSnafu)? - .into(), - ), - ..ServiceSpec::default() - }), - status: None, - }) -} - #[allow(clippy::too_many_arguments)] /// The rolegroup [`ConfigMap`] configures the rolegroup based on the configuration given by the administrator fn build_rolegroup_config_map( @@ -862,18 +845,22 @@ fn build_rolegroup_config_map( /// The rolegroup [`Service`] is a headless service that allows direct access to the instances of a certain rolegroup /// /// This is mostly useful for internal communication between peers, or for clients that perform client-side load balancing. -fn build_rolegroup_services( +fn build_rolegroup_service( druid: &v1alpha1::DruidCluster, resolved_product_image: &ResolvedProductImage, rolegroup: &RoleGroupRef, - druid_tls_security: &DruidTlsSecurity, ) -> Result { - let role = DruidRole::from_str(&rolegroup.role).unwrap(); + let ports = vec![ServicePort { + name: Some(METRICS_PORT_NAME.into()), + port: METRICS_PORT.into(), + protocol: Some("TCP".to_string()), + ..Default::default() + }]; Ok(Service { metadata: ObjectMetaBuilder::new() .name_and_namespace(druid) - .name(rolegroup.object_name()) + .name(format!("{name}-metrics", name = rolegroup.object_name())) .ownerreference_from_resource(druid, None, Some(true)) .context(ObjectMissingMetadataForOwnerRefSnafu)? .with_recommended_labels(build_recommended_labels( @@ -890,7 +877,7 @@ fn build_rolegroup_services( // Internal communication does not need to be exposed type_: Some("ClusterIP".to_string()), cluster_ip: Some("None".to_string()), - ports: Some(druid_tls_security.service_ports(&role)), + ports: Some(ports), selector: Some( Labels::role_group_selector( druid, @@ -908,6 +895,39 @@ fn build_rolegroup_services( }) } +pub fn build_group_listener( + druid: &v1alpha1::DruidCluster, + resolved_product_image: &ResolvedProductImage, + role: &DruidRole, + rolegroup: &RoleGroupRef, + listener_class: String, + listener_group_name: String, + druid_tls_security: &DruidTlsSecurity, +) -> Result { + Ok(listener::v1alpha1::Listener { + metadata: ObjectMetaBuilder::new() + .name_and_namespace(druid) + .name(listener_group_name) + .ownerreference_from_resource(druid, None, Some(true)) + .context(ObjectMissingMetadataForOwnerRefSnafu)? + .with_recommended_labels(build_recommended_labels( + druid, + DRUID_CONTROLLER_NAME, + &resolved_product_image.app_version_label, + &rolegroup.role, + &rolegroup.role_group, + )) + .context(ObjectMissingMetadataForOwnerRefSnafu)? 
+ .build(), + spec: listener::v1alpha1::ListenerSpec { + class_name: Some(listener_class), + ports: druid_tls_security.listener_ports(role), + ..listener::v1alpha1::ListenerSpec::default() + }, + status: None, + }) +} + #[allow(clippy::too_many_arguments)] /// The rolegroup [`StatefulSet`] runs the rolegroup, as configured by the administrator. /// @@ -1123,6 +1143,38 @@ fn build_rolegroup_statefulset( .context(AddVolumeMountSnafu)?; } + // Used for PVC templates that cannot be modified once they are deployed + let unversioned_recommended_labels = Labels::recommended(build_recommended_labels( + druid, + DRUID_CONTROLLER_NAME, + // A version value is required, and we do want to use the "recommended" format for the other desired labels + "none", + &rolegroup_ref.role, + &rolegroup_ref.role_group, + )) + .context(LabelBuildSnafu)?; + + let mut pvcs: Option> = None; + + if let Some(listener_group_name) = druid.group_listener_name(role, rolegroup_ref) { + // Listener endpoints for the Webserver role will use persistent volumes + // so that load balancers can hard-code the target addresses. This will + // be the case even when no class is set (and the value defaults to + // cluster-internal) as the address should still be consistent. + let listener_pvc = ListenerOperatorVolumeSourceBuilder::new( + &ListenerReference::ListenerName(listener_group_name), + &unversioned_recommended_labels, + ) + .context(BuildListenerVolumeSnafu)? + .build_pvc(LISTENER_VOLUME_NAME.to_string()) + .context(BuildListenerVolumeSnafu)?; + pvcs = Some(vec![listener_pvc]); + + cb_druid + .add_volume_mount(LISTENER_VOLUME_NAME, LISTENER_VOLUME_DIR) + .context(AddVolumeMountSnafu)?; + } + let metadata = ObjectMetaBuilder::new() .with_recommended_labels(build_recommended_labels( druid, @@ -1210,6 +1262,7 @@ fn build_rolegroup_statefulset( }, service_name: Some(rolegroup_ref.object_name()), template: pod_template, + volume_claim_templates: pvcs, ..StatefulSetSpec::default() }), status: None, diff --git a/tests/templates/kuttl/external-access/00-patch-ns.yaml.j2 b/tests/templates/kuttl/external-access/00-patch-ns.yaml.j2 new file mode 100644 index 00000000..67185acf --- /dev/null +++ b/tests/templates/kuttl/external-access/00-patch-ns.yaml.j2 @@ -0,0 +1,9 @@ +{% if test_scenario['values']['openshift'] == 'true' %} +# see https://github.com/stackabletech/issues/issues/566 +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: kubectl patch namespace $NAMESPACE -p '{"metadata":{"labels":{"pod-security.kubernetes.io/enforce":"privileged"}}}' + timeout: 120 +{% endif %} diff --git a/tests/templates/kuttl/external-access/10-assert.yaml.j2 b/tests/templates/kuttl/external-access/10-assert.yaml.j2 new file mode 100644 index 00000000..50b1d4c3 --- /dev/null +++ b/tests/templates/kuttl/external-access/10-assert.yaml.j2 @@ -0,0 +1,10 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +{% if lookup('env', 'VECTOR_AGGREGATOR') %} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: vector-aggregator-discovery +{% endif %} diff --git a/tests/templates/kuttl/external-access/10-install-vector-aggregator-discovery-configmap.yaml.j2 b/tests/templates/kuttl/external-access/10-install-vector-aggregator-discovery-configmap.yaml.j2 new file mode 100644 index 00000000..2d6a0df5 --- /dev/null +++ b/tests/templates/kuttl/external-access/10-install-vector-aggregator-discovery-configmap.yaml.j2 @@ -0,0 +1,9 @@ +{% if lookup('env', 'VECTOR_AGGREGATOR') %} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: 
vector-aggregator-discovery +data: + ADDRESS: {{ lookup('env', 'VECTOR_AGGREGATOR') }} +{% endif %} diff --git a/tests/templates/kuttl/external-access/20-assert.yaml b/tests/templates/kuttl/external-access/20-assert.yaml new file mode 100644 index 00000000..7702af69 --- /dev/null +++ b/tests/templates/kuttl/external-access/20-assert.yaml @@ -0,0 +1,12 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 480 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: druid-postgresql +status: + readyReplicas: 1 + replicas: 1 diff --git a/tests/templates/kuttl/external-access/20-install-postgres.yaml b/tests/templates/kuttl/external-access/20-install-postgres.yaml new file mode 100644 index 00000000..5a9a0a01 --- /dev/null +++ b/tests/templates/kuttl/external-access/20-install-postgres.yaml @@ -0,0 +1,10 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: >- + helm install druid-postgresql + --namespace $NAMESPACE + --version 12.5.6 + -f 20_helm-bitnami-postgresql-values.yaml + --repo https://charts.bitnami.com/bitnami postgresql diff --git a/tests/templates/kuttl/external-access/20_helm-bitnami-postgresql-values.yaml.j2 b/tests/templates/kuttl/external-access/20_helm-bitnami-postgresql-values.yaml.j2 new file mode 100644 index 00000000..6ff8b187 --- /dev/null +++ b/tests/templates/kuttl/external-access/20_helm-bitnami-postgresql-values.yaml.j2 @@ -0,0 +1,28 @@ +--- +volumePermissions: + enabled: false + securityContext: + runAsUser: auto + +primary: + extendedConfiguration: | + password_encryption=md5 + podSecurityContext: +{% if test_scenario['values']['openshift'] == 'true' %} + enabled: false +{% else %} + enabled: true +{% endif %} + containerSecurityContext: + enabled: false + resources: + requests: + memory: "128Mi" + cpu: "512m" + limits: + memory: "128Mi" + cpu: "1" +auth: + username: druid + password: druid + database: druid diff --git a/tests/templates/kuttl/external-access/30-assert.yaml b/tests/templates/kuttl/external-access/30-assert.yaml new file mode 100644 index 00000000..f21094ef --- /dev/null +++ b/tests/templates/kuttl/external-access/30-assert.yaml @@ -0,0 +1,17 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: druid-zk-server-default +status: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: hdfs-znode diff --git a/tests/templates/kuttl/external-access/30-install-zk.yaml.j2 b/tests/templates/kuttl/external-access/30-install-zk.yaml.j2 new file mode 100644 index 00000000..1b307f07 --- /dev/null +++ b/tests/templates/kuttl/external-access/30-install-zk.yaml.j2 @@ -0,0 +1,36 @@ +--- +apiVersion: zookeeper.stackable.tech/v1alpha1 +kind: ZookeeperCluster +metadata: + name: druid-zk +spec: + image: + productVersion: "{{ test_scenario['values']['zookeeper-latest'] }}" + pullPolicy: IfNotPresent +{% if lookup('env', 'VECTOR_AGGREGATOR') %} + clusterConfig: + vectorAggregatorConfigMapName: vector-aggregator-discovery +{% endif %} + servers: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 1 +--- +apiVersion: zookeeper.stackable.tech/v1alpha1 +kind: ZookeeperZnode +metadata: + name: druid-znode +spec: + clusterRef: + name: druid-zk +--- +apiVersion: zookeeper.stackable.tech/v1alpha1 +kind: ZookeeperZnode +metadata: + name: hdfs-znode +spec: + clusterRef: + name: druid-zk diff --git 
a/tests/templates/kuttl/external-access/40-assert.yaml b/tests/templates/kuttl/external-access/40-assert.yaml new file mode 100644 index 00000000..7138c1b0 --- /dev/null +++ b/tests/templates/kuttl/external-access/40-assert.yaml @@ -0,0 +1,28 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: druid-hdfs-namenode-default +status: + readyReplicas: 2 + replicas: 2 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: druid-hdfs-journalnode-default +status: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: druid-hdfs-datanode-default +status: + readyReplicas: 1 + replicas: 1 diff --git a/tests/templates/kuttl/external-access/40-install-hdfs.yaml.j2 b/tests/templates/kuttl/external-access/40-install-hdfs.yaml.j2 new file mode 100644 index 00000000..f823394b --- /dev/null +++ b/tests/templates/kuttl/external-access/40-install-hdfs.yaml.j2 @@ -0,0 +1,42 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +metadata: + name: druid-hdfs +timeout: 600 +--- +apiVersion: hdfs.stackable.tech/v1alpha1 +kind: HdfsCluster +metadata: + name: druid-hdfs +spec: + image: + productVersion: "{{ test_scenario['values']['hadoop'] }}" + pullPolicy: IfNotPresent + clusterConfig: + dfsReplication: 1 +{% if lookup('env', 'VECTOR_AGGREGATOR') %} + vectorAggregatorConfigMapName: vector-aggregator-discovery +{% endif %} + zookeeperConfigMapName: hdfs-znode + nameNodes: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 2 + dataNodes: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 1 + journalNodes: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 1 diff --git a/tests/templates/kuttl/external-access/45-create-listener-classes.yaml b/tests/templates/kuttl/external-access/45-create-listener-classes.yaml new file mode 100644 index 00000000..d6e0c781 --- /dev/null +++ b/tests/templates/kuttl/external-access/45-create-listener-classes.yaml @@ -0,0 +1,6 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: | + envsubst < 45_listener-classes.yaml | kubectl apply -n $NAMESPACE -f - diff --git a/tests/templates/kuttl/external-access/45_listener-classes.yaml b/tests/templates/kuttl/external-access/45_listener-classes.yaml new file mode 100644 index 00000000..4131526a --- /dev/null +++ b/tests/templates/kuttl/external-access/45_listener-classes.yaml @@ -0,0 +1,21 @@ +--- +apiVersion: listeners.stackable.tech/v1alpha1 +kind: ListenerClass +metadata: + name: test-cluster-internal-$NAMESPACE +spec: + serviceType: ClusterIP +--- +apiVersion: listeners.stackable.tech/v1alpha1 +kind: ListenerClass +metadata: + name: test-external-stable-$NAMESPACE +spec: + serviceType: NodePort +--- +apiVersion: listeners.stackable.tech/v1alpha1 +kind: ListenerClass +metadata: + name: test-external-unstable-$NAMESPACE +spec: + serviceType: NodePort diff --git a/tests/templates/kuttl/external-access/50-assert.yaml b/tests/templates/kuttl/external-access/50-assert.yaml new file mode 100644 index 00000000..e1b73482 --- /dev/null +++ b/tests/templates/kuttl/external-access/50-assert.yaml @@ -0,0 +1,91 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +commands: + - script: kubectl -n $NAMESPACE wait --for=condition=available 
druidclusters.druid.stackable.tech/druid --timeout 301s
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: druid-broker-default
+status:
+  readyReplicas: 1
+  replicas: 1
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: druid-coordinator-default
+status:
+  readyReplicas: 1
+  replicas: 1
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: druid-historical-default
+status:
+  readyReplicas: 1
+  replicas: 1
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: druid-middlemanager-default
+status:
+  readyReplicas: 1
+  replicas: 1
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: druid-router-default
+status:
+  readyReplicas: 1
+  replicas: 1
+---
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: druid-broker
+status:
+  expectedPods: 1
+  currentHealthy: 1
+  disruptionsAllowed: 1
+---
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: druid-historical
+status:
+  expectedPods: 1
+  currentHealthy: 1
+  disruptionsAllowed: 1
+---
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: druid-coordinator
+status:
+  expectedPods: 1
+  currentHealthy: 1
+  disruptionsAllowed: 1
+---
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: druid-middlemanager
+status:
+  expectedPods: 1
+  currentHealthy: 1
+  disruptionsAllowed: 1
+---
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: druid-router
+status:
+  expectedPods: 1
+  currentHealthy: 1
+  disruptionsAllowed: 1
diff --git a/tests/templates/kuttl/external-access/50-install-druid.yaml b/tests/templates/kuttl/external-access/50-install-druid.yaml
new file mode 100644
index 00000000..32e99286
--- /dev/null
+++ b/tests/templates/kuttl/external-access/50-install-druid.yaml
@@ -0,0 +1,7 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestStep
+commands:
+  - script: |
+    envsubst < 50_druid.yaml | kubectl apply -n $NAMESPACE -f -
+---
diff --git a/tests/templates/kuttl/external-access/50_druid.yaml.j2 b/tests/templates/kuttl/external-access/50_druid.yaml.j2
new file mode 100644
index 00000000..d34498a0
--- /dev/null
+++ b/tests/templates/kuttl/external-access/50_druid.yaml.j2
@@ -0,0 +1,97 @@
+---
+apiVersion: druid.stackable.tech/v1alpha1
+kind: DruidCluster
+metadata:
+  name: druid
+spec:
+  image:
+{% if test_scenario['values']['druid'].find(",") > 0 %}
+    custom: "{{ test_scenario['values']['druid'].split(',')[1] }}"
+    productVersion: "{{ test_scenario['values']['druid'].split(',')[0] }}"
+{% else %}
+    productVersion: "{{ test_scenario['values']['druid'] }}"
+{% endif %}
+    pullPolicy: IfNotPresent
+  clusterConfig:
+    metadataStorageDatabase:
+      dbType: postgresql
+      connString: jdbc:postgresql://druid-postgresql/druid
+      host: druid-postgresql
+      port: 5432
+      credentialsSecret: druid-credentials
+    deepStorage:
+      hdfs:
+        configMapName: druid-hdfs
+        directory: /druid
+{% if lookup('env', 'VECTOR_AGGREGATOR') %}
+    vectorAggregatorConfigMapName: vector-aggregator-discovery
+{% endif %}
+    zookeeperConfigMapName: druid-znode
+  brokers:
+    config:
+      gracefulShutdownTimeout: 1s # Let the test run faster
+      logging:
+        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
+      listenerClass: test-external-stable-$NAMESPACE
+    roleGroups:
+      default:
+        replicas: 1
+  coordinators:
+    config:
+      gracefulShutdownTimeout: 1s # Let the test run faster
+      logging:
+        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
+      listenerClass: test-external-stable-$NAMESPACE
+    roleGroups:
+      default:
+        replicas: 1
+  historicals:
+ config: + gracefulShutdownTimeout: 1s # Let the test run faster + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 1 + middleManagers: + config: + gracefulShutdownTimeout: 1s # Let the test run faster + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + roleGroups: + default: + replicas: 1 + routers: + config: + gracefulShutdownTimeout: 1s # Let the test run faster + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + listenerClass: test-external-stable-$NAMESPACE + roleGroups: + default: + replicas: 1 +--- +apiVersion: v1 +kind: Secret +metadata: + name: druid-credentials +type: Opaque +stringData: + username: druid + password: druid +--- +apiVersion: listeners.stackable.tech/v1alpha1 +kind: Listener +metadata: + creationTimestamp: "2025-06-11T11:58:17Z" + generation: 1 + labels: + app.kubernetes.io/component: historical + app.kubernetes.io/instance: druid + name: druid-historical-default +spec: + className: test-cluster-internal-kuttl-test-perfect-moray + ports: + - name: https + port: 8283 + protocol: TCP From 3fda7f0408148ec5fcc30273f09430318e0e28a2 Mon Sep 17 00:00:00 2001 From: Benedikt Labrenz Date: Wed, 11 Jun 2025 17:07:46 +0200 Subject: [PATCH 02/18] fix dns issue --- rust/operator-binary/src/druid_controller.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rust/operator-binary/src/druid_controller.rs b/rust/operator-binary/src/druid_controller.rs index a8fd0362..1481a39f 100644 --- a/rust/operator-binary/src/druid_controller.rs +++ b/rust/operator-binary/src/druid_controller.rs @@ -1260,7 +1260,10 @@ fn build_rolegroup_statefulset( ), ..LabelSelector::default() }, - service_name: Some(rolegroup_ref.object_name()), + service_name: Some(format!( + "{name}-metrics", + name = rolegroup_ref.object_name() + )), template: pod_template, volume_claim_templates: pvcs, ..StatefulSetSpec::default() From 2daad12263c41204d3148fe180fbf39d57289472 Mon Sep 17 00:00:00 2001 From: Benedikt Labrenz Date: Thu, 12 Jun 2025 10:23:29 +0200 Subject: [PATCH 03/18] update discovery cm --- rust/operator-binary/src/crd/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 89e76edd..dd85adf7 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -468,7 +468,7 @@ impl v1alpha1::DruidCluster { /// The name of the role-level load-balanced Kubernetes `Service` pub fn role_service_name(&self, role: &DruidRole) -> Option { - Some(format!("{}-{}", self.metadata.name.clone()?, role)) + Some(format!("{}-{}-metrics", self.metadata.name.clone()?, role)) } /// The fully-qualified domain name of the role-level load-balanced Kubernetes `Service` @@ -1668,12 +1668,12 @@ mod tests { assert_eq!( cluster.role_service_name(&DruidRole::Router), - Some("testcluster-router".to_string()) + Some("testcluster-router-metrics".to_string()) ); assert_eq!( cluster.role_service_fqdn(&DruidRole::Router, &dummy_cluster_info), - Some("testcluster-router.default.svc.cluster.local".to_string()) + Some("testcluster-router-metrics.default.svc.cluster.local".to_string()) ) } From 597d0caa4f35a0df29e6203cb51a7fb50d465b89 Mon Sep 17 00:00:00 2001 From: Benedikt Labrenz Date: Thu, 12 Jun 2025 13:52:30 +0200 Subject: [PATCH 04/18] fix rustdoc --- rust/operator-binary/src/druid_controller.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/rust/operator-binary/src/druid_controller.rs b/rust/operator-binary/src/druid_controller.rs index 1481a39f..d98f05c8 100644 --- a/rust/operator-binary/src/druid_controller.rs +++ b/rust/operator-binary/src/druid_controller.rs @@ -931,7 +931,7 @@ pub fn build_group_listener( #[allow(clippy::too_many_arguments)] /// The rolegroup [`StatefulSet`] runs the rolegroup, as configured by the administrator. /// -/// The [`Pod`](`stackable_operator::k8s_openapi::api::core::v1::Pod`)s are accessible through the corresponding [`Service`] (from [`build_rolegroup_services`]). +/// The [`Pod`](`stackable_operator::k8s_openapi::api::core::v1::Pod`)s are accessible through the corresponding [`Service`] (from [`build_rolegroup_service`]). fn build_rolegroup_statefulset( druid: &v1alpha1::DruidCluster, resolved_product_image: &ResolvedProductImage, From b42c0757a7a69427d733fe0401bcc452d48ffcda Mon Sep 17 00:00:00 2001 From: Benedikt Labrenz Date: Thu, 12 Jun 2025 15:08:26 +0200 Subject: [PATCH 05/18] create headless service name in function --- rust/operator-binary/src/druid_controller.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/rust/operator-binary/src/druid_controller.rs b/rust/operator-binary/src/druid_controller.rs index d98f05c8..67e3bdac 100644 --- a/rust/operator-binary/src/druid_controller.rs +++ b/rust/operator-binary/src/druid_controller.rs @@ -842,6 +842,10 @@ fn build_rolegroup_config_map( }) } +pub fn rolegroup_service_name(rolegroup: &RoleGroupRef) -> String { + format!("{name}-metrics", name = rolegroup.object_name()) +} + /// The rolegroup [`Service`] is a headless service that allows direct access to the instances of a certain rolegroup /// /// This is mostly useful for internal communication between peers, or for clients that perform client-side load balancing. @@ -860,7 +864,7 @@ fn build_rolegroup_service( Ok(Service { metadata: ObjectMetaBuilder::new() .name_and_namespace(druid) - .name(format!("{name}-metrics", name = rolegroup.object_name())) + .name(rolegroup_service_name(rolegroup)) .ownerreference_from_resource(druid, None, Some(true)) .context(ObjectMissingMetadataForOwnerRefSnafu)? 
.with_recommended_labels(build_recommended_labels( @@ -1260,10 +1264,7 @@ fn build_rolegroup_statefulset( ), ..LabelSelector::default() }, - service_name: Some(format!( - "{name}-metrics", - name = rolegroup_ref.object_name() - )), + service_name: Some(rolegroup_service_name(rolegroup_ref)), template: pod_template, volume_claim_templates: pvcs, ..StatefulSetSpec::default() From 4fdb065cc75051d540beca1d6b3081ce6aa035d5 Mon Sep 17 00:00:00 2001 From: Benedikt Labrenz Date: Thu, 12 Jun 2025 15:40:42 +0200 Subject: [PATCH 06/18] update integration tests with new headless service name --- tests/templates/kuttl/commons/healthcheck.py | 4 +++- .../kuttl/commons/ingestioncheck-tls.py | 20 +++++++++---------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/tests/templates/kuttl/commons/healthcheck.py b/tests/templates/kuttl/commons/healthcheck.py index ebf9ca3a..0d17a040 100755 --- a/tests/templates/kuttl/commons/healthcheck.py +++ b/tests/templates/kuttl/commons/healthcheck.py @@ -24,7 +24,9 @@ } for role, port in druid_role_ports.items(): - url = f"https://{druid_cluster_name}-{role}-default:{port}/status/health" + url = ( + f"https://{druid_cluster_name}-{role}-default-metrics:{port}/status/health" + ) count = 1 # As this script is intended to be executed by Kuttl which is in charge of overall test timeouts it is ok diff --git a/tests/templates/kuttl/commons/ingestioncheck-tls.py b/tests/templates/kuttl/commons/ingestioncheck-tls.py index 0328d177..079c7761 100755 --- a/tests/templates/kuttl/commons/ingestioncheck-tls.py +++ b/tests/templates/kuttl/commons/ingestioncheck-tls.py @@ -94,7 +94,7 @@ def query_datasource(self, url, sql, expected, iterations): Query tasks ===========""") tasks = druid.get_tasks( - url=f"{protocol}://{druid_cluster_name}-coordinator-default-0.{druid_cluster_name}-coordinator-default.{namespace}.svc.cluster.local:{coordinator_port}/druid/indexer/v1/tasks", + url=f"{protocol}://{druid_cluster_name}-coordinator-default-0.{druid_cluster_name}-coordinator-default-metrics.{namespace}.svc.cluster.local:{coordinator_port}/druid/indexer/v1/tasks", ) task_count = len(json.loads(tasks)) print(f"existing tasks: {task_count}") @@ -103,7 +103,7 @@ def query_datasource(self, url, sql, expected, iterations): Start ingestion task ====================""") ingestion = druid.post_task( - url=f"{protocol}://{druid_cluster_name}-coordinator-default-0.{druid_cluster_name}-coordinator-default.{namespace}.svc.cluster.local:{coordinator_port}/druid/indexer/v1/task", + url=f"{protocol}://{druid_cluster_name}-coordinator-default-0.{druid_cluster_name}-coordinator-default-metrics.{namespace}.svc.cluster.local:{coordinator_port}/druid/indexer/v1/task", input="/tmp/druid-quickstartimport.json", ) task_id = json.loads(ingestion)["task"] @@ -113,11 +113,11 @@ def query_datasource(self, url, sql, expected, iterations): Re-query tasks ==============""") tasks = druid.get_tasks( - url=f"{protocol}://{druid_cluster_name}-coordinator-default-0.{druid_cluster_name}-coordinator-default.{namespace}.svc.cluster.local:{coordinator_port}/druid/indexer/v1/tasks", + url=f"{protocol}://{druid_cluster_name}-coordinator-default-0.{druid_cluster_name}-coordinator-default-metrics.{namespace}.svc.cluster.local:{coordinator_port}/druid/indexer/v1/tasks", ) new_task_count = len(json.loads(tasks)) print(f"new tasks: {new_task_count}") -print(f"assert {new_task_count} == {task_count+1}") +print(f"assert {new_task_count} == {task_count + 1}") assert new_task_count == task_count + 1 print(""" @@ -127,13 
+127,13 @@ def query_datasource(self, url, sql, expected, iterations):
 while not job_finished:
     time.sleep(5)
     task = druid.get(
-        url=f"{protocol}://{druid_cluster_name}-coordinator-default-0.{druid_cluster_name}-coordinator-default.{namespace}.svc.cluster.local:{coordinator_port}/druid/indexer/v1/task/{url_encoded_taskid}/status",
+        url=f"{protocol}://{druid_cluster_name}-coordinator-default-0.{druid_cluster_name}-coordinator-default-metrics.{namespace}.svc.cluster.local:{coordinator_port}/druid/indexer/v1/task/{url_encoded_taskid}/status",
     )
     task_status = json.loads(task)["status"]["statusCode"]
     print(f"Current task status: [{task_status}]")
-    assert (
-        task_status == "RUNNING" or task_status == "SUCCESS"
-    ), f"Taskstatus not running or succeeeded: {task_status}"
+    assert task_status == "RUNNING" or task_status == "SUCCESS", (
+        f"Task status not running or succeeded: {task_status}"
+    )
     job_finished = task_status == "SUCCESS"
 
 print("""
@@ -143,7 +143,7 @@ def query_datasource(self, url, sql, expected, iterations):
 while not broker_ready:
     time.sleep(2)
     broker_ready_rc = druid.check_rc(
-        f"{protocol}://{druid_cluster_name}-broker-default-0.{druid_cluster_name}-broker-default.{namespace}.svc.cluster.local:{broker_port}/druid/broker/v1/readiness"
+        f"{protocol}://{druid_cluster_name}-broker-default-0.{druid_cluster_name}-broker-default-metrics.{namespace}.svc.cluster.local:{broker_port}/druid/broker/v1/readiness"
     )
     broker_ready = broker_ready_rc == 200
     print(f"Broker respondend with [{broker_ready_rc}] to readiness check")
@@ -153,7 +153,7 @@ def query_datasource(self, url, sql, expected, iterations):
 ==============""")
 sample_data_size = 39244
 result = druid.query_datasource(
-    url=f"{protocol}://{druid_cluster_name}-broker-default-0.{druid_cluster_name}-broker-default.{namespace}.svc.cluster.local:{broker_port}/druid/v2/sql",
+    url=f"{protocol}://{druid_cluster_name}-broker-default-0.{druid_cluster_name}-broker-default-metrics.{namespace}.svc.cluster.local:{broker_port}/druid/v2/sql",
     sql={"query": 'select count(*) as c from "wikipedia-2015-09-12"'},
     expected=sample_data_size,
     iterations=12,

From 726d592622ba74f1ba840110707f7e43fb5b88b5 Mon Sep 17 00:00:00 2001
From: Benedikt Labrenz
Date: Mon, 16 Jun 2025 15:12:20 +0200
Subject: [PATCH 07/18] add external-access test

---
 tests/test-definition.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml
index 2d666992..47f97425 100644
--- a/tests/test-definition.yaml
+++ b/tests/test-definition.yaml
@@ -151,6 +151,13 @@ tests:
       - hadoop-latest
       - druid-latest
      - openshift
+  - name: external-access
+    dimensions:
+      - druid
+      - zookeeper-latest
+      - opa
+      - hadoop
+      - openshift
 suites:
   - name: nightly
     patch:

From dccf8a217fda5ee68f50b44cc2e1edddd262e884 Mon Sep 17 00:00:00 2001
From: Benedikt Labrenz
Date: Tue, 17 Jun 2025 11:54:06 +0200
Subject: [PATCH 08/18] add permissions on listeners for service account

---
 deploy/helm/druid-operator/templates/roles.yaml | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/deploy/helm/druid-operator/templates/roles.yaml b/deploy/helm/druid-operator/templates/roles.yaml
index dd0924b9..d27f4821 100644
--- a/deploy/helm/druid-operator/templates/roles.yaml
+++ b/deploy/helm/druid-operator/templates/roles.yaml
@@ -84,6 +84,17 @@ rules:
       - customresourcedefinitions
     verbs:
       - get
+  - apiGroups:
+      - listeners.stackable.tech
+    resources:
+      - listeners
+    verbs:
+      - create
+      - delete
+      - get
+      - list
+      - patch
+      - watch
   - apiGroups:
       - events.k8s.io
resources: From 6c9d18cd7a0af96247e87f58d478a1ded90892aa Mon Sep 17 00:00:00 2001 From: Benedikt Labrenz Date: Tue, 17 Jun 2025 16:21:22 +0200 Subject: [PATCH 09/18] fix authorizer test --- tests/templates/kuttl/authorizer/authcheck.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/templates/kuttl/authorizer/authcheck.py b/tests/templates/kuttl/authorizer/authcheck.py index ec453118..1f1801f7 100755 --- a/tests/templates/kuttl/authorizer/authcheck.py +++ b/tests/templates/kuttl/authorizer/authcheck.py @@ -2,7 +2,7 @@ import sys import logging -coordinator_host = "derby-druid-coordinator-default" +coordinator_host = "derby-druid-coordinator-default-metrics" coordinator_port = "8281" authenticator_name = "MyBasicMetadataAuthenticator" @@ -52,7 +52,7 @@ def create_user(user_name): } for role, port in druid_role_ports.items(): - url = f"https://{druid_cluster_name}-{role}-default:{port}/status" + url = f"https://{druid_cluster_name}-{role}-default-metrics:{port}/status" # make an authorized request -> return 401 expected print("Checking Unauthorized") res = requests.get(url, verify=False) From 0707aad65649d7fb28cce2334174ecb6372f7e8e Mon Sep 17 00:00:00 2001 From: Benedikt Labrenz Date: Thu, 26 Jun 2025 14:59:21 +0200 Subject: [PATCH 10/18] move listenerClass to roleConfig --- deploy/helm/druid-operator/crds/crds.yaml | 52 +--- .../druid/examples/getting_started/druid.yaml | 7 +- .../examples/getting_started/druid.yaml.j2 | 7 +- .../pages/usage-guide/listenerclass.adoc | 25 +- rust/operator-binary/src/crd/mod.rs | 232 ++++++++++-------- rust/operator-binary/src/druid_controller.rs | 161 +++++------- rust/operator-binary/src/listener.rs | 86 +++++++ rust/operator-binary/src/main.rs | 1 + .../templates/kuttl/commons/ingestioncheck.py | 20 +- .../external-access/45_listener-classes.yaml | 7 - .../kuttl/external-access/50_druid.yaml.j2 | 9 +- tests/templates/kuttl/ldap/authcheck.py | 2 +- tests/templates/kuttl/oidc/40_druid.yaml.j2 | 1 - tests/templates/kuttl/oidc/login.py | 2 +- tests/templates/kuttl/tls/check-tls.sh | 6 +- 15 files changed, 329 insertions(+), 289 deletions(-) create mode 100644 rust/operator-binary/src/listener.rs diff --git a/deploy/helm/druid-operator/crds/crds.yaml b/deploy/helm/druid-operator/crds/crds.yaml index 0ecede04..e86ee397 100644 --- a/deploy/helm/druid-operator/crds/crds.yaml +++ b/deploy/helm/druid-operator/crds/crds.yaml @@ -70,10 +70,6 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). nullable: true type: string - listenerClass: - description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the brokers. - nullable: true - type: string logging: default: containers: {} @@ -249,11 +245,15 @@ spec: x-kubernetes-preserve-unknown-fields: true roleConfig: default: + listenerClass: cluster-internal podDisruptionBudget: enabled: true maxUnavailable: null description: This is a product-agnostic RoleConfig, which is sufficient for most of the products. properties: + listenerClass: + default: cluster-internal + type: string podDisruptionBudget: default: enabled: true @@ -322,10 +322,6 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. 
Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). nullable: true type: string - listenerClass: - description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the brokers. - nullable: true - type: string logging: default: containers: {} @@ -1002,10 +998,6 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). nullable: true type: string - listenerClass: - description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the coordinators. - nullable: true - type: string logging: default: containers: {} @@ -1181,11 +1173,15 @@ spec: x-kubernetes-preserve-unknown-fields: true roleConfig: default: + listenerClass: cluster-internal podDisruptionBudget: enabled: true maxUnavailable: null description: This is a product-agnostic RoleConfig, which is sufficient for most of the products. properties: + listenerClass: + default: cluster-internal + type: string podDisruptionBudget: default: enabled: true @@ -1254,10 +1250,6 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). nullable: true type: string - listenerClass: - description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the coordinators. - nullable: true - type: string logging: default: containers: {} @@ -1486,10 +1478,6 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). nullable: true type: string - listenerClass: - description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the historicals. - nullable: true - type: string logging: default: containers: {} @@ -1769,10 +1757,6 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). nullable: true type: string - listenerClass: - description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the historicals. - nullable: true - type: string logging: default: containers: {} @@ -2080,10 +2064,6 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). 
nullable: true type: string - listenerClass: - description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the middle managers. - nullable: true - type: string logging: default: containers: {} @@ -2332,10 +2312,6 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). nullable: true type: string - listenerClass: - description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the middle managers. - nullable: true - type: string logging: default: containers: {} @@ -2564,10 +2540,6 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). nullable: true type: string - listenerClass: - description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the routers. - nullable: true - type: string logging: default: containers: {} @@ -2743,11 +2715,15 @@ spec: x-kubernetes-preserve-unknown-fields: true roleConfig: default: + listenerClass: cluster-internal podDisruptionBudget: enabled: true maxUnavailable: null description: This is a product-agnostic RoleConfig, which is sufficient for most of the products. properties: + listenerClass: + default: cluster-internal + type: string podDisruptionBudget: default: enabled: true @@ -2816,10 +2792,6 @@ spec: description: The time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Read more about graceful shutdown in the [graceful shutdown documentation](https://docs.stackable.tech/home/nightly/druid/usage-guide/operations/graceful-shutdown). nullable: true type: string - listenerClass: - description: This field controls which [ListenerClass](https://docs.stackable.tech/home/nightly/listener-operator/listenerclass.html) is used to expose the routers. - nullable: true - type: string logging: default: containers: {} diff --git a/docs/modules/druid/examples/getting_started/druid.yaml b/docs/modules/druid/examples/getting_started/druid.yaml index 9b10d0ba..c3255371 100644 --- a/docs/modules/druid/examples/getting_started/druid.yaml +++ b/docs/modules/druid/examples/getting_started/druid.yaml @@ -7,7 +7,6 @@ spec: image: productVersion: 33.0.0 clusterConfig: - listenerClass: external-stable # This exposes your Stacklet outside of Kubernetes. Remove this configuration if this is not desired zookeeperConfigMapName: simple-druid-znode deepStorage: hdfs: @@ -23,10 +22,14 @@ spec: roleGroups: default: replicas: 1 + roleConfig: + listenerClass: external-stable # This exposes your Stacklet outside of Kubernetes. Remove this configuration if this is not desired coordinators: roleGroups: default: replicas: 1 + roleConfig: + listenerClass: external-stable # This exposes your Stacklet outside of Kubernetes. Remove this configuration if this is not desired historicals: roleGroups: default: @@ -39,6 +42,8 @@ spec: roleGroups: default: replicas: 1 + roleConfig: + listenerClass: external-stable # This exposes your Stacklet outside of Kubernetes. 
Remove this configuration if this is not desired --- apiVersion: v1 kind: Secret diff --git a/docs/modules/druid/examples/getting_started/druid.yaml.j2 b/docs/modules/druid/examples/getting_started/druid.yaml.j2 index 9b10d0ba..c3255371 100644 --- a/docs/modules/druid/examples/getting_started/druid.yaml.j2 +++ b/docs/modules/druid/examples/getting_started/druid.yaml.j2 @@ -7,7 +7,6 @@ spec: image: productVersion: 33.0.0 clusterConfig: - listenerClass: external-stable # This exposes your Stacklet outside of Kubernetes. Remove this configuration if this is not desired zookeeperConfigMapName: simple-druid-znode deepStorage: hdfs: @@ -23,10 +22,14 @@ spec: roleGroups: default: replicas: 1 + roleConfig: + listenerClass: external-stable # This exposes your Stacklet outside of Kubernetes. Remove this configuration if this is not desired coordinators: roleGroups: default: replicas: 1 + roleConfig: + listenerClass: external-stable # This exposes your Stacklet outside of Kubernetes. Remove this configuration if this is not desired historicals: roleGroups: default: @@ -39,6 +42,8 @@ spec: roleGroups: default: replicas: 1 + roleConfig: + listenerClass: external-stable # This exposes your Stacklet outside of Kubernetes. Remove this configuration if this is not desired --- apiVersion: v1 kind: Secret diff --git a/docs/modules/druid/pages/usage-guide/listenerclass.adoc b/docs/modules/druid/pages/usage-guide/listenerclass.adoc index c2ef0d61..2ee789f7 100644 --- a/docs/modules/druid/pages/usage-guide/listenerclass.adoc +++ b/docs/modules/druid/pages/usage-guide/listenerclass.adoc @@ -2,20 +2,23 @@ :description: Configure Apache Druid service exposure using ListenerClass to control service types: cluster-internal, external-unstable, or external-stable. Apache Druid offers a web UI and an API, both are exposed by the `router` role. -Other roles also expose API endpoints such as the `broker` and `coordinator`. -The Operator deploys services called `-` (where `` is the name of the DruidCluster and `` is the role for the Service) through which Druid can be reached. +The `broker` and `coordinator` roles expose additional API endpoints. -These services can have three different types: `cluster-internal`, `external-unstable` and `external-stable`. -Read more about the types in the xref:concepts:service-exposition.adoc[service exposition] documentation at platform level. - -This is how the listener class is configured: +The operator deploys a xref:listener-operator:listener.adoc[Listener] for the the `router`, `broker` and `coordinator` roles. +The listener defaults to only being accessible from within the Kubernetes cluster, but this can be changed by configuring a listener class +for specific roles: [source,yaml] ---- spec: - clusterConfig: - listenerClass: cluster-internal # <1> + brokers: + roleConfig: + listenerClass: cluster-internal # <1> + coordinators: + roleConfig: + listenerClass: cluster-internal # <1> + routers: + roleConfig: + listenerClass: external-unstable # <1> ---- -<1> The default `cluster-internal` setting. - -This setting affects all role Services at once. +<1> Specify one of `external-stable`, `external-unstable`, `cluster-internal` or a custom ListenerClass (the default setting is `cluster-internal`). 
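To make the per-role configuration above concrete, here is a minimal sketch of a custom ListenerClass and a DruidCluster fragment that would reference it. The name `my-nodeport` is illustrative and not part of this change set; `serviceType: NodePort` mirrors the ListenerClasses defined in `tests/templates/kuttl/external-access/45_listener-classes.yaml` later in this patch.

[source,yaml]
----
# Hypothetical custom ListenerClass (the name is an assumption, not taken
# from this change set); serviceType: NodePort matches the classes created
# for the external-access test.
apiVersion: listeners.stackable.tech/v1alpha1
kind: ListenerClass
metadata:
  name: my-nodeport
spec:
  serviceType: NodePort
----

Referencing it from a DruidCluster follows the `roleConfig` layout introduced by this patch; in this sketch only the router is exposed through the custom class, while brokers and coordinators keep the `cluster-internal` default:

[source,yaml]
----
# DruidCluster fragment (required fields such as clusterConfig are elided).
spec:
  routers:
    roleConfig:
      listenerClass: my-nodeport  # expose the router via the custom class
    roleGroups:
      default:
        replicas: 1
----

Only the `router`, `broker` and `coordinator` roles carry a `roleConfig.listenerClass`; `historicals` and `middleManagers` keep the generic role config and get no Listener. For the three web-facing roles the operator creates one group Listener per role, named `<cluster name>-<role>` (see `group_listener_name` in the new `listener.rs` later in this patch).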
diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index dd85adf7..e2f699fb 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -28,9 +28,7 @@ use stackable_operator::{ framework::{create_vector_shutdown_file_command, remove_vector_shutdown_file_command}, spec::Logging, }, - role_utils::{ - CommonConfiguration, GenericRoleConfig, JavaCommonConfig, Role, RoleGroup, RoleGroupRef, - }, + role_utils::{CommonConfiguration, GenericRoleConfig, JavaCommonConfig, Role, RoleGroup}, schemars::{self, JsonSchema}, status::condition::{ClusterCondition, HasStatusCondition}, time::Duration, @@ -47,6 +45,7 @@ use crate::crd::{ authorization::DruidAuthorization, resource::RoleResource, tls::{DruidTls, default_druid_tls}, + v1alpha1::DruidRoleConfig, }; pub mod affinity; @@ -153,10 +152,6 @@ const DEFAULT_MIDDLE_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(1 const DEFAULT_ROUTER_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(1); const DEFAULT_HISTORICAL_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(1); -// listener -pub const LISTENER_VOLUME_NAME: &str = "listener"; -pub const LISTENER_VOLUME_DIR: &str = "/stackable/listener"; - #[derive(Snafu, Debug, EnumDiscriminants)] #[strum_discriminants(derive(IntoStaticStr))] #[allow(clippy::enum_variant_names)] @@ -204,6 +199,8 @@ pub enum Error { #[versioned(version(name = "v1alpha1"))] pub mod versioned { + use crate::crd::v1alpha1::{DruidBrokerConfig, DruidRouterConfig}; + /// A Druid cluster stacklet. This resource is managed by the Stackable operator for Apache Druid. /// Find more information on how to use it and the resources that the operator generates in the /// [operator documentation](DOCS_BASE_URL_PLACEHOLDER/druid/). @@ -230,10 +227,10 @@ pub mod versioned { pub image: ProductImage, // no doc - docs provided by the struct. - pub brokers: Role, + pub brokers: Role, // no doc - docs provided by the struct. - pub coordinators: Role, + pub coordinators: Role, // no doc - docs provided by the struct. pub historicals: Role, @@ -242,7 +239,7 @@ pub mod versioned { pub middle_managers: Role, // no doc - docs provided by the struct. - pub routers: Role, + pub routers: Role, // no doc - docs provided by the struct. 
#[serde(default)] @@ -311,6 +308,15 @@ pub mod versioned { #[schemars(schema_with = "raw_object_list_schema")] pub extra_volumes: Vec, } + #[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] + #[serde(rename_all = "camelCase")] + pub struct DruidRoleConfig { + #[serde(flatten)] + pub common: GenericRoleConfig, + + #[serde(default = "druid_default_listener_class")] + pub listener_class: String, + } } // Required to retrieve the conditions from the cluster status @@ -424,15 +430,25 @@ impl v1alpha1::DruidCluster { vec![ ( DruidRole::Broker.to_string(), - (config_files.clone(), self.spec.brokers.clone().erase()), + ( + config_files.clone(), + extract_role_from_role_config::(self.spec.brokers.clone()) + .erase(), + ), ), ( - DruidRole::Historical.to_string(), - (config_files.clone(), self.spec.historicals.clone().erase()), + DruidRole::Coordinator.to_string(), + ( + config_files.clone(), + extract_role_from_role_config::( + self.spec.coordinators.clone(), + ) + .erase(), + ), ), ( - DruidRole::Router.to_string(), - (config_files.clone(), self.spec.routers.clone().erase()), + DruidRole::Historical.to_string(), + (config_files.clone(), self.spec.historicals.clone().erase()), ), ( DruidRole::MiddleManager.to_string(), @@ -442,30 +458,18 @@ impl v1alpha1::DruidCluster { ), ), ( - DruidRole::Coordinator.to_string(), - (config_files, self.spec.coordinators.clone().erase()), + DruidRole::Router.to_string(), + ( + config_files, + extract_role_from_role_config::(self.spec.routers.clone()) + .erase(), + ), ), ] .into_iter() .collect() } - /// The name of the group-listener provided for a specific role-group. - /// Webservers will use this group listener so that only one load balancer - /// is needed (per role group). - pub fn group_listener_name( - &self, - role: &DruidRole, - rolegroup: &RoleGroupRef, - ) -> Option { - match role { - DruidRole::Coordinator | DruidRole::Broker | DruidRole::Router => { - Some(rolegroup.object_name()) - } - DruidRole::Historical | DruidRole::MiddleManager => None, - } - } - /// The name of the role-level load-balanced Kubernetes `Service` pub fn role_service_name(&self, role: &DruidRole) -> Option { Some(format!("{}-{}-metrics", self.metadata.name.clone()?, role)) @@ -575,11 +579,11 @@ impl v1alpha1::DruidCluster { Ok(MergedConfig { brokers: v1alpha1::DruidCluster::merged_role( - &self.spec.brokers, + &extract_role_from_role_config::(self.spec.brokers.clone()), &BrokerConfig::default_config(&self.name_any(), &DruidRole::Broker, deep_storage), )?, coordinators: v1alpha1::DruidCluster::merged_role( - &self.spec.coordinators, + &extract_role_from_role_config::(self.spec.coordinators.clone()), &CoordinatorConfig::default_config( &self.name_any(), &DruidRole::Coordinator, @@ -603,53 +607,12 @@ impl v1alpha1::DruidCluster { ), )?, routers: v1alpha1::DruidCluster::merged_role( - &self.spec.routers, + &extract_role_from_role_config::(self.spec.routers.clone()), &RouterConfig::default_config(&self.name_any(), &DruidRole::Router, deep_storage), )?, }) } - pub fn merged_listener_class( - &self, - role: &DruidRole, - rolegroup_name: &String, - ) -> Result, Error> { - let merged_config = self.merged_config()?; - match role { - &DruidRole::Broker => Ok(Some( - merged_config - .brokers - .get(rolegroup_name) - .context(CannotRetrieveRoleGroupSnafu { rolegroup_name })? 
- .config - .config - .listener_class - .to_owned(), - )), - &DruidRole::Coordinator => Ok(Some( - merged_config - .coordinators - .get(rolegroup_name) - .context(CannotRetrieveRoleGroupSnafu { rolegroup_name })? - .config - .config - .listener_class - .to_owned(), - )), - &DruidRole::Router => Ok(Some( - merged_config - .routers - .get(rolegroup_name) - .context(CannotRetrieveRoleGroupSnafu { rolegroup_name })? - .config - .config - .listener_class - .to_owned(), - )), - &DruidRole::Historical | DruidRole::MiddleManager => Ok(None), - } - } - /// Merges and validates the role groups of the given role with the given default configuration fn merged_role( role: &Role, @@ -704,13 +667,13 @@ impl v1alpha1::DruidCluster { }) } - pub fn role_config(&self, role: &DruidRole) -> &GenericRoleConfig { + pub fn generic_role_config(&self, role: &DruidRole) -> &GenericRoleConfig { match role { - DruidRole::Broker => &self.spec.brokers.role_config, - DruidRole::Coordinator => &self.spec.coordinators.role_config, + DruidRole::Broker => &self.spec.brokers.role_config.common, + DruidRole::Coordinator => &self.spec.coordinators.role_config.common, DruidRole::Historical => &self.spec.historicals.role_config, DruidRole::MiddleManager => &self.spec.middle_managers.role_config, - DruidRole::Router => &self.spec.routers.role_config, + DruidRole::Router => &self.spec.routers.role_config.common, } } @@ -742,11 +705,18 @@ impl v1alpha1::DruidCluster { JavaCommonConfig, > { match druid_role { - DruidRole::Coordinator => self.spec.coordinators.clone().erase(), - DruidRole::Broker => self.spec.brokers.clone().erase(), + DruidRole::Broker => { + extract_role_from_role_config::(self.spec.brokers.clone()).erase() + } + DruidRole::Coordinator => { + extract_role_from_role_config::(self.spec.coordinators.clone()) + .erase() + } DruidRole::Historical => self.spec.historicals.clone().erase(), DruidRole::MiddleManager => self.spec.middle_managers.clone().erase(), - DruidRole::Router => self.spec.routers.clone().erase(), + DruidRole::Router => { + extract_role_from_role_config::(self.spec.routers.clone()).erase() + } } } @@ -953,6 +923,19 @@ impl MergedConfig { } } +impl Default for v1alpha1::DruidRoleConfig { + fn default() -> Self { + v1alpha1::DruidRoleConfig { + listener_class: druid_default_listener_class(), + common: Default::default(), + } + } +} + +fn druid_default_listener_class() -> String { + "cluster-internal".to_string() +} + #[derive( Clone, Debug, @@ -1087,6 +1070,22 @@ impl DruidRole { create_vector_shutdown_file_command(STACKABLE_LOG_DIR), } } + + pub fn listener_class_name(&self, druid: &v1alpha1::DruidCluster) -> Option { + match self { + Self::Broker => Some(druid.spec.brokers.role_config.listener_class.to_owned()), + Self::Coordinator => Some( + druid + .spec + .coordinators + .role_config + .listener_class + .to_owned(), + ), + Self::Router => Some(druid.spec.routers.role_config.listener_class.to_owned()), + Self::Historical | Self::MiddleManager => None, + } + } } #[derive(Clone, Debug, Default, Deserialize, JsonSchema, Serialize)] @@ -1217,10 +1216,6 @@ pub struct BrokerConfig { /// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. #[fragment_attrs(serde(default))] pub requested_secret_lifetime: Option, - - /// This field controls which [ListenerClass](DOCS_BASE_URL_PLACEHOLDER/listener-operator/listenerclass.html) is used to expose the brokers. 
- #[serde(default)] - pub listener_class: String, } impl BrokerConfig { @@ -1235,7 +1230,6 @@ impl BrokerConfig { affinity: get_affinity(cluster_name, role, deep_storage), graceful_shutdown_timeout: Some(role.default_graceful_shutdown_timeout()), requested_secret_lifetime: Some(DEFAULT_BROKER_SECRET_LIFETIME), - listener_class: Some("cluster-internal".to_owned()), } } } @@ -1271,10 +1265,6 @@ pub struct CoordinatorConfig { /// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. #[fragment_attrs(serde(default))] pub requested_secret_lifetime: Option, - - /// This field controls which [ListenerClass](DOCS_BASE_URL_PLACEHOLDER/listener-operator/listenerclass.html) is used to expose the coordinators. - #[serde(default)] - pub listener_class: String, } impl CoordinatorConfig { @@ -1289,7 +1279,6 @@ impl CoordinatorConfig { affinity: get_affinity(cluster_name, role, deep_storage), graceful_shutdown_timeout: Some(role.default_graceful_shutdown_timeout()), requested_secret_lifetime: Some(DEFAULT_COORDINATOR_SECRET_LIFETIME), - listener_class: Some("cluster-internal".to_owned()), } } } @@ -1325,10 +1314,6 @@ pub struct MiddleManagerConfig { /// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. #[fragment_attrs(serde(default))] pub requested_secret_lifetime: Option, - - /// This field controls which [ListenerClass](DOCS_BASE_URL_PLACEHOLDER/listener-operator/listenerclass.html) is used to expose the middle managers. - #[serde(default)] - pub listener_class: String, } impl MiddleManagerConfig { @@ -1343,7 +1328,6 @@ impl MiddleManagerConfig { affinity: get_affinity(cluster_name, role, deep_storage), graceful_shutdown_timeout: Some(role.default_graceful_shutdown_timeout()), requested_secret_lifetime: Some(DEFAULT_MIDDLE_SECRET_LIFETIME), - listener_class: Some("cluster-internal".to_owned()), } } } @@ -1379,10 +1363,6 @@ pub struct RouterConfig { /// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. #[fragment_attrs(serde(default))] pub requested_secret_lifetime: Option, - - /// This field controls which [ListenerClass](DOCS_BASE_URL_PLACEHOLDER/listener-operator/listenerclass.html) is used to expose the routers. - #[serde(default)] - pub listener_class: String, } impl RouterConfig { @@ -1397,7 +1377,6 @@ impl RouterConfig { affinity: get_affinity(cluster_name, role, deep_storage), graceful_shutdown_timeout: Some(role.default_graceful_shutdown_timeout()), requested_secret_lifetime: Some(DEFAULT_ROUTER_SECRET_LIFETIME), - listener_class: Some("cluster-internal".to_owned()), } } } @@ -1433,10 +1412,6 @@ pub struct HistoricalConfig { /// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. #[fragment_attrs(serde(default))] pub requested_secret_lifetime: Option, - - /// This field controls which [ListenerClass](DOCS_BASE_URL_PLACEHOLDER/listener-operator/listenerclass.html) is used to expose the historicals. 
- #[serde(default)] - pub listener_class: String, } impl HistoricalConfig { @@ -1451,7 +1426,6 @@ impl HistoricalConfig { affinity: get_affinity(cluster_name, role, deep_storage), graceful_shutdown_timeout: Some(role.default_graceful_shutdown_timeout()), requested_secret_lifetime: Some(DEFAULT_HISTORICAL_SECRET_LIFETIME), - listener_class: Some("cluster-internal".to_owned()), } } } @@ -1649,6 +1623,46 @@ pub fn build_recommended_labels<'a, T>( } } +fn extract_role_from_role_config( + fragment: Role, +) -> Role +where + T: FromFragment, + T::Fragment: Clone + Merge, +{ + Role { + config: CommonConfiguration { + config: fragment.config.config, + config_overrides: fragment.config.config_overrides, + env_overrides: fragment.config.env_overrides, + cli_overrides: fragment.config.cli_overrides, + pod_overrides: fragment.config.pod_overrides, + product_specific_common_config: fragment.config.product_specific_common_config, + }, + role_config: fragment.role_config.common, + role_groups: fragment + .role_groups + .into_iter() + .map(|(k, v)| { + ( + k, + RoleGroup { + config: CommonConfiguration { + config: v.config.config, + config_overrides: v.config.config_overrides, + env_overrides: v.config.env_overrides, + cli_overrides: v.config.cli_overrides, + pod_overrides: v.config.pod_overrides, + product_specific_common_config: v.config.product_specific_common_config, + }, + replicas: v.replicas, + }, + ) + }) + .collect(), + } +} + #[cfg(test)] mod tests { use stackable_operator::commons::networking::DomainName; diff --git a/rust/operator-binary/src/druid_controller.rs b/rust/operator-binary/src/druid_controller.rs index 67e3bdac..77478b31 100644 --- a/rust/operator-binary/src/druid_controller.rs +++ b/rust/operator-binary/src/druid_controller.rs @@ -20,14 +20,8 @@ use stackable_operator::{ configmap::ConfigMapBuilder, meta::ObjectMetaBuilder, pod::{ - PodBuilder, - container::ContainerBuilder, - resources::ResourceRequirementsBuilder, - security::PodSecurityContextBuilder, - volume::{ - ListenerOperatorVolumeSourceBuilder, ListenerOperatorVolumeSourceBuilderError, - ListenerReference, VolumeBuilder, - }, + PodBuilder, container::ContainerBuilder, resources::ResourceRequirementsBuilder, + security::PodSecurityContextBuilder, volume::VolumeBuilder, }, }, cluster_resources::{ClusterResourceApplyStrategy, ClusterResources}, @@ -35,7 +29,7 @@ use stackable_operator::{ opa::OpaApiVersion, product_image_selection::ResolvedProductImage, rbac::build_rbac_resources, tls_verification::TlsClientDetailsError, }, - crd::{listener, s3}, + crd::s3, k8s_openapi::{ DeepMerge, api::{ @@ -79,16 +73,20 @@ use crate::{ APP_NAME, AUTH_AUTHORIZER_OPA_URI, CREDENTIALS_SECRET_PROPERTY, CommonRoleGroupConfig, Container, DB_PASSWORD_ENV, DB_USERNAME_ENV, DRUID_CONFIG_DIRECTORY, DS_BUCKET, DeepStorageSpec, DruidClusterStatus, DruidRole, EXTENSIONS_LOADLIST, HDFS_CONFIG_DIRECTORY, - JVM_CONFIG, JVM_SECURITY_PROPERTIES_FILE, LISTENER_VOLUME_DIR, LISTENER_VOLUME_NAME, - LOG_CONFIG_DIRECTORY, MAX_DRUID_LOG_FILES_SIZE, METRICS_PORT, METRICS_PORT_NAME, - OPERATOR_NAME, RUNTIME_PROPS, RW_CONFIG_DIRECTORY, S3_ACCESS_KEY, S3_ENDPOINT_URL, - S3_PATH_STYLE_ACCESS, S3_SECRET_KEY, STACKABLE_LOG_DIR, ZOOKEEPER_CONNECTION_STRING, - authentication::AuthenticationClassesResolved, authorization::DruidAuthorization, - build_recommended_labels, build_string_list, security::DruidTlsSecurity, v1alpha1, + JVM_CONFIG, JVM_SECURITY_PROPERTIES_FILE, LOG_CONFIG_DIRECTORY, MAX_DRUID_LOG_FILES_SIZE, + METRICS_PORT, METRICS_PORT_NAME, OPERATOR_NAME, 
RUNTIME_PROPS, RW_CONFIG_DIRECTORY, + S3_ACCESS_KEY, S3_ENDPOINT_URL, S3_PATH_STYLE_ACCESS, S3_SECRET_KEY, STACKABLE_LOG_DIR, + ZOOKEEPER_CONNECTION_STRING, authentication::AuthenticationClassesResolved, + authorization::DruidAuthorization, build_recommended_labels, build_string_list, + security::DruidTlsSecurity, v1alpha1, }, discovery::{self, build_discovery_configmaps}, extensions::get_extension_list, internal_secret::{create_shared_internal_secret, env_var_from_secret}, + listener::{ + LISTENER_VOLUME_DIR, LISTENER_VOLUME_NAME, build_group_listener, build_group_listener_pvc, + group_listener_name, + }, operations::{graceful_shutdown::add_graceful_shutdown_config, pdb::add_pdbs}, product_logging::extend_role_group_config_map, }; @@ -370,15 +368,13 @@ pub enum Error { source: error_boundary::InvalidObject, }, - #[snafu(display("failed to build listener volume"))] - BuildListenerVolume { - source: ListenerOperatorVolumeSourceBuilderError, - }, - #[snafu(display("failed to apply group listener"))] ApplyGroupListener { source: stackable_operator::cluster_resources::Error, }, + + #[snafu(display("failed to configure listener"))] + ListenerConfiguration { source: crate::listener::Error }, } type Result = std::result::Result; @@ -557,28 +553,7 @@ pub async fn reconcile_druid( &druid_auth_config, &rbac_sa, )?; - if let Some(listener_class) = druid - .merged_listener_class(&druid_role, &rolegroup.role_group) - .context(FailedToResolveConfigSnafu)? - { - if let Some(listener_group_name) = - druid.group_listener_name(&druid_role, &rolegroup) - { - let rg_group_listener = build_group_listener( - druid, - &resolved_product_image, - &druid_role, - &rolegroup, - listener_class, - listener_group_name, - &druid_tls_security, - )?; - cluster_resources - .add(client, rg_group_listener) - .await - .context(ApplyGroupListenerSnafu)?; - } - } + cluster_resources .add(client, rg_service) .await @@ -601,7 +576,32 @@ pub async fn reconcile_druid( ); } - let role_config = druid.role_config(&druid_role); + if let Some(listener_class) = druid_role.listener_class_name(druid) { + if let Some(listener_group_name) = group_listener_name(druid, &druid_role) { + let role_group_listener = build_group_listener( + druid, + build_recommended_labels( + druid, + DRUID_CONTROLLER_NAME, + &resolved_product_image.app_version_label, + role_name, + "none", + ), + listener_class.to_string(), + listener_group_name, + &druid_role, + &druid_tls_security, + ) + .context(ListenerConfigurationSnafu)?; + + cluster_resources + .add(client, role_group_listener) + .await + .context(ApplyGroupListenerSnafu)?; + } + } + + let role_config = druid.generic_role_config(&druid_role); add_pdbs( &role_config.pod_disruption_budget, @@ -899,39 +899,6 @@ fn build_rolegroup_service( }) } -pub fn build_group_listener( - druid: &v1alpha1::DruidCluster, - resolved_product_image: &ResolvedProductImage, - role: &DruidRole, - rolegroup: &RoleGroupRef, - listener_class: String, - listener_group_name: String, - druid_tls_security: &DruidTlsSecurity, -) -> Result { - Ok(listener::v1alpha1::Listener { - metadata: ObjectMetaBuilder::new() - .name_and_namespace(druid) - .name(listener_group_name) - .ownerreference_from_resource(druid, None, Some(true)) - .context(ObjectMissingMetadataForOwnerRefSnafu)? - .with_recommended_labels(build_recommended_labels( - druid, - DRUID_CONTROLLER_NAME, - &resolved_product_image.app_version_label, - &rolegroup.role, - &rolegroup.role_group, - )) - .context(ObjectMissingMetadataForOwnerRefSnafu)? 
- .build(), - spec: listener::v1alpha1::ListenerSpec { - class_name: Some(listener_class), - ports: druid_tls_security.listener_ports(role), - ..listener::v1alpha1::ListenerSpec::default() - }, - status: None, - }) -} - #[allow(clippy::too_many_arguments)] /// The rolegroup [`StatefulSet`] runs the rolegroup, as configured by the administrator. /// @@ -1147,36 +1114,28 @@ fn build_rolegroup_statefulset( .context(AddVolumeMountSnafu)?; } - // Used for PVC templates that cannot be modified once they are deployed - let unversioned_recommended_labels = Labels::recommended(build_recommended_labels( - druid, - DRUID_CONTROLLER_NAME, - // A version value is required, and we do want to use the "recommended" format for the other desired labels - "none", - &rolegroup_ref.role, - &rolegroup_ref.role_group, - )) - .context(LabelBuildSnafu)?; - let mut pvcs: Option> = None; - if let Some(listener_group_name) = druid.group_listener_name(role, rolegroup_ref) { - // Listener endpoints for the Webserver role will use persistent volumes - // so that load balancers can hard-code the target addresses. This will - // be the case even when no class is set (and the value defaults to - // cluster-internal) as the address should still be consistent. - let listener_pvc = ListenerOperatorVolumeSourceBuilder::new( - &ListenerReference::ListenerName(listener_group_name), - &unversioned_recommended_labels, - ) - .context(BuildListenerVolumeSnafu)? - .build_pvc(LISTENER_VOLUME_NAME.to_string()) - .context(BuildListenerVolumeSnafu)?; - pvcs = Some(vec![listener_pvc]); - + if let Some(group_listener_name) = group_listener_name(druid, role) { cb_druid .add_volume_mount(LISTENER_VOLUME_NAME, LISTENER_VOLUME_DIR) .context(AddVolumeMountSnafu)?; + + // Used for PVC templates that cannot be modified once they are deployed + let unversioned_recommended_labels = Labels::recommended(build_recommended_labels( + druid, + DRUID_CONTROLLER_NAME, + // A version value is required, and we do want to use the "recommended" format for the other desired labels + "none", + &rolegroup_ref.role, + &rolegroup_ref.role_group, + )) + .context(LabelBuildSnafu)?; + + pvcs = Some(vec![ + build_group_listener_pvc(&group_listener_name, &unversioned_recommended_labels) + .context(ListenerConfigurationSnafu)?, + ]); } let metadata = ObjectMetaBuilder::new() diff --git a/rust/operator-binary/src/listener.rs b/rust/operator-binary/src/listener.rs new file mode 100644 index 00000000..f7e76067 --- /dev/null +++ b/rust/operator-binary/src/listener.rs @@ -0,0 +1,86 @@ +use snafu::{ResultExt, Snafu}; +use stackable_operator::{ + builder::{ + meta::ObjectMetaBuilder, + pod::volume::{ListenerOperatorVolumeSourceBuilder, ListenerReference}, + }, + crd::listener::{self, v1alpha1::Listener}, + k8s_openapi::api::core::v1::PersistentVolumeClaim, + kube::ResourceExt, + kvp::{Labels, ObjectLabels}, +}; + +use crate::crd::{DruidRole, security::DruidTlsSecurity, v1alpha1}; + +pub const LISTENER_VOLUME_NAME: &str = "listener"; +pub const LISTENER_VOLUME_DIR: &str = "/stackable/listener"; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("listener object is missing metadata to build owner reference"))] + ObjectMissingMetadataForOwnerRef { + source: stackable_operator::builder::meta::Error, + }, + + #[snafu(display("failed to build listener object meta data"))] + BuildObjectMeta { + source: stackable_operator::builder::meta::Error, + }, + + #[snafu(display("failed to build listener volume"))] + BuildListenerPersistentVolume { + source: 
stackable_operator::builder::pod::volume::ListenerOperatorVolumeSourceBuilderError, + }, +} + +pub fn build_group_listener( + druid: &v1alpha1::DruidCluster, + object_labels: ObjectLabels, + listener_class: String, + listener_group_name: String, + druid_role: &DruidRole, + druid_tls_security: &DruidTlsSecurity, +) -> Result { + Ok(Listener { + metadata: ObjectMetaBuilder::new() + .name_and_namespace(druid) + .name(listener_group_name) + .ownerreference_from_resource(druid, None, Some(true)) + .context(ObjectMissingMetadataForOwnerRefSnafu)? + .with_recommended_labels(object_labels) + .context(BuildObjectMetaSnafu)? + .build(), + spec: listener::v1alpha1::ListenerSpec { + class_name: Some(listener_class), + ports: druid_tls_security.listener_ports(druid_role), + ..listener::v1alpha1::ListenerSpec::default() + }, + status: None, + }) +} + +pub fn build_group_listener_pvc( + group_listener_name: &String, + unversioned_recommended_labels: &Labels, +) -> Result { + ListenerOperatorVolumeSourceBuilder::new( + &ListenerReference::ListenerName(group_listener_name.to_string()), + unversioned_recommended_labels, + ) + .context(BuildListenerPersistentVolumeSnafu)? + .build_pvc(LISTENER_VOLUME_NAME.to_string()) + .context(BuildListenerPersistentVolumeSnafu) +} + +pub fn group_listener_name( + druid: &v1alpha1::DruidCluster, + druid_role: &DruidRole, +) -> Option { + match druid_role { + DruidRole::Coordinator | DruidRole::Broker | DruidRole::Router => Some(format!( + "{cluster_name}-{druid_role}", + cluster_name = druid.name_any(), + )), + DruidRole::Historical | DruidRole::MiddleManager => None, + } +} diff --git a/rust/operator-binary/src/main.rs b/rust/operator-binary/src/main.rs index a0ccb28a..9e40184f 100644 --- a/rust/operator-binary/src/main.rs +++ b/rust/operator-binary/src/main.rs @@ -34,6 +34,7 @@ mod discovery; mod druid_controller; mod extensions; mod internal_secret; +mod listener; mod operations; mod product_logging; diff --git a/tests/templates/kuttl/commons/ingestioncheck.py b/tests/templates/kuttl/commons/ingestioncheck.py index 7d8c0c65..9acb7884 100755 --- a/tests/templates/kuttl/commons/ingestioncheck.py +++ b/tests/templates/kuttl/commons/ingestioncheck.py @@ -55,7 +55,7 @@ def query_datasource(self, url, sql, expected, iterations): Query tasks ===========""") tasks = druid.get_tasks( - url=f"https://{druid_cluster_name}-coordinator-default:8281/druid/indexer/v1/tasks", + url=f"https://{druid_cluster_name}-coordinator-default-metrics:8281/druid/indexer/v1/tasks", ) task_count = len(json.loads(tasks)) print(f"existing tasks: {task_count}") @@ -64,7 +64,7 @@ def query_datasource(self, url, sql, expected, iterations): Start ingestion task ====================""") ingestion = druid.post_task( - url=f"https://{druid_cluster_name}-coordinator-default:8281/druid/indexer/v1/task", + url=f"https://{druid_cluster_name}-coordinator-default-metrics:8281/druid/indexer/v1/task", input="/tmp/druid-quickstartimport.json", ) task_id = json.loads(ingestion)["task"] @@ -74,11 +74,11 @@ def query_datasource(self, url, sql, expected, iterations): Re-query tasks ==============""") tasks = druid.get_tasks( - url=f"https://{druid_cluster_name}-coordinator-default:8281/druid/indexer/v1/tasks", + url=f"https://{druid_cluster_name}-coordinator-default-metrics:8281/druid/indexer/v1/tasks", ) new_task_count = len(json.loads(tasks)) print(f"new tasks: {new_task_count}") -print(f"assert {new_task_count} == {task_count+1}") +print(f"assert {new_task_count} == {task_count + 1}") assert new_task_count == 
task_count + 1 print(""" @@ -88,13 +88,13 @@ def query_datasource(self, url, sql, expected, iterations): while not job_finished: time.sleep(5) task = druid.get( - url=f"https://{druid_cluster_name}-coordinator-default:8281/druid/indexer/v1/task/{url_encoded_taskid}/status", + url=f"https://{druid_cluster_name}-coordinator-default-metrics:8281/druid/indexer/v1/task/{url_encoded_taskid}/status", ) task_status = json.loads(task)["status"]["statusCode"] print(f"Current task status: [{task_status}]") - assert ( - task_status == "RUNNING" or task_status == "SUCCESS" - ), f"Taskstatus not running or succeeeded: {task_status}" + assert task_status == "RUNNING" or task_status == "SUCCESS", ( + f"Taskstatus not running or succeeeded: {task_status}" + ) job_finished = task_status == "SUCCESS" print(""" @@ -104,7 +104,7 @@ def query_datasource(self, url, sql, expected, iterations): while not broker_ready: time.sleep(2) broker_ready_rc = druid.check_rc( - f"https://{druid_cluster_name}-broker-default:8282/druid/broker/v1/readiness" + f"https://{druid_cluster_name}-broker-default-metrics:8282/druid/broker/v1/readiness" ) broker_ready = broker_ready_rc == 200 print(f"Broker respondend with [{broker_ready_rc}] to readiness check") @@ -114,7 +114,7 @@ def query_datasource(self, url, sql, expected, iterations): ==============""") sample_data_size = 39244 result = druid.query_datasource( - url=f"https://{druid_cluster_name}-broker-default:8282/druid/v2/sql", + url=f"https://{druid_cluster_name}-broker-default-metrics:8282/druid/v2/sql", sql={"query": 'select count(*) as c from "wikipedia-2015-09-12"'}, expected=sample_data_size, iterations=12, diff --git a/tests/templates/kuttl/external-access/45_listener-classes.yaml b/tests/templates/kuttl/external-access/45_listener-classes.yaml index 4131526a..c60f5ffd 100644 --- a/tests/templates/kuttl/external-access/45_listener-classes.yaml +++ b/tests/templates/kuttl/external-access/45_listener-classes.yaml @@ -8,13 +8,6 @@ spec: --- apiVersion: listeners.stackable.tech/v1alpha1 kind: ListenerClass -metadata: - name: test-external-stable-$NAMESPACE -spec: - serviceType: NodePort ---- -apiVersion: listeners.stackable.tech/v1alpha1 -kind: ListenerClass metadata: name: test-external-unstable-$NAMESPACE spec: diff --git a/tests/templates/kuttl/external-access/50_druid.yaml.j2 b/tests/templates/kuttl/external-access/50_druid.yaml.j2 index d34498a0..79948044 100644 --- a/tests/templates/kuttl/external-access/50_druid.yaml.j2 +++ b/tests/templates/kuttl/external-access/50_druid.yaml.j2 @@ -32,7 +32,8 @@ spec: gracefulShutdownTimeout: 1s # Let the test run faster logging: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} - listenerClass: test-external-stable-$NAMESPACE + roleConfig: + listenerClass: test-cluster-internal-$NAMESPACE roleGroups: default: replicas: 1 @@ -41,7 +42,8 @@ spec: gracefulShutdownTimeout: 1s # Let the test run faster logging: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} - listenerClass: test-external-stable-$NAMESPACE + roleConfig: + listenerClass: test-cluster-internal-$NAMESPACE roleGroups: default: replicas: 1 @@ -66,7 +68,8 @@ spec: gracefulShutdownTimeout: 1s # Let the test run faster logging: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} - listenerClass: test-external-stable-$NAMESPACE + roleConfig: + listenerClass: test-external-unstable-$NAMESPACE roleGroups: default: replicas: 1 diff --git a/tests/templates/kuttl/ldap/authcheck.py 
b/tests/templates/kuttl/ldap/authcheck.py index c80a1acd..93a3f343 100755 --- a/tests/templates/kuttl/ldap/authcheck.py +++ b/tests/templates/kuttl/ldap/authcheck.py @@ -26,7 +26,7 @@ def main(): ) for role, port in druid_role_ports.items(): - url = f"https://{druid_cluster_name}-{role}-default:{port}/status" + url = f"https://{druid_cluster_name}-{role}-default-metrics:{port}/status" # make an authorized request -> return 401 expected logging.info(f"making unauthorized request to {role}.") res = requests.get(url, verify=False) diff --git a/tests/templates/kuttl/oidc/40_druid.yaml.j2 b/tests/templates/kuttl/oidc/40_druid.yaml.j2 index 851b278d..1d1a831f 100644 --- a/tests/templates/kuttl/oidc/40_druid.yaml.j2 +++ b/tests/templates/kuttl/oidc/40_druid.yaml.j2 @@ -37,7 +37,6 @@ spec: - authenticationClass: druid-oidc-auth-class-$NAMESPACE oidc: clientCredentialsSecret: druid-oidc-client - listenerClass: external-unstable deepStorage: s3: bucket: diff --git a/tests/templates/kuttl/oidc/login.py b/tests/templates/kuttl/oidc/login.py index 8b4b8937..e05d3ae7 100644 --- a/tests/templates/kuttl/oidc/login.py +++ b/tests/templates/kuttl/oidc/login.py @@ -15,7 +15,7 @@ session = requests.Session() -druid_router_service = f"druid-router-default.{namespace}.svc.cluster.local" +druid_router_service = f"druid-router-default-metrics.{namespace}.svc.cluster.local" keycloak_service = f"keycloak.{namespace}.svc.cluster.local" # Open Druid web UI which will redirect to OIDC login diff --git a/tests/templates/kuttl/tls/check-tls.sh b/tests/templates/kuttl/tls/check-tls.sh index 69682d6d..b1335864 100755 --- a/tests/templates/kuttl/tls/check-tls.sh +++ b/tests/templates/kuttl/tls/check-tls.sh @@ -7,7 +7,7 @@ TYPE=$2 # No encryption if [[ $TYPE == "no-tls" ]] then - HOST=http://derby-druid-router-default-0.derby-druid-router-default.${NAMESPACE}.svc.cluster.local:8888/status/health + HOST=http://derby-druid-router-default-0.derby-druid-router-default-metrics.${NAMESPACE}.svc.cluster.local:8888/status/health # should work echo "[NO_TLS] Test unsecured access" @@ -23,7 +23,7 @@ fi # Only encryption if [[ $TYPE == "internal-and-server-tls" ]] then - HOST=https://derby-druid-router-default-0.derby-druid-router-default.${NAMESPACE}.svc.cluster.local:9088/status/health + HOST=https://derby-druid-router-default-0.derby-druid-router-default-metrics.${NAMESPACE}.svc.cluster.local:9088/status/health # should not work without --insecure echo "[TLS_ENCRYPTION] Test TLS without trusted CA and without insecure access" @@ -69,7 +69,7 @@ fi # Encryption and TLS client auth if [[ $TYPE == "internal-and-server-tls-and-tls-client-auth" ]] then - HOST=https://derby-druid-router-default-0.derby-druid-router-default.${NAMESPACE}.svc.cluster.local:9088/status/health + HOST=https://derby-druid-router-default-0.derby-druid-router-default-metrics.${NAMESPACE}.svc.cluster.local:9088/status/health # Should fail echo "[TLS_AUTH] Test insecure access" From 91f9b741cce23b012bc04aa92f60708166e140ed Mon Sep 17 00:00:00 2001 From: Benedikt Labrenz Date: Thu, 26 Jun 2025 15:09:09 +0200 Subject: [PATCH 11/18] add changelog entry --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f238c546..6a5b8710 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ All notable changes to this project will be documented in this file. - Use `--file-log-rotation-period` (or `FILE_LOG_ROTATION_PERIOD`) to configure the frequency of rotation. 
- Use `--console-log-format` (or `CONSOLE_LOG_FORMAT`) to set the format to `plain` (default) or `json`. - Add support for `33.0.0` ([#722]). +- Add Listener support for Druid ([#731]). ### Changed @@ -52,6 +53,7 @@ All notable changes to this project will be documented in this file. [#721]: https://github.com/stackabletech/druid-operator/pull/721 [#722]: https://github.com/stackabletech/druid-operator/pull/722 [#725]: https://github.com/stackabletech/druid-operator/pull/725 +[#731]: https://github.com/stackabletech/druid-operator/pull/731 ## [25.3.0] - 2025-03-21 From 5924e5a7edf73fe11333e8e3c2757d1ea0ead8b9 Mon Sep 17 00:00:00 2001 From: Benedikt Labrenz Date: Mon, 30 Jun 2025 11:05:56 +0200 Subject: [PATCH 12/18] rename headless service --- rust/operator-binary/src/crd/mod.rs | 6 +++--- rust/operator-binary/src/druid_controller.rs | 2 +- tests/templates/kuttl/authorizer/authcheck.py | 4 ++-- tests/templates/kuttl/commons/healthcheck.py | 2 +- tests/templates/kuttl/commons/ingestioncheck-tls.py | 12 ++++++------ tests/templates/kuttl/commons/ingestioncheck.py | 12 ++++++------ tests/templates/kuttl/ldap/authcheck.py | 2 +- tests/templates/kuttl/oidc/login.py | 2 +- tests/templates/kuttl/tls/check-tls.sh | 6 +++--- 9 files changed, 24 insertions(+), 24 deletions(-) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index e2f699fb..7d33d022 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -472,7 +472,7 @@ impl v1alpha1::DruidCluster { /// The name of the role-level load-balanced Kubernetes `Service` pub fn role_service_name(&self, role: &DruidRole) -> Option { - Some(format!("{}-{}-metrics", self.metadata.name.clone()?, role)) + Some(format!("{}-{}-headless", self.metadata.name.clone()?, role)) } /// The fully-qualified domain name of the role-level load-balanced Kubernetes `Service` @@ -1682,12 +1682,12 @@ mod tests { assert_eq!( cluster.role_service_name(&DruidRole::Router), - Some("testcluster-router-metrics".to_string()) + Some("testcluster-router-headless".to_string()) ); assert_eq!( cluster.role_service_fqdn(&DruidRole::Router, &dummy_cluster_info), - Some("testcluster-router-metrics.default.svc.cluster.local".to_string()) + Some("testcluster-router-headless.default.svc.cluster.local".to_string()) ) } diff --git a/rust/operator-binary/src/druid_controller.rs b/rust/operator-binary/src/druid_controller.rs index 77478b31..f7777628 100644 --- a/rust/operator-binary/src/druid_controller.rs +++ b/rust/operator-binary/src/druid_controller.rs @@ -843,7 +843,7 @@ fn build_rolegroup_config_map( } pub fn rolegroup_service_name(rolegroup: &RoleGroupRef) -> String { - format!("{name}-metrics", name = rolegroup.object_name()) + format!("{name}-headless", name = rolegroup.object_name()) } /// The rolegroup [`Service`] is a headless service that allows direct access to the instances of a certain rolegroup diff --git a/tests/templates/kuttl/authorizer/authcheck.py b/tests/templates/kuttl/authorizer/authcheck.py index 1f1801f7..b6f8bf47 100755 --- a/tests/templates/kuttl/authorizer/authcheck.py +++ b/tests/templates/kuttl/authorizer/authcheck.py @@ -2,7 +2,7 @@ import sys import logging -coordinator_host = "derby-druid-coordinator-default-metrics" +coordinator_host = "derby-druid-coordinator-default-headless" coordinator_port = "8281" authenticator_name = "MyBasicMetadataAuthenticator" @@ -52,7 +52,7 @@ def create_user(user_name): } for role, port in druid_role_ports.items(): - url = 
f"https://{druid_cluster_name}-{role}-default-metrics:{port}/status" + url = f"https://{druid_cluster_name}-{role}-default-headless:{port}/status" # make an authorized request -> return 401 expected print("Checking Unauthorized") res = requests.get(url, verify=False) diff --git a/tests/templates/kuttl/commons/healthcheck.py b/tests/templates/kuttl/commons/healthcheck.py index 0d17a040..2f2e36dd 100755 --- a/tests/templates/kuttl/commons/healthcheck.py +++ b/tests/templates/kuttl/commons/healthcheck.py @@ -25,7 +25,7 @@ for role, port in druid_role_ports.items(): url = ( - f"https://{druid_cluster_name}-{role}-default-metrics:{port}/status/health" + f"https://{druid_cluster_name}-{role}-default-headless:{port}/status/health" ) count = 1 diff --git a/tests/templates/kuttl/commons/ingestioncheck-tls.py b/tests/templates/kuttl/commons/ingestioncheck-tls.py index 079c7761..4cad8bcd 100755 --- a/tests/templates/kuttl/commons/ingestioncheck-tls.py +++ b/tests/templates/kuttl/commons/ingestioncheck-tls.py @@ -94,7 +94,7 @@ def query_datasource(self, url, sql, expected, iterations): Query tasks ===========""") tasks = druid.get_tasks( - url=f"{protocol}://{druid_cluster_name}-coordinator-default-0.{druid_cluster_name}-coordinator-default-metrics.{namespace}.svc.cluster.local:{coordinator_port}/druid/indexer/v1/tasks", + url=f"{protocol}://{druid_cluster_name}-coordinator-default-0.{druid_cluster_name}-coordinator-default-headless.{namespace}.svc.cluster.local:{coordinator_port}/druid/indexer/v1/tasks", ) task_count = len(json.loads(tasks)) print(f"existing tasks: {task_count}") @@ -103,7 +103,7 @@ def query_datasource(self, url, sql, expected, iterations): Start ingestion task ====================""") ingestion = druid.post_task( - url=f"{protocol}://{druid_cluster_name}-coordinator-default-0.{druid_cluster_name}-coordinator-default-metrics.{namespace}.svc.cluster.local:{coordinator_port}/druid/indexer/v1/task", + url=f"{protocol}://{druid_cluster_name}-coordinator-default-0.{druid_cluster_name}-coordinator-default-headless.{namespace}.svc.cluster.local:{coordinator_port}/druid/indexer/v1/task", input="/tmp/druid-quickstartimport.json", ) task_id = json.loads(ingestion)["task"] @@ -113,7 +113,7 @@ def query_datasource(self, url, sql, expected, iterations): Re-query tasks ==============""") tasks = druid.get_tasks( - url=f"{protocol}://{druid_cluster_name}-coordinator-default-0.{druid_cluster_name}-coordinator-default-metrics.{namespace}.svc.cluster.local:{coordinator_port}/druid/indexer/v1/tasks", + url=f"{protocol}://{druid_cluster_name}-coordinator-default-0.{druid_cluster_name}-coordinator-default-headless.{namespace}.svc.cluster.local:{coordinator_port}/druid/indexer/v1/tasks", ) new_task_count = len(json.loads(tasks)) print(f"new tasks: {new_task_count}") @@ -127,7 +127,7 @@ def query_datasource(self, url, sql, expected, iterations): while not job_finished: time.sleep(5) task = druid.get( - url=f"{protocol}://{druid_cluster_name}-coordinator-default-0.{druid_cluster_name}-coordinator-default-metrics.{namespace}.svc.cluster.local:{coordinator_port}/druid/indexer/v1/task/{url_encoded_taskid}/status", + url=f"{protocol}://{druid_cluster_name}-coordinator-default-0.{druid_cluster_name}-coordinator-default-headless.{namespace}.svc.cluster.local:{coordinator_port}/druid/indexer/v1/task/{url_encoded_taskid}/status", ) task_status = json.loads(task)["status"]["statusCode"] print(f"Current task status: [{task_status}]") @@ -143,7 +143,7 @@ def query_datasource(self, url, sql, expected, iterations): 
while not broker_ready: time.sleep(2) broker_ready_rc = druid.check_rc( - f"{protocol}://{druid_cluster_name}-broker-default-0.{druid_cluster_name}-broker-default-metrics.{namespace}.svc.cluster.local:{broker_port}/druid/broker/v1/readiness" + f"{protocol}://{druid_cluster_name}-broker-default-0.{druid_cluster_name}-broker-default-headless.{namespace}.svc.cluster.local:{broker_port}/druid/broker/v1/readiness" ) broker_ready = broker_ready_rc == 200 print(f"Broker respondend with [{broker_ready_rc}] to readiness check") @@ -153,7 +153,7 @@ def query_datasource(self, url, sql, expected, iterations): ==============""") sample_data_size = 39244 result = druid.query_datasource( - url=f"{protocol}://{druid_cluster_name}-broker-default-0.{druid_cluster_name}-broker-default-metrics.{namespace}.svc.cluster.local:{broker_port}/druid/v2/sql", + url=f"{protocol}://{druid_cluster_name}-broker-default-0.{druid_cluster_name}-broker-default-headless.{namespace}.svc.cluster.local:{broker_port}/druid/v2/sql", sql={"query": 'select count(*) as c from "wikipedia-2015-09-12"'}, expected=sample_data_size, iterations=12, diff --git a/tests/templates/kuttl/commons/ingestioncheck.py b/tests/templates/kuttl/commons/ingestioncheck.py index 9acb7884..e0b3f6bc 100755 --- a/tests/templates/kuttl/commons/ingestioncheck.py +++ b/tests/templates/kuttl/commons/ingestioncheck.py @@ -55,7 +55,7 @@ def query_datasource(self, url, sql, expected, iterations): Query tasks ===========""") tasks = druid.get_tasks( - url=f"https://{druid_cluster_name}-coordinator-default-metrics:8281/druid/indexer/v1/tasks", + url=f"https://{druid_cluster_name}-coordinator-default-headless:8281/druid/indexer/v1/tasks", ) task_count = len(json.loads(tasks)) print(f"existing tasks: {task_count}") @@ -64,7 +64,7 @@ def query_datasource(self, url, sql, expected, iterations): Start ingestion task ====================""") ingestion = druid.post_task( - url=f"https://{druid_cluster_name}-coordinator-default-metrics:8281/druid/indexer/v1/task", + url=f"https://{druid_cluster_name}-coordinator-default-headless:8281/druid/indexer/v1/task", input="/tmp/druid-quickstartimport.json", ) task_id = json.loads(ingestion)["task"] @@ -74,7 +74,7 @@ def query_datasource(self, url, sql, expected, iterations): Re-query tasks ==============""") tasks = druid.get_tasks( - url=f"https://{druid_cluster_name}-coordinator-default-metrics:8281/druid/indexer/v1/tasks", + url=f"https://{druid_cluster_name}-coordinator-default-headless:8281/druid/indexer/v1/tasks", ) new_task_count = len(json.loads(tasks)) print(f"new tasks: {new_task_count}") @@ -88,7 +88,7 @@ def query_datasource(self, url, sql, expected, iterations): while not job_finished: time.sleep(5) task = druid.get( - url=f"https://{druid_cluster_name}-coordinator-default-metrics:8281/druid/indexer/v1/task/{url_encoded_taskid}/status", + url=f"https://{druid_cluster_name}-coordinator-default-headless:8281/druid/indexer/v1/task/{url_encoded_taskid}/status", ) task_status = json.loads(task)["status"]["statusCode"] print(f"Current task status: [{task_status}]") @@ -104,7 +104,7 @@ def query_datasource(self, url, sql, expected, iterations): while not broker_ready: time.sleep(2) broker_ready_rc = druid.check_rc( - f"https://{druid_cluster_name}-broker-default-metrics:8282/druid/broker/v1/readiness" + f"https://{druid_cluster_name}-broker-default-headless:8282/druid/broker/v1/readiness" ) broker_ready = broker_ready_rc == 200 print(f"Broker respondend with [{broker_ready_rc}] to readiness check") @@ -114,7 +114,7 @@ def 
query_datasource(self, url, sql, expected, iterations): ==============""") sample_data_size = 39244 result = druid.query_datasource( - url=f"https://{druid_cluster_name}-broker-default-metrics:8282/druid/v2/sql", + url=f"https://{druid_cluster_name}-broker-default-headless:8282/druid/v2/sql", sql={"query": 'select count(*) as c from "wikipedia-2015-09-12"'}, expected=sample_data_size, iterations=12, diff --git a/tests/templates/kuttl/ldap/authcheck.py b/tests/templates/kuttl/ldap/authcheck.py index 93a3f343..6a42f0ac 100755 --- a/tests/templates/kuttl/ldap/authcheck.py +++ b/tests/templates/kuttl/ldap/authcheck.py @@ -26,7 +26,7 @@ def main(): ) for role, port in druid_role_ports.items(): - url = f"https://{druid_cluster_name}-{role}-default-metrics:{port}/status" + url = f"https://{druid_cluster_name}-{role}-default-headless:{port}/status" # make an authorized request -> return 401 expected logging.info(f"making unauthorized request to {role}.") res = requests.get(url, verify=False) diff --git a/tests/templates/kuttl/oidc/login.py b/tests/templates/kuttl/oidc/login.py index e05d3ae7..7a3043ed 100644 --- a/tests/templates/kuttl/oidc/login.py +++ b/tests/templates/kuttl/oidc/login.py @@ -15,7 +15,7 @@ session = requests.Session() -druid_router_service = f"druid-router-default-metrics.{namespace}.svc.cluster.local" +druid_router_service = f"druid-router-default-headless.{namespace}.svc.cluster.local" keycloak_service = f"keycloak.{namespace}.svc.cluster.local" # Open Druid web UI which will redirect to OIDC login diff --git a/tests/templates/kuttl/tls/check-tls.sh b/tests/templates/kuttl/tls/check-tls.sh index b1335864..19b7a488 100755 --- a/tests/templates/kuttl/tls/check-tls.sh +++ b/tests/templates/kuttl/tls/check-tls.sh @@ -7,7 +7,7 @@ TYPE=$2 # No encryption if [[ $TYPE == "no-tls" ]] then - HOST=http://derby-druid-router-default-0.derby-druid-router-default-metrics.${NAMESPACE}.svc.cluster.local:8888/status/health + HOST=http://derby-druid-router-default-0.derby-druid-router-default-headless.${NAMESPACE}.svc.cluster.local:8888/status/health # should work echo "[NO_TLS] Test unsecured access" @@ -23,7 +23,7 @@ fi # Only encryption if [[ $TYPE == "internal-and-server-tls" ]] then - HOST=https://derby-druid-router-default-0.derby-druid-router-default-metrics.${NAMESPACE}.svc.cluster.local:9088/status/health + HOST=https://derby-druid-router-default-0.derby-druid-router-default-headless.${NAMESPACE}.svc.cluster.local:9088/status/health # should not work without --insecure echo "[TLS_ENCRYPTION] Test TLS without trusted CA and without insecure access" @@ -69,7 +69,7 @@ fi # Encryption and TLS client auth if [[ $TYPE == "internal-and-server-tls-and-tls-client-auth" ]] then - HOST=https://derby-druid-router-default-0.derby-druid-router-default-metrics.${NAMESPACE}.svc.cluster.local:9088/status/health + HOST=https://derby-druid-router-default-0.derby-druid-router-default-headless.${NAMESPACE}.svc.cluster.local:9088/status/health # Should fail echo "[TLS_AUTH] Test insecure access" From 4d9b8f119da768787b996a54ed14bb9830fc7db6 Mon Sep 17 00:00:00 2001 From: Benedikt Labrenz Date: Mon, 30 Jun 2025 16:37:04 +0200 Subject: [PATCH 13/18] fix tls test --- tests/templates/kuttl/tls/04-assert.yaml.j2 | 56 --------------------- 1 file changed, 56 deletions(-) diff --git a/tests/templates/kuttl/tls/04-assert.yaml.j2 b/tests/templates/kuttl/tls/04-assert.yaml.j2 index 3cc45147..050044c2 100644 --- a/tests/templates/kuttl/tls/04-assert.yaml.j2 +++ b/tests/templates/kuttl/tls/04-assert.yaml.j2 @@ -49,10 
+49,6 @@ metadata: name: derby-druid-coordinator spec: ports: - - name: metrics - port: 9090 - protocol: TCP - targetPort: 9090 {% if test_scenario['values']['tls-mode'] == 'internal-and-server-tls' or test_scenario['values']['tls-mode'] == 'internal-and-server-tls-and-tls-client-auth' %} - name: https port: 8281 @@ -67,58 +63,10 @@ spec: --- apiVersion: v1 kind: Service -metadata: - name: derby-druid-middlemanager -spec: - ports: - - name: metrics - port: 9090 - protocol: TCP - targetPort: 9090 -{% if test_scenario['values']['tls-mode'] == 'internal-and-server-tls' or test_scenario['values']['tls-mode'] == 'internal-and-server-tls-and-tls-client-auth' %} - - name: https - port: 8291 - protocol: TCP - targetPort: 8291 -{% else %} - - name: http - port: 8091 - protocol: TCP - targetPort: 8091 -{% endif %} ---- -apiVersion: v1 -kind: Service -metadata: - name: derby-druid-historical -spec: - ports: - - name: metrics - port: 9090 - protocol: TCP - targetPort: 9090 -{% if test_scenario['values']['tls-mode'] == 'internal-and-server-tls' or test_scenario['values']['tls-mode'] == 'internal-and-server-tls-and-tls-client-auth' %} - - name: https - port: 8283 - protocol: TCP - targetPort: 8283 -{% else %} - - name: http - port: 8083 - protocol: TCP - targetPort: 8083 -{% endif %} ---- -apiVersion: v1 -kind: Service metadata: name: derby-druid-router spec: ports: - - name: metrics - port: 9090 - protocol: TCP - targetPort: 9090 {% if test_scenario['values']['tls-mode'] == 'internal-and-server-tls' or test_scenario['values']['tls-mode'] == 'internal-and-server-tls-and-tls-client-auth' %} - name: https port: 9088 @@ -137,10 +85,6 @@ metadata: name: derby-druid-broker spec: ports: - - name: metrics - port: 9090 - protocol: TCP - targetPort: 9090 {% if test_scenario['values']['tls-mode'] == 'internal-and-server-tls' or test_scenario['values']['tls-mode'] == 'internal-and-server-tls-and-tls-client-auth' %} - name: https port: 8282 From 546a9908165dc020c07690e31525b38fe9bdb6d5 Mon Sep 17 00:00:00 2001 From: Benedikt Labrenz Date: Tue, 1 Jul 2025 10:14:18 +0200 Subject: [PATCH 14/18] fix docs --- docs/modules/druid/examples/getting_started/druid.yaml | 6 +++--- docs/modules/druid/pages/usage-guide/listenerclass.adoc | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/modules/druid/examples/getting_started/druid.yaml b/docs/modules/druid/examples/getting_started/druid.yaml index c3255371..981f69b4 100644 --- a/docs/modules/druid/examples/getting_started/druid.yaml +++ b/docs/modules/druid/examples/getting_started/druid.yaml @@ -23,13 +23,13 @@ spec: default: replicas: 1 roleConfig: - listenerClass: external-stable # This exposes your Stacklet outside of Kubernetes. Remove this configuration if this is not desired + listenerClass: external-stable # This exposes this role outside of Kubernetes. Remove this configuration if this is not desired coordinators: roleGroups: default: replicas: 1 roleConfig: - listenerClass: external-stable # This exposes your Stacklet outside of Kubernetes. Remove this configuration if this is not desired + listenerClass: external-stable # This exposes this role outside of Kubernetes. Remove this configuration if this is not desired historicals: roleGroups: default: @@ -43,7 +43,7 @@ spec: default: replicas: 1 roleConfig: - listenerClass: external-stable # This exposes your Stacklet outside of Kubernetes. Remove this configuration if this is not desired + listenerClass: external-stable # This exposes this role outside of Kubernetes. 
Remove this configuration if this is not desired --- apiVersion: v1 kind: Secret diff --git a/docs/modules/druid/pages/usage-guide/listenerclass.adoc b/docs/modules/druid/pages/usage-guide/listenerclass.adoc index 2ee789f7..7754114b 100644 --- a/docs/modules/druid/pages/usage-guide/listenerclass.adoc +++ b/docs/modules/druid/pages/usage-guide/listenerclass.adoc @@ -4,8 +4,8 @@ Apache Druid offers a web UI and an API, both are exposed by the `router` role. The `broker` and `coordinator` roles expose additional API endpoints. -The operator deploys a xref:listener-operator:listener.adoc[Listener] for the the `router`, `broker` and `coordinator` roles. -The listener defaults to only being accessible from within the Kubernetes cluster, but this can be changed by configuring a listener class +The operator deploys a xref:listener-operator:listener.adoc[Listener] for the `router`, `broker` and `coordinator` roles. +The Listener defaults to only being accessible from within the Kubernetes cluster, but this can be changed by configuring a ListenerClass for specific roles: [source,yaml] From 3880fa7ea067adb9ff4fd21323e1c324d427e5b0 Mon Sep 17 00:00:00 2001 From: Benedikt Labrenz Date: Tue, 1 Jul 2025 10:19:39 +0200 Subject: [PATCH 15/18] remove unnecessary listener from test --- .../kuttl/external-access/50_druid.yaml.j2 | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/tests/templates/kuttl/external-access/50_druid.yaml.j2 b/tests/templates/kuttl/external-access/50_druid.yaml.j2 index 79948044..904b6001 100644 --- a/tests/templates/kuttl/external-access/50_druid.yaml.j2 +++ b/tests/templates/kuttl/external-access/50_druid.yaml.j2 @@ -82,19 +82,3 @@ type: Opaque stringData: username: druid password: druid ---- -apiVersion: listeners.stackable.tech/v1alpha1 -kind: Listener -metadata: - creationTimestamp: "2025-06-11T11:58:17Z" - generation: 1 - labels: - app.kubernetes.io/component: historical - app.kubernetes.io/instance: druid - name: druid-historical-default -spec: - className: test-cluster-internal-kuttl-test-perfect-moray - ports: - - name: https - port: 8283 - protocol: TCP From b8d356e3ad5af20b325bb876d50bf2d50c0532f2 Mon Sep 17 00:00:00 2001 From: Benedikt Labrenz Date: Tue, 1 Jul 2025 23:41:13 +0200 Subject: [PATCH 16/18] create separate headless services and fix discovery cm --- rust/operator-binary/src/crd/mod.rs | 50 +----- rust/operator-binary/src/crd/security.rs | 18 +- rust/operator-binary/src/discovery.rs | 31 ++-- rust/operator-binary/src/druid_controller.rs | 164 +++++++++---------- rust/operator-binary/src/listener.rs | 46 +++++- rust/operator-binary/src/main.rs | 1 + rust/operator-binary/src/service.rs | 119 ++++++++++++++ 7 files changed, 271 insertions(+), 158 deletions(-) create mode 100644 rust/operator-binary/src/service.rs diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 7d33d022..26656d36 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -32,10 +32,7 @@ use stackable_operator::{ schemars::{self, JsonSchema}, status::condition::{ClusterCondition, HasStatusCondition}, time::Duration, - utils::{ - COMMON_BASH_TRAP_FUNCTIONS, cluster_info::KubernetesClusterInfo, - crds::raw_object_list_schema, - }, + utils::{COMMON_BASH_TRAP_FUNCTIONS, crds::raw_object_list_schema}, versioned::versioned, }; use strum::{Display, EnumDiscriminants, EnumIter, EnumString, IntoStaticStr}; @@ -470,25 +467,6 @@ impl v1alpha1::DruidCluster { .collect() } - /// The name of the role-level load-balanced 
Kubernetes `Service` - pub fn role_service_name(&self, role: &DruidRole) -> Option<String> { - Some(format!("{}-{}-headless", self.metadata.name.clone()?, role)) - } - - /// The fully-qualified domain name of the role-level load-balanced Kubernetes `Service` - pub fn role_service_fqdn( - &self, - role: &DruidRole, - cluster_info: &KubernetesClusterInfo, - ) -> Option<String> { - Some(format!( - "{service_name}.{namespace}.svc.{cluster_domain}", - service_name = self.role_service_name(role)?, - namespace = self.metadata.namespace.as_ref()?, - cluster_domain = cluster_info.cluster_domain, - )) - } - /// If an s3 connection for ingestion is given, as well as an s3 connection for deep storage, they need to be the same. /// This function returns the resolved connection, or raises an Error if the connections are not identical. pub async fn get_s3_connection( @@ -1665,32 +1643,6 @@ where #[cfg(test)] mod tests { - use stackable_operator::commons::networking::DomainName; - - use super::*; - - #[test] - fn test_service_name_generation() { - let cluster = deserialize_yaml_file::<v1alpha1::DruidCluster>( - "test/resources/crd/role_service/druid_cluster.yaml", - ); - let dummy_cluster_info = KubernetesClusterInfo { - cluster_domain: DomainName::try_from("cluster.local").unwrap(), - }; - - assert_eq!(cluster.metadata.name, Some("testcluster".to_string())); - - assert_eq!( - cluster.role_service_name(&DruidRole::Router), - Some("testcluster-router-headless".to_string()) - ); - - assert_eq!( - cluster.role_service_fqdn(&DruidRole::Router, &dummy_cluster_info), - Some("testcluster-router-headless.default.svc.cluster.local".to_string()) - ) - } - pub fn deserialize_yaml_str<'a, T: serde::de::Deserialize<'a>>(value: &'a str) -> T { let deserializer = serde_yaml::Deserializer::from_str(value); serde_yaml::with::singleton_map_recursive::deserialize(deserializer).unwrap() diff --git a/rust/operator-binary/src/crd/security.rs b/rust/operator-binary/src/crd/security.rs index 6f751b3d..a2d180a8 100644 --- a/rust/operator-binary/src/crd/security.rs +++ b/rust/operator-binary/src/crd/security.rs @@ -15,7 +15,7 @@ use stackable_operator::{ }, crd::listener, k8s_openapi::{ - api::core::v1::{ContainerPort, Probe, TCPSocketAction}, + api::core::v1::{ContainerPort, Probe, ServicePort, TCPSocketAction}, apimachinery::pkg::util::intstr::IntOrString, }, time::Duration, @@ -58,8 +58,8 @@ const PLAINTEXT_PORT: &str = "druid.plaintextPort"; const ENABLE_TLS_PORT: &str = "druid.enableTlsPort"; const TLS_PORT: &str = "druid.tlsPort"; // Port names -const PLAINTEXT_PORT_NAME: &str = "http"; -const TLS_PORT_NAME: &str = "https"; +pub const PLAINTEXT_PORT_NAME: &str = "http"; +pub const TLS_PORT_NAME: &str = "https"; // Client side (Druid) TLS const CLIENT_HTTPS_KEY_STORE_PATH: &str = "druid.client.https.keyStorePath"; const CLIENT_HTTPS_KEY_STORE_TYPE: &str = "druid.client.https.keyStoreType"; @@ -162,6 +162,18 @@ impl DruidTlsSecurity { .collect() } + pub fn service_ports(&self, role: &DruidRole) -> Vec<ServicePort> { + self.exposed_ports(role) + .into_iter() + .map(|(name, val)| ServicePort { + name: Some(name), + port: val.into(), + protocol: Some("TCP".to_string()), + ..ServicePort::default() + }) + .collect() + } + pub fn listener_ports( &self, role: &DruidRole, diff --git a/rust/operator-binary/src/discovery.rs b/rust/operator-binary/src/discovery.rs index 0ec1ddcb..c8778abe 100644 --- a/rust/operator-binary/src/discovery.rs +++ b/rust/operator-binary/src/discovery.rs @@ -1,18 +1,19 @@ //! Discovery for Druid.
We make Druid discoverable by putting a connection string to the router service //! inside a config map. We only provide a connection string to the router service, since it serves as //! a gateway to the cluster for client queries. -use snafu::{OptionExt, ResultExt, Snafu}; +use snafu::{ResultExt, Snafu}; use stackable_operator::{ builder::{configmap::ConfigMapBuilder, meta::ObjectMetaBuilder}, commons::product_image_selection::ResolvedProductImage, + crd::listener::v1alpha1::Listener, k8s_openapi::api::core::v1::ConfigMap, kube::{Resource, ResourceExt, runtime::reflector::ObjectRef}, - utils::cluster_info::KubernetesClusterInfo, }; use crate::{ DRUID_CONTROLLER_NAME, crd::{DruidRole, build_recommended_labels, security::DruidTlsSecurity, v1alpha1}, + listener::build_listener_connection_string, }; #[derive(Snafu, Debug)] pub enum Error { @@ -35,24 +36,27 @@ pub enum Error { AddRecommendedLabels { source: stackable_operator::builder::meta::Error, }, + + #[snafu(display("failed to configure listener discovery configmap"))] + ListenerConfiguration { source: crate::listener::Error }, } /// Builds discovery [`ConfigMap`]s for connecting to a [`v1alpha1::DruidCluster`]. pub async fn build_discovery_configmaps( druid: &v1alpha1::DruidCluster, owner: &impl Resource, - cluster_info: &KubernetesClusterInfo, resolved_product_image: &ResolvedProductImage, druid_tls_security: &DruidTlsSecurity, + listener: Listener, ) -> Result<Vec<ConfigMap>, Error> { let name = owner.name_unchecked(); Ok(vec![build_discovery_configmap( druid, owner, - cluster_info, resolved_product_image, druid_tls_security, &name, + listener, )?]) } @@ -60,22 +64,17 @@ pub async fn build_discovery_configmaps( fn build_discovery_configmap( druid: &v1alpha1::DruidCluster, owner: &impl Resource, - cluster_info: &KubernetesClusterInfo, resolved_product_image: &ResolvedProductImage, druid_tls_security: &DruidTlsSecurity, name: &str, + listener: Listener, ) -> Result<ConfigMap, Error> { - let router_host = format!( - "{}:{}", - druid - .role_service_fqdn(&DruidRole::Router, cluster_info) - .with_context(|| NoServiceFqdnSnafu)?, - if druid_tls_security.tls_enabled() { - DruidRole::Router.get_https_port() - } else { - DruidRole::Router.get_http_port() - } - ); + let router_host = build_listener_connection_string( + listener, + druid_tls_security, + &DruidRole::Router.to_string(), + ) + .context(ListenerConfigurationSnafu)?; let sqlalchemy_conn_str = format!("druid://{}/druid/v2/sql", router_host); let avatica_conn_str = format!( "jdbc:avatica:remote:url=http://{}/druid/v2/sql/avatica/",
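With this change the discovery ConfigMap no longer derives the router address from a role Service FQDN; it takes the Listener's ingress address instead. A sketch of the entries this produces, reusing the format strings from the hunk above; the host value is a made-up example of what a Listener might report:

// Assuming router_host = "203.0.113.10:9088" was read from the Listener
// status, the discovery ConfigMap would contain these connection strings.
fn discovery_conn_strings(router_host: &str) -> (String, String) {
    let sqlalchemy = format!("druid://{router_host}/druid/v2/sql");
    let avatica = format!("jdbc:avatica:remote:url=http://{router_host}/druid/v2/sql/avatica/");
    (sqlalchemy, avatica)
}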
diff --git a/rust/operator-binary/src/druid_controller.rs b/rust/operator-binary/src/druid_controller.rs index f7777628..84eb1121 100644 --- a/rust/operator-binary/src/druid_controller.rs +++ b/rust/operator-binary/src/druid_controller.rs @@ -34,10 +34,7 @@ use stackable_operator::{ DeepMerge, api::{ apps::v1::{StatefulSet, StatefulSetSpec}, - core::v1::{ - ConfigMap, EnvVar, PersistentVolumeClaim, Service, ServiceAccount, ServicePort, - ServiceSpec, - }, + core::v1::{ConfigMap, EnvVar, PersistentVolumeClaim, ServiceAccount}, }, apimachinery::pkg::apis::meta::v1::LabelSelector, }, @@ -46,7 +43,7 @@ use stackable_operator::{ core::{DeserializeGuard, error_boundary}, runtime::{controller::Action, reflector::ObjectRef}, }, - kvp::{KeyValuePairError, Label, LabelError, LabelValueError, Labels}, + kvp::{KeyValuePairError, LabelError, LabelValueError, Labels}, logging::controller::ReconcilerError, product_config_utils::{transform_all_roles_to_config, validate_all_roles_and_groups_config}, product_logging::{ @@ -74,11 +71,10 @@ use crate::{ Container, DB_PASSWORD_ENV, DB_USERNAME_ENV, DRUID_CONFIG_DIRECTORY, DS_BUCKET, DeepStorageSpec, DruidClusterStatus, DruidRole, EXTENSIONS_LOADLIST, HDFS_CONFIG_DIRECTORY, JVM_CONFIG, JVM_SECURITY_PROPERTIES_FILE, LOG_CONFIG_DIRECTORY, MAX_DRUID_LOG_FILES_SIZE, - METRICS_PORT, METRICS_PORT_NAME, OPERATOR_NAME, RUNTIME_PROPS, RW_CONFIG_DIRECTORY, - S3_ACCESS_KEY, S3_ENDPOINT_URL, S3_PATH_STYLE_ACCESS, S3_SECRET_KEY, STACKABLE_LOG_DIR, - ZOOKEEPER_CONNECTION_STRING, authentication::AuthenticationClassesResolved, - authorization::DruidAuthorization, build_recommended_labels, build_string_list, - security::DruidTlsSecurity, v1alpha1, + OPERATOR_NAME, RUNTIME_PROPS, RW_CONFIG_DIRECTORY, S3_ACCESS_KEY, S3_ENDPOINT_URL, + S3_PATH_STYLE_ACCESS, S3_SECRET_KEY, STACKABLE_LOG_DIR, ZOOKEEPER_CONNECTION_STRING, + authentication::AuthenticationClassesResolved, authorization::DruidAuthorization, + build_recommended_labels, build_string_list, security::DruidTlsSecurity, v1alpha1, }, discovery::{self, build_discovery_configmaps}, extensions::get_extension_list, @@ -89,6 +85,10 @@ use crate::{ }, operations::{graceful_shutdown::add_graceful_shutdown_config, pdb::add_pdbs}, product_logging::extend_role_group_config_map, + service::{ + build_rolegroup_headless_service, build_rolegroup_metrics_service, + rolegroup_headless_service_name, + }, }; pub const DRUID_CONTROLLER_NAME: &str = "druidcluster"; @@ -375,6 +375,9 @@ pub enum Error { #[snafu(display("failed to configure listener"))] ListenerConfiguration { source: crate::listener::Error }, + + #[snafu(display("failed to configure service"))] + ServiceConfiguration { source: crate::service::Error }, } type Result<T, E = Error> = std::result::Result<T, E>; @@ -527,7 +530,39 @@ pub async fn reconcile_druid( .common_config(&druid_role, rolegroup_name) .context(FailedToResolveConfigSnafu)?; - let rg_service = build_rolegroup_service(druid, &resolved_product_image, &rolegroup)?; + let role_group_service_recommended_labels = build_recommended_labels( + druid, + DRUID_CONTROLLER_NAME, + &resolved_product_image.app_version_label, + &rolegroup.role, + &rolegroup.role_group, + ); + + let role_group_service_selector = Labels::role_group_selector( + druid, + APP_NAME, + &rolegroup.role, + &rolegroup.role_group, + ) + .context(LabelBuildSnafu)?; + + let rg_headless_service = build_rolegroup_headless_service( + druid, + &druid_tls_security, + &druid_role, + &rolegroup, + role_group_service_recommended_labels.clone(), + role_group_service_selector.clone().into(), + ) + .context(ServiceConfigurationSnafu)?; + let rg_metrics_service = build_rolegroup_metrics_service( + druid, + &rolegroup, + role_group_service_recommended_labels, + role_group_service_selector.into(), + ) + .context(ServiceConfigurationSnafu)?; + let rg_configmap = build_rolegroup_config_map( druid, &resolved_product_image, @@ -555,7 +590,13 @@ pub async fn reconcile_druid( )?; cluster_resources - .add(client, rg_service) + .add(client, rg_headless_service) + .await + .with_context(|_| ApplyRoleGroupServiceSnafu { + rolegroup: rolegroup.clone(), + })?; + cluster_resources + .add(client, rg_metrics_service) .await .with_context(|_| ApplyRoleGroupServiceSnafu { rolegroup: rolegroup.clone(), })?; @@ -594,10 +635,29 @@ ) .context(ListenerConfigurationSnafu)?; - cluster_resources + let listener = cluster_resources .add(client, role_group_listener) .await .context(ApplyGroupListenerSnafu)?; + + if druid_role == DruidRole::Router { + // discovery + for discovery_cm in
build_discovery_configmaps( + druid, + druid, + &resolved_product_image, + &druid_tls_security, + listener, + ) + .await + .context(BuildDiscoveryConfigSnafu)? + { + cluster_resources + .add(client, discovery_cm) + .await + .context(ApplyDiscoveryConfigSnafu)?; + } + } } } @@ -614,23 +674,6 @@ pub async fn reconcile_druid( .context(FailedToCreatePdbSnafu)?; } - // discovery - for discovery_cm in build_discovery_configmaps( - druid, - druid, - &client.kubernetes_cluster_info, - &resolved_product_image, - &druid_tls_security, - ) - .await - .context(BuildDiscoveryConfigSnafu)? - { - cluster_resources - .add(client, discovery_cm) - .await - .context(ApplyDiscoveryConfigSnafu)?; - } - let cluster_operation_cond_builder = ClusterOperationsConditionBuilder::new(&druid.spec.cluster_operation); @@ -842,63 +885,6 @@ fn build_rolegroup_config_map( }) } -pub fn rolegroup_service_name(rolegroup: &RoleGroupRef<v1alpha1::DruidCluster>) -> String { - format!("{name}-headless", name = rolegroup.object_name()) -} - -/// The rolegroup [`Service`] is a headless service that allows direct access to the instances of a certain rolegroup -/// -/// This is mostly useful for internal communication between peers, or for clients that perform client-side load balancing. -fn build_rolegroup_service( - druid: &v1alpha1::DruidCluster, - resolved_product_image: &ResolvedProductImage, - rolegroup: &RoleGroupRef<v1alpha1::DruidCluster>, -) -> Result<Service> { - let ports = vec![ServicePort { - name: Some(METRICS_PORT_NAME.into()), - port: METRICS_PORT.into(), - protocol: Some("TCP".to_string()), - ..Default::default() - }]; - - Ok(Service { - metadata: ObjectMetaBuilder::new() - .name_and_namespace(druid) - .name(rolegroup_service_name(rolegroup)) - .ownerreference_from_resource(druid, None, Some(true)) - .context(ObjectMissingMetadataForOwnerRefSnafu)? - .with_recommended_labels(build_recommended_labels( - druid, - DRUID_CONTROLLER_NAME, - &resolved_product_image.app_version_label, - &rolegroup.role, - &rolegroup.role_group, - )) - .context(MetadataBuildSnafu)? - .with_label(Label::try_from(("prometheus.io/scrape", "true")).context(LabelBuildSnafu)?) - .build(), - spec: Some(ServiceSpec { - // Internal communication does not need to be exposed - type_: Some("ClusterIP".to_string()), - cluster_ip: Some("None".to_string()), - ports: Some(ports), - selector: Some( - Labels::role_group_selector( - druid, - APP_NAME, - &rolegroup.role, - &rolegroup.role_group, - ) - .context(LabelBuildSnafu)? - .into(), - ), - publish_not_ready_addresses: Some(true), - ..ServiceSpec::default() - }), - status: None, - }) -} - #[allow(clippy::too_many_arguments)] /// The rolegroup [`StatefulSet`] runs the rolegroup, as configured by the administrator.
/// @@ -1223,7 +1209,9 @@ fn build_rolegroup_statefulset( ), ..LabelSelector::default() }, - service_name: Some(rolegroup_service_name(rolegroup_ref)), + service_name: Some(rolegroup_headless_service_name( + &rolegroup_ref.object_name(), + )), template: pod_template, volume_claim_templates: pvcs, ..StatefulSetSpec::default() diff --git a/rust/operator-binary/src/listener.rs b/rust/operator-binary/src/listener.rs index f7e76067..ac5d9222 100644 --- a/rust/operator-binary/src/listener.rs +++ b/rust/operator-binary/src/listener.rs @@ -1,4 +1,4 @@ -use snafu::{ResultExt, Snafu}; +use snafu::{OptionExt, ResultExt, Snafu}; use stackable_operator::{ builder::{ meta::ObjectMetaBuilder, @@ -10,7 +10,11 @@ use stackable_operator::{ kvp::{Labels, ObjectLabels}, }; -use crate::crd::{DruidRole, security::DruidTlsSecurity, v1alpha1}; +use crate::crd::{ + DruidRole, + security::{DruidTlsSecurity, PLAINTEXT_PORT_NAME, TLS_PORT_NAME}, + v1alpha1, +}; pub const LISTENER_VOLUME_NAME: &str = "listener"; pub const LISTENER_VOLUME_DIR: &str = "/stackable/listener"; @@ -31,6 +35,15 @@ pub enum Error { BuildListenerPersistentVolume { source: stackable_operator::builder::pod::volume::ListenerOperatorVolumeSourceBuilderError, }, + + #[snafu(display("{role_name} listener has no address"))] + RoleListenerHasNoAddress { role_name: String }, + + #[snafu(display("could not find port [{port_name}] for rolegroup listener {role_name}"))] + NoServicePort { + port_name: String, + role_name: String, + }, } pub fn build_group_listener( @@ -84,3 +97,32 @@ pub fn group_listener_name( DruidRole::Historical | DruidRole::MiddleManager => None, } } + +// Builds the connection string from the ingress address exposed by the provided Listener +pub fn build_listener_connection_string( + listener: Listener, + druid_tls_security: &DruidTlsSecurity, + role_name: &String, +) -> Result<String, Error> { + // We only need the first address corresponding to the role + let listener_address = listener + .status + .and_then(|s| s.ingress_addresses?.into_iter().next()) + .context(RoleListenerHasNoAddressSnafu { role_name })?; + let port_name = match druid_tls_security.tls_enabled() { + true => TLS_PORT_NAME, + false => PLAINTEXT_PORT_NAME, + }; + Ok(format!( + "{address}:{port}", + address = listener_address.address, + port = listener_address + .ports + .get(port_name) + .copied() + .context(NoServicePortSnafu { + port_name, + role_name + })? + )) +}
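The lookup that `build_listener_connection_string` performs can be reduced to a few lines over simplified types. This is a self-contained sketch, not the listener CRD structs from stackable_operator:

use std::collections::BTreeMap;

// Stand-in for the Listener ingress data used above.
struct IngressAddress {
    address: String,
    ports: BTreeMap<String, i32>,
}

// Mirrors the logic above: take the first ingress address, then pick the
// "https" or "http" port depending on whether TLS is enabled.
fn connection_string(addresses: &[IngressAddress], tls_enabled: bool) -> Option<String> {
    let addr = addresses.first()?;
    let port_name = if tls_enabled { "https" } else { "http" };
    let port = addr.ports.get(port_name)?;
    Some(format!("{}:{}", addr.address, port))
}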
diff --git a/rust/operator-binary/src/main.rs b/rust/operator-binary/src/main.rs index 9e40184f..1b65e18a 100644 --- a/rust/operator-binary/src/main.rs +++ b/rust/operator-binary/src/main.rs @@ -37,6 +37,7 @@ mod internal_secret; mod listener; mod operations; mod product_logging; +mod service; mod built_info { include!(concat!(env!("OUT_DIR"), "/built.rs")); diff --git a/rust/operator-binary/src/service.rs b/rust/operator-binary/src/service.rs new file mode 100644 index 00000000..62f25a1f --- /dev/null +++ b/rust/operator-binary/src/service.rs @@ -0,0 +1,119 @@ +use std::collections::BTreeMap; + +use snafu::{ResultExt, Snafu}; +use stackable_operator::{ + builder::meta::ObjectMetaBuilder, + k8s_openapi::api::core::v1::{Service, ServicePort, ServiceSpec}, + kvp::{Label, ObjectLabels}, + role_utils::RoleGroupRef, +}; + +use crate::crd::{ + DruidRole, METRICS_PORT, METRICS_PORT_NAME, security::DruidTlsSecurity, v1alpha1, +}; + +const METRICS_SERVICE_SUFFIX: &str = "metrics"; +const HEADLESS_SERVICE_SUFFIX: &str = "headless"; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("object is missing metadata to build owner reference"))] + ObjectMissingMetadataForOwnerRef { + source: stackable_operator::builder::meta::Error, + }, + + #[snafu(display("failed to build Metadata"))] + MetadataBuild { + source: stackable_operator::builder::meta::Error, + }, + + #[snafu(display("failed to build Labels"))] + LabelBuild { + source: stackable_operator::kvp::LabelError, + }, +} + +/// The rolegroup headless [`Service`] is a service that allows direct access to the instances of a certain rolegroup. +/// This is mostly useful for internal communication between peers, or for clients that perform client-side load balancing. +pub fn build_rolegroup_headless_service( + druid: &v1alpha1::DruidCluster, + druid_tls_security: &DruidTlsSecurity, + druid_role: &DruidRole, + role_group_ref: &RoleGroupRef<v1alpha1::DruidCluster>, + object_labels: ObjectLabels<v1alpha1::DruidCluster>, + selector: BTreeMap<String, String>, +) -> Result<Service, Error> { + Ok(Service { + metadata: ObjectMetaBuilder::new() + .name_and_namespace(druid) + .name(rolegroup_headless_service_name( + &role_group_ref.object_name(), + )) + .ownerreference_from_resource(druid, None, Some(true)) + .context(ObjectMissingMetadataForOwnerRefSnafu)? + .with_recommended_labels(object_labels) + .context(MetadataBuildSnafu)? + .build(), + spec: Some(ServiceSpec { + // Internal communication does not need to be exposed + type_: Some("ClusterIP".to_string()), + cluster_ip: Some("None".to_string()), + ports: Some(druid_tls_security.service_ports(druid_role)), + selector: Some(selector), + publish_not_ready_addresses: Some(true), + ..ServiceSpec::default() + }), + status: None, + }) +} + +/// The rolegroup metrics [`Service`] is a service that exposes metrics and a prometheus scraping label. +pub fn build_rolegroup_metrics_service( + druid: &v1alpha1::DruidCluster, + role_group_ref: &RoleGroupRef<v1alpha1::DruidCluster>, + object_labels: ObjectLabels<v1alpha1::DruidCluster>, + selector: BTreeMap<String, String>, +) -> Result<Service, Error> { + Ok(Service { + metadata: ObjectMetaBuilder::new() + .name_and_namespace(druid) + .name(rolegroup_metrics_service_name( + &role_group_ref.object_name(), + )) + .ownerreference_from_resource(druid, None, Some(true)) + .context(ObjectMissingMetadataForOwnerRefSnafu)? + .with_recommended_labels(object_labels) + .context(MetadataBuildSnafu)? + .with_label(Label::try_from(("prometheus.io/scrape", "true")).context(LabelBuildSnafu)?
+ .build(), spec: Some(ServiceSpec { + // Internal communication does not need to be exposed + type_: Some("ClusterIP".to_string()), + cluster_ip: Some("None".to_string()), + ports: Some(metrics_service_ports()), + selector: Some(selector), + publish_not_ready_addresses: Some(true), + ..ServiceSpec::default() + }), + status: None, + }) +} + +fn metrics_service_ports() -> Vec<ServicePort> { + vec![ServicePort { + name: Some(METRICS_PORT_NAME.to_string()), + port: METRICS_PORT.into(), + protocol: Some("TCP".to_string()), + ..ServicePort::default() + }] +} + +/// Returns the metrics rolegroup service name `<cluster>-<role>-<rolegroup>-metrics`. +fn rolegroup_metrics_service_name(role_group_ref_object_name: &str) -> String { + format!("{role_group_ref_object_name}-{METRICS_SERVICE_SUFFIX}") +} + +/// Returns the headless rolegroup service name `<cluster>-<role>-<rolegroup>-headless`. +pub fn rolegroup_headless_service_name(role_group_ref_object_name: &str) -> String { + format!("{role_group_ref_object_name}-{HEADLESS_SERVICE_SUFFIX}") +} From 16c14725583c7fefafa2f09651c5a50338cc435c Mon Sep 17 00:00:00 2001 From: Benedikt Labrenz Date: Wed, 2 Jul 2025 11:33:28 +0200 Subject: [PATCH 17/18] fix rustdocs --- rust/operator-binary/src/druid_controller.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rust/operator-binary/src/druid_controller.rs b/rust/operator-binary/src/druid_controller.rs index 84eb1121..3c3b32f4 100644 --- a/rust/operator-binary/src/druid_controller.rs +++ b/rust/operator-binary/src/druid_controller.rs @@ -888,7 +888,8 @@ fn build_rolegroup_config_map( #[allow(clippy::too_many_arguments)] /// The rolegroup [`StatefulSet`] runs the rolegroup, as configured by the administrator. /// -/// The [`Pod`](`stackable_operator::k8s_openapi::api::core::v1::Pod`)s are accessible through the corresponding [`Service`] (from [`build_rolegroup_service`]). +/// The [`Pod`](`stackable_operator::k8s_openapi::api::core::v1::Pod`)s are accessible through the +/// corresponding [`stackable_operator::k8s_openapi::api::core::v1::Service`] (from [`build_rolegroup_headless_service`]). fn build_rolegroup_statefulset( druid: &v1alpha1::DruidCluster, resolved_product_image: &ResolvedProductImage,
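For reference, the two per-role-group Services introduced above differ only in suffix and purpose. A sketch of the names the helpers in service.rs produce, assuming a cluster `druid`, role `broker` and role group `default`, where `object_name()` yields `druid-broker-default`:

// Sketch of the naming scheme; both names derive from the role group's
// object name.
fn rolegroup_service_names(object_name: &str) -> (String, String) {
    (
        format!("{object_name}-headless"), // backs the StatefulSet, direct pod access
        format!("{object_name}-metrics"),  // carries the prometheus.io/scrape label
    )
}

// rolegroup_service_names("druid-broker-default")
//   == ("druid-broker-default-headless", "druid-broker-default-metrics")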
From 09cb1751274c257ecb84187e168ecdb077794356 Mon Sep 17 00:00:00 2001 From: Benedikt Labrenz Date: Wed, 2 Jul 2025 15:51:52 +0200 Subject: [PATCH 18/18] use listener scope for tls --- rust/operator-binary/src/crd/security.rs | 21 ++++++++++++++------ rust/operator-binary/src/druid_controller.rs | 4 +++- rust/operator-binary/src/listener.rs | 10 ++++++++++ 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/rust/operator-binary/src/crd/security.rs b/rust/operator-binary/src/crd/security.rs index a2d180a8..3da91b15 100644 --- a/rust/operator-binary/src/crd/security.rs +++ b/rust/operator-binary/src/crd/security.rs @@ -207,19 +207,28 @@ impl DruidTlsSecurity { druid: &mut ContainerBuilder, pod: &mut PodBuilder, requested_secret_lifetime: &Duration, + listener_scope: Option<String>, ) -> Result<(), Error> { // `ResolvedAuthenticationClasses::validate` already checked that the tls AuthenticationClass // uses the same SecretClass as the Druid server itself. if let Some(secret_class) = &self.server_and_internal_secret_class { + let mut secret_volume_source_builder = + SecretOperatorVolumeSourceBuilder::new(secret_class); + + secret_volume_source_builder + .with_pod_scope() + .with_format(SecretFormat::TlsPkcs12) + .with_tls_pkcs12_password(TLS_STORE_PASSWORD) + .with_auto_tls_cert_lifetime(*requested_secret_lifetime); + + if let Some(listener_scope) = &listener_scope { + secret_volume_source_builder.with_listener_volume_scope(listener_scope); + } + pod.add_volume( VolumeBuilder::new(TLS_MOUNT_VOLUME_NAME) .ephemeral( - SecretOperatorVolumeSourceBuilder::new(secret_class) - .with_pod_scope() - .with_node_scope() - .with_format(SecretFormat::TlsPkcs12) - .with_tls_pkcs12_password(TLS_STORE_PASSWORD) - .with_auto_tls_cert_lifetime(*requested_secret_lifetime) + secret_volume_source_builder .build() .context(SecretVolumeBuildSnafu)?, ) diff --git a/rust/operator-binary/src/druid_controller.rs b/rust/operator-binary/src/druid_controller.rs index 3c3b32f4..db286b93 100644 --- a/rust/operator-binary/src/druid_controller.rs +++ b/rust/operator-binary/src/druid_controller.rs @@ -81,7 +81,7 @@ use crate::{ internal_secret::{create_shared_internal_secret, env_var_from_secret}, listener::{ LISTENER_VOLUME_DIR, LISTENER_VOLUME_NAME, build_group_listener, build_group_listener_pvc, - group_listener_name, + group_listener_name, secret_volume_listener_scope, }, operations::{graceful_shutdown::add_graceful_shutdown_config, pdb::add_pdbs}, product_logging::extend_role_group_config_map, @@ -968,6 +968,8 @@ fn build_rolegroup_statefulset( &mut cb_druid, &mut pb, &merged_rolegroup_config.requested_secret_lifetime, + // scope the TLS secret volume to the role's Listener volume, if the role has one + secret_volume_listener_scope(role), ) .context(FailedToInitializeSecurityContextSnafu)?; diff --git a/rust/operator-binary/src/listener.rs b/rust/operator-binary/src/listener.rs index ac5d9222..a1800142 100644 --- a/rust/operator-binary/src/listener.rs +++ b/rust/operator-binary/src/listener.rs @@ -126,3 +126,13 @@ pub fn build_listener_connection_string( })? )) } + +/// The listener volume name to scope TLS secrets to, depending on the role +pub fn secret_volume_listener_scope(role: &DruidRole) -> Option<String> { + match role { + DruidRole::Broker | DruidRole::Coordinator | DruidRole::Router => { + Some(LISTENER_VOLUME_NAME.to_string()) + } + DruidRole::Historical | DruidRole::MiddleManager => None, + } +}
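Summing up the final patch: roles that are exposed through a Listener (broker, coordinator, router) now get their TLS secret volume scoped to that listener volume, so the issued certificate can also cover the address the Listener exposes, while historicals and middle managers keep pod scope only. A condensed sketch of that decision, string-typed here for brevity where the operator matches on `DruidRole`:

fn tls_listener_scope(role: &str) -> Option<&'static str> {
    match role {
        // LISTENER_VOLUME_NAME is "listener" (see listener.rs)
        "broker" | "coordinator" | "router" => Some("listener"),
        // roles without a Listener keep pod scope only
        _ => None,
    }
}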