From ca734f5b43ba6b61e7c453b041961c6f1cd0ec08 Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Mon, 9 Mar 2026 11:00:13 +0800 Subject: [PATCH 01/11] topology: support manager-based TiDB discovery with namespace filter --- src/common/topology/fetch/mod.rs | 49 ++- src/common/topology/fetch/tidb_manager.rs | 358 ++++++++++++++++++++++ src/sources/keyviz.rs | 2 + src/sources/system_tables/controller.rs | 6 + src/sources/system_tables/mod.rs | 28 +- src/sources/topsql/controller.rs | 4 + src/sources/topsql/mod.rs | 12 + 7 files changed, 455 insertions(+), 4 deletions(-) create mode 100644 src/common/topology/fetch/tidb_manager.rs diff --git a/src/common/topology/fetch/mod.rs b/src/common/topology/fetch/mod.rs index 360dfd40..003eac18 100644 --- a/src/common/topology/fetch/mod.rs +++ b/src/common/topology/fetch/mod.rs @@ -2,6 +2,7 @@ mod models; mod pd; mod store; mod tidb; +mod tidb_manager; mod utils; pub mod tidb_nextgen; @@ -43,6 +44,8 @@ pub enum FetchError { FetchPDTopology { source: pd::FetchError }, #[snafu(display("Failed to fetch tidb topology: {}", source))] FetchTiDBTopology { source: tidb::FetchError }, + #[snafu(display("Failed to fetch tidb topology from manager server: {}", source))] + FetchTiDBFromManagerServerTopology { source: tidb_manager::FetchError }, #[snafu(display("Failed to fetch store topology: {}", source))] FetchStoreTopology { source: store::FetchError }, #[snafu(display("Failed to fetch tidb nextgen topology: {}", source))] @@ -56,6 +59,8 @@ pub enum FetchError { // Legacy topology fetcher pub struct LegacyTopologyFetcher { pd_address: String, + manager_server_address: Option, + manager_server_namespace: Option, http_client: HttpClient, pub etcd_client: etcd_client::Client, } @@ -63,15 +68,22 @@ pub struct LegacyTopologyFetcher { impl LegacyTopologyFetcher { pub async fn new( pd_address: String, + manager_server_address: Option, + manager_server_namespace: Option, tls_config: Option, proxy_config: &ProxyConfig, ) -> Result { let pd_address = Self::polish_address(pd_address, &tls_config)?; + let manager_server_address = manager_server_address + .map(Self::polish_manager_server_address) + .transpose()?; let http_client = Self::build_http_client(tls_config.as_ref(), proxy_config)?; let etcd_client = Self::build_etcd_client(&pd_address, &tls_config).await?; Ok(Self { pd_address, + manager_server_address, + manager_server_namespace, http_client, etcd_client, }) @@ -85,10 +97,21 @@ impl LegacyTopologyFetcher { .get_up_pds(components) .await .context(FetchPDTopologySnafu)?; - tidb::TiDBTopologyFetcher::new(&mut self.etcd_client) + if let Some(manager_server_address) = self.manager_server_address.as_deref() { + tidb_manager::TiDBManagerTopologyFetcher::new( + manager_server_address, + self.manager_server_namespace.as_deref(), + &self.http_client, + ) .get_up_tidbs(components) .await - .context(FetchTiDBTopologySnafu)?; + .context(FetchTiDBFromManagerServerTopologySnafu)?; + } else { + tidb::TiDBTopologyFetcher::new(&mut self.etcd_client) + .get_up_tidbs(components) + .await + .context(FetchTiDBTopologySnafu)?; + } store::StoreTopologyFetcher::new(&self.pd_address, &self.http_client) .get_up_stores(components) .await @@ -114,6 +137,17 @@ impl LegacyTopologyFetcher { Ok(address) } + fn polish_manager_server_address(mut address: String) -> Result { + let uri: hyper::Uri = address.parse().context(ParseAddressSnafu)?; + if uri.scheme().is_none() { + address = format!("http://{address}"); + } + if address.ends_with('/') { + address.pop(); + } + Ok(address) + } + fn build_http_client( tls_config: Option<&TlsConfig>, proxy_config: &ProxyConfig, @@ -234,6 +268,8 @@ impl TopologyFetcher { /// Create a new topology fetcher based on the current feature configuration pub async fn new( pd_address: Option, + manager_server_address: Option, + manager_server_namespace: Option, tls_config: Option, proxy_config: &ProxyConfig, tidb_group: Option, @@ -252,7 +288,14 @@ impl TopologyFetcher { let pd_address = pd_address.ok_or_else(|| FetchError::ConfigurationError { message: "PD address is required in legacy mode".to_string(), })?; - let fetcher = LegacyTopologyFetcher::new(pd_address, tls_config, proxy_config).await?; + let fetcher = LegacyTopologyFetcher::new( + pd_address, + manager_server_address, + manager_server_namespace, + tls_config, + proxy_config, + ) + .await?; Ok(Self { inner: TopologyFetcherImpl::Legacy(Box::new(fetcher)), }) diff --git a/src/common/topology/fetch/tidb_manager.rs b/src/common/topology/fetch/tidb_manager.rs new file mode 100644 index 00000000..8a20ddad --- /dev/null +++ b/src/common/topology/fetch/tidb_manager.rs @@ -0,0 +1,358 @@ +use std::collections::HashSet; + +use serde_json::{Map, Value}; +use snafu::{ResultExt, Snafu}; +use vector::http::HttpClient; + +use crate::common::topology::fetch::utils; +use crate::common::topology::{Component, InstanceType}; + +const GET_ACTIVE_TIDB_PATH: &str = "/api/tidb/get_active_tidb"; +const DEFAULT_TIDB_PRIMARY_PORT: u16 = 4000; +const DEFAULT_TIDB_STATUS_PORT: u16 = 10080; +const MAX_RESPONSE_DEPTH: usize = 8; + +#[derive(Debug, Snafu)] +pub enum FetchError { + #[snafu(display("Failed to build request: {}", source))] + BuildRequest { source: http::Error }, + #[snafu(display("Failed to get active tidb addresses from manager server: {}", source))] + GetActiveTiDBs { source: vector::http::HttpError }, + #[snafu(display("Failed to read active tidb response bytes: {}", source))] + GetActiveTiDBsBytes { source: hyper::Error }, + #[snafu(display("Failed to parse active tidb response JSON text: {}", source))] + ActiveTiDBJsonFromStr { source: serde_json::Error }, + #[snafu(display("Invalid manager server response: {}", message))] + InvalidManagerResponse { message: String }, + #[snafu(display("Failed to parse tidb host from manager response: {}", source))] + ParseTiDBHost { source: utils::ParseError }, +} + +#[derive(Debug, Clone, Eq, PartialEq)] +struct ActiveTiDBAddress { + host: String, + port: Option, + status_port: Option, +} + +pub struct TiDBManagerTopologyFetcher<'a> { + manager_server_address: &'a str, + manager_server_namespace: Option<&'a str>, + http_client: &'a HttpClient, +} + +impl<'a> TiDBManagerTopologyFetcher<'a> { + pub fn new( + manager_server_address: &'a str, + manager_server_namespace: Option<&'a str>, + http_client: &'a HttpClient, + ) -> Self { + Self { + manager_server_address, + manager_server_namespace, + http_client, + } + } + + pub async fn get_up_tidbs( + &self, + components: &mut HashSet, + ) -> Result<(), FetchError> { + let active_tidb_addresses = self.fetch_active_tidb_addresses().await?; + + for active_tidb in active_tidb_addresses { + let (host, primary_port) = + Self::parse_tidb_host_and_primary(&active_tidb.host, active_tidb.port)?; + let secondary_port = active_tidb.status_port.unwrap_or(DEFAULT_TIDB_STATUS_PORT); + + components.insert(Component { + instance_type: InstanceType::TiDB, + host, + primary_port, + secondary_port, + }); + } + + Ok(()) + } + + async fn fetch_active_tidb_addresses(&self) -> Result, FetchError> { + let Some(endpoint_url) = self.active_tidb_endpoint_url() else { + return Ok(Vec::new()); + }; + + let req = http::Request::get(endpoint_url) + .body(hyper::Body::empty()) + .context(BuildRequestSnafu)?; + + let res = self + .http_client + .send(req) + .await + .context(GetActiveTiDBsSnafu)?; + let bytes = hyper::body::to_bytes(res.into_body()) + .await + .context(GetActiveTiDBsBytesSnafu)?; + + Self::parse_active_tidb_addresses_response(&bytes) + } + + fn active_tidb_endpoint_url(&self) -> Option { + let namespaces = Self::normalize_namespaces(self.manager_server_namespace)?; + Some(Self::build_active_tidb_endpoint_url( + self.manager_server_address, + &namespaces, + )) + } + + fn normalize_namespaces(namespaces: Option<&str>) -> Option { + let namespaces = namespaces?; + let normalized = namespaces + .split(',') + .map(str::trim) + .filter(|ns| !ns.is_empty()) + .collect::>(); + if normalized.is_empty() { + None + } else { + Some(normalized.join(",")) + } + } + + fn build_active_tidb_endpoint_url(manager_server_address: &str, namespaces: &str) -> String { + let mut endpoint = if manager_server_address.ends_with(GET_ACTIVE_TIDB_PATH) { + manager_server_address.to_owned() + } else { + format!("{manager_server_address}{GET_ACTIVE_TIDB_PATH}") + }; + endpoint.push_str("?namespace="); + endpoint.push_str(namespaces); + endpoint + } + + fn parse_tidb_host_and_primary( + host_or_address: &str, + explicit_port: Option, + ) -> Result<(String, u16), FetchError> { + let host_or_address = host_or_address.trim_end_matches('/'); + if let Ok((host, parsed_port)) = utils::parse_host_port(host_or_address) { + return Ok((host, explicit_port.unwrap_or(parsed_port))); + } + + let default_address = format!("{host_or_address}:{DEFAULT_TIDB_PRIMARY_PORT}"); + let (host, _) = utils::parse_host_port(&default_address).context(ParseTiDBHostSnafu)?; + Ok((host, explicit_port.unwrap_or(DEFAULT_TIDB_PRIMARY_PORT))) + } + + fn parse_active_tidb_addresses_response( + bytes: &[u8], + ) -> Result, FetchError> { + let value = serde_json::from_slice::(bytes).context(ActiveTiDBJsonFromStrSnafu)?; + let addresses = Self::extract_active_tidb_addresses(&value, 0)?; + + if addresses.is_empty() { + return Err(FetchError::InvalidManagerResponse { + message: "no active tidb addresses found".to_owned(), + }); + } + + Ok(addresses) + } + + fn extract_active_tidb_addresses( + value: &Value, + depth: usize, + ) -> Result, FetchError> { + if depth > MAX_RESPONSE_DEPTH { + return Err(FetchError::InvalidManagerResponse { + message: "response nesting is too deep".to_owned(), + }); + } + + match value { + Value::String(host) => Ok(vec![ActiveTiDBAddress { + host: host.clone(), + port: None, + status_port: None, + }]), + Value::Array(items) => { + let mut addresses = Vec::new(); + for item in items { + addresses.extend(Self::extract_active_tidb_addresses(item, depth + 1)?); + } + Ok(addresses) + } + Value::Object(obj) => { + if let Some(address) = Self::extract_active_tidb_address_from_object(obj) { + return Ok(vec![address]); + } + + for key in [ + "data", + "result", + "active_tidb_addresses", + "tidb_addresses", + "active_tidbs", + "tidbs", + "addresses", + "instances", + "items", + "nodes", + "list", + ] { + if let Some(next_value) = obj.get(key) { + let addresses = Self::extract_active_tidb_addresses(next_value, depth + 1)?; + if !addresses.is_empty() { + return Ok(addresses); + } + } + } + + Ok(Vec::new()) + } + _ => Ok(Vec::new()), + } + } + + fn extract_active_tidb_address_from_object( + obj: &Map, + ) -> Option { + let host = Self::extract_string_field( + obj, + &["host", "address", "tidb_address", "active_tidb_address"], + )?; + let port = Self::extract_u16_field(obj, &["port", "primary_port"]); + let status_port = Self::extract_u16_field(obj, &["status_port", "secondary_port"]); + + Some(ActiveTiDBAddress { + host, + port, + status_port, + }) + } + + fn extract_string_field(obj: &Map, keys: &[&str]) -> Option { + keys.iter() + .find_map(|key| obj.get(*key).and_then(Value::as_str).map(str::to_owned)) + } + + fn extract_u16_field(obj: &Map, keys: &[&str]) -> Option { + keys.iter().find_map(|key| { + obj.get(*key) + .and_then(Value::as_u64) + .and_then(|raw| u16::try_from(raw).ok()) + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_response_new_schema() { + let bytes = br#"[ + {"host":"10.0.0.1","port":4000,"status_port":10080}, + {"host":"10.0.0.2","port":4000,"status_port":10080} + ]"#; + let addresses = + TiDBManagerTopologyFetcher::parse_active_tidb_addresses_response(bytes).unwrap(); + + assert_eq!( + addresses, + vec![ + ActiveTiDBAddress { + host: "10.0.0.1".to_owned(), + port: Some(4000), + status_port: Some(10080), + }, + ActiveTiDBAddress { + host: "10.0.0.2".to_owned(), + port: Some(4000), + status_port: Some(10080), + } + ] + ); + } + + #[test] + fn parse_response_invalid_format() { + let bytes = br#"{"code":0,"message":"ok"}"#; + let err = TiDBManagerTopologyFetcher::parse_active_tidb_addresses_response(bytes) + .expect_err("expected invalid manager response"); + assert!(matches!(err, FetchError::InvalidManagerResponse { .. })); + } + + #[test] + fn parse_tidb_host_and_primary_with_address() { + let (host, primary_port) = + TiDBManagerTopologyFetcher::parse_tidb_host_and_primary("10.0.0.1:4100", None).unwrap(); + assert_eq!(host, "10.0.0.1"); + assert_eq!(primary_port, 4100); + } + + #[test] + fn parse_tidb_host_and_primary_with_host_only() { + let (host, primary_port) = + TiDBManagerTopologyFetcher::parse_tidb_host_and_primary("10.0.0.1", None).unwrap(); + assert_eq!(host, "10.0.0.1"); + assert_eq!(primary_port, DEFAULT_TIDB_PRIMARY_PORT); + } + + #[test] + fn parse_tidb_host_and_primary_with_explicit_port() { + let (host, primary_port) = + TiDBManagerTopologyFetcher::parse_tidb_host_and_primary("10.0.0.1", Some(4200)) + .unwrap(); + assert_eq!(host, "10.0.0.1"); + assert_eq!(primary_port, 4200); + } + + #[test] + fn build_endpoint_url_with_namespaces() { + let endpoint = TiDBManagerTopologyFetcher::build_active_tidb_endpoint_url( + "http://manager:8080", + "super-vip-tidb-pool,canary-super-vip-tidb-pool", + ); + assert_eq!( + endpoint, + "http://manager:8080/api/tidb/get_active_tidb?namespace=super-vip-tidb-pool,canary-super-vip-tidb-pool" + ); + } + + #[test] + fn build_endpoint_url_with_full_path() { + let endpoint = TiDBManagerTopologyFetcher::build_active_tidb_endpoint_url( + "http://manager:8080/api/tidb/get_active_tidb", + "super-vip-tidb-pool,canary-super-vip-tidb-pool", + ); + assert_eq!( + endpoint, + "http://manager:8080/api/tidb/get_active_tidb?namespace=super-vip-tidb-pool,canary-super-vip-tidb-pool" + ); + } + + #[test] + fn normalize_namespaces_none_or_empty() { + assert_eq!(TiDBManagerTopologyFetcher::normalize_namespaces(None), None); + assert_eq!( + TiDBManagerTopologyFetcher::normalize_namespaces(Some("")), + None + ); + assert_eq!( + TiDBManagerTopologyFetcher::normalize_namespaces(Some(" , ")), + None + ); + } + + #[test] + fn normalize_namespaces_trim_and_filter() { + let normalized = TiDBManagerTopologyFetcher::normalize_namespaces(Some( + " super-vip-tidb-pool, canary-super-vip-tidb-pool , ", + )); + assert_eq!( + normalized.as_deref(), + Some("super-vip-tidb-pool,canary-super-vip-tidb-pool") + ); + } +} diff --git a/src/sources/keyviz.rs b/src/sources/keyviz.rs index b1820411..81a68d47 100644 --- a/src/sources/keyviz.rs +++ b/src/sources/keyviz.rs @@ -92,6 +92,8 @@ impl SourceConfig for KeyvizConfig { // Since we already checked is_nextgen_mode() above, we know we're in legacy mode here let topo = TopologyFetcher::new( Some(pd_address.clone()), + None, // manager_server_address + None, // manager_server_namespace tls.clone(), &cx.proxy, None, // tidb_group diff --git a/src/sources/system_tables/controller.rs b/src/sources/system_tables/controller.rs index 3ddc962c..5dfed39f 100644 --- a/src/sources/system_tables/controller.rs +++ b/src/sources/system_tables/controller.rs @@ -43,6 +43,8 @@ impl Controller { /// Create a new controller with abstracted collectors pub async fn new( pd_address: Option, + manager_server_address: Option, + manager_server_namespace: Option, tidb_group: Option, label_k8s_instance: Option, topology_fetch_interval: Duration, @@ -70,6 +72,8 @@ impl Controller { TopologyFetcher::new( Some(String::new()), None, + None, + None, proxy_config, tidb_group.clone(), label_k8s_instance.clone(), @@ -94,6 +98,8 @@ impl Controller { TopologyFetcher::new( Some(pd_addr), + manager_server_address, + manager_server_namespace, pd_tls.clone(), proxy_config, tidb_group.clone(), diff --git a/src/sources/system_tables/mod.rs b/src/sources/system_tables/mod.rs index 17574ad1..bbb0b639 100644 --- a/src/sources/system_tables/mod.rs +++ b/src/sources/system_tables/mod.rs @@ -41,6 +41,8 @@ impl DatabaseEnvVars { // PD/Topology related environment variables pub const PD_ADDRESS: &'static str = "PD_ADDRESS"; + pub const MANAGER_SERVER_ADDRESS: &'static str = "MANAGER_SERVER_ADDRESS"; + pub const MANAGER_SERVER_NAMESPACE: &'static str = "MANAGER_SERVER_NAMESPACE"; pub const TIDB_GROUP: &'static str = "TIDB_GROUP"; pub const LABEL_K8S_INSTANCE: &'static str = "LABEL_K8S_INSTANCE"; @@ -63,9 +65,15 @@ impl DatabaseEnvVars { #[configurable_component(source("system_tables"))] #[derive(Debug, Clone)] pub struct SystemTablesConfig { - /// PD address for legacy mode (to discover TiDB instances) + /// PD address for legacy mode (for PD/store discovery and etcd access) pub pd_address: Option, + /// Manager server HTTP endpoint for TiDB discovery in legacy mode + pub manager_server_address: Option, + + /// Namespace filter for manager server active TiDB discovery + pub manager_server_namespace: Option, + /// TiDB group name for nextgen mode pub tidb_group: Option, @@ -253,6 +261,12 @@ impl SystemTablesConfig { if let Ok(val) = env::var(DatabaseEnvVars::PD_ADDRESS) { self.pd_address = Some(val); } + if let Ok(val) = env::var(DatabaseEnvVars::MANAGER_SERVER_ADDRESS) { + self.manager_server_address = Some(val); + } + if let Ok(val) = env::var(DatabaseEnvVars::MANAGER_SERVER_NAMESPACE) { + self.manager_server_namespace = Some(val); + } if let Ok(val) = env::var(DatabaseEnvVars::TIDB_GROUP) { self.tidb_group = Some(val); } @@ -337,6 +351,8 @@ impl GenerateConfig for SystemTablesConfig { fn generate_config() -> toml::Value { toml::Value::try_from(Self { pd_address: Some("127.0.0.1:2379".to_owned()), + manager_server_address: None, + manager_server_namespace: None, tidb_group: None, label_k8s_instance: None, database_username: Some("root".to_owned()), @@ -399,12 +415,20 @@ impl SourceConfig for SystemTablesConfig { if let Some(ref pd_addr) = config.pd_address { info!(" PD address: {}", pd_addr); } + if let Some(ref manager_server_addr) = config.manager_server_address { + info!(" Manager server address: {}", manager_server_addr); + } + if let Some(ref manager_server_namespace) = config.manager_server_namespace { + info!(" Manager server namespace: {}", manager_server_namespace); + } info!(" PD TLS enabled: {}", config.pd_tls.is_some()); info!(" Tables configured: {}", config.tables.len()); let topology_fetch_interval = Duration::from_secs_f64(config.topology_fetch_interval_seconds); let pd_address = config.pd_address.clone(); + let manager_server_address = config.manager_server_address.clone(); + let manager_server_namespace = config.manager_server_namespace.clone(); let tidb_group = config.tidb_group.clone(); let label_k8s_instance = config.label_k8s_instance.clone(); @@ -454,6 +478,8 @@ impl SourceConfig for SystemTablesConfig { info!("Using system tables controller with abstracted collectors"); let controller = Controller::new( pd_address, + manager_server_address, + manager_server_namespace, tidb_group, label_k8s_instance, topology_fetch_interval, diff --git a/src/sources/topsql/controller.rs b/src/sources/topsql/controller.rs index d0f3cbdc..464c4f51 100644 --- a/src/sources/topsql/controller.rs +++ b/src/sources/topsql/controller.rs @@ -47,6 +47,8 @@ impl Controller { pub async fn new( sharedpool_id: Option, pd_address: Option, + manager_server_address: Option, + manager_server_namespace: Option, topo_fetch_interval: Duration, init_retry_delay: Duration, top_n: usize, @@ -61,6 +63,8 @@ impl Controller { ) -> vector::Result { let topo_fetcher = TopologyFetcher::new( pd_address, + manager_server_address, + manager_server_namespace, tls_config.clone(), proxy_config, tidb_group, diff --git a/src/sources/topsql/mod.rs b/src/sources/topsql/mod.rs index 8b8e57d3..26091677 100644 --- a/src/sources/topsql/mod.rs +++ b/src/sources/topsql/mod.rs @@ -37,6 +37,12 @@ pub struct TopSQLConfig { /// PLACEHOLDER pub pd_address: Option, + /// PLACEHOLDER + pub manager_server_address: Option, + + /// PLACEHOLDER + pub manager_server_namespace: Option, + /// PLACEHOLDER pub tls: Option, @@ -81,6 +87,8 @@ impl GenerateConfig for TopSQLConfig { label_k8s_instance: None, keyspace_to_vmtenants: None, pd_address: None, + manager_server_address: None, + manager_server_namespace: None, tls: None, init_retry_delay_seconds: default_init_retry_delay(), topology_fetch_interval_seconds: default_topology_fetch_interval(), @@ -102,6 +110,8 @@ impl SourceConfig for TopSQLConfig { let label_k8s_instance = self.label_k8s_instance.clone(); let keyspace_to_vmtenants = self.keyspace_to_vmtenants.clone(); let pd_address = self.pd_address.clone(); + let manager_server_address = self.manager_server_address.clone(); + let manager_server_namespace = self.manager_server_namespace.clone(); let tls = self.tls.clone(); let topology_fetch_interval = Duration::from_secs_f64(self.topology_fetch_interval_seconds); let init_retry_delay = Duration::from_secs_f64(self.init_retry_delay_seconds); @@ -127,6 +137,8 @@ impl SourceConfig for TopSQLConfig { let controller = Controller::new( sharedpool_id, pd_address, + manager_server_address, + manager_server_namespace, topology_fetch_interval, init_retry_delay, top_n, From fc139e7180a73df8985a200e81164ce0b568b7b3 Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Tue, 10 Mar 2026 10:37:03 +0800 Subject: [PATCH 02/11] fix name Signed-off-by: zeminzhou --- src/common/topology/fetch/mod.rs | 12 ++++++------ src/common/topology/fetch/tidb_manager.rs | 8 ++++---- src/sources/keyviz.rs | 2 +- src/sources/system_tables/controller.rs | 4 ++-- src/sources/system_tables/mod.rs | 18 +++++++++--------- src/sources/topsql/controller.rs | 4 ++-- src/sources/topsql/mod.rs | 8 ++++---- 7 files changed, 28 insertions(+), 28 deletions(-) diff --git a/src/common/topology/fetch/mod.rs b/src/common/topology/fetch/mod.rs index 003eac18..44036495 100644 --- a/src/common/topology/fetch/mod.rs +++ b/src/common/topology/fetch/mod.rs @@ -60,7 +60,7 @@ pub enum FetchError { pub struct LegacyTopologyFetcher { pd_address: String, manager_server_address: Option, - manager_server_namespace: Option, + tidb_namespace: Option, http_client: HttpClient, pub etcd_client: etcd_client::Client, } @@ -69,7 +69,7 @@ impl LegacyTopologyFetcher { pub async fn new( pd_address: String, manager_server_address: Option, - manager_server_namespace: Option, + tidb_namespace: Option, tls_config: Option, proxy_config: &ProxyConfig, ) -> Result { @@ -83,7 +83,7 @@ impl LegacyTopologyFetcher { Ok(Self { pd_address, manager_server_address, - manager_server_namespace, + tidb_namespace, http_client, etcd_client, }) @@ -100,7 +100,7 @@ impl LegacyTopologyFetcher { if let Some(manager_server_address) = self.manager_server_address.as_deref() { tidb_manager::TiDBManagerTopologyFetcher::new( manager_server_address, - self.manager_server_namespace.as_deref(), + self.tidb_namespace.as_deref(), &self.http_client, ) .get_up_tidbs(components) @@ -269,7 +269,7 @@ impl TopologyFetcher { pub async fn new( pd_address: Option, manager_server_address: Option, - manager_server_namespace: Option, + tidb_namespace: Option, tls_config: Option, proxy_config: &ProxyConfig, tidb_group: Option, @@ -291,7 +291,7 @@ impl TopologyFetcher { let fetcher = LegacyTopologyFetcher::new( pd_address, manager_server_address, - manager_server_namespace, + tidb_namespace, tls_config, proxy_config, ) diff --git a/src/common/topology/fetch/tidb_manager.rs b/src/common/topology/fetch/tidb_manager.rs index 8a20ddad..cb99d2ca 100644 --- a/src/common/topology/fetch/tidb_manager.rs +++ b/src/common/topology/fetch/tidb_manager.rs @@ -37,19 +37,19 @@ struct ActiveTiDBAddress { pub struct TiDBManagerTopologyFetcher<'a> { manager_server_address: &'a str, - manager_server_namespace: Option<&'a str>, + tidb_namespace: Option<&'a str>, http_client: &'a HttpClient, } impl<'a> TiDBManagerTopologyFetcher<'a> { pub fn new( manager_server_address: &'a str, - manager_server_namespace: Option<&'a str>, + tidb_namespace: Option<&'a str>, http_client: &'a HttpClient, ) -> Self { Self { manager_server_address, - manager_server_namespace, + tidb_namespace, http_client, } } @@ -98,7 +98,7 @@ impl<'a> TiDBManagerTopologyFetcher<'a> { } fn active_tidb_endpoint_url(&self) -> Option { - let namespaces = Self::normalize_namespaces(self.manager_server_namespace)?; + let namespaces = Self::normalize_namespaces(self.tidb_namespace)?; Some(Self::build_active_tidb_endpoint_url( self.manager_server_address, &namespaces, diff --git a/src/sources/keyviz.rs b/src/sources/keyviz.rs index 81a68d47..6395951e 100644 --- a/src/sources/keyviz.rs +++ b/src/sources/keyviz.rs @@ -93,7 +93,7 @@ impl SourceConfig for KeyvizConfig { let topo = TopologyFetcher::new( Some(pd_address.clone()), None, // manager_server_address - None, // manager_server_namespace + None, // tidb_namespace tls.clone(), &cx.proxy, None, // tidb_group diff --git a/src/sources/system_tables/controller.rs b/src/sources/system_tables/controller.rs index 5dfed39f..4aa1cda4 100644 --- a/src/sources/system_tables/controller.rs +++ b/src/sources/system_tables/controller.rs @@ -44,7 +44,7 @@ impl Controller { pub async fn new( pd_address: Option, manager_server_address: Option, - manager_server_namespace: Option, + tidb_namespace: Option, tidb_group: Option, label_k8s_instance: Option, topology_fetch_interval: Duration, @@ -99,7 +99,7 @@ impl Controller { TopologyFetcher::new( Some(pd_addr), manager_server_address, - manager_server_namespace, + tidb_namespace, pd_tls.clone(), proxy_config, tidb_group.clone(), diff --git a/src/sources/system_tables/mod.rs b/src/sources/system_tables/mod.rs index bbb0b639..def5dc10 100644 --- a/src/sources/system_tables/mod.rs +++ b/src/sources/system_tables/mod.rs @@ -42,7 +42,7 @@ impl DatabaseEnvVars { // PD/Topology related environment variables pub const PD_ADDRESS: &'static str = "PD_ADDRESS"; pub const MANAGER_SERVER_ADDRESS: &'static str = "MANAGER_SERVER_ADDRESS"; - pub const MANAGER_SERVER_NAMESPACE: &'static str = "MANAGER_SERVER_NAMESPACE"; + pub const TIDB_NAMESPACE: &'static str = "TIDB_NAMESPACE"; pub const TIDB_GROUP: &'static str = "TIDB_GROUP"; pub const LABEL_K8S_INSTANCE: &'static str = "LABEL_K8S_INSTANCE"; @@ -72,7 +72,7 @@ pub struct SystemTablesConfig { pub manager_server_address: Option, /// Namespace filter for manager server active TiDB discovery - pub manager_server_namespace: Option, + pub tidb_namespace: Option, /// TiDB group name for nextgen mode pub tidb_group: Option, @@ -264,8 +264,8 @@ impl SystemTablesConfig { if let Ok(val) = env::var(DatabaseEnvVars::MANAGER_SERVER_ADDRESS) { self.manager_server_address = Some(val); } - if let Ok(val) = env::var(DatabaseEnvVars::MANAGER_SERVER_NAMESPACE) { - self.manager_server_namespace = Some(val); + if let Ok(val) = env::var(DatabaseEnvVars::TIDB_NAMESPACE) { + self.tidb_namespace = Some(val); } if let Ok(val) = env::var(DatabaseEnvVars::TIDB_GROUP) { self.tidb_group = Some(val); @@ -352,7 +352,7 @@ impl GenerateConfig for SystemTablesConfig { toml::Value::try_from(Self { pd_address: Some("127.0.0.1:2379".to_owned()), manager_server_address: None, - manager_server_namespace: None, + tidb_namespace: None, tidb_group: None, label_k8s_instance: None, database_username: Some("root".to_owned()), @@ -418,8 +418,8 @@ impl SourceConfig for SystemTablesConfig { if let Some(ref manager_server_addr) = config.manager_server_address { info!(" Manager server address: {}", manager_server_addr); } - if let Some(ref manager_server_namespace) = config.manager_server_namespace { - info!(" Manager server namespace: {}", manager_server_namespace); + if let Some(ref tidb_namespace) = config.tidb_namespace { + info!(" TiDB namespace: {}", tidb_namespace); } info!(" PD TLS enabled: {}", config.pd_tls.is_some()); info!(" Tables configured: {}", config.tables.len()); @@ -428,7 +428,7 @@ impl SourceConfig for SystemTablesConfig { Duration::from_secs_f64(config.topology_fetch_interval_seconds); let pd_address = config.pd_address.clone(); let manager_server_address = config.manager_server_address.clone(); - let manager_server_namespace = config.manager_server_namespace.clone(); + let tidb_namespace = config.tidb_namespace.clone(); let tidb_group = config.tidb_group.clone(); let label_k8s_instance = config.label_k8s_instance.clone(); @@ -479,7 +479,7 @@ impl SourceConfig for SystemTablesConfig { let controller = Controller::new( pd_address, manager_server_address, - manager_server_namespace, + tidb_namespace, tidb_group, label_k8s_instance, topology_fetch_interval, diff --git a/src/sources/topsql/controller.rs b/src/sources/topsql/controller.rs index 464c4f51..f371b9cf 100644 --- a/src/sources/topsql/controller.rs +++ b/src/sources/topsql/controller.rs @@ -48,7 +48,7 @@ impl Controller { sharedpool_id: Option, pd_address: Option, manager_server_address: Option, - manager_server_namespace: Option, + tidb_namespace: Option, topo_fetch_interval: Duration, init_retry_delay: Duration, top_n: usize, @@ -64,7 +64,7 @@ impl Controller { let topo_fetcher = TopologyFetcher::new( pd_address, manager_server_address, - manager_server_namespace, + tidb_namespace, tls_config.clone(), proxy_config, tidb_group, diff --git a/src/sources/topsql/mod.rs b/src/sources/topsql/mod.rs index 26091677..72b837bd 100644 --- a/src/sources/topsql/mod.rs +++ b/src/sources/topsql/mod.rs @@ -41,7 +41,7 @@ pub struct TopSQLConfig { pub manager_server_address: Option, /// PLACEHOLDER - pub manager_server_namespace: Option, + pub tidb_namespace: Option, /// PLACEHOLDER pub tls: Option, @@ -88,7 +88,7 @@ impl GenerateConfig for TopSQLConfig { keyspace_to_vmtenants: None, pd_address: None, manager_server_address: None, - manager_server_namespace: None, + tidb_namespace: None, tls: None, init_retry_delay_seconds: default_init_retry_delay(), topology_fetch_interval_seconds: default_topology_fetch_interval(), @@ -111,7 +111,7 @@ impl SourceConfig for TopSQLConfig { let keyspace_to_vmtenants = self.keyspace_to_vmtenants.clone(); let pd_address = self.pd_address.clone(); let manager_server_address = self.manager_server_address.clone(); - let manager_server_namespace = self.manager_server_namespace.clone(); + let tidb_namespace = self.tidb_namespace.clone(); let tls = self.tls.clone(); let topology_fetch_interval = Duration::from_secs_f64(self.topology_fetch_interval_seconds); let init_retry_delay = Duration::from_secs_f64(self.init_retry_delay_seconds); @@ -138,7 +138,7 @@ impl SourceConfig for TopSQLConfig { sharedpool_id, pd_address, manager_server_address, - manager_server_namespace, + tidb_namespace, topology_fetch_interval, init_retry_delay, top_n, From e69f8a0818020570b69c17884d5ca972e0f1d4ca Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Tue, 10 Mar 2026 15:38:41 +0800 Subject: [PATCH 03/11] fix: align topsql_v2 topology fetcher params --- src/sources/topsql_v2/controller.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/sources/topsql_v2/controller.rs b/src/sources/topsql_v2/controller.rs index f042685b..ff06fefb 100644 --- a/src/sources/topsql_v2/controller.rs +++ b/src/sources/topsql_v2/controller.rs @@ -56,6 +56,8 @@ impl Controller { ) -> vector::Result { let topo_fetcher = TopologyFetcher::new( pd_address, + None, // manager_server_address + None, // tidb_namespace tls_config.clone(), proxy_config, tidb_group, From 1db61da977b2eaa0931e8e855aeba953bd64300e Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Thu, 12 Mar 2026 21:35:55 +0800 Subject: [PATCH 04/11] fix: wire manager discovery into topsql v2 --- src/common/topology/fetch/mod.rs | 12 ++++++++++++ src/common/topology/fetch/tidb_manager.rs | 8 ++++++++ src/sources/topsql_v2/controller.rs | 6 ++++-- src/sources/topsql_v2/mod.rs | 12 ++++++++++++ 4 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/common/topology/fetch/mod.rs b/src/common/topology/fetch/mod.rs index 44036495..0b3a656d 100644 --- a/src/common/topology/fetch/mod.rs +++ b/src/common/topology/fetch/mod.rs @@ -98,6 +98,18 @@ impl LegacyTopologyFetcher { .await .context(FetchPDTopologySnafu)?; if let Some(manager_server_address) = self.manager_server_address.as_deref() { + if self + .tidb_namespace + .as_deref() + .map(str::trim) + .is_none_or(str::is_empty) + { + info!( + message = + "Skipping manager-based TiDB discovery because tidb namespace is empty", + manager_server_address + ); + } tidb_manager::TiDBManagerTopologyFetcher::new( manager_server_address, self.tidb_namespace.as_deref(), diff --git a/src/common/topology/fetch/tidb_manager.rs b/src/common/topology/fetch/tidb_manager.rs index cb99d2ca..a3190c97 100644 --- a/src/common/topology/fetch/tidb_manager.rs +++ b/src/common/topology/fetch/tidb_manager.rs @@ -59,6 +59,14 @@ impl<'a> TiDBManagerTopologyFetcher<'a> { components: &mut HashSet, ) -> Result<(), FetchError> { let active_tidb_addresses = self.fetch_active_tidb_addresses().await?; + if !active_tidb_addresses.is_empty() { + info!( + message = "Fetched active TiDB instances from manager server", + manager_server_address = self.manager_server_address, + tidb_namespace = ?self.tidb_namespace, + tidb_count = active_tidb_addresses.len() + ); + } for active_tidb in active_tidb_addresses { let (host, primary_port) = diff --git a/src/sources/topsql_v2/controller.rs b/src/sources/topsql_v2/controller.rs index ff06fefb..fdec290f 100644 --- a/src/sources/topsql_v2/controller.rs +++ b/src/sources/topsql_v2/controller.rs @@ -43,6 +43,8 @@ struct ActiveSchemaManager { impl Controller { pub async fn new( pd_address: Option, + manager_server_address: Option, + tidb_namespace: Option, topo_fetch_interval: Duration, init_retry_delay: Duration, top_n: usize, @@ -56,8 +58,8 @@ impl Controller { ) -> vector::Result { let topo_fetcher = TopologyFetcher::new( pd_address, - None, // manager_server_address - None, // tidb_namespace + manager_server_address, + tidb_namespace, tls_config.clone(), proxy_config, tidb_group, diff --git a/src/sources/topsql_v2/mod.rs b/src/sources/topsql_v2/mod.rs index ea1a15b1..b7b96ad9 100644 --- a/src/sources/topsql_v2/mod.rs +++ b/src/sources/topsql_v2/mod.rs @@ -28,6 +28,12 @@ pub struct TopSQLConfig { /// PLACEHOLDER pub pd_address: Option, + /// PLACEHOLDER + pub manager_server_address: Option, + + /// PLACEHOLDER + pub tidb_namespace: Option, + /// PLACEHOLDER pub tls: Option, @@ -70,6 +76,8 @@ impl GenerateConfig for TopSQLConfig { tidb_group: None, label_k8s_instance: None, pd_address: None, + manager_server_address: None, + tidb_namespace: None, tls: None, init_retry_delay_seconds: default_init_retry_delay(), topology_fetch_interval_seconds: default_topology_fetch_interval(), @@ -89,6 +97,8 @@ impl SourceConfig for TopSQLConfig { let tidb_group = self.tidb_group.clone(); let label_k8s_instance = self.label_k8s_instance.clone(); let pd_address = self.pd_address.clone(); + let manager_server_address = self.manager_server_address.clone(); + let tidb_namespace = self.tidb_namespace.clone(); let tls = self.tls.clone(); let topology_fetch_interval = Duration::from_secs_f64(self.topology_fetch_interval_seconds); let init_retry_delay = Duration::from_secs_f64(self.init_retry_delay_seconds); @@ -99,6 +109,8 @@ impl SourceConfig for TopSQLConfig { Ok(Box::pin(async move { let controller = Controller::new( pd_address, + manager_server_address, + tidb_namespace, topology_fetch_interval, init_retry_delay, top_n, From 59d76bbdd5a5d808411d87c76090aad30f6cc58b Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Fri, 13 Mar 2026 15:36:31 +0800 Subject: [PATCH 05/11] topsql: use manager hostname for instance identity --- src/common/topology/fetch/pd.rs | 1 + src/common/topology/fetch/store.rs | 1 + src/common/topology/fetch/tidb.rs | 1 + src/common/topology/fetch/tidb_manager.rs | 11 ++++-- src/common/topology/fetch/tidb_nextgen.rs | 1 + src/common/topology/fetch/tikv_nextgen.rs | 1 + src/common/topology/mod.rs | 33 +++++++++++++++++- src/sources/topsql/upstream/mod.rs | 2 +- src/sources/topsql_v2/upstream/mod.rs | 41 +++++++---------------- 9 files changed, 60 insertions(+), 32 deletions(-) diff --git a/src/common/topology/fetch/pd.rs b/src/common/topology/fetch/pd.rs index bf458042..7abed2e8 100644 --- a/src/common/topology/fetch/pd.rs +++ b/src/common/topology/fetch/pd.rs @@ -63,6 +63,7 @@ impl<'a> PDTopologyFetcher<'a> { host, primary_port: port, secondary_port: port, + instance_name: None, }); } } diff --git a/src/common/topology/fetch/store.rs b/src/common/topology/fetch/store.rs index 51d21b49..43adb7a9 100644 --- a/src/common/topology/fetch/store.rs +++ b/src/common/topology/fetch/store.rs @@ -58,6 +58,7 @@ impl<'a> StoreTopologyFetcher<'a> { host, primary_port, secondary_port, + instance_name: None, }); } diff --git a/src/common/topology/fetch/tidb.rs b/src/common/topology/fetch/tidb.rs index 14eb754c..2b1b2bdd 100644 --- a/src/common/topology/fetch/tidb.rs +++ b/src/common/topology/fetch/tidb.rs @@ -78,6 +78,7 @@ impl<'a> TiDBTopologyFetcher<'a> { host, primary_port: port, secondary_port: value.status_port, + instance_name: None, }, )); } diff --git a/src/common/topology/fetch/tidb_manager.rs b/src/common/topology/fetch/tidb_manager.rs index a3190c97..21a94b33 100644 --- a/src/common/topology/fetch/tidb_manager.rs +++ b/src/common/topology/fetch/tidb_manager.rs @@ -33,6 +33,7 @@ struct ActiveTiDBAddress { host: String, port: Option, status_port: Option, + hostname: Option, } pub struct TiDBManagerTopologyFetcher<'a> { @@ -78,6 +79,7 @@ impl<'a> TiDBManagerTopologyFetcher<'a> { host, primary_port, secondary_port, + instance_name: active_tidb.hostname.filter(|name| !name.trim().is_empty()), }); } @@ -182,6 +184,7 @@ impl<'a> TiDBManagerTopologyFetcher<'a> { host: host.clone(), port: None, status_port: None, + hostname: None, }]), Value::Array(items) => { let mut addresses = Vec::new(); @@ -231,11 +234,13 @@ impl<'a> TiDBManagerTopologyFetcher<'a> { )?; let port = Self::extract_u16_field(obj, &["port", "primary_port"]); let status_port = Self::extract_u16_field(obj, &["status_port", "secondary_port"]); + let hostname = Self::extract_string_field(obj, &["hostname", "pod_name", "instance_name"]); Some(ActiveTiDBAddress { host, port, status_port, + hostname, }) } @@ -260,8 +265,8 @@ mod tests { #[test] fn parse_response_new_schema() { let bytes = br#"[ - {"host":"10.0.0.1","port":4000,"status_port":10080}, - {"host":"10.0.0.2","port":4000,"status_port":10080} + {"host":"10.0.0.1","port":4000,"status_port":10080,"hostname":"tidb-0"}, + {"host":"10.0.0.2","port":4000,"status_port":10080,"hostname":"tidb-1"} ]"#; let addresses = TiDBManagerTopologyFetcher::parse_active_tidb_addresses_response(bytes).unwrap(); @@ -273,11 +278,13 @@ mod tests { host: "10.0.0.1".to_owned(), port: Some(4000), status_port: Some(10080), + hostname: Some("tidb-0".to_owned()), }, ActiveTiDBAddress { host: "10.0.0.2".to_owned(), port: Some(4000), status_port: Some(10080), + hostname: Some("tidb-1".to_owned()), } ] ); diff --git a/src/common/topology/fetch/tidb_nextgen.rs b/src/common/topology/fetch/tidb_nextgen.rs index aa4fc7d2..0b8eab63 100644 --- a/src/common/topology/fetch/tidb_nextgen.rs +++ b/src/common/topology/fetch/tidb_nextgen.rs @@ -66,6 +66,7 @@ impl TiDBNextGenTopologyFetcher { host: pod_ip, primary_port: 4000, secondary_port: 10080, + instance_name: pod.metadata.name.clone(), }); } } diff --git a/src/common/topology/fetch/tikv_nextgen.rs b/src/common/topology/fetch/tikv_nextgen.rs index c1516be4..a380f37d 100644 --- a/src/common/topology/fetch/tikv_nextgen.rs +++ b/src/common/topology/fetch/tikv_nextgen.rs @@ -64,6 +64,7 @@ impl TiKVNextGenTopologyFetcher { host: pod_ip, primary_port: 20160, secondary_port: 20180, + instance_name: pod.metadata.name.clone(), }); } } diff --git a/src/common/topology/mod.rs b/src/common/topology/mod.rs index 056324a5..8124c6dd 100644 --- a/src/common/topology/mod.rs +++ b/src/common/topology/mod.rs @@ -23,12 +23,35 @@ impl fmt::Display for InstanceType { } } -#[derive(Debug, Clone, Eq, Hash, PartialEq)] +#[derive(Debug, Clone)] pub struct Component { pub instance_type: InstanceType, pub host: String, pub primary_port: u16, pub secondary_port: u16, + /// Optional display/upload identifier. When set (e.g. K8s pod name), used for instance + /// identification in metrics instead of host:port. Connection still uses host. + pub instance_name: Option, +} + +impl PartialEq for Component { + fn eq(&self, other: &Self) -> bool { + self.instance_type == other.instance_type + && self.host == other.host + && self.primary_port == other.primary_port + && self.secondary_port == other.secondary_port + } +} + +impl Eq for Component {} + +impl std::hash::Hash for Component { + fn hash(&self, state: &mut H) { + self.instance_type.hash(state); + self.host.hash(state); + self.primary_port.hash(state); + self.secondary_port.hash(state); + } } impl Component { @@ -39,6 +62,14 @@ impl Component { _ => None, } } + + /// Instance identifier for metrics/tags. Uses instance_name when set (e.g. K8s pod name), + /// otherwise falls back to topsql_address (host:port). + pub fn instance_id(&self) -> String { + self.instance_name + .clone() + .unwrap_or_else(|| self.topsql_address().unwrap_or_default()) + } } impl fmt::Display for Component { diff --git a/src/sources/topsql/upstream/mod.rs b/src/sources/topsql/upstream/mod.rs index a40353c7..69ebae7b 100644 --- a/src/sources/topsql/upstream/mod.rs +++ b/src/sources/topsql/upstream/mod.rs @@ -100,7 +100,7 @@ impl BaseTopSQLSource { match component.topsql_address() { Some(address) => Some(BaseTopSQLSource { sharedpool_id, - instance: address.clone(), + instance: component.instance_id(), instance_type: component.instance_type, uri: if tls.is_some() { format!("https://{}", address) diff --git a/src/sources/topsql_v2/upstream/mod.rs b/src/sources/topsql_v2/upstream/mod.rs index 251109c4..f98924e1 100644 --- a/src/sources/topsql_v2/upstream/mod.rs +++ b/src/sources/topsql_v2/upstream/mod.rs @@ -5,8 +5,8 @@ pub mod tikv; pub mod consts; mod tls_proxy; -use std::time::Duration; use std::sync::Arc; +use std::time::Duration; use futures::StreamExt; use tokio::time; @@ -26,11 +26,7 @@ use crate::common::topology::{Component, InstanceType}; use crate::sources::topsql_v2::{ schema_cache::SchemaCache, shutdown::ShutdownSubscriber, - upstream::{ - parser::UpstreamEventParser, - tidb::TiDBUpstream, - tikv::TiKVUpstream, - }, + upstream::{parser::UpstreamEventParser, tidb::TiDBUpstream, tikv::TiKVUpstream}, }; #[async_trait::async_trait] @@ -93,7 +89,7 @@ impl BaseTopSQLSource { }; match component.topsql_address() { Some(address) => Some(BaseTopSQLSource { - instance: address.clone(), + instance: component.instance_id(), instance_type: component.instance_type, uri: if tls.is_some() { format!("https://{}", address) @@ -114,21 +110,12 @@ impl BaseTopSQLSource { } } - async fn run_loop( - &mut self, - shutdown_subscriber: ShutdownSubscriber, - ) { + async fn run_loop(&mut self, shutdown_subscriber: ShutdownSubscriber) { loop { let shutdown_subscriber = shutdown_subscriber.clone(); let state = match self.instance_type { - InstanceType::TiDB => { - self.run_once::(shutdown_subscriber) - .await - } - InstanceType::TiKV => { - self.run_once::(shutdown_subscriber) - .await - } + InstanceType::TiDB => self.run_once::(shutdown_subscriber).await, + InstanceType::TiKV => self.run_once::(shutdown_subscriber).await, _ => unreachable!(), }; @@ -149,10 +136,7 @@ impl BaseTopSQLSource { } } - async fn run_once( - &mut self, - shutdown_subscriber: ShutdownSubscriber, - ) -> State { + async fn run_once(&mut self, shutdown_subscriber: ShutdownSubscriber) -> State { let response_stream = self.build_stream::(shutdown_subscriber).await; let mut response_stream = match response_stream { Ok(stream) => stream, @@ -239,7 +223,7 @@ impl BaseTopSQLSource { U::UpstreamEventParser::keep_top_n(responses, self.top_n) } else { responses - }; + }; // parse let mut batch: Vec = vec![]; for response in responses { @@ -249,7 +233,10 @@ impl BaseTopSQLSource { self.schema_cache.clone(), ); // Convert Vec to Vec - let mut events: Vec = log_events.into_iter().map(vector::event::Event::Log).collect(); + let mut events: Vec = log_events + .into_iter() + .map(vector::event::Event::Log) + .collect(); batch.append(&mut events); } // send @@ -290,9 +277,7 @@ impl TopSQLSource { downsampling_interval, schema_cache, )?; - Some(TopSQLSource { - base, - }) + Some(TopSQLSource { base }) } pub async fn run(mut self, mut shutdown: ShutdownSubscriber) { From 173b999c7273cff46643224c462a2fd21b8a0d32 Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Fri, 13 Mar 2026 15:59:30 +0800 Subject: [PATCH 06/11] add meta-store routing for topsql deltalake sink --- src/common/meta_store.rs | 186 ++++++ src/common/mod.rs | 1 + src/sinks/topsql_data_deltalake/mod.rs | 22 +- src/sinks/topsql_data_deltalake/processor.rs | 645 ++++++++++++------- 4 files changed, 606 insertions(+), 248 deletions(-) create mode 100644 src/common/meta_store.rs diff --git a/src/common/meta_store.rs b/src/common/meta_store.rs new file mode 100644 index 00000000..3b8a7dc3 --- /dev/null +++ b/src/common/meta_store.rs @@ -0,0 +1,186 @@ +use std::collections::HashMap; +use std::sync::Arc; +use std::time::Duration; + +use reqwest::{Client, StatusCode}; +use serde::Deserialize; +use tokio::sync::Mutex; + +type BoxError = Box; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct KeyspaceRoute { + pub org_id: String, + pub cluster_id: String, +} + +#[derive(Clone)] +pub struct MetaStoreResolver { + base_url: String, + client: Client, + cache: Arc>>, +} + +#[derive(Debug, Deserialize)] +struct MetaStoreKeyspaceMetadata { + #[serde(rename = "ClusterId", alias = "cluster_id")] + cluster_id: String, + #[serde(rename = "TenantID", alias = "tenant_id")] + tenant_id: String, +} + +impl MetaStoreResolver { + pub fn new(meta_store_addr: impl Into) -> Result { + let client = Client::builder() + .timeout(Duration::from_secs(10)) + .connect_timeout(Duration::from_secs(3)) + .build()?; + Ok(Self::new_with_client(meta_store_addr, client)) + } + + pub fn new_with_client(meta_store_addr: impl Into, client: Client) -> Self { + Self { + base_url: normalize_meta_store_addr(&meta_store_addr.into()), + client, + cache: Arc::new(Mutex::new(HashMap::new())), + } + } + + pub async fn resolve_keyspace( + &self, + keyspace_name: &str, + ) -> Result, BoxError> { + if keyspace_name.is_empty() { + return Ok(None); + } + + if let Some(cached) = self.cache.lock().await.get(keyspace_name).cloned() { + return Ok(Some(cached)); + } + + let response = self + .client + .get(format!("{}/api/v2/meta", self.base_url)) + .query(&[("keyspace_name", keyspace_name)]) + .send() + .await?; + + match response.status() { + StatusCode::NOT_FOUND => return Ok(None), + status if !status.is_success() => { + return Err(format!( + "meta-store lookup failed for keyspace {} with status {}", + keyspace_name, status + ) + .into()); + } + _ => {} + } + + let metadata: Vec = response.json().await?; + let Some(first) = metadata.into_iter().next() else { + return Ok(None); + }; + + if first.cluster_id.is_empty() || first.tenant_id.is_empty() { + return Ok(None); + } + + let route = KeyspaceRoute { + org_id: first.tenant_id, + cluster_id: first.cluster_id, + }; + self.cache + .lock() + .await + .insert(keyspace_name.to_string(), route.clone()); + + Ok(Some(route)) + } +} + +fn normalize_meta_store_addr(meta_store_addr: &str) -> String { + let trimmed = meta_store_addr.trim().trim_end_matches('/'); + if trimmed.starts_with("http://") || trimmed.starts_with("https://") { + trimmed.to_string() + } else { + format!("http://{}", trimmed) + } +} + +#[cfg(test)] +mod tests { + use std::convert::Infallible; + use std::net::TcpListener; + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Arc; + + use hyper::service::{make_service_fn, service_fn}; + use hyper::{Body, Request, Response, Server}; + + use super::*; + + #[test] + fn normalize_meta_store_addr_adds_scheme() { + assert_eq!( + normalize_meta_store_addr("meta-store:9088/"), + "http://meta-store:9088" + ); + assert_eq!( + normalize_meta_store_addr("https://meta-store:9088"), + "https://meta-store:9088" + ); + } + + #[tokio::test] + async fn resolve_keyspace_uses_tenant_as_org_id_and_caches_result() { + let request_count = Arc::new(AtomicUsize::new(0)); + let counter = Arc::clone(&request_count); + + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let address = listener.local_addr().unwrap(); + let server = Server::from_tcp(listener) + .unwrap() + .serve(make_service_fn(move |_| { + let counter = Arc::clone(&counter); + async move { + Ok::<_, Infallible>(service_fn(move |request: Request| { + let counter = Arc::clone(&counter); + async move { + counter.fetch_add(1, Ordering::SeqCst); + assert_eq!(request.uri().path(), "/api/v2/meta"); + assert!( + request + .uri() + .query() + .unwrap_or_default() + .contains("keyspace_name=test_keyspace"), + "query should contain keyspace_name=test_keyspace" + ); + Ok::<_, Infallible>(Response::new(Body::from( + r#"[{"ClusterId":"10110362358366286743","TenantID":"1369847559692509642"}]"#, + ))) + } + })) + } + })); + let server_handle = tokio::spawn(server); + + let resolver = MetaStoreResolver::new(format!("http://{}", address)).unwrap(); + + let first = resolver.resolve_keyspace("test_keyspace").await.unwrap(); + let second = resolver.resolve_keyspace("test_keyspace").await.unwrap(); + + assert_eq!( + first, + Some(KeyspaceRoute { + org_id: "1369847559692509642".to_string(), + cluster_id: "10110362358366286743".to_string(), + }) + ); + assert_eq!(second, first); + assert_eq!(request_count.load(Ordering::SeqCst), 1); + + server_handle.abort(); + } +} diff --git a/src/common/mod.rs b/src/common/mod.rs index 764714f2..9c85f579 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -2,4 +2,5 @@ pub mod checkpointer; pub mod deltalake_s3; pub mod deltalake_writer; pub mod features; +pub mod meta_store; pub mod topology; diff --git a/src/sinks/topsql_data_deltalake/mod.rs b/src/sinks/topsql_data_deltalake/mod.rs index 21a65b86..02632354 100644 --- a/src/sinks/topsql_data_deltalake/mod.rs +++ b/src/sinks/topsql_data_deltalake/mod.rs @@ -25,8 +25,9 @@ use tracing::{error, info, warn}; mod processor; // Import default functions from common module -use crate::common::deltalake_writer::{default_batch_size, default_timeout_secs}; use crate::common::deltalake_s3; +use crate::common::deltalake_writer::{default_batch_size, default_timeout_secs}; +use crate::common::meta_store::MetaStoreResolver; pub const fn default_max_delay_secs() -> u64 { 180 @@ -55,6 +56,9 @@ pub struct DeltaLakeConfig { #[serde(default = "default_max_delay_secs")] pub max_delay_secs: u64, + /// Meta-store address used to resolve keyspace to org/cluster path segments + pub meta_store_addr: Option, + /// Storage options for cloud storage pub storage_options: Option>, @@ -100,6 +104,7 @@ impl GenerateConfig for DeltaLakeConfig { batch_size: default_batch_size(), timeout_secs: default_timeout_secs(), max_delay_secs: default_max_delay_secs(), + meta_store_addr: None, storage_options: None, bucket: None, options: None, @@ -219,12 +224,25 @@ impl DeltaLakeConfig { info!("No S3 service available - using default storage options only"); } + let meta_store_resolver = self + .meta_store_addr + .as_deref() + .map(MetaStoreResolver::new) + .transpose() + .map_err(|error| { + vector::Error::from(format!( + "failed to build meta-store resolver from meta_store_addr: {}", + error + )) + })?; + let sink = TopSQLDeltaLakeSink::new( base_path, table_configs, write_config, self.max_delay_secs, Some(storage_options), + meta_store_resolver, ); Ok(VectorSink::from_event_streamsink(sink)) @@ -272,4 +290,4 @@ mod tests { fn generate_config() { vector::test_util::test_generate_config::(); } -} \ No newline at end of file +} diff --git a/src/sinks/topsql_data_deltalake/processor.rs b/src/sinks/topsql_data_deltalake/processor.rs index 173eb507..ecba22c4 100644 --- a/src/sinks/topsql_data_deltalake/processor.rs +++ b/src/sinks/topsql_data_deltalake/processor.rs @@ -3,21 +3,21 @@ use std::path::PathBuf; use std::sync::Arc; use futures::{stream::BoxStream, StreamExt}; -use tokio::sync::Mutex; use tokio::sync::mpsc; -use vector_lib::event::Event; +use tokio::sync::Mutex; +use vector_lib::event::{Event, LogEvent}; use vector_lib::sink::StreamSink; use crate::common::deltalake_writer::{DeltaLakeWriter, DeltaTableConfig, WriteConfig}; +use crate::common::meta_store::{KeyspaceRoute, MetaStoreResolver}; use crate::sources::topsql_v2::upstream::consts::{ - LABEL_PLAN_DIGEST, LABEL_REGION_ID, LABEL_INSTANCE_KEY, LABEL_SQL_DIGEST, LABEL_TIMESTAMPS, - LABEL_DATE, LABEL_KEYSPACE, LABEL_TAG_LABEL, LABEL_DB_NAME, LABEL_TABLE_NAME, LABEL_TABLE_ID, - LABEL_SOURCE_TABLE, LABEL_USER, SOURCE_TABLE_TOPRU, - METRIC_NAME_CPU_TIME_MS, METRIC_NAME_LOGICAL_READ_BYTES, METRIC_NAME_LOGICAL_WRITE_BYTES, + LABEL_DATE, LABEL_DB_NAME, LABEL_INSTANCE_KEY, LABEL_KEYSPACE, LABEL_PLAN_DIGEST, + LABEL_REGION_ID, LABEL_SOURCE_TABLE, LABEL_SQL_DIGEST, LABEL_TABLE_ID, LABEL_TABLE_NAME, + LABEL_TAG_LABEL, LABEL_TIMESTAMPS, LABEL_USER, METRIC_NAME_CPU_TIME_MS, METRIC_NAME_EXEC_COUNT, + METRIC_NAME_EXEC_DURATION, METRIC_NAME_LOGICAL_READ_BYTES, METRIC_NAME_LOGICAL_WRITE_BYTES, METRIC_NAME_NETWORK_IN_BYTES, METRIC_NAME_NETWORK_OUT_BYTES, METRIC_NAME_READ_KEYS, - METRIC_NAME_STMT_EXEC_COUNT, METRIC_NAME_WRITE_KEYS, - METRIC_NAME_STMT_DURATION_COUNT, METRIC_NAME_STMT_DURATION_SUM_NS, - METRIC_NAME_TOTAL_RU, METRIC_NAME_EXEC_COUNT, METRIC_NAME_EXEC_DURATION, + METRIC_NAME_STMT_DURATION_COUNT, METRIC_NAME_STMT_DURATION_SUM_NS, METRIC_NAME_STMT_EXEC_COUNT, + METRIC_NAME_TOTAL_RU, METRIC_NAME_WRITE_KEYS, SOURCE_TABLE_TOPRU, }; use lazy_static::lazy_static; @@ -72,7 +72,7 @@ lazy_static! { "mysql_type": "text", "is_nullable": true }), - ); + ); schema_info.insert( LABEL_SQL_DIGEST.into(), serde_json::json!({ @@ -254,10 +254,17 @@ pub struct TopSQLDeltaLakeSink { write_config: WriteConfig, max_delay_secs: u64, storage_options: Option>, - writers: Arc>>, + meta_store_resolver: Option, + writers: Arc>>, tx: Arc>>>, } +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +struct WriterKey { + table_name: String, + table_path: PathBuf, +} + impl TopSQLDeltaLakeSink { /// Create a new Delta Lake sink pub fn new( @@ -266,11 +273,12 @@ impl TopSQLDeltaLakeSink { write_config: WriteConfig, max_delay_secs: u64, storage_options: Option>, + meta_store_resolver: Option, ) -> Self { // Create a channel with capacity 1 let (tx, rx) = mpsc::channel(1); let tx = Arc::new(tx); - + // Create sink instance let sink = Arc::new(Self { base_path, @@ -278,16 +286,17 @@ impl TopSQLDeltaLakeSink { write_config, max_delay_secs, storage_options, + meta_store_resolver, writers: Arc::new(Mutex::new(HashMap::new())), tx: Arc::clone(&tx), }); - + // Spawn process_events_loop as a separate tokio task to avoid blocking let sink_clone = Arc::clone(&sink); tokio::spawn(async move { sink_clone.process_events_loop(rx).await; }); - + // Return the sink (Arc::try_unwrap will fail because tokio task holds a reference, // so we use unsafe to manually get the inner value without decrementing the reference count) // Safety: We know there's exactly one more reference (the tokio task), @@ -306,6 +315,7 @@ impl TopSQLDeltaLakeSink { write_config: inner_ref.write_config.clone(), max_delay_secs: inner_ref.max_delay_secs, storage_options: inner_ref.storage_options.clone(), + meta_store_resolver: inner_ref.meta_store_resolver.clone(), writers: Arc::clone(&inner_ref.writers), tx: Arc::clone(&inner_ref.tx), }; @@ -314,7 +324,7 @@ impl TopSQLDeltaLakeSink { inner_value } } - + #[cfg(test)] /// Create a new Delta Lake sink for testing, returning both the sink and the receiver /// The receiver can be used to verify messages sent through the channel @@ -325,11 +335,15 @@ impl TopSQLDeltaLakeSink { write_config: WriteConfig, max_delay_secs: u64, storage_options: Option>, + meta_store_resolver: Option, ) -> (Self, mpsc::Receiver>>) { // Create a channel with capacity 1 - let (tx, rx): (mpsc::Sender>>, mpsc::Receiver>>) = mpsc::channel(1); + let (tx, rx): ( + mpsc::Sender>>, + mpsc::Receiver>>, + ) = mpsc::channel(1); let tx = Arc::new(tx); - + // Create sink instance (without starting process_events_loop) let sink = Self { base_path, @@ -337,19 +351,17 @@ impl TopSQLDeltaLakeSink { write_config, max_delay_secs, storage_options, + meta_store_resolver, writers: Arc::new(Mutex::new(HashMap::new())), tx, }; - + // Return the sink and receiver for testing (sink, rx) } /// Process events from channel and write to Delta Lake - async fn process_events_loop( - &self, - mut rx: mpsc::Receiver>>, - ) { + async fn process_events_loop(&self, mut rx: mpsc::Receiver>>) { while let Some(events_vec) = rx.recv().await { if let Err(e) = self.process_events(events_vec).await { error!("Failed to process events: {}", e); @@ -365,36 +377,27 @@ impl TopSQLDeltaLakeSink { if events_vec.is_empty() { return Ok(()); } - // Group events by table_name (instance_key for topsql/tikv, source_table for topru) - let mut table_events: HashMap> = HashMap::new(); + let mut table_events: HashMap> = HashMap::new(); + let mut resolved_routes: HashMap> = HashMap::new(); for events in events_vec { for event in events { if let Event::Log(log_event) = event { - let table_name: Option = log_event - .get(LABEL_INSTANCE_KEY) - .and_then(|v| v.as_str()) - .map(|s| s.to_string()) - .or_else(|| { - // TopRU events lack instance_key; use source_table as grouping key - log_event - .get(LABEL_SOURCE_TABLE) - .and_then(|v| v.as_str()) - .filter(|s| *s == SOURCE_TABLE_TOPRU) - .map(|s| s.to_string()) - }); - if let Some(name) = table_name { + if let Some(writer_key) = self + .resolve_writer_key(&log_event, &mut resolved_routes) + .await + { table_events - .entry(name) + .entry(writer_key) .or_insert_with(Vec::new) .push(Event::Log(log_event)); } } } } - // Write table's events - for (table_name, mut events) in table_events { - self.add_schema_info(&mut events, &table_name); - if let Err(e) = self.write_table_events(&table_name, events).await { + + for (writer_key, mut events) in table_events { + self.add_schema_info(&mut events, &writer_key.table_name); + if let Err(e) = self.write_table_events(&writer_key, events).await { let error_msg = e.to_string(); if error_msg.contains("log segment") || error_msg.contains("Invalid table version") @@ -403,10 +406,15 @@ impl TopSQLDeltaLakeSink { { panic!( "Delta Lake corruption detected for table {}: {}", - table_name, error_msg + writer_key.table_name, error_msg ); } else { - error!("Failed to write events to table {}: {}", table_name, e); + error!( + "Failed to write events to table {} at {}: {}", + writer_key.table_name, + writer_key.table_path.display(), + e + ); } } } @@ -429,63 +437,141 @@ impl TopSQLDeltaLakeSink { log.insert("_schema_metadata", serde_json::Value::Object(schema)); } + fn extract_table_name(log_event: &LogEvent) -> Option { + log_event + .get(LABEL_INSTANCE_KEY) + .and_then(|value| value.as_str()) + .map(|value| value.to_string()) + .or_else(|| { + log_event + .get(LABEL_SOURCE_TABLE) + .and_then(|value| value.as_str()) + .filter(|value| *value == SOURCE_TABLE_TOPRU) + .map(|value| value.to_string()) + }) + } + + async fn resolve_writer_key( + &self, + log_event: &LogEvent, + resolved_routes: &mut HashMap>, + ) -> Option { + let table_name = Self::extract_table_name(log_event)?; + let route = self + .resolve_keyspace_route(log_event, resolved_routes) + .await; + Some(WriterKey { + table_name: table_name.clone(), + table_path: self.build_table_path(&table_name, route.as_ref()), + }) + } + + async fn resolve_keyspace_route( + &self, + log_event: &LogEvent, + resolved_routes: &mut HashMap>, + ) -> Option { + let resolver = self.meta_store_resolver.as_ref()?; + let keyspace = log_event + .get(LABEL_KEYSPACE) + .and_then(|value| value.as_str())?; + + if let Some(route) = resolved_routes.get(keyspace.as_ref()) { + return route.clone(); + } + + let route = match resolver.resolve_keyspace(keyspace.as_ref()).await { + Ok(route) => route, + Err(error) => { + warn!( + "Failed to resolve keyspace {} from meta-store, falling back to base_path: {}", + keyspace, error + ); + None + } + }; + resolved_routes.insert(keyspace.to_string(), route.clone()); + route + } + + fn build_table_path(&self, table_name: &str, route: Option<&KeyspaceRoute>) -> PathBuf { + let (table_type, table_instance) = Self::table_partition_values(table_name); + + let mut segments = Vec::new(); + if let Some(route) = route { + segments.push(format!("org={}", route.org_id)); + segments.push(format!("cluster={}", route.cluster_id)); + } + segments.push(format!("type=topsql_{}", table_type)); + segments.push(format!("instance={}", table_instance)); + + let segment_refs: Vec<&str> = segments.iter().map(|segment| segment.as_str()).collect(); + Self::join_path(&self.base_path, &segment_refs) + } + + fn table_partition_values(table_name: &str) -> (&str, &str) { + if table_name == SOURCE_TABLE_TOPRU { + ("topru", "default") + } else { + match table_name + .strip_prefix("topsql_") + .and_then(|rest| rest.split_once('_')) + { + Some((table_type, table_instance)) + if !table_type.is_empty() && !table_instance.is_empty() => + { + (table_type, table_instance) + } + _ => { + error!( + "Unexpected table_name format (expected `topsql_{{type}}_{{instance}}` or `topsql_topru`): {}", + table_name + ); + ("unknown", "unknown") + } + } + } + } + + fn join_path(base_path: &PathBuf, segments: &[&str]) -> PathBuf { + if base_path.to_string_lossy().starts_with("s3://") { + let mut path = base_path + .to_string_lossy() + .trim_end_matches('/') + .to_string(); + for segment in segments { + path.push('/'); + path.push_str(segment); + } + PathBuf::from(path) + } else { + let mut path = base_path.clone(); + for segment in segments { + path = path.join(segment); + } + path + } + } + /// Write events to a specific table async fn write_table_events( &self, - table_name: &str, + writer_key: &WriterKey, events: Vec, ) -> Result<(), Box> { - // Get or create writer for this table let mut writers = self.writers.lock().await; - let writer = writers.entry(table_name.to_string()).or_insert_with(|| { - let (table_type, table_instance) = if table_name == SOURCE_TABLE_TOPRU { - ("topru", "default") - } else { - match table_name - .strip_prefix("topsql_") - .and_then(|rest| rest.split_once('_')) - { - Some((t, inst)) if !t.is_empty() && !inst.is_empty() => (t, inst), - _ => { - error!( - "Unexpected table_name format (expected `topsql_{{type}}_{{instance}}` or `topsql_topru`): {}", - table_name - ); - ("unknown", "unknown") - } - } - }; - - let type_dir = format!("type=topsql_{}", table_type); - let instance_dir = format!("instance={}", table_instance); - - let table_path = if self.base_path.to_string_lossy().starts_with("s3://") { - // For S3 paths, build a partition-like directory structure - // /topsql/data/type=.../instance=.../ - let base = self.base_path.to_string_lossy(); - let base = base.trim_end_matches('/'); - PathBuf::from(format!( - "{}/{}/{}", - base, type_dir, instance_dir - )) - } else { - // For local paths, use join as before - self.base_path - .join(&type_dir) - .join(&instance_dir) - }; - + let writer = writers.entry(writer_key.clone()).or_insert_with(|| { let table_config = self .tables .iter() - .find(|t| t.name == table_name) + .find(|table| table.name == writer_key.table_name) .cloned() .unwrap_or_else(|| DeltaTableConfig { - name: table_name.to_string(), + name: writer_key.table_name.clone(), schema_evolution: Some(true), }); DeltaLakeWriter::new_with_options( - table_path, + writer_key.table_path.clone(), table_config, self.write_config.clone(), self.storage_options.clone(), @@ -493,7 +579,6 @@ impl TopSQLDeltaLakeSink { ) }); - // Write events writer.write_events(events).await?; Ok(()) @@ -537,13 +622,15 @@ impl StreamSink for TopSQLDeltaLakeSink { events_cache.push(events); // Allow max delay to configured value, continue if not ready to send - if events_count + cur_cached_size < sink.write_config.batch_size - && latest_timestamp < oldest_timestamp + sink.max_delay_secs as i64 { + if events_count + cur_cached_size < sink.write_config.batch_size + && latest_timestamp < oldest_timestamp + sink.max_delay_secs as i64 + { continue; } // Send events to process_events through channel - let should_drop_on_full = latest_timestamp >= oldest_timestamp + sink.max_delay_secs as i64; + let should_drop_on_full = + latest_timestamp >= oldest_timestamp + sink.max_delay_secs as i64; match tx.try_send(events_cache) { Ok(_) => { // Successfully sent, clear the cache @@ -570,7 +657,7 @@ impl StreamSink for TopSQLDeltaLakeSink { } } } - + // When the input stream ends, try to send any remaining cached events if !events_cache.is_empty() { // Send remaining events, wait if channel is full @@ -579,7 +666,7 @@ impl StreamSink for TopSQLDeltaLakeSink { error!("Channel closed when flushing remaining events, dropping events"); } } - + // Note: We don't drop tx here as it's owned by the sink and may be used by other run() calls // The channel will be closed when the sink is dropped Ok(()) @@ -601,7 +688,9 @@ mod tests { event } - fn create_test_sink_with_receiver(batch_size: usize) -> (TopSQLDeltaLakeSink, mpsc::Receiver>>) { + fn create_test_sink_with_receiver( + batch_size: usize, + ) -> (TopSQLDeltaLakeSink, mpsc::Receiver>>) { TopSQLDeltaLakeSink::new_for_test( PathBuf::from("/tmp/test"), vec![], @@ -611,52 +700,107 @@ mod tests { }, 180, // Use default value for tests None, + None, ) } + #[test] + fn test_build_table_path_with_meta_route_for_s3() { + let (sink, _) = TopSQLDeltaLakeSink::new_for_test( + PathBuf::from("s3://o11y-prod-shared-us-west-2-premium/deltalake"), + vec![], + WriteConfig { + batch_size: 1, + timeout_secs: 0, + }, + 180, + None, + None, + ); + + let table_path = sink.build_table_path( + "topsql_tidb_127.0.0.1:10080", + Some(&KeyspaceRoute { + org_id: "1369847559692509642".to_string(), + cluster_id: "10110362358366286743".to_string(), + }), + ); + + assert_eq!( + table_path, + PathBuf::from( + "s3://o11y-prod-shared-us-west-2-premium/deltalake/org=1369847559692509642/cluster=10110362358366286743/type=topsql_tidb/instance=127.0.0.1:10080" + ) + ); + } + + #[test] + fn test_build_table_path_without_meta_route_preserves_existing_layout() { + let (sink, _) = TopSQLDeltaLakeSink::new_for_test( + PathBuf::from("/tmp/deltalake"), + vec![], + WriteConfig { + batch_size: 1, + timeout_secs: 0, + }, + 180, + None, + None, + ); + + let table_path = sink.build_table_path("topsql_topru", None); + + assert_eq!( + table_path, + PathBuf::from("/tmp/deltalake/type=topsql_topru/instance=default") + ); + } + #[tokio::test] async fn test_send_when_batch_size_reached() { let batch_size = 5; let (sink, mut rx) = create_test_sink_with_receiver(batch_size); - + // Create events that will reach batch size let events: Vec = (0..batch_size) .map(|i| create_test_event(1000 + i as i64)) .collect(); - + let input_stream = stream::iter(events.clone()).boxed(); let sink_box = Box::new(sink); - + // Run the function in a task - let run_handle = tokio::spawn(async move { - sink_box.run(input_stream).await - }); - + let run_handle = tokio::spawn(async move { sink_box.run(input_stream).await }); + // Wait a bit for the message to be sent tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - + // Verify that a message was sent through the channel - let received = tokio::time::timeout( - tokio::time::Duration::from_millis(500), - rx.recv() - ).await; - + let received = + tokio::time::timeout(tokio::time::Duration::from_millis(500), rx.recv()).await; + assert!(received.is_ok(), "Should receive a message from channel"); if let Ok(Some(events_vec)) = received { // Verify the message content // Count total events let total_events: usize = events_vec.iter().map(|v| v.len()).sum(); - assert_eq!(total_events, batch_size, "Should receive exactly batch_size events"); - + assert_eq!( + total_events, batch_size, + "Should receive exactly batch_size events" + ); + // Verify event structure assert!(!events_vec.is_empty(), "Events vector should not be empty"); for event_batch in &events_vec { - assert!(!event_batch.is_empty(), "Each event batch should not be empty"); + assert!( + !event_batch.is_empty(), + "Each event batch should not be empty" + ); } } else { panic!("Failed to receive message from channel"); } - + // Wait for run to complete let _ = run_handle.await; } @@ -665,44 +809,43 @@ mod tests { async fn test_send_when_timeout_reached() { let batch_size = 100; // Large batch size so we don't reach it let (sink, mut rx) = create_test_sink_with_receiver(batch_size); - + // Create events with timestamps that exceed timeout (180 seconds) let oldest_ts = 1000; let latest_ts = oldest_ts + 181; // Exceeds 180 second timeout - + // Create two events: one at the start, one after timeout - let events = vec![ - create_test_event(oldest_ts), - create_test_event(latest_ts), - ]; - + let events = vec![create_test_event(oldest_ts), create_test_event(latest_ts)]; + let input_stream = stream::iter(events.clone()).boxed(); let sink_box = Box::new(sink); - + // Run the function in a task - let run_handle = tokio::spawn(async move { - sink_box.run(input_stream).await - }); - + let run_handle = tokio::spawn(async move { sink_box.run(input_stream).await }); + // Wait a bit for the message to be sent tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - + // Verify that a message was sent through the channel due to timeout - let received = tokio::time::timeout( - tokio::time::Duration::from_millis(500), - rx.recv() - ).await; - - assert!(received.is_ok(), "Should receive a message from channel due to timeout"); + let received = + tokio::time::timeout(tokio::time::Duration::from_millis(500), rx.recv()).await; + + assert!( + received.is_ok(), + "Should receive a message from channel due to timeout" + ); if let Ok(Some(events_vec)) = received { // Verify the message content // Verify events were sent let total_events: usize = events_vec.iter().map(|v| v.len()).sum(); - assert_eq!(total_events, 2, "Should receive both events (oldest and latest)"); + assert_eq!( + total_events, 2, + "Should receive both events (oldest and latest)" + ); } else { panic!("Failed to receive message from channel"); } - + // Wait for run to complete let _ = run_handle.await; } @@ -711,55 +854,60 @@ mod tests { async fn test_channel_full_keep_cache_when_not_timeout() { let batch_size = 5; let (sink, mut rx) = create_test_sink_with_receiver(batch_size); - + // Create many events to fill the channel (capacity 1) // The first batch will fill the channel, second batch should be kept in cache // and retried later let events: Vec = (0..batch_size * 2) .map(|i| create_test_event(1000 + i as i64)) // All within timeout window .collect(); - + let input_stream = stream::iter(events.clone()).boxed(); let sink_box = Box::new(sink); - + // Run the function in a task - let run_handle = tokio::spawn(async move { - sink_box.run(input_stream).await - }); - + let run_handle = tokio::spawn(async move { sink_box.run(input_stream).await }); + // Don't consume from rx immediately to fill the channel // Wait a bit for the first message to be sent // The channel should be full now, and subsequent sends should keep data in cache // Since we're not consuming, the channel stays full // After a bit more time, the run should complete tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - + // Now consume the first message let first_msg = rx.recv().await; assert!(first_msg.is_some(), "Should receive first message"); if let Some(events_vec) = first_msg { // Verify first message content let total_events: usize = events_vec.iter().map(|v| v.len()).sum(); - assert_eq!(total_events, batch_size, "First message should contain batch_size events"); + assert_eq!( + total_events, batch_size, + "First message should contain batch_size events" + ); } - + // Wait a bit more - the second batch should be sent after channel has space tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - + // Check if second message was sent (data was kept in cache and retried) - let second_msg = tokio::time::timeout( - tokio::time::Duration::from_millis(200), - rx.recv() - ).await; - + let second_msg = + tokio::time::timeout(tokio::time::Duration::from_millis(200), rx.recv()).await; + // The second batch should eventually be sent (kept in cache and retried) - assert!(second_msg.is_ok(), "Should eventually receive second message after retry"); + assert!( + second_msg.is_ok(), + "Should eventually receive second message after retry" + ); if let Ok(Some(events_vec)) = second_msg { // Verify second message content let total_events: usize = events_vec.iter().map(|v| v.len()).sum(); - assert_eq!(total_events, batch_size, "Second message should contain batch_size events"); + assert_eq!( + total_events, batch_size, + "Second message should contain batch_size events" + ); } - + // Wait for run to complete let _ = run_handle.await; } @@ -768,7 +916,7 @@ mod tests { async fn test_channel_full_drop_when_timeout() { let batch_size = 5; let (sink, mut rx) = create_test_sink_with_receiver(batch_size); - + // Create events with timeout: first batch, then events after timeout let mut events = vec![]; // First batch at timestamp 1000 @@ -780,54 +928,59 @@ mod tests { events.push(create_test_event(1005 + i as i64)); } events.push(create_test_event(1186)); - + let input_stream = stream::iter(events.clone()).boxed(); let sink_box = Box::new(sink); - + // Run the function in a task - let run_handle = tokio::spawn(async move { - sink_box.run(input_stream).await - }); - + let run_handle = tokio::spawn(async move { sink_box.run(input_stream).await }); + // Don't consume from rx to fill the channel // Wait for first message to be sent // Channel should be full now // When the timeout event arrives and channel is full, data should be dropped tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - + // Consume the first message let first_msg = rx.recv().await; assert!(first_msg.is_some(), "Should receive first message"); if let Some(events_vec) = first_msg { // Verify first message content let total_events: usize = events_vec.iter().map(|v| v.len()).sum(); - assert_eq!(total_events, batch_size, "First message should contain batch_size events"); - + assert_eq!( + total_events, batch_size, + "First message should contain batch_size events" + ); + // Verify timestamps are from the first batch (1000-1004) for event_batch in &events_vec { for event in event_batch { if let Event::Log(ref log_event) = event { - if let Some(timestamp) = log_event.get("timestamps").and_then(|v| v.as_integer()) { - assert!(timestamp >= 1000 && timestamp < 1000 + batch_size as i64, - "First message should contain events from first batch"); + if let Some(timestamp) = + log_event.get("timestamps").and_then(|v| v.as_integer()) + { + assert!( + timestamp >= 1000 && timestamp < 1000 + batch_size as i64, + "First message should contain events from first batch" + ); } } } } } - + // Wait a bit more - the timeout event should have been dropped, not sent tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - + // Check if a second message was sent (it shouldn't be, as data was dropped) - let second_msg = tokio::time::timeout( - tokio::time::Duration::from_millis(200), - rx.recv() - ).await; + let second_msg = + tokio::time::timeout(tokio::time::Duration::from_millis(200), rx.recv()).await; // The second message should NOT be sent because data was dropped due to timeout - assert!(second_msg.is_err() || second_msg.unwrap().is_none(), - "Should NOT receive second message as data was dropped due to timeout"); - + assert!( + second_msg.is_err() || second_msg.unwrap().is_none(), + "Should NOT receive second message as data was dropped due to timeout" + ); + // Wait for run to complete let _ = run_handle.await; } @@ -836,41 +989,38 @@ mod tests { async fn test_not_send_when_batch_size_and_timeout_not_reached() { let batch_size = 10; let (sink, mut rx) = create_test_sink_with_receiver(batch_size); - + // Create events that don't reach batch size and don't timeout - let events: Vec = (0..3) - .map(|i| create_test_event(1000 + i)) - .collect(); - + let events: Vec = (0..3).map(|i| create_test_event(1000 + i)).collect(); + let input_stream = stream::iter(events.clone()).boxed(); let sink_box = Box::new(sink); - + // Run the function in a task - let run_handle = tokio::spawn(async move { - sink_box.run(input_stream).await - }); - + let run_handle = tokio::spawn(async move { sink_box.run(input_stream).await }); + // Wait for run to complete let result = run_handle.await; assert!(result.is_ok()); assert!(result.unwrap().is_ok()); - + // Verify that no message was sent (data doesn't meet send conditions) // Note: When stream ends, remaining data might be flushed, but with only 3 events // and batch_size 10, and no timeout, it should not send immediately // However, when the stream ends, the loop exits and remaining cache might be sent // Let's check if any message was received - let received = tokio::time::timeout( - tokio::time::Duration::from_millis(200), - rx.recv() - ).await; - + let received = + tokio::time::timeout(tokio::time::Duration::from_millis(200), rx.recv()).await; + // With the current implementation, when stream ends, remaining cache might be sent // So we check if a message was received and verify its content if let Ok(Some(events_vec)) = received { // Verify the message content let total_events: usize = events_vec.iter().map(|v| v.len()).sum(); - assert_eq!(total_events, 3, "Should receive the 3 events that were cached"); + assert_eq!( + total_events, 3, + "Should receive the 3 events that were cached" + ); } else { // If no message was received, that's also valid - data wasn't sent // This depends on implementation details of when remaining cache is flushed @@ -881,41 +1031,42 @@ mod tests { async fn test_batch_size_sending_behavior() { let batch_size = 3; let (sink, mut rx) = create_test_sink_with_receiver(batch_size); - + // Create exactly batch_size events let events: Vec = (0..batch_size) .map(|i| create_test_event(1000 + i as i64)) .collect(); - + let input_stream = stream::iter(events.clone()).boxed(); let sink_box = Box::new(sink); - + // Run the function in a task - let run_handle = tokio::spawn(async move { - sink_box.run(input_stream).await - }); - + let run_handle = tokio::spawn(async move { sink_box.run(input_stream).await }); + // Wait a bit for the message to be sent tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - + // Verify that a message was sent through the channel - let received = tokio::time::timeout( - tokio::time::Duration::from_millis(500), - rx.recv() - ).await; - + let received = + tokio::time::timeout(tokio::time::Duration::from_millis(500), rx.recv()).await; + assert!(received.is_ok(), "Should receive a message from channel"); if let Ok(Some(events_vec)) = received { // Verify the message content // Count total events let total_events: usize = events_vec.iter().map(|v| v.len()).sum(); - assert_eq!(total_events, batch_size, "Should receive exactly batch_size events"); - + assert_eq!( + total_events, batch_size, + "Should receive exactly batch_size events" + ); + // Verify event timestamps for event_batch in events_vec { for (i, event) in event_batch.iter().enumerate() { if let Event::Log(ref log_event) = event { - if let Some(timestamp) = log_event.get("timestamps").and_then(|v| v.as_integer()) { + if let Some(timestamp) = + log_event.get("timestamps").and_then(|v| v.as_integer()) + { assert_eq!(timestamp, 1000 + i as i64, "Event timestamp should match"); } } @@ -924,7 +1075,7 @@ mod tests { } else { panic!("Failed to receive message from channel"); } - + // Wait for run to complete let _ = run_handle.await; } @@ -933,56 +1084,58 @@ mod tests { async fn test_timeout_sending_behavior() { let batch_size = 100; // Large batch size let (sink, mut rx) = create_test_sink_with_receiver(batch_size); - + // Create events with large time gap (exceeding 180 seconds) let oldest_ts = 1000; let latest_ts = 1181; // 181 seconds later, exceeds timeout - let events = vec![ - create_test_event(oldest_ts), - create_test_event(latest_ts), - ]; - + let events = vec![create_test_event(oldest_ts), create_test_event(latest_ts)]; + let input_stream = stream::iter(events.clone()).boxed(); let sink_box = Box::new(sink); - + // Run the function in a task - let run_handle = tokio::spawn(async move { - sink_box.run(input_stream).await - }); - + let run_handle = tokio::spawn(async move { sink_box.run(input_stream).await }); + // Wait a bit for the message to be sent tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - + // Verify that a message was sent through the channel due to timeout - let received = tokio::time::timeout( - tokio::time::Duration::from_millis(500), - rx.recv() - ).await; - - assert!(received.is_ok(), "Should receive a message from channel due to timeout"); + let received = + tokio::time::timeout(tokio::time::Duration::from_millis(500), rx.recv()).await; + + assert!( + received.is_ok(), + "Should receive a message from channel due to timeout" + ); if let Ok(Some(events_vec)) = received { // Verify the message content // Count total events let total_events: usize = events_vec.iter().map(|v| v.len()).sum(); assert_eq!(total_events, 2, "Should receive both events"); - + // Verify event timestamps let mut timestamps = Vec::new(); for event_batch in &events_vec { for event in event_batch { if let Event::Log(ref log_event) = event { - if let Some(timestamp) = log_event.get("timestamps").and_then(|v| v.as_integer()) { + if let Some(timestamp) = + log_event.get("timestamps").and_then(|v| v.as_integer()) + { timestamps.push(timestamp); } } } } timestamps.sort(); - assert_eq!(timestamps, vec![oldest_ts, latest_ts], "Should receive events with correct timestamps"); + assert_eq!( + timestamps, + vec![oldest_ts, latest_ts], + "Should receive events with correct timestamps" + ); } else { panic!("Failed to receive message from channel"); } - + // Wait for run to complete let _ = run_handle.await; } @@ -991,51 +1144,51 @@ mod tests { async fn test_multiple_batches() { let batch_size = 3; let (sink, mut rx) = create_test_sink_with_receiver(batch_size); - + // Create multiple batches worth of events let total_events = batch_size * 3; let events: Vec = (0..total_events) .map(|i| create_test_event(1000 + i as i64)) .collect(); - + let input_stream = stream::iter(events.clone()).boxed(); let sink_box = Box::new(sink); - + // Run the function in a task - let run_handle = tokio::spawn(async move { - sink_box.run(input_stream).await - }); - + let run_handle = tokio::spawn(async move { sink_box.run(input_stream).await }); + // Collect all messages from the channel let mut received_messages = Vec::new(); let expected_batches = (total_events + batch_size - 1) / batch_size; // Ceiling division - + // Wait for all batches to be sent for _ in 0..expected_batches { - let received = tokio::time::timeout( - tokio::time::Duration::from_millis(500), - rx.recv() - ).await; + let received = + tokio::time::timeout(tokio::time::Duration::from_millis(500), rx.recv()).await; if let Ok(Some(msg)) = received { received_messages.push(msg); } else { break; } } - + // Verify we received the expected number of batches assert!(received_messages.len() >= 1); // Verify total events received - let total_received: usize = received_messages.iter() + let total_received: usize = received_messages + .iter() .map(|events_vec| events_vec.iter().map(|v| v.len()).sum::()) .sum(); - assert_eq!(total_received, total_events, "Should receive all events across batches"); - + assert_eq!( + total_received, total_events, + "Should receive all events across batches" + ); + // Verify each message for events_vec in &received_messages { assert!(!events_vec.is_empty(), "Each batch should contain events"); } - + // Wait for run to complete let _ = run_handle.await; } From 8e30d11bcb3880fa4dba74cc189589753da0242a Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Mon, 16 Mar 2026 13:58:37 +0800 Subject: [PATCH 07/11] Add meta-store routing for topsql meta sink --- src/sinks/topsql_meta_deltalake/mod.rs | 22 +- src/sinks/topsql_meta_deltalake/processor.rs | 760 +++++++++++------- src/sources/topsql_v2/upstream/tidb/parser.rs | 503 ++++++++---- src/sources/topsql_v2/upstream/tidb/proto.rs | 23 +- 4 files changed, 856 insertions(+), 452 deletions(-) diff --git a/src/sinks/topsql_meta_deltalake/mod.rs b/src/sinks/topsql_meta_deltalake/mod.rs index 6b332a3b..dd8ee970 100644 --- a/src/sinks/topsql_meta_deltalake/mod.rs +++ b/src/sinks/topsql_meta_deltalake/mod.rs @@ -25,8 +25,9 @@ use tracing::{error, info, warn}; mod processor; // Import default functions from common module -use crate::common::deltalake_writer::{default_batch_size, default_timeout_secs}; use crate::common::deltalake_s3; +use crate::common::deltalake_writer::{default_batch_size, default_timeout_secs}; +use crate::common::meta_store::MetaStoreResolver; pub const fn default_max_delay_secs() -> u64 { 180 @@ -63,6 +64,9 @@ pub struct DeltaLakeConfig { #[serde(default = "default_meta_cache_capacity")] pub meta_cache_capacity: usize, + /// Meta-store address used to resolve keyspace to org/cluster path segments + pub meta_store_addr: Option, + /// Storage options for cloud storage pub storage_options: Option>, @@ -109,6 +113,7 @@ impl GenerateConfig for DeltaLakeConfig { timeout_secs: default_timeout_secs(), max_delay_secs: default_max_delay_secs(), meta_cache_capacity: default_meta_cache_capacity(), + meta_store_addr: None, storage_options: None, bucket: None, options: None, @@ -228,12 +233,25 @@ impl DeltaLakeConfig { info!("No S3 service available - using default storage options only"); } + let meta_store_resolver = self + .meta_store_addr + .as_deref() + .map(MetaStoreResolver::new) + .transpose() + .map_err(|error| { + vector::Error::from(format!( + "failed to build meta-store resolver from meta_store_addr: {}", + error + )) + })?; + let sink = TopSQLDeltaLakeSink::new( base_path, table_configs, write_config, self.max_delay_secs, Some(storage_options), + meta_store_resolver, self.meta_cache_capacity, ); @@ -282,4 +300,4 @@ mod tests { fn generate_config() { vector::test_util::test_generate_config::(); } -} \ No newline at end of file +} diff --git a/src/sinks/topsql_meta_deltalake/processor.rs b/src/sinks/topsql_meta_deltalake/processor.rs index beae8dcd..10bc1ea7 100644 --- a/src/sinks/topsql_meta_deltalake/processor.rs +++ b/src/sinks/topsql_meta_deltalake/processor.rs @@ -5,16 +5,18 @@ use std::time::{Duration, Instant}; use futures::{stream::BoxStream, StreamExt}; use lru::LruCache; -use tokio::sync::Mutex; use tokio::sync::mpsc; +use tokio::sync::Mutex; +use tracing::{error, info, warn}; use vector_lib::event::Event; use vector_lib::sink::StreamSink; use crate::common::deltalake_writer::{DeltaLakeWriter, DeltaTableConfig, WriteConfig}; +use crate::common::meta_store::{KeyspaceRoute, MetaStoreResolver}; use crate::sources::topsql_v2::upstream::consts::{ - LABEL_PLAN_DIGEST, LABEL_SQL_DIGEST, LABEL_NORMALIZED_SQL, - LABEL_DATE, LABEL_ENCODED_NORMALIZED_PLAN, LABEL_NORMALIZED_PLAN, - LABEL_SOURCE_TABLE, SOURCE_TABLE_TOPSQL_SQL_META, SOURCE_TABLE_TOPSQL_PLAN_META, + LABEL_DATE, LABEL_ENCODED_NORMALIZED_PLAN, LABEL_KEYSPACE, LABEL_NORMALIZED_PLAN, + LABEL_NORMALIZED_SQL, LABEL_PLAN_DIGEST, LABEL_SOURCE_TABLE, LABEL_SQL_DIGEST, + SOURCE_TABLE_TOPSQL_PLAN_META, SOURCE_TABLE_TOPSQL_SQL_META, }; use lazy_static::lazy_static; @@ -27,7 +29,7 @@ lazy_static! { "mysql_type": "text", "is_nullable": false }), - ); + ); schema_info.insert( LABEL_SQL_DIGEST.into(), serde_json::json!({ @@ -35,6 +37,13 @@ lazy_static! { "is_nullable": true }), ); + schema_info.insert( + LABEL_KEYSPACE.into(), + serde_json::json!({ + "mysql_type": "text", + "is_nullable": true + }), + ); schema_info.insert( LABEL_NORMALIZED_SQL.into(), serde_json::json!({ @@ -57,7 +66,7 @@ lazy_static! { "mysql_type": "text", "is_nullable": false }), - ); + ); schema_info.insert( LABEL_PLAN_DIGEST.into(), serde_json::json!({ @@ -65,6 +74,13 @@ lazy_static! { "is_nullable": true }), ); + schema_info.insert( + LABEL_KEYSPACE.into(), + serde_json::json!({ + "mysql_type": "text", + "is_nullable": true + }), + ); schema_info.insert( LABEL_NORMALIZED_PLAN.into(), serde_json::json!({ @@ -85,12 +101,24 @@ lazy_static! { serde_json::json!(vec![LABEL_DATE.to_string()]), ); schema_info - }; + }; } /// When buffer size exceeds this value, events will be flushed const EVENT_BUFFER_MAX_SIZE: usize = 1000; +#[derive(Clone, Debug)] +struct BufferedEvent { + writer_key: WriterKey, + event: Event, +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +struct WriterKey { + table_name: String, + table_path: PathBuf, +} + /// Delta Lake sink processor pub struct TopSQLDeltaLakeSink { base_path: PathBuf, @@ -98,14 +126,15 @@ pub struct TopSQLDeltaLakeSink { write_config: WriteConfig, max_delay_secs: u64, storage_options: Option>, - writers: Arc>>, + meta_store_resolver: Option, + writers: Arc>>, tx: Arc>>>, // LRU cache for SQL meta deduplication: key -> () seen_keys_sql_meta: Arc>>, // LRU cache for PLAN meta deduplication: key -> () seen_keys_plan_meta: Arc>>, // Buffer for events to be flushed - new_event_buffer: Arc>>, + new_event_buffer: Arc>>, // Last flush time last_flush_time: Arc>, } @@ -118,12 +147,13 @@ impl TopSQLDeltaLakeSink { write_config: WriteConfig, max_delay_secs: u64, storage_options: Option>, + meta_store_resolver: Option, meta_cache_capacity: usize, ) -> Self { // Create a channel with capacity 1 let (tx, rx) = mpsc::channel(1); let tx = Arc::new(tx); - + // Create sink instance let sink = Arc::new(Self { base_path, @@ -131,20 +161,25 @@ impl TopSQLDeltaLakeSink { write_config, max_delay_secs, storage_options, + meta_store_resolver, writers: Arc::new(Mutex::new(HashMap::new())), tx: Arc::clone(&tx), - seen_keys_sql_meta: Arc::new(Mutex::new(LruCache::new(std::num::NonZeroUsize::new(meta_cache_capacity).unwrap()))), // LRU cache with configurable capacity - seen_keys_plan_meta: Arc::new(Mutex::new(LruCache::new(std::num::NonZeroUsize::new(meta_cache_capacity).unwrap()))), // LRU cache with configurable capacity + seen_keys_sql_meta: Arc::new(Mutex::new(LruCache::new( + std::num::NonZeroUsize::new(meta_cache_capacity).unwrap(), + ))), // LRU cache with configurable capacity + seen_keys_plan_meta: Arc::new(Mutex::new(LruCache::new( + std::num::NonZeroUsize::new(meta_cache_capacity).unwrap(), + ))), // LRU cache with configurable capacity new_event_buffer: Arc::new(Mutex::new(Vec::new())), last_flush_time: Arc::new(Mutex::new(Instant::now())), }); - + // Spawn process_events_loop as a separate tokio task to avoid blocking let sink_clone = Arc::clone(&sink); tokio::spawn(async move { sink_clone.process_events_loop(rx).await; }); - + // Return the sink (Arc::try_unwrap will fail because tokio task holds a reference, // so we use unsafe to manually get the inner value without decrementing the reference count) // Safety: We know there's exactly one more reference (the tokio task), @@ -163,6 +198,7 @@ impl TopSQLDeltaLakeSink { write_config: inner_ref.write_config.clone(), max_delay_secs: inner_ref.max_delay_secs, storage_options: inner_ref.storage_options.clone(), + meta_store_resolver: inner_ref.meta_store_resolver.clone(), writers: Arc::clone(&inner_ref.writers), tx: Arc::clone(&inner_ref.tx), seen_keys_sql_meta: Arc::clone(&inner_ref.seen_keys_sql_meta), @@ -175,7 +211,7 @@ impl TopSQLDeltaLakeSink { inner_value } } - + #[cfg(test)] /// Create a new Delta Lake sink for testing, returning both the sink and the receiver /// The receiver can be used to verify messages sent through the channel @@ -186,12 +222,16 @@ impl TopSQLDeltaLakeSink { write_config: WriteConfig, max_delay_secs: u64, storage_options: Option>, + meta_store_resolver: Option, meta_cache_capacity: usize, ) -> (Self, mpsc::Receiver>>) { // Create a channel with capacity 1 - let (tx, rx): (mpsc::Sender>>, mpsc::Receiver>>) = mpsc::channel(1); + let (tx, rx): ( + mpsc::Sender>>, + mpsc::Receiver>>, + ) = mpsc::channel(1); let tx = Arc::new(tx); - + // Create sink instance (without starting process_events_loop) let sink = Self { base_path, @@ -199,23 +239,25 @@ impl TopSQLDeltaLakeSink { write_config, max_delay_secs, storage_options, + meta_store_resolver, writers: Arc::new(Mutex::new(HashMap::new())), tx, - seen_keys_sql_meta: Arc::new(Mutex::new(LruCache::new(std::num::NonZeroUsize::new(meta_cache_capacity).unwrap()))), // LRU cache with configurable capacity - seen_keys_plan_meta: Arc::new(Mutex::new(LruCache::new(std::num::NonZeroUsize::new(meta_cache_capacity).unwrap()))), // LRU cache with configurable capacity + seen_keys_sql_meta: Arc::new(Mutex::new(LruCache::new( + std::num::NonZeroUsize::new(meta_cache_capacity).unwrap(), + ))), // LRU cache with configurable capacity + seen_keys_plan_meta: Arc::new(Mutex::new(LruCache::new( + std::num::NonZeroUsize::new(meta_cache_capacity).unwrap(), + ))), // LRU cache with configurable capacity new_event_buffer: Arc::new(Mutex::new(Vec::new())), last_flush_time: Arc::new(Mutex::new(Instant::now())), }; - + // Return the sink and receiver for testing (sink, rx) } /// Process events from channel and write to Delta Lake - async fn process_events_loop( - &self, - mut rx: mpsc::Receiver>>, - ) { + async fn process_events_loop(&self, mut rx: mpsc::Receiver>>) { while let Some(events_vec) = rx.recv().await { if let Err(e) = self.process_events(events_vec).await { error!("Failed to process events: {}", e); @@ -223,44 +265,108 @@ impl TopSQLDeltaLakeSink { } } - /// Extract deduplication key from event - /// Returns (table_name, key) if key can be extracted, None otherwise - /// table_name is the value of LABEL_SOURCE_TABLE (e.g., SOURCE_TABLE_TOPSQL_SQL_META or SOURCE_TABLE_TOPSQL_PLAN_META) - /// key format: digest_date (e.g., sql_digest_2024-01-01) - fn extract_event_key(&self, log_event: &vector_lib::event::LogEvent) -> Option<(String, String)> { - // Get table_name from source_table - let table_name = log_event.get(LABEL_SOURCE_TABLE) - .and_then(|v| v.as_str()) - .map(|s| s.to_string())?; - - // Get date from log_event - let date = log_event.get(LABEL_DATE) - .and_then(|v| v.as_str()) - .map(|s| s.to_string())?; - - // Extract key based on source_table type + /// Extract a route-aware deduplication key from event. + fn extract_event_key( + &self, + log_event: &vector_lib::event::LogEvent, + writer_key: &WriterKey, + ) -> Option { + let table_name = writer_key.table_name.as_str(); + let route_key = writer_key.table_path.to_string_lossy(); + let date = log_event.get(LABEL_DATE).and_then(|v| v.as_str())?; + if table_name == SOURCE_TABLE_TOPSQL_SQL_META { - // For SQL meta: use sql_digest_date format - if let Some(sql_digest) = log_event.get(LABEL_SQL_DIGEST) - .and_then(|v| v.as_str()) - .map(|s| s.to_string()) { - let key = format!("{}_{}", sql_digest, date); - return Some((table_name, key)); + if let Some(sql_digest) = log_event.get(LABEL_SQL_DIGEST).and_then(|v| v.as_str()) { + return Some(format!("{}|{}_{}", route_key, sql_digest, date)); } } else if table_name == SOURCE_TABLE_TOPSQL_PLAN_META { - // For PLAN meta: use plan_digest_date format - if let Some(plan_digest) = log_event.get(LABEL_PLAN_DIGEST) - .and_then(|v| v.as_str()) - .map(|s| s.to_string()) { - let key = format!("{}_{}", plan_digest, date); - return Some((table_name, key)); + if let Some(plan_digest) = log_event.get(LABEL_PLAN_DIGEST).and_then(|v| v.as_str()) { + return Some(format!("{}|{}_{}", route_key, plan_digest, date)); } } - // If no key found or source_table doesn't match, return None (event will be skipped) None } + async fn resolve_writer_key( + &self, + log_event: &vector_lib::event::LogEvent, + resolved_routes: &mut HashMap>, + ) -> Option { + let table_name = log_event + .get(LABEL_SOURCE_TABLE) + .and_then(|value| value.as_str())? + .to_string(); + let route = self + .resolve_keyspace_route(log_event, resolved_routes) + .await; + Some(WriterKey { + table_name: table_name.clone(), + table_path: self.build_table_path(&table_name, route.as_ref()), + }) + } + + async fn resolve_keyspace_route( + &self, + log_event: &vector_lib::event::LogEvent, + resolved_routes: &mut HashMap>, + ) -> Option { + let resolver = self.meta_store_resolver.as_ref()?; + let keyspace = log_event + .get(LABEL_KEYSPACE) + .and_then(|value| value.as_str())?; + let keyspace_ref = keyspace.as_ref(); + + if let Some(route) = resolved_routes.get(keyspace_ref) { + return route.clone(); + } + + let route = match resolver.resolve_keyspace(keyspace_ref).await { + Ok(route) => route, + Err(error) => { + warn!( + "Failed to resolve keyspace {} from meta-store for meta sink, falling back to base_path: {}", + keyspace_ref, error + ); + None + } + }; + resolved_routes.insert(keyspace_ref.to_string(), route.clone()); + route + } + + fn build_table_path(&self, table_name: &str, route: Option<&KeyspaceRoute>) -> PathBuf { + let mut segments = Vec::new(); + if let Some(route) = route { + segments.push(format!("org={}", route.org_id)); + segments.push(format!("cluster={}", route.cluster_id)); + } + segments.push(format!("type={}", table_name)); + + let segment_refs: Vec<&str> = segments.iter().map(|segment| segment.as_str()).collect(); + Self::join_path(&self.base_path, &segment_refs) + } + + fn join_path(base_path: &PathBuf, segments: &[&str]) -> PathBuf { + if base_path.to_string_lossy().starts_with("s3://") { + let mut path = base_path + .to_string_lossy() + .trim_end_matches('/') + .to_string(); + for segment in segments { + path.push('/'); + path.push_str(segment); + } + PathBuf::from(path) + } else { + let mut path = base_path.clone(); + for segment in segments { + path = path.join(segment); + } + path + } + } + /// Flush buffer to Delta Lake async fn flush_buffer(&self) -> Result<(), Box> { let mut buffer = self.new_event_buffer.lock().await; @@ -268,26 +374,19 @@ impl TopSQLDeltaLakeSink { return Ok(()); } - // Group events by table_name - let mut table_events: HashMap> = HashMap::new(); - for event in buffer.drain(..) { - if let Event::Log(log_event) = event { - let table_name = log_event.get(LABEL_SOURCE_TABLE) - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); - if let Some(table_name) = table_name { - table_events - .entry(table_name) - .or_insert_with(Vec::new) - .push(Event::Log(log_event)); - } - } + // Group events by resolved writer key + let mut table_events: HashMap> = HashMap::new(); + for buffered_event in buffer.drain(..) { + table_events + .entry(buffered_event.writer_key) + .or_insert_with(Vec::new) + .push(buffered_event.event); } // Write table's events - for (table_name, mut events) in table_events { - self.add_schema_info(&table_name, &mut events); - if let Err(e) = self.write_table_events(&table_name, events).await { + for (writer_key, mut events) in table_events { + self.add_schema_info(&writer_key.table_name, &mut events); + if let Err(e) = self.write_table_events(&writer_key, events).await { let error_msg = e.to_string(); if error_msg.contains("log segment") || error_msg.contains("Invalid table version") @@ -296,10 +395,15 @@ impl TopSQLDeltaLakeSink { { panic!( "Delta Lake corruption detected for table {}: {}", - table_name, error_msg + writer_key.table_name, error_msg ); } else { - error!("Failed to write events to table {}: {}", table_name, e); + error!( + "Failed to write events to table {} at {}: {}", + writer_key.table_name, + writer_key.table_path.display(), + e + ); } } } @@ -319,43 +423,52 @@ impl TopSQLDeltaLakeSink { return Ok(()); } - let mut seen_keys_sql_meta = self.seen_keys_sql_meta.lock().await; - let mut seen_keys_plan_meta = self.seen_keys_plan_meta.lock().await; - let mut buffer = self.new_event_buffer.lock().await; let last_flush = *self.last_flush_time.lock().await; let current_time = Instant::now(); let flush_interval = Duration::from_secs(self.max_delay_secs); + let mut resolved_routes: HashMap> = HashMap::new(); + let mut pending_events: Vec<(WriterKey, String, Event)> = Vec::new(); // Process all events for events in events_vec { for event in events { if let Event::Log(log_event) = event { - // Extract key from event - if let Some((table_name, key)) = self.extract_event_key(&log_event) { - // Select the appropriate LRU cache based on table_name (source_table) - let seen_keys = match table_name.as_str() { - SOURCE_TABLE_TOPSQL_SQL_META => &mut *seen_keys_sql_meta, - SOURCE_TABLE_TOPSQL_PLAN_META => &mut *seen_keys_plan_meta, - _ => continue, // Skip unknown event types - }; - - // Check if key is already in LRU cache - if seen_keys.get(&key).is_some() { - // Update key in LRU cache (touch it) - get() already does this - continue; - } else { - // Insert key to LRU cache - seen_keys.put(key.clone(), ()); - // Put event in buffer - buffer.push(Event::Log(log_event)); - } - } - // If key cannot be extracted, skip the event + let Some(writer_key) = self + .resolve_writer_key(&log_event, &mut resolved_routes) + .await + else { + continue; + }; + + let Some(key) = self.extract_event_key(&log_event, &writer_key) else { + continue; + }; + + pending_events.push((writer_key, key, Event::Log(log_event))); } } } - // Release locks before checking flush conditions + let mut seen_keys_sql_meta = self.seen_keys_sql_meta.lock().await; + let mut seen_keys_plan_meta = self.seen_keys_plan_meta.lock().await; + let mut buffer = self.new_event_buffer.lock().await; + + for (writer_key, key, event) in pending_events { + // Select the appropriate LRU cache based on table_name (source_table) + let seen_keys = match writer_key.table_name.as_str() { + SOURCE_TABLE_TOPSQL_SQL_META => &mut *seen_keys_sql_meta, + SOURCE_TABLE_TOPSQL_PLAN_META => &mut *seen_keys_plan_meta, + _ => continue, // Skip unknown event types + }; + + if seen_keys.get(&key).is_some() { + continue; + } + + seen_keys.put(key, ()); + buffer.push(BufferedEvent { writer_key, event }); + } + drop(seen_keys_sql_meta); drop(seen_keys_plan_meta); @@ -366,7 +479,7 @@ impl TopSQLDeltaLakeSink { if buffer_full || time_reached { // Release buffer lock before flushing drop(buffer); - + // Flush buffer to deltalake self.flush_buffer().await?; } @@ -381,7 +494,7 @@ impl TopSQLDeltaLakeSink { } let first_event = &mut events[0]; let log = first_event.as_mut_log(); - + // Select schema based on table_name (which is actually source_table) let schema = match table_name { SOURCE_TABLE_TOPSQL_SQL_META => &*SQL_META_SCHEMA, @@ -391,7 +504,7 @@ impl TopSQLDeltaLakeSink { return; // Return early if table_name doesn't match any known type } }; - + log.insert( "_schema_metadata", serde_json::Value::Object(schema.clone()), @@ -401,35 +514,23 @@ impl TopSQLDeltaLakeSink { /// Write events to a specific table async fn write_table_events( &self, - table_name: &str, + writer_key: &WriterKey, events: Vec, ) -> Result<(), Box> { // Get or create writer for this table let mut writers = self.writers.lock().await; - let writer = writers.entry(table_name.to_string()).or_insert_with(|| { - let table_path = if self.base_path.to_string_lossy().starts_with("s3://") { - // For S3 paths, append the table name to the S3 path - PathBuf::from(format!( - "{}/type={}", - self.base_path.to_string_lossy(), - table_name - )) - } else { - // For local paths, use join as before - self.base_path.join(format!("type={}", table_name)) - }; - + let writer = writers.entry(writer_key.clone()).or_insert_with(|| { let table_config = self .tables .iter() - .find(|t| t.name == table_name) + .find(|t| t.name == writer_key.table_name) .cloned() .unwrap_or_else(|| DeltaTableConfig { - name: table_name.to_string(), + name: writer_key.table_name.clone(), schema_evolution: Some(true), }); DeltaLakeWriter::new_with_options( - table_path, + writer_key.table_path.clone(), table_config, self.write_config.clone(), self.storage_options.clone(), @@ -482,13 +583,15 @@ impl StreamSink for TopSQLDeltaLakeSink { events_cache.push(events); // Allow max delay to configured value, continue if not ready to send - if events_count + cur_cached_size < sink.write_config.batch_size - && latest_timestamp < oldest_timestamp + sink.max_delay_secs as i64 { + if events_count + cur_cached_size < sink.write_config.batch_size + && latest_timestamp < oldest_timestamp + sink.max_delay_secs as i64 + { continue; } // Send events to process_events through channel - let should_drop_on_full = latest_timestamp >= oldest_timestamp + sink.max_delay_secs as i64; + let should_drop_on_full = + latest_timestamp >= oldest_timestamp + sink.max_delay_secs as i64; match tx.try_send(events_cache) { Ok(_) => { // Successfully sent, clear the cache @@ -515,7 +618,7 @@ impl StreamSink for TopSQLDeltaLakeSink { } } } - + // When the input stream ends, try to send any remaining cached events if !events_cache.is_empty() { // Send remaining events, wait if channel is full @@ -524,7 +627,7 @@ impl StreamSink for TopSQLDeltaLakeSink { error!("Channel closed when flushing remaining events, dropping events"); } } - + // Note: We don't drop tx here as it's owned by the sink and may be used by other run() calls // The channel will be closed when the sink is dropped Ok(()) @@ -546,7 +649,9 @@ mod tests { event } - fn create_test_sink_with_receiver(batch_size: usize) -> (TopSQLDeltaLakeSink, mpsc::Receiver>>) { + fn create_test_sink_with_receiver( + batch_size: usize, + ) -> (TopSQLDeltaLakeSink, mpsc::Receiver>>) { TopSQLDeltaLakeSink::new_for_test( PathBuf::from("/tmp/test"), vec![], @@ -556,53 +661,149 @@ mod tests { }, 180, // Use default value for tests None, + None, 10000, // Use default LRU cache capacity for tests ) } + fn create_sql_meta_event(keyspace: &str, sql_digest: &str, date: &str) -> LogEvent { + let mut log = LogEvent::default(); + log.insert(LABEL_SOURCE_TABLE, SOURCE_TABLE_TOPSQL_SQL_META); + log.insert(LABEL_SQL_DIGEST, sql_digest); + log.insert(LABEL_KEYSPACE, keyspace); + log.insert(LABEL_DATE, date); + log + } + + #[test] + fn test_build_table_path_with_meta_route_for_s3() { + let (sink, _) = TopSQLDeltaLakeSink::new_for_test( + PathBuf::from("s3://o11y-prod-shared-us-west-2-premium/deltalake"), + vec![], + WriteConfig { + batch_size: 1, + timeout_secs: 0, + }, + 180, + None, + None, + 10000, + ); + + let table_path = sink.build_table_path( + SOURCE_TABLE_TOPSQL_SQL_META, + Some(&KeyspaceRoute { + org_id: "30018".to_string(), + cluster_id: "10762701230946915645".to_string(), + }), + ); + + assert_eq!( + table_path, + PathBuf::from( + "s3://o11y-prod-shared-us-west-2-premium/deltalake/org=30018/cluster=10762701230946915645/type=topsql_sql_meta" + ) + ); + } + + #[test] + fn test_build_table_path_without_meta_route_preserves_existing_layout() { + let (sink, _) = TopSQLDeltaLakeSink::new_for_test( + PathBuf::from("/tmp/deltalake"), + vec![], + WriteConfig { + batch_size: 1, + timeout_secs: 0, + }, + 180, + None, + None, + 10000, + ); + + let table_path = sink.build_table_path(SOURCE_TABLE_TOPSQL_PLAN_META, None); + + assert_eq!( + table_path, + PathBuf::from("/tmp/deltalake/type=topsql_plan_meta") + ); + } + + #[test] + fn test_extract_event_key_isolated_by_route() { + let (sink, _) = TopSQLDeltaLakeSink::new_for_test( + PathBuf::from("/tmp/deltalake"), + vec![], + WriteConfig { + batch_size: 1, + timeout_secs: 0, + }, + 180, + None, + None, + 10000, + ); + let log_event = create_sql_meta_event("test_keyspace", "sql_digest_1", "2026-03-16"); + let route_a = WriterKey { + table_name: SOURCE_TABLE_TOPSQL_SQL_META.to_string(), + table_path: PathBuf::from("/tmp/deltalake/org=30018/cluster=101/type=topsql_sql_meta"), + }; + let route_b = WriterKey { + table_name: SOURCE_TABLE_TOPSQL_SQL_META.to_string(), + table_path: PathBuf::from("/tmp/deltalake/org=30019/cluster=102/type=topsql_sql_meta"), + }; + + let key_a = sink.extract_event_key(&log_event, &route_a); + let key_b = sink.extract_event_key(&log_event, &route_b); + + assert_ne!(key_a, key_b); + } + #[tokio::test] async fn test_send_when_batch_size_reached() { let batch_size = 5; let (sink, mut rx) = create_test_sink_with_receiver(batch_size); - + // Create events that will reach batch size let events: Vec = (0..batch_size) .map(|i| create_test_event(1000 + i as i64)) .collect(); - + let input_stream = stream::iter(events.clone()).boxed(); let sink_box = Box::new(sink); - + // Run the function in a task - let run_handle = tokio::spawn(async move { - sink_box.run(input_stream).await - }); - + let run_handle = tokio::spawn(async move { sink_box.run(input_stream).await }); + // Wait a bit for the message to be sent tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - + // Verify that a message was sent through the channel - let received = tokio::time::timeout( - tokio::time::Duration::from_millis(500), - rx.recv() - ).await; - + let received = + tokio::time::timeout(tokio::time::Duration::from_millis(500), rx.recv()).await; + assert!(received.is_ok(), "Should receive a message from channel"); if let Ok(Some(events_vec)) = received { // Verify the message content // Count total events let total_events: usize = events_vec.iter().map(|v| v.len()).sum(); - assert_eq!(total_events, batch_size, "Should receive exactly batch_size events"); - + assert_eq!( + total_events, batch_size, + "Should receive exactly batch_size events" + ); + // Verify event structure assert!(!events_vec.is_empty(), "Events vector should not be empty"); for event_batch in &events_vec { - assert!(!event_batch.is_empty(), "Each event batch should not be empty"); + assert!( + !event_batch.is_empty(), + "Each event batch should not be empty" + ); } } else { panic!("Failed to receive message from channel"); } - + // Wait for run to complete let _ = run_handle.await; } @@ -611,44 +812,43 @@ mod tests { async fn test_send_when_timeout_reached() { let batch_size = 100; // Large batch size so we don't reach it let (sink, mut rx) = create_test_sink_with_receiver(batch_size); - + // Create events with timestamps that exceed timeout (180 seconds) let oldest_ts = 1000; let latest_ts = oldest_ts + 181; // Exceeds 180 second timeout - + // Create two events: one at the start, one after timeout - let events = vec![ - create_test_event(oldest_ts), - create_test_event(latest_ts), - ]; - + let events = vec![create_test_event(oldest_ts), create_test_event(latest_ts)]; + let input_stream = stream::iter(events.clone()).boxed(); let sink_box = Box::new(sink); - + // Run the function in a task - let run_handle = tokio::spawn(async move { - sink_box.run(input_stream).await - }); - + let run_handle = tokio::spawn(async move { sink_box.run(input_stream).await }); + // Wait a bit for the message to be sent tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - + // Verify that a message was sent through the channel due to timeout - let received = tokio::time::timeout( - tokio::time::Duration::from_millis(500), - rx.recv() - ).await; - - assert!(received.is_ok(), "Should receive a message from channel due to timeout"); + let received = + tokio::time::timeout(tokio::time::Duration::from_millis(500), rx.recv()).await; + + assert!( + received.is_ok(), + "Should receive a message from channel due to timeout" + ); if let Ok(Some(events_vec)) = received { // Verify the message content // Verify events were sent let total_events: usize = events_vec.iter().map(|v| v.len()).sum(); - assert_eq!(total_events, 2, "Should receive both events (oldest and latest)"); + assert_eq!( + total_events, 2, + "Should receive both events (oldest and latest)" + ); } else { panic!("Failed to receive message from channel"); } - + // Wait for run to complete let _ = run_handle.await; } @@ -657,55 +857,60 @@ mod tests { async fn test_channel_full_keep_cache_when_not_timeout() { let batch_size = 5; let (sink, mut rx) = create_test_sink_with_receiver(batch_size); - + // Create many events to fill the channel (capacity 1) // The first batch will fill the channel, second batch should be kept in cache // and retried later let events: Vec = (0..batch_size * 2) .map(|i| create_test_event(1000 + i as i64)) // All within timeout window .collect(); - + let input_stream = stream::iter(events.clone()).boxed(); let sink_box = Box::new(sink); - + // Run the function in a task - let run_handle = tokio::spawn(async move { - sink_box.run(input_stream).await - }); - + let run_handle = tokio::spawn(async move { sink_box.run(input_stream).await }); + // Don't consume from rx immediately to fill the channel // Wait a bit for the first message to be sent // The channel should be full now, and subsequent sends should keep data in cache // Since we're not consuming, the channel stays full // After a bit more time, the run should complete tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - + // Now consume the first message let first_msg = rx.recv().await; assert!(first_msg.is_some(), "Should receive first message"); if let Some(events_vec) = first_msg { // Verify first message content let total_events: usize = events_vec.iter().map(|v| v.len()).sum(); - assert_eq!(total_events, batch_size, "First message should contain batch_size events"); + assert_eq!( + total_events, batch_size, + "First message should contain batch_size events" + ); } - + // Wait a bit more - the second batch should be sent after channel has space tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - + // Check if second message was sent (data was kept in cache and retried) - let second_msg = tokio::time::timeout( - tokio::time::Duration::from_millis(200), - rx.recv() - ).await; - + let second_msg = + tokio::time::timeout(tokio::time::Duration::from_millis(200), rx.recv()).await; + // The second batch should eventually be sent (kept in cache and retried) - assert!(second_msg.is_ok(), "Should eventually receive second message after retry"); + assert!( + second_msg.is_ok(), + "Should eventually receive second message after retry" + ); if let Ok(Some(events_vec)) = second_msg { // Verify second message content let total_events: usize = events_vec.iter().map(|v| v.len()).sum(); - assert_eq!(total_events, batch_size, "Second message should contain batch_size events"); + assert_eq!( + total_events, batch_size, + "Second message should contain batch_size events" + ); } - + // Wait for run to complete let _ = run_handle.await; } @@ -714,7 +919,7 @@ mod tests { async fn test_channel_full_drop_when_timeout() { let batch_size = 5; let (sink, mut rx) = create_test_sink_with_receiver(batch_size); - + // Create events with timeout: first batch, then events after timeout let mut events = vec![]; // First batch at timestamp 1000 @@ -726,54 +931,59 @@ mod tests { events.push(create_test_event(1005 + i as i64)); } events.push(create_test_event(1186)); - + let input_stream = stream::iter(events.clone()).boxed(); let sink_box = Box::new(sink); - + // Run the function in a task - let run_handle = tokio::spawn(async move { - sink_box.run(input_stream).await - }); - + let run_handle = tokio::spawn(async move { sink_box.run(input_stream).await }); + // Don't consume from rx to fill the channel // Wait for first message to be sent // Channel should be full now // When the timeout event arrives and channel is full, data should be dropped tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - + // Consume the first message let first_msg = rx.recv().await; assert!(first_msg.is_some(), "Should receive first message"); if let Some(events_vec) = first_msg { // Verify first message content let total_events: usize = events_vec.iter().map(|v| v.len()).sum(); - assert_eq!(total_events, batch_size, "First message should contain batch_size events"); - + assert_eq!( + total_events, batch_size, + "First message should contain batch_size events" + ); + // Verify timestamps are from the first batch (1000-1004) for event_batch in &events_vec { for event in event_batch { if let Event::Log(ref log_event) = event { - if let Some(timestamp) = log_event.get("timestamps").and_then(|v| v.as_integer()) { - assert!(timestamp >= 1000 && timestamp < 1000 + batch_size as i64, - "First message should contain events from first batch"); + if let Some(timestamp) = + log_event.get("timestamps").and_then(|v| v.as_integer()) + { + assert!( + timestamp >= 1000 && timestamp < 1000 + batch_size as i64, + "First message should contain events from first batch" + ); } } } } } - + // Wait a bit more - the timeout event should have been dropped, not sent tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - + // Check if a second message was sent (it shouldn't be, as data was dropped) - let second_msg = tokio::time::timeout( - tokio::time::Duration::from_millis(200), - rx.recv() - ).await; + let second_msg = + tokio::time::timeout(tokio::time::Duration::from_millis(200), rx.recv()).await; // The second message should NOT be sent because data was dropped due to timeout - assert!(second_msg.is_err() || second_msg.unwrap().is_none(), - "Should NOT receive second message as data was dropped due to timeout"); - + assert!( + second_msg.is_err() || second_msg.unwrap().is_none(), + "Should NOT receive second message as data was dropped due to timeout" + ); + // Wait for run to complete let _ = run_handle.await; } @@ -782,41 +992,38 @@ mod tests { async fn test_not_send_when_batch_size_and_timeout_not_reached() { let batch_size = 10; let (sink, mut rx) = create_test_sink_with_receiver(batch_size); - + // Create events that don't reach batch size and don't timeout - let events: Vec = (0..3) - .map(|i| create_test_event(1000 + i)) - .collect(); - + let events: Vec = (0..3).map(|i| create_test_event(1000 + i)).collect(); + let input_stream = stream::iter(events.clone()).boxed(); let sink_box = Box::new(sink); - + // Run the function in a task - let run_handle = tokio::spawn(async move { - sink_box.run(input_stream).await - }); - + let run_handle = tokio::spawn(async move { sink_box.run(input_stream).await }); + // Wait for run to complete let result = run_handle.await; assert!(result.is_ok()); assert!(result.unwrap().is_ok()); - + // Verify that no message was sent (data doesn't meet send conditions) // Note: When stream ends, remaining data might be flushed, but with only 3 events // and batch_size 10, and no timeout, it should not send immediately // However, when the stream ends, the loop exits and remaining cache might be sent // Let's check if any message was received - let received = tokio::time::timeout( - tokio::time::Duration::from_millis(200), - rx.recv() - ).await; - + let received = + tokio::time::timeout(tokio::time::Duration::from_millis(200), rx.recv()).await; + // With the current implementation, when stream ends, remaining cache might be sent // So we check if a message was received and verify its content if let Ok(Some(events_vec)) = received { // Verify the message content let total_events: usize = events_vec.iter().map(|v| v.len()).sum(); - assert_eq!(total_events, 3, "Should receive the 3 events that were cached"); + assert_eq!( + total_events, 3, + "Should receive the 3 events that were cached" + ); } else { // If no message was received, that's also valid - data wasn't sent // This depends on implementation details of when remaining cache is flushed @@ -827,41 +1034,42 @@ mod tests { async fn test_batch_size_sending_behavior() { let batch_size = 3; let (sink, mut rx) = create_test_sink_with_receiver(batch_size); - + // Create exactly batch_size events let events: Vec = (0..batch_size) .map(|i| create_test_event(1000 + i as i64)) .collect(); - + let input_stream = stream::iter(events.clone()).boxed(); let sink_box = Box::new(sink); - + // Run the function in a task - let run_handle = tokio::spawn(async move { - sink_box.run(input_stream).await - }); - + let run_handle = tokio::spawn(async move { sink_box.run(input_stream).await }); + // Wait a bit for the message to be sent tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - + // Verify that a message was sent through the channel - let received = tokio::time::timeout( - tokio::time::Duration::from_millis(500), - rx.recv() - ).await; - + let received = + tokio::time::timeout(tokio::time::Duration::from_millis(500), rx.recv()).await; + assert!(received.is_ok(), "Should receive a message from channel"); if let Ok(Some(events_vec)) = received { // Verify the message content // Count total events let total_events: usize = events_vec.iter().map(|v| v.len()).sum(); - assert_eq!(total_events, batch_size, "Should receive exactly batch_size events"); - + assert_eq!( + total_events, batch_size, + "Should receive exactly batch_size events" + ); + // Verify event timestamps for event_batch in events_vec { for (i, event) in event_batch.iter().enumerate() { if let Event::Log(ref log_event) = event { - if let Some(timestamp) = log_event.get("timestamps").and_then(|v| v.as_integer()) { + if let Some(timestamp) = + log_event.get("timestamps").and_then(|v| v.as_integer()) + { assert_eq!(timestamp, 1000 + i as i64, "Event timestamp should match"); } } @@ -870,7 +1078,7 @@ mod tests { } else { panic!("Failed to receive message from channel"); } - + // Wait for run to complete let _ = run_handle.await; } @@ -879,56 +1087,58 @@ mod tests { async fn test_timeout_sending_behavior() { let batch_size = 100; // Large batch size let (sink, mut rx) = create_test_sink_with_receiver(batch_size); - + // Create events with large time gap (exceeding 180 seconds) let oldest_ts = 1000; let latest_ts = 1181; // 181 seconds later, exceeds timeout - let events = vec![ - create_test_event(oldest_ts), - create_test_event(latest_ts), - ]; - + let events = vec![create_test_event(oldest_ts), create_test_event(latest_ts)]; + let input_stream = stream::iter(events.clone()).boxed(); let sink_box = Box::new(sink); - + // Run the function in a task - let run_handle = tokio::spawn(async move { - sink_box.run(input_stream).await - }); - + let run_handle = tokio::spawn(async move { sink_box.run(input_stream).await }); + // Wait a bit for the message to be sent tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; - + // Verify that a message was sent through the channel due to timeout - let received = tokio::time::timeout( - tokio::time::Duration::from_millis(500), - rx.recv() - ).await; - - assert!(received.is_ok(), "Should receive a message from channel due to timeout"); + let received = + tokio::time::timeout(tokio::time::Duration::from_millis(500), rx.recv()).await; + + assert!( + received.is_ok(), + "Should receive a message from channel due to timeout" + ); if let Ok(Some(events_vec)) = received { // Verify the message content // Count total events let total_events: usize = events_vec.iter().map(|v| v.len()).sum(); assert_eq!(total_events, 2, "Should receive both events"); - + // Verify event timestamps let mut timestamps = Vec::new(); for event_batch in &events_vec { for event in event_batch { if let Event::Log(ref log_event) = event { - if let Some(timestamp) = log_event.get("timestamps").and_then(|v| v.as_integer()) { + if let Some(timestamp) = + log_event.get("timestamps").and_then(|v| v.as_integer()) + { timestamps.push(timestamp); } } } } timestamps.sort(); - assert_eq!(timestamps, vec![oldest_ts, latest_ts], "Should receive events with correct timestamps"); + assert_eq!( + timestamps, + vec![oldest_ts, latest_ts], + "Should receive events with correct timestamps" + ); } else { panic!("Failed to receive message from channel"); } - + // Wait for run to complete let _ = run_handle.await; } @@ -937,51 +1147,51 @@ mod tests { async fn test_multiple_batches() { let batch_size = 3; let (sink, mut rx) = create_test_sink_with_receiver(batch_size); - + // Create multiple batches worth of events let total_events = batch_size * 3; let events: Vec = (0..total_events) .map(|i| create_test_event(1000 + i as i64)) .collect(); - + let input_stream = stream::iter(events.clone()).boxed(); let sink_box = Box::new(sink); - + // Run the function in a task - let run_handle = tokio::spawn(async move { - sink_box.run(input_stream).await - }); - + let run_handle = tokio::spawn(async move { sink_box.run(input_stream).await }); + // Collect all messages from the channel let mut received_messages = Vec::new(); let expected_batches = (total_events + batch_size - 1) / batch_size; // Ceiling division - + // Wait for all batches to be sent for _ in 0..expected_batches { - let received = tokio::time::timeout( - tokio::time::Duration::from_millis(500), - rx.recv() - ).await; + let received = + tokio::time::timeout(tokio::time::Duration::from_millis(500), rx.recv()).await; if let Ok(Some(msg)) = received { received_messages.push(msg); } else { break; } } - + // Verify we received the expected number of batches assert!(received_messages.len() >= 1); // Verify total events received - let total_received: usize = received_messages.iter() + let total_received: usize = received_messages + .iter() .map(|events_vec| events_vec.iter().map(|v| v.len()).sum::()) .sum(); - assert_eq!(total_received, total_events, "Should receive all events across batches"); - + assert_eq!( + total_received, total_events, + "Should receive all events across batches" + ); + // Verify each message for events_vec in &received_messages { assert!(!events_vec.is_empty(), "Each batch should contain events"); } - + // Wait for run to complete let _ = run_handle.await; } diff --git a/src/sources/topsql_v2/upstream/tidb/parser.rs b/src/sources/topsql_v2/upstream/tidb/parser.rs index 5c35c038..86bc1117 100644 --- a/src/sources/topsql_v2/upstream/tidb/parser.rs +++ b/src/sources/topsql_v2/upstream/tidb/parser.rs @@ -1,27 +1,36 @@ use std::collections::{BTreeMap, HashMap}; use std::sync::Arc; -use chrono::Utc; -use vector::event::Event; -use vector_lib::event::{LogEvent, Value as LogValue}; use crate::sources::topsql_v2::schema_cache::SchemaCache; use crate::sources::topsql_v2::upstream::consts::{ - LABEL_DATE, LABEL_ENCODED_NORMALIZED_PLAN, LABEL_INSTANCE_KEY, - LABEL_NORMALIZED_PLAN, LABEL_NORMALIZED_SQL, LABEL_PLAN_DIGEST, - LABEL_SQL_DIGEST, LABEL_SOURCE_TABLE, LABEL_TIMESTAMPS, LABEL_KEYSPACE, LABEL_USER, - METRIC_NAME_CPU_TIME_MS, METRIC_NAME_NETWORK_IN_BYTES, METRIC_NAME_NETWORK_OUT_BYTES, - METRIC_NAME_STMT_DURATION_COUNT, METRIC_NAME_STMT_DURATION_SUM_NS, METRIC_NAME_STMT_EXEC_COUNT, - METRIC_NAME_TOTAL_RU, METRIC_NAME_EXEC_COUNT, METRIC_NAME_EXEC_DURATION, - SOURCE_TABLE_TIDB_TOPSQL, SOURCE_TABLE_TOPSQL_PLAN_META, SOURCE_TABLE_TOPSQL_SQL_META, SOURCE_TABLE_TOPRU, + LABEL_DATE, LABEL_ENCODED_NORMALIZED_PLAN, LABEL_INSTANCE_KEY, LABEL_KEYSPACE, + LABEL_NORMALIZED_PLAN, LABEL_NORMALIZED_SQL, LABEL_PLAN_DIGEST, LABEL_SOURCE_TABLE, + LABEL_SQL_DIGEST, LABEL_TIMESTAMPS, LABEL_USER, METRIC_NAME_CPU_TIME_MS, + METRIC_NAME_EXEC_COUNT, METRIC_NAME_EXEC_DURATION, METRIC_NAME_NETWORK_IN_BYTES, + METRIC_NAME_NETWORK_OUT_BYTES, METRIC_NAME_STMT_DURATION_COUNT, + METRIC_NAME_STMT_DURATION_SUM_NS, METRIC_NAME_STMT_EXEC_COUNT, METRIC_NAME_TOTAL_RU, + SOURCE_TABLE_TIDB_TOPSQL, SOURCE_TABLE_TOPRU, SOURCE_TABLE_TOPSQL_PLAN_META, + SOURCE_TABLE_TOPSQL_SQL_META, }; use crate::sources::topsql_v2::upstream::parser::UpstreamEventParser; use crate::sources::topsql_v2::upstream::tidb::proto::top_sql_sub_response::RespOneof; use crate::sources::topsql_v2::upstream::tidb::proto::{ PlanMeta, SqlMeta, TopSqlRecord, TopSqlRecordItem, TopSqlSubResponse, }; +use chrono::Utc; +use vector::event::Event; +use vector_lib::event::{LogEvent, Value as LogValue}; pub struct TopSqlSubResponseParser; +fn decode_keyspace_name(keyspace_name: &[u8]) -> Option { + if keyspace_name.is_empty() { + return None; + } + + String::from_utf8(keyspace_name.to_vec()).ok() +} + impl UpstreamEventParser for TopSqlSubResponseParser { type UpstreamEvent = TopSqlSubResponse; @@ -31,12 +40,12 @@ impl UpstreamEventParser for TopSqlSubResponseParser { _schema_cache: Arc, ) -> Vec { match response.resp_oneof { - Some(RespOneof::Record(record)) => { - Self::parse_tidb_record(record, instance) - } + Some(RespOneof::Record(record)) => Self::parse_tidb_record(record, instance), Some(RespOneof::SqlMeta(sql_meta)) => Self::parse_tidb_sql_meta(sql_meta), Some(RespOneof::PlanMeta(plan_meta)) => Self::parse_tidb_plan_meta(plan_meta), - Some(RespOneof::TopRuRecords(top_ru_records)) => Self::parse_top_ru_records(top_ru_records), + Some(RespOneof::TopRuRecords(top_ru_records)) => { + Self::parse_top_ru_records(top_ru_records) + } None => vec![], } } @@ -103,14 +112,15 @@ impl UpstreamEventParser for TopSqlSubResponseParser { let mut cpu_values: Vec = v.iter().map(|psd| psd.cpu_time_ms).collect(); cpu_values.select_nth_unstable_by(top_n, |a, b| b.cmp(a)); let cpu_threshold = cpu_values[top_n]; - + // Find top_n threshold for network bytes using partial selection - let mut network_values: Vec = v.iter() + let mut network_values: Vec = v + .iter() .map(|psd| psd.stmt_network_in_bytes + psd.stmt_network_out_bytes) .collect(); network_values.select_nth_unstable_by(top_n, |a, b| b.cmp(a)); let network_threshold = network_values[top_n]; - + // Keep records that meet either threshold let mut kept = Vec::new(); for psd in v.iter() { @@ -132,7 +142,7 @@ impl UpstreamEventParser for TopSqlSubResponseParser { others.stmt_network_out_bytes += psd.stmt_network_out_bytes; } } - + *v = kept; } @@ -215,16 +225,8 @@ impl UpstreamEventParser for TopSqlSubResponseParser { } impl TopSqlSubResponseParser { - fn parse_tidb_record( - record: TopSqlRecord, - instance: String, - ) -> Vec { - let mut keyspace_name_str = "".to_string(); - if !record.keyspace_name.is_empty() { - if let Ok(ks) = String::from_utf8(record.keyspace_name.clone()) { - keyspace_name_str = ks; - } - } + fn parse_tidb_record(record: TopSqlRecord, instance: String) -> Vec { + let keyspace_name_str = decode_keyspace_name(&record.keyspace_name); let mut events = vec![]; let instance_key = format!("topsql_tidb_{}", instance); let mut date = String::new(); @@ -237,13 +239,13 @@ impl TopSqlSubResponseParser { log.insert(LABEL_TIMESTAMPS, LogValue::from(item.timestamp_sec)); if date.is_empty() { date = chrono::DateTime::from_timestamp(item.timestamp_sec as i64, 0) - .map(|dt| dt.format("%Y-%m-%d").to_string()) - .unwrap_or_else(|| "1970-01-01".to_string()); + .map(|dt| dt.format("%Y-%m-%d").to_string()) + .unwrap_or_else(|| "1970-01-01".to_string()); } log.insert(LABEL_DATE, LogValue::from(date.clone())); log.insert(LABEL_INSTANCE_KEY, instance_key.clone()); - if !keyspace_name_str.is_empty() { - log.insert(LABEL_KEYSPACE, keyspace_name_str.clone()); + if let Some(keyspace_name) = &keyspace_name_str { + log.insert(LABEL_KEYSPACE, keyspace_name.clone()); } log.insert( LABEL_SQL_DIGEST, @@ -282,11 +284,15 @@ impl TopSqlSubResponseParser { fn parse_tidb_sql_meta(sql_meta: SqlMeta) -> Vec { let mut events = vec![]; let sql_digest = hex::encode_upper(sql_meta.sql_digest); + let keyspace_name = decode_keyspace_name(&sql_meta.keyspace_name); let mut event = Event::Log(LogEvent::default()); let log = event.as_mut_log(); log.insert(LABEL_SOURCE_TABLE, SOURCE_TABLE_TOPSQL_SQL_META); log.insert(LABEL_SQL_DIGEST, sql_digest); + if let Some(keyspace_name) = keyspace_name { + log.insert(LABEL_KEYSPACE, keyspace_name); + } log.insert(LABEL_NORMALIZED_SQL, sql_meta.normalized_sql); let now = Utc::now(); log.insert(LABEL_TIMESTAMPS, LogValue::from(now.timestamp())); @@ -299,19 +305,19 @@ impl TopSqlSubResponseParser { fn parse_tidb_plan_meta(plan_meta: PlanMeta) -> Vec { let mut events = vec![]; let plan_digest = hex::encode_upper(plan_meta.plan_digest); - let encoded_normalized_plan = - hex::encode_upper(plan_meta.encoded_normalized_plan); + let keyspace_name = decode_keyspace_name(&plan_meta.keyspace_name); + let encoded_normalized_plan = hex::encode_upper(plan_meta.encoded_normalized_plan); let mut event = Event::Log(LogEvent::default()); let log = event.as_mut_log(); // Add metadata with Vector prefix (ensure all fields have values) log.insert(LABEL_SOURCE_TABLE, SOURCE_TABLE_TOPSQL_PLAN_META); log.insert(LABEL_PLAN_DIGEST, plan_digest); + if let Some(keyspace_name) = keyspace_name { + log.insert(LABEL_KEYSPACE, keyspace_name); + } log.insert(LABEL_NORMALIZED_PLAN, plan_meta.normalized_plan); - log.insert( - LABEL_ENCODED_NORMALIZED_PLAN, - encoded_normalized_plan, - ); + log.insert(LABEL_ENCODED_NORMALIZED_PLAN, encoded_normalized_plan); let now = Utc::now(); log.insert(LABEL_TIMESTAMPS, LogValue::from(now.timestamp())); let date_str = now.format("%Y-%m-%d").to_string(); @@ -320,18 +326,15 @@ impl TopSqlSubResponseParser { events } - fn parse_top_ru_records(top_ru_records: crate::sources::topsql_v2::upstream::tidb::proto::ReportTopRuRecords) -> Vec { + fn parse_top_ru_records( + top_ru_records: crate::sources::topsql_v2::upstream::tidb::proto::ReportTopRuRecords, + ) -> Vec { let mut events = vec![]; let mut date = String::new(); - + for record in top_ru_records.records { - let mut keyspace_name_str = "".to_string(); - if !record.keyspace_name.is_empty() { - if let Ok(ks) = String::from_utf8(record.keyspace_name.clone()) { - keyspace_name_str = ks; - } - } - + let keyspace_name_str = decode_keyspace_name(&record.keyspace_name); + for item in record.items { let mut event = Event::Log(LogEvent::default()); let log = event.as_mut_log(); @@ -339,17 +342,17 @@ impl TopSqlSubResponseParser { // Add metadata with Vector prefix log.insert(LABEL_SOURCE_TABLE, SOURCE_TABLE_TOPRU); log.insert(LABEL_TIMESTAMPS, LogValue::from(item.timestamp_sec)); - + if date.is_empty() { date = chrono::DateTime::from_timestamp(item.timestamp_sec as i64, 0) .map(|dt| dt.format("%Y-%m-%d").to_string()) .unwrap_or_else(|| "1970-01-01".to_string()); } log.insert(LABEL_DATE, LogValue::from(date.clone())); - + // Note: TopRU doesn't use instance_key - all instances write to same table - if !keyspace_name_str.is_empty() { - log.insert(LABEL_KEYSPACE, keyspace_name_str.clone()); + if let Some(keyspace_name) = &keyspace_name_str { + log.insert(LABEL_KEYSPACE, keyspace_name.clone()); } log.insert(LABEL_USER, record.user.clone()); log.insert( @@ -362,8 +365,11 @@ impl TopSqlSubResponseParser { ); log.insert(METRIC_NAME_TOTAL_RU, LogValue::from(item.total_ru)); log.insert(METRIC_NAME_EXEC_COUNT, LogValue::from(item.exec_count)); - log.insert(METRIC_NAME_EXEC_DURATION, LogValue::from(item.exec_duration)); - + log.insert( + METRIC_NAME_EXEC_DURATION, + LogValue::from(item.exec_duration), + ); + events.push(event.into_log()); } } @@ -374,7 +380,9 @@ impl TopSqlSubResponseParser { #[cfg(test)] mod tests { use super::*; - use crate::sources::topsql_v2::upstream::tidb::proto::{TopSqlRecordItem, TopRuRecord, TopRuRecordItem, ReportTopRuRecords}; + use crate::sources::topsql_v2::upstream::tidb::proto::{ + PlanMeta, ReportTopRuRecords, SqlMeta, TopRuRecord, TopRuRecordItem, TopSqlRecordItem, + }; const MOCK_RECORDS: &'static str = include_str!("testdata/mock-records.json"); @@ -435,7 +443,7 @@ mod tests { let plan_digest = vec![4, 5, 6]; let timestamp = 1000u64; let test_keyspace_name = b"test_keyspace_2".to_vec(); - + // Create 5 records with same timestamp let items: Vec = (0..5) .map(|i| TopSqlRecordItem { @@ -449,7 +457,7 @@ mod tests { stmt_network_out_bytes: 200 + i as u64, }) .collect(); - + responses.push(TopSqlSubResponse { resp_oneof: Some(RespOneof::Record(TopSqlRecord { sql_digest: sql_digest.clone(), @@ -458,21 +466,24 @@ mod tests { keyspace_name: test_keyspace_name.clone(), })), }); - + // top_n = 10, which is greater than 5, so all should be kept let result = TopSqlSubResponseParser::keep_top_n(responses.clone(), 10); - + // Should have same number of responses (all kept) assert_eq!(result.len(), 1); if let Some(RespOneof::Record(record)) = &result[0].resp_oneof { assert_eq!(record.items.len(), 5); assert_eq!(record.sql_digest, sql_digest); assert_eq!(record.plan_digest, plan_digest); - assert_eq!(record.keyspace_name, test_keyspace_name, "keyspace_name should be preserved"); + assert_eq!( + record.keyspace_name, test_keyspace_name, + "keyspace_name should be preserved" + ); } else { panic!("Expected Record"); } - + // top_n = 5, which equals 5, so all should be kept let result2 = TopSqlSubResponseParser::keep_top_n(responses, 5); assert_eq!(result2.len(), 1); @@ -480,7 +491,10 @@ mod tests { assert_eq!(record.items.len(), 5); assert_eq!(record.sql_digest, sql_digest); assert_eq!(record.plan_digest, plan_digest); - assert_eq!(record.keyspace_name, test_keyspace_name, "keyspace_name should be preserved"); + assert_eq!( + record.keyspace_name, test_keyspace_name, + "keyspace_name should be preserved" + ); } else { panic!("Expected Record"); } @@ -495,7 +509,7 @@ mod tests { let plan_digest = vec![4, 5, 6]; let timestamp = 1000u64; let test_keyspace_name = b"test_keyspace_3".to_vec(); - + // Create 10 records with same cpu_time_ms and same network bytes let items: Vec = (0..10) .map(|_| TopSqlRecordItem { @@ -505,11 +519,11 @@ mod tests { stmt_kv_exec_count: BTreeMap::new(), stmt_duration_sum_ns: 1000, stmt_duration_count: 1, - stmt_network_in_bytes: 100, // All same + stmt_network_in_bytes: 100, // All same stmt_network_out_bytes: 200, // All same, total = 300 }) .collect(); - + responses.push(TopSqlSubResponse { resp_oneof: Some(RespOneof::Record(TopSqlRecord { sql_digest: sql_digest.clone(), @@ -518,44 +532,45 @@ mod tests { keyspace_name: test_keyspace_name.clone(), })), }); - + // top_n = 5, all values are same // New logic: threshold equals the value (top_n-th largest, which is the same value), // so no records satisfy > threshold condition, all should go to others let result = TopSqlSubResponseParser::keep_top_n(responses, 5); - + // Verify all records go to others let mut total_cpu_kept = 0u32; let mut total_network_kept = 0u64; let mut kept_count = 0; let mut total_cpu_others = 0u32; let mut total_network_others = 0u64; - + for response in result { if let Some(RespOneof::Record(record)) = response.resp_oneof { // Verify keyspace_name is preserved assert_eq!( - record.keyspace_name, - test_keyspace_name, + record.keyspace_name, test_keyspace_name, "keyspace_name should be preserved in all records" ); - + if record.sql_digest.is_empty() { // This is others for item in record.items { total_cpu_others += item.cpu_time_ms; - total_network_others += item.stmt_network_in_bytes + item.stmt_network_out_bytes; + total_network_others += + item.stmt_network_in_bytes + item.stmt_network_out_bytes; } } else { kept_count += record.items.len(); for item in record.items { total_cpu_kept += item.cpu_time_ms; - total_network_kept += item.stmt_network_in_bytes + item.stmt_network_out_bytes; + total_network_kept += + item.stmt_network_in_bytes + item.stmt_network_out_bytes; } } } } - + // New behavior: all records go to others (none satisfy > threshold when all values are same) assert_eq!(kept_count, 0); assert_eq!(total_cpu_kept, 0); @@ -572,7 +587,7 @@ mod tests { let mut responses = vec![]; let top_n = 3; let test_keyspace_name = b"test_keyspace_timestamps".to_vec(); - + // Timestamp 1000: 8 records mixing high CPU/low network, low CPU/high network, both high, both low // Expected: Keep records that meet either CPU threshold (>20) OR network threshold (>40) // Top 3 CPU: 100, 90, 80 -> threshold = 20 (4th largest) @@ -580,16 +595,16 @@ mod tests { let timestamp1 = 1000u64; let test_cases_ts1 = vec![ // (sql_id, plan_id, cpu_time_ms, network_in_bytes, network_out_bytes, reason) - (1, 1, 100, 10, 10), // High CPU (100), low network (20) -> keep (CPU > 20) - (2, 2, 90, 10, 10), // High CPU (90), low network (20) -> keep (CPU > 20) - (3, 3, 80, 10, 10), // High CPU (80), low network (20) -> keep (CPU > 20) + (1, 1, 100, 10, 10), // High CPU (100), low network (20) -> keep (CPU > 20) + (2, 2, 90, 10, 10), // High CPU (90), low network (20) -> keep (CPU > 20) + (3, 3, 80, 10, 10), // High CPU (80), low network (20) -> keep (CPU > 20) (4, 4, 10, 200, 200), // Low CPU (10), high network (400) -> keep (network > 40) (5, 5, 10, 175, 175), // Low CPU (10), high network (350) -> keep (network > 40) (6, 6, 10, 150, 150), // Low CPU (10), high network (300) -> keep (network > 40) - (7, 7, 20, 20, 20), // Low CPU (20), low network (40) -> evict (CPU == 20, network == 40) - (8, 8, 15, 15, 15), // Low CPU (15), low network (30) -> evict + (7, 7, 20, 20, 20), // Low CPU (20), low network (40) -> evict (CPU == 20, network == 40) + (8, 8, 15, 15, 15), // Low CPU (15), low network (30) -> evict ]; - + for (sql_id, plan_id, cpu_time, net_in, net_out) in test_cases_ts1.iter() { let sql_digest = vec![*sql_id]; let plan_digest = vec![*plan_id]; @@ -611,22 +626,22 @@ mod tests { })), }); } - + // Timestamp 2000: 7 records mixing different combinations // Expected: Keep records that meet either CPU threshold (>20) OR network threshold (>60) // Top 3 CPU: 100, 90, 70 -> threshold = 20 (4th largest) // Top 3 Network: 380, 360, 140 -> threshold = 60 (4th largest) let timestamp2 = 2000u64; let test_cases_ts2 = vec![ - (9, 9, 100, 10, 10), // High CPU (100), low network (20) -> keep (CPU > 20) - (10, 10, 90, 10, 10), // High CPU (90), low network (20) -> keep (CPU > 20) - (11, 11, 70, 10, 10), // High CPU (70), low network (20) -> keep (CPU > 20) + (9, 9, 100, 10, 10), // High CPU (100), low network (20) -> keep (CPU > 20) + (10, 10, 90, 10, 10), // High CPU (90), low network (20) -> keep (CPU > 20) + (11, 11, 70, 10, 10), // High CPU (70), low network (20) -> keep (CPU > 20) (12, 12, 10, 190, 190), // Low CPU (10), high network (380) -> keep (network > 60) (13, 13, 10, 180, 180), // Low CPU (10), high network (360) -> keep (network > 60) (14, 14, 10, 70, 70), // Low CPU (10), high network (140) -> keep (network > 60) - (15, 15, 20, 30, 30), // Low CPU (20), low network (60) -> evict (CPU == 20, network == 60) + (15, 15, 20, 30, 30), // Low CPU (20), low network (60) -> evict (CPU == 20, network == 60) ]; - + for (sql_id, plan_id, cpu_time, net_in, net_out) in test_cases_ts2.iter() { let sql_digest = vec![*sql_id]; let plan_digest = vec![*plan_id]; @@ -648,14 +663,11 @@ mod tests { })), }); } - + // Timestamp 3000: 2 records (both should be kept since 2 <= top_n=3) let timestamp3 = 3000u64; - let test_cases_ts3 = vec![ - (16, 16, 50, 50, 50), - (17, 17, 40, 40, 40), - ]; - + let test_cases_ts3 = vec![(16, 16, 50, 50, 50), (17, 17, 40, 40, 40)]; + for (sql_id, plan_id, cpu_time, net_in, net_out) in test_cases_ts3.iter() { let sql_digest = vec![*sql_id]; let plan_digest = vec![*plan_id]; @@ -677,26 +689,25 @@ mod tests { })), }); } - + let result = TopSqlSubResponseParser::keep_top_n(responses, top_n); - + // Group results by timestamp let mut results_by_timestamp: BTreeMap> = BTreeMap::new(); // timestamp -> [(sql_id, cpu, network), ...] let mut others_by_timestamp: BTreeMap = BTreeMap::new(); // timestamp -> (cpu, network) - + for response in result { if let Some(RespOneof::Record(record)) = response.resp_oneof { // Verify keyspace_name is preserved assert_eq!( - record.keyspace_name, - test_keyspace_name, + record.keyspace_name, test_keyspace_name, "keyspace_name should be preserved in all records" ); - + for item in record.items { let timestamp = item.timestamp_sec; let network_total = item.stmt_network_in_bytes + item.stmt_network_out_bytes; - + if record.sql_digest.is_empty() { // This is others let entry = others_by_timestamp.entry(timestamp).or_insert((0, 0)); @@ -713,7 +724,7 @@ mod tests { } } } - + // Verify timestamp 1000: should keep 6 records (3 high CPU + 3 high network), evict 2 // CPU threshold = 20 (4th largest), keep records with CPU > 20 // Network threshold = 40 (4th largest), keep records with network > 40 @@ -721,19 +732,47 @@ mod tests { .get(×tamp1) .map(|records| records.iter().map(|r| r.0).collect()) .unwrap_or_default(); - assert_eq!(ts1_kept.len(), 6, "Timestamp 1000 should keep 6 records (3 high CPU + 3 high network)"); + assert_eq!( + ts1_kept.len(), + 6, + "Timestamp 1000 should keep 6 records (3 high CPU + 3 high network)" + ); // High CPU records (1, 2, 3) should be kept - assert!(ts1_kept.contains(&1), "Timestamp 1000 should keep sql_id 1 (high CPU)"); - assert!(ts1_kept.contains(&2), "Timestamp 1000 should keep sql_id 2 (high CPU)"); - assert!(ts1_kept.contains(&3), "Timestamp 1000 should keep sql_id 3 (high CPU)"); + assert!( + ts1_kept.contains(&1), + "Timestamp 1000 should keep sql_id 1 (high CPU)" + ); + assert!( + ts1_kept.contains(&2), + "Timestamp 1000 should keep sql_id 2 (high CPU)" + ); + assert!( + ts1_kept.contains(&3), + "Timestamp 1000 should keep sql_id 3 (high CPU)" + ); // High network records (4, 5, 6) should be kept - assert!(ts1_kept.contains(&4), "Timestamp 1000 should keep sql_id 4 (high network)"); - assert!(ts1_kept.contains(&5), "Timestamp 1000 should keep sql_id 5 (high network)"); - assert!(ts1_kept.contains(&6), "Timestamp 1000 should keep sql_id 6 (high network)"); + assert!( + ts1_kept.contains(&4), + "Timestamp 1000 should keep sql_id 4 (high network)" + ); + assert!( + ts1_kept.contains(&5), + "Timestamp 1000 should keep sql_id 5 (high network)" + ); + assert!( + ts1_kept.contains(&6), + "Timestamp 1000 should keep sql_id 6 (high network)" + ); // Low both records (7, 8) should be evicted - assert!(!ts1_kept.contains(&7), "Timestamp 1000 should NOT keep sql_id 7 (low both)"); - assert!(!ts1_kept.contains(&8), "Timestamp 1000 should NOT keep sql_id 8 (low both)"); - + assert!( + !ts1_kept.contains(&7), + "Timestamp 1000 should NOT keep sql_id 7 (low both)" + ); + assert!( + !ts1_kept.contains(&8), + "Timestamp 1000 should NOT keep sql_id 8 (low both)" + ); + // Verify kept records meet at least one threshold if let Some(records) = results_by_timestamp.get(×tamp1) { let cpu_threshold = 20u32; @@ -748,14 +787,22 @@ mod tests { ); } } - + if let Some((others_cpu, others_network)) = others_by_timestamp.get(×tamp1) { - assert_eq!(*others_cpu, 20 + 15, "Timestamp 1000 others CPU should be 35 (20+15)"); - assert_eq!(*others_network, 40 + 30, "Timestamp 1000 others network should be 70 (40+30)"); + assert_eq!( + *others_cpu, + 20 + 15, + "Timestamp 1000 others CPU should be 35 (20+15)" + ); + assert_eq!( + *others_network, + 40 + 30, + "Timestamp 1000 others network should be 70 (40+30)" + ); } else { panic!("Timestamp 1000 should have others records"); } - + // Verify timestamp 2000: should keep 6 records (3 high CPU + 3 high network), evict 1 // CPU threshold = 20 (4th largest), keep records with CPU > 20 // Network threshold = 60 (4th largest), keep records with network > 60 @@ -763,18 +810,43 @@ mod tests { .get(×tamp2) .map(|records| records.iter().map(|r| r.0).collect()) .unwrap_or_default(); - assert_eq!(ts2_kept.len(), 6, "Timestamp 2000 should keep 6 records (3 high CPU + 3 high network)"); + assert_eq!( + ts2_kept.len(), + 6, + "Timestamp 2000 should keep 6 records (3 high CPU + 3 high network)" + ); // High CPU records (9, 10, 11) should be kept - assert!(ts2_kept.contains(&9), "Timestamp 2000 should keep sql_id 9 (high CPU)"); - assert!(ts2_kept.contains(&10), "Timestamp 2000 should keep sql_id 10 (high CPU)"); - assert!(ts2_kept.contains(&11), "Timestamp 2000 should keep sql_id 11 (high CPU)"); + assert!( + ts2_kept.contains(&9), + "Timestamp 2000 should keep sql_id 9 (high CPU)" + ); + assert!( + ts2_kept.contains(&10), + "Timestamp 2000 should keep sql_id 10 (high CPU)" + ); + assert!( + ts2_kept.contains(&11), + "Timestamp 2000 should keep sql_id 11 (high CPU)" + ); // High network records (12, 13, 14) should be kept - assert!(ts2_kept.contains(&12), "Timestamp 2000 should keep sql_id 12 (high network)"); - assert!(ts2_kept.contains(&13), "Timestamp 2000 should keep sql_id 13 (high network)"); - assert!(ts2_kept.contains(&14), "Timestamp 2000 should keep sql_id 14 (high network)"); + assert!( + ts2_kept.contains(&12), + "Timestamp 2000 should keep sql_id 12 (high network)" + ); + assert!( + ts2_kept.contains(&13), + "Timestamp 2000 should keep sql_id 13 (high network)" + ); + assert!( + ts2_kept.contains(&14), + "Timestamp 2000 should keep sql_id 14 (high network)" + ); // Low both record (15) should be evicted - assert!(!ts2_kept.contains(&15), "Timestamp 2000 should NOT keep sql_id 15 (low both)"); - + assert!( + !ts2_kept.contains(&15), + "Timestamp 2000 should NOT keep sql_id 15 (low both)" + ); + // Verify kept records meet at least one threshold if let Some(records) = results_by_timestamp.get(×tamp2) { let cpu_threshold = 20u32; @@ -789,28 +861,47 @@ mod tests { ); } } - + if let Some((others_cpu, others_network)) = others_by_timestamp.get(×tamp2) { assert_eq!(*others_cpu, 20, "Timestamp 2000 others CPU should be 20"); - assert_eq!(*others_network, 60, "Timestamp 2000 others network should be 60 (30+30)"); + assert_eq!( + *others_network, 60, + "Timestamp 2000 others network should be 60 (30+30)" + ); } else { panic!("Timestamp 2000 should have others records"); } - + // Verify timestamp 3000: should keep all 2 records (2 <= top_n=3) let ts3_kept: Vec = results_by_timestamp .get(×tamp3) .map(|records| records.iter().map(|r| r.0).collect()) .unwrap_or_default(); - assert_eq!(ts3_kept.len(), 2, "Timestamp 3000 should keep all 2 records"); - assert!(ts3_kept.contains(&16), "Timestamp 3000 should keep sql_id 16"); - assert!(ts3_kept.contains(&17), "Timestamp 3000 should keep sql_id 17"); - + assert_eq!( + ts3_kept.len(), + 2, + "Timestamp 3000 should keep all 2 records" + ); + assert!( + ts3_kept.contains(&16), + "Timestamp 3000 should keep sql_id 16" + ); + assert!( + ts3_kept.contains(&17), + "Timestamp 3000 should keep sql_id 17" + ); + // Timestamp 3000 should not have others since all records are kept - assert!(!others_by_timestamp.contains_key(×tamp3), "Timestamp 3000 should not have others"); - + assert!( + !others_by_timestamp.contains_key(×tamp3), + "Timestamp 3000 should not have others" + ); + // Verify total counts - let total_kept: usize = results_by_timestamp.values().map(|records| records.len()).sum(); + let total_kept: usize = results_by_timestamp + .values() + .map(|records| records.len()) + .sum(); assert_eq!(total_kept, 14, "Total kept records should be 14 (6+6+2)"); } @@ -881,34 +972,35 @@ mod tests { assert_eq!(sum_old.stmt_duration_count, sum_new.stmt_duration_count); assert_eq!(sum_old.stmt_duration_sum_ns, sum_new.stmt_duration_sum_ns); assert_eq!(sum_old.stmt_network_in_bytes, sum_new.stmt_network_in_bytes); - assert_eq!(sum_old.stmt_network_out_bytes, sum_new.stmt_network_out_bytes); + assert_eq!( + sum_old.stmt_network_out_bytes, + sum_new.stmt_network_out_bytes + ); } #[test] fn test_parse_top_ru_records() { let top_ru_records = ReportTopRuRecords { - records: vec![ - TopRuRecord { - keyspace_name: b"test_keyspace".to_vec(), - user: "test_user".to_string(), - sql_digest: b"sql_digest_123".to_vec(), - plan_digest: b"plan_digest_456".to_vec(), - items: vec![ - TopRuRecordItem { - timestamp_sec: 1709646900, - total_ru: 100.5, - exec_count: 10, - exec_duration: 50000000, // 50ms in nanoseconds - }, - TopRuRecordItem { - timestamp_sec: 1709646960, - total_ru: 200.0, - exec_count: 20, - exec_duration: 100000000, // 100ms in nanoseconds - }, - ], - }, - ], + records: vec![TopRuRecord { + keyspace_name: b"test_keyspace".to_vec(), + user: "test_user".to_string(), + sql_digest: b"sql_digest_123".to_vec(), + plan_digest: b"plan_digest_456".to_vec(), + items: vec![ + TopRuRecordItem { + timestamp_sec: 1709646900, + total_ru: 100.5, + exec_count: 10, + exec_duration: 50000000, // 50ms in nanoseconds + }, + TopRuRecordItem { + timestamp_sec: 1709646960, + total_ru: 200.0, + exec_count: 20, + exec_duration: 100000000, // 100ms in nanoseconds + }, + ], + }], }; let events = TopSqlSubResponseParser::parse_top_ru_records(top_ru_records); @@ -917,25 +1009,106 @@ mod tests { // Check first event let event1 = &events[0]; let log1 = event1; - assert_eq!(log1.get(LABEL_SOURCE_TABLE), Some(&LogValue::from(SOURCE_TABLE_TOPRU))); - assert_eq!(log1.get(LABEL_TIMESTAMPS), Some(&LogValue::from(1709646900))); + assert_eq!( + log1.get(LABEL_SOURCE_TABLE), + Some(&LogValue::from(SOURCE_TABLE_TOPRU)) + ); + assert_eq!( + log1.get(LABEL_TIMESTAMPS), + Some(&LogValue::from(1709646900)) + ); assert_eq!(log1.get(LABEL_DATE), Some(&LogValue::from("2024-03-05"))); - assert_eq!(log1.get(LABEL_KEYSPACE), Some(&LogValue::from("test_keyspace"))); + assert_eq!( + log1.get(LABEL_KEYSPACE), + Some(&LogValue::from("test_keyspace")) + ); assert_eq!(log1.get(LABEL_USER), Some(&LogValue::from("test_user"))); - assert_eq!(log1.get(LABEL_SQL_DIGEST), Some(&LogValue::from("73716C5F6469676573745F313233"))); - assert_eq!(log1.get(LABEL_PLAN_DIGEST), Some(&LogValue::from("706C616E5F6469676573745F343536"))); + assert_eq!( + log1.get(LABEL_SQL_DIGEST), + Some(&LogValue::from("73716C5F6469676573745F313233")) + ); + assert_eq!( + log1.get(LABEL_PLAN_DIGEST), + Some(&LogValue::from("706C616E5F6469676573745F343536")) + ); assert_eq!(log1.get(METRIC_NAME_TOTAL_RU), Some(&LogValue::from(100.5))); assert_eq!(log1.get(METRIC_NAME_EXEC_COUNT), Some(&LogValue::from(10))); - assert_eq!(log1.get(METRIC_NAME_EXEC_DURATION), Some(&LogValue::from(50000000))); + assert_eq!( + log1.get(METRIC_NAME_EXEC_DURATION), + Some(&LogValue::from(50000000)) + ); // Check second event let event2 = &events[1]; let log2 = event2; - assert_eq!(log2.get(LABEL_SOURCE_TABLE), Some(&LogValue::from(SOURCE_TABLE_TOPRU))); - assert_eq!(log2.get(LABEL_TIMESTAMPS), Some(&LogValue::from(1709646960))); + assert_eq!( + log2.get(LABEL_SOURCE_TABLE), + Some(&LogValue::from(SOURCE_TABLE_TOPRU)) + ); + assert_eq!( + log2.get(LABEL_TIMESTAMPS), + Some(&LogValue::from(1709646960)) + ); assert_eq!(log2.get(LABEL_DATE), Some(&LogValue::from("2024-03-05"))); assert_eq!(log2.get(METRIC_NAME_TOTAL_RU), Some(&LogValue::from(200.0))); assert_eq!(log2.get(METRIC_NAME_EXEC_COUNT), Some(&LogValue::from(20))); - assert_eq!(log2.get(METRIC_NAME_EXEC_DURATION), Some(&LogValue::from(100000000))); + assert_eq!( + log2.get(METRIC_NAME_EXEC_DURATION), + Some(&LogValue::from(100000000)) + ); + } + + #[test] + fn test_parse_tidb_sql_meta_preserves_keyspace() { + let sql_meta = SqlMeta { + sql_digest: b"sql_digest_123".to_vec(), + normalized_sql: "select * from t".to_string(), + is_internal_sql: false, + keyspace_name: b"test_keyspace".to_vec(), + }; + + let events = TopSqlSubResponseParser::parse_tidb_sql_meta(sql_meta); + assert_eq!(events.len(), 1); + + let log = &events[0]; + assert_eq!( + log.get(LABEL_SOURCE_TABLE), + Some(&LogValue::from(SOURCE_TABLE_TOPSQL_SQL_META)) + ); + assert_eq!( + log.get(LABEL_KEYSPACE), + Some(&LogValue::from("test_keyspace")) + ); + assert_eq!( + log.get(LABEL_SQL_DIGEST), + Some(&LogValue::from("73716C5F6469676573745F313233")) + ); + } + + #[test] + fn test_parse_tidb_plan_meta_preserves_keyspace() { + let plan_meta = PlanMeta { + plan_digest: b"plan_digest_456".to_vec(), + normalized_plan: "Point_Get".to_string(), + encoded_normalized_plan: "plan_binary".to_string(), + keyspace_name: b"test_keyspace".to_vec(), + }; + + let events = TopSqlSubResponseParser::parse_tidb_plan_meta(plan_meta); + assert_eq!(events.len(), 1); + + let log = &events[0]; + assert_eq!( + log.get(LABEL_SOURCE_TABLE), + Some(&LogValue::from(SOURCE_TABLE_TOPSQL_PLAN_META)) + ); + assert_eq!( + log.get(LABEL_KEYSPACE), + Some(&LogValue::from("test_keyspace")) + ); + assert_eq!( + log.get(LABEL_PLAN_DIGEST), + Some(&LogValue::from("706C616E5F6469676573745F343536")) + ); } } diff --git a/src/sources/topsql_v2/upstream/tidb/proto.rs b/src/sources/topsql_v2/upstream/tidb/proto.rs index 345584b3..66695636 100644 --- a/src/sources/topsql_v2/upstream/tidb/proto.rs +++ b/src/sources/topsql_v2/upstream/tidb/proto.rs @@ -19,14 +19,17 @@ impl ByteSizeOf for RespOneof { record.items.size_of() + record.sql_digest.len() + record.plan_digest.len() } RespOneof::SqlMeta(sql_meta) => { - sql_meta.sql_digest.len() + sql_meta.normalized_sql.len() + sql_meta.sql_digest.len() + + sql_meta.normalized_sql.len() + + sql_meta.keyspace_name.len() } RespOneof::PlanMeta(plan_meta) => { - plan_meta.plan_digest.len() + plan_meta.normalized_plan.len() - } - RespOneof::TopRuRecords(top_ru_records) => { - top_ru_records.records.size_of() + plan_meta.plan_digest.len() + + plan_meta.normalized_plan.len() + + plan_meta.encoded_normalized_plan.len() + + plan_meta.keyspace_name.len() } + RespOneof::TopRuRecords(top_ru_records) => top_ru_records.records.size_of(), } } } @@ -39,11 +42,11 @@ impl ByteSizeOf for TopSqlRecordItem { impl ByteSizeOf for TopRuRecord { fn allocated_bytes(&self) -> usize { - self.keyspace_name.len() + - self.user.len() + - self.sql_digest.len() + - self.plan_digest.len() + - self.items.size_of() + self.keyspace_name.len() + + self.user.len() + + self.sql_digest.len() + + self.plan_digest.len() + + self.items.size_of() } } From eab2a834aaee8be83fba62799dd34b42f13b9e23 Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Fri, 20 Mar 2026 13:09:07 +0800 Subject: [PATCH 08/11] topsql: route deltalake sinks by pd keyspace --- src/common/keyspace_cluster.rs | 319 +++++++++++++++++++ src/common/meta_store.rs | 186 ----------- src/common/mod.rs | 2 +- src/sinks/topsql_data_deltalake/mod.rs | 49 ++- src/sinks/topsql_data_deltalake/processor.rs | 86 +++-- src/sinks/topsql_meta_deltalake/mod.rs | 49 ++- src/sinks/topsql_meta_deltalake/processor.rs | 89 ++++-- 7 files changed, 499 insertions(+), 281 deletions(-) create mode 100644 src/common/keyspace_cluster.rs delete mode 100644 src/common/meta_store.rs diff --git a/src/common/keyspace_cluster.rs b/src/common/keyspace_cluster.rs new file mode 100644 index 00000000..7aeee19d --- /dev/null +++ b/src/common/keyspace_cluster.rs @@ -0,0 +1,319 @@ +use std::collections::HashMap; +use std::fs; +use std::sync::Arc; +use std::time::Duration; + +use reqwest::{Certificate, Client, Identity, StatusCode}; +use serde::Deserialize; +use tokio::sync::Mutex; +use url::form_urlencoded::byte_serialize; +use vector_lib::tls::TlsConfig; + +type BoxError = Box; + +const REQUEST_TIMEOUT: Duration = Duration::from_secs(10); +const CONNECT_TIMEOUT: Duration = Duration::from_secs(3); + +const ORG_ID_KEYS: &[&str] = &["tenant_id", "TenantID", "org_id", "organization_id"]; +const CLUSTER_ID_KEYS: &[&str] = &["cluster_id", "ClusterId", "tidb_cluster_id"]; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct KeyspaceRoute { + pub org_id: String, + pub cluster_id: String, +} + +#[derive(Clone)] +pub struct PdKeyspaceResolver { + base_url: String, + client: Client, + cache: Arc>>, +} + +#[derive(Debug, Deserialize)] +struct PdKeyspaceMetadata { + config: Option>, +} + +impl PdKeyspaceResolver { + pub fn new(pd_address: impl Into, pd_tls: Option) -> Result { + let client = build_http_client(pd_tls.as_ref())?; + Ok(Self::new_with_client(pd_address, pd_tls.as_ref(), client)) + } + + pub fn new_with_client( + pd_address: impl Into, + pd_tls: Option<&TlsConfig>, + client: Client, + ) -> Self { + Self { + base_url: normalize_pd_address(&pd_address.into(), pd_tls.is_some()), + client, + cache: Arc::new(Mutex::new(HashMap::new())), + } + } + + pub async fn resolve_keyspace( + &self, + keyspace_name: &str, + ) -> Result, BoxError> { + if keyspace_name.is_empty() { + return Ok(None); + } + + if let Some(cached) = self.cache.lock().await.get(keyspace_name).cloned() { + return Ok(Some(cached)); + } + + let encoded_keyspace = byte_serialize(keyspace_name.as_bytes()).collect::(); + let response = self + .client + .get(format!( + "{}/pd/api/v2/keyspaces/{}", + self.base_url, encoded_keyspace + )) + .send() + .await?; + + match response.status() { + StatusCode::NOT_FOUND => return Ok(None), + status if !status.is_success() => { + let body = response.text().await.unwrap_or_default(); + if is_not_found_body(&body) { + return Ok(None); + } + return Err(format!( + "pd keyspace lookup failed for {} with status {}: {}", + keyspace_name, status, body + ) + .into()); + } + _ => {} + } + + let metadata: PdKeyspaceMetadata = response.json().await?; + let route = metadata.config.as_ref().and_then(extract_route_from_config); + + if let Some(route) = route.clone() { + self.cache + .lock() + .await + .insert(keyspace_name.to_string(), route); + } + + Ok(route) + } +} + +fn build_http_client(pd_tls: Option<&TlsConfig>) -> Result { + let mut builder = Client::builder() + .timeout(REQUEST_TIMEOUT) + .connect_timeout(CONNECT_TIMEOUT); + + if let Some(tls) = pd_tls { + builder = builder + .danger_accept_invalid_certs(!tls.verify_certificate.unwrap_or(true)) + .danger_accept_invalid_hostnames(!tls.verify_hostname.unwrap_or(true)); + + if let Some(ca_file) = tls.ca_file.as_ref() { + let ca = fs::read(ca_file)?; + builder = builder.add_root_certificate(Certificate::from_pem(&ca)?); + } + + match (tls.crt_file.as_ref(), tls.key_file.as_ref()) { + (Some(crt_file), Some(key_file)) => { + let crt = fs::read(crt_file)?; + let key = fs::read(key_file)?; + builder = builder.identity(Identity::from_pkcs8_pem(&crt, &key)?); + } + (None, None) => {} + _ => { + return Err( + "pd_tls.crt_file and pd_tls.key_file must both be set when client TLS is enabled" + .into(), + ); + } + } + } + + Ok(builder.build()?) +} + +fn normalize_pd_address(pd_address: &str, use_tls: bool) -> String { + let trimmed = pd_address.trim().trim_end_matches('/'); + if trimmed.starts_with("http://") || trimmed.starts_with("https://") { + trimmed.to_string() + } else if use_tls { + format!("https://{}", trimmed) + } else { + format!("http://{}", trimmed) + } +} + +fn is_not_found_body(body: &str) -> bool { + let lower = body.to_ascii_lowercase(); + lower.contains("not found") +} + +fn extract_route_from_config(config: &HashMap) -> Option { + let org_id = find_config_value(config, ORG_ID_KEYS)?; + let cluster_id = find_config_value(config, CLUSTER_ID_KEYS)?; + + if org_id.is_empty() || cluster_id.is_empty() { + return None; + } + + Some(KeyspaceRoute { + org_id: org_id.to_string(), + cluster_id: cluster_id.to_string(), + }) +} + +fn find_config_value<'a>(config: &'a HashMap, keys: &[&str]) -> Option<&'a str> { + keys.iter() + .find_map(|key| config.get(*key)) + .map(String::as_str) + .filter(|value| !value.is_empty()) +} + +#[cfg(test)] +mod tests { + use std::convert::Infallible; + use std::net::TcpListener; + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Arc; + + use hyper::service::{make_service_fn, service_fn}; + use hyper::{Body, Request, Response, Server, StatusCode as HyperStatusCode}; + + use super::*; + + #[test] + fn normalize_pd_address_adds_expected_scheme() { + assert_eq!(normalize_pd_address("pd:2379/", false), "http://pd:2379"); + assert_eq!(normalize_pd_address("pd:2379/", true), "https://pd:2379"); + assert_eq!( + normalize_pd_address("https://pd:2379", false), + "https://pd:2379" + ); + } + + #[test] + fn extract_route_from_config_supports_expected_aliases() { + let mut config = HashMap::new(); + config.insert("tenant_id".to_string(), "30018".to_string()); + config.insert( + "tidb_cluster_id".to_string(), + "10762701230946915645".to_string(), + ); + + assert_eq!( + extract_route_from_config(&config), + Some(KeyspaceRoute { + org_id: "30018".to_string(), + cluster_id: "10762701230946915645".to_string(), + }) + ); + } + + #[tokio::test] + async fn resolve_keyspace_uses_pd_keyspace_api_and_caches_result() { + let request_count = Arc::new(AtomicUsize::new(0)); + let counter = Arc::clone(&request_count); + + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let address = listener.local_addr().unwrap(); + let server = Server::from_tcp(listener) + .unwrap() + .serve(make_service_fn(move |_| { + let counter = Arc::clone(&counter); + async move { + Ok::<_, Infallible>(service_fn(move |request: Request| { + let counter = Arc::clone(&counter); + async move { + counter.fetch_add(1, Ordering::SeqCst); + assert_eq!(request.uri().path(), "/pd/api/v2/keyspaces/test_keyspace"); + Ok::<_, Infallible>(Response::new(Body::from( + r#"{"config":{"tenant_id":"30018","cluster_id":"10762701230946915645"}}"#, + ))) + } + })) + } + })); + let server_handle = tokio::spawn(server); + + let client = Client::builder().no_proxy().build().unwrap(); + let resolver = + PdKeyspaceResolver::new_with_client(format!("http://{}", address), None, client); + + let first = resolver.resolve_keyspace("test_keyspace").await.unwrap(); + let second = resolver.resolve_keyspace("test_keyspace").await.unwrap(); + + assert_eq!( + first, + Some(KeyspaceRoute { + org_id: "30018".to_string(), + cluster_id: "10762701230946915645".to_string(), + }) + ); + assert_eq!(second, first); + assert_eq!(request_count.load(Ordering::SeqCst), 1); + + server_handle.abort(); + } + + #[tokio::test] + async fn resolve_keyspace_returns_none_for_missing_route() { + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let address = listener.local_addr().unwrap(); + let server = + Server::from_tcp(listener) + .unwrap() + .serve(make_service_fn(move |_| async move { + Ok::<_, Infallible>(service_fn(move |_request: Request| async move { + Ok::<_, Infallible>(Response::new(Body::from( + r#"{"config":{"tenant_id":"30018"}}"#, + ))) + })) + })); + let server_handle = tokio::spawn(server); + + let client = Client::builder().no_proxy().build().unwrap(); + let resolver = + PdKeyspaceResolver::new_with_client(format!("http://{}", address), None, client); + let route = resolver.resolve_keyspace("test_keyspace").await.unwrap(); + + assert_eq!(route, None); + + server_handle.abort(); + } + + #[tokio::test] + async fn resolve_keyspace_treats_not_found_error_body_as_empty_result() { + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let address = listener.local_addr().unwrap(); + let server = + Server::from_tcp(listener) + .unwrap() + .serve(make_service_fn(move |_| async move { + Ok::<_, Infallible>(service_fn(move |_request: Request| async move { + Ok::<_, Infallible>( + Response::builder() + .status(HyperStatusCode::INTERNAL_SERVER_ERROR) + .body(Body::from("keyspace not found")) + .unwrap(), + ) + })) + })); + let server_handle = tokio::spawn(server); + + let client = Client::builder().no_proxy().build().unwrap(); + let resolver = + PdKeyspaceResolver::new_with_client(format!("http://{}", address), None, client); + let route = resolver.resolve_keyspace("missing_keyspace").await.unwrap(); + + assert_eq!(route, None); + + server_handle.abort(); + } +} diff --git a/src/common/meta_store.rs b/src/common/meta_store.rs deleted file mode 100644 index 3b8a7dc3..00000000 --- a/src/common/meta_store.rs +++ /dev/null @@ -1,186 +0,0 @@ -use std::collections::HashMap; -use std::sync::Arc; -use std::time::Duration; - -use reqwest::{Client, StatusCode}; -use serde::Deserialize; -use tokio::sync::Mutex; - -type BoxError = Box; - -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct KeyspaceRoute { - pub org_id: String, - pub cluster_id: String, -} - -#[derive(Clone)] -pub struct MetaStoreResolver { - base_url: String, - client: Client, - cache: Arc>>, -} - -#[derive(Debug, Deserialize)] -struct MetaStoreKeyspaceMetadata { - #[serde(rename = "ClusterId", alias = "cluster_id")] - cluster_id: String, - #[serde(rename = "TenantID", alias = "tenant_id")] - tenant_id: String, -} - -impl MetaStoreResolver { - pub fn new(meta_store_addr: impl Into) -> Result { - let client = Client::builder() - .timeout(Duration::from_secs(10)) - .connect_timeout(Duration::from_secs(3)) - .build()?; - Ok(Self::new_with_client(meta_store_addr, client)) - } - - pub fn new_with_client(meta_store_addr: impl Into, client: Client) -> Self { - Self { - base_url: normalize_meta_store_addr(&meta_store_addr.into()), - client, - cache: Arc::new(Mutex::new(HashMap::new())), - } - } - - pub async fn resolve_keyspace( - &self, - keyspace_name: &str, - ) -> Result, BoxError> { - if keyspace_name.is_empty() { - return Ok(None); - } - - if let Some(cached) = self.cache.lock().await.get(keyspace_name).cloned() { - return Ok(Some(cached)); - } - - let response = self - .client - .get(format!("{}/api/v2/meta", self.base_url)) - .query(&[("keyspace_name", keyspace_name)]) - .send() - .await?; - - match response.status() { - StatusCode::NOT_FOUND => return Ok(None), - status if !status.is_success() => { - return Err(format!( - "meta-store lookup failed for keyspace {} with status {}", - keyspace_name, status - ) - .into()); - } - _ => {} - } - - let metadata: Vec = response.json().await?; - let Some(first) = metadata.into_iter().next() else { - return Ok(None); - }; - - if first.cluster_id.is_empty() || first.tenant_id.is_empty() { - return Ok(None); - } - - let route = KeyspaceRoute { - org_id: first.tenant_id, - cluster_id: first.cluster_id, - }; - self.cache - .lock() - .await - .insert(keyspace_name.to_string(), route.clone()); - - Ok(Some(route)) - } -} - -fn normalize_meta_store_addr(meta_store_addr: &str) -> String { - let trimmed = meta_store_addr.trim().trim_end_matches('/'); - if trimmed.starts_with("http://") || trimmed.starts_with("https://") { - trimmed.to_string() - } else { - format!("http://{}", trimmed) - } -} - -#[cfg(test)] -mod tests { - use std::convert::Infallible; - use std::net::TcpListener; - use std::sync::atomic::{AtomicUsize, Ordering}; - use std::sync::Arc; - - use hyper::service::{make_service_fn, service_fn}; - use hyper::{Body, Request, Response, Server}; - - use super::*; - - #[test] - fn normalize_meta_store_addr_adds_scheme() { - assert_eq!( - normalize_meta_store_addr("meta-store:9088/"), - "http://meta-store:9088" - ); - assert_eq!( - normalize_meta_store_addr("https://meta-store:9088"), - "https://meta-store:9088" - ); - } - - #[tokio::test] - async fn resolve_keyspace_uses_tenant_as_org_id_and_caches_result() { - let request_count = Arc::new(AtomicUsize::new(0)); - let counter = Arc::clone(&request_count); - - let listener = TcpListener::bind("127.0.0.1:0").unwrap(); - let address = listener.local_addr().unwrap(); - let server = Server::from_tcp(listener) - .unwrap() - .serve(make_service_fn(move |_| { - let counter = Arc::clone(&counter); - async move { - Ok::<_, Infallible>(service_fn(move |request: Request| { - let counter = Arc::clone(&counter); - async move { - counter.fetch_add(1, Ordering::SeqCst); - assert_eq!(request.uri().path(), "/api/v2/meta"); - assert!( - request - .uri() - .query() - .unwrap_or_default() - .contains("keyspace_name=test_keyspace"), - "query should contain keyspace_name=test_keyspace" - ); - Ok::<_, Infallible>(Response::new(Body::from( - r#"[{"ClusterId":"10110362358366286743","TenantID":"1369847559692509642"}]"#, - ))) - } - })) - } - })); - let server_handle = tokio::spawn(server); - - let resolver = MetaStoreResolver::new(format!("http://{}", address)).unwrap(); - - let first = resolver.resolve_keyspace("test_keyspace").await.unwrap(); - let second = resolver.resolve_keyspace("test_keyspace").await.unwrap(); - - assert_eq!( - first, - Some(KeyspaceRoute { - org_id: "1369847559692509642".to_string(), - cluster_id: "10110362358366286743".to_string(), - }) - ); - assert_eq!(second, first); - assert_eq!(request_count.load(Ordering::SeqCst), 1); - - server_handle.abort(); - } -} diff --git a/src/common/mod.rs b/src/common/mod.rs index 9c85f579..7397729b 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -2,5 +2,5 @@ pub mod checkpointer; pub mod deltalake_s3; pub mod deltalake_writer; pub mod features; -pub mod meta_store; +pub mod keyspace_cluster; pub mod topology; diff --git a/src/sinks/topsql_data_deltalake/mod.rs b/src/sinks/topsql_data_deltalake/mod.rs index 02632354..e18be0bc 100644 --- a/src/sinks/topsql_data_deltalake/mod.rs +++ b/src/sinks/topsql_data_deltalake/mod.rs @@ -27,7 +27,11 @@ mod processor; // Import default functions from common module use crate::common::deltalake_s3; use crate::common::deltalake_writer::{default_batch_size, default_timeout_secs}; -use crate::common::meta_store::MetaStoreResolver; +use crate::common::keyspace_cluster::PdKeyspaceResolver; + +pub const fn default_enable_keyspace_cluster_mapping() -> bool { + false +} pub const fn default_max_delay_secs() -> u64 { 180 @@ -56,8 +60,15 @@ pub struct DeltaLakeConfig { #[serde(default = "default_max_delay_secs")] pub max_delay_secs: u64, - /// Meta-store address used to resolve keyspace to org/cluster path segments - pub meta_store_addr: Option, + /// Whether to resolve keyspace to org/cluster path segments through PD. + #[serde(default = "default_enable_keyspace_cluster_mapping")] + pub enable_keyspace_cluster_mapping: bool, + + /// PD address used to resolve keyspace to org/cluster path segments. + pub pd_address: Option, + + /// TLS configuration for PD keyspace lookup. + pub pd_tls: Option, /// Storage options for cloud storage pub storage_options: Option>, @@ -104,7 +115,9 @@ impl GenerateConfig for DeltaLakeConfig { batch_size: default_batch_size(), timeout_secs: default_timeout_secs(), max_delay_secs: default_max_delay_secs(), - meta_store_addr: None, + enable_keyspace_cluster_mapping: default_enable_keyspace_cluster_mapping(), + pd_address: None, + pd_tls: None, storage_options: None, bucket: None, options: None, @@ -224,17 +237,23 @@ impl DeltaLakeConfig { info!("No S3 service available - using default storage options only"); } - let meta_store_resolver = self - .meta_store_addr - .as_deref() - .map(MetaStoreResolver::new) - .transpose() - .map_err(|error| { - vector::Error::from(format!( - "failed to build meta-store resolver from meta_store_addr: {}", - error - )) + let keyspace_route_resolver = if self.enable_keyspace_cluster_mapping { + let pd_address = self.pd_address.as_deref().ok_or_else(|| { + vector::Error::from( + "pd_address is required when enable_keyspace_cluster_mapping is true", + ) })?; + Some( + PdKeyspaceResolver::new(pd_address, self.pd_tls.clone()).map_err(|error| { + vector::Error::from(format!( + "failed to build PD keyspace resolver from pd_address: {}", + error + )) + })?, + ) + } else { + None + }; let sink = TopSQLDeltaLakeSink::new( base_path, @@ -242,7 +261,7 @@ impl DeltaLakeConfig { write_config, self.max_delay_secs, Some(storage_options), - meta_store_resolver, + keyspace_route_resolver, ); Ok(VectorSink::from_event_streamsink(sink)) diff --git a/src/sinks/topsql_data_deltalake/processor.rs b/src/sinks/topsql_data_deltalake/processor.rs index ecba22c4..77314e38 100644 --- a/src/sinks/topsql_data_deltalake/processor.rs +++ b/src/sinks/topsql_data_deltalake/processor.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; +use std::time::Duration; use futures::{stream::BoxStream, StreamExt}; use tokio::sync::mpsc; @@ -9,7 +10,7 @@ use vector_lib::event::{Event, LogEvent}; use vector_lib::sink::StreamSink; use crate::common::deltalake_writer::{DeltaLakeWriter, DeltaTableConfig, WriteConfig}; -use crate::common::meta_store::{KeyspaceRoute, MetaStoreResolver}; +use crate::common::keyspace_cluster::{KeyspaceRoute, PdKeyspaceResolver}; use crate::sources::topsql_v2::upstream::consts::{ LABEL_DATE, LABEL_DB_NAME, LABEL_INSTANCE_KEY, LABEL_KEYSPACE, LABEL_PLAN_DIGEST, LABEL_REGION_ID, LABEL_SOURCE_TABLE, LABEL_SQL_DIGEST, LABEL_TABLE_ID, LABEL_TABLE_NAME, @@ -247,6 +248,8 @@ lazy_static! { }; } +const ROUTE_RESOLUTION_RETRY_DELAY: Duration = Duration::from_secs(5); + /// Delta Lake sink processor pub struct TopSQLDeltaLakeSink { base_path: PathBuf, @@ -254,7 +257,7 @@ pub struct TopSQLDeltaLakeSink { write_config: WriteConfig, max_delay_secs: u64, storage_options: Option>, - meta_store_resolver: Option, + keyspace_route_resolver: Option, writers: Arc>>, tx: Arc>>>, } @@ -273,7 +276,7 @@ impl TopSQLDeltaLakeSink { write_config: WriteConfig, max_delay_secs: u64, storage_options: Option>, - meta_store_resolver: Option, + keyspace_route_resolver: Option, ) -> Self { // Create a channel with capacity 1 let (tx, rx) = mpsc::channel(1); @@ -286,7 +289,7 @@ impl TopSQLDeltaLakeSink { write_config, max_delay_secs, storage_options, - meta_store_resolver, + keyspace_route_resolver, writers: Arc::new(Mutex::new(HashMap::new())), tx: Arc::clone(&tx), }); @@ -315,7 +318,7 @@ impl TopSQLDeltaLakeSink { write_config: inner_ref.write_config.clone(), max_delay_secs: inner_ref.max_delay_secs, storage_options: inner_ref.storage_options.clone(), - meta_store_resolver: inner_ref.meta_store_resolver.clone(), + keyspace_route_resolver: inner_ref.keyspace_route_resolver.clone(), writers: Arc::clone(&inner_ref.writers), tx: Arc::clone(&inner_ref.tx), }; @@ -335,7 +338,7 @@ impl TopSQLDeltaLakeSink { write_config: WriteConfig, max_delay_secs: u64, storage_options: Option>, - meta_store_resolver: Option, + keyspace_route_resolver: Option, ) -> (Self, mpsc::Receiver>>) { // Create a channel with capacity 1 let (tx, rx): ( @@ -351,7 +354,7 @@ impl TopSQLDeltaLakeSink { write_config, max_delay_secs, storage_options, - meta_store_resolver, + keyspace_route_resolver, writers: Arc::new(Mutex::new(HashMap::new())), tx, }; @@ -363,8 +366,22 @@ impl TopSQLDeltaLakeSink { /// Process events from channel and write to Delta Lake async fn process_events_loop(&self, mut rx: mpsc::Receiver>>) { while let Some(events_vec) = rx.recv().await { - if let Err(e) = self.process_events(events_vec).await { - error!("Failed to process events: {}", e); + let retry_on_failure = self.keyspace_route_resolver.is_some(); + let mut pending_events = events_vec; + + loop { + let retry_snapshot = retry_on_failure.then(|| pending_events.clone()); + match self.process_events(pending_events).await { + Ok(()) => break, + Err(error) => { + error!("Failed to process events: {}", error); + let Some(events) = retry_snapshot else { + break; + }; + tokio::time::sleep(ROUTE_RESOLUTION_RETRY_DELAY).await; + pending_events = events; + } + } } } } @@ -384,7 +401,7 @@ impl TopSQLDeltaLakeSink { if let Event::Log(log_event) = event { if let Some(writer_key) = self .resolve_writer_key(&log_event, &mut resolved_routes) - .await + .await? { table_events .entry(writer_key) @@ -455,43 +472,50 @@ impl TopSQLDeltaLakeSink { &self, log_event: &LogEvent, resolved_routes: &mut HashMap>, - ) -> Option { - let table_name = Self::extract_table_name(log_event)?; + ) -> Result, Box> { + let Some(table_name) = Self::extract_table_name(log_event) else { + return Ok(None); + }; let route = self .resolve_keyspace_route(log_event, resolved_routes) - .await; - Some(WriterKey { + .await?; + if self.keyspace_route_resolver.is_some() && route.is_none() { + return Ok(None); + } + Ok(Some(WriterKey { table_name: table_name.clone(), table_path: self.build_table_path(&table_name, route.as_ref()), - }) + })) } async fn resolve_keyspace_route( &self, log_event: &LogEvent, resolved_routes: &mut HashMap>, - ) -> Option { - let resolver = self.meta_store_resolver.as_ref()?; - let keyspace = log_event + ) -> Result, Box> { + let Some(resolver) = self.keyspace_route_resolver.as_ref() else { + return Ok(None); + }; + let Some(keyspace) = log_event .get(LABEL_KEYSPACE) - .and_then(|value| value.as_str())?; + .and_then(|value| value.as_str()) + else { + return Ok(None); + }; if let Some(route) = resolved_routes.get(keyspace.as_ref()) { - return route.clone(); + return Ok(route.clone()); } - let route = match resolver.resolve_keyspace(keyspace.as_ref()).await { - Ok(route) => route, - Err(error) => { - warn!( - "Failed to resolve keyspace {} from meta-store, falling back to base_path: {}", - keyspace, error - ); - None - } - }; + let route = resolver.resolve_keyspace(keyspace.as_ref()).await?; resolved_routes.insert(keyspace.to_string(), route.clone()); - route + if route.is_none() { + warn!( + "No cluster route found for keyspace {}, skipping TopSQL data event", + keyspace + ); + } + Ok(route) } fn build_table_path(&self, table_name: &str, route: Option<&KeyspaceRoute>) -> PathBuf { diff --git a/src/sinks/topsql_meta_deltalake/mod.rs b/src/sinks/topsql_meta_deltalake/mod.rs index dd8ee970..6292f71f 100644 --- a/src/sinks/topsql_meta_deltalake/mod.rs +++ b/src/sinks/topsql_meta_deltalake/mod.rs @@ -27,7 +27,11 @@ mod processor; // Import default functions from common module use crate::common::deltalake_s3; use crate::common::deltalake_writer::{default_batch_size, default_timeout_secs}; -use crate::common::meta_store::MetaStoreResolver; +use crate::common::keyspace_cluster::PdKeyspaceResolver; + +pub const fn default_enable_keyspace_cluster_mapping() -> bool { + false +} pub const fn default_max_delay_secs() -> u64 { 180 @@ -64,8 +68,15 @@ pub struct DeltaLakeConfig { #[serde(default = "default_meta_cache_capacity")] pub meta_cache_capacity: usize, - /// Meta-store address used to resolve keyspace to org/cluster path segments - pub meta_store_addr: Option, + /// Whether to resolve keyspace to org/cluster path segments through PD. + #[serde(default = "default_enable_keyspace_cluster_mapping")] + pub enable_keyspace_cluster_mapping: bool, + + /// PD address used to resolve keyspace to org/cluster path segments. + pub pd_address: Option, + + /// TLS configuration for PD keyspace lookup. + pub pd_tls: Option, /// Storage options for cloud storage pub storage_options: Option>, @@ -113,7 +124,9 @@ impl GenerateConfig for DeltaLakeConfig { timeout_secs: default_timeout_secs(), max_delay_secs: default_max_delay_secs(), meta_cache_capacity: default_meta_cache_capacity(), - meta_store_addr: None, + enable_keyspace_cluster_mapping: default_enable_keyspace_cluster_mapping(), + pd_address: None, + pd_tls: None, storage_options: None, bucket: None, options: None, @@ -233,17 +246,23 @@ impl DeltaLakeConfig { info!("No S3 service available - using default storage options only"); } - let meta_store_resolver = self - .meta_store_addr - .as_deref() - .map(MetaStoreResolver::new) - .transpose() - .map_err(|error| { - vector::Error::from(format!( - "failed to build meta-store resolver from meta_store_addr: {}", - error - )) + let keyspace_route_resolver = if self.enable_keyspace_cluster_mapping { + let pd_address = self.pd_address.as_deref().ok_or_else(|| { + vector::Error::from( + "pd_address is required when enable_keyspace_cluster_mapping is true", + ) })?; + Some( + PdKeyspaceResolver::new(pd_address, self.pd_tls.clone()).map_err(|error| { + vector::Error::from(format!( + "failed to build PD keyspace resolver from pd_address: {}", + error + )) + })?, + ) + } else { + None + }; let sink = TopSQLDeltaLakeSink::new( base_path, @@ -251,7 +270,7 @@ impl DeltaLakeConfig { write_config, self.max_delay_secs, Some(storage_options), - meta_store_resolver, + keyspace_route_resolver, self.meta_cache_capacity, ); diff --git a/src/sinks/topsql_meta_deltalake/processor.rs b/src/sinks/topsql_meta_deltalake/processor.rs index 10bc1ea7..79209d9d 100644 --- a/src/sinks/topsql_meta_deltalake/processor.rs +++ b/src/sinks/topsql_meta_deltalake/processor.rs @@ -12,7 +12,7 @@ use vector_lib::event::Event; use vector_lib::sink::StreamSink; use crate::common::deltalake_writer::{DeltaLakeWriter, DeltaTableConfig, WriteConfig}; -use crate::common::meta_store::{KeyspaceRoute, MetaStoreResolver}; +use crate::common::keyspace_cluster::{KeyspaceRoute, PdKeyspaceResolver}; use crate::sources::topsql_v2::upstream::consts::{ LABEL_DATE, LABEL_ENCODED_NORMALIZED_PLAN, LABEL_KEYSPACE, LABEL_NORMALIZED_PLAN, LABEL_NORMALIZED_SQL, LABEL_PLAN_DIGEST, LABEL_SOURCE_TABLE, LABEL_SQL_DIGEST, @@ -106,6 +106,7 @@ lazy_static! { /// When buffer size exceeds this value, events will be flushed const EVENT_BUFFER_MAX_SIZE: usize = 1000; +const ROUTE_RESOLUTION_RETRY_DELAY: Duration = Duration::from_secs(5); #[derive(Clone, Debug)] struct BufferedEvent { @@ -126,7 +127,7 @@ pub struct TopSQLDeltaLakeSink { write_config: WriteConfig, max_delay_secs: u64, storage_options: Option>, - meta_store_resolver: Option, + keyspace_route_resolver: Option, writers: Arc>>, tx: Arc>>>, // LRU cache for SQL meta deduplication: key -> () @@ -147,7 +148,7 @@ impl TopSQLDeltaLakeSink { write_config: WriteConfig, max_delay_secs: u64, storage_options: Option>, - meta_store_resolver: Option, + keyspace_route_resolver: Option, meta_cache_capacity: usize, ) -> Self { // Create a channel with capacity 1 @@ -161,7 +162,7 @@ impl TopSQLDeltaLakeSink { write_config, max_delay_secs, storage_options, - meta_store_resolver, + keyspace_route_resolver, writers: Arc::new(Mutex::new(HashMap::new())), tx: Arc::clone(&tx), seen_keys_sql_meta: Arc::new(Mutex::new(LruCache::new( @@ -198,7 +199,7 @@ impl TopSQLDeltaLakeSink { write_config: inner_ref.write_config.clone(), max_delay_secs: inner_ref.max_delay_secs, storage_options: inner_ref.storage_options.clone(), - meta_store_resolver: inner_ref.meta_store_resolver.clone(), + keyspace_route_resolver: inner_ref.keyspace_route_resolver.clone(), writers: Arc::clone(&inner_ref.writers), tx: Arc::clone(&inner_ref.tx), seen_keys_sql_meta: Arc::clone(&inner_ref.seen_keys_sql_meta), @@ -222,7 +223,7 @@ impl TopSQLDeltaLakeSink { write_config: WriteConfig, max_delay_secs: u64, storage_options: Option>, - meta_store_resolver: Option, + keyspace_route_resolver: Option, meta_cache_capacity: usize, ) -> (Self, mpsc::Receiver>>) { // Create a channel with capacity 1 @@ -239,7 +240,7 @@ impl TopSQLDeltaLakeSink { write_config, max_delay_secs, storage_options, - meta_store_resolver, + keyspace_route_resolver, writers: Arc::new(Mutex::new(HashMap::new())), tx, seen_keys_sql_meta: Arc::new(Mutex::new(LruCache::new( @@ -259,8 +260,22 @@ impl TopSQLDeltaLakeSink { /// Process events from channel and write to Delta Lake async fn process_events_loop(&self, mut rx: mpsc::Receiver>>) { while let Some(events_vec) = rx.recv().await { - if let Err(e) = self.process_events(events_vec).await { - error!("Failed to process events: {}", e); + let retry_on_failure = self.keyspace_route_resolver.is_some(); + let mut pending_events = events_vec; + + loop { + let retry_snapshot = retry_on_failure.then(|| pending_events.clone()); + match self.process_events(pending_events).await { + Ok(()) => break, + Err(error) => { + error!("Failed to process events: {}", error); + let Some(events) = retry_snapshot else { + break; + }; + tokio::time::sleep(ROUTE_RESOLUTION_RETRY_DELAY).await; + pending_events = events; + } + } } } } @@ -292,47 +307,55 @@ impl TopSQLDeltaLakeSink { &self, log_event: &vector_lib::event::LogEvent, resolved_routes: &mut HashMap>, - ) -> Option { - let table_name = log_event + ) -> Result, Box> { + let Some(table_name) = log_event .get(LABEL_SOURCE_TABLE) - .and_then(|value| value.as_str())? - .to_string(); + .and_then(|value| value.as_str()) + .map(|value| value.to_string()) + else { + return Ok(None); + }; let route = self .resolve_keyspace_route(log_event, resolved_routes) - .await; - Some(WriterKey { + .await?; + if self.keyspace_route_resolver.is_some() && route.is_none() { + return Ok(None); + } + Ok(Some(WriterKey { table_name: table_name.clone(), table_path: self.build_table_path(&table_name, route.as_ref()), - }) + })) } async fn resolve_keyspace_route( &self, log_event: &vector_lib::event::LogEvent, resolved_routes: &mut HashMap>, - ) -> Option { - let resolver = self.meta_store_resolver.as_ref()?; - let keyspace = log_event + ) -> Result, Box> { + let Some(resolver) = self.keyspace_route_resolver.as_ref() else { + return Ok(None); + }; + let Some(keyspace) = log_event .get(LABEL_KEYSPACE) - .and_then(|value| value.as_str())?; + .and_then(|value| value.as_str()) + else { + return Ok(None); + }; let keyspace_ref = keyspace.as_ref(); if let Some(route) = resolved_routes.get(keyspace_ref) { - return route.clone(); + return Ok(route.clone()); } - let route = match resolver.resolve_keyspace(keyspace_ref).await { - Ok(route) => route, - Err(error) => { - warn!( - "Failed to resolve keyspace {} from meta-store for meta sink, falling back to base_path: {}", - keyspace_ref, error - ); - None - } - }; + let route = resolver.resolve_keyspace(keyspace_ref).await?; resolved_routes.insert(keyspace_ref.to_string(), route.clone()); - route + if route.is_none() { + warn!( + "No cluster route found for keyspace {}, skipping TopSQL meta event", + keyspace_ref + ); + } + Ok(route) } fn build_table_path(&self, table_name: &str, route: Option<&KeyspaceRoute>) -> PathBuf { @@ -435,7 +458,7 @@ impl TopSQLDeltaLakeSink { if let Event::Log(log_event) = event { let Some(writer_key) = self .resolve_writer_key(&log_event, &mut resolved_routes) - .await + .await? else { continue; }; From 7d40dc4f42fc885f63be48cab086708a06e66d6d Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Fri, 20 Mar 2026 17:50:52 +0800 Subject: [PATCH 09/11] topsql_v2: add TiKV collection toggle --- src/sources/topsql_v2/arch.md | 5 +++ src/sources/topsql_v2/controller.rs | 66 +++++++++++++++++++++++++++++ src/sources/topsql_v2/mod.rs | 11 +++++ 3 files changed, 82 insertions(+) diff --git a/src/sources/topsql_v2/arch.md b/src/sources/topsql_v2/arch.md index 3ffd579b..4e0fc70a 100644 --- a/src/sources/topsql_v2/arch.md +++ b/src/sources/topsql_v2/arch.md @@ -45,6 +45,11 @@ pub struct TopSQLV2Config { } ``` +Notable source-specific options: + +- `manager_server_address` + `tidb_namespace`: discover active TiDB instances from manager. +- `enable_tikv_topsql`: when `false`, only TiDB TopSQL is collected; TiKV TopSQL subscriptions are skipped. + ## Data Flow Same as TopSQL v1 but with improved reliability and performance. diff --git a/src/sources/topsql_v2/controller.rs b/src/sources/topsql_v2/controller.rs index e5199392..6b27fdd5 100644 --- a/src/sources/topsql_v2/controller.rs +++ b/src/sources/topsql_v2/controller.rs @@ -28,6 +28,7 @@ pub struct Controller { init_retry_delay: Duration, top_n: usize, downsampling_interval: u32, + enable_tikv_topsql: bool, topru: TopRUConfig, schema_cache: Arc, @@ -51,6 +52,7 @@ impl Controller { init_retry_delay: Duration, top_n: usize, downsampling_interval: u32, + enable_tikv_topsql: bool, schema_update_interval: Duration, tls_config: Option, proxy_config: &ProxyConfig, @@ -85,6 +87,7 @@ impl Controller { init_retry_delay, top_n, downsampling_interval, + enable_tikv_topsql, topru, schema_cache, schema_update_interval, @@ -126,6 +129,8 @@ impl Controller { self.topo_fetcher .get_up_components(&mut latest_components) .await?; + latest_components = + Self::filter_topsql_components(latest_components, self.enable_tikv_topsql); let prev_components = self.components.clone(); let newcomers = latest_components.difference(&prev_components); @@ -158,6 +163,20 @@ impl Controller { Ok(has_change) } + fn filter_topsql_components( + components: HashSet, + enable_tikv_topsql: bool, + ) -> HashSet { + if enable_tikv_topsql { + return components; + } + + components + .into_iter() + .filter(|component| component.instance_type != InstanceType::TiKV) + .collect() + } + async fn update_schema_manager(&mut self, available_components: &HashSet) { // If there is a running schema manager, shut it down if let Some(instance) = self.active_schema_manager.take() { @@ -347,3 +366,50 @@ impl Controller { info!(message = "All TopSQL sources have been shut down."); } } + +#[cfg(test)] +mod tests { + use super::Controller; + use crate::common::topology::{Component, InstanceType}; + use std::collections::HashSet; + + #[test] + fn filter_topsql_components_keeps_tikv_when_enabled() { + let components = sample_components(); + let filtered = Controller::filter_topsql_components(components.clone(), true); + assert_eq!(filtered.len(), components.len()); + assert!(filtered + .iter() + .any(|c| c.instance_type == InstanceType::TiKV)); + } + + #[test] + fn filter_topsql_components_removes_tikv_when_disabled() { + let filtered = Controller::filter_topsql_components(sample_components(), false); + assert!(filtered + .iter() + .all(|c| c.instance_type != InstanceType::TiKV)); + assert!(filtered + .iter() + .any(|c| c.instance_type == InstanceType::TiDB)); + } + + fn sample_components() -> HashSet { + HashSet::from([ + Component { + instance_type: InstanceType::TiDB, + host: "tidb.example".to_owned(), + primary_port: 4000, + secondary_port: 10080, + instance_name: Some("tidb-0".to_owned()), + }, + Component { + instance_type: InstanceType::TiKV, + host: "tikv.example".to_owned(), + primary_port: 20160, + secondary_port: 20180, + instance_name: Some("tikv-0".to_owned()), + }, + ]) + } +} diff --git a/src/sources/topsql_v2/mod.rs b/src/sources/topsql_v2/mod.rs index fbac2d00..874eef83 100644 --- a/src/sources/topsql_v2/mod.rs +++ b/src/sources/topsql_v2/mod.rs @@ -93,6 +93,10 @@ pub struct TopSQLConfig { #[serde(default = "default_downsampling_interval")] pub downsampling_interval: u32, + /// Whether to collect TopSQL data from TiKV components. + #[serde(default = "default_enable_tikv_topsql")] + pub enable_tikv_topsql: bool, + /// TopRU (Resource Unit) collection config. Only applies to TiDB upstream. #[serde(default)] pub topru: TopRUConfig, @@ -114,6 +118,10 @@ pub const fn default_downsampling_interval() -> u32 { 60 } +pub const fn default_enable_tikv_topsql() -> bool { + true +} + impl GenerateConfig for TopSQLConfig { fn generate_config() -> toml::Value { toml::Value::try_from(Self { @@ -127,6 +135,7 @@ impl GenerateConfig for TopSQLConfig { topology_fetch_interval_seconds: default_topology_fetch_interval(), top_n: default_top_n(), downsampling_interval: default_downsampling_interval(), + enable_tikv_topsql: default_enable_tikv_topsql(), topru: TopRUConfig::default(), }) .unwrap() @@ -149,6 +158,7 @@ impl SourceConfig for TopSQLConfig { let init_retry_delay = Duration::from_secs_f64(self.init_retry_delay_seconds); let top_n = self.top_n; let downsampling_interval = self.downsampling_interval; + let enable_tikv_topsql = self.enable_tikv_topsql; let topru = self.topru.clone(); let schema_update_interval = Duration::from_secs(60); @@ -161,6 +171,7 @@ impl SourceConfig for TopSQLConfig { init_retry_delay, top_n, downsampling_interval, + enable_tikv_topsql, schema_update_interval, tls, &cx.proxy, From fc223dc37ac09fb41f29636c32d35bb96bd00d33 Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Fri, 20 Mar 2026 19:11:54 +0800 Subject: [PATCH 10/11] topsql: parse serverless keyspace route from pd --- src/common/keyspace_cluster.rs | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/common/keyspace_cluster.rs b/src/common/keyspace_cluster.rs index 7aeee19d..a5d91f12 100644 --- a/src/common/keyspace_cluster.rs +++ b/src/common/keyspace_cluster.rs @@ -14,8 +14,19 @@ type BoxError = Box; const REQUEST_TIMEOUT: Duration = Duration::from_secs(10); const CONNECT_TIMEOUT: Duration = Duration::from_secs(3); -const ORG_ID_KEYS: &[&str] = &["tenant_id", "TenantID", "org_id", "organization_id"]; -const CLUSTER_ID_KEYS: &[&str] = &["cluster_id", "ClusterId", "tidb_cluster_id"]; +const ORG_ID_KEYS: &[&str] = &[ + "tenant_id", + "TenantID", + "org_id", + "organization_id", + "serverless_tenant_id", +]; +const CLUSTER_ID_KEYS: &[&str] = &[ + "cluster_id", + "ClusterId", + "tidb_cluster_id", + "serverless_cluster_id", +]; #[derive(Clone, Debug, Eq, PartialEq)] pub struct KeyspaceRoute { @@ -214,6 +225,21 @@ mod tests { cluster_id: "10762701230946915645".to_string(), }) ); + + let mut serverless_config = HashMap::new(); + serverless_config.insert("serverless_tenant_id".to_string(), "30018".to_string()); + serverless_config.insert( + "serverless_cluster_id".to_string(), + "10155668891296301432".to_string(), + ); + + assert_eq!( + extract_route_from_config(&serverless_config), + Some(KeyspaceRoute { + org_id: "30018".to_string(), + cluster_id: "10155668891296301432".to_string(), + }) + ); } #[tokio::test] From b780f14fc0bdad9cd361ef38fe3b3440450bf4ee Mon Sep 17 00:00:00 2001 From: zeminzhou Date: Sat, 21 Mar 2026 09:36:32 +0800 Subject: [PATCH 11/11] topsql: only use serverless route keys --- src/common/keyspace_cluster.rs | 46 ++++++++++++---------------------- 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/src/common/keyspace_cluster.rs b/src/common/keyspace_cluster.rs index a5d91f12..41a52498 100644 --- a/src/common/keyspace_cluster.rs +++ b/src/common/keyspace_cluster.rs @@ -14,19 +14,8 @@ type BoxError = Box; const REQUEST_TIMEOUT: Duration = Duration::from_secs(10); const CONNECT_TIMEOUT: Duration = Duration::from_secs(3); -const ORG_ID_KEYS: &[&str] = &[ - "tenant_id", - "TenantID", - "org_id", - "organization_id", - "serverless_tenant_id", -]; -const CLUSTER_ID_KEYS: &[&str] = &[ - "cluster_id", - "ClusterId", - "tidb_cluster_id", - "serverless_cluster_id", -]; +const ORG_ID_KEYS: &[&str] = &["serverless_tenant_id"]; +const CLUSTER_ID_KEYS: &[&str] = &["serverless_cluster_id"]; #[derive(Clone, Debug, Eq, PartialEq)] pub struct KeyspaceRoute { @@ -210,22 +199,7 @@ mod tests { } #[test] - fn extract_route_from_config_supports_expected_aliases() { - let mut config = HashMap::new(); - config.insert("tenant_id".to_string(), "30018".to_string()); - config.insert( - "tidb_cluster_id".to_string(), - "10762701230946915645".to_string(), - ); - - assert_eq!( - extract_route_from_config(&config), - Some(KeyspaceRoute { - org_id: "30018".to_string(), - cluster_id: "10762701230946915645".to_string(), - }) - ); - + fn extract_route_from_config_uses_serverless_route_keys() { let mut serverless_config = HashMap::new(); serverless_config.insert("serverless_tenant_id".to_string(), "30018".to_string()); serverless_config.insert( @@ -242,6 +216,18 @@ mod tests { ); } + #[test] + fn extract_route_from_config_ignores_legacy_route_keys() { + let mut legacy_config = HashMap::new(); + legacy_config.insert("tenant_id".to_string(), "30018".to_string()); + legacy_config.insert( + "tidb_cluster_id".to_string(), + "10762701230946915645".to_string(), + ); + + assert_eq!(extract_route_from_config(&legacy_config), None); + } + #[tokio::test] async fn resolve_keyspace_uses_pd_keyspace_api_and_caches_result() { let request_count = Arc::new(AtomicUsize::new(0)); @@ -260,7 +246,7 @@ mod tests { counter.fetch_add(1, Ordering::SeqCst); assert_eq!(request.uri().path(), "/pd/api/v2/keyspaces/test_keyspace"); Ok::<_, Infallible>(Response::new(Body::from( - r#"{"config":{"tenant_id":"30018","cluster_id":"10762701230946915645"}}"#, + r#"{"config":{"serverless_tenant_id":"30018","serverless_cluster_id":"10762701230946915645"}}"#, ))) } }))