From 96ee4b6e50080c7835aba5031d7d1a58c10b9fcf Mon Sep 17 00:00:00 2001 From: dev-lew <> Date: Fri, 30 Jan 2026 15:16:43 -0500 Subject: [PATCH 01/12] Create initial comment removal function --- pgdog/src/frontend/router/parser/comment.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pgdog/src/frontend/router/parser/comment.rs b/pgdog/src/frontend/router/parser/comment.rs index a87883ad..bcbfa63e 100644 --- a/pgdog/src/frontend/router/parser/comment.rs +++ b/pgdog/src/frontend/router/parser/comment.rs @@ -1,6 +1,7 @@ use once_cell::sync::Lazy; +use pg_query::protobuf::ScanResult; use pg_query::scan_raw; -use pg_query::{protobuf::Token, scan}; +use pg_query::{protobuf::ScanToken, protobuf::Token, scan}; use pgdog_config::QueryParserEngine; use regex::Regex; @@ -86,6 +87,20 @@ pub fn comment( Ok((None, role)) } +pub fn remove_comments(query: &str, engine: QueryParserEngine) -> Result { + let mut result = match engine { + QueryParserEngine::PgQueryProtobuf => scan(query), + QueryParserEngine::PgQueryRaw => scan_raw(query), + } + .map_err(Error::PgQuery)?; + + result + .tokens + .retain(|st| st.token != Token::CComment as i32 && st.token != Token::SqlComment as i32); + + Ok(result) +} + #[cfg(test)] mod tests { use pgdog_config::SystemCatalogsBehavior; From f58258c9d193c298dcde4784299d654f8fbade00 Mon Sep 17 00:00:00 2001 From: dev-lew <> Date: Sat, 31 Jan 2026 14:59:44 -0500 Subject: [PATCH 02/12] Change the implementation for remove_comments The previous approach is incorrect because we need a str representing the query instead of just a ScanResult. This approach reconstructs the query with string slicing by using the indices where comments are present as returned by the scanner. --- pgdog/src/frontend/router/parser/comment.rs | 44 +++++++++++++++++---- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/pgdog/src/frontend/router/parser/comment.rs b/pgdog/src/frontend/router/parser/comment.rs index bcbfa63e..0bd2d759 100644 --- a/pgdog/src/frontend/router/parser/comment.rs +++ b/pgdog/src/frontend/router/parser/comment.rs @@ -1,7 +1,6 @@ use once_cell::sync::Lazy; -use pg_query::protobuf::ScanResult; use pg_query::scan_raw; -use pg_query::{protobuf::ScanToken, protobuf::Token, scan}; +use pg_query::{protobuf::Token, scan}; use pgdog_config::QueryParserEngine; use regex::Regex; @@ -87,18 +86,49 @@ pub fn comment( Ok((None, role)) } -pub fn remove_comments(query: &str, engine: QueryParserEngine) -> Result { - let mut result = match engine { +pub fn has_comments(query: &str, engine: QueryParserEngine) -> Result { + let result = match engine { QueryParserEngine::PgQueryProtobuf => scan(query), QueryParserEngine::PgQueryRaw => scan_raw(query), } .map_err(Error::PgQuery)?; - result + Ok(result .tokens - .retain(|st| st.token != Token::CComment as i32 && st.token != Token::SqlComment as i32); + .iter() + .any(|st| st.token == Token::CComment as i32 || st.token == Token::SqlComment as i32)) +} + +pub fn remove_comments(query: &str, engine: QueryParserEngine) -> Result { + let result = match engine { + QueryParserEngine::PgQueryProtobuf => scan(query), + QueryParserEngine::PgQueryRaw => scan_raw(query), + } + .map_err(Error::PgQuery)?; + + let comment_ranges: Vec<(usize, usize)> = result + .tokens + .iter() + .filter(|st| st.token == Token::CComment as i32 || st.token == Token::SqlComment as i32) + .map(|t| (t.start as usize, t.end as usize)) + .collect(); + + let mut filtered_query = String::with_capacity(query.len()); + let mut cursor = 0usize; + + for (start, end) in comment_ranges { + if cursor < start { + filtered_query.push_str(&query[cursor..start]); + } + + cursor = end; + } + + if cursor < query.len() { + filtered_query.push_str(&query[cursor..]); + } - Ok(result) + Ok(filtered_query.trim().to_string()) } #[cfg(test)] From 9b5a0bcca6f46213a68ca9fadba851136260f58b Mon Sep 17 00:00:00 2001 From: dev-lew <> Date: Sat, 31 Jan 2026 15:03:57 -0500 Subject: [PATCH 03/12] Rename variable for clarity --- pgdog/src/frontend/router/parser/comment.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pgdog/src/frontend/router/parser/comment.rs b/pgdog/src/frontend/router/parser/comment.rs index 0bd2d759..104c06b6 100644 --- a/pgdog/src/frontend/router/parser/comment.rs +++ b/pgdog/src/frontend/router/parser/comment.rs @@ -110,7 +110,7 @@ pub fn remove_comments(query: &str, engine: QueryParserEngine) -> Result Date: Tue, 3 Feb 2026 12:07:20 -0500 Subject: [PATCH 04/12] Upload untested except version This version will accept a list of regexs to keep --- pgdog/src/frontend/router/parser/comment.rs | 26 +++++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/pgdog/src/frontend/router/parser/comment.rs b/pgdog/src/frontend/router/parser/comment.rs index 104c06b6..b45d5e3d 100644 --- a/pgdog/src/frontend/router/parser/comment.rs +++ b/pgdog/src/frontend/router/parser/comment.rs @@ -11,11 +11,12 @@ use crate::frontend::router::sharding::ContextBuilder; use super::super::parser::Shard; use super::Error; -static SHARD: Lazy = Lazy::new(|| Regex::new(r#"pgdog_shard: *([0-9]+)"#).unwrap()); -static SHARDING_KEY: Lazy = Lazy::new(|| { +pub static SHARD: Lazy = Lazy::new(|| Regex::new(r#"pgdog_shard: *([0-9]+)"#).unwrap()); +pub static SHARDING_KEY: Lazy = Lazy::new(|| { Regex::new(r#"pgdog_sharding_key: *(?:"([^"]*)"|'([^']*)'|([0-9a-zA-Z-]+))"#).unwrap() }); -static ROLE: Lazy = Lazy::new(|| Regex::new(r#"pgdog_role: *(primary|replica)"#).unwrap()); +pub static ROLE: Lazy = + Lazy::new(|| Regex::new(r#"pgdog_role: *(primary|replica)"#).unwrap()); fn get_matched_value<'a>(caps: &'a regex::Captures<'a>) -> Option<&'a str> { caps.get(1) @@ -99,17 +100,32 @@ pub fn has_comments(query: &str, engine: QueryParserEngine) -> Result Result { +pub fn remove_comments( + query: &str, + engine: QueryParserEngine, + except: Option<&[&Regex]>, +) -> Result { let result = match engine { QueryParserEngine::PgQueryProtobuf => scan(query), QueryParserEngine::PgQueryRaw => scan_raw(query), } .map_err(Error::PgQuery)?; - let comment_ranges: Vec<(usize, usize)> = result + let tokens_to_remove = result .tokens .iter() .filter(|st| st.token == Token::CComment as i32 || st.token == Token::SqlComment as i32) + .filter(|st| { + if let Some(except) = except { + let text = &query[st.start as usize..st.end as usize]; + + !except.iter().any(|re| re.is_match(text)) + } else { + true + } + }); + + let comment_ranges: Vec<(usize, usize)> = tokens_to_remove .map(|st| (st.start as usize, st.end as usize)) .collect(); From 0319387cdff37bf37fe77bf365b18edd30c86fa5 Mon Sep 17 00:00:00 2001 From: dev-lew <> Date: Tue, 3 Feb 2026 22:26:12 -0500 Subject: [PATCH 05/12] Simplify removal code Add ability to keep pgdog metadata comments anywhere in the string --- pgdog/src/frontend/router/parser/comment.rs | 53 +++++++++++---------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/pgdog/src/frontend/router/parser/comment.rs b/pgdog/src/frontend/router/parser/comment.rs index b45d5e3d..09164c98 100644 --- a/pgdog/src/frontend/router/parser/comment.rs +++ b/pgdog/src/frontend/router/parser/comment.rs @@ -111,40 +111,41 @@ pub fn remove_comments( } .map_err(Error::PgQuery)?; - let tokens_to_remove = result - .tokens - .iter() - .filter(|st| st.token == Token::CComment as i32 || st.token == Token::SqlComment as i32) - .filter(|st| { - if let Some(except) = except { - let text = &query[st.start as usize..st.end as usize]; - - !except.iter().any(|re| re.is_match(text)) - } else { - true - } - }); + let mut out = String::with_capacity(query.len()); - let comment_ranges: Vec<(usize, usize)> = tokens_to_remove - .map(|st| (st.start as usize, st.end as usize)) - .collect(); + for st in &result.tokens { + let start = st.start as usize; + let end = st.end as usize; - let mut filtered_query = String::with_capacity(query.len()); - let mut cursor = 0usize; + match st.token { + t if t == Token::CComment as i32 => { + let comment = &query[start..end]; - for (start, end) in comment_ranges { - if cursor < start { - filtered_query.push_str(&query[cursor..start]); - } + if let Some(except) = except { + let rewritten = keep_only_matching(comment, except); - cursor = end; + out.push_str(&rewritten); + } + } + _ => { + out.push_str(&query[start..end]); + } + } } - if cursor < query.len() { - filtered_query.push_str(&query[cursor..]); + Ok(out) +} + +fn keep_only_matching(comment: &str, regs: &[&Regex]) -> String { + let mut out = String::new(); + + for reg in regs { + for m in reg.find_iter(comment) { + out.push_str(m.as_str()); + } } - Ok(filtered_query.trim().to_string()) + out } #[cfg(test)] From 212e1434482d4077bdab354b0569502e2deeee54 Mon Sep 17 00:00:00 2001 From: dev-lew <> Date: Tue, 3 Feb 2026 22:38:46 -0500 Subject: [PATCH 06/12] Preserve non token characters Not preserving these would mean queries with comments would never match their commentless counterparts --- pgdog/src/frontend/router/parser/comment.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pgdog/src/frontend/router/parser/comment.rs b/pgdog/src/frontend/router/parser/comment.rs index 09164c98..a1a8acb8 100644 --- a/pgdog/src/frontend/router/parser/comment.rs +++ b/pgdog/src/frontend/router/parser/comment.rs @@ -111,12 +111,15 @@ pub fn remove_comments( } .map_err(Error::PgQuery)?; + let mut cursor = 0; let mut out = String::with_capacity(query.len()); for st in &result.tokens { let start = st.start as usize; let end = st.end as usize; + out.push_str(&query[cursor..start]); + match st.token { t if t == Token::CComment as i32 => { let comment = &query[start..end]; @@ -131,6 +134,12 @@ pub fn remove_comments( out.push_str(&query[start..end]); } } + + cursor = end; + } + + if cursor < query.len() { + out.push_str(&query[cursor..]); } Ok(out) From f80d1e7390787a4acde3137c271259240ea5a50e Mon Sep 17 00:00:00 2001 From: dev-lew <> Date: Wed, 4 Feb 2026 16:59:50 -0500 Subject: [PATCH 07/12] Add retry logic in cache_impl.rs --- .../router/parser/cache/cache_impl.rs | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pgdog/src/frontend/router/parser/cache/cache_impl.rs b/pgdog/src/frontend/router/parser/cache/cache_impl.rs index 4c50e61f..c6cfe112 100644 --- a/pgdog/src/frontend/router/parser/cache/cache_impl.rs +++ b/pgdog/src/frontend/router/parser/cache/cache_impl.rs @@ -11,6 +11,7 @@ use tracing::debug; use super::super::{Error, Route}; use super::{Ast, AstContext}; +use crate::frontend::router::parser::comment; use crate::frontend::{BufferedQuery, PreparedStatements}; static CACHE: Lazy = Lazy::new(Cache::new); @@ -118,6 +119,27 @@ impl Cache { } } + if comment::has_comments(query.query(), ctx.sharding_schema.query_parser_engine)? { + // Check cache again after removing comments. + let filtered_query = comment::remove_comments( + query.query(), + ctx.sharding_schema.query_parser_engine, + Some(&[&*comment::SHARD, &*comment::SHARDING_KEY, &*comment::ROLE]), + )?; + + let mut guard = self.inner.lock(); + let ast = guard.queries.get_mut(&filtered_query).map(|entry| { + entry.stats.lock().hits += 1; + entry.clone() + }); + + if let Some(ast) = ast { + guard.stats.hits += 1; + + return Ok(ast); + } + } + // Parse query without holding lock. let entry = Ast::with_context(query, ctx, prepared_statements)?; let parse_time = entry.stats.lock().parse_time; From fe745e115cb73f25a9f4646c245c7182b831d6a4 Mon Sep 17 00:00:00 2001 From: dev-lew <55928726+dev-lew@users.noreply.github.com> Date: Sat, 7 Feb 2026 12:53:49 -0500 Subject: [PATCH 08/12] Refactor according to comments Logic for comment parsing happens before Ast creation resulting in fewer calls to scan. --- pgdog/src/frontend/router/parser/cache/ast.rs | 11 ++- .../router/parser/cache/cache_impl.rs | 38 +++++++--- pgdog/src/frontend/router/parser/comment.rs | 76 +++++++++---------- 3 files changed, 69 insertions(+), 56 deletions(-) diff --git a/pgdog/src/frontend/router/parser/cache/ast.rs b/pgdog/src/frontend/router/parser/cache/ast.rs index 2be0e0fb..c80c560e 100644 --- a/pgdog/src/frontend/router/parser/cache/ast.rs +++ b/pgdog/src/frontend/router/parser/cache/ast.rs @@ -7,9 +7,7 @@ use std::{collections::HashSet, ops::Deref}; use parking_lot::Mutex; use std::sync::Arc; -use super::super::{ - comment::comment, Error, Route, Shard, StatementRewrite, StatementRewriteContext, Table, -}; +use super::super::{Error, Route, Shard, StatementRewrite, StatementRewriteContext, Table}; use super::{Fingerprint, Stats}; use crate::backend::schema::Schema; use crate::frontend::router::parser::rewrite::statement::RewritePlan; @@ -72,6 +70,8 @@ impl Ast { schema: &ShardingSchema, db_schema: &Schema, prepared_statements: &mut PreparedStatements, + comment_shard: Option, + comment_role: Option, user: &str, search_path: Option<&ParameterValue>, ) -> Result { @@ -81,7 +81,6 @@ impl Ast { QueryParserEngine::PgQueryRaw => parse_raw(query), } .map_err(Error::PgQuery)?; - let (comment_shard, comment_role) = comment(query, schema)?; let fingerprint = Fingerprint::new(query, schema.query_parser_engine).map_err(Error::PgQuery)?; @@ -125,12 +124,16 @@ impl Ast { query: &BufferedQuery, ctx: &super::AstContext<'_>, prepared_statements: &mut PreparedStatements, + shard: Option, + role: Option, ) -> Result { Self::new( query, &ctx.sharding_schema, &ctx.db_schema, prepared_statements, + shard, + role, ctx.user, ctx.search_path, ) diff --git a/pgdog/src/frontend/router/parser/cache/cache_impl.rs b/pgdog/src/frontend/router/parser/cache/cache_impl.rs index c6cfe112..cd8aac8b 100644 --- a/pgdog/src/frontend/router/parser/cache/cache_impl.rs +++ b/pgdog/src/frontend/router/parser/cache/cache_impl.rs @@ -2,8 +2,10 @@ use lru::LruCache; use once_cell::sync::Lazy; use pg_query::normalize; use pgdog_config::QueryParserEngine; +use std::borrow::Cow; use std::collections::HashMap; use std::time::Duration; +use tracing_subscriber::filter::Filtered; use parking_lot::Mutex; use std::sync::Arc; @@ -98,6 +100,10 @@ impl Cache { /// Parse a statement by either getting it from cache /// or using pg_query parser. /// + /// In the event of cache miss, we retry after removing all comments except + /// for pgdog metadata. We retain it for correctness, since a query with + /// that metadata must not map to an identical query without it. + /// /// N.B. There is a race here that allows multiple threads to /// parse the same query. That's better imo than locking the data structure /// while we parse the query. @@ -119,33 +125,38 @@ impl Cache { } } - if comment::has_comments(query.query(), ctx.sharding_schema.query_parser_engine)? { - // Check cache again after removing comments. - let filtered_query = comment::remove_comments( - query.query(), - ctx.sharding_schema.query_parser_engine, - Some(&[&*comment::SHARD, &*comment::SHARDING_KEY, &*comment::ROLE]), - )?; + let (maybe_shard, maybe_role, maybe_filtered_query) = + comment::parse_comment(&query, &ctx.sharding_schema)?; + + let query_to_cache: Cow<'_, str>; + if let Some(filtered_query) = maybe_filtered_query { + query_to_cache = Cow::Owned(filtered_query); + + // Check cache again after removing comments from query let mut guard = self.inner.lock(); - let ast = guard.queries.get_mut(&filtered_query).map(|entry| { + + let ast = guard.queries.get_mut(&*query_to_cache).map(|entry| { entry.stats.lock().hits += 1; entry.clone() }); if let Some(ast) = ast { guard.stats.hits += 1; - return Ok(ast); } + } else { + query_to_cache = Cow::Borrowed(query.query()); } // Parse query without holding lock. - let entry = Ast::with_context(query, ctx, prepared_statements)?; + let entry = Ast::with_context(query, ctx, prepared_statements, maybe_shard, maybe_role)?; let parse_time = entry.stats.lock().parse_time; let mut guard = self.inner.lock(); - guard.queries.put(query.query().to_string(), entry.clone()); + guard + .queries + .put(query_to_cache.into_owned(), entry.clone()); guard.stats.misses += 1; guard.stats.parse_time += parse_time; @@ -160,7 +171,10 @@ impl Cache { ctx: &AstContext<'_>, prepared_statements: &mut PreparedStatements, ) -> Result { - let mut entry = Ast::with_context(query, ctx, prepared_statements)?; + let (maybe_shard, maybe_role, _) = comment::parse_comment(&query, &ctx.sharding_schema)?; + + let mut entry = + Ast::with_context(query, ctx, prepared_statements, maybe_shard, maybe_role)?; entry.cached = false; let parse_time = entry.stats.lock().parse_time; diff --git a/pgdog/src/frontend/router/parser/comment.rs b/pgdog/src/frontend/router/parser/comment.rs index a1a8acb8..b65968d8 100644 --- a/pgdog/src/frontend/router/parser/comment.rs +++ b/pgdog/src/frontend/router/parser/comment.rs @@ -1,4 +1,5 @@ use once_cell::sync::Lazy; +use pg_query::protobuf::ScanToken; use pg_query::scan_raw; use pg_query::{protobuf::Token, scan}; use pgdog_config::QueryParserEngine; @@ -25,23 +26,26 @@ fn get_matched_value<'a>(caps: &'a regex::Captures<'a>) -> Option<&'a str> { .map(|m| m.as_str()) } -/// Extract shard number from a comment. +/// Extract shard number from a comment. Additionally returns the entire +/// comment string if it exists. /// -/// Comment style uses the C-style comments (not SQL comments!) +/// Comment style for the shard metadata uses the C-style comments (not SQL comments!) /// as to allow the comment to appear anywhere in the query. /// /// See [`SHARD`] and [`SHARDING_KEY`] for the style of comment we expect. /// -pub fn comment( +pub fn parse_comment( query: &str, schema: &ShardingSchema, -) -> Result<(Option, Option), Error> { +) -> Result<(Option, Option, Option), Error> { let tokens = match schema.query_parser_engine { QueryParserEngine::PgQueryProtobuf => scan(query), QueryParserEngine::PgQueryRaw => scan_raw(query), } .map_err(Error::PgQuery)?; + let mut shard = None; let mut role = None; + let mut filtered_query = None; for token in tokens.tokens.iter() { if token.token == Token::CComment as i32 { @@ -58,63 +62,55 @@ pub fn comment( if let Some(cap) = SHARDING_KEY.captures(comment) { if let Some(sharding_key) = get_matched_value(&cap) { if let Some(schema) = schema.schemas.get(Some(sharding_key.into())) { - return Ok((Some(schema.shard().into()), role)); + shard = Some(schema.shard().into()); + } else { + let ctx = ContextBuilder::infer_from_from_and_config(sharding_key, schema)? + .shards(schema.shards) + .build()?; + shard = Some(ctx.apply()?); } - let ctx = ContextBuilder::infer_from_from_and_config(sharding_key, schema)? - .shards(schema.shards) - .build()?; - return Ok((Some(ctx.apply()?), role)); } } if let Some(cap) = SHARD.captures(comment) { - if let Some(shard) = cap.get(1) { - return Ok(( - Some( - shard - .as_str() - .parse::() - .ok() - .map(Shard::Direct) - .unwrap_or(Shard::All), - ), - role, - )); + if let Some(s) = cap.get(1) { + shard = Some( + s.as_str() + .parse::() + .ok() + .map(Shard::Direct) + .unwrap_or(Shard::All), + ); } } } } - Ok((None, role)) -} - -pub fn has_comments(query: &str, engine: QueryParserEngine) -> Result { - let result = match engine { - QueryParserEngine::PgQueryProtobuf => scan(query), - QueryParserEngine::PgQueryRaw => scan_raw(query), + if has_comments(&tokens.tokens) { + filtered_query = Some(remove_comments( + query, + &tokens.tokens, + Some(&[&SHARD, &*SHARDING_KEY, &ROLE]), + )?); } - .map_err(Error::PgQuery)?; - Ok(result - .tokens + Ok((shard, role, filtered_query)) +} + +pub fn has_comments(tokenized_query: &Vec) -> bool { + tokenized_query .iter() - .any(|st| st.token == Token::CComment as i32 || st.token == Token::SqlComment as i32)) + .any(|st| st.token == Token::CComment as i32 || st.token == Token::SqlComment as i32) } pub fn remove_comments( query: &str, - engine: QueryParserEngine, + tokenized_query: &Vec, except: Option<&[&Regex]>, ) -> Result { - let result = match engine { - QueryParserEngine::PgQueryProtobuf => scan(query), - QueryParserEngine::PgQueryRaw => scan_raw(query), - } - .map_err(Error::PgQuery)?; - let mut cursor = 0; let mut out = String::with_capacity(query.len()); - for st in &result.tokens { + for st in tokenized_query { let start = st.start as usize; let end = st.end as usize; From 22c74536e875aaeb47c00a88ce6d88b70e5eb253 Mon Sep 17 00:00:00 2001 From: dev-lew Date: Sun, 8 Feb 2026 21:14:11 -0500 Subject: [PATCH 09/12] Update some tests to use the new function --- .../src/frontend/router/parser/cache/test.rs | 2 +- pgdog/src/frontend/router/parser/comment.rs | 12 +++---- .../frontend/router/parser/query/explain.rs | 36 ++++++++++++++++++- .../src/frontend/router/parser/query/show.rs | 4 +++ 4 files changed, 46 insertions(+), 8 deletions(-) diff --git a/pgdog/src/frontend/router/parser/cache/test.rs b/pgdog/src/frontend/router/parser/cache/test.rs index f996b916..c15641c8 100644 --- a/pgdog/src/frontend/router/parser/cache/test.rs +++ b/pgdog/src/frontend/router/parser/cache/test.rs @@ -121,7 +121,7 @@ fn test_tables_list() { "DELETE FROM private_schema.test", "DROP TABLE private_schema.test", ] { - let ast = Ast::new(&BufferedQuery::Query(Query::new(q)), &ShardingSchema::default(), &db_schema, &mut prepared_statements, "", None).unwrap(); + let ast = Ast::new(&BufferedQuery::Query(Query::new(q)), &ShardingSchema::default(), &db_schema, &mut prepared_statements, None, None, "", None).unwrap(); let tables = ast.tables(); println!("{:?}", tables); } diff --git a/pgdog/src/frontend/router/parser/comment.rs b/pgdog/src/frontend/router/parser/comment.rs index b65968d8..6081fb3c 100644 --- a/pgdog/src/frontend/router/parser/comment.rs +++ b/pgdog/src/frontend/router/parser/comment.rs @@ -233,7 +233,7 @@ mod tests { }; let query = "SELECT * FROM users /* pgdog_role: primary */"; - let result = comment(query, &schema).unwrap(); + let result = parse_comment(query, &schema).unwrap(); assert_eq!(result.1, Some(Role::Primary)); } @@ -248,7 +248,7 @@ mod tests { }; let query = "SELECT * FROM users /* pgdog_role: replica pgdog_shard: 2 */"; - let result = comment(query, &schema).unwrap(); + let result = parse_comment(query, &schema).unwrap(); assert_eq!(result.0, Some(Shard::Direct(2))); assert_eq!(result.1, Some(Role::Replica)); } @@ -264,7 +264,7 @@ mod tests { }; let query = "SELECT * FROM users /* pgdog_role: replica */"; - let result = comment(query, &schema).unwrap(); + let result = parse_comment(query, &schema).unwrap(); assert_eq!(result.1, Some(Role::Replica)); } @@ -279,7 +279,7 @@ mod tests { }; let query = "SELECT * FROM users /* pgdog_role: invalid */"; - let result = comment(query, &schema).unwrap(); + let result = parse_comment(query, &schema).unwrap(); assert_eq!(result.1, None); } @@ -294,7 +294,7 @@ mod tests { }; let query = "SELECT * FROM users"; - let result = comment(query, &schema).unwrap(); + let result = parse_comment(query, &schema).unwrap(); assert_eq!(result.1, None); } @@ -319,7 +319,7 @@ mod tests { }; let query = "SELECT * FROM users /* pgdog_sharding_key: sales */"; - let result = comment(query, &schema).unwrap(); + let result = parse_comment(query, &schema).unwrap(); assert_eq!(result.0, Some(Shard::Direct(1))); } } diff --git a/pgdog/src/frontend/router/parser/query/explain.rs b/pgdog/src/frontend/router/parser/query/explain.rs index 6e7b382d..bf1e0f5f 100644 --- a/pgdog/src/frontend/router/parser/query/explain.rs +++ b/pgdog/src/frontend/router/parser/query/explain.rs @@ -81,6 +81,38 @@ mod tests { &cluster.sharding_schema(), &cluster.schema(), &mut stmts, + None, + None, + "", + None, + ) + .unwrap(); + let mut buffer = ClientRequest::from(vec![Query::new(sql).into()]); + buffer.ast = Some(ast); + + let params = Parameters::default(); + let ctx = RouterContext::new(&buffer, &cluster, ¶ms, None, Sticky::new()).unwrap(); + + match QueryParser::default().parse(ctx).unwrap().clone() { + Command::Query(route) => route, + _ => panic!("expected Query command"), + } + } + + fn route_comment(sql: &str) -> Route { + enable_expanded_explain(); + let cluster = Cluster::new_test(&config()); + let mut stmts = PreparedStatements::default(); + + let (shard, role, _) = comment::parse_comment(sql, &cluster.sharding_schema()).unwrap(); + + let ast = Ast::new( + &BufferedQuery::Query(Query::new(sql)), + &cluster.sharding_schema(), + &cluster.schema(), + &mut stmts, + shard, + role, "", None, ) @@ -119,6 +151,8 @@ mod tests { &cluster.sharding_schema(), &cluster.schema(), &mut stmts, + None, + None, "", None, ) @@ -244,7 +278,7 @@ mod tests { #[test] fn test_explain_with_comment_override() { - let r = route("/* pgdog_shard: 5 */ EXPLAIN SELECT * FROM sharded"); + let r = route_comment("/* pgdog_shard: 5 */ EXPLAIN SELECT * FROM sharded"); assert_eq!(r.shard(), &Shard::Direct(5)); let lines = r.explain().unwrap().render_lines(); assert_eq!(lines[3], " Shard 5: manual override to shard=5"); diff --git a/pgdog/src/frontend/router/parser/query/show.rs b/pgdog/src/frontend/router/parser/query/show.rs index f59f1c6b..7e7be98a 100644 --- a/pgdog/src/frontend/router/parser/query/show.rs +++ b/pgdog/src/frontend/router/parser/query/show.rs @@ -51,6 +51,8 @@ mod test_show { &c.sharding_schema(), &c.schema(), &mut PreparedStatements::default(), + None, + None, "", None, ) @@ -72,6 +74,8 @@ mod test_show { &c.sharding_schema(), &c.schema(), &mut PreparedStatements::default(), + None, + None, "", None, ) From a5abacc69b924ff808c460a5bfa7a6d6be45a4e2 Mon Sep 17 00:00:00 2001 From: dev-lew Date: Mon, 9 Feb 2026 10:46:49 -0500 Subject: [PATCH 10/12] Remove random import --- pgdog/src/frontend/router/parser/cache/cache_impl.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/pgdog/src/frontend/router/parser/cache/cache_impl.rs b/pgdog/src/frontend/router/parser/cache/cache_impl.rs index cd8aac8b..4b28343b 100644 --- a/pgdog/src/frontend/router/parser/cache/cache_impl.rs +++ b/pgdog/src/frontend/router/parser/cache/cache_impl.rs @@ -5,7 +5,6 @@ use pgdog_config::QueryParserEngine; use std::borrow::Cow; use std::collections::HashMap; use std::time::Duration; -use tracing_subscriber::filter::Filtered; use parking_lot::Mutex; use std::sync::Arc; From 6d4e81236e72fa063d475cad86dd4519a3f6b6ff Mon Sep 17 00:00:00 2001 From: dev-lew Date: Mon, 9 Feb 2026 10:54:10 -0500 Subject: [PATCH 11/12] Regexes no longer have to be public --- pgdog/src/frontend/router/parser/comment.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pgdog/src/frontend/router/parser/comment.rs b/pgdog/src/frontend/router/parser/comment.rs index 6081fb3c..ed9a145c 100644 --- a/pgdog/src/frontend/router/parser/comment.rs +++ b/pgdog/src/frontend/router/parser/comment.rs @@ -12,12 +12,11 @@ use crate::frontend::router::sharding::ContextBuilder; use super::super::parser::Shard; use super::Error; -pub static SHARD: Lazy = Lazy::new(|| Regex::new(r#"pgdog_shard: *([0-9]+)"#).unwrap()); -pub static SHARDING_KEY: Lazy = Lazy::new(|| { +static SHARD: Lazy = Lazy::new(|| Regex::new(r#"pgdog_shard: *([0-9]+)"#).unwrap()); +static SHARDING_KEY: Lazy = Lazy::new(|| { Regex::new(r#"pgdog_sharding_key: *(?:"([^"]*)"|'([^']*)'|([0-9a-zA-Z-]+))"#).unwrap() }); -pub static ROLE: Lazy = - Lazy::new(|| Regex::new(r#"pgdog_role: *(primary|replica)"#).unwrap()); +static ROLE: Lazy = Lazy::new(|| Regex::new(r#"pgdog_role: *(primary|replica)"#).unwrap()); fn get_matched_value<'a>(caps: &'a regex::Captures<'a>) -> Option<&'a str> { caps.get(1) From 02efcaab73c25ff1147d9f2c2ab9984c384b8adf Mon Sep 17 00:00:00 2001 From: dev-lew Date: Wed, 11 Feb 2026 18:10:35 -0500 Subject: [PATCH 12/12] Implement ordering optimization Standardized the returned query if there are comments. We prepend and sort the metadata comments so they are consistent. --- pgdog/src/frontend/router/parser/comment.rs | 28 +++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/pgdog/src/frontend/router/parser/comment.rs b/pgdog/src/frontend/router/parser/comment.rs index ed9a145c..a6ad9f2c 100644 --- a/pgdog/src/frontend/router/parser/comment.rs +++ b/pgdog/src/frontend/router/parser/comment.rs @@ -108,6 +108,7 @@ pub fn remove_comments( ) -> Result { let mut cursor = 0; let mut out = String::with_capacity(query.len()); + let mut metadata = Vec::with_capacity(3); for st in tokenized_query { let start = st.start as usize; @@ -120,9 +121,9 @@ pub fn remove_comments( let comment = &query[start..end]; if let Some(except) = except { - let rewritten = keep_only_matching(comment, except); + let m = keep_only_matching(comment, except); - out.push_str(&rewritten); + metadata.push(m.to_string()); } } _ => { @@ -137,6 +138,9 @@ pub fn remove_comments( out.push_str(&query[cursor..]); } + metadata.sort_unstable(); + out.insert_str(0, &metadata.join("")); + Ok(out) } @@ -252,6 +256,26 @@ mod tests { assert_eq!(result.1, Some(Role::Replica)); } + #[test] + fn test_remove_comments_no_exceptions() { + let query = "SELECT * FROM table /* comment */ WHERE id = 1"; + let tokens = scan(query).unwrap().tokens; + + let result = remove_comments(query, &tokens, None).unwrap(); + + assert_eq!(result, "SELECT * FROM table WHERE id = 1"); + } + + #[test] + fn test_remove_comments_with_exceptions() { + let query = "SELECT /* comment */ * FROM table /* pgdog_shard: 4 comment */ WHERE id = 1"; + let tokens = scan(query).unwrap().tokens; + + let result = remove_comments(query, &tokens, Some(&[&SHARD])).unwrap(); + + assert_eq!(result, "pgdog_shard: 4SELECT * FROM table WHERE id = 1"); + } + #[test] fn test_replica_role_detection() { use crate::backend::ShardedTables;