From 0cf6f882921f8b6c524408b8c837f3e42c40b13c Mon Sep 17 00:00:00 2001 From: Steve Dignam Date: Fri, 6 Mar 2026 18:32:51 -0500 Subject: [PATCH] parser: we weren't handling compound selects with extra parens This bit of the parser is pretty messy and could use a rethink. --- crates/squawk_parser/src/grammar.rs | 26 ++- .../data/ok/select_compound_union_select.sql | 6 + ...ests__select_compound_union_select_ok.snap | 148 ++++++++++++++++++ 3 files changed, 176 insertions(+), 4 deletions(-) diff --git a/crates/squawk_parser/src/grammar.rs b/crates/squawk_parser/src/grammar.rs index c4f7fe82..5b6c039f 100644 --- a/crates/squawk_parser/src/grammar.rs +++ b/crates/squawk_parser/src/grammar.rs @@ -118,7 +118,7 @@ fn opt_paren_select(p: &mut Parser<'_>, m: Option) -> Option, cm: CompletedMarker) -> CompletedMarker { p.error("expected start of a select statement") } } - select_trailing_clauses(p); + opt_select_trailing_clauses(p); m.complete(p, COMPOUND_SELECT) } @@ -2757,12 +2757,19 @@ fn select(p: &mut Parser, m: Option, r: &SelectRestrictions) -> Option) { +const SELECT_TRAILING_CLAUSES_FIRST: TokenSet = + TokenSet::new(&[ORDER_KW, FOR_KW, LIMIT_KW, OFFSET_KW, FETCH_KW]); + +fn opt_select_trailing_clauses(p: &mut Parser<'_>) -> bool { + if !p.at_ts(SELECT_TRAILING_CLAUSES_FIRST) { + return false; + } + opt_order_by_clause(p); let mut has_locking_clause = false; while p.at(FOR_KW) { @@ -2780,6 +2787,7 @@ fn select_trailing_clauses(p: &mut Parser<'_>) { opt_locking_clause(p); } } + true } // INTO [ TEMPORARY | TEMP | UNLOGGED ] [ TABLE ] new_table @@ -3337,6 +3345,16 @@ fn paren_data_source(p: &mut Parser<'_>) -> Option { p.bump(L_PAREN); // Then try to parse as a FROM_ITEM (which includes table references and joins) if opt_from_item(p) { + if p.at_ts(COMPOUND_SELECT_FIRST) { + let cm = m.complete(p, PAREN_SELECT); + let cm = compound_select(p, cm); + p.expect(R_PAREN); + return Some(cm.precede(p).complete(p, PAREN_SELECT)); + } + if opt_select_trailing_clauses(p) { + p.expect(R_PAREN); + return Some(m.complete(p, PAREN_SELECT)); + } p.expect(R_PAREN); return Some(m.complete(p, PAREN_EXPR)); } else { diff --git a/crates/squawk_parser/tests/data/ok/select_compound_union_select.sql b/crates/squawk_parser/tests/data/ok/select_compound_union_select.sql index d816ae85..54f6a3da 100644 --- a/crates/squawk_parser/tests/data/ok/select_compound_union_select.sql +++ b/crates/squawk_parser/tests/data/ok/select_compound_union_select.sql @@ -19,3 +19,9 @@ table t union table t; values (1), (2) union values (3), (4); +(((((select 1))) union select 1)); + +select * from (((((select 1))) union select 1)); + +select * from (((select 1) union select 2) limit 1) as t; + diff --git a/crates/squawk_parser/tests/snapshots/tests__select_compound_union_select_ok.snap b/crates/squawk_parser/tests/snapshots/tests__select_compound_union_select_ok.snap index f69c1dc8..e3c0dcee 100644 --- a/crates/squawk_parser/tests/snapshots/tests__select_compound_union_select_ok.snap +++ b/crates/squawk_parser/tests/snapshots/tests__select_compound_union_select_ok.snap @@ -440,3 +440,151 @@ SOURCE_FILE R_PAREN ")" SEMICOLON ";" WHITESPACE "\n\n" + PAREN_SELECT + L_PAREN "(" + PAREN_SELECT + L_PAREN "(" + COMPOUND_SELECT + PAREN_SELECT + L_PAREN "(" + PAREN_SELECT + L_PAREN "(" + PAREN_SELECT + L_PAREN "(" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + LITERAL + INT_NUMBER "1" + R_PAREN ")" + R_PAREN ")" + R_PAREN ")" + WHITESPACE " " + UNION_KW "union" + WHITESPACE " " + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + LITERAL + INT_NUMBER "1" + R_PAREN ")" + R_PAREN ")" + SEMICOLON ";" + WHITESPACE "\n\n" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + STAR "*" + WHITESPACE " " + FROM_CLAUSE + FROM_KW "from" + WHITESPACE " " + FROM_ITEM + PAREN_EXPR + L_PAREN "(" + FROM_ITEM + PAREN_SELECT + COMPOUND_SELECT + PAREN_SELECT + L_PAREN "(" + FROM_ITEM + PAREN_EXPR + L_PAREN "(" + FROM_ITEM + PAREN_EXPR + L_PAREN "(" + FROM_ITEM + PAREN_SELECT + L_PAREN "(" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + LITERAL + INT_NUMBER "1" + R_PAREN ")" + R_PAREN ")" + R_PAREN ")" + WHITESPACE " " + UNION_KW "union" + WHITESPACE " " + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + LITERAL + INT_NUMBER "1" + R_PAREN ")" + R_PAREN ")" + SEMICOLON ";" + WHITESPACE "\n\n" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + STAR "*" + WHITESPACE " " + FROM_CLAUSE + FROM_KW "from" + WHITESPACE " " + FROM_ITEM + PAREN_SELECT + L_PAREN "(" + FROM_ITEM + PAREN_EXPR + L_PAREN "(" + FROM_ITEM + COMPOUND_SELECT + PAREN_SELECT + L_PAREN "(" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + LITERAL + INT_NUMBER "1" + R_PAREN ")" + WHITESPACE " " + UNION_KW "union" + WHITESPACE " " + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + LITERAL + INT_NUMBER "2" + R_PAREN ")" + WHITESPACE " " + LIMIT_CLAUSE + LIMIT_KW "limit" + WHITESPACE " " + LITERAL + INT_NUMBER "1" + R_PAREN ")" + WHITESPACE " " + ALIAS + AS_KW "as" + WHITESPACE " " + NAME + IDENT "t" + SEMICOLON ";" + WHITESPACE "\n\n"