From 76bb987a2c708f1c3783011d72465f10bd3fe628 Mon Sep 17 00:00:00 2001 From: Steve Dignam Date: Sun, 15 Jun 2025 20:57:17 -0400 Subject: [PATCH] parser: fix paren select issues we weren't parsing trailing clauses for paren select and had some dupe paren select logic hanging around. --- crates/squawk_parser/src/grammar.rs | 57 +++++++++---------- .../snapshots/tests__regression_inherit.snap | 35 +----------- .../snapshots/tests__regression_join.snap | 21 +------ .../tests__regression_privileges.snap | 15 +---- .../tests__regression_suite_errors.snap | 4 -- .../snapshots/tests__regression_union.snap | 16 +----- ...ests__select_compound_union_select_ok.snap | 2 +- 7 files changed, 33 insertions(+), 117 deletions(-) diff --git a/crates/squawk_parser/src/grammar.rs b/crates/squawk_parser/src/grammar.rs index 4108790d..a4ef5664 100644 --- a/crates/squawk_parser/src/grammar.rs +++ b/crates/squawk_parser/src/grammar.rs @@ -68,7 +68,7 @@ fn array_expr(p: &mut Parser<'_>, m: Option) -> CompletedMarker { m.complete(p, ARRAY_EXPR) } -fn paren_select(p: &mut Parser<'_>) -> Option { +fn opt_paren_select(p: &mut Parser<'_>) -> Option { let m = p.start(); if !p.eat(L_PAREN) { m.abandon(p); @@ -81,7 +81,7 @@ fn paren_select(p: &mut Parser<'_>) -> Option { if p.at_ts(SELECT_FIRST) && (select(p, None).is_none() || p.at(EOF) || p.at(R_PAREN)) { break; } - if paren_select(p).is_none() { + if opt_paren_select(p).is_none() { break; } if !p.at(R_PAREN) { @@ -89,7 +89,14 @@ fn paren_select(p: &mut Parser<'_>) -> Option { } } p.expect(R_PAREN); - Some(m.complete(p, PAREN_SELECT)) + let cm = m.complete(p, PAREN_SELECT); + let cm = if p.at_ts(COMPOUND_SELECT_FIRST) { + compound_select(p, cm) + } else { + cm + }; + select_trailing_clauses(p); + Some(cm) } const SELECT_FIRST: TokenSet = TokenSet::new(&[SELECT_KW, TABLE_KW, WITH_KW, VALUES_KW]); @@ -2436,7 +2443,7 @@ fn compound_select(p: &mut Parser<'_>, cm: CompletedMarker) -> CompletedMarker { p.eat(DISTINCT_KW); } if p.at(L_PAREN) { - tuple_expr(p); + opt_paren_select(p); } else { if p.at_ts(SELECT_FIRST) { select(p, None); @@ -2488,6 +2495,11 @@ fn select(p: &mut Parser, m: Option) -> Option { let cm = m.complete(p, SELECT); return Some(compound_select(p, cm)); } + select_trailing_clauses(p); + Some(m.complete(p, out_kind)) +} + +fn select_trailing_clauses(p: &mut Parser<'_>) { opt_order_by_clause(p); let mut has_locking_clause = false; while p.at(FOR_KW) { @@ -2505,7 +2517,6 @@ fn select(p: &mut Parser, m: Option) -> Option { opt_locking_clause(p); } } - Some(m.complete(p, out_kind)) } // INTO [ TEMPORARY | TEMP | UNLOGGED ] [ TABLE ] new_table @@ -2778,11 +2789,14 @@ const FUNC_EXPR_COMMON_SUBEXPR_FIRST: TokenSet = TokenSet::new(&[ GREATEST_KW, JSON_KW, JSON_ARRAY_KW, + JSON_ARRAYAGG_KW, JSON_EXISTS_KW, JSON_OBJECT_KW, + JSON_OBJECTAGG_KW, JSON_QUERY_KW, JSON_SCALAR_KW, JSON_SERIALIZE_KW, + JSON_TABLE_KW, JSON_VALUE_KW, LEAST_KW, LOCALTIME_KW, @@ -2806,6 +2820,7 @@ const FUNC_EXPR_COMMON_SUBEXPR_FIRST: TokenSet = TokenSet::new(&[ XMLPI_KW, XMLROOT_KW, XMLSERIALIZE_KW, + XMLTABLE_KW, ]); const FROM_ITEM_KEYWORDS_FIRST: TokenSet = TokenSet::new(&[]) @@ -2993,29 +3008,23 @@ fn xml_namespace_element(p: &mut Parser<'_>) { } } -fn paren_data_source(p: &mut Parser<'_>) -> CompletedMarker { +fn paren_data_source(p: &mut Parser<'_>) -> Option { assert!(p.at(L_PAREN)); + if p.at(L_PAREN) && p.nth_at_ts(1, SELECT_FIRST) { + return opt_paren_select(p); + } let m = p.start(); p.bump(L_PAREN); - - // Try to parse as a SELECT statement first - if p.at_ts(SELECT_FIRST) { - if select(p, None).is_some() { - p.expect(R_PAREN); - return m.complete(p, PAREN_SELECT); - } - } - // Then try to parse as a FROM_ITEM (which includes table references and joins) if opt_from_item(p) { p.expect(R_PAREN); - return m.complete(p, PAREN_EXPR); + return Some(m.complete(p, PAREN_EXPR)); } else { p.error("expected table name or SELECT"); } p.expect(R_PAREN); - m.complete(p, PAREN_EXPR) + Some(m.complete(p, PAREN_EXPR)) } // USING data_source ON join_condition @@ -5289,17 +5298,7 @@ fn stmt(p: &mut Parser, r: &StmtRestrictions) -> Option { (INSERT_KW, _) => Some(insert(p, None)), (L_PAREN, _) if p.nth_at_ts(1, SELECT_FIRST) || p.at(L_PAREN) => { // can have select nested in parens, i.e., ((select 1)); - let cm = paren_select(p)?; - let cm = if p.at_ts(COMPOUND_SELECT_FIRST) { - compound_select(p, cm) - } else { - cm - }; - // TODO: this needs to be rethinked - if p.at(ORDER_KW) { - opt_order_by_clause(p); - } - Some(cm) + opt_paren_select(p) } (LISTEN_KW, _) => Some(listen(p)), (LOAD_KW, _) => Some(load(p)), @@ -11987,7 +11986,7 @@ fn create_schema(p: &mut Parser<'_>) -> CompletedMarker { fn query(p: &mut Parser<'_>) { // TODO: this needs to be more general - if (!p.at_ts(SELECT_FIRST) || select(p, None).is_none()) && paren_select(p).is_none() { + if (!p.at_ts(SELECT_FIRST) || select(p, None).is_none()) && opt_paren_select(p).is_none() { p.error("expected select stmt") } } diff --git a/crates/squawk_parser/tests/snapshots/tests__regression_inherit.snap b/crates/squawk_parser/tests/snapshots/tests__regression_inherit.snap index 1149b92b..f10483d6 100644 --- a/crates/squawk_parser/tests/snapshots/tests__regression_inherit.snap +++ b/crates/squawk_parser/tests/snapshots/tests__regression_inherit.snap @@ -2,37 +2,4 @@ source: crates/squawk_parser/tests/tests.rs input_file: crates/squawk_parser/tests/data/regression_suite/inherit.sql --- -ERROR@30331: expected R_PAREN -ERROR@30426: missing comma -ERROR@30455: expected SEMICOLON -ERROR@30455: expected command, found R_PAREN -ERROR@30457: expected command, found FROM_KW -ERROR@30462: expected command, found IDENT -ERROR@30478: expected R_PAREN -ERROR@30478: expected SEMICOLON -ERROR@30478: expected command, found INT_NUMBER -ERROR@30479: expected command, found COMMA -ERROR@30481: expected command, found INT_NUMBER -ERROR@30482: expected command, found R_PAREN -ERROR@30484: expected command, found IDENT -ERROR@30486: expected R_PAREN -ERROR@30486: expected SEMICOLON -ERROR@30486: expected command, found IDENT -ERROR@30487: expected command, found R_PAREN -ERROR@30594: expected R_PAREN -ERROR@30689: missing comma -ERROR@30718: expected SEMICOLON -ERROR@30718: expected command, found R_PAREN -ERROR@30720: expected command, found FROM_KW -ERROR@30725: expected command, found IDENT -ERROR@30741: expected R_PAREN -ERROR@30741: expected SEMICOLON -ERROR@30741: expected command, found INT_NUMBER -ERROR@30742: expected command, found COMMA -ERROR@30744: expected command, found INT_NUMBER -ERROR@30745: expected command, found R_PAREN -ERROR@30747: expected command, found IDENT -ERROR@30749: expected R_PAREN -ERROR@30749: expected SEMICOLON -ERROR@30749: expected command, found IDENT -ERROR@30750: expected command, found R_PAREN + diff --git a/crates/squawk_parser/tests/snapshots/tests__regression_join.snap b/crates/squawk_parser/tests/snapshots/tests__regression_join.snap index b8cc19d1..f9639486 100644 --- a/crates/squawk_parser/tests/snapshots/tests__regression_join.snap +++ b/crates/squawk_parser/tests/snapshots/tests__regression_join.snap @@ -2,23 +2,4 @@ source: crates/squawk_parser/tests/tests.rs input_file: crates/squawk_parser/tests/data/regression_suite/join.sql --- -ERROR@93160: expected R_PAREN -ERROR@93186: expected SEMICOLON -ERROR@93186: expected command, found R_PAREN -ERROR@93188: expected command, found AS_KW -ERROR@93191: expected command, found IDENT -ERROR@93196: expected command, found CROSS_KW -ERROR@93202: expected command, found JOIN_KW -ERROR@93207: expected command, found LATERAL_KW -ERROR@93255: expected R_PAREN -ERROR@93282: expected R_PAREN -ERROR@93282: expected SEMICOLON -ERROR@93283: expected command, found AS_KW -ERROR@93286: expected command, found IDENT -ERROR@93288: expected command, found R_PAREN -ERROR@93293: expected command, found UNION_KW -ERROR@93299: expected command, found ALL_KW -ERROR@93319: expected SEMICOLON -ERROR@93322: expected command, found R_PAREN -ERROR@93324: expected command, found AS_KW -ERROR@93327: expected command, found IDENT + diff --git a/crates/squawk_parser/tests/snapshots/tests__regression_privileges.snap b/crates/squawk_parser/tests/snapshots/tests__regression_privileges.snap index e5d35748..16080ec0 100644 --- a/crates/squawk_parser/tests/snapshots/tests__regression_privileges.snap +++ b/crates/squawk_parser/tests/snapshots/tests__regression_privileges.snap @@ -2,17 +2,4 @@ source: crates/squawk_parser/tests/tests.rs input_file: crates/squawk_parser/tests/data/regression_suite/privileges.sql --- -ERROR@17722: expected R_PAREN -ERROR@17782: expected SEMICOLON -ERROR@17782: expected command, found R_PAREN -ERROR@17784: expected command, found IDENT -ERROR@17787: expected command, found WHERE_KW -ERROR@17793: expected command, found FALSE_KW -ERROR@17906: expected R_PAREN -ERROR@17980: expected SEMICOLON -ERROR@17980: expected command, found R_PAREN -ERROR@17982: expected command, found IDENT -ERROR@17985: expected command, found WHERE_KW -ERROR@17991: expected command, found IDENT -ERROR@17993: expected command, found L_ANGLE -ERROR@17995: expected command, found INT_NUMBER + diff --git a/crates/squawk_parser/tests/snapshots/tests__regression_suite_errors.snap b/crates/squawk_parser/tests/snapshots/tests__regression_suite_errors.snap index f8134089..e045d6bb 100644 --- a/crates/squawk_parser/tests/snapshots/tests__regression_suite_errors.snap +++ b/crates/squawk_parser/tests/snapshots/tests__regression_suite_errors.snap @@ -2,10 +2,6 @@ source: crates/squawk_parser/tests/tests.rs expression: "out.join(\"\\n\")" --- -tests/snapshots/tests__regression_inherit.snap:34 -tests/snapshots/tests__regression_join.snap:20 tests/snapshots/tests__regression_merge.snap:61 -tests/snapshots/tests__regression_privileges.snap:14 tests/snapshots/tests__regression_strings.snap:49 -tests/snapshots/tests__regression_union.snap:15 tests/snapshots/tests__regression_xml.snap:382 diff --git a/crates/squawk_parser/tests/snapshots/tests__regression_union.snap b/crates/squawk_parser/tests/snapshots/tests__regression_union.snap index 25a07864..80db14e1 100644 --- a/crates/squawk_parser/tests/snapshots/tests__regression_union.snap +++ b/crates/squawk_parser/tests/snapshots/tests__regression_union.snap @@ -2,18 +2,4 @@ source: crates/squawk_parser/tests/tests.rs input_file: crates/squawk_parser/tests/data/regression_suite/union.sql --- -ERROR@996: expected SEMICOLON -ERROR@997: expected command, found ORDER_KW -ERROR@1003: expected command, found BY_KW -ERROR@1006: expected command, found INT_NUMBER -ERROR@10339: expected SEMICOLON -ERROR@10340: expected command, found ORDER_KW -ERROR@10346: expected command, found BY_KW -ERROR@10349: expected command, found INT_NUMBER -ERROR@11233: expected SEMICOLON -ERROR@11234: expected command, found ORDER_KW -ERROR@11240: expected command, found BY_KW -ERROR@11243: expected command, found INT_NUMBER -ERROR@18725: expected SEMICOLON -ERROR@18726: expected command, found LIMIT_KW -ERROR@18732: expected command, found INT_NUMBER + diff --git a/crates/squawk_parser/tests/snapshots/tests__select_compound_union_select_ok.snap b/crates/squawk_parser/tests/snapshots/tests__select_compound_union_select_ok.snap index 6f793a89..dcc16922 100644 --- a/crates/squawk_parser/tests/snapshots/tests__select_compound_union_select_ok.snap +++ b/crates/squawk_parser/tests/snapshots/tests__select_compound_union_select_ok.snap @@ -77,7 +77,7 @@ SOURCE_FILE WHITESPACE "\n " UNION_KW "UNION" WHITESPACE "\n " - PAREN_EXPR + PAREN_SELECT L_PAREN "(" SELECT SELECT_CLAUSE