From 7dac6d08a6298a2f23ab5a5af930cc9e7fad9c1d Mon Sep 17 00:00:00 2001 From: Steve Dignam Date: Sun, 19 Oct 2025 21:53:23 -0400 Subject: [PATCH] parser: improve error recovery in group by --- crates/squawk_parser/src/grammar.rs | 75 +++--- .../tests/data/err/create_table.sql | 8 + .../squawk_parser/tests/data/err/select.sql | 11 + .../snapshots/tests__create_table_err.snap | 97 +++++++- .../tests/snapshots/tests__select_err.snap | 228 ++++++++++++++++-- .../squawk_syntax/src/ast/generated/nodes.rs | 72 ++++++ crates/squawk_syntax/src/postgresql.ungram | 12 +- 7 files changed, 447 insertions(+), 56 deletions(-) diff --git a/crates/squawk_parser/src/grammar.rs b/crates/squawk_parser/src/grammar.rs index 35b95e69..7536cda7 100644 --- a/crates/squawk_parser/src/grammar.rs +++ b/crates/squawk_parser/src/grammar.rs @@ -4333,41 +4333,48 @@ fn group_by_list(p: &mut Parser<'_>) { // an arbitrary expression formed from input-column values. In case of // ambiguity, a GROUP BY name will be interpreted as an input-column name // rather than an output column name. + while !p.at(EOF) && !p.at(SEMICOLON) { - group_by_item(p); + if opt_group_by_item(p).is_none() { + p.error("expected group by item"); + } if !p.eat(COMMA) { break; } } } -fn group_by_item(p: &mut Parser<'_>) { +const GROUP_BY_ITEM_FIRST: TokenSet = + TokenSet::new(&[ROLLUP_KW, CUBE_KW, GROUPING_KW]).union(EXPR_FIRST); + +fn opt_group_by_item(p: &mut Parser<'_>) -> Option { + if !p.at_ts(GROUP_BY_ITEM_FIRST) { + return None; + } let m = p.start(); let kind = match p.current() { ROLLUP_KW => { p.bump_any(); - p.expect(L_PAREN); - if !expr_list(p) { - p.error("expected expression list"); - }; - p.expect(R_PAREN); + paren_expr_list(p); GROUPING_ROLLUP } CUBE_KW => { p.bump_any(); - p.expect(L_PAREN); - if !expr_list(p) { - p.error("expected expression list"); - }; - p.expect(R_PAREN); + paren_expr_list(p); GROUPING_CUBE } GROUPING_KW if p.nth_at(1, SETS_KW) => { p.bump(GROUPING_KW); p.bump(SETS_KW); - p.expect(L_PAREN); - group_by_list(p); - p.expect(R_PAREN); + delimited( + p, + L_PAREN, + R_PAREN, + COMMA, + || "unexpected comma".to_string(), + GROUP_BY_ITEM_FIRST, + |p| opt_group_by_item(p).is_some(), + ); GROUPING_SETS } _ => { @@ -4377,7 +4384,7 @@ fn group_by_item(p: &mut Parser<'_>) { GROUPING_EXPR } }; - m.complete(p, kind); + Some(m.complete(p, kind)) } /// @@ -4575,11 +4582,7 @@ fn opt_all_or_distinct(p: &mut Parser) { let m = p.start(); if p.eat(DISTINCT_KW) { if p.eat(ON_KW) { - p.expect(L_PAREN); - if !expr_list(p) { - p.error("expected expression in paren_expr_list"); - } - p.expect(R_PAREN); + paren_expr_list(p); } m.complete(p, DISTINCT_CLAUSE); } else { @@ -4587,6 +4590,18 @@ fn opt_all_or_distinct(p: &mut Parser) { } } +fn paren_expr_list(p: &mut Parser<'_>) { + delimited( + p, + L_PAREN, + R_PAREN, + COMMA, + || "unexpected comma".to_string(), + EXPR_FIRST, + |p| opt_expr(p).is_some(), + ); +} + /// All keywords const COL_LABEL_FIRST: TokenSet = TokenSet::new(&[IDENT]) .union(UNRESERVED_KEYWORDS) @@ -4908,25 +4923,13 @@ fn partition_option(p: &mut Parser<'_>) { PARTITION_FOR_VALUES_WITH // FOR VALUES IN '(' expr_list ')' } else if p.eat(IN_KW) { - p.expect(L_PAREN); - if !expr_list(p) { - p.error("expected expr list"); - } - p.expect(R_PAREN); + paren_expr_list(p); PARTITION_FOR_VALUES_IN // FOR VALUES FROM '(' expr_list ')' TO '(' expr_list ')' } else if p.eat(FROM_KW) { - p.expect(L_PAREN); - if !expr_list(p) { - p.error("expected expr list"); - } - p.expect(R_PAREN); + paren_expr_list(p); p.expect(TO_KW); - p.expect(L_PAREN); - if !expr_list(p) { - p.error("expected expr list"); - } - p.expect(R_PAREN); + paren_expr_list(p); PARTITION_FOR_VALUES_FROM } else { p.error("expected partition option"); diff --git a/crates/squawk_parser/tests/data/err/create_table.sql b/crates/squawk_parser/tests/data/err/create_table.sql index 58862f39..07546bbb 100644 --- a/crates/squawk_parser/tests/data/err/create_table.sql +++ b/crates/squawk_parser/tests/data/err/create_table.sql @@ -46,6 +46,14 @@ create unlogged table t ( ) ); +create table t partition of u + -- missing some commas + for values from ('2024-01-01' 1) to ('2024-04-01' 5); + +create table t partition of u + -- missing some commas + for values in (1 2 3); + -- exclude missing a comma create table t ( a int, diff --git a/crates/squawk_parser/tests/data/err/select.sql b/crates/squawk_parser/tests/data/err/select.sql index 048fb8d3..2f5e1f86 100644 --- a/crates/squawk_parser/tests/data/err/select.sql +++ b/crates/squawk_parser/tests/data/err/select.sql @@ -13,6 +13,17 @@ select a, b c d, e from t; -- ^ ^ comma missing -- \-- this is a label +-- distinct on missing a comma +SELECT DISTINCT ON (a b) a, b, c + FROM t + order by a, b desc; + +-- group bys with missing commas +select * from t group by rollup (1 2 3); +select * from t group by cube (1 2 3); +select * from u + group by grouping sets((1 2) grouping sets((), grouping sets(()))); + -- trailing comma in args select f(1,); diff --git a/crates/squawk_parser/tests/snapshots/tests__create_table_err.snap b/crates/squawk_parser/tests/snapshots/tests__create_table_err.snap index 14784698..109e6a2a 100644 --- a/crates/squawk_parser/tests/snapshots/tests__create_table_err.snap +++ b/crates/squawk_parser/tests/snapshots/tests__create_table_err.snap @@ -422,6 +422,95 @@ SOURCE_FILE R_PAREN ")" SEMICOLON ";" WHITESPACE "\n\n" + CREATE_TABLE + CREATE_KW "create" + WHITESPACE " " + TABLE_KW "table" + WHITESPACE " " + PATH + PATH_SEGMENT + NAME + IDENT "t" + WHITESPACE " " + PARTITION_OF + PARTITION_KW "partition" + WHITESPACE " " + OF_KW "of" + WHITESPACE " " + PATH + PATH_SEGMENT + NAME_REF + IDENT "u" + WHITESPACE "\n " + COMMENT "-- missing some commas" + WHITESPACE "\n " + PARTITION_FOR_VALUES_FROM + FOR_KW "for" + WHITESPACE " " + VALUES_KW "values" + WHITESPACE " " + FROM_KW "from" + WHITESPACE " " + L_PAREN "(" + LITERAL + STRING "'2024-01-01'" + WHITESPACE " " + LITERAL + INT_NUMBER "1" + R_PAREN ")" + WHITESPACE " " + TO_KW "to" + WHITESPACE " " + L_PAREN "(" + LITERAL + STRING "'2024-04-01'" + WHITESPACE " " + LITERAL + INT_NUMBER "5" + R_PAREN ")" + SEMICOLON ";" + WHITESPACE "\n\n" + CREATE_TABLE + CREATE_KW "create" + WHITESPACE " " + TABLE_KW "table" + WHITESPACE " " + PATH + PATH_SEGMENT + NAME + IDENT "t" + WHITESPACE " " + PARTITION_OF + PARTITION_KW "partition" + WHITESPACE " " + OF_KW "of" + WHITESPACE " " + PATH + PATH_SEGMENT + NAME_REF + IDENT "u" + WHITESPACE "\n " + COMMENT "-- missing some commas" + WHITESPACE "\n " + PARTITION_FOR_VALUES_IN + FOR_KW "for" + WHITESPACE " " + VALUES_KW "values" + WHITESPACE " " + IN_KW "in" + WHITESPACE " " + L_PAREN "(" + LITERAL + INT_NUMBER "1" + WHITESPACE " " + LITERAL + INT_NUMBER "2" + WHITESPACE " " + LITERAL + INT_NUMBER "3" + R_PAREN ")" + SEMICOLON ";" + WHITESPACE "\n\n" CREATE_TABLE COMMENT "-- exclude missing a comma" WHITESPACE "\n" @@ -571,5 +660,9 @@ ERROR@198: unexpected comma ERROR@199: unexpected comma ERROR@200: unexpected comma ERROR@201: unexpected comma -ERROR@1021: expected COMMA -ERROR@1063: expected SEMICOLON +ERROR@1011: expected COMMA +ERROR@1032: expected COMMA +ERROR@1116: expected COMMA +ERROR@1119: expected COMMA +ERROR@1226: expected COMMA +ERROR@1268: expected SEMICOLON diff --git a/crates/squawk_parser/tests/snapshots/tests__select_err.snap b/crates/squawk_parser/tests/snapshots/tests__select_err.snap index 478bc1e7..a6b24a5c 100644 --- a/crates/squawk_parser/tests/snapshots/tests__select_err.snap +++ b/crates/squawk_parser/tests/snapshots/tests__select_err.snap @@ -133,6 +133,197 @@ SOURCE_FILE WHITESPACE "\n" COMMENT "-- \\-- this is a label" WHITESPACE "\n\n" + COMMENT "-- distinct on missing a comma" + WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "SELECT" + WHITESPACE " " + DISTINCT_CLAUSE + DISTINCT_KW "DISTINCT" + WHITESPACE " " + ON_KW "ON" + WHITESPACE " " + L_PAREN "(" + NAME_REF + IDENT "a" + WHITESPACE " " + NAME_REF + IDENT "b" + R_PAREN ")" + WHITESPACE " " + TARGET_LIST + TARGET + NAME_REF + IDENT "a" + COMMA "," + WHITESPACE " " + TARGET + NAME_REF + IDENT "b" + COMMA "," + WHITESPACE " " + TARGET + NAME_REF + IDENT "c" + WHITESPACE "\n " + FROM_CLAUSE + FROM_KW "FROM" + WHITESPACE " " + FROM_ITEM + NAME_REF + IDENT "t" + WHITESPACE "\n " + ORDER_BY_CLAUSE + ORDER_KW "order" + WHITESPACE " " + BY_KW "by" + WHITESPACE " " + SORT_BY + NAME_REF + IDENT "a" + COMMA "," + WHITESPACE " " + SORT_BY + NAME_REF + IDENT "b" + WHITESPACE " " + SORT_DESC + DESC_KW "desc" + SEMICOLON ";" + WHITESPACE " \n\n" + COMMENT "-- group bys with missing commas" + WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + STAR "*" + WHITESPACE " " + FROM_CLAUSE + FROM_KW "from" + WHITESPACE " " + FROM_ITEM + NAME_REF + IDENT "t" + WHITESPACE " " + GROUP_BY_CLAUSE + GROUP_KW "group" + WHITESPACE " " + BY_KW "by" + WHITESPACE " " + GROUPING_ROLLUP + ROLLUP_KW "rollup" + WHITESPACE " " + L_PAREN "(" + LITERAL + INT_NUMBER "1" + WHITESPACE " " + LITERAL + INT_NUMBER "2" + WHITESPACE " " + LITERAL + INT_NUMBER "3" + R_PAREN ")" + SEMICOLON ";" + WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + STAR "*" + WHITESPACE " " + FROM_CLAUSE + FROM_KW "from" + WHITESPACE " " + FROM_ITEM + NAME_REF + IDENT "t" + WHITESPACE " " + GROUP_BY_CLAUSE + GROUP_KW "group" + WHITESPACE " " + BY_KW "by" + WHITESPACE " " + GROUPING_CUBE + CUBE_KW "cube" + WHITESPACE " " + L_PAREN "(" + LITERAL + INT_NUMBER "1" + WHITESPACE " " + LITERAL + INT_NUMBER "2" + WHITESPACE " " + LITERAL + INT_NUMBER "3" + R_PAREN ")" + SEMICOLON ";" + WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + STAR "*" + WHITESPACE " " + FROM_CLAUSE + FROM_KW "from" + WHITESPACE " " + FROM_ITEM + NAME_REF + IDENT "u" + WHITESPACE "\n " + GROUP_BY_CLAUSE + GROUP_KW "group" + WHITESPACE " " + BY_KW "by" + WHITESPACE " " + GROUPING_SETS + GROUPING_KW "grouping" + WHITESPACE " " + SETS_KW "sets" + L_PAREN "(" + GROUPING_EXPR + TUPLE_EXPR + L_PAREN "(" + LITERAL + INT_NUMBER "1" + WHITESPACE " " + LITERAL + INT_NUMBER "2" + R_PAREN ")" + WHITESPACE " " + GROUPING_SETS + GROUPING_KW "grouping" + WHITESPACE " " + SETS_KW "sets" + L_PAREN "(" + GROUPING_EXPR + TUPLE_EXPR + L_PAREN "(" + R_PAREN ")" + COMMA "," + WHITESPACE " " + GROUPING_SETS + GROUPING_KW "grouping" + WHITESPACE " " + SETS_KW "sets" + L_PAREN "(" + GROUPING_EXPR + TUPLE_EXPR + L_PAREN "(" + R_PAREN ")" + R_PAREN ")" + R_PAREN ")" + R_PAREN ")" + SEMICOLON ";" + WHITESPACE "\n\n" COMMENT "-- trailing comma in args" WHITESPACE "\n" SELECT @@ -433,18 +624,25 @@ ERROR@124: unexpected trailing comma ERROR@153: unexpected trailing comma ERROR@213: unexpected trailing comma ERROR@248: missing comma -ERROR@362: unexpected trailing comma -ERROR@394: expected expression -ERROR@395: expected expression -ERROR@396: expected expression -ERROR@397: expected expression -ERROR@520: missing comma -ERROR@559: expected COMMA -ERROR@597: unexpected comma -ERROR@638: unexpected trailing comma -ERROR@708: expected COMMA -ERROR@746: unexpected comma -ERROR@778: unexpected trailing comma -ERROR@902: expected SEMICOLON -ERROR@935: unexpected trailing comma -ERROR@979: unexpected trailing comma +ERROR@378: expected COMMA +ERROR@494: expected COMMA +ERROR@496: expected COMMA +ERROR@533: expected COMMA +ERROR@535: expected COMMA +ERROR@583: expected COMMA +ERROR@586: expected COMMA +ERROR@663: unexpected trailing comma +ERROR@695: expected expression +ERROR@696: expected expression +ERROR@697: expected expression +ERROR@698: expected expression +ERROR@821: missing comma +ERROR@860: expected COMMA +ERROR@898: unexpected comma +ERROR@939: unexpected trailing comma +ERROR@1009: expected COMMA +ERROR@1047: unexpected comma +ERROR@1079: unexpected trailing comma +ERROR@1203: expected SEMICOLON +ERROR@1236: unexpected trailing comma +ERROR@1280: unexpected trailing comma diff --git a/crates/squawk_syntax/src/ast/generated/nodes.rs b/crates/squawk_syntax/src/ast/generated/nodes.rs index be44131f..cb71fd18 100644 --- a/crates/squawk_syntax/src/ast/generated/nodes.rs +++ b/crates/squawk_syntax/src/ast/generated/nodes.rs @@ -3309,10 +3309,26 @@ pub struct DistinctClause { pub(crate) syntax: SyntaxNode, } impl DistinctClause { + #[inline] + pub fn exprs(&self) -> AstChildren { + support::children(&self.syntax) + } + #[inline] + pub fn l_paren_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::L_PAREN) + } + #[inline] + pub fn r_paren_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::R_PAREN) + } #[inline] pub fn distinct_token(&self) -> Option { support::token(&self.syntax, SyntaxKind::DISTINCT_KW) } + #[inline] + pub fn on_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::ON_KW) + } } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -4762,10 +4778,42 @@ pub struct FetchClause { pub(crate) syntax: SyntaxNode, } impl FetchClause { + #[inline] + pub fn expr(&self) -> Option { + support::child(&self.syntax) + } #[inline] pub fn fetch_token(&self) -> Option { support::token(&self.syntax, SyntaxKind::FETCH_KW) } + #[inline] + pub fn first_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::FIRST_KW) + } + #[inline] + pub fn next_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::NEXT_KW) + } + #[inline] + pub fn only_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::ONLY_KW) + } + #[inline] + pub fn row_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::ROW_KW) + } + #[inline] + pub fn rows_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::ROWS_KW) + } + #[inline] + pub fn ties_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::TIES_KW) + } + #[inline] + pub fn with_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::WITH_KW) + } } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -5177,6 +5225,10 @@ pub struct HavingClause { pub(crate) syntax: SyntaxNode, } impl HavingClause { + #[inline] + pub fn expr(&self) -> Option { + support::child(&self.syntax) + } #[inline] pub fn having_token(&self) -> Option { support::token(&self.syntax, SyntaxKind::HAVING_KW) @@ -6432,6 +6484,14 @@ pub struct LimitClause { pub(crate) syntax: SyntaxNode, } impl LimitClause { + #[inline] + pub fn expr(&self) -> Option { + support::child(&self.syntax) + } + #[inline] + pub fn all_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::ALL_KW) + } #[inline] pub fn limit_token(&self) -> Option { support::token(&self.syntax, SyntaxKind::LIMIT_KW) @@ -7216,10 +7276,22 @@ pub struct OffsetClause { pub(crate) syntax: SyntaxNode, } impl OffsetClause { + #[inline] + pub fn expr(&self) -> Option { + support::child(&self.syntax) + } #[inline] pub fn offset_token(&self) -> Option { support::token(&self.syntax, SyntaxKind::OFFSET_KW) } + #[inline] + pub fn row_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::ROW_KW) + } + #[inline] + pub fn rows_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::ROWS_KW) + } } #[derive(Debug, Clone, PartialEq, Eq, Hash)] diff --git a/crates/squawk_syntax/src/postgresql.ungram b/crates/squawk_syntax/src/postgresql.ungram index 9bc8d324..52bb31bb 100644 --- a/crates/squawk_syntax/src/postgresql.ungram +++ b/crates/squawk_syntax/src/postgresql.ungram @@ -553,7 +553,7 @@ GroupingExpr = Expr HavingClause = - 'having' + 'having' Expr WindowDef = Name 'as' '(' WindowSpec ')' @@ -562,16 +562,22 @@ WindowClause = 'window' (WindowDef (',' WindowDef)*) LimitClause = - 'limit' + 'limit' ('all' | Expr) FetchClause = 'fetch' + ('first' | 'next') + Expr? + ('row' | 'rows') + ('only' | 'with' 'ties') OffsetClause = 'offset' + Expr + ('row' | 'rows')? DistinctClause = - 'distinct' + 'distinct' 'on' '(' (Expr (',' Expr)*) ')' Target = '*'