From 18d53efe4010023446a36eb0a29965124830b16e Mon Sep 17 00:00:00 2001 From: Steve Dignam Date: Thu, 13 Nov 2025 18:30:08 -0500 Subject: [PATCH] parser: add slice expr node to syntax tree --- PLAN.md | 88 +++++++++++++++++++ .../src/generated/syntax_kind.rs | 1 + crates/squawk_parser/src/grammar.rs | 8 +- .../tests/snapshots/tests__insert_ok.snap | 4 +- .../snapshots/tests__select_operators_ok.snap | 14 +-- .../squawk_syntax/src/ast/generated/nodes.rs | 47 ++++++++++ crates/squawk_syntax/src/ast/node_ext.rs | 28 ++++++ crates/squawk_syntax/src/lib.rs | 49 +++++++++++ crates/squawk_syntax/src/postgresql.ungram | 4 + 9 files changed, 231 insertions(+), 12 deletions(-) diff --git a/PLAN.md b/PLAN.md index 15548526..0f95c8ad 100644 --- a/PLAN.md +++ b/PLAN.md @@ -439,6 +439,36 @@ SELECT customer_id, merchant_id, request_id, count(*) from t group by all; ``` +and an action to go backwards: + +```sql +SELECT customer_id, merchant_id, request_id, count(*) from t +group by all; +-- ^action:expand-group-by-all-names +``` + +becomes: + +```sql +SELECT customer_id, merchant_id, request_id, count(*) from t +group by customer_id, merchant_id, request_id; +``` + +or with column numbers: + +```sql +SELECT customer_id, merchant_id, request_id, count(*) from t +group by all; +-- ^action:expand-group-by-all-numbers +``` + +becomes: + +```sql +SELECT customer_id, merchant_id, request_id, count(*) from t +group by 1, 2, 3; +``` + ### Rule: unused column ```sql @@ -1004,6 +1034,64 @@ select foo, "a" from t; select foo, 'a' from t; ``` +### Quick Fix: Quote and Unquote columns, tables, etc. + +```sql +select "x" from "t"; +-- ^ Quick Fix: unquote +``` + +gives + +```sql +select x from "t"; +``` + +and vice versa: + +```sql +select x from "t"; +-- ^ Quick Fix: quote +``` + +gives: + +Note: we have to be mindful of casing here, + +```sql +select "x" from "t"; +``` + +Note: there's some gotchas with this that we need to handle: + +```sql +-- okay +with t("X") as (select 1) +select "X" from t; + +-- err +with t("X") as (select 1) +select X from t; + +-- err +with t("X") as (select 1) +select x from t; +``` + +or invalid column names: + +```sql +-- ok +with t("a-b") as (select 1) +select "a-b" from t; + +-- err +with t("a-b") as (select 1) +select a-b from t; +-- Query 1 ERROR at Line 2: ERROR: column "a" does not exist +-- LINE 2: select a-b from t; +``` + ### Quick Fix: in array ```sql diff --git a/crates/squawk_parser/src/generated/syntax_kind.rs b/crates/squawk_parser/src/generated/syntax_kind.rs index 012141a4..9155e38f 100644 --- a/crates/squawk_parser/src/generated/syntax_kind.rs +++ b/crates/squawk_parser/src/generated/syntax_kind.rs @@ -1002,6 +1002,7 @@ pub enum SyntaxKind { SET_WITHOUT_OIDS, SHOW, SIMILAR_TO, + SLICE_EXPR, SORT_ASC, SORT_BY, SORT_BY_LIST, diff --git a/crates/squawk_parser/src/grammar.rs b/crates/squawk_parser/src/grammar.rs index c273090a..c708a97b 100644 --- a/crates/squawk_parser/src/grammar.rs +++ b/crates/squawk_parser/src/grammar.rs @@ -2120,14 +2120,14 @@ fn index_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker { if p.eat(COLON) { // foo[:] if p.eat(R_BRACK) { - return m.complete(p, INDEX_EXPR); + return m.complete(p, SLICE_EXPR); } else { // foo[:b] if expr(p).is_none() { p.error("expected an expression"); } p.expect(R_BRACK); - return m.complete(p, INDEX_EXPR); + return m.complete(p, SLICE_EXPR); } } // foo[a] @@ -2139,12 +2139,14 @@ fn index_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker { if p.eat(COLON) { // foo[a:] if p.eat(R_BRACK) { - return m.complete(p, INDEX_EXPR); + return m.complete(p, SLICE_EXPR); } // foo[a:b] if expr(p).is_none() { p.error("expected an expression"); } + p.expect(R_BRACK); + return m.complete(p, SLICE_EXPR); } p.expect(R_BRACK); } diff --git a/crates/squawk_parser/tests/snapshots/tests__insert_ok.snap b/crates/squawk_parser/tests/snapshots/tests__insert_ok.snap index 1e81e124..24be80e3 100644 --- a/crates/squawk_parser/tests/snapshots/tests__insert_ok.snap +++ b/crates/squawk_parser/tests/snapshots/tests__insert_ok.snap @@ -728,8 +728,8 @@ SOURCE_FILE COMMA "," WHITESPACE " " COLUMN - INDEX_EXPR - INDEX_EXPR + SLICE_EXPR + SLICE_EXPR NAME_REF IDENT "board" L_BRACK "[" diff --git a/crates/squawk_parser/tests/snapshots/tests__select_operators_ok.snap b/crates/squawk_parser/tests/snapshots/tests__select_operators_ok.snap index 3bb5b225..4a36aae5 100644 --- a/crates/squawk_parser/tests/snapshots/tests__select_operators_ok.snap +++ b/crates/squawk_parser/tests/snapshots/tests__select_operators_ok.snap @@ -1416,8 +1416,8 @@ SOURCE_FILE WHITESPACE " " TARGET_LIST TARGET - INDEX_EXPR - INDEX_EXPR + SLICE_EXPR + SLICE_EXPR NAME_REF IDENT "b" L_BRACK "[" @@ -1444,8 +1444,8 @@ SOURCE_FILE WHITESPACE " " TARGET_LIST TARGET - INDEX_EXPR - INDEX_EXPR + SLICE_EXPR + SLICE_EXPR NAME_REF IDENT "c" L_BRACK "[" @@ -1468,8 +1468,8 @@ SOURCE_FILE WHITESPACE " " TARGET_LIST TARGET - INDEX_EXPR - INDEX_EXPR + SLICE_EXPR + SLICE_EXPR NAME_REF IDENT "schedule" L_BRACK "[" @@ -3931,7 +3931,7 @@ SOURCE_FILE WHITESPACE " " TARGET_LIST TARGET - INDEX_EXPR + SLICE_EXPR LITERAL POSITIONAL_PARAM "$1" L_BRACK "[" diff --git a/crates/squawk_syntax/src/ast/generated/nodes.rs b/crates/squawk_syntax/src/ast/generated/nodes.rs index 3ad774e2..f6cbd31b 100644 --- a/crates/squawk_syntax/src/ast/generated/nodes.rs +++ b/crates/squawk_syntax/src/ast/generated/nodes.rs @@ -9678,6 +9678,25 @@ impl SimilarTo { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct SliceExpr { + pub(crate) syntax: SyntaxNode, +} +impl SliceExpr { + #[inline] + pub fn l_brack_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::L_BRACK) + } + #[inline] + pub fn r_brack_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::R_BRACK) + } + #[inline] + pub fn colon_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::COLON) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct SortAsc { pub(crate) syntax: SyntaxNode, @@ -10846,6 +10865,7 @@ pub enum Expr { ParenExpr(ParenExpr), PostfixExpr(PostfixExpr), PrefixExpr(PrefixExpr), + SliceExpr(SliceExpr), TupleExpr(TupleExpr), } @@ -19488,6 +19508,24 @@ impl AstNode for SimilarTo { &self.syntax } } +impl AstNode for SliceExpr { + #[inline] + fn can_cast(kind: SyntaxKind) -> bool { + kind == SyntaxKind::SLICE_EXPR + } + #[inline] + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + #[inline] + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} impl AstNode for SortAsc { #[inline] fn can_cast(kind: SyntaxKind) -> bool { @@ -21471,6 +21509,7 @@ impl AstNode for Expr { | SyntaxKind::PAREN_EXPR | SyntaxKind::POSTFIX_EXPR | SyntaxKind::PREFIX_EXPR + | SyntaxKind::SLICE_EXPR | SyntaxKind::TUPLE_EXPR ) } @@ -21490,6 +21529,7 @@ impl AstNode for Expr { SyntaxKind::PAREN_EXPR => Expr::ParenExpr(ParenExpr { syntax }), SyntaxKind::POSTFIX_EXPR => Expr::PostfixExpr(PostfixExpr { syntax }), SyntaxKind::PREFIX_EXPR => Expr::PrefixExpr(PrefixExpr { syntax }), + SyntaxKind::SLICE_EXPR => Expr::SliceExpr(SliceExpr { syntax }), SyntaxKind::TUPLE_EXPR => Expr::TupleExpr(TupleExpr { syntax }), _ => { return None; @@ -21513,6 +21553,7 @@ impl AstNode for Expr { Expr::ParenExpr(it) => &it.syntax, Expr::PostfixExpr(it) => &it.syntax, Expr::PrefixExpr(it) => &it.syntax, + Expr::SliceExpr(it) => &it.syntax, Expr::TupleExpr(it) => &it.syntax, } } @@ -21595,6 +21636,12 @@ impl From for Expr { Expr::PrefixExpr(node) } } +impl From for Expr { + #[inline] + fn from(node: SliceExpr) -> Expr { + Expr::SliceExpr(node) + } +} impl From for Expr { #[inline] fn from(node: TupleExpr) -> Expr { diff --git a/crates/squawk_syntax/src/ast/node_ext.rs b/crates/squawk_syntax/src/ast/node_ext.rs index c508338b..ef0df48b 100644 --- a/crates/squawk_syntax/src/ast/node_ext.rs +++ b/crates/squawk_syntax/src/ast/node_ext.rs @@ -78,6 +78,34 @@ impl ast::IndexExpr { } } +impl ast::SliceExpr { + #[inline] + pub fn base(&self) -> Option { + support::children(&self.syntax).next() + } + + #[inline] + pub fn start(&self) -> Option { + // With `select x[1:]`, we have two exprs, `x` and `1`. + // We skip over the first one, and then we want the second one, but we + // want to make sure we don't choose the end expr if instead we had: + // `select x[:1]` + let colon = self.colon_token()?; + support::children(&self.syntax) + .skip(1) + .find(|expr: &ast::Expr| expr.syntax().text_range().end() <= colon.text_range().start()) + } + + #[inline] + pub fn end(&self) -> Option { + // We want to make sure we get the last expr after the `:` which is the + // end of the slice, i.e., `2` in: `select x[:2]` + let colon = self.colon_token()?; + support::children(&self.syntax) + .find(|expr: &ast::Expr| expr.syntax().text_range().start() >= colon.text_range().end()) + } +} + impl ast::BetweenExpr { #[inline] pub fn target(&self) -> Option { diff --git a/crates/squawk_syntax/src/lib.rs b/crates/squawk_syntax/src/lib.rs index a5b9c224..320807f7 100644 --- a/crates/squawk_syntax/src/lib.rs +++ b/crates/squawk_syntax/src/lib.rs @@ -509,6 +509,55 @@ fn index_expr() { assert_eq!(index.syntax().text(), "bar"); } +#[test] +fn slice_expr() { + use insta::assert_snapshot; + let source_code = " + select x[1:2], x[2:], x[:3], x[:]; + "; + let parse = SourceFile::parse(source_code); + assert!(parse.errors().is_empty()); + let file: SourceFile = parse.tree(); + let stmt = file.stmts().next().unwrap(); + let ast::Stmt::Select(select) = stmt else { + unreachable!() + }; + let select_clause = select.select_clause().unwrap(); + let mut targets = select_clause.target_list().unwrap().targets(); + + let ast::Expr::SliceExpr(slice) = targets.next().unwrap().expr().unwrap() else { + unreachable!() + }; + assert_snapshot!(slice.syntax(), @"x[1:2]"); + assert_eq!(slice.base().unwrap().syntax().text(), "x"); + assert_eq!(slice.start().unwrap().syntax().text(), "1"); + assert_eq!(slice.end().unwrap().syntax().text(), "2"); + + let ast::Expr::SliceExpr(slice) = targets.next().unwrap().expr().unwrap() else { + unreachable!() + }; + assert_snapshot!(slice.syntax(), @"x[2:]"); + assert_eq!(slice.base().unwrap().syntax().text(), "x"); + assert_eq!(slice.start().unwrap().syntax().text(), "2"); + assert!(slice.end().is_none()); + + let ast::Expr::SliceExpr(slice) = targets.next().unwrap().expr().unwrap() else { + unreachable!() + }; + assert_snapshot!(slice.syntax(), @"x[:3]"); + assert_eq!(slice.base().unwrap().syntax().text(), "x"); + assert!(slice.start().is_none()); + assert_eq!(slice.end().unwrap().syntax().text(), "3"); + + let ast::Expr::SliceExpr(slice) = targets.next().unwrap().expr().unwrap() else { + unreachable!() + }; + assert_snapshot!(slice.syntax(), @"x[:]"); + assert_eq!(slice.base().unwrap().syntax().text(), "x"); + assert!(slice.start().is_none()); + assert!(slice.end().is_none()); +} + #[test] fn field_expr() { let source_code = " diff --git a/crates/squawk_syntax/src/postgresql.ungram b/crates/squawk_syntax/src/postgresql.ungram index f466e6c9..1e01d989 100644 --- a/crates/squawk_syntax/src/postgresql.ungram +++ b/crates/squawk_syntax/src/postgresql.ungram @@ -224,6 +224,7 @@ Expr = | CaseExpr | FieldExpr | IndexExpr +| SliceExpr | BetweenExpr | ParenExpr | TupleExpr @@ -1064,6 +1065,9 @@ NonStandardParam = IndexExpr = base:Expr '[' index:Expr ']' +SliceExpr = + base:Expr '[' start:Expr? ':' end:Expr? ']' + BetweenExpr = target:Expr 'between' start:Expr 'and' end:Expr