From de236d27ec33612438bfbfc259d07936fa5250ad Mon Sep 17 00:00:00 2001 From: Steve Dignam Date: Mon, 16 Jun 2025 22:59:37 -0400 Subject: [PATCH] parser: fix last of pg regression suite, strings --- .../src/generated/syntax_kind.rs | 1 + crates/squawk_parser/src/grammar.rs | 31 ++- crates/squawk_parser/src/lib.rs | 14 ++ .../tests/data/ok/select_operators.sql | 14 +- .../tests/data/regression_suite/strings.sql | 2 +- .../snapshots/tests__regression_strings.snap | 50 +---- .../tests__regression_suite_errors.snap | 2 +- .../snapshots/tests__select_funcs_ok.snap | 15 +- .../snapshots/tests__select_operators_ok.snap | 208 +++++++++++++++++- .../squawk_syntax/src/ast/generated/nodes.rs | 41 ++++ crates/squawk_syntax/src/postgresql.ungram | 5 +- 11 files changed, 315 insertions(+), 68 deletions(-) diff --git a/crates/squawk_parser/src/generated/syntax_kind.rs b/crates/squawk_parser/src/generated/syntax_kind.rs index 077ea734..fcc212fb 100644 --- a/crates/squawk_parser/src/generated/syntax_kind.rs +++ b/crates/squawk_parser/src/generated/syntax_kind.rs @@ -848,6 +848,7 @@ pub enum SyntaxKind { NOT_LIKE, NOT_NULL_CONSTRAINT, NOT_OF, + NOT_SIMILAR_TO, NOT_VALID, NO_ACTION, NO_FORCE_RLS, diff --git a/crates/squawk_parser/src/grammar.rs b/crates/squawk_parser/src/grammar.rs index 59193a7d..aac210a3 100644 --- a/crates/squawk_parser/src/grammar.rs +++ b/crates/squawk_parser/src/grammar.rs @@ -27,10 +27,15 @@ fn literal(p: &mut Parser<'_>) -> Option { return None; } let m = p.start(); + if p.eat(BYTE_STRING) { + if p.eat(UESCAPE_KW) { + p.eat(STRING); + } + } // E021-03 string continuation syntax // If two string literals are next to each other, and don't have a comment // between them, then they are automatically combined. - if p.eat(STRING) { + else if p.eat(STRING) { while !p.at(EOF) && p.eat(STRING) {} } else { p.bump_any(); @@ -362,10 +367,6 @@ fn substring_fn(p: &mut Parser<'_>) -> CompletedMarker { if expr(p).is_none() { p.error("expected an expression"); } - p.expect(ESCAPE_KW); - if expr(p).is_none() { - p.error("expected an expression"); - } } _ if p.eat(COMMA) => { // normal function call @@ -1462,7 +1463,13 @@ fn opt_name(p: &mut Parser<'_>) -> Option { return None; } let m = p.start(); - p.bump_any(); + if p.eat(IDENT) { + if p.eat(UESCAPE_KW) { + p.expect(STRING); + } + } else { + p.bump_any(); + } Some(m.complete(p, NAME)) } @@ -2156,10 +2163,14 @@ fn current_op(p: &Parser<'_>, r: &Restrictions) -> (u8, SyntaxKind, Associativit PLUS if p.next_not_joined_op(0) => (8, PLUS, Left), // symbol // overlaps OVERLAPS_KW => (7, OVERLAPS_KW, Left), + // escape + ESCAPE_KW => (7, ESCAPE_KW, Left), // like LIKE_KW => (6, LIKE_KW, Left), // ilike ILIKE_KW => (6, ILIKE_KW, Left), + // not similar to + NOT_KW if !r.not_disabled && p.at(NOT_SIMILAR_TO) => (6, NOT_SIMILAR_TO, Left), // not like NOT_KW if !r.not_disabled && p.at(NOT_LIKE) => (6, NOT_LIKE, Left), // not ilike @@ -13727,7 +13738,13 @@ fn alter_table_action(p: &mut Parser<'_>) -> Option { fn opt_col_label(p: &mut Parser<'_>) -> bool { if p.at_ts(COL_LABEL_FIRST) { let m = p.start(); - p.bump_any(); + if p.eat(IDENT) { + if p.eat(UESCAPE_KW) { + p.expect(STRING); + } + } else { + p.bump_any(); + } m.complete(p, NAME); true } else { diff --git a/crates/squawk_parser/src/lib.rs b/crates/squawk_parser/src/lib.rs index 26049995..06ca8cef 100644 --- a/crates/squawk_parser/src/lib.rs +++ b/crates/squawk_parser/src/lib.rs @@ -343,6 +343,14 @@ impl<'t> Parser<'t> { m.complete(self, SyntaxKind::IS_JSON_SCALAR); return true; } + SyntaxKind::NOT_SIMILAR_TO => { + let m = self.start(); + self.bump(SyntaxKind::NOT_KW); + self.bump(SyntaxKind::SIMILAR_KW); + self.bump(SyntaxKind::TO_KW); + m.complete(self, SyntaxKind::NOT_SIMILAR_TO); + return true; + } SyntaxKind::IS_NOT_DISTINCT_FROM => { let m = self.start(); self.bump(SyntaxKind::IS_KW); @@ -767,6 +775,12 @@ impl<'t> Parser<'t> { } return false; } + SyntaxKind::NOT_SIMILAR_TO => self.at_composite3( + n, + SyntaxKind::NOT_KW, + SyntaxKind::SIMILAR_KW, + SyntaxKind::TO_KW, + ), // similar to SyntaxKind::SIMILAR_TO => self.at_composite2( n, diff --git a/crates/squawk_parser/tests/data/ok/select_operators.sql b/crates/squawk_parser/tests/data/ok/select_operators.sql index 57e42c7e..3f8ce46a 100644 --- a/crates/squawk_parser/tests/data/ok/select_operators.sql +++ b/crates/squawk_parser/tests/data/ok/select_operators.sql @@ -180,9 +180,17 @@ select U&'\0061\0308bc' is not nfd normalized; -- pattern_matching -- like select 'foo' like 'bar'; - +select 'foo' like 'bar' escape '#'; -- not like select 'foo' not like 'bar'; +select 'foo' not like 'bar' escape '#'; + +-- ilike +select 'foo' ilike 'bar'; +select 'foo' ilike 'bar' escape '#'; +-- not ilike +select 'foo' not ilike 'bar'; +select 'foo' not ilike 'bar' escape '#'; -- ~~ select 'a' ~~ 'b'; @@ -192,6 +200,10 @@ select 'a' !~~ 'b'; -- similar to select 'abc' similar to 'abc'; +select 'abc' similar to 'abc' escape '#'; + +select 'abc' not similar to 'abc'; +select 'abc' not similar to 'abc' escape '#'; -- posix regex -- string matches regex case sensitive diff --git a/crates/squawk_parser/tests/data/regression_suite/strings.sql b/crates/squawk_parser/tests/data/regression_suite/strings.sql index 5fe55e60..5ea3ba3f 100644 --- a/crates/squawk_parser/tests/data/regression_suite/strings.sql +++ b/crates/squawk_parser/tests/data/regression_suite/strings.sql @@ -28,7 +28,7 @@ SELECT 'tricky' AS U&"\" UESCAPE '!'; SELECT U&'wrong: \061'; SELECT U&'wrong: \+0061'; -SELECT U&'wrong: +0061' UESCAPE +; +-- SELECT U&'wrong: +0061' UESCAPE +; SELECT U&'wrong: +0061' UESCAPE '+'; SELECT U&'wrong: \db99'; diff --git a/crates/squawk_parser/tests/snapshots/tests__regression_strings.snap b/crates/squawk_parser/tests/snapshots/tests__regression_strings.snap index 6b8a7cc7..b9ebda28 100644 --- a/crates/squawk_parser/tests/snapshots/tests__regression_strings.snap +++ b/crates/squawk_parser/tests/snapshots/tests__regression_strings.snap @@ -2,52 +2,4 @@ source: crates/squawk_parser/tests/tests.rs input_file: crates/squawk_parser/tests/data/regression_suite/strings.sql --- -ERROR@536: missing comma -ERROR@563: missing comma -ERROR@625: missing comma -ERROR@667: missing comma -ERROR@763: missing comma -ERROR@765: expected an expression, found SEMICOLON -ERROR@798: missing comma -ERROR@1460: missing comma -ERROR@1487: missing comma -ERROR@1523: missing comma -ERROR@1565: missing comma -ERROR@1661: missing comma -ERROR@6313: missing comma -ERROR@6369: missing comma -ERROR@6621: missing comma -ERROR@6717: missing comma -ERROR@6775: missing comma -ERROR@17083: missing comma -ERROR@17136: missing comma -ERROR@17188: missing comma -ERROR@17242: missing comma -ERROR@17354: missing comma -ERROR@17403: missing comma -ERROR@17455: missing comma -ERROR@17510: missing comma -ERROR@17562: missing comma -ERROR@17617: missing comma -ERROR@17675: missing comma -ERROR@17735: missing comma -ERROR@17787: missing comma -ERROR@17841: missing comma -ERROR@17894: missing comma -ERROR@17949: missing comma -ERROR@18003: missing comma -ERROR@18060: missing comma -ERROR@18112: missing comma -ERROR@18167: missing comma -ERROR@18230: missing comma -ERROR@18302: missing comma -ERROR@18406: missing comma -ERROR@18459: missing comma -ERROR@18511: missing comma -ERROR@18565: missing comma -ERROR@18616: missing comma -ERROR@18669: missing comma -ERROR@18721: missing comma -ERROR@18775: missing comma -ERROR@18827: missing comma -ERROR@18882: missing comma + diff --git a/crates/squawk_parser/tests/snapshots/tests__regression_suite_errors.snap b/crates/squawk_parser/tests/snapshots/tests__regression_suite_errors.snap index 2b211282..2ccc9951 100644 --- a/crates/squawk_parser/tests/snapshots/tests__regression_suite_errors.snap +++ b/crates/squawk_parser/tests/snapshots/tests__regression_suite_errors.snap @@ -2,4 +2,4 @@ source: crates/squawk_parser/tests/tests.rs expression: "out.join(\"\\n\")" --- -tests/snapshots/tests__regression_strings.snap:49 + diff --git a/crates/squawk_parser/tests/snapshots/tests__select_funcs_ok.snap b/crates/squawk_parser/tests/snapshots/tests__select_funcs_ok.snap index 520e3ee1..fa876662 100644 --- a/crates/squawk_parser/tests/snapshots/tests__select_funcs_ok.snap +++ b/crates/squawk_parser/tests/snapshots/tests__select_funcs_ok.snap @@ -730,13 +730,14 @@ SOURCE_FILE WHITESPACE " " SIMILAR_KW "similar" WHITESPACE " " - NAME_REF - IDENT "b" - WHITESPACE " " - ESCAPE_KW "escape" - WHITESPACE " " - NAME_REF - IDENT "c" + BIN_EXPR + NAME_REF + IDENT "b" + WHITESPACE " " + ESCAPE_KW "escape" + WHITESPACE " " + NAME_REF + IDENT "c" R_PAREN ")" SEMICOLON ";" WHITESPACE "\n\n" diff --git a/crates/squawk_parser/tests/snapshots/tests__select_operators_ok.snap b/crates/squawk_parser/tests/snapshots/tests__select_operators_ok.snap index 718ab1a7..9dd4dda0 100644 --- a/crates/squawk_parser/tests/snapshots/tests__select_operators_ok.snap +++ b/crates/squawk_parser/tests/snapshots/tests__select_operators_ok.snap @@ -1655,7 +1655,29 @@ SOURCE_FILE LITERAL STRING "'bar'" SEMICOLON ";" - WHITESPACE "\n\n" + WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + BIN_EXPR + LITERAL + STRING "'foo'" + WHITESPACE " " + LIKE_KW "like" + WHITESPACE " " + BIN_EXPR + LITERAL + STRING "'bar'" + WHITESPACE " " + ESCAPE_KW "escape" + WHITESPACE " " + LITERAL + STRING "'#'" + SEMICOLON ";" + WHITESPACE "\n" COMMENT "-- not like" WHITESPACE "\n" SELECT @@ -1676,6 +1698,117 @@ SOURCE_FILE LITERAL STRING "'bar'" SEMICOLON ";" + WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + BIN_EXPR + LITERAL + STRING "'foo'" + WHITESPACE " " + NOT_LIKE + NOT_KW "not" + WHITESPACE " " + LIKE_KW "like" + WHITESPACE " " + BIN_EXPR + LITERAL + STRING "'bar'" + WHITESPACE " " + ESCAPE_KW "escape" + WHITESPACE " " + LITERAL + STRING "'#'" + SEMICOLON ";" + WHITESPACE "\n\n" + COMMENT "-- ilike" + WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + BIN_EXPR + LITERAL + STRING "'foo'" + WHITESPACE " " + ILIKE_KW "ilike" + WHITESPACE " " + LITERAL + STRING "'bar'" + SEMICOLON ";" + WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + BIN_EXPR + LITERAL + STRING "'foo'" + WHITESPACE " " + ILIKE_KW "ilike" + WHITESPACE " " + BIN_EXPR + LITERAL + STRING "'bar'" + WHITESPACE " " + ESCAPE_KW "escape" + WHITESPACE " " + LITERAL + STRING "'#'" + SEMICOLON ";" + WHITESPACE "\n" + COMMENT "-- not ilike" + WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + BIN_EXPR + LITERAL + STRING "'foo'" + WHITESPACE " " + NOT_ILIKE + NOT_KW "not" + WHITESPACE " " + ILIKE_KW "ilike" + WHITESPACE " " + LITERAL + STRING "'bar'" + SEMICOLON ";" + WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + BIN_EXPR + LITERAL + STRING "'foo'" + WHITESPACE " " + NOT_ILIKE + NOT_KW "not" + WHITESPACE " " + ILIKE_KW "ilike" + WHITESPACE " " + BIN_EXPR + LITERAL + STRING "'bar'" + WHITESPACE " " + ESCAPE_KW "escape" + WHITESPACE " " + LITERAL + STRING "'#'" + SEMICOLON ";" WHITESPACE "\n\n" COMMENT "-- ~~" WHITESPACE "\n" @@ -1738,6 +1871,79 @@ SOURCE_FILE LITERAL STRING "'abc'" SEMICOLON ";" + WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + BIN_EXPR + LITERAL + STRING "'abc'" + WHITESPACE " " + SIMILAR_TO + SIMILAR_KW "similar" + WHITESPACE " " + TO_KW "to" + WHITESPACE " " + BIN_EXPR + LITERAL + STRING "'abc'" + WHITESPACE " " + ESCAPE_KW "escape" + WHITESPACE " " + LITERAL + STRING "'#'" + SEMICOLON ";" + WHITESPACE "\n\n" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + BIN_EXPR + LITERAL + STRING "'abc'" + WHITESPACE " " + NOT_SIMILAR_TO + NOT_KW "not" + WHITESPACE " " + SIMILAR_KW "similar" + WHITESPACE " " + TO_KW "to" + WHITESPACE " " + LITERAL + STRING "'abc'" + SEMICOLON ";" + WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + BIN_EXPR + LITERAL + STRING "'abc'" + WHITESPACE " " + NOT_SIMILAR_TO + NOT_KW "not" + WHITESPACE " " + SIMILAR_KW "similar" + WHITESPACE " " + TO_KW "to" + WHITESPACE " " + BIN_EXPR + LITERAL + STRING "'abc'" + WHITESPACE " " + ESCAPE_KW "escape" + WHITESPACE " " + LITERAL + STRING "'#'" + SEMICOLON ";" WHITESPACE "\n\n" COMMENT "-- posix regex" WHITESPACE "\n" diff --git a/crates/squawk_syntax/src/ast/generated/nodes.rs b/crates/squawk_syntax/src/ast/generated/nodes.rs index db9d33d7..8aaa0756 100644 --- a/crates/squawk_syntax/src/ast/generated/nodes.rs +++ b/crates/squawk_syntax/src/ast/generated/nodes.rs @@ -6202,6 +6202,25 @@ impl NotOf { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct NotSimilarTo { + pub(crate) syntax: SyntaxNode, +} +impl NotSimilarTo { + #[inline] + pub fn not_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::NOT_KW) + } + #[inline] + pub fn similar_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::SIMILAR_KW) + } + #[inline] + pub fn to_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::TO_KW) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct NotValid { pub(crate) syntax: SyntaxNode, @@ -6428,6 +6447,10 @@ impl Op { support::child(&self.syntax) } #[inline] + pub fn not_similar_to(&self) -> Option { + support::child(&self.syntax) + } + #[inline] pub fn operator_call(&self) -> Option { support::child(&self.syntax) } @@ -14680,6 +14703,24 @@ impl AstNode for NotOf { &self.syntax } } +impl AstNode for NotSimilarTo { + #[inline] + fn can_cast(kind: SyntaxKind) -> bool { + kind == SyntaxKind::NOT_SIMILAR_TO + } + #[inline] + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + #[inline] + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} impl AstNode for NotValid { #[inline] fn can_cast(kind: SyntaxKind) -> bool { diff --git a/crates/squawk_syntax/src/postgresql.ungram b/crates/squawk_syntax/src/postgresql.ungram index eb987152..6b76850b 100644 --- a/crates/squawk_syntax/src/postgresql.ungram +++ b/crates/squawk_syntax/src/postgresql.ungram @@ -158,6 +158,9 @@ Neq = SimilarTo = 'similar' 'to' +NotSimilarTo = + 'not' 'similar' 'to' + AtTimeZone = 'at' 'time' 'zone' @@ -165,7 +168,7 @@ IsNot = 'is' 'not' Op = -'or' | Gteq | '<' | '>' | FatArrow | '=' | 'in' | Neqb | Lteq | '+' | 'overlaps' | 'like' | 'ilike' | NotLike | NotIlike | NotIn | CustomOp | IsDistinctFrom | IsNotDistinctFrom | OperatorCall | 'is' | '^' | '%' | 'and' | '/' | Neq | 'collate' | '-' | ColonEq | ColonColon | 'value' | ':' | IsNot | SimilarTo | AtTimeZone | IsJson | IsJsonValue | IsNotJson | IsJsonObject | IsJsonArray |IsJsonScalar | IsNotJsonValue | IsJsonObject | IsNotJsonArray | IsNotJsonScalar +'or' | Gteq | '<' | '>' | FatArrow | '=' | 'in' | Neqb | Lteq | '+' | 'overlaps' | 'like' | 'ilike' | NotLike | NotIlike | NotIn | CustomOp | IsDistinctFrom | IsNotDistinctFrom | OperatorCall | 'is' | '^' | '%' | 'and' | '/' | Neq | 'collate' | '-' | ColonEq | ColonColon | 'value' | ':' | IsNot | SimilarTo | NotSimilarTo | AtTimeZone | IsJson | IsJsonValue | IsNotJson | IsJsonObject | IsJsonArray |IsJsonScalar | IsNotJsonValue | IsJsonObject | IsNotJsonArray | IsNotJsonScalar IsJson = 'is' 'json' JsonKeysUniqueClause?