diff --git a/crates/squawk_parser/src/generated/syntax_kind.rs b/crates/squawk_parser/src/generated/syntax_kind.rs index e83b8653..ad215842 100644 --- a/crates/squawk_parser/src/generated/syntax_kind.rs +++ b/crates/squawk_parser/src/generated/syntax_kind.rs @@ -780,8 +780,10 @@ pub enum SyntaxKind { INTERVAL_TYPE, INTO_CLAUSE, IS_DISTINCT_FROM, + IS_NORMALIZED, IS_NOT, IS_NOT_DISTINCT_FROM, + IS_NOT_NORMALIZED, JOIN, JSON_BEHAVIOR_CLAUSE, JSON_FORMAT_CLAUSE, @@ -945,6 +947,7 @@ pub enum SyntaxKind { TRUNCATE, TUPLE_EXPR, TYPE, + UNICODE_NORMAL_FORM, UNIQUE_CONSTRAINT, UNLISTEN, UPDATE, diff --git a/crates/squawk_parser/src/grammar.rs b/crates/squawk_parser/src/grammar.rs index 4d070b24..47d5c8b1 100644 --- a/crates/squawk_parser/src/grammar.rs +++ b/crates/squawk_parser/src/grammar.rs @@ -1318,6 +1318,18 @@ fn postfix_expr( lhs = m.complete(p, POSTFIX_EXPR); break; } + IS_KW if p.at(IS_NOT_NORMALIZED)=> { + let m = lhs.precede(p); + p.bump(IS_NOT_NORMALIZED); + lhs = m.complete(p, POSTFIX_EXPR); + break; + } + IS_KW if p.at(IS_NORMALIZED)=> { + let m = lhs.precede(p); + p.bump(IS_NORMALIZED); + lhs = m.complete(p, POSTFIX_EXPR); + break; + } NOTNULL_KW => { let m = lhs.precede(p); p.bump(NOTNULL_KW); @@ -2092,6 +2104,10 @@ fn current_op(p: &Parser<'_>, r: &Restrictions) -> (u8, SyntaxKind, Associativit NOT_KW if !r.not_disabled && p.at(NOT_ILIKE) => (6, NOT_ILIKE, Left), // not in NOT_KW if !r.not_disabled && p.at(NOT_IN) => (6, NOT_IN, Left), + // is normalized + IS_KW if !r.is_disabled && p.at(IS_NORMALIZED) => NOT_AN_OP, + // is not normalized + IS_KW if !r.is_disabled && p.at(IS_NOT_NORMALIZED) => NOT_AN_OP, // is distinct from IS_KW if !r.is_disabled && p.at(IS_DISTINCT_FROM) => (4, IS_DISTINCT_FROM, Left), // is not distinct from @@ -2765,6 +2781,12 @@ fn data_source(p: &mut Parser<'_>) { opt_alias(p); } IDENT => from_item_name(p), + CAST_KW | TREAT_KW => { + if expr(p).is_none() { + p.error("expected expression"); + } + opt_alias(p); + } _ if 
p.at_ts(FROM_ITEM_KEYWORDS_FIRST) => from_item_name(p), _ => {} } diff --git a/crates/squawk_parser/src/lib.rs b/crates/squawk_parser/src/lib.rs index 5e8b9b17..856d165d 100644 --- a/crates/squawk_parser/src/lib.rs +++ b/crates/squawk_parser/src/lib.rs @@ -211,6 +211,39 @@ impl<'t> Parser<'t> { m.complete(self, SyntaxKind::AT_TIME_ZONE); return true; } + SyntaxKind::IS_NOT_NORMALIZED => { + let m = self.start(); + self.bump(SyntaxKind::IS_KW); + self.bump(SyntaxKind::NOT_KW); + if matches!( + self.current(), + SyntaxKind::NFC_KW + | SyntaxKind::NFD_KW + | SyntaxKind::NFKC_KW + | SyntaxKind::NFKD_KW + ) { + self.bump_any(); + } + self.bump(SyntaxKind::NORMALIZED_KW); + m.complete(self, SyntaxKind::IS_NOT_NORMALIZED); + return true; + } + SyntaxKind::IS_NORMALIZED => { + let m = self.start(); + self.bump(SyntaxKind::IS_KW); + if matches!( + self.current(), + SyntaxKind::NFC_KW + | SyntaxKind::NFD_KW + | SyntaxKind::NFKC_KW + | SyntaxKind::NFKD_KW + ) { + self.bump_any(); + } + self.bump(SyntaxKind::NORMALIZED_KW); + m.complete(self, SyntaxKind::IS_NORMALIZED); + return true; + } SyntaxKind::IS_NOT_DISTINCT_FROM => { let m = self.start(); self.bump(SyntaxKind::IS_KW); @@ -564,6 +597,50 @@ impl<'t> Parser<'t> { SyntaxKind::DISTINCT_KW, SyntaxKind::FROM_KW, ), + // is normalized + SyntaxKind::IS_NORMALIZED => { + if self.at(SyntaxKind::IS_KW) { + if matches!( + self.nth(1), + SyntaxKind::NFC_KW + | SyntaxKind::NFD_KW + | SyntaxKind::NFKC_KW + | SyntaxKind::NFKD_KW + ) { + if self.nth_at(2, SyntaxKind::NORMALIZED_KW) { + return true; + } + } else { + if self.nth_at(1, SyntaxKind::NORMALIZED_KW) { + return true; + } + } + } + return false; + } + // is not normalized + SyntaxKind::IS_NOT_NORMALIZED => { + if self.at(SyntaxKind::IS_KW) && self.nth_at(1, SyntaxKind::NOT_KW) { + if matches!( + self.nth(2), + SyntaxKind::NFC_KW + | SyntaxKind::NFD_KW + | SyntaxKind::NFKC_KW + | SyntaxKind::NFKD_KW + ) { + // IS NOT [form] NORMALIZED: IS=0, NOT=1, form=2, so NORMALIZED_KW is at offset 3 + if self.nth_at(3, 
SyntaxKind::NORMALIZED_KW) { + return true; + } + } else { + // IS NOT NORMALIZED: IS=0, NOT=1, so NORMALIZED_KW is at offset 2 + if self.nth_at(2, SyntaxKind::NORMALIZED_KW) { + return true; + } + } + } + return false; + } // similar to SyntaxKind::SIMILAR_TO => self.at_composite2( n, diff --git a/crates/squawk_parser/tests/snapshots/tests__regression_create_view.snap b/crates/squawk_parser/tests/snapshots/tests__regression_create_view.snap index 8b8b76e2..33a46f2b 100644 --- a/crates/squawk_parser/tests/snapshots/tests__regression_create_view.snap +++ b/crates/squawk_parser/tests/snapshots/tests__regression_create_view.snap @@ -2,30 +2,6 @@ source: crates/squawk_parser/tests/tests.rs input_file: crates/squawk_parser/tests/data/regression_suite/create_view.sql --- -ERROR@21471: expected name -ERROR@21471: expected SEMICOLON -ERROR@21471: expected command, found CAST_KW -ERROR@21475: expected command, found L_PAREN -ERROR@21476: expected command, found INT_NUMBER -ERROR@21477: expected command, found PLUS -ERROR@21478: expected command, found INT_NUMBER -ERROR@21480: expected command, found AS_KW -ERROR@21483: expected command, found IDENT -ERROR@21487: expected command, found R_PAREN -ERROR@21489: expected command, found AS_KW -ERROR@21492: expected command, found IDENT -ERROR@21494: expected command, found COMMA -ERROR@21498: expected command, found CAST_KW -ERROR@21502: expected command, found L_PAREN -ERROR@21503: expected command, found INT_NUMBER -ERROR@21504: expected command, found PLUS -ERROR@21505: expected command, found INT_NUMBER -ERROR@21507: expected command, found AS_KW -ERROR@21510: expected command, found IDENT -ERROR@21514: expected command, found R_PAREN -ERROR@21516: expected command, found AS_KW -ERROR@21519: expected command, found IDENT -ERROR@21912: missing comma ERROR@22280: expected R_PAREN ERROR@22285: expected from item, got STRING ERROR@22285: expected SEMICOLON diff --git a/crates/squawk_parser/tests/snapshots/tests__regression_suite_errors.snap 
b/crates/squawk_parser/tests/snapshots/tests__regression_suite_errors.snap index 0783e340..bd92c53a 100644 --- a/crates/squawk_parser/tests/snapshots/tests__regression_suite_errors.snap +++ b/crates/squawk_parser/tests/snapshots/tests__regression_suite_errors.snap @@ -2,7 +2,7 @@ source: crates/squawk_parser/tests/tests.rs expression: "out.join(\"\\n\")" --- -tests/snapshots/tests__regression_create_view.snap:235 +tests/snapshots/tests__regression_create_view.snap:211 tests/snapshots/tests__regression_errors.snap:286 tests/snapshots/tests__regression_foreign_data.snap:51 tests/snapshots/tests__regression_foreign_key.snap:24 @@ -29,7 +29,6 @@ tests/snapshots/tests__regression_transactions.snap:114 tests/snapshots/tests__regression_triggers.snap:51 tests/snapshots/tests__regression_tsearch.snap:62 tests/snapshots/tests__regression_tuplesort.snap:188 -tests/snapshots/tests__regression_unicode.snap:40 tests/snapshots/tests__regression_union.snap:25 tests/snapshots/tests__regression_update.snap:29 tests/snapshots/tests__regression_vacuum.snap:21 diff --git a/crates/squawk_parser/tests/snapshots/tests__regression_unicode.snap b/crates/squawk_parser/tests/snapshots/tests__regression_unicode.snap deleted file mode 100644 index 0972c861..00000000 --- a/crates/squawk_parser/tests/snapshots/tests__regression_unicode.snap +++ /dev/null @@ -1,44 +0,0 @@ ---- -source: crates/squawk_parser/tests/tests.rs -input_file: crates/squawk_parser/tests/data/regression_suite/unicode.sql ---- -ERROR@919: expected SEMICOLON -ERROR@920: expected command, found AS_KW -ERROR@923: expected command, found IDENT -ERROR@976: expected SEMICOLON -ERROR@977: expected command, found AS_KW -ERROR@980: expected command, found NFC_KW -ERROR@983: expected command, found COMMA -ERROR@989: expected command, found IDENT -ERROR@993: expected command, found IS_KW -ERROR@996: expected command, found NFD_KW -ERROR@1000: expected command, found NORMALIZED_KW -ERROR@1011: expected command, found AS_KW -ERROR@1014: 
expected command, found NFD_KW -ERROR@1017: expected command, found COMMA -ERROR@1023: expected command, found IDENT -ERROR@1027: expected command, found IS_KW -ERROR@1030: expected command, found NFKC_KW -ERROR@1035: expected command, found NORMALIZED_KW -ERROR@1046: expected command, found AS_KW -ERROR@1049: expected command, found NFKC_KW -ERROR@1053: expected command, found COMMA -ERROR@1059: expected command, found IDENT -ERROR@1063: expected command, found IS_KW -ERROR@1066: expected command, found NFKD_KW -ERROR@1071: expected command, found NORMALIZED_KW -ERROR@1082: expected command, found AS_KW -ERROR@1085: expected command, found NFKD_KW -ERROR@1090: expected command, found FROM_KW -ERROR@1097: expected command, found L_PAREN -ERROR@1242: expected SEMICOLON -ERROR@1242: expected command, found R_PAREN -ERROR@1244: expected command, found IDENT -ERROR@1249: expected command, found L_PAREN -ERROR@1250: expected command, found IDENT -ERROR@1253: expected command, found COMMA -ERROR@1255: expected command, found IDENT -ERROR@1258: expected command, found R_PAREN -ERROR@1260: expected command, found ORDER_KW -ERROR@1266: expected command, found BY_KW -ERROR@1269: expected command, found IDENT diff --git a/crates/squawk_parser/tests/snapshots/tests__select_operators_ok.snap b/crates/squawk_parser/tests/snapshots/tests__select_operators_ok.snap index 1b65bf3e..72f9c050 100644 --- a/crates/squawk_parser/tests/snapshots/tests__select_operators_ok.snap +++ b/crates/squawk_parser/tests/snapshots/tests__select_operators_ok.snap @@ -1533,13 +1533,13 @@ SOURCE_FILE WHITESPACE " " TARGET_LIST TARGET - BIN_EXPR + POSTFIX_EXPR LITERAL BYTE_STRING "U&'\\0061\\0308bc'" WHITESPACE " " - IS_KW "is" - WHITESPACE " " - NAME_REF + IS_NORMALIZED + IS_KW "is" + WHITESPACE " " NORMALIZED_KW "normalized" SEMICOLON ";" WHITESPACE "\n" @@ -1549,17 +1549,16 @@ SOURCE_FILE WHITESPACE " " TARGET_LIST TARGET - BIN_EXPR + POSTFIX_EXPR LITERAL BYTE_STRING "U&'\\0061\\0308bc'" WHITESPACE " 
" - IS_KW "is" - WHITESPACE " " - NAME_REF + IS_NORMALIZED + IS_KW "is" + WHITESPACE " " NFC_KW "nfc" - WHITESPACE " " - NAME - NORMALIZED_KW "normalized" + WHITESPACE " " + NORMALIZED_KW "normalized" SEMICOLON ";" WHITESPACE "\n" SELECT @@ -1568,17 +1567,16 @@ SOURCE_FILE WHITESPACE " " TARGET_LIST TARGET - BIN_EXPR + POSTFIX_EXPR LITERAL BYTE_STRING "U&'\\0061\\0308bc'" WHITESPACE " " - IS_KW "is" - WHITESPACE " " - NAME_REF + IS_NORMALIZED + IS_KW "is" + WHITESPACE " " NFD_KW "nfd" - WHITESPACE " " - NAME - NORMALIZED_KW "normalized" + WHITESPACE " " + NORMALIZED_KW "normalized" SEMICOLON ";" WHITESPACE "\n" SELECT @@ -1587,17 +1585,16 @@ SOURCE_FILE WHITESPACE " " TARGET_LIST TARGET - BIN_EXPR + POSTFIX_EXPR LITERAL BYTE_STRING "U&'\\0061\\0308bc'" WHITESPACE " " - IS_KW "is" - WHITESPACE " " - NAME_REF + IS_NORMALIZED + IS_KW "is" + WHITESPACE " " NFKC_KW "nfkc" - WHITESPACE " " - NAME - NORMALIZED_KW "normalized" + WHITESPACE " " + NORMALIZED_KW "normalized" SEMICOLON ";" WHITESPACE "\n" SELECT @@ -1606,17 +1603,16 @@ SOURCE_FILE WHITESPACE " " TARGET_LIST TARGET - BIN_EXPR + POSTFIX_EXPR LITERAL BYTE_STRING "U&'\\0061\\0308bc'" WHITESPACE " " - IS_KW "is" - WHITESPACE " " - NAME_REF + IS_NORMALIZED + IS_KW "is" + WHITESPACE " " NFKD_KW "nfkd" - WHITESPACE " " - NAME - NORMALIZED_KW "normalized" + WHITESPACE " " + NORMALIZED_KW "normalized" SEMICOLON ";" WHITESPACE "\n" SELECT diff --git a/crates/squawk_syntax/src/ast/generated/nodes.rs b/crates/squawk_syntax/src/ast/generated/nodes.rs index 69384472..12231f26 100644 --- a/crates/squawk_syntax/src/ast/generated/nodes.rs +++ b/crates/squawk_syntax/src/ast/generated/nodes.rs @@ -4939,6 +4939,25 @@ impl IsDistinctFrom { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct IsNormalized { + pub(crate) syntax: SyntaxNode, +} +impl IsNormalized { + #[inline] + pub fn unicode_normal_form(&self) -> Option { + support::child(&self.syntax) + } + #[inline] + pub fn is_token(&self) -> Option { + 
support::token(&self.syntax, SyntaxKind::IS_KW) + } + #[inline] + pub fn normalized_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::NORMALIZED_KW) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct IsNot { pub(crate) syntax: SyntaxNode, @@ -4977,6 +4996,29 @@ impl IsNotDistinctFrom { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct IsNotNormalized { + pub(crate) syntax: SyntaxNode, +} +impl IsNotNormalized { + #[inline] + pub fn unicode_normal_form(&self) -> Option { + support::child(&self.syntax) + } + #[inline] + pub fn is_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::IS_KW) + } + #[inline] + pub fn normalized_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::NORMALIZED_KW) + } + #[inline] + pub fn not_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::NOT_KW) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Join { pub(crate) syntax: SyntaxNode, @@ -7825,6 +7867,29 @@ impl TupleExpr { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct UnicodeNormalForm { + pub(crate) syntax: SyntaxNode, +} +impl UnicodeNormalForm { + #[inline] + pub fn nfc_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::NFC_KW) + } + #[inline] + pub fn nfd_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::NFD_KW) + } + #[inline] + pub fn nfkc_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::NFKC_KW) + } + #[inline] + pub fn nfkd_token(&self) -> Option { + support::token(&self.syntax, SyntaxKind::NFKD_KW) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct UniqueConstraint { pub(crate) syntax: SyntaxNode, @@ -12713,6 +12778,24 @@ impl AstNode for IsDistinctFrom { &self.syntax } } +impl AstNode for IsNormalized { + #[inline] + fn can_cast(kind: SyntaxKind) -> bool { + kind == SyntaxKind::IS_NORMALIZED + } + #[inline] + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + 
Some(Self { syntax }) + } else { + None + } + } + #[inline] + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} impl AstNode for IsNot { #[inline] fn can_cast(kind: SyntaxKind) -> bool { @@ -12749,6 +12832,24 @@ impl AstNode for IsNotDistinctFrom { &self.syntax } } +impl AstNode for IsNotNormalized { + #[inline] + fn can_cast(kind: SyntaxKind) -> bool { + kind == SyntaxKind::IS_NOT_NORMALIZED + } + #[inline] + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + #[inline] + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} impl AstNode for Join { #[inline] fn can_cast(kind: SyntaxKind) -> bool { @@ -15539,6 +15640,24 @@ impl AstNode for TupleExpr { &self.syntax } } +impl AstNode for UnicodeNormalForm { + #[inline] + fn can_cast(kind: SyntaxKind) -> bool { + kind == SyntaxKind::UNICODE_NORMAL_FORM + } + #[inline] + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + #[inline] + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } +} impl AstNode for UniqueConstraint { #[inline] fn can_cast(kind: SyntaxKind) -> bool { diff --git a/crates/squawk_syntax/src/postgresql.ungram b/crates/squawk_syntax/src/postgresql.ungram index feb334fa..5cc17182 100644 --- a/crates/squawk_syntax/src/postgresql.ungram +++ b/crates/squawk_syntax/src/postgresql.ungram @@ -131,6 +131,19 @@ IsDistinctFrom = IsNotDistinctFrom = 'is' 'not' 'distinct' 'from' +UnicodeNormalForm = + 'nfc' +| 'nfd' +| 'nfkc' +| 'nfkd' + +IsNormalized = + 'is' UnicodeNormalForm? 'normalized' + +// PostgreSQL syntax: text IS [ NOT ] [ form ] NORMALIZED +IsNotNormalized = + 'is' 'not' UnicodeNormalForm? 'normalized' + OperatorCall = 'operator' '(' (Path '.')? Op ')'