Skip to content

Commit f728f57

Browse files
lemonadern and tanzaku authored
(tree-sitter module) add parse_2way function (#24)
* fix parsing 2way SQL
* change: remove ts_parse re-export from lib.rs
* fix testcase
* feat: Support /*$ */ style replacement strings in 2-way SQL tests

---------

Co-authored-by: tanzaku <tanzaku@users.noreply.github.com>
1 parent 6622c98 commit f728f57

File tree

7 files changed

+266
-215
lines changed

7 files changed

+266
-215
lines changed

crates/postgresql-cst-parser/examples/tree_sitter_like.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use postgresql_cst_parser::{tree_sitter::TreeCursor, ts_parse};
1+
use postgresql_cst_parser::tree_sitter::{self, TreeCursor};
22

33
fn main() {
44
let src = r#"
@@ -18,7 +18,7 @@ select
1818
1919
"#;
2020

21-
let tree = ts_parse(&src).unwrap();
21+
let tree = tree_sitter::parse(&src).unwrap();
2222
let root = tree.root_node();
2323
let mut cursor = root.walk();
2424

crates/postgresql-cst-parser/src/cst.rs

Lines changed: 54 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@ pub(crate) mod lr_parse_state;
44
pub(crate) use extra::*;
55
pub(crate) use lr_parse_state::*;
66

7-
use std::collections::HashSet;
8-
97
use cstree::{
108
build::GreenNodeBuilder, green::GreenNode, interning::Resolver, RawSyntaxKind, Syntax,
119
};
@@ -66,17 +64,24 @@ pub struct ParseError {
6664
pub end_byte_pos: usize,
6765
}
6866

67+
/// Determines whether a node contains a token, either directly or in any of its descendants.
68+
/// Used to drop nodes that contain no tokens while keeping completed nodes that hold a dummy token.
69+
fn contains_token(node: &Node) -> bool {
70+
if node.token.is_some() {
71+
return true;
72+
}
73+
74+
node.children.iter().any(contains_token)
75+
}
76+
6977
impl Parser {
7078
fn parse_rec(
7179
&mut self,
7280
node: &Node,
7381
peekable: &mut std::iter::Peekable<std::vec::IntoIter<Extra>>,
74-
complement_token: &HashSet<usize>,
7582
) {
7683
if cfg!(feature = "remove-empty-node") {
77-
if node.start_byte_pos == node.end_byte_pos
78-
&& !complement_token.contains(&node.start_byte_pos)
79-
{
84+
if node.start_byte_pos == node.end_byte_pos && !contains_token(node) {
8085
return;
8186
}
8287
}
@@ -104,23 +109,18 @@ impl Parser {
104109
self.builder.start_node(kind);
105110
node.children
106111
.iter()
107-
.for_each(|c| self.parse_rec(c, peekable, complement_token));
112+
.for_each(|c| self.parse_rec(c, peekable));
108113
self.builder.finish_node();
109114
}
110115
}
111116

112-
fn parse(
113-
mut self,
114-
nodes: &Vec<&Node>,
115-
extras: Vec<Extra>,
116-
complement_token: &HashSet<usize>,
117-
) -> (GreenNode, impl Resolver) {
117+
fn parse(mut self, nodes: &Vec<&Node>, extras: Vec<Extra>) -> (GreenNode, impl Resolver) {
118118
let mut peekable = extras.into_iter().peekable();
119119

120120
self.builder.start_node(SyntaxKind::Root);
121121

122122
for node in nodes {
123-
self.parse_rec(node, &mut peekable, complement_token);
123+
self.parse_rec(node, &mut peekable);
124124
}
125125

126126
while let Some(Extra { kind, comment, .. }) = peekable.peek() {
@@ -235,8 +235,43 @@ pub fn parse_with_transformer(
235235
let action_table = unsafe { action_table_u8.align_to::<i16>().1 };
236236
let goto_table = unsafe { goto_table_u8.align_to::<i16>().1 };
237237

238+
struct TokenQueue {
239+
tokens: std::iter::Peekable<std::vec::IntoIter<Token>>,
240+
dummy_token: Option<Token>,
241+
}
242+
243+
impl TokenQueue {
244+
fn new(tokens: Vec<Token>) -> Self {
245+
Self {
246+
tokens: tokens.into_iter().peekable(),
247+
dummy_token: None,
248+
}
249+
}
250+
251+
fn next(&mut self) -> Option<Token> {
252+
if self.dummy_token.is_some() {
253+
let dummy_token = self.dummy_token.take();
254+
dummy_token
255+
} else {
256+
self.tokens.next()
257+
}
258+
}
259+
260+
fn peek(&mut self) -> Option<&Token> {
261+
self.dummy_token.as_ref().or_else(|| self.tokens.peek())
262+
}
263+
264+
fn insert_dummy_token(&mut self, token: Token) {
265+
if self.dummy_token.is_some() {
266+
panic!();
267+
}
268+
269+
self.dummy_token = Some(token);
270+
}
271+
}
272+
238273
let mut stack: Vec<(u32, Node)> = Vec::new();
239-
let mut tokens: std::iter::Peekable<std::vec::IntoIter<Token>> = tokens.into_iter().peekable();
274+
let mut tokens = TokenQueue::new(tokens);
240275

241276
stack.push((
242277
0,
@@ -251,7 +286,6 @@ pub fn parse_with_transformer(
251286

252287
let mut last_pos = 0;
253288
let mut extras: Vec<Extra> = Vec::new();
254-
let mut complement_token = HashSet::new();
255289

256290
loop {
257291
let state = stack.last().unwrap().0;
@@ -292,7 +326,6 @@ pub fn parse_with_transformer(
292326
continue;
293327
}
294328

295-
let mut insert_dummy_token = false;
296329
let mut action = match action_table[(state * num_terminal_symbol() + cid) as usize] {
297330
0x7FFF => Action::Error,
298331
v if v > 0 => Action::Shift((v - 1) as usize),
@@ -301,7 +334,7 @@ pub fn parse_with_transformer(
301334
};
302335

303336
// transform
304-
{
337+
if action == Action::Error {
305338
let lr_parse_state = LRParseState {
306339
state,
307340
stack: &stack,
@@ -326,7 +359,6 @@ pub fn parse_with_transformer(
326359
kind: token_kind,
327360
value: String::new(),
328361
};
329-
complement_token.insert(token.start_byte_pos);
330362

331363
action = match action_table[(state * num_terminal_symbol() + cid) as usize]
332364
{
@@ -335,7 +367,7 @@ pub fn parse_with_transformer(
335367
v if v < 0 => Action::Reduce((-v - 1) as usize),
336368
_ => Action::Accept,
337369
};
338-
insert_dummy_token = true;
370+
tokens.insert_dummy_token(token.clone());
339371
}
340372

341373
ParseTransform::SkipToken => {
@@ -387,9 +419,7 @@ pub fn parse_with_transformer(
387419
last_pos = token.end_byte_pos;
388420

389421
stack.push((next_state as u32, node));
390-
if !insert_dummy_token {
391-
tokens.next();
392-
}
422+
tokens.next();
393423
}
394424
Action::Reduce(rule_index) => {
395425
let rule = &RULES[rule_index];
@@ -495,7 +525,7 @@ pub fn parse_with_transformer(
495525
builder: GreenNodeBuilder::new(),
496526
};
497527
let root: Vec<&Node> = stack[1..].iter().map(|s| &s.1).collect();
498-
let (ast, resolver) = parser.parse(&root, extras, &complement_token);
528+
let (ast, resolver) = parser.parse(&root, extras);
499529

500530
Ok(SyntaxNode::new_root_with_resolver(ast, resolver))
501531
}

crates/postgresql-cst-parser/src/lib.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ use transform::ParseTransformer;
2828
use transform::SkipExtraComma;
2929
use transform::SkipExtraOperator;
3030
pub use tree_sitter::parse as ts_parse;
31+
pub use tree_sitter::parse_2way as ts_parse_2way;
3132

3233
pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
3334
cst::parse(input)
@@ -56,3 +57,17 @@ pub fn parse_2way_with_transformers(
5657
) -> Result<ResolvedNode, ParseError> {
5758
parse_with_transformer(input, transformers)
5859
}
60+
61+
#[cfg(test)]
62+
mod tests {
63+
use crate::parse_2way;
64+
65+
// Smoke test: 2-way SQL whose select item is only a `/*param*/` comment and whose
// FROM clause is only a `/*#foo*/` replacement comment must still parse successfully.
#[test]
66+
fn test() {
67+
let s = r#"select /*param*/ as A
68+
from /*#foo*/
69+
;
70+
"#;
71+
assert!(parse_2way(s).is_ok());
72+
}
73+
}
Lines changed: 75 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,74 +1,75 @@
1-
use cstree::{RawSyntaxKind, Syntax};
2-
3-
use crate::{lexer::TokenKind, syntax_kind::SyntaxKind};
4-
5-
use super::{num_terminal_symbol, LRParseState, ParseTransform, ParseTransformer};
6-
7-
/// Complete missing replacement string sample values ​​(FROM clause only)
8-
pub struct ComplementMissingFromTableTransformer;
9-
10-
fn is_replacement_string_comment(comment: &str) -> bool {
11-
comment.starts_with("/*#") && comment.ends_with("*/")
12-
}
13-
14-
fn is_missing_replacement_string_comment<'a>(lr_parse_state: &LRParseState<'a>) -> bool {
15-
let Some(extra) = lr_parse_state.previous_extra() else {
16-
return false;
17-
};
18-
19-
if !is_replacement_string_comment(extra.comment) {
20-
return false;
21-
}
22-
23-
// If there is a space after the comment immediately following the replacement string, the table name that should be there is omitted.
24-
extra.end_byte_pos != lr_parse_state.token.start_byte_pos
25-
}
26-
27-
fn is_from_table<'a>(lr_parse_state: &LRParseState<'a>) -> bool {
28-
match SyntaxKind::from_raw(RawSyntaxKind(
29-
lr_parse_state.stack.last().unwrap().1.component_id,
30-
)) {
31-
SyntaxKind::FROM => true,
32-
SyntaxKind::Comma => {
33-
let prev2_kind = lr_parse_state
34-
.stack
35-
.iter()
36-
.nth_back(1)
37-
.map(|(_, node)| SyntaxKind::from_raw(RawSyntaxKind(node.component_id)));
38-
39-
if prev2_kind == Some(SyntaxKind::from_list) {
40-
true
41-
} else {
42-
false
43-
}
44-
}
45-
_ => false,
46-
}
47-
}
48-
49-
fn is_missing_from_replacement_value<'a>(lr_parse_state: &LRParseState<'a>) -> bool {
50-
if is_from_table(lr_parse_state) {
51-
// Check if IDENT is in SHIFT enabled state
52-
let action_index =
53-
(lr_parse_state.state * num_terminal_symbol()) as usize + SyntaxKind::IDENT as usize;
54-
55-
let a = lr_parse_state.action_table[action_index];
56-
a != 0x7FFF
57-
} else {
58-
false
59-
}
60-
}
61-
62-
impl ParseTransformer for ComplementMissingFromTableTransformer {
63-
fn transform<'a>(&self, lr_parse_state: &LRParseState<'a>) -> Option<ParseTransform> {
64-
if !is_missing_replacement_string_comment(lr_parse_state) {
65-
return None;
66-
}
67-
68-
if !is_missing_from_replacement_value(&lr_parse_state) {
69-
return None;
70-
}
71-
72-
Some(ParseTransform::InsertToken(TokenKind::IDENT))
73-
}
74-
}
1+
use cstree::{RawSyntaxKind, Syntax};
2+
3+
use crate::{lexer::TokenKind, syntax_kind::SyntaxKind};
4+
5+
use super::{num_terminal_symbol, LRParseState, ParseTransform, ParseTransformer};
6+
7+
/// Completes missing replacement-string sample values (FROM clause only).
8+
pub struct ComplementMissingFromTableTransformer;
9+
10+
/// Returns `true` if `comment` is a replacement-string comment: one that opens with
/// `/*#` or `/*$` and closes with `*/`.
fn is_replacement_string_comment(comment: &str) -> bool {
    // Both accepted forms share the closing delimiter, so it is checked once and the
    // alternatives are grouped explicitly instead of relying on `&&` binding tighter
    // than `||`.
    let has_marker = comment.starts_with("/*#") || comment.starts_with("/*$");
    has_marker && comment.ends_with("*/")
}
14+
15+
/// Returns `true` when the extra returned by `previous_extra()` is a replacement-string
/// comment that does not end exactly where the current token starts.
///
/// Returns `false` when there is no previous extra or it is not a replacement-string comment.
fn is_missing_replacement_string_comment<'a>(lr_parse_state: &LRParseState<'a>) -> bool {
16+
let Some(extra) = lr_parse_state.previous_extra() else {
17+
return false;
18+
};
19+
20+
if !is_replacement_string_comment(extra.comment) {
21+
return false;
22+
}
23+
24+
// A gap between the end of the replacement-string comment and the start of the next
// token means the sample value (the table name) that should directly follow the
// comment has been omitted.
25+
extra.end_byte_pos != lr_parse_state.token.start_byte_pos
26+
}
27+
28+
fn is_from_table<'a>(lr_parse_state: &LRParseState<'a>) -> bool {
29+
match SyntaxKind::from_raw(RawSyntaxKind(
30+
lr_parse_state.stack.last().unwrap().1.component_id,
31+
)) {
32+
SyntaxKind::FROM => true,
33+
SyntaxKind::Comma => {
34+
let prev2_kind = lr_parse_state
35+
.stack
36+
.iter()
37+
.nth_back(1)
38+
.map(|(_, node)| SyntaxKind::from_raw(RawSyntaxKind(node.component_id)));
39+
40+
if prev2_kind == Some(SyntaxKind::from_list) {
41+
true
42+
} else {
43+
false
44+
}
45+
}
46+
_ => false,
47+
}
48+
}
49+
50+
/// Returns `true` when `is_from_table` holds for the current parse state and the action
/// table entry for `IDENT` in that state is not the error marker `0x7FFF`.
fn is_missing_from_replacement_value<'a>(lr_parse_state: &LRParseState<'a>) -> bool {
51+
if is_from_table(lr_parse_state) {
52+
// Look up the action for IDENT in the current state; any entry other than
// 0x7FFF (the error entry) means an IDENT token is acceptable here.
53+
let action_index =
54+
(lr_parse_state.state * num_terminal_symbol()) as usize + SyntaxKind::IDENT as usize;
55+
56+
let a = lr_parse_state.action_table[action_index];
57+
a != 0x7FFF
58+
} else {
59+
false
60+
}
61+
}
62+
63+
impl ParseTransformer for ComplementMissingFromTableTransformer {
64+
/// Requests insertion of an `IDENT` token when the previous extra is a replacement-string
/// comment with a gap before the current token and the parse state passes
/// `is_missing_from_replacement_value`; otherwise returns `None`.
fn transform<'a>(&self, lr_parse_state: &LRParseState<'a>) -> Option<ParseTransform> {
65+
if !is_missing_replacement_string_comment(lr_parse_state) {
66+
return None;
67+
}
68+
69+
if !is_missing_from_replacement_value(&lr_parse_state) {
70+
return None;
71+
}
72+
73+
Some(ParseTransform::InsertToken(TokenKind::IDENT))
74+
}
75+
}

crates/postgresql-cst-parser/src/tree_sitter.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ pub fn parse(input: &str) -> Result<Tree, cst::ParseError> {
2222
Ok(Tree::new(input, root, range_map))
2323
}
2424

25+
/// Parses `input` with [`crate::parse_2way`] and wraps the result in a [`Tree`].
///
/// # Errors
///
/// Returns the [`cst::ParseError`] produced by `crate::parse_2way` when parsing fails.
pub fn parse_2way(input: &str) -> Result<Tree, cst::ParseError> {
26+
let parsed = crate::parse_2way(input)?;
27+
let (root, range_map) = get_ts_tree_and_range_map(input, &parsed);
28+
Ok(Tree::new(input, root, range_map))
29+
}
30+
2531
pub struct Tree {
2632
src: String,
2733
root: ResolvedNode,

0 commit comments

Comments
 (0)