1+ pub ( crate ) mod extra;
2+ pub ( crate ) mod lr_parse_state;
3+
4+ pub ( crate ) use extra:: * ;
5+ pub ( crate ) use lr_parse_state:: * ;
6+
7+ use std:: collections:: HashSet ;
8+
19use cstree:: {
210 build:: GreenNodeBuilder , green:: GreenNode , interning:: Resolver , RawSyntaxKind , Syntax ,
311} ;
@@ -10,18 +18,25 @@ use crate::{
1018 rule_name_to_component_id, token_kind_to_component_id, Action , ACTION_TABLE , GOTO_TABLE ,
1119 RULES ,
1220 } ,
21+ transform:: { ParseTransform , ParseTransformer } ,
1322} ;
1423
1524use super :: { lexer:: Token , syntax_kind:: SyntaxKind } ;
1625
17- struct Node {
26+ pub ( crate ) struct Node {
1827 token : Option < Token > ,
19- component_id : u32 ,
28+ pub component_id : u32 ,
2029 children : Vec < Node > ,
2130 start_byte_pos : usize ,
2231 end_byte_pos : usize ,
2332}
2433
34+ impl From < & Node > for SyntaxKind {
35+ fn from ( value : & Node ) -> Self {
36+ SyntaxKind :: from_raw ( RawSyntaxKind ( value. component_id ) )
37+ }
38+ }
39+
/// Alias binding the cstree `Syntax` implementation for this parser to
/// [`SyntaxKind`]; used as the syntax parameter of the green-tree builder.
pub type PostgreSQLSyntax = SyntaxKind;
2641
2742impl From < SyntaxKind > for cstree:: RawSyntaxKind {
@@ -55,21 +70,30 @@ impl Parser {
5570 fn parse_rec (
5671 & mut self ,
5772 node : & Node ,
58- peekable : & mut std:: iter:: Peekable < std:: vec:: IntoIter < ( SyntaxKind , usize , usize , & str ) > > ,
73+ peekable : & mut std:: iter:: Peekable < std:: vec:: IntoIter < Extra > > ,
74+ complement_token : & HashSet < usize > ,
5975 ) {
6076 if cfg ! ( feature = "remove-empty-node" ) {
61- if node. start_byte_pos == node. end_byte_pos {
77+ if node. start_byte_pos == node. end_byte_pos
78+ && !complement_token. contains ( & node. start_byte_pos )
79+ {
6280 return ;
6381 }
6482 }
6583
66- while let Some ( ( kind, start, _, text) ) = peekable. peek ( ) {
84+ while let Some ( Extra {
85+ kind,
86+ start_byte_pos,
87+ comment,
88+ ..
89+ } ) = peekable. peek ( )
90+ {
6791 // TODO: Consider whether the presence or absence of an equals sign changes the position of comments. Determine which option is preferable
68- if * start >= node. start_byte_pos {
92+ if * start_byte_pos >= node. start_byte_pos {
6993 // if *start > node.start_byte_pos {
7094 break ;
7195 }
72- self . builder . token ( * kind, text ) ;
96+ self . builder . token ( * kind, & comment ) ;
7397 peekable. next ( ) ;
7498 }
7599
@@ -80,26 +104,27 @@ impl Parser {
80104 self . builder . start_node ( kind) ;
81105 node. children
82106 . iter ( )
83- . for_each ( |c| self . parse_rec ( c, peekable) ) ;
107+ . for_each ( |c| self . parse_rec ( c, peekable, complement_token ) ) ;
84108 self . builder . finish_node ( ) ;
85109 }
86110 }
87111
88112 fn parse (
89113 mut self ,
90114 nodes : & Vec < & Node > ,
91- extras : Vec < ( SyntaxKind , usize , usize , & str ) > ,
115+ extras : Vec < Extra > ,
116+ complement_token : & HashSet < usize > ,
92117 ) -> ( GreenNode , impl Resolver ) {
93118 let mut peekable = extras. into_iter ( ) . peekable ( ) ;
94119
95120 self . builder . start_node ( SyntaxKind :: Root ) ;
96121
97122 for node in nodes {
98- self . parse_rec ( node, & mut peekable) ;
123+ self . parse_rec ( node, & mut peekable, complement_token ) ;
99124 }
100125
101- while let Some ( ( kind, _ , _ , text ) ) = peekable. peek ( ) {
102- self . builder . token ( * kind, text ) ;
126+ while let Some ( Extra { kind, comment , .. } ) = peekable. peek ( ) {
127+ self . builder . token ( * kind, comment ) ;
103128 peekable. next ( ) ;
104129 }
105130
@@ -184,6 +209,14 @@ fn init_tokens(tokens: &mut [Token]) {
184209
185210/// Parsing a string as PostgreSQL syntax and converting it into a ResolvedNode
186211pub fn parse ( input : & str ) -> Result < ResolvedNode , ParseError > {
212+ parse_with_transformer ( input, & [ ] )
213+ }
214+
215+ /// Parsing a string as PostgreSQL syntax and converting it into a ResolvedNode
216+ pub fn parse_with_transformer (
217+ input : & str ,
218+ transformers : & [ & dyn ParseTransformer ] ,
219+ ) -> Result < ResolvedNode , ParseError > {
187220 let mut tokens = lex ( input) ;
188221
189222 if !tokens. is_empty ( ) {
@@ -217,12 +250,13 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
217250 ) ) ;
218251
219252 let mut last_pos = 0 ;
220- let mut extras: Vec < ( SyntaxKind , usize , usize , & str ) > = Vec :: new ( ) ;
253+ let mut extras: Vec < Extra > = Vec :: new ( ) ;
254+ let mut complement_token = HashSet :: new ( ) ;
221255
222256 loop {
223257 let state = stack. last ( ) . unwrap ( ) . 0 ;
224- let token = match tokens. peek ( ) {
225- Some ( token) => token,
258+ let mut token = match tokens. peek ( ) {
259+ Some ( token) => token. clone ( ) ,
226260 None => {
227261 return Err ( ParseError {
228262 message : "unexpected end of input" . to_string ( ) ,
@@ -232,39 +266,105 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
232266 }
233267 } ;
234268
235- let cid = token_kind_to_component_id ( & token. kind ) ;
269+ let mut cid = token_kind_to_component_id ( & token. kind ) ;
236270
237271 if matches ! ( token. kind, TokenKind :: C_COMMENT | TokenKind :: SQL_COMMENT ) {
238272 if last_pos < token. start_byte_pos {
239- extras. push ( (
240- SyntaxKind :: Whitespace ,
241- last_pos,
242- token. start_byte_pos ,
243- & input[ last_pos..token. start_byte_pos ] ,
244- ) ) ;
273+ extras. push ( Extra {
274+ kind : SyntaxKind :: Whitespace ,
275+ start_byte_pos : last_pos,
276+ end_byte_pos : token. start_byte_pos ,
277+ comment : & input[ last_pos..token. start_byte_pos ] ,
278+ } ) ;
245279 }
246280
247281 last_pos = token. end_byte_pos ;
248282
249283 let kind = SyntaxKind :: from_raw ( RawSyntaxKind ( cid) ) ;
250- extras. push ( (
284+ extras. push ( Extra {
251285 kind,
252- token. start_byte_pos ,
253- token. end_byte_pos ,
254- & input[ token. start_byte_pos ..token. end_byte_pos ] ,
255- ) ) ;
286+ start_byte_pos : token. start_byte_pos ,
287+ end_byte_pos : token. end_byte_pos ,
288+ comment : & input[ token. start_byte_pos ..token. end_byte_pos ] ,
289+ } ) ;
256290 tokens. next ( ) ;
257291
258292 continue ;
259293 }
260294
261- let action = match action_table[ ( state * num_terminal_symbol ( ) + cid) as usize ] {
295+ let mut insert_dummy_token = false ;
296+ let mut action = match action_table[ ( state * num_terminal_symbol ( ) + cid) as usize ] {
262297 0x7FFF => Action :: Error ,
263298 v if v > 0 => Action :: Shift ( ( v - 1 ) as usize ) ,
264299 v if v < 0 => Action :: Reduce ( ( -v - 1 ) as usize ) ,
265300 _ => Action :: Accept ,
266301 } ;
267302
303+ // transform
304+ {
305+ let lr_parse_state = LRParseState {
306+ state,
307+ stack : & stack,
308+ action_table,
309+ goto_table,
310+ extras : & extras,
311+ token : & token,
312+ } ;
313+
314+ if let Some ( parse_transform) = transformers
315+ . iter ( )
316+ . find_map ( |t| t. transform ( & lr_parse_state) )
317+ {
318+ match parse_transform {
319+ ParseTransform :: InsertToken ( token_kind) => {
320+ let last_extra = extras. last ( ) . unwrap ( ) ;
321+
322+ cid = token_kind_to_component_id ( & token_kind) ;
323+ token = Token {
324+ start_byte_pos : last_extra. end_byte_pos ,
325+ end_byte_pos : last_extra. end_byte_pos ,
326+ kind : token_kind,
327+ value : String :: new ( ) ,
328+ } ;
329+ complement_token. insert ( token. start_byte_pos ) ;
330+
331+ action = match action_table[ ( state * num_terminal_symbol ( ) + cid) as usize ]
332+ {
333+ 0x7FFF => Action :: Error ,
334+ v if v > 0 => Action :: Shift ( ( v - 1 ) as usize ) ,
335+ v if v < 0 => Action :: Reduce ( ( -v - 1 ) as usize ) ,
336+ _ => Action :: Accept ,
337+ } ;
338+ insert_dummy_token = true ;
339+ }
340+
341+ ParseTransform :: SkipToken => {
342+ // Skip tokens are treated as extras
343+ if last_pos < token. start_byte_pos {
344+ extras. push ( Extra {
345+ kind : SyntaxKind :: Whitespace ,
346+ start_byte_pos : last_pos,
347+ end_byte_pos : token. start_byte_pos ,
348+ comment : & input[ last_pos..token. start_byte_pos ] ,
349+ } ) ;
350+ }
351+
352+ last_pos = token. end_byte_pos ;
353+
354+ let kind = SyntaxKind :: from_raw ( RawSyntaxKind ( cid) ) ;
355+ extras. push ( Extra {
356+ kind,
357+ start_byte_pos : token. start_byte_pos ,
358+ end_byte_pos : token. end_byte_pos ,
359+ comment : & input[ token. start_byte_pos ..token. end_byte_pos ] ,
360+ } ) ;
361+ tokens. next ( ) ;
362+ continue ;
363+ }
364+ }
365+ }
366+ }
367+
268368 match action {
269369 Action :: Shift ( next_state) => {
270370 let node = Node {
@@ -276,18 +376,20 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
276376 } ;
277377
278378 if last_pos < token. start_byte_pos {
279- extras. push ( (
280- SyntaxKind :: Whitespace ,
281- last_pos,
282- token. start_byte_pos ,
283- & input[ last_pos..token. start_byte_pos ] ,
284- ) ) ;
379+ extras. push ( Extra {
380+ kind : SyntaxKind :: Whitespace ,
381+ start_byte_pos : last_pos,
382+ end_byte_pos : token. start_byte_pos ,
383+ comment : & input[ last_pos..token. start_byte_pos ] ,
384+ } ) ;
285385 }
286386
287387 last_pos = token. end_byte_pos ;
288388
289389 stack. push ( ( next_state as u32 , node) ) ;
290- tokens. next ( ) ;
390+ if !insert_dummy_token {
391+ tokens. next ( ) ;
392+ }
291393 }
292394 Action :: Reduce ( rule_index) => {
293395 let rule = & RULES [ rule_index] ;
@@ -308,7 +410,7 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
308410 // Adopt the larger of the end position of the previous token or the end of the space.
309411 extras
310412 . last ( )
311- . map ( |e| e. 2 )
413+ . map ( |e| e. end_byte_pos )
312414 . unwrap_or_default ( )
313415 . max ( stack. last ( ) . unwrap ( ) . 1 . end_byte_pos )
314416 } ) ;
@@ -364,12 +466,12 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
364466
365467 while let Some ( token) = tokens. next ( ) {
366468 if last_pos < token. start_byte_pos {
367- extras. push ( (
368- SyntaxKind :: Whitespace ,
369- last_pos,
370- token. start_byte_pos ,
371- & input[ last_pos..token. start_byte_pos ] ,
372- ) ) ;
469+ extras. push ( Extra {
470+ kind : SyntaxKind :: Whitespace ,
471+ start_byte_pos : last_pos,
472+ end_byte_pos : token. start_byte_pos ,
473+ comment : & input[ last_pos..token. start_byte_pos ] ,
474+ } ) ;
373475 }
374476
375477 last_pos = token. end_byte_pos ;
@@ -381,19 +483,19 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
381483
382484 let cid = token_kind_to_component_id ( & token. kind ) ;
383485 let kind = SyntaxKind :: from_raw ( RawSyntaxKind ( cid) ) ;
384- extras. push ( (
486+ extras. push ( Extra {
385487 kind,
386- token. start_byte_pos ,
387- token. end_byte_pos ,
388- & input[ token. start_byte_pos ..token. end_byte_pos ] ,
389- ) ) ;
488+ start_byte_pos : token. start_byte_pos ,
489+ end_byte_pos : token. end_byte_pos ,
490+ comment : & input[ token. start_byte_pos ..token. end_byte_pos ] ,
491+ } ) ;
390492 }
391493
392494 let parser = Parser {
393495 builder : GreenNodeBuilder :: new ( ) ,
394496 } ;
395497 let root: Vec < & Node > = stack[ 1 ..] . iter ( ) . map ( |s| & s. 1 ) . collect ( ) ;
396- let ( ast, resolver) = parser. parse ( & root, extras) ;
498+ let ( ast, resolver) = parser. parse ( & root, extras, & complement_token ) ;
397499
398500 Ok ( SyntaxNode :: new_root_with_resolver ( ast, resolver) )
399501}
0 commit comments