@@ -115,10 +115,6 @@ where
115115 * self . window . last_mut ( ) . expect ( "never empty" ) = next;
116116 next
117117 }
118-
119- fn change_first ( & mut self , ch : char ) {
120- * self . window . first_mut ( ) . expect ( "never empty" ) = Some ( ch) ;
121- }
122118}
123119
124120impl < T , const N : usize , Idx > Index < Idx > for CharWindow < T , N >
@@ -135,7 +131,6 @@ where
135131
136132pub struct Lexer < T : Iterator < Item = char > > {
137133 window : CharWindow < T , 3 > ,
138-
139134 at_begin_of_line : bool ,
140135 nesting : usize , // Amount of parenthesis
141136 indentations : Indentations ,
@@ -160,60 +155,7 @@ pub fn make_tokenizer_located(
160155 source : & str ,
161156 start_location : Location ,
162157) -> impl Iterator < Item = LexResult > + ' _ {
163- let nlh = NewlineHandler :: new ( source. chars ( ) ) ;
164- Lexer :: new ( nlh, start_location)
165- }
166-
167- // The newline handler is an iterator which collapses different newline
168- // types into \n always.
169- pub struct NewlineHandler < T : Iterator < Item = char > > {
170- window : CharWindow < T , 2 > ,
171- }
172-
173- impl < T > NewlineHandler < T >
174- where
175- T : Iterator < Item = char > ,
176- {
177- pub fn new ( source : T ) -> Self {
178- let mut nlh = NewlineHandler {
179- window : CharWindow :: new ( source) ,
180- } ;
181- nlh. shift ( ) ;
182- nlh. shift ( ) ;
183- nlh
184- }
185-
186- fn shift ( & mut self ) -> Option < char > {
187- let result = self . window [ 0 ] ;
188- self . window . slide ( ) ;
189- result
190- }
191- }
192-
193- impl < T > Iterator for NewlineHandler < T >
194- where
195- T : Iterator < Item = char > ,
196- {
197- type Item = char ;
198-
199- fn next ( & mut self ) -> Option < Self :: Item > {
200- // Collapse \r\n into \n
201- loop {
202- match self . window [ ..2 ] {
203- [ Some ( '\r' ) , Some ( '\n' ) ] => {
204- // Windows EOL into \n
205- self . shift ( ) ;
206- }
207- [ Some ( '\r' ) , _] => {
208- // MAC EOL into \n
209- self . window . change_first ( '\n' ) ;
210- }
211- _ => break ,
212- }
213- }
214-
215- self . shift ( )
216- }
158+ Lexer :: new ( source. chars ( ) , start_location)
217159}
218160
219161impl < T > Lexer < T >
@@ -446,10 +388,9 @@ where
446388 fn lex_comment ( & mut self ) -> LexResult {
447389 let start_pos = self . get_pos ( ) ;
448390 let mut value = String :: new ( ) ;
449- value. push ( self . next_char ( ) . unwrap ( ) ) ;
450391 loop {
451392 match self . window [ 0 ] {
452- Some ( '\n' ) | None => {
393+ Some ( '\n' | '\r' ) | None => {
453394 let end_pos = self . get_pos ( ) ;
454395 return Ok ( ( start_pos, Tok :: Comment ( value) , end_pos) ) ;
455396 }
@@ -487,7 +428,6 @@ where
487428 continue ;
488429 }
489430 }
490-
491431 if c == '\n' && !triple_quoted {
492432 return Err ( LexicalError {
493433 error : LexicalErrorType :: OtherError (
@@ -613,7 +553,7 @@ where
613553 spaces = 0 ;
614554 tabs = 0 ;
615555 }
616- Some ( '\n' ) => {
556+ Some ( '\n' | '\r' ) => {
617557 // Empty line!
618558 self . next_char ( ) ;
619559 spaces = 0 ;
@@ -1059,7 +999,7 @@ where
1059999 }
10601000 }
10611001 }
1062- '\n' => {
1002+ '\n' | '\r' => {
10631003 let tok_start = self . get_pos ( ) ;
10641004 self . next_char ( ) ;
10651005 let tok_end = self . get_pos ( ) ;
@@ -1082,13 +1022,16 @@ where
10821022 }
10831023 '\\' => {
10841024 self . next_char ( ) ;
1085- if let Some ( '\n' ) = self . window [ 0 ] {
1086- self . next_char ( ) ;
1087- } else {
1088- return Err ( LexicalError {
1089- error : LexicalErrorType :: LineContinuationError ,
1090- location : self . get_pos ( ) ,
1091- } ) ;
1025+ match self . window [ 0 ] {
1026+ Some ( '\n' | '\r' ) => {
1027+ self . next_char ( ) ;
1028+ }
1029+ _ => {
1030+ return Err ( LexicalError {
1031+ error : LexicalErrorType :: LineContinuationError ,
1032+ location : self . get_pos ( ) ,
1033+ } )
1034+ }
10921035 }
10931036
10941037 if self . window [ 0 ] . is_none ( ) {
@@ -1136,12 +1079,22 @@ where
11361079
11371080 /// Helper function to go to the next character coming up.
11381081 fn next_char ( & mut self ) -> Option < char > {
1139- let c = self . window [ 0 ] ;
1082+ let mut c = self . window [ 0 ] ;
11401083 self . window . slide ( ) ;
1141- if c == Some ( '\n' ) {
1142- self . location . newline ( ) ;
1143- } else {
1144- self . location . go_right ( ) ;
1084+ match c {
1085+ Some ( '\n' ) => {
1086+ self . location . newline ( ) ;
1087+ }
1088+ Some ( '\r' ) => {
1089+ if self . window [ 0 ] == Some ( '\n' ) {
1090+ self . window . slide ( ) ;
1091+ }
1092+ self . location . newline ( ) ;
1093+ c = Some ( '\n' ) ;
1094+ }
1095+ _ => {
1096+ self . location . go_right ( ) ;
1097+ }
11451098 }
11461099 c
11471100 }
@@ -1189,7 +1142,7 @@ where
11891142
11901143#[ cfg( test) ]
11911144mod tests {
1192- use super :: { make_tokenizer, NewlineHandler , StringKind , Tok } ;
1145+ use super :: { make_tokenizer, StringKind , Tok } ;
11931146 use num_bigint:: BigInt ;
11941147
11951148 const WINDOWS_EOL : & str = "\r \n " ;
@@ -1201,16 +1154,6 @@ mod tests {
12011154 lexer. map ( |x| x. unwrap ( ) . 1 ) . collect ( )
12021155 }
12031156
1204- #[ test]
1205- fn test_newline_processor ( ) {
1206- // Escape \ followed by \n (by removal):
1207- let src = "b\\ \r \n " ;
1208- assert_eq ! ( 4 , src. len( ) ) ;
1209- let nlh = NewlineHandler :: new ( src. chars ( ) ) ;
1210- let x: Vec < char > = nlh. collect ( ) ;
1211- assert_eq ! ( vec![ 'b' , '\\' , '\n' ] , x) ;
1212- }
1213-
12141157 fn stok ( s : & str ) -> Tok {
12151158 Tok :: String {
12161159 value : s. to_owned ( ) ,
@@ -1645,4 +1588,33 @@ mod tests {
16451588 let tokens = lex_source ( source) ;
16461589 assert_eq ! ( tokens, vec![ stok( r"\N{EN SPACE}" ) , Tok :: Newline ] )
16471590 }
1591+
1592+ macro_rules! test_triple_quoted {
1593+ ( $( $name: ident: $eol: expr, ) * ) => {
1594+ $(
1595+ #[ test]
1596+ fn $name( ) {
1597+ let source = format!( "\" \" \" {0} test string{0} \" \" \" " , $eol) ;
1598+ let tokens = lex_source( & source) ;
1599+ assert_eq!(
1600+ tokens,
1601+ vec![
1602+ Tok :: String {
1603+ value: "\n test string\n " . to_owned( ) ,
1604+ kind: StringKind :: String ,
1605+ triple_quoted: true ,
1606+ } ,
1607+ Tok :: Newline ,
1608+ ]
1609+ )
1610+ }
1611+ ) *
1612+ }
1613+ }
1614+
1615+ test_triple_quoted ! {
1616+ test_triple_quoted_windows_eol: WINDOWS_EOL ,
1617+ test_triple_quoted_mac_eol: MAC_EOL ,
1618+ test_triple_quoted_unix_eol: UNIX_EOL ,
1619+ }
16481620}
0 commit comments