@@ -18,8 +18,8 @@ use unic_ucd_ident::{is_xid_continue, is_xid_start};
1818
/// Indentation measured at the start of a line, kept as separate counts of
/// tab and space characters rather than a single combined width.
#[derive(Clone, Copy, PartialEq, Debug, Default)]
struct IndentationLevel {
    /// Number of tab characters counted in the indentation.
    tabs: u32,
    /// Number of space characters counted in the indentation.
    spaces: u32,
}
2424
2525impl IndentationLevel {
@@ -225,7 +225,8 @@ where
225225 at_begin_of_line : true ,
226226 nesting : 0 ,
227227 indentations : Indentations :: default ( ) ,
228- pending : Vec :: new ( ) ,
228+ // Usually we have fewer than 5 tokens pending.
229+ pending : Vec :: with_capacity ( 5 ) ,
229230 location : start,
230231 window : CharWindow :: new ( input) ,
231232 } ;
@@ -257,13 +258,13 @@ where
257258 } ;
258259
259260 let start_pos = self . get_pos ( ) ;
260- let mut name = String :: new ( ) ;
261+ let mut name = String :: with_capacity ( 8 ) ;
261262 while self . is_identifier_continuation ( ) {
262263 name. push ( self . next_char ( ) . unwrap ( ) ) ;
263264 }
264265 let end_pos = self . get_pos ( ) ;
265266
266- if let Some ( tok) = KEYWORDS . get ( name. as_str ( ) ) {
267+ if let Some ( tok) = KEYWORDS . get ( & name) {
267268 Ok ( ( start_pos, tok. clone ( ) , end_pos) )
268269 } else {
269270 Ok ( ( start_pos, Tok :: Name { name } , end_pos) )
@@ -464,7 +465,7 @@ where
464465 self . next_char ( ) ;
465466 }
466467 let quote_char = self . next_char ( ) . unwrap ( ) ;
467- let mut string_content = String :: new ( ) ;
468+ let mut string_content = String :: with_capacity ( 5 ) ;
468469
469470 // If the next two characters are also the quote character, then we have a triple-quoted
470471 // string; consume those two characters and ensure that we require a triple-quote to close
@@ -534,12 +535,15 @@ where
534535 }
535536
536537 fn is_identifier_start ( & self , c : char ) -> bool {
537- c == '_' || is_xid_start ( c)
538+ match c {
539+ 'a' ..='z' | 'A' ..='Z' | '_' => true ,
540+ _ => is_xid_start ( c) ,
541+ }
538542 }
539543
540544 fn is_identifier_continuation ( & self ) -> bool {
541545 match self . window [ 0 ] {
542- Some ( '_' | '0' ..='9' ) => true ,
546+ Some ( 'a' ..= 'z' | 'A' ..= 'Z' | ' _' | '0' ..='9' ) => true ,
543547 Some ( c) => is_xid_continue ( c) ,
544548 _ => false ,
545549 }
@@ -564,8 +568,8 @@ where
564568 /// Given we are at the start of a line, count the number of spaces and/or tabs until the first character.
565569 fn eat_indentation ( & mut self ) -> Result < IndentationLevel , LexicalError > {
566570 // Determine indentation:
567- let mut spaces: usize = 0 ;
568- let mut tabs: usize = 0 ;
571+ let mut spaces: u32 = 0 ;
572+ let mut tabs: u32 = 0 ;
569573 loop {
570574 match self . window [ 0 ] {
571575 Some ( ' ' ) => {
@@ -686,21 +690,9 @@ where
686690 fn consume_normal ( & mut self ) -> Result < ( ) , LexicalError > {
687691 // Check if we have some character:
688692 if let Some ( c) = self . window [ 0 ] {
689- // First check identifier:
690693 if self . is_identifier_start ( c) {
691694 let identifier = self . lex_identifier ( ) ?;
692695 self . emit ( identifier) ;
693- } else if is_emoji_presentation ( c) {
694- let tok_start = self . get_pos ( ) ;
695- self . next_char ( ) ;
696- let tok_end = self . get_pos ( ) ;
697- self . emit ( (
698- tok_start,
699- Tok :: Name {
700- name : c. to_string ( ) ,
701- } ,
702- tok_end,
703- ) ) ;
704696 } else {
705697 self . consume_character ( c) ?;
706698 }
@@ -1047,10 +1039,7 @@ where
10471039 }
10481040 }
10491041 ',' => {
1050- let tok_start = self . get_pos ( ) ;
1051- self . next_char ( ) ;
1052- let tok_end = self . get_pos ( ) ;
1053- self . emit ( ( tok_start, Tok :: Comma , tok_end) ) ;
1042+ self . eat_single_char ( Tok :: Comma ) ;
10541043 }
10551044 '.' => {
10561045 if let Some ( '0' ..='9' ) = self . window [ 1 ] {
@@ -1109,13 +1098,25 @@ where
11091098 } ) ;
11101099 }
11111100 }
1112-
11131101 _ => {
1114- let c = self . next_char ( ) ;
1115- return Err ( LexicalError {
1116- error : LexicalErrorType :: UnrecognizedToken { tok : c. unwrap ( ) } ,
1117- location : self . get_pos ( ) ,
1118- } ) ;
1102+ if is_emoji_presentation ( c) {
1103+ let tok_start = self . get_pos ( ) ;
1104+ self . next_char ( ) ;
1105+ let tok_end = self . get_pos ( ) ;
1106+ self . emit ( (
1107+ tok_start,
1108+ Tok :: Name {
1109+ name : c. to_string ( ) ,
1110+ } ,
1111+ tok_end,
1112+ ) ) ;
1113+ } else {
1114+ let c = self . next_char ( ) ;
1115+ return Err ( LexicalError {
1116+ error : LexicalErrorType :: UnrecognizedToken { tok : c. unwrap ( ) } ,
1117+ location : self . get_pos ( ) ,
1118+ } ) ;
1119+ }
11191120 } // Ignore all the rest..
11201121 }
11211122
@@ -1124,7 +1125,11 @@ where
11241125
11251126 fn eat_single_char ( & mut self , ty : Tok ) {
11261127 let tok_start = self . get_pos ( ) ;
1127- self . next_char ( ) . unwrap ( ) ;
1128+ self . next_char ( ) . unwrap_or_else ( || unsafe {
1129+ // SAFETY: eat_single_char has been called only after a character has been read
1130+ // from the window, so the window is guaranteed to be non-empty.
1131+ std:: hint:: unreachable_unchecked ( )
1132+ } ) ;
11281133 let tok_end = self . get_pos ( ) ;
11291134 self . emit ( ( tok_start, ty, tok_end) ) ;
11301135 }
0 commit comments