Skip to content

Commit e4096fb

Browse files
committed
Move NewlineHandler inline; don't check each character twice.
1 parent a3c372f commit e4096fb

File tree

1 file changed

+59
-87
lines changed

1 file changed

+59
-87
lines changed

compiler/parser/src/lexer.rs

Lines changed: 59 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,6 @@ where
115115
*self.window.last_mut().expect("never empty") = next;
116116
next
117117
}
118-
119-
fn change_first(&mut self, ch: char) {
120-
*self.window.first_mut().expect("never empty") = Some(ch);
121-
}
122118
}
123119

124120
impl<T, const N: usize, Idx> Index<Idx> for CharWindow<T, N>
@@ -135,7 +131,6 @@ where
135131

136132
pub struct Lexer<T: Iterator<Item = char>> {
137133
window: CharWindow<T, 3>,
138-
139134
at_begin_of_line: bool,
140135
nesting: usize, // Number of parentheses
141136
indentations: Indentations,
@@ -160,60 +155,7 @@ pub fn make_tokenizer_located(
160155
source: &str,
161156
start_location: Location,
162157
) -> impl Iterator<Item = LexResult> + '_ {
163-
let nlh = NewlineHandler::new(source.chars());
164-
Lexer::new(nlh, start_location)
165-
}
166-
167-
// The newline handler is an iterator which collapses different newline
168-
// types into \n always.
169-
pub struct NewlineHandler<T: Iterator<Item = char>> {
170-
window: CharWindow<T, 2>,
171-
}
172-
173-
impl<T> NewlineHandler<T>
174-
where
175-
T: Iterator<Item = char>,
176-
{
177-
pub fn new(source: T) -> Self {
178-
let mut nlh = NewlineHandler {
179-
window: CharWindow::new(source),
180-
};
181-
nlh.shift();
182-
nlh.shift();
183-
nlh
184-
}
185-
186-
fn shift(&mut self) -> Option<char> {
187-
let result = self.window[0];
188-
self.window.slide();
189-
result
190-
}
191-
}
192-
193-
impl<T> Iterator for NewlineHandler<T>
194-
where
195-
T: Iterator<Item = char>,
196-
{
197-
type Item = char;
198-
199-
fn next(&mut self) -> Option<Self::Item> {
200-
// Collapse \r\n into \n
201-
loop {
202-
match self.window[..2] {
203-
[Some('\r'), Some('\n')] => {
204-
// Windows EOL into \n
205-
self.shift();
206-
}
207-
[Some('\r'), _] => {
208-
// MAC EOL into \n
209-
self.window.change_first('\n');
210-
}
211-
_ => break,
212-
}
213-
}
214-
215-
self.shift()
216-
}
158+
Lexer::new(source.chars(), start_location)
217159
}
218160

219161
impl<T> Lexer<T>
@@ -446,10 +388,9 @@ where
446388
fn lex_comment(&mut self) -> LexResult {
447389
let start_pos = self.get_pos();
448390
let mut value = String::new();
449-
value.push(self.next_char().unwrap());
450391
loop {
451392
match self.window[0] {
452-
Some('\n') | None => {
393+
Some('\n' | '\r') | None => {
453394
let end_pos = self.get_pos();
454395
return Ok((start_pos, Tok::Comment(value), end_pos));
455396
}
@@ -487,7 +428,6 @@ where
487428
continue;
488429
}
489430
}
490-
491431
if c == '\n' && !triple_quoted {
492432
return Err(LexicalError {
493433
error: LexicalErrorType::OtherError(
@@ -613,7 +553,7 @@ where
613553
spaces = 0;
614554
tabs = 0;
615555
}
616-
Some('\n') => {
556+
Some('\n' | '\r') => {
617557
// Empty line!
618558
self.next_char();
619559
spaces = 0;
@@ -1059,7 +999,7 @@ where
1059999
}
10601000
}
10611001
}
1062-
'\n' => {
1002+
'\n' | '\r' => {
10631003
let tok_start = self.get_pos();
10641004
self.next_char();
10651005
let tok_end = self.get_pos();
@@ -1082,13 +1022,16 @@ where
10821022
}
10831023
'\\' => {
10841024
self.next_char();
1085-
if let Some('\n') = self.window[0] {
1086-
self.next_char();
1087-
} else {
1088-
return Err(LexicalError {
1089-
error: LexicalErrorType::LineContinuationError,
1090-
location: self.get_pos(),
1091-
});
1025+
match self.window[0] {
1026+
Some('\n' | '\r') => {
1027+
self.next_char();
1028+
}
1029+
_ => {
1030+
return Err(LexicalError {
1031+
error: LexicalErrorType::LineContinuationError,
1032+
location: self.get_pos(),
1033+
})
1034+
}
10921035
}
10931036

10941037
if self.window[0].is_none() {
@@ -1136,12 +1079,22 @@ where
11361079

11371080
/// Helper function to go to the next character coming up.
11381081
fn next_char(&mut self) -> Option<char> {
1139-
let c = self.window[0];
1082+
let mut c = self.window[0];
11401083
self.window.slide();
1141-
if c == Some('\n') {
1142-
self.location.newline();
1143-
} else {
1144-
self.location.go_right();
1084+
match c {
1085+
Some('\n') => {
1086+
self.location.newline();
1087+
}
1088+
Some('\r') => {
1089+
if self.window[0] == Some('\n') {
1090+
self.window.slide();
1091+
}
1092+
self.location.newline();
1093+
c = Some('\n');
1094+
}
1095+
_ => {
1096+
self.location.go_right();
1097+
}
11451098
}
11461099
c
11471100
}
@@ -1189,7 +1142,7 @@ where
11891142

11901143
#[cfg(test)]
11911144
mod tests {
1192-
use super::{make_tokenizer, NewlineHandler, StringKind, Tok};
1145+
use super::{make_tokenizer, StringKind, Tok};
11931146
use num_bigint::BigInt;
11941147

11951148
const WINDOWS_EOL: &str = "\r\n";
@@ -1201,16 +1154,6 @@ mod tests {
12011154
lexer.map(|x| x.unwrap().1).collect()
12021155
}
12031156

1204-
#[test]
1205-
fn test_newline_processor() {
1206-
// Escape \ followed by \n (by removal):
1207-
let src = "b\\\r\n";
1208-
assert_eq!(4, src.len());
1209-
let nlh = NewlineHandler::new(src.chars());
1210-
let x: Vec<char> = nlh.collect();
1211-
assert_eq!(vec!['b', '\\', '\n'], x);
1212-
}
1213-
12141157
fn stok(s: &str) -> Tok {
12151158
Tok::String {
12161159
value: s.to_owned(),
@@ -1645,4 +1588,33 @@ mod tests {
16451588
let tokens = lex_source(source);
16461589
assert_eq!(tokens, vec![stok(r"\N{EN SPACE}"), Tok::Newline])
16471590
}
1591+
1592+
macro_rules! test_triple_quoted {
1593+
($($name:ident: $eol:expr,)*) => {
1594+
$(
1595+
#[test]
1596+
fn $name() {
1597+
let source = format!("\"\"\"{0} test string{0} \"\"\"", $eol);
1598+
let tokens = lex_source(&source);
1599+
assert_eq!(
1600+
tokens,
1601+
vec![
1602+
Tok::String {
1603+
value: "\n test string\n ".to_owned(),
1604+
kind: StringKind::String,
1605+
triple_quoted: true,
1606+
},
1607+
Tok::Newline,
1608+
]
1609+
)
1610+
}
1611+
)*
1612+
}
1613+
}
1614+
1615+
test_triple_quoted! {
1616+
test_triple_quoted_windows_eol: WINDOWS_EOL,
1617+
test_triple_quoted_mac_eol: MAC_EOL,
1618+
test_triple_quoted_unix_eol: UNIX_EOL,
1619+
}
16481620
}

0 commit comments

Comments
 (0)