Skip to content

Commit 382df8e

Browse files
committed
Use multipeek
1 parent c5db729 commit 382df8e

File tree

2 files changed

+93
-84
lines changed

2 files changed

+93
-84
lines changed

compiler/parser/src/parser.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,11 @@
1414
1515
use crate::lexer::{LexResult, Tok};
1616
pub use crate::mode::Mode;
17+
use crate::soft_keywords::SoftKeywordTransformer;
1718
use crate::{ast, error::ParseError, lexer, python};
1819
use ast::Location;
1920
use itertools::Itertools;
2021
use std::iter;
21-
use crate::soft_keywords::soft_keywords;
2222

2323
/// Parse a full Python program usually consisting of multiple lines.
2424
///
@@ -190,7 +190,7 @@ pub fn parse_tokens(
190190
.chain(lxr)
191191
.filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
192192
python::TopParser::new()
193-
.parse(soft_keywords(tokenizer, mode).into_iter())
193+
.parse(SoftKeywordTransformer::new(tokenizer, mode).into_iter())
194194
.map_err(|e| crate::error::parse_error_from_lalrpop(e, source_path))
195195
}
196196

Lines changed: 91 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,10 @@
1+
use itertools::{Itertools, MultiPeek};
2+
13
use crate::lexer::{LexResult, Tok};
24
pub use crate::mode::Mode;
35

4-
/// Collect all tokens from a token stream in a vector.
5-
fn collect_tokens(tokenizer: impl IntoIterator<Item = LexResult>) -> Vec<LexResult> {
6-
let mut tokens: Vec<LexResult> = vec![];
7-
for tok in tokenizer {
8-
let is_err = tok.is_err();
9-
tokens.push(tok);
10-
if is_err {
11-
break;
12-
}
13-
}
14-
tokens
15-
}
16-
17-
/// Modify a token stream to accommodate soft keywords (namely, `match` and `case`).
6+
/// An [`Iterator`] that transforms a token stream to accommodate soft keywords (namely, `match`
7+
/// and `case`).
188
///
199
/// [PEP 634](https://www.python.org/dev/peps/pep-0634/) introduced the `match` and `case` keywords
2010
/// as soft keywords, meaning that they can be used as identifiers (e.g., variable names) in certain
@@ -25,93 +15,112 @@ fn collect_tokens(tokenizer: impl IntoIterator<Item = LexResult>) -> Vec<LexResu
2515
///
2616
/// Handling soft keywords in this intermediary pass allows us to simplify both the lexer and
2717
/// parser, as neither of them need to be aware of soft keywords.
28-
pub fn soft_keywords(
29-
tokenizer: impl IntoIterator<Item = LexResult>,
30-
mode: Mode,
31-
) -> Vec<LexResult> {
32-
let mut tokenizer: Vec<LexResult> = collect_tokens(tokenizer);
33-
let mut start_of_line = matches!(mode, Mode::Module | Mode::Interactive);
34-
for i in 0..tokenizer.len() {
35-
// If the token is a `match` or `case` token, check if it's used as an identifier.
36-
// We assume every `match` or `case` is an identifier unless both of the following
37-
// conditions are met:
38-
// 1. The token is at the start of a logical line.
39-
// 2. The logical line contains a top-level colon (that is, a colon that is not nested
40-
// inside a parenthesized expression, list, or dictionary).
41-
// 3. The top-level colon is not the immediate sibling of a `match` or `case` token.
42-
// (This is to avoid treating `match` and `case` as identifiers when annotated with
43-
// type hints.)
44-
if tokenizer[i]
45-
.as_ref()
46-
.map_or(false, |(_, tok, _)| matches!(tok, Tok::Match | Tok::Case))
47-
{
48-
let is_identifier = {
49-
if !start_of_line {
50-
// If the `match` or `case` token is not at the start of a line, it's definitely
51-
// an identifier.
52-
true
18+
pub struct SoftKeywordTransformer<I>
19+
where
20+
I: Iterator<Item = LexResult>,
21+
{
22+
pub underlying: MultiPeek<I>,
23+
pub start_of_line: bool,
24+
}
25+
26+
impl<I> SoftKeywordTransformer<I>
27+
where
28+
I: Iterator<Item = LexResult>,
29+
{
30+
pub fn new(tokenizer: I, mode: Mode) -> Self {
31+
Self {
32+
underlying: tokenizer.multipeek(),
33+
start_of_line: matches!(mode, Mode::Interactive | Mode::Module),
34+
}
35+
}
36+
}
37+
38+
impl<I> Iterator for SoftKeywordTransformer<I>
39+
where
40+
I: Iterator<Item = LexResult>,
41+
{
42+
type Item = LexResult;
43+
44+
#[inline]
45+
fn next(&mut self) -> Option<LexResult> {
46+
let mut next = self.underlying.next();
47+
if let Some(Ok((start, tok, end))) = next.as_ref() {
48+
// If the token is a `match` or `case` token, check if it's used as an identifier.
49+
// We assume every `match` or `case` is an identifier unless both of the following
50+
// conditions are met:
51+
// 1. The token is at the start of a logical line.
52+
// 2. The logical line contains a top-level colon (that is, a colon that is not nested
53+
// inside a parenthesized expression, list, or dictionary).
54+
// 3. The top-level colon is not the immediate sibling of a `match` or `case` token.
55+
// (This is to avoid treating `match` and `case` as identifiers when annotated with
56+
// type hints.)
57+
if matches!(tok, Tok::Match | Tok::Case) {
58+
if !self.start_of_line {
59+
next = Some(Ok((
60+
*start,
61+
Tok::Name {
62+
name: if matches!(tok, Tok::Match) {
63+
"match".to_string()
64+
} else {
65+
"case".to_string()
66+
},
67+
},
68+
*end,
69+
)));
5370
} else {
54-
//
55-
let mut seen_colon = false;
56-
let mut first = true;
5771
let mut par_count = 0;
5872
let mut sqb_count = 0;
5973
let mut brace_count = 0;
60-
for (_, tok, _) in tokenizer.iter().skip(i + 1).flatten() {
74+
let mut first = true;
75+
let mut seen_colon = false;
76+
while let Some(Ok((_, tok, _))) = self.underlying.peek() {
6177
match tok {
6278
Tok::Newline => break,
6379
Tok::Colon if par_count == 0 && sqb_count == 0 && brace_count == 0 => {
6480
if !first {
6581
seen_colon = true;
6682
}
67-
break;
68-
}
69-
Tok::Lpar => {
70-
par_count += 1;
71-
}
72-
Tok::Rpar => {
73-
par_count -= 1;
74-
}
75-
Tok::Lsqb => {
76-
sqb_count += 1;
77-
}
78-
Tok::Rsqb => {
79-
sqb_count -= 1;
80-
}
81-
Tok::Lbrace => {
82-
brace_count += 1;
83-
}
84-
Tok::Rbrace => {
85-
brace_count -= 1;
8683
}
84+
Tok::Lpar => par_count += 1,
85+
Tok::Rpar => par_count -= 1,
86+
Tok::Lsqb => sqb_count += 1,
87+
Tok::Rsqb => sqb_count -= 1,
88+
Tok::Lbrace => brace_count += 1,
89+
Tok::Rbrace => brace_count -= 1,
8790
_ => {}
8891
}
8992
first = false;
9093
}
91-
!seen_colon
92-
}
93-
};
94-
if is_identifier {
95-
if let Ok((_, tok, _)) = &mut tokenizer[i] {
96-
if let Tok::Match = tok {
97-
*tok = Tok::Name {
98-
name: "match".to_string(),
99-
};
100-
} else if let Tok::Case = tok {
101-
*tok = Tok::Name {
102-
name: "case".to_string(),
103-
};
94+
if !seen_colon {
95+
next = Some(Ok((
96+
*start,
97+
Tok::Name {
98+
name: if matches!(tok, Tok::Match) {
99+
"match".to_string()
100+
} else {
101+
"case".to_string()
102+
},
103+
},
104+
*end,
105+
)));
104106
}
105107
}
106108
}
107109
}
108-
start_of_line = tokenizer[i].as_ref().map_or(false, |(_, tok, _)| {
109-
matches!(
110-
tok,
111-
Tok::StartModule | Tok::StartInteractive | Tok::Newline | Tok::Indent | Tok::Dedent
112-
)
110+
111+
self.start_of_line = next.as_ref().map_or(false, |lex_result| {
112+
lex_result.as_ref().map_or(false, |(_, tok, _)| {
113+
matches!(
114+
tok,
115+
Tok::StartModule
116+
| Tok::StartInteractive
117+
| Tok::Newline
118+
| Tok::Indent
119+
| Tok::Dedent
120+
)
121+
})
113122
});
114-
}
115123

116-
tokenizer
124+
next
125+
}
117126
}

0 commit comments

Comments
 (0)