Skip to content

Commit 7be4860

Browse files
killme2008fanninpm
andauthored
fix: non-utf8 source causes panic, close RustPython#3798 (RustPython#3884)
Co-authored-by: fanninpm <fanninpm@miamioh.edu>
1 parent 42d7c51 commit 7be4860

File tree

3 files changed

+23
-0
lines changed

3 files changed

+23
-0
lines changed

extra_tests/snippets/non_utf8.txt

191 Bytes
Binary file not shown.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import os
2+
import platform
3+
4+
from testutils import assert_raises
5+
6+
dir_path = os.path.dirname(os.path.realpath(__file__))
7+
8+
# TODO: RUSTPYTHON, RustPython raises a SyntaxError here, but CPython raises a ValueError
9+
error = SyntaxError if platform.python_implementation() == 'RustPython' else ValueError
10+
with assert_raises(error):
11+
with open(os.path.join(dir_path , "non_utf8.txt")) as f:
12+
eval(f.read())

parser/src/lexer.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,9 @@ where
145145
}
146146
}
147147

148+
/// `unicode_names2` does not expose `MAX_NAME_LENGTH`, so we replicate that constant here (fixes #3798).
149+
const MAX_UNICODE_NAME: usize = 88;
150+
148151
impl<T> Lexer<T>
149152
where
150153
T: Iterator<Item = char>,
@@ -466,6 +469,14 @@ where
466469
}
467470
}
468471
}
472+
473+
if name.len() > MAX_UNICODE_NAME {
474+
return Err(LexicalError {
475+
error: LexicalErrorType::UnicodeError,
476+
location: self.get_pos(),
477+
});
478+
}
479+
469480
unicode_names2::character(&name).ok_or(LexicalError {
470481
error: LexicalErrorType::UnicodeError,
471482
location: start_pos,

0 commit comments

Comments
 (0)