py/lexer: Simplify lexer startup by using dummy bytes and next_char().
Now consistently uses the EOL processing ("\r" and "\r\n" convert to "\n") and EOF processing (ensure "\n" before EOF) provided by next_char(). In particular the lexer can now correctly handle input that starts with CR.
This commit is contained in:
parent
e711e2d44a
commit
2998647c4e
29
py/lexer.c
29
py/lexer.c
|
@ -677,7 +677,7 @@ mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) {
|
|||
lex->source_name = src_name;
|
||||
lex->reader = reader;
|
||||
lex->line = 1;
|
||||
lex->column = 1;
|
||||
lex->column = -2; // account for 3 dummy bytes
|
||||
lex->emit_dent = 0;
|
||||
lex->nested_bracket_level = 0;
|
||||
lex->alloc_indent_level = MICROPY_ALLOC_LEXER_INDENT_INIT;
|
||||
|
@ -688,27 +688,12 @@ mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) {
|
|||
// store sentinel for first indentation level
|
||||
lex->indent_level[0] = 0;
|
||||
|
||||
// preload characters
|
||||
lex->chr0 = reader.readbyte(reader.data);
|
||||
lex->chr1 = reader.readbyte(reader.data);
|
||||
lex->chr2 = reader.readbyte(reader.data);
|
||||
|
||||
// if input stream is 0, 1 or 2 characters long and doesn't end in a newline, then insert a newline at the end
|
||||
if (lex->chr0 == MP_LEXER_EOF) {
|
||||
lex->chr0 = '\n';
|
||||
} else if (lex->chr1 == MP_LEXER_EOF) {
|
||||
if (lex->chr0 == '\r') {
|
||||
lex->chr0 = '\n';
|
||||
} else if (lex->chr0 != '\n') {
|
||||
lex->chr1 = '\n';
|
||||
}
|
||||
} else if (lex->chr2 == MP_LEXER_EOF) {
|
||||
if (lex->chr1 == '\r') {
|
||||
lex->chr1 = '\n';
|
||||
} else if (lex->chr1 != '\n') {
|
||||
lex->chr2 = '\n';
|
||||
}
|
||||
}
|
||||
// load lexer with start of file, advancing lex->column to 1
|
||||
// start with dummy bytes and use next_char() for proper EOL/EOF handling
|
||||
lex->chr0 = lex->chr1 = lex->chr2 = 0;
|
||||
next_char(lex);
|
||||
next_char(lex);
|
||||
next_char(lex);
|
||||
|
||||
// preload first token
|
||||
mp_lexer_to_next(lex);
|
||||
|
|
Loading…
Reference in New Issue