Skip to content

Commit cadddad

Browse files
Move location of syntax colorizer to _colorize
1 parent ff52e90 commit cadddad

File tree

6 files changed

+277
-251
lines changed

6 files changed

+277
-251
lines changed

Lib/_colorize.py

Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,16 @@
1+
import builtins
2+
import keyword
13
import os
24
import sys
5+
import token as T
6+
import tokenize
37

8+
from collections import deque
49
from collections.abc import Callable, Iterator, Mapping
510
from dataclasses import dataclass, field, Field
11+
from io import StringIO
12+
from tokenize import TokenInfo as TI
13+
from typing import Iterable, Match, NamedTuple
614

715
COLORIZE = True
816

@@ -373,3 +381,238 @@ def set_theme(t: Theme) -> None:
373381

374382

375383
set_theme(default_theme)
384+
385+
386+
# --------------------------- Syntax colorizer ------------------------------- #
387+
388+
# Keywords whose following NAME token is the identifier being defined.
IDENTIFIERS_AFTER = {"def", "class"}

# Hard keywords that are constants; they get their own color tag.
KEYWORD_CONSTANTS = {"True", "False", "None"}

# Every public (non-underscore) name exposed by the builtins module.
BUILTINS = {
    str(public_name)
    for public_name in dir(builtins)
    if not public_name.startswith('_')
}

# Hard keywords accepted as the token right after a soft `match` / `case`
# (consulted by the soft-keyword detection below).
_keyword_first_sets_match = KEYWORD_CONSTANTS | {"await", "lambda", "not"}
_keyword_first_sets_case = set(KEYWORD_CONSTANTS)
393+
394+
395+
class _Span(NamedTuple):
396+
"""Span indexing that's inclusive on both ends."""
397+
398+
start: int
399+
end: int
400+
401+
@classmethod
402+
def from_re(cls, m: Match[str], group: int | str):
403+
re_span = m.span(group)
404+
return cls(re_span[0], re_span[1] - 1)
405+
406+
@classmethod
407+
def from_token(cls, token: TI, line_len: list[int]):
408+
end_offset = -1
409+
if (token.type in {T.FSTRING_MIDDLE, T.TSTRING_MIDDLE}
410+
and token.string.endswith(("{", "}"))):
411+
# gh-134158: a visible trailing brace comes from a double brace in input
412+
end_offset += 1
413+
414+
return cls(
415+
line_len[token.start[0] - 1] + token.start[1],
416+
line_len[token.end[0] - 1] + token.end[1] + end_offset,
417+
)
418+
419+
420+
class _ColorSpan(NamedTuple):
    """A span of the buffer paired with the color tag to apply to it."""

    # Range to color; a `_Span`, i.e. inclusive on both ends.
    span: _Span
    # Name of the syntax category, e.g. "string", "comment" or "keyword".
    tag: str
423+
424+
425+
def _prev_next_window[T](
426+
iterable: Iterable[T]
427+
) -> Iterator[tuple[T | None, ...]]:
428+
"""Generates three-tuples of (previous, current, next) items.
429+
430+
On the first iteration previous is None. On the last iteration next
431+
is None. In case of exception next is None and the exception is re-raised
432+
on a subsequent next() call.
433+
434+
Inspired by `sliding_window` from `itertools` recipes.
435+
"""
436+
437+
iterator = iter(iterable)
438+
window = deque((None, next(iterator)), maxlen=3)
439+
try:
440+
for x in iterator:
441+
window.append(x)
442+
yield tuple(window)
443+
except Exception:
444+
raise
445+
finally:
446+
window.append(None)
447+
yield tuple(window)
448+
449+
450+
def _is_soft_keyword_used(*tokens: TI | None) -> bool:
451+
"""Returns True if the current token is a keyword in this context.
452+
453+
For the `*tokens` to match anything, they have to be a three-tuple of
454+
(previous, current, next).
455+
"""
456+
match tokens:
457+
case (
458+
None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"),
459+
TI(string="match"),
460+
TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START)
461+
| TI(T.OP, string="(" | "*" | "[" | "{" | "~" | "...")
462+
):
463+
return True
464+
case (
465+
None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"),
466+
TI(string="match"),
467+
TI(T.NAME, string=s)
468+
):
469+
if keyword.iskeyword(s):
470+
return s in _keyword_first_sets_match
471+
return True
472+
case (
473+
None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"),
474+
TI(string="case"),
475+
TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START)
476+
| TI(T.OP, string="(" | "*" | "-" | "[" | "{")
477+
):
478+
return True
479+
case (
480+
None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"),
481+
TI(string="case"),
482+
TI(T.NAME, string=s)
483+
):
484+
if keyword.iskeyword(s):
485+
return s in _keyword_first_sets_case
486+
return True
487+
case (TI(string="case"), TI(string="_"), TI(string=":")):
488+
return True
489+
case (
490+
None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"),
491+
TI(string="type"),
492+
TI(T.NAME, string=s)
493+
):
494+
return not keyword.iskeyword(s)
495+
case _:
496+
return False
497+
498+
499+
def _gen_colors_from_token_stream(
    token_generator: Iterator[TI],
    line_lengths: list[int],
) -> Iterator[_ColorSpan]:
    """Yield a `_ColorSpan` for every token that should be colored.

    Tokens are examined together with their neighbours so that soft
    keywords and attribute accesses can be told apart.  `line_lengths` is
    passed through to `_Span.from_token` to position each span.
    """
    string_types = (
        T.STRING,
        T.FSTRING_START, T.FSTRING_MIDDLE, T.FSTRING_END,
        T.TSTRING_START, T.TSTRING_MIDDLE, T.TSTRING_END,
    )

    awaiting_definition = False  # the previous keyword was `def` / `class`
    nesting = 0  # count of brackets currently open
    for before, current, after in _prev_next_window(token_generator):
        assert current is not None
        if current.start == current.end:
            # Zero-width token: nothing to color.
            continue

        kind = current.type
        text = current.string
        tag: str | None = None

        if kind in string_types:
            tag = "string"
        elif kind == T.COMMENT:
            tag = "comment"
        elif kind == T.NUMBER:
            tag = "number"
        elif kind == T.OP:
            if text in "([{":
                nesting += 1
            elif text in ")]}":
                nesting -= 1
            tag = "op"
        elif kind == T.NAME:
            if awaiting_definition:
                awaiting_definition = False
                tag = "definition"
            elif keyword.iskeyword(text):
                if text in KEYWORD_CONSTANTS:
                    tag = "keyword_constant"
                else:
                    tag = "keyword"
                # The name following `def` / `class` is a definition.
                awaiting_definition = text in IDENTIFIERS_AFTER
            elif (
                keyword.issoftkeyword(text)
                and nesting == 0
                and _is_soft_keyword_used(before, current, after)
            ):
                tag = "soft_keyword"
            elif text in BUILTINS and (
                not before or before.exact_type != T.DOT
            ):
                # A builtin name right after a dot is an attribute access.
                tag = "builtin"

        if tag is not None:
            yield _ColorSpan(_Span.from_token(current, line_lengths), tag)
559+
560+
561+
def _recover_unterminated_string(
    exc: tokenize.TokenError,
    line_lengths: list[int],
    last_emitted: _ColorSpan | None,
    buffer: str,
) -> Iterator[_ColorSpan]:
    """Yield a "string" span for an unterminated string reported by `exc`.

    Nothing is yielded when `exc` carries no location or when its message
    is not one of the recognised unterminated-string messages.  The span
    runs from the reported location to the end of the last line.
    """
    message, location = exc.args
    if location is None:
        return

    row, col = location

    recognised_prefixes = (
        "unterminated string literal",
        "unterminated f-string literal",
        "unterminated t-string literal",
        "EOF in multi-line string",
        "unterminated triple-quoted f-string literal",
        "unterminated triple-quoted t-string literal",
    )
    if not message.startswith(recognised_prefixes):
        return

    first = line_lengths[row - 1] + col - 1
    last = line_lengths[-1] - 1

    # in case FSTRING_START was already emitted
    if last_emitted and first <= last_emitted.span.start:
        first = last_emitted.span.end + 1

    yield _ColorSpan(_Span(first, last), "string")
592+
593+
594+
def _gen_colors(buffer: str) -> Iterator[_ColorSpan]:
    """Yields the index spans of `buffer` to color, each with its color tag.

    The input `buffer` should be a valid start of a Python code block, i.e.
    it cannot be a block starting in the middle of a multiline string.

    Generation stops silently on SyntaxError.  On tokenize.TokenError the
    error is handed to `_recover_unterminated_string`.
    """
    stream = StringIO(buffer)

    # line_lengths[i] is the total length of the first i lines.
    line_lengths = [0]
    for line in stream:
        line_lengths.append(line_lengths[-1] + len(line))

    stream.seek(0)
    tokens = tokenize.generate_tokens(stream.readline)
    last_emitted: _ColorSpan | None = None
    try:
        for color_span in _gen_colors_from_token_stream(tokens, line_lengths):
            yield color_span
            last_emitted = color_span
    except SyntaxError:
        return
    except tokenize.TokenError as te:
        yield from _recover_unterminated_string(
            te, line_lengths, last_emitted, buffer
        )

Lib/_pyrepl/reader.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
from dataclasses import dataclass, field, fields
2929

3030
from . import commands, console, input
31-
from .utils import wlen, unbracket, disp_str, gen_colors, THEME
31+
from .utils import wlen, unbracket, disp_str, THEME
3232
from .trace import trace
3333

3434

@@ -312,7 +312,7 @@ def calc_screen(self) -> list[str]:
312312
prompt_from_cache = (offset and self.buffer[offset - 1] != "\n")
313313

314314
if self.can_colorize:
315-
colors = list(gen_colors(self.get_unicode()))
315+
colors = list(_colorize._gen_colors(self.get_unicode()))
316316
else:
317317
colors = None
318318
trace("colors = {colors}", colors=colors)

0 commit comments

Comments
 (0)