Skip to content

Commit 23babd6

Browse files
committed
Handle punctuation as separate words in vi motions
Vi has three character classes: word chars (alnum + _), punctuation (non-word, non-whitespace), and whitespace. Now w, e, and b treat punctuation sequences as separate words, matching vim behavior.
1 parent: 0b14fce · commit: 23babd6

File tree

2 files changed: 96 additions (+96) and 35 deletions (-35)

Lib/_pyrepl/reader.py

Lines changed: 79 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -521,46 +521,79 @@ def vi_eow(self, p: int | None = None) -> int:
521521
"""Return the 0-based index of the last character of the word
522522
following p most immediately (vi 'e' semantics).
523523
524-
Unlike eow(), this returns the position ON the last word character,
525-
not past it. p defaults to self.pos; word boundaries use vi rules
526-
(alphanumeric + underscore)."""
524+
Vi has three character classes: word chars (alnum + _), punctuation
525+
(non-word, non-whitespace), and whitespace. 'e' moves to the end
526+
of the current or next word/punctuation sequence."""
527527
if p is None:
528528
p = self.pos
529529
b = self.buffer
530530

531-
# If we're already at the end of a word, move past it
532-
if (p < len(b) and _is_vi_word_char(b[p]) and
533-
(p + 1 >= len(b) or not _is_vi_word_char(b[p + 1]))):
534-
p += 1
531+
if not b:
532+
return 0
533+
534+
# Helper to check if at end of current sequence
535+
def at_sequence_end(pos: int) -> bool:
536+
if pos >= len(b) - 1:
537+
return True
538+
curr_is_word = _is_vi_word_char(b[pos])
539+
next_is_word = _is_vi_word_char(b[pos + 1])
540+
curr_is_space = b[pos].isspace()
541+
next_is_space = b[pos + 1].isspace()
542+
if curr_is_word:
543+
return not next_is_word
544+
elif not curr_is_space:
545+
# Punctuation - at end if next is word or whitespace
546+
return next_is_word or next_is_space
547+
return True
535548

536-
# Skip non-word characters to find the start of next word
537-
while p < len(b) and not _is_vi_word_char(b[p]):
549+
# If already at end of a word/punctuation, move forward
550+
if p < len(b) and at_sequence_end(p):
538551
p += 1
539552

540-
# Move to the last character of this word (not past it)
541-
while p + 1 < len(b) and _is_vi_word_char(b[p + 1]):
553+
# Skip whitespace
554+
while p < len(b) and b[p].isspace():
542555
p += 1
543556

544-
# Clamp to valid buffer range
545-
return min(p, len(b) - 1) if b else 0
557+
if p >= len(b):
558+
return len(b) - 1
559+
560+
# Move to end of current word or punctuation sequence
561+
if _is_vi_word_char(b[p]):
562+
while p + 1 < len(b) and _is_vi_word_char(b[p + 1]):
563+
p += 1
564+
else:
565+
# Punctuation sequence
566+
while p + 1 < len(b) and not _is_vi_word_char(b[p + 1]) and not b[p + 1].isspace():
567+
p += 1
568+
569+
return min(p, len(b) - 1)
546570

547571
def vi_forward_word(self, p: int | None = None) -> int:
548572
"""Return the 0-based index of the first character of the next word
549573
(vi 'w' semantics).
550574
551-
Unlike eow(), this lands ON the first character of the next word,
552-
not past it. p defaults to self.pos; word boundaries use vi rules
553-
(alphanumeric + underscore)."""
575+
Vi has three character classes: word chars (alnum + _), punctuation
576+
(non-word, non-whitespace), and whitespace. 'w' moves to the start
577+
of the next word or punctuation sequence."""
554578
if p is None:
555579
p = self.pos
556580
b = self.buffer
557581

558-
# Skip the rest of the current word if we're on one
559-
while p < len(b) and _is_vi_word_char(b[p]):
560-
p += 1
561-
562-
# Skip non-word characters to find the start of next word
563-
while p < len(b) and not _is_vi_word_char(b[p]):
582+
if not b or p >= len(b):
583+
return max(0, len(b) - 1) if b else 0
584+
585+
# Skip current word or punctuation sequence
586+
if _is_vi_word_char(b[p]):
587+
# On a word char - skip word chars
588+
while p < len(b) and _is_vi_word_char(b[p]):
589+
p += 1
590+
elif not b[p].isspace():
591+
# On punctuation - skip punctuation
592+
while p < len(b) and not _is_vi_word_char(b[p]) and not b[p].isspace():
593+
p += 1
594+
595+
# Skip whitespace to find next word or punctuation
596+
while p < len(b) and b[p].isspace():
564597
p += 1
565598

566599
# Clamp to valid buffer range
@@ -570,19 +603,35 @@ def vi_bow(self, p: int | None = None) -> int:
570603
"""Return the 0-based index of the beginning of the word preceding p
571604
(vi 'b' semantics).
572605
573-
p defaults to self.pos; word boundaries use vi rules
574-
(alphanumeric + underscore)."""
606+
Vi has three character classes: word chars (alnum + _), punctuation
607+
(non-word, non-whitespace), and whitespace. 'b' moves to the start
608+
of the current or previous word/punctuation sequence."""
575609
if p is None:
576610
p = self.pos
577611
b = self.buffer
612+
613+
if not b or p <= 0:
614+
return 0
615+
578616
p -= 1
579-
# Skip non-word characters
580-
while p >= 0 and not _is_vi_word_char(b[p]):
581-
p -= 1
582-
# Skip word characters to find beginning of word
583-
while p >= 0 and _is_vi_word_char(b[p]):
617+
618+
# Skip whitespace going backward
619+
while p >= 0 and b[p].isspace():
584620
p -= 1
585-
return p + 1
621+
622+
if p < 0:
623+
return 0
624+
625+
# Now skip the word or punctuation sequence we landed in
626+
if _is_vi_word_char(b[p]):
627+
while p > 0 and _is_vi_word_char(b[p - 1]):
628+
p -= 1
629+
else:
630+
# Punctuation sequence
631+
while p > 0 and not _is_vi_word_char(b[p - 1]) and not b[p - 1].isspace():
632+
p -= 1
633+
634+
return p
586635

587636
def bol(self, p: int | None = None) -> int:
588637
"""Return the 0-based index of the line break preceding p most

Lib/test/test_pyrepl/test_reader.py

Lines changed: 17 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -979,16 +979,26 @@ def test_repeat_count_with_word_motions(self):
979979
self.assertEqual(reader2.buffer[reader2.pos], 'a') # Last 'a' of "beta"
980980

981981
def test_vi_word_boundaries(self):
982-
"""Test vi word motions match vim behavior for word characters.
982+
"""Test vi word motions match vim behavior.
983983
984-
In vi, word characters are alphanumeric + underscore.
984+
Vi has three character classes:
985+
1. Word chars: alphanumeric + underscore
986+
2. Punctuation: non-word, non-whitespace (forms separate words)
987+
3. Whitespace: delimiters
985988
"""
986989
# Test cases: (text, start_key_sequence, expected_pos, description)
987990
test_cases = [
988991
# Underscore is part of word in vi, unlike emacs mode
989992
("function_name", "0w", 12, "underscore is word char, w clamps to end"),
990-
("hello_world test", "0w", 12, "underscore word, then to next word"),
991-
("get_value(x)", "0w", 10, "underscore word, skip ( to x"),
993+
("hello_world test", "0w", 12, "underscore word to end"),
994+
995+
# Punctuation is a separate word
996+
("foo.bar", "0w", 3, "w stops at dot (punctuation)"),
997+
("foo.bar", "0ww", 4, "second w goes to bar"),
998+
("foo..bar", "0w", 3, "w stops at first dot"),
999+
("foo..bar", "0ww", 5, "second w skips dot sequence to bar"),
1000+
("get_value(x)", "0w", 9, "underscore word stops at ("),
1001+
("get_value(x)", "0ww", 10, "second w goes to x"),
9921002

9931003
# Basic word motion
9941004
("hello world", "0w", 6, "basic word jump"),
@@ -998,7 +1008,9 @@ def test_vi_word_boundaries(self):
9981008
# End of word (e) - lands ON last char
9991009
("function_name", "0e", 12, "e lands on last char of underscore word"),
10001010
("foo bar", "0e", 2, "e lands on last char of foo"),
1001-
("one two three", "0ee", 6, "two e's land on end of two"),
1011+
("foo.bar", "0e", 2, "e lands on last o of foo"),
1012+
("foo.bar", "0ee", 3, "second e lands on dot"),
1013+
("foo.bar", "0eee", 6, "third e lands on last r of bar"),
10021014
]
10031015

10041016
for text, keys, expected_pos, desc in test_cases:

0 commit comments

Comments (0)