Skip to content

Commit 17ef56b

Browse files
committed
Fix word boundary handling
In emacs mode _ is a word boundary, but in vi mode it is a word character.
1 parent cd7b85d commit 17ef56b

File tree

2 files changed

+73
-15
lines changed

2 files changed

+73
-15
lines changed

Lib/_pyrepl/reader.py

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@ def make_default_syntax_table() -> dict[str, int]:
5757
return st
5858

5959

60+
def _is_vi_word_char(c: str) -> bool:
    """Return True if *c* is a word character under vi rules.

    A character counts as part of a word when it is alphanumeric
    (as reported by ``str.isalnum``) or is an underscore.  Anything
    else, including the empty string, is treated as a non-word
    character.
    """
    return c.isalnum() or c == '_'
62+
63+
6064
def make_default_commands() -> dict[CommandName, type[Command]]:
6165
result: dict[CommandName, type[Command]] = {}
6266
all_commands = itertools.chain(vars(commands).values(), vars(vi_commands).values())
@@ -512,24 +516,23 @@ def vi_eow(self, p: int | None = None) -> int:
512516
following p most immediately (vi 'e' semantics).
513517
514518
Unlike eow(), this returns the position ON the last word character,
515-
not past it. p defaults to self.pos; word boundaries are determined
516-
using self.syntax_table."""
519+
not past it. p defaults to self.pos; word boundaries use vi rules
520+
(alphanumeric + underscore)."""
517521
if p is None:
518522
p = self.pos
519-
st = self.syntax_table
520523
b = self.buffer
521524

522525
# If we're already at the end of a word, move past it
523-
if (p < len(b) and st.get(b[p], SYNTAX_WORD) == SYNTAX_WORD and
524-
(p + 1 >= len(b) or st.get(b[p + 1], SYNTAX_WORD) != SYNTAX_WORD)):
526+
if (p < len(b) and _is_vi_word_char(b[p]) and
527+
(p + 1 >= len(b) or not _is_vi_word_char(b[p + 1]))):
525528
p += 1
526529

527530
# Skip non-word characters to find the start of next word
528-
while p < len(b) and st.get(b[p], SYNTAX_WORD) != SYNTAX_WORD:
531+
while p < len(b) and not _is_vi_word_char(b[p]):
529532
p += 1
530533

531534
# Move to the last character of this word (not past it)
532-
while p + 1 < len(b) and st.get(b[p + 1], SYNTAX_WORD) == SYNTAX_WORD:
535+
while p + 1 < len(b) and _is_vi_word_char(b[p + 1]):
533536
p += 1
534537

535538
# Clamp to valid buffer range
@@ -540,24 +543,41 @@ def vi_forward_word(self, p: int | None = None) -> int:
540543
(vi 'w' semantics).
541544
542545
Unlike eow(), this lands ON the first character of the next word,
543-
not past it. p defaults to self.pos; word boundaries are determined
544-
using self.syntax_table."""
546+
not past it. p defaults to self.pos; word boundaries use vi rules
547+
(alphanumeric + underscore)."""
545548
if p is None:
546549
p = self.pos
547-
st = self.syntax_table
548550
b = self.buffer
549551

550552
# Skip the rest of the current word if we're on one
551-
while p < len(b) and st.get(b[p], SYNTAX_WORD) == SYNTAX_WORD:
553+
while p < len(b) and _is_vi_word_char(b[p]):
552554
p += 1
553555

554556
# Skip non-word characters to find the start of next word
555-
while p < len(b) and st.get(b[p], SYNTAX_WORD) != SYNTAX_WORD:
557+
while p < len(b) and not _is_vi_word_char(b[p]):
556558
p += 1
557559

558560
# Clamp to valid buffer range
559561
return min(p, len(b) - 1) if b else 0
560562

563+
def vi_bow(self, p: int | None = None) -> int:
    """Return the 0-based index of the beginning of the word preceding p
    (vi 'b' semantics).

    p defaults to self.pos; word boundaries use vi rules
    (alphanumeric + underscore).  An out-of-range p is clamped to
    [0, len(self.buffer)] before scanning, so the result is always a
    non-negative index and the scan never reads past the buffer.
    Returns 0 when no word precedes p.
    """
    if p is None:
        p = self.pos
    b = self.buffer
    # Clamp the starting point: p == len(b) (cursor after the last
    # character) is valid, anything beyond either end is not.
    p = max(0, min(p, len(b)))
    # Start looking at the character just before the cursor
    p -= 1
    # Skip non-word characters
    while p >= 0 and not _is_vi_word_char(b[p]):
        p -= 1
    # Skip word characters to find beginning of word
    while p >= 0 and _is_vi_word_char(b[p]):
        p -= 1
    # p now rests one position before the word's first character
    return p + 1
580+
561581
def bol(self, p: int | None = None) -> int:
562582
"""Return the 0-based index of the line break preceding p most
563583
immediately.

Lib/test/test_pyrepl/test_reader.py

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -911,7 +911,7 @@ def test_first_non_whitespace_character(self):
911911
self.assertEqual(reader2.buffer[reader2.pos], 't')
912912

913913
def test_word_motion_edge_cases(self):
914-
# Test with punctuation - underscore should be a word boundary
914+
# Test with underscore - in vi mode, underscore IS a word character
915915
events = itertools.chain(
916916
code_to_events("hello_world"),
917917
[
@@ -921,8 +921,9 @@ def test_word_motion_edge_cases(self):
921921
],
922922
)
923923
reader, _ = self._run_vi(events)
924-
# 'w' moves to next word, underscore is not alphanumeric so treated as boundary
925-
self.assertIn(reader.pos, [5, 6]) # Could be on '_' or 'w' depending on implementation
924+
# In vi mode, underscore is part of word, so 'w' goes past end of "hello_world"
925+
# which clamps to end of buffer (pos 10, on 'd')
926+
self.assertEqual(reader.pos, 10)
926927

927928
# Test 'e' at end of buffer stays in bounds
928929
events2 = itertools.chain(
@@ -977,6 +978,43 @@ def test_repeat_count_with_word_motions(self):
977978
# Should be at end of "beta"
978979
self.assertEqual(reader2.buffer[reader2.pos], 'a') # Last 'a' of "beta"
979980

981+
def test_vi_word_boundaries(self):
    """Test vi word motions match vim behavior for word characters.

    In vi, word characters are alphanumeric + underscore.

    Each case types the text, presses ESC, then sends the listed
    keys one at a time and checks the resulting reader.pos.
    """
    # Test cases: (text, start_key_sequence, expected_pos, description)
    test_cases = [
        # Underscore is part of word in vi, unlike emacs mode
        ("function_name", "0w", 12, "underscore is word char, w clamps to end"),
        ("hello_world test", "0w", 12, "underscore word, then to next word"),
        ("get_value(x)", "0w", 10, "underscore word, skip ( to x"),

        # Basic word motion
        ("hello world", "0w", 6, "basic word jump"),
        # Two spaces between the words: 'w' must skip both to land on 't'
        ("one  two", "0w", 5, "double space handled"),
        ("abc def ghi", "0ww", 8, "two w's"),

        # End of word (e) - lands ON last char
        ("function_name", "0e", 12, "e lands on last char of underscore word"),
        ("foo bar", "0e", 2, "e lands on last char of foo"),
        ("one two three", "0ee", 6, "two e's land on end of two"),
    ]

    for text, keys, expected_pos, desc in test_cases:
        with self.subTest(text=text, keys=keys, desc=desc):
            # One key event per character of the key sequence
            key_events = [
                Event(evt="key", data=k, raw=bytearray(k.encode()))
                for k in keys
            ]
            events = itertools.chain(
                code_to_events(text),
                [Event(evt="key", data="\x1b", raw=bytearray(b"\x1b"))],  # ESC
                key_events,
            )
            reader, _ = self._run_vi(events)
            self.assertEqual(reader.pos, expected_pos,
                f"Expected pos {expected_pos} but got {reader.pos} for '{text}' with keys '{keys}'")
1017+
9801018

9811019
@force_not_colorized_test_class
9821020
class TestHistoricalReaderBindings(TestCase):

0 commit comments

Comments
 (0)