Skip to content

Commit 24b183b

Browse files
committed
differ argument instead of 2 matchers where appropriate + differ tests
1 parent 3b2d26b commit 24b183b

File tree

2 files changed

+61
-23
lines changed

2 files changed

+61
-23
lines changed

Lib/difflib.py

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1490,8 +1490,7 @@ def decode(s):
14901490
for line in lines:
14911491
yield line.encode('ascii', 'surrogateescape')
14921492

1493-
def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK,
1494-
linematcher=None, charmatcher=None):
1493+
def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK, differ=None):
14951494
r"""
14961495
Compare `a` and `b` (lists of strings); return a `Differ`-style delta.
14971496
@@ -1509,13 +1508,9 @@ def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK,
15091508
whitespace characters (a blank or tab; note: it's a bad idea to
15101509
include newline in this!).
15111510
1512-
- `linematcher`: callable that takes 3 positional arguments.
1513-
i.e. matcher(isjunk, a, b) which returns SequenceMatcherBase instance
1514-
Default (if None) is SequenceMatcher class.
1515-
1516-
- `charmatcher`: callable that takes 3 positional arguments.
1517-
i.e. matcher(isjunk, a, b) which returns SequenceMatcherBase instance
1518-
Default (if None) is SequenceMatcher class.
1511+
- `differ`: callable that takes 2 positional arguments.
1512+
i.e. differ(linejunk, charjunk), which returns a `Differ` instance.
1513+
Default (if None) is the `Differ` class.
15191514
15201515
Tools/scripts/ndiff.py is a command-line front-end to this function.
15211516
@@ -1534,11 +1529,15 @@ def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK,
15341529
+ tree
15351530
+ emu
15361531
"""
1537-
return Differ(linejunk, charjunk, linematcher, charmatcher).compare(a, b)
1532+
if differ is None:
1533+
differ = Differ
1534+
elif not callable(differ):
1535+
raise TypeError("differ must be callable: %r" % (differ,))
1536+
1537+
return differ(linejunk, charjunk).compare(a, b)
15381538

15391539
def _mdiff(fromlines, tolines, context=None,
1540-
linejunk=None, charjunk=IS_CHARACTER_JUNK,
1541-
linematcher=None, charmatcher=None):
1540+
linejunk=None, charjunk=IS_CHARACTER_JUNK, differ=None):
15421541
r"""Returns generator yielding marked up from/to side by side differences.
15431542
15441543
Arguments:
@@ -1548,7 +1547,7 @@ def _mdiff(fromlines, tolines, context=None,
15481547
if None, all from/to text lines will be generated.
15491548
linejunk -- passed on to ndiff (see ndiff documentation)
15501549
charjunk -- passed on to ndiff (see ndiff documentation)
1551-
linematcher -- passed on to ndiff (see ndiff documentation)
1550+
differ -- passed on to ndiff (see ndiff documentation)
15521551
charmatcher -- passed on to ndiff (see ndiff documentation)
15531552
15541553
This function returns an iterator which returns a tuple:
@@ -1579,8 +1578,7 @@ def _mdiff(fromlines, tolines, context=None,
15791578
change_re = re.compile(r'(\++|\-+|\^+)')
15801579

15811580
# create the difference iterator to generate the differences
1582-
diff_lines_iterator = ndiff(fromlines, tolines, linejunk, charjunk,
1583-
linematcher, charmatcher)
1581+
diff_lines_iterator = ndiff(fromlines, tolines, linejunk, charjunk, differ)
15841582

15851583
def _make_line(lines, format_key, side, num_lines=[0,0]):
15861584
"""Returns line of text with user's change markup and line formatting.
@@ -1925,15 +1923,14 @@ class HtmlDiff(object):
19251923
_default_prefix = 0
19261924

19271925
def __init__(self,tabsize=8, wrapcolumn=None,
1928-
linejunk=None, charjunk=IS_CHARACTER_JUNK,
1929-
linematcher=None, charmatcher=None):
1926+
linejunk=None, charjunk=IS_CHARACTER_JUNK, differ=None):
19301927
"""HtmlDiff instance initializer
19311928
19321929
Arguments:
19331930
tabsize -- tab stop spacing, defaults to 8.
19341931
wrapcolumn -- column number where lines are broken and wrapped,
19351932
defaults to None where lines are not wrapped.
1936-
linejunk,charjunk,linematcher,charmatcher -- keyword arguments
1933+
linejunk,charjunk,differ -- keyword arguments
19371934
passed into ndiff() (used by HtmlDiff() to generate the side
19381935
by side HTML differences). See ndiff() documentation for
19391936
argument default values and descriptions.
@@ -1942,8 +1939,7 @@ def __init__(self,tabsize=8, wrapcolumn=None,
19421939
self._wrapcolumn = wrapcolumn
19431940
self._linejunk = linejunk
19441941
self._charjunk = charjunk
1945-
self._linematcher = linematcher
1946-
self._charmatcher = charmatcher
1942+
self._differ = differ
19471943

19481944
def make_file(self, fromlines, tolines, fromdesc='', todesc='',
19491945
context=False, numlines=5, *, charset='utf-8'):
@@ -2216,7 +2212,7 @@ def make_table(self,fromlines,tolines,fromdesc='',todesc='',context=False,
22162212
context_lines = None
22172213
diffs = _mdiff(fromlines, tolines, context_lines,
22182214
linejunk=self._linejunk, charjunk=self._charjunk,
2219-
linematcher=self._linematcher, charmatcher=self._charmatcher)
2215+
differ=self._differ)
22202216

22212217
# set up iterator to wrap lines that exceed desired width
22222218
if self._wrapcolumn:
@@ -2774,7 +2770,7 @@ class GestaltSequenceMatcher(SequenceMatcherBase):
27742770
27752771
Time Complexity:
27762772
find_longest_match : O(n)
2777-
get_matching_blocks : O(n) average for common diff case
2773+
get_matching_blocks : O(n) to O(n log n) for the average diff case
27782774
O(n^2) worst case.
27792775
27802776
Example of worst case complexity `get_matching_blocks` case:

Lib/test/test_difflib.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import unittest
44
import doctest
55
import sys
6+
import functools
67

78

89
class TestWithAscii(unittest.TestCase):
@@ -283,6 +284,7 @@ def test_make_file_usascii_charset_with_nonascii_input(self):
283284
self.assertIn('ımplıcıt', output)
284285

285286
class TestDiffer(unittest.TestCase):
287+
286288
def test_close_matches_aligned(self):
287289
# Of the 4 closely matching pairs, we want 1 to match with 3,
288290
# and 2 with 4, to align with a "top to bottom" mental model.
@@ -311,6 +313,46 @@ def test_one_delete(self):
311313
m = difflib.Differ().compare('a' + 'b' * 2, 'b' * 2)
312314
self.assertEqual(list(m), ['- a', ' b', ' b'])
313315

316+
def test_differ_with_balancing_gestalt_matcher(self):
317+
gsm_cls = functools.partial(difflib.GestaltSequenceMatcher, balancing=2/3)
318+
d1 = difflib.Differ()
319+
d2 = difflib.Differ(linematcher=gsm_cls, charmatcher=gsm_cls)
320+
a = ["a\n", "b\n", "-\n", "a\n", "b\n", "close match 1\n", "a\n", "b\n", "c\n"]
321+
b = ["a\n", "b\n", "c\n", "+\n", "a\n", "b\n", "close match 2\n", "a\n", "b\n"]
322+
m = list(d1.compare(a, b))
323+
self.assertEqual(m,
324+
['- a\n',
325+
'- b\n',
326+
'- -\n',
327+
'- a\n',
328+
'- b\n',
329+
'- close match 1\n',
330+
' a\n',
331+
' b\n',
332+
' c\n',
333+
'+ +\n',
334+
'+ a\n',
335+
'+ b\n',
336+
'+ close match 2\n',
337+
'+ a\n',
338+
'+ b\n'])
339+
m = list(d2.compare(a, b))
340+
self.assertEqual(m,
341+
[' a\n',
342+
' b\n',
343+
'- -\n',
344+
'+ c\n',
345+
'+ +\n',
346+
' a\n',
347+
' b\n',
348+
'- close match 1\n',
349+
'? ^\n',
350+
'+ close match 2\n',
351+
'? ^\n',
352+
' a\n',
353+
' b\n',
354+
'- c\n'])
355+
314356

315357
class TestOutputFormat(unittest.TestCase):
316358
def test_tab_delimiter(self):
@@ -724,7 +766,7 @@ def foo2(a, b):
724766
class TestGestaltSequenceMatcher(unittest.TestCase):
725767
def test_cross_test_with_autojunk_false(self):
726768
cases = [
727-
("ABCDEFGHIJKLMNOP" * 50, "ACEGIKMOQBDFHJLNP" * 50),
769+
("ABCDEFGHIJKLMNOP" * 10, "ACEGIKMOQBDFHJLNP" * 10),
728770
(
729771
"".join(chr(ord('a') + i % 10) * (i + 1) for i in range(30)),
730772
"".join(chr(ord('a') + i % 10) * (30 - i) for i in range(30))

0 commit comments

Comments
 (0)