@@ -220,9 +220,9 @@ def set_seq2(self, b):
220220 self .b = b
221221 self .matching_blocks = self .opcodes = None
222222 self .fullbcount = None
223- self ._prepare_b ()
223+ self ._prepare_seq2 ()
224224
225- def _prepare_b (self ):
225+ def _prepare_seq2 (self ):
226226 """Preparation function that is called at the end of `set_seq2`.
227227 It is usually used to:
228228 a) Process junk
@@ -591,7 +591,7 @@ def __init__(self, isjunk=None, a='', b='', autojunk=True):
591591 # kinds of matches, it's best to call set_seq2 once, then set_seq1
592592 # repeatedly
593593
594- def _prepare_b (self ):
594+ def _prepare_seq2 (self ):
595595 self .__chain_b ()
596596
597597 def __chain_b (self ):
@@ -2786,7 +2786,7 @@ def __init__(self, isjunk=None, a='', b='', balancing=0):
27862786 balancing : float in [0, 1]
27872787 a ratio that specifies the proportion of `skew` for which
27882788 balancing action will be attempted.
2789- if 0, no balancing actions will occur
2789+ If 0, it is turned off and no balancing will take place.
27902790
27912791 Balancing:
27922792 Balancing action will commence if abs(skew) >= 1 - balancing,
@@ -2795,19 +2795,43 @@ def __init__(self, isjunk=None, a='', b='', balancing=0):
27952795 worst possible skew values will be eligible for balancing.
27962796 Note for the future: balancing procedure scales to k-strings well
27972797
2798- Skewed matching block visually:
2798+ Balancing in action:
2799+ balancing = 2/3
2800+ seq1 = '-xx-yy-###-'
2801+ seq2 = '_###_xx_yy_'
27992802
2800- m1 = (6 + 9 ) / 2 = 7 .5
2801- |
2802- ------ ###- -
2803- -- ###------
2804- |
2805- m2 = (2 + 5 ) / 2 = 3 .5
2803+ m1 = (7 + 10) / 2 = 8.5
2804+ A       |
2805+ -xx-yy-###-
2806+       _###_xx_yy_
2807+         |       B
2808+ m2 = (1 + 4) / 2 = 2.5
28062809
2807- skew = 7 .5 / 11 - 3 .5 / 11 = 0.3636
2808- do_balancing = abs(skew) > 1 - balancing
2810+ skew = 8.5 / 11 - 2.5 / 11 = 0.545
2811+ do_balancing = abs(skew) > 1 - balancing  ->  0.545 > 0.333  ->  True
28092812
2810- with balancing == 2/3, this would try alternatives with lookahead
2813+ Once it has been decided to do balancing, the procedure is:
2814+ 1. Select a set of alternative candidate blocks
2815+ To do so, we find the longest matching block in each of 2 ranges,
2816+ where each range excludes the matched block from one of the sequences:
2817+
2818+ a) -xx-yy-
2819+ _###_xx_yy_
2820+
2821+ b) -xx-yy-###-
2822+ _xx_yy_
2823+
2824+ Thus the full candidate set is:
2825+ ### - initial longest block
2826+ xx - found in both ranges
2827+ 2. For each candidate, find 2 additional blocks (one on each side)
2828+ ### - has no nearby matches
2829+ xx - has another 'yy' on the right
2830+ 3. Select the candidate for which the sum of the 3 block lengths is the highest
2831+ ### - 3 (only the candidate)
2832+ xx - 4 (xx + yy)
2833+
2834+ Thus, for this example, xx is picked.
28112835
28122836 Comparison to SequenceMatcher:
28132837 In terms of results, the following 2 are equivalent:
@@ -2831,7 +2855,7 @@ def __init__(self, isjunk=None, a='', b='', balancing=0):
28312855 self .balancing = balancing
28322856 super ().__init__ (isjunk , a , b )
28332857
2834- def _prepare_b (self ):
2858+ def _prepare_seq2 (self ):
28352859 b = self .b
28362860 self .bjunk = bjunk = set ()
28372861 if self .isjunk :
0 commit comments