@@ -71,7 +71,8 @@ class HashDiffer(TableDiffer):
7171 """
7272
7373 bisection_factor : int = DEFAULT_BISECTION_FACTOR
74- bisection_threshold : Number = DEFAULT_BISECTION_THRESHOLD # Accepts inf for tests
74+ bisection_threshold : int = DEFAULT_BISECTION_THRESHOLD
75+ bisection_disabled : bool = False # i.e. always download the rows (used in tests)
7576
7677 stats : dict = attrs .field (factory = dict )
7778
@@ -157,7 +158,7 @@ def _diff_segments(
157158 # default, data-diff will checksum the section first (when it's below
158159 # the threshold) and _then_ download it.
159160 if BENCHMARK :
160- if max_rows < self .bisection_threshold :
161+ if self . bisection_disabled or max_rows < self .bisection_threshold :
161162 return self ._bisect_and_diff_segments (ti , table1 , table2 , info_tree , level = level , max_rows = max_rows )
162163
163164 (count1 , checksum1 ), (count2 , checksum2 ) = self ._threaded_call ("count_and_checksum" , [table1 , table2 ])
@@ -202,7 +203,7 @@ def _bisect_and_diff_segments(
202203
203204 # If count is below the threshold, just download and compare the columns locally
204205 # This saves time, as bisection speed is limited by ping and query performance.
205- if max_rows < self .bisection_threshold or max_space_size < self .bisection_factor * 2 :
206+ if self . bisection_disabled or max_rows < self .bisection_threshold or max_space_size < self .bisection_factor * 2 :
206207 rows1 , rows2 = self ._threaded_call ("get_values" , [table1 , table2 ])
207208 json_cols = {
208209 i : colname
0 commit comments