@@ -274,3 +274,80 @@ void test_merge_trees_renames__submodules(void)
274274 cl_assert (merge_test_index (index , merge_index_entries , 7 ));
275275 git_index_free (index );
276276}
277+
278+ void test_merge_trees_renames__cache_recomputation (void )
279+ {
280+ git_oid blob , binary , ancestor_oid , theirs_oid , ours_oid ;
281+ git_merge_options opts = GIT_MERGE_OPTIONS_INIT ;
282+ git_buf path = GIT_BUF_INIT ;
283+ git_treebuilder * builder ;
284+ git_tree * ancestor_tree , * their_tree , * our_tree ;
285+ git_index * index ;
286+ size_t blob_size ;
287+ void * data ;
288+ size_t i ;
289+
290+ cl_git_pass (git_oid_fromstr (& blob , "a2d8d1824c68541cca94ffb90f79291eba495921" ));
291+
292+ /*
293+ * Create a 50MB blob that consists of NUL bytes only. It is important
294+ * that this blob is of a special format, most importantly it cannot
295+ * contain more than four non-consecutive newlines or NUL bytes. This
296+ * is because of git_hashsig's inner workings where all files with less
297+ * than four "lines" are deemed to small.
298+ */
299+ blob_size = 50 * 1024 * 1024 ;
300+ cl_assert (data = git__calloc (blob_size , 1 ));
301+ cl_git_pass (git_blob_create_from_buffer (& binary , repo , data , blob_size ));
302+
303+ /*
304+ * Create the common ancestor, which has 1000 dummy blobs and the binary
305+ * blob. The dummy blobs serve as potential rename targets for the
306+ * dummy blob.
307+ */
308+ cl_git_pass (git_treebuilder_new (& builder , repo , NULL ));
309+ for (i = 0 ; i < 1000 ; i ++ ) {
310+ cl_git_pass (git_buf_printf (& path , "%" PRIuMAX ".txt" , i ));
311+ cl_git_pass (git_treebuilder_insert (NULL , builder , path .ptr , & blob , GIT_FILEMODE_BLOB ));
312+ git_buf_clear (& path );
313+ }
314+ cl_git_pass (git_treebuilder_insert (NULL , builder , "original.bin" , & binary , GIT_FILEMODE_BLOB ));
315+ cl_git_pass (git_treebuilder_write (& ancestor_oid , builder ));
316+
317+ /* We now the binary blob in our tree. */
318+ cl_git_pass (git_treebuilder_remove (builder , "original.bin" ));
319+ cl_git_pass (git_treebuilder_insert (NULL , builder , "renamed.bin" , & binary , GIT_FILEMODE_BLOB ));
320+ cl_git_pass (git_treebuilder_write (& ours_oid , builder ));
321+
322+ git_treebuilder_free (builder );
323+
324+ /* And move everything into a subdirectory in their tree. */
325+ cl_git_pass (git_treebuilder_new (& builder , repo , NULL ));
326+ cl_git_pass (git_treebuilder_insert (NULL , builder , "subdir" , & ancestor_oid , GIT_FILEMODE_TREE ));
327+ cl_git_pass (git_treebuilder_write (& theirs_oid , builder ));
328+
329+ /*
330+ * Now merge ancestor, ours and theirs. As `git_hashsig` refuses to
331+ * create a hash signature for the 50MB binary file, we historically
332+ * didn't cache the hashsig computation for it. As a result, we now
333+ * started looking up the 50MB blob and scanning it at least 1000
334+ * times, which takes a long time.
335+ *
336+ * The number of 1000 blobs is chosen in such a way that it's
337+ * noticeable when the bug creeps in again, as it takes around 12
338+ * minutes on my machine to compute the following merge.
339+ */
340+ opts .target_limit = 5000 ;
341+ cl_git_pass (git_tree_lookup (& ancestor_tree , repo , & ancestor_oid ));
342+ cl_git_pass (git_tree_lookup (& their_tree , repo , & theirs_oid ));
343+ cl_git_pass (git_tree_lookup (& our_tree , repo , & ours_oid ));
344+ cl_git_pass (git_merge_trees (& index , repo , ancestor_tree , our_tree , their_tree , & opts ));
345+
346+ git_treebuilder_free (builder );
347+ git_buf_dispose (& path );
348+ git_index_free (index );
349+ git_tree_free (ancestor_tree );
350+ git_tree_free (their_tree );
351+ git_tree_free (our_tree );
352+ git__free (data );
353+ }
0 commit comments