Skip to content

Commit cee1e7a

Browse files
committed
merge: perform exact rename detection in linear time
The current exact rename detection has O(n^2) complexity. We can do better by first aggregating deletes in a map keyed by object id and then using that map to match deletes to adds. This results in a substantial performance improvement for merges with a large number of adds and deletes.
1 parent f9d3b0d commit cee1e7a

File tree

1 file changed

+152
-31
lines changed

1 file changed

+152
-31
lines changed

src/merge.c

Lines changed: 152 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
#include "commit.h"
3333
#include "oidarray.h"
3434
#include "merge_driver.h"
35+
#include "oidmap.h"
36+
#include "array.h"
3537

3638
#include "git2/types.h"
3739
#include "git2/repository.h"
@@ -1005,27 +1007,6 @@ struct merge_diff_similarity {
10051007
size_t other_idx;
10061008
};
10071009

1008-
static int index_entry_similarity_exact(
1009-
git_repository *repo,
1010-
git_index_entry *a,
1011-
size_t a_idx,
1012-
git_index_entry *b,
1013-
size_t b_idx,
1014-
void **cache,
1015-
const git_merge_options *opts)
1016-
{
1017-
GIT_UNUSED(repo);
1018-
GIT_UNUSED(a_idx);
1019-
GIT_UNUSED(b_idx);
1020-
GIT_UNUSED(cache);
1021-
GIT_UNUSED(opts);
1022-
1023-
if (git_oid__cmp(&a->id, &b->id) == 0)
1024-
return 100;
1025-
1026-
return 0;
1027-
}
1028-
10291010
static int index_entry_similarity_calc(
10301011
void **out,
10311012
git_repository *repo,
@@ -1102,12 +1083,154 @@ static int index_entry_similarity_inexact(
11021083
return score;
11031084
}
11041085

1105-
static int merge_diff_mark_similarity(
1086+
/* Tracks deletes by oid for merge_diff_mark_similarity_exact(). This is a
1087+
* non-shrinking queue where next_pos is the next position to dequeue.
1088+
*/
1089+
typedef struct {
1090+
git_array_t(size_t) arr;
1091+
size_t next_pos;
1092+
size_t first_entry;
1093+
} deletes_by_oid_queue;
1094+
1095+
/*
 * Tears down a delete-tracking map.
 *
 * Clears the index array of every queue stored in `map`, then frees the
 * map itself.  Passing NULL is a no-op.  The queue structs are not freed
 * here (deletes_by_oid_enqueue() obtains them from a git_pool).
 */
static void deletes_by_oid_free(git_oidmap *map)
{
	deletes_by_oid_queue *entry;

	if (map == NULL)
		return;

	git_oidmap_foreach_value(map, entry, {
		git_array_clear(entry->arr);
	});

	git_oidmap_free(map);
}
1106+
1107+
/*
 * Records `idx` as a delete for object id `id`.
 *
 * If `map` already holds a queue for `id`, `idx` is appended to that
 * queue's array.  Otherwise a new queue is allocated from `pool`, seeded
 * with `idx` as its first entry, and inserted into `map`.
 *
 * Returns 0 on success and -1 if the map insertion reports an error.
 * Allocation failures are handled by GITERR_CHECK_ALLOC.
 */
static int deletes_by_oid_enqueue(git_oidmap *map, git_pool *pool, const git_oid *id, size_t idx)
{
	deletes_by_oid_queue *queue;
	khint_t slot = git_oidmap_lookup_index(map, id);
	int rc;

	/* Known oid: just append to the existing queue. */
	if (git_oidmap_valid_index(map, slot)) {
		size_t *tail;

		queue = git_oidmap_value_at(map, slot);
		tail = git_array_alloc(queue->arr);
		GITERR_CHECK_ALLOC(tail);
		*tail = idx;

		return 0;
	}

	/* First delete seen for this oid: create and register its queue. */
	queue = git_pool_malloc(pool, sizeof(*queue));
	GITERR_CHECK_ALLOC(queue);

	git_array_init(queue->arr);
	queue->next_pos = 0;
	queue->first_entry = idx;

	git_oidmap_insert(map, id, queue, &rc);

	return (rc < 0) ? -1 : 0;
}
1134+
1135+
/*
 * Hands out the next not-yet-consumed delete index recorded for `id`.
 *
 * On success the index is written to `*idx`, the queue's read position
 * is advanced, and 0 is returned.  GIT_ENOTFOUND is returned, leaving
 * `*idx` untouched, when `map` has no queue for `id` or when every entry
 * of that queue has already been handed out.
 */
static int deletes_by_oid_dequeue(size_t *idx, git_oidmap *map, const git_oid *id)
{
	deletes_by_oid_queue *queue;
	size_t *stored;
	khint_t slot = git_oidmap_lookup_index(map, id);

	if (!git_oidmap_valid_index(map, slot))
		return GIT_ENOTFOUND;

	queue = git_oidmap_value_at(map, slot);

	if (queue->next_pos > 0) {
		/* Position k > 0 lives at arr[k - 1]; position 0 is first_entry. */
		stored = git_array_get(queue->arr, queue->next_pos - 1);
		if (!stored)
			return GIT_ENOTFOUND;

		*idx = *stored;
	} else {
		*idx = queue->first_entry;
	}

	queue->next_pos++;
	return 0;
}
1160+
1161+
static int merge_diff_mark_similarity_exact(
1162+
git_merge_diff_list *diff_list,
1163+
struct merge_diff_similarity *similarity_ours,
1164+
struct merge_diff_similarity *similarity_theirs)
1165+
{
1166+
size_t i, j;
1167+
git_merge_diff *conflict_src, *conflict_tgt;
1168+
git_oidmap *ours_deletes_by_oid, *theirs_deletes_by_oid;
1169+
int error = 0;
1170+
1171+
if (!(ours_deletes_by_oid = git_oidmap_alloc()) ||
1172+
!(theirs_deletes_by_oid = git_oidmap_alloc())) {
1173+
error = -1;
1174+
goto done;
1175+
}
1176+
1177+
/* Build a map of object ids to conflicts */
1178+
git_vector_foreach(&diff_list->conflicts, i, conflict_src) {
1179+
/* Items can be the source of a rename iff they have an item in the
1180+
* ancestor slot and lack an item in the ours or theirs slot. */
1181+
if (!GIT_MERGE_INDEX_ENTRY_EXISTS(conflict_src->ancestor_entry))
1182+
continue;
1183+
1184+
if (!GIT_MERGE_INDEX_ENTRY_EXISTS(conflict_src->our_entry)) {
1185+
error = deletes_by_oid_enqueue(ours_deletes_by_oid, &diff_list->pool, &conflict_src->ancestor_entry.id, i);
1186+
if (error < 0)
1187+
goto done;
1188+
}
1189+
1190+
if (!GIT_MERGE_INDEX_ENTRY_EXISTS(conflict_src->their_entry)) {
1191+
error = deletes_by_oid_enqueue(theirs_deletes_by_oid, &diff_list->pool, &conflict_src->ancestor_entry.id, i);
1192+
if (error < 0)
1193+
goto done;
1194+
}
1195+
}
1196+
1197+
git_vector_foreach(&diff_list->conflicts, j, conflict_tgt) {
1198+
if (GIT_MERGE_INDEX_ENTRY_EXISTS(conflict_tgt->ancestor_entry))
1199+
continue;
1200+
1201+
if (GIT_MERGE_INDEX_ENTRY_EXISTS(conflict_tgt->our_entry)) {
1202+
if (deletes_by_oid_dequeue(&i, ours_deletes_by_oid, &conflict_tgt->our_entry.id) == 0) {
1203+
similarity_ours[i].similarity = 100;
1204+
similarity_ours[i].other_idx = j;
1205+
1206+
similarity_ours[j].similarity = 100;
1207+
similarity_ours[j].other_idx = i;
1208+
}
1209+
}
1210+
1211+
if (GIT_MERGE_INDEX_ENTRY_EXISTS(conflict_tgt->their_entry)) {
1212+
if (deletes_by_oid_dequeue(&i, theirs_deletes_by_oid, &conflict_tgt->their_entry.id) == 0) {
1213+
similarity_theirs[i].similarity = 100;
1214+
similarity_theirs[i].other_idx = j;
1215+
1216+
similarity_theirs[j].similarity = 100;
1217+
similarity_theirs[j].other_idx = i;
1218+
}
1219+
}
1220+
}
1221+
1222+
done:
1223+
deletes_by_oid_free(ours_deletes_by_oid);
1224+
deletes_by_oid_free(theirs_deletes_by_oid);
1225+
1226+
return error;
1227+
}
1228+
1229+
static int merge_diff_mark_similarity_inexact(
11061230
git_repository *repo,
11071231
git_merge_diff_list *diff_list,
11081232
struct merge_diff_similarity *similarity_ours,
11091233
struct merge_diff_similarity *similarity_theirs,
1110-
int (*similarity_fn)(git_repository *, git_index_entry *, size_t, git_index_entry *, size_t, void **, const git_merge_options *),
11111234
void **cache,
11121235
const git_merge_options *opts)
11131236
{
@@ -1132,7 +1255,7 @@ static int merge_diff_mark_similarity(
11321255

11331256
if (GIT_MERGE_INDEX_ENTRY_EXISTS(conflict_tgt->our_entry) &&
11341257
!GIT_MERGE_INDEX_ENTRY_EXISTS(conflict_src->our_entry)) {
1135-
similarity = similarity_fn(repo, &conflict_src->ancestor_entry, i, &conflict_tgt->our_entry, our_idx, cache, opts);
1258+
similarity = index_entry_similarity_inexact(repo, &conflict_src->ancestor_entry, i, &conflict_tgt->our_entry, our_idx, cache, opts);
11361259

11371260
if (similarity == GIT_EBUFS)
11381261
continue;
@@ -1158,7 +1281,7 @@ static int merge_diff_mark_similarity(
11581281

11591282
if (GIT_MERGE_INDEX_ENTRY_EXISTS(conflict_tgt->their_entry) &&
11601283
!GIT_MERGE_INDEX_ENTRY_EXISTS(conflict_src->their_entry)) {
1161-
similarity = similarity_fn(repo, &conflict_src->ancestor_entry, i, &conflict_tgt->their_entry, their_idx, cache, opts);
1284+
similarity = index_entry_similarity_inexact(repo, &conflict_src->ancestor_entry, i, &conflict_tgt->their_entry, their_idx, cache, opts);
11621285

11631286
if (similarity > similarity_theirs[i].similarity &&
11641287
similarity > similarity_theirs[j].similarity) {
@@ -1396,11 +1519,10 @@ int git_merge_diff_list__find_renames(
13961519
/* Calculate similarity between items that were deleted from the ancestor
13971520
* and added in the other branch.
13981521
*/
1399-
if ((error = merge_diff_mark_similarity(repo, diff_list, similarity_ours,
1400-
similarity_theirs, index_entry_similarity_exact, NULL, opts)) < 0)
1522+
if ((error = merge_diff_mark_similarity_exact(diff_list, similarity_ours, similarity_theirs)) < 0)
14011523
goto done;
14021524

1403-
if (diff_list->conflicts.length <= opts->target_limit) {
1525+
if (opts->rename_threshold < 100 && diff_list->conflicts.length <= opts->target_limit) {
14041526
GITERR_CHECK_ALLOC_MULTIPLY(&cache_size, diff_list->conflicts.length, 3);
14051527
cache = git__calloc(cache_size, sizeof(void *));
14061528
GITERR_CHECK_ALLOC(cache);
@@ -1410,9 +1532,8 @@ int git_merge_diff_list__find_renames(
14101532
if (src_count > opts->target_limit || tgt_count > opts->target_limit) {
14111533
/* TODO: report! */
14121534
} else {
1413-
if ((error = merge_diff_mark_similarity(
1414-
repo, diff_list, similarity_ours, similarity_theirs,
1415-
index_entry_similarity_inexact, cache, opts)) < 0)
1535+
if ((error = merge_diff_mark_similarity_inexact(
1536+
repo, diff_list, similarity_ours, similarity_theirs, cache, opts)) < 0)
14161537
goto done;
14171538
}
14181539
}

0 commit comments

Comments
 (0)