Skip to content

Commit 6708618

Browse files
committed
revwalk: get closer to git
We had some home-grown logic to figure out which objects to show during the revision walk, but it was rather inefficient, looking over the same list multiple times to figure out when we had run out of interesting commits. We now use the lists in a smarter way. We also introduce the slop mechanism to determine when to stpo looking. When we run out of interesting objects, we continue preparing the walk for another 5 rounds in order to make it less likely that we miss objects in situations with complex graphs.
1 parent 565fb8d commit 6708618

File tree

5 files changed

+182
-47
lines changed

5 files changed

+182
-47
lines changed

src/commit_list.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ typedef struct git_commit_list_node {
2828
uninteresting:1,
2929
topo_delay:1,
3030
parsed:1,
31+
added:1,
3132
flags : FLAG_BITS;
3233

3334
unsigned short in_degree;

src/revwalk.c

Lines changed: 174 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ static int mark_uninteresting(git_revwalk *walk, git_commit_list_node *commit)
8686
tmp = git_array_pop(pending);
8787
commit = tmp ? *tmp : NULL;
8888

89-
} while (commit != NULL && !interesting_arr(pending));
89+
} while (commit != NULL && interesting_arr(pending));
9090

9191
git_array_clear(pending);
9292

@@ -398,81 +398,194 @@ static int revwalk_next_reverse(git_commit_list_node **object_out, git_revwalk *
398398
return *object_out ? 0 : GIT_ITEROVER;
399399
}
400400

401-
402-
static int interesting(git_pqueue *list)
401+
static int contains(git_pqueue *list, git_commit_list_node *node)
403402
{
404403
size_t i;
405404

406405
for (i = 0; i < git_pqueue_size(list); i++) {
407406
git_commit_list_node *commit = git_pqueue_get(list, i);
408-
if (!commit->uninteresting)
407+
if (commit == node)
409408
return 1;
410409
}
411410

412411
return 0;
413412
}
414413

415-
static int contains(git_pqueue *list, git_commit_list_node *node)
414+
static void mark_parents_uninteresting(git_commit_list_node *commit)
416415
{
417-
size_t i;
416+
unsigned short i;
417+
git_commit_list *parents = NULL;
418418

419-
for (i = 0; i < git_pqueue_size(list); i++) {
420-
git_commit_list_node *commit = git_pqueue_get(list, i);
421-
if (commit == node)
422-
return 1;
423-
}
419+
for (i = 0; i < commit->out_degree; i++)
420+
git_commit_list_insert(commit->parents[i], &parents);
424421

425-
return 0;
422+
423+
while (parents) {
424+
git_commit_list_node *commit = git_commit_list_pop(&parents);
425+
426+
while (commit) {
427+
if (commit->uninteresting)
428+
break;
429+
430+
commit->uninteresting = 1;
431+
/*
432+
* If we've reached this commit some other way
433+
* already, we need to mark its parents uninteresting
434+
* as well.
435+
*/
436+
if (!commit->parents)
437+
break;
438+
439+
for (i = 0; i < commit->out_degree; i++)
440+
git_commit_list_insert(commit->parents[i], &parents);
441+
commit = commit->parents[0];
442+
}
443+
}
426444
}
427445

428-
static int premark_uninteresting(git_revwalk *walk)
446+
static int add_parents_to_list(git_revwalk *walk, git_commit_list_node *commit, git_commit_list **list)
429447
{
430-
int error = 0;
431448
unsigned short i;
432-
git_pqueue q;
433-
git_commit_list *list;
434-
git_commit_list_node *commit, *parent;
449+
int error;
435450

436-
if ((error = git_pqueue_init(&q, 0, 8, git_commit_list_time_cmp)) < 0)
437-
return error;
451+
if (commit->added)
452+
return 0;
438453

439-
for (list = walk->user_input; list; list = list->next) {
440-
if ((error = git_commit_list_parse(walk, list->item)) < 0)
441-
goto cleanup;
454+
commit->added = 1;
455+
456+
/* TODO: add the insertion callback here as well */
442457

443-
if ((error = git_pqueue_insert(&q, list->item)) < 0)
444-
goto cleanup;
458+
/*
459+
* Go full on in the uninteresting case as we want to include
460+
* as many of these as we can.
461+
*
462+
* Usually we haven't parsed the parent of a parent, but if we
463+
* have it we reached it via other means so we want to mark
464+
* its parents recursively too.
465+
*/
466+
if (commit->uninteresting) {
467+
for (i = 0; i < commit->out_degree; i++) {
468+
git_commit_list_node *p = commit->parents[i];
469+
p->uninteresting = 1;
470+
471+
/* git does it gently here, but we don't like missing objects */
472+
if ((error = git_commit_list_parse(walk, p)) < 0)
473+
return error;
474+
475+
if (p->parents)
476+
mark_parents_uninteresting(p);
477+
478+
p->seen = 1;
479+
git_commit_list_insert_by_date(p, list);
480+
}
481+
482+
return 0;
445483
}
446484

447-
while (interesting(&q)) {
448-
commit = git_pqueue_pop(&q);
485+
/*
486+
* Now on to what we do if the commit is indeed
487+
* interesting. Here we do want things like first-parent take
488+
* effect as this is what we'll be showing.
489+
*/
490+
for (i = 0; i < commit->out_degree; i++) {
491+
git_commit_list_node *p = commit->parents[i];
449492

450-
for (i = 0; i < commit->out_degree; i++) {
451-
parent = commit->parents[i];
493+
if ((error = git_commit_list_parse(walk, p)) < 0)
494+
return error;
452495

453-
if ((error = git_commit_list_parse(walk, parent)) < 0)
454-
goto cleanup;
496+
if (walk->hide_cb && walk->hide_cb(&p->oid, walk->hide_cb_payload))
497+
continue;
455498

456-
if (commit->uninteresting)
457-
parent->uninteresting = 1;
499+
if (!p->seen) {
500+
p->seen = 1;
501+
git_commit_list_insert_by_date(p, list);
502+
}
503+
504+
if (walk->first_parent)
505+
break;
506+
}
507+
return 0;
508+
}
458509

459-
if (contains(&q, parent))
510+
static int everybody_uninteresting(git_commit_list *orig)
511+
{
512+
git_commit_list *list = orig;
513+
514+
while (list) {
515+
git_commit_list_node *commit = list->item;
516+
list = list->next;
517+
if (commit->uninteresting)
518+
continue;
519+
520+
return 0;
521+
}
522+
523+
return 1;
524+
}
525+
526+
/* How many unintersting commits we want to look at after we run out of interesting ones */
527+
#define SLOP 5
528+
529+
static int still_interesting(git_commit_list *list, int64_t time, int slop)
530+
{
531+
/* The empty list is pretty boring */
532+
if (!list)
533+
return 0;
534+
535+
/*
536+
* If the destination list has commits with an earlier date
537+
* than our source we want to continue looking.
538+
*/
539+
if (time <= list->item->time)
540+
return SLOP;
541+
542+
/* If we find interesting commits, we reset the slop count */
543+
if (!everybody_uninteresting(list))
544+
return SLOP;
545+
546+
/* Everything's uninteresting, reduce the count */
547+
return slop - 1;
548+
}
549+
550+
static int limit_list(git_commit_list **out, git_revwalk *walk, git_commit_list *commits)
551+
{
552+
int error, slop = SLOP;
553+
int64_t time = ~0ll;
554+
git_commit_list *list = commits;
555+
git_commit_list *newlist = NULL;
556+
git_commit_list **p = &newlist;
557+
558+
while (list) {
559+
git_commit_list_node *commit = git_commit_list_pop(&list);
560+
561+
if ((error = add_parents_to_list(walk, commit, &list)) < 0)
562+
return error;
563+
564+
if (commit->uninteresting) {
565+
mark_parents_uninteresting(commit);
566+
567+
slop = still_interesting(list, time, slop);
568+
if (slop)
460569
continue;
461570

462-
if ((error = git_pqueue_insert(&q, parent)) < 0)
463-
goto cleanup;
571+
break;
464572
}
573+
574+
if (!commit->uninteresting && walk->hide_cb && walk->hide_cb(&commit->oid, walk->hide_cb_payload))
575+
continue;
576+
577+
time = commit->time;
578+
p = &git_commit_list_insert(commit, p)->next;
465579
}
466580

467-
cleanup:
468-
git_pqueue_free(&q);
469-
return error;
581+
*out = newlist;
582+
return 0;
470583
}
471584

472585
static int prepare_walk(git_revwalk *walk)
473586
{
474587
int error;
475-
git_commit_list *list;
588+
git_commit_list *list, *commits = NULL;
476589
git_commit_list_node *next;
477590

478591
/* If there were no pushes, we know that the walk is already over */
@@ -481,12 +594,29 @@ static int prepare_walk(git_revwalk *walk)
481594
return GIT_ITEROVER;
482595
}
483596

484-
if (walk->did_hide && (error = premark_uninteresting(walk)) < 0)
597+
for (list = walk->user_input; list; list = list->next) {
598+
git_commit_list_node *commit = list->item;
599+
if ((error = git_commit_list_parse(walk, commit)) < 0)
600+
return error;
601+
602+
if (commit->uninteresting)
603+
mark_parents_uninteresting(commit);
604+
605+
if (!commit->seen) {
606+
commit->seen = 1;
607+
git_commit_list_insert(commit, &commits);
608+
}
609+
}
610+
611+
if ((error = limit_list(&commits, walk, commits)) < 0)
485612
return error;
486613

487-
for (list = walk->user_input; list; list = list->next) {
488-
if (process_commit(walk, list->item, list->item->uninteresting) < 0)
489-
return -1;
614+
for (list = commits; list; list = list->next) {
615+
if (list->item->uninteresting)
616+
continue;
617+
618+
if ((error = walk->enqueue(walk, list->item)) < 0)
619+
return error;
490620
}
491621

492622

@@ -632,6 +762,7 @@ void git_revwalk_reset(git_revwalk *walk)
632762
commit->in_degree = 0;
633763
commit->topo_delay = 0;
634764
commit->uninteresting = 0;
765+
commit->added = 0;
635766
commit->flags = 0;
636767
});
637768

tests/revwalk/basic.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,9 @@ static const int commit_sorting_time_reverse[][6] = {
3838
{4, 5, 2, 1, 3, 0}
3939
};
4040

41+
/* This is specified unsorted, so both combinations are possible */
4142
static const int commit_sorting_segment[][6] = {
42-
{1, 2, -1, -1, -1, -1}
43+
{1, 2, -1, -1, -1, -1}, {2, 1, -1, -1, -1, -1}
4344
};
4445

4546
#define commit_count 6
@@ -155,9 +156,8 @@ void test_revwalk_basic__glob_heads(void)
155156

156157
cl_git_pass(git_revwalk_push_glob(_walk, "heads"));
157158

158-
while (git_revwalk_next(&oid, _walk) == 0) {
159+
while (git_revwalk_next(&oid, _walk) == 0)
159160
i++;
160-
}
161161

162162
/* git log --branches --oneline | wc -l => 14 */
163163
cl_assert_equal_i(i, 14);
@@ -338,7 +338,7 @@ void test_revwalk_basic__push_range(void)
338338
git_revwalk_reset(_walk);
339339
git_revwalk_sorting(_walk, 0);
340340
cl_git_pass(git_revwalk_push_range(_walk, "9fd738e~2..9fd738e"));
341-
cl_git_pass(test_walk_only(_walk, commit_sorting_segment, 1));
341+
cl_git_pass(test_walk_only(_walk, commit_sorting_segment, 2));
342342
}
343343

344344
void test_revwalk_basic__push_mixed(void)

tests/revwalk/hidecb.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ void test_revwalk_hidecb__hide_some_commits(void)
158158

159159
cl_git_pass(git_revwalk_new(&walk, _repo));
160160
cl_git_pass(git_revwalk_push(walk, &_head_id));
161+
git_revwalk_sorting(walk, GIT_SORT_TOPOLOGICAL | GIT_SORT_TIME);
161162

162163
/* Add hide callback */
163164
cl_git_pass(git_revwalk_add_hide_cb(walk, hide_commit_cb, NULL));
@@ -182,6 +183,7 @@ void test_revwalk_hidecb__test_payload(void)
182183

183184
cl_git_pass(git_revwalk_new(&walk, _repo));
184185
cl_git_pass(git_revwalk_push(walk, &_head_id));
186+
git_revwalk_sorting(walk, GIT_SORT_TOPOLOGICAL | GIT_SORT_TIME);
185187

186188
/* Add hide callback, pass id of parent of initial commit as payload data */
187189
cl_git_pass(git_revwalk_add_hide_cb(walk, hide_commit_use_payload_cb, &commit_ids[5]));

tests/revwalk/simplify.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ void test_revwalk_simplify__first_parent(void)
4040

4141
git_oid_fromstr(&id, commit_head);
4242
cl_git_pass(git_revwalk_push(walk, &id));
43+
git_revwalk_sorting(walk, GIT_SORT_TOPOLOGICAL);
4344
git_revwalk_simplify_first_parent(walk);
4445

4546
i = 0;

0 commit comments

Comments
 (0)