Skip to content

Commit 2a6d095

Browse files
committed
revwalk: avoid walking the entire history when output is unsorted
As part of reducing our divergence from git, its revwalk code was ported into our codebase. A detail about when to limit the list was lost in the port, and we ended up always calling that code. Limiting the list means performing the walk and building the final list of commits to be output during the preparation stage. This is unavoidable when sorting or when there are negative refs (i.e. hidden commits). However, we did this even when asked for unsorted output with no negative refs, which you might request to retrieve something like the "last 10 commits on HEAD" for a nominally unsorted meaning of "last". This commit adds a flag that is set whenever we do need to limit the list, letting us avoid doing so when we can. The previously mentioned query thus no longer loads the entire history of the project during the prepare stage, but instead loads it incrementally during the walk.
1 parent dc27772 commit 2a6d095

File tree

2 files changed

+59
-10
lines changed

2 files changed

+59
-10
lines changed

src/revwalk.c

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
#include "merge.h"
1616
#include "vector.h"
1717

18+
static int get_revision(git_commit_list_node **out, git_revwalk *walk, git_commit_list **list);
19+
1820
git_commit_list_node *git_revwalk__commit_lookup(
1921
git_revwalk *walk, const git_oid *oid)
2022
{
@@ -76,10 +78,12 @@ static int push_commit(git_revwalk *walk, const git_oid *oid, int uninteresting,
7678
if (commit->uninteresting)
7779
return 0;
7880

79-
if (uninteresting)
81+
if (uninteresting) {
82+
walk->limited = 1;
8083
walk->did_hide = 1;
81-
else
84+
} else {
8285
walk->did_push = 1;
86+
}
8387

8488
commit->uninteresting = uninteresting;
8589
list = walk->user_input;
@@ -245,34 +249,34 @@ static int revwalk_next_timesort(git_commit_list_node **object_out, git_revwalk
245249

246250
static int revwalk_next_unsorted(git_commit_list_node **object_out, git_revwalk *walk)
247251
{
252+
int error;
248253
git_commit_list_node *next;
249254

250-
while ((next = git_commit_list_pop(&walk->iterator_rand)) != NULL) {
255+
while (!(error = get_revision(&next, walk, &walk->iterator_rand))) {
251256
/* Some commits might become uninteresting after being added to the list */
252257
if (!next->uninteresting) {
253258
*object_out = next;
254259
return 0;
255260
}
256261
}
257262

258-
giterr_clear();
259-
return GIT_ITEROVER;
263+
return error;
260264
}
261265

262266
static int revwalk_next_toposort(git_commit_list_node **object_out, git_revwalk *walk)
263267
{
268+
int error;
264269
git_commit_list_node *next;
265270

266-
while ((next = git_commit_list_pop(&walk->iterator_topo)) != NULL) {
271+
while (!(error = get_revision(&next, walk, &walk->iterator_topo))) {
267272
/* Some commits might become uninteresting after being added to the list */
268273
if (!next->uninteresting) {
269274
*object_out = next;
270275
return 0;
271276
}
272277
}
273278

274-
giterr_clear();
275-
return GIT_ITEROVER;
279+
return error;
276280
}
277281

278282
static int revwalk_next_reverse(git_commit_list_node **object_out, git_revwalk *walk)
@@ -449,6 +453,45 @@ static int limit_list(git_commit_list **out, git_revwalk *walk, git_commit_list
449453
return 0;
450454
}
451455

456+
/*
 * Pop the next commit off `list` and hand it back through `out`.
 *
 * Returns 0 on success, GIT_ITEROVER (after clearing the error state) once
 * `list` is empty, or the negative error code reported by
 * add_parents_to_list(). `out` is only written on success.
 */
static int get_one_revision(git_commit_list_node **out, git_revwalk *walk, git_commit_list **list)
{
	int error;
	git_commit_list_node *commit;

	commit = git_commit_list_pop(list);
	if (!commit) {
		giterr_clear();
		return GIT_ITEROVER;
	}

	/*
	 * If we did not run limit_list, we must add the parents to the
	 * list ourselves so that the walk can continue past this commit.
	 */
	if (!walk->limited) {
		if ((error = add_parents_to_list(walk, commit, list)) < 0)
			return error;
	}

	*out = commit;
	return 0;
}
481+
482+
/*
 * Fetch the next commit of the walk from `list`.
 *
 * Thin wrapper around get_one_revision(): a negative result is returned
 * unchanged; otherwise the commit is stored in `out` and 0 is returned.
 */
static int get_revision(git_commit_list_node **out, git_revwalk *walk, git_commit_list **list)
{
	git_commit_list_node *next;
	int rc = get_one_revision(&next, walk, list);

	if (rc < 0)
		return rc;

	/* Here is where we would handle boundary commits if we implement that */
	*out = next;
	return 0;
}
494+
452495
static int sort_in_topological_order(git_commit_list **out, git_revwalk *walk, git_commit_list *list)
453496
{
454497
git_commit_list *ll = NULL, *newlist, **pptr;
@@ -561,7 +604,7 @@ static int prepare_walk(git_revwalk *walk)
561604
}
562605
}
563606

564-
if ((error = limit_list(&commits, walk, commits)) < 0)
607+
if (walk->limited && (error = limit_list(&commits, walk, commits)) < 0)
565608
return error;
566609

567610
if (walk->sorting & GIT_SORT_TOPOLOGICAL) {
@@ -664,6 +707,9 @@ void git_revwalk_sorting(git_revwalk *walk, unsigned int sort_mode)
664707
walk->get_next = &revwalk_next_unsorted;
665708
walk->enqueue = &revwalk_enqueue_unsorted;
666709
}
710+
711+
if (sort_mode != GIT_SORT_NONE)
712+
walk->limited = 1;
667713
}
668714

669715
void git_revwalk_simplify_first_parent(git_revwalk *walk)
@@ -719,6 +765,7 @@ void git_revwalk_reset(git_revwalk *walk)
719765
git_commit_list_free(&walk->user_input);
720766
walk->first_parent = 0;
721767
walk->walking = 0;
768+
walk->limited = 0;
722769
walk->did_push = walk->did_hide = 0;
723770
}
724771

@@ -740,6 +787,7 @@ int git_revwalk_add_hide_cb(
740787

741788
walk->hide_cb = hide_cb;
742789
walk->hide_cb_payload = payload;
790+
walk->limited = 1;
743791

744792
return 0;
745793
}

src/revwalk.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ struct git_revwalk {
3636
unsigned walking:1,
3737
first_parent: 1,
3838
did_hide: 1,
39-
did_push: 1;
39+
did_push: 1,
40+
limited: 1;
4041
unsigned int sorting;
4142

4243
/* the pushes and hides */

0 commit comments

Comments
 (0)