Skip to content

Commit 2c4eb83

Browse files
Colin Stolleyderrickstolee
authored andcommitted
commit-graph: only verify csum on git_commit_graph_open().
It is expensive to compute the sha1 of the entire commit-graph file each time we open it. Git only does this if it is re-writing the file. This patch will only verify the checksum when calling the external API git_commit_graph_open(), which explicitly says it opens and verifies the commit graph in the documentation. For internal library calls, we call git_commit_graph_get_file(), which mmaps the commit-graph file in read-only mode. Therefore it is safe to skip the validation check there. Tests were added to check that the validation works in the happy path, and prevents us from opening the file when validation fails. (Note from Derrick Stolee: This patch was applied internally at GitHub after we recognized the performance impact it had during an upgrade of libgit2. The original author left the company before we remembered to send it upstream.) Signed-off-by: Derrick Stolee <derrickstolee@github.com>
1 parent a3841af commit 2c4eb83

File tree

3 files changed

+65
-7
lines changed

3 files changed

+65
-7
lines changed

src/libgit2/commit_graph.c

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,6 @@ int git_commit_graph_file_parse(
201201
struct git_commit_graph_chunk *last_chunk;
202202
uint32_t i;
203203
off64_t last_chunk_offset, chunk_offset, trailer_offset;
204-
unsigned char checksum[GIT_HASH_SHA1_SIZE];
205204
size_t checksum_size;
206205
int error;
207206
struct git_commit_graph_chunk chunk_oid_fanout = {0}, chunk_oid_lookup = {0},
@@ -234,11 +233,6 @@ int git_commit_graph_file_parse(
234233
return commit_graph_error("wrong commit-graph size");
235234
memcpy(file->checksum, (data + trailer_offset), checksum_size);
236235

237-
if (git_hash_buf(checksum, data, (size_t)trailer_offset, GIT_HASH_ALGORITHM_SHA1) < 0)
238-
return commit_graph_error("could not calculate signature");
239-
if (memcmp(checksum, file->checksum, checksum_size) != 0)
240-
return commit_graph_error("index signature mismatch");
241-
242236
chunk_hdr = data + sizeof(struct git_commit_graph_header);
243237
last_chunk = NULL;
244238
for (i = 0; i < hdr->chunks; ++i, chunk_hdr += 12) {
@@ -331,9 +325,29 @@ int git_commit_graph_new(git_commit_graph **cgraph_out, const char *objects_dir,
331325
return error;
332326
}
333327

328+
int git_commit_graph_validate(git_commit_graph *cgraph) {
329+
unsigned char checksum[GIT_HASH_SHA1_SIZE];
330+
size_t checksum_size = GIT_HASH_SHA1_SIZE;
331+
size_t trailer_offset = cgraph->file->graph_map.len - checksum_size;
332+
333+
if (cgraph->file->graph_map.len < checksum_size)
334+
return commit_graph_error("map length too small");
335+
336+
if (git_hash_buf(checksum, cgraph->file->graph_map.data, trailer_offset, GIT_HASH_ALGORITHM_SHA1) < 0)
337+
return commit_graph_error("could not calculate signature");
338+
if (memcmp(checksum, cgraph->file->checksum, checksum_size) != 0)
339+
return commit_graph_error("index signature mismatch");
340+
341+
return 0;
342+
}
343+
334344
int git_commit_graph_open(git_commit_graph **cgraph_out, const char *objects_dir)
335345
{
336-
return git_commit_graph_new(cgraph_out, objects_dir, true);
346+
int error = git_commit_graph_new(cgraph_out, objects_dir, true);
347+
if (!error) {
348+
return git_commit_graph_validate(*cgraph_out);
349+
}
350+
return error;
337351
}
338352

339353
int git_commit_graph_file_open(git_commit_graph_file **file_out, const char *path)

src/libgit2/commit_graph.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,9 @@ struct git_commit_graph {
106106
/** Create a new commit-graph, optionally opening the underlying file. */
107107
int git_commit_graph_new(git_commit_graph **cgraph_out, const char *objects_dir, bool open_file);
108108

109+
/** Validate the checksum of a commit graph */
110+
int git_commit_graph_validate(git_commit_graph *cgraph);
111+
109112
/** Open and validate a commit-graph file. */
110113
int git_commit_graph_file_open(git_commit_graph_file **file_out, const char *path);
111114

tests/libgit2/graph/commitgraph.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,44 @@ void test_graph_commitgraph__writer(void)
124124
git_commit_graph_writer_free(w);
125125
git_repository_free(repo);
126126
}
127+
128+
void test_graph_commitgraph__validate(void)
129+
{
130+
git_repository *repo;
131+
struct git_commit_graph *cgraph;
132+
git_str objects_dir = GIT_STR_INIT;
133+
134+
cl_git_pass(git_repository_open(&repo, cl_fixture("testrepo.git")));
135+
cl_git_pass(git_str_joinpath(&objects_dir, git_repository_path(repo), "objects"));
136+
137+
/* git_commit_graph_open() calls git_commit_graph_validate() */
138+
cl_git_pass(git_commit_graph_open(&cgraph, git_str_cstr(&objects_dir)));
139+
140+
git_commit_graph_free(cgraph);
141+
git_str_dispose(&objects_dir);
142+
git_repository_free(repo);
143+
}
144+
145+
void test_graph_commitgraph__validate_corrupt(void)
146+
{
147+
git_repository *repo;
148+
struct git_commit_graph *cgraph;
149+
int fd = -1;
150+
151+
cl_fixture_sandbox("testrepo.git");
152+
cl_git_pass(git_repository_open(&repo, cl_git_sandbox_path(1, "testrepo.git", NULL)));
153+
154+
/* corrupt commit graph checksum at the end of the file */
155+
cl_assert((fd = p_open(cl_git_sandbox_path(0, "testrepo.git", "objects", "info", "commit-graph", NULL), O_WRONLY)) > 0);
156+
cl_assert(p_lseek(fd, -5, SEEK_END) > 0);
157+
cl_must_pass(p_write(fd, "\0\0", 2));
158+
cl_must_pass(p_close(fd));
159+
160+
/* git_commit_graph_open() calls git_commit_graph_validate() */
161+
cl_git_fail(git_commit_graph_open(&cgraph, cl_git_sandbox_path(1, "testrepo.git", "objects", NULL)));
162+
163+
git_commit_graph_free(cgraph);
164+
git_repository_free(repo);
165+
166+
cl_fixture_cleanup("testrepo.git");
167+
}

0 commit comments

Comments
 (0)