Skip to content

Commit 28a0741

Browse files
committed
odb: verify object hashes
The upstream git.git project verifies objects when looking them up from disk. This avoids scenarios where objects have somehow become corrupt on disk, e.g. due to hardware failures or bit flips. While our mantra is usually to follow upstream behavior, we do not do so in this case, as we never check hashes of objects we have just read from disk. To fix this, we create a new error class `GIT_EMISMATCH` which denotes that we have looked up an object with a hashsum mismatch. `odb_read_1` will then, after having read the object from its backend, hash the object and compare the resulting hash to the expected hash. If hashes do not match, it will return an error. This obviously introduces another computation of checksums and could potentially impact performance. Note though that we usually perform I/O operations directly before doing this computation, and as such the actual overhead should be drowned out by I/O. Running our test suite seems to confirm this guess. On a Linux system with best-of-five timings, we had 21.592s with the check enabled and 21.590s with the ckeck disabled. Note though that our test suite mostly contains very small blobs only. It is expected that repositories with bigger blobs may notice an increased hit by this check. In addition to a new test, we also had to change the odb::backend::nonrefreshing test suite, which now triggers a hashsum mismatch when looking up the commit "deadbeef...". This is expected, as the fake backend allocated inside of the test will return an empty object for the OID "deadbeef...", which will obviously not hash back to "deadbeef..." again. We can simply adjust the hash to equal the hash of the empty object here to fix this test.
1 parent d59dabe commit 28a0741

File tree

5 files changed

+61
-4
lines changed

5 files changed

+61
-4
lines changed

include/git2/errors.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ typedef enum {
5454
GIT_PASSTHROUGH = -30, /**< Internal only */
5555
GIT_ITEROVER = -31, /**< Signals end of iteration with iterator */
5656
GIT_RETRY = -32, /**< Internal only */
57+
GIT_EMISMATCH = -33, /**< Hashsum mismatch in object */
5758
} git_error_code;
5859

5960
/**

src/odb.c

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -998,7 +998,9 @@ static int odb_read_1(git_odb_object **out, git_odb *db, const git_oid *id,
998998
size_t i;
999999
git_rawobj raw;
10001000
git_odb_object *object;
1001+
git_oid hashed;
10011002
bool found = false;
1003+
int error;
10021004

10031005
if (!only_refreshed && odb_read_hardcoded(&raw, id) == 0)
10041006
found = true;
@@ -1011,7 +1013,7 @@ static int odb_read_1(git_odb_object **out, git_odb *db, const git_oid *id,
10111013
continue;
10121014

10131015
if (b->read != NULL) {
1014-
int error = b->read(&raw.data, &raw.len, &raw.type, b, id);
1016+
error = b->read(&raw.data, &raw.len, &raw.type, b, id);
10151017
if (error == GIT_PASSTHROUGH || error == GIT_ENOTFOUND)
10161018
continue;
10171019

@@ -1025,12 +1027,24 @@ static int odb_read_1(git_odb_object **out, git_odb *db, const git_oid *id,
10251027
if (!found)
10261028
return GIT_ENOTFOUND;
10271029

1030+
if ((error = git_odb_hash(&hashed, raw.data, raw.len, raw.type)) < 0)
1031+
goto out;
1032+
1033+
if (!git_oid_equal(id, &hashed)) {
1034+
error = git_odb__error_mismatch(id, &hashed);
1035+
goto out;
1036+
}
1037+
10281038
giterr_clear();
10291039
if ((object = odb_object__alloc(id, &raw)) == NULL)
1030-
return -1;
1040+
goto out;
10311041

10321042
*out = git_cache_store_raw(odb_cache(db), object);
1033-
return 0;
1043+
1044+
out:
1045+
if (error)
1046+
git__free(raw.data);
1047+
return error;
10341048
}
10351049

10361050
int git_odb_read(git_odb_object **out, git_odb *db, const git_oid *id)
@@ -1411,6 +1425,19 @@ int git_odb_refresh(struct git_odb *db)
14111425
return 0;
14121426
}
14131427

1428+
int git_odb__error_mismatch(const git_oid *expected, const git_oid *actual)
1429+
{
1430+
char expected_oid[GIT_OID_HEXSZ + 1], actual_oid[GIT_OID_HEXSZ + 1];
1431+
1432+
git_oid_tostr(expected_oid, sizeof(expected_oid), expected);
1433+
git_oid_tostr(actual_oid, sizeof(actual_oid), actual);
1434+
1435+
giterr_set(GITERR_ODB, "object hash mismatch - expected %s but got %s",
1436+
expected_oid, actual_oid);
1437+
1438+
return GIT_EMISMATCH;
1439+
}
1440+
14141441
int git_odb__error_notfound(
14151442
const char *message, const git_oid *oid, size_t oid_len)
14161443
{

src/odb.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,12 @@ int git_odb__hashfd_filtered(
9696
*/
9797
int git_odb__hashlink(git_oid *out, const char *path);
9898

99+
/**
100+
* Generate a GIT_EMISMATCH error for the ODB.
101+
*/
102+
int git_odb__error_mismatch(
103+
const git_oid *expected, const git_oid *actual);
104+
99105
/*
100106
* Generate a GIT_ENOTFOUND error for the ODB.
101107
*/

tests/object/lookup.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,3 +92,25 @@ void test_object_lookup__lookup_corrupt_object_returns_error(void)
9292
git_buf_free(&contents);
9393
}
9494

95+
void test_object_lookup__lookup_object_with_wrong_hash_returns_error(void)
96+
{
97+
const char *oldloose = "objects/8e/73b769e97678d684b809b163bebdae2911720f",
98+
*newloose = "objects/8e/73b769e97678d684b809b163bebdae2911720e",
99+
*commit = "8e73b769e97678d684b809b163bebdae2911720e";
100+
git_buf oldpath = GIT_BUF_INIT, newpath = GIT_BUF_INIT;
101+
git_object *object;
102+
git_oid oid;
103+
104+
cl_git_pass(git_oid_fromstr(&oid, commit));
105+
106+
/* Copy object to another location with wrong hash */
107+
cl_git_pass(git_buf_joinpath(&oldpath, git_repository_path(g_repo), oldloose));
108+
cl_git_pass(git_buf_joinpath(&newpath, git_repository_path(g_repo), newloose));
109+
cl_git_pass(git_futils_cp(oldpath.ptr, newpath.ptr, 0644));
110+
111+
/* Verify that lookup fails due to a hashsum mismatch */
112+
cl_git_fail_with(GIT_EMISMATCH, git_object_lookup(&object, g_repo, &oid, GIT_OBJ_COMMIT));
113+
114+
git_buf_free(&oldpath);
115+
git_buf_free(&newpath);
116+
}

tests/odb/backend/nonrefreshing.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ static fake_backend *_fake;
1818
static git_oid _oid;
1919

2020
#define HASH "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef"
21+
#define EMPTY_HASH "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"
2122

2223
static int fake_backend__exists(git_odb_backend *backend, const git_oid *oid)
2324
{
@@ -225,7 +226,7 @@ void test_odb_backend_nonrefreshing__read_is_invoked_once_on_success(void)
225226
{
226227
git_object *obj;
227228

228-
setup_repository_and_backend(GIT_OK, HASH);
229+
setup_repository_and_backend(GIT_OK, EMPTY_HASH);
229230

230231
cl_git_pass(git_object_lookup(&obj, _repo, &_oid, GIT_OBJ_ANY));
231232

0 commit comments

Comments
 (0)