1010#include "git2/indexer.h"
1111#include "git2/object.h"
1212
13+ #include "commit.h"
14+ #include "tree.h"
15+ #include "tag.h"
1316#include "pack.h"
1417#include "mwindow.h"
1518#include "posix.h"
@@ -36,12 +39,15 @@ struct git_indexer {
3639 pack_committed :1 ,
3740 have_stream :1 ,
3841 have_delta :1 ,
39- do_fsync :1 ;
42+ do_fsync :1 ,
43+ do_verify :1 ;
4044 struct git_pack_header hdr ;
4145 struct git_pack_file * pack ;
4246 unsigned int mode ;
4347 git_off_t off ;
4448 git_off_t entry_start ;
49+ git_otype entry_type ;
50+ git_buf entry_data ;
4551 git_packfile_stream stream ;
4652 size_t nr_objects ;
4753 git_vector objects ;
@@ -53,6 +59,9 @@ struct git_indexer {
5359 void * progress_payload ;
5460 char objbuf [8 * 1024 ];
5561
62+ /* OIDs referenced from pack objects. Used for verification. */
63+ git_oidmap * expected_oids ;
64+
5665 /* Needed to look up objects which we want to inject to fix a thin pack */
5766 git_odb * odb ;
5867
@@ -125,6 +134,11 @@ int git_indexer_new(
125134 idx -> mode = mode ? mode : GIT_PACK_FILE_MODE ;
126135 git_hash_ctx_init (& idx -> hash_ctx );
127136 git_hash_ctx_init (& idx -> trailer );
137+ git_buf_init (& idx -> entry_data , 0 );
138+ idx -> expected_oids = git_oidmap_alloc ();
139+ GITERR_CHECK_ALLOC (idx -> expected_oids );
140+
141+ idx -> do_verify = !!idx -> odb ;
128142
129143 if (git_repository__fsync_gitdir )
130144 idx -> do_fsync = 1 ;
@@ -210,6 +224,9 @@ static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream)
210224 if ((read = git_packfile_stream_read (stream , idx -> objbuf , sizeof (idx -> objbuf ))) < 0 )
211225 break ;
212226
227+ if (idx -> do_verify )
228+ git_buf_put (& idx -> entry_data , idx -> objbuf , read );
229+
213230 git_hash_update (& idx -> hash_ctx , idx -> objbuf , read );
214231 } while (read > 0 );
215232
@@ -279,6 +296,97 @@ static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start,
279296 return 0 ;
280297}
281298
299+ static void add_expected_oid (git_indexer * idx , const git_oid * oid )
300+ {
301+ int ret ;
302+
303+ /*
304+ * If we know about that object because it is stored in our ODB or
305+ * because we have already processed it as part of our pack file, we do
306+ * not have to expect it.
307+ */
308+ if (!git_odb_exists (idx -> odb , oid ) &&
309+ !git_oidmap_exists (idx -> pack -> idx_cache , oid ) &&
310+ !git_oidmap_exists (idx -> expected_oids , oid )) {
311+ git_oid * dup = git__malloc (sizeof (* oid ));
312+ git_oid_cpy (dup , oid );
313+ git_oidmap_put (idx -> expected_oids , dup , & ret );
314+ }
315+ }
316+
317+ static int check_object_connectivity (git_indexer * idx , const git_rawobj * obj )
318+ {
319+ git_object * object ;
320+ size_t keyidx ;
321+ int error ;
322+
323+ if (obj -> type != GIT_OBJ_BLOB &&
324+ obj -> type != GIT_OBJ_TREE &&
325+ obj -> type != GIT_OBJ_COMMIT &&
326+ obj -> type != GIT_OBJ_TAG )
327+ return 0 ;
328+
329+ if ((error = git_object__from_raw (& object , obj -> data , obj -> len , obj -> type )) < 0 )
330+ goto out ;
331+
332+ keyidx = git_oidmap_lookup_index (idx -> expected_oids , & object -> cached .oid );
333+ if (git_oidmap_valid_index (idx -> expected_oids , keyidx )) {
334+ const git_oid * key = git_oidmap_key (idx -> expected_oids , keyidx );
335+ git__free ((git_oid * ) key );
336+ git_oidmap_delete_at (idx -> expected_oids , keyidx );
337+ }
338+
339+ /*
340+ * Check whether this is a known object. If so, we can just continue as
341+ * we assume that the ODB has a complete graph.
342+ */
343+ if (git_odb_exists (idx -> odb , & object -> cached .oid ))
344+ return 0 ;
345+
346+ switch (obj -> type ) {
347+ case GIT_OBJ_TREE :
348+ {
349+ git_tree * tree = (git_tree * ) object ;
350+ git_tree_entry * entry ;
351+ size_t i ;
352+
353+ git_array_foreach (tree -> entries , i , entry )
354+ add_expected_oid (idx , entry -> oid );
355+
356+ break ;
357+ }
358+ case GIT_OBJ_COMMIT :
359+ {
360+ git_commit * commit = (git_commit * ) object ;
361+ git_oid * parent_oid ;
362+ size_t i ;
363+
364+ git_array_foreach (commit -> parent_ids , i , parent_oid )
365+ add_expected_oid (idx , parent_oid );
366+
367+ add_expected_oid (idx , & commit -> tree_id );
368+
369+ break ;
370+ }
371+ case GIT_OBJ_TAG :
372+ {
373+ git_tag * tag = (git_tag * ) object ;
374+
375+ add_expected_oid (idx , & tag -> target );
376+
377+ break ;
378+ }
379+ case GIT_OBJ_BLOB :
380+ default :
381+ break ;
382+ }
383+
384+ out :
385+ git_object_free (object );
386+
387+ return error ;
388+ }
389+
282390static int store_object (git_indexer * idx )
283391{
284392 int i , error ;
@@ -304,6 +412,17 @@ static int store_object(git_indexer *idx)
304412 entry -> offset = (uint32_t )entry_start ;
305413 }
306414
415+ if (idx -> do_verify ) {
416+ git_rawobj rawobj = {
417+ idx -> entry_data .ptr ,
418+ idx -> entry_data .size ,
419+ idx -> entry_type
420+ };
421+
422+ if ((error = check_object_connectivity (idx , & rawobj )) < 0 )
423+ goto on_error ;
424+ }
425+
307426 git_oid_cpy (& pentry -> sha1 , & oid );
308427 pentry -> offset = entry_start ;
309428
@@ -549,6 +668,7 @@ static int read_stream_object(git_indexer *idx, git_transfer_progress *stats)
549668 git_mwindow_close (& w );
550669 idx -> entry_start = entry_start ;
551670 git_hash_init (& idx -> hash_ctx );
671+ git_buf_clear (& idx -> entry_data );
552672
553673 if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA ) {
554674 error = advance_delta_offset (idx , type );
@@ -569,6 +689,7 @@ static int read_stream_object(git_indexer *idx, git_transfer_progress *stats)
569689 }
570690
571691 idx -> have_stream = 1 ;
692+ idx -> entry_type = type ;
572693
573694 error = git_packfile_stream_open (stream , idx -> pack , idx -> off );
574695 if (error < 0 )
@@ -884,6 +1005,10 @@ static int resolve_deltas(git_indexer *idx, git_transfer_progress *stats)
8841005 return -1 ;
8851006 }
8861007
1008+ if (idx -> do_verify && check_object_connectivity (idx , & obj ) < 0 )
1009+ /* TODO: error? continue? */
1010+ continue ;
1011+
8871012 if (hash_and_save (idx , & obj , delta -> delta_off ) < 0 )
8881013 continue ;
8891014
@@ -1014,6 +1139,18 @@ int git_indexer_commit(git_indexer *idx, git_transfer_progress *stats)
10141139 write_at (idx , & trailer_hash , idx -> pack -> mwf .size - GIT_OID_RAWSZ , GIT_OID_RAWSZ );
10151140 }
10161141
1142+ /*
1143+ * Is the resulting graph fully connected or are we still
1144+ * missing some objects? In the second case, we can
1145+ * bail out due to an incomplete and thus corrupt
1146+ * packfile.
1147+ */
1148+ if (git_oidmap_size (idx -> expected_oids ) > 0 ) {
1149+ giterr_set (GITERR_INDEXER , "packfile is missing %" PRIuZ " objects" ,
1150+ git_oidmap_size (idx -> expected_oids ));
1151+ return -1 ;
1152+ }
1153+
10171154 git_vector_sort (& idx -> objects );
10181155
10191156 /* Use the trailer hash as the pack file name to ensure
@@ -1143,6 +1280,8 @@ int git_indexer_commit(git_indexer *idx, git_transfer_progress *stats)
11431280
11441281void git_indexer_free (git_indexer * idx )
11451282{
1283+ khiter_t pos ;
1284+
11461285 if (idx == NULL )
11471286 return ;
11481287
@@ -1170,7 +1309,18 @@ void git_indexer_free(git_indexer *idx)
11701309 git_mutex_unlock (& git__mwindow_mutex );
11711310 }
11721311
1312+ for (pos = git_oidmap_begin (idx -> expected_oids );
1313+ pos != git_oidmap_end (idx -> expected_oids ); pos ++ )
1314+ {
1315+ if (git_oidmap_has_data (idx -> expected_oids , pos )) {
1316+ git__free ((git_oid * ) git_oidmap_key (idx -> expected_oids , pos ));
1317+ git_oidmap_delete_at (idx -> expected_oids , pos );
1318+ }
1319+ }
1320+
11731321 git_hash_ctx_cleanup (& idx -> trailer );
11741322 git_hash_ctx_cleanup (& idx -> hash_ctx );
1323+ git_buf_dispose (& idx -> entry_data );
1324+ git_oidmap_free (idx -> expected_oids );
11751325 git__free (idx );
11761326}
0 commit comments