Skip to content

Commit 322c15e

Browse files
committed
Make the pack and mwindow implementations data-race-free
This change fixes a packfile heap corruption that can happen when interacting with multiple packfiles concurrently across multiple threads. This is exacerbated by setting a lower mwindow open file limit. This change: * Renames most of the internal methods in pack.c to clearly indicate that they expect to be called with a certain lock held, making reasoning about the state of locks a bit easier. * Splits the `git_pack_file` lock in two: the one in `git_pack_file` only protects the `index_map`. The protection to `git_mwindow_file` is now in that struct. * Explicitly checks for freshness of the `git_pack_file` in `git_packfile_unpack_header`: this allows the mwindow implementation to close files whenever there is enough cache pressure, and `git_packfile_unpack_header` will reopen the packfile if needed. * After a call to `p_munmap()`, the `data` and `len` fields are poisoned with `NULL` to make use-after-frees more evident and crash rather than being open to the possibility of heap corruption. * Adds a test case to prevent this from regressing in the future. Fixes: libgit2#5591
1 parent 4ae41f9 commit 322c15e

File tree

9 files changed

+322
-140
lines changed

9 files changed

+322
-140
lines changed

script/thread-sanitizer.supp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,19 @@
33
# consistent lock hierarchy that is easy to understand.
44
deadlock:attr_cache_lock
55

6+
# git_mwindow_file_register has the possibility of evicting some files from the
7+
# global cache. In order to avoid races and closing files that are currently
8+
# being accessed, before evicting any file it will attempt to acquire that
9+
# file's lock. Finally, git_mwindow_file_register is typically called with a
10+
# file lock held, because the caller will use the fd in the mwf immediately
11+
# after registering it. This causes ThreadSanitizer to observe different orders
12+
# of acquisition of the mutex (which implies a possibility of a deadlock),
13+
# _but_ since the files are added to the cache after other files have been
14+
# evicted, there cannot be a case where mwf A is trying to be registered while
15+
# evicting mwf B concurrently and viceversa: at most one of them can be present
16+
# in the cache.
17+
deadlock:git_mwindow_file_register
18+
619
# When invoking the time/timezone functions from git_signature_now(), they
720
# access libc methods that need to be instrumented to correctly analyze the
821
# data races.

src/indexer.c

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@
2424
#include "zstream.h"
2525
#include "object.h"
2626

27-
extern git_mutex git__mwindow_mutex;
28-
2927
size_t git_indexer__max_objects = UINT32_MAX;
3028

3129
#define UINT31_MAX (0x7FFFFFFF)
@@ -679,7 +677,7 @@ static int read_stream_object(git_indexer *idx, git_indexer_progress *stats)
679677
return GIT_EBUFS;
680678

681679
if (!idx->have_stream) {
682-
error = git_packfile_unpack_header(&entry_size, &type, &idx->pack->mwf, &w, &idx->off);
680+
error = git_packfile_unpack_header(&entry_size, &type, idx->pack, &w, &idx->off);
683681
if (error == GIT_EBUFS) {
684682
idx->off = entry_start;
685683
return error;
@@ -970,7 +968,7 @@ static int fix_thin_pack(git_indexer *idx, git_indexer_progress *stats)
970968
continue;
971969

972970
curpos = delta->delta_off;
973-
error = git_packfile_unpack_header(&size, &type, &idx->pack->mwf, &w, &curpos);
971+
error = git_packfile_unpack_header(&size, &type, idx->pack, &w, &curpos);
974972
if (error < 0)
975973
return error;
976974

@@ -1333,13 +1331,7 @@ void git_indexer_free(git_indexer *idx)
13331331

13341332
git_vector_free_deep(&idx->deltas);
13351333

1336-
if (!git_mutex_lock(&git__mwindow_mutex)) {
1337-
if (!idx->pack_committed)
1338-
git_packfile_close(idx->pack, true);
1339-
1340-
git_packfile_free(idx->pack);
1341-
git_mutex_unlock(&git__mwindow_mutex);
1342-
}
1334+
git_packfile_free(idx->pack, !idx->pack_committed);
13431335

13441336
iter = 0;
13451337
while (git_oidmap_iterate((void **) &value, idx->expected_oids, &iter, &key) == 0)

src/mwindow.c

Lines changed: 74 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ size_t git_mwindow__window_size = DEFAULT_WINDOW_SIZE;
2929
size_t git_mwindow__mapped_limit = DEFAULT_MAPPED_LIMIT;
3030
size_t git_mwindow__file_limit = DEFAULT_FILE_LIMIT;
3131

32-
/* Mutex to control access */
32+
/* Mutex to control access to `git_mwindow__mem_ctl` and `git__pack_cache`. */
3333
git_mutex git__mwindow_mutex;
3434

35-
/* Whenever you want to read or modify this, grab git__mwindow_mutex */
35+
/* Whenever you want to read or modify this, grab `git__mwindow_mutex` */
3636
git_mwindow_ctl git_mwindow__mem_ctl;
3737

3838
/* Global list of mwindow files, to open packs once across repos */
@@ -95,10 +95,9 @@ int git_mwindow_get_pack(struct git_pack_file **out, const char *path)
9595

9696
error = git_strmap_set(git__pack_cache, pack->pack_name, pack);
9797
git_mutex_unlock(&git__mwindow_mutex);
98-
9998
if (error < 0) {
100-
git_packfile_free(pack);
101-
return -1;
99+
git_packfile_free(pack, false);
100+
return error;
102101
}
103102

104103
*out = pack;
@@ -108,6 +107,7 @@ int git_mwindow_get_pack(struct git_pack_file **out, const char *path)
108107
int git_mwindow_put_pack(struct git_pack_file *pack)
109108
{
110109
int count, error;
110+
struct git_pack_file *pack_to_delete = NULL;
111111

112112
if ((error = git_mutex_lock(&git__mwindow_mutex)) < 0)
113113
return error;
@@ -121,34 +121,19 @@ int git_mwindow_put_pack(struct git_pack_file *pack)
121121
count = git_atomic_dec(&pack->refcount);
122122
if (count == 0) {
123123
git_strmap_delete(git__pack_cache, pack->pack_name);
124-
git_packfile_free(pack);
124+
pack_to_delete = pack;
125125
}
126-
127126
git_mutex_unlock(&git__mwindow_mutex);
128-
return 0;
129-
}
127+
git_packfile_free(pack_to_delete, false);
130128

131-
int git_mwindow_free_all(git_mwindow_file *mwf)
132-
{
133-
int error;
134-
135-
if (git_mutex_lock(&git__mwindow_mutex)) {
136-
git_error_set(GIT_ERROR_THREAD, "unable to lock mwindow mutex");
137-
return -1;
138-
}
139-
140-
error = git_mwindow_free_all_locked(mwf);
141-
142-
git_mutex_unlock(&git__mwindow_mutex);
143-
144-
return error;
129+
return 0;
145130
}
146131

147132
/*
148133
* Free all the windows in a sequence, typically because we're done
149-
* with the file
134+
* with the file. Needs to hold the git__mwindow_mutex.
150135
*/
151-
int git_mwindow_free_all_locked(git_mwindow_file *mwf)
136+
static int git_mwindow_free_all_locked(git_mwindow_file *mwf)
152137
{
153138
git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
154139
size_t i;
@@ -184,6 +169,22 @@ int git_mwindow_free_all_locked(git_mwindow_file *mwf)
184169
return 0;
185170
}
186171

172+
int git_mwindow_free_all(git_mwindow_file *mwf)
173+
{
174+
int error;
175+
176+
if (git_mutex_lock(&git__mwindow_mutex)) {
177+
git_error_set(GIT_ERROR_THREAD, "unable to lock mwindow mutex");
178+
return -1;
179+
}
180+
181+
error = git_mwindow_free_all_locked(mwf);
182+
183+
git_mutex_unlock(&git__mwindow_mutex);
184+
185+
return error;
186+
}
187+
187188
/*
188189
* Check if a window 'win' contains the address 'offset'
189190
*/
@@ -256,9 +257,9 @@ static bool git_mwindow_scan_recently_used(
256257

257258
/*
258259
* Close the least recently used window (that is currently not being used) out
259-
* of all the files. Called under lock from new_window.
260+
* of all the files. Called under lock from new_window_locked.
260261
*/
261-
static int git_mwindow_close_lru_window(void)
262+
static int git_mwindow_close_lru_window_locked(void)
262263
{
263264
git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
264265
git_mwindow_file *cur;
@@ -292,13 +293,13 @@ static int git_mwindow_close_lru_window(void)
292293
}
293294

294295
/*
295-
* Close the file that does not have any open windows AND whose
296+
* Finds the file that does not have any open windows AND whose
296297
* most-recently-used window is the least-recently used one across all
297298
* currently open files.
298299
*
299-
* Called under lock from new_window.
300+
* Called under lock from new_window_locked.
300301
*/
301-
static int git_mwindow_close_lru_file(void)
302+
static int git_mwindow_find_lru_file_locked(git_mwindow_file **out)
302303
{
303304
git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
304305
git_mwindow_file *lru_file = NULL, *current_file = NULL;
@@ -320,15 +321,12 @@ static int git_mwindow_close_lru_file(void)
320321
return -1;
321322
}
322323

323-
git_mwindow_free_all_locked(lru_file);
324-
p_close(lru_file->fd);
325-
lru_file->fd = -1;
326-
324+
*out = lru_file;
327325
return 0;
328326
}
329327

330328
/* This gets called under lock from git_mwindow_open */
331-
static git_mwindow *new_window(
329+
static git_mwindow *new_window_locked(
332330
git_file fd,
333331
off64_t size,
334332
off64_t offset)
@@ -338,12 +336,11 @@ static git_mwindow *new_window(
338336
off64_t len;
339337
git_mwindow *w;
340338

341-
w = git__malloc(sizeof(*w));
339+
w = git__calloc(1, sizeof(*w));
342340

343341
if (w == NULL)
344342
return NULL;
345343

346-
memset(w, 0x0, sizeof(*w));
347344
w->offset = (offset / walign) * walign;
348345

349346
len = size - w->offset;
@@ -353,7 +350,7 @@ static git_mwindow *new_window(
353350
ctl->mapped += (size_t)len;
354351

355352
while (git_mwindow__mapped_limit < ctl->mapped &&
356-
git_mwindow_close_lru_window() == 0) /* nop */;
353+
git_mwindow_close_lru_window_locked() == 0) /* nop */;
357354

358355
/*
359356
* We treat `mapped_limit` as a soft limit. If we can't find a
@@ -367,7 +364,7 @@ static git_mwindow *new_window(
367364
* we're below our soft limits, so free up what we can and try again.
368365
*/
369366

370-
while (git_mwindow_close_lru_window() == 0)
367+
while (git_mwindow_close_lru_window_locked() == 0)
371368
/* nop */;
372369

373370
if (git_futils_mmap_ro(&w->window_map, fd, w->offset, (size_t)len) < 0) {
@@ -423,7 +420,7 @@ unsigned char *git_mwindow_open(
423420
* one.
424421
*/
425422
if (!w) {
426-
w = new_window(mwf->fd, mwf->size, offset);
423+
w = new_window_locked(mwf->fd, mwf->size, offset);
427424
if (w == NULL) {
428425
git_mutex_unlock(&git__mwindow_mutex);
429426
return NULL;
@@ -451,29 +448,60 @@ unsigned char *git_mwindow_open(
451448

452449
int git_mwindow_file_register(git_mwindow_file *mwf)
453450
{
451+
git_vector closed_files = GIT_VECTOR_INIT;
454452
git_mwindow_ctl *ctl = &git_mwindow__mem_ctl;
455-
int ret;
453+
int error;
454+
size_t i;
455+
git_mwindow_file *closed_file = NULL;
456456

457457
if (git_mutex_lock(&git__mwindow_mutex)) {
458458
git_error_set(GIT_ERROR_THREAD, "unable to lock mwindow mutex");
459459
return -1;
460460
}
461461

462462
if (ctl->windowfiles.length == 0 &&
463-
git_vector_init(&ctl->windowfiles, 8, NULL) < 0) {
463+
(error = git_vector_init(&ctl->windowfiles, 8, NULL)) < 0) {
464464
git_mutex_unlock(&git__mwindow_mutex);
465-
return -1;
465+
goto cleanup;
466466
}
467467

468468
if (git_mwindow__file_limit) {
469+
git_mwindow_file *lru_file;
469470
while (git_mwindow__file_limit <= ctl->windowfiles.length &&
470-
git_mwindow_close_lru_file() == 0) /* nop */;
471+
git_mwindow_find_lru_file_locked(&lru_file) == 0) {
472+
if ((error = git_vector_insert(&closed_files, lru_file)) < 0) {
473+
/*
474+
* Exceeding the file limit seems preferrable to being open to
475+
* data races that can end up corrupting the heap.
476+
*/
477+
break;
478+
}
479+
git_mwindow_free_all_locked(lru_file);
480+
}
471481
}
472482

473-
ret = git_vector_insert(&ctl->windowfiles, mwf);
483+
error = git_vector_insert(&ctl->windowfiles, mwf);
474484
git_mutex_unlock(&git__mwindow_mutex);
485+
if (error < 0)
486+
goto cleanup;
475487

476-
return ret;
488+
/*
489+
* Once we have released the global windowfiles lock, we can close each
490+
* individual file. Before doing so, acquire that file's lock to avoid
491+
* closing a file that is currently being used.
492+
*/
493+
git_vector_foreach(&closed_files, i, closed_file) {
494+
error = git_mutex_lock(&closed_file->lock);
495+
if (error < 0)
496+
continue;
497+
p_close(closed_file->fd);
498+
closed_file->fd = -1;
499+
git_mutex_unlock(&closed_file->lock);
500+
}
501+
502+
cleanup:
503+
git_vector_free(&closed_files);
504+
return error;
477505
}
478506

479507
void git_mwindow_file_deregister(git_mwindow_file *mwf)

src/mwindow.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@
1313
#include "map.h"
1414
#include "vector.h"
1515

16-
extern git_mutex git__mwindow_mutex;
17-
1816
typedef struct git_mwindow {
1917
struct git_mwindow *next;
2018
git_map window_map;
@@ -24,6 +22,7 @@ typedef struct git_mwindow {
2422
} git_mwindow;
2523

2624
typedef struct git_mwindow_file {
25+
git_mutex lock; /* protects updates to fd */
2726
git_mwindow *windows;
2827
int fd;
2928
off64_t size;
@@ -41,7 +40,6 @@ typedef struct git_mwindow_ctl {
4140

4241
int git_mwindow_contains(git_mwindow *win, off64_t offset);
4342
int git_mwindow_free_all(git_mwindow_file *mwf); /* locks */
44-
int git_mwindow_free_all_locked(git_mwindow_file *mwf); /* run under lock */
4543
unsigned char *git_mwindow_open(git_mwindow_file *mwf, git_mwindow **cursor, off64_t offset, size_t extra, unsigned int *left);
4644
int git_mwindow_file_register(git_mwindow_file *mwf);
4745
void git_mwindow_file_deregister(git_mwindow_file *mwf);

0 commit comments

Comments
 (0)