Skip to content

Commit 3bc95cf

Browse files
authored
Merge pull request libgit2#4236 from pks-t/pks/index-v4-fixes
Fix path computations for compressed index entries
2 parents 6a13cf1 + 92d3ea4 commit 3bc95cf

File tree

16 files changed

+197
-58
lines changed

16 files changed

+197
-58
lines changed

src/index.c

Lines changed: 75 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,6 @@ static int index_apply_to_wd_diff(git_index *index, int action, const git_strarr
5454
unsigned int flags,
5555
git_index_matched_path_cb cb, void *payload);
5656

57-
#define entry_size(type,len) ((offsetof(type, path) + (len) + 8) & ~7)
58-
#define short_entry_size(len) entry_size(struct entry_short, len)
59-
#define long_entry_size(len) entry_size(struct entry_long, len)
60-
6157
#define minimal_entry_size (offsetof(struct entry_short, path))
6258

6359
static const size_t INDEX_FOOTER_SIZE = GIT_OID_RAWSZ;
@@ -2282,12 +2278,29 @@ static int read_conflict_names(git_index *index, const char *buffer, size_t size
22822278
return 0;
22832279
}
22842280

2281+
static size_t index_entry_size(size_t path_len, size_t varint_len, uint32_t flags)
2282+
{
2283+
if (varint_len) {
2284+
if (flags & GIT_IDXENTRY_EXTENDED)
2285+
return offsetof(struct entry_long, path) + path_len + 1 + varint_len;
2286+
else
2287+
return offsetof(struct entry_short, path) + path_len + 1 + varint_len;
2288+
} else {
2289+
#define entry_size(type,len) ((offsetof(type, path) + (len) + 8) & ~7)
2290+
if (flags & GIT_IDXENTRY_EXTENDED)
2291+
return entry_size(struct entry_long, path_len);
2292+
else
2293+
return entry_size(struct entry_short, path_len);
2294+
#undef entry_size
2295+
}
2296+
}
2297+
22852298
static size_t read_entry(
22862299
git_index_entry **out,
22872300
git_index *index,
22882301
const void *buffer,
22892302
size_t buffer_size,
2290-
const char **last)
2303+
const char *last)
22912304
{
22922305
size_t path_length, entry_size;
22932306
const char *path_ptr;
@@ -2344,35 +2357,34 @@ static size_t read_entry(
23442357
path_length = path_end - path_ptr;
23452358
}
23462359

2347-
if (entry.flags & GIT_IDXENTRY_EXTENDED)
2348-
entry_size = long_entry_size(path_length);
2349-
else
2350-
entry_size = short_entry_size(path_length);
2351-
2352-
if (INDEX_FOOTER_SIZE + entry_size > buffer_size)
2353-
return 0;
2354-
2360+
entry_size = index_entry_size(path_length, 0, entry.flags);
23552361
entry.path = (char *)path_ptr;
23562362
} else {
23572363
size_t varint_len;
2358-
size_t shared = git_decode_varint((const unsigned char *)path_ptr,
2359-
&varint_len);
2360-
size_t len = strlen(path_ptr + varint_len);
2361-
size_t last_len = strlen(*last);
2362-
size_t tmp_path_len;
2364+
size_t strip_len = git_decode_varint((const unsigned char *)path_ptr,
2365+
&varint_len);
2366+
size_t last_len = strlen(last);
2367+
size_t prefix_len = last_len - strip_len;
2368+
size_t suffix_len = strlen(path_ptr + varint_len);
2369+
size_t path_len;
23632370

23642371
if (varint_len == 0)
23652372
return index_error_invalid("incorrect prefix length");
23662373

2367-
GITERR_CHECK_ALLOC_ADD(&tmp_path_len, shared, len + 1);
2368-
tmp_path = git__malloc(tmp_path_len);
2374+
GITERR_CHECK_ALLOC_ADD(&path_len, prefix_len, suffix_len);
2375+
GITERR_CHECK_ALLOC_ADD(&path_len, path_len, 1);
2376+
tmp_path = git__malloc(path_len);
23692377
GITERR_CHECK_ALLOC(tmp_path);
2370-
memcpy(tmp_path, last, last_len);
2371-
memcpy(tmp_path + last_len, path_ptr + varint_len, len);
2372-
entry_size = long_entry_size(shared + len);
2378+
2379+
memcpy(tmp_path, last, prefix_len);
2380+
memcpy(tmp_path + prefix_len, path_ptr + varint_len, suffix_len + 1);
2381+
entry_size = index_entry_size(suffix_len, varint_len, entry.flags);
23732382
entry.path = tmp_path;
23742383
}
23752384

2385+
if (INDEX_FOOTER_SIZE + entry_size > buffer_size)
2386+
return 0;
2387+
23762388
if (index_entry_dup(out, index, &entry) < 0) {
23772389
git__free(tmp_path);
23782390
return 0;
@@ -2445,7 +2457,7 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
24452457
unsigned int i;
24462458
struct index_header header = { 0 };
24472459
git_oid checksum_calculated, checksum_expected;
2448-
const char **last = NULL;
2460+
const char *last = NULL;
24492461
const char *empty = "";
24502462

24512463
#define seek_forward(_increase) { \
@@ -2469,7 +2481,7 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
24692481

24702482
index->version = header.version;
24712483
if (index->version >= INDEX_VERSION_NUMBER_COMP)
2472-
last = &empty;
2484+
last = empty;
24732485

24742486
seek_forward(INDEX_HEADER_SIZE);
24752487

@@ -2504,6 +2516,9 @@ static int parse_index(git_index *index, const char *buffer, size_t buffer_size)
25042516
}
25052517
error = 0;
25062518

2519+
if (index->version >= INDEX_VERSION_NUMBER_COMP)
2520+
last = entry->path;
2521+
25072522
seek_forward(entry_size);
25082523
}
25092524

@@ -2574,19 +2589,20 @@ static bool is_index_extended(git_index *index)
25742589
return (extended > 0);
25752590
}
25762591

2577-
static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const char **last)
2592+
static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const char *last)
25782593
{
25792594
void *mem = NULL;
25802595
struct entry_short *ondisk;
25812596
size_t path_len, disk_size;
2597+
int varint_len = 0;
25822598
char *path;
25832599
const char *path_start = entry->path;
25842600
size_t same_len = 0;
25852601

25862602
path_len = ((struct entry_internal *)entry)->pathlen;
25872603

25882604
if (last) {
2589-
const char *last_c = *last;
2605+
const char *last_c = last;
25902606

25912607
while (*path_start == *last_c) {
25922608
if (!*path_start || !*last_c)
@@ -2596,13 +2612,10 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const cha
25962612
++same_len;
25972613
}
25982614
path_len -= same_len;
2599-
*last = entry->path;
2615+
varint_len = git_encode_varint(NULL, 0, same_len);
26002616
}
26012617

2602-
if (entry->flags & GIT_IDXENTRY_EXTENDED)
2603-
disk_size = long_entry_size(path_len);
2604-
else
2605-
disk_size = short_entry_size(path_len);
2618+
disk_size = index_entry_size(path_len, varint_len, entry->flags);
26062619

26072620
if (git_filebuf_reserve(file, &mem, disk_size) < 0)
26082621
return -1;
@@ -2642,16 +2655,34 @@ static int write_disk_entry(git_filebuf *file, git_index_entry *entry, const cha
26422655
ondisk_ext->flags_extended = htons(entry->flags_extended &
26432656
GIT_IDXENTRY_EXTENDED_FLAGS);
26442657
path = ondisk_ext->path;
2645-
}
2646-
else
2658+
disk_size -= offsetof(struct entry_long, path);
2659+
} else {
26472660
path = ondisk->path;
2661+
disk_size -= offsetof(struct entry_short, path);
2662+
}
26482663

26492664
if (last) {
2650-
path += git_encode_varint((unsigned char *) path,
2651-
disk_size,
2652-
path_len - same_len);
2665+
varint_len = git_encode_varint((unsigned char *) path,
2666+
disk_size, same_len);
2667+
assert(varint_len > 0);
2668+
path += varint_len;
2669+
disk_size -= varint_len;
2670+
2671+
/*
2672+
* If using path compression, we are not allowed
2673+
* to have additional trailing NULs.
2674+
*/
2675+
assert(disk_size == path_len + 1);
2676+
} else {
2677+
/*
2678+
* If no path compression is used, we do have
2679+
* NULs as padding. As such, simply assert that
2680+
* we have enough space left to write the path.
2681+
*/
2682+
assert(disk_size > path_len);
26532683
}
2654-
memcpy(path, path_start, path_len);
2684+
2685+
memcpy(path, path_start, path_len + 1);
26552686

26562687
return 0;
26572688
}
@@ -2662,8 +2693,7 @@ static int write_entries(git_index *index, git_filebuf *file)
26622693
size_t i;
26632694
git_vector case_sorted, *entries;
26642695
git_index_entry *entry;
2665-
const char **last = NULL;
2666-
const char *empty = "";
2696+
const char *last = NULL;
26672697

26682698
/* If index->entries is sorted case-insensitively, then we need
26692699
* to re-sort it case-sensitively before writing */
@@ -2676,11 +2706,14 @@ static int write_entries(git_index *index, git_filebuf *file)
26762706
}
26772707

26782708
if (index->version >= INDEX_VERSION_NUMBER_COMP)
2679-
last = &empty;
2709+
last = "";
26802710

2681-
git_vector_foreach(entries, i, entry)
2711+
git_vector_foreach(entries, i, entry) {
26822712
if ((error = write_disk_entry(file, entry, last)) < 0)
26832713
break;
2714+
if (index->version >= INDEX_VERSION_NUMBER_COMP)
2715+
last = entry->path;
2716+
}
26842717

26852718
if (index->ignore_case)
26862719
git_vector_free(&case_sorted);

src/varint.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ int git_encode_varint(unsigned char *buf, size_t bufsize, uintmax_t value)
3636
while (value >>= 7)
3737
varint[--pos] = 128 | (--value & 127);
3838
if (buf) {
39-
if (bufsize < pos)
39+
if (bufsize < (sizeof(varint) - pos))
4040
return -1;
4141
memcpy(buf, varint + pos, sizeof(varint) - pos);
4242
}

tests/core/encoding.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ void test_core_encoding__encode(void)
2929
cl_assert(git_encode_varint(buf, 100, 65) == 1);
3030
cl_assert(buf[0] == 'A');
3131

32+
cl_assert(git_encode_varint(buf, 1, 1) == 1);
33+
cl_assert(!memcmp(buf, "\x01", 1));
34+
3235
cl_assert(git_encode_varint(buf, 100, 267869656) == 4);
3336
cl_assert(!memcmp(buf, "\xfe\xdc\xbaX", 4));
3437

tests/index/version.c

Lines changed: 111 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,39 +3,135 @@
33

44
static git_repository *g_repo = NULL;
55

6-
void test_index_version__can_write_v4(void)
6+
void test_index_version__cleanup(void)
7+
{
8+
cl_git_sandbox_cleanup();
9+
g_repo = NULL;
10+
}
11+
12+
void test_index_version__can_read_v4(void)
713
{
14+
const char *paths[] = {
15+
"file.tx", "file.txt", "file.txz", "foo", "zzz",
16+
};
817
git_index *index;
9-
const git_index_entry *entry;
18+
size_t i;
19+
20+
g_repo = cl_git_sandbox_init("indexv4");
1021

11-
g_repo = cl_git_sandbox_init("filemodes");
1222
cl_git_pass(git_repository_index(&index, g_repo));
23+
cl_assert_equal_sz(git_index_entrycount(index), 5);
1324

14-
cl_assert(index->on_disk);
15-
cl_assert(git_index_version(index) == 2);
25+
for (i = 0; i < ARRAY_SIZE(paths); i++) {
26+
const git_index_entry *entry =
27+
git_index_get_bypath(index, paths[i], GIT_INDEX_STAGE_NORMAL);
1628

17-
cl_assert(git_index_entrycount(index) == 6);
29+
cl_assert(entry != NULL);
30+
}
1831

32+
git_index_free(index);
33+
}
34+
35+
void test_index_version__can_write_v4(void)
36+
{
37+
const char *paths[] = {
38+
"foo",
39+
"foox",
40+
"foobar",
41+
"foobal",
42+
"x",
43+
"xz",
44+
"xyzzyx"
45+
};
46+
git_index_entry entry;
47+
git_index *index;
48+
size_t i;
49+
50+
g_repo = cl_git_sandbox_init("empty_standard_repo");
51+
cl_git_pass(git_repository_index(&index, g_repo));
1952
cl_git_pass(git_index_set_version(index, 4));
2053

54+
for (i = 0; i < ARRAY_SIZE(paths); i++) {
55+
memset(&entry, 0, sizeof(entry));
56+
entry.path = paths[i];
57+
entry.mode = GIT_FILEMODE_BLOB;
58+
cl_git_pass(git_index_add_frombuffer(index, &entry, paths[i],
59+
strlen(paths[i]) + 1));
60+
}
61+
cl_assert_equal_sz(git_index_entrycount(index), ARRAY_SIZE(paths));
62+
2163
cl_git_pass(git_index_write(index));
2264
git_index_free(index);
2365

2466
cl_git_pass(git_repository_index(&index, g_repo));
2567
cl_assert(git_index_version(index) == 4);
2668

27-
entry = git_index_get_bypath(index, "exec_off", 0);
28-
cl_assert(entry);
29-
entry = git_index_get_bypath(index, "exec_off2on_staged", 0);
30-
cl_assert(entry);
31-
entry = git_index_get_bypath(index, "exec_on", 0);
32-
cl_assert(entry);
69+
for (i = 0; i < ARRAY_SIZE(paths); i++) {
70+
const git_index_entry *e;
71+
72+
cl_assert(e = git_index_get_bypath(index, paths[i], 0));
73+
cl_assert_equal_s(paths[i], e->path);
74+
}
3375

3476
git_index_free(index);
3577
}
3678

37-
void test_index_version__cleanup(void)
79+
void test_index_version__v4_uses_path_compression(void)
3880
{
39-
cl_git_sandbox_cleanup();
40-
g_repo = NULL;
81+
git_index_entry entry;
82+
git_index *index;
83+
char path[250], buf[1];
84+
struct stat st;
85+
char i, j;
86+
87+
memset(path, 'a', sizeof(path));
88+
memset(buf, 'a', sizeof(buf));
89+
90+
memset(&entry, 0, sizeof(entry));
91+
entry.path = path;
92+
entry.mode = GIT_FILEMODE_BLOB;
93+
94+
g_repo = cl_git_sandbox_init("indexv4");
95+
cl_git_pass(git_repository_index(&index, g_repo));
96+
97+
/* write 676 paths of 250 bytes length */
98+
for (i = 'a'; i <= 'z'; i++) {
99+
for (j = 'a'; j < 'z'; j++) {
100+
path[ARRAY_SIZE(path) - 3] = i;
101+
path[ARRAY_SIZE(path) - 2] = j;
102+
path[ARRAY_SIZE(path) - 1] = '\0';
103+
cl_git_pass(git_index_add_frombuffer(index, &entry, buf, sizeof(buf)));
104+
}
105+
}
106+
107+
cl_git_pass(git_index_write(index));
108+
cl_git_pass(p_stat(git_index_path(index), &st));
109+
110+
/*
111+
* Without path compression, the written paths would at
112+
* least take
113+
*
114+
* (entries * pathlen) = len
115+
* (676 * 250) = 169000
116+
*
117+
* bytes. As index v4 uses suffix-compression and our
118+
* written paths only differ in the last two entries,
119+
* this number will be much smaller, e.g.
120+
*
121+
* (1 * pathlen) + (675 * 2) = len
122+
* 676 + 1350 = 2026
123+
*
124+
* bytes.
125+
*
126+
* Note that the above calculations do not include
127+
* additional metadata of the index, e.g. OIDs or
128+
* index extensions. Including those we get an index
129+
* of approx. 200kB without compression and 40kB with
130+
* compression. As this is a lot smaller than without
131+
* compression, we can verify that path compression is
132+
* used.
133+
*/
134+
cl_assert_(st.st_size < 75000, "path compression not enabled");
135+
136+
git_index_free(index);
41137
}

0 commit comments

Comments
 (0)