Skip to content

Commit e14bf97

Browse files
authored
Merge pull request libgit2#4443 from libgit2/ethomson/large_loose_blobs
Inflate large loose blobs
2 parents 083b1a2 + 456e521 commit e14bf97

File tree

10 files changed

+352
-136
lines changed

10 files changed

+352
-136
lines changed

src/hash/hash_common_crypto.h

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,8 @@ struct git_hash_ctx {
1616
CC_SHA1_CTX c;
1717
};
1818

19+
#define CC_LONG_MAX ((CC_LONG)-1)
20+
1921
#define git_hash_global_init() 0
2022
#define git_hash_ctx_init(ctx) git_hash_init(ctx)
2123
#define git_hash_ctx_cleanup(ctx)
@@ -27,10 +29,21 @@ GIT_INLINE(int) git_hash_init(git_hash_ctx *ctx)
2729
return 0;
2830
}
2931

30-
GIT_INLINE(int) git_hash_update(git_hash_ctx *ctx, const void *data, size_t len)
32+
GIT_INLINE(int) git_hash_update(git_hash_ctx *ctx, const void *_data, size_t len)
3133
{
34+
const unsigned char *data = _data;
35+
3236
assert(ctx);
33-
CC_SHA1_Update(&ctx->c, data, len);
37+
38+
while (len > 0) {
39+
CC_LONG chunk = (len > CC_LONG_MAX) ? CC_LONG_MAX : (CC_LONG)len;
40+
41+
CC_SHA1_Update(&ctx->c, data, chunk);
42+
43+
data += chunk;
44+
len -= chunk;
45+
}
46+
3447
return 0;
3548
}
3649

src/hash/hash_win32.c

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -136,12 +136,21 @@ GIT_INLINE(int) hash_cryptoapi_init(git_hash_ctx *ctx)
136136
return 0;
137137
}
138138

139-
GIT_INLINE(int) hash_cryptoapi_update(git_hash_ctx *ctx, const void *data, size_t len)
139+
GIT_INLINE(int) hash_cryptoapi_update(git_hash_ctx *ctx, const void *_data, size_t len)
140140
{
141+
const BYTE *data = (BYTE *)_data;
142+
141143
assert(ctx->ctx.cryptoapi.valid);
142144

143-
if (!CryptHashData(ctx->ctx.cryptoapi.hash_handle, (const BYTE *)data, (DWORD)len, 0))
144-
return -1;
145+
while (len > 0) {
146+
DWORD chunk = (len > MAXDWORD) ? MAXDWORD : (DWORD)len;
147+
148+
if (!CryptHashData(ctx->ctx.cryptoapi.hash_handle, data, chunk, 0))
149+
return -1;
150+
151+
data += chunk;
152+
len -= chunk;
153+
}
145154

146155
return 0;
147156
}
@@ -202,10 +211,19 @@ GIT_INLINE(int) hash_cng_init(git_hash_ctx *ctx)
202211
return 0;
203212
}
204213

205-
GIT_INLINE(int) hash_cng_update(git_hash_ctx *ctx, const void *data, size_t len)
214+
GIT_INLINE(int) hash_cng_update(git_hash_ctx *ctx, const void *_data, size_t len)
206215
{
207-
if (ctx->prov->prov.cng.hash_data(ctx->ctx.cng.hash_handle, (PBYTE)data, (ULONG)len, 0) < 0)
208-
return -1;
216+
PBYTE data = (PBYTE)_data;
217+
218+
while (len > 0) {
219+
ULONG chunk = (len > ULONG_MAX) ? ULONG_MAX : (ULONG)len;
220+
221+
if (ctx->prov->prov.cng.hash_data(ctx->ctx.cng.hash_handle, data, chunk, 0) < 0)
222+
return -1;
223+
224+
data += chunk;
225+
len -= chunk;
226+
}
209227

210228
return 0;
211229
}

src/object.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,14 +235,23 @@ const char *git_object_type2string(git_otype type)
235235
}
236236

237237
git_otype git_object_string2type(const char *str)
238+
{
239+
if (!str)
240+
return GIT_OBJ_BAD;
241+
242+
return git_object_stringn2type(str, strlen(str));
243+
}
244+
245+
git_otype git_object_stringn2type(const char *str, size_t len)
238246
{
239247
size_t i;
240248

241-
if (!str || !*str)
249+
if (!str || !len || !*str)
242250
return GIT_OBJ_BAD;
243251

244252
for (i = 0; i < ARRAY_SIZE(git_objects_table); i++)
245-
if (!strcmp(str, git_objects_table[i].str))
253+
if (*git_objects_table[i].str &&
254+
!git__prefixncmp(str, len, git_objects_table[i].str))
246255
return (git_otype)i;
247256

248257
return GIT_OBJ_BAD;

src/object.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ int git_object__from_odb_object(
3030

3131
int git_object__resolve_to_type(git_object **obj, git_otype type);
3232

33+
git_otype git_object_stringn2type(const char *str, size_t len);
34+
3335
int git_oid__parse(git_oid *oid, const char **buffer_out, const char *buffer_end, const char *header);
3436

3537
void git_oid__writebuf(git_buf *buf, const char *header, const git_oid *oid);

src/odb_loose.c

Lines changed: 97 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "delta.h"
1717
#include "filebuf.h"
1818
#include "object.h"
19+
#include "zstream.h"
1920

2021
#include "git2/odb_backend.h"
2122
#include "git2/types.h"
@@ -119,53 +120,58 @@ static size_t get_binary_object_header(obj_hdr *hdr, git_buf *obj)
119120
return used;
120121
}
121122

122-
static size_t get_object_header(obj_hdr *hdr, unsigned char *data)
123+
static int parse_header(
124+
obj_hdr *out,
125+
size_t *out_len,
126+
const unsigned char *_data,
127+
size_t data_len)
123128
{
124-
char c, typename[10];
125-
size_t size, used = 0;
129+
const char *data = (char *)_data;
130+
size_t i, typename_len, size_idx, size_len;
131+
int64_t size;
126132

127-
/*
128-
* type name string followed by space.
129-
*/
130-
while ((c = data[used]) != ' ') {
131-
typename[used++] = c;
132-
if (used >= sizeof(typename))
133-
return 0;
133+
*out_len = 0;
134+
135+
/* find the object type name */
136+
for (i = 0, typename_len = 0; i < data_len; i++, typename_len++) {
137+
if (data[i] == ' ')
138+
break;
134139
}
135-
typename[used] = 0;
136-
if (used == 0)
137-
return 0;
138-
hdr->type = git_object_string2type(typename);
139-
used++; /* consume the space */
140140

141-
/*
142-
* length follows immediately in decimal (without
143-
* leading zeros).
144-
*/
145-
size = data[used++] - '0';
146-
if (size > 9)
147-
return 0;
148-
if (size) {
149-
while ((c = data[used]) != '\0') {
150-
size_t d = c - '0';
151-
if (d > 9)
152-
break;
153-
used++;
154-
size = size * 10 + d;
155-
}
141+
if (typename_len == data_len)
142+
goto on_error;
143+
144+
out->type = git_object_stringn2type(data, typename_len);
145+
146+
size_idx = typename_len + 1;
147+
for (i = size_idx, size_len = 0; i < data_len; i++, size_len++) {
148+
if (data[i] == '\0')
149+
break;
156150
}
157-
hdr->size = size;
158151

159-
/*
160-
* the length must be followed by a zero byte
161-
*/
162-
if (data[used++] != '\0')
163-
return 0;
152+
if (i == data_len)
153+
goto on_error;
164154

165-
return used;
166-
}
155+
if (git__strntol64(&size, &data[size_idx], size_len, NULL, 10) < 0 ||
156+
size < 0)
157+
goto on_error;
158+
159+
if ((uint64_t)size > SIZE_MAX) {
160+
giterr_set(GITERR_OBJECT, "object is larger than available memory");
161+
return -1;
162+
}
167163

164+
out->size = size;
168165

166+
if (GIT_ADD_SIZET_OVERFLOW(out_len, i, 1))
167+
goto on_error;
168+
169+
return 0;
170+
171+
on_error:
172+
giterr_set(GITERR_OBJECT, "failed to parse loose object: invalid header");
173+
return -1;
174+
}
169175

170176
/***********************************************************
171177
*
@@ -269,45 +275,6 @@ static int inflate_buffer(void *in, size_t inlen, void *out, size_t outlen)
269275
return 0;
270276
}
271277

272-
static void *inflate_tail(z_stream *s, void *hb, size_t used, obj_hdr *hdr)
273-
{
274-
unsigned char *buf, *head = hb;
275-
size_t tail, alloc_size;
276-
277-
/*
278-
* allocate a buffer to hold the inflated data and copy the
279-
* initial sequence of inflated data from the tail of the
280-
* head buffer, if any.
281-
*/
282-
if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr->size, 1) ||
283-
(buf = git__malloc(alloc_size)) == NULL) {
284-
inflateEnd(s);
285-
return NULL;
286-
}
287-
tail = s->total_out - used;
288-
if (used > 0 && tail > 0) {
289-
if (tail > hdr->size)
290-
tail = hdr->size;
291-
memcpy(buf, head + used, tail);
292-
}
293-
used = tail;
294-
295-
/*
296-
* inflate the remainder of the object data, if any
297-
*/
298-
if (hdr->size < used)
299-
inflateEnd(s);
300-
else {
301-
set_stream_output(s, buf + used, hdr->size - used);
302-
if (finish_inflate(s)) {
303-
git__free(buf);
304-
return NULL;
305-
}
306-
}
307-
308-
return buf;
309-
}
310-
311278
/*
312279
* At one point, there was a loose object format that was intended to
313280
* mimic the format used in pack-files. This was to allow easy copying
@@ -354,43 +321,74 @@ static int inflate_packlike_loose_disk_obj(git_rawobj *out, git_buf *obj)
354321

355322
static int inflate_disk_obj(git_rawobj *out, git_buf *obj)
356323
{
357-
unsigned char head[64], *buf;
358-
z_stream zs;
324+
git_zstream zstream = GIT_ZSTREAM_INIT;
325+
unsigned char head[64], *body = NULL;
326+
size_t decompressed, head_len, body_len, alloc_size;
359327
obj_hdr hdr;
360-
size_t used;
328+
int error;
361329

362-
/*
363-
* check for a pack-like loose object
364-
*/
330+
/* check for a pack-like loose object */
365331
if (!is_zlib_compressed_data((unsigned char *)obj->ptr))
366332
return inflate_packlike_loose_disk_obj(out, obj);
367333

334+
if ((error = git_zstream_init(&zstream, GIT_ZSTREAM_INFLATE)) < 0 ||
335+
(error = git_zstream_set_input(&zstream, git_buf_cstr(obj), git_buf_len(obj))) < 0)
336+
goto done;
337+
338+
decompressed = sizeof(head);
339+
368340
/*
369-
* inflate the initial part of the io buffer in order
370-
* to parse the object header (type and size).
371-
*/
372-
if (start_inflate(&zs, obj, head, sizeof(head)) < Z_OK ||
373-
(used = get_object_header(&hdr, head)) == 0 ||
374-
!git_object_typeisloose(hdr.type))
375-
{
376-
abort_inflate(&zs);
341+
* inflate the initial part of the compressed buffer in order to parse the
342+
* header; read the largest header possible, then push back the remainder.
343+
*/
344+
if ((error = git_zstream_get_output(head, &decompressed, &zstream)) < 0 ||
345+
(error = parse_header(&hdr, &head_len, head, decompressed)) < 0)
346+
goto done;
347+
348+
if (!git_object_typeisloose(hdr.type)) {
377349
giterr_set(GITERR_ODB, "failed to inflate disk object");
378-
return -1;
350+
error = -1;
351+
goto done;
379352
}
380353

381354
/*
382355
* allocate a buffer and inflate the object data into it
383356
* (including the initial sequence in the head buffer).
384357
*/
385-
if ((buf = inflate_tail(&zs, head, used, &hdr)) == NULL)
386-
return -1;
387-
buf[hdr.size] = '\0';
358+
if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr.size, 1) ||
359+
(body = git__malloc(alloc_size)) == NULL) {
360+
error = -1;
361+
goto done;
362+
}
388363

389-
out->data = buf;
364+
assert(decompressed >= head_len);
365+
body_len = decompressed - head_len;
366+
367+
if (body_len)
368+
memcpy(body, head + head_len, body_len);
369+
370+
decompressed = hdr.size - body_len;
371+
if ((error = git_zstream_get_output(body + body_len, &decompressed, &zstream)) < 0)
372+
goto done;
373+
374+
if (!git_zstream_done(&zstream)) {
375+
giterr_set(GITERR_ZLIB, "failed to finish zlib inflation: stream aborted prematurely");
376+
error = -1;
377+
goto done;
378+
}
379+
380+
body[hdr.size] = '\0';
381+
382+
out->data = body;
390383
out->len = hdr.size;
391384
out->type = hdr.type;
392385

393-
return 0;
386+
done:
387+
if (error < 0)
388+
git__free(body);
389+
390+
git_zstream_free(&zstream);
391+
return error;
394392
}
395393

396394

@@ -435,6 +433,7 @@ static int read_header_loose(git_rawobj *out, git_buf *loc)
435433
git_file fd;
436434
z_stream zs;
437435
obj_hdr header_obj;
436+
size_t header_len;
438437
unsigned char raw_buffer[16], inflated_buffer[64];
439438

440439
assert(out && loc);
@@ -460,7 +459,7 @@ static int read_header_loose(git_rawobj *out, git_buf *loc)
460459
}
461460

462461
if ((z_return != Z_STREAM_END && z_return != Z_BUF_ERROR)
463-
|| get_object_header(&header_obj, inflated_buffer) == 0
462+
|| parse_header(&header_obj, &header_len, inflated_buffer, sizeof(inflated_buffer)) < 0
464463
|| git_object_typeisloose(header_obj.type) == 0)
465464
{
466465
giterr_set(GITERR_ZLIB, "failed to read loose object header");

0 commit comments

Comments
 (0)