Skip to content

Commit ddefea7

Browse files
committed
odb: support large loose objects
zlib will only inflate/deflate an `int`'s worth of data at a time. We need to loop through large files to ensure that we inflate the entire file, not just an `int`'s worth of data. Thankfully, we already have this loop in our `git_zstream` layer. Handle large objects using the `git_zstream`.
1 parent d1e4465 commit ddefea7

File tree

1 file changed

+92
-98
lines changed

1 file changed

+92
-98
lines changed

src/odb_loose.c

Lines changed: 92 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "delta.h"
1717
#include "filebuf.h"
1818
#include "object.h"
19+
#include "zstream.h"
1920

2021
#include "git2/odb_backend.h"
2122
#include "git2/types.h"
@@ -119,53 +120,53 @@ static size_t get_binary_object_header(obj_hdr *hdr, git_buf *obj)
119120
return used;
120121
}
121122

122-
static size_t get_object_header(obj_hdr *hdr, unsigned char *data)
123+
static int parse_header(
124+
obj_hdr *out,
125+
size_t *out_len,
126+
const unsigned char *_data,
127+
size_t data_len)
123128
{
124-
char c, typename[10];
125-
size_t size, used = 0;
129+
const char *data = (char *)_data;
130+
size_t i, typename_len, size_idx, size_len;
131+
int64_t size;
126132

127-
/*
128-
* type name string followed by space.
129-
*/
130-
while ((c = data[used]) != ' ') {
131-
typename[used++] = c;
132-
if (used >= sizeof(typename))
133-
return 0;
133+
*out_len = 0;
134+
135+
/* find the object type name */
136+
for (i = 0, typename_len = 0; i < data_len; i++, typename_len++) {
137+
if (data[i] == ' ')
138+
break;
134139
}
135-
typename[used] = 0;
136-
if (used == 0)
137-
return 0;
138-
hdr->type = git_object_string2type(typename);
139-
used++; /* consume the space */
140140

141-
/*
142-
* length follows immediately in decimal (without
143-
* leading zeros).
144-
*/
145-
size = data[used++] - '0';
146-
if (size > 9)
147-
return 0;
148-
if (size) {
149-
while ((c = data[used]) != '\0') {
150-
size_t d = c - '0';
151-
if (d > 9)
152-
break;
153-
used++;
154-
size = size * 10 + d;
155-
}
141+
if (typename_len == data_len)
142+
goto on_error;
143+
144+
out->type = git_object_stringn2type(data, typename_len);
145+
146+
size_idx = typename_len + 1;
147+
for (i = size_idx, size_len = 0; i < data_len; i++, size_len++) {
148+
if (data[i] == '\0')
149+
break;
156150
}
157-
hdr->size = size;
158151

159-
/*
160-
* the length must be followed by a zero byte
161-
*/
162-
if (data[used++] != '\0')
163-
return 0;
152+
if (i == data_len)
153+
goto on_error;
164154

165-
return used;
166-
}
155+
if (git__strntol64(&size, &data[size_idx], size_len, NULL, 10) < 0 ||
156+
size < 0)
157+
goto on_error;
158+
159+
out->size = size;
167160

161+
if (GIT_ADD_SIZET_OVERFLOW(out_len, i, 1))
162+
goto on_error;
163+
164+
return 0;
168165

166+
on_error:
167+
giterr_set(GITERR_OBJECT, "failed to parse loose object: invalid header");
168+
return -1;
169+
}
169170

170171
/***********************************************************
171172
*
@@ -269,45 +270,6 @@ static int inflate_buffer(void *in, size_t inlen, void *out, size_t outlen)
269270
return 0;
270271
}
271272

272-
static void *inflate_tail(z_stream *s, void *hb, size_t used, obj_hdr *hdr)
273-
{
274-
unsigned char *buf, *head = hb;
275-
size_t tail, alloc_size;
276-
277-
/*
278-
* allocate a buffer to hold the inflated data and copy the
279-
* initial sequence of inflated data from the tail of the
280-
* head buffer, if any.
281-
*/
282-
if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr->size, 1) ||
283-
(buf = git__malloc(alloc_size)) == NULL) {
284-
inflateEnd(s);
285-
return NULL;
286-
}
287-
tail = s->total_out - used;
288-
if (used > 0 && tail > 0) {
289-
if (tail > hdr->size)
290-
tail = hdr->size;
291-
memcpy(buf, head + used, tail);
292-
}
293-
used = tail;
294-
295-
/*
296-
* inflate the remainder of the object data, if any
297-
*/
298-
if (hdr->size < used)
299-
inflateEnd(s);
300-
else {
301-
set_stream_output(s, buf + used, hdr->size - used);
302-
if (finish_inflate(s)) {
303-
git__free(buf);
304-
return NULL;
305-
}
306-
}
307-
308-
return buf;
309-
}
310-
311273
/*
312274
* At one point, there was a loose object format that was intended to
313275
* mimic the format used in pack-files. This was to allow easy copying
@@ -354,43 +316,74 @@ static int inflate_packlike_loose_disk_obj(git_rawobj *out, git_buf *obj)
354316

355317
static int inflate_disk_obj(git_rawobj *out, git_buf *obj)
356318
{
357-
unsigned char head[64], *buf;
358-
z_stream zs;
319+
git_zstream zstream = GIT_ZSTREAM_INIT;
320+
unsigned char head[64], *body = NULL;
321+
size_t decompressed, head_len, body_len, alloc_size;
359322
obj_hdr hdr;
360-
size_t used;
323+
int error;
361324

362-
/*
363-
* check for a pack-like loose object
364-
*/
325+
/* check for a pack-like loose object */
365326
if (!is_zlib_compressed_data((unsigned char *)obj->ptr))
366327
return inflate_packlike_loose_disk_obj(out, obj);
367328

329+
if ((error = git_zstream_init(&zstream, GIT_ZSTREAM_INFLATE)) < 0 ||
330+
(error = git_zstream_set_input(&zstream, git_buf_cstr(obj), git_buf_len(obj))) < 0)
331+
goto done;
332+
333+
decompressed = sizeof(head);
334+
368335
/*
369-
* inflate the initial part of the io buffer in order
370-
* to parse the object header (type and size).
371-
*/
372-
if (start_inflate(&zs, obj, head, sizeof(head)) < Z_OK ||
373-
(used = get_object_header(&hdr, head)) == 0 ||
374-
!git_object_typeisloose(hdr.type))
375-
{
376-
abort_inflate(&zs);
336+
* inflate the initial part of the compressed buffer in order to parse the
337+
* header; read the largest header possible, then push back the remainder.
338+
*/
339+
if ((error = git_zstream_get_output(head, &decompressed, &zstream)) < 0 ||
340+
(error = parse_header(&hdr, &head_len, head, decompressed)) < 0)
341+
goto done;
342+
343+
if (!git_object_typeisloose(hdr.type)) {
377344
giterr_set(GITERR_ODB, "failed to inflate disk object");
378-
return -1;
345+
error = -1;
346+
goto done;
379347
}
380348

381349
/*
382350
* allocate a buffer and inflate the object data into it
383351
* (including the initial sequence in the head buffer).
384352
*/
385-
if ((buf = inflate_tail(&zs, head, used, &hdr)) == NULL)
386-
return -1;
387-
buf[hdr.size] = '\0';
353+
if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr.size, 1) ||
354+
(body = git__malloc(alloc_size)) == NULL) {
355+
error = -1;
356+
goto done;
357+
}
388358

389-
out->data = buf;
359+
assert(decompressed >= head_len);
360+
body_len = decompressed - head_len;
361+
362+
if (body_len)
363+
memcpy(body, head + head_len, body_len);
364+
365+
decompressed = hdr.size - body_len;
366+
if ((error = git_zstream_get_output(body + body_len, &decompressed, &zstream)) < 0)
367+
goto done;
368+
369+
if (!git_zstream_done(&zstream)) {
370+
giterr_set(GITERR_ZLIB, "failed to finish zlib inflation: stream aborted prematurely");
371+
error = -1;
372+
goto done;
373+
}
374+
375+
body[hdr.size] = '\0';
376+
377+
out->data = body;
390378
out->len = hdr.size;
391379
out->type = hdr.type;
392380

393-
return 0;
381+
done:
382+
if (error < 0)
383+
git__free(body);
384+
385+
git_zstream_free(&zstream);
386+
return error;
394387
}
395388

396389

@@ -435,6 +428,7 @@ static int read_header_loose(git_rawobj *out, git_buf *loc)
435428
git_file fd;
436429
z_stream zs;
437430
obj_hdr header_obj;
431+
size_t header_len;
438432
unsigned char raw_buffer[16], inflated_buffer[64];
439433

440434
assert(out && loc);
@@ -460,7 +454,7 @@ static int read_header_loose(git_rawobj *out, git_buf *loc)
460454
}
461455

462456
if ((z_return != Z_STREAM_END && z_return != Z_BUF_ERROR)
463-
|| get_object_header(&header_obj, inflated_buffer) == 0
457+
|| parse_header(&header_obj, &header_len, inflated_buffer, sizeof(inflated_buffer)) < 0
464458
|| git_object_typeisloose(header_obj.type) == 0)
465459
{
466460
giterr_set(GITERR_ZLIB, "failed to read loose object header");

0 commit comments

Comments
 (0)