Skip to content

Commit 623647a

Browse files
authored
Merge pull request libgit2#4864 from pks-t/pks/object-parse-fixes
Object parse fixes
2 parents 814389d + 7655b2d commit 623647a

File tree

7 files changed

+546
-6
lines changed

7 files changed

+546
-6
lines changed

src/commit.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ int git_commit__parse_raw(void *_commit, const char *data, size_t size)
444444
while (eoln < buffer_end && *eoln != '\n')
445445
++eoln;
446446

447-
if (git__prefixcmp(buffer, "encoding ") == 0) {
447+
if (git__prefixncmp(buffer, buffer_end - buffer, "encoding ") == 0) {
448448
buffer += strlen("encoding ");
449449

450450
commit->message_encoding = git__strndup(buffer, eoln - buffer);

src/tag.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,9 @@ static int tag_parse(git_tag *tag, const char *buffer, const char *buffer_end)
7070
static const char *tag_types[] = {
7171
NULL, "commit\n", "tree\n", "blob\n", "tag\n"
7272
};
73-
74-
unsigned int i;
7573
size_t text_len, alloc_len;
76-
char *search;
74+
const char *search;
75+
unsigned int i;
7776

7877
if (git_oid__parse(&tag->target, &buffer, buffer_end, "object ") < 0)
7978
return tag_error("object field invalid");
@@ -138,8 +137,9 @@ static int tag_parse(git_tag *tag, const char *buffer, const char *buffer_end)
138137
tag->message = NULL;
139138
if (buffer < buffer_end) {
140139
/* If we're not at the end of the header, search for it */
141-
if( *buffer != '\n' ) {
142-
search = strstr(buffer, "\n\n");
140+
if(*buffer != '\n') {
141+
search = git__memmem(buffer, buffer_end - buffer,
142+
"\n\n", 2);
143143
if (search)
144144
buffer = search + 1;
145145
else

src/util.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,47 @@ size_t git__linenlen(const char *buffer, size_t buffer_len)
357357
return nl ? (size_t)(nl - buffer) + 1 : buffer_len;
358358
}
359359

360+
/*
 * Find the first occurrence of the `needlelen` bytes at `needle` inside the
 * `haystacklen` bytes at `haystack`.
 *
 * Both buffers are addressed purely by length, so neither has to be
 * NUL-terminated and both may contain embedded NUL bytes.
 *
 * Returns a pointer to the start of the first match inside `haystack`, or
 * NULL if there is no match, if either length is zero, or if the needle is
 * longer than the haystack.
 *
 * Adapted Not So Naive algorithm from http://www-igm.univ-mlv.fr/~lecroq/string/
 */
const void * git__memmem(const void *haystack, size_t haystacklen,
			 const void *needle, size_t needlelen)
{
	const char *h, *n;
	size_t j, k, l;

	if (needlelen > haystacklen || !haystacklen || !needlelen)
		return NULL;

	h = (const char *) haystack;
	n = (const char *) needle;

	/* A single-byte needle is just a byte search. */
	if (needlelen == 1)
		return memchr(haystack, *n, haystacklen);

	/*
	 * `k` is the shift applied when the second needle byte does not match,
	 * `l` the shift applied when it does match but the candidate is
	 * rejected. When the first two needle bytes are equal, a mismatch on
	 * the second byte also rules out the next position, so we may skip two
	 * bytes; otherwise a match on the second byte rules out the next
	 * position instead.
	 */
	if (n[0] == n[1]) {
		k = 2;
		l = 1;
	} else {
		k = 1;
		l = 2;
	}

	j = 0;
	/* The guard above ensures haystacklen >= needlelen, so no underflow. */
	while (j <= haystacklen - needlelen) {
		if (n[1] != h[j + 1]) {
			j += k;
		} else {
			/* Compare the tail first, the first byte last. */
			if (memcmp(n + 2, h + j + 2, needlelen - 2) == 0 &&
			    n[0] == h[j])
				return h + j;
			j += l;
		}
	}

	return NULL;
}
400+
360401
void git__hexdump(const char *buffer, size_t len)
361402
{
362403
static const size_t LINE_WIDTH = 16;

src/util.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,9 @@ GIT_INLINE(const void *) git__memrchr(const void *s, int c, size_t n)
111111
return NULL;
112112
}
113113

114+
extern const void * git__memmem(const void *haystack, size_t haystacklen,
115+
const void *needle, size_t needlelen);
116+
114117
typedef int (*git__tsort_cmp)(const void *a, const void *b);
115118

116119
extern void git__tsort(void **dst, size_t size, git__tsort_cmp cmp);

tests/core/memmem.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#include "clar_libgit2.h"
2+
3+
/*
 * Assert that searching for `needle` in `haystack` with git__memmem yields
 * a pointer `expected_pos` bytes into `haystack`. A NULL string is passed
 * on with a length of zero.
 */
static void assert_found(const char *haystack, const char *needle, size_t expected_pos)
{
	cl_assert_equal_p(
		git__memmem(haystack, haystack ? strlen(haystack) : 0,
			    needle, needle ? strlen(needle) : 0),
		haystack + expected_pos);
}
9+
10+
/*
 * Assert that git__memmem reports no match (NULL) when searching for
 * `needle` in `haystack`. A NULL string is passed on with a length of zero.
 */
static void assert_absent(const char *haystack, const char *needle)
{
	cl_assert_equal_p(
		git__memmem(haystack, haystack ? strlen(haystack) : 0,
			    needle, needle ? strlen(needle) : 0),
		NULL);
}
16+
17+
/* Needles that are present must be reported at their first occurrence. */
void test_core_memmem__found(void)
{
	static const struct {
		const char *haystack;
		const char *needle;
		size_t pos;
	} cases[] = {
		{ "a", "a", 0 },
		{ "ab", "a", 0 },
		{ "ba", "a", 1 },
		{ "aa", "a", 0 },
		{ "aab", "aa", 0 },
		{ "baa", "aa", 1 },
		{ "dabc", "abc", 1 },
		{ "abababc", "abc", 4 },
	};
	size_t idx;

	for (idx = 0; idx < sizeof(cases) / sizeof(cases[0]); idx++)
		assert_found(cases[idx].haystack, cases[idx].needle, cases[idx].pos);
}
28+
29+
/*
 * Needles that do not occur in the haystack must yield NULL. This covers
 * needles longer than the haystack, reversed byte order in both directions
 * and a near-miss on a repeating pattern.
 */
void test_core_memmem__absent(void)
{
	assert_absent("a", "b");
	assert_absent("a", "aa");
	assert_absent("ba", "ab");
	/* Mirrored case; the original repeated the previous line verbatim. */
	assert_absent("ab", "ba");
	assert_absent("abc", "abcd");
	assert_absent("abcabcabc", "bcac");
}
38+
39+
/* NULL and empty inputs on either side must never produce a match. */
void test_core_memmem__edgecases(void)
{
	static const struct {
		const char *haystack;
		const char *needle;
	} cases[] = {
		{ NULL, NULL },
		{ "a", NULL },
		{ NULL, "a" },
		{ "", "a" },
		{ "a", "" },
	};
	size_t idx;

	for (idx = 0; idx < sizeof(cases) / sizeof(cases[0]); idx++)
		assert_absent(cases[idx].haystack, cases[idx].needle);
}

tests/object/commit/parse.c

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
#include "clar_libgit2.h"
2+
#include "commit.h"
3+
#include "object.h"
4+
#include "signature.h"
5+
6+
static void assert_commit_parses(const char *data, size_t datalen,
7+
const char *expected_treeid,
8+
const char *expected_author,
9+
const char *expected_committer,
10+
const char *expected_encoding,
11+
const char *expected_message,
12+
size_t expected_parents)
13+
{
14+
git_commit *commit;
15+
if (!datalen)
16+
datalen = strlen(data);
17+
cl_git_pass(git_object__from_raw((git_object **) &commit, data, datalen, GIT_OBJ_COMMIT));
18+
19+
if (expected_author) {
20+
git_signature *author;
21+
cl_git_pass(git_signature_from_buffer(&author, expected_author));
22+
cl_assert(git_signature__equal(author, commit->author));
23+
cl_assert_equal_s(author->name, commit->author->name);
24+
cl_assert_equal_s(author->email, commit->author->email);
25+
cl_assert_equal_i(author->when.time, commit->author->when.time);
26+
cl_assert_equal_i(author->when.offset, commit->author->when.offset);
27+
cl_assert_equal_i(author->when.sign, commit->author->when.sign);
28+
git_signature_free(author);
29+
}
30+
31+
if (expected_committer) {
32+
git_signature *committer;
33+
cl_git_pass(git_signature_from_buffer(&committer, expected_committer));
34+
cl_assert_equal_s(committer->name, commit->committer->name);
35+
cl_assert_equal_s(committer->email, commit->committer->email);
36+
cl_assert_equal_i(committer->when.time, commit->committer->when.time);
37+
cl_assert_equal_i(committer->when.offset, commit->committer->when.offset);
38+
cl_assert_equal_i(committer->when.sign, commit->committer->when.sign);
39+
git_signature_free(committer);
40+
}
41+
42+
if (expected_encoding)
43+
cl_assert_equal_s(commit->message_encoding, expected_encoding);
44+
else
45+
cl_assert_equal_p(commit->message_encoding, NULL);
46+
47+
if (expected_message)
48+
cl_assert_equal_s(commit->raw_message, expected_message);
49+
else
50+
cl_assert_equal_p(commit->message_encoding, NULL);
51+
52+
if (expected_treeid) {
53+
git_oid tree_oid;
54+
cl_git_pass(git_oid_fromstr(&tree_oid, expected_treeid));
55+
cl_assert_equal_oid(&tree_oid, &commit->tree_id);
56+
}
57+
58+
cl_assert_equal_i(commit->parent_ids.size, expected_parents);
59+
60+
git_object__free(&commit->object);
61+
}
62+
63+
static void assert_commit_fails(const char *data, size_t datalen)
64+
{
65+
git_object *object;
66+
if (!datalen)
67+
datalen = strlen(data);
68+
cl_git_fail(git_object__from_raw(&object, data, datalen, GIT_OBJ_COMMIT));
69+
}
70+
71+
/* A commit carrying tree, author, committer, encoding and message parses. */
void test_object_commit_parse__parsing_commit_succeeds(void)
{
	const char *raw =
		"tree 3e7ac388cadacccdf1c6c5f3445895b71d9cb0f8\n"
		"author Author <author@example.com>\n"
		"committer Committer <committer@example.com>\n"
		"encoding Encoding\n"
		"\n"
		"Message";

	assert_commit_parses(raw, 0,
		"3e7ac388cadacccdf1c6c5f3445895b71d9cb0f8",
		"Author <author@example.com>",
		"Committer <committer@example.com>",
		"Encoding",
		"Message",
		0);
}
87+
88+
void test_object_commit_parse__parsing_commit_without_encoding_succeeds(void)
89+
{
90+
const char *commit =
91+
"tree 3e7ac388cadacccdf1c6c5f3445895b71d9cb0f8\n"
92+
"author Author <author@example.com>\n"
93+
"committer Committer <committer@example.com>\n"
94+
"\n"
95+
"Message";
96+
assert_commit_parses(commit, 0,
97+
"3e7ac388cadacccdf1c6c5f3445895b71d9cb0f8",
98+
"Author <author@example.com>",
99+
"Committer <committer@example.com>",
100+
NULL,
101+
"Message", 0);
102+
}
103+
104+
void test_object_commit_parse__parsing_commit_with_multiple_authors_succeeds(void)
105+
{
106+
const char *commit =
107+
"tree 3e7ac388cadacccdf1c6c5f3445895b71d9cb0f8\n"
108+
"author Author1 <author@example.com>\n"
109+
"author Author2 <author@example.com>\n"
110+
"author Author3 <author@example.com>\n"
111+
"author Author4 <author@example.com>\n"
112+
"committer Committer <committer@example.com>\n"
113+
"\n"
114+
"Message";
115+
assert_commit_parses(commit, 0,
116+
"3e7ac388cadacccdf1c6c5f3445895b71d9cb0f8",
117+
"Author1 <author@example.com>",
118+
"Committer <committer@example.com>",
119+
NULL,
120+
"Message", 0);
121+
}
122+
123+
void test_object_commit_parse__parsing_commit_with_multiple_committers_succeeds(void)
124+
{
125+
const char *commit =
126+
"tree 3e7ac388cadacccdf1c6c5f3445895b71d9cb0f8\n"
127+
"author Author <author@example.com>\n"
128+
"committer Committer1 <committer@example.com>\n"
129+
"committer Committer2 <committer@example.com>\n"
130+
"committer Committer3 <committer@example.com>\n"
131+
"committer Committer4 <committer@example.com>\n"
132+
"\n"
133+
"Message";
134+
assert_commit_parses(commit, 0,
135+
"3e7ac388cadacccdf1c6c5f3445895b71d9cb0f8",
136+
"Author <author@example.com>",
137+
"Committer1 <committer@example.com>",
138+
NULL,
139+
"Message", 0);
140+
}
141+
142+
void test_object_commit_parse__parsing_commit_without_message_succeeds(void)
143+
{
144+
const char *commit =
145+
"tree 3e7ac388cadacccdf1c6c5f3445895b71d9cb0f8\n"
146+
"author Author <author@example.com>\n"
147+
"committer Committer <committer@example.com>\n";
148+
assert_commit_parses(commit, 0,
149+
"3e7ac388cadacccdf1c6c5f3445895b71d9cb0f8",
150+
"Author <author@example.com>",
151+
"Committer <committer@example.com>",
152+
NULL,
153+
"", 0);
154+
}
155+
156+
void test_object_commit_parse__parsing_commit_with_unknown_fields_succeeds(void)
157+
{
158+
const char *commit =
159+
"tree 3e7ac388cadacccdf1c6c5f3445895b71d9cb0f8\n"
160+
"author Author <author@example.com>\n"
161+
"committer Committer <committer@example.com>\n"
162+
"foo bar\n"
163+
"more garbage\n"
164+
"\n"
165+
"Message";
166+
assert_commit_parses(commit, 0,
167+
"3e7ac388cadacccdf1c6c5f3445895b71d9cb0f8",
168+
"Author <author@example.com>",
169+
"Committer <committer@example.com>",
170+
NULL,
171+
"Message", 0);
172+
}
173+
174+
/* A tree id containing non-hex characters ("xxx") must be rejected. */
void test_object_commit_parse__parsing_commit_with_invalid_tree_fails(void)
{
	const char *raw =
		"tree 3e7ac388cadacccdf1xxx5f3445895b71d9cb0f8\n"
		"author Author <author@example.com>\n"
		"committer Committer <committer@example.com>\n"
		"\n"
		"Message";

	assert_commit_fails(raw, 0);
}
184+
185+
/* A commit lacking the leading tree line must be rejected. */
void test_object_commit_parse__parsing_commit_without_tree_fails(void)
{
	const char *raw =
		"author Author <author@example.com>\n"
		"committer Committer <committer@example.com>\n"
		"\n"
		"Message";

	assert_commit_fails(raw, 0);
}
194+
195+
/* A commit lacking the author line must be rejected. */
void test_object_commit_parse__parsing_commit_without_author_fails(void)
{
	const char *raw =
		"tree 3e7ac388cadacccdf1c6c5f3445895b71d9cb0f8\n"
		"committer Committer <committer@example.com>\n"
		"\n"
		"Message";

	assert_commit_fails(raw, 0);
}
204+
205+
/* A commit lacking the committer line must be rejected. */
void test_object_commit_parse__parsing_commit_without_committer_fails(void)
{
	const char *raw =
		"tree 3e7ac388cadacccdf1c6c5f3445895b71d9cb0f8\n"
		"author Author <author@example.com>\n"
		"\n"
		"Message";

	assert_commit_fails(raw, 0);
}
214+
215+
/*
 * Hand the parser a buffer whose length stops right after the "e" of the
 * trailing "encoding" header, so that checking for the "encoding " prefix
 * must not look past the given length.
 */
void test_object_commit_parse__parsing_encoding_will_not_cause_oob_read(void)
{
	const char *raw =
		"tree 3e7ac388cadacccdf1c6c5f3445895b71d9cb0f8\n"
		"author <>\n"
		"committer <>\n"
		"encoding foo\n";
	size_t truncated_len = strlen(raw) - strlen("ncoding foo\n");

	/*
	 * Unknown fields are ignored, so the cut-off encoding field is
	 * expected to parse just fine, with no encoding reported.
	 */
	assert_commit_parses(raw, truncated_len,
		"3e7ac388cadacccdf1c6c5f3445895b71d9cb0f8",
		"<>",
		"<>",
		NULL,
		"",
		0);
}

0 commit comments

Comments
 (0)