Skip to content

Commit 83e8a6b

Browse files
committed
util: provide git__memmem function
Unfortunately, neither the `memmem` nor the `strnstr` function is part of any C standard; both are merely extensions of C that are implemented by e.g. glibc. Thus, there is no standardized way to search for a string in a block of memory with a limited size, and using `strstr` is to be considered unsafe in cases where the buffer has not been sanitized. In fact, there are some uses of `strstr` in exactly that unsafe way in our codebase. Provide a new function `git__memmem` that implements the `memmem` semantics. That is, in a given haystack of `n` bytes, search for the occurrence of a byte sequence of `m` bytes and return a pointer to the first occurrence. The implementation chosen is the "Not So Naive" algorithm from [1]. It was chosen as the implementation is comparatively simple while still being reasonably efficient in most cases. Preprocessing happens in constant time and space, searching has a time complexity of O(n*m) with a slightly sub-linear average case. [1]: http://www-igm.univ-mlv.fr/~lecroq/string/
1 parent f010b66 commit 83e8a6b

File tree

3 files changed

+90
-0
lines changed

3 files changed

+90
-0
lines changed

src/util.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,47 @@ size_t git__linenlen(const char *buffer, size_t buffer_len)
355355
return nl ? (size_t)(nl - buffer) + 1 : buffer_len;
356356
}
357357

358+
/*
 * Find the first occurrence of the byte sequence `needle` (`needlelen`
 * bytes) within the first `haystacklen` bytes of `haystack`.
 *
 * Returns a pointer to the start of the first match inside `haystack`, or
 * NULL if there is none. NULL is also returned, without dereferencing
 * either buffer, when either length is zero or when the needle is longer
 * than the haystack.
 *
 * Adapted Not So Naive algorithm from http://www-igm.univ-mlv.fr/~lecroq/string/
 */
const void * git__memmem(const void *haystack, size_t haystacklen,
			 const void *needle, size_t needlelen)
{
	const char *h, *n;
	size_t j, k, l;

	if (needlelen > haystacklen || !haystacklen || !needlelen)
		return NULL;

	h = (const char *) haystack;
	n = (const char *) needle;

	/* A one-byte needle degenerates into a plain byte search. */
	if (needlelen == 1)
		return memchr(haystack, *n, haystacklen);

	/*
	 * Pick the shift distances from the first two needle bytes:
	 *  - `k` is used when the second needle byte mismatches at h[j + 1],
	 *  - `l` is used when it matched (whether or not the rest did).
	 * A shift of two is safe whenever we know h[j + 1] != n[0], i.e. a
	 * match cannot start at j + 1.
	 */
	if (n[0] == n[1]) {
		k = 2;
		l = 1;
	} else {
		k = 1;
		l = 2;
	}

	/*
	 * needlelen <= haystacklen is guaranteed above, so the subtraction
	 * cannot wrap, and h[j + 1] stays in bounds because needlelen >= 2.
	 */
	j = 0;
	while (j <= haystacklen - needlelen) {
		if (n[1] != h[j + 1]) {
			j += k;
		} else {
			/* Cheap single-byte check first, then the remaining tail. */
			if (n[0] == h[j] &&
			    memcmp(n + 2, h + j + 2, needlelen - 2) == 0)
				return h + j;
			j += l;
		}
	}

	return NULL;
}
398+
358399
void git__hexdump(const char *buffer, size_t len)
359400
{
360401
static const size_t LINE_WIDTH = 16;

src/util.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,9 @@ GIT_INLINE(const void *) git__memrchr(const void *s, int c, size_t n)
113113
return NULL;
114114
}
115115

116+
extern const void * git__memmem(const void *haystack, size_t haystacklen,
117+
const void *needle, size_t needlelen);
118+
116119
typedef int (*git__tsort_cmp)(const void *a, const void *b);
117120

118121
extern void git__tsort(void **dst, size_t size, git__tsort_cmp cmp);

tests/core/memmem.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#include "clar_libgit2.h"
2+
3+
/*
 * Assert that searching `haystack` for `needle` yields a pointer exactly
 * `expected_pos` bytes into `haystack`. Lengths are taken with strlen();
 * a NULL string is passed through with a length of 0.
 */
static void assert_found(const char *haystack, const char *needle, size_t expected_pos)
{
	cl_assert_equal_p(
		git__memmem(haystack, haystack ? strlen(haystack) : 0, needle, needle ? strlen(needle) : 0),
		haystack + expected_pos
	);
}
9+
10+
/*
 * Assert that searching `haystack` for `needle` yields NULL. Lengths are
 * taken with strlen(); a NULL string is passed through with a length of 0.
 */
static void assert_absent(const char *haystack, const char *needle)
{
	cl_assert_equal_p(
		git__memmem(haystack, haystack ? strlen(haystack) : 0, needle, needle ? strlen(needle) : 0),
		NULL
	);
}
16+
17+
/* Needles that are present: each row gives the expected match offset. */
void test_core_memmem__found(void)
{
	static const struct {
		const char *haystack;
		const char *needle;
		size_t offset;
	} cases[] = {
		{ "a",       "a",   0 },
		{ "ab",      "a",   0 },
		{ "ba",      "a",   1 },
		{ "aa",      "a",   0 },
		{ "aab",     "aa",  0 },
		{ "baa",     "aa",  1 },
		{ "dabc",    "abc", 1 },
		{ "abababc", "abc", 4 },
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		assert_found(cases[i].haystack, cases[i].needle, cases[i].offset);
}
28+
29+
/* Needles that do not occur in the haystack: every row must yield NULL. */
void test_core_memmem__absent(void)
{
	static const struct {
		const char *haystack;
		const char *needle;
	} cases[] = {
		{ "a",         "b"    },
		{ "a",         "aa"   },
		{ "ba",        "ab"   },
		/* NOTE(review): same pair as the row above; possibly a different pair was intended. */
		{ "ba",        "ab"   },
		{ "abc",       "abcd" },
		{ "abcabcabc", "bcac" },
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		assert_absent(cases[i].haystack, cases[i].needle);
}
38+
39+
/* NULL and empty inputs on either side must all report "not found". */
void test_core_memmem__edgecases(void)
{
	static const struct {
		const char *haystack;
		const char *needle;
	} cases[] = {
		{ NULL, NULL },
		{ "a",  NULL },
		{ NULL, "a"  },
		{ "",   "a"  },
		{ "a",  ""   },
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		assert_absent(cases[i].haystack, cases[i].needle);
}

0 commit comments

Comments
 (0)