Skip to content

Commit a9f5762

Browse files
committed
wildmatch: import wildmatch from git.git
In commit 70a8fc999d (stop using fnmatch (either native or compat), 2014-02-15), upstream git has switched over all code from their internal fnmatch copy to its new wildmatch code. We haven't followed suit, and thus have developed some incompatibilities in how we match regular expressions. Import git's wildmatch from v2.22.0 and add a test suite based on their t3070-wildmatch.sh tests.
1 parent c3179ef commit a9f5762

File tree

4 files changed

+619
-0
lines changed

4 files changed

+619
-0
lines changed

COPYING

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -991,3 +991,31 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
991991
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
992992
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
993993
SOFTWARE.
994+
995+
----------------------------------------------------------------------
996+
997+
The bundled wildmatch code is licensed under the BSD license:
998+
999+
Copyright Rich Salz.
1000+
All rights reserved.
1001+
1002+
Redistribution and use in any form are permitted provided that the
1003+
following restrictions are are met:
1004+
1005+
1. Source distributions must retain this entire copyright notice
1006+
and comment.
1007+
2. Binary distributions must include the acknowledgement ``This
1008+
product includes software developed by Rich Salz'' in the
1009+
documentation or other materials provided with the
1010+
distribution. This must not be represented as an endorsement
1011+
or promotion without specific prior written permission.
1012+
3. The origin of this software must not be misrepresented, either
1013+
by explicit claim or by omission. Credits must appear in the
1014+
source and documentation.
1015+
4. Altered versions must be plainly marked as such in the source
1016+
and documentation and must not be misrepresented as being the
1017+
original software.
1018+
1019+
THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
1020+
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
1021+
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.

src/wildmatch.c

Lines changed: 320 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,320 @@
1+
/*
2+
* Copyright (C) the libgit2 contributors. All rights reserved.
3+
*
4+
* This file is part of libgit2, distributed under the GNU GPL v2 with
5+
* a Linking Exception. For full terms see the included COPYING file.
6+
*
7+
* Do shell-style pattern matching for ?, \, [], and * characters.
8+
* It is 8bit clean.
9+
*
10+
* Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
11+
* Rich $alz is now <rsalz@bbn.com>.
12+
*
13+
* Modified by Wayne Davison to special-case '/' matching, to make '**'
14+
* work differently than '*', and to fix the character-class code.
15+
*
16+
* Imported from git.git.
17+
*/
18+
19+
#include "wildmatch.h"
20+
21+
#define GIT_SPACE 0x01
22+
#define GIT_DIGIT 0x02
23+
#define GIT_ALPHA 0x04
24+
#define GIT_GLOB_SPECIAL 0x08
25+
#define GIT_REGEX_SPECIAL 0x10
26+
#define GIT_PATHSPEC_MAGIC 0x20
27+
#define GIT_CNTRL 0x40
28+
#define GIT_PUNCT 0x80
29+
30+
enum {
31+
S = GIT_SPACE,
32+
A = GIT_ALPHA,
33+
D = GIT_DIGIT,
34+
G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */
35+
R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | */
36+
P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
37+
X = GIT_CNTRL,
38+
U = GIT_PUNCT,
39+
Z = GIT_CNTRL | GIT_SPACE
40+
};
41+
42+
static const unsigned char sane_ctype[256] = {
43+
X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X, /* 0.. 15 */
44+
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 16.. 31 */
45+
S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */
46+
D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */
47+
P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */
48+
A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P, /* 80.. 95 */
49+
P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */
50+
A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X, /* 112..127 */
51+
/* Nothing in the 128.. range */
52+
};
53+
54+
#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
55+
#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
56+
57+
typedef unsigned char uchar;
58+
59+
/* What character marks an inverted character class? */
60+
#define NEGATE_CLASS '!'
61+
#define NEGATE_CLASS2 '^'
62+
63+
#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
64+
&& *(class) == *(litmatch) \
65+
&& strncmp((char*)class, litmatch, len) == 0)
66+
67+
#if defined STDC_HEADERS || !defined isascii
68+
# define ISASCII(c) 1
69+
#else
70+
# define ISASCII(c) isascii(c)
71+
#endif
72+
73+
#ifdef isblank
74+
# define ISBLANK(c) (ISASCII(c) && isblank(c))
75+
#else
76+
# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
77+
#endif
78+
79+
#ifdef isgraph
80+
# define ISGRAPH(c) (ISASCII(c) && isgraph(c))
81+
#else
82+
# define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
83+
#endif
84+
85+
#define ISPRINT(c) (ISASCII(c) && isprint(c))
86+
#define ISDIGIT(c) (ISASCII(c) && isdigit(c))
87+
#define ISALNUM(c) (ISASCII(c) && isalnum(c))
88+
#define ISALPHA(c) (ISASCII(c) && isalpha(c))
89+
#define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
90+
#define ISLOWER(c) (ISASCII(c) && islower(c))
91+
#define ISPUNCT(c) (ISASCII(c) && ispunct(c))
92+
#define ISSPACE(c) (ISASCII(c) && isspace(c))
93+
#define ISUPPER(c) (ISASCII(c) && isupper(c))
94+
#define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
95+
96+
/* Match pattern "p" against "text" */
97+
static int dowild(const uchar *p, const uchar *text, unsigned int flags)
98+
{
99+
uchar p_ch;
100+
const uchar *pattern = p;
101+
102+
for ( ; (p_ch = *p) != '\0'; text++, p++) {
103+
int matched, match_slash, negated;
104+
uchar t_ch, prev_ch;
105+
if ((t_ch = *text) == '\0' && p_ch != '*')
106+
return WM_ABORT_ALL;
107+
if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
108+
t_ch = tolower(t_ch);
109+
if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
110+
p_ch = tolower(p_ch);
111+
switch (p_ch) {
112+
case '\\':
113+
/* Literal match with following character. Note that the test
114+
* in "default" handles the p[1] == '\0' failure case. */
115+
p_ch = *++p;
116+
/* FALLTHROUGH */
117+
default:
118+
if (t_ch != p_ch)
119+
return WM_NOMATCH;
120+
continue;
121+
case '?':
122+
/* Match anything but '/'. */
123+
if ((flags & WM_PATHNAME) && t_ch == '/')
124+
return WM_NOMATCH;
125+
continue;
126+
case '*':
127+
if (*++p == '*') {
128+
const uchar *prev_p = p - 2;
129+
while (*++p == '*') {}
130+
if (!(flags & WM_PATHNAME))
131+
/* without WM_PATHNAME, '*' == '**' */
132+
match_slash = 1;
133+
else if ((prev_p < pattern || *prev_p == '/') &&
134+
(*p == '\0' || *p == '/' ||
135+
(p[0] == '\\' && p[1] == '/'))) {
136+
/*
137+
* Assuming we already match 'foo/' and are at
138+
* <star star slash>, just assume it matches
139+
* nothing and go ahead match the rest of the
140+
* pattern with the remaining string. This
141+
* helps make foo/<*><*>/bar (<> because
142+
* otherwise it breaks C comment syntax) match
143+
* both foo/bar and foo/a/bar.
144+
*/
145+
if (p[0] == '/' &&
146+
dowild(p + 1, text, flags) == WM_MATCH)
147+
return WM_MATCH;
148+
match_slash = 1;
149+
} else /* WM_PATHNAME is set */
150+
match_slash = 0;
151+
} else
152+
/* without WM_PATHNAME, '*' == '**' */
153+
match_slash = flags & WM_PATHNAME ? 0 : 1;
154+
if (*p == '\0') {
155+
/* Trailing "**" matches everything. Trailing "*" matches
156+
* only if there are no more slash characters. */
157+
if (!match_slash) {
158+
if (strchr((char*)text, '/') != NULL)
159+
return WM_NOMATCH;
160+
}
161+
return WM_MATCH;
162+
} else if (!match_slash && *p == '/') {
163+
/*
164+
* _one_ asterisk followed by a slash
165+
* with WM_PATHNAME matches the next
166+
* directory
167+
*/
168+
const char *slash = strchr((char*)text, '/');
169+
if (!slash)
170+
return WM_NOMATCH;
171+
text = (const uchar*)slash;
172+
/* the slash is consumed by the top-level for loop */
173+
break;
174+
}
175+
while (1) {
176+
if (t_ch == '\0')
177+
break;
178+
/*
179+
* Try to advance faster when an asterisk is
180+
* followed by a literal. We know in this case
181+
* that the string before the literal
182+
* must belong to "*".
183+
* If match_slash is false, do not look past
184+
* the first slash as it cannot belong to '*'.
185+
*/
186+
if (!is_glob_special(*p)) {
187+
p_ch = *p;
188+
if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
189+
p_ch = tolower(p_ch);
190+
while ((t_ch = *text) != '\0' &&
191+
(match_slash || t_ch != '/')) {
192+
if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
193+
t_ch = tolower(t_ch);
194+
if (t_ch == p_ch)
195+
break;
196+
text++;
197+
}
198+
if (t_ch != p_ch)
199+
return WM_NOMATCH;
200+
}
201+
if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
202+
if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
203+
return matched;
204+
} else if (!match_slash && t_ch == '/')
205+
return WM_ABORT_TO_STARSTAR;
206+
t_ch = *++text;
207+
}
208+
return WM_ABORT_ALL;
209+
case '[':
210+
p_ch = *++p;
211+
#ifdef NEGATE_CLASS2
212+
if (p_ch == NEGATE_CLASS2)
213+
p_ch = NEGATE_CLASS;
214+
#endif
215+
/* Assign literal 1/0 because of "matched" comparison. */
216+
negated = p_ch == NEGATE_CLASS ? 1 : 0;
217+
if (negated) {
218+
/* Inverted character class. */
219+
p_ch = *++p;
220+
}
221+
prev_ch = 0;
222+
matched = 0;
223+
do {
224+
if (!p_ch)
225+
return WM_ABORT_ALL;
226+
if (p_ch == '\\') {
227+
p_ch = *++p;
228+
if (!p_ch)
229+
return WM_ABORT_ALL;
230+
if (t_ch == p_ch)
231+
matched = 1;
232+
} else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
233+
p_ch = *++p;
234+
if (p_ch == '\\') {
235+
p_ch = *++p;
236+
if (!p_ch)
237+
return WM_ABORT_ALL;
238+
}
239+
if (t_ch <= p_ch && t_ch >= prev_ch)
240+
matched = 1;
241+
else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
242+
uchar t_ch_upper = toupper(t_ch);
243+
if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
244+
matched = 1;
245+
}
246+
p_ch = 0; /* This makes "prev_ch" get set to 0. */
247+
} else if (p_ch == '[' && p[1] == ':') {
248+
const uchar *s;
249+
int i;
250+
for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
251+
if (!p_ch)
252+
return WM_ABORT_ALL;
253+
i = p - s - 1;
254+
if (i < 0 || p[-1] != ':') {
255+
/* Didn't find ":]", so treat like a normal set. */
256+
p = s - 2;
257+
p_ch = '[';
258+
if (t_ch == p_ch)
259+
matched = 1;
260+
continue;
261+
}
262+
if (CC_EQ(s,i, "alnum")) {
263+
if (ISALNUM(t_ch))
264+
matched = 1;
265+
} else if (CC_EQ(s,i, "alpha")) {
266+
if (ISALPHA(t_ch))
267+
matched = 1;
268+
} else if (CC_EQ(s,i, "blank")) {
269+
if (ISBLANK(t_ch))
270+
matched = 1;
271+
} else if (CC_EQ(s,i, "cntrl")) {
272+
if (ISCNTRL(t_ch))
273+
matched = 1;
274+
} else if (CC_EQ(s,i, "digit")) {
275+
if (ISDIGIT(t_ch))
276+
matched = 1;
277+
} else if (CC_EQ(s,i, "graph")) {
278+
if (ISGRAPH(t_ch))
279+
matched = 1;
280+
} else if (CC_EQ(s,i, "lower")) {
281+
if (ISLOWER(t_ch))
282+
matched = 1;
283+
} else if (CC_EQ(s,i, "print")) {
284+
if (ISPRINT(t_ch))
285+
matched = 1;
286+
} else if (CC_EQ(s,i, "punct")) {
287+
if (ISPUNCT(t_ch))
288+
matched = 1;
289+
} else if (CC_EQ(s,i, "space")) {
290+
if (ISSPACE(t_ch))
291+
matched = 1;
292+
} else if (CC_EQ(s,i, "upper")) {
293+
if (ISUPPER(t_ch))
294+
matched = 1;
295+
else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
296+
matched = 1;
297+
} else if (CC_EQ(s,i, "xdigit")) {
298+
if (ISXDIGIT(t_ch))
299+
matched = 1;
300+
} else /* malformed [:class:] string */
301+
return WM_ABORT_ALL;
302+
p_ch = 0; /* This makes "prev_ch" get set to 0. */
303+
} else if (t_ch == p_ch)
304+
matched = 1;
305+
} while (prev_ch = p_ch, (p_ch = *++p) != ']');
306+
if (matched == negated ||
307+
((flags & WM_PATHNAME) && t_ch == '/'))
308+
return WM_NOMATCH;
309+
continue;
310+
}
311+
}
312+
313+
return *text ? WM_NOMATCH : WM_MATCH;
314+
}
315+
316+
/* Match the "pattern" against the "text" string. */
317+
int wildmatch(const char *pattern, const char *text, unsigned int flags)
318+
{
319+
return dowild((const uchar*)pattern, (const uchar*)text, flags);
320+
}

src/wildmatch.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/*
2+
* Copyright (C) the libgit2 contributors. All rights reserved.
3+
*
4+
* This file is part of libgit2, distributed under the GNU GPL v2 with
5+
* a Linking Exception. For full terms see the included COPYING file.
6+
*/
7+
8+
#ifndef INCLUDE_wildmatch_h__
9+
#define INCLUDE_wildmatch_h__
10+
11+
#include "common.h"
12+
13+
#define WM_CASEFOLD 1
14+
#define WM_PATHNAME 2
15+
16+
#define WM_NOMATCH 1
17+
#define WM_MATCH 0
18+
#define WM_ABORT_ALL -1
19+
#define WM_ABORT_TO_STARSTAR -2
20+
21+
int wildmatch(const char *pattern, const char *text, unsigned int flags);
22+
23+
#endif

0 commit comments

Comments
 (0)