Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: fusl/src/regex/fnmatch.c

Issue 1714623002: [fusl] clang-format fusl (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: headers too Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * An implementation of what I call the "Sea of Stars" algorithm for 2 * An implementation of what I call the "Sea of Stars" algorithm for
3 * POSIX fnmatch(). The basic idea is that we factor the pattern into 3 * POSIX fnmatch(). The basic idea is that we factor the pattern into
4 * a head component (which we match first and can reject without ever 4 * a head component (which we match first and can reject without ever
5 * measuring the length of the string), an optional tail component 5 * measuring the length of the string), an optional tail component
6 * (which only exists if the pattern contains at least one star), and 6 * (which only exists if the pattern contains at least one star), and
7 * an optional "sea of stars", a set of star-separated components 7 * an optional "sea of stars", a set of star-separated components
8 * between the head and tail. After the head and tail matches have 8 * between the head and tail. After the head and tail matches have
9 * been removed from the input string, the components in the "sea of 9 * been removed from the input string, the components in the "sea of
10 * stars" are matched sequentially by searching for their first 10 * stars" are matched sequentially by searching for their first
11 * occurrence past the end of the previous match. 11 * occurrence past the end of the previous match.
12 * 12 *
13 * - Rich Felker, April 2012 13 * - Rich Felker, April 2012
14 */ 14 */
15 15
16 #include <string.h> 16 #include <string.h>
17 #include <fnmatch.h> 17 #include <fnmatch.h>
18 #include <stdlib.h> 18 #include <stdlib.h>
19 #include <wchar.h> 19 #include <wchar.h>
20 #include <wctype.h> 20 #include <wctype.h>
21 #include "locale_impl.h" 21 #include "locale_impl.h"
22 22
23 #define END 0 23 #define END 0
24 #define UNMATCHABLE -2 24 #define UNMATCHABLE -2
25 #define BRACKET -3 25 #define BRACKET -3
26 #define QUESTION -4 26 #define QUESTION -4
27 #define STAR -5 27 #define STAR -5
28 28
29 static int str_next(const char *str, size_t n, size_t *step) 29 static int str_next(const char* str, size_t n, size_t* step) {
30 { 30 if (!n) {
31 » if (!n) { 31 *step = 0;
32 » » *step = 0; 32 return 0;
33 » » return 0; 33 }
34 » } 34 if (str[0] >= 128U) {
35 » if (str[0] >= 128U) { 35 wchar_t wc;
36 » » wchar_t wc; 36 int k = mbtowc(&wc, str, n);
37 » » int k = mbtowc(&wc, str, n); 37 if (k < 0) {
38 » » if (k<0) { 38 *step = 1;
39 » » » *step = 1; 39 return -1;
40 » » » return -1; 40 }
41 » » } 41 *step = k;
42 » » *step = k; 42 return wc;
43 » » return wc; 43 }
44 » } 44 *step = 1;
45 » *step = 1; 45 return str[0];
46 » return str[0]; 46 }
47 } 47
48 48 static int pat_next(const char* pat, size_t m, size_t* step, int flags) {
49 static int pat_next(const char *pat, size_t m, size_t *step, int flags) 49 int esc = 0;
50 { 50 if (!m || !*pat) {
51 » int esc = 0; 51 *step = 0;
52 » if (!m || !*pat) { 52 return END;
53 » » *step = 0; 53 }
54 » » return END; 54 *step = 1;
55 » } 55 if (pat[0] == '\\' && pat[1] && !(flags & FNM_NOESCAPE)) {
56 » *step = 1; 56 *step = 2;
57 » if (pat[0]=='\\' && pat[1] && !(flags & FNM_NOESCAPE)) { 57 pat++;
58 » » *step = 2; 58 esc = 1;
59 » » pat++; 59 goto escaped;
60 » » esc = 1; 60 }
61 » » goto escaped; 61 if (pat[0] == '[') {
62 » } 62 size_t k = 1;
63 » if (pat[0]=='[') { 63 if (k < m)
64 » » size_t k = 1; 64 if (pat[k] == '^' || pat[k] == '!')
65 » » if (k<m) if (pat[k] == '^' || pat[k] == '!') k++; 65 k++;
66 » » if (k<m) if (pat[k] == ']') k++; 66 if (k < m)
67 » » for (; k<m && pat[k] && pat[k]!=']'; k++) { 67 if (pat[k] == ']')
68 » » » if (k+1<m && pat[k+1] && pat[k]=='[' && (pat[k+1]==':' | | pat[k+1]=='.' || pat[k+1]=='=')) { 68 k++;
69 » » » » int z = pat[k+1]; 69 for (; k < m && pat[k] && pat[k] != ']'; k++) {
70 » » » » k+=2; 70 if (k + 1 < m && pat[k + 1] && pat[k] == '[' &&
71 » » » » if (k<m && pat[k]) k++; 71 (pat[k + 1] == ':' || pat[k + 1] == '.' || pat[k + 1] == '=')) {
72 » » » » while (k<m && pat[k] && (pat[k-1]!=z || pat[k]!= ']')) k++; 72 int z = pat[k + 1];
73 » » » » if (k==m || !pat[k]) break; 73 k += 2;
74 » » » } 74 if (k < m && pat[k])
75 » » } 75 k++;
76 » » if (k==m || !pat[k]) { 76 while (k < m && pat[k] && (pat[k - 1] != z || pat[k] != ']'))
77 » » » *step = 1; 77 k++;
78 » » » return '['; 78 if (k == m || !pat[k])
79 » » } 79 break;
80 » » *step = k+1; 80 }
81 » » return BRACKET; 81 }
82 » } 82 if (k == m || !pat[k]) {
83 » if (pat[0] == '*') 83 *step = 1;
84 » » return STAR; 84 return '[';
85 » if (pat[0] == '?') 85 }
86 » » return QUESTION; 86 *step = k + 1;
87 return BRACKET;
88 }
89 if (pat[0] == '*')
90 return STAR;
91 if (pat[0] == '?')
92 return QUESTION;
87 escaped: 93 escaped:
88 if (pat[0] >= 128U) { 94 if (pat[0] >= 128U) {
89 wchar_t wc; 95 wchar_t wc;
90 int k = mbtowc(&wc, pat, m); 96 int k = mbtowc(&wc, pat, m);
91 if (k<0) { 97 if (k < 0) {
92 *step = 0; 98 *step = 0;
93 return UNMATCHABLE; 99 return UNMATCHABLE;
94 } 100 }
95 *step = k + esc; 101 *step = k + esc;
96 return wc; 102 return wc;
97 } 103 }
98 return pat[0]; 104 return pat[0];
99 } 105 }
100 106
101 static int casefold(int k) 107 static int casefold(int k) {
102 { 108 int c = towupper(k);
103 int c = towupper(k); 109 return c == k ? towlower(k) : c;
104 return c == k ? towlower(k) : c; 110 }
105 } 111
106 112 static int match_bracket(const char* p, int k, int kfold) {
107 static int match_bracket(const char *p, int k, int kfold) 113 wchar_t wc;
108 { 114 int inv = 0;
109 wchar_t wc; 115 p++;
110 int inv = 0; 116 if (*p == '^' || *p == '!') {
111 p++; 117 inv = 1;
112 if (*p=='^' || *p=='!') { 118 p++;
113 inv = 1; 119 }
114 p++; 120 if (*p == ']') {
115 } 121 if (k == ']')
116 if (*p==']') { 122 return !inv;
117 if (k==']') return !inv; 123 p++;
118 p++; 124 } else if (*p == '-') {
119 } else if (*p=='-') { 125 if (k == '-')
120 if (k=='-') return !inv; 126 return !inv;
121 p++; 127 p++;
122 } 128 }
123 wc = p[-1]; 129 wc = p[-1];
124 for (; *p != ']'; p++) { 130 for (; *p != ']'; p++) {
125 if (p[0]=='-' && p[1]!=']') { 131 if (p[0] == '-' && p[1] != ']') {
126 wchar_t wc2; 132 wchar_t wc2;
127 int l = mbtowc(&wc2, p+1, 4); 133 int l = mbtowc(&wc2, p + 1, 4);
128 if (l < 0) return 0; 134 if (l < 0)
129 if (wc <= wc2) 135 return 0;
130 if ((unsigned)k-wc <= wc2-wc || 136 if (wc <= wc2)
131 (unsigned)kfold-wc <= wc2-wc) 137 if ((unsigned)k - wc <= wc2 - wc || (unsigned)kfold - wc <= wc2 - wc)
132 return !inv; 138 return !inv;
133 p += l-1; 139 p += l - 1;
134 continue; 140 continue;
135 } 141 }
136 if (p[0]=='[' && (p[1]==':' || p[1]=='.' || p[1]=='=')) { 142 if (p[0] == '[' && (p[1] == ':' || p[1] == '.' || p[1] == '=')) {
137 const char *p0 = p+2; 143 const char* p0 = p + 2;
138 int z = p[1]; 144 int z = p[1];
139 p+=3; 145 p += 3;
140 while (p[-1]!=z || p[0]!=']') p++; 146 while (p[-1] != z || p[0] != ']')
141 if (z == ':' && p-1-p0 < 16) { 147 p++;
142 char buf[16]; 148 if (z == ':' && p - 1 - p0 < 16) {
143 memcpy(buf, p0, p-1-p0); 149 char buf[16];
144 buf[p-1-p0] = 0; 150 memcpy(buf, p0, p - 1 - p0);
145 if (iswctype(k, wctype(buf)) || 151 buf[p - 1 - p0] = 0;
146 iswctype(kfold, wctype(buf))) 152 if (iswctype(k, wctype(buf)) || iswctype(kfold, wctype(buf)))
147 return !inv; 153 return !inv;
148 } 154 }
149 continue; 155 continue;
150 } 156 }
151 if (*p < 128U) { 157 if (*p < 128U) {
152 wc = (unsigned char)*p; 158 wc = (unsigned char)*p;
153 } else { 159 } else {
154 int l = mbtowc(&wc, p, 4); 160 int l = mbtowc(&wc, p, 4);
155 if (l < 0) return 0; 161 if (l < 0)
156 p += l-1; 162 return 0;
157 } 163 p += l - 1;
158 if (wc==k || wc==kfold) return !inv; 164 }
159 } 165 if (wc == k || wc == kfold)
160 return inv; 166 return !inv;
161 } 167 }
162 168 return inv;
163 static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n , int flags) 169 }
164 { 170
165 const char *p, *ptail, *endpat; 171 static int fnmatch_internal(const char* pat,
166 const char *s, *stail, *endstr; 172 size_t m,
167 size_t pinc, sinc, tailcnt=0; 173 const char* str,
168 int c, k, kfold; 174 size_t n,
169 175 int flags) {
170 if (flags & FNM_PERIOD) { 176 const char *p, *ptail, *endpat;
171 if (*str == '.' && *pat != '.') 177 const char *s, *stail, *endstr;
172 return FNM_NOMATCH; 178 size_t pinc, sinc, tailcnt = 0;
173 } 179 int c, k, kfold;
174 for (;;) { 180
175 switch ((c = pat_next(pat, m, &pinc, flags))) { 181 if (flags & FNM_PERIOD) {
176 case UNMATCHABLE: 182 if (*str == '.' && *pat != '.')
177 return FNM_NOMATCH; 183 return FNM_NOMATCH;
178 case STAR: 184 }
179 pat++; 185 for (;;) {
180 m--; 186 switch ((c = pat_next(pat, m, &pinc, flags))) {
181 break; 187 case UNMATCHABLE:
182 default: 188 return FNM_NOMATCH;
183 k = str_next(str, n, &sinc); 189 case STAR:
184 if (k <= 0) 190 pat++;
185 return (c==END) ? 0 : FNM_NOMATCH; 191 m--;
186 str += sinc; 192 break;
187 n -= sinc; 193 default:
188 kfold = flags & FNM_CASEFOLD ? casefold(k) : k; 194 k = str_next(str, n, &sinc);
189 if (c == BRACKET) { 195 if (k <= 0)
190 if (!match_bracket(pat, k, kfold)) 196 return (c == END) ? 0 : FNM_NOMATCH;
191 return FNM_NOMATCH; 197 str += sinc;
192 } else if (c != QUESTION && k != c && kfold != c) { 198 n -= sinc;
193 return FNM_NOMATCH; 199 kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
194 } 200 if (c == BRACKET) {
195 pat+=pinc; 201 if (!match_bracket(pat, k, kfold))
196 m-=pinc; 202 return FNM_NOMATCH;
197 continue; 203 } else if (c != QUESTION && k != c && kfold != c) {
198 } 204 return FNM_NOMATCH;
199 break; 205 }
200 } 206 pat += pinc;
201 207 m -= pinc;
202 /* Compute real pat length if it was initially unknown/-1 */ 208 continue;
203 m = strnlen(pat, m); 209 }
204 endpat = pat + m; 210 break;
205 211 }
206 /* Find the last * in pat and count chars needed after it */ 212
207 for (p=ptail=pat; p<endpat; p+=pinc) { 213 /* Compute real pat length if it was initially unknown/-1 */
208 switch (pat_next(p, endpat-p, &pinc, flags)) { 214 m = strnlen(pat, m);
209 case UNMATCHABLE: 215 endpat = pat + m;
210 return FNM_NOMATCH; 216
211 case STAR: 217 /* Find the last * in pat and count chars needed after it */
212 tailcnt=0; 218 for (p = ptail = pat; p < endpat; p += pinc) {
213 ptail = p+1; 219 switch (pat_next(p, endpat - p, &pinc, flags)) {
214 break; 220 case UNMATCHABLE:
215 default: 221 return FNM_NOMATCH;
216 tailcnt++; 222 case STAR:
217 break; 223 tailcnt = 0;
218 } 224 ptail = p + 1;
219 } 225 break;
220 226 default:
221 /* Past this point we need not check for UNMATCHABLE in pat, 227 tailcnt++;
222 * because all of pat has already been parsed once. */ 228 break;
223 229 }
224 /* Compute real str length if it was initially unknown/-1 */ 230 }
225 n = strnlen(str, n); 231
226 endstr = str + n; 232 /* Past this point we need not check for UNMATCHABLE in pat,
227 if (n < tailcnt) return FNM_NOMATCH; 233 * because all of pat has already been parsed once. */
228 234
229 /* Find the final tailcnt chars of str, accounting for UTF-8. 235 /* Compute real str length if it was initially unknown/-1 */
230 * On illegal sequences we may get it wrong, but in that case 236 n = strnlen(str, n);
231 * we necessarily have a matching failure anyway. */ 237 endstr = str + n;
232 for (s=endstr; s>str && tailcnt; tailcnt--) { 238 if (n < tailcnt)
233 if (s[-1] < 128U || MB_CUR_MAX==1) s--; 239 return FNM_NOMATCH;
234 else while ((unsigned char)*--s-0x80U<0x40 && s>str); 240
235 } 241 /* Find the final tailcnt chars of str, accounting for UTF-8.
236 if (tailcnt) return FNM_NOMATCH; 242 * On illegal sequences we may get it wrong, but in that case
237 stail = s; 243 * we necessarily have a matching failure anyway. */
238 244 for (s = endstr; s > str && tailcnt; tailcnt--) {
239 /* Check that the pat and str tails match */ 245 if (s[-1] < 128U || MB_CUR_MAX == 1)
240 p = ptail; 246 s--;
241 for (;;) { 247 else
242 c = pat_next(p, endpat-p, &pinc, flags); 248 while ((unsigned char)*--s - 0x80U < 0x40 && s > str)
243 p += pinc; 249 ;
244 if ((k = str_next(s, endstr-s, &sinc)) <= 0) { 250 }
245 if (c != END) return FNM_NOMATCH; 251 if (tailcnt)
246 break; 252 return FNM_NOMATCH;
247 } 253 stail = s;
248 s += sinc; 254
249 kfold = flags & FNM_CASEFOLD ? casefold(k) : k; 255 /* Check that the pat and str tails match */
250 if (c == BRACKET) { 256 p = ptail;
251 if (!match_bracket(p-pinc, k, kfold)) 257 for (;;) {
252 return FNM_NOMATCH; 258 c = pat_next(p, endpat - p, &pinc, flags);
253 } else if (c != QUESTION && k != c && kfold != c) { 259 p += pinc;
254 return FNM_NOMATCH; 260 if ((k = str_next(s, endstr - s, &sinc)) <= 0) {
255 } 261 if (c != END)
256 } 262 return FNM_NOMATCH;
257 263 break;
258 /* We're all done with the tails now, so throw them out */ 264 }
259 endstr = stail; 265 s += sinc;
260 endpat = ptail; 266 kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
261 267 if (c == BRACKET) {
262 /* Match pattern components until there are none left */ 268 if (!match_bracket(p - pinc, k, kfold))
263 while (pat<endpat) { 269 return FNM_NOMATCH;
264 p = pat; 270 } else if (c != QUESTION && k != c && kfold != c) {
265 s = str; 271 return FNM_NOMATCH;
266 for (;;) { 272 }
267 c = pat_next(p, endpat-p, &pinc, flags); 273 }
268 p += pinc; 274
269 /* Encountering * completes/commits a component */ 275 /* We're all done with the tails now, so throw them out */
270 if (c == STAR) { 276 endstr = stail;
271 pat = p; 277 endpat = ptail;
272 str = s; 278
273 break; 279 /* Match pattern components until there are none left */
274 } 280 while (pat < endpat) {
275 k = str_next(s, endstr-s, &sinc); 281 p = pat;
276 if (!k) 282 s = str;
277 return FNM_NOMATCH; 283 for (;;) {
278 kfold = flags & FNM_CASEFOLD ? casefold(k) : k; 284 c = pat_next(p, endpat - p, &pinc, flags);
279 if (c == BRACKET) { 285 p += pinc;
280 if (!match_bracket(p-pinc, k, kfold)) 286 /* Encountering * completes/commits a component */
281 break; 287 if (c == STAR) {
282 } else if (c != QUESTION && k != c && kfold != c) { 288 pat = p;
283 break; 289 str = s;
284 } 290 break;
285 s += sinc; 291 }
286 } 292 k = str_next(s, endstr - s, &sinc);
287 if (c == STAR) continue; 293 if (!k)
288 /* If we failed, advance str, by 1 char if it's a valid 294 return FNM_NOMATCH;
289 * char, or past all invalid bytes otherwise. */ 295 kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
290 k = str_next(str, endstr-str, &sinc); 296 if (c == BRACKET) {
291 if (k > 0) str += sinc; 297 if (!match_bracket(p - pinc, k, kfold))
292 else for (str++; str_next(str, endstr-str, &sinc)<0; str++); 298 break;
293 } 299 } else if (c != QUESTION && k != c && kfold != c) {
294 300 break;
295 return 0; 301 }
296 } 302 s += sinc;
297 303 }
298 int fnmatch(const char *pat, const char *str, int flags) 304 if (c == STAR)
299 { 305 continue;
300 const char *s, *p; 306 /* If we failed, advance str, by 1 char if it's a valid
301 size_t inc; 307 * char, or past all invalid bytes otherwise. */
302 int c; 308 k = str_next(str, endstr - str, &sinc);
303 if (flags & FNM_PATHNAME) for (;;) { 309 if (k > 0)
304 for (s=str; *s && *s!='/'; s++); 310 str += sinc;
305 for (p=pat; (c=pat_next(p, -1, &inc, flags))!=END && c!='/'; p+= inc); 311 else
306 if (c!=*s && (!*s || !(flags & FNM_LEADING_DIR))) 312 for (str++; str_next(str, endstr - str, &sinc) < 0; str++)
307 return FNM_NOMATCH; 313 ;
308 if (fnmatch_internal(pat, p-pat, str, s-str, flags)) 314 }
309 return FNM_NOMATCH; 315
310 if (!c) return 0; 316 return 0;
311 str = s+1; 317 }
312 pat = p+inc; 318
313 } else if (flags & FNM_LEADING_DIR) { 319 int fnmatch(const char* pat, const char* str, int flags) {
314 for (s=str; *s; s++) { 320 const char *s, *p;
315 if (*s != '/') continue; 321 size_t inc;
316 if (!fnmatch_internal(pat, -1, str, s-str, flags)) 322 int c;
317 return 0; 323 if (flags & FNM_PATHNAME)
318 } 324 for (;;) {
319 } 325 for (s = str; *s && *s != '/'; s++)
320 return fnmatch_internal(pat, -1, str, -1, flags); 326 ;
321 } 327 for (p = pat; (c = pat_next(p, -1, &inc, flags)) != END && c != '/';
328 p += inc)
329 ;
330 if (c != *s && (!*s || !(flags & FNM_LEADING_DIR)))
331 return FNM_NOMATCH;
332 if (fnmatch_internal(pat, p - pat, str, s - str, flags))
333 return FNM_NOMATCH;
334 if (!c)
335 return 0;
336 str = s + 1;
337 pat = p + inc;
338 }
339 else if (flags & FNM_LEADING_DIR) {
340 for (s = str; *s; s++) {
341 if (*s != '/')
342 continue;
343 if (!fnmatch_internal(pat, -1, str, s - str, flags))
344 return 0;
345 }
346 }
347 return fnmatch_internal(pat, -1, str, -1, flags);
348 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698