OLD | NEW |
(Empty) | |
| 1 /* Copyright (C) 1991-1993, 1996-2006, 2009-2012 Free Software Foundation, Inc. |
| 2 This file is part of the GNU C Library. |
| 3 |
| 4 This program is free software; you can redistribute it and/or modify |
| 5 it under the terms of the GNU General Public License as published by |
| 6 the Free Software Foundation; either version 3, or (at your option) |
| 7 any later version. |
| 8 |
| 9 This program is distributed in the hope that it will be useful, |
| 10 but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 GNU General Public License for more details. |
| 13 |
| 14 You should have received a copy of the GNU General Public License |
| 15 along with this program; if not, see <http://www.gnu.org/licenses/>. */ |
| 16 |
/* Match STRING against the file name pattern PATTERN, returning zero if
   it matches, nonzero if not.  */

/* Forward declaration: FCT calls EXT to handle the extended-pattern
   operators ?( *( +( @( !( before EXT is defined further down.  */
static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
                const CHAR *string_end, bool no_leading_period, int flags)
     internal_function;
/* Forward declaration: FCT calls END to find the end of an
   extended-pattern group.  */
static const CHAR *END (const CHAR *patternp) internal_function;
| 23 |
| 24 static int |
| 25 internal_function |
| 26 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, |
| 27 bool no_leading_period, int flags) |
| 28 { |
| 29 register const CHAR *p = pattern, *n = string; |
| 30 register UCHAR c; |
| 31 #ifdef _LIBC |
| 32 # if WIDE_CHAR_VERSION |
| 33 const char *collseq = (const char *) |
| 34 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); |
| 35 # else |
| 36 const UCHAR *collseq = (const UCHAR *) |
| 37 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB); |
| 38 # endif |
| 39 #endif |
| 40 |
| 41 while ((c = *p++) != L_('\0')) |
| 42 { |
| 43 bool new_no_leading_period = false; |
| 44 c = FOLD (c); |
| 45 |
| 46 switch (c) |
| 47 { |
| 48 case L_('?'): |
| 49 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') |
| 50 { |
| 51 int res; |
| 52 |
| 53 res = EXT (c, p, n, string_end, no_leading_period, |
| 54 flags); |
| 55 if (res != -1) |
| 56 return res; |
| 57 } |
| 58 |
| 59 if (n == string_end) |
| 60 return FNM_NOMATCH; |
| 61 else if (*n == L_('/') && (flags & FNM_FILE_NAME)) |
| 62 return FNM_NOMATCH; |
| 63 else if (*n == L_('.') && no_leading_period) |
| 64 return FNM_NOMATCH; |
| 65 break; |
| 66 |
| 67 case L_('\\'): |
| 68 if (!(flags & FNM_NOESCAPE)) |
| 69 { |
| 70 c = *p++; |
| 71 if (c == L_('\0')) |
| 72 /* Trailing \ loses. */ |
| 73 return FNM_NOMATCH; |
| 74 c = FOLD (c); |
| 75 } |
| 76 if (n == string_end || FOLD ((UCHAR) *n) != c) |
| 77 return FNM_NOMATCH; |
| 78 break; |
| 79 |
| 80 case L_('*'): |
| 81 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') |
| 82 { |
| 83 int res; |
| 84 |
| 85 res = EXT (c, p, n, string_end, no_leading_period, |
| 86 flags); |
| 87 if (res != -1) |
| 88 return res; |
| 89 } |
| 90 |
| 91 if (n != string_end && *n == L_('.') && no_leading_period) |
| 92 return FNM_NOMATCH; |
| 93 |
| 94 for (c = *p++; c == L_('?') || c == L_('*'); c = *p++) |
| 95 { |
| 96 if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0) |
| 97 { |
| 98 const CHAR *endp = END (p); |
| 99 if (endp != p) |
| 100 { |
| 101 /* This is a pattern. Skip over it. */ |
| 102 p = endp; |
| 103 continue; |
| 104 } |
| 105 } |
| 106 |
| 107 if (c == L_('?')) |
| 108 { |
| 109 /* A ? needs to match one character. */ |
| 110 if (n == string_end) |
| 111 /* There isn't another character; no match. */ |
| 112 return FNM_NOMATCH; |
| 113 else if (*n == L_('/') |
| 114 && __builtin_expect (flags & FNM_FILE_NAME, 0)) |
| 115 /* A slash does not match a wildcard under |
| 116 FNM_FILE_NAME. */ |
| 117 return FNM_NOMATCH; |
| 118 else |
| 119 /* One character of the string is consumed in matching |
| 120 this ? wildcard, so *??? won't match if there are |
| 121 less than three characters. */ |
| 122 ++n; |
| 123 } |
| 124 } |
| 125 |
| 126 if (c == L_('\0')) |
| 127 /* The wildcard(s) is/are the last element of the pattern. |
| 128 If the name is a file name and contains another slash |
| 129 this means it cannot match, unless the FNM_LEADING_DIR |
| 130 flag is set. */ |
| 131 { |
| 132 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH; |
| 133 |
| 134 if (flags & FNM_FILE_NAME) |
| 135 { |
| 136 if (flags & FNM_LEADING_DIR) |
| 137 result = 0; |
| 138 else |
| 139 { |
| 140 if (MEMCHR (n, L_('/'), string_end - n) == NULL) |
| 141 result = 0; |
| 142 } |
| 143 } |
| 144 |
| 145 return result; |
| 146 } |
| 147 else |
| 148 { |
| 149 const CHAR *endp; |
| 150 |
| 151 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'), |
| 152 string_end - n); |
| 153 if (endp == NULL) |
| 154 endp = string_end; |
| 155 |
| 156 if (c == L_('[') |
| 157 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0 |
| 158 && (c == L_('@') || c == L_('+') || c == L_('!')) |
| 159 && *p == L_('('))) |
| 160 { |
| 161 int flags2 = ((flags & FNM_FILE_NAME) |
| 162 ? flags : (flags & ~FNM_PERIOD)); |
| 163 bool no_leading_period2 = no_leading_period; |
| 164 |
| 165 for (--p; n < endp; ++n, no_leading_period2 = false) |
| 166 if (FCT (p, n, string_end, no_leading_period2, flags2) |
| 167 == 0) |
| 168 return 0; |
| 169 } |
| 170 else if (c == L_('/') && (flags & FNM_FILE_NAME)) |
| 171 { |
| 172 while (n < string_end && *n != L_('/')) |
| 173 ++n; |
| 174 if (n < string_end && *n == L_('/') |
| 175 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags) |
| 176 == 0)) |
| 177 return 0; |
| 178 } |
| 179 else |
| 180 { |
| 181 int flags2 = ((flags & FNM_FILE_NAME) |
| 182 ? flags : (flags & ~FNM_PERIOD)); |
| 183 int no_leading_period2 = no_leading_period; |
| 184 |
| 185 if (c == L_('\\') && !(flags & FNM_NOESCAPE)) |
| 186 c = *p; |
| 187 c = FOLD (c); |
| 188 for (--p; n < endp; ++n, no_leading_period2 = false) |
| 189 if (FOLD ((UCHAR) *n) == c |
| 190 && (FCT (p, n, string_end, no_leading_period2, flags2) |
| 191 == 0)) |
| 192 return 0; |
| 193 } |
| 194 } |
| 195 |
| 196 /* If we come here no match is possible with the wildcard. */ |
| 197 return FNM_NOMATCH; |
| 198 |
| 199 case L_('['): |
| 200 { |
| 201 /* Nonzero if the sense of the character class is inverted. */ |
| 202 const CHAR *p_init = p; |
| 203 const CHAR *n_init = n; |
| 204 register bool not; |
| 205 CHAR cold; |
| 206 UCHAR fn; |
| 207 |
| 208 if (posixly_correct == 0) |
| 209 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; |
| 210 |
| 211 if (n == string_end) |
| 212 return FNM_NOMATCH; |
| 213 |
| 214 if (*n == L_('.') && no_leading_period) |
| 215 return FNM_NOMATCH; |
| 216 |
| 217 if (*n == L_('/') && (flags & FNM_FILE_NAME)) |
| 218 /* '/' cannot be matched. */ |
| 219 return FNM_NOMATCH; |
| 220 |
| 221 not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^'))); |
| 222 if (not) |
| 223 ++p; |
| 224 |
| 225 fn = FOLD ((UCHAR) *n); |
| 226 |
| 227 c = *p++; |
| 228 for (;;) |
| 229 { |
| 230 if (!(flags & FNM_NOESCAPE) && c == L_('\\')) |
| 231 { |
| 232 if (*p == L_('\0')) |
| 233 return FNM_NOMATCH; |
| 234 c = FOLD ((UCHAR) *p); |
| 235 ++p; |
| 236 |
| 237 goto normal_bracket; |
| 238 } |
| 239 else if (c == L_('[') && *p == L_(':')) |
| 240 { |
| 241 /* Leave room for the null. */ |
| 242 CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; |
| 243 size_t c1 = 0; |
| 244 #if defined _LIBC || WIDE_CHAR_SUPPORT |
| 245 wctype_t wt; |
| 246 #endif |
| 247 const CHAR *startp = p; |
| 248 |
| 249 for (;;) |
| 250 { |
| 251 if (c1 == CHAR_CLASS_MAX_LENGTH) |
| 252 /* The name is too long and therefore the pattern |
| 253 is ill-formed. */ |
| 254 return FNM_NOMATCH; |
| 255 |
| 256 c = *++p; |
| 257 if (c == L_(':') && p[1] == L_(']')) |
| 258 { |
| 259 p += 2; |
| 260 break; |
| 261 } |
| 262 if (c < L_('a') || c >= L_('z')) |
| 263 { |
| 264 /* This cannot possibly be a character class name. |
| 265 Match it as a normal range. */ |
| 266 p = startp; |
| 267 c = L_('['); |
| 268 goto normal_bracket; |
| 269 } |
| 270 str[c1++] = c; |
| 271 } |
| 272 str[c1] = L_('\0'); |
| 273 |
| 274 #if defined _LIBC || WIDE_CHAR_SUPPORT |
| 275 wt = IS_CHAR_CLASS (str); |
| 276 if (wt == 0) |
| 277 /* Invalid character class name. */ |
| 278 return FNM_NOMATCH; |
| 279 |
| 280 # if defined _LIBC && ! WIDE_CHAR_VERSION |
| 281 /* The following code is glibc specific but does |
| 282 there a good job in speeding up the code since |
| 283 we can avoid the btowc() call. */ |
| 284 if (_ISCTYPE ((UCHAR) *n, wt)) |
| 285 goto matched; |
| 286 # else |
| 287 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt)) |
| 288 goto matched; |
| 289 # endif |
| 290 #else |
| 291 if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n)) |
| 292 || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n)) |
| 293 || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n)) |
| 294 || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n)) |
| 295 || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n)) |
| 296 || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n)) |
| 297 || (STREQ (str, L_("lower")) && islower ((UCHAR) *n)) |
| 298 || (STREQ (str, L_("print")) && isprint ((UCHAR) *n)) |
| 299 || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n)) |
| 300 || (STREQ (str, L_("space")) && isspace ((UCHAR) *n)) |
| 301 || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n)) |
| 302 || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n))) |
| 303 goto matched; |
| 304 #endif |
| 305 c = *p++; |
| 306 } |
| 307 #ifdef _LIBC |
| 308 else if (c == L_('[') && *p == L_('=')) |
| 309 { |
| 310 UCHAR str[1]; |
| 311 uint32_t nrules = |
| 312 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
| 313 const CHAR *startp = p; |
| 314 |
| 315 c = *++p; |
| 316 if (c == L_('\0')) |
| 317 { |
| 318 p = startp; |
| 319 c = L_('['); |
| 320 goto normal_bracket; |
| 321 } |
| 322 str[0] = c; |
| 323 |
| 324 c = *++p; |
| 325 if (c != L_('=') || p[1] != L_(']')) |
| 326 { |
| 327 p = startp; |
| 328 c = L_('['); |
| 329 goto normal_bracket; |
| 330 } |
| 331 p += 2; |
| 332 |
| 333 if (nrules == 0) |
| 334 { |
| 335 if ((UCHAR) *n == str[0]) |
| 336 goto matched; |
| 337 } |
| 338 else |
| 339 { |
| 340 const int32_t *table; |
| 341 # if WIDE_CHAR_VERSION |
| 342 const int32_t *weights; |
| 343 const int32_t *extra; |
| 344 # else |
| 345 const unsigned char *weights; |
| 346 const unsigned char *extra; |
| 347 # endif |
| 348 const int32_t *indirect; |
| 349 int32_t idx; |
| 350 const UCHAR *cp = (const UCHAR *) str; |
| 351 |
| 352 /* This #include defines a local function! */ |
| 353 # if WIDE_CHAR_VERSION |
| 354 # include <locale/weightwc.h> |
| 355 # else |
| 356 # include <locale/weight.h> |
| 357 # endif |
| 358 |
| 359 # if WIDE_CHAR_VERSION |
| 360 table = (const int32_t *) |
| 361 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); |
| 362 weights = (const int32_t *) |
| 363 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); |
| 364 extra = (const int32_t *) |
| 365 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); |
| 366 indirect = (const int32_t *) |
| 367 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); |
| 368 # else |
| 369 table = (const int32_t *) |
| 370 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); |
| 371 weights = (const unsigned char *) |
| 372 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); |
| 373 extra = (const unsigned char *) |
| 374 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); |
| 375 indirect = (const int32_t *) |
| 376 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); |
| 377 # endif |
| 378 |
| 379 idx = findidx (&cp); |
| 380 if (idx != 0) |
| 381 { |
| 382 /* We found a table entry. Now see whether the |
| 383 character we are currently at has the same |
| 384 equivalence class value. */ |
| 385 int len = weights[idx & 0xffffff]; |
| 386 int32_t idx2; |
| 387 const UCHAR *np = (const UCHAR *) n; |
| 388 |
| 389 idx2 = findidx (&np); |
| 390 if (idx2 != 0 |
| 391 && (idx >> 24) == (idx2 >> 24) |
| 392 && len == weights[idx2 & 0xffffff]) |
| 393 { |
| 394 int cnt = 0; |
| 395 |
| 396 idx &= 0xffffff; |
| 397 idx2 &= 0xffffff; |
| 398 |
| 399 while (cnt < len |
| 400 && (weights[idx + 1 + cnt] |
| 401 == weights[idx2 + 1 + cnt])) |
| 402 ++cnt; |
| 403 |
| 404 if (cnt == len) |
| 405 goto matched; |
| 406 } |
| 407 } |
| 408 } |
| 409 |
| 410 c = *p++; |
| 411 } |
| 412 #endif |
| 413 else if (c == L_('\0')) |
| 414 { |
| 415 /* [ unterminated, treat as normal character. */ |
| 416 p = p_init; |
| 417 n = n_init; |
| 418 c = L_('['); |
| 419 goto normal_match; |
| 420 } |
| 421 else |
| 422 { |
| 423 bool is_range = false; |
| 424 |
| 425 #ifdef _LIBC |
| 426 bool is_seqval = false; |
| 427 |
| 428 if (c == L_('[') && *p == L_('.')) |
| 429 { |
| 430 uint32_t nrules = |
| 431 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
| 432 const CHAR *startp = p; |
| 433 size_t c1 = 0; |
| 434 |
| 435 while (1) |
| 436 { |
| 437 c = *++p; |
| 438 if (c == L_('.') && p[1] == L_(']')) |
| 439 { |
| 440 p += 2; |
| 441 break; |
| 442 } |
| 443 if (c == '\0') |
| 444 return FNM_NOMATCH; |
| 445 ++c1; |
| 446 } |
| 447 |
| 448 /* We have to handling the symbols differently in |
| 449 ranges since then the collation sequence is |
| 450 important. */ |
| 451 is_range = *p == L_('-') && p[1] != L_('\0'); |
| 452 |
| 453 if (nrules == 0) |
| 454 { |
| 455 /* There are no names defined in the collation |
| 456 data. Therefore we only accept the trivial |
| 457 names consisting of the character itself. */ |
| 458 if (c1 != 1) |
| 459 return FNM_NOMATCH; |
| 460 |
| 461 if (!is_range && *n == startp[1]) |
| 462 goto matched; |
| 463 |
| 464 cold = startp[1]; |
| 465 c = *p++; |
| 466 } |
| 467 else |
| 468 { |
| 469 int32_t table_size; |
| 470 const int32_t *symb_table; |
| 471 # ifdef WIDE_CHAR_VERSION |
| 472 char str[c1]; |
| 473 size_t strcnt; |
| 474 # else |
| 475 # define str (startp + 1) |
| 476 # endif |
| 477 const unsigned char *extra; |
| 478 int32_t idx; |
| 479 int32_t elem; |
| 480 int32_t second; |
| 481 int32_t hash; |
| 482 |
| 483 # ifdef WIDE_CHAR_VERSION |
| 484 /* We have to convert the name to a single-byte |
| 485 string. This is possible since the names |
| 486 consist of ASCII characters and the internal |
| 487 representation is UCS4. */ |
| 488 for (strcnt = 0; strcnt < c1; ++strcnt) |
| 489 str[strcnt] = startp[1 + strcnt]; |
| 490 # endif |
| 491 |
| 492 table_size = |
| 493 _NL_CURRENT_WORD (LC_COLLATE, |
| 494 _NL_COLLATE_SYMB_HASH_SIZEMB); |
| 495 symb_table = (const int32_t *) |
| 496 _NL_CURRENT (LC_COLLATE, |
| 497 _NL_COLLATE_SYMB_TABLEMB); |
| 498 extra = (const unsigned char *) |
| 499 _NL_CURRENT (LC_COLLATE, |
| 500 _NL_COLLATE_SYMB_EXTRAMB); |
| 501 |
| 502 /* Locate the character in the hashing table. */ |
| 503 hash = elem_hash (str, c1); |
| 504 |
| 505 idx = 0; |
| 506 elem = hash % table_size; |
| 507 if (symb_table[2 * elem] != 0) |
| 508 { |
| 509 second = hash % (table_size - 2) + 1; |
| 510 |
| 511 do |
| 512 { |
| 513 /* First compare the hashing value. */ |
| 514 if (symb_table[2 * elem] == hash |
| 515 && (c1 |
| 516 == extra[symb_table[2 * elem + 1]]) |
| 517 && memcmp (str, |
| 518 &extra[symb_table[2 * elem |
| 519 + 1] |
| 520 + 1], c1) == 0) |
| 521 { |
| 522 /* Yep, this is the entry. */ |
| 523 idx = symb_table[2 * elem + 1]; |
| 524 idx += 1 + extra[idx]; |
| 525 break; |
| 526 } |
| 527 |
| 528 /* Next entry. */ |
| 529 elem += second; |
| 530 } |
| 531 while (symb_table[2 * elem] != 0); |
| 532 } |
| 533 |
| 534 if (symb_table[2 * elem] != 0) |
| 535 { |
| 536 /* Compare the byte sequence but only if |
| 537 this is not part of a range. */ |
| 538 # ifdef WIDE_CHAR_VERSION |
| 539 int32_t *wextra; |
| 540 |
| 541 idx += 1 + extra[idx]; |
| 542 /* Adjust for the alignment. */ |
| 543 idx = (idx + 3) & ~3; |
| 544 |
| 545 wextra = (int32_t *) &extra[idx + 4]; |
| 546 # endif |
| 547 |
| 548 if (! is_range) |
| 549 { |
| 550 # ifdef WIDE_CHAR_VERSION |
| 551 for (c1 = 0; |
| 552 (int32_t) c1 < wextra[idx]; |
| 553 ++c1) |
| 554 if (n[c1] != wextra[1 + c1]) |
| 555 break; |
| 556 |
| 557 if ((int32_t) c1 == wextra[idx]) |
| 558 goto matched; |
| 559 # else |
| 560 for (c1 = 0; c1 < extra[idx]; ++c1) |
| 561 if (n[c1] != extra[1 + c1]) |
| 562 break; |
| 563 |
| 564 if (c1 == extra[idx]) |
| 565 goto matched; |
| 566 # endif |
| 567 } |
| 568 |
| 569 /* Get the collation sequence value. */ |
| 570 is_seqval = true; |
| 571 # ifdef WIDE_CHAR_VERSION |
| 572 cold = wextra[1 + wextra[idx]]; |
| 573 # else |
| 574 /* Adjust for the alignment. */ |
| 575 idx += 1 + extra[idx]; |
| 576 idx = (idx + 3) & ~4; |
| 577 cold = *((int32_t *) &extra[idx]); |
| 578 # endif |
| 579 |
| 580 c = *p++; |
| 581 } |
| 582 else if (c1 == 1) |
| 583 { |
| 584 /* No valid character. Match it as a |
| 585 single byte. */ |
| 586 if (!is_range && *n == str[0]) |
| 587 goto matched; |
| 588 |
| 589 cold = str[0]; |
| 590 c = *p++; |
| 591 } |
| 592 else |
| 593 return FNM_NOMATCH; |
| 594 } |
| 595 } |
| 596 else |
| 597 # undef str |
| 598 #endif |
| 599 { |
| 600 c = FOLD (c); |
| 601 normal_bracket: |
| 602 |
| 603 /* We have to handling the symbols differently in |
| 604 ranges since then the collation sequence is |
| 605 important. */ |
| 606 is_range = (*p == L_('-') && p[1] != L_('\0') |
| 607 && p[1] != L_(']')); |
| 608 |
| 609 if (!is_range && c == fn) |
| 610 goto matched; |
| 611 |
| 612 #if _LIBC |
| 613 /* This is needed if we goto normal_bracket; from |
| 614 outside of is_seqval's scope. */ |
| 615 is_seqval = false; |
| 616 #endif |
| 617 |
| 618 cold = c; |
| 619 c = *p++; |
| 620 } |
| 621 |
| 622 if (c == L_('-') && *p != L_(']')) |
| 623 { |
| 624 #if _LIBC |
| 625 /* We have to find the collation sequence |
| 626 value for C. Collation sequence is nothing |
| 627 we can regularly access. The sequence |
| 628 value is defined by the order in which the |
| 629 definitions of the collation values for the |
| 630 various characters appear in the source |
| 631 file. A strange concept, nowhere |
| 632 documented. */ |
| 633 uint32_t fcollseq; |
| 634 uint32_t lcollseq; |
| 635 UCHAR cend = *p++; |
| 636 |
| 637 # ifdef WIDE_CHAR_VERSION |
| 638 /* Search in the 'names' array for the characters. */ |
| 639 fcollseq = __collseq_table_lookup (collseq, fn); |
| 640 if (fcollseq == ~((uint32_t) 0)) |
| 641 /* XXX We don't know anything about the character |
| 642 we are supposed to match. This means we are |
| 643 failing. */ |
| 644 goto range_not_matched; |
| 645 |
| 646 if (is_seqval) |
| 647 lcollseq = cold; |
| 648 else |
| 649 lcollseq = __collseq_table_lookup (collseq, cold); |
| 650 # else |
| 651 fcollseq = collseq[fn]; |
| 652 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold]; |
| 653 # endif |
| 654 |
| 655 is_seqval = false; |
| 656 if (cend == L_('[') && *p == L_('.')) |
| 657 { |
| 658 uint32_t nrules = |
| 659 _NL_CURRENT_WORD (LC_COLLATE, |
| 660 _NL_COLLATE_NRULES); |
| 661 const CHAR *startp = p; |
| 662 size_t c1 = 0; |
| 663 |
| 664 while (1) |
| 665 { |
| 666 c = *++p; |
| 667 if (c == L_('.') && p[1] == L_(']')) |
| 668 { |
| 669 p += 2; |
| 670 break; |
| 671 } |
| 672 if (c == '\0') |
| 673 return FNM_NOMATCH; |
| 674 ++c1; |
| 675 } |
| 676 |
| 677 if (nrules == 0) |
| 678 { |
| 679 /* There are no names defined in the |
| 680 collation data. Therefore we only |
| 681 accept the trivial names consisting |
| 682 of the character itself. */ |
| 683 if (c1 != 1) |
| 684 return FNM_NOMATCH; |
| 685 |
| 686 cend = startp[1]; |
| 687 } |
| 688 else |
| 689 { |
| 690 int32_t table_size; |
| 691 const int32_t *symb_table; |
| 692 # ifdef WIDE_CHAR_VERSION |
| 693 char str[c1]; |
| 694 size_t strcnt; |
| 695 # else |
| 696 # define str (startp + 1) |
| 697 # endif |
| 698 const unsigned char *extra; |
| 699 int32_t idx; |
| 700 int32_t elem; |
| 701 int32_t second; |
| 702 int32_t hash; |
| 703 |
| 704 # ifdef WIDE_CHAR_VERSION |
| 705 /* We have to convert the name to a single-byte |
| 706 string. This is possible since the names |
| 707 consist of ASCII characters and the internal |
| 708 representation is UCS4. */ |
| 709 for (strcnt = 0; strcnt < c1; ++strcnt) |
| 710 str[strcnt] = startp[1 + strcnt]; |
| 711 # endif |
| 712 |
| 713 table_size = |
| 714 _NL_CURRENT_WORD (LC_COLLATE, |
| 715 _NL_COLLATE_SYMB_HASH_SIZEMB
); |
| 716 symb_table = (const int32_t *) |
| 717 _NL_CURRENT (LC_COLLATE, |
| 718 _NL_COLLATE_SYMB_TABLEMB); |
| 719 extra = (const unsigned char *) |
| 720 _NL_CURRENT (LC_COLLATE, |
| 721 _NL_COLLATE_SYMB_EXTRAMB); |
| 722 |
| 723 /* Locate the character in the hashing |
| 724 table. */ |
| 725 hash = elem_hash (str, c1); |
| 726 |
| 727 idx = 0; |
| 728 elem = hash % table_size; |
| 729 if (symb_table[2 * elem] != 0) |
| 730 { |
| 731 second = hash % (table_size - 2) + 1; |
| 732 |
| 733 do |
| 734 { |
| 735 /* First compare the hashing value. */ |
| 736 if (symb_table[2 * elem] == hash |
| 737 && (c1 |
| 738 == extra[symb_table[2 * elem + 1
]]) |
| 739 && memcmp (str, |
| 740 &extra[symb_table[2 * ele
m + 1] |
| 741 + 1], c1) == 0) |
| 742 { |
| 743 /* Yep, this is the entry. */ |
| 744 idx = symb_table[2 * elem + 1]; |
| 745 idx += 1 + extra[idx]; |
| 746 break; |
| 747 } |
| 748 |
| 749 /* Next entry. */ |
| 750 elem += second; |
| 751 } |
| 752 while (symb_table[2 * elem] != 0); |
| 753 } |
| 754 |
| 755 if (symb_table[2 * elem] != 0) |
| 756 { |
| 757 /* Compare the byte sequence but only if |
| 758 this is not part of a range. */ |
| 759 # ifdef WIDE_CHAR_VERSION |
| 760 int32_t *wextra; |
| 761 |
| 762 idx += 1 + extra[idx]; |
| 763 /* Adjust for the alignment. */ |
| 764 idx = (idx + 3) & ~4; |
| 765 |
| 766 wextra = (int32_t *) &extra[idx + 4]; |
| 767 # endif |
| 768 /* Get the collation sequence value. */ |
| 769 is_seqval = true; |
| 770 # ifdef WIDE_CHAR_VERSION |
| 771 cend = wextra[1 + wextra[idx]]; |
| 772 # else |
| 773 /* Adjust for the alignment. */ |
| 774 idx += 1 + extra[idx]; |
| 775 idx = (idx + 3) & ~4; |
| 776 cend = *((int32_t *) &extra[idx]); |
| 777 # endif |
| 778 } |
| 779 else if (symb_table[2 * elem] != 0 && c1 == 1) |
| 780 { |
| 781 cend = str[0]; |
| 782 c = *p++; |
| 783 } |
| 784 else |
| 785 return FNM_NOMATCH; |
| 786 } |
| 787 # undef str |
| 788 } |
| 789 else |
| 790 { |
| 791 if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) |
| 792 cend = *p++; |
| 793 if (cend == L_('\0')) |
| 794 return FNM_NOMATCH; |
| 795 cend = FOLD (cend); |
| 796 } |
| 797 |
| 798 /* XXX It is not entirely clear to me how to handle |
| 799 characters which are not mentioned in the |
| 800 collation specification. */ |
| 801 if ( |
| 802 # ifdef WIDE_CHAR_VERSION |
| 803 lcollseq == 0xffffffff || |
| 804 # endif |
| 805 lcollseq <= fcollseq) |
| 806 { |
| 807 /* We have to look at the upper bound. */ |
| 808 uint32_t hcollseq; |
| 809 |
| 810 if (is_seqval) |
| 811 hcollseq = cend; |
| 812 else |
| 813 { |
| 814 # ifdef WIDE_CHAR_VERSION |
| 815 hcollseq = |
| 816 __collseq_table_lookup (collseq, cend); |
| 817 if (hcollseq == ~((uint32_t) 0)) |
| 818 { |
| 819 /* Hum, no information about the upper |
| 820 bound. The matching succeeds if the |
| 821 lower bound is matched exactly. */ |
| 822 if (lcollseq != fcollseq) |
| 823 goto range_not_matched; |
| 824 |
| 825 goto matched; |
| 826 } |
| 827 # else |
| 828 hcollseq = collseq[cend]; |
| 829 # endif |
| 830 } |
| 831 |
| 832 if (lcollseq <= hcollseq && fcollseq <= hcollseq) |
| 833 goto matched; |
| 834 } |
| 835 # ifdef WIDE_CHAR_VERSION |
| 836 range_not_matched: |
| 837 # endif |
| 838 #else |
| 839 /* We use a boring value comparison of the character |
| 840 values. This is better than comparing using |
| 841 'strcoll' since the latter would have surprising |
| 842 and sometimes fatal consequences. */ |
| 843 UCHAR cend = *p++; |
| 844 |
| 845 if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) |
| 846 cend = *p++; |
| 847 if (cend == L_('\0')) |
| 848 return FNM_NOMATCH; |
| 849 |
| 850 /* It is a range. */ |
| 851 if (cold <= fn && fn <= cend) |
| 852 goto matched; |
| 853 #endif |
| 854 |
| 855 c = *p++; |
| 856 } |
| 857 } |
| 858 |
| 859 if (c == L_(']')) |
| 860 break; |
| 861 } |
| 862 |
| 863 if (!not) |
| 864 return FNM_NOMATCH; |
| 865 break; |
| 866 |
| 867 matched: |
| 868 /* Skip the rest of the [...] that already matched. */ |
| 869 do |
| 870 { |
| 871 ignore_next: |
| 872 c = *p++; |
| 873 |
| 874 if (c == L_('\0')) |
| 875 /* [... (unterminated) loses. */ |
| 876 return FNM_NOMATCH; |
| 877 |
| 878 if (!(flags & FNM_NOESCAPE) && c == L_('\\')) |
| 879 { |
| 880 if (*p == L_('\0')) |
| 881 return FNM_NOMATCH; |
| 882 /* XXX 1003.2d11 is unclear if this is right. */ |
| 883 ++p; |
| 884 } |
| 885 else if (c == L_('[') && *p == L_(':')) |
| 886 { |
| 887 int c1 = 0; |
| 888 const CHAR *startp = p; |
| 889 |
| 890 while (1) |
| 891 { |
| 892 c = *++p; |
| 893 if (++c1 == CHAR_CLASS_MAX_LENGTH) |
| 894 return FNM_NOMATCH; |
| 895 |
| 896 if (*p == L_(':') && p[1] == L_(']')) |
| 897 break; |
| 898 |
| 899 if (c < L_('a') || c >= L_('z')) |
| 900 { |
| 901 p = startp; |
| 902 goto ignore_next; |
| 903 } |
| 904 } |
| 905 p += 2; |
| 906 c = *p++; |
| 907 } |
| 908 else if (c == L_('[') && *p == L_('=')) |
| 909 { |
| 910 c = *++p; |
| 911 if (c == L_('\0')) |
| 912 return FNM_NOMATCH; |
| 913 c = *++p; |
| 914 if (c != L_('=') || p[1] != L_(']')) |
| 915 return FNM_NOMATCH; |
| 916 p += 2; |
| 917 c = *p++; |
| 918 } |
| 919 else if (c == L_('[') && *p == L_('.')) |
| 920 { |
| 921 ++p; |
| 922 while (1) |
| 923 { |
| 924 c = *++p; |
| 925 if (c == '\0') |
| 926 return FNM_NOMATCH; |
| 927 |
| 928 if (*p == L_('.') && p[1] == L_(']')) |
| 929 break; |
| 930 } |
| 931 p += 2; |
| 932 c = *p++; |
| 933 } |
| 934 } |
| 935 while (c != L_(']')); |
| 936 if (not) |
| 937 return FNM_NOMATCH; |
| 938 } |
| 939 break; |
| 940 |
| 941 case L_('+'): |
| 942 case L_('@'): |
| 943 case L_('!'): |
| 944 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') |
| 945 { |
| 946 int res; |
| 947 |
| 948 res = EXT (c, p, n, string_end, no_leading_period, flags); |
| 949 if (res != -1) |
| 950 return res; |
| 951 } |
| 952 goto normal_match; |
| 953 |
| 954 case L_('/'): |
| 955 if (NO_LEADING_PERIOD (flags)) |
| 956 { |
| 957 if (n == string_end || c != (UCHAR) *n) |
| 958 return FNM_NOMATCH; |
| 959 |
| 960 new_no_leading_period = true; |
| 961 break; |
| 962 } |
| 963 /* FALLTHROUGH */ |
| 964 default: |
| 965 normal_match: |
| 966 if (n == string_end || c != FOLD ((UCHAR) *n)) |
| 967 return FNM_NOMATCH; |
| 968 } |
| 969 |
| 970 no_leading_period = new_no_leading_period; |
| 971 ++n; |
| 972 } |
| 973 |
| 974 if (n == string_end) |
| 975 return 0; |
| 976 |
| 977 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/')) |
| 978 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ |
| 979 return 0; |
| 980 |
| 981 return FNM_NOMATCH; |
| 982 } |
| 983 |
| 984 |
| 985 static const CHAR * |
| 986 internal_function |
| 987 END (const CHAR *pattern) |
| 988 { |
| 989 const CHAR *p = pattern; |
| 990 |
| 991 while (1) |
| 992 if (*++p == L_('\0')) |
| 993 /* This is an invalid pattern. */ |
| 994 return pattern; |
| 995 else if (*p == L_('[')) |
| 996 { |
| 997 /* Handle brackets special. */ |
| 998 if (posixly_correct == 0) |
| 999 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; |
| 1000 |
| 1001 /* Skip the not sign. We have to recognize it because of a possibly |
| 1002 following ']'. */ |
| 1003 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) |
| 1004 ++p; |
| 1005 /* A leading ']' is recognized as such. */ |
| 1006 if (*p == L_(']')) |
| 1007 ++p; |
| 1008 /* Skip over all characters of the list. */ |
| 1009 while (*p != L_(']')) |
| 1010 if (*p++ == L_('\0')) |
| 1011 /* This is no valid pattern. */ |
| 1012 return pattern; |
| 1013 } |
| 1014 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') |
| 1015 || *p == L_('!')) && p[1] == L_('(')) |
| 1016 p = END (p + 1); |
| 1017 else if (*p == L_(')')) |
| 1018 break; |
| 1019 |
| 1020 return p + 1; |
| 1021 } |
| 1022 |
| 1023 |
| 1024 static int |
| 1025 internal_function |
| 1026 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end, |
| 1027 bool no_leading_period, int flags) |
| 1028 { |
| 1029 const CHAR *startp; |
| 1030 size_t level; |
| 1031 struct patternlist |
| 1032 { |
| 1033 struct patternlist *next; |
| 1034 CHAR str[1]; |
| 1035 } *list = NULL; |
| 1036 struct patternlist **lastp = &list; |
| 1037 size_t pattern_len = STRLEN (pattern); |
| 1038 const CHAR *p; |
| 1039 const CHAR *rs; |
| 1040 enum { ALLOCA_LIMIT = 8000 }; |
| 1041 |
| 1042 /* Parse the pattern. Store the individual parts in the list. */ |
| 1043 level = 0; |
| 1044 for (startp = p = pattern + 1; ; ++p) |
| 1045 if (*p == L_('\0')) |
| 1046 /* This is an invalid pattern. */ |
| 1047 return -1; |
| 1048 else if (*p == L_('[')) |
| 1049 { |
| 1050 /* Handle brackets special. */ |
| 1051 if (posixly_correct == 0) |
| 1052 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; |
| 1053 |
| 1054 /* Skip the not sign. We have to recognize it because of a possibly |
| 1055 following ']'. */ |
| 1056 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) |
| 1057 ++p; |
| 1058 /* A leading ']' is recognized as such. */ |
| 1059 if (*p == L_(']')) |
| 1060 ++p; |
| 1061 /* Skip over all characters of the list. */ |
| 1062 while (*p != L_(']')) |
| 1063 if (*p++ == L_('\0')) |
| 1064 /* This is no valid pattern. */ |
| 1065 return -1; |
| 1066 } |
| 1067 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') |
| 1068 || *p == L_('!')) && p[1] == L_('(')) |
| 1069 /* Remember the nesting level. */ |
| 1070 ++level; |
| 1071 else if (*p == L_(')')) |
| 1072 { |
| 1073 if (level-- == 0) |
| 1074 { |
| 1075 /* This means we found the end of the pattern. */ |
| 1076 #define NEW_PATTERN \ |
| 1077 struct patternlist *newp; \ |
| 1078 size_t plen; \ |
| 1079 size_t plensize; \ |
| 1080 size_t newpsize; \ |
| 1081 \ |
| 1082 plen = (opt == L_('?') || opt == L_('@') \ |
| 1083 ? pattern_len \ |
| 1084 : p - startp + 1UL); \ |
| 1085 plensize = plen * sizeof (CHAR); \ |
| 1086 newpsize = offsetof (struct patternlist, str) + plensize; \ |
| 1087 if ((size_t) -1 / sizeof (CHAR) < plen \ |
| 1088 || newpsize < offsetof (struct patternlist, str) \ |
| 1089 || ALLOCA_LIMIT <= newpsize) \ |
| 1090 return -1; \ |
| 1091 newp = (struct patternlist *) alloca (newpsize); \ |
| 1092 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \ |
| 1093 newp->next = NULL; \ |
| 1094 *lastp = newp; \ |
| 1095 lastp = &newp->next |
| 1096 NEW_PATTERN; |
| 1097 break; |
| 1098 } |
| 1099 } |
| 1100 else if (*p == L_('|')) |
| 1101 { |
| 1102 if (level == 0) |
| 1103 { |
| 1104 NEW_PATTERN; |
| 1105 startp = p + 1; |
| 1106 } |
| 1107 } |
| 1108 assert (list != NULL); |
| 1109 assert (p[-1] == L_(')')); |
| 1110 #undef NEW_PATTERN |
| 1111 |
| 1112 switch (opt) |
| 1113 { |
| 1114 case L_('*'): |
| 1115 if (FCT (p, string, string_end, no_leading_period, flags) == 0) |
| 1116 return 0; |
| 1117 /* FALLTHROUGH */ |
| 1118 |
| 1119 case L_('+'): |
| 1120 do |
| 1121 { |
| 1122 for (rs = string; rs <= string_end; ++rs) |
| 1123 /* First match the prefix with the current pattern with the |
| 1124 current pattern. */ |
| 1125 if (FCT (list->str, string, rs, no_leading_period, |
| 1126 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0 |
| 1127 /* This was successful. Now match the rest with the rest |
| 1128 of the pattern. */ |
| 1129 && (FCT (p, rs, string_end, |
| 1130 rs == string |
| 1131 ? no_leading_period |
| 1132 : rs[-1] == '/' && NO_LEADING_PERIOD (flags), |
| 1133 flags & FNM_FILE_NAME |
| 1134 ? flags : flags & ~FNM_PERIOD) == 0 |
| 1135 /* This didn't work. Try the whole pattern. */ |
| 1136 || (rs != string |
| 1137 && FCT (pattern - 1, rs, string_end, |
| 1138 rs == string |
| 1139 ? no_leading_period |
| 1140 : rs[-1] == '/' && NO_LEADING_PERIOD (flags), |
| 1141 flags & FNM_FILE_NAME |
| 1142 ? flags : flags & ~FNM_PERIOD) == 0))) |
| 1143 /* It worked. Signal success. */ |
| 1144 return 0; |
| 1145 } |
| 1146 while ((list = list->next) != NULL); |
| 1147 |
| 1148 /* None of the patterns lead to a match. */ |
| 1149 return FNM_NOMATCH; |
| 1150 |
| 1151 case L_('?'): |
| 1152 if (FCT (p, string, string_end, no_leading_period, flags) == 0) |
| 1153 return 0; |
| 1154 /* FALLTHROUGH */ |
| 1155 |
| 1156 case L_('@'): |
| 1157 do |
| 1158 /* I cannot believe it but 'strcat' is actually acceptable |
| 1159 here. Match the entire string with the prefix from the |
| 1160 pattern list and the rest of the pattern following the |
| 1161 pattern list. */ |
| 1162 if (FCT (STRCAT (list->str, p), string, string_end, |
| 1163 no_leading_period, |
| 1164 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0) |
| 1165 /* It worked. Signal success. */ |
| 1166 return 0; |
| 1167 while ((list = list->next) != NULL); |
| 1168 |
| 1169 /* None of the patterns lead to a match. */ |
| 1170 return FNM_NOMATCH; |
| 1171 |
| 1172 case L_('!'): |
| 1173 for (rs = string; rs <= string_end; ++rs) |
| 1174 { |
| 1175 struct patternlist *runp; |
| 1176 |
| 1177 for (runp = list; runp != NULL; runp = runp->next) |
| 1178 if (FCT (runp->str, string, rs, no_leading_period, |
| 1179 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0) |
| 1180 break; |
| 1181 |
| 1182 /* If none of the patterns matched see whether the rest does. */ |
| 1183 if (runp == NULL |
| 1184 && (FCT (p, rs, string_end, |
| 1185 rs == string |
| 1186 ? no_leading_period |
| 1187 : rs[-1] == '/' && NO_LEADING_PERIOD (flags), |
| 1188 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) |
| 1189 == 0)) |
| 1190 /* This is successful. */ |
| 1191 return 0; |
| 1192 } |
| 1193 |
| 1194 /* None of the patterns together with the rest of the pattern |
| 1195 lead to a match. */ |
| 1196 return FNM_NOMATCH; |
| 1197 |
| 1198 default: |
| 1199 assert (! "Invalid extended matching operator"); |
| 1200 break; |
| 1201 } |
| 1202 |
| 1203 return -1; |
| 1204 } |
| 1205 |
| 1206 |
/* Remove the macros that parameterize the code above.
   NOTE(review): presumably this lets the including file define them
   again and include this template a second time (e.g. for another
   character type) -- confirm against the includer.  */
#undef FOLD
#undef CHAR
#undef UCHAR
#undef INT
#undef FCT
#undef EXT
#undef END
#undef MEMPCPY
#undef MEMCHR
#undef STRLEN
#undef STRCAT
#undef L_
#undef BTOWC
OLD | NEW |