| OLD | NEW |
| (Empty) |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "base/strings/pattern.h" | |
| 6 | |
| 7 #include "base/third_party/icu/icu_utf.h" | |
| 8 | |
| 9 namespace base { | |
| 10 | |
| 11 namespace { | |
| 12 | |
| 13 static bool IsWildcard(base_icu::UChar32 character) { | |
| 14 return character == '*' || character == '?'; | |
| 15 } | |
| 16 | |
| 17 // Move the strings pointers to the point where they start to differ. | |
| 18 template <typename CHAR, typename NEXT> | |
| 19 static void EatSameChars(const CHAR** pattern, | |
| 20 const CHAR* pattern_end, | |
| 21 const CHAR** string, | |
| 22 const CHAR* string_end, | |
| 23 NEXT next) { | |
| 24 const CHAR* escape = NULL; | |
| 25 while (*pattern != pattern_end && *string != string_end) { | |
| 26 if (!escape && IsWildcard(**pattern)) { | |
| 27 // We don't want to match wildcard here, except if it's escaped. | |
| 28 return; | |
| 29 } | |
| 30 | |
| 31 // Check if the escapement char is found. If so, skip it and move to the | |
| 32 // next character. | |
| 33 if (!escape && **pattern == '\\') { | |
| 34 escape = *pattern; | |
| 35 next(pattern, pattern_end); | |
| 36 continue; | |
| 37 } | |
| 38 | |
| 39 // Check if the chars match, if so, increment the ptrs. | |
| 40 const CHAR* pattern_next = *pattern; | |
| 41 const CHAR* string_next = *string; | |
| 42 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); | |
| 43 if (pattern_char == next(&string_next, string_end) && | |
| 44 pattern_char != CBU_SENTINEL) { | |
| 45 *pattern = pattern_next; | |
| 46 *string = string_next; | |
| 47 } else { | |
| 48 // Uh oh, it did not match, we are done. If the last char was an | |
| 49 // escapement, that means that it was an error to advance the ptr here, | |
| 50 // let's put it back where it was. This also mean that the MatchPattern | |
| 51 // function will return false because if we can't match an escape char | |
| 52 // here, then no one will. | |
| 53 if (escape) { | |
| 54 *pattern = escape; | |
| 55 } | |
| 56 return; | |
| 57 } | |
| 58 | |
| 59 escape = NULL; | |
| 60 } | |
| 61 } | |
| 62 | |
// Advances |*pattern| past a run of consecutive wildcard characters ('*' or
// '?'), stopping at the first non-wildcard or at |end|.
template <typename CHAR, typename NEXT>
static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
  while (*pattern != end && IsWildcard(**pattern))
    next(pattern, end);
}
| 71 | |
// Returns true when the whole of [eval, eval_end) matches the wildcard pattern
// [pattern, pattern_end).
//
// In the pattern, '?' matches zero or one character, '*' matches any run of
// characters (including none), and a backslash escapes the character after it.
// |next| decodes one character at a cursor and advances the cursor past it.
//
// |depth| is the current recursion depth; callers start at 0. Once it exceeds
// kMaxDepth the function gives up and reports "no match", which bounds the
// work done for patterns containing many wildcards.
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval,
                          const CHAR* eval_end,
                          const CHAR* pattern,
                          const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Consume the literal prefix shared by the pattern and the string.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // If the string is exhausted, the rest of the pattern must be empty or
  // consist solely of wildcards.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Pattern exhausted while the string is not: no match.
  if (pattern == pattern_end)
    return false;

  // |next_pattern| is the pattern position just past the current character.
  const CHAR* next_pattern = pattern;
  next(&next_pattern, pattern_end);

  // '?': try matching the remainder against the current string (zero
  // characters consumed), then against the string with one character eaten.
  if (pattern[0] == '?') {
    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, depth + 1,
                      next))
      return true;
    const CHAR* next_eval = eval;
    next(&next_eval, eval_end);
    return MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
                         depth + 1, next);
  }

  // '*': try every suffix of the string against the remainder of the pattern.
  if (pattern[0] == '*') {
    // Collapse duplicate wild cards (********** into *) so that the
    // method does not recurse unnecessarily. http://crbug.com/52839
    EatWildcard(&next_pattern, pattern_end, next);

    // Nothing but wildcards was left, so '*' swallows the rest of the string.
    if (next_pattern == pattern_end)
      return true;

    while (eval != eval_end) {
      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, depth + 1,
                        next))
        return true;
      // Advance by one whole character, as the '?' branch does. Stepping by a
      // single code unit could resume matching in the middle of a multi-unit
      // character (e.g. between the halves of a UTF-16 surrogate pair).
      next(&eval, eval_end);
    }

    // The string ran out while non-wildcard pattern characters remain.
    return false;
  }

  // The pattern starts with a literal (or a backslash) that EatSameChars
  // could not match against the string.
  return false;
}
| 138 | |
| 139 struct NextCharUTF8 { | |
| 140 base_icu::UChar32 operator()(const char** p, const char* end) { | |
| 141 base_icu::UChar32 c; | |
| 142 int offset = 0; | |
| 143 CBU8_NEXT(*p, offset, end - *p, c); | |
| 144 *p += offset; | |
| 145 return c; | |
| 146 } | |
| 147 }; | |
| 148 | |
| 149 struct NextCharUTF16 { | |
| 150 base_icu::UChar32 operator()(const char16** p, const char16* end) { | |
| 151 base_icu::UChar32 c; | |
| 152 int offset = 0; | |
| 153 CBU16_NEXT(*p, offset, end - *p, c); | |
| 154 *p += offset; | |
| 155 return c; | |
| 156 } | |
| 157 }; | |
| 158 | |
| 159 } // namespace | |
| 160 | |
| 161 bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) { | |
| 162 return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(), | |
| 163 pattern.data() + pattern.size(), 0, NextCharUTF8()); | |
| 164 } | |
| 165 | |
| 166 bool MatchPattern(const StringPiece16& eval, const StringPiece16& pattern) { | |
| 167 return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(), | |
| 168 pattern.data() + pattern.size(), 0, NextCharUTF16()); | |
| 169 } | |
| 170 | |
| 171 } // namespace base | |
| OLD | NEW |