OLD | NEW |
(Empty) | |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "base/strings/pattern.h" |
| 6 |
| 7 #include "base/third_party/icu/icu_utf.h" |
| 8 |
| 9 namespace base { |
| 10 |
| 11 namespace { |
| 12 |
| 13 static bool IsWildcard(base_icu::UChar32 character) { |
| 14 return character == '*' || character == '?'; |
| 15 } |
| 16 |
| 17 // Move the strings pointers to the point where they start to differ. |
| 18 template <typename CHAR, typename NEXT> |
| 19 static void EatSameChars(const CHAR** pattern, |
| 20 const CHAR* pattern_end, |
| 21 const CHAR** string, |
| 22 const CHAR* string_end, |
| 23 NEXT next) { |
| 24 const CHAR* escape = NULL; |
| 25 while (*pattern != pattern_end && *string != string_end) { |
| 26 if (!escape && IsWildcard(**pattern)) { |
| 27 // We don't want to match wildcard here, except if it's escaped. |
| 28 return; |
| 29 } |
| 30 |
| 31 // Check if the escapement char is found. If so, skip it and move to the |
| 32 // next character. |
| 33 if (!escape && **pattern == '\\') { |
| 34 escape = *pattern; |
| 35 next(pattern, pattern_end); |
| 36 continue; |
| 37 } |
| 38 |
| 39 // Check if the chars match, if so, increment the ptrs. |
| 40 const CHAR* pattern_next = *pattern; |
| 41 const CHAR* string_next = *string; |
| 42 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); |
| 43 if (pattern_char == next(&string_next, string_end) && |
| 44 pattern_char != CBU_SENTINEL) { |
| 45 *pattern = pattern_next; |
| 46 *string = string_next; |
| 47 } else { |
| 48 // Uh oh, it did not match, we are done. If the last char was an |
| 49 // escapement, that means that it was an error to advance the ptr here, |
| 50 // let's put it back where it was. This also mean that the MatchPattern |
| 51 // function will return false because if we can't match an escape char |
| 52 // here, then no one will. |
| 53 if (escape) { |
| 54 *pattern = escape; |
| 55 } |
| 56 return; |
| 57 } |
| 58 |
| 59 escape = NULL; |
| 60 } |
| 61 } |
| 62 |
// Moves |*pattern| forward past a run of consecutive wildcard characters
// ('*' or '?'), stopping at the first non-wildcard or at |end|.
template <typename CHAR, typename NEXT>
static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
  while (*pattern != end && IsWildcard(**pattern))
    next(pattern, end);
}
| 71 |
// Recursive wildcard matcher shared by the UTF-8 and UTF-16 entry points.
//
// Returns true when the string [eval, eval_end) matches the pattern
// [pattern, pattern_end). In the pattern, '*' matches any run of characters
// (including none), '?' is tried against both zero and one character, and a
// backslash escapes the character that follows it.
//
// |depth| is the current recursion depth; once it exceeds kMaxDepth the
// function gives up and reports no match. |next| decodes one code point from
// a [pointer, end) range and advances the pointer past it.
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval,
                          const CHAR* eval_end,
                          const CHAR* pattern,
                          const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Eat all the matching literal characters.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // If the string is exhausted, the rest of the pattern must be empty or
  // consist only of wildcards.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Pattern is exhausted but the string is not: no match.
  if (pattern == pattern_end)
    return false;

  // |next_pattern| is the pattern with its first character removed.
  const CHAR* next_pattern = pattern;
  next(&next_pattern, pattern_end);

  // Question mark: compare the rest of the pattern with the current string
  // (the '?' consumed nothing) or with the string minus one character.
  if (pattern[0] == '?') {
    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, depth + 1,
                      next))
      return true;
    const CHAR* next_eval = eval;
    next(&next_eval, eval_end);
    if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end, depth + 1,
                      next))
      return true;
  }

  // Star: try to match every suffix of the string against the remainder of
  // the pattern.
  if (pattern[0] == '*') {
    // Collapse consecutive wildcards (********** into *) so that the
    // method does not recurse unnecessarily. http://crbug.com/52839
    EatWildcard(&next_pattern, pattern_end, next);

    // Nothing but wildcards is left in the pattern, so whatever remains of
    // the string matches. Checking this first avoids one doomed recursive
    // call per remaining character.
    if (next_pattern == pattern_end)
      return true;

    // Advance one whole code point at a time. A position in the middle of a
    // multi-unit code point can never start a match, because the remaining
    // pattern begins with a literal (or escaped) character at this point.
    while (eval != eval_end) {
      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, depth + 1,
                        next))
        return true;
      next(&eval, eval_end);
    }
  }

  return false;
}
| 138 |
| 139 struct NextCharUTF8 { |
| 140 base_icu::UChar32 operator()(const char** p, const char* end) { |
| 141 base_icu::UChar32 c; |
| 142 int offset = 0; |
| 143 CBU8_NEXT(*p, offset, end - *p, c); |
| 144 *p += offset; |
| 145 return c; |
| 146 } |
| 147 }; |
| 148 |
| 149 struct NextCharUTF16 { |
| 150 base_icu::UChar32 operator()(const char16** p, const char16* end) { |
| 151 base_icu::UChar32 c; |
| 152 int offset = 0; |
| 153 CBU16_NEXT(*p, offset, end - *p, c); |
| 154 *p += offset; |
| 155 return c; |
| 156 } |
| 157 }; |
| 158 |
| 159 } // namespace |
| 160 |
| 161 bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) { |
| 162 return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(), |
| 163 pattern.data() + pattern.size(), 0, NextCharUTF8()); |
| 164 } |
| 165 |
| 166 bool MatchPattern(const StringPiece16& eval, const StringPiece16& pattern) { |
| 167 return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(), |
| 168 pattern.data() + pattern.size(), 0, NextCharUTF16()); |
| 169 } |
| 170 |
| 171 } // namespace base |
OLD | NEW |