OLD | NEW |
(Empty) | |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "base/strings/pattern.h" |
| 6 |
| 7 #include "base/third_party/icu/icu_utf.h" |
| 8 |
| 9 namespace base { |
| 10 |
| 11 namespace { |
| 12 |
| 13 static bool IsWildcard(base_icu::UChar32 character) { |
| 14 return character == '*' || character == '?'; |
| 15 } |
| 16 |
| 17 // Move the strings pointers to the point where they start to differ. |
| 18 template <typename CHAR, typename NEXT> |
| 19 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, |
| 20 const CHAR** string, const CHAR* string_end, |
| 21 NEXT next) { |
| 22 const CHAR* escape = NULL; |
| 23 while (*pattern != pattern_end && *string != string_end) { |
| 24 if (!escape && IsWildcard(**pattern)) { |
| 25 // We don't want to match wildcard here, except if it's escaped. |
| 26 return; |
| 27 } |
| 28 |
| 29 // Check if the escapement char is found. If so, skip it and move to the |
| 30 // next character. |
| 31 if (!escape && **pattern == '\\') { |
| 32 escape = *pattern; |
| 33 next(pattern, pattern_end); |
| 34 continue; |
| 35 } |
| 36 |
| 37 // Check if the chars match, if so, increment the ptrs. |
| 38 const CHAR* pattern_next = *pattern; |
| 39 const CHAR* string_next = *string; |
| 40 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); |
| 41 if (pattern_char == next(&string_next, string_end) && |
| 42 pattern_char != CBU_SENTINEL) { |
| 43 *pattern = pattern_next; |
| 44 *string = string_next; |
| 45 } else { |
| 46 // Uh oh, it did not match, we are done. If the last char was an |
| 47 // escapement, that means that it was an error to advance the ptr here, |
| 48 // let's put it back where it was. This also mean that the MatchPattern |
| 49 // function will return false because if we can't match an escape char |
| 50 // here, then no one will. |
| 51 if (escape) { |
| 52 *pattern = escape; |
| 53 } |
| 54 return; |
| 55 } |
| 56 |
| 57 escape = NULL; |
| 58 } |
| 59 } |
| 60 |
| 61 template <typename CHAR, typename NEXT> |
| 62 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) { |
| 63 while (*pattern != end) { |
| 64 if (!IsWildcard(**pattern)) |
| 65 return; |
| 66 next(pattern, end); |
| 67 } |
| 68 } |
| 69 |
| 70 template <typename CHAR, typename NEXT> |
| 71 static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end, |
| 72 const CHAR* pattern, const CHAR* pattern_end, |
| 73 int depth, |
| 74 NEXT next) { |
| 75 const int kMaxDepth = 16; |
| 76 if (depth > kMaxDepth) |
| 77 return false; |
| 78 |
| 79 // Eat all the matching chars. |
| 80 EatSameChars(&pattern, pattern_end, &eval, eval_end, next); |
| 81 |
| 82 // If the string is empty, then the pattern must be empty too, or contains |
| 83 // only wildcards. |
| 84 if (eval == eval_end) { |
| 85 EatWildcard(&pattern, pattern_end, next); |
| 86 return pattern == pattern_end; |
| 87 } |
| 88 |
| 89 // Pattern is empty but not string, this is not a match. |
| 90 if (pattern == pattern_end) |
| 91 return false; |
| 92 |
| 93 // If this is a question mark, then we need to compare the rest with |
| 94 // the current string or the string with one character eaten. |
| 95 const CHAR* next_pattern = pattern; |
| 96 next(&next_pattern, pattern_end); |
| 97 if (pattern[0] == '?') { |
| 98 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, |
| 99 depth + 1, next)) |
| 100 return true; |
| 101 const CHAR* next_eval = eval; |
| 102 next(&next_eval, eval_end); |
| 103 if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end, |
| 104 depth + 1, next)) |
| 105 return true; |
| 106 } |
| 107 |
| 108 // This is a *, try to match all the possible substrings with the remainder |
| 109 // of the pattern. |
| 110 if (pattern[0] == '*') { |
| 111 // Collapse duplicate wild cards (********** into *) so that the |
| 112 // method does not recurse unnecessarily. http://crbug.com/52839 |
| 113 EatWildcard(&next_pattern, pattern_end, next); |
| 114 |
| 115 while (eval != eval_end) { |
| 116 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, |
| 117 depth + 1, next)) |
| 118 return true; |
| 119 eval++; |
| 120 } |
| 121 |
| 122 // We reached the end of the string, let see if the pattern contains only |
| 123 // wildcards. |
| 124 if (eval == eval_end) { |
| 125 EatWildcard(&pattern, pattern_end, next); |
| 126 if (pattern != pattern_end) |
| 127 return false; |
| 128 return true; |
| 129 } |
| 130 } |
| 131 |
| 132 return false; |
| 133 } |
| 134 |
| 135 struct NextCharUTF8 { |
| 136 base_icu::UChar32 operator()(const char** p, const char* end) { |
| 137 base_icu::UChar32 c; |
| 138 int offset = 0; |
| 139 CBU8_NEXT(*p, offset, end - *p, c); |
| 140 *p += offset; |
| 141 return c; |
| 142 } |
| 143 }; |
| 144 |
| 145 struct NextCharUTF16 { |
| 146 base_icu::UChar32 operator()(const char16** p, const char16* end) { |
| 147 base_icu::UChar32 c; |
| 148 int offset = 0; |
| 149 CBU16_NEXT(*p, offset, end - *p, c); |
| 150 *p += offset; |
| 151 return c; |
| 152 } |
| 153 }; |
| 154 |
| 155 } // namespace |
| 156 |
| 157 bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) { |
| 158 return MatchPatternT(eval.data(), eval.data() + eval.size(), |
| 159 pattern.data(), pattern.data() + pattern.size(), |
| 160 0, NextCharUTF8()); |
| 161 } |
| 162 |
| 163 bool MatchPattern(const StringPiece16& eval, const StringPiece16& pattern) { |
| 164 return MatchPatternT(eval.data(), eval.data() + eval.size(), |
| 165 pattern.data(), pattern.data() + pattern.size(), |
| 166 0, NextCharUTF16()); |
| 167 } |
| 168 |
| 169 } // namespace base |
OLD | NEW |