Index: base/strings/pattern.cc |
diff --git a/base/strings/pattern.cc b/base/strings/pattern.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..56915fe9f383bfca7a5df2760131de31a35f962e |
--- /dev/null |
+++ b/base/strings/pattern.cc |
@@ -0,0 +1,171 @@ |
+// Copyright 2015 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "base/strings/pattern.h" |
+ |
+#include "base/third_party/icu/icu_utf.h" |
+ |
+namespace base { |
+ |
+namespace { |
+ |
+static bool IsWildcard(base_icu::UChar32 character) { |
+ return character == '*' || character == '?'; |
+} |
+ |
+// Move the strings pointers to the point where they start to differ. |
+template <typename CHAR, typename NEXT> |
+static void EatSameChars(const CHAR** pattern, |
+ const CHAR* pattern_end, |
+ const CHAR** string, |
+ const CHAR* string_end, |
+ NEXT next) { |
+ const CHAR* escape = NULL; |
+ while (*pattern != pattern_end && *string != string_end) { |
+ if (!escape && IsWildcard(**pattern)) { |
+ // We don't want to match wildcard here, except if it's escaped. |
+ return; |
+ } |
+ |
+ // Check if the escapement char is found. If so, skip it and move to the |
+ // next character. |
+ if (!escape && **pattern == '\\') { |
+ escape = *pattern; |
+ next(pattern, pattern_end); |
+ continue; |
+ } |
+ |
+ // Check if the chars match, if so, increment the ptrs. |
+ const CHAR* pattern_next = *pattern; |
+ const CHAR* string_next = *string; |
+ base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); |
+ if (pattern_char == next(&string_next, string_end) && |
+ pattern_char != CBU_SENTINEL) { |
+ *pattern = pattern_next; |
+ *string = string_next; |
+ } else { |
+ // Uh oh, it did not match, we are done. If the last char was an |
+ // escapement, that means that it was an error to advance the ptr here, |
+ // let's put it back where it was. This also mean that the MatchPattern |
+ // function will return false because if we can't match an escape char |
+ // here, then no one will. |
+ if (escape) { |
+ *pattern = escape; |
+ } |
+ return; |
+ } |
+ |
+ escape = NULL; |
+ } |
+} |
+ |
+template <typename CHAR, typename NEXT> |
+static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) { |
+ while (*pattern != end) { |
+ if (!IsWildcard(**pattern)) |
+ return; |
+ next(pattern, end); |
+ } |
+} |
+ |
+template <typename CHAR, typename NEXT> |
+static bool MatchPatternT(const CHAR* eval, |
+ const CHAR* eval_end, |
+ const CHAR* pattern, |
+ const CHAR* pattern_end, |
+ int depth, |
+ NEXT next) { |
+ const int kMaxDepth = 16; |
+ if (depth > kMaxDepth) |
+ return false; |
+ |
+ // Eat all the matching chars. |
+ EatSameChars(&pattern, pattern_end, &eval, eval_end, next); |
+ |
+ // If the string is empty, then the pattern must be empty too, or contains |
+ // only wildcards. |
+ if (eval == eval_end) { |
+ EatWildcard(&pattern, pattern_end, next); |
+ return pattern == pattern_end; |
+ } |
+ |
+ // Pattern is empty but not string, this is not a match. |
+ if (pattern == pattern_end) |
+ return false; |
+ |
+ // If this is a question mark, then we need to compare the rest with |
+ // the current string or the string with one character eaten. |
+ const CHAR* next_pattern = pattern; |
+ next(&next_pattern, pattern_end); |
+ if (pattern[0] == '?') { |
+ if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, depth + 1, |
+ next)) |
+ return true; |
+ const CHAR* next_eval = eval; |
+ next(&next_eval, eval_end); |
+ if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end, depth + 1, |
+ next)) |
+ return true; |
+ } |
+ |
+ // This is a *, try to match all the possible substrings with the remainder |
+ // of the pattern. |
+ if (pattern[0] == '*') { |
+ // Collapse duplicate wild cards (********** into *) so that the |
+ // method does not recurse unnecessarily. http://crbug.com/52839 |
+ EatWildcard(&next_pattern, pattern_end, next); |
+ |
+ while (eval != eval_end) { |
+ if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, depth + 1, |
+ next)) |
+ return true; |
+ eval++; |
+ } |
+ |
+ // We reached the end of the string, let see if the pattern contains only |
+ // wildcards. |
+ if (eval == eval_end) { |
+ EatWildcard(&pattern, pattern_end, next); |
+ if (pattern != pattern_end) |
+ return false; |
+ return true; |
+ } |
+ } |
+ |
+ return false; |
+} |
+ |
+struct NextCharUTF8 { |
+ base_icu::UChar32 operator()(const char** p, const char* end) { |
+ base_icu::UChar32 c; |
+ int offset = 0; |
+ CBU8_NEXT(*p, offset, end - *p, c); |
+ *p += offset; |
+ return c; |
+ } |
+}; |
+ |
+struct NextCharUTF16 { |
+ base_icu::UChar32 operator()(const char16** p, const char16* end) { |
+ base_icu::UChar32 c; |
+ int offset = 0; |
+ CBU16_NEXT(*p, offset, end - *p, c); |
+ *p += offset; |
+ return c; |
+ } |
+}; |
+ |
+} // namespace |
+ |
+bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) { |
+ return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(), |
+ pattern.data() + pattern.size(), 0, NextCharUTF8()); |
+} |
+ |
+bool MatchPattern(const StringPiece16& eval, const StringPiece16& pattern) { |
+ return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(), |
+ pattern.data() + pattern.size(), 0, NextCharUTF16()); |
+} |
+ |
+} // namespace base |