Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1391)

Unified Diff: base/strings/pattern.cc

Issue 1226673003: Move MatchPattern to its own header and the base namespace. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « base/strings/pattern.h ('k') | base/strings/pattern_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: base/strings/pattern.cc
diff --git a/base/strings/pattern.cc b/base/strings/pattern.cc
new file mode 100644
index 0000000000000000000000000000000000000000..8739c95637a462a1f860eeeb5a431f4062d31398
--- /dev/null
+++ b/base/strings/pattern.cc
@@ -0,0 +1,169 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/strings/pattern.h"
+
+#include "base/third_party/icu/icu_utf.h"
+
+namespace base {
+
+namespace {
+
+static bool IsWildcard(base_icu::UChar32 character) {
+ return character == '*' || character == '?';
+}
+
+// Move the strings pointers to the point where they start to differ.
+template <typename CHAR, typename NEXT>
+static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
+ const CHAR** string, const CHAR* string_end,
+ NEXT next) {
+ const CHAR* escape = NULL;
+ while (*pattern != pattern_end && *string != string_end) {
+ if (!escape && IsWildcard(**pattern)) {
+ // We don't want to match wildcard here, except if it's escaped.
+ return;
+ }
+
+ // Check if the escapement char is found. If so, skip it and move to the
+ // next character.
+ if (!escape && **pattern == '\\') {
+ escape = *pattern;
+ next(pattern, pattern_end);
+ continue;
+ }
+
+ // Check if the chars match, if so, increment the ptrs.
+ const CHAR* pattern_next = *pattern;
+ const CHAR* string_next = *string;
+ base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
+ if (pattern_char == next(&string_next, string_end) &&
+ pattern_char != CBU_SENTINEL) {
+ *pattern = pattern_next;
+ *string = string_next;
+ } else {
+ // Uh oh, it did not match, we are done. If the last char was an
+ // escapement, that means that it was an error to advance the ptr here,
+ // let's put it back where it was. This also mean that the MatchPattern
+ // function will return false because if we can't match an escape char
+ // here, then no one will.
+ if (escape) {
+ *pattern = escape;
+ }
+ return;
+ }
+
+ escape = NULL;
+ }
+}
+
+template <typename CHAR, typename NEXT>
+static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
+ while (*pattern != end) {
+ if (!IsWildcard(**pattern))
+ return;
+ next(pattern, end);
+ }
+}
+
+template <typename CHAR, typename NEXT>
+static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
+ const CHAR* pattern, const CHAR* pattern_end,
+ int depth,
+ NEXT next) {
+ const int kMaxDepth = 16;
+ if (depth > kMaxDepth)
+ return false;
+
+ // Eat all the matching chars.
+ EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
+
+ // If the string is empty, then the pattern must be empty too, or contains
+ // only wildcards.
+ if (eval == eval_end) {
+ EatWildcard(&pattern, pattern_end, next);
+ return pattern == pattern_end;
+ }
+
+ // Pattern is empty but not string, this is not a match.
+ if (pattern == pattern_end)
+ return false;
+
+ // If this is a question mark, then we need to compare the rest with
+ // the current string or the string with one character eaten.
+ const CHAR* next_pattern = pattern;
+ next(&next_pattern, pattern_end);
+ if (pattern[0] == '?') {
+ if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
+ depth + 1, next))
+ return true;
+ const CHAR* next_eval = eval;
+ next(&next_eval, eval_end);
+ if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
+ depth + 1, next))
+ return true;
+ }
+
+ // This is a *, try to match all the possible substrings with the remainder
+ // of the pattern.
+ if (pattern[0] == '*') {
+ // Collapse duplicate wild cards (********** into *) so that the
+ // method does not recurse unnecessarily. http://crbug.com/52839
+ EatWildcard(&next_pattern, pattern_end, next);
+
+ while (eval != eval_end) {
+ if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
+ depth + 1, next))
+ return true;
+ eval++;
+ }
+
+ // We reached the end of the string, let see if the pattern contains only
+ // wildcards.
+ if (eval == eval_end) {
+ EatWildcard(&pattern, pattern_end, next);
+ if (pattern != pattern_end)
+ return false;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+struct NextCharUTF8 {
+ base_icu::UChar32 operator()(const char** p, const char* end) {
+ base_icu::UChar32 c;
+ int offset = 0;
+ CBU8_NEXT(*p, offset, end - *p, c);
+ *p += offset;
+ return c;
+ }
+};
+
+struct NextCharUTF16 {
+ base_icu::UChar32 operator()(const char16** p, const char16* end) {
+ base_icu::UChar32 c;
+ int offset = 0;
+ CBU16_NEXT(*p, offset, end - *p, c);
+ *p += offset;
+ return c;
+ }
+};
+
+} // namespace
+
+bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) {
+ return MatchPatternT(eval.data(), eval.data() + eval.size(),
+ pattern.data(), pattern.data() + pattern.size(),
+ 0, NextCharUTF8());
+}
+
+bool MatchPattern(const StringPiece16& eval, const StringPiece16& pattern) {
+ return MatchPatternT(eval.data(), eval.data() + eval.size(),
+ pattern.data(), pattern.data() + pattern.size(),
+ 0, NextCharUTF16());
+}
+
+} // namespace base
« no previous file with comments | « base/strings/pattern.h ('k') | base/strings/pattern_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698