base/strings/pattern.cc - Issue 1641513004: Update //base to chromium 9659b08ea5a34f889dc4166217f438095ddc10d2

Unified Diff: base/strings/pattern.cc

Issue 1641513004: Update //base to chromium 9659b08ea5a34f889dc4166217f438095ddc10d2 (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: base/strings/pattern.cc

diff --git a/base/strings/pattern.cc b/base/strings/pattern.cc

new file mode 100644

index 0000000000000000000000000000000000000000..56915fe9f383bfca7a5df2760131de31a35f962e

--- /dev/null

+++ b/base/strings/pattern.cc

@@ -0,0 +1,171 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "base/strings/pattern.h"

+#include "base/third_party/icu/icu_utf.h"

+namespace base {

+namespace {

+// Returns true when |character| is one of the two wildcard characters
+// recognized in patterns: '*' or '?'.
+static bool IsWildcard(base_icu::UChar32 character) {
+  return character == '*' || character == '?';
+}

+// Moves the string pointers to the point where they start to differ.
+//
+// Advances |*pattern| and |*string| in lockstep for as long as the code
+// points that |next| decodes from them are equal. Stops when either range is
+// exhausted, when |*pattern| sits on an unescaped wildcard, or at the first
+// mismatch. A backslash in the pattern escapes the character that follows it,
+// so an escaped wildcard is compared literally against the string.
+template <typename CHAR, typename NEXT>
+static void EatSameChars(const CHAR** pattern,
+                         const CHAR* pattern_end,
+                         const CHAR** string,
+                         const CHAR* string_end,
+                         NEXT next) {
+  // Position of the pending escape character, or NULL when the current
+  // pattern character is not escaped.
+  const CHAR* escape = NULL;
+  while (*pattern != pattern_end && *string != string_end) {
+    if (!escape && IsWildcard(**pattern)) {
+      // We don't want to match a wildcard here, except if it's escaped.
+      return;
+    }
+    // Check if the escapement char is found. If so, skip it and move to the
+    // next character.
+    if (!escape && **pattern == '\\') {
+      escape = *pattern;
+      next(pattern, pattern_end);
+      continue;
+    }
+    // Check if the chars match, if so, increment the ptrs. Decoding works on
+    // copies so that the real pointers only move on a successful match.
+    const CHAR* pattern_next = *pattern;
+    const CHAR* string_next = *string;
+    base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
+    // A decoded value of CBU_SENTINEL never counts as a match, even when both
+    // sides produce it.
+    if (pattern_char == next(&string_next, string_end) &&
+        pattern_char != CBU_SENTINEL) {
+      *pattern = pattern_next;
+      *string = string_next;
+    } else {
+      // Uh oh, it did not match, we are done. If the last char was an
+      // escapement, that means that it was an error to advance the ptr here,
+      // let's put it back where it was. This also means that the MatchPattern
+      // function will return false because if we can't match an escape char
+      // here, then no one will.
+      if (escape) {
+        *pattern = escape;
+      }
+      return;
+    }
+    escape = NULL;
+  }
+}

+// Advances |*pattern| past a run of consecutive wildcard characters ('*' or
+// '?'), stopping at the first non-wildcard character or at |end|.
+template <typename CHAR, typename NEXT>
+static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
+  while (*pattern != end) {
+    if (!IsWildcard(**pattern))
+      return;
+    next(pattern, end);
+  }
+}

+// Returns true if the string [eval, eval_end) matches the wildcard pattern
+// [pattern, pattern_end).
+//
+// |next| decodes one code point from a range and advances the pointer it is
+// given. |depth| is the current recursion depth: the public entry points pass
+// 0, each recursive call adds one, and the match is abandoned (returning
+// false) once |depth| exceeds kMaxDepth.
+//
+// As implemented here, '?' matches zero or one character and '*' matches any
+// run of characters, including an empty one. Wildcards that directly follow a
+// '*' are collapsed into it.
+template <typename CHAR, typename NEXT>
+static bool MatchPatternT(const CHAR* eval,
+                          const CHAR* eval_end,
+                          const CHAR* pattern,
+                          const CHAR* pattern_end,
+                          int depth,
+                          NEXT next) {
+  const int kMaxDepth = 16;
+  if (depth > kMaxDepth)
+    return false;
+  // Eat all the matching chars.
+  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
+  // If the string is empty, then the pattern must be empty too, or contains
+  // only wildcards.
+  if (eval == eval_end) {
+    EatWildcard(&pattern, pattern_end, next);
+    return pattern == pattern_end;
+  }
+  // Pattern is empty but not string, this is not a match.
+  if (pattern == pattern_end)
+    return false;
+  // If this is a question mark, then we need to compare the rest with
+  // the current string or the string with one character eaten.
+  const CHAR* next_pattern = pattern;
+  next(&next_pattern, pattern_end);
+  if (pattern[0] == '?') {
+    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, depth + 1,
+                      next))
+      return true;
+    const CHAR* next_eval = eval;
+    next(&next_eval, eval_end);
+    if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end, depth + 1,
+                      next))
+      return true;
+  }
+  // This is a *, try to match all the possible substrings with the remainder
+  // of the pattern.
+  if (pattern[0] == '*') {
+    // Collapse duplicate wild cards (********** into *) so that the
+    // method does not recurse unnecessarily. http://crbug.com/52839
+    EatWildcard(&next_pattern, pattern_end, next);
+    while (eval != eval_end) {
+      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, depth + 1,
+                        next))
+        return true;
+      // Step over one whole code point, as the '?' branch does, rather than
+      // one code unit, so a retry never starts inside a multi-unit sequence.
+      next(&eval, eval_end);
+    }
+    // We reached the end of the string, let's see if the pattern contains
+    // only wildcards.
+    if (eval == eval_end) {
+      EatWildcard(&pattern, pattern_end, next);
+      if (pattern != pattern_end)
+        return false;
+      return true;
+    }
+  }
+  return false;
+}

+// Functor that reads one code point from the UTF-8 range [*p, end) using
+// CBU8_NEXT, moves |*p| forward by the number of code units the macro
+// consumed, and returns the value the macro produced.
+struct NextCharUTF8 {
+  base_icu::UChar32 operator()(const char** p, const char* end) {
+    int consumed = 0;
+    base_icu::UChar32 code_point;
+    CBU8_NEXT(*p, consumed, end - *p, code_point);
+    *p += consumed;
+    return code_point;
+  }
+};

+// Functor that reads one code point from the UTF-16 range [*p, end) using
+// CBU16_NEXT, moves |*p| forward by the number of code units the macro
+// consumed, and returns the value the macro produced.
+struct NextCharUTF16 {
+  base_icu::UChar32 operator()(const char16** p, const char16* end) {
+    int consumed = 0;
+    base_icu::UChar32 code_point;
+    CBU16_NEXT(*p, consumed, end - *p, code_point);
+    *p += consumed;
+    return code_point;
+  }
+};

+} // namespace

+// Returns true if |eval| matches the wildcard |pattern|. Both are walked as
+// UTF-8 by handing their byte ranges to MatchPatternT with a NextCharUTF8
+// decoder and an initial recursion depth of 0.
+bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) {
+  return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(),
+                       pattern.data() + pattern.size(), 0, NextCharUTF8());
+}

+// Returns true if |eval| matches the wildcard |pattern|. Both are walked as
+// UTF-16 by handing their code-unit ranges to MatchPatternT with a
+// NextCharUTF16 decoder and an initial recursion depth of 0.
+bool MatchPattern(const StringPiece16& eval, const StringPiece16& pattern) {
+  return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(),
+                       pattern.data() + pattern.size(), 0, NextCharUTF16());
+}

+} // namespace base

« no previous file with comments | « base/strings/pattern.h ('k') | base/strings/pattern_unittest.cc » ('j') | no next file with comments »