| Index: base/strings/pattern.cc
|
| diff --git a/base/strings/pattern.cc b/base/strings/pattern.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..8739c95637a462a1f860eeeb5a431f4062d31398
|
| --- /dev/null
|
| +++ b/base/strings/pattern.cc
|
| @@ -0,0 +1,169 @@
|
| +// Copyright 2015 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "base/strings/pattern.h"
|
| +
|
| +#include "base/third_party/icu/icu_utf.h"
|
| +
|
| +namespace base {
|
| +
|
| +namespace {
|
| +
|
| +static bool IsWildcard(base_icu::UChar32 character) {
|
| + return character == '*' || character == '?';
|
| +}
|
| +
|
| +// Move the strings pointers to the point where they start to differ.
|
| +template <typename CHAR, typename NEXT>
|
| +static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
|
| + const CHAR** string, const CHAR* string_end,
|
| + NEXT next) {
|
| + const CHAR* escape = NULL;
|
| + while (*pattern != pattern_end && *string != string_end) {
|
| + if (!escape && IsWildcard(**pattern)) {
|
| + // We don't want to match wildcard here, except if it's escaped.
|
| + return;
|
| + }
|
| +
|
| + // Check if the escapement char is found. If so, skip it and move to the
|
| + // next character.
|
| + if (!escape && **pattern == '\\') {
|
| + escape = *pattern;
|
| + next(pattern, pattern_end);
|
| + continue;
|
| + }
|
| +
|
| + // Check if the chars match, if so, increment the ptrs.
|
| + const CHAR* pattern_next = *pattern;
|
| + const CHAR* string_next = *string;
|
| + base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
|
| + if (pattern_char == next(&string_next, string_end) &&
|
| + pattern_char != CBU_SENTINEL) {
|
| + *pattern = pattern_next;
|
| + *string = string_next;
|
| + } else {
|
| + // Uh oh, it did not match, we are done. If the last char was an
|
| + // escapement, that means that it was an error to advance the ptr here,
|
| + // let's put it back where it was. This also mean that the MatchPattern
|
| + // function will return false because if we can't match an escape char
|
| + // here, then no one will.
|
| + if (escape) {
|
| + *pattern = escape;
|
| + }
|
| + return;
|
| + }
|
| +
|
| + escape = NULL;
|
| + }
|
| +}
|
| +
|
| +template <typename CHAR, typename NEXT>
|
| +static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
|
| + while (*pattern != end) {
|
| + if (!IsWildcard(**pattern))
|
| + return;
|
| + next(pattern, end);
|
| + }
|
| +}
|
| +
|
| +template <typename CHAR, typename NEXT>
|
| +static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
|
| + const CHAR* pattern, const CHAR* pattern_end,
|
| + int depth,
|
| + NEXT next) {
|
| + const int kMaxDepth = 16;
|
| + if (depth > kMaxDepth)
|
| + return false;
|
| +
|
| + // Eat all the matching chars.
|
| + EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
|
| +
|
| + // If the string is empty, then the pattern must be empty too, or contains
|
| + // only wildcards.
|
| + if (eval == eval_end) {
|
| + EatWildcard(&pattern, pattern_end, next);
|
| + return pattern == pattern_end;
|
| + }
|
| +
|
| + // Pattern is empty but not string, this is not a match.
|
| + if (pattern == pattern_end)
|
| + return false;
|
| +
|
| + // If this is a question mark, then we need to compare the rest with
|
| + // the current string or the string with one character eaten.
|
| + const CHAR* next_pattern = pattern;
|
| + next(&next_pattern, pattern_end);
|
| + if (pattern[0] == '?') {
|
| + if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
|
| + depth + 1, next))
|
| + return true;
|
| + const CHAR* next_eval = eval;
|
| + next(&next_eval, eval_end);
|
| + if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
|
| + depth + 1, next))
|
| + return true;
|
| + }
|
| +
|
| + // This is a *, try to match all the possible substrings with the remainder
|
| + // of the pattern.
|
| + if (pattern[0] == '*') {
|
| + // Collapse duplicate wild cards (********** into *) so that the
|
| + // method does not recurse unnecessarily. http://crbug.com/52839
|
| + EatWildcard(&next_pattern, pattern_end, next);
|
| +
|
| + while (eval != eval_end) {
|
| + if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
|
| + depth + 1, next))
|
| + return true;
|
| + eval++;
|
| + }
|
| +
|
| + // We reached the end of the string, let see if the pattern contains only
|
| + // wildcards.
|
| + if (eval == eval_end) {
|
| + EatWildcard(&pattern, pattern_end, next);
|
| + if (pattern != pattern_end)
|
| + return false;
|
| + return true;
|
| + }
|
| + }
|
| +
|
| + return false;
|
| +}
|
| +
|
| +struct NextCharUTF8 {
|
| + base_icu::UChar32 operator()(const char** p, const char* end) {
|
| + base_icu::UChar32 c;
|
| + int offset = 0;
|
| + CBU8_NEXT(*p, offset, end - *p, c);
|
| + *p += offset;
|
| + return c;
|
| + }
|
| +};
|
| +
|
| +struct NextCharUTF16 {
|
| + base_icu::UChar32 operator()(const char16** p, const char16* end) {
|
| + base_icu::UChar32 c;
|
| + int offset = 0;
|
| + CBU16_NEXT(*p, offset, end - *p, c);
|
| + *p += offset;
|
| + return c;
|
| + }
|
| +};
|
| +
|
| +} // namespace
|
| +
|
| +bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) {
|
| + return MatchPatternT(eval.data(), eval.data() + eval.size(),
|
| + pattern.data(), pattern.data() + pattern.size(),
|
| + 0, NextCharUTF8());
|
| +}
|
| +
|
| +bool MatchPattern(const StringPiece16& eval, const StringPiece16& pattern) {
|
| + return MatchPatternT(eval.data(), eval.data() + eval.size(),
|
| + pattern.data(), pattern.data() + pattern.size(),
|
| + 0, NextCharUTF16());
|
| +}
|
| +
|
| +} // namespace base
|
|
|