base/strings/string_util.cc - Issue 1226673003: Move MatchPattern to its own header and the base namespace.

Side by Side Diff: base/strings/string_util.cc

Issue 1226673003: Move MatchPattern to its own header and the base namespace. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 5 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "base/strings/string_util.h"	5 #include "base/strings/string_util.h"

6	6

7 #include <ctype.h>	7 #include <ctype.h>

8 #include <errno.h>	8 #include <errno.h>

9 #include <math.h>	9 #include <math.h>

10 #include <stdarg.h>	10 #include <stdarg.h>

(...skipping 930 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
941 std::vector<string16> subst;	941 std::vector<string16> subst;

942 subst.push_back(a);	942 subst.push_back(a);

943 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);	943 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);

944	944

945 DCHECK_EQ(1U, offsets.size());	945 DCHECK_EQ(1U, offsets.size());

946 if (offset)	946 if (offset)

947 *offset = offsets[0];	947 *offset = offsets[0];

948 return result;	948 return result;

949 }	949 }

950	950

951 static bool IsWildcard(base_icu::UChar32 character) {

952 return character == '*' || character == '?';

953 }

954

955 // Move the strings pointers to the point where they start to differ.

956 template <typename CHAR, typename NEXT>

957 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,

958 const CHAR** string, const CHAR* string_end,

959 NEXT next) {

960 const CHAR* escape = NULL;

961 while (*pattern != pattern_end && *string != string_end) {

962 if (!escape && IsWildcard(**pattern)) {

963 // We don't want to match wildcard here, except if it's escaped.

964 return;

965 }

966

967 // Check if the escapement char is found. If so, skip it and move to the

968 // next character.

969 if (!escape && **pattern == '\\') {

970 escape = *pattern;

971 next(pattern, pattern_end);

972 continue;

973 }

974

975 // Check if the chars match, if so, increment the ptrs.

976 const CHAR* pattern_next = *pattern;

977 const CHAR* string_next = *string;

978 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);

979 if (pattern_char == next(&string_next, string_end) &&

980 pattern_char != CBU_SENTINEL) {

981 *pattern = pattern_next;

982 *string = string_next;

983 } else {

984 // Uh oh, it did not match, we are done. If the last char was an

985 // escapement, that means that it was an error to advance the ptr here,

986 // let's put it back where it was. This also mean that the MatchPattern

987 // function will return false because if we can't match an escape char

988 // here, then no one will.

989 if (escape) {

990 *pattern = escape;

991 }

992 return;

993 }

994

995 escape = NULL;

996 }

997 }

998

999 template <typename CHAR, typename NEXT>

1000 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {

1001 while (*pattern != end) {

1002 if (!IsWildcard(**pattern))

1003 return;

1004 next(pattern, end);

1005 }

1006 }

1007

1008 template <typename CHAR, typename NEXT>

1009 static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,

1010 const CHAR* pattern, const CHAR* pattern_end,

1011 int depth,

1012 NEXT next) {

1013 const int kMaxDepth = 16;

1014 if (depth > kMaxDepth)

1015 return false;

1016

1017 // Eat all the matching chars.

1018 EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

1019

1020 // If the string is empty, then the pattern must be empty too, or contains

1021 // only wildcards.

1022 if (eval == eval_end) {

1023 EatWildcard(&pattern, pattern_end, next);

1024 return pattern == pattern_end;

1025 }

1026

1027 // Pattern is empty but not string, this is not a match.

1028 if (pattern == pattern_end)

1029 return false;

1030

1031 // If this is a question mark, then we need to compare the rest with

1032 // the current string or the string with one character eaten.

1033 const CHAR* next_pattern = pattern;

1034 next(&next_pattern, pattern_end);

1035 if (pattern[0] == '?') {

1036 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,

1037 depth + 1, next))

1038 return true;

1039 const CHAR* next_eval = eval;

1040 next(&next_eval, eval_end);

1041 if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,

1042 depth + 1, next))

1043 return true;

1044 }

1045

1046 // This is a *, try to match all the possible substrings with the remainder

1047 // of the pattern.

1048 if (pattern[0] == '*') {

1049 // Collapse duplicate wild cards (********** into *) so that the

1050 // method does not recurse unnecessarily. http://crbug.com/52839

1051 EatWildcard(&next_pattern, pattern_end, next);

1052

1053 while (eval != eval_end) {

1054 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,

1055 depth + 1, next))

1056 return true;

1057 eval++;

1058 }

1059

1060 // We reached the end of the string, let see if the pattern contains only

1061 // wildcards.

1062 if (eval == eval_end) {

1063 EatWildcard(&pattern, pattern_end, next);

1064 if (pattern != pattern_end)

1065 return false;

1066 return true;

1067 }

1068 }

1069

1070 return false;

1071 }

1072

1073 struct NextCharUTF8 {

1074 base_icu::UChar32 operator()(const char** p, const char* end) {

1075 base_icu::UChar32 c;

1076 int offset = 0;

1077 CBU8_NEXT(*p, offset, end - *p, c);

1078 *p += offset;

1079 return c;

1080 }

1081 };

1082

1083 struct NextCharUTF16 {

1084 base_icu::UChar32 operator()(const char16** p, const char16* end) {

1085 base_icu::UChar32 c;

1086 int offset = 0;

1087 CBU16_NEXT(*p, offset, end - *p, c);

1088 *p += offset;

1089 return c;

1090 }

1091 };

1092

1093 bool MatchPattern(const base::StringPiece& eval,

1094 const base::StringPiece& pattern) {

1095 return MatchPatternT(eval.data(), eval.data() + eval.size(),

1096 pattern.data(), pattern.data() + pattern.size(),

1097 0, NextCharUTF8());

1098 }

1099

1100 bool MatchPattern(const string16& eval, const string16& pattern) {

1101 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),

1102 pattern.c_str(), pattern.c_str() + pattern.size(),

1103 0, NextCharUTF16());

1104 }

1105

1106 // The following code is compatible with the OpenBSD lcpy interface. See:	951 // The following code is compatible with the OpenBSD lcpy interface. See:

1107 // http://www.gratisoft.us/todd/papers/strlcpy.html	952 // http://www.gratisoft.us/todd/papers/strlcpy.html

1108 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c	953 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c

1109	954

1110 namespace {	955 namespace {

1111	956

1112 template <typename CHAR>	957 template <typename CHAR>

1113 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {	958 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {

1114 for (size_t i = 0; i < dst_size; ++i) {	959 for (size_t i = 0; i < dst_size; ++i) {

1115 if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL.	960 if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL.

(...skipping 10 matching lines...) Expand all Loading...
1126 }	971 }

1127	972

1128 } // namespace	973 } // namespace

1129	974

1130 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {	975 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {

1131 return lcpyT<char>(dst, src, dst_size);	976 return lcpyT<char>(dst, src, dst_size);

1132 }	977 }

1133 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {	978 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {

1134 return lcpyT<wchar_t>(dst, src, dst_size);	979 return lcpyT<wchar_t>(dst, src, dst_size);

1135 }	980 }

OLD	NEW

« no previous file with comments | « base/strings/string_util.h ('k') | base/strings/string_util_unittest.cc » ('j') | no next file with comments »