Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(310)

Side by Side Diff: base/strings/string_util.cc

Issue 1226673003: Move MatchPattern to its own header and the base namespace. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « base/strings/string_util.h ('k') | base/strings/string_util_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/strings/string_util.h" 5 #include "base/strings/string_util.h"
6 6
7 #include <ctype.h> 7 #include <ctype.h>
8 #include <errno.h> 8 #include <errno.h>
9 #include <math.h> 9 #include <math.h>
10 #include <stdarg.h> 10 #include <stdarg.h>
(...skipping 930 matching lines...) Expand 10 before | Expand all | Expand 10 after
941 std::vector<string16> subst; 941 std::vector<string16> subst;
942 subst.push_back(a); 942 subst.push_back(a);
943 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); 943 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
944 944
945 DCHECK_EQ(1U, offsets.size()); 945 DCHECK_EQ(1U, offsets.size());
946 if (offset) 946 if (offset)
947 *offset = offsets[0]; 947 *offset = offsets[0];
948 return result; 948 return result;
949 } 949 }
950 950
951 static bool IsWildcard(base_icu::UChar32 character) {
952 return character == '*' || character == '?';
953 }
954
955 // Move the string pointers to the point where they start to differ.
956 template <typename CHAR, typename NEXT>
957 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
958 const CHAR** string, const CHAR* string_end,
959 NEXT next) {
960 const CHAR* escape = NULL;
961 while (*pattern != pattern_end && *string != string_end) {
962 if (!escape && IsWildcard(**pattern)) {
963 // We don't want to match a wildcard here, unless it's escaped.
964 return;
965 }
966
967 // Check if the escape char is found. If so, skip it and move to the
968 // next character.
969 if (!escape && **pattern == '\\') {
970 escape = *pattern;
971 next(pattern, pattern_end);
972 continue;
973 }
974
975 // Check if the chars match, if so, increment the ptrs.
976 const CHAR* pattern_next = *pattern;
977 const CHAR* string_next = *string;
978 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
979 if (pattern_char == next(&string_next, string_end) &&
980 pattern_char != CBU_SENTINEL) {
981 *pattern = pattern_next;
982 *string = string_next;
983 } else {
984 // Uh oh, it did not match, so we are done. If the last char was an
985 // escape char, it was an error to advance the ptr here, so
986 // let's put it back where it was. This also means that the MatchPattern
987 // function will return false, because if we can't match an escape char
988 // here, then no one will.
989 if (escape) {
990 *pattern = escape;
991 }
992 return;
993 }
994
995 escape = NULL;
996 }
997 }
998
999 template <typename CHAR, typename NEXT>
1000 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
1001 while (*pattern != end) {
1002 if (!IsWildcard(**pattern))
1003 return;
1004 next(pattern, end);
1005 }
1006 }
1007
1008 template <typename CHAR, typename NEXT>
1009 static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
1010 const CHAR* pattern, const CHAR* pattern_end,
1011 int depth,
1012 NEXT next) {
1013 const int kMaxDepth = 16;
1014 if (depth > kMaxDepth)
1015 return false;
1016
1017 // Eat all the matching chars.
1018 EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
1019
1020 // If the string is empty, then the pattern must be empty too, or contain
1021 // only wildcards.
1022 if (eval == eval_end) {
1023 EatWildcard(&pattern, pattern_end, next);
1024 return pattern == pattern_end;
1025 }
1026
1027 // The pattern is empty but the string is not; this is not a match.
1028 if (pattern == pattern_end)
1029 return false;
1030
1031 // If this is a question mark, then we need to compare the rest with
1032 // the current string or the string with one character eaten.
1033 const CHAR* next_pattern = pattern;
1034 next(&next_pattern, pattern_end);
1035 if (pattern[0] == '?') {
1036 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
1037 depth + 1, next))
1038 return true;
1039 const CHAR* next_eval = eval;
1040 next(&next_eval, eval_end);
1041 if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
1042 depth + 1, next))
1043 return true;
1044 }
1045
1046 // This is a *, try to match all the possible substrings with the remainder
1047 // of the pattern.
1048 if (pattern[0] == '*') {
1049 // Collapse duplicate wild cards (********** into *) so that the
1050 // method does not recurse unnecessarily. http://crbug.com/52839
1051 EatWildcard(&next_pattern, pattern_end, next);
1052
1053 while (eval != eval_end) {
1054 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
1055 depth + 1, next))
1056 return true;
1057 eval++;
1058 }
1059
1060 // We reached the end of the string; let's see if the pattern contains only
1061 // wildcards.
1062 if (eval == eval_end) {
1063 EatWildcard(&pattern, pattern_end, next);
1064 if (pattern != pattern_end)
1065 return false;
1066 return true;
1067 }
1068 }
1069
1070 return false;
1071 }
1072
1073 struct NextCharUTF8 {
1074 base_icu::UChar32 operator()(const char** p, const char* end) {
1075 base_icu::UChar32 c;
1076 int offset = 0;
1077 CBU8_NEXT(*p, offset, end - *p, c);
1078 *p += offset;
1079 return c;
1080 }
1081 };
1082
1083 struct NextCharUTF16 {
1084 base_icu::UChar32 operator()(const char16** p, const char16* end) {
1085 base_icu::UChar32 c;
1086 int offset = 0;
1087 CBU16_NEXT(*p, offset, end - *p, c);
1088 *p += offset;
1089 return c;
1090 }
1091 };
1092
1093 bool MatchPattern(const base::StringPiece& eval,
1094 const base::StringPiece& pattern) {
1095 return MatchPatternT(eval.data(), eval.data() + eval.size(),
1096 pattern.data(), pattern.data() + pattern.size(),
1097 0, NextCharUTF8());
1098 }
1099
1100 bool MatchPattern(const string16& eval, const string16& pattern) {
1101 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
1102 pattern.c_str(), pattern.c_str() + pattern.size(),
1103 0, NextCharUTF16());
1104 }
1105
1106 // The following code is compatible with the OpenBSD lcpy interface. See: 951 // The following code is compatible with the OpenBSD lcpy interface. See:
1107 // http://www.gratisoft.us/todd/papers/strlcpy.html 952 // http://www.gratisoft.us/todd/papers/strlcpy.html
1108 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c 953 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
1109 954
1110 namespace { 955 namespace {
1111 956
1112 template <typename CHAR> 957 template <typename CHAR>
1113 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { 958 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
1114 for (size_t i = 0; i < dst_size; ++i) { 959 for (size_t i = 0; i < dst_size; ++i) {
1115 if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. 960 if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL.
(...skipping 10 matching lines...) Expand all
1126 } 971 }
1127 972
1128 } // namespace 973 } // namespace
1129 974
1130 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) { 975 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
1131 return lcpyT<char>(dst, src, dst_size); 976 return lcpyT<char>(dst, src, dst_size);
1132 } 977 }
1133 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { 978 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
1134 return lcpyT<wchar_t>(dst, src, dst_size); 979 return lcpyT<wchar_t>(dst, src, dst_size);
1135 } 980 }
OLDNEW
« no previous file with comments | « base/strings/string_util.h ('k') | base/strings/string_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698