OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/strings/string_util.h" | 5 #include "base/strings/string_util.h" |
6 | 6 |
7 #include <ctype.h> | 7 #include <ctype.h> |
8 #include <errno.h> | 8 #include <errno.h> |
9 #include <math.h> | 9 #include <math.h> |
10 #include <stdarg.h> | 10 #include <stdarg.h> |
(...skipping 930 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
941 std::vector<string16> subst; | 941 std::vector<string16> subst; |
942 subst.push_back(a); | 942 subst.push_back(a); |
943 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); | 943 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); |
944 | 944 |
945 DCHECK_EQ(1U, offsets.size()); | 945 DCHECK_EQ(1U, offsets.size()); |
946 if (offset) | 946 if (offset) |
947 *offset = offsets[0]; | 947 *offset = offsets[0]; |
948 return result; | 948 return result; |
949 } | 949 } |
950 | 950 |
951 static bool IsWildcard(base_icu::UChar32 character) { | |
952 return character == '*' || character == '?'; | |
953 } | |
954 | |
955 // Move the strings pointers to the point where they start to differ. | |
956 template <typename CHAR, typename NEXT> | |
957 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, | |
958 const CHAR** string, const CHAR* string_end, | |
959 NEXT next) { | |
960 const CHAR* escape = NULL; | |
961 while (*pattern != pattern_end && *string != string_end) { | |
962 if (!escape && IsWildcard(**pattern)) { | |
963 // We don't want to match wildcard here, except if it's escaped. | |
964 return; | |
965 } | |
966 | |
967 // Check if the escapement char is found. If so, skip it and move to the | |
968 // next character. | |
969 if (!escape && **pattern == '\\') { | |
970 escape = *pattern; | |
971 next(pattern, pattern_end); | |
972 continue; | |
973 } | |
974 | |
975 // Check if the chars match, if so, increment the ptrs. | |
976 const CHAR* pattern_next = *pattern; | |
977 const CHAR* string_next = *string; | |
978 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); | |
979 if (pattern_char == next(&string_next, string_end) && | |
980 pattern_char != CBU_SENTINEL) { | |
981 *pattern = pattern_next; | |
982 *string = string_next; | |
983 } else { | |
984 // Uh oh, it did not match, we are done. If the last char was an | |
985 // escapement, that means that it was an error to advance the ptr here, | |
986 // let's put it back where it was. This also mean that the MatchPattern | |
987 // function will return false because if we can't match an escape char | |
988 // here, then no one will. | |
989 if (escape) { | |
990 *pattern = escape; | |
991 } | |
992 return; | |
993 } | |
994 | |
995 escape = NULL; | |
996 } | |
997 } | |
998 | |
// Advances |*pattern| past a run of consecutive wildcard characters, stopping
// at the first non-wildcard character or at |end|.
template <typename CHAR, typename NEXT>
static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
  while (*pattern != end && IsWildcard(**pattern))
    next(pattern, end);
}
1007 | |
// Returns true if the string [eval, eval_end) matches the wildcard pattern
// [pattern, pattern_end).
//
// Pattern syntax, as implemented below and in EatSameChars():
//   '*'   matches any run of characters, including an empty one.
//   '?'   matches zero or one character.
//   '\\'  escapes the following pattern character so it is matched literally.
//
// |depth| is the current recursion depth; callers start at 0. Once it exceeds
// kMaxDepth the function gives up and reports "no match", which bounds the
// work done for patterns with many wildcards.
//
// |next| is called as next(&ptr, end): it advances |ptr| past one character
// and returns that character. It must advance |ptr| by at least one element
// whenever ptr != end, otherwise the '*' scan below would not terminate.
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                          const CHAR* pattern, const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Eat all the matching chars.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // If the string is empty, then the pattern must be empty too, or contain
  // only wildcards.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Pattern is empty but not string, this is not a match.
  if (pattern == pattern_end)
    return false;

  // |next_pattern| points just past the pattern character under examination.
  const CHAR* next_pattern = pattern;
  next(&next_pattern, pattern_end);

  // If this is a question mark, then we need to compare the rest of the
  // pattern with the current string or with the string with one character
  // eaten.
  if (pattern[0] == '?') {
    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
    const CHAR* next_eval = eval;
    next(&next_eval, eval_end);
    if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
                      depth + 1, next))
      return true;
  }

  // This is a *, try to match all the possible substrings with the remainder
  // of the pattern.
  if (pattern[0] == '*') {
    // Collapse duplicate wild cards (********** into *) so that the
    // method does not recurse unnecessarily. http://crbug.com/52839
    EatWildcard(&next_pattern, pattern_end, next);

    while (eval != eval_end) {
      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                        depth + 1, next))
        return true;
      // Advance by one whole character, like every other step in this
      // matcher. Stepping by a single code unit would retry from the middle
      // of a multi-unit character (UTF-8 trail bytes, or the second half of
      // a UTF-16 surrogate pair).
      next(&eval, eval_end);
    }

    // We reached the end of the string; it is a match only if what is left
    // of the pattern consists solely of wildcards.
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  return false;
}
1072 | |
1073 struct NextCharUTF8 { | |
1074 base_icu::UChar32 operator()(const char** p, const char* end) { | |
1075 base_icu::UChar32 c; | |
1076 int offset = 0; | |
1077 CBU8_NEXT(*p, offset, end - *p, c); | |
1078 *p += offset; | |
1079 return c; | |
1080 } | |
1081 }; | |
1082 | |
1083 struct NextCharUTF16 { | |
1084 base_icu::UChar32 operator()(const char16** p, const char16* end) { | |
1085 base_icu::UChar32 c; | |
1086 int offset = 0; | |
1087 CBU16_NEXT(*p, offset, end - *p, c); | |
1088 *p += offset; | |
1089 return c; | |
1090 } | |
1091 }; | |
1092 | |
1093 bool MatchPattern(const base::StringPiece& eval, | |
1094 const base::StringPiece& pattern) { | |
1095 return MatchPatternT(eval.data(), eval.data() + eval.size(), | |
1096 pattern.data(), pattern.data() + pattern.size(), | |
1097 0, NextCharUTF8()); | |
1098 } | |
1099 | |
1100 bool MatchPattern(const string16& eval, const string16& pattern) { | |
1101 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(), | |
1102 pattern.c_str(), pattern.c_str() + pattern.size(), | |
1103 0, NextCharUTF16()); | |
1104 } | |
1105 | |
1106 // The following code is compatible with the OpenBSD lcpy interface. See: | 951 // The following code is compatible with the OpenBSD lcpy interface. See: |
1107 // http://www.gratisoft.us/todd/papers/strlcpy.html | 952 // http://www.gratisoft.us/todd/papers/strlcpy.html |
1108 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c | 953 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c |
1109 | 954 |
1110 namespace { | 955 namespace { |
1111 | 956 |
1112 template <typename CHAR> | 957 template <typename CHAR> |
1113 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { | 958 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { |
1114 for (size_t i = 0; i < dst_size; ++i) { | 959 for (size_t i = 0; i < dst_size; ++i) { |
1115 if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. | 960 if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. |
(...skipping 10 matching lines...) Expand all Loading... |
1126 } | 971 } |
1127 | 972 |
1128 } // namespace | 973 } // namespace |
1129 | 974 |
1130 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) { | 975 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) { |
1131 return lcpyT<char>(dst, src, dst_size); | 976 return lcpyT<char>(dst, src, dst_size); |
1132 } | 977 } |
1133 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { | 978 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { |
1134 return lcpyT<wchar_t>(dst, src, dst_size); | 979 return lcpyT<wchar_t>(dst, src, dst_size); |
1135 } | 980 } |
OLD | NEW |