Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 980 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 991 | 991 |
| 992 // buffers reused by BoyerMoore | 992 // buffers reused by BoyerMoore |
| 993 static int bad_char_occurence[kBMAlphabetSize]; | 993 static int bad_char_occurence[kBMAlphabetSize]; |
| 994 static BMGoodSuffixBuffers bmgs_buffers; | 994 static BMGoodSuffixBuffers bmgs_buffers; |
| 995 | 995 |
| 996 // Restricted Boyer-Moore string matching. Restricts tables to a | 996 // Restricted Boyer-Moore string matching. Restricts tables to a |
| 997 // suffix of long pattern strings and handles only equivalence classes | 997 // suffix of long pattern strings and handles only equivalence classes |
| 998 // of the full alphabet. This allows us to ensure that tables take only | 998 // of the full alphabet. This allows us to ensure that tables take only |
| 999 // a fixed amount of space. | 999 // a fixed amount of space. |
| 1000 template <typename schar, typename pchar> | 1000 template <typename schar, typename pchar> |
| 1001 static int BoyerMooreIndexOf(Vector<const schar> subject, | 1001 static int BoyerMooreIndexOf(Vector<const schar> subject, |
|
Erik Corry
2008/10/14 12:42:05
This function is too big now. It has to be split
| |
| 1002 Vector<const pchar> pattern, | 1002 Vector<const pchar> pattern, |
| 1003 int start_index) { | 1003 int start_index) { |
| 1004 int m = pattern.length(); | 1004 int m = pattern.length(); |
| 1005 int n = subject.length(); | 1005 int n = subject.length(); |
| 1006 | 1006 |
| 1007 // Only preprocess at most kBMMaxShift last characters of pattern. | 1007 // Only preprocess at most kBMMaxShift last characters of pattern. |
| 1008 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; | 1008 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; |
| 1009 int len = m - start; | 1009 int len = m - start; |
| 1010 | 1010 |
| 1011 // Run forwards to populate bad_char_table, so that *last* instance | 1011 // Run forwards to populate bad_char_table, so that *last* instance |
| 1012 // of character equivalence class is the one registered. | 1012 // of character equivalence class is the one registered. |
| 1013 // Notice: Doesn't include last character. | 1013 // Notice: Doesn't include last character. |
| 1014 for (int i = 0; i < kBMAlphabetSize; i++) { | 1014 for (int i = 0; i < kBMAlphabetSize; i++) { |
| 1015 bad_char_occurence[i] = start - 1; | 1015 bad_char_occurence[i] = start - 1; |
| 1016 } | 1016 } |
| 1017 for (int i = start; i < m; i++) { | 1017 for (int i = start; i < m; i++) { |
| 1018 uc32 c = pattern[i]; | 1018 uc32 c = pattern[i]; |
| 1019 bad_char_occurence[c % kBMAlphabetSize] = i; | 1019 bad_char_occurence[c % kBMAlphabetSize] = i; |
| 1020 if (sizeof(schar) == 1 && | 1020 if (sizeof(schar) == 1 && |
| 1021 sizeof(pchar) > 1 && | 1021 sizeof(pchar) > 1 && |
| 1022 c > String::kMaxAsciiCharCode) { | 1022 c > String::kMaxAsciiCharCode) { |
| 1023 return -1; | 1023 return -1; |
| 1024 } | 1024 } |
| 1025 } | 1025 } |
| 1026 // End of Bad Char computation. | 1026 // End of Bad Char computation. |
| 1027 | 1027 |
| 1028 // Compute Good Suffix shift table. | |
| 1029 bmgs_buffers.init(m); | |
| 1030 | 1028 |
| 1031 bmgs_buffers.shift(m-1) = 1; | 1029 |
| 1032 bmgs_buffers.suffix(m) = m + 1; | 1030 int badness = 0; // How bad we are doing without a good-suffix table. |
| 1033 pchar last_char = pattern[m - 1]; | 1031 int idx; // No matches found prior to this index. |
| 1034 int suffix = m + 1; | 1032 // Perform search |
| 1035 for (int i = m; i > start;) { | 1033 for (idx = start_index; idx <= n - m;) { |
| 1036 for (pchar c = pattern[i - 1]; suffix <= m && c != pattern[suffix - 1];) { | 1034 int j = m - 1; |
| 1037 if (bmgs_buffers.shift(suffix) == len) { | 1035 schar c; |
| 1038 bmgs_buffers.shift(suffix) = suffix - i; | 1036 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--; |
| 1037 if (j < 0) { | |
| 1038 return idx; | |
| 1039 } else { | |
| 1040 int bc_occ = bad_char_occurence[c % kBMAlphabetSize]; | |
| 1041 int shift = bc_occ < j ? j - bc_occ : 1; | |
| 1042 idx += shift; | |
| 1043 // Badness increases by the number of characters we have | |
| 1044 // checked, and decreases by the number of characters we | |
| 1045 // can skip by shifting. It's a measure of how we are doing | |
| 1046 // compared to reading each character exactly once. | |
| 1047 badness += (m - j) - shift; | |
| 1048 if (badness > m) break; | |
| 1049 } | |
| 1050 } | |
| 1051 | |
| 1052 // If we are not done, we got here because we should build the Good Suffix | |
| 1053 // table and continue searching. | |
| 1054 if (idx <= n - m) { | |
| 1055 // Compute Good Suffix tables. | |
| 1056 bmgs_buffers.init(m); | |
| 1057 | |
| 1058 bmgs_buffers.shift(m-1) = 1; | |
| 1059 bmgs_buffers.suffix(m) = m + 1; | |
| 1060 pchar last_char = pattern[m - 1]; | |
| 1061 int suffix = m + 1; | |
| 1062 for (int i = m; i > start;) { | |
| 1063 for (pchar c = pattern[i - 1]; suffix <= m && c != pattern[suffix - 1];) { | |
| 1064 if (bmgs_buffers.shift(suffix) == len) { | |
| 1065 bmgs_buffers.shift(suffix) = suffix - i; | |
| 1066 } | |
| 1067 suffix = bmgs_buffers.suffix(suffix); | |
| 1039 } | 1068 } |
| 1040 suffix = bmgs_buffers.suffix(suffix); | 1069 i--; |
| 1041 } | 1070 suffix--; |
| 1042 i--; | 1071 bmgs_buffers.suffix(i) = suffix; |
| 1043 suffix--; | 1072 if (suffix == m) { |
| 1044 bmgs_buffers.suffix(i) = suffix; | 1073 // no suffix to extend, so we check against last_char only. |
| 1045 if (suffix == m) { | 1074 while (i > start && pattern[i - 1] != last_char) { |
| 1046 // no suffix to extend, so we check against last_char only. | 1075 if (bmgs_buffers.shift(m) == len) { |
| 1047 while (i > start && pattern[i - 1] != last_char) { | 1076 bmgs_buffers.shift(m) = m - i; |
| 1048 if (bmgs_buffers.shift(m) == len) { | 1077 } |
| 1049 bmgs_buffers.shift(m) = m - i; | 1078 i--; |
| 1079 bmgs_buffers.suffix(i) = m; | |
| 1050 } | 1080 } |
| 1051 i--; | 1081 if (i > start) { |
| 1052 bmgs_buffers.suffix(i) = m; | 1082 i--; |
| 1053 } | 1083 suffix--; |
| 1054 if (i > start) { | 1084 bmgs_buffers.suffix(i) = suffix; |
| 1055 i--; | 1085 } |
| 1056 suffix--; | |
| 1057 bmgs_buffers.suffix(i) = suffix; | |
| 1058 } | 1086 } |
| 1059 } | 1087 } |
| 1060 } | 1088 if (suffix < m) { |
| 1061 if (suffix < m) { | 1089 for (int i = start; i <= m; i++) { |
| 1062 for (int i = start; i <= m; i++) { | 1090 if (bmgs_buffers.shift(i) == len) { |
| 1063 if (bmgs_buffers.shift(i) == len) { | 1091 bmgs_buffers.shift(i) = suffix - start; |
| 1064 bmgs_buffers.shift(i) = suffix - start; | 1092 } |
| 1065 } | 1093 if (i == suffix) { |
| 1066 if (i == suffix) { | 1094 suffix = bmgs_buffers.suffix(suffix); |
| 1067 suffix = bmgs_buffers.suffix(suffix); | 1095 } |
| 1068 } | 1096 } |
| 1069 } | 1097 } |
| 1098 // End of Good Suffix computation. | |
| 1099 | |
| 1100 // Continue search from i. | |
| 1101 do { | |
| 1102 int j = m - 1; | |
| 1103 schar c; | |
| 1104 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--; | |
| 1105 if (j < 0) { | |
| 1106 return idx; | |
| 1107 } else if (j < start) { | |
| 1108 // we have matched more than our tables allow us to be smart about. | |
| 1109 idx += 1; | |
| 1110 } else { | |
| 1111 int gs_shift = bmgs_buffers.shift(j + 1); | |
| 1112 int bc_occ = bad_char_occurence[c % kBMAlphabetSize]; | |
| 1113 int bc_shift = j - bc_occ; | |
| 1114 idx += (gs_shift > bc_shift) ? gs_shift : bc_shift; | |
| 1115 } | |
| 1116 } while (idx <= n - m); | |
| 1070 } | 1117 } |
| 1071 // End of Good Suffix computation. | |
| 1072 | 1118 |
| 1073 | |
| 1074 // Perform search | |
| 1075 for (int i = start_index; i <= n - m;) { | |
| 1076 int j = m - 1; | |
| 1077 schar c; | |
| 1078 while (j >= 0 && pattern[j] == (c = subject[i + j])) j--; | |
| 1079 if (j < 0) { | |
| 1080 return i; | |
| 1081 } else if (j < start) { | |
| 1082 // we have matched more than our tables allow us to be smart about. | |
| 1083 i += 1; | |
| 1084 } else { | |
| 1085 int gs_shift = bmgs_buffers.shift(j + 1); | |
| 1086 int bc_occ = bad_char_occurence[c % kBMAlphabetSize]; | |
| 1087 int bc_shift = j - bc_occ; | |
| 1088 i += (gs_shift > bc_shift) ? gs_shift : bc_shift; | |
| 1089 } | |
| 1090 } | |
| 1091 return -1; | 1119 return -1; |
| 1092 } | 1120 } |
| 1093 | 1121 |
| 1094 template <typename schar, typename pchar> | 1122 template <typename schar, typename pchar> |
| 1095 static int SingleCharIndexOf(Vector<const schar> string, | 1123 static int SingleCharIndexOf(Vector<const schar> string, |
| 1096 pchar pattern_char, | 1124 pchar pattern_char, |
| 1097 int start_index) { | 1125 int start_index) { |
| 1098 for (int i = start_index, n = string.length(); i < n; i++) { | 1126 for (int i = start_index, n = string.length(); i < n; i++) { |
| 1099 if (pattern_char == string[i]) { | 1127 if (pattern_char == string[i]) { |
| 1100 return i; | 1128 return i; |
| 1101 } | 1129 } |
| 1102 } | 1130 } |
| 1103 return -1; | 1131 return -1; |
| 1104 } | 1132 } |
| 1105 | 1133 |
| 1106 // Trivial string search for shorter strings. | 1134 // Trivial string search for shorter strings. |
| 1107 template <typename pchar, typename schar> | 1135 template <typename pchar, typename schar> |
| 1108 static int SimpleIndexOf(Vector<const schar> subject, | 1136 static int SimpleIndexOf(Vector<const schar> subject, |
| 1109 Vector<const pchar> pattern, | 1137 Vector<const pchar> pattern, |
| 1110 int start_index) { | 1138 int start_index) { |
| 1111 int pattern_length = pattern.length(); | 1139 int pattern_length = pattern.length(); |
| 1112 int subject_length = subject.length(); | 1140 int subject_length = subject.length(); |
| 1113 // We know our pattern is at least 2 characters, we cache the first so | 1141 // We know our pattern is at least 2 characters, we cache the first so |
| 1114 // the common case of the first character not matching is faster. | 1142 // the common case of the first character not matching is faster. |
| 1115 pchar pattern_first_char = pattern[0]; | 1143 pchar pattern_first_char = pattern[0]; |
| 1116 for (int i = start_index, n = subject_length - pattern_length; i <= n; i++) { | 1144 for (int i = start_index, n = subject_length - pattern_length; i <= n; i++) { |
| 1117 if (subject[i] != pattern_first_char) continue; | 1145 if (subject[i] != pattern_first_char) continue; |
| 1118 int j = 1; | 1146 int j = 1; |
| 1119 while (pattern[j] == subject[j+i]) { | 1147 do { |
| 1148 if (pattern[j] != subject[j+i]) { | |
| 1149 break; | |
| 1150 } | |
| 1120 j++; | 1151 j++; |
| 1121 if (j == pattern_length) { | 1152 } while (j < pattern_length); |
| 1122 return i; | 1153 if (j == pattern_length) { |
| 1123 } | 1154 return i; |
| 1124 } | 1155 } |
| 1125 } | 1156 } |
| 1126 return -1; | 1157 return -1; |
| 1127 } | 1158 } |
| 1128 | 1159 |
| 1129 // Dispatch to different algorithms for different length of pattern/subject | 1160 // Dispatch to different algorithms for different length of pattern/subject |
| 1130 template <typename schar, typename pchar> | 1161 template <typename schar, typename pchar> |
| 1131 static int StringMatchStrategy(Vector<const schar> sub, | 1162 static int StringMatchStrategy(Vector<const schar> sub, |
| 1132 Vector<const pchar> pat, | 1163 Vector<const pchar> pat, |
| 1133 int start_index) { | 1164 int start_index) { |
| 1134 int pattern_length = pat.length(); | 1165 int pattern_length = pat.length(); |
| 1135 ASSERT(pattern_length > 1); | 1166 ASSERT(pattern_length > 1); |
| 1136 | 1167 |
| 1137 // For small searches, a complex sort is not worth the setup overhead. | 1168 // For small searches, a complex sort is not worth the setup overhead. |
| 1138 int subject_length = sub.length() - start_index; | 1169 int subject_length = sub.length() - start_index; |
| 1139 if (subject_length < 100 || pattern_length < 4) { | 1170 if (subject_length < 100 || pattern_length < 8) { |
| 1140 return SimpleIndexOf(sub, pat, start_index); | 1171 return SimpleIndexOf(sub, pat, start_index); |
| 1141 } | 1172 } |
| 1142 | 1173 |
| 1143 return BoyerMooreIndexOf(sub, pat, start_index); | 1174 return BoyerMooreIndexOf(sub, pat, start_index); |
| 1144 } | 1175 } |
| 1145 | 1176 |
| 1146 // Perform string match of pattern on subject, starting at start index. | 1177 // Perform string match of pattern on subject, starting at start index. |
| 1147 // Caller must ensure that 0 <= start_index <= sub->length(), | 1178 // Caller must ensure that 0 <= start_index <= sub->length(), |
| 1148 // and should check that pat->length() + start_index <= sub->length() | 1179 // and should check that pat->length() + start_index <= sub->length() |
| 1149 int Runtime::StringMatch(Handle<String> sub, | 1180 int Runtime::StringMatch(Handle<String> sub, |
| (...skipping 4116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5266 | 5297 |
| 5267 void Runtime::PerformGC(Object* result) { | 5298 void Runtime::PerformGC(Object* result) { |
| 5268 Failure* failure = Failure::cast(result); | 5299 Failure* failure = Failure::cast(result); |
| 5269 // Try to do a garbage collection; ignore it if it fails. The C | 5300 // Try to do a garbage collection; ignore it if it fails. The C |
| 5270 // entry stub will throw an out-of-memory exception in that case. | 5301 // entry stub will throw an out-of-memory exception in that case. |
| 5271 Heap::CollectGarbage(failure->requested(), failure->allocation_space()); | 5302 Heap::CollectGarbage(failure->requested(), failure->allocation_space()); |
| 5272 } | 5303 } |
| 5273 | 5304 |
| 5274 | 5305 |
| 5275 } } // namespace v8::internal | 5306 } } // namespace v8::internal |
| OLD | NEW |