| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 947 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 958 static const int kBMMaxShift = 0xff; | 958 static const int kBMMaxShift = 0xff; |
| 959 // Reduce alphabet to this size. | 959 // Reduce alphabet to this size. |
| 960 static const int kBMAlphabetSize = 0x100; | 960 static const int kBMAlphabetSize = 0x100; |
| 961 // For patterns below this length, the skip length of Boyer-Moore is too short | 961 // For patterns below this length, the skip length of Boyer-Moore is too short |
| 962 // to compensate for the algorithmic overhead compared to simple brute force. | 962 // to compensate for the algorithmic overhead compared to simple brute force. |
| 963 static const int kBMMinPatternLength = 5; | 963 static const int kBMMinPatternLength = 5; |
| 964 | 964 |
| 965 // Holds the two buffers used by Boyer-Moore string search's Good Suffix | 965 // Holds the two buffers used by Boyer-Moore string search's Good Suffix |
| 966 // shift. Only allows the last kBMMaxShift characters of the needle | 966 // shift. Only allows the last kBMMaxShift characters of the needle |
| 967 // to be indexed. | 967 // to be indexed. |
| 968 class BMGoodSuffixBuffers: public AllStatic { | 968 class BMGoodSuffixBuffers { |
| 969 public: | 969 public: |
| 970 BMGoodSuffixBuffers() {} | 970 BMGoodSuffixBuffers() {} |
| 971 inline void init(int needle_length) { | 971 inline void init(int needle_length) { |
| 972 ASSERT(needle_length > 1); | 972 ASSERT(needle_length > 1); |
| 973 int start = needle_length < kBMMaxShift ? 0 : needle_length - kBMMaxShift; | 973 int start = needle_length < kBMMaxShift ? 0 : needle_length - kBMMaxShift; |
| 974 int len = needle_length - start; | 974 int len = needle_length - start; |
| 975 biased_suffixes_ = suffixes_ - start; | 975 biased_suffixes_ = suffixes_ - start; |
| 976 biased_good_suffix_shift_ = good_suffix_shift_ - start; | 976 biased_good_suffix_shift_ = good_suffix_shift_ - start; |
| 977 for (int i = 0; i <= len; i++) { | 977 for (int i = 0; i <= len; i++) { |
| 978 good_suffix_shift_[i] = len; | 978 good_suffix_shift_[i] = len; |
| (...skipping 19 matching lines...) Expand all Loading... |
| 998 static int bad_char_occurence[kBMAlphabetSize]; | 998 static int bad_char_occurence[kBMAlphabetSize]; |
| 999 static BMGoodSuffixBuffers bmgs_buffers; | 999 static BMGoodSuffixBuffers bmgs_buffers; |
| 1000 | 1000 |
| 1001 // Compute the bad-char table for Boyer-Moore in the static buffer. | 1001 // Compute the bad-char table for Boyer-Moore in the static buffer. |
| 1002 template <typename pchar> | 1002 template <typename pchar> |
| 1003 static void BoyerMoorePopulateBadCharTable(Vector<const pchar> pattern, | 1003 static void BoyerMoorePopulateBadCharTable(Vector<const pchar> pattern, |
| 1004 int start) { | 1004 int start) { |
| 1005 // Run forwards to populate bad_char_table, so that *last* instance | 1005 // Run forwards to populate bad_char_table, so that *last* instance |
| 1006 // of character equivalence class is the one registered. | 1006 // of character equivalence class is the one registered. |
| 1007 // Notice: Doesn't include the last character. | 1007 // Notice: Doesn't include the last character. |
| 1008 for (int i = 0; i < kBMAlphabetSize; i++) { | 1008 int table_size = (sizeof(pchar) == 1) ? String::kMaxAsciiCharCode + 1 |
| 1009 bad_char_occurence[i] = start - 1; | 1009 : kBMAlphabetSize; |
| 1010 if (start == 0) { // All patterns less than kBMMaxShift in length. |
| 1011 memset(bad_char_occurence, -1, table_size * sizeof(*bad_char_occurence)); |
| 1012 } else { |
| 1013 for (int i = 0; i < table_size; i++) { |
| 1014 bad_char_occurence[i] = start - 1; |
| 1015 } |
| 1010 } | 1016 } |
| 1011 for (int i = start; i < pattern.length() - 1; i++) { | 1017 for (int i = start; i < pattern.length() - 1; i++) { |
| 1012 pchar c = pattern[i]; | 1018 pchar c = pattern[i]; |
| 1013 int bucket = c % kBMAlphabetSize; | 1019 int bucket = (sizeof(pchar) ==1) ? c : c % kBMAlphabetSize; |
| 1014 bad_char_occurence[bucket] = i; | 1020 bad_char_occurence[bucket] = i; |
| 1015 } | 1021 } |
| 1016 } | 1022 } |
| 1017 | 1023 |
| 1018 template <typename pchar> | 1024 template <typename pchar> |
| 1019 static void BoyerMoorePopulateGoodSuffixTable(Vector<const pchar> pattern, | 1025 static void BoyerMoorePopulateGoodSuffixTable(Vector<const pchar> pattern, |
| 1020 int start) { | 1026 int start) { |
| 1021 int m = pattern.length(); | 1027 int m = pattern.length(); |
| 1022 int len = m - start; | 1028 int len = m - start; |
| 1023 // Compute Good Suffix tables. | 1029 // Compute Good Suffix tables. |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1058 if (bmgs_buffers.shift(i) == len) { | 1064 if (bmgs_buffers.shift(i) == len) { |
| 1059 bmgs_buffers.shift(i) = suffix - start; | 1065 bmgs_buffers.shift(i) = suffix - start; |
| 1060 } | 1066 } |
| 1061 if (i == suffix) { | 1067 if (i == suffix) { |
| 1062 suffix = bmgs_buffers.suffix(suffix); | 1068 suffix = bmgs_buffers.suffix(suffix); |
| 1063 } | 1069 } |
| 1064 } | 1070 } |
| 1065 } | 1071 } |
| 1066 } | 1072 } |
| 1067 | 1073 |
| 1074 template <typename schar, typename pchar> |
| 1075 static inline int CharOccurence(int char_code) { |
| 1076 if (sizeof(schar) == 1) { |
| 1077 return bad_char_occurence[char_code]; |
| 1078 } |
| 1079 if (sizeof(pchar) == 1) { |
| 1080 if (char_code > String::kMaxAsciiCharCode) { |
| 1081 return -1; |
| 1082 } |
| 1083 return bad_char_occurence[char_code]; |
| 1084 } |
| 1085 return bad_char_occurence[char_code % kBMAlphabetSize]; |
| 1086 } |
| 1068 | 1087 |
| 1069 // Restricted simplified Boyer-Moore string matching. Restricts tables to a | 1088 // Restricted simplified Boyer-Moore string matching. Restricts tables to a |
| 1070 // suffix of long pattern strings and handles only equivalence classes | 1089 // suffix of long pattern strings and handles only equivalence classes |
| 1071 // of the full alphabet. This allows us to ensure that tables take only | 1090 // of the full alphabet. This allows us to ensure that tables take only |
| 1072 // a fixed amount of space. | 1091 // a fixed amount of space. |
| 1073 template <typename schar, typename pchar> | 1092 template <typename schar, typename pchar> |
| 1074 static int BoyerMooreSimplified(Vector<const schar> subject, | 1093 static int BoyerMooreSimplified(Vector<const schar> subject, |
| 1075 Vector<const pchar> pattern, | 1094 Vector<const pchar> pattern, |
| 1076 int start_index, | 1095 int start_index, |
| 1077 bool& complete) { | 1096 bool& complete) { |
| 1078 int n = subject.length(); | 1097 int n = subject.length(); |
| 1079 int m = pattern.length(); | 1098 int m = pattern.length(); |
| 1080 // Only preprocess at most kBMMaxShift last characters of pattern. | 1099 // Only preprocess at most kBMMaxShift last characters of pattern. |
| 1081 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; | 1100 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; |
| 1082 | 1101 |
| 1083 BoyerMoorePopulateBadCharTable(pattern, start); | 1102 BoyerMoorePopulateBadCharTable(pattern, start); |
| 1084 | 1103 |
| 1085 int badness = -m; // How bad we are doing without a good-suffix table. | 1104 int badness = -m; // How bad we are doing without a good-suffix table. |
| 1086 int idx; // No matches found prior to this index. | 1105 int idx; // No matches found prior to this index. |
| 1087 pchar last_char = pattern[m - 1]; | 1106 pchar last_char = pattern[m - 1]; |
| 1088 // Perform search | 1107 // Perform search |
| 1089 for (idx = start_index; idx <= n - m;) { | 1108 for (idx = start_index; idx <= n - m;) { |
| 1090 int j = m - 1; | 1109 int j = m - 1; |
| 1091 int c; | 1110 int c; |
| 1092 while (last_char != (c = subject[idx + j])) { | 1111 while (last_char != (c = subject[idx + j])) { |
| 1093 int bc_occ = bad_char_occurence[c % kBMAlphabetSize]; | 1112 int bc_occ = CharOccurence<schar, pchar>(c); |
| 1094 int shift = j - bc_occ; | 1113 int shift = j - bc_occ; |
| 1095 idx += shift; | 1114 idx += shift; |
| 1096 badness += 1 - shift; // at most zero, so badness cannot increase. | 1115 badness += 1 - shift; // at most zero, so badness cannot increase. |
| 1097 if (idx > n - m) { | 1116 if (idx > n - m) { |
| 1098 complete = true; | 1117 complete = true; |
| 1099 return -1; | 1118 return -1; |
| 1100 } | 1119 } |
| 1101 } | 1120 } |
| 1102 j--; | 1121 j--; |
| 1103 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--; | 1122 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--; |
| 1104 if (j < 0) { | 1123 if (j < 0) { |
| 1105 complete = true; | 1124 complete = true; |
| 1106 return idx; | 1125 return idx; |
| 1107 } else { | 1126 } else { |
| 1108 int bc_occ = bad_char_occurence[c % kBMAlphabetSize]; | 1127 int bc_occ = CharOccurence<schar, pchar>(c); |
| 1109 int shift = bc_occ < j ? j - bc_occ : 1; | 1128 int shift = bc_occ < j ? j - bc_occ : 1; |
| 1110 idx += shift; | 1129 idx += shift; |
| 1111 // Badness increases by the number of characters we have | 1130 // Badness increases by the number of characters we have |
| 1112 // checked, and decreases by the number of characters we | 1131 // checked, and decreases by the number of characters we |
| 1113 // can skip by shifting. It's a measure of how we are doing | 1132 // can skip by shifting. It's a measure of how we are doing |
| 1114 // compared to reading each character exactly once. | 1133 // compared to reading each character exactly once. |
| 1115 badness += (m - j) - shift; | 1134 badness += (m - j) - shift; |
| 1116 if (badness > 0) { | 1135 if (badness > 0) { |
| 1117 complete = false; | 1136 complete = false; |
| 1118 return idx; | 1137 return idx; |
| (...skipping 15 matching lines...) Expand all Loading... |
| 1134 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; | 1153 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; |
| 1135 | 1154 |
| 1136 // Build the Good Suffix table and continue searching. | 1155 // Build the Good Suffix table and continue searching. |
| 1137 BoyerMoorePopulateGoodSuffixTable(pattern, start); | 1156 BoyerMoorePopulateGoodSuffixTable(pattern, start); |
| 1138 pchar last_char = pattern[m - 1]; | 1157 pchar last_char = pattern[m - 1]; |
| 1139 // Continue search from i. | 1158 // Continue search from i. |
| 1140 do { | 1159 do { |
| 1141 int j = m - 1; | 1160 int j = m - 1; |
| 1142 schar c; | 1161 schar c; |
| 1143 while (last_char != (c = subject[idx + j])) { | 1162 while (last_char != (c = subject[idx + j])) { |
| 1144 int shift = j - bad_char_occurence[c % kBMAlphabetSize]; | 1163 int shift = j - CharOccurence<schar, pchar>(c); |
| 1145 idx += shift; | 1164 idx += shift; |
| 1146 if (idx > n - m) { | 1165 if (idx > n - m) { |
| 1147 return -1; | 1166 return -1; |
| 1148 } | 1167 } |
| 1149 } | 1168 } |
| 1150 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--; | 1169 while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--; |
| 1151 if (j < 0) { | 1170 if (j < 0) { |
| 1152 return idx; | 1171 return idx; |
| 1153 } else if (j < start) { | 1172 } else if (j < start) { |
| 1154 // we have matched more than our tables allow us to be smart about. | 1173 // we have matched more than our tables allow us to be smart about. |
| 1155 idx += 1; | 1174 idx += 1; |
| 1156 } else { | 1175 } else { |
| 1157 int gs_shift = bmgs_buffers.shift(j + 1); // Good suffix shift. | 1176 int gs_shift = bmgs_buffers.shift(j + 1); // Good suffix shift. |
| 1158 int bc_occ = bad_char_occurence[c % kBMAlphabetSize]; | 1177 int bc_occ = CharOccurence<schar, pchar>(c); |
| 1159 int shift = j - bc_occ; // Bad-char shift. | 1178 int shift = j - bc_occ; // Bad-char shift. |
| 1160 shift = (gs_shift > shift) ? gs_shift : shift; | 1179 shift = (gs_shift > shift) ? gs_shift : shift; |
| 1161 idx += shift; | 1180 idx += shift; |
| 1162 } | 1181 } |
| 1163 } while (idx <= n - m); | 1182 } while (idx <= n - m); |
| 1164 | 1183 |
| 1165 return -1; | 1184 return -1; |
| 1166 } | 1185 } |
| 1167 | 1186 |
| 1168 | 1187 |
| (...skipping 4234 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5403 | 5422 |
| 5404 void Runtime::PerformGC(Object* result) { | 5423 void Runtime::PerformGC(Object* result) { |
| 5405 Failure* failure = Failure::cast(result); | 5424 Failure* failure = Failure::cast(result); |
| 5406 // Try to do a garbage collection; ignore it if it fails. The C | 5425 // Try to do a garbage collection; ignore it if it fails. The C |
| 5407 // entry stub will throw an out-of-memory exception in that case. | 5426 // entry stub will throw an out-of-memory exception in that case. |
| 5408 Heap::CollectGarbage(failure->requested(), failure->allocation_space()); | 5427 Heap::CollectGarbage(failure->requested(), failure->allocation_space()); |
| 5409 } | 5428 } |
| 5410 | 5429 |
| 5411 | 5430 |
| 5412 } } // namespace v8::internal | 5431 } } // namespace v8::internal |
| OLD | NEW |