| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 934 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 945 uint32_t code; | 945 uint32_t code; |
| 946 if (Array::IndexFromObject(args[0], &code)) { | 946 if (Array::IndexFromObject(args[0], &code)) { |
| 947 if (code <= 0xffff) { | 947 if (code <= 0xffff) { |
| 948 return Heap::LookupSingleCharacterStringFromCode(code); | 948 return Heap::LookupSingleCharacterStringFromCode(code); |
| 949 } | 949 } |
| 950 } | 950 } |
| 951 return Heap::empty_string(); | 951 return Heap::empty_string(); |
| 952 } | 952 } |
| 953 | 953 |
| 954 | 954 |
| 955 // Cap on the maximal shift in the Boyer-Moore implementation. By setting a |
| 956 // limit, we can fix the size of tables. |
| 957 static const int kBMMaxShift = 0xff; |
| 958 static const int kBMAlphabetSize = 0x100; // Reduce alphabet to this size. |
| 959 |
| 960 // Holds the two buffers used by Boyer-Moore string search's Good Suffix |
| 961 // shift. Only allows the last kBMMaxShift characters of the needle |
| 962 // to be indexed. |
| 963 class BMGoodSuffixBuffers: public AllStatic { |
| 964 public: |
| 965 BMGoodSuffixBuffers() {} |
| 966 inline void init(int needle_length) { |
| 967 ASSERT(needle_length > 1); |
| 968 int start = needle_length < kBMMaxShift ? 0 : needle_length - kBMMaxShift; |
| 969 int len = needle_length - start; |
| 970 biased_suffixes_ = suffixes_ - start; |
| 971 biased_good_suffix_shift_ = good_suffix_shift_ - start; |
| 972 for (int i = 0; i <= len; i++) { |
| 973 good_suffix_shift_[i] = len; |
| 974 } |
| 975 } |
| 976 inline int& suffix(int index) { |
| 977 ASSERT(biased_suffixes_ + index >= suffixes_); |
| 978 return biased_suffixes_[index]; |
| 979 } |
| 980 inline int& shift(int index) { |
| 981 ASSERT(biased_good_suffix_shift_ + index >= good_suffix_shift_); |
| 982 return biased_good_suffix_shift_[index]; |
| 983 } |
| 984 private: |
| 985 int suffixes_[kBMMaxShift + 1]; |
| 986 int good_suffix_shift_[kBMMaxShift + 1]; |
| 987 int *biased_suffixes_; |
| 988 int *biased_good_suffix_shift_; |
| 989 DISALLOW_COPY_AND_ASSIGN(BMGoodSuffixBuffers); |
| 990 }; |
| 991 |
| 992 // buffers reused by BoyerMoore |
| 993 static int bad_char_occurence[kBMAlphabetSize]; |
| 994 static BMGoodSuffixBuffers bmgs_buffers; |
| 995 |
| 996 // Restricted Boyer-Moore string matching. Restricts tables to a |
| 997 // suffix of long pattern strings and handles only equivalence classes |
| 998 // of the full alphabet. This allows us to ensure that tables take only |
| 999 // a fixed amount of space. |
| 1000 template <typename schar, typename pchar> |
| 1001 static int BoyerMooreIndexOf(Vector<const schar> subject, |
| 1002 Vector<const pchar> pattern, |
| 1003 int start_index) { |
| 1004 int m = pattern.length(); |
| 1005 int n = subject.length(); |
| 1006 |
| 1007 // Only preprocess at most kBMMaxShift last characters of pattern. |
| 1008 int start = m < kBMMaxShift ? 0 : m - kBMMaxShift; |
| 1009 int len = m - start; |
| 1010 |
| 1011 // Run forwards to populate bad_char_table, so that *last* instance |
| 1012 // of character equivalence class is the one registered. |
| 1013 // Notice: Doesn't include last character. |
| 1014 for (int i = 0; i < kBMAlphabetSize; i++) { |
| 1015 bad_char_occurence[i] = start - 1; |
| 1016 } |
| 1017 for (int i = start; i < m; i++) { |
| 1018 uc32 c = pattern[i]; |
| 1019 bad_char_occurence[c % kBMAlphabetSize] = i; |
| 1020 if (sizeof(schar) == 1 && |
| 1021 sizeof(pchar) > 1 && |
| 1022 c > String::kMaxAsciiCharCode) { |
| 1023 return -1; |
| 1024 } |
| 1025 } |
| 1026 // End of Bad Char computation. |
| 1027 |
| 1028 // Compute Good Suffix shift table. |
| 1029 bmgs_buffers.init(m); |
| 1030 |
| 1031 bmgs_buffers.shift(m-1) = 1; |
| 1032 bmgs_buffers.suffix(m) = m + 1; |
| 1033 pchar last_char = pattern[m - 1]; |
| 1034 int suffix = m + 1; |
| 1035 for (int i = m; i > start;) { |
| 1036 for (pchar c = pattern[i - 1]; suffix <= m && c != pattern[suffix - 1];) { |
| 1037 if (bmgs_buffers.shift(suffix) == len) { |
| 1038 bmgs_buffers.shift(suffix) = suffix - i; |
| 1039 } |
| 1040 suffix = bmgs_buffers.suffix(suffix); |
| 1041 } |
| 1042 i--; |
| 1043 suffix--; |
| 1044 bmgs_buffers.suffix(i) = suffix; |
| 1045 if (suffix == m) { |
| 1046 // no suffix to extend, so we check against last_char only. |
| 1047 while (i > start && pattern[i - 1] != last_char) { |
| 1048 if (bmgs_buffers.shift(m) == len) { |
| 1049 bmgs_buffers.shift(m) = m - i; |
| 1050 } |
| 1051 i--; |
| 1052 bmgs_buffers.suffix(i) = m; |
| 1053 } |
| 1054 if (i > start) { |
| 1055 i--; |
| 1056 suffix--; |
| 1057 bmgs_buffers.suffix(i) = suffix; |
| 1058 } |
| 1059 } |
| 1060 } |
| 1061 if (suffix < m) { |
| 1062 for (int i = start; i <= m; i++) { |
| 1063 if (bmgs_buffers.shift(i) == len) { |
| 1064 bmgs_buffers.shift(i) = suffix - start; |
| 1065 } |
| 1066 if (i == suffix) { |
| 1067 suffix = bmgs_buffers.suffix(suffix); |
| 1068 } |
| 1069 } |
| 1070 } |
| 1071 // End of Good Suffix computation. |
| 1072 |
| 1073 |
| 1074 // Perform search |
| 1075 for (int i = start_index; i <= n - m;) { |
| 1076 int j = m - 1; |
| 1077 schar c; |
| 1078 while (j >= 0 && pattern[j] == (c = subject[i + j])) j--; |
| 1079 if (j < 0) { |
| 1080 return i; |
| 1081 } else if (j < start) { |
| 1082 // we have matched more than our tables allow us to be smart about. |
| 1083 i += 1; |
| 1084 } else { |
| 1085 int gs_shift = bmgs_buffers.shift(j + 1); |
| 1086 int bc_occ = bad_char_occurence[c % kBMAlphabetSize]; |
| 1087 int bc_shift = j - bc_occ; |
| 1088 i += (gs_shift > bc_shift) ? gs_shift : bc_shift; |
| 1089 } |
| 1090 } |
| 1091 return -1; |
| 1092 } |
| 1093 |
| 955 template <typename schar, typename pchar> | 1094 template <typename schar, typename pchar> |
| 956 static int SingleCharIndexOf(Vector<const schar> string, | 1095 static int SingleCharIndexOf(Vector<const schar> string, |
| 957 pchar pattern_char, | 1096 pchar pattern_char, |
| 958 int start_index) { | 1097 int start_index) { |
| 959 for (int i = start_index, n = string.length(); i < n; i++) { | 1098 for (int i = start_index, n = string.length(); i < n; i++) { |
| 960 if (pattern_char == string[i]) { | 1099 if (pattern_char == string[i]) { |
| 961 return i; | 1100 return i; |
| 962 } | 1101 } |
| 963 } | 1102 } |
| 964 return -1; | 1103 return -1; |
| 965 } | 1104 } |
| 966 | 1105 |
| 967 // Trivial string search for shorter strings. | 1106 // Trivial string search for shorter strings. |
| 968 template <typename pchar, typename schar> | 1107 template <typename pchar, typename schar> |
| 969 static int SimpleIndexOf(Vector<const schar> subject, | 1108 static int SimpleIndexOf(Vector<const schar> subject, |
| 970 Vector<const pchar> pattern, | 1109 Vector<const pchar> pattern, |
| 971 int start_index) { | 1110 int start_index) { |
| 972 int pattern_length = pattern.length(); | 1111 int pattern_length = pattern.length(); |
| 973 int subject_length = subject.length(); | 1112 int subject_length = subject.length(); |
| 974 // We know our pattern is at least 2 characters, we cache the first so | 1113 // We know our pattern is at least 2 characters, we cache the first so |
| 975 // the common case of the first character not matching is faster. | 1114 // the common case of the first character not matching is faster. |
| 976 pchar pattern_first_char = pattern[0]; | 1115 pchar pattern_first_char = pattern[0]; |
| 977 for (int i = start_index, n = subject_length - pattern_length; i <= n; i++) { | 1116 for (int i = start_index, n = subject_length - pattern_length; i <= n; i++) { |
| 978 if (subject[i] != pattern_first_char) continue; | 1117 if (subject[i] != pattern_first_char) continue; |
| 979 | 1118 int j = 1; |
| 980 bool failure = false; | 1119 while (pattern[j] == subject[j+i]) { |
| 981 for (int j = 1; j < pattern_length; j++) { | 1120 j++; |
| 982 if (pattern[j] != subject[j+i]) { | 1121 if (j == pattern_length) { |
| 983 failure = true; | 1122 return i; |
| 984 break; | |
| 985 } | 1123 } |
| 986 } | 1124 } |
| 987 if (!failure) { | |
| 988 return i; | |
| 989 } | |
| 990 } | 1125 } |
| 991 return -1; | 1126 return -1; |
| 992 } | |
| 993 | |
| 994 // Maximal length (+1) of suffix that is indexed. Also the size of the | |
| 995 // maximal bad-character skip. | |
| 996 static const int kBMHSignificantSuffixLength = 0xff; | |
| 997 | |
| 998 // Significant bits taken from characters to use in bad-character | |
| 999 // skips, to reduce size of the table for Unicode letters. | |
| 1000 static const int kBMHSignificantBitsMask = 0xff; | |
| 1001 | |
| 1002 // Number of elements in bad-char table. | |
| 1003 static const int kBMHBadCharCount = kBMHSignificantBitsMask + 1; | |
| 1004 | |
| 1005 // Simplified Boyer-Moore string matching. Only uses bad-char skipping, | |
| 1006 // and restricts table to a suffix of long strings (also restricting | |
| 1007 // the maximum possible skip-length) in order to reduce space. | |
| 1008 template <typename schar, typename pchar> | |
| 1009 static int BoyerMooreHorspoolIndexOf(Vector<const schar> subject, | |
| 1010 Vector<const pchar> pattern, | |
| 1011 int start_index) { | |
| 1012 ASSERT(kBMHSignificantSuffixLength < 0x100); // We can use bytes as skips. | |
| 1013 static byte bad_char_map[kBMHBadCharCount]; | |
| 1014 | |
| 1015 int m = pattern.length(); | |
| 1016 int n = subject.length(); | |
| 1017 // Cap bad char table to last p chars of pattern. Also max skip value. | |
| 1018 int p = m < kBMHSignificantSuffixLength ? m : kBMHSignificantSuffixLength; | |
| 1019 | |
| 1020 memset(bad_char_map, p, kBMHBadCharCount); | |
| 1021 | |
| 1022 // Run forwards to populate bad_char_table, so that *last* instance | |
| 1023 // of character equivalence class is the one registered. | |
| 1024 // Notice: Doesn't include last character. | |
| 1025 for (int i = p < m ? m - p : 0; i < m - 1; i++) { | |
| 1026 uc32 c = pattern[i]; | |
| 1027 if (sizeof(schar) == 1 && | |
| 1028 sizeof(pchar) > 1 && | |
| 1029 c > String::kMaxAsciiCharCode) { | |
| 1030 return -1; | |
| 1031 } | |
| 1032 bad_char_map[c & kBMHSignificantBitsMask] = m - 1 - i; | |
| 1033 } | |
| 1034 | |
| 1035 for (int i = start_index + m - 1, j = m - 1; i < n;) { | |
| 1036 schar c = subject[i]; | |
| 1037 if (c == pattern[j]) { | |
| 1038 if (j == 0) { | |
| 1039 return i; | |
| 1040 } | |
| 1041 j--; | |
| 1042 i--; | |
| 1043 } else { | |
| 1044 int skip = bad_char_map[c & kBMHSignificantBitsMask]; | |
| 1045 if (skip < (m - j)) { | |
| 1046 skip = m - j; | |
| 1047 } | |
| 1048 i += skip; | |
| 1049 j = m - 1; | |
| 1050 } | |
| 1051 } | |
| 1052 return -1; | |
| 1053 } | |
| 1054 | |
| 1055 | |
| 1056 // Full KMP pattern match. | |
| 1057 template <typename schar, typename pchar> // Pattern & subject char types | |
| 1058 static int KMPIndexOf(Vector<const schar> subject, | |
| 1059 Vector<const pchar> pattern, | |
| 1060 int start_index) { | |
| 1061 int subject_length = subject.length(); | |
| 1062 int pattern_length = pattern.length(); | |
| 1063 SmartPointer<int> next_table(NewArray<int>(pattern_length)); | |
| 1064 | |
| 1065 // Compute KMP "next" table | |
| 1066 int i = 0; | |
| 1067 int j = -1; | |
| 1068 next_table[0] = -1; | |
| 1069 | |
| 1070 pchar p = pattern[0]; | |
| 1071 while (i < pattern_length - 1) { | |
| 1072 while (j > -1 && p != pattern[j]) { | |
| 1073 j = next_table[j]; | |
| 1074 } | |
| 1075 i++; | |
| 1076 j++; | |
| 1077 p = pattern[i]; | |
| 1078 if (p == pattern[j]) { | |
| 1079 next_table[i] = next_table[j]; | |
| 1080 } else { | |
| 1081 next_table[i] = j; | |
| 1082 } | |
| 1083 } | |
| 1084 | |
| 1085 // Search using the 'next' table. | |
| 1086 int pattern_index = 0; | |
| 1087 int subject_index = start_index; | |
| 1088 while (subject_index < subject_length) { | |
| 1089 schar subject_char = subject[subject_index]; | |
| 1090 while (pattern_index > -1 && pattern[pattern_index] != subject_char) { | |
| 1091 pattern_index = next_table[pattern_index]; | |
| 1092 } | |
| 1093 pattern_index++; | |
| 1094 subject_index++; | |
| 1095 if (pattern_index >= pattern_length) { | |
| 1096 return subject_index - pattern_index; | |
| 1097 } | |
| 1098 } | |
| 1099 return -1; | |
| 1100 } | 1127 } |
| 1101 | 1128 |
| 1102 // Dispatch to different algorithms for different length of pattern/subject | 1129 // Dispatch to different algorithms for different length of pattern/subject |
| 1103 template <typename schar, typename pchar> | 1130 template <typename schar, typename pchar> |
| 1104 static int StringMatchStrategy(Vector<const schar> sub, | 1131 static int StringMatchStrategy(Vector<const schar> sub, |
| 1105 Vector<const pchar> pat, | 1132 Vector<const pchar> pat, |
| 1106 int start_index) { | 1133 int start_index) { |
| 1107 int pattern_length = pat.length(); | 1134 int pattern_length = pat.length(); |
| 1108 // Searching for one specific character is common. For one | 1135 ASSERT(pattern_length > 1); |
| 1109 // character patterns the KMP algorithm is guaranteed to slow down | |
| 1110 // the search, so we just run through the subject string. | |
| 1111 if (pattern_length == 1) { | |
| 1112 return SingleCharIndexOf(sub, pat[0], start_index); | |
| 1113 } | |
| 1114 | 1136 |
| 1115 // For small searches, a complex sort is not worth the setup overhead. | 1137 // For small searches, a complex sort is not worth the setup overhead. |
| 1116 if (sub.length() - start_index < 25) { | 1138 int subject_length = sub.length() - start_index; |
| 1139 if (subject_length < 100 || pattern_length < 4) { |
| 1117 return SimpleIndexOf(sub, pat, start_index); | 1140 return SimpleIndexOf(sub, pat, start_index); |
| 1118 } | 1141 } |
| 1119 | 1142 |
| 1120 return BoyerMooreHorspoolIndexOf(sub, pat, start_index); | 1143 return BoyerMooreIndexOf(sub, pat, start_index); |
| 1121 } | 1144 } |
| 1122 | 1145 |
| 1123 // Perform string match of pattern on subject, starting at start index. | 1146 // Perform string match of pattern on subject, starting at start index. |
| 1124 // Caller must ensure that 0 <= start_index <= sub->length(), | 1147 // Caller must ensure that 0 <= start_index <= sub->length(), |
| 1125 // and should check that pat->length() + start_index <= sub->length() | 1148 // and should check that pat->length() + start_index <= sub->length() |
| 1126 int Runtime::StringMatch(Handle<String> sub, | 1149 int Runtime::StringMatch(Handle<String> sub, |
| 1127 Handle<String> pat, | 1150 Handle<String> pat, |
| 1128 int start_index) { | 1151 int start_index) { |
| 1129 ASSERT(0 <= start_index); | 1152 ASSERT(0 <= start_index); |
| 1130 ASSERT(start_index <= sub->length()); | 1153 ASSERT(start_index <= sub->length()); |
| 1131 | 1154 |
| 1132 int pattern_length = pat->length(); | 1155 int pattern_length = pat->length(); |
| 1133 if (pattern_length == 0) return start_index; | 1156 if (pattern_length == 0) return start_index; |
| 1134 | 1157 |
| 1135 int subject_length = sub->length(); | 1158 int subject_length = sub->length(); |
| 1136 if (start_index + pattern_length > subject_length) return -1; | 1159 if (start_index + pattern_length > subject_length) return -1; |
| 1137 | 1160 |
| 1138 FlattenString(sub); | 1161 FlattenString(sub); |
| 1162 // Searching for one specific character is common. For one |
| 1163 // character patterns linear search is necessary, so any smart |
| 1164 // algorithm is unnecessary overhead. |
| 1165 if (pattern_length == 1) { |
| 1166 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
| 1167 if (sub->is_ascii_representation()) { |
| 1168 return SingleCharIndexOf(sub->ToAsciiVector(), pat->Get(0), start_index); |
| 1169 } |
| 1170 return SingleCharIndexOf(sub->ToUC16Vector(), pat->Get(0), start_index); |
| 1171 } |
| 1172 |
| 1139 FlattenString(pat); | 1173 FlattenString(pat); |
| 1140 | 1174 |
| 1141 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid | 1175 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
| 1142 // dispatch on type of strings | 1176 // dispatch on type of strings |
| 1143 if (pat->is_ascii_representation()) { | 1177 if (pat->is_ascii_representation()) { |
| 1144 Vector<const char> pat_vector = pat->ToAsciiVector(); | 1178 Vector<const char> pat_vector = pat->ToAsciiVector(); |
| 1145 if (sub->is_ascii_representation()) { | 1179 if (sub->is_ascii_representation()) { |
| 1146 return StringMatchStrategy(sub->ToAsciiVector(), pat_vector, start_index); | 1180 return StringMatchStrategy(sub->ToAsciiVector(), pat_vector, start_index); |
| 1147 } | 1181 } |
| 1148 return StringMatchStrategy(sub->ToUC16Vector(), pat_vector, start_index); | 1182 return StringMatchStrategy(sub->ToUC16Vector(), pat_vector, start_index); |
| (...skipping 4073 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5222 | 5256 |
| 5223 void Runtime::PerformGC(Object* result) { | 5257 void Runtime::PerformGC(Object* result) { |
| 5224 Failure* failure = Failure::cast(result); | 5258 Failure* failure = Failure::cast(result); |
| 5225 // Try to do a garbage collection; ignore it if it fails. The C | 5259 // Try to do a garbage collection; ignore it if it fails. The C |
| 5226 // entry stub will throw an out-of-memory exception in that case. | 5260 // entry stub will throw an out-of-memory exception in that case. |
| 5227 Heap::CollectGarbage(failure->requested(), failure->allocation_space()); | 5261 Heap::CollectGarbage(failure->requested(), failure->allocation_space()); |
| 5228 } | 5262 } |
| 5229 | 5263 |
| 5230 | 5264 |
| 5231 } } // namespace v8::internal | 5265 } } // namespace v8::internal |
| OLD | NEW |