| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 1011 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1022 break; | 1022 break; |
| 1023 } | 1023 } |
| 1024 } | 1024 } |
| 1025 if (!failure) { | 1025 if (!failure) { |
| 1026 return i; | 1026 return i; |
| 1027 } | 1027 } |
| 1028 } | 1028 } |
| 1029 return -1; | 1029 return -1; |
| 1030 } | 1030 } |
| 1031 | 1031 |
| 1032 // Maximal length (+1) of suffix that is indexed. Also the size of the |
| 1033 // maximal bad-character skip. |
| 1034 static const int kBMHSignificantSuffixLength = 0xff; |
| 1035 |
| 1036 // Significant bits taken from characters to use in bad-character |
| 1037 // skips, to reduce size of the table for Unicode letters. |
| 1038 static const int kBMHSignificantBitsMask = 0xff; |
| 1039 |
| 1040 // Number of elements in bad-char table. |
| 1041 static const int kBMHBadCharCount = kBMHSignificantBitsMask + 1; |
| 1042 |
| 1043 // Simplified Boyer-Moore string matching. Only uses bad-char skipping, |
| 1044 // and restricts table to a suffix of long strings (also restricting |
| 1045 // the maximum possible skip-length) in order to reduce space. |
| 1046 template <typename schar, typename pchar> |
| 1047 static int BoyerMooreHorspoolIndexOf(Vector<const schar> subject, |
| 1048 Vector<const pchar> pattern, |
| 1049 int start_index) { |
| 1050 ASSERT(kBMHSignificantSuffixLength < 0x100); // We can use bytes as skips. |
| 1051 static byte bad_char_map[kBMHBadCharCount]; |
| 1052 |
| 1053 int m = pattern.length(); |
| 1054 int n = subject.length(); |
| 1055 // Cap bad char table to last p chars of pattern. Also max skip value. |
| 1056 int p = m < kBMHSignificantSuffixLength ? m : kBMHSignificantSuffixLength; |
| 1057 |
| 1058 memset(bad_char_map, p, kBMHBadCharCount); |
| 1059 |
| 1060 // Run forwards to populate bad_char_table, so that *last* instance |
| 1061 // of character equivalence class is the one registered. |
| 1062 // Notice: Doesn't include last character. |
| 1063 for (int i = p < m ? m - p : 0; i < m - 1; i++) { |
| 1064 pchar c = pattern[i]; |
| 1065 if (sizeof(schar) == 1 && c > 255) return -1; |
| 1066 bad_char_map[c & kBMHSignificantBitsMask] = m - 1 - i; |
| 1067 } |
| 1068 |
| 1069 for (int i = start_index + m - 1, j = m - 1; i < n;) { |
| 1070 schar c = subject[i]; |
| 1071 if (c == pattern[j]) { |
| 1072 if (j == 0) { |
| 1073 return i; |
| 1074 } |
| 1075 j--; |
| 1076 i--; |
| 1077 } else { |
| 1078 int skip = bad_char_map[c & kBMHSignificantBitsMask]; |
| 1079 if (skip < (m - j)) { |
| 1080 skip = m - j; |
| 1081 } |
| 1082 i += skip; |
| 1083 j = m - 1; |
| 1084 } |
| 1085 } |
| 1086 return -1; |
| 1087 } |
| 1088 |
| 1089 |
| 1032 // Full KMP pattern match. | 1090 // Full KMP pattern match. |
| 1033 template <typename schar, typename pchar> // Pattern & subject char types | 1091 template <typename schar, typename pchar> // Pattern & subject char types |
| 1034 static int KMPIndexOf(Vector<const schar> subject, | 1092 static int KMPIndexOf(Vector<const schar> subject, |
| 1035 Vector<const pchar> pattern, | 1093 Vector<const pchar> pattern, |
| 1036 int start_index) { | 1094 int start_index) { |
| 1037 int subject_length = subject.length(); | 1095 int subject_length = subject.length(); |
| 1038 int pattern_length = pattern.length(); | 1096 int pattern_length = pattern.length(); |
| 1039 SmartPointer<int> next_table(NewArray<int>(pattern_length)); | 1097 SmartPointer<int> next_table(NewArray<int>(pattern_length)); |
| 1040 | 1098 |
| 1041 // Compute KMP "next" table | 1099 // Compute KMP "next" table |
| (...skipping 28 matching lines...) Expand all Loading... |
| 1070 subject_index++; | 1128 subject_index++; |
| 1071 if (pattern_index >= pattern_length) { | 1129 if (pattern_index >= pattern_length) { |
| 1072 return subject_index - pattern_index; | 1130 return subject_index - pattern_index; |
| 1073 } | 1131 } |
| 1074 } | 1132 } |
| 1075 return -1; | 1133 return -1; |
| 1076 } | 1134 } |
| 1077 | 1135 |
| 1078 // Dispatch to different algorithms for different length of pattern/subject | 1136 // Dispatch to different algorithms for different length of pattern/subject |
| 1079 template <typename schar, typename pchar> | 1137 template <typename schar, typename pchar> |
| 1080 static int StringMatchKMP(Vector<const schar> sub, | 1138 static int StringMatchStrategy(Vector<const schar> sub, |
| 1081 Vector<const pchar> pat, | 1139 Vector<const pchar> pat, |
| 1082 int start_index) { | 1140 int start_index) { |
| 1141 int pattern_length = pat.length(); |
| 1083 // Searching for one specific character is common. For one | 1142 // Searching for one specific character is common. For one |
| 1084 // character patterns the KMP algorithm is guaranteed to slow down | 1143 // character patterns the KMP algorithm is guaranteed to slow down |
| 1085 // the search, so we just run through the subject string. | 1144 // the search, so we just run through the subject string. |
| 1086 if (pat.length() == 1) { | 1145 if (pattern_length == 1) { |
| 1087 return SingleCharIndexOf(sub, pat[0], start_index); | 1146 return SingleCharIndexOf(sub, pat[0], start_index); |
| 1088 } | 1147 } |
| 1089 | 1148 |
| 1090 // For small searches, KMP is not worth the setup overhead. | 1149 // For small searches, a complex sort is not worth the setup overhead. |
| 1091 if (sub.length() - start_index < 100) { | 1150 if (sub.length() - start_index < 25) { |
| 1092 return SimpleIndexOf(sub, pat, start_index); | 1151 return SimpleIndexOf(sub, pat, start_index); |
| 1093 } | 1152 } |
| 1094 | 1153 |
| 1095 // For patterns with a larger length we use the KMP algorithm. | 1154 return BoyerMooreHorspoolIndexOf(sub, pat, start_index); |
| 1096 return KMPIndexOf(sub, pat, start_index); | |
| 1097 } | 1155 } |
| 1098 | 1156 |
| 1099 // Perform string match of pattern on subject, starting at start index. | 1157 // Perform string match of pattern on subject, starting at start index. |
| 1100 // Caller must ensure that 0 <= start_index <= sub->length(), | 1158 // Caller must ensure that 0 <= start_index <= sub->length(), |
| 1101 // and should check that pat->length() + start_index <= sub->length() | 1159 // and should check that pat->length() + start_index <= sub->length() |
| 1102 int Runtime::StringMatchKmp(Handle<String> sub, | 1160 int Runtime::StringMatch(Handle<String> sub, |
| 1103 Handle<String> pat, | 1161 Handle<String> pat, |
| 1104 int start_index) { | 1162 int start_index) { |
| 1105 ASSERT(0 <= start_index); | 1163 ASSERT(0 <= start_index); |
| 1106 ASSERT(start_index <= sub->length()); | 1164 ASSERT(start_index <= sub->length()); |
| 1107 | 1165 |
| 1108 if (pat->length() == 0) return start_index; | 1166 int pattern_length = pat->length(); |
| 1167 if (pattern_length == 0) return start_index; |
| 1168 |
| 1169 int subject_length = sub->length(); |
| 1170 if (start_index + pattern_length > subject_length) return -1; |
| 1171 |
| 1109 FlattenString(sub); | 1172 FlattenString(sub); |
| 1110 FlattenString(pat); | 1173 FlattenString(pat); |
| 1111 | 1174 |
| 1112 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid | 1175 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid |
| 1113 // dispatch on type of strings | 1176 // dispatch on type of strings |
| 1114 if (pat->is_ascii()) { | 1177 if (pat->is_ascii()) { |
| 1115 Vector<const char> pat_vector = ToAsciiVector(*pat); | 1178 Vector<const char> pat_vector = ToAsciiVector(*pat); |
| 1116 if (sub->is_ascii()) { | 1179 if (sub->is_ascii()) { |
| 1117 return StringMatchKMP(ToAsciiVector(*sub), pat_vector, start_index); | 1180 return StringMatchStrategy(ToAsciiVector(*sub), pat_vector, start_index); |
| 1118 } | 1181 } |
| 1119 return StringMatchKMP(ToUC16Vector(*sub), pat_vector, start_index); | 1182 return StringMatchStrategy(ToUC16Vector(*sub), pat_vector, start_index); |
| 1120 } | 1183 } |
| 1121 Vector<const uc16> pat_vector = ToUC16Vector(*pat); | 1184 Vector<const uc16> pat_vector = ToUC16Vector(*pat); |
| 1122 if (sub->is_ascii()) { | 1185 if (sub->is_ascii()) { |
| 1123 return StringMatchKMP(ToAsciiVector(*sub), pat_vector, start_index); | 1186 return StringMatchStrategy(ToAsciiVector(*sub), pat_vector, start_index); |
| 1124 } | 1187 } |
| 1125 return StringMatchKMP(ToUC16Vector(*sub), pat_vector, start_index); | 1188 return StringMatchStrategy(ToUC16Vector(*sub), pat_vector, start_index); |
| 1126 } | 1189 } |
| 1127 | 1190 |
| 1128 | 1191 |
| 1129 static Object* Runtime_StringIndexOf(Arguments args) { | 1192 static Object* Runtime_StringIndexOf(Arguments args) { |
| 1130 HandleScope scope; // create a new handle scope | 1193 HandleScope scope; // create a new handle scope |
| 1131 ASSERT(args.length() == 3); | 1194 ASSERT(args.length() == 3); |
| 1132 | 1195 |
| 1133 CONVERT_ARG_CHECKED(String, sub, 0); | 1196 CONVERT_ARG_CHECKED(String, sub, 0); |
| 1134 CONVERT_ARG_CHECKED(String, pat, 1); | 1197 CONVERT_ARG_CHECKED(String, pat, 1); |
| 1135 | 1198 |
| 1136 Object* index = args[2]; | 1199 Object* index = args[2]; |
| 1137 uint32_t start_index; | 1200 uint32_t start_index; |
| 1138 if (!Array::IndexFromObject(index, &start_index)) return Smi::FromInt(-1); | 1201 if (!Array::IndexFromObject(index, &start_index)) return Smi::FromInt(-1); |
| 1139 | 1202 |
| 1140 int position = Runtime::StringMatchKmp(sub, pat, start_index); | 1203 int position = Runtime::StringMatch(sub, pat, start_index); |
| 1141 return Smi::FromInt(position); | 1204 return Smi::FromInt(position); |
| 1142 } | 1205 } |
| 1143 | 1206 |
| 1144 | 1207 |
| 1145 static Object* Runtime_StringLastIndexOf(Arguments args) { | 1208 static Object* Runtime_StringLastIndexOf(Arguments args) { |
| 1146 NoHandleAllocation ha; | 1209 NoHandleAllocation ha; |
| 1147 ASSERT(args.length() == 3); | 1210 ASSERT(args.length() == 3); |
| 1148 | 1211 |
| 1149 CONVERT_CHECKED(String, sub, args[0]); | 1212 CONVERT_CHECKED(String, sub, args[0]); |
| 1150 CONVERT_CHECKED(String, pat, args[1]); | 1213 CONVERT_CHECKED(String, pat, args[1]); |
| (...skipping 4015 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5166 | 5229 |
| 5167 void Runtime::PerformGC(Object* result) { | 5230 void Runtime::PerformGC(Object* result) { |
| 5168 Failure* failure = Failure::cast(result); | 5231 Failure* failure = Failure::cast(result); |
| 5169 // Try to do a garbage collection; ignore it if it fails. The C | 5232 // Try to do a garbage collection; ignore it if it fails. The C |
| 5170 // entry stub will throw an out-of-memory exception in that case. | 5233 // entry stub will throw an out-of-memory exception in that case. |
| 5171 Heap::CollectGarbage(failure->requested(), failure->allocation_space()); | 5234 Heap::CollectGarbage(failure->requested(), failure->allocation_space()); |
| 5172 } | 5235 } |
| 5173 | 5236 |
| 5174 | 5237 |
| 5175 } } // namespace v8::internal | 5238 } } // namespace v8::internal |
| OLD | NEW |