Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(324)

Side by Side Diff: src/runtime.cc

Issue 6533: * Added simplified Boyer-Moore-Horspool text search. (Closed)
Patch Set: Included comments from review Created 12 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/runtime.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 1011 matching lines...) Expand 10 before | Expand all | Expand 10 after
1022 break; 1022 break;
1023 } 1023 }
1024 } 1024 }
1025 if (!failure) { 1025 if (!failure) {
1026 return i; 1026 return i;
1027 } 1027 }
1028 } 1028 }
1029 return -1; 1029 return -1;
1030 } 1030 }
1031 1031
1032 // Maximal length (+1) of suffix that is indexed. Also the size of the
1033 // maximal bad-character skip.
1034 static const int kBMHSignificantSuffixLength = 0xff;
1035
1036 // Significant bits taken from characters to use in bad-character
1037 // skips, to reduce size of the table for Unicode letters.
1038 static const int kBMHSignificantBitsMask = 0xff;
1039
1040 // Number of elements in bad-char table.
1041 static const int kBMHBadCharCount = kBMHSignificantBitsMask + 1;
1042
1043 // Simplified Boyer-Moore string matching. Only uses bad-char skipping,
1044 // and restricts table to a suffix of long strings (also restricting
1045 // the maximum possible skip-length) in order to reduce space.
1046 template <typename schar, typename pchar>
1047 static int BoyerMooreHorspoolIndexOf(Vector<const schar> subject,
1048 Vector<const pchar> pattern,
1049 int start_index) {
1050 ASSERT(kBMHSignificantSuffixLength < 0x100); // We can use bytes as skips.
1051 static byte bad_char_map[kBMHBadCharCount];
1052
1053 int m = pattern.length();
1054 int n = subject.length();
1055 // Cap bad char table to last p chars of pattern. Also max skip value.
1056 int p = m < kBMHSignificantSuffixLength ? m : kBMHSignificantSuffixLength;
1057
1058 memset(bad_char_map, p, kBMHBadCharCount);
1059
1060 // Run forwards to populate bad_char_table, so that *last* instance
1061 // of character equivalence class is the one registered.
1062 // Notice: Doesn't include last character.
1063 for (int i = p < m ? m - p : 0; i < m - 1; i++) {
1064 pchar c = pattern[i];
1065 if (sizeof(schar) == 1 && c > 255) return -1;
1066 bad_char_map[c & kBMHSignificantBitsMask] = m - 1 - i;
1067 }
1068
1069 for (int i = start_index + m - 1, j = m - 1; i < n;) {
1070 schar c = subject[i];
1071 if (c == pattern[j]) {
1072 if (j == 0) {
1073 return i;
1074 }
1075 j--;
1076 i--;
1077 } else {
1078 int skip = bad_char_map[c & kBMHSignificantBitsMask];
1079 if (skip < (m - j)) {
1080 skip = m - j;
1081 }
1082 i += skip;
1083 j = m - 1;
1084 }
1085 }
1086 return -1;
1087 }
1088
1089
1032 // Full KMP pattern match. 1090 // Full KMP pattern match.
1033 template <typename schar, typename pchar> // Pattern & subject char types 1091 template <typename schar, typename pchar> // Pattern & subject char types
1034 static int KMPIndexOf(Vector<const schar> subject, 1092 static int KMPIndexOf(Vector<const schar> subject,
1035 Vector<const pchar> pattern, 1093 Vector<const pchar> pattern,
1036 int start_index) { 1094 int start_index) {
1037 int subject_length = subject.length(); 1095 int subject_length = subject.length();
1038 int pattern_length = pattern.length(); 1096 int pattern_length = pattern.length();
1039 SmartPointer<int> next_table(NewArray<int>(pattern_length)); 1097 SmartPointer<int> next_table(NewArray<int>(pattern_length));
1040 1098
1041 // Compute KMP "next" table 1099 // Compute KMP "next" table
(...skipping 28 matching lines...) Expand all
1070 subject_index++; 1128 subject_index++;
1071 if (pattern_index >= pattern_length) { 1129 if (pattern_index >= pattern_length) {
1072 return subject_index - pattern_index; 1130 return subject_index - pattern_index;
1073 } 1131 }
1074 } 1132 }
1075 return -1; 1133 return -1;
1076 } 1134 }
1077 1135
1078 // Dispatch to different algorithms for different length of pattern/subject 1136 // Dispatch to different algorithms for different length of pattern/subject
1079 template <typename schar, typename pchar> 1137 template <typename schar, typename pchar>
1080 static int StringMatchKMP(Vector<const schar> sub, 1138 static int StringMatchStrategy(Vector<const schar> sub,
1081 Vector<const pchar> pat, 1139 Vector<const pchar> pat,
1082 int start_index) { 1140 int start_index) {
1141 int pattern_length = pat.length();
1083 // Searching for one specific character is common. For one 1142 // Searching for one specific character is common. For one
1084 // character patterns the KMP algorithm is guaranteed to slow down 1143 // character patterns the KMP algorithm is guaranteed to slow down
1085 // the search, so we just run through the subject string. 1144 // the search, so we just run through the subject string.
1086 if (pat.length() == 1) { 1145 if (pattern_length == 1) {
1087 return SingleCharIndexOf(sub, pat[0], start_index); 1146 return SingleCharIndexOf(sub, pat[0], start_index);
1088 } 1147 }
1089 1148
1090 // For small searches, KMP is not worth the setup overhead. 1149 // For small searches, a complex sort is not worth the setup overhead.
1091 if (sub.length() - start_index < 100) { 1150 if (sub.length() - start_index < 25) {
1092 return SimpleIndexOf(sub, pat, start_index); 1151 return SimpleIndexOf(sub, pat, start_index);
1093 } 1152 }
1094 1153
1095 // For patterns with a larger length we use the KMP algorithm. 1154 return BoyerMooreHorspoolIndexOf(sub, pat, start_index);
1096 return KMPIndexOf(sub, pat, start_index);
1097 } 1155 }
1098 1156
1099 // Perform string match of pattern on subject, starting at start index. 1157 // Perform string match of pattern on subject, starting at start index.
1100 // Caller must ensure that 0 <= start_index <= sub->length(), 1158 // Caller must ensure that 0 <= start_index <= sub->length(),
1101 // and should check that pat->length() + start_index <= sub->length() 1159 // and should check that pat->length() + start_index <= sub->length()
1102 int Runtime::StringMatchKmp(Handle<String> sub, 1160 int Runtime::StringMatch(Handle<String> sub,
1103 Handle<String> pat, 1161 Handle<String> pat,
1104 int start_index) { 1162 int start_index) {
1105 ASSERT(0 <= start_index); 1163 ASSERT(0 <= start_index);
1106 ASSERT(start_index <= sub->length()); 1164 ASSERT(start_index <= sub->length());
1107 1165
1108 if (pat->length() == 0) return start_index; 1166 int pattern_length = pat->length();
1167 if (pattern_length == 0) return start_index;
1168
1169 int subject_length = sub->length();
1170 if (start_index + pattern_length > subject_length) return -1;
1171
1109 FlattenString(sub); 1172 FlattenString(sub);
1110 FlattenString(pat); 1173 FlattenString(pat);
1111 1174
1112 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid 1175 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
1113 // dispatch on type of strings 1176 // dispatch on type of strings
1114 if (pat->is_ascii()) { 1177 if (pat->is_ascii()) {
1115 Vector<const char> pat_vector = ToAsciiVector(*pat); 1178 Vector<const char> pat_vector = ToAsciiVector(*pat);
1116 if (sub->is_ascii()) { 1179 if (sub->is_ascii()) {
1117 return StringMatchKMP(ToAsciiVector(*sub), pat_vector, start_index); 1180 return StringMatchStrategy(ToAsciiVector(*sub), pat_vector, start_index);
1118 } 1181 }
1119 return StringMatchKMP(ToUC16Vector(*sub), pat_vector, start_index); 1182 return StringMatchStrategy(ToUC16Vector(*sub), pat_vector, start_index);
1120 } 1183 }
1121 Vector<const uc16> pat_vector = ToUC16Vector(*pat); 1184 Vector<const uc16> pat_vector = ToUC16Vector(*pat);
1122 if (sub->is_ascii()) { 1185 if (sub->is_ascii()) {
1123 return StringMatchKMP(ToAsciiVector(*sub), pat_vector, start_index); 1186 return StringMatchStrategy(ToAsciiVector(*sub), pat_vector, start_index);
1124 } 1187 }
1125 return StringMatchKMP(ToUC16Vector(*sub), pat_vector, start_index); 1188 return StringMatchStrategy(ToUC16Vector(*sub), pat_vector, start_index);
1126 } 1189 }
1127 1190
1128 1191
1129 static Object* Runtime_StringIndexOf(Arguments args) { 1192 static Object* Runtime_StringIndexOf(Arguments args) {
1130 HandleScope scope; // create a new handle scope 1193 HandleScope scope; // create a new handle scope
1131 ASSERT(args.length() == 3); 1194 ASSERT(args.length() == 3);
1132 1195
1133 CONVERT_ARG_CHECKED(String, sub, 0); 1196 CONVERT_ARG_CHECKED(String, sub, 0);
1134 CONVERT_ARG_CHECKED(String, pat, 1); 1197 CONVERT_ARG_CHECKED(String, pat, 1);
1135 1198
1136 Object* index = args[2]; 1199 Object* index = args[2];
1137 uint32_t start_index; 1200 uint32_t start_index;
1138 if (!Array::IndexFromObject(index, &start_index)) return Smi::FromInt(-1); 1201 if (!Array::IndexFromObject(index, &start_index)) return Smi::FromInt(-1);
1139 1202
1140 int position = Runtime::StringMatchKmp(sub, pat, start_index); 1203 int position = Runtime::StringMatch(sub, pat, start_index);
1141 return Smi::FromInt(position); 1204 return Smi::FromInt(position);
1142 } 1205 }
1143 1206
1144 1207
1145 static Object* Runtime_StringLastIndexOf(Arguments args) { 1208 static Object* Runtime_StringLastIndexOf(Arguments args) {
1146 NoHandleAllocation ha; 1209 NoHandleAllocation ha;
1147 ASSERT(args.length() == 3); 1210 ASSERT(args.length() == 3);
1148 1211
1149 CONVERT_CHECKED(String, sub, args[0]); 1212 CONVERT_CHECKED(String, sub, args[0]);
1150 CONVERT_CHECKED(String, pat, args[1]); 1213 CONVERT_CHECKED(String, pat, args[1]);
(...skipping 4015 matching lines...) Expand 10 before | Expand all | Expand 10 after
5166 5229
5167 void Runtime::PerformGC(Object* result) { 5230 void Runtime::PerformGC(Object* result) {
5168 Failure* failure = Failure::cast(result); 5231 Failure* failure = Failure::cast(result);
5169 // Try to do a garbage collection; ignore it if it fails. The C 5232 // Try to do a garbage collection; ignore it if it fails. The C
5170 // entry stub will throw an out-of-memory exception in that case. 5233 // entry stub will throw an out-of-memory exception in that case.
5171 Heap::CollectGarbage(failure->requested(), failure->allocation_space()); 5234 Heap::CollectGarbage(failure->requested(), failure->allocation_space());
5172 } 5235 }
5173 5236
5174 5237
5175 } } // namespace v8::internal 5238 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/runtime.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698