Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(64)

Side by Side Diff: src/runtime.cc

Issue 7139: * Start string search with simple algorithm, and fall trough to Boyer-Moore (Closed)
Patch Set: StringSearch now defaults to simple search and falls through to Boyer-Moore if it seems better. Created 12 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 1132 matching lines...) Expand 10 before | Expand all | Expand 10 after
1143 int start_index) { 1143 int start_index) {
1144 for (int i = start_index, n = string.length(); i < n; i++) { 1144 for (int i = start_index, n = string.length(); i < n; i++) {
1145 if (pattern_char == string[i]) { 1145 if (pattern_char == string[i]) {
1146 return i; 1146 return i;
1147 } 1147 }
1148 } 1148 }
1149 return -1; 1149 return -1;
1150 } 1150 }
1151 1151
1152 // Trivial string search for shorter strings. 1152 // Trivial string search for shorter strings.
1153 // On return, if "complete" is set to true, the return value is the
1154 // final result of searching for the patter in the subject.
1155 // If "complete" is set to false, the return value is the index where
1156 // further checking should start, i.e., it's guaranteed that the pattern
1157 // does not occur at a position prior to the returned index.
1153 template <typename pchar, typename schar> 1158 template <typename pchar, typename schar>
1154 static int SimpleIndexOf(Vector<const schar> subject, 1159 static int SimpleIndexOf(Vector<const schar> subject,
1155 Vector<const pchar> pattern, 1160 Vector<const pchar> pattern,
1156 int start_index) { 1161 int start_index,
1162 bool &complete) {
1157 int pattern_length = pattern.length(); 1163 int pattern_length = pattern.length();
1158 int subject_length = subject.length(); 1164 int subject_length = subject.length();
1165 // Badness is a count of how many extra times the same character
1166 // is checked. We compare it to the index counter, so we start
1167 // it at the start_index, and give it a little discount to avoid
1168 // very early bail-outs.
1169 int badness = start_index - pattern_length;
1159 // We know our pattern is at least 2 characters, we cache the first so 1170 // We know our pattern is at least 2 characters, we cache the first so
1160 // the common case of the first character not matching is faster. 1171 // the common case of the first character not matching is faster.
1161 pchar pattern_first_char = pattern[0]; 1172 pchar pattern_first_char = pattern[0];
1173
1162 for (int i = start_index, n = subject_length - pattern_length; i <= n; i++) { 1174 for (int i = start_index, n = subject_length - pattern_length; i <= n; i++) {
1163 if (subject[i] != pattern_first_char) continue; 1175 if (subject[i] != pattern_first_char) continue;
1164 int j = 1; 1176 int j = 1;
1165 do { 1177 do {
1166 if (pattern[j] != subject[j+i]) { 1178 if (pattern[j] != subject[i+j]) {
1167 break; 1179 break;
1168 } 1180 }
1169 j++; 1181 j++;
1170 } while (j < pattern_length); 1182 } while (j < pattern_length);
1171 if (j == pattern_length) { 1183 if (j == pattern_length) {
1184 complete = true;
1172 return i; 1185 return i;
1173 } 1186 }
1187 badness += j;
1188 if (badness > i) { // More than one extra character on average.
1189 complete = false;
1190 return (i + 1); // No matches up to index i+1.
1191 }
1174 } 1192 }
1193 complete = true;
1175 return -1; 1194 return -1;
1176 } 1195 }
1177 1196
1178 // Dispatch to different algorithms for different length of pattern/subject 1197 // Dispatch to different algorithms for different length of pattern/subject
1179 template <typename schar, typename pchar> 1198 template <typename schar, typename pchar>
1180 static int StringMatchStrategy(Vector<const schar> sub, 1199 static int StringMatchStrategy(Vector<const schar> sub,
1181 Vector<const pchar> pat, 1200 Vector<const pchar> pat,
1182 int start_index) { 1201 int start_index) {
1183 int pattern_length = pat.length(); 1202 ASSERT(pat.length() > 1);
1184 ASSERT(pattern_length > 1);
1185 1203
1186 // For small searches, a complex sort is not worth the setup overhead. 1204 // For small searches, a complex sort is not worth the setup overhead.
1187 int subject_length = sub.length() - start_index; 1205 bool complete;
1188 if (subject_length < 100 || pattern_length < 8) { 1206 int idx = SimpleIndexOf(sub, pat, start_index, complete);
1189 return SimpleIndexOf(sub, pat, start_index); 1207 if (complete) return idx;
1190 } 1208 return BoyerMooreIndexOf(sub, pat, idx);
1191
1192 return BoyerMooreIndexOf(sub, pat, start_index);
1193 } 1209 }
1194 1210
1195 // Perform string match of pattern on subject, starting at start index. 1211 // Perform string match of pattern on subject, starting at start index.
1196 // Caller must ensure that 0 <= start_index <= sub->length(), 1212 // Caller must ensure that 0 <= start_index <= sub->length(),
1197 // and should check that pat->length() + start_index <= sub->length() 1213 // and should check that pat->length() + start_index <= sub->length()
1198 int Runtime::StringMatch(Handle<String> sub, 1214 int Runtime::StringMatch(Handle<String> sub,
1199 Handle<String> pat, 1215 Handle<String> pat,
1200 int start_index) { 1216 int start_index) {
1201 ASSERT(0 <= start_index); 1217 ASSERT(0 <= start_index);
1202 ASSERT(start_index <= sub->length()); 1218 ASSERT(start_index <= sub->length());
(...skipping 4112 matching lines...) Expand 10 before | Expand all | Expand 10 after
5315 5331
5316 void Runtime::PerformGC(Object* result) { 5332 void Runtime::PerformGC(Object* result) {
5317 Failure* failure = Failure::cast(result); 5333 Failure* failure = Failure::cast(result);
5318 // Try to do a garbage collection; ignore it if it fails. The C 5334 // Try to do a garbage collection; ignore it if it fails. The C
5319 // entry stub will throw an out-of-memory exception in that case. 5335 // entry stub will throw an out-of-memory exception in that case.
5320 Heap::CollectGarbage(failure->requested(), failure->allocation_space()); 5336 Heap::CollectGarbage(failure->requested(), failure->allocation_space());
5321 } 5337 }
5322 5338
5323 5339
5324 } } // namespace v8::internal 5340 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698