Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7)

Side by Side Diff: src/string-search.h

Issue 1303033012: Speedup stringsearch for two byte strings (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef V8_STRING_SEARCH_H_ 5 #ifndef V8_STRING_SEARCH_H_
6 #define V8_STRING_SEARCH_H_ 6 #define V8_STRING_SEARCH_H_
7 7
8 #include "src/isolate.h" 8 #include "src/isolate.h"
9 #include "src/vector.h" 9 #include "src/vector.h"
10 10
(...skipping 170 matching lines...) Expand 10 before | Expand all | Expand 10 after
181 } 181 }
182 182
183 Isolate* isolate_; 183 Isolate* isolate_;
184 // The pattern to search for. 184 // The pattern to search for.
185 Vector<const PatternChar> pattern_; 185 Vector<const PatternChar> pattern_;
186 // Pointer to implementation of the search. 186 // Pointer to implementation of the search.
187 SearchFunction strategy_; 187 SearchFunction strategy_;
188 // Cache value of Max(0, pattern_length() - kBMMaxShift) 188 // Cache value of Max(0, pattern_length() - kBMMaxShift)
189 int start_; 189 int start_;
190 }; 190 };
191 191
Jakob Kummerow 2015/09/03 16:46:33 nit: two empty lines between top-level things
192 template <typename T, typename U>
193 inline T AlignDown(T value, U alignment) {
194 return reinterpret_cast<T>(
195 (reinterpret_cast<uintptr_t>(value) & ~(alignment - 1)));
196 }
197
Jakob Kummerow 2015/09/03 16:46:33 nit: two empty lines between top-level things
198 template <typename PatternChar, typename SubjectChar>
199 inline int FindFirstByte(Vector<const PatternChar> pattern,
Jakob Kummerow 2015/09/03 16:46:33 Since this works with two-byte strings too, I'd ca... [comment truncated in collapsed view]
200 Vector<const SubjectChar> subject, int index) {
201 PatternChar pattern_first_char = pattern[0];
202
203 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
204 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(memchr(
205 subject.start() + index, pattern_first_char, subject.length() - index));
206 if (pos == NULL) return -1;
207 return static_cast<int>(pos - subject.start());
208 } else {
209 uint8_t search_low_byte = static_cast<uint8_t>(pattern_first_char & 0xFF);
210 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
211 memchr(subject.start() + index, search_low_byte,
212 (subject.length() - index) * sizeof(SubjectChar)));
213 if (pos == NULL) return -1;
214 pos = AlignDown(pos, sizeof(SubjectChar));
Jakob Kummerow 2015/09/03 16:46:33 I don't think this is correct. If memchr() found t... [comment truncated in collapsed view]
215 return static_cast<int>(pos - subject.start());
216 }
217 return -1;
218 }
192 219
193 //--------------------------------------------------------------------- 220 //---------------------------------------------------------------------
194 // Single Character Pattern Search Strategy 221 // Single Character Pattern Search Strategy
195 //--------------------------------------------------------------------- 222 //---------------------------------------------------------------------
196
Jakob Kummerow 2015/09/03 16:46:33 nit: keep this line
197 template <typename PatternChar, typename SubjectChar> 223 template <typename PatternChar, typename SubjectChar>
198 int StringSearch<PatternChar, SubjectChar>::SingleCharSearch( 224 int StringSearch<PatternChar, SubjectChar>::SingleCharSearch(
199 StringSearch<PatternChar, SubjectChar>* search, 225 StringSearch<PatternChar, SubjectChar>* search,
200 Vector<const SubjectChar> subject, 226 Vector<const SubjectChar> subject,
201 int index) { 227 int index) {
202 DCHECK_EQ(1, search->pattern_.length()); 228 DCHECK_EQ(1, search->pattern_.length());
203 PatternChar pattern_first_char = search->pattern_[0]; 229 PatternChar pattern_first_char = search->pattern_[0];
204 int i = index;
205 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { 230 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
206 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( 231 return FindFirstByte(search->pattern_, subject, index);
207 memchr(subject.start() + i,
208 pattern_first_char,
209 subject.length() - i));
210 if (pos == NULL) return -1;
211 return static_cast<int>(pos - subject.start());
212 } else { 232 } else {
213 if (sizeof(PatternChar) > sizeof(SubjectChar)) { 233 if (sizeof(PatternChar) > sizeof(SubjectChar)) {
214 if (exceedsOneByte(pattern_first_char)) { 234 if (exceedsOneByte(pattern_first_char)) {
215 return -1; 235 return -1;
216 } 236 }
217 } 237 }
218 SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char); 238 SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char);
219 int n = subject.length(); 239 const int n = subject.length();
240 int i = index;
220 while (i < n) { 241 while (i < n) {
242 i = FindFirstByte(search->pattern_, subject, i);
243 if (i == -1) return -1;
244
221 if (subject[i++] == search_char) return i - 1; 245 if (subject[i++] == search_char) return i - 1;
222 } 246 }
223 return -1; 247 return -1;
224 } 248 }
225 } 249 }
226 250
227 //--------------------------------------------------------------------- 251 //---------------------------------------------------------------------
228 // Linear Search Strategy 252 // Linear Search Strategy
229 //--------------------------------------------------------------------- 253 //---------------------------------------------------------------------
230 254
(...skipping 16 matching lines...) Expand all
247 271
248 // Simple linear search for short patterns. Never bails out. 272 // Simple linear search for short patterns. Never bails out.
249 template <typename PatternChar, typename SubjectChar> 273 template <typename PatternChar, typename SubjectChar>
250 int StringSearch<PatternChar, SubjectChar>::LinearSearch( 274 int StringSearch<PatternChar, SubjectChar>::LinearSearch(
251 StringSearch<PatternChar, SubjectChar>* search, 275 StringSearch<PatternChar, SubjectChar>* search,
252 Vector<const SubjectChar> subject, 276 Vector<const SubjectChar> subject,
253 int index) { 277 int index) {
254 Vector<const PatternChar> pattern = search->pattern_; 278 Vector<const PatternChar> pattern = search->pattern_;
255 DCHECK(pattern.length() > 1); 279 DCHECK(pattern.length() > 1);
256 int pattern_length = pattern.length(); 280 int pattern_length = pattern.length();
257 PatternChar pattern_first_char = pattern[0];
258 int i = index; 281 int i = index;
259 int n = subject.length() - pattern_length; 282 int n = subject.length() - pattern_length;
260 while (i <= n) { 283 while (i <= n) {
261 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { 284 i = FindFirstByte(pattern, subject, i);
262 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( 285 if (i == -1) return -1;
263 memchr(subject.start() + i, 286 i++;
264 pattern_first_char,
265 n - i + 1));
266 if (pos == NULL) return -1;
267 i = static_cast<int>(pos - subject.start()) + 1;
268 } else {
269 if (subject[i++] != pattern_first_char) continue;
270 }
271 // Loop extracted to separate function to allow using return to do 287 // Loop extracted to separate function to allow using return to do
272 // a deeper break. 288 // a deeper break.
273 if (CharCompare(pattern.start() + 1, 289 if (CharCompare(pattern.start() + 1,
274 subject.start() + i, 290 subject.start() + i,
275 pattern_length - 1)) { 291 pattern_length - 1)) {
276 return i - 1; 292 return i - 1;
277 } 293 }
278 } 294 }
279 return -1; 295 return -1;
280 } 296 }
(...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after
498 int index) { 514 int index) {
499 Vector<const PatternChar> pattern = search->pattern_; 515 Vector<const PatternChar> pattern = search->pattern_;
500 int pattern_length = pattern.length(); 516 int pattern_length = pattern.length();
501 // Badness is a count of how much work we have done. When we have 517 // Badness is a count of how much work we have done. When we have
502 // done enough work we decide it's probably worth switching to a better 518 // done enough work we decide it's probably worth switching to a better
503 // algorithm. 519 // algorithm.
504 int badness = -10 - (pattern_length << 2); 520 int badness = -10 - (pattern_length << 2);
505 521
506 // We know our pattern is at least 2 characters, we cache the first so 522 // We know our pattern is at least 2 characters, we cache the first so
507 // the common case of the first character not matching is faster. 523 // the common case of the first character not matching is faster.
508 PatternChar pattern_first_char = pattern[0];
509 for (int i = index, n = subject.length() - pattern_length; i <= n; i++) { 524 for (int i = index, n = subject.length() - pattern_length; i <= n; i++) {
510 badness++; 525 badness++;
511 if (badness <= 0) { 526 if (badness <= 0) {
512 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { 527 i = FindFirstByte(pattern, subject, i);
513 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( 528 if (i == -1) return -1;
514 memchr(subject.start() + i,
515 pattern_first_char,
516 n - i + 1));
517 if (pos == NULL) {
518 return -1;
519 }
520 i = static_cast<int>(pos - subject.start());
521 } else {
522 if (subject[i] != pattern_first_char) continue;
523 }
524 int j = 1; 529 int j = 1;
525 do { 530 do {
526 if (pattern[j] != subject[i + j]) { 531 if (pattern[j] != subject[i + j]) {
527 break; 532 break;
528 } 533 }
529 j++; 534 j++;
530 } while (j < pattern_length); 535 } while (j < pattern_length);
531 if (j == pattern_length) { 536 if (j == pattern_length) {
532 return i; 537 return i;
533 } 538 }
(...skipping 17 matching lines...) Expand all
551 Vector<const SubjectChar> subject, 556 Vector<const SubjectChar> subject,
552 Vector<const PatternChar> pattern, 557 Vector<const PatternChar> pattern,
553 int start_index) { 558 int start_index) {
554 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); 559 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
555 return search.Search(subject, start_index); 560 return search.Search(subject, start_index);
556 } 561 }
557 562
558 }} // namespace v8::internal 563 }} // namespace v8::internal
559 564
560 #endif // V8_STRING_SEARCH_H_ 565 #endif // V8_STRING_SEARCH_H_
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698