OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_STRING_SEARCH_H_ | 5 #ifndef V8_STRING_SEARCH_H_ |
6 #define V8_STRING_SEARCH_H_ | 6 #define V8_STRING_SEARCH_H_ |
7 | 7 |
8 #include "src/isolate.h" | 8 #include "src/isolate.h" |
9 #include "src/vector.h" | 9 #include "src/vector.h" |
10 | 10 |
(...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
183 Isolate* isolate_; | 183 Isolate* isolate_; |
184 // The pattern to search for. | 184 // The pattern to search for. |
185 Vector<const PatternChar> pattern_; | 185 Vector<const PatternChar> pattern_; |
186 // Pointer to implementation of the search. | 186 // Pointer to implementation of the search. |
187 SearchFunction strategy_; | 187 SearchFunction strategy_; |
188 // Cache value of Max(0, pattern_length() - kBMMaxShift) | 188 // Cache value of Max(0, pattern_length() - kBMMaxShift) |
189 int start_; | 189 int start_; |
190 }; | 190 }; |
191 | 191 |
192 | 192 |
| 193 template <typename PatternChar, typename SubjectChar> |
| 194 int FindFirstCharacter(Vector<const PatternChar> pattern, |
| 195 Vector<const SubjectChar> subject, int index) { |
| 196 PatternChar pattern_first_char = pattern[0]; |
| 197 const int max_n = (subject.length() - pattern.length() + 1); |
| 198 |
| 199 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
| 200 DCHECK_GE(max_n - index, 0); |
| 201 const SubjectChar* char_pos = reinterpret_cast<const SubjectChar*>( |
| 202 memchr(subject.start() + index, pattern_first_char, max_n - index)); |
| 203 if (char_pos == NULL) return -1; |
| 204 return static_cast<int>(char_pos - subject.start()); |
| 205 } else { |
| 206 const uint8_t search_low_byte = |
| 207 static_cast<uint8_t>(pattern_first_char & 0xFF); |
| 208 const SubjectChar search_char = |
| 209 static_cast<SubjectChar>(pattern_first_char); |
| 210 int pos = index; |
| 211 do { |
| 212 DCHECK_GE(max_n - pos, 0); |
| 213 const SubjectChar* char_pos = reinterpret_cast<const SubjectChar*>( |
| 214 memchr(subject.start() + pos, search_low_byte, |
| 215 (max_n - pos) * sizeof(SubjectChar))); |
| 216 if (char_pos == NULL) return -1; |
| 217 pos = static_cast<int>(char_pos - subject.start()); |
| 218 if (IsAligned(reinterpret_cast<uintptr_t>(char_pos), |
| 219 sizeof(SubjectChar))) { |
| 220 if (subject[pos] == search_char) return pos; |
| 221 } |
| 222 } while (++pos < max_n); |
| 223 } |
| 224 return -1; |
| 225 } |
| 226 |
| 227 |
193 //--------------------------------------------------------------------- | 228 //--------------------------------------------------------------------- |
194 // Single Character Pattern Search Strategy | 229 // Single Character Pattern Search Strategy |
195 //--------------------------------------------------------------------- | 230 //--------------------------------------------------------------------- |
196 | 231 |
197 template <typename PatternChar, typename SubjectChar> | 232 template <typename PatternChar, typename SubjectChar> |
198 int StringSearch<PatternChar, SubjectChar>::SingleCharSearch( | 233 int StringSearch<PatternChar, SubjectChar>::SingleCharSearch( |
199 StringSearch<PatternChar, SubjectChar>* search, | 234 StringSearch<PatternChar, SubjectChar>* search, |
200 Vector<const SubjectChar> subject, | 235 Vector<const SubjectChar> subject, |
201 int index) { | 236 int index) { |
202 DCHECK_EQ(1, search->pattern_.length()); | 237 DCHECK_EQ(1, search->pattern_.length()); |
203 PatternChar pattern_first_char = search->pattern_[0]; | 238 PatternChar pattern_first_char = search->pattern_[0]; |
204 int i = index; | |
205 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { | 239 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
206 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( | 240 return FindFirstCharacter(search->pattern_, subject, index); |
207 memchr(subject.start() + i, | |
208 pattern_first_char, | |
209 subject.length() - i)); | |
210 if (pos == NULL) return -1; | |
211 return static_cast<int>(pos - subject.start()); | |
212 } else { | 241 } else { |
213 if (sizeof(PatternChar) > sizeof(SubjectChar)) { | 242 if (sizeof(PatternChar) > sizeof(SubjectChar)) { |
214 if (exceedsOneByte(pattern_first_char)) { | 243 if (exceedsOneByte(pattern_first_char)) { |
215 return -1; | 244 return -1; |
216 } | 245 } |
217 } | 246 } |
218 SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char); | 247 return FindFirstCharacter(search->pattern_, subject, index); |
219 int n = subject.length(); | |
220 while (i < n) { | |
221 if (subject[i++] == search_char) return i - 1; | |
222 } | |
223 return -1; | |
224 } | 248 } |
225 } | 249 } |
226 | 250 |
227 //--------------------------------------------------------------------- | 251 //--------------------------------------------------------------------- |
228 // Linear Search Strategy | 252 // Linear Search Strategy |
229 //--------------------------------------------------------------------- | 253 //--------------------------------------------------------------------- |
230 | 254 |
231 | 255 |
232 template <typename PatternChar, typename SubjectChar> | 256 template <typename PatternChar, typename SubjectChar> |
233 inline bool CharCompare(const PatternChar* pattern, | 257 inline bool CharCompare(const PatternChar* pattern, |
(...skipping 13 matching lines...) Expand all Loading... |
247 | 271 |
248 // Simple linear search for short patterns. Never bails out. | 272 // Simple linear search for short patterns. Never bails out. |
249 template <typename PatternChar, typename SubjectChar> | 273 template <typename PatternChar, typename SubjectChar> |
250 int StringSearch<PatternChar, SubjectChar>::LinearSearch( | 274 int StringSearch<PatternChar, SubjectChar>::LinearSearch( |
251 StringSearch<PatternChar, SubjectChar>* search, | 275 StringSearch<PatternChar, SubjectChar>* search, |
252 Vector<const SubjectChar> subject, | 276 Vector<const SubjectChar> subject, |
253 int index) { | 277 int index) { |
254 Vector<const PatternChar> pattern = search->pattern_; | 278 Vector<const PatternChar> pattern = search->pattern_; |
255 DCHECK(pattern.length() > 1); | 279 DCHECK(pattern.length() > 1); |
256 int pattern_length = pattern.length(); | 280 int pattern_length = pattern.length(); |
257 PatternChar pattern_first_char = pattern[0]; | |
258 int i = index; | 281 int i = index; |
259 int n = subject.length() - pattern_length; | 282 int n = subject.length() - pattern_length; |
260 while (i <= n) { | 283 while (i <= n) { |
261 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { | 284 i = FindFirstCharacter(pattern, subject, i); |
262 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( | 285 if (i == -1) return -1; |
263 memchr(subject.start() + i, | 286 DCHECK_LE(i, n); |
264 pattern_first_char, | 287 i++; |
265 n - i + 1)); | |
266 if (pos == NULL) return -1; | |
267 i = static_cast<int>(pos - subject.start()) + 1; | |
268 } else { | |
269 if (subject[i++] != pattern_first_char) continue; | |
270 } | |
271 // Loop extracted to separate function to allow using return to do | 288 // Loop extracted to separate function to allow using return to do |
272 // a deeper break. | 289 // a deeper break. |
273 if (CharCompare(pattern.start() + 1, | 290 if (CharCompare(pattern.start() + 1, |
274 subject.start() + i, | 291 subject.start() + i, |
275 pattern_length - 1)) { | 292 pattern_length - 1)) { |
276 return i - 1; | 293 return i - 1; |
277 } | 294 } |
278 } | 295 } |
279 return -1; | 296 return -1; |
280 } | 297 } |
(...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
498 int index) { | 515 int index) { |
499 Vector<const PatternChar> pattern = search->pattern_; | 516 Vector<const PatternChar> pattern = search->pattern_; |
500 int pattern_length = pattern.length(); | 517 int pattern_length = pattern.length(); |
501 // Badness is a count of how much work we have done. When we have | 518 // Badness is a count of how much work we have done. When we have |
502 // done enough work we decide it's probably worth switching to a better | 519 // done enough work we decide it's probably worth switching to a better |
503 // algorithm. | 520 // algorithm. |
504 int badness = -10 - (pattern_length << 2); | 521 int badness = -10 - (pattern_length << 2); |
505 | 522 |
506 // We know our pattern is at least 2 characters, we cache the first so | 523 // We know our pattern is at least 2 characters, we cache the first so |
507 // the common case of the first character not matching is faster. | 524 // the common case of the first character not matching is faster. |
508 PatternChar pattern_first_char = pattern[0]; | |
509 for (int i = index, n = subject.length() - pattern_length; i <= n; i++) { | 525 for (int i = index, n = subject.length() - pattern_length; i <= n; i++) { |
510 badness++; | 526 badness++; |
511 if (badness <= 0) { | 527 if (badness <= 0) { |
512 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { | 528 i = FindFirstCharacter(pattern, subject, i); |
513 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( | 529 if (i == -1) return -1; |
514 memchr(subject.start() + i, | 530 DCHECK_LE(i, n); |
515 pattern_first_char, | |
516 n - i + 1)); | |
517 if (pos == NULL) { | |
518 return -1; | |
519 } | |
520 i = static_cast<int>(pos - subject.start()); | |
521 } else { | |
522 if (subject[i] != pattern_first_char) continue; | |
523 } | |
524 int j = 1; | 531 int j = 1; |
525 do { | 532 do { |
526 if (pattern[j] != subject[i + j]) { | 533 if (pattern[j] != subject[i + j]) { |
527 break; | 534 break; |
528 } | 535 } |
529 j++; | 536 j++; |
530 } while (j < pattern_length); | 537 } while (j < pattern_length); |
531 if (j == pattern_length) { | 538 if (j == pattern_length) { |
532 return i; | 539 return i; |
533 } | 540 } |
(...skipping 17 matching lines...) Expand all Loading... |
551 Vector<const SubjectChar> subject, | 558 Vector<const SubjectChar> subject, |
552 Vector<const PatternChar> pattern, | 559 Vector<const PatternChar> pattern, |
553 int start_index) { | 560 int start_index) { |
554 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); | 561 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); |
555 return search.Search(subject, start_index); | 562 return search.Search(subject, start_index); |
556 } | 563 } |
557 | 564 |
558 }} // namespace v8::internal | 565 }} // namespace v8::internal |
559 | 566 |
560 #endif // V8_STRING_SEARCH_H_ | 567 #endif // V8_STRING_SEARCH_H_ |
OLD | NEW |