OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_STRING_SEARCH_H_ | 5 #ifndef V8_STRING_SEARCH_H_ |
6 #define V8_STRING_SEARCH_H_ | 6 #define V8_STRING_SEARCH_H_ |
7 | 7 |
8 #include "src/isolate.h" | 8 #include "src/isolate.h" |
9 #include "src/vector.h" | 9 #include "src/vector.h" |
10 | 10 |
(...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
183 Isolate* isolate_; | 183 Isolate* isolate_; |
184 // The pattern to search for. | 184 // The pattern to search for. |
185 Vector<const PatternChar> pattern_; | 185 Vector<const PatternChar> pattern_; |
186 // Pointer to implementation of the search. | 186 // Pointer to implementation of the search. |
187 SearchFunction strategy_; | 187 SearchFunction strategy_; |
188 // Cache value of Max(0, pattern_length() - kBMMaxShift) | 188 // Cache value of Max(0, pattern_length() - kBMMaxShift) |
189 int start_; | 189 int start_; |
190 }; | 190 }; |
191 | 191 |
192 | 192 |
// FindFirstCharacter exists only in the OLD column; the NEW column deletes it.
// Searches subject, starting at position index, for the first character of
// pattern (pattern[0]). Returns the position of the first hit, or -1 when
// there is none.
// NOTE(review): the search range always extends to subject.length(); it is
// not limited to positions where the whole pattern would still fit.
193 template <typename PatternChar, typename SubjectChar> | |
194 int FindFirstCharacter(Vector<const PatternChar> pattern, | |
195 Vector<const SubjectChar> subject, int index) { | |
196 PatternChar pattern_first_char = pattern[0]; | |
197 | |
// Both character types are one byte wide: a single memchr over the remaining
// subject.length() - index characters answers the query directly.
198 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { | |
199 const SubjectChar* char_pos = reinterpret_cast<const SubjectChar*>(memchr( | |
200 subject.start() + index, pattern_first_char, subject.length() - index)); | |
201 if (char_pos == NULL) return -1; | |
202 return static_cast<int>(char_pos - subject.start()); | |
203 } else { | |
// Otherwise memchr is used as a byte-wise pre-filter: it scans the raw bytes
// of the rest of the subject for the low byte of the pattern character.
// NOTE(review): search_char is pattern_first_char narrowed to SubjectChar by
// static_cast; if PatternChar is wider than SubjectChar the high bits are
// dropped here - confirm every caller rules that case out beforehand.
204 const uint8_t search_low_byte = | |
205 static_cast<uint8_t>(pattern_first_char & 0xFF); | |
206 const SubjectChar search_char = | |
207 static_cast<SubjectChar>(pattern_first_char); | |
208 int pos = index; | |
209 do { | |
// The length argument is in bytes, hence the multiplication by
// sizeof(SubjectChar).
210 const SubjectChar* char_pos = reinterpret_cast<const SubjectChar*>( | |
211 memchr(subject.start() + pos, search_low_byte, | |
212 (subject.length() - pos) * sizeof(SubjectChar))); | |
213 if (char_pos == NULL) return -1; | |
// NOTE(review): the byte hit is reinterpreted as SubjectChar* and subtracted
// from subject.start() before the alignment check below - confirm this
// yields the index of the containing character on all targets.
214 pos = static_cast<int>(char_pos - subject.start()); | |
// A byte hit only counts when it sits on a SubjectChar boundary and the full
// character at that position equals search_char.
// NOTE(review): an aligned hit is the low byte only if the low byte is stored
// first in memory - confirm behaviour on big-endian targets.
215 if (IsAligned(reinterpret_cast<uintptr_t>(char_pos), | |
216 sizeof(SubjectChar))) { | |
217 if (subject[pos] == search_char) return pos; | |
218 } | |
// Resume the byte scan at the character after the rejected hit.
219 } while (++pos < subject.length()); | |
220 } | |
221 return -1; | |
222 } | |
223 | |
224 | |
225 //--------------------------------------------------------------------- | 193 //--------------------------------------------------------------------- |
226 // Single Character Pattern Search Strategy | 194 // Single Character Pattern Search Strategy |
227 //--------------------------------------------------------------------- | 195 //--------------------------------------------------------------------- |
228 | 196 |
// Search strategy for a pattern of exactly one character (enforced by the
// DCHECK below). Returns the position of the first occurrence of that
// character in subject at or after index, or -1 if it does not occur.
// OLD column: both branches delegate to FindFirstCharacter.
// NEW column: the search is written out inline (memchr or a scalar loop).
229 template <typename PatternChar, typename SubjectChar> | 197 template <typename PatternChar, typename SubjectChar> |
230 int StringSearch<PatternChar, SubjectChar>::SingleCharSearch( | 198 int StringSearch<PatternChar, SubjectChar>::SingleCharSearch( |
231 StringSearch<PatternChar, SubjectChar>* search, | 199 StringSearch<PatternChar, SubjectChar>* search, |
232 Vector<const SubjectChar> subject, | 200 Vector<const SubjectChar> subject, |
233 int index) { | 201 int index) { |
234 DCHECK_EQ(1, search->pattern_.length()); | 202 DCHECK_EQ(1, search->pattern_.length()); |
235 PatternChar pattern_first_char = search->pattern_[0]; | 203 PatternChar pattern_first_char = search->pattern_[0]; |
| 204 int i = index; |
// One-byte pattern and one-byte subject: NEW runs memchr over the remaining
// subject.length() - i characters and converts the hit back to an index.
236 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { | 205 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
237 return FindFirstCharacter(search->pattern_, subject, index); | 206 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( |
| 207 memchr(subject.start() + i, |
| 208 pattern_first_char, |
| 209 subject.length() - i)); |
| 210 if (pos == NULL) return -1; |
| 211 return static_cast<int>(pos - subject.start()); |
238 } else { | 212 } else { |
// When the pattern character type is wider than the subject's, give up early
// if exceedsOneByte() flags the character (presumably: it cannot be
// represented in the narrower subject - confirm against its definition).
239 if (sizeof(PatternChar) > sizeof(SubjectChar)) { | 213 if (sizeof(PatternChar) > sizeof(SubjectChar)) { |
240 if (exceedsOneByte(pattern_first_char)) { | 214 if (exceedsOneByte(pattern_first_char)) { |
241 return -1; | 215 return -1; |
242 } | 216 } |
243 } | 217 } |
// NEW: convert the character to SubjectChar once, then scan the subject one
// character at a time; i is post-incremented, so a hit is reported as i - 1.
244 return FindFirstCharacter(search->pattern_, subject, index); | 218 SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char); |
| 219 int n = subject.length(); |
| 220 while (i < n) { |
| 221 if (subject[i++] == search_char) return i - 1; |
| 222 } |
| 223 return -1; |
245 } | 224 } |
246 } | 225 } |
247 | 226 |
248 //--------------------------------------------------------------------- | 227 //--------------------------------------------------------------------- |
249 // Linear Search Strategy | 228 // Linear Search Strategy |
250 //--------------------------------------------------------------------- | 229 //--------------------------------------------------------------------- |
251 | 230 |
252 | 231 |
253 template <typename PatternChar, typename SubjectChar> | 232 template <typename PatternChar, typename SubjectChar> |
254 inline bool CharCompare(const PatternChar* pattern, | 233 inline bool CharCompare(const PatternChar* pattern, |
(...skipping 13 matching lines...) Expand all Loading... |
268 | 247 |
269 // Simple linear search for short patterns. Never bails out. | 248 // Simple linear search for short patterns. Never bails out. |
// Returns the position of the first occurrence of the pattern in subject at
// or after index, or -1 if there is none. The pattern must be longer than one
// character (see the DCHECK).
270 template <typename PatternChar, typename SubjectChar> | 249 template <typename PatternChar, typename SubjectChar> |
271 int StringSearch<PatternChar, SubjectChar>::LinearSearch( | 250 int StringSearch<PatternChar, SubjectChar>::LinearSearch( |
272 StringSearch<PatternChar, SubjectChar>* search, | 251 StringSearch<PatternChar, SubjectChar>* search, |
273 Vector<const SubjectChar> subject, | 252 Vector<const SubjectChar> subject, |
274 int index) { | 253 int index) { |
275 Vector<const PatternChar> pattern = search->pattern_; | 254 Vector<const PatternChar> pattern = search->pattern_; |
276 DCHECK(pattern.length() > 1); | 255 DCHECK(pattern.length() > 1); |
277 int pattern_length = pattern.length(); | 256 int pattern_length = pattern.length(); |
| 257 PatternChar pattern_first_char = pattern[0]; |
278 int i = index; | 258 int i = index; |
// n is the last start position at which the whole pattern still fits.
279 int n = subject.length() - pattern_length; | 259 int n = subject.length() - pattern_length; |
280 while (i <= n) { | 260 while (i <= n) { |
// Find the next candidate for the first pattern character. In both columns i
// ends up one past the candidate, which is why a match returns i - 1.
// OLD: FindFirstCharacter, followed by i++.
// NEW: for one-byte types, memchr limited to the n - i + 1 valid start
// positions; otherwise a direct comparison that advances i by one.
// NOTE(review): the OLD helper searches up to subject.length(), so it can
// return a position greater than n; CharCompare below presumably then reads
// pattern_length - 1 characters from subject.start() + i - confirm that
// cannot run past the end of subject.
281 i = FindFirstCharacter(pattern, subject, i); | 261 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
282 if (i == -1) return -1; | 262 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( |
283 i++; | 263 memchr(subject.start() + i, |
| 264 pattern_first_char, |
| 265 n - i + 1)); |
| 266 if (pos == NULL) return -1; |
| 267 i = static_cast<int>(pos - subject.start()) + 1; |
| 268 } else { |
| 269 if (subject[i++] != pattern_first_char) continue; |
| 270 } |
284 // Loop extracted to separate function to allow using return to do | 271 // Loop extracted to separate function to allow using return to do |
285 // a deeper break. | 272 // a deeper break. |
// The first character already matched, so only the remaining
// pattern_length - 1 characters are compared.
286 if (CharCompare(pattern.start() + 1, | 273 if (CharCompare(pattern.start() + 1, |
287 subject.start() + i, | 274 subject.start() + i, |
288 pattern_length - 1)) { | 275 pattern_length - 1)) { |
289 return i - 1; | 276 return i - 1; |
290 } | 277 } |
291 } | 278 } |
292 return -1; | 279 return -1; |
293 } | 280 } |
(...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
511 int index) { | 498 int index) { |
512 Vector<const PatternChar> pattern = search->pattern_; | 499 Vector<const PatternChar> pattern = search->pattern_; |
513 int pattern_length = pattern.length(); | 500 int pattern_length = pattern.length(); |
514 // Badness is a count of how much work we have done. When we have | 501 // Badness is a count of how much work we have done. When we have |
515 // done enough work we decide it's probably worth switching to a better | 502 // done enough work we decide it's probably worth switching to a better |
516 // algorithm. | 503 // algorithm. |
517 int badness = -10 - (pattern_length << 2); | 504 int badness = -10 - (pattern_length << 2); |
518 | 505 |
519 // We know our pattern is at least 2 characters, we cache the first so | 506 // We know our pattern is at least 2 characters, we cache the first so |
520 // the common case of the first character not matching is faster. | 507 // the common case of the first character not matching is faster. |
| 508 PatternChar pattern_first_char = pattern[0]; |
521 for (int i = index, n = subject.length() - pattern_length; i <= n; i++) { | 509 for (int i = index, n = subject.length() - pattern_length; i <= n; i++) { |
522 badness++; | 510 badness++; |
523 if (badness <= 0) { | 511 if (badness <= 0) { |
524 i = FindFirstCharacter(pattern, subject, i); | 512 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
525 if (i == -1) return -1; | 513 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( |
| 514 memchr(subject.start() + i, |
| 515 pattern_first_char, |
| 516 n - i + 1)); |
| 517 if (pos == NULL) { |
| 518 return -1; |
| 519 } |
| 520 i = static_cast<int>(pos - subject.start()); |
| 521 } else { |
| 522 if (subject[i] != pattern_first_char) continue; |
| 523 } |
526 int j = 1; | 524 int j = 1; |
527 do { | 525 do { |
528 if (pattern[j] != subject[i + j]) { | 526 if (pattern[j] != subject[i + j]) { |
529 break; | 527 break; |
530 } | 528 } |
531 j++; | 529 j++; |
532 } while (j < pattern_length); | 530 } while (j < pattern_length); |
533 if (j == pattern_length) { | 531 if (j == pattern_length) { |
534 return i; | 532 return i; |
535 } | 533 } |
(...skipping 17 matching lines...) Expand all Loading... |
553 Vector<const SubjectChar> subject, | 551 Vector<const SubjectChar> subject, |
554 Vector<const PatternChar> pattern, | 552 Vector<const PatternChar> pattern, |
555 int start_index) { | 553 int start_index) { |
556 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); | 554 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); |
557 return search.Search(subject, start_index); | 555 return search.Search(subject, start_index); |
558 } | 556 } |
559 | 557 |
560 }} // namespace v8::internal | 558 }} // namespace v8::internal |
561 | 559 |
562 #endif // V8_STRING_SEARCH_H_ | 560 #endif // V8_STRING_SEARCH_H_ |
OLD | NEW |