Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef V8_STRING_SEARCH_H_ | 5 #ifndef V8_STRING_SEARCH_H_ |
| 6 #define V8_STRING_SEARCH_H_ | 6 #define V8_STRING_SEARCH_H_ |
| 7 | 7 |
| 8 #include "src/isolate.h" | 8 #include "src/isolate.h" |
| 9 #include "src/vector.h" | 9 #include "src/vector.h" |
| 10 | 10 |
| (...skipping 170 matching lines...) | |
| 181 } | 181 } |
| 182 | 182 |
| 183 Isolate* isolate_; | 183 Isolate* isolate_; |
| 184 // The pattern to search for. | 184 // The pattern to search for. |
| 185 Vector<const PatternChar> pattern_; | 185 Vector<const PatternChar> pattern_; |
| 186 // Pointer to implementation of the search. | 186 // Pointer to implementation of the search. |
| 187 SearchFunction strategy_; | 187 SearchFunction strategy_; |
| 188 // Cache value of Max(0, pattern_length() - kBMMaxShift) | 188 // Cache value of Max(0, pattern_length() - kBMMaxShift) |
| 189 int start_; | 189 int start_; |
| 190 }; | 190 }; |
| 191 | 191 |
|
Jakob Kummerow
2015/09/03 16:46:33
nit: two empty lines between top-level things
| |
| 192 template <typename T, typename U> | |
| 193 inline T AlignDown(T value, U alignment) { | |
| 194 return reinterpret_cast<T>( | |
| 195 (reinterpret_cast<uintptr_t>(value) & ~(alignment - 1))); | |
| 196 } | |
| 197 | |
|
Jakob Kummerow
2015/09/03 16:46:33
nit: two empty lines between top-level things
| |
| 198 template <typename PatternChar, typename SubjectChar> | |
| 199 inline int FindFirstByte(Vector<const PatternChar> pattern, | |
|
Jakob Kummerow
2015/09/03 16:46:33
Since this works with two-byte strings too, I'd ca… [comment truncated]
| |
| 200 Vector<const SubjectChar> subject, int index) { | |
| 201 PatternChar pattern_first_char = pattern[0]; | |
| 202 | |
| 203 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { | |
| 204 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(memchr( | |
| 205 subject.start() + index, pattern_first_char, subject.length() - index)); | |
| 206 if (pos == NULL) return -1; | |
| 207 return static_cast<int>(pos - subject.start()); | |
| 208 } else { | |
| 209 uint8_t search_low_byte = static_cast<uint8_t>(pattern_first_char & 0xFF); | |
| 210 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( | |
| 211 memchr(subject.start() + index, search_low_byte, | |
| 212 (subject.length() - index) * sizeof(SubjectChar))); | |
| 213 if (pos == NULL) return -1; | |
| 214 pos = AlignDown(pos, sizeof(SubjectChar)); | |
|
Jakob Kummerow
2015/09/03 16:46:33
I don't think this is correct. If memchr() found t… [comment truncated]
| |
| 215 return static_cast<int>(pos - subject.start()); | |
| 216 } | |
| 217 return -1; | |
| 218 } | |
| 192 | 219 |
| 193 //--------------------------------------------------------------------- | 220 //--------------------------------------------------------------------- |
| 194 // Single Character Pattern Search Strategy | 221 // Single Character Pattern Search Strategy |
| 195 //--------------------------------------------------------------------- | 222 //--------------------------------------------------------------------- |
| 196 | |
|
Jakob Kummerow
2015/09/03 16:46:33
nit: keep this line
| |
| 197 template <typename PatternChar, typename SubjectChar> | 223 template <typename PatternChar, typename SubjectChar> |
| 198 int StringSearch<PatternChar, SubjectChar>::SingleCharSearch( | 224 int StringSearch<PatternChar, SubjectChar>::SingleCharSearch( |
| 199 StringSearch<PatternChar, SubjectChar>* search, | 225 StringSearch<PatternChar, SubjectChar>* search, |
| 200 Vector<const SubjectChar> subject, | 226 Vector<const SubjectChar> subject, |
| 201 int index) { | 227 int index) { |
| 202 DCHECK_EQ(1, search->pattern_.length()); | 228 DCHECK_EQ(1, search->pattern_.length()); |
| 203 PatternChar pattern_first_char = search->pattern_[0]; | 229 PatternChar pattern_first_char = search->pattern_[0]; |
| 204 int i = index; | |
| 205 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { | 230 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
| 206 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( | 231 return FindFirstByte(search->pattern_, subject, index); |
| 207 memchr(subject.start() + i, | |
| 208 pattern_first_char, | |
| 209 subject.length() - i)); | |
| 210 if (pos == NULL) return -1; | |
| 211 return static_cast<int>(pos - subject.start()); | |
| 212 } else { | 232 } else { |
| 213 if (sizeof(PatternChar) > sizeof(SubjectChar)) { | 233 if (sizeof(PatternChar) > sizeof(SubjectChar)) { |
| 214 if (exceedsOneByte(pattern_first_char)) { | 234 if (exceedsOneByte(pattern_first_char)) { |
| 215 return -1; | 235 return -1; |
| 216 } | 236 } |
| 217 } | 237 } |
| 218 SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char); | 238 SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char); |
| 219 int n = subject.length(); | 239 const int n = subject.length(); |
| 240 int i = index; | |
| 220 while (i < n) { | 241 while (i < n) { |
| 242 i = FindFirstByte(search->pattern_, subject, i); | |
| 243 if (i == -1) return -1; | |
| 244 | |
| 221 if (subject[i++] == search_char) return i - 1; | 245 if (subject[i++] == search_char) return i - 1; |
| 222 } | 246 } |
| 223 return -1; | 247 return -1; |
| 224 } | 248 } |
| 225 } | 249 } |
| 226 | 250 |
| 227 //--------------------------------------------------------------------- | 251 //--------------------------------------------------------------------- |
| 228 // Linear Search Strategy | 252 // Linear Search Strategy |
| 229 //--------------------------------------------------------------------- | 253 //--------------------------------------------------------------------- |
| 230 | 254 |
| (...skipping 16 matching lines...) | |
| 247 | 271 |
| 248 // Simple linear search for short patterns. Never bails out. | 272 // Simple linear search for short patterns. Never bails out. |
| 249 template <typename PatternChar, typename SubjectChar> | 273 template <typename PatternChar, typename SubjectChar> |
| 250 int StringSearch<PatternChar, SubjectChar>::LinearSearch( | 274 int StringSearch<PatternChar, SubjectChar>::LinearSearch( |
| 251 StringSearch<PatternChar, SubjectChar>* search, | 275 StringSearch<PatternChar, SubjectChar>* search, |
| 252 Vector<const SubjectChar> subject, | 276 Vector<const SubjectChar> subject, |
| 253 int index) { | 277 int index) { |
| 254 Vector<const PatternChar> pattern = search->pattern_; | 278 Vector<const PatternChar> pattern = search->pattern_; |
| 255 DCHECK(pattern.length() > 1); | 279 DCHECK(pattern.length() > 1); |
| 256 int pattern_length = pattern.length(); | 280 int pattern_length = pattern.length(); |
| 257 PatternChar pattern_first_char = pattern[0]; | |
| 258 int i = index; | 281 int i = index; |
| 259 int n = subject.length() - pattern_length; | 282 int n = subject.length() - pattern_length; |
| 260 while (i <= n) { | 283 while (i <= n) { |
| 261 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { | 284 i = FindFirstByte(pattern, subject, i); |
| 262 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( | 285 if (i == -1) return -1; |
| 263 memchr(subject.start() + i, | 286 i++; |
| 264 pattern_first_char, | |
| 265 n - i + 1)); | |
| 266 if (pos == NULL) return -1; | |
| 267 i = static_cast<int>(pos - subject.start()) + 1; | |
| 268 } else { | |
| 269 if (subject[i++] != pattern_first_char) continue; | |
| 270 } | |
| 271 // Loop extracted to separate function to allow using return to do | 287 // Loop extracted to separate function to allow using return to do |
| 272 // a deeper break. | 288 // a deeper break. |
| 273 if (CharCompare(pattern.start() + 1, | 289 if (CharCompare(pattern.start() + 1, |
| 274 subject.start() + i, | 290 subject.start() + i, |
| 275 pattern_length - 1)) { | 291 pattern_length - 1)) { |
| 276 return i - 1; | 292 return i - 1; |
| 277 } | 293 } |
| 278 } | 294 } |
| 279 return -1; | 295 return -1; |
| 280 } | 296 } |
| (...skipping 217 matching lines...) | |
| 498 int index) { | 514 int index) { |
| 499 Vector<const PatternChar> pattern = search->pattern_; | 515 Vector<const PatternChar> pattern = search->pattern_; |
| 500 int pattern_length = pattern.length(); | 516 int pattern_length = pattern.length(); |
| 501 // Badness is a count of how much work we have done. When we have | 517 // Badness is a count of how much work we have done. When we have |
| 502 // done enough work we decide it's probably worth switching to a better | 518 // done enough work we decide it's probably worth switching to a better |
| 503 // algorithm. | 519 // algorithm. |
| 504 int badness = -10 - (pattern_length << 2); | 520 int badness = -10 - (pattern_length << 2); |
| 505 | 521 |
| 506 // We know our pattern is at least 2 characters, we cache the first so | 522 // We know our pattern is at least 2 characters, we cache the first so |
| 507 // the common case of the first character not matching is faster. | 523 // the common case of the first character not matching is faster. |
| 508 PatternChar pattern_first_char = pattern[0]; | |
| 509 for (int i = index, n = subject.length() - pattern_length; i <= n; i++) { | 524 for (int i = index, n = subject.length() - pattern_length; i <= n; i++) { |
| 510 badness++; | 525 badness++; |
| 511 if (badness <= 0) { | 526 if (badness <= 0) { |
| 512 if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { | 527 i = FindFirstByte(pattern, subject, i); |
| 513 const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( | 528 if (i == -1) return -1; |
| 514 memchr(subject.start() + i, | |
| 515 pattern_first_char, | |
| 516 n - i + 1)); | |
| 517 if (pos == NULL) { | |
| 518 return -1; | |
| 519 } | |
| 520 i = static_cast<int>(pos - subject.start()); | |
| 521 } else { | |
| 522 if (subject[i] != pattern_first_char) continue; | |
| 523 } | |
| 524 int j = 1; | 529 int j = 1; |
| 525 do { | 530 do { |
| 526 if (pattern[j] != subject[i + j]) { | 531 if (pattern[j] != subject[i + j]) { |
| 527 break; | 532 break; |
| 528 } | 533 } |
| 529 j++; | 534 j++; |
| 530 } while (j < pattern_length); | 535 } while (j < pattern_length); |
| 531 if (j == pattern_length) { | 536 if (j == pattern_length) { |
| 532 return i; | 537 return i; |
| 533 } | 538 } |
| (...skipping 17 matching lines...) | |
| 551 Vector<const SubjectChar> subject, | 556 Vector<const SubjectChar> subject, |
| 552 Vector<const PatternChar> pattern, | 557 Vector<const PatternChar> pattern, |
| 553 int start_index) { | 558 int start_index) { |
| 554 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); | 559 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); |
| 555 return search.Search(subject, start_index); | 560 return search.Search(subject, start_index); |
| 556 } | 561 } |
| 557 | 562 |
| 558 }} // namespace v8::internal | 563 }} // namespace v8::internal |
| 559 | 564 |
| 560 #endif // V8_STRING_SEARCH_H_ | 565 #endif // V8_STRING_SEARCH_H_ |
| OLD | NEW |