Chromium Code Reviews| Index: src/string-search.h |
| diff --git a/src/string-search.h b/src/string-search.h |
| index 349d4fd2db72918560fd0e784bf9ea4664c803c2..192261655e5aa7c776d3071ca91adeb6fcefd68a 100644 |
| --- a/src/string-search.h |
| +++ b/src/string-search.h |
| @@ -189,11 +189,37 @@ class StringSearch : private StringSearchBase { |
| int start_; |
| }; |
|
Jakob Kummerow
2015/09/03 16:46:33
nit: two empty lines between top-level things
|
| +template <typename T, typename U> |
| +inline T AlignDown(T value, U alignment) { |
| + return reinterpret_cast<T>( |
| + (reinterpret_cast<uintptr_t>(value) & ~(alignment - 1))); |
| +} |
| + |
|
Jakob Kummerow
2015/09/03 16:46:33
nit: two empty lines between top-level things
|
| +template <typename PatternChar, typename SubjectChar> |
| +inline int FindFirstByte(Vector<const PatternChar> pattern, |
|
Jakob Kummerow
2015/09/03 16:46:33
Since this works with two-byte strings too, I'd ca
|
| + Vector<const SubjectChar> subject, int index) { |
| + PatternChar pattern_first_char = pattern[0]; |
| + |
| + if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
| + const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(memchr( |
| + subject.start() + index, pattern_first_char, subject.length() - index)); |
| + if (pos == NULL) return -1; |
| + return static_cast<int>(pos - subject.start()); |
| + } else { |
| + uint8_t search_low_byte = static_cast<uint8_t>(pattern_first_char & 0xFF); |
| + const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( |
| + memchr(subject.start() + index, search_low_byte, |
| + (subject.length() - index) * sizeof(SubjectChar))); |
| + if (pos == NULL) return -1; |
| + pos = AlignDown(pos, sizeof(SubjectChar)); |
|
Jakob Kummerow
2015/09/03 16:46:33
I don't think this is correct. If memchr() found t
|
| + return static_cast<int>(pos - subject.start()); |
| + } |
| + return -1; |
| +} |
| //--------------------------------------------------------------------- |
| // Single Character Pattern Search Strategy |
| //--------------------------------------------------------------------- |
| - |
|
Jakob Kummerow
2015/09/03 16:46:33
nit: keep this line
|
| template <typename PatternChar, typename SubjectChar> |
| int StringSearch<PatternChar, SubjectChar>::SingleCharSearch( |
| StringSearch<PatternChar, SubjectChar>* search, |
| @@ -201,14 +227,8 @@ int StringSearch<PatternChar, SubjectChar>::SingleCharSearch( |
| int index) { |
| DCHECK_EQ(1, search->pattern_.length()); |
| PatternChar pattern_first_char = search->pattern_[0]; |
| - int i = index; |
| if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
| - const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( |
| - memchr(subject.start() + i, |
| - pattern_first_char, |
| - subject.length() - i)); |
| - if (pos == NULL) return -1; |
| - return static_cast<int>(pos - subject.start()); |
| + return FindFirstByte(search->pattern_, subject, index); |
| } else { |
| if (sizeof(PatternChar) > sizeof(SubjectChar)) { |
| if (exceedsOneByte(pattern_first_char)) { |
| @@ -216,8 +236,12 @@ int StringSearch<PatternChar, SubjectChar>::SingleCharSearch( |
| } |
| } |
| SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char); |
| - int n = subject.length(); |
| + const int n = subject.length(); |
| + int i = index; |
| while (i < n) { |
| + i = FindFirstByte(search->pattern_, subject, i); |
| + if (i == -1) return -1; |
| + |
| if (subject[i++] == search_char) return i - 1; |
| } |
| return -1; |
| @@ -254,20 +278,12 @@ int StringSearch<PatternChar, SubjectChar>::LinearSearch( |
| Vector<const PatternChar> pattern = search->pattern_; |
| DCHECK(pattern.length() > 1); |
| int pattern_length = pattern.length(); |
| - PatternChar pattern_first_char = pattern[0]; |
| int i = index; |
| int n = subject.length() - pattern_length; |
| while (i <= n) { |
| - if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
| - const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( |
| - memchr(subject.start() + i, |
| - pattern_first_char, |
| - n - i + 1)); |
| - if (pos == NULL) return -1; |
| - i = static_cast<int>(pos - subject.start()) + 1; |
| - } else { |
| - if (subject[i++] != pattern_first_char) continue; |
| - } |
| + i = FindFirstByte(pattern, subject, i); |
| + if (i == -1) return -1; |
| + i++; |
| // Loop extracted to separate function to allow using return to do |
| // a deeper break. |
| if (CharCompare(pattern.start() + 1, |
| @@ -505,22 +521,11 @@ int StringSearch<PatternChar, SubjectChar>::InitialSearch( |
| // We know our pattern is at least 2 characters, we cache the first so |
| // the common case of the first character not matching is faster. |
| - PatternChar pattern_first_char = pattern[0]; |
| for (int i = index, n = subject.length() - pattern_length; i <= n; i++) { |
| badness++; |
| if (badness <= 0) { |
| - if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
| - const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( |
| - memchr(subject.start() + i, |
| - pattern_first_char, |
| - n - i + 1)); |
| - if (pos == NULL) { |
| - return -1; |
| - } |
| - i = static_cast<int>(pos - subject.start()); |
| - } else { |
| - if (subject[i] != pattern_first_char) continue; |
| - } |
| + i = FindFirstByte(pattern, subject, i); |
| + if (i == -1) return -1; |
| int j = 1; |
| do { |
| if (pattern[j] != subject[i + j]) { |