Index: src/string-search.h |
diff --git a/src/string-search.h b/src/string-search.h |
index 349d4fd2db72918560fd0e784bf9ea4664c803c2..e2e540b6cf8324f573a451b5061ebfd92ede1857 100644 |
--- a/src/string-search.h |
+++ b/src/string-search.h |
@@ -190,6 +190,41 @@ class StringSearch : private StringSearchBase { |
}; |
+template <typename PatternChar, typename SubjectChar> |
+int FindFirstCharacter(Vector<const PatternChar> pattern, |
+ Vector<const SubjectChar> subject, int index) { |
+ PatternChar pattern_first_char = pattern[0]; |
+ const int max_n = (subject.length() - pattern.length() + 1); |
+ |
+ if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
+ DCHECK_GE(max_n - index, 0); |
+ const SubjectChar* char_pos = reinterpret_cast<const SubjectChar*>( |
+ memchr(subject.start() + index, pattern_first_char, max_n - index)); |
+ if (char_pos == NULL) return -1; |
+ return static_cast<int>(char_pos - subject.start()); |
+ } else { |
+ const uint8_t search_low_byte = |
+ static_cast<uint8_t>(pattern_first_char & 0xFF); |
+ const SubjectChar search_char = |
+ static_cast<SubjectChar>(pattern_first_char); |
+ int pos = index; |
+ do { |
+ DCHECK_GE(max_n - pos, 0); |
+ const SubjectChar* char_pos = reinterpret_cast<const SubjectChar*>( |
+ memchr(subject.start() + pos, search_low_byte, |
+ (max_n - pos) * sizeof(SubjectChar))); |
+ if (char_pos == NULL) return -1; |
+ pos = static_cast<int>(char_pos - subject.start()); |
+ if (IsAligned(reinterpret_cast<uintptr_t>(char_pos), |
+ sizeof(SubjectChar))) { |
+ if (subject[pos] == search_char) return pos; |
+ } |
+ } while (++pos < max_n); |
+ } |
+ return -1; |
+} |
+ |
+ |
//--------------------------------------------------------------------- |
// Single Character Pattern Search Strategy |
//--------------------------------------------------------------------- |
@@ -201,26 +236,15 @@ int StringSearch<PatternChar, SubjectChar>::SingleCharSearch( |
int index) { |
DCHECK_EQ(1, search->pattern_.length()); |
PatternChar pattern_first_char = search->pattern_[0]; |
- int i = index; |
if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
- const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( |
- memchr(subject.start() + i, |
- pattern_first_char, |
- subject.length() - i)); |
- if (pos == NULL) return -1; |
- return static_cast<int>(pos - subject.start()); |
+ return FindFirstCharacter(search->pattern_, subject, index); |
} else { |
if (sizeof(PatternChar) > sizeof(SubjectChar)) { |
if (exceedsOneByte(pattern_first_char)) { |
return -1; |
} |
} |
- SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char); |
- int n = subject.length(); |
- while (i < n) { |
- if (subject[i++] == search_char) return i - 1; |
- } |
- return -1; |
+ return FindFirstCharacter(search->pattern_, subject, index); |
} |
} |
@@ -254,20 +278,13 @@ int StringSearch<PatternChar, SubjectChar>::LinearSearch( |
Vector<const PatternChar> pattern = search->pattern_; |
DCHECK(pattern.length() > 1); |
int pattern_length = pattern.length(); |
- PatternChar pattern_first_char = pattern[0]; |
int i = index; |
int n = subject.length() - pattern_length; |
while (i <= n) { |
- if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
- const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( |
- memchr(subject.start() + i, |
- pattern_first_char, |
- n - i + 1)); |
- if (pos == NULL) return -1; |
- i = static_cast<int>(pos - subject.start()) + 1; |
- } else { |
- if (subject[i++] != pattern_first_char) continue; |
- } |
+ i = FindFirstCharacter(pattern, subject, i); |
+ if (i == -1) return -1; |
+ DCHECK_LE(i, n); |
+ i++; |
// Loop extracted to separate function to allow using return to do |
// a deeper break. |
if (CharCompare(pattern.start() + 1, |
@@ -505,22 +522,12 @@ int StringSearch<PatternChar, SubjectChar>::InitialSearch( |
// We know our pattern is at least 2 characters, we cache the first so |
// the common case of the first character not matching is faster. |
- PatternChar pattern_first_char = pattern[0]; |
for (int i = index, n = subject.length() - pattern_length; i <= n; i++) { |
badness++; |
if (badness <= 0) { |
- if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
- const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( |
- memchr(subject.start() + i, |
- pattern_first_char, |
- n - i + 1)); |
- if (pos == NULL) { |
- return -1; |
- } |
- i = static_cast<int>(pos - subject.start()); |
- } else { |
- if (subject[i] != pattern_first_char) continue; |
- } |
+ i = FindFirstCharacter(pattern, subject, i); |
+ if (i == -1) return -1; |
+ DCHECK_LE(i, n); |
int j = 1; |
do { |
if (pattern[j] != subject[i + j]) { |