Index: src/string-search.h |
diff --git a/src/string-search.h b/src/string-search.h |
index 349d4fd2db72918560fd0e784bf9ea4664c803c2..192261655e5aa7c776d3071ca91adeb6fcefd68a 100644 |
--- a/src/string-search.h |
+++ b/src/string-search.h |
@@ -189,11 +189,37 @@ class StringSearch : private StringSearchBase { |
int start_; |
}; |
Jakob Kummerow
2015/09/03 16:46:33
nit: two empty lines between top-level things
|
+template <typename T, typename U> |
+inline T AlignDown(T value, U alignment) { |
+ return reinterpret_cast<T>( |
+ (reinterpret_cast<uintptr_t>(value) & ~(alignment - 1))); |
+} |
+ |
Jakob Kummerow
2015/09/03 16:46:33
nit: two empty lines between top-level things
|
+template <typename PatternChar, typename SubjectChar> |
+inline int FindFirstByte(Vector<const PatternChar> pattern, |
Jakob Kummerow
2015/09/03 16:46:33
Since this works with two-byte strings too, I'd ca
|
+ Vector<const SubjectChar> subject, int index) { |
+ PatternChar pattern_first_char = pattern[0]; |
+ |
+ if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
+ const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(memchr( |
+ subject.start() + index, pattern_first_char, subject.length() - index)); |
+ if (pos == NULL) return -1; |
+ return static_cast<int>(pos - subject.start()); |
+ } else { |
+ uint8_t search_low_byte = static_cast<uint8_t>(pattern_first_char & 0xFF); |
+ const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( |
+ memchr(subject.start() + index, search_low_byte, |
+ (subject.length() - index) * sizeof(SubjectChar))); |
+ if (pos == NULL) return -1; |
+ pos = AlignDown(pos, sizeof(SubjectChar)); |
Jakob Kummerow
2015/09/03 16:46:33
I don't think this is correct. If memchr() found t
|
+ return static_cast<int>(pos - subject.start()); |
+ } |
+ return -1; |
+} |
//--------------------------------------------------------------------- |
// Single Character Pattern Search Strategy |
//--------------------------------------------------------------------- |
- |
Jakob Kummerow
2015/09/03 16:46:33
nit: keep this line
|
template <typename PatternChar, typename SubjectChar> |
int StringSearch<PatternChar, SubjectChar>::SingleCharSearch( |
StringSearch<PatternChar, SubjectChar>* search, |
@@ -201,14 +227,8 @@ int StringSearch<PatternChar, SubjectChar>::SingleCharSearch( |
int index) { |
DCHECK_EQ(1, search->pattern_.length()); |
PatternChar pattern_first_char = search->pattern_[0]; |
- int i = index; |
if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
- const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( |
- memchr(subject.start() + i, |
- pattern_first_char, |
- subject.length() - i)); |
- if (pos == NULL) return -1; |
- return static_cast<int>(pos - subject.start()); |
+ return FindFirstByte(search->pattern_, subject, index); |
} else { |
if (sizeof(PatternChar) > sizeof(SubjectChar)) { |
if (exceedsOneByte(pattern_first_char)) { |
@@ -216,8 +236,12 @@ int StringSearch<PatternChar, SubjectChar>::SingleCharSearch( |
} |
} |
SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char); |
- int n = subject.length(); |
+ const int n = subject.length(); |
+ int i = index; |
while (i < n) { |
+ i = FindFirstByte(search->pattern_, subject, i); |
+ if (i == -1) return -1; |
+ |
if (subject[i++] == search_char) return i - 1; |
} |
return -1; |
@@ -254,20 +278,12 @@ int StringSearch<PatternChar, SubjectChar>::LinearSearch( |
Vector<const PatternChar> pattern = search->pattern_; |
DCHECK(pattern.length() > 1); |
int pattern_length = pattern.length(); |
- PatternChar pattern_first_char = pattern[0]; |
int i = index; |
int n = subject.length() - pattern_length; |
while (i <= n) { |
- if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
- const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( |
- memchr(subject.start() + i, |
- pattern_first_char, |
- n - i + 1)); |
- if (pos == NULL) return -1; |
- i = static_cast<int>(pos - subject.start()) + 1; |
- } else { |
- if (subject[i++] != pattern_first_char) continue; |
- } |
+ i = FindFirstByte(pattern, subject, i); |
+ if (i == -1) return -1; |
+ i++; |
// Loop extracted to separate function to allow using return to do |
// a deeper break. |
if (CharCompare(pattern.start() + 1, |
@@ -505,22 +521,11 @@ int StringSearch<PatternChar, SubjectChar>::InitialSearch( |
// We know our pattern is at least 2 characters, we cache the first so |
// the common case of the first character not matching is faster. |
- PatternChar pattern_first_char = pattern[0]; |
for (int i = index, n = subject.length() - pattern_length; i <= n; i++) { |
badness++; |
if (badness <= 0) { |
- if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) { |
- const SubjectChar* pos = reinterpret_cast<const SubjectChar*>( |
- memchr(subject.start() + i, |
- pattern_first_char, |
- n - i + 1)); |
- if (pos == NULL) { |
- return -1; |
- } |
- i = static_cast<int>(pos - subject.start()); |
- } else { |
- if (subject[i] != pattern_first_char) continue; |
- } |
+ i = FindFirstByte(pattern, subject, i); |
+ if (i == -1) return -1; |
int j = 1; |
do { |
if (pattern[j] != subject[i + j]) { |