Chromium Code Reviews| Index: src/runtime.cc |
| diff --git a/src/runtime.cc b/src/runtime.cc |
| index 24b19047eb564ff6b4c1842ed99dba46bb9112f2..b9f1f48ef2138a4179b5436b4572e70d851e6e49 100644 |
| --- a/src/runtime.cc |
| +++ b/src/runtime.cc |
| @@ -1151,15 +1151,14 @@ static inline int CharOccurence(int char_code) { |
| return bad_char_occurence[char_code % kBMAlphabetSize]; |
| } |
| -// Restricted simplified Boyer-Moore string matching. Restricts tables to a |
| -// suffix of long pattern strings and handles only equivalence classes |
| -// of the full alphabet. This allows us to ensure that tables take only |
| -// a fixed amount of space. |
| +// Restricted simplified Boyer-Moore string matching. |
| +// Uses only the bad-shift table of Boyer-Moore and only uses it |
| +// for the character compared to the last character of the needle. |
| template <typename schar, typename pchar> |
| -static int BoyerMooreSimplified(Vector<const schar> subject, |
| - Vector<const pchar> pattern, |
| - int start_index, |
| - bool* complete) { |
| +static int BoyerMooreHorsepool(Vector<const schar> subject, |
| + Vector<const pchar> pattern, |
| + int start_index, |
| + bool* complete) { |
| int n = subject.length(); |
| int m = pattern.length(); |
| // Only preprocess at most kBMMaxShift last characters of pattern. |
| @@ -1170,6 +1169,7 @@ static int BoyerMooreSimplified(Vector<const schar> subject, |
| int badness = -m; // How bad we are doing without a good-suffix table. |
| int idx; // No matches found prior to this index. |
| pchar last_char = pattern[m - 1]; |
| + int last_char_shift = m - 1 - CharOccurence<schar, pchar>(last_char); |
|
Erik Corry
2008/12/17 09:07:18
Noun. occurence. Common misspelling of occurrence.
|
| // Perform search |
| for (idx = start_index; idx <= n - m;) { |
| int j = m - 1; |
| @@ -1185,19 +1185,17 @@ static int BoyerMooreSimplified(Vector<const schar> subject, |
| } |
| } |
| j--; |
| - while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--; |
| + while (j >= 0 && pattern[j] == (subject[idx + j])) j--; |
| if (j < 0) { |
| *complete = true; |
| return idx; |
| } else { |
| - int bc_occ = CharOccurence<schar, pchar>(c); |
| - int shift = bc_occ < j ? j - bc_occ : 1; |
| - idx += shift; |
| + idx += last_char_shift; |
| // Badness increases by the number of characters we have |
| // checked, and decreases by the number of characters we |
| // can skip by shifting. It's a measure of how we are doing |
| // compared to reading each character exactly once. |
| - badness += (m - j) - shift; |
| + badness += (m - j) - last_char_shift; |
| if (badness > 0) { |
| *complete = false; |
| return idx; |
| @@ -1237,12 +1235,15 @@ static int BoyerMooreIndexOf(Vector<const schar> subject, |
| return idx; |
| } else if (j < start) { |
| // we have matched more than our tables allow us to be smart about. |
| - idx += 1; |
| + // Fall back on BMH shift. |
| + idx += m - 1 - CharOccurence<schar, pchar>(last_char); |
| } else { |
| int gs_shift = bmgs_buffers.shift(j + 1); // Good suffix shift. |
| int bc_occ = CharOccurence<schar, pchar>(c); |
| int shift = j - bc_occ; // Bad-char shift. |
| - shift = (gs_shift > shift) ? gs_shift : shift; |
| + if (gs_shift > shift) { |
| + shift = gs_shift; |
| + } |
| idx += shift; |
| } |
| } while (idx <= n - m); |
| @@ -1286,7 +1287,7 @@ static int SimpleIndexOf(Vector<const schar> subject, |
| badness++; |
| if (badness > 0) { |
| *complete = false; |
| - return (i); |
| + return i; |
| } |
| if (subject[i] != pattern_first_char) continue; |
| int j = 1; |
| @@ -1357,7 +1358,7 @@ static int StringMatchStrategy(Vector<const schar> sub, |
| bool complete; |
| int idx = SimpleIndexOf(sub, pat, start_index, &complete); |
| if (complete) return idx; |
| - idx = BoyerMooreSimplified(sub, pat, idx, &complete); |
| + idx = BoyerMooreHorsepool(sub, pat, idx, &complete); |
| if (complete) return idx; |
| return BoyerMooreIndexOf(sub, pat, idx); |
| } |