| Index: src/string-search.h
 | 
| diff --git a/src/string-search.h b/src/string-search.h
 | 
| index d7959c0be982c35208c14cfc8803ec00a7f14a9b..1de375018b1d0c9cdd69d2dabceaf077d259f1a7 100644
 | 
| --- a/src/string-search.h
 | 
| +++ b/src/string-search.h
 | 
| @@ -32,278 +32,483 @@ namespace v8 {
 | 
|  namespace internal {
 | 
|  
 | 
|  
 | 
| -// Cap on the maximal shift in the Boyer-Moore implementation. By setting a
 | 
| -// limit, we can fix the size of tables. For a needle longer than this limit,
 | 
| -// search will not be optimal, since we only build tables for a smaller suffix
 | 
| -// of the string, which is a safe approximation.
 | 
| -static const int kBMMaxShift = 250;
 | 
| -// Reduce alphabet to this size.
 | 
| -// One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size
 | 
| -// proportional to the input alphabet. We reduce the alphabet size by
 | 
| -// equating input characters modulo a smaller alphabet size. This gives
 | 
| -// a potentially less efficient searching, but is a safe approximation.
 | 
| -// For needles using only characters in the same Unicode 256-code point page,
 | 
| -// there is no search speed degradation.
 | 
| -static const int kBMAlphabetSize = 256;
 | 
| -// For patterns below this length, the skip length of Boyer-Moore is too short
 | 
| -// to compensate for the algorithmic overhead compared to simple brute force.
 | 
| -static const int kBMMinPatternLength = 7;
 | 
| -
 | 
| -// Holds the two buffers used by Boyer-Moore string search's Good Suffix
 | 
| -// shift. Only allows the last kBMMaxShift characters of the needle
 | 
| -// to be indexed.
 | 
| -class BMGoodSuffixBuffers {
 | 
| +//---------------------------------------------------------------------
 | 
| +// String Search object.
 | 
| +//---------------------------------------------------------------------
 | 
| +
 | 
| +// Class holding constants and methods that apply to all string search variants,
 | 
| +// independently of subject and pattern char size.
 | 
| +class StringSearchBase {
 | 
| + protected:
 | 
| +  // Cap on the maximal shift in the Boyer-Moore implementation. By setting a
 | 
| +  // limit, we can fix the size of tables. For a needle longer than this limit,
 | 
| +  // search will not be optimal, since we only build tables for a suffix
 | 
| +  // of the string, but it is a safe approximation.
 | 
| +  static const int kBMMaxShift = 250;
 | 
| +
 | 
| +  // Reduce alphabet to this size.
 | 
| +  // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size
 | 
| +  // proportional to the input alphabet. We reduce the alphabet size by
 | 
| +  // equating input characters modulo a smaller alphabet size. This gives
 | 
| +  // a potentially less efficient searching, but is a safe approximation.
 | 
| +  // For needles using only characters in the same Unicode 256-code point page,
 | 
| +  // there is no search speed degradation.
 | 
| +  static const int kAsciiAlphabetSize = 128;
 | 
| +  static const int kUC16AlphabetSize = 256;
 | 
| +
 | 
| +  // Bad-char shift table stored in the state. It's length is the alphabet size.
 | 
| +  // For patterns below this length, the skip length of Boyer-Moore is too short
 | 
| +  // to compensate for the algorithmic overhead compared to simple brute force.
 | 
| +  static const int kBMMinPatternLength = 7;
 | 
| +
 | 
| +  static inline bool IsAsciiString(Vector<const char>) {
 | 
| +    return true;
 | 
| +  }
 | 
| +
 | 
| +  static inline bool IsAsciiString(Vector<const uc16> string) {
 | 
| +    for (int i = 0, n = string.length(); i < n; i++) {
 | 
| +      if (static_cast<unsigned>(string[i]) > String::kMaxAsciiCharCodeU) {
 | 
| +        return false;
 | 
| +      }
 | 
| +    }
 | 
| +    return true;
 | 
| +  }
 | 
| +
 | 
| +  // The following tables are shared by all searches.
 | 
| +  // TODO(lrn): Introduce a way for a pattern to keep its tables
 | 
| +  // between searches (e.g., for an Atom RegExp).
 | 
| +
 | 
| +  // Store for the BoyerMoore(Horspool) bad char shift table.
 | 
| +  static int kBadCharShiftTable[kUC16AlphabetSize];
 | 
| +  // Store for the BoyerMoore good suffix shift table.
 | 
| +  static int kGoodSuffixShiftTable[kBMMaxShift + 1];
 | 
| +  // Table used temporarily while building the BoyerMoore good suffix
 | 
| +  // shift table.
 | 
| +  static int kSuffixTable[kBMMaxShift + 1];
 | 
| +};
 | 
| +
 | 
| +
 | 
| +template <typename PatternChar, typename SubjectChar>
 | 
| +class StringSearch : private StringSearchBase {
 | 
|   public:
 | 
| -  BMGoodSuffixBuffers() {}
 | 
| -  inline void Initialize(int needle_length) {
 | 
| -    ASSERT(needle_length > 1);
 | 
| -    int start = needle_length < kBMMaxShift ? 0 : needle_length - kBMMaxShift;
 | 
| -    int len = needle_length - start;
 | 
| -    biased_suffixes_ = suffixes_ - start;
 | 
| -    biased_good_suffix_shift_ = good_suffix_shift_ - start;
 | 
| -    for (int i = 0; i <= len; i++) {
 | 
| -      good_suffix_shift_[i] = len;
 | 
| +  explicit StringSearch(Vector<const PatternChar> pattern)
 | 
| +      : pattern_(pattern),
 | 
| +        start_(Max(0, pattern.length() - kBMMaxShift)) {
 | 
| +    if (sizeof(PatternChar) > sizeof(SubjectChar)) {
 | 
| +      if (!IsAsciiString(pattern_)) {
 | 
| +        strategy_ = &FailSearch;
 | 
| +        return;
 | 
| +      }
 | 
| +    }
 | 
| +    int pattern_length = pattern_.length();
 | 
| +    if (pattern_length < kBMMinPatternLength) {
 | 
| +      if (pattern_length == 1) {
 | 
| +        strategy_ = &SingleCharSearch;
 | 
| +        return;
 | 
| +      }
 | 
| +      strategy_ = &LinearSearch;
 | 
| +      return;
 | 
|      }
 | 
| +    strategy_ = &InitialSearch;
 | 
|    }
 | 
| -  inline int& suffix(int index) {
 | 
| -    ASSERT(biased_suffixes_ + index >= suffixes_);
 | 
| -    return biased_suffixes_[index];
 | 
| +
 | 
| +  int Search(Vector<const SubjectChar> subject, int index) {
 | 
| +    return strategy_(this, subject, index);
 | 
|    }
 | 
| -  inline int& shift(int index) {
 | 
| -    ASSERT(biased_good_suffix_shift_ + index >= good_suffix_shift_);
 | 
| -    return biased_good_suffix_shift_[index];
 | 
| +
 | 
| +  static inline int AlphabetSize() {
 | 
| +    if (sizeof(PatternChar) == 1) {
 | 
| +      // ASCII needle.
 | 
| +      return kAsciiAlphabetSize;
 | 
| +    } else {
 | 
| +      ASSERT(sizeof(PatternChar) == 2);
 | 
| +      // UC16 needle.
 | 
| +      return kUC16AlphabetSize;
 | 
| +    }
 | 
|    }
 | 
| +
 | 
|   private:
 | 
| -  int suffixes_[kBMMaxShift + 1];
 | 
| -  int good_suffix_shift_[kBMMaxShift + 1];
 | 
| -  int* biased_suffixes_;
 | 
| -  int* biased_good_suffix_shift_;
 | 
| -  DISALLOW_COPY_AND_ASSIGN(BMGoodSuffixBuffers);
 | 
| -};
 | 
| +  typedef int (*SearchFunction)(  // NOLINT - it's not a cast!
 | 
| +      StringSearch<PatternChar, SubjectChar>*,
 | 
| +      Vector<const SubjectChar>,
 | 
| +      int);
 | 
| +
 | 
| +  static int FailSearch(StringSearch<PatternChar, SubjectChar>*,
 | 
| +                        Vector<const SubjectChar>,
 | 
| +                        int) {
 | 
| +    return -1;
 | 
| +  }
 | 
|  
 | 
| -// buffers reused by BoyerMoore
 | 
| -struct BMBuffers {
 | 
| - public:
 | 
| -  static int bad_char_occurrence[kBMAlphabetSize];
 | 
| -  static BMGoodSuffixBuffers bmgs_buffers;
 | 
| +  static int SingleCharSearch(StringSearch<PatternChar, SubjectChar>* search,
 | 
| +                              Vector<const SubjectChar> subject,
 | 
| +                              int start_index);
 | 
| +
 | 
| +  static int LinearSearch(StringSearch<PatternChar, SubjectChar>* search,
 | 
| +                          Vector<const SubjectChar> subject,
 | 
| +                          int start_index);
 | 
| +
 | 
| +  static int InitialSearch(StringSearch<PatternChar, SubjectChar>* search,
 | 
| +                           Vector<const SubjectChar> subject,
 | 
| +                           int start_index);
 | 
| +
 | 
| +  static int BoyerMooreHorspoolSearch(
 | 
| +      StringSearch<PatternChar, SubjectChar>* search,
 | 
| +      Vector<const SubjectChar> subject,
 | 
| +      int start_index);
 | 
| +
 | 
| +  static int BoyerMooreSearch(StringSearch<PatternChar, SubjectChar>* search,
 | 
| +                              Vector<const SubjectChar> subject,
 | 
| +                              int start_index);
 | 
| +
 | 
| +  void PopulateBoyerMooreHorspoolTable();
 | 
| +
 | 
| +  void PopulateBoyerMooreTable();
 | 
| +
 | 
| +  static inline int CharOccurrence(int* bad_char_occurrence,
 | 
| +                                   SubjectChar char_code) {
 | 
| +    if (sizeof(SubjectChar) == 1) {
 | 
| +      return bad_char_occurrence[static_cast<int>(char_code)];
 | 
| +    }
 | 
| +    if (sizeof(PatternChar) == 1) {
 | 
| +      if (static_cast<unsigned char>(char_code) > String::kMaxAsciiCharCode) {
 | 
| +        return -1;
 | 
| +      }
 | 
| +      return bad_char_occurrence[static_cast<int>(char_code)];
 | 
| +    }
 | 
| +    // Reduce to equivalence class.
 | 
| +    int equiv_class = char_code % kUC16AlphabetSize;
 | 
| +    return bad_char_occurrence[equiv_class];
 | 
| +  }
 | 
| +
 | 
| +  // Return a table covering the last kBMMaxShift+1 positions of
 | 
| +  // pattern.
 | 
| +  int* bad_char_table() {
 | 
| +    return kBadCharShiftTable;
 | 
| +  }
 | 
| +
 | 
| +  int* good_suffix_shift_table() {
 | 
| +    // Return biased pointer that maps the range  [start_..pattern_.length()
 | 
| +    // to the kGoodSuffixShiftTable array.
 | 
| +    return kGoodSuffixShiftTable - start_;
 | 
| +  }
 | 
| +
 | 
| +  int* suffix_table() {
 | 
| +    // Return biased pointer that maps the range  [start_..pattern_.length()
 | 
| +    // to the kSuffixTable array.
 | 
| +    return kSuffixTable - start_;
 | 
| +  }
 | 
| +
 | 
| +  // The pattern to search for.
 | 
| +  Vector<const PatternChar> pattern_;
 | 
| +  // Pointer to implementation of the search.
 | 
| +  SearchFunction strategy_;
 | 
| +  // Cache value of Max(0, pattern_length() - kBMMaxShift)
 | 
| +  int start_;
 | 
|  };
 | 
|  
 | 
| -// State of the string match tables.
 | 
| -// SIMPLE: No usable content in the buffers.
 | 
| -// BOYER_MOORE_HORSPOOL: The bad_char_occurence table has been populated.
 | 
| -// BOYER_MOORE: The bmgs_buffers tables have also been populated.
 | 
| -// Whenever starting with a new needle, one should call InitializeStringSearch
 | 
| -// to determine which search strategy to use, and in the case of a long-needle
 | 
| -// strategy, the call also initializes the algorithm to SIMPLE.
 | 
| -enum StringSearchAlgorithm { SIMPLE_SEARCH, BOYER_MOORE_HORSPOOL, BOYER_MOORE };
 | 
| -static StringSearchAlgorithm algorithm;
 | 
|  
 | 
| +//---------------------------------------------------------------------
 | 
| +// Single Character Pattern Search Strategy
 | 
| +//---------------------------------------------------------------------
 | 
|  
 | 
| -// Compute the bad-char table for Boyer-Moore in the static buffer.
 | 
| -template <typename PatternChar>
 | 
| -static void BoyerMoorePopulateBadCharTable(Vector<const PatternChar> pattern) {
 | 
| -  // Only preprocess at most kBMMaxShift last characters of pattern.
 | 
| -  int start = Max(pattern.length() - kBMMaxShift, 0);
 | 
| -  // Run forwards to populate bad_char_table, so that *last* instance
 | 
| -  // of character equivalence class is the one registered.
 | 
| -  // Notice: Doesn't include the last character.
 | 
| -  int table_size = (sizeof(PatternChar) == 1) ? String::kMaxAsciiCharCode + 1
 | 
| -                                        : kBMAlphabetSize;
 | 
| -  if (start == 0) {  // All patterns less than kBMMaxShift in length.
 | 
| -    memset(BMBuffers::bad_char_occurrence,
 | 
| -           -1,
 | 
| -           table_size * sizeof(*BMBuffers::bad_char_occurrence));
 | 
| +template <typename PatternChar, typename SubjectChar>
 | 
| +int StringSearch<PatternChar, SubjectChar>::SingleCharSearch(
 | 
| +    StringSearch<PatternChar, SubjectChar>* search,
 | 
| +    Vector<const SubjectChar> subject,
 | 
| +    int index) {
 | 
| +  ASSERT_EQ(1, search->pattern_.length());
 | 
| +  PatternChar pattern_first_char = search->pattern_[0];
 | 
| +  int i = index;
 | 
| +  if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
 | 
| +    const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
 | 
| +        memchr(subject.start() + i,
 | 
| +               pattern_first_char,
 | 
| +               subject.length() - i));
 | 
| +    if (pos == NULL) return -1;
 | 
| +    return static_cast<int>(pos - subject.start());
 | 
|    } else {
 | 
| -    for (int i = 0; i < table_size; i++) {
 | 
| -      BMBuffers::bad_char_occurrence[i] = start - 1;
 | 
| +    if (sizeof(PatternChar) > sizeof(SubjectChar)) {
 | 
| +      if (static_cast<uc16>(pattern_first_char) > String::kMaxAsciiCharCodeU) {
 | 
| +        return -1;
 | 
| +      }
 | 
|      }
 | 
| +    SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char);
 | 
| +    int n = subject.length();
 | 
| +    while (i < n) {
 | 
| +      if (subject[i++] == search_char) return i - 1;
 | 
| +    }
 | 
| +    return -1;
 | 
|    }
 | 
| -  for (int i = start; i < pattern.length() - 1; i++) {
 | 
| -    PatternChar c = pattern[i];
 | 
| -    int bucket = (sizeof(PatternChar) ==1) ? c : c % kBMAlphabetSize;
 | 
| -    BMBuffers::bad_char_occurrence[bucket] = i;
 | 
| +}
 | 
| +
 | 
| +//---------------------------------------------------------------------
 | 
| +// Linear Search Strategy
 | 
| +//---------------------------------------------------------------------
 | 
| +
 | 
| +
 | 
| +template <typename PatternChar, typename SubjectChar>
 | 
| +static inline bool CharCompare(const PatternChar* pattern,
 | 
| +                               const SubjectChar* subject,
 | 
| +                               int length) {
 | 
| +  ASSERT(length > 0);
 | 
| +  int pos = 0;
 | 
| +  do {
 | 
| +    if (pattern[pos] != subject[pos]) {
 | 
| +      return false;
 | 
| +    }
 | 
| +    pos++;
 | 
| +  } while (pos < length);
 | 
| +  return true;
 | 
| +}
 | 
| +
 | 
| +
 | 
| +// Simple linear search for short patterns. Never bails out.
 | 
| +template <typename PatternChar, typename SubjectChar>
 | 
| +int StringSearch<PatternChar, SubjectChar>::LinearSearch(
 | 
| +    StringSearch<PatternChar, SubjectChar>* search,
 | 
| +    Vector<const SubjectChar> subject,
 | 
| +    int index) {
 | 
| +  Vector<const PatternChar> pattern = search->pattern_;
 | 
| +  ASSERT(pattern.length() > 1);
 | 
| +  int pattern_length = pattern.length();
 | 
| +  PatternChar pattern_first_char = pattern[0];
 | 
| +  int i = index;
 | 
| +  int n = subject.length() - pattern_length;
 | 
| +  while (i <= n) {
 | 
| +    if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
 | 
| +      const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
 | 
| +          memchr(subject.start() + i,
 | 
| +                 pattern_first_char,
 | 
| +                 n - i + 1));
 | 
| +      if (pos == NULL) return -1;
 | 
| +      i = static_cast<int>(pos - subject.start()) + 1;
 | 
| +    } else {
 | 
| +      if (subject[i++] != pattern_first_char) continue;
 | 
| +    }
 | 
| +    // Loop extracted to separate function to allow using return to do
 | 
| +    // a deeper break.
 | 
| +    if (CharCompare(pattern.start() + 1,
 | 
| +                    subject.start() + i,
 | 
| +                    pattern_length - 1)) {
 | 
| +      return i - 1;
 | 
| +    }
 | 
| +  }
 | 
| +  return -1;
 | 
| +}
 | 
| +
 | 
| +//---------------------------------------------------------------------
 | 
| +// Boyer-Moore string search
 | 
| +//---------------------------------------------------------------------
 | 
| +
 | 
| +template <typename PatternChar, typename SubjectChar>
 | 
| +int StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
 | 
| +    StringSearch<PatternChar, SubjectChar>* search,
 | 
| +    Vector<const SubjectChar> subject,
 | 
| +    int start_index) {
 | 
| +  Vector<const PatternChar> pattern = search->pattern_;
 | 
| +  int subject_length = subject.length();
 | 
| +  int pattern_length = pattern.length();
 | 
| +  // Only preprocess at most kBMMaxShift last characters of pattern.
 | 
| +  int start = search->start_;
 | 
| +
 | 
| +  int* bad_char_occurence = search->bad_char_table();
 | 
| +  int* good_suffix_shift = search->good_suffix_shift_table();
 | 
| +
 | 
| +  PatternChar last_char = pattern[pattern_length - 1];
 | 
| +  int index = start_index;
 | 
| +  // Continue search from i.
 | 
| +  while (index <= subject_length - pattern_length) {
 | 
| +    int j = pattern_length - 1;
 | 
| +    int c;
 | 
| +    while (last_char != (c = subject[index + j])) {
 | 
| +      int shift =
 | 
| +          j - CharOccurrence(bad_char_occurence, c);
 | 
| +      index += shift;
 | 
| +      if (index > subject_length - pattern_length) {
 | 
| +        return -1;
 | 
| +      }
 | 
| +    }
 | 
| +    while (j >= 0 && pattern[j] == (c = subject[index + j])) j--;
 | 
| +    if (j < 0) {
 | 
| +      return index;
 | 
| +    } else if (j < start) {
 | 
| +      // we have matched more than our tables allow us to be smart about.
 | 
| +      // Fall back on BMH shift.
 | 
| +      index +=
 | 
| +          pattern_length - 1 - CharOccurrence(bad_char_occurence, last_char);
 | 
| +    } else {
 | 
| +      int gs_shift = good_suffix_shift[j + 1];
 | 
| +      int bc_occ =
 | 
| +          CharOccurrence(bad_char_occurence, c);
 | 
| +      int shift = j - bc_occ;
 | 
| +      if (gs_shift > shift) {
 | 
| +        shift = gs_shift;
 | 
| +      }
 | 
| +      index += shift;
 | 
| +    }
 | 
|    }
 | 
| +
 | 
| +  return -1;
 | 
|  }
 | 
|  
 | 
|  
 | 
| -template <typename PatternChar>
 | 
| -static void BoyerMoorePopulateGoodSuffixTable(
 | 
| -    Vector<const PatternChar> pattern) {
 | 
| -  int m = pattern.length();
 | 
| -  int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;
 | 
| -  int len = m - start;
 | 
| -  // Compute Good Suffix tables.
 | 
| -  BMBuffers::bmgs_buffers.Initialize(m);
 | 
| +template <typename PatternChar, typename SubjectChar>
 | 
| +void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreTable() {
 | 
| +  int pattern_length = pattern_.length();
 | 
| +  const PatternChar* pattern = pattern_.start();
 | 
| +  // Only look at the last kBMMaxShift characters of pattern (from start_
 | 
| +  // to pattern_length).
 | 
| +  int start = start_;
 | 
| +  int length = pattern_length - start;
 | 
| +
 | 
| +  // Biased tables so that we can use pattern indices as table indices,
 | 
| +  // even if we only cover the part of the pattern from offset start.
 | 
| +  int* shift_table = good_suffix_shift_table();
 | 
| +  int* suffix_table = this->suffix_table();
 | 
| +
 | 
| +  // Initialize table.
 | 
| +  for (int i = start; i < pattern_length; i++) {
 | 
| +    shift_table[i] = length;
 | 
| +  }
 | 
| +  shift_table[pattern_length] = 1;
 | 
| +  suffix_table[pattern_length] = pattern_length + 1;
 | 
|  
 | 
| -  BMBuffers::bmgs_buffers.shift(m-1) = 1;
 | 
| -  BMBuffers::bmgs_buffers.suffix(m) = m + 1;
 | 
| -  PatternChar last_char = pattern[m - 1];
 | 
| -  int suffix = m + 1;
 | 
| +  // Find suffixes.
 | 
| +  PatternChar last_char = pattern[pattern_length - 1];
 | 
| +  int suffix = pattern_length + 1;
 | 
|    {
 | 
| -    int i = m;
 | 
| +    int i = pattern_length;
 | 
|      while (i > start) {
 | 
|        PatternChar c = pattern[i - 1];
 | 
| -      while (suffix <= m && c != pattern[suffix - 1]) {
 | 
| -        if (BMBuffers::bmgs_buffers.shift(suffix) == len) {
 | 
| -          BMBuffers::bmgs_buffers.shift(suffix) = suffix - i;
 | 
| +      while (suffix <= pattern_length && c != pattern[suffix - 1]) {
 | 
| +        if (shift_table[suffix] == length) {
 | 
| +          shift_table[suffix] = suffix - i;
 | 
|          }
 | 
| -        suffix = BMBuffers::bmgs_buffers.suffix(suffix);
 | 
| +        suffix = suffix_table[suffix];
 | 
|        }
 | 
| -      BMBuffers::bmgs_buffers.suffix(--i) = --suffix;
 | 
| -      if (suffix == m) {
 | 
| +      suffix_table[--i] = --suffix;
 | 
| +      if (suffix == pattern_length) {
 | 
|          // No suffix to extend, so we check against last_char only.
 | 
|          while ((i > start) && (pattern[i - 1] != last_char)) {
 | 
| -          if (BMBuffers::bmgs_buffers.shift(m) == len) {
 | 
| -            BMBuffers::bmgs_buffers.shift(m) = m - i;
 | 
| +          if (shift_table[pattern_length] == length) {
 | 
| +            shift_table[pattern_length] = pattern_length - i;
 | 
|            }
 | 
| -          BMBuffers::bmgs_buffers.suffix(--i) = m;
 | 
| +          suffix_table[--i] = pattern_length;
 | 
|          }
 | 
|          if (i > start) {
 | 
| -          BMBuffers::bmgs_buffers.suffix(--i) = --suffix;
 | 
| +          suffix_table[--i] = --suffix;
 | 
|          }
 | 
|        }
 | 
|      }
 | 
|    }
 | 
| -  if (suffix < m) {
 | 
| -    for (int i = start; i <= m; i++) {
 | 
| -      if (BMBuffers::bmgs_buffers.shift(i) == len) {
 | 
| -        BMBuffers::bmgs_buffers.shift(i) = suffix - start;
 | 
| +  // Build shift table using suffixes.
 | 
| +  if (suffix < pattern_length) {
 | 
| +    for (int i = start; i <= pattern_length; i++) {
 | 
| +      if (shift_table[i] == length) {
 | 
| +        shift_table[i] = suffix - start;
 | 
|        }
 | 
|        if (i == suffix) {
 | 
| -        suffix = BMBuffers::bmgs_buffers.suffix(suffix);
 | 
| +        suffix = suffix_table[suffix];
 | 
|        }
 | 
|      }
 | 
|    }
 | 
|  }
 | 
|  
 | 
| +//---------------------------------------------------------------------
 | 
| +// Boyer-Moore-Horspool string search.
 | 
| +//---------------------------------------------------------------------
 | 
|  
 | 
| -template <typename SubjectChar, typename PatternChar>
 | 
| -static inline int CharOccurrence(int char_code) {
 | 
| -  if (sizeof(SubjectChar) == 1) {
 | 
| -    return BMBuffers::bad_char_occurrence[char_code];
 | 
| -  }
 | 
| -  if (sizeof(PatternChar) == 1) {
 | 
| -    if (char_code > String::kMaxAsciiCharCode) {
 | 
| -      return -1;
 | 
| -    }
 | 
| -    return BMBuffers::bad_char_occurrence[char_code];
 | 
| -  }
 | 
| -  return BMBuffers::bad_char_occurrence[char_code % kBMAlphabetSize];
 | 
| -}
 | 
| -
 | 
| -
 | 
| -// Restricted simplified Boyer-Moore string matching.
 | 
| -// Uses only the bad-shift table of Boyer-Moore and only uses it
 | 
| -// for the character compared to the last character of the needle.
 | 
| -template <typename SubjectChar, typename PatternChar>
 | 
| -static int BoyerMooreHorspool(Vector<const SubjectChar> subject,
 | 
| -                              Vector<const PatternChar> pattern,
 | 
| -                              int start_index,
 | 
| -                              bool* complete) {
 | 
| -  ASSERT(algorithm <= BOYER_MOORE_HORSPOOL);
 | 
| -  int n = subject.length();
 | 
| -  int m = pattern.length();
 | 
| -
 | 
| -  int badness = -m;
 | 
| +template <typename PatternChar, typename SubjectChar>
 | 
| +int StringSearch<PatternChar, SubjectChar>::BoyerMooreHorspoolSearch(
 | 
| +    StringSearch<PatternChar, SubjectChar>* search,
 | 
| +    Vector<const SubjectChar> subject,
 | 
| +    int start_index) {
 | 
| +  Vector<const PatternChar> pattern = search->pattern_;
 | 
| +  int subject_length = subject.length();
 | 
| +  int pattern_length = pattern.length();
 | 
| +  int* char_occurrences = search->bad_char_table();
 | 
| +  int badness = -pattern_length;
 | 
|  
 | 
|    // How bad we are doing without a good-suffix table.
 | 
| -  int idx;  // No matches found prior to this index.
 | 
| -  PatternChar last_char = pattern[m - 1];
 | 
| -  int last_char_shift =
 | 
| -      m - 1 - CharOccurrence<SubjectChar, PatternChar>(last_char);
 | 
| +  PatternChar last_char = pattern[pattern_length - 1];
 | 
| +  int last_char_shift = pattern_length - 1 -
 | 
| +      CharOccurrence(char_occurrences, last_char);
 | 
|    // Perform search
 | 
| -  for (idx = start_index; idx <= n - m;) {
 | 
| -    int j = m - 1;
 | 
| -    int c;
 | 
| -    while (last_char != (c = subject[idx + j])) {
 | 
| -      int bc_occ = CharOccurrence<SubjectChar, PatternChar>(c);
 | 
| +  int index = start_index;  // No matches found prior to this index.
 | 
| +  while (index <= subject_length - pattern_length) {
 | 
| +    int j = pattern_length - 1;
 | 
| +    int subject_char;
 | 
| +    while (last_char != (subject_char = subject[index + j])) {
 | 
| +      int bc_occ = CharOccurrence(char_occurrences, subject_char);
 | 
|        int shift = j - bc_occ;
 | 
| -      idx += shift;
 | 
| +      index += shift;
 | 
|        badness += 1 - shift;  // at most zero, so badness cannot increase.
 | 
| -      if (idx > n - m) {
 | 
| -        *complete = true;
 | 
| +      if (index > subject_length - pattern_length) {
 | 
|          return -1;
 | 
|        }
 | 
|      }
 | 
|      j--;
 | 
| -    while (j >= 0 && pattern[j] == (subject[idx + j])) j--;
 | 
| +    while (j >= 0 && pattern[j] == (subject[index + j])) j--;
 | 
|      if (j < 0) {
 | 
| -      *complete = true;
 | 
| -      return idx;
 | 
| +      return index;
 | 
|      } else {
 | 
| -      idx += last_char_shift;
 | 
| +      index += last_char_shift;
 | 
|        // Badness increases by the number of characters we have
 | 
|        // checked, and decreases by the number of characters we
 | 
|        // can skip by shifting. It's a measure of how we are doing
 | 
|        // compared to reading each character exactly once.
 | 
| -      badness += (m - j) - last_char_shift;
 | 
| +      badness += (pattern_length - j) - last_char_shift;
 | 
|        if (badness > 0) {
 | 
| -        *complete = false;
 | 
| -        return idx;
 | 
| +        search->PopulateBoyerMooreTable();
 | 
| +        search->strategy_ = &BoyerMooreSearch;
 | 
| +        return BoyerMooreSearch(search, subject, index);
 | 
|        }
 | 
|      }
 | 
|    }
 | 
| -  *complete = true;
 | 
|    return -1;
 | 
|  }
 | 
|  
 | 
|  
 | 
| -template <typename SubjectChar, typename PatternChar>
 | 
| -static int BoyerMooreIndexOf(Vector<const SubjectChar> subject,
 | 
| -                             Vector<const PatternChar> pattern,
 | 
| -                             int idx) {
 | 
| -  ASSERT(algorithm <= BOYER_MOORE);
 | 
| -  int n = subject.length();
 | 
| -  int m = pattern.length();
 | 
| -  // Only preprocess at most kBMMaxShift last characters of pattern.
 | 
| -  int start = m < kBMMaxShift ? 0 : m - kBMMaxShift;
 | 
| +template <typename PatternChar, typename SubjectChar>
 | 
| +void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreHorspoolTable() {
 | 
| +  int pattern_length = pattern_.length();
 | 
|  
 | 
| -  PatternChar last_char = pattern[m - 1];
 | 
| -  // Continue search from i.
 | 
| -  while (idx <= n - m) {
 | 
| -    int j = m - 1;
 | 
| -    SubjectChar c;
 | 
| -    while (last_char != (c = subject[idx + j])) {
 | 
| -      int shift = j - CharOccurrence<SubjectChar, PatternChar>(c);
 | 
| -      idx += shift;
 | 
| -      if (idx > n - m) {
 | 
| -        return -1;
 | 
| -      }
 | 
| -    }
 | 
| -    while (j >= 0 && pattern[j] == (c = subject[idx + j])) j--;
 | 
| -    if (j < 0) {
 | 
| -      return idx;
 | 
| -    } else if (j < start) {
 | 
| -      // we have matched more than our tables allow us to be smart about.
 | 
| -      // Fall back on BMH shift.
 | 
| -      idx += m - 1 - CharOccurrence<SubjectChar, PatternChar>(last_char);
 | 
| -    } else {
 | 
| -      int gs_shift = BMBuffers::bmgs_buffers.shift(j + 1);
 | 
| -      int bc_occ = CharOccurrence<SubjectChar, PatternChar>(c);
 | 
| -      int shift = j - bc_occ;
 | 
| -      if (gs_shift > shift) {
 | 
| -        shift = gs_shift;
 | 
| -      }
 | 
| -      idx += shift;
 | 
| +  int* bad_char_occurrence = bad_char_table();
 | 
| +
 | 
| +  // Only preprocess at most kBMMaxShift last characters of pattern.
 | 
| +  int start = start_;
 | 
| +  // Run forwards to populate bad_char_table, so that *last* instance
 | 
| +  // of character equivalence class is the one registered.
 | 
| +  // Notice: Doesn't include the last character.
 | 
| +  int table_size = AlphabetSize();
 | 
| +  if (start == 0) {  // All patterns less than kBMMaxShift in length.
 | 
| +    memset(bad_char_occurrence,
 | 
| +           -1,
 | 
| +           table_size * sizeof(*bad_char_occurrence));
 | 
| +  } else {
 | 
| +    for (int i = 0; i < table_size; i++) {
 | 
| +      bad_char_occurrence[i] = start - 1;
 | 
|      }
 | 
|    }
 | 
| -
 | 
| -  return -1;
 | 
| +  for (int i = start; i < pattern_length - 1; i++) {
 | 
| +    PatternChar c = pattern_[i];
 | 
| +    int bucket = (sizeof(PatternChar) == 1) ? c : c % AlphabetSize();
 | 
| +    bad_char_occurrence[bucket] = i;
 | 
| +  }
 | 
|  }
 | 
|  
 | 
| +//---------------------------------------------------------------------
 | 
| +// Linear string search with bailout to BMH.
 | 
| +//---------------------------------------------------------------------
 | 
|  
 | 
| -// Trivial string search for shorter strings.
 | 
| -// On return, if "complete" is set to true, the return value is the
 | 
| -// final result of searching for the patter in the subject.
 | 
| -// If "complete" is set to false, the return value is the index where
 | 
| -// further checking should start, i.e., it's guaranteed that the pattern
 | 
| -// does not occur at a position prior to the returned index.
 | 
| +// Simple linear search for short patterns, which bails out if the string
 | 
| +// isn't found very early in the subject. Upgrades to BoyerMooreHorspool.
 | 
|  template <typename PatternChar, typename SubjectChar>
 | 
| -static int SimpleIndexOf(Vector<const SubjectChar> subject,
 | 
| -                         Vector<const PatternChar> pattern,
 | 
| -                         int idx,
 | 
| -                         bool* complete) {
 | 
| -  ASSERT(pattern.length() > 1);
 | 
| +int StringSearch<PatternChar, SubjectChar>::InitialSearch(
 | 
| +    StringSearch<PatternChar, SubjectChar>* search,
 | 
| +    Vector<const SubjectChar> subject,
 | 
| +    int index) {
 | 
| +  Vector<const PatternChar> pattern = search->pattern_;
 | 
|    int pattern_length = pattern.length();
 | 
|    // Badness is a count of how much work we have done.  When we have
 | 
|    // done enough work we decide it's probably worth switching to a better
 | 
| @@ -313,149 +518,52 @@ static int SimpleIndexOf(Vector<const SubjectChar> subject,
 | 
|    // We know our pattern is at least 2 characters, we cache the first so
 | 
|    // the common case of the first character not matching is faster.
 | 
|    PatternChar pattern_first_char = pattern[0];
 | 
| -  for (int i = idx, n = subject.length() - pattern_length; i <= n; i++) {
 | 
| +  for (int i = index, n = subject.length() - pattern_length; i <= n; i++) {
 | 
|      badness++;
 | 
| -    if (badness > 0) {
 | 
| -      *complete = false;
 | 
| -      return i;
 | 
| -    }
 | 
| -    if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
 | 
| -      const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
 | 
| -          memchr(subject.start() + i,
 | 
| -                 pattern_first_char,
 | 
| -                 n - i + 1));
 | 
| -      if (pos == NULL) {
 | 
| -        *complete = true;
 | 
| -        return -1;
 | 
| +    if (badness <= 0) {
 | 
| +      if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
 | 
| +        const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
 | 
| +            memchr(subject.start() + i,
 | 
| +                   pattern_first_char,
 | 
| +                   n - i + 1));
 | 
| +        if (pos == NULL) {
 | 
| +          return -1;
 | 
| +        }
 | 
| +        i = static_cast<int>(pos - subject.start());
 | 
| +      } else {
 | 
| +        if (subject[i] != pattern_first_char) continue;
 | 
|        }
 | 
| -      i = static_cast<int>(pos - subject.start());
 | 
| -    } else {
 | 
| -      if (subject[i] != pattern_first_char) continue;
 | 
| -    }
 | 
| -    int j = 1;
 | 
| -    do {
 | 
| -      if (pattern[j] != subject[i+j]) {
 | 
| -        break;
 | 
| +      int j = 1;
 | 
| +      do {
 | 
| +        if (pattern[j] != subject[i + j]) {
 | 
| +          break;
 | 
| +        }
 | 
| +        j++;
 | 
| +      } while (j < pattern_length);
 | 
| +      if (j == pattern_length) {
 | 
| +        return i;
 | 
|        }
 | 
| -      j++;
 | 
| -    } while (j < pattern_length);
 | 
| -    if (j == pattern_length) {
 | 
| -      *complete = true;
 | 
| -      return i;
 | 
| -    }
 | 
| -    badness += j;
 | 
| -  }
 | 
| -  *complete = true;
 | 
| -  return -1;
 | 
| -}
 | 
| -
 | 
| -// Simple indexOf that never bails out. For short patterns only.
 | 
| -template <typename PatternChar, typename SubjectChar>
 | 
| -static int SimpleIndexOf(Vector<const SubjectChar> subject,
 | 
| -                         Vector<const PatternChar> pattern,
 | 
| -                         int idx) {
 | 
| -  int pattern_length = pattern.length();
 | 
| -  PatternChar pattern_first_char = pattern[0];
 | 
| -  for (int i = idx, n = subject.length() - pattern_length; i <= n; i++) {
 | 
| -    if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
 | 
| -      const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
 | 
| -          memchr(subject.start() + i,
 | 
| -                 pattern_first_char,
 | 
| -                 n - i + 1));
 | 
| -      if (pos == NULL) return -1;
 | 
| -      i = static_cast<int>(pos - subject.start());
 | 
| +      badness += j;
 | 
|      } else {
 | 
| -      if (subject[i] != pattern_first_char) continue;
 | 
| -    }
 | 
| -    int j = 1;
 | 
| -    while (j < pattern_length) {
 | 
| -      if (pattern[j] != subject[i+j]) {
 | 
| -        break;
 | 
| -      }
 | 
| -      j++;
 | 
| -    }
 | 
| -    if (j == pattern_length) {
 | 
| -      return i;
 | 
| +      search->PopulateBoyerMooreHorspoolTable();
 | 
| +      search->strategy_ = &BoyerMooreHorspoolSearch;
 | 
| +      return BoyerMooreHorspoolSearch(search, subject, i);
 | 
|      }
 | 
|    }
 | 
|    return -1;
 | 
|  }
 | 
|  
 | 
|  
 | 
| -// Strategy for searching for a string in another string.
 | 
| -enum StringSearchStrategy { SEARCH_FAIL, SEARCH_SHORT, SEARCH_LONG };
 | 
| -
 | 
| -
 | 
| -template <typename PatternChar>
 | 
| -static inline StringSearchStrategy InitializeStringSearch(
 | 
| -    Vector<const PatternChar> pat, bool ascii_subject) {
 | 
| -  // We have an ASCII haystack and a non-ASCII needle. Check if there
 | 
| -  // really is a non-ASCII character in the needle and bail out if there
 | 
| -  // is.
 | 
| -  if (ascii_subject && sizeof(PatternChar) > 1) {
 | 
| -    for (int i = 0; i < pat.length(); i++) {
 | 
| -      uc16 c = pat[i];
 | 
| -      if (c > String::kMaxAsciiCharCode) {
 | 
| -        return SEARCH_FAIL;
 | 
| -      }
 | 
| -    }
 | 
| -  }
 | 
| -  if (pat.length() < kBMMinPatternLength) {
 | 
| -    return SEARCH_SHORT;
 | 
| -  }
 | 
| -  algorithm = SIMPLE_SEARCH;
 | 
| -  return SEARCH_LONG;
 | 
| -}
 | 
| -
 | 
| -
 | 
| -// Dispatch long needle searches to different algorithms.
 | 
| +// Perform a a single stand-alone search.
 | 
| +// If searching multiple times for the same pattern, a search
 | 
| +// object should be constructed once and the Search function then called
 | 
| +// for each search.
 | 
|  template <typename SubjectChar, typename PatternChar>
 | 
| -static int ComplexIndexOf(Vector<const SubjectChar> sub,
 | 
| -                          Vector<const PatternChar> pat,
 | 
| -                          int start_index) {
 | 
| -  ASSERT(pat.length() >= kBMMinPatternLength);
 | 
| -  // Try algorithms in order of increasing setup cost and expected performance.
 | 
| -  bool complete;
 | 
| -  int idx = start_index;
 | 
| -  switch (algorithm) {
 | 
| -    case SIMPLE_SEARCH:
 | 
| -      idx = SimpleIndexOf(sub, pat, idx, &complete);
 | 
| -      if (complete) return idx;
 | 
| -      BoyerMoorePopulateBadCharTable(pat);
 | 
| -      algorithm = BOYER_MOORE_HORSPOOL;
 | 
| -      // FALLTHROUGH.
 | 
| -    case BOYER_MOORE_HORSPOOL:
 | 
| -      idx = BoyerMooreHorspool(sub, pat, idx, &complete);
 | 
| -      if (complete) return idx;
 | 
| -      // Build the Good Suffix table and continue searching.
 | 
| -      BoyerMoorePopulateGoodSuffixTable(pat);
 | 
| -      algorithm = BOYER_MOORE;
 | 
| -      // FALLTHROUGH.
 | 
| -    case BOYER_MOORE:
 | 
| -      return BoyerMooreIndexOf(sub, pat, idx);
 | 
| -  }
 | 
| -  UNREACHABLE();
 | 
| -  return -1;
 | 
| -}
 | 
| -
 | 
| -
 | 
| -// Dispatch to different search strategies for a single search.
 | 
| -// If searching multiple times on the same needle, the search
 | 
| -// strategy should only be computed once and then dispatch to different
 | 
| -// loops.
 | 
| -template <typename SubjectChar, typename PatternChar>
 | 
| -static int StringSearch(Vector<const SubjectChar> sub,
 | 
| -                        Vector<const PatternChar> pat,
 | 
| +static int SearchString(Vector<const SubjectChar> subject,
 | 
| +                        Vector<const PatternChar> pattern,
 | 
|                          int start_index) {
 | 
| -  bool ascii_subject = (sizeof(SubjectChar) == 1);
 | 
| -  StringSearchStrategy strategy = InitializeStringSearch(pat, ascii_subject);
 | 
| -  switch (strategy) {
 | 
| -    case SEARCH_FAIL: return -1;
 | 
| -    case SEARCH_SHORT: return SimpleIndexOf(sub, pat, start_index);
 | 
| -    case SEARCH_LONG: return ComplexIndexOf(sub, pat, start_index);
 | 
| -  }
 | 
| -  UNREACHABLE();
 | 
| -  return -1;
 | 
| +  StringSearch<PatternChar, SubjectChar> search(pattern);
 | 
| +  return search.Search(subject, start_index);
 | 
|  }
 | 
|  
 | 
|  }}  // namespace v8::internal
 | 
| 
 |