| OLD | NEW |
| 1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 26 matching lines...) Expand all Loading... |
| 37 //--------------------------------------------------------------------- | 37 //--------------------------------------------------------------------- |
| 38 | 38 |
| 39 // Class holding constants and methods that apply to all string search variants, | 39 // Class holding constants and methods that apply to all string search variants, |
| 40 // independently of subject and pattern char size. | 40 // independently of subject and pattern char size. |
| 41 class StringSearchBase { | 41 class StringSearchBase { |
| 42 protected: | 42 protected: |
| 43 // Cap on the maximal shift in the Boyer-Moore implementation. By setting a | 43 // Cap on the maximal shift in the Boyer-Moore implementation. By setting a |
| 44 // limit, we can fix the size of tables. For a needle longer than this limit, | 44 // limit, we can fix the size of tables. For a needle longer than this limit, |
| 45 // search will not be optimal, since we only build tables for a suffix | 45 // search will not be optimal, since we only build tables for a suffix |
| 46 // of the string, but it is a safe approximation. | 46 // of the string, but it is a safe approximation. |
| 47 static const int kBMMaxShift = 250; | 47 static const int kBMMaxShift = Isolate::kBMMaxShift; |
| 48 | 48 |
| 49 // Reduce alphabet to this size. | 49 // Reduce alphabet to this size. |
| 50 // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size | 50 // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size |
| 51 // proportional to the input alphabet. We reduce the alphabet size by | 51 // proportional to the input alphabet. We reduce the alphabet size by |
| 52 // equating input characters modulo a smaller alphabet size. This gives | 52 // equating input characters modulo a smaller alphabet size. This gives |
| 53 // a potentially less efficient searching, but is a safe approximation. | 53 // a potentially less efficient searching, but is a safe approximation. |
| 54 // For needles using only characters in the same Unicode 256-code point page, | 54 // For needles using only characters in the same Unicode 256-code point page, |
| 55 // there is no search speed degradation. | 55 // there is no search speed degradation. |
| 56 static const int kAsciiAlphabetSize = 128; | 56 static const int kAsciiAlphabetSize = 128; |
| 57 static const int kUC16AlphabetSize = 256; | 57 static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize; |
| 58 | 58 |
| 59 // Bad-char shift table stored in the state. It's length is the alphabet size. | 59 // Bad-char shift table stored in the state. It's length is the alphabet size. |
| 60 // For patterns below this length, the skip length of Boyer-Moore is too short | 60 // For patterns below this length, the skip length of Boyer-Moore is too short |
| 61 // to compensate for the algorithmic overhead compared to simple brute force. | 61 // to compensate for the algorithmic overhead compared to simple brute force. |
| 62 static const int kBMMinPatternLength = 7; | 62 static const int kBMMinPatternLength = 7; |
| 63 | 63 |
| 64 static inline bool IsAsciiString(Vector<const char>) { | 64 static inline bool IsAsciiString(Vector<const char>) { |
| 65 return true; | 65 return true; |
| 66 } | 66 } |
| 67 | 67 |
| 68 static inline bool IsAsciiString(Vector<const uc16> string) { | 68 static inline bool IsAsciiString(Vector<const uc16> string) { |
| 69 return String::IsAscii(string.start(), string.length()); | 69 return String::IsAscii(string.start(), string.length()); |
| 70 } | 70 } |
| 71 | 71 |
| 72 // The following tables are shared by all searches. | 72 friend class Isolate; |
| 73 // TODO(lrn): Introduce a way for a pattern to keep its tables | |
| 74 // between searches (e.g., for an Atom RegExp). | |
| 75 | |
| 76 // Store for the BoyerMoore(Horspool) bad char shift table. | |
| 77 static int kBadCharShiftTable[kUC16AlphabetSize]; | |
| 78 // Store for the BoyerMoore good suffix shift table. | |
| 79 static int kGoodSuffixShiftTable[kBMMaxShift + 1]; | |
| 80 // Table used temporarily while building the BoyerMoore good suffix | |
| 81 // shift table. | |
| 82 static int kSuffixTable[kBMMaxShift + 1]; | |
| 83 }; | 73 }; |
| 84 | 74 |
| 85 | 75 |
| 86 template <typename PatternChar, typename SubjectChar> | 76 template <typename PatternChar, typename SubjectChar> |
| 87 class StringSearch : private StringSearchBase { | 77 class StringSearch : private StringSearchBase { |
| 88 public: | 78 public: |
| 89 explicit StringSearch(Vector<const PatternChar> pattern) | 79 StringSearch(Isolate* isolate, Vector<const PatternChar> pattern) |
| 90 : pattern_(pattern), | 80 : isolate_(isolate), |
| 81 pattern_(pattern), |
| 91 start_(Max(0, pattern.length() - kBMMaxShift)) { | 82 start_(Max(0, pattern.length() - kBMMaxShift)) { |
| 92 if (sizeof(PatternChar) > sizeof(SubjectChar)) { | 83 if (sizeof(PatternChar) > sizeof(SubjectChar)) { |
| 93 if (!IsAsciiString(pattern_)) { | 84 if (!IsAsciiString(pattern_)) { |
| 94 strategy_ = &FailSearch; | 85 strategy_ = &FailSearch; |
| 95 return; | 86 return; |
| 96 } | 87 } |
| 97 } | 88 } |
| 98 int pattern_length = pattern_.length(); | 89 int pattern_length = pattern_.length(); |
| 99 if (pattern_length < kBMMinPatternLength) { | 90 if (pattern_length < kBMMinPatternLength) { |
| 100 if (pattern_length == 1) { | 91 if (pattern_length == 1) { |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 168 if (static_cast<unsigned int>(char_code) > String::kMaxAsciiCharCodeU) { | 159 if (static_cast<unsigned int>(char_code) > String::kMaxAsciiCharCodeU) { |
| 169 return -1; | 160 return -1; |
| 170 } | 161 } |
| 171 return bad_char_occurrence[static_cast<unsigned int>(char_code)]; | 162 return bad_char_occurrence[static_cast<unsigned int>(char_code)]; |
| 172 } | 163 } |
| 173 // Both pattern and subject are UC16. Reduce character to equivalence class. | 164 // Both pattern and subject are UC16. Reduce character to equivalence class. |
| 174 int equiv_class = char_code % kUC16AlphabetSize; | 165 int equiv_class = char_code % kUC16AlphabetSize; |
| 175 return bad_char_occurrence[equiv_class]; | 166 return bad_char_occurrence[equiv_class]; |
| 176 } | 167 } |
| 177 | 168 |
| 169 // The following tables are shared by all searches. |
| 170 // TODO(lrn): Introduce a way for a pattern to keep its tables |
| 171 // between searches (e.g., for an Atom RegExp). |
| 172 |
| 173 // Store for the BoyerMoore(Horspool) bad char shift table. |
| 178 // Return a table covering the last kBMMaxShift+1 positions of | 174 // Return a table covering the last kBMMaxShift+1 positions of |
| 179 // pattern. | 175 // pattern. |
| 180 int* bad_char_table() { | 176 int* bad_char_table() { |
| 181 return kBadCharShiftTable; | 177 return isolate_->bad_char_shift_table(); |
| 182 } | 178 } |
| 183 | 179 |
| 180 // Store for the BoyerMoore good suffix shift table. |
| 184 int* good_suffix_shift_table() { | 181 int* good_suffix_shift_table() { |
| 185 // Return biased pointer that maps the range [start_..pattern_.length() | 182 // Return biased pointer that maps the range [start_..pattern_.length() |
| 186 // to the kGoodSuffixShiftTable array. | 183 // to the kGoodSuffixShiftTable array. |
| 187 return kGoodSuffixShiftTable - start_; | 184 return isolate_->good_suffix_shift_table() - start_; |
| 188 } | 185 } |
| 189 | 186 |
| 187 // Table used temporarily while building the BoyerMoore good suffix |
| 188 // shift table. |
| 190 int* suffix_table() { | 189 int* suffix_table() { |
| 191 // Return biased pointer that maps the range [start_..pattern_.length() | 190 // Return biased pointer that maps the range [start_..pattern_.length() |
| 192 // to the kSuffixTable array. | 191 // to the kSuffixTable array. |
| 193 return kSuffixTable - start_; | 192 return isolate_->suffix_table() - start_; |
| 194 } | 193 } |
| 195 | 194 |
| 195 Isolate* isolate_; |
| 196 // The pattern to search for. | 196 // The pattern to search for. |
| 197 Vector<const PatternChar> pattern_; | 197 Vector<const PatternChar> pattern_; |
| 198 // Pointer to implementation of the search. | 198 // Pointer to implementation of the search. |
| 199 SearchFunction strategy_; | 199 SearchFunction strategy_; |
| 200 // Cache value of Max(0, pattern_length() - kBMMaxShift) | 200 // Cache value of Max(0, pattern_length() - kBMMaxShift) |
| 201 int start_; | 201 int start_; |
| 202 }; | 202 }; |
| 203 | 203 |
| 204 | 204 |
| 205 //--------------------------------------------------------------------- | 205 //--------------------------------------------------------------------- |
| (...skipping 342 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 548 } | 548 } |
| 549 return -1; | 549 return -1; |
| 550 } | 550 } |
| 551 | 551 |
| 552 | 552 |
| 553 // Perform a a single stand-alone search. | 553 // Perform a a single stand-alone search. |
| 554 // If searching multiple times for the same pattern, a search | 554 // If searching multiple times for the same pattern, a search |
| 555 // object should be constructed once and the Search function then called | 555 // object should be constructed once and the Search function then called |
| 556 // for each search. | 556 // for each search. |
| 557 template <typename SubjectChar, typename PatternChar> | 557 template <typename SubjectChar, typename PatternChar> |
| 558 static int SearchString(Vector<const SubjectChar> subject, | 558 static int SearchString(Isolate* isolate, |
| 559 Vector<const SubjectChar> subject, |
| 559 Vector<const PatternChar> pattern, | 560 Vector<const PatternChar> pattern, |
| 560 int start_index) { | 561 int start_index) { |
| 561 StringSearch<PatternChar, SubjectChar> search(pattern); | 562 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); |
| 562 return search.Search(subject, start_index); | 563 return search.Search(subject, start_index); |
| 563 } | 564 } |
| 564 | 565 |
| 565 }} // namespace v8::internal | 566 }} // namespace v8::internal |
| 566 | 567 |
| 567 #endif // V8_STRING_SEARCH_H_ | 568 #endif // V8_STRING_SEARCH_H_ |
| OLD | NEW |