OLD | NEW |
1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 26 matching lines...) Expand all Loading... |
37 //--------------------------------------------------------------------- | 37 //--------------------------------------------------------------------- |
38 | 38 |
39 // Class holding constants and methods that apply to all string search variants, | 39 // Class holding constants and methods that apply to all string search variants, |
40 // independently of subject and pattern char size. | 40 // independently of subject and pattern char size. |
41 class StringSearchBase { | 41 class StringSearchBase { |
42 protected: | 42 protected: |
43 // Cap on the maximal shift in the Boyer-Moore implementation. By setting a | 43 // Cap on the maximal shift in the Boyer-Moore implementation. By setting a |
44 // limit, we can fix the size of tables. For a needle longer than this limit, | 44 // limit, we can fix the size of tables. For a needle longer than this limit, |
45 // search will not be optimal, since we only build tables for a suffix | 45 // search will not be optimal, since we only build tables for a suffix |
46 // of the string, but it is a safe approximation. | 46 // of the string, but it is a safe approximation. |
47 static const int kBMMaxShift = 250; | 47 static const int kBMMaxShift = Isolate::kBMMaxShift; |
48 | 48 |
49 // Reduce alphabet to this size. | 49 // Reduce alphabet to this size. |
50 // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size | 50 // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size |
51 // proportional to the input alphabet. We reduce the alphabet size by | 51 // proportional to the input alphabet. We reduce the alphabet size by |
52 // equating input characters modulo a smaller alphabet size. This gives | 52 // equating input characters modulo a smaller alphabet size. This gives |
53 // a potentially less efficient searching, but is a safe approximation. | 53 // a potentially less efficient searching, but is a safe approximation. |
54 // For needles using only characters in the same Unicode 256-code point page, | 54 // For needles using only characters in the same Unicode 256-code point page, |
55 // there is no search speed degradation. | 55 // there is no search speed degradation. |
56 static const int kAsciiAlphabetSize = 128; | 56 static const int kAsciiAlphabetSize = 128; |
57 static const int kUC16AlphabetSize = 256; | 57 static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize; |
58 | 58 |
59 // Bad-char shift table stored in the state. Its length is the alphabet size. | 59 // Bad-char shift table stored in the state. Its length is the alphabet size. |
60 // For patterns below this length, the skip length of Boyer-Moore is too short | 60 // For patterns below this length, the skip length of Boyer-Moore is too short |
61 // to compensate for the algorithmic overhead compared to simple brute force. | 61 // to compensate for the algorithmic overhead compared to simple brute force. |
62 static const int kBMMinPatternLength = 7; | 62 static const int kBMMinPatternLength = 7; |
63 | 63 |
64 static inline bool IsAsciiString(Vector<const char>) { | 64 static inline bool IsAsciiString(Vector<const char>) { |
65 return true; | 65 return true; |
66 } | 66 } |
67 | 67 |
68 static inline bool IsAsciiString(Vector<const uc16> string) { | 68 static inline bool IsAsciiString(Vector<const uc16> string) { |
69 return String::IsAscii(string.start(), string.length()); | 69 return String::IsAscii(string.start(), string.length()); |
70 } | 70 } |
71 | 71 |
72 // The following tables are shared by all searches. | 72 friend class Isolate; |
73 // TODO(lrn): Introduce a way for a pattern to keep its tables | |
74 // between searches (e.g., for an Atom RegExp). | |
75 | |
76 // Store for the BoyerMoore(Horspool) bad char shift table. | |
77 static int kBadCharShiftTable[kUC16AlphabetSize]; | |
78 // Store for the BoyerMoore good suffix shift table. | |
79 static int kGoodSuffixShiftTable[kBMMaxShift + 1]; | |
80 // Table used temporarily while building the BoyerMoore good suffix | |
81 // shift table. | |
82 static int kSuffixTable[kBMMaxShift + 1]; | |
83 }; | 73 }; |
84 | 74 |
85 | 75 |
86 template <typename PatternChar, typename SubjectChar> | 76 template <typename PatternChar, typename SubjectChar> |
87 class StringSearch : private StringSearchBase { | 77 class StringSearch : private StringSearchBase { |
88 public: | 78 public: |
89 explicit StringSearch(Vector<const PatternChar> pattern) | 79 StringSearch(Isolate* isolate, Vector<const PatternChar> pattern) |
90 : pattern_(pattern), | 80 : isolate_(isolate), |
| 81 pattern_(pattern), |
91 start_(Max(0, pattern.length() - kBMMaxShift)) { | 82 start_(Max(0, pattern.length() - kBMMaxShift)) { |
92 if (sizeof(PatternChar) > sizeof(SubjectChar)) { | 83 if (sizeof(PatternChar) > sizeof(SubjectChar)) { |
93 if (!IsAsciiString(pattern_)) { | 84 if (!IsAsciiString(pattern_)) { |
94 strategy_ = &FailSearch; | 85 strategy_ = &FailSearch; |
95 return; | 86 return; |
96 } | 87 } |
97 } | 88 } |
98 int pattern_length = pattern_.length(); | 89 int pattern_length = pattern_.length(); |
99 if (pattern_length < kBMMinPatternLength) { | 90 if (pattern_length < kBMMinPatternLength) { |
100 if (pattern_length == 1) { | 91 if (pattern_length == 1) { |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
168 if (static_cast<unsigned int>(char_code) > String::kMaxAsciiCharCodeU) { | 159 if (static_cast<unsigned int>(char_code) > String::kMaxAsciiCharCodeU) { |
169 return -1; | 160 return -1; |
170 } | 161 } |
171 return bad_char_occurrence[static_cast<unsigned int>(char_code)]; | 162 return bad_char_occurrence[static_cast<unsigned int>(char_code)]; |
172 } | 163 } |
173 // Both pattern and subject are UC16. Reduce character to equivalence class. | 164 // Both pattern and subject are UC16. Reduce character to equivalence class. |
174 int equiv_class = char_code % kUC16AlphabetSize; | 165 int equiv_class = char_code % kUC16AlphabetSize; |
175 return bad_char_occurrence[equiv_class]; | 166 return bad_char_occurrence[equiv_class]; |
176 } | 167 } |
177 | 168 |
| 169 // The following tables are shared by all searches. |
| 170 // TODO(lrn): Introduce a way for a pattern to keep its tables |
| 171 // between searches (e.g., for an Atom RegExp). |
| 172 |
| 173 // Store for the BoyerMoore(Horspool) bad char shift table. |
178 // Return a table covering the last kBMMaxShift+1 positions of | 174 // Return a table covering the last kBMMaxShift+1 positions of |
179 // pattern. | 175 // pattern. |
180 int* bad_char_table() { | 176 int* bad_char_table() { |
181 return kBadCharShiftTable; | 177 return isolate_->bad_char_shift_table(); |
182 } | 178 } |
183 | 179 |
| 180 // Store for the BoyerMoore good suffix shift table. |
184 int* good_suffix_shift_table() { | 181 int* good_suffix_shift_table() { |
185 // Return biased pointer that maps the range [start_..pattern_.length()] | 182 // Return biased pointer that maps the range [start_..pattern_.length()] |
186 // to the kGoodSuffixShiftTable array. | 183 // to the kGoodSuffixShiftTable array. |
187 return kGoodSuffixShiftTable - start_; | 184 return isolate_->good_suffix_shift_table() - start_; |
188 } | 185 } |
189 | 186 |
| 187 // Table used temporarily while building the BoyerMoore good suffix |
| 188 // shift table. |
190 int* suffix_table() { | 189 int* suffix_table() { |
191 // Return biased pointer that maps the range [start_..pattern_.length()] | 190 // Return biased pointer that maps the range [start_..pattern_.length()] |
192 // to the kSuffixTable array. | 191 // to the kSuffixTable array. |
193 return kSuffixTable - start_; | 192 return isolate_->suffix_table() - start_; |
194 } | 193 } |
195 | 194 |
| 195 Isolate* isolate_; |
196 // The pattern to search for. | 196 // The pattern to search for. |
197 Vector<const PatternChar> pattern_; | 197 Vector<const PatternChar> pattern_; |
198 // Pointer to implementation of the search. | 198 // Pointer to implementation of the search. |
199 SearchFunction strategy_; | 199 SearchFunction strategy_; |
200 // Cache value of Max(0, pattern_length() - kBMMaxShift) | 200 // Cache value of Max(0, pattern_length() - kBMMaxShift) |
201 int start_; | 201 int start_; |
202 }; | 202 }; |
203 | 203 |
204 | 204 |
205 //--------------------------------------------------------------------- | 205 //--------------------------------------------------------------------- |
(...skipping 342 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
548 } | 548 } |
549 return -1; | 549 return -1; |
550 } | 550 } |
551 | 551 |
552 | 552 |
553 // Perform a single stand-alone search. | 553 // Perform a single stand-alone search. |
554 // If searching multiple times for the same pattern, a search | 554 // If searching multiple times for the same pattern, a search |
555 // object should be constructed once and the Search function then called | 555 // object should be constructed once and the Search function then called |
556 // for each search. | 556 // for each search. |
557 template <typename SubjectChar, typename PatternChar> | 557 template <typename SubjectChar, typename PatternChar> |
558 static int SearchString(Vector<const SubjectChar> subject, | 558 static int SearchString(Isolate* isolate, |
| 559 Vector<const SubjectChar> subject, |
559 Vector<const PatternChar> pattern, | 560 Vector<const PatternChar> pattern, |
560 int start_index) { | 561 int start_index) { |
561 StringSearch<PatternChar, SubjectChar> search(pattern); | 562 StringSearch<PatternChar, SubjectChar> search(isolate, pattern); |
562 return search.Search(subject, start_index); | 563 return search.Search(subject, start_index); |
563 } | 564 } |
564 | 565 |
565 }} // namespace v8::internal | 566 }} // namespace v8::internal |
566 | 567 |
567 #endif // V8_STRING_SEARCH_H_ | 568 #endif // V8_STRING_SEARCH_H_ |
OLD | NEW |