Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(153)

Side by Side Diff: src/string-search.h

Issue 6685088: Merge isolates to bleeding_edge. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 9 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/spaces-inl.h ('k') | src/string-search.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 26 matching lines...) Expand all
37 //--------------------------------------------------------------------- 37 //---------------------------------------------------------------------
38 38
39 // Class holding constants and methods that apply to all string search variants, 39 // Class holding constants and methods that apply to all string search variants,
40 // independently of subject and pattern char size. 40 // independently of subject and pattern char size.
41 class StringSearchBase { 41 class StringSearchBase {
42 protected: 42 protected:
43 // Cap on the maximal shift in the Boyer-Moore implementation. By setting a 43 // Cap on the maximal shift in the Boyer-Moore implementation. By setting a
44 // limit, we can fix the size of tables. For a needle longer than this limit, 44 // limit, we can fix the size of tables. For a needle longer than this limit,
45 // search will not be optimal, since we only build tables for a suffix 45 // search will not be optimal, since we only build tables for a suffix
46 // of the string, but it is a safe approximation. 46 // of the string, but it is a safe approximation.
47 static const int kBMMaxShift = 250; 47 static const int kBMMaxShift = Isolate::kBMMaxShift;
48 48
49 // Reduce alphabet to this size. 49 // Reduce alphabet to this size.
50 // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size 50 // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size
51 // proportional to the input alphabet. We reduce the alphabet size by 51 // proportional to the input alphabet. We reduce the alphabet size by
52 // equating input characters modulo a smaller alphabet size. This gives 52 // equating input characters modulo a smaller alphabet size. This gives
53 // a potentially less efficient searching, but is a safe approximation. 53 // a potentially less efficient searching, but is a safe approximation.
54 // For needles using only characters in the same Unicode 256-code point page, 54 // For needles using only characters in the same Unicode 256-code point page,
55 // there is no search speed degradation. 55 // there is no search speed degradation.
56 static const int kAsciiAlphabetSize = 128; 56 static const int kAsciiAlphabetSize = 128;
57 static const int kUC16AlphabetSize = 256; 57 static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize;
58 58
59 // Bad-char shift table stored in the state. It's length is the alphabet size. 59 // Bad-char shift table stored in the state. It's length is the alphabet size.
60 // For patterns below this length, the skip length of Boyer-Moore is too short 60 // For patterns below this length, the skip length of Boyer-Moore is too short
61 // to compensate for the algorithmic overhead compared to simple brute force. 61 // to compensate for the algorithmic overhead compared to simple brute force.
62 static const int kBMMinPatternLength = 7; 62 static const int kBMMinPatternLength = 7;
63 63
64 static inline bool IsAsciiString(Vector<const char>) { 64 static inline bool IsAsciiString(Vector<const char>) {
65 return true; 65 return true;
66 } 66 }
67 67
68 static inline bool IsAsciiString(Vector<const uc16> string) { 68 static inline bool IsAsciiString(Vector<const uc16> string) {
69 return String::IsAscii(string.start(), string.length()); 69 return String::IsAscii(string.start(), string.length());
70 } 70 }
71 71
72 // The following tables are shared by all searches. 72 friend class Isolate;
73 // TODO(lrn): Introduce a way for a pattern to keep its tables
74 // between searches (e.g., for an Atom RegExp).
75
76 // Store for the BoyerMoore(Horspool) bad char shift table.
77 static int kBadCharShiftTable[kUC16AlphabetSize];
78 // Store for the BoyerMoore good suffix shift table.
79 static int kGoodSuffixShiftTable[kBMMaxShift + 1];
80 // Table used temporarily while building the BoyerMoore good suffix
81 // shift table.
82 static int kSuffixTable[kBMMaxShift + 1];
83 }; 73 };
84 74
85 75
86 template <typename PatternChar, typename SubjectChar> 76 template <typename PatternChar, typename SubjectChar>
87 class StringSearch : private StringSearchBase { 77 class StringSearch : private StringSearchBase {
88 public: 78 public:
89 explicit StringSearch(Vector<const PatternChar> pattern) 79 StringSearch(Isolate* isolate, Vector<const PatternChar> pattern)
90 : pattern_(pattern), 80 : isolate_(isolate),
81 pattern_(pattern),
91 start_(Max(0, pattern.length() - kBMMaxShift)) { 82 start_(Max(0, pattern.length() - kBMMaxShift)) {
92 if (sizeof(PatternChar) > sizeof(SubjectChar)) { 83 if (sizeof(PatternChar) > sizeof(SubjectChar)) {
93 if (!IsAsciiString(pattern_)) { 84 if (!IsAsciiString(pattern_)) {
94 strategy_ = &FailSearch; 85 strategy_ = &FailSearch;
95 return; 86 return;
96 } 87 }
97 } 88 }
98 int pattern_length = pattern_.length(); 89 int pattern_length = pattern_.length();
99 if (pattern_length < kBMMinPatternLength) { 90 if (pattern_length < kBMMinPatternLength) {
100 if (pattern_length == 1) { 91 if (pattern_length == 1) {
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
168 if (static_cast<unsigned int>(char_code) > String::kMaxAsciiCharCodeU) { 159 if (static_cast<unsigned int>(char_code) > String::kMaxAsciiCharCodeU) {
169 return -1; 160 return -1;
170 } 161 }
171 return bad_char_occurrence[static_cast<unsigned int>(char_code)]; 162 return bad_char_occurrence[static_cast<unsigned int>(char_code)];
172 } 163 }
173 // Both pattern and subject are UC16. Reduce character to equivalence class. 164 // Both pattern and subject are UC16. Reduce character to equivalence class.
174 int equiv_class = char_code % kUC16AlphabetSize; 165 int equiv_class = char_code % kUC16AlphabetSize;
175 return bad_char_occurrence[equiv_class]; 166 return bad_char_occurrence[equiv_class];
176 } 167 }
177 168
169 // The following tables are shared by all searches.
170 // TODO(lrn): Introduce a way for a pattern to keep its tables
171 // between searches (e.g., for an Atom RegExp).
172
173 // Store for the BoyerMoore(Horspool) bad char shift table.
178 // Return a table covering the last kBMMaxShift+1 positions of 174 // Return a table covering the last kBMMaxShift+1 positions of
179 // pattern. 175 // pattern.
180 int* bad_char_table() { 176 int* bad_char_table() {
181 return kBadCharShiftTable; 177 return isolate_->bad_char_shift_table();
182 } 178 }
183 179
180 // Store for the BoyerMoore good suffix shift table.
184 int* good_suffix_shift_table() { 181 int* good_suffix_shift_table() {
185 // Return biased pointer that maps the range [start_..pattern_.length() 182 // Return biased pointer that maps the range [start_..pattern_.length()
186 // to the kGoodSuffixShiftTable array. 183 // to the kGoodSuffixShiftTable array.
187 return kGoodSuffixShiftTable - start_; 184 return isolate_->good_suffix_shift_table() - start_;
188 } 185 }
189 186
187 // Table used temporarily while building the BoyerMoore good suffix
188 // shift table.
190 int* suffix_table() { 189 int* suffix_table() {
191 // Return biased pointer that maps the range [start_..pattern_.length() 190 // Return biased pointer that maps the range [start_..pattern_.length()
192 // to the kSuffixTable array. 191 // to the kSuffixTable array.
193 return kSuffixTable - start_; 192 return isolate_->suffix_table() - start_;
194 } 193 }
195 194
195 Isolate* isolate_;
196 // The pattern to search for. 196 // The pattern to search for.
197 Vector<const PatternChar> pattern_; 197 Vector<const PatternChar> pattern_;
198 // Pointer to implementation of the search. 198 // Pointer to implementation of the search.
199 SearchFunction strategy_; 199 SearchFunction strategy_;
200 // Cache value of Max(0, pattern_length() - kBMMaxShift) 200 // Cache value of Max(0, pattern_length() - kBMMaxShift)
201 int start_; 201 int start_;
202 }; 202 };
203 203
204 204
205 //--------------------------------------------------------------------- 205 //---------------------------------------------------------------------
(...skipping 342 matching lines...) Expand 10 before | Expand all | Expand 10 after
548 } 548 }
549 return -1; 549 return -1;
550 } 550 }
551 551
552 552
553 // Perform a a single stand-alone search. 553 // Perform a a single stand-alone search.
554 // If searching multiple times for the same pattern, a search 554 // If searching multiple times for the same pattern, a search
555 // object should be constructed once and the Search function then called 555 // object should be constructed once and the Search function then called
556 // for each search. 556 // for each search.
557 template <typename SubjectChar, typename PatternChar> 557 template <typename SubjectChar, typename PatternChar>
558 static int SearchString(Vector<const SubjectChar> subject, 558 static int SearchString(Isolate* isolate,
559 Vector<const SubjectChar> subject,
559 Vector<const PatternChar> pattern, 560 Vector<const PatternChar> pattern,
560 int start_index) { 561 int start_index) {
561 StringSearch<PatternChar, SubjectChar> search(pattern); 562 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
562 return search.Search(subject, start_index); 563 return search.Search(subject, start_index);
563 } 564 }
564 565
565 }} // namespace v8::internal 566 }} // namespace v8::internal
566 567
567 #endif // V8_STRING_SEARCH_H_ 568 #endif // V8_STRING_SEARCH_H_
OLDNEW
« no previous file with comments | « src/spaces-inl.h ('k') | src/string-search.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698