OLD | NEW |
---|---|
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 12 matching lines...) Expand all Loading... | |
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | 27 |
28 #ifndef V8_JSREGEXP_H_ | 28 #ifndef V8_JSREGEXP_H_ |
29 #define V8_JSREGEXP_H_ | 29 #define V8_JSREGEXP_H_ |
30 | 30 |
31 namespace v8 { namespace internal { | 31 namespace v8 { namespace internal { |
32 | 32 |
33 | |
34 class RegExpMacroAssembler; | |
35 | |
36 | |
33 class RegExpImpl { | 37 class RegExpImpl { |
34 public: | 38 public: |
35 // Creates a regular expression literal in the old space. | 39 // Creates a regular expression literal in the old space. |
36 // This function calls the garbage collector if necessary. | 40 // This function calls the garbage collector if necessary. |
37 static Handle<Object> CreateRegExpLiteral(Handle<JSFunction> constructor, | 41 static Handle<Object> CreateRegExpLiteral(Handle<JSFunction> constructor, |
38 Handle<String> pattern, | 42 Handle<String> pattern, |
39 Handle<String> flags, | 43 Handle<String> flags, |
40 bool* has_pending_exception); | 44 bool* has_pending_exception); |
41 | 45 |
42 // Returns a string representation of a regular expression. | 46 // Returns a string representation of a regular expression. |
(...skipping 11 matching lines...) Expand all Loading... | |
54 static Handle<Object> Exec(Handle<JSRegExp> regexp, | 58 static Handle<Object> Exec(Handle<JSRegExp> regexp, |
55 Handle<String> subject, | 59 Handle<String> subject, |
56 Handle<Object> index); | 60 Handle<Object> index); |
57 | 61 |
58 // Call RegExp.prototyp.exec(string) in a loop. | 62 // Call RegExp.prototyp.exec(string) in a loop. |
59 // Used by String.prototype.match and String.prototype.replace. | 63 // Used by String.prototype.match and String.prototype.replace. |
60 // This function calls the garbage collector if necessary. | 64 // This function calls the garbage collector if necessary. |
61 static Handle<Object> ExecGlobal(Handle<JSRegExp> regexp, | 65 static Handle<Object> ExecGlobal(Handle<JSRegExp> regexp, |
62 Handle<String> subject); | 66 Handle<String> subject); |
63 | 67 |
68 // Stores an uncompiled RegExp pattern in the JSRegExp object. | |
69 // It will be compiled by JSCRE when first executed. | |
70 static Handle<Object> JscrePrepare(Handle<JSRegExp> re, | |
71 Handle<String> pattern, | |
72 JSRegExp::Flags flags); | |
73 | |
74 // Stores a compiled RegExp pattern in the JSRegExp object. | |
75 // The pattern is compiled by Irregexp. | |
76 static Handle<Object> IrregexpPrepare(Handle<JSRegExp> re, | |
77 Handle<String> pattern, | |
78 JSRegExp::Flags flags, | |
79 Handle<FixedArray> irregexp_data); | |
80 | |
81 | |
82 // Compile the pattern using JSCRE and store the result in the | |
83 // JSRegExp object. | |
84 static Handle<Object> JscreCompile(Handle<JSRegExp> re); | |
85 | |
64 static Handle<Object> AtomCompile(Handle<JSRegExp> re, | 86 static Handle<Object> AtomCompile(Handle<JSRegExp> re, |
65 Handle<String> pattern, | 87 Handle<String> pattern, |
66 JSRegExp::Flags flags); | 88 JSRegExp::Flags flags, |
67 | 89 Handle<String> match_pattern); |
68 static Handle<Object> AtomExec(Handle<JSRegExp> regexp, | 90 static Handle<Object> AtomExec(Handle<JSRegExp> regexp, |
69 Handle<String> subject, | 91 Handle<String> subject, |
70 Handle<Object> index); | 92 Handle<Object> index); |
71 | 93 |
72 static Handle<Object> AtomExecGlobal(Handle<JSRegExp> regexp, | 94 static Handle<Object> AtomExecGlobal(Handle<JSRegExp> regexp, |
73 Handle<String> subject); | 95 Handle<String> subject); |
74 | 96 |
75 static Handle<Object> JsreCompile(Handle<JSRegExp> re, | 97 static Handle<Object> JscreCompile(Handle<JSRegExp> re, |
76 Handle<String> pattern, | 98 Handle<String> pattern, |
77 JSRegExp::Flags flags); | 99 JSRegExp::Flags flags); |
78 | 100 |
79 static Handle<Object> JsreExec(Handle<JSRegExp> regexp, | 101 // Execute a compiled JSCRE pattern. |
80 Handle<String> subject, | 102 static Handle<Object> JscreExec(Handle<JSRegExp> regexp, |
81 Handle<Object> index); | 103 Handle<String> subject, |
104 Handle<Object> index); | |
82 | 105 |
83 static Handle<Object> JsreExecGlobal(Handle<JSRegExp> regexp, | 106 // Execute an Irregexp bytecode pattern. |
84 Handle<String> subject); | 107 static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp, |
108 Handle<String> subject, | |
109 Handle<Object> index); | |
110 | |
111 static Handle<Object> JscreExecGlobal(Handle<JSRegExp> regexp, | |
112 Handle<String> subject); | |
113 | |
114 static Handle<Object> IrregexpExecGlobal(Handle<JSRegExp> regexp, | |
115 Handle<String> subject); | |
85 | 116 |
86 static void NewSpaceCollectionPrologue(); | 117 static void NewSpaceCollectionPrologue(); |
87 static void OldSpaceCollectionPrologue(); | 118 static void OldSpaceCollectionPrologue(); |
88 | 119 |
89 private: | |
90 // Converts a source string to a 16 bit flat string. The string | 120 // Converts a source string to a 16 bit flat string. The string |
91 // will be either sequential or it will be a SlicedString backed | 121 // will be either sequential or it will be a SlicedString backed |
92 // by a flat string. | 122 // by a flat string. |
93 static Handle<String> StringToTwoByte(Handle<String> pattern); | 123 static Handle<String> StringToTwoByte(Handle<String> pattern); |
94 static Handle<String> CachedStringToTwoByte(Handle<String> pattern); | 124 static Handle<String> CachedStringToTwoByte(Handle<String> pattern); |
95 | 125 |
126 static const int kIrregexpImplementationIndex = 0; | |
127 static const int kIrregexpNumberOfCapturesIndex = 1; | |
128 static const int kIrregexpNumberOfRegistersIndex = 2; | |
129 static const int kIrregexpCodeIndex = 3; | |
130 static const int kIrregexpDataLength = 4; | |
131 | |
132 static const int kJscreNumberOfCapturesIndex = 0; | |
133 static const int kJscreInternalIndex = 1; | |
134 static const int kJscreDataLength = 2; | |
135 | |
136 private: | |
96 static String* last_ascii_string_; | 137 static String* last_ascii_string_; |
97 static String* two_byte_cached_string_; | 138 static String* two_byte_cached_string_; |
98 | 139 |
99 // Returns the caputure from the re. | 140 static int JscreNumberOfCaptures(Handle<JSRegExp> re); |
100 static int JsreCapture(Handle<JSRegExp> re); | 141 static ByteArray* JscreInternal(Handle<JSRegExp> re); |
101 static ByteArray* JsreInternal(Handle<JSRegExp> re); | 142 |
143 static int IrregexpNumberOfCaptures(Handle<JSRegExp> re); | |
144 static int IrregexpNumberOfRegisters(Handle<JSRegExp> re); | |
145 static Handle<ByteArray> IrregexpCode(Handle<JSRegExp> re); | |
102 | 146 |
103 // Call jsRegExpExecute once | 147 // Call jsRegExpExecute once |
104 static Handle<Object> JsreExecOnce(Handle<JSRegExp> regexp, | 148 static Handle<Object> JscreExecOnce(Handle<JSRegExp> regexp, |
105 int num_captures, | 149 int num_captures, |
106 Handle<String> subject, | 150 Handle<String> subject, |
107 int previous_index, | 151 int previous_index, |
108 const uc16* utf8_subject, | 152 const uc16* utf8_subject, |
109 int* ovector, | 153 int* ovector, |
110 int ovector_length); | 154 int ovector_length); |
155 | |
156 static Handle<Object> IrregexpExecOnce(Handle<JSRegExp> regexp, | |
157 int num_captures, | |
158 Handle<String> subject16, | |
159 int previous_index, | |
160 int* ovector, | |
161 int ovector_length); | |
111 | 162 |
112 // Set the subject cache. The previous string buffer is not deleted, so the | 163 // Set the subject cache. The previous string buffer is not deleted, so the |
113 // caller should ensure that it doesn't leak. | 164 // caller should ensure that it doesn't leak. |
114 static void SetSubjectCache(String* subject, char* utf8_subject, | 165 static void SetSubjectCache(String* subject, |
115 int uft8_length, int character_position, | 166 char* utf8_subject, |
167 int uft8_length, | |
168 int character_position, | |
116 int utf8_position); | 169 int utf8_position); |
117 | 170 |
118 // A one element cache of the last utf8_subject string and its length. The | 171 // A one element cache of the last utf8_subject string and its length. The |
119 // subject JS String object is cached in the heap. We also cache a | 172 // subject JS String object is cached in the heap. We also cache a |
120 // translation between position and utf8 position. | 173 // translation between position and utf8 position. |
121 static char* utf8_subject_cache_; | 174 static char* utf8_subject_cache_; |
122 static int utf8_length_cache_; | 175 static int utf8_length_cache_; |
123 static int utf8_position_; | 176 static int utf8_position_; |
124 static int character_position_; | 177 static int character_position_; |
125 }; | 178 }; |
126 | 179 |
127 | 180 |
181 class CharacterRange { | |
iposva
2008/11/26 06:56:22
Please add comments about this class.
| |
182 public: | |
183 CharacterRange() : from_(0), to_(0) { } | |
184 // For compatibility with the CHECK_OK macro | |
185 CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT | |
186 CharacterRange(uc16 from, uc16 to) | |
187 : from_(from), | |
Mads Ager (chromium)
2008/11/25 21:09:41
Four space indent. This migth all fit on one line
Christian Plesner Hansen
2008/11/26 06:49:56
Fixed.
| |
188 to_(to) { | |
189 } | |
190 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges); | |
191 static inline CharacterRange Singleton(uc16 value) { | |
192 return CharacterRange(value, value); | |
193 } | |
194 static inline CharacterRange Range(uc16 from, uc16 to) { | |
195 ASSERT(from <= to); | |
196 return CharacterRange(from, to); | |
197 } | |
198 static inline CharacterRange Everything() { | |
199 return CharacterRange(0, 0xFFFF); | |
200 } | |
201 bool Contains(uc16 i) { return from_ <= i && i <= to_; } | |
202 uc16 from() const { return from_; } | |
203 void set_from(uc16 value) { from_ = value; } | |
204 uc16 to() const { return to_; } | |
205 void set_to(uc16 value) { to_ = value; } | |
206 bool is_valid() { return from_ <= to_; } | |
207 bool IsSingleton() { return (from_ == to_); } | |
208 void AddCaseEquivalents(ZoneList<CharacterRange>* ranges); | |
209 static const int kRangeCanonicalizeMax = 0x200; | |
iposva
2008/11/26 06:56:22
Declaration order: constants before constructors.
Christian Plesner Hansen
2008/11/26 07:49:51
We've never followed that rule. In fact, as far a
| |
210 static const int kStartMarker = (1 << 24); | |
211 static const int kPayloadMask = (1 << 24) - 1; | |
212 private: | |
iposva
2008/11/26 06:56:22
Please leave a blank line before "private:" in acc
Christian Plesner Hansen
2008/11/26 07:49:51
Again, that's not a rule we've been consistent abo
| |
213 uc16 from_; | |
214 uc16 to_; | |
215 }; | |
216 | |
217 | |
218 template <typename Node, class Callback> | |
219 static void DoForEach(Node* node, Callback* callback); | |
220 | |
221 | |
222 // A zone splay tree. The config type parameter encapsulates the | |
223 // different configurations of a concrete splay tree: | |
224 // | |
225 // typedef Key: the key type | |
226 // typedef Value: the value type | |
227 // static const kNoKey: the dummy key used when no key is set | |
228 // static const kNoValue: the dummy value used to initialize nodes | |
229 // int (Compare)(Key& a, Key& b) -> {-1, 0, 1}: comparison function | |
230 // | |
231 template <typename Config> | |
232 class ZoneSplayTree : public ZoneObject { | |
233 public: | |
234 typedef typename Config::Key Key; | |
235 typedef typename Config::Value Value; | |
236 | |
237 class Locator; | |
238 | |
239 ZoneSplayTree() : root_(NULL) { } | |
240 | |
241 // Inserts the given key in this tree with the given value. Returns | |
242 // true if a node was inserted, otherwise false. If found the locator | |
243 // is enabled and provides access to the mapping for the key. | |
244 bool Insert(const Key& key, Locator* locator); | |
245 | |
246 // Looks up the key in this tree and returns true if it was found, | |
247 // otherwise false. If the node is found the locator is enabled and | |
248 // provides access to the mapping for the key. | |
249 bool Find(const Key& key, Locator* locator); | |
250 | |
251 // Finds the mapping with the greatest key less than or equal to the | |
252 // given key. | |
253 bool FindGreatestLessThan(const Key& key, Locator* locator); | |
254 | |
255 // Find the mapping with the greatest key in this tree. | |
256 bool FindGreatest(Locator* locator); | |
257 | |
258 // Finds the mapping with the least key greater than or equal to the | |
259 // given key. | |
260 bool FindLeastGreaterThan(const Key& key, Locator* locator); | |
261 | |
262 // Find the mapping with the least key in this tree. | |
263 bool FindLeast(Locator* locator); | |
264 | |
265 // Remove the node with the given key from the tree. | |
266 bool Remove(const Key& key); | |
267 | |
268 bool is_empty() { return root_ == NULL; } | |
269 | |
270 // Perform the splay operation for the given key. Moves the node with | |
271 // the given key to the top of the tree. If no node has the given | |
272 // key, the last node on the search path is moved to the top of the | |
273 // tree. | |
274 void Splay(const Key& key); | |
275 | |
276 class Node : public ZoneObject { | |
277 public: | |
278 Node(const Key& key, const Value& value) | |
279 : key_(key), | |
Mads Ager (chromium)
2008/11/25 21:09:41
Four space indent. Many more occurrences below.
Christian Plesner Hansen
2008/11/26 06:49:56
Fixed.
| |
280 value_(value), | |
281 left_(NULL), | |
282 right_(NULL) { } | |
283 Key key() { return key_; } | |
Mads Ager (chromium)
2008/11/25 21:09:41
Indentation is off for all the accessors.
Christian Plesner Hansen
2008/11/26 06:49:56
Fixed. I wonder why lint didn't catch this?
| |
284 Value value() { return value_; } | |
285 Node* left() { return left_; } | |
286 Node* right() { return right_; } | |
287 private: | |
288 friend class ZoneSplayTree; | |
289 friend class Locator; | |
290 Key key_; | |
291 Value value_; | |
292 Node* left_; | |
293 Node* right_; | |
294 }; | |
295 | |
296 // A locator provides access to a node in the tree without actually | |
297 // exposing the node. | |
298 class Locator { | |
299 public: | |
300 explicit Locator(Node* node) : node_(node) { } | |
301 Locator() : node_(NULL) { } | |
302 const Key& key() { return node_->key_; } | |
303 Value& value() { return node_->value_; } | |
304 void set_value(const Value& value) { node_->value_ = value; } | |
305 inline void bind(Node* node) { node_ = node; } | |
306 private: | |
307 Node* node_; | |
308 }; | |
309 | |
310 template <class Callback> | |
311 void ForEach(Callback* c) { | |
312 DoForEach<typename ZoneSplayTree<Config>::Node, Callback>(root_, c); | |
313 } | |
314 | |
315 private: | |
316 Node* root_; | |
317 }; | |
318 | |
319 | |
320 // A set of unsigned integers that behaves especially well on small | |
321 // integers (< 32). May do zone-allocation. | |
322 class OutSet: public ZoneObject { | |
323 public: | |
324 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { } | |
325 OutSet* Extend(unsigned value); | |
326 bool Get(unsigned value); | |
327 static const unsigned kFirstLimit = 32; | |
328 private: | |
Mads Ager (chromium)
2008/11/25 21:09:41
Have the empty line before private instead of afte
Christian Plesner Hansen
2008/11/26 06:49:56
Fixed.
| |
329 | |
330 // Destructively set a value in this set. In most cases you want | |
331 // to use Extend instead to ensure that only one instance exists | |
332 // that contains the same values. | |
333 void Set(unsigned value); | |
334 | |
335 // The successors are a list of sets that contain the same values | |
336 // as this set and the one more value that is not present in this | |
337 // set. | |
338 ZoneList<OutSet*>* successors() { return successors_; } | |
339 | |
340 OutSet(uint32_t first, ZoneList<unsigned>* remaining) | |
341 : first_(first), remaining_(remaining), successors_(NULL) { } | |
342 uint32_t first_; | |
343 ZoneList<unsigned>* remaining_; | |
344 ZoneList<OutSet*>* successors_; | |
345 }; | |
346 | |
347 | |
348 // A mapping from integers, specified as ranges, to a set of integers. | |
349 // Used for mapping character ranges to choices. | |
350 class DispatchTable { | |
351 public: | |
352 class Entry { | |
353 public: | |
354 Entry() | |
355 : from_(0), to_(0), out_set_(NULL) { } | |
Mads Ager (chromium)
2008/11/25 21:09:41
Put this on one line.
Christian Plesner Hansen
2008/11/26 06:49:56
Fixed.
| |
356 Entry(uc16 from, uc16 to, OutSet* out_set) | |
357 : from_(from), to_(to), out_set_(out_set) { } | |
358 uc16 from() { return from_; } | |
359 uc16 to() { return to_; } | |
360 void set_to(uc16 value) { to_ = value; } | |
361 void AddValue(int value) { out_set_ = out_set_->Extend(value); } | |
362 OutSet* out_set() { return out_set_; } | |
363 private: | |
364 uc16 from_; | |
365 uc16 to_; | |
366 OutSet* out_set_; | |
367 }; | |
368 | |
369 class Config { | |
370 public: | |
371 typedef uc16 Key; | |
372 typedef Entry Value; | |
373 static const uc16 kNoKey; | |
374 static const Entry kNoValue; | |
375 static inline int Compare(uc16 a, uc16 b) { | |
376 if (a == b) | |
Mads Ager (chromium)
2008/11/25 21:09:41
How about:
if (a == b) return 0;
if (a < b) retur
Christian Plesner Hansen
2008/11/26 06:49:56
That's a religious question: should there be an 'e
| |
377 return 0; | |
378 else if (a < b) | |
379 return -1; | |
380 else | |
381 return 1; | |
382 } | |
383 }; | |
384 | |
385 void AddRange(CharacterRange range, int value); | |
386 OutSet* Get(uc16 value); | |
387 void Dump(); | |
388 | |
389 template <typename Callback> | |
390 void ForEach(Callback* callback) { return tree()->ForEach(callback); } | |
391 private: | |
392 // There can't be a static empty set since it allocates its | |
393 // successors in a zone and caches them. | |
394 OutSet* empty() { return &empty_; } | |
395 OutSet empty_; | |
396 ZoneSplayTree<Config>* tree() { return &tree_; } | |
397 ZoneSplayTree<Config> tree_; | |
398 }; | |
399 | |
400 | |
401 #define FOR_EACH_NODE_TYPE(VISIT) \ | |
402 VISIT(End) \ | |
403 VISIT(Action) \ | |
404 VISIT(Choice) \ | |
405 VISIT(BackReference) \ | |
406 VISIT(Text) | |
407 | |
408 | |
409 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ | |
410 VISIT(Disjunction) \ | |
411 VISIT(Alternative) \ | |
412 VISIT(Assertion) \ | |
413 VISIT(CharacterClass) \ | |
414 VISIT(Atom) \ | |
415 VISIT(Quantifier) \ | |
416 VISIT(Capture) \ | |
417 VISIT(Lookahead) \ | |
418 VISIT(BackReference) \ | |
419 VISIT(Empty) \ | |
420 VISIT(Text) | |
421 | |
422 | |
423 #define FORWARD_DECLARE(Name) class RegExp##Name; | |
424 FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE) | |
425 #undef FORWARD_DECLARE | |
426 | |
427 | |
428 class TextElement { | |
429 public: | |
430 enum Type {UNINITIALIZED, ATOM, CHAR_CLASS}; | |
431 TextElement() : type(UNINITIALIZED) { } | |
432 explicit TextElement(Type t) : type(t) { } | |
433 static TextElement Atom(RegExpAtom* atom); | |
434 static TextElement CharClass(RegExpCharacterClass* char_class); | |
435 Type type; | |
436 union { | |
437 RegExpAtom* u_atom; | |
438 RegExpCharacterClass* u_char_class; | |
439 } data; | |
440 }; | |
441 | |
442 | |
443 struct NodeInfo { | |
444 NodeInfo() | |
445 : being_analyzed(false), | |
446 been_analyzed(false), | |
447 determine_word(false), | |
448 determine_newline(false), | |
449 determine_start(false), | |
450 follows_word_interest(false), | |
451 follows_newline_interest(false), | |
452 follows_start_interest(false) { } | |
453 bool SameInterests(NodeInfo* that) { | |
454 return (follows_word_interest == that->follows_word_interest) | |
455 && (follows_newline_interest == that->follows_newline_interest) | |
456 && (follows_start_interest == that->follows_start_interest); | |
457 } | |
458 void AdoptInterests(NodeInfo* that) { | |
459 follows_word_interest = that->follows_word_interest; | |
460 follows_newline_interest = that->follows_newline_interest; | |
461 follows_start_interest = that->follows_start_interest; | |
462 } | |
463 bool prev_determine_word() { | |
464 return determine_word || follows_word_interest; | |
465 } | |
466 bool prev_determine_newline() { | |
467 return determine_newline || follows_newline_interest; | |
468 } | |
469 bool prev_determine_start() { | |
470 return determine_start || follows_start_interest; | |
471 } | |
472 bool being_analyzed: 1; | |
473 bool been_analyzed: 1; | |
474 bool determine_word: 1; | |
475 bool determine_newline: 1; | |
476 bool determine_start: 1; | |
477 bool follows_word_interest: 1; | |
478 bool follows_newline_interest: 1; | |
479 bool follows_start_interest: 1; | |
480 }; | |
481 | |
482 | |
483 STATIC_CHECK(sizeof(NodeInfo) <= sizeof(int)); // NOLINT | |
484 | |
485 | |
486 class SiblingList { | |
487 public: | |
488 SiblingList() : list_(NULL) { } | |
489 int length() { | |
490 return list_ == NULL ? 0 : list_->length(); | |
491 } | |
492 void Ensure(RegExpNode* parent) { | |
493 if (list_ == NULL) { | |
494 list_ = new ZoneList<RegExpNode*>(2); | |
495 list_->Add(parent); | |
496 } | |
497 } | |
498 void Add(RegExpNode* node) { list_->Add(node); } | |
499 RegExpNode* Get(int index) { return list_->at(index); } | |
500 private: | |
501 ZoneList<RegExpNode*>* list_; | |
502 }; | |
503 | |
504 | |
505 class RegExpNode: public ZoneObject { | |
506 public: | |
507 virtual ~RegExpNode() { } | |
508 virtual void Accept(NodeVisitor* visitor) = 0; | |
509 // Generates a goto to this node or actually generates the code at this point. | |
510 // Until the implementation is complete we will return true for success and | |
511 // false for failure. | |
512 virtual bool GoTo(RegExpCompiler* compiler); | |
513 Label* label(); | |
514 | |
515 // Until the implementation is complete we will return true for success and | |
516 // false for failure. | |
517 virtual bool Emit(RegExpCompiler* compiler) = 0; | |
518 virtual RegExpNode* PropagateInterest(NodeInfo* info) = 0; | |
519 NodeInfo* info() { return &info_; } | |
520 virtual bool IsBacktrack() { return false; } | |
521 RegExpNode* GetSibling(NodeInfo* info); | |
522 void EnsureSiblings() { siblings_.Ensure(this); } | |
523 void AddSibling(RegExpNode* node) { siblings_.Add(node); } | |
524 protected: | |
525 inline void Bind(RegExpMacroAssembler* macro); | |
526 private: | |
527 Label label_; | |
528 NodeInfo info_; | |
529 SiblingList siblings_; | |
530 }; | |
531 | |
532 | |
533 class SeqRegExpNode: public RegExpNode { | |
534 public: | |
535 explicit SeqRegExpNode(RegExpNode* on_success) | |
536 : on_success_(on_success) { } | |
537 RegExpNode* on_success() { return on_success_; } | |
538 void set_on_success(RegExpNode* node) { on_success_ = node; } | |
539 virtual bool Emit(RegExpCompiler* compiler) { return false; } | |
540 private: | |
541 RegExpNode* on_success_; | |
542 }; | |
543 | |
544 | |
545 class ActionNode: public SeqRegExpNode { | |
546 public: | |
547 enum Type { | |
548 STORE_REGISTER, | |
549 INCREMENT_REGISTER, | |
550 STORE_POSITION, | |
551 SAVE_POSITION, | |
552 RESTORE_POSITION, | |
553 BEGIN_SUBMATCH, | |
554 ESCAPE_SUBMATCH | |
555 }; | |
556 static ActionNode* StoreRegister(int reg, int val, RegExpNode* on_success); | |
557 static ActionNode* IncrementRegister(int reg, RegExpNode* on_success); | |
558 static ActionNode* StorePosition(int reg, RegExpNode* on_success); | |
559 static ActionNode* SavePosition(int reg, RegExpNode* on_success); | |
560 static ActionNode* RestorePosition(int reg, RegExpNode* on_success); | |
561 static ActionNode* BeginSubmatch(int reg, RegExpNode* on_success); | |
562 static ActionNode* EscapeSubmatch(int reg, RegExpNode* on_success); | |
563 virtual void Accept(NodeVisitor* visitor); | |
564 virtual bool Emit(RegExpCompiler* compiler); | |
565 virtual RegExpNode* PropagateInterest(NodeInfo* info); | |
566 private: | |
567 union { | |
568 struct { | |
569 int reg; | |
570 int value; | |
571 } u_store_register; | |
572 struct { | |
573 int reg; | |
574 } u_increment_register; | |
575 struct { | |
576 int reg; | |
577 } u_position_register; | |
578 struct { | |
579 int reg; | |
580 } u_submatch_stack_pointer_register; | |
581 } data_; | |
582 ActionNode(Type type, RegExpNode* on_success) | |
583 : SeqRegExpNode(on_success), | |
584 type_(type) { } | |
585 Type type_; | |
586 friend class DotPrinter; | |
587 }; | |
588 | |
589 | |
590 class TextNode: public SeqRegExpNode { | |
591 public: | |
592 TextNode(ZoneList<TextElement>* elms, | |
593 RegExpNode* on_success, | |
594 RegExpNode* on_failure) | |
595 : SeqRegExpNode(on_success), | |
596 on_failure_(on_failure), | |
597 elms_(elms) { } | |
598 virtual void Accept(NodeVisitor* visitor); | |
599 virtual RegExpNode* PropagateInterest(NodeInfo* info); | |
600 RegExpNode* on_failure() { return on_failure_; } | |
601 virtual bool Emit(RegExpCompiler* compiler); | |
602 ZoneList<TextElement>* elements() { return elms_; } | |
603 private: | |
604 RegExpNode* on_failure_; | |
605 ZoneList<TextElement>* elms_; | |
606 }; | |
607 | |
608 | |
609 class BackReferenceNode: public SeqRegExpNode { | |
610 public: | |
611 BackReferenceNode(int start_reg, | |
612 int end_reg, | |
613 RegExpNode* on_success, | |
614 RegExpNode* on_failure) | |
615 : SeqRegExpNode(on_success), | |
616 on_failure_(on_failure), | |
617 start_reg_(start_reg), | |
618 end_reg_(end_reg) { } | |
619 virtual void Accept(NodeVisitor* visitor); | |
620 RegExpNode* on_failure() { return on_failure_; } | |
621 int start_register() { return start_reg_; } | |
622 int end_register() { return end_reg_; } | |
623 virtual bool Emit(RegExpCompiler* compiler); | |
624 virtual RegExpNode* PropagateInterest(NodeInfo* info); | |
625 private: | |
626 RegExpNode* on_failure_; | |
627 int start_reg_; | |
628 int end_reg_; | |
629 }; | |
630 | |
631 | |
632 class EndNode: public RegExpNode { | |
633 public: | |
634 enum Action { ACCEPT, BACKTRACK }; | |
635 explicit EndNode(Action action) : action_(action) { } | |
636 virtual void Accept(NodeVisitor* visitor); | |
637 virtual bool Emit(RegExpCompiler* compiler); | |
638 virtual RegExpNode* PropagateInterest(NodeInfo* info); | |
639 virtual bool IsBacktrack() { return action_ == BACKTRACK; } | |
640 virtual bool GoTo(RegExpCompiler* compiler); | |
641 private: | |
642 Action action_; | |
643 }; | |
644 | |
645 | |
646 class Guard: public ZoneObject { | |
647 public: | |
648 enum Relation { LT, GEQ }; | |
649 Guard(int reg, Relation op, int value) | |
650 : reg_(reg), | |
651 op_(op), | |
652 value_(value) { } | |
653 int reg() { return reg_; } | |
654 Relation op() { return op_; } | |
655 int value() { return value_; } | |
656 private: | |
657 int reg_; | |
658 Relation op_; | |
659 int value_; | |
660 }; | |
661 | |
662 | |
663 class GuardedAlternative { | |
664 public: | |
665 explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) { } | |
666 void AddGuard(Guard* guard); | |
667 RegExpNode* node() { return node_; } | |
668 void set_node(RegExpNode* node) { node_ = node; } | |
669 ZoneList<Guard*>* guards() { return guards_; } | |
670 private: | |
671 RegExpNode* node_; | |
672 ZoneList<Guard*>* guards_; | |
673 }; | |
674 | |
675 | |
676 class ChoiceNode: public RegExpNode { | |
677 public: | |
678 explicit ChoiceNode(int expected_size, RegExpNode* on_failure) | |
679 : on_failure_(on_failure), | |
680 alternatives_(new ZoneList<GuardedAlternative>(expected_size)), | |
681 table_calculated_(false), | |
682 being_calculated_(false) { } | |
683 virtual void Accept(NodeVisitor* visitor); | |
684 void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); } | |
685 ZoneList<GuardedAlternative>* alternatives() { return alternatives_; } | |
686 DispatchTable* table() { return &table_; } | |
687 RegExpNode* on_failure() { return on_failure_; } | |
688 virtual bool Emit(RegExpCompiler* compiler); | |
689 virtual RegExpNode* PropagateInterest(NodeInfo* info); | |
690 bool table_calculated() { return table_calculated_; } | |
691 void set_table_calculated(bool b) { table_calculated_ = b; } | |
692 bool being_calculated() { return being_calculated_; } | |
693 void set_being_calculated(bool b) { being_calculated_ = b; } | |
694 private: | |
695 void GenerateGuard(RegExpMacroAssembler* macro_assembler, | |
696 Guard *guard, | |
Erik Corry
2008/11/25 11:03:52
Misplaced asterisk.
Mads Ager (chromium)
2008/11/25 21:09:41
This one is still there.
| |
697 Label* on_failure); | |
698 RegExpNode* on_failure_; | |
699 ZoneList<GuardedAlternative>* alternatives_; | |
700 DispatchTable table_; | |
701 bool table_calculated_; | |
702 bool being_calculated_; | |
703 }; | |
704 | |
705 | |
706 class NodeVisitor { | |
707 public: | |
708 virtual ~NodeVisitor() { } | |
709 #define DECLARE_VISIT(Type) \ | |
710 virtual void Visit##Type(Type##Node* that) = 0; | |
711 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | |
712 #undef DECLARE_VISIT | |
713 }; | |
714 | |
715 | |
716 // Node visitor used to add the start set of the alternatives to the | |
717 // dispatch table of a choice node. | |
718 class DispatchTableConstructor: public NodeVisitor { | |
719 public: | |
720 explicit DispatchTableConstructor(DispatchTable* table) | |
721 : table_(table), | |
722 choice_index_(-1) { } | |
723 | |
724 void BuildTable(ChoiceNode* node); | |
725 | |
726 void AddRange(CharacterRange range) { | |
727 table()->AddRange(range, choice_index_); | |
728 } | |
729 | |
730 void AddInverse(ZoneList<CharacterRange>* ranges); | |
731 | |
732 #define DECLARE_VISIT(Type) \ | |
733 virtual void Visit##Type(Type##Node* that); | |
734 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | |
735 #undef DECLARE_VISIT | |
736 | |
737 DispatchTable* table() { return table_; } | |
738 void set_choice_index(int value) { choice_index_ = value; } | |
739 | |
740 protected: | |
741 DispatchTable *table_; | |
Erik Corry
2008/11/25 11:03:52
Misplaced asterisk.
Mads Ager (chromium)
2008/11/25 21:09:41
This is still there.
| |
742 int choice_index_; | |
743 }; | |
744 | |
745 | |
746 class Analysis: public NodeVisitor { | |
747 public: | |
748 void EnsureAnalyzed(RegExpNode* node); | |
749 | |
750 #define DECLARE_VISIT(Type) \ | |
751 virtual void Visit##Type(Type##Node* that); | |
752 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | |
753 #undef DECLARE_VISIT | |
754 }; | |
755 | |
756 | |
757 struct RegExpParseResult { | |
758 RegExpTree* tree; | |
759 bool has_character_escapes; | |
760 Handle<String> error; | |
761 int capture_count; | |
762 }; | |
763 | |
764 | |
765 class RegExpEngine: public AllStatic { | |
766 public: | |
767 static Handle<FixedArray> Compile(RegExpParseResult* input, | |
768 RegExpNode** node_return, | |
769 bool ignore_case); | |
770 static void DotPrint(const char* label, RegExpNode* node); | |
771 }; | |
772 | |
773 | |
128 } } // namespace v8::internal | 774 } } // namespace v8::internal |
129 | 775 |
130 #endif // V8_JSREGEXP_H_ | 776 #endif // V8_JSREGEXP_H_ |
OLD | NEW |