Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(142)

Side by Side Diff: src/jsregexp.h

Issue 11000: Periodic merge of bleeding_edge to experimental code generator branch.... (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/toiger/
Patch Set: Created 12 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/heap.cc ('k') | src/jsregexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 12 matching lines...) Expand all
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 27
28 #ifndef V8_JSREGEXP_H_ 28 #ifndef V8_JSREGEXP_H_
29 #define V8_JSREGEXP_H_ 29 #define V8_JSREGEXP_H_
30 30
31 namespace v8 { namespace internal { 31 namespace v8 { namespace internal {
32 32
33
34 class RegExpMacroAssembler;
35
36
33 class RegExpImpl { 37 class RegExpImpl {
34 public: 38 public:
35 // Creates a regular expression literal in the old space. 39 // Creates a regular expression literal in the old space.
36 // This function calls the garbage collector if necessary. 40 // This function calls the garbage collector if necessary.
37 static Handle<Object> CreateRegExpLiteral(Handle<JSFunction> constructor, 41 static Handle<Object> CreateRegExpLiteral(Handle<JSFunction> constructor,
38 Handle<String> pattern, 42 Handle<String> pattern,
39 Handle<String> flags, 43 Handle<String> flags,
40 bool* has_pending_exception); 44 bool* has_pending_exception);
41 45
42 // Returns a string representation of a regular expression. 46 // Returns a string representation of a regular expression.
(...skipping 11 matching lines...) Expand all
54 static Handle<Object> Exec(Handle<JSRegExp> regexp, 58 static Handle<Object> Exec(Handle<JSRegExp> regexp,
55 Handle<String> subject, 59 Handle<String> subject,
56 Handle<Object> index); 60 Handle<Object> index);
57 61
58 // Call RegExp.prototyp.exec(string) in a loop. 62 // Call RegExp.prototyp.exec(string) in a loop.
59 // Used by String.prototype.match and String.prototype.replace. 63 // Used by String.prototype.match and String.prototype.replace.
60 // This function calls the garbage collector if necessary. 64 // This function calls the garbage collector if necessary.
61 static Handle<Object> ExecGlobal(Handle<JSRegExp> regexp, 65 static Handle<Object> ExecGlobal(Handle<JSRegExp> regexp,
62 Handle<String> subject); 66 Handle<String> subject);
63 67
68 // Stores an uncompiled RegExp pattern in the JSRegExp object.
69 // It will be compiled by JSCRE when first executed.
70 static Handle<Object> JscrePrepare(Handle<JSRegExp> re,
71 Handle<String> pattern,
72 JSRegExp::Flags flags);
73
74 // Stores a compiled RegExp pattern in the JSRegExp object.
75 // The pattern is compiled by Irregexp.
76 static Handle<Object> IrregexpPrepare(Handle<JSRegExp> re,
77 Handle<String> pattern,
78 JSRegExp::Flags flags,
79 Handle<FixedArray> irregexp_data);
80
81
82 // Compile the pattern using JSCRE and store the result in the
83 // JSRegExp object.
84 static Handle<Object> JscreCompile(Handle<JSRegExp> re);
85
64 static Handle<Object> AtomCompile(Handle<JSRegExp> re, 86 static Handle<Object> AtomCompile(Handle<JSRegExp> re,
65 Handle<String> pattern, 87 Handle<String> pattern,
66 JSRegExp::Flags flags); 88 JSRegExp::Flags flags,
67 89 Handle<String> match_pattern);
68 static Handle<Object> AtomExec(Handle<JSRegExp> regexp, 90 static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
69 Handle<String> subject, 91 Handle<String> subject,
70 Handle<Object> index); 92 Handle<Object> index);
71 93
72 static Handle<Object> AtomExecGlobal(Handle<JSRegExp> regexp, 94 static Handle<Object> AtomExecGlobal(Handle<JSRegExp> regexp,
73 Handle<String> subject); 95 Handle<String> subject);
74 96
75 static Handle<Object> JsreCompile(Handle<JSRegExp> re, 97 static Handle<Object> JscreCompile(Handle<JSRegExp> re,
76 Handle<String> pattern, 98 Handle<String> pattern,
77 JSRegExp::Flags flags); 99 JSRegExp::Flags flags);
78 100
79 static Handle<Object> JsreExec(Handle<JSRegExp> regexp, 101 // Execute a compiled JSCRE pattern.
80 Handle<String> subject, 102 static Handle<Object> JscreExec(Handle<JSRegExp> regexp,
81 Handle<Object> index); 103 Handle<String> subject,
104 Handle<Object> index);
82 105
83 static Handle<Object> JsreExecGlobal(Handle<JSRegExp> regexp, 106 // Execute an Irregexp bytecode pattern.
84 Handle<String> subject); 107 static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp,
108 Handle<String> subject,
109 Handle<Object> index);
110
111 static Handle<Object> JscreExecGlobal(Handle<JSRegExp> regexp,
112 Handle<String> subject);
113
114 static Handle<Object> IrregexpExecGlobal(Handle<JSRegExp> regexp,
115 Handle<String> subject);
85 116
86 static void NewSpaceCollectionPrologue(); 117 static void NewSpaceCollectionPrologue();
87 static void OldSpaceCollectionPrologue(); 118 static void OldSpaceCollectionPrologue();
88 119
89 private:
90 // Converts a source string to a 16 bit flat string. The string 120 // Converts a source string to a 16 bit flat string. The string
91 // will be either sequential or it will be a SlicedString backed 121 // will be either sequential or it will be a SlicedString backed
92 // by a flat string. 122 // by a flat string.
93 static Handle<String> StringToTwoByte(Handle<String> pattern); 123 static Handle<String> StringToTwoByte(Handle<String> pattern);
94 static Handle<String> CachedStringToTwoByte(Handle<String> pattern); 124 static Handle<String> CachedStringToTwoByte(Handle<String> pattern);
95 125
126 static const int kIrregexpImplementationIndex = 0;
127 static const int kIrregexpNumberOfCapturesIndex = 1;
128 static const int kIrregexpNumberOfRegistersIndex = 2;
129 static const int kIrregexpCodeIndex = 3;
130 static const int kIrregexpDataLength = 4;
131
132 static const int kJscreNumberOfCapturesIndex = 0;
133 static const int kJscreInternalIndex = 1;
134 static const int kJscreDataLength = 2;
135
136 private:
96 static String* last_ascii_string_; 137 static String* last_ascii_string_;
97 static String* two_byte_cached_string_; 138 static String* two_byte_cached_string_;
98 139
99 // Returns the caputure from the re. 140 static int JscreNumberOfCaptures(Handle<JSRegExp> re);
100 static int JsreCapture(Handle<JSRegExp> re); 141 static ByteArray* JscreInternal(Handle<JSRegExp> re);
101 static ByteArray* JsreInternal(Handle<JSRegExp> re); 142
143 static int IrregexpNumberOfCaptures(Handle<JSRegExp> re);
144 static int IrregexpNumberOfRegisters(Handle<JSRegExp> re);
145 static Handle<ByteArray> IrregexpCode(Handle<JSRegExp> re);
102 146
103 // Call jsRegExpExecute once 147 // Call jsRegExpExecute once
104 static Handle<Object> JsreExecOnce(Handle<JSRegExp> regexp, 148 static Handle<Object> JscreExecOnce(Handle<JSRegExp> regexp,
105 int num_captures, 149 int num_captures,
106 Handle<String> subject, 150 Handle<String> subject,
107 int previous_index, 151 int previous_index,
108 const uc16* utf8_subject, 152 const uc16* utf8_subject,
109 int* ovector, 153 int* ovector,
110 int ovector_length); 154 int ovector_length);
155
156 static Handle<Object> IrregexpExecOnce(Handle<JSRegExp> regexp,
157 int num_captures,
158 Handle<String> subject16,
159 int previous_index,
160 int* ovector,
161 int ovector_length);
111 162
112 // Set the subject cache. The previous string buffer is not deleted, so the 163 // Set the subject cache. The previous string buffer is not deleted, so the
113 // caller should ensure that it doesn't leak. 164 // caller should ensure that it doesn't leak.
114 static void SetSubjectCache(String* subject, char* utf8_subject, 165 static void SetSubjectCache(String* subject,
115 int uft8_length, int character_position, 166 char* utf8_subject,
167 int uft8_length,
168 int character_position,
116 int utf8_position); 169 int utf8_position);
117 170
118 // A one element cache of the last utf8_subject string and its length. The 171 // A one element cache of the last utf8_subject string and its length. The
119 // subject JS String object is cached in the heap. We also cache a 172 // subject JS String object is cached in the heap. We also cache a
120 // translation between position and utf8 position. 173 // translation between position and utf8 position.
121 static char* utf8_subject_cache_; 174 static char* utf8_subject_cache_;
122 static int utf8_length_cache_; 175 static int utf8_length_cache_;
123 static int utf8_position_; 176 static int utf8_position_;
124 static int character_position_; 177 static int character_position_;
125 }; 178 };
126 179
127 180
181 class CharacterRange {
182 public:
183 CharacterRange() : from_(0), to_(0) { }
184 // For compatibility with the CHECK_OK macro
185 CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT
186 CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { }
187 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges);
188 static inline CharacterRange Singleton(uc16 value) {
189 return CharacterRange(value, value);
190 }
191 static inline CharacterRange Range(uc16 from, uc16 to) {
192 ASSERT(from <= to);
193 return CharacterRange(from, to);
194 }
195 static inline CharacterRange Everything() {
196 return CharacterRange(0, 0xFFFF);
197 }
198 bool Contains(uc16 i) { return from_ <= i && i <= to_; }
199 uc16 from() const { return from_; }
200 void set_from(uc16 value) { from_ = value; }
201 uc16 to() const { return to_; }
202 void set_to(uc16 value) { to_ = value; }
203 bool is_valid() { return from_ <= to_; }
204 bool IsSingleton() { return (from_ == to_); }
205 void AddCaseEquivalents(ZoneList<CharacterRange>* ranges);
206 static const int kRangeCanonicalizeMax = 0x346;
207 static const int kStartMarker = (1 << 24);
208 static const int kPayloadMask = (1 << 24) - 1;
209 private:
210 uc16 from_;
211 uc16 to_;
212 };
213
214
215 template <typename Node, class Callback>
216 static void DoForEach(Node* node, Callback* callback);
217
218
219 // A zone splay tree. The config type parameter encapsulates the
220 // different configurations of a concrete splay tree:
221 //
222 // typedef Key: the key type
223 // typedef Value: the value type
224 // static const kNoKey: the dummy key used when no key is set
225 // static const kNoValue: the dummy value used to initialize nodes
226 // int (Compare)(Key& a, Key& b) -> {-1, 0, 1}: comparison function
227 //
228 template <typename Config>
229 class ZoneSplayTree : public ZoneObject {
230 public:
231 typedef typename Config::Key Key;
232 typedef typename Config::Value Value;
233
234 class Locator;
235
236 ZoneSplayTree() : root_(NULL) { }
237
238 // Inserts the given key in this tree with the given value. Returns
239 // true if a node was inserted, otherwise false. If found the locator
240 // is enabled and provides access to the mapping for the key.
241 bool Insert(const Key& key, Locator* locator);
242
243 // Looks up the key in this tree and returns true if it was found,
244 // otherwise false. If the node is found the locator is enabled and
245 // provides access to the mapping for the key.
246 bool Find(const Key& key, Locator* locator);
247
248 // Finds the mapping with the greatest key less than or equal to the
249 // given key.
250 bool FindGreatestLessThan(const Key& key, Locator* locator);
251
252 // Find the mapping with the greatest key in this tree.
253 bool FindGreatest(Locator* locator);
254
255 // Finds the mapping with the least key greater than or equal to the
256 // given key.
257 bool FindLeastGreaterThan(const Key& key, Locator* locator);
258
259 // Find the mapping with the least key in this tree.
260 bool FindLeast(Locator* locator);
261
262 // Remove the node with the given key from the tree.
263 bool Remove(const Key& key);
264
265 bool is_empty() { return root_ == NULL; }
266
267 // Perform the splay operation for the given key. Moves the node with
268 // the given key to the top of the tree. If no node has the given
269 // key, the last node on the search path is moved to the top of the
270 // tree.
271 void Splay(const Key& key);
272
273 class Node : public ZoneObject {
274 public:
275 Node(const Key& key, const Value& value)
276 : key_(key),
277 value_(value),
278 left_(NULL),
279 right_(NULL) { }
280 Key key() { return key_; }
281 Value value() { return value_; }
282 Node* left() { return left_; }
283 Node* right() { return right_; }
284 private:
285 friend class ZoneSplayTree;
286 friend class Locator;
287 Key key_;
288 Value value_;
289 Node* left_;
290 Node* right_;
291 };
292
293 // A locator provides access to a node in the tree without actually
294 // exposing the node.
295 class Locator {
296 public:
297 explicit Locator(Node* node) : node_(node) { }
298 Locator() : node_(NULL) { }
299 const Key& key() { return node_->key_; }
300 Value& value() { return node_->value_; }
301 void set_value(const Value& value) { node_->value_ = value; }
302 inline void bind(Node* node) { node_ = node; }
303 private:
304 Node* node_;
305 };
306
307 template <class Callback>
308 void ForEach(Callback* c) {
309 DoForEach<typename ZoneSplayTree<Config>::Node, Callback>(root_, c);
310 }
311
312 private:
313 Node* root_;
314 };
315
316
317 // A set of unsigned integers that behaves especially well on small
318 // integers (< 32). May do zone-allocation.
319 class OutSet: public ZoneObject {
320 public:
321 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { }
322 OutSet* Extend(unsigned value);
323 bool Get(unsigned value);
324 static const unsigned kFirstLimit = 32;
325
326 private:
327 // Destructively set a value in this set. In most cases you want
328 // to use Extend instead to ensure that only one instance exists
329 // that contains the same values.
330 void Set(unsigned value);
331
332 // The successors are a list of sets that contain the same values
333 // as this set and the one more value that is not present in this
334 // set.
335 ZoneList<OutSet*>* successors() { return successors_; }
336
337 OutSet(uint32_t first, ZoneList<unsigned>* remaining)
338 : first_(first), remaining_(remaining), successors_(NULL) { }
339 uint32_t first_;
340 ZoneList<unsigned>* remaining_;
341 ZoneList<OutSet*>* successors_;
342 };
343
344
345 // A mapping from integers, specified as ranges, to a set of integers.
346 // Used for mapping character ranges to choices.
347 class DispatchTable {
348 public:
349 class Entry {
350 public:
351 Entry() : from_(0), to_(0), out_set_(NULL) { }
352 Entry(uc16 from, uc16 to, OutSet* out_set)
353 : from_(from), to_(to), out_set_(out_set) { }
354 uc16 from() { return from_; }
355 uc16 to() { return to_; }
356 void set_to(uc16 value) { to_ = value; }
357 void AddValue(int value) { out_set_ = out_set_->Extend(value); }
358 OutSet* out_set() { return out_set_; }
359 private:
360 uc16 from_;
361 uc16 to_;
362 OutSet* out_set_;
363 };
364
365 class Config {
366 public:
367 typedef uc16 Key;
368 typedef Entry Value;
369 static const uc16 kNoKey;
370 static const Entry kNoValue;
371 static inline int Compare(uc16 a, uc16 b) {
372 if (a == b)
373 return 0;
374 else if (a < b)
375 return -1;
376 else
377 return 1;
378 }
379 };
380
381 void AddRange(CharacterRange range, int value);
382 OutSet* Get(uc16 value);
383 void Dump();
384
385 template <typename Callback>
386 void ForEach(Callback* callback) { return tree()->ForEach(callback); }
387 private:
388 // There can't be a static empty set since it allocates its
389 // successors in a zone and caches them.
390 OutSet* empty() { return &empty_; }
391 OutSet empty_;
392 ZoneSplayTree<Config>* tree() { return &tree_; }
393 ZoneSplayTree<Config> tree_;
394 };
395
396
397 #define FOR_EACH_NODE_TYPE(VISIT) \
398 VISIT(End) \
399 VISIT(Action) \
400 VISIT(Choice) \
401 VISIT(BackReference) \
402 VISIT(Text)
403
404
405 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
406 VISIT(Disjunction) \
407 VISIT(Alternative) \
408 VISIT(Assertion) \
409 VISIT(CharacterClass) \
410 VISIT(Atom) \
411 VISIT(Quantifier) \
412 VISIT(Capture) \
413 VISIT(Lookahead) \
414 VISIT(BackReference) \
415 VISIT(Empty) \
416 VISIT(Text)
417
418
419 #define FORWARD_DECLARE(Name) class RegExp##Name;
420 FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE)
421 #undef FORWARD_DECLARE
422
423
424 class TextElement {
425 public:
426 enum Type {UNINITIALIZED, ATOM, CHAR_CLASS};
427 TextElement() : type(UNINITIALIZED) { }
428 explicit TextElement(Type t) : type(t) { }
429 static TextElement Atom(RegExpAtom* atom);
430 static TextElement CharClass(RegExpCharacterClass* char_class);
431 Type type;
432 union {
433 RegExpAtom* u_atom;
434 RegExpCharacterClass* u_char_class;
435 } data;
436 };
437
438
439 struct NodeInfo {
440 enum PrecedingInfo {
441 UNKNOWN = -1, FALSE = 0, TRUE = 1
442 };
443
444 NodeInfo()
445 : being_analyzed(false),
446 been_analyzed(false),
447 determine_word(false),
448 determine_newline(false),
449 determine_start(false),
450 follows_word_interest(false),
451 follows_newline_interest(false),
452 follows_start_interest(false),
453 at_end(false),
454 follows_word(UNKNOWN),
455 follows_newline(UNKNOWN),
456 follows_start(UNKNOWN) { }
457
458 bool HasSameForwardInterests(NodeInfo* that) {
459 return (at_end == that->at_end)
460 && (follows_word_interest == that->follows_word_interest)
461 && (follows_newline_interest == that->follows_newline_interest)
462 && (follows_start_interest == that->follows_start_interest);
463 }
464
465 // Updates the interests of this node given the interests of the
466 // node preceding it.
467 void AddFromPreceding(NodeInfo* that) {
468 at_end |= that->at_end;
469 follows_word_interest |= that->follows_word_interest;
470 follows_newline_interest |= that->follows_newline_interest;
471 follows_start_interest |= that->follows_start_interest;
472 }
473
474 // Sets the interests of this node to include the interests of the
475 // following node.
476 void AddFromFollowing(NodeInfo* that) {
477 follows_word_interest |= that->follows_word_interest;
478 follows_newline_interest |= that->follows_newline_interest;
479 follows_start_interest |= that->follows_start_interest;
480 }
481
482 bool being_analyzed: 1;
483 bool been_analyzed: 1;
484
485 // These bits are set if this node must propagate forward information
486 // about the last character it consumed (or, in the case of 'start',
487 // if it is at the start of the input).
488 bool determine_word: 1;
489 bool determine_newline: 1;
490 bool determine_start: 1;
491
492 // These bits are set of this node has to know what the preceding
493 // character was.
494 bool follows_word_interest: 1;
495 bool follows_newline_interest: 1;
496 bool follows_start_interest: 1;
497
498 bool at_end: 1;
499
500 // These bits are set if the node can make assumptions about what
501 // the previous character was.
502 PrecedingInfo follows_word: 2;
503 PrecedingInfo follows_newline: 2;
504 PrecedingInfo follows_start: 2;
505 };
506
507
508 class SiblingList {
509 public:
510 SiblingList() : list_(NULL) { }
511 int length() {
512 return list_ == NULL ? 0 : list_->length();
513 }
514 void Ensure(RegExpNode* parent) {
515 if (list_ == NULL) {
516 list_ = new ZoneList<RegExpNode*>(2);
517 list_->Add(parent);
518 }
519 }
520 void Add(RegExpNode* node) { list_->Add(node); }
521 RegExpNode* Get(int index) { return list_->at(index); }
522 private:
523 ZoneList<RegExpNode*>* list_;
524 };
525
526
527 class RegExpNode: public ZoneObject {
528 public:
529 virtual ~RegExpNode() { }
530 virtual void Accept(NodeVisitor* visitor) = 0;
531 // Generates a goto to this node or actually generates the code at this point.
532 // Until the implementation is complete we will return true for success and
533 // false for failure.
534 virtual bool GoTo(RegExpCompiler* compiler);
535 Label* label();
536
537 // Until the implementation is complete we will return true for success and
538 // false for failure.
539 virtual bool Emit(RegExpCompiler* compiler) = 0;
540
541 // Propagates the given interest information forward. When seeing
542 // \bfoo for instance, the \b is implemented by propagating forward
543 // to the 'foo' string that it should only succeed if its first
544 // character is a letter xor the previous character was a letter.
545 virtual RegExpNode* PropagateForward(NodeInfo* info) = 0;
546
547 NodeInfo* info() { return &info_; }
548 virtual bool IsBacktrack() { return false; }
549 RegExpNode* GetSibling(NodeInfo* info);
550 void EnsureSiblings() { siblings_.Ensure(this); }
551 void AddSibling(RegExpNode* node) { siblings_.Add(node); }
552 protected:
553 inline void Bind(RegExpMacroAssembler* macro);
554 private:
555 Label label_;
556 NodeInfo info_;
557 SiblingList siblings_;
558 };
559
560
561 class SeqRegExpNode: public RegExpNode {
562 public:
563 explicit SeqRegExpNode(RegExpNode* on_success)
564 : on_success_(on_success) { }
565 RegExpNode* on_success() { return on_success_; }
566 void set_on_success(RegExpNode* node) { on_success_ = node; }
567 virtual bool Emit(RegExpCompiler* compiler) { return false; }
568 private:
569 RegExpNode* on_success_;
570 };
571
572
573 class ActionNode: public SeqRegExpNode {
574 public:
575 enum Type {
576 STORE_REGISTER,
577 INCREMENT_REGISTER,
578 STORE_POSITION,
579 RESTORE_POSITION,
580 BEGIN_SUBMATCH,
581 ESCAPE_SUBMATCH
582 };
583 static ActionNode* StoreRegister(int reg, int val, RegExpNode* on_success);
584 static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
585 static ActionNode* StorePosition(int reg, RegExpNode* on_success);
586 static ActionNode* RestorePosition(int reg, RegExpNode* on_success);
587 static ActionNode* BeginSubmatch(int stack_pointer_reg,
588 int position_reg,
589 RegExpNode* on_success);
590 static ActionNode* EscapeSubmatch(int stack_pointer_reg,
591 bool and_restore_position,
592 int restore_reg,
593 RegExpNode* on_success);
594 virtual void Accept(NodeVisitor* visitor);
595 virtual bool Emit(RegExpCompiler* compiler);
596 virtual RegExpNode* PropagateForward(NodeInfo* info);
597 private:
598 union {
599 struct {
600 int reg;
601 int value;
602 } u_store_register;
603 struct {
604 int reg;
605 } u_increment_register;
606 struct {
607 int reg;
608 } u_position_register;
609 struct {
610 int stack_pointer_register;
611 int current_position_register;
612 } u_submatch;
613 } data_;
614 ActionNode(Type type, RegExpNode* on_success)
615 : SeqRegExpNode(on_success),
616 type_(type) { }
617 Type type_;
618 friend class DotPrinter;
619 };
620
621
622 class TextNode: public SeqRegExpNode {
623 public:
624 TextNode(ZoneList<TextElement>* elms,
625 RegExpNode* on_success,
626 RegExpNode* on_failure)
627 : SeqRegExpNode(on_success),
628 on_failure_(on_failure),
629 elms_(elms) { }
630 virtual void Accept(NodeVisitor* visitor);
631 virtual RegExpNode* PropagateForward(NodeInfo* info);
632 RegExpNode* on_failure() { return on_failure_; }
633 virtual bool Emit(RegExpCompiler* compiler);
634 ZoneList<TextElement>* elements() { return elms_; }
635 void MakeCaseIndependent();
636 private:
637 RegExpNode* on_failure_;
638 ZoneList<TextElement>* elms_;
639 };
640
641
642 class BackReferenceNode: public SeqRegExpNode {
643 public:
644 BackReferenceNode(int start_reg,
645 int end_reg,
646 RegExpNode* on_success,
647 RegExpNode* on_failure)
648 : SeqRegExpNode(on_success),
649 on_failure_(on_failure),
650 start_reg_(start_reg),
651 end_reg_(end_reg) { }
652 virtual void Accept(NodeVisitor* visitor);
653 RegExpNode* on_failure() { return on_failure_; }
654 int start_register() { return start_reg_; }
655 int end_register() { return end_reg_; }
656 virtual bool Emit(RegExpCompiler* compiler);
657 virtual RegExpNode* PropagateForward(NodeInfo* info);
658 private:
659 RegExpNode* on_failure_;
660 int start_reg_;
661 int end_reg_;
662 };
663
664
665 class EndNode: public RegExpNode {
666 public:
667 enum Action { ACCEPT, BACKTRACK };
668 explicit EndNode(Action action) : action_(action) { }
669 virtual void Accept(NodeVisitor* visitor);
670 virtual bool Emit(RegExpCompiler* compiler);
671 virtual RegExpNode* PropagateForward(NodeInfo* info);
672 virtual bool IsBacktrack() { return action_ == BACKTRACK; }
673 virtual bool GoTo(RegExpCompiler* compiler);
674 private:
675 Action action_;
676 };
677
678
679 class Guard: public ZoneObject {
680 public:
681 enum Relation { LT, GEQ };
682 Guard(int reg, Relation op, int value)
683 : reg_(reg),
684 op_(op),
685 value_(value) { }
686 int reg() { return reg_; }
687 Relation op() { return op_; }
688 int value() { return value_; }
689 private:
690 int reg_;
691 Relation op_;
692 int value_;
693 };
694
695
696 class GuardedAlternative {
697 public:
698 explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) { }
699 void AddGuard(Guard* guard);
700 RegExpNode* node() { return node_; }
701 void set_node(RegExpNode* node) { node_ = node; }
702 ZoneList<Guard*>* guards() { return guards_; }
703 private:
704 RegExpNode* node_;
705 ZoneList<Guard*>* guards_;
706 };
707
708
709 class ChoiceNode: public RegExpNode {
710 public:
711 explicit ChoiceNode(int expected_size, RegExpNode* on_failure)
712 : on_failure_(on_failure),
713 alternatives_(new ZoneList<GuardedAlternative>(expected_size)),
714 table_calculated_(false),
715 being_calculated_(false) { }
716 virtual void Accept(NodeVisitor* visitor);
717 void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); }
718 ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
719 DispatchTable* table() { return &table_; }
720 RegExpNode* on_failure() { return on_failure_; }
721 virtual bool Emit(RegExpCompiler* compiler);
722 virtual RegExpNode* PropagateForward(NodeInfo* info);
723 bool table_calculated() { return table_calculated_; }
724 void set_table_calculated(bool b) { table_calculated_ = b; }
725 bool being_calculated() { return being_calculated_; }
726 void set_being_calculated(bool b) { being_calculated_ = b; }
727 private:
728 void GenerateGuard(RegExpMacroAssembler* macro_assembler,
729 Guard *guard,
730 Label* on_failure);
731 RegExpNode* on_failure_;
732 ZoneList<GuardedAlternative>* alternatives_;
733 DispatchTable table_;
734 bool table_calculated_;
735 bool being_calculated_;
736 };
737
738
739 class NodeVisitor {
740 public:
741 virtual ~NodeVisitor() { }
742 #define DECLARE_VISIT(Type) \
743 virtual void Visit##Type(Type##Node* that) = 0;
744 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
745 #undef DECLARE_VISIT
746 };
747
748
749 // Node visitor used to add the start set of the alternatives to the
750 // dispatch table of a choice node.
751 class DispatchTableConstructor: public NodeVisitor {
752 public:
753 explicit DispatchTableConstructor(DispatchTable* table)
754 : table_(table),
755 choice_index_(-1) { }
756
757 void BuildTable(ChoiceNode* node);
758
759 void AddRange(CharacterRange range) {
760 table()->AddRange(range, choice_index_);
761 }
762
763 void AddInverse(ZoneList<CharacterRange>* ranges);
764
765 #define DECLARE_VISIT(Type) \
766 virtual void Visit##Type(Type##Node* that);
767 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
768 #undef DECLARE_VISIT
769
770 DispatchTable* table() { return table_; }
771 void set_choice_index(int value) { choice_index_ = value; }
772
773 protected:
774 DispatchTable *table_;
775 int choice_index_;
776 };
777
778
779 class Analysis: public NodeVisitor {
780 public:
781 explicit Analysis(bool ignore_case)
782 : ignore_case_(ignore_case) { }
783 void EnsureAnalyzed(RegExpNode* node);
784
785 #define DECLARE_VISIT(Type) \
786 virtual void Visit##Type(Type##Node* that);
787 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
788 #undef DECLARE_VISIT
789
790 private:
791 bool ignore_case_;
792
793 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
794 };
795
796
797 struct RegExpParseResult {
798 RegExpTree* tree;
799 bool has_character_escapes;
800 Handle<String> error;
801 int capture_count;
802 };
803
804
805 class RegExpEngine: public AllStatic {
806 public:
807 static Handle<FixedArray> Compile(RegExpParseResult* input,
808 RegExpNode** node_return,
809 bool ignore_case,
810 bool multiline);
811 static void DotPrint(const char* label, RegExpNode* node);
812 };
813
814
128 } } // namespace v8::internal 815 } } // namespace v8::internal
129 816
130 #endif // V8_JSREGEXP_H_ 817 #endif // V8_JSREGEXP_H_
OLDNEW
« no previous file with comments | « src/heap.cc ('k') | src/jsregexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698