OLD | NEW |
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
160 // A one element cache of the last utf8_subject string and its length. The | 160 // A one element cache of the last utf8_subject string and its length. The |
161 // subject JS String object is cached in the heap. We also cache a | 161 // subject JS String object is cached in the heap. We also cache a |
162 // translation between position and utf8 position. | 162 // translation between position and utf8 position. |
163 static char* utf8_subject_cache_; | 163 static char* utf8_subject_cache_; |
164 static int utf8_length_cache_; | 164 static int utf8_length_cache_; |
165 static int utf8_position_; | 165 static int utf8_position_; |
166 static int character_position_; | 166 static int character_position_; |
167 }; | 167 }; |
168 | 168 |
169 | 169 |
| 170 // Represents the location of one element relative to the intersection of |
| 171 // two sets. Corresponds to the four areas of a Venn diagram. |
| 172 enum ElementInSetsRelation { |
| 173 kInsideNone = 0, |
| 174 kInsideFirst = 1, |
| 175 kInsideSecond = 2, |
| 176 kInsideBoth = 3 |
| 177 }; |
| 178 |
| 179 |
| 180 // Represents the relation of two sets. |
| 181 // Sets can be either disjoint, partially or fully overlapping, or equal. |
| 182 class SetRelation BASE_EMBEDDED { |
| 183 public: |
| 184 // Relation is represented by a bit saying whether there are elements in |
| 185 // one set that is not in the other, and a bit saying that there are elements |
| 186 // that are in both sets. |
| 187 |
| 188 // Location of an element. Corresponds to the internal areas of |
| 189 // a Venn diagram. |
| 190 enum { |
| 191 kInFirst = 1 << kInsideFirst, |
| 192 kInSecond = 1 << kInsideSecond, |
| 193 kInBoth = 1 << kInsideBoth |
| 194 }; |
| 195 SetRelation() : bits_(0) {} |
| 196 ~SetRelation() {} |
| 197 // Add the existence of objects in a particular |
| 198 void SetElementsInFirstSet() { bits_ |= kInFirst; } |
| 199 void SetElementsInSecondSet() { bits_ |= kInSecond; } |
| 200 void SetElementsInBothSets() { bits_ |= kInBoth; } |
| 201 // Check the currently known relation of the sets (common functions only, |
| 202 // for other combinations, use value() to get the bits and check them |
| 203 // manually). |
| 204 // Sets are completely disjoint. |
| 205 bool Disjoint() { return (bits_ & kInBoth) == 0; } |
| 206 // Sets are equal. |
| 207 bool Equals() { return (bits_ & (kInFirst | kInSecond)) == 0; } |
| 208 // First set contains second. |
| 209 bool Contains() { return (bits_ & kInSecond) == 0; } |
| 210 // Second set contains first. |
| 211 bool ContainedIn() { return (bits_ & kInFirst) == 0; } |
| 212 bool NonTrivialIntersection() { |
| 213 return (bits_ == (kInFirst | kInSecond | kInBoth)); |
| 214 } |
| 215 int value() { return bits_; } |
| 216 private: |
| 217 int bits_; |
| 218 }; |
| 219 |
| 220 |
170 class CharacterRange { | 221 class CharacterRange { |
171 public: | 222 public: |
172 CharacterRange() : from_(0), to_(0) { } | 223 CharacterRange() : from_(0), to_(0) { } |
173 // For compatibility with the CHECK_OK macro | 224 // For compatibility with the CHECK_OK macro |
174 CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT | 225 CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT |
175 CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { } | 226 CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { } |
176 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges); | 227 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges); |
177 static Vector<const uc16> GetWordBounds(); | 228 static Vector<const uc16> GetWordBounds(); |
178 static inline CharacterRange Singleton(uc16 value) { | 229 static inline CharacterRange Singleton(uc16 value) { |
179 return CharacterRange(value, value); | 230 return CharacterRange(value, value); |
(...skipping 11 matching lines...) Expand all Loading... |
191 uc16 to() const { return to_; } | 242 uc16 to() const { return to_; } |
192 void set_to(uc16 value) { to_ = value; } | 243 void set_to(uc16 value) { to_ = value; } |
193 bool is_valid() { return from_ <= to_; } | 244 bool is_valid() { return from_ <= to_; } |
194 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } | 245 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } |
195 bool IsSingleton() { return (from_ == to_); } | 246 bool IsSingleton() { return (from_ == to_); } |
196 void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii); | 247 void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii); |
197 static void Split(ZoneList<CharacterRange>* base, | 248 static void Split(ZoneList<CharacterRange>* base, |
198 Vector<const uc16> overlay, | 249 Vector<const uc16> overlay, |
199 ZoneList<CharacterRange>** included, | 250 ZoneList<CharacterRange>** included, |
200 ZoneList<CharacterRange>** excluded); | 251 ZoneList<CharacterRange>** excluded); |
201 | 252 // Whether a range list is in canonical form: Ranges ordered by from value, |
| 253 // and ranges non-overlapping and non-adjacent. |
| 254 static bool IsCanonical(ZoneList<CharacterRange>* ranges); |
| 255 // Convert range list to canonical form. The characters covered by the ranges |
| 256 // will still be the same, but no character is in more than one range, and |
| 257 // adjacent ranges are merged. The resulting list may be shorter than the |
| 258 // original, but cannot be longer. |
| 259 static void Canonicalize(ZoneList<CharacterRange>* ranges); |
| 260 // Check how the set of characters defined by a CharacterRange list relates |
| 261 // to the set of word characters. List must be in canonical form. |
| 262 static SetRelation WordCharacterRelation(ZoneList<CharacterRange>* ranges); |
| 263 // Takes two character range lists (representing character sets) in canonical |
| 264 // form and merges them. |
| 265 // The characters that are only covered by the first set are added to |
| 266 // first_set_only_out. the characters that are only in the second set are |
| 267 // added to second_set_only_out, and the characters that are in both are |
| 268 // added to both_sets_out. |
| 269 // The pointers to first_set_only_out, second_set_only_out and both_sets_out |
| 270 // should be to empty lists, but they need not be distinct, and may be NULL. |
| 271 // If NULL, the characters are dropped, and if two arguments are the same |
| 272 // pointer, the result is the union of the two sets that would be created |
| 273 // if the pointers had been distinct. |
| 274 // This way, the Merge function can compute all the usual set operations: |
| 275 // union (all three out-sets are equal), intersection (only both_sets_out is |
| 276 // non-NULL), and set difference (only first_set is non-NULL). |
| 277 static void Merge(ZoneList<CharacterRange>* first_set, |
| 278 ZoneList<CharacterRange>* second_set, |
| 279 ZoneList<CharacterRange>* first_set_only_out, |
| 280 ZoneList<CharacterRange>* second_set_only_out, |
| 281 ZoneList<CharacterRange>* both_sets_out); |
| 282 // Negate the contents of a character range in canonical form. |
| 283 static void Negate(ZoneList<CharacterRange>* src, |
| 284 ZoneList<CharacterRange>* dst); |
202 static const int kRangeCanonicalizeMax = 0x346; | 285 static const int kRangeCanonicalizeMax = 0x346; |
203 static const int kStartMarker = (1 << 24); | 286 static const int kStartMarker = (1 << 24); |
204 static const int kPayloadMask = (1 << 24) - 1; | 287 static const int kPayloadMask = (1 << 24) - 1; |
205 | 288 |
206 private: | 289 private: |
207 uc16 from_; | 290 uc16 from_; |
208 uc16 to_; | 291 uc16 to_; |
209 }; | 292 }; |
210 | 293 |
211 | 294 |
(...skipping 253 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
465 uint32_t mask_; | 548 uint32_t mask_; |
466 uint32_t value_; | 549 uint32_t value_; |
467 // If set to true, there is no way this quick check can match at all. | 550 // If set to true, there is no way this quick check can match at all. |
468 // E.g., if it requires to be at the start of the input, and isn't. | 551 // E.g., if it requires to be at the start of the input, and isn't. |
469 bool cannot_match_; | 552 bool cannot_match_; |
470 }; | 553 }; |
471 | 554 |
472 | 555 |
473 class RegExpNode: public ZoneObject { | 556 class RegExpNode: public ZoneObject { |
474 public: | 557 public: |
475 RegExpNode() : trace_count_(0) { } | 558 RegExpNode() : first_character_set_(NULL), trace_count_(0) { } |
476 virtual ~RegExpNode(); | 559 virtual ~RegExpNode(); |
477 virtual void Accept(NodeVisitor* visitor) = 0; | 560 virtual void Accept(NodeVisitor* visitor) = 0; |
478 // Generates a goto to this node or actually generates the code at this point. | 561 // Generates a goto to this node or actually generates the code at this point. |
479 virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0; | 562 virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0; |
480 // How many characters must this node consume at a minimum in order to | 563 // How many characters must this node consume at a minimum in order to |
481 // succeed. If we have found at least 'still_to_find' characters that | 564 // succeed. If we have found at least 'still_to_find' characters that |
482 // must be consumed there is no need to ask any following nodes whether | 565 // must be consumed there is no need to ask any following nodes whether |
483 // they are sure to eat any more characters. | 566 // they are sure to eat any more characters. |
484 virtual int EatsAtLeast(int still_to_find, int recursion_depth) = 0; | 567 virtual int EatsAtLeast(int still_to_find, int recursion_depth) = 0; |
485 // Emits some quick code that checks whether the preloaded characters match. | 568 // Emits some quick code that checks whether the preloaded characters match. |
(...skipping 30 matching lines...) Expand all Loading... |
516 // Static version of EnsureSibling that expresses the fact that the | 599 // Static version of EnsureSibling that expresses the fact that the |
517 // result has the same type as the input. | 600 // result has the same type as the input. |
518 template <class C> | 601 template <class C> |
519 static C* EnsureSibling(C* node, NodeInfo* info, bool* cloned) { | 602 static C* EnsureSibling(C* node, NodeInfo* info, bool* cloned) { |
520 return static_cast<C*>(node->EnsureSibling(info, cloned)); | 603 return static_cast<C*>(node->EnsureSibling(info, cloned)); |
521 } | 604 } |
522 | 605 |
523 SiblingList* siblings() { return &siblings_; } | 606 SiblingList* siblings() { return &siblings_; } |
524 void set_siblings(SiblingList* other) { siblings_ = *other; } | 607 void set_siblings(SiblingList* other) { siblings_ = *other; } |
525 | 608 |
| 609 // Return the set of possible next characters recognized by the regexp |
| 610 // (or a safe subset, potentially the set of all characters). |
| 611 ZoneList<CharacterRange>* FirstCharacterSet(); |
| 612 |
| 613 // Compute (if possible within the budget of traversed nodes) the |
| 614 // possible first characters of the input matched by this node and |
| 615 // its continuation. Returns the remaining budget after the computation. |
| 616 // If the budget is spent, the result is negative, and the cached |
| 617 // first_character_set_ value isn't set. |
| 618 virtual int ComputeFirstCharacterSet(int budget); |
| 619 |
| 620 // Get and set the cached first character set value. |
| 621 ZoneList<CharacterRange>* first_character_set() { |
| 622 return first_character_set_; |
| 623 } |
| 624 void set_first_character_set(ZoneList<CharacterRange>* character_set) { |
| 625 first_character_set_ = character_set; |
| 626 } |
| 627 |
526 protected: | 628 protected: |
527 enum LimitResult { DONE, CONTINUE }; | 629 enum LimitResult { DONE, CONTINUE }; |
| 630 static const int kComputeFirstCharacterSetFail = -1; |
| 631 |
528 LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace); | 632 LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace); |
529 | 633 |
530 // Returns a sibling of this node whose interests and assumptions | 634 // Returns a sibling of this node whose interests and assumptions |
531 // match the ones in the given node info. If no sibling exists NULL | 635 // match the ones in the given node info. If no sibling exists NULL |
532 // is returned. | 636 // is returned. |
533 RegExpNode* TryGetSibling(NodeInfo* info); | 637 RegExpNode* TryGetSibling(NodeInfo* info); |
534 | 638 |
535 // Returns a sibling of this node whose interests match the ones in | 639 // Returns a sibling of this node whose interests match the ones in |
536 // the given node info. The info must not contain any assertions. | 640 // the given node info. The info must not contain any assertions. |
537 // If no node exists a new one will be created by cloning the current | 641 // If no node exists a new one will be created by cloning the current |
538 // node. The result will always be an instance of the same concrete | 642 // node. The result will always be an instance of the same concrete |
539 // class as this node. | 643 // class as this node. |
540 RegExpNode* EnsureSibling(NodeInfo* info, bool* cloned); | 644 RegExpNode* EnsureSibling(NodeInfo* info, bool* cloned); |
541 | 645 |
542 // Returns a clone of this node initialized using the copy constructor | 646 // Returns a clone of this node initialized using the copy constructor |
543 // of its concrete class. Note that the node may have to be pre- | 647 // of its concrete class. Note that the node may have to be pre- |
544 // processed before it is on a usable state. | 648 // processed before it is on a usable state. |
545 virtual RegExpNode* Clone() = 0; | 649 virtual RegExpNode* Clone() = 0; |
546 | 650 |
547 private: | 651 private: |
| 652 static const int kFirstCharBudget = 10; |
548 Label label_; | 653 Label label_; |
549 NodeInfo info_; | 654 NodeInfo info_; |
550 SiblingList siblings_; | 655 SiblingList siblings_; |
| 656 ZoneList<CharacterRange>* first_character_set_; |
551 // This variable keeps track of how many times code has been generated for | 657 // This variable keeps track of how many times code has been generated for |
552 // this node (in different traces). We don't keep track of where the | 658 // this node (in different traces). We don't keep track of where the |
553 // generated code is located unless the code is generated at the start of | 659 // generated code is located unless the code is generated at the start of |
554 // a trace, in which case it is generic and can be reused by flushing the | 660 // a trace, in which case it is generic and can be reused by flushing the |
555 // deferred operations in the current trace and generating a goto. | 661 // deferred operations in the current trace and generating a goto. |
556 int trace_count_; | 662 int trace_count_; |
557 }; | 663 }; |
558 | 664 |
559 | 665 |
560 // A simple closed interval. | 666 // A simple closed interval. |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
631 RegExpCompiler* compiler, | 737 RegExpCompiler* compiler, |
632 int filled_in, | 738 int filled_in, |
633 bool not_at_start) { | 739 bool not_at_start) { |
634 return on_success()->GetQuickCheckDetails( | 740 return on_success()->GetQuickCheckDetails( |
635 details, compiler, filled_in, not_at_start); | 741 details, compiler, filled_in, not_at_start); |
636 } | 742 } |
637 Type type() { return type_; } | 743 Type type() { return type_; } |
638 // TODO(erikcorry): We should allow some action nodes in greedy loops. | 744 // TODO(erikcorry): We should allow some action nodes in greedy loops. |
639 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } | 745 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } |
640 virtual ActionNode* Clone() { return new ActionNode(*this); } | 746 virtual ActionNode* Clone() { return new ActionNode(*this); } |
641 | 747 virtual int ComputeFirstCharacterSet(int budget); |
642 private: | 748 private: |
643 union { | 749 union { |
644 struct { | 750 struct { |
645 int reg; | 751 int reg; |
646 int value; | 752 int value; |
647 } u_store_register; | 753 } u_store_register; |
648 struct { | 754 struct { |
649 int reg; | 755 int reg; |
650 } u_increment_register; | 756 } u_increment_register; |
651 struct { | 757 struct { |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
697 bool not_at_start); | 803 bool not_at_start); |
698 ZoneList<TextElement>* elements() { return elms_; } | 804 ZoneList<TextElement>* elements() { return elms_; } |
699 void MakeCaseIndependent(bool is_ascii); | 805 void MakeCaseIndependent(bool is_ascii); |
700 virtual int GreedyLoopTextLength(); | 806 virtual int GreedyLoopTextLength(); |
701 virtual TextNode* Clone() { | 807 virtual TextNode* Clone() { |
702 TextNode* result = new TextNode(*this); | 808 TextNode* result = new TextNode(*this); |
703 result->CalculateOffsets(); | 809 result->CalculateOffsets(); |
704 return result; | 810 return result; |
705 } | 811 } |
706 void CalculateOffsets(); | 812 void CalculateOffsets(); |
707 | 813 virtual int ComputeFirstCharacterSet(int budget); |
708 private: | 814 private: |
709 enum TextEmitPassType { | 815 enum TextEmitPassType { |
710 NON_ASCII_MATCH, // Check for characters that can't match. | 816 NON_ASCII_MATCH, // Check for characters that can't match. |
711 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. | 817 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. |
712 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. | 818 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. |
713 CASE_CHARACTER_MATCH, // Case-independent single character check. | 819 CASE_CHARACTER_MATCH, // Case-independent single character check. |
714 CHARACTER_CLASS_MATCH // Character class. | 820 CHARACTER_CLASS_MATCH // Character class. |
715 }; | 821 }; |
716 static bool SkipPass(int pass, bool ignore_case); | 822 static bool SkipPass(int pass, bool ignore_case); |
717 static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH; | 823 static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH; |
718 static const int kLastPass = CHARACTER_CLASS_MATCH; | 824 static const int kLastPass = CHARACTER_CLASS_MATCH; |
719 void TextEmitPass(RegExpCompiler* compiler, | 825 void TextEmitPass(RegExpCompiler* compiler, |
720 TextEmitPassType pass, | 826 TextEmitPassType pass, |
721 bool preloaded, | 827 bool preloaded, |
722 Trace* trace, | 828 Trace* trace, |
723 bool first_element_checked, | 829 bool first_element_checked, |
724 int* checked_up_to); | 830 int* checked_up_to); |
725 int Length(); | 831 int Length(); |
726 ZoneList<TextElement>* elms_; | 832 ZoneList<TextElement>* elms_; |
727 }; | 833 }; |
728 | 834 |
729 | 835 |
730 class AssertionNode: public SeqRegExpNode { | 836 class AssertionNode: public SeqRegExpNode { |
731 public: | 837 public: |
732 enum AssertionNodeType { | 838 enum AssertionNodeType { |
733 AT_END, | 839 AT_END, |
734 AT_START, | 840 AT_START, |
735 AT_BOUNDARY, | 841 AT_BOUNDARY, |
736 AT_NON_BOUNDARY, | 842 AT_NON_BOUNDARY, |
737 AFTER_NEWLINE | 843 AFTER_NEWLINE, |
| 844 // Types not directly expressible in regexp syntax. |
| 845 // Used for modifying a boundary node if its following character is |
| 846 // known to be word and/or non-word. |
| 847 AFTER_NONWORD_CHARACTER, |
| 848 AFTER_WORD_CHARACTER |
738 }; | 849 }; |
739 static AssertionNode* AtEnd(RegExpNode* on_success) { | 850 static AssertionNode* AtEnd(RegExpNode* on_success) { |
740 return new AssertionNode(AT_END, on_success); | 851 return new AssertionNode(AT_END, on_success); |
741 } | 852 } |
742 static AssertionNode* AtStart(RegExpNode* on_success) { | 853 static AssertionNode* AtStart(RegExpNode* on_success) { |
743 return new AssertionNode(AT_START, on_success); | 854 return new AssertionNode(AT_START, on_success); |
744 } | 855 } |
745 static AssertionNode* AtBoundary(RegExpNode* on_success) { | 856 static AssertionNode* AtBoundary(RegExpNode* on_success) { |
746 return new AssertionNode(AT_BOUNDARY, on_success); | 857 return new AssertionNode(AT_BOUNDARY, on_success); |
747 } | 858 } |
748 static AssertionNode* AtNonBoundary(RegExpNode* on_success) { | 859 static AssertionNode* AtNonBoundary(RegExpNode* on_success) { |
749 return new AssertionNode(AT_NON_BOUNDARY, on_success); | 860 return new AssertionNode(AT_NON_BOUNDARY, on_success); |
750 } | 861 } |
751 static AssertionNode* AfterNewline(RegExpNode* on_success) { | 862 static AssertionNode* AfterNewline(RegExpNode* on_success) { |
752 return new AssertionNode(AFTER_NEWLINE, on_success); | 863 return new AssertionNode(AFTER_NEWLINE, on_success); |
753 } | 864 } |
754 virtual void Accept(NodeVisitor* visitor); | 865 virtual void Accept(NodeVisitor* visitor); |
755 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 866 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
756 virtual int EatsAtLeast(int still_to_find, int recursion_depth); | 867 virtual int EatsAtLeast(int still_to_find, int recursion_depth); |
757 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 868 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
758 RegExpCompiler* compiler, | 869 RegExpCompiler* compiler, |
759 int filled_in, | 870 int filled_in, |
760 bool not_at_start); | 871 bool not_at_start); |
| 872 virtual int ComputeFirstCharacterSet(int budget); |
761 virtual AssertionNode* Clone() { return new AssertionNode(*this); } | 873 virtual AssertionNode* Clone() { return new AssertionNode(*this); } |
762 AssertionNodeType type() { return type_; } | 874 AssertionNodeType type() { return type_; } |
| 875 void set_type(AssertionNodeType type) { type_ = type; } |
763 private: | 876 private: |
764 AssertionNode(AssertionNodeType t, RegExpNode* on_success) | 877 AssertionNode(AssertionNodeType t, RegExpNode* on_success) |
765 : SeqRegExpNode(on_success), type_(t) { } | 878 : SeqRegExpNode(on_success), type_(t) { } |
766 AssertionNodeType type_; | 879 AssertionNodeType type_; |
767 }; | 880 }; |
768 | 881 |
769 | 882 |
770 class BackReferenceNode: public SeqRegExpNode { | 883 class BackReferenceNode: public SeqRegExpNode { |
771 public: | 884 public: |
772 BackReferenceNode(int start_reg, | 885 BackReferenceNode(int start_reg, |
773 int end_reg, | 886 int end_reg, |
774 RegExpNode* on_success) | 887 RegExpNode* on_success) |
775 : SeqRegExpNode(on_success), | 888 : SeqRegExpNode(on_success), |
776 start_reg_(start_reg), | 889 start_reg_(start_reg), |
777 end_reg_(end_reg) { } | 890 end_reg_(end_reg) { } |
778 virtual void Accept(NodeVisitor* visitor); | 891 virtual void Accept(NodeVisitor* visitor); |
779 int start_register() { return start_reg_; } | 892 int start_register() { return start_reg_; } |
780 int end_register() { return end_reg_; } | 893 int end_register() { return end_reg_; } |
781 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 894 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
782 virtual int EatsAtLeast(int still_to_find, int recursion_depth); | 895 virtual int EatsAtLeast(int still_to_find, int recursion_depth); |
783 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 896 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
784 RegExpCompiler* compiler, | 897 RegExpCompiler* compiler, |
785 int characters_filled_in, | 898 int characters_filled_in, |
786 bool not_at_start) { | 899 bool not_at_start) { |
787 return; | 900 return; |
788 } | 901 } |
789 virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); } | 902 virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); } |
790 | 903 virtual int ComputeFirstCharacterSet(int budget); |
791 private: | 904 private: |
792 int start_reg_; | 905 int start_reg_; |
793 int end_reg_; | 906 int end_reg_; |
794 }; | 907 }; |
795 | 908 |
796 | 909 |
797 class EndNode: public RegExpNode { | 910 class EndNode: public RegExpNode { |
798 public: | 911 public: |
799 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; | 912 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; |
800 explicit EndNode(Action action) : action_(action) { } | 913 explicit EndNode(Action action) : action_(action) { } |
801 virtual void Accept(NodeVisitor* visitor); | 914 virtual void Accept(NodeVisitor* visitor); |
802 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 915 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
803 virtual int EatsAtLeast(int still_to_find, int recursion_depth) { return 0; } | 916 virtual int EatsAtLeast(int still_to_find, int recursion_depth) { return 0; } |
804 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 917 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
805 RegExpCompiler* compiler, | 918 RegExpCompiler* compiler, |
806 int characters_filled_in, | 919 int characters_filled_in, |
807 bool not_at_start) { | 920 bool not_at_start) { |
808 // Returning 0 from EatsAtLeast should ensure we never get here. | 921 // Returning 0 from EatsAtLeast should ensure we never get here. |
809 UNREACHABLE(); | 922 UNREACHABLE(); |
810 } | 923 } |
811 virtual EndNode* Clone() { return new EndNode(*this); } | 924 virtual EndNode* Clone() { return new EndNode(*this); } |
812 | |
813 private: | 925 private: |
814 Action action_; | 926 Action action_; |
815 }; | 927 }; |
816 | 928 |
817 | 929 |
818 class NegativeSubmatchSuccess: public EndNode { | 930 class NegativeSubmatchSuccess: public EndNode { |
819 public: | 931 public: |
820 NegativeSubmatchSuccess(int stack_pointer_reg, | 932 NegativeSubmatchSuccess(int stack_pointer_reg, |
821 int position_reg, | 933 int position_reg, |
822 int clear_capture_count, | 934 int clear_capture_count, |
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
936 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1048 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
937 RegExpCompiler* compiler, | 1049 RegExpCompiler* compiler, |
938 int characters_filled_in, | 1050 int characters_filled_in, |
939 bool not_at_start); | 1051 bool not_at_start); |
940 // For a negative lookahead we don't emit the quick check for the | 1052 // For a negative lookahead we don't emit the quick check for the |
941 // alternative that is expected to fail. This is because quick check code | 1053 // alternative that is expected to fail. This is because quick check code |
942 // starts by loading enough characters for the alternative that takes fewest | 1054 // starts by loading enough characters for the alternative that takes fewest |
943 // characters, but on a negative lookahead the negative branch did not take | 1055 // characters, but on a negative lookahead the negative branch did not take |
944 // part in that calculation (EatsAtLeast) so the assumptions don't hold. | 1056 // part in that calculation (EatsAtLeast) so the assumptions don't hold. |
945 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } | 1057 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } |
| 1058 virtual int ComputeFirstCharacterSet(int budget); |
946 }; | 1059 }; |
947 | 1060 |
948 | 1061 |
949 class LoopChoiceNode: public ChoiceNode { | 1062 class LoopChoiceNode: public ChoiceNode { |
950 public: | 1063 public: |
951 explicit LoopChoiceNode(bool body_can_be_zero_length) | 1064 explicit LoopChoiceNode(bool body_can_be_zero_length) |
952 : ChoiceNode(2), | 1065 : ChoiceNode(2), |
953 loop_node_(NULL), | 1066 loop_node_(NULL), |
954 continue_node_(NULL), | 1067 continue_node_(NULL), |
955 body_can_be_zero_length_(body_can_be_zero_length) { } | 1068 body_can_be_zero_length_(body_can_be_zero_length) { } |
956 void AddLoopAlternative(GuardedAlternative alt); | 1069 void AddLoopAlternative(GuardedAlternative alt); |
957 void AddContinueAlternative(GuardedAlternative alt); | 1070 void AddContinueAlternative(GuardedAlternative alt); |
958 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1071 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
959 virtual int EatsAtLeast(int still_to_find, int recursion_depth); | 1072 virtual int EatsAtLeast(int still_to_find, int recursion_depth); |
960 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1073 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
961 RegExpCompiler* compiler, | 1074 RegExpCompiler* compiler, |
962 int characters_filled_in, | 1075 int characters_filled_in, |
963 bool not_at_start); | 1076 bool not_at_start); |
| 1077 virtual int ComputeFirstCharacterSet(int budget); |
964 virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); } | 1078 virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); } |
965 RegExpNode* loop_node() { return loop_node_; } | 1079 RegExpNode* loop_node() { return loop_node_; } |
966 RegExpNode* continue_node() { return continue_node_; } | 1080 RegExpNode* continue_node() { return continue_node_; } |
967 bool body_can_be_zero_length() { return body_can_be_zero_length_; } | 1081 bool body_can_be_zero_length() { return body_can_be_zero_length_; } |
968 virtual void Accept(NodeVisitor* visitor); | 1082 virtual void Accept(NodeVisitor* visitor); |
969 | 1083 |
970 private: | 1084 private: |
971 // AddAlternative is made private for loop nodes because alternatives | 1085 // AddAlternative is made private for loop nodes because alternatives |
972 // should not be added freely, we need to keep track of which node | 1086 // should not be added freely, we need to keep track of which node |
973 // goes back to the node itself. | 1087 // goes back to the node itself. |
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1109 // These set methods and AdvanceCurrentPositionInTrace should be used only on | 1223 // These set methods and AdvanceCurrentPositionInTrace should be used only on |
1110 // new traces - the intention is that traces are immutable after creation. | 1224 // new traces - the intention is that traces are immutable after creation. |
1111 void add_action(DeferredAction* new_action) { | 1225 void add_action(DeferredAction* new_action) { |
1112 ASSERT(new_action->next_ == NULL); | 1226 ASSERT(new_action->next_ == NULL); |
1113 new_action->next_ = actions_; | 1227 new_action->next_ = actions_; |
1114 actions_ = new_action; | 1228 actions_ = new_action; |
1115 } | 1229 } |
1116 void set_backtrack(Label* backtrack) { backtrack_ = backtrack; } | 1230 void set_backtrack(Label* backtrack) { backtrack_ = backtrack; } |
1117 void set_stop_node(RegExpNode* node) { stop_node_ = node; } | 1231 void set_stop_node(RegExpNode* node) { stop_node_ = node; } |
1118 void set_loop_label(Label* label) { loop_label_ = label; } | 1232 void set_loop_label(Label* label) { loop_label_ = label; } |
1119 void set_characters_preloaded(int cpre) { characters_preloaded_ = cpre; } | 1233 void set_characters_preloaded(int count) { characters_preloaded_ = count; } |
1120 void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; } | 1234 void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; } |
1121 void set_flush_budget(int to) { flush_budget_ = to; } | 1235 void set_flush_budget(int to) { flush_budget_ = to; } |
1122 void set_quick_check_performed(QuickCheckDetails* d) { | 1236 void set_quick_check_performed(QuickCheckDetails* d) { |
1123 quick_check_performed_ = *d; | 1237 quick_check_performed_ = *d; |
1124 } | 1238 } |
1125 void InvalidateCurrentCharacter(); | 1239 void InvalidateCurrentCharacter(); |
1126 void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler); | 1240 void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler); |
1127 private: | 1241 private: |
1128 int FindAffectedRegisters(OutSet* affected_registers); | 1242 int FindAffectedRegisters(OutSet* affected_registers); |
1129 void PerformDeferredActions(RegExpMacroAssembler* macro, | 1243 void PerformDeferredActions(RegExpMacroAssembler* macro, |
(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1272 Handle<String> pattern, | 1386 Handle<String> pattern, |
1273 bool is_ascii); | 1387 bool is_ascii); |
1274 | 1388 |
1275 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1389 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
1276 }; | 1390 }; |
1277 | 1391 |
1278 | 1392 |
1279 } } // namespace v8::internal | 1393 } } // namespace v8::internal |
1280 | 1394 |
1281 #endif // V8_JSREGEXP_H_ | 1395 #endif // V8_JSREGEXP_H_ |
OLD | NEW |