| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 160 // A one element cache of the last utf8_subject string and its length. The | 160 // A one element cache of the last utf8_subject string and its length. The |
| 161 // subject JS String object is cached in the heap. We also cache a | 161 // subject JS String object is cached in the heap. We also cache a |
| 162 // translation between position and utf8 position. | 162 // translation between position and utf8 position. |
| 163 static char* utf8_subject_cache_; | 163 static char* utf8_subject_cache_; |
| 164 static int utf8_length_cache_; | 164 static int utf8_length_cache_; |
| 165 static int utf8_position_; | 165 static int utf8_position_; |
| 166 static int character_position_; | 166 static int character_position_; |
| 167 }; | 167 }; |
| 168 | 168 |
| 169 | 169 |
| 170 // Represents the location of one element relative to the intersection of |
| 171 // two sets. Corresponds to the four areas of a Venn diagram. |
| 172 enum ElementInSetsRelation { |
| 173 kInsideNone = 0, |
| 174 kInsideFirst = 1, |
| 175 kInsideSecond = 2, |
| 176 kInsideBoth = 3 |
| 177 }; |
| 178 |
| 179 |
| 180 // Represents the relation of two sets. |
| 181 // Sets can be either disjoint, partially or fully overlapping, or equal. |
| 182 class SetRelation BASE_EMBEDDED { |
| 183 public: |
| 184 // Relation is represented by a bit saying whether there are elements in |
| 185 // one set that is not in the other, and a bit saying that there are elements |
| 186 // that are in both sets. |
| 187 |
| 188 // Location of an element. Corresponds to the internal areas of |
| 189 // a Venn diagram. |
| 190 enum { |
| 191 kInFirst = 1 << kInsideFirst, |
| 192 kInSecond = 1 << kInsideSecond, |
| 193 kInBoth = 1 << kInsideBoth |
| 194 }; |
| 195 SetRelation() : bits_(0) {} |
| 196 ~SetRelation() {} |
| 197 // Add the existence of objects in a particular |
| 198 void SetElementsInFirstSet() { bits_ |= kInFirst; } |
| 199 void SetElementsInSecondSet() { bits_ |= kInSecond; } |
| 200 void SetElementsInBothSets() { bits_ |= kInBoth; } |
| 201 // Check the currently known relation of the sets (common functions only, |
| 202 // for other combinations, use value() to get the bits and check them |
| 203 // manually). |
| 204 // Sets are completely disjoint. |
| 205 bool Disjoint() { return (bits_ & kInBoth) == 0; } |
| 206 // Sets are equal. |
| 207 bool Equals() { return (bits_ & (kInFirst | kInSecond)) == 0; } |
| 208 // First set contains second. |
| 209 bool Contains() { return (bits_ & kInSecond) == 0; } |
| 210 // Second set contains first. |
| 211 bool ContainedIn() { return (bits_ & kInFirst) == 0; } |
| 212 bool NonTrivialIntersection() { |
| 213 return (bits_ == (kInFirst | kInSecond | kInBoth)); |
| 214 } |
| 215 int value() { return bits_; } |
| 216 private: |
| 217 int bits_; |
| 218 }; |
| 219 |
| 220 |
| 170 class CharacterRange { | 221 class CharacterRange { |
| 171 public: | 222 public: |
| 172 CharacterRange() : from_(0), to_(0) { } | 223 CharacterRange() : from_(0), to_(0) { } |
| 173 // For compatibility with the CHECK_OK macro | 224 // For compatibility with the CHECK_OK macro |
| 174 CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT | 225 CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT |
| 175 CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { } | 226 CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { } |
| 176 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges); | 227 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges); |
| 177 static Vector<const uc16> GetWordBounds(); | 228 static Vector<const uc16> GetWordBounds(); |
| 178 static inline CharacterRange Singleton(uc16 value) { | 229 static inline CharacterRange Singleton(uc16 value) { |
| 179 return CharacterRange(value, value); | 230 return CharacterRange(value, value); |
| (...skipping 11 matching lines...) Expand all Loading... |
| 191 uc16 to() const { return to_; } | 242 uc16 to() const { return to_; } |
| 192 void set_to(uc16 value) { to_ = value; } | 243 void set_to(uc16 value) { to_ = value; } |
| 193 bool is_valid() { return from_ <= to_; } | 244 bool is_valid() { return from_ <= to_; } |
| 194 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } | 245 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } |
| 195 bool IsSingleton() { return (from_ == to_); } | 246 bool IsSingleton() { return (from_ == to_); } |
| 196 void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii); | 247 void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii); |
| 197 static void Split(ZoneList<CharacterRange>* base, | 248 static void Split(ZoneList<CharacterRange>* base, |
| 198 Vector<const uc16> overlay, | 249 Vector<const uc16> overlay, |
| 199 ZoneList<CharacterRange>** included, | 250 ZoneList<CharacterRange>** included, |
| 200 ZoneList<CharacterRange>** excluded); | 251 ZoneList<CharacterRange>** excluded); |
| 201 | 252 // Whether a range list is in canonical form: Ranges ordered by from value, |
| 253 // and ranges non-overlapping and non-adjacent. |
| 254 static bool IsCanonical(ZoneList<CharacterRange>* ranges); |
| 255 // Convert range list to canonical form. The characters covered by the ranges |
| 256 // will still be the same, but no character is in more than one range, and |
| 257 // adjacent ranges are merged. The resulting list may be shorter than the |
| 258 // original, but cannot be longer. |
| 259 static void Canonicalize(ZoneList<CharacterRange>* ranges); |
| 260 // Check how the set of characters defined by a CharacterRange list relates |
| 261 // to the set of word characters. List must be in canonical form. |
| 262 static SetRelation WordCharacterRelation(ZoneList<CharacterRange>* ranges); |
| 263 // Takes two character range lists (representing character sets) in canonical |
| 264 // form and merges them. |
| 265 // The characters that are only covered by the first set are added to |
| 266 // first_set_only_out. the characters that are only in the second set are |
| 267 // added to second_set_only_out, and the characters that are in both are |
| 268 // added to both_sets_out. |
| 269 // The pointers to first_set_only_out, second_set_only_out and both_sets_out |
| 270 // should be to empty lists, but they need not be distinct, and may be NULL. |
| 271 // If NULL, the characters are dropped, and if two arguments are the same |
| 272 // pointer, the result is the union of the two sets that would be created |
| 273 // if the pointers had been distinct. |
| 274 // This way, the Merge function can compute all the usual set operations: |
| 275 // union (all three out-sets are equal), intersection (only both_sets_out is |
| 276 // non-NULL), and set difference (only first_set is non-NULL). |
| 277 static void Merge(ZoneList<CharacterRange>* first_set, |
| 278 ZoneList<CharacterRange>* second_set, |
| 279 ZoneList<CharacterRange>* first_set_only_out, |
| 280 ZoneList<CharacterRange>* second_set_only_out, |
| 281 ZoneList<CharacterRange>* both_sets_out); |
| 282 // Negate the contents of a character range in canonical form. |
| 283 static void Negate(ZoneList<CharacterRange>* src, |
| 284 ZoneList<CharacterRange>* dst); |
| 202 static const int kRangeCanonicalizeMax = 0x346; | 285 static const int kRangeCanonicalizeMax = 0x346; |
| 203 static const int kStartMarker = (1 << 24); | 286 static const int kStartMarker = (1 << 24); |
| 204 static const int kPayloadMask = (1 << 24) - 1; | 287 static const int kPayloadMask = (1 << 24) - 1; |
| 205 | 288 |
| 206 private: | 289 private: |
| 207 uc16 from_; | 290 uc16 from_; |
| 208 uc16 to_; | 291 uc16 to_; |
| 209 }; | 292 }; |
| 210 | 293 |
| 211 | 294 |
| (...skipping 253 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 465 uint32_t mask_; | 548 uint32_t mask_; |
| 466 uint32_t value_; | 549 uint32_t value_; |
| 467 // If set to true, there is no way this quick check can match at all. | 550 // If set to true, there is no way this quick check can match at all. |
| 468 // E.g., if it requires to be at the start of the input, and isn't. | 551 // E.g., if it requires to be at the start of the input, and isn't. |
| 469 bool cannot_match_; | 552 bool cannot_match_; |
| 470 }; | 553 }; |
| 471 | 554 |
| 472 | 555 |
| 473 class RegExpNode: public ZoneObject { | 556 class RegExpNode: public ZoneObject { |
| 474 public: | 557 public: |
| 475 RegExpNode() : trace_count_(0) { } | 558 RegExpNode() : first_character_set_(NULL), trace_count_(0) { } |
| 476 virtual ~RegExpNode(); | 559 virtual ~RegExpNode(); |
| 477 virtual void Accept(NodeVisitor* visitor) = 0; | 560 virtual void Accept(NodeVisitor* visitor) = 0; |
| 478 // Generates a goto to this node or actually generates the code at this point. | 561 // Generates a goto to this node or actually generates the code at this point. |
| 479 virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0; | 562 virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0; |
| 480 // How many characters must this node consume at a minimum in order to | 563 // How many characters must this node consume at a minimum in order to |
| 481 // succeed. If we have found at least 'still_to_find' characters that | 564 // succeed. If we have found at least 'still_to_find' characters that |
| 482 // must be consumed there is no need to ask any following nodes whether | 565 // must be consumed there is no need to ask any following nodes whether |
| 483 // they are sure to eat any more characters. | 566 // they are sure to eat any more characters. |
| 484 virtual int EatsAtLeast(int still_to_find, int recursion_depth) = 0; | 567 virtual int EatsAtLeast(int still_to_find, int recursion_depth) = 0; |
| 485 // Emits some quick code that checks whether the preloaded characters match. | 568 // Emits some quick code that checks whether the preloaded characters match. |
| (...skipping 30 matching lines...) Expand all Loading... |
| 516 // Static version of EnsureSibling that expresses the fact that the | 599 // Static version of EnsureSibling that expresses the fact that the |
| 517 // result has the same type as the input. | 600 // result has the same type as the input. |
| 518 template <class C> | 601 template <class C> |
| 519 static C* EnsureSibling(C* node, NodeInfo* info, bool* cloned) { | 602 static C* EnsureSibling(C* node, NodeInfo* info, bool* cloned) { |
| 520 return static_cast<C*>(node->EnsureSibling(info, cloned)); | 603 return static_cast<C*>(node->EnsureSibling(info, cloned)); |
| 521 } | 604 } |
| 522 | 605 |
| 523 SiblingList* siblings() { return &siblings_; } | 606 SiblingList* siblings() { return &siblings_; } |
| 524 void set_siblings(SiblingList* other) { siblings_ = *other; } | 607 void set_siblings(SiblingList* other) { siblings_ = *other; } |
| 525 | 608 |
| 609 // Return the set of possible next characters recognized by the regexp |
| 610 // (or a safe subset, potentially the set of all characters). |
| 611 ZoneList<CharacterRange>* FirstCharacterSet(); |
| 612 |
| 613 // Compute (if possible within the budget of traversed nodes) the |
| 614 // possible first characters of the input matched by this node and |
| 615 // its continuation. Returns the remaining budget after the computation. |
| 616 // If the budget is spent, the result is negative, and the cached |
| 617 // first_character_set_ value isn't set. |
| 618 virtual int ComputeFirstCharacterSet(int budget); |
| 619 |
| 620 // Get and set the cached first character set value. |
| 621 ZoneList<CharacterRange>* first_character_set() { |
| 622 return first_character_set_; |
| 623 } |
| 624 void set_first_character_set(ZoneList<CharacterRange>* character_set) { |
| 625 first_character_set_ = character_set; |
| 626 } |
| 627 |
| 526 protected: | 628 protected: |
| 527 enum LimitResult { DONE, CONTINUE }; | 629 enum LimitResult { DONE, CONTINUE }; |
| 630 static const int kComputeFirstCharacterSetFail = -1; |
| 631 |
| 528 LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace); | 632 LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace); |
| 529 | 633 |
| 530 // Returns a sibling of this node whose interests and assumptions | 634 // Returns a sibling of this node whose interests and assumptions |
| 531 // match the ones in the given node info. If no sibling exists NULL | 635 // match the ones in the given node info. If no sibling exists NULL |
| 532 // is returned. | 636 // is returned. |
| 533 RegExpNode* TryGetSibling(NodeInfo* info); | 637 RegExpNode* TryGetSibling(NodeInfo* info); |
| 534 | 638 |
| 535 // Returns a sibling of this node whose interests match the ones in | 639 // Returns a sibling of this node whose interests match the ones in |
| 536 // the given node info. The info must not contain any assertions. | 640 // the given node info. The info must not contain any assertions. |
| 537 // If no node exists a new one will be created by cloning the current | 641 // If no node exists a new one will be created by cloning the current |
| 538 // node. The result will always be an instance of the same concrete | 642 // node. The result will always be an instance of the same concrete |
| 539 // class as this node. | 643 // class as this node. |
| 540 RegExpNode* EnsureSibling(NodeInfo* info, bool* cloned); | 644 RegExpNode* EnsureSibling(NodeInfo* info, bool* cloned); |
| 541 | 645 |
| 542 // Returns a clone of this node initialized using the copy constructor | 646 // Returns a clone of this node initialized using the copy constructor |
| 543 // of its concrete class. Note that the node may have to be pre- | 647 // of its concrete class. Note that the node may have to be pre- |
| 544 // processed before it is on a usable state. | 648 // processed before it is on a usable state. |
| 545 virtual RegExpNode* Clone() = 0; | 649 virtual RegExpNode* Clone() = 0; |
| 546 | 650 |
| 547 private: | 651 private: |
| 652 static const int kFirstCharBudget = 10; |
| 548 Label label_; | 653 Label label_; |
| 549 NodeInfo info_; | 654 NodeInfo info_; |
| 550 SiblingList siblings_; | 655 SiblingList siblings_; |
| 656 ZoneList<CharacterRange>* first_character_set_; |
| 551 // This variable keeps track of how many times code has been generated for | 657 // This variable keeps track of how many times code has been generated for |
| 552 // this node (in different traces). We don't keep track of where the | 658 // this node (in different traces). We don't keep track of where the |
| 553 // generated code is located unless the code is generated at the start of | 659 // generated code is located unless the code is generated at the start of |
| 554 // a trace, in which case it is generic and can be reused by flushing the | 660 // a trace, in which case it is generic and can be reused by flushing the |
| 555 // deferred operations in the current trace and generating a goto. | 661 // deferred operations in the current trace and generating a goto. |
| 556 int trace_count_; | 662 int trace_count_; |
| 557 }; | 663 }; |
| 558 | 664 |
| 559 | 665 |
| 560 // A simple closed interval. | 666 // A simple closed interval. |
| (...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 631 RegExpCompiler* compiler, | 737 RegExpCompiler* compiler, |
| 632 int filled_in, | 738 int filled_in, |
| 633 bool not_at_start) { | 739 bool not_at_start) { |
| 634 return on_success()->GetQuickCheckDetails( | 740 return on_success()->GetQuickCheckDetails( |
| 635 details, compiler, filled_in, not_at_start); | 741 details, compiler, filled_in, not_at_start); |
| 636 } | 742 } |
| 637 Type type() { return type_; } | 743 Type type() { return type_; } |
| 638 // TODO(erikcorry): We should allow some action nodes in greedy loops. | 744 // TODO(erikcorry): We should allow some action nodes in greedy loops. |
| 639 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } | 745 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } |
| 640 virtual ActionNode* Clone() { return new ActionNode(*this); } | 746 virtual ActionNode* Clone() { return new ActionNode(*this); } |
| 641 | 747 virtual int ComputeFirstCharacterSet(int budget); |
| 642 private: | 748 private: |
| 643 union { | 749 union { |
| 644 struct { | 750 struct { |
| 645 int reg; | 751 int reg; |
| 646 int value; | 752 int value; |
| 647 } u_store_register; | 753 } u_store_register; |
| 648 struct { | 754 struct { |
| 649 int reg; | 755 int reg; |
| 650 } u_increment_register; | 756 } u_increment_register; |
| 651 struct { | 757 struct { |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 697 bool not_at_start); | 803 bool not_at_start); |
| 698 ZoneList<TextElement>* elements() { return elms_; } | 804 ZoneList<TextElement>* elements() { return elms_; } |
| 699 void MakeCaseIndependent(bool is_ascii); | 805 void MakeCaseIndependent(bool is_ascii); |
| 700 virtual int GreedyLoopTextLength(); | 806 virtual int GreedyLoopTextLength(); |
| 701 virtual TextNode* Clone() { | 807 virtual TextNode* Clone() { |
| 702 TextNode* result = new TextNode(*this); | 808 TextNode* result = new TextNode(*this); |
| 703 result->CalculateOffsets(); | 809 result->CalculateOffsets(); |
| 704 return result; | 810 return result; |
| 705 } | 811 } |
| 706 void CalculateOffsets(); | 812 void CalculateOffsets(); |
| 707 | 813 virtual int ComputeFirstCharacterSet(int budget); |
| 708 private: | 814 private: |
| 709 enum TextEmitPassType { | 815 enum TextEmitPassType { |
| 710 NON_ASCII_MATCH, // Check for characters that can't match. | 816 NON_ASCII_MATCH, // Check for characters that can't match. |
| 711 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. | 817 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. |
| 712 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. | 818 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. |
| 713 CASE_CHARACTER_MATCH, // Case-independent single character check. | 819 CASE_CHARACTER_MATCH, // Case-independent single character check. |
| 714 CHARACTER_CLASS_MATCH // Character class. | 820 CHARACTER_CLASS_MATCH // Character class. |
| 715 }; | 821 }; |
| 716 static bool SkipPass(int pass, bool ignore_case); | 822 static bool SkipPass(int pass, bool ignore_case); |
| 717 static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH; | 823 static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH; |
| 718 static const int kLastPass = CHARACTER_CLASS_MATCH; | 824 static const int kLastPass = CHARACTER_CLASS_MATCH; |
| 719 void TextEmitPass(RegExpCompiler* compiler, | 825 void TextEmitPass(RegExpCompiler* compiler, |
| 720 TextEmitPassType pass, | 826 TextEmitPassType pass, |
| 721 bool preloaded, | 827 bool preloaded, |
| 722 Trace* trace, | 828 Trace* trace, |
| 723 bool first_element_checked, | 829 bool first_element_checked, |
| 724 int* checked_up_to); | 830 int* checked_up_to); |
| 725 int Length(); | 831 int Length(); |
| 726 ZoneList<TextElement>* elms_; | 832 ZoneList<TextElement>* elms_; |
| 727 }; | 833 }; |
| 728 | 834 |
| 729 | 835 |
| 730 class AssertionNode: public SeqRegExpNode { | 836 class AssertionNode: public SeqRegExpNode { |
| 731 public: | 837 public: |
| 732 enum AssertionNodeType { | 838 enum AssertionNodeType { |
| 733 AT_END, | 839 AT_END, |
| 734 AT_START, | 840 AT_START, |
| 735 AT_BOUNDARY, | 841 AT_BOUNDARY, |
| 736 AT_NON_BOUNDARY, | 842 AT_NON_BOUNDARY, |
| 737 AFTER_NEWLINE | 843 AFTER_NEWLINE, |
| 844 // Types not directly expressible in regexp syntax. |
| 845 // Used for modifying a boundary node if its following character is |
| 846 // known to be word and/or non-word. |
| 847 AFTER_NONWORD_CHARACTER, |
| 848 AFTER_WORD_CHARACTER |
| 738 }; | 849 }; |
| 739 static AssertionNode* AtEnd(RegExpNode* on_success) { | 850 static AssertionNode* AtEnd(RegExpNode* on_success) { |
| 740 return new AssertionNode(AT_END, on_success); | 851 return new AssertionNode(AT_END, on_success); |
| 741 } | 852 } |
| 742 static AssertionNode* AtStart(RegExpNode* on_success) { | 853 static AssertionNode* AtStart(RegExpNode* on_success) { |
| 743 return new AssertionNode(AT_START, on_success); | 854 return new AssertionNode(AT_START, on_success); |
| 744 } | 855 } |
| 745 static AssertionNode* AtBoundary(RegExpNode* on_success) { | 856 static AssertionNode* AtBoundary(RegExpNode* on_success) { |
| 746 return new AssertionNode(AT_BOUNDARY, on_success); | 857 return new AssertionNode(AT_BOUNDARY, on_success); |
| 747 } | 858 } |
| 748 static AssertionNode* AtNonBoundary(RegExpNode* on_success) { | 859 static AssertionNode* AtNonBoundary(RegExpNode* on_success) { |
| 749 return new AssertionNode(AT_NON_BOUNDARY, on_success); | 860 return new AssertionNode(AT_NON_BOUNDARY, on_success); |
| 750 } | 861 } |
| 751 static AssertionNode* AfterNewline(RegExpNode* on_success) { | 862 static AssertionNode* AfterNewline(RegExpNode* on_success) { |
| 752 return new AssertionNode(AFTER_NEWLINE, on_success); | 863 return new AssertionNode(AFTER_NEWLINE, on_success); |
| 753 } | 864 } |
| 754 virtual void Accept(NodeVisitor* visitor); | 865 virtual void Accept(NodeVisitor* visitor); |
| 755 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 866 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 756 virtual int EatsAtLeast(int still_to_find, int recursion_depth); | 867 virtual int EatsAtLeast(int still_to_find, int recursion_depth); |
| 757 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 868 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 758 RegExpCompiler* compiler, | 869 RegExpCompiler* compiler, |
| 759 int filled_in, | 870 int filled_in, |
| 760 bool not_at_start); | 871 bool not_at_start); |
| 872 virtual int ComputeFirstCharacterSet(int budget); |
| 761 virtual AssertionNode* Clone() { return new AssertionNode(*this); } | 873 virtual AssertionNode* Clone() { return new AssertionNode(*this); } |
| 762 AssertionNodeType type() { return type_; } | 874 AssertionNodeType type() { return type_; } |
| 875 void set_type(AssertionNodeType type) { type_ = type; } |
| 763 private: | 876 private: |
| 764 AssertionNode(AssertionNodeType t, RegExpNode* on_success) | 877 AssertionNode(AssertionNodeType t, RegExpNode* on_success) |
| 765 : SeqRegExpNode(on_success), type_(t) { } | 878 : SeqRegExpNode(on_success), type_(t) { } |
| 766 AssertionNodeType type_; | 879 AssertionNodeType type_; |
| 767 }; | 880 }; |
| 768 | 881 |
| 769 | 882 |
| 770 class BackReferenceNode: public SeqRegExpNode { | 883 class BackReferenceNode: public SeqRegExpNode { |
| 771 public: | 884 public: |
| 772 BackReferenceNode(int start_reg, | 885 BackReferenceNode(int start_reg, |
| 773 int end_reg, | 886 int end_reg, |
| 774 RegExpNode* on_success) | 887 RegExpNode* on_success) |
| 775 : SeqRegExpNode(on_success), | 888 : SeqRegExpNode(on_success), |
| 776 start_reg_(start_reg), | 889 start_reg_(start_reg), |
| 777 end_reg_(end_reg) { } | 890 end_reg_(end_reg) { } |
| 778 virtual void Accept(NodeVisitor* visitor); | 891 virtual void Accept(NodeVisitor* visitor); |
| 779 int start_register() { return start_reg_; } | 892 int start_register() { return start_reg_; } |
| 780 int end_register() { return end_reg_; } | 893 int end_register() { return end_reg_; } |
| 781 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 894 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 782 virtual int EatsAtLeast(int still_to_find, int recursion_depth); | 895 virtual int EatsAtLeast(int still_to_find, int recursion_depth); |
| 783 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 896 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 784 RegExpCompiler* compiler, | 897 RegExpCompiler* compiler, |
| 785 int characters_filled_in, | 898 int characters_filled_in, |
| 786 bool not_at_start) { | 899 bool not_at_start) { |
| 787 return; | 900 return; |
| 788 } | 901 } |
| 789 virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); } | 902 virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); } |
| 790 | 903 virtual int ComputeFirstCharacterSet(int budget); |
| 791 private: | 904 private: |
| 792 int start_reg_; | 905 int start_reg_; |
| 793 int end_reg_; | 906 int end_reg_; |
| 794 }; | 907 }; |
| 795 | 908 |
| 796 | 909 |
| 797 class EndNode: public RegExpNode { | 910 class EndNode: public RegExpNode { |
| 798 public: | 911 public: |
| 799 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; | 912 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; |
| 800 explicit EndNode(Action action) : action_(action) { } | 913 explicit EndNode(Action action) : action_(action) { } |
| 801 virtual void Accept(NodeVisitor* visitor); | 914 virtual void Accept(NodeVisitor* visitor); |
| 802 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 915 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 803 virtual int EatsAtLeast(int still_to_find, int recursion_depth) { return 0; } | 916 virtual int EatsAtLeast(int still_to_find, int recursion_depth) { return 0; } |
| 804 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 917 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 805 RegExpCompiler* compiler, | 918 RegExpCompiler* compiler, |
| 806 int characters_filled_in, | 919 int characters_filled_in, |
| 807 bool not_at_start) { | 920 bool not_at_start) { |
| 808 // Returning 0 from EatsAtLeast should ensure we never get here. | 921 // Returning 0 from EatsAtLeast should ensure we never get here. |
| 809 UNREACHABLE(); | 922 UNREACHABLE(); |
| 810 } | 923 } |
| 811 virtual EndNode* Clone() { return new EndNode(*this); } | 924 virtual EndNode* Clone() { return new EndNode(*this); } |
| 812 | |
| 813 private: | 925 private: |
| 814 Action action_; | 926 Action action_; |
| 815 }; | 927 }; |
| 816 | 928 |
| 817 | 929 |
| 818 class NegativeSubmatchSuccess: public EndNode { | 930 class NegativeSubmatchSuccess: public EndNode { |
| 819 public: | 931 public: |
| 820 NegativeSubmatchSuccess(int stack_pointer_reg, | 932 NegativeSubmatchSuccess(int stack_pointer_reg, |
| 821 int position_reg, | 933 int position_reg, |
| 822 int clear_capture_count, | 934 int clear_capture_count, |
| (...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 936 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1048 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 937 RegExpCompiler* compiler, | 1049 RegExpCompiler* compiler, |
| 938 int characters_filled_in, | 1050 int characters_filled_in, |
| 939 bool not_at_start); | 1051 bool not_at_start); |
| 940 // For a negative lookahead we don't emit the quick check for the | 1052 // For a negative lookahead we don't emit the quick check for the |
| 941 // alternative that is expected to fail. This is because quick check code | 1053 // alternative that is expected to fail. This is because quick check code |
| 942 // starts by loading enough characters for the alternative that takes fewest | 1054 // starts by loading enough characters for the alternative that takes fewest |
| 943 // characters, but on a negative lookahead the negative branch did not take | 1055 // characters, but on a negative lookahead the negative branch did not take |
| 944 // part in that calculation (EatsAtLeast) so the assumptions don't hold. | 1056 // part in that calculation (EatsAtLeast) so the assumptions don't hold. |
| 945 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } | 1057 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } |
| 1058 virtual int ComputeFirstCharacterSet(int budget); |
| 946 }; | 1059 }; |
| 947 | 1060 |
| 948 | 1061 |
| 949 class LoopChoiceNode: public ChoiceNode { | 1062 class LoopChoiceNode: public ChoiceNode { |
| 950 public: | 1063 public: |
| 951 explicit LoopChoiceNode(bool body_can_be_zero_length) | 1064 explicit LoopChoiceNode(bool body_can_be_zero_length) |
| 952 : ChoiceNode(2), | 1065 : ChoiceNode(2), |
| 953 loop_node_(NULL), | 1066 loop_node_(NULL), |
| 954 continue_node_(NULL), | 1067 continue_node_(NULL), |
| 955 body_can_be_zero_length_(body_can_be_zero_length) { } | 1068 body_can_be_zero_length_(body_can_be_zero_length) { } |
| 956 void AddLoopAlternative(GuardedAlternative alt); | 1069 void AddLoopAlternative(GuardedAlternative alt); |
| 957 void AddContinueAlternative(GuardedAlternative alt); | 1070 void AddContinueAlternative(GuardedAlternative alt); |
| 958 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1071 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 959 virtual int EatsAtLeast(int still_to_find, int recursion_depth); | 1072 virtual int EatsAtLeast(int still_to_find, int recursion_depth); |
| 960 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1073 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 961 RegExpCompiler* compiler, | 1074 RegExpCompiler* compiler, |
| 962 int characters_filled_in, | 1075 int characters_filled_in, |
| 963 bool not_at_start); | 1076 bool not_at_start); |
| 1077 virtual int ComputeFirstCharacterSet(int budget); |
| 964 virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); } | 1078 virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); } |
| 965 RegExpNode* loop_node() { return loop_node_; } | 1079 RegExpNode* loop_node() { return loop_node_; } |
| 966 RegExpNode* continue_node() { return continue_node_; } | 1080 RegExpNode* continue_node() { return continue_node_; } |
| 967 bool body_can_be_zero_length() { return body_can_be_zero_length_; } | 1081 bool body_can_be_zero_length() { return body_can_be_zero_length_; } |
| 968 virtual void Accept(NodeVisitor* visitor); | 1082 virtual void Accept(NodeVisitor* visitor); |
| 969 | 1083 |
| 970 private: | 1084 private: |
| 971 // AddAlternative is made private for loop nodes because alternatives | 1085 // AddAlternative is made private for loop nodes because alternatives |
| 972 // should not be added freely, we need to keep track of which node | 1086 // should not be added freely, we need to keep track of which node |
| 973 // goes back to the node itself. | 1087 // goes back to the node itself. |
| (...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1109 // These set methods and AdvanceCurrentPositionInTrace should be used only on | 1223 // These set methods and AdvanceCurrentPositionInTrace should be used only on |
| 1110 // new traces - the intention is that traces are immutable after creation. | 1224 // new traces - the intention is that traces are immutable after creation. |
| 1111 void add_action(DeferredAction* new_action) { | 1225 void add_action(DeferredAction* new_action) { |
| 1112 ASSERT(new_action->next_ == NULL); | 1226 ASSERT(new_action->next_ == NULL); |
| 1113 new_action->next_ = actions_; | 1227 new_action->next_ = actions_; |
| 1114 actions_ = new_action; | 1228 actions_ = new_action; |
| 1115 } | 1229 } |
| 1116 void set_backtrack(Label* backtrack) { backtrack_ = backtrack; } | 1230 void set_backtrack(Label* backtrack) { backtrack_ = backtrack; } |
| 1117 void set_stop_node(RegExpNode* node) { stop_node_ = node; } | 1231 void set_stop_node(RegExpNode* node) { stop_node_ = node; } |
| 1118 void set_loop_label(Label* label) { loop_label_ = label; } | 1232 void set_loop_label(Label* label) { loop_label_ = label; } |
| 1119 void set_characters_preloaded(int cpre) { characters_preloaded_ = cpre; } | 1233 void set_characters_preloaded(int count) { characters_preloaded_ = count; } |
| 1120 void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; } | 1234 void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; } |
| 1121 void set_flush_budget(int to) { flush_budget_ = to; } | 1235 void set_flush_budget(int to) { flush_budget_ = to; } |
| 1122 void set_quick_check_performed(QuickCheckDetails* d) { | 1236 void set_quick_check_performed(QuickCheckDetails* d) { |
| 1123 quick_check_performed_ = *d; | 1237 quick_check_performed_ = *d; |
| 1124 } | 1238 } |
| 1125 void InvalidateCurrentCharacter(); | 1239 void InvalidateCurrentCharacter(); |
| 1126 void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler); | 1240 void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler); |
| 1127 private: | 1241 private: |
| 1128 int FindAffectedRegisters(OutSet* affected_registers); | 1242 int FindAffectedRegisters(OutSet* affected_registers); |
| 1129 void PerformDeferredActions(RegExpMacroAssembler* macro, | 1243 void PerformDeferredActions(RegExpMacroAssembler* macro, |
| (...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1272 Handle<String> pattern, | 1386 Handle<String> pattern, |
| 1273 bool is_ascii); | 1387 bool is_ascii); |
| 1274 | 1388 |
| 1275 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1389 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
| 1276 }; | 1390 }; |
| 1277 | 1391 |
| 1278 | 1392 |
| 1279 } } // namespace v8::internal | 1393 } } // namespace v8::internal |
| 1280 | 1394 |
| 1281 #endif // V8_JSREGEXP_H_ | 1395 #endif // V8_JSREGEXP_H_ |
| OLD | NEW |