Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(177)

Side by Side Diff: src/jsregexp.h

Issue 507051: Attempt to make \b\w+ faster. Slight performance increase on, e.g., string unpacking. (Closed)
Patch Set: Addressed review comments. Created 10 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/ia32/regexp-macro-assembler-ia32.cc ('k') | src/jsregexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after
160 // A one element cache of the last utf8_subject string and its length. The 160 // A one element cache of the last utf8_subject string and its length. The
161 // subject JS String object is cached in the heap. We also cache a 161 // subject JS String object is cached in the heap. We also cache a
162 // translation between position and utf8 position. 162 // translation between position and utf8 position.
163 static char* utf8_subject_cache_; 163 static char* utf8_subject_cache_;
164 static int utf8_length_cache_; 164 static int utf8_length_cache_;
165 static int utf8_position_; 165 static int utf8_position_;
166 static int character_position_; 166 static int character_position_;
167 }; 167 };
168 168
169 169
170 // Represents the location of one element relative to the intersection of
171 // two sets. Corresponds to the four areas of a Venn diagram.
172 enum ElementInSetsRelation {
173 kInsideNone = 0,
174 kInsideFirst = 1,
175 kInsideSecond = 2,
176 kInsideBoth = 3
177 };
178
179
180 // Represents the relation of two sets.
181 // Sets can be either disjoint, partially or fully overlapping, or equal.
182 class SetRelation BASE_EMBEDDED {
183 public:
184 // Relation is represented by a bit saying whether there are elements in
185 // one set that is not in the other, and a bit saying that there are elements
186 // that are in both sets.
187
188 // Location of an element. Corresponds to the internal areas of
189 // a Venn diagram.
190 enum {
191 kInFirst = 1 << kInsideFirst,
192 kInSecond = 1 << kInsideSecond,
193 kInBoth = 1 << kInsideBoth
194 };
195 SetRelation() : bits_(0) {}
196 ~SetRelation() {}
197 // Add the existence of objects in a particular
198 void SetElementsInFirstSet() { bits_ |= kInFirst; }
199 void SetElementsInSecondSet() { bits_ |= kInSecond; }
200 void SetElementsInBothSets() { bits_ |= kInBoth; }
201 // Check the currently known relation of the sets (common functions only,
202 // for other combinations, use value() to get the bits and check them
203 // manually).
204 // Sets are completely disjoint.
205 bool Disjoint() { return (bits_ & kInBoth) == 0; }
206 // Sets are equal.
207 bool Equals() { return (bits_ & (kInFirst | kInSecond)) == 0; }
208 // First set contains second.
209 bool Contains() { return (bits_ & kInSecond) == 0; }
210 // Second set contains first.
211 bool ContainedIn() { return (bits_ & kInFirst) == 0; }
212 bool NonTrivialIntersection() {
213 return (bits_ == (kInFirst | kInSecond | kInBoth));
214 }
215 int value() { return bits_; }
216 private:
217 int bits_;
218 };
219
220
170 class CharacterRange { 221 class CharacterRange {
171 public: 222 public:
172 CharacterRange() : from_(0), to_(0) { } 223 CharacterRange() : from_(0), to_(0) { }
173 // For compatibility with the CHECK_OK macro 224 // For compatibility with the CHECK_OK macro
174 CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT 225 CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT
175 CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { } 226 CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { }
176 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges); 227 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges);
177 static Vector<const uc16> GetWordBounds(); 228 static Vector<const uc16> GetWordBounds();
178 static inline CharacterRange Singleton(uc16 value) { 229 static inline CharacterRange Singleton(uc16 value) {
179 return CharacterRange(value, value); 230 return CharacterRange(value, value);
(...skipping 11 matching lines...) Expand all
191 uc16 to() const { return to_; } 242 uc16 to() const { return to_; }
192 void set_to(uc16 value) { to_ = value; } 243 void set_to(uc16 value) { to_ = value; }
193 bool is_valid() { return from_ <= to_; } 244 bool is_valid() { return from_ <= to_; }
194 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } 245 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; }
195 bool IsSingleton() { return (from_ == to_); } 246 bool IsSingleton() { return (from_ == to_); }
196 void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii); 247 void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii);
197 static void Split(ZoneList<CharacterRange>* base, 248 static void Split(ZoneList<CharacterRange>* base,
198 Vector<const uc16> overlay, 249 Vector<const uc16> overlay,
199 ZoneList<CharacterRange>** included, 250 ZoneList<CharacterRange>** included,
200 ZoneList<CharacterRange>** excluded); 251 ZoneList<CharacterRange>** excluded);
201 252 // Whether a range list is in canonical form: Ranges ordered by from value,
253 // and ranges non-overlapping and non-adjacent.
254 static bool IsCanonical(ZoneList<CharacterRange>* ranges);
255 // Convert range list to canonical form. The characters covered by the ranges
256 // will still be the same, but no character is in more than one range, and
257 // adjacent ranges are merged. The resulting list may be shorter than the
258 // original, but cannot be longer.
259 static void Canonicalize(ZoneList<CharacterRange>* ranges);
260 // Check how the set of characters defined by a CharacterRange list relates
261 // to the set of word characters. List must be in canonical form.
262 static SetRelation WordCharacterRelation(ZoneList<CharacterRange>* ranges);
263 // Takes two character range lists (representing character sets) in canonical
264 // form and merges them.
265 // The characters that are only covered by the first set are added to
266 // first_set_only_out. the characters that are only in the second set are
267 // added to second_set_only_out, and the characters that are in both are
268 // added to both_sets_out.
269 // The pointers to first_set_only_out, second_set_only_out and both_sets_out
270 // should be to empty lists, but they need not be distinct, and may be NULL.
271 // If NULL, the characters are dropped, and if two arguments are the same
272 // pointer, the result is the union of the two sets that would be created
273 // if the pointers had been distinct.
274 // This way, the Merge function can compute all the usual set operations:
275 // union (all three out-sets are equal), intersection (only both_sets_out is
276 // non-NULL), and set difference (only first_set is non-NULL).
277 static void Merge(ZoneList<CharacterRange>* first_set,
278 ZoneList<CharacterRange>* second_set,
279 ZoneList<CharacterRange>* first_set_only_out,
280 ZoneList<CharacterRange>* second_set_only_out,
281 ZoneList<CharacterRange>* both_sets_out);
282 // Negate the contents of a character range in canonical form.
283 static void Negate(ZoneList<CharacterRange>* src,
284 ZoneList<CharacterRange>* dst);
202 static const int kRangeCanonicalizeMax = 0x346; 285 static const int kRangeCanonicalizeMax = 0x346;
203 static const int kStartMarker = (1 << 24); 286 static const int kStartMarker = (1 << 24);
204 static const int kPayloadMask = (1 << 24) - 1; 287 static const int kPayloadMask = (1 << 24) - 1;
205 288
206 private: 289 private:
207 uc16 from_; 290 uc16 from_;
208 uc16 to_; 291 uc16 to_;
209 }; 292 };
210 293
211 294
(...skipping 253 matching lines...) Expand 10 before | Expand all | Expand 10 after
465 uint32_t mask_; 548 uint32_t mask_;
466 uint32_t value_; 549 uint32_t value_;
467 // If set to true, there is no way this quick check can match at all. 550 // If set to true, there is no way this quick check can match at all.
468 // E.g., if it requires to be at the start of the input, and isn't. 551 // E.g., if it requires to be at the start of the input, and isn't.
469 bool cannot_match_; 552 bool cannot_match_;
470 }; 553 };
471 554
472 555
473 class RegExpNode: public ZoneObject { 556 class RegExpNode: public ZoneObject {
474 public: 557 public:
475 RegExpNode() : trace_count_(0) { } 558 RegExpNode() : first_character_set_(NULL), trace_count_(0) { }
476 virtual ~RegExpNode(); 559 virtual ~RegExpNode();
477 virtual void Accept(NodeVisitor* visitor) = 0; 560 virtual void Accept(NodeVisitor* visitor) = 0;
478 // Generates a goto to this node or actually generates the code at this point. 561 // Generates a goto to this node or actually generates the code at this point.
479 virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0; 562 virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0;
480 // How many characters must this node consume at a minimum in order to 563 // How many characters must this node consume at a minimum in order to
481 // succeed. If we have found at least 'still_to_find' characters that 564 // succeed. If we have found at least 'still_to_find' characters that
482 // must be consumed there is no need to ask any following nodes whether 565 // must be consumed there is no need to ask any following nodes whether
483 // they are sure to eat any more characters. 566 // they are sure to eat any more characters.
484 virtual int EatsAtLeast(int still_to_find, int recursion_depth) = 0; 567 virtual int EatsAtLeast(int still_to_find, int recursion_depth) = 0;
485 // Emits some quick code that checks whether the preloaded characters match. 568 // Emits some quick code that checks whether the preloaded characters match.
(...skipping 30 matching lines...) Expand all
516 // Static version of EnsureSibling that expresses the fact that the 599 // Static version of EnsureSibling that expresses the fact that the
517 // result has the same type as the input. 600 // result has the same type as the input.
518 template <class C> 601 template <class C>
519 static C* EnsureSibling(C* node, NodeInfo* info, bool* cloned) { 602 static C* EnsureSibling(C* node, NodeInfo* info, bool* cloned) {
520 return static_cast<C*>(node->EnsureSibling(info, cloned)); 603 return static_cast<C*>(node->EnsureSibling(info, cloned));
521 } 604 }
522 605
523 SiblingList* siblings() { return &siblings_; } 606 SiblingList* siblings() { return &siblings_; }
524 void set_siblings(SiblingList* other) { siblings_ = *other; } 607 void set_siblings(SiblingList* other) { siblings_ = *other; }
525 608
609 // Return the set of possible next characters recognized by the regexp
610 // (or a safe subset, potentially the set of all characters).
611 ZoneList<CharacterRange>* FirstCharacterSet();
612
613 // Compute (if possible within the budget of traversed nodes) the
614 // possible first characters of the input matched by this node and
615 // its continuation. Returns the remaining budget after the computation.
616 // If the budget is spent, the result is negative, and the cached
617 // first_character_set_ value isn't set.
618 virtual int ComputeFirstCharacterSet(int budget);
619
620 // Get and set the cached first character set value.
621 ZoneList<CharacterRange>* first_character_set() {
622 return first_character_set_;
623 }
624 void set_first_character_set(ZoneList<CharacterRange>* character_set) {
625 first_character_set_ = character_set;
626 }
627
526 protected: 628 protected:
527 enum LimitResult { DONE, CONTINUE }; 629 enum LimitResult { DONE, CONTINUE };
630 static const int kComputeFirstCharacterSetFail = -1;
631
528 LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace); 632 LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace);
529 633
530 // Returns a sibling of this node whose interests and assumptions 634 // Returns a sibling of this node whose interests and assumptions
531 // match the ones in the given node info. If no sibling exists NULL 635 // match the ones in the given node info. If no sibling exists NULL
532 // is returned. 636 // is returned.
533 RegExpNode* TryGetSibling(NodeInfo* info); 637 RegExpNode* TryGetSibling(NodeInfo* info);
534 638
535 // Returns a sibling of this node whose interests match the ones in 639 // Returns a sibling of this node whose interests match the ones in
536 // the given node info. The info must not contain any assertions. 640 // the given node info. The info must not contain any assertions.
537 // If no node exists a new one will be created by cloning the current 641 // If no node exists a new one will be created by cloning the current
538 // node. The result will always be an instance of the same concrete 642 // node. The result will always be an instance of the same concrete
539 // class as this node. 643 // class as this node.
540 RegExpNode* EnsureSibling(NodeInfo* info, bool* cloned); 644 RegExpNode* EnsureSibling(NodeInfo* info, bool* cloned);
541 645
542 // Returns a clone of this node initialized using the copy constructor 646 // Returns a clone of this node initialized using the copy constructor
543 // of its concrete class. Note that the node may have to be pre- 647 // of its concrete class. Note that the node may have to be pre-
544 // processed before it is on a usable state. 648 // processed before it is on a usable state.
545 virtual RegExpNode* Clone() = 0; 649 virtual RegExpNode* Clone() = 0;
546 650
547 private: 651 private:
652 static const int kFirstCharBudget = 10;
548 Label label_; 653 Label label_;
549 NodeInfo info_; 654 NodeInfo info_;
550 SiblingList siblings_; 655 SiblingList siblings_;
656 ZoneList<CharacterRange>* first_character_set_;
551 // This variable keeps track of how many times code has been generated for 657 // This variable keeps track of how many times code has been generated for
552 // this node (in different traces). We don't keep track of where the 658 // this node (in different traces). We don't keep track of where the
553 // generated code is located unless the code is generated at the start of 659 // generated code is located unless the code is generated at the start of
554 // a trace, in which case it is generic and can be reused by flushing the 660 // a trace, in which case it is generic and can be reused by flushing the
555 // deferred operations in the current trace and generating a goto. 661 // deferred operations in the current trace and generating a goto.
556 int trace_count_; 662 int trace_count_;
557 }; 663 };
558 664
559 665
560 // A simple closed interval. 666 // A simple closed interval.
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
631 RegExpCompiler* compiler, 737 RegExpCompiler* compiler,
632 int filled_in, 738 int filled_in,
633 bool not_at_start) { 739 bool not_at_start) {
634 return on_success()->GetQuickCheckDetails( 740 return on_success()->GetQuickCheckDetails(
635 details, compiler, filled_in, not_at_start); 741 details, compiler, filled_in, not_at_start);
636 } 742 }
637 Type type() { return type_; } 743 Type type() { return type_; }
638 // TODO(erikcorry): We should allow some action nodes in greedy loops. 744 // TODO(erikcorry): We should allow some action nodes in greedy loops.
639 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } 745 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
640 virtual ActionNode* Clone() { return new ActionNode(*this); } 746 virtual ActionNode* Clone() { return new ActionNode(*this); }
641 747 virtual int ComputeFirstCharacterSet(int budget);
642 private: 748 private:
643 union { 749 union {
644 struct { 750 struct {
645 int reg; 751 int reg;
646 int value; 752 int value;
647 } u_store_register; 753 } u_store_register;
648 struct { 754 struct {
649 int reg; 755 int reg;
650 } u_increment_register; 756 } u_increment_register;
651 struct { 757 struct {
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
697 bool not_at_start); 803 bool not_at_start);
698 ZoneList<TextElement>* elements() { return elms_; } 804 ZoneList<TextElement>* elements() { return elms_; }
699 void MakeCaseIndependent(bool is_ascii); 805 void MakeCaseIndependent(bool is_ascii);
700 virtual int GreedyLoopTextLength(); 806 virtual int GreedyLoopTextLength();
701 virtual TextNode* Clone() { 807 virtual TextNode* Clone() {
702 TextNode* result = new TextNode(*this); 808 TextNode* result = new TextNode(*this);
703 result->CalculateOffsets(); 809 result->CalculateOffsets();
704 return result; 810 return result;
705 } 811 }
706 void CalculateOffsets(); 812 void CalculateOffsets();
707 813 virtual int ComputeFirstCharacterSet(int budget);
708 private: 814 private:
709 enum TextEmitPassType { 815 enum TextEmitPassType {
710 NON_ASCII_MATCH, // Check for characters that can't match. 816 NON_ASCII_MATCH, // Check for characters that can't match.
711 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. 817 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check.
712 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. 818 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs.
713 CASE_CHARACTER_MATCH, // Case-independent single character check. 819 CASE_CHARACTER_MATCH, // Case-independent single character check.
714 CHARACTER_CLASS_MATCH // Character class. 820 CHARACTER_CLASS_MATCH // Character class.
715 }; 821 };
716 static bool SkipPass(int pass, bool ignore_case); 822 static bool SkipPass(int pass, bool ignore_case);
717 static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH; 823 static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH;
718 static const int kLastPass = CHARACTER_CLASS_MATCH; 824 static const int kLastPass = CHARACTER_CLASS_MATCH;
719 void TextEmitPass(RegExpCompiler* compiler, 825 void TextEmitPass(RegExpCompiler* compiler,
720 TextEmitPassType pass, 826 TextEmitPassType pass,
721 bool preloaded, 827 bool preloaded,
722 Trace* trace, 828 Trace* trace,
723 bool first_element_checked, 829 bool first_element_checked,
724 int* checked_up_to); 830 int* checked_up_to);
725 int Length(); 831 int Length();
726 ZoneList<TextElement>* elms_; 832 ZoneList<TextElement>* elms_;
727 }; 833 };
728 834
729 835
730 class AssertionNode: public SeqRegExpNode { 836 class AssertionNode: public SeqRegExpNode {
731 public: 837 public:
732 enum AssertionNodeType { 838 enum AssertionNodeType {
733 AT_END, 839 AT_END,
734 AT_START, 840 AT_START,
735 AT_BOUNDARY, 841 AT_BOUNDARY,
736 AT_NON_BOUNDARY, 842 AT_NON_BOUNDARY,
737 AFTER_NEWLINE 843 AFTER_NEWLINE,
844 // Types not directly expressible in regexp syntax.
845 // Used for modifying a boundary node if its following character is
846 // known to be word and/or non-word.
847 AFTER_NONWORD_CHARACTER,
848 AFTER_WORD_CHARACTER
738 }; 849 };
739 static AssertionNode* AtEnd(RegExpNode* on_success) { 850 static AssertionNode* AtEnd(RegExpNode* on_success) {
740 return new AssertionNode(AT_END, on_success); 851 return new AssertionNode(AT_END, on_success);
741 } 852 }
742 static AssertionNode* AtStart(RegExpNode* on_success) { 853 static AssertionNode* AtStart(RegExpNode* on_success) {
743 return new AssertionNode(AT_START, on_success); 854 return new AssertionNode(AT_START, on_success);
744 } 855 }
745 static AssertionNode* AtBoundary(RegExpNode* on_success) { 856 static AssertionNode* AtBoundary(RegExpNode* on_success) {
746 return new AssertionNode(AT_BOUNDARY, on_success); 857 return new AssertionNode(AT_BOUNDARY, on_success);
747 } 858 }
748 static AssertionNode* AtNonBoundary(RegExpNode* on_success) { 859 static AssertionNode* AtNonBoundary(RegExpNode* on_success) {
749 return new AssertionNode(AT_NON_BOUNDARY, on_success); 860 return new AssertionNode(AT_NON_BOUNDARY, on_success);
750 } 861 }
751 static AssertionNode* AfterNewline(RegExpNode* on_success) { 862 static AssertionNode* AfterNewline(RegExpNode* on_success) {
752 return new AssertionNode(AFTER_NEWLINE, on_success); 863 return new AssertionNode(AFTER_NEWLINE, on_success);
753 } 864 }
754 virtual void Accept(NodeVisitor* visitor); 865 virtual void Accept(NodeVisitor* visitor);
755 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 866 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
756 virtual int EatsAtLeast(int still_to_find, int recursion_depth); 867 virtual int EatsAtLeast(int still_to_find, int recursion_depth);
757 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 868 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
758 RegExpCompiler* compiler, 869 RegExpCompiler* compiler,
759 int filled_in, 870 int filled_in,
760 bool not_at_start); 871 bool not_at_start);
872 virtual int ComputeFirstCharacterSet(int budget);
761 virtual AssertionNode* Clone() { return new AssertionNode(*this); } 873 virtual AssertionNode* Clone() { return new AssertionNode(*this); }
762 AssertionNodeType type() { return type_; } 874 AssertionNodeType type() { return type_; }
875 void set_type(AssertionNodeType type) { type_ = type; }
763 private: 876 private:
764 AssertionNode(AssertionNodeType t, RegExpNode* on_success) 877 AssertionNode(AssertionNodeType t, RegExpNode* on_success)
765 : SeqRegExpNode(on_success), type_(t) { } 878 : SeqRegExpNode(on_success), type_(t) { }
766 AssertionNodeType type_; 879 AssertionNodeType type_;
767 }; 880 };
768 881
769 882
770 class BackReferenceNode: public SeqRegExpNode { 883 class BackReferenceNode: public SeqRegExpNode {
771 public: 884 public:
772 BackReferenceNode(int start_reg, 885 BackReferenceNode(int start_reg,
773 int end_reg, 886 int end_reg,
774 RegExpNode* on_success) 887 RegExpNode* on_success)
775 : SeqRegExpNode(on_success), 888 : SeqRegExpNode(on_success),
776 start_reg_(start_reg), 889 start_reg_(start_reg),
777 end_reg_(end_reg) { } 890 end_reg_(end_reg) { }
778 virtual void Accept(NodeVisitor* visitor); 891 virtual void Accept(NodeVisitor* visitor);
779 int start_register() { return start_reg_; } 892 int start_register() { return start_reg_; }
780 int end_register() { return end_reg_; } 893 int end_register() { return end_reg_; }
781 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 894 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
782 virtual int EatsAtLeast(int still_to_find, int recursion_depth); 895 virtual int EatsAtLeast(int still_to_find, int recursion_depth);
783 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 896 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
784 RegExpCompiler* compiler, 897 RegExpCompiler* compiler,
785 int characters_filled_in, 898 int characters_filled_in,
786 bool not_at_start) { 899 bool not_at_start) {
787 return; 900 return;
788 } 901 }
789 virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); } 902 virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); }
790 903 virtual int ComputeFirstCharacterSet(int budget);
791 private: 904 private:
792 int start_reg_; 905 int start_reg_;
793 int end_reg_; 906 int end_reg_;
794 }; 907 };
795 908
796 909
797 class EndNode: public RegExpNode { 910 class EndNode: public RegExpNode {
798 public: 911 public:
799 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; 912 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
800 explicit EndNode(Action action) : action_(action) { } 913 explicit EndNode(Action action) : action_(action) { }
801 virtual void Accept(NodeVisitor* visitor); 914 virtual void Accept(NodeVisitor* visitor);
802 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 915 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
803 virtual int EatsAtLeast(int still_to_find, int recursion_depth) { return 0; } 916 virtual int EatsAtLeast(int still_to_find, int recursion_depth) { return 0; }
804 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 917 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
805 RegExpCompiler* compiler, 918 RegExpCompiler* compiler,
806 int characters_filled_in, 919 int characters_filled_in,
807 bool not_at_start) { 920 bool not_at_start) {
808 // Returning 0 from EatsAtLeast should ensure we never get here. 921 // Returning 0 from EatsAtLeast should ensure we never get here.
809 UNREACHABLE(); 922 UNREACHABLE();
810 } 923 }
811 virtual EndNode* Clone() { return new EndNode(*this); } 924 virtual EndNode* Clone() { return new EndNode(*this); }
812
813 private: 925 private:
814 Action action_; 926 Action action_;
815 }; 927 };
816 928
817 929
818 class NegativeSubmatchSuccess: public EndNode { 930 class NegativeSubmatchSuccess: public EndNode {
819 public: 931 public:
820 NegativeSubmatchSuccess(int stack_pointer_reg, 932 NegativeSubmatchSuccess(int stack_pointer_reg,
821 int position_reg, 933 int position_reg,
822 int clear_capture_count, 934 int clear_capture_count,
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after
936 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1048 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
937 RegExpCompiler* compiler, 1049 RegExpCompiler* compiler,
938 int characters_filled_in, 1050 int characters_filled_in,
939 bool not_at_start); 1051 bool not_at_start);
940 // For a negative lookahead we don't emit the quick check for the 1052 // For a negative lookahead we don't emit the quick check for the
941 // alternative that is expected to fail. This is because quick check code 1053 // alternative that is expected to fail. This is because quick check code
942 // starts by loading enough characters for the alternative that takes fewest 1054 // starts by loading enough characters for the alternative that takes fewest
943 // characters, but on a negative lookahead the negative branch did not take 1055 // characters, but on a negative lookahead the negative branch did not take
944 // part in that calculation (EatsAtLeast) so the assumptions don't hold. 1056 // part in that calculation (EatsAtLeast) so the assumptions don't hold.
945 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } 1057 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; }
1058 virtual int ComputeFirstCharacterSet(int budget);
946 }; 1059 };
947 1060
948 1061
949 class LoopChoiceNode: public ChoiceNode { 1062 class LoopChoiceNode: public ChoiceNode {
950 public: 1063 public:
951 explicit LoopChoiceNode(bool body_can_be_zero_length) 1064 explicit LoopChoiceNode(bool body_can_be_zero_length)
952 : ChoiceNode(2), 1065 : ChoiceNode(2),
953 loop_node_(NULL), 1066 loop_node_(NULL),
954 continue_node_(NULL), 1067 continue_node_(NULL),
955 body_can_be_zero_length_(body_can_be_zero_length) { } 1068 body_can_be_zero_length_(body_can_be_zero_length) { }
956 void AddLoopAlternative(GuardedAlternative alt); 1069 void AddLoopAlternative(GuardedAlternative alt);
957 void AddContinueAlternative(GuardedAlternative alt); 1070 void AddContinueAlternative(GuardedAlternative alt);
958 virtual void Emit(RegExpCompiler* compiler, Trace* trace); 1071 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
959 virtual int EatsAtLeast(int still_to_find, int recursion_depth); 1072 virtual int EatsAtLeast(int still_to_find, int recursion_depth);
960 virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1073 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
961 RegExpCompiler* compiler, 1074 RegExpCompiler* compiler,
962 int characters_filled_in, 1075 int characters_filled_in,
963 bool not_at_start); 1076 bool not_at_start);
1077 virtual int ComputeFirstCharacterSet(int budget);
964 virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); } 1078 virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); }
965 RegExpNode* loop_node() { return loop_node_; } 1079 RegExpNode* loop_node() { return loop_node_; }
966 RegExpNode* continue_node() { return continue_node_; } 1080 RegExpNode* continue_node() { return continue_node_; }
967 bool body_can_be_zero_length() { return body_can_be_zero_length_; } 1081 bool body_can_be_zero_length() { return body_can_be_zero_length_; }
968 virtual void Accept(NodeVisitor* visitor); 1082 virtual void Accept(NodeVisitor* visitor);
969 1083
970 private: 1084 private:
971 // AddAlternative is made private for loop nodes because alternatives 1085 // AddAlternative is made private for loop nodes because alternatives
972 // should not be added freely, we need to keep track of which node 1086 // should not be added freely, we need to keep track of which node
973 // goes back to the node itself. 1087 // goes back to the node itself.
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after
1109 // These set methods and AdvanceCurrentPositionInTrace should be used only on 1223 // These set methods and AdvanceCurrentPositionInTrace should be used only on
1110 // new traces - the intention is that traces are immutable after creation. 1224 // new traces - the intention is that traces are immutable after creation.
1111 void add_action(DeferredAction* new_action) { 1225 void add_action(DeferredAction* new_action) {
1112 ASSERT(new_action->next_ == NULL); 1226 ASSERT(new_action->next_ == NULL);
1113 new_action->next_ = actions_; 1227 new_action->next_ = actions_;
1114 actions_ = new_action; 1228 actions_ = new_action;
1115 } 1229 }
1116 void set_backtrack(Label* backtrack) { backtrack_ = backtrack; } 1230 void set_backtrack(Label* backtrack) { backtrack_ = backtrack; }
1117 void set_stop_node(RegExpNode* node) { stop_node_ = node; } 1231 void set_stop_node(RegExpNode* node) { stop_node_ = node; }
1118 void set_loop_label(Label* label) { loop_label_ = label; } 1232 void set_loop_label(Label* label) { loop_label_ = label; }
1119 void set_characters_preloaded(int cpre) { characters_preloaded_ = cpre; } 1233 void set_characters_preloaded(int count) { characters_preloaded_ = count; }
1120 void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; } 1234 void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; }
1121 void set_flush_budget(int to) { flush_budget_ = to; } 1235 void set_flush_budget(int to) { flush_budget_ = to; }
1122 void set_quick_check_performed(QuickCheckDetails* d) { 1236 void set_quick_check_performed(QuickCheckDetails* d) {
1123 quick_check_performed_ = *d; 1237 quick_check_performed_ = *d;
1124 } 1238 }
1125 void InvalidateCurrentCharacter(); 1239 void InvalidateCurrentCharacter();
1126 void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler); 1240 void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler);
1127 private: 1241 private:
1128 int FindAffectedRegisters(OutSet* affected_registers); 1242 int FindAffectedRegisters(OutSet* affected_registers);
1129 void PerformDeferredActions(RegExpMacroAssembler* macro, 1243 void PerformDeferredActions(RegExpMacroAssembler* macro,
(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after
1272 Handle<String> pattern, 1386 Handle<String> pattern,
1273 bool is_ascii); 1387 bool is_ascii);
1274 1388
1275 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); 1389 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
1276 }; 1390 };
1277 1391
1278 1392
1279 } } // namespace v8::internal 1393 } } // namespace v8::internal
1280 1394
1281 #endif // V8_JSREGEXP_H_ 1395 #endif // V8_JSREGEXP_H_
OLDNEW
« no previous file with comments | « src/ia32/regexp-macro-assembler-ia32.cc ('k') | src/jsregexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698