OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_JSREGEXP_H_ | 5 #ifndef V8_JSREGEXP_H_ |
6 #define V8_JSREGEXP_H_ | 6 #define V8_JSREGEXP_H_ |
7 | 7 |
8 #include "src/allocation.h" | 8 #include "src/allocation.h" |
9 #include "src/assembler.h" | 9 #include "src/assembler.h" |
10 | 10 |
(...skipping 601 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
612 return NULL; | 612 return NULL; |
613 } | 613 } |
614 | 614 |
615 // Collects information on the possible code units (mod 128) that can match if | 615 // Collects information on the possible code units (mod 128) that can match if |
616 // we look forward. This is used for a Boyer-Moore-like string searching | 616 // we look forward. This is used for a Boyer-Moore-like string searching |
617 // implementation. TODO(erikcorry): This should share more code with | 617 // implementation. TODO(erikcorry): This should share more code with |
618 // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit | 618 // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit |
619 // the number of nodes we are willing to look at in order to create this data. | 619 // the number of nodes we are willing to look at in order to create this data. |
620 static const int kRecursionBudget = 200; | 620 static const int kRecursionBudget = 200; |
621 bool KeepRecursing(RegExpCompiler* compiler); | 621 bool KeepRecursing(RegExpCompiler* compiler); |
622 virtual void FillInBMInfo(int offset, | 622 virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, |
623 int budget, | 623 BoyerMooreLookahead* bm, bool not_at_start) { |
624 BoyerMooreLookahead* bm, | |
625 bool not_at_start) { | |
626 UNREACHABLE(); | 624 UNREACHABLE(); |
627 } | 625 } |
628 | 626 |
629 // If we know that the input is one-byte then there are some nodes that can | 627 // If we know that the input is one-byte then there are some nodes that can |
630 // never match. This method returns a node that can be substituted for | 628 // never match. This method returns a node that can be substituted for |
631 // itself, or NULL if the node can never match. | 629 // itself, or NULL if the node can never match. |
632 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case) { | 630 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case) { |
633 return this; | 631 return this; |
634 } | 632 } |
635 // Helper for FilterOneByte. | 633 // Helper for FilterOneByte. |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
724 }; | 722 }; |
725 | 723 |
726 | 724 |
727 class SeqRegExpNode: public RegExpNode { | 725 class SeqRegExpNode: public RegExpNode { |
728 public: | 726 public: |
729 explicit SeqRegExpNode(RegExpNode* on_success) | 727 explicit SeqRegExpNode(RegExpNode* on_success) |
730 : RegExpNode(on_success->zone()), on_success_(on_success) { } | 728 : RegExpNode(on_success->zone()), on_success_(on_success) { } |
731 RegExpNode* on_success() { return on_success_; } | 729 RegExpNode* on_success() { return on_success_; } |
732 void set_on_success(RegExpNode* node) { on_success_ = node; } | 730 void set_on_success(RegExpNode* node) { on_success_ = node; } |
733 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); | 731 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); |
734 virtual void FillInBMInfo(int offset, | 732 virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, |
735 int budget, | 733 BoyerMooreLookahead* bm, bool not_at_start) { |
736 BoyerMooreLookahead* bm, | 734 on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); |
737 bool not_at_start) { | |
738 on_success_->FillInBMInfo(offset, budget - 1, bm, not_at_start); | |
739 if (offset == 0) set_bm_info(not_at_start, bm); | 735 if (offset == 0) set_bm_info(not_at_start, bm); |
740 } | 736 } |
741 | 737 |
742 protected: | 738 protected: |
743 RegExpNode* FilterSuccessor(int depth, bool ignore_case); | 739 RegExpNode* FilterSuccessor(int depth, bool ignore_case); |
744 | 740 |
745 private: | 741 private: |
746 RegExpNode* on_success_; | 742 RegExpNode* on_success_; |
747 }; | 743 }; |
748 | 744 |
(...skipping 30 matching lines...) Expand all Loading... |
779 virtual void Accept(NodeVisitor* visitor); | 775 virtual void Accept(NodeVisitor* visitor); |
780 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 776 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
781 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); | 777 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); |
782 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 778 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
783 RegExpCompiler* compiler, | 779 RegExpCompiler* compiler, |
784 int filled_in, | 780 int filled_in, |
785 bool not_at_start) { | 781 bool not_at_start) { |
786 return on_success()->GetQuickCheckDetails( | 782 return on_success()->GetQuickCheckDetails( |
787 details, compiler, filled_in, not_at_start); | 783 details, compiler, filled_in, not_at_start); |
788 } | 784 } |
789 virtual void FillInBMInfo(int offset, | 785 virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, |
790 int budget, | 786 BoyerMooreLookahead* bm, bool not_at_start); |
791 BoyerMooreLookahead* bm, | |
792 bool not_at_start); | |
793 ActionType action_type() { return action_type_; } | 787 ActionType action_type() { return action_type_; } |
794 // TODO(erikcorry): We should allow some action nodes in greedy loops. | 788 // TODO(erikcorry): We should allow some action nodes in greedy loops. |
795 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } | 789 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } |
796 | 790 |
797 private: | 791 private: |
798 union { | 792 union { |
799 struct { | 793 struct { |
800 int reg; | 794 int reg; |
801 int value; | 795 int value; |
802 } u_store_register; | 796 } u_store_register; |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
848 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); | 842 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); |
849 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 843 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
850 RegExpCompiler* compiler, | 844 RegExpCompiler* compiler, |
851 int characters_filled_in, | 845 int characters_filled_in, |
852 bool not_at_start); | 846 bool not_at_start); |
853 ZoneList<TextElement>* elements() { return elms_; } | 847 ZoneList<TextElement>* elements() { return elms_; } |
854 void MakeCaseIndependent(Isolate* isolate, bool is_one_byte); | 848 void MakeCaseIndependent(Isolate* isolate, bool is_one_byte); |
855 virtual int GreedyLoopTextLength(); | 849 virtual int GreedyLoopTextLength(); |
856 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( | 850 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
857 RegExpCompiler* compiler); | 851 RegExpCompiler* compiler); |
858 virtual void FillInBMInfo(int offset, | 852 virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, |
859 int budget, | 853 BoyerMooreLookahead* bm, bool not_at_start); |
860 BoyerMooreLookahead* bm, | |
861 bool not_at_start); | |
862 void CalculateOffsets(); | 854 void CalculateOffsets(); |
863 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); | 855 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); |
864 | 856 |
865 private: | 857 private: |
866 enum TextEmitPassType { | 858 enum TextEmitPassType { |
867 NON_LATIN1_MATCH, // Check for characters that can't match. | 859 NON_LATIN1_MATCH, // Check for characters that can't match. |
868 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. | 860 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. |
869 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. | 861 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. |
870 CASE_CHARACTER_MATCH, // Case-independent single character check. | 862 CASE_CHARACTER_MATCH, // Case-independent single character check. |
871 CHARACTER_CLASS_MATCH // Character class. | 863 CHARACTER_CLASS_MATCH // Character class. |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
908 static AssertionNode* AfterNewline(RegExpNode* on_success) { | 900 static AssertionNode* AfterNewline(RegExpNode* on_success) { |
909 return new(on_success->zone()) AssertionNode(AFTER_NEWLINE, on_success); | 901 return new(on_success->zone()) AssertionNode(AFTER_NEWLINE, on_success); |
910 } | 902 } |
911 virtual void Accept(NodeVisitor* visitor); | 903 virtual void Accept(NodeVisitor* visitor); |
912 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 904 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
913 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); | 905 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); |
914 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 906 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
915 RegExpCompiler* compiler, | 907 RegExpCompiler* compiler, |
916 int filled_in, | 908 int filled_in, |
917 bool not_at_start); | 909 bool not_at_start); |
918 virtual void FillInBMInfo(int offset, | 910 virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, |
919 int budget, | 911 BoyerMooreLookahead* bm, bool not_at_start); |
920 BoyerMooreLookahead* bm, | |
921 bool not_at_start); | |
922 AssertionType assertion_type() { return assertion_type_; } | 912 AssertionType assertion_type() { return assertion_type_; } |
923 | 913 |
924 private: | 914 private: |
925 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); | 915 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); |
926 enum IfPrevious { kIsNonWord, kIsWord }; | 916 enum IfPrevious { kIsNonWord, kIsWord }; |
927 void BacktrackIfPrevious(RegExpCompiler* compiler, | 917 void BacktrackIfPrevious(RegExpCompiler* compiler, |
928 Trace* trace, | 918 Trace* trace, |
929 IfPrevious backtrack_if_previous); | 919 IfPrevious backtrack_if_previous); |
930 AssertionNode(AssertionType t, RegExpNode* on_success) | 920 AssertionNode(AssertionType t, RegExpNode* on_success) |
931 : SeqRegExpNode(on_success), assertion_type_(t) { } | 921 : SeqRegExpNode(on_success), assertion_type_(t) { } |
(...skipping 15 matching lines...) Expand all Loading... |
947 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 937 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
948 virtual int EatsAtLeast(int still_to_find, | 938 virtual int EatsAtLeast(int still_to_find, |
949 int recursion_depth, | 939 int recursion_depth, |
950 bool not_at_start); | 940 bool not_at_start); |
951 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 941 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
952 RegExpCompiler* compiler, | 942 RegExpCompiler* compiler, |
953 int characters_filled_in, | 943 int characters_filled_in, |
954 bool not_at_start) { | 944 bool not_at_start) { |
955 return; | 945 return; |
956 } | 946 } |
957 virtual void FillInBMInfo(int offset, | 947 virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, |
958 int budget, | 948 BoyerMooreLookahead* bm, bool not_at_start); |
959 BoyerMooreLookahead* bm, | |
960 bool not_at_start); | |
961 | 949 |
962 private: | 950 private: |
963 int start_reg_; | 951 int start_reg_; |
964 int end_reg_; | 952 int end_reg_; |
965 }; | 953 }; |
966 | 954 |
967 | 955 |
968 class EndNode: public RegExpNode { | 956 class EndNode: public RegExpNode { |
969 public: | 957 public: |
970 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; | 958 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; |
971 explicit EndNode(Action action, Zone* zone) | 959 explicit EndNode(Action action, Zone* zone) |
972 : RegExpNode(zone), action_(action) { } | 960 : RegExpNode(zone), action_(action) { } |
973 virtual void Accept(NodeVisitor* visitor); | 961 virtual void Accept(NodeVisitor* visitor); |
974 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 962 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
975 virtual int EatsAtLeast(int still_to_find, | 963 virtual int EatsAtLeast(int still_to_find, |
976 int recursion_depth, | 964 int recursion_depth, |
977 bool not_at_start) { return 0; } | 965 bool not_at_start) { return 0; } |
978 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 966 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
979 RegExpCompiler* compiler, | 967 RegExpCompiler* compiler, |
980 int characters_filled_in, | 968 int characters_filled_in, |
981 bool not_at_start) { | 969 bool not_at_start) { |
982 // Returning 0 from EatsAtLeast should ensure we never get here. | 970 // Returning 0 from EatsAtLeast should ensure we never get here. |
983 UNREACHABLE(); | 971 UNREACHABLE(); |
984 } | 972 } |
985 virtual void FillInBMInfo(int offset, | 973 virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, |
986 int budget, | 974 BoyerMooreLookahead* bm, bool not_at_start) { |
987 BoyerMooreLookahead* bm, | |
988 bool not_at_start) { | |
989 // Returning 0 from EatsAtLeast should ensure we never get here. | 975 // Returning 0 from EatsAtLeast should ensure we never get here. |
990 UNREACHABLE(); | 976 UNREACHABLE(); |
991 } | 977 } |
992 | 978 |
993 private: | 979 private: |
994 Action action_; | 980 Action action_; |
995 }; | 981 }; |
996 | 982 |
997 | 983 |
998 class NegativeSubmatchSuccess: public EndNode { | 984 class NegativeSubmatchSuccess: public EndNode { |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1070 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1056 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
1071 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); | 1057 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); |
1072 int EatsAtLeastHelper(int still_to_find, | 1058 int EatsAtLeastHelper(int still_to_find, |
1073 int budget, | 1059 int budget, |
1074 RegExpNode* ignore_this_node, | 1060 RegExpNode* ignore_this_node, |
1075 bool not_at_start); | 1061 bool not_at_start); |
1076 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1062 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
1077 RegExpCompiler* compiler, | 1063 RegExpCompiler* compiler, |
1078 int characters_filled_in, | 1064 int characters_filled_in, |
1079 bool not_at_start); | 1065 bool not_at_start); |
1080 virtual void FillInBMInfo(int offset, | 1066 virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, |
1081 int budget, | 1067 BoyerMooreLookahead* bm, bool not_at_start); |
1082 BoyerMooreLookahead* bm, | |
1083 bool not_at_start); | |
1084 | 1068 |
1085 bool being_calculated() { return being_calculated_; } | 1069 bool being_calculated() { return being_calculated_; } |
1086 bool not_at_start() { return not_at_start_; } | 1070 bool not_at_start() { return not_at_start_; } |
1087 void set_not_at_start() { not_at_start_ = true; } | 1071 void set_not_at_start() { not_at_start_ = true; } |
1088 void set_being_calculated(bool b) { being_calculated_ = b; } | 1072 void set_being_calculated(bool b) { being_calculated_ = b; } |
1089 virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { | 1073 virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { |
1090 return true; | 1074 return true; |
1091 } | 1075 } |
1092 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); | 1076 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); |
1093 | 1077 |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1139 Zone* zone) | 1123 Zone* zone) |
1140 : ChoiceNode(2, zone) { | 1124 : ChoiceNode(2, zone) { |
1141 AddAlternative(this_must_fail); | 1125 AddAlternative(this_must_fail); |
1142 AddAlternative(then_do_this); | 1126 AddAlternative(then_do_this); |
1143 } | 1127 } |
1144 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); | 1128 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); |
1145 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1129 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
1146 RegExpCompiler* compiler, | 1130 RegExpCompiler* compiler, |
1147 int characters_filled_in, | 1131 int characters_filled_in, |
1148 bool not_at_start); | 1132 bool not_at_start); |
1149 virtual void FillInBMInfo(int offset, | 1133 virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, |
1150 int budget, | 1134 BoyerMooreLookahead* bm, bool not_at_start) { |
1151 BoyerMooreLookahead* bm, | 1135 alternatives_->at(1).node()->FillInBMInfo(isolate, offset, budget - 1, bm, |
1152 bool not_at_start) { | 1136 not_at_start); |
1153 alternatives_->at(1).node()->FillInBMInfo( | |
1154 offset, budget - 1, bm, not_at_start); | |
1155 if (offset == 0) set_bm_info(not_at_start, bm); | 1137 if (offset == 0) set_bm_info(not_at_start, bm); |
1156 } | 1138 } |
1157 // For a negative lookahead we don't emit the quick check for the | 1139 // For a negative lookahead we don't emit the quick check for the |
1158 // alternative that is expected to fail. This is because quick check code | 1140 // alternative that is expected to fail. This is because quick check code |
1159 // starts by loading enough characters for the alternative that takes fewest | 1141 // starts by loading enough characters for the alternative that takes fewest |
1160 // characters, but on a negative lookahead the negative branch did not take | 1142 // characters, but on a negative lookahead the negative branch did not take |
1161 // part in that calculation (EatsAtLeast) so the assumptions don't hold. | 1143 // part in that calculation (EatsAtLeast) so the assumptions don't hold. |
1162 virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { | 1144 virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { |
1163 return !is_first; | 1145 return !is_first; |
1164 } | 1146 } |
(...skipping 10 matching lines...) Expand all Loading... |
1175 body_can_be_zero_length_(body_can_be_zero_length) | 1157 body_can_be_zero_length_(body_can_be_zero_length) |
1176 { } | 1158 { } |
1177 void AddLoopAlternative(GuardedAlternative alt); | 1159 void AddLoopAlternative(GuardedAlternative alt); |
1178 void AddContinueAlternative(GuardedAlternative alt); | 1160 void AddContinueAlternative(GuardedAlternative alt); |
1179 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1161 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
1180 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); | 1162 virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start); |
1181 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1163 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
1182 RegExpCompiler* compiler, | 1164 RegExpCompiler* compiler, |
1183 int characters_filled_in, | 1165 int characters_filled_in, |
1184 bool not_at_start); | 1166 bool not_at_start); |
1185 virtual void FillInBMInfo(int offset, | 1167 virtual void FillInBMInfo(Isolate* isolate, int offset, int budget, |
1186 int budget, | 1168 BoyerMooreLookahead* bm, bool not_at_start); |
1187 BoyerMooreLookahead* bm, | |
1188 bool not_at_start); | |
1189 RegExpNode* loop_node() { return loop_node_; } | 1169 RegExpNode* loop_node() { return loop_node_; } |
1190 RegExpNode* continue_node() { return continue_node_; } | 1170 RegExpNode* continue_node() { return continue_node_; } |
1191 bool body_can_be_zero_length() { return body_can_be_zero_length_; } | 1171 bool body_can_be_zero_length() { return body_can_be_zero_length_; } |
1192 virtual void Accept(NodeVisitor* visitor); | 1172 virtual void Accept(NodeVisitor* visitor); |
1193 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); | 1173 virtual RegExpNode* FilterOneByte(int depth, bool ignore_case); |
1194 | 1174 |
1195 private: | 1175 private: |
1196 // AddAlternative is made private for loop nodes because alternatives | 1176 // AddAlternative is made private for loop nodes because alternatives |
1197 // should not be added freely, we need to keep track of which node | 1177 // should not be added freely, we need to keep track of which node |
1198 // goes back to the node itself. | 1178 // goes back to the node itself. |
(...skipping 476 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1675 | 1655 |
1676 static bool TooMuchRegExpCode(Handle<String> pattern); | 1656 static bool TooMuchRegExpCode(Handle<String> pattern); |
1677 | 1657 |
1678 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1658 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
1679 }; | 1659 }; |
1680 | 1660 |
1681 | 1661 |
1682 } } // namespace v8::internal | 1662 } } // namespace v8::internal |
1683 | 1663 |
1684 #endif // V8_JSREGEXP_H_ | 1664 #endif // V8_JSREGEXP_H_ |
OLD | NEW |