OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
183 // is not tracked, however. As a conservative approximation we track the | 183 // is not tracked, however. As a conservative approximation we track the |
184 // total regexp code compiled including code that has subsequently been freed | 184 // total regexp code compiled including code that has subsequently been freed |
185 // and the total executable memory at any point. | 185 // and the total executable memory at any point. |
186 static const int kRegExpExecutableMemoryLimit = 16 * MB; | 186 static const int kRegExpExecutableMemoryLimit = 16 * MB; |
187 static const int kRegWxpCompiledLimit = 1 * MB; | 187 static const int kRegWxpCompiledLimit = 1 * MB; |
188 | 188 |
189 private: | 189 private: |
190 static String* last_ascii_string_; | 190 static String* last_ascii_string_; |
191 static String* two_byte_cached_string_; | 191 static String* two_byte_cached_string_; |
192 | 192 |
193 static bool CompileIrregexp(Handle<JSRegExp> re, bool is_ascii); | 193 static bool CompileIrregexp( |
194 static inline bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii); | 194 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); |
| 195 static inline bool EnsureCompiledIrregexp( |
| 196 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); |
195 | 197 |
196 | 198 |
197 // Set the subject cache. The previous string buffer is not deleted, so the | 199 // Set the subject cache. The previous string buffer is not deleted, so the |
198 // caller should ensure that it doesn't leak. | 200 // caller should ensure that it doesn't leak. |
199 static void SetSubjectCache(String* subject, | 201 static void SetSubjectCache(String* subject, |
200 char* utf8_subject, | 202 char* utf8_subject, |
201 int uft8_length, | 203 int uft8_length, |
202 int character_position, | 204 int character_position, |
203 int utf8_position); | 205 int utf8_position); |
204 | 206 |
(...skipping 207 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
412 private: | 414 private: |
413 // There can't be a static empty set since it allocates its | 415 // There can't be a static empty set since it allocates its |
414 // successors in a zone and caches them. | 416 // successors in a zone and caches them. |
415 OutSet* empty() { return &empty_; } | 417 OutSet* empty() { return &empty_; } |
416 OutSet empty_; | 418 OutSet empty_; |
417 ZoneSplayTree<Config>* tree() { return &tree_; } | 419 ZoneSplayTree<Config>* tree() { return &tree_; } |
418 ZoneSplayTree<Config> tree_; | 420 ZoneSplayTree<Config> tree_; |
419 }; | 421 }; |
420 | 422 |
421 | 423 |
| 424 // Improve the speed that we scan for an initial point where a non-anchored |
| 425 // regexp can match by using a Boyer-Moore-like table. This is done by |
| 426 // identifying non-greedy non-capturing loops in the nodes that eat any |
| 427 // character one at a time. For example in the middle of the regexp |
| 428 // /foo[\s\S]*?bar/ we find such a loop. There is also such a loop implicitly |
| 429 // inserted at the start of any non-anchored regexp. |
| 430 // |
| 431 // When we have found such a loop we look ahead in the nodes to find the set of |
| 432 // characters that can come at given distances. For example for the regexp |
| 433 // /.?foo/ we know that there are at least 3 characters ahead of us, and the |
| 434 // sets of characters that can occur are [any, [f, o], [o]]. We find a range in |
| 435 // the lookahead info where the set of characters is reasonably constrained. In |
| 436 // our example this is from index 1 to 2 (0 is not constrained). We can now |
| 437 // look 3 characters ahead and if we don't find one of [f, o] (the union of |
| 438 // [f, o] and [o]) then we can skip forwards by the range size (in this case 2). |
| 439 // |
| 440 // For Unicode input strings we do the same, but modulo 128. |
| 441 // |
| 442 // We also look at the first string fed to the regexp and use that to get a hint |
| 443 // of the character frequencies in the inputs. This affects the assessment of |
| 444 // whether the set of characters is 'reasonably constrained'. |
| 445 // |
| 446 // We also have another lookahead mechanism (called quick check in the code), |
| 447 // which uses a wide load of multiple characters followed by a mask and compare |
| 448 // to determine whether a match is possible at this point. |
| 449 class BoyerMooreLookahead { |
| 450 public: |
| 451 BoyerMooreLookahead(int length, int map_length, RegExpCompiler* compiler); |
| 452 |
| 453 int length() { return length_; } |
| 454 int max_char() { return max_char_; } |
| 455 RegExpCompiler* compiler() { return compiler_; } |
| 456 |
| 457 static const int kTooManyCharacters = 32; |
| 458 |
| 459 int Count(int map_number) { |
| 460 ZoneList<bool>* map = bitmaps_->at(map_number); |
| 461 if (map == NULL) return map_length_; |
| 462 int count = 0; |
| 463 for (int i = 0; i < map_length_; i++) { |
| 464 if (map->at(i)) count++; |
| 465 } |
| 466 return count; |
| 467 } |
| 468 |
| 469 void Set(int map_number, int character) { |
| 470 if (character > max_char_) return; |
| 471 ZoneList<bool>* map = bitmaps_->at(map_number); |
| 472 if (map == NULL) return; |
| 473 map->at(character & (map_length_ - 1)) = true; |
| 474 } |
| 475 |
| 476 void SetAll(int map_number) { |
| 477 bitmaps_->at(map_number) = NULL; |
| 478 } |
| 479 |
| 480 void SetRest(int from_map) { |
| 481 for (int i = from_map; i < length_; i++) SetAll(i); |
| 482 } |
| 483 bool EmitSkipInstructions(RegExpMacroAssembler* masm); |
| 484 |
| 485 private: |
| 486 // This is the value obtained by EatsAtLeast. If we do not have at least this |
| 487 // many characters left in the sample string then the match is bound to fail. |
| 488 // Therefore it is OK to read a character this far ahead of the current match |
| 489 // point. |
| 490 int length_; |
| 491 // We conservatively consider all character values modulo this length. For |
| 492 // ASCII there is no loss of precision, since this has a value of 128. |
| 493 int map_length_; |
| 494 RegExpCompiler* compiler_; |
| 495 // 0x7f for ASCII, 0xffff for UTF-16. |
| 496 int max_char_; |
| 497 ZoneList<ZoneList<bool>*>* bitmaps_; |
| 498 |
| 499 int GetSkipTable(int min_lookahead, |
| 500 int max_lookahead, |
| 501 Handle<ByteArray> boolean_skip_table); |
| 502 bool FindWorthwhileInterval(int* from, int* to); |
| 503 int FindBestInterval( |
| 504 int max_number_of_chars, int old_biggest_points, int* from, int* to); |
| 505 }; |
| 506 |
| 507 |
422 #define FOR_EACH_NODE_TYPE(VISIT) \ | 508 #define FOR_EACH_NODE_TYPE(VISIT) \ |
423 VISIT(End) \ | 509 VISIT(End) \ |
424 VISIT(Action) \ | 510 VISIT(Action) \ |
425 VISIT(Choice) \ | 511 VISIT(Choice) \ |
426 VISIT(BackReference) \ | 512 VISIT(BackReference) \ |
427 VISIT(Assertion) \ | 513 VISIT(Assertion) \ |
428 VISIT(Text) | 514 VISIT(Text) |
429 | 515 |
430 | 516 |
431 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ | 517 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ |
(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
628 // For a given number of characters this returns a mask and a value. The | 714 // For a given number of characters this returns a mask and a value. The |
629 // next n characters are anded with the mask and compared with the value. | 715 // next n characters are anded with the mask and compared with the value. |
630 // A comparison failure indicates the node cannot match the next n characters. | 716 // A comparison failure indicates the node cannot match the next n characters. |
631 // A comparison success indicates the node may match. | 717 // A comparison success indicates the node may match. |
632 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 718 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
633 RegExpCompiler* compiler, | 719 RegExpCompiler* compiler, |
634 int characters_filled_in, | 720 int characters_filled_in, |
635 bool not_at_start) = 0; | 721 bool not_at_start) = 0; |
636 static const int kNodeIsTooComplexForGreedyLoops = -1; | 722 static const int kNodeIsTooComplexForGreedyLoops = -1; |
637 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } | 723 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } |
| 724 // Only returns the successor for a text node of length 1 that matches any |
| 725 // character and that has no guards on it. |
| 726 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
| 727 RegExpCompiler* compiler) { |
| 728 return NULL; |
| 729 } |
| 730 |
| 731 // Collects information on the possible code units (mod 128) that can match if |
| 732 // we look forward. This is used for a Boyer-Moore-like string searching |
| 733 // implementation. TODO(erikcorry): This should share more code with |
| 734 // EatsAtLeast, GetQuickCheckDetails and ComputeFirstCharacterSet. |
| 735 virtual void FillInBMInfo( |
| 736 int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
| 737 UNREACHABLE(); |
| 738 } |
| 739 |
638 Label* label() { return &label_; } | 740 Label* label() { return &label_; } |
639 // If non-generic code is generated for a node (i.e. the node is not at the | 741 // If non-generic code is generated for a node (i.e. the node is not at the |
640 // start of the trace) then it cannot be reused. This variable sets a limit | 742 // start of the trace) then it cannot be reused. This variable sets a limit |
641 // on how often we allow that to happen before we insist on starting a new | 743 // on how often we allow that to happen before we insist on starting a new |
642 // trace and generating generic code for a node that can be reused by flushing | 744 // trace and generating generic code for a node that can be reused by flushing |
643 // the deferred actions in the current trace and generating a goto. | 745 // the deferred actions in the current trace and generating a goto. |
644 static const int kMaxCopiesCodeGenerated = 10; | 746 static const int kMaxCopiesCodeGenerated = 10; |
645 | 747 |
646 NodeInfo* info() { return &info_; } | 748 NodeInfo* info() { return &info_; } |
647 | 749 |
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
740 int to_; | 842 int to_; |
741 }; | 843 }; |
742 | 844 |
743 | 845 |
744 class SeqRegExpNode: public RegExpNode { | 846 class SeqRegExpNode: public RegExpNode { |
745 public: | 847 public: |
746 explicit SeqRegExpNode(RegExpNode* on_success) | 848 explicit SeqRegExpNode(RegExpNode* on_success) |
747 : on_success_(on_success) { } | 849 : on_success_(on_success) { } |
748 RegExpNode* on_success() { return on_success_; } | 850 RegExpNode* on_success() { return on_success_; } |
749 void set_on_success(RegExpNode* node) { on_success_ = node; } | 851 void set_on_success(RegExpNode* node) { on_success_ = node; } |
| 852 virtual void FillInBMInfo( |
| 853 int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
| 854 on_success_->FillInBMInfo(offset, bm, not_at_start); |
| 855 } |
750 private: | 856 private: |
751 RegExpNode* on_success_; | 857 RegExpNode* on_success_; |
752 }; | 858 }; |
753 | 859 |
754 | 860 |
755 class ActionNode: public SeqRegExpNode { | 861 class ActionNode: public SeqRegExpNode { |
756 public: | 862 public: |
757 enum Type { | 863 enum Type { |
758 SET_REGISTER, | 864 SET_REGISTER, |
759 INCREMENT_REGISTER, | 865 INCREMENT_REGISTER, |
(...skipping 26 matching lines...) Expand all Loading... |
786 virtual int EatsAtLeast(int still_to_find, | 892 virtual int EatsAtLeast(int still_to_find, |
787 int recursion_depth, | 893 int recursion_depth, |
788 bool not_at_start); | 894 bool not_at_start); |
789 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 895 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
790 RegExpCompiler* compiler, | 896 RegExpCompiler* compiler, |
791 int filled_in, | 897 int filled_in, |
792 bool not_at_start) { | 898 bool not_at_start) { |
793 return on_success()->GetQuickCheckDetails( | 899 return on_success()->GetQuickCheckDetails( |
794 details, compiler, filled_in, not_at_start); | 900 details, compiler, filled_in, not_at_start); |
795 } | 901 } |
| 902 virtual void FillInBMInfo( |
| 903 int offset, BoyerMooreLookahead* bm, bool not_at_start); |
796 Type type() { return type_; } | 904 Type type() { return type_; } |
797 // TODO(erikcorry): We should allow some action nodes in greedy loops. | 905 // TODO(erikcorry): We should allow some action nodes in greedy loops. |
798 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } | 906 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } |
799 virtual ActionNode* Clone() { return new ActionNode(*this); } | 907 virtual ActionNode* Clone() { return new ActionNode(*this); } |
800 virtual int ComputeFirstCharacterSet(int budget); | 908 virtual int ComputeFirstCharacterSet(int budget); |
801 | 909 |
802 private: | 910 private: |
803 union { | 911 union { |
804 struct { | 912 struct { |
805 int reg; | 913 int reg; |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
853 virtual int EatsAtLeast(int still_to_find, | 961 virtual int EatsAtLeast(int still_to_find, |
854 int recursion_depth, | 962 int recursion_depth, |
855 bool not_at_start); | 963 bool not_at_start); |
856 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 964 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
857 RegExpCompiler* compiler, | 965 RegExpCompiler* compiler, |
858 int characters_filled_in, | 966 int characters_filled_in, |
859 bool not_at_start); | 967 bool not_at_start); |
860 ZoneList<TextElement>* elements() { return elms_; } | 968 ZoneList<TextElement>* elements() { return elms_; } |
861 void MakeCaseIndependent(bool is_ascii); | 969 void MakeCaseIndependent(bool is_ascii); |
862 virtual int GreedyLoopTextLength(); | 970 virtual int GreedyLoopTextLength(); |
| 971 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
| 972 RegExpCompiler* compiler); |
| 973 virtual void FillInBMInfo( |
| 974 int offset, BoyerMooreLookahead* bm, bool not_at_start); |
863 virtual TextNode* Clone() { | 975 virtual TextNode* Clone() { |
864 TextNode* result = new TextNode(*this); | 976 TextNode* result = new TextNode(*this); |
865 result->CalculateOffsets(); | 977 result->CalculateOffsets(); |
866 return result; | 978 return result; |
867 } | 979 } |
868 void CalculateOffsets(); | 980 void CalculateOffsets(); |
869 virtual int ComputeFirstCharacterSet(int budget); | 981 virtual int ComputeFirstCharacterSet(int budget); |
870 | 982 |
871 private: | 983 private: |
872 enum TextEmitPassType { | 984 enum TextEmitPassType { |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
921 } | 1033 } |
922 virtual void Accept(NodeVisitor* visitor); | 1034 virtual void Accept(NodeVisitor* visitor); |
923 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1035 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
924 virtual int EatsAtLeast(int still_to_find, | 1036 virtual int EatsAtLeast(int still_to_find, |
925 int recursion_depth, | 1037 int recursion_depth, |
926 bool not_at_start); | 1038 bool not_at_start); |
927 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1039 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
928 RegExpCompiler* compiler, | 1040 RegExpCompiler* compiler, |
929 int filled_in, | 1041 int filled_in, |
930 bool not_at_start); | 1042 bool not_at_start); |
| 1043 virtual void FillInBMInfo( |
| 1044 int offset, BoyerMooreLookahead* bm, bool not_at_start); |
931 virtual int ComputeFirstCharacterSet(int budget); | 1045 virtual int ComputeFirstCharacterSet(int budget); |
932 virtual AssertionNode* Clone() { return new AssertionNode(*this); } | 1046 virtual AssertionNode* Clone() { return new AssertionNode(*this); } |
933 AssertionNodeType type() { return type_; } | 1047 AssertionNodeType type() { return type_; } |
934 void set_type(AssertionNodeType type) { type_ = type; } | 1048 void set_type(AssertionNodeType type) { type_ = type; } |
935 | 1049 |
936 private: | 1050 private: |
937 AssertionNode(AssertionNodeType t, RegExpNode* on_success) | 1051 AssertionNode(AssertionNodeType t, RegExpNode* on_success) |
938 : SeqRegExpNode(on_success), type_(t) { } | 1052 : SeqRegExpNode(on_success), type_(t) { } |
939 AssertionNodeType type_; | 1053 AssertionNodeType type_; |
940 }; | 1054 }; |
(...skipping 13 matching lines...) Expand all Loading... |
954 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1068 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
955 virtual int EatsAtLeast(int still_to_find, | 1069 virtual int EatsAtLeast(int still_to_find, |
956 int recursion_depth, | 1070 int recursion_depth, |
957 bool not_at_start); | 1071 bool not_at_start); |
958 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1072 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
959 RegExpCompiler* compiler, | 1073 RegExpCompiler* compiler, |
960 int characters_filled_in, | 1074 int characters_filled_in, |
961 bool not_at_start) { | 1075 bool not_at_start) { |
962 return; | 1076 return; |
963 } | 1077 } |
| 1078 virtual void FillInBMInfo( |
| 1079 int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
| 1080 // Working out the set of characters that a backreference can match is too |
| 1081 // hard, so we just say that any character can match. |
| 1082 bm->SetRest(offset); |
| 1083 } |
964 virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); } | 1084 virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); } |
965 virtual int ComputeFirstCharacterSet(int budget); | 1085 virtual int ComputeFirstCharacterSet(int budget); |
966 | 1086 |
967 private: | 1087 private: |
968 int start_reg_; | 1088 int start_reg_; |
969 int end_reg_; | 1089 int end_reg_; |
970 }; | 1090 }; |
971 | 1091 |
972 | 1092 |
973 class EndNode: public RegExpNode { | 1093 class EndNode: public RegExpNode { |
974 public: | 1094 public: |
975 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; | 1095 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; |
976 explicit EndNode(Action action) : action_(action) { } | 1096 explicit EndNode(Action action) : action_(action) { } |
977 virtual void Accept(NodeVisitor* visitor); | 1097 virtual void Accept(NodeVisitor* visitor); |
978 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1098 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
979 virtual int EatsAtLeast(int still_to_find, | 1099 virtual int EatsAtLeast(int still_to_find, |
980 int recursion_depth, | 1100 int recursion_depth, |
981 bool not_at_start) { return 0; } | 1101 bool not_at_start) { return 0; } |
982 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1102 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
983 RegExpCompiler* compiler, | 1103 RegExpCompiler* compiler, |
984 int characters_filled_in, | 1104 int characters_filled_in, |
985 bool not_at_start) { | 1105 bool not_at_start) { |
986 // Returning 0 from EatsAtLeast should ensure we never get here. | 1106 // Returning 0 from EatsAtLeast should ensure we never get here. |
987 UNREACHABLE(); | 1107 UNREACHABLE(); |
988 } | 1108 } |
| 1109 virtual void FillInBMInfo( |
| 1110 int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
| 1111 // Returning 0 from EatsAtLeast should ensure we never get here. |
| 1112 UNREACHABLE(); |
| 1113 } |
989 virtual EndNode* Clone() { return new EndNode(*this); } | 1114 virtual EndNode* Clone() { return new EndNode(*this); } |
990 private: | 1115 private: |
991 Action action_; | 1116 Action action_; |
992 }; | 1117 }; |
993 | 1118 |
994 | 1119 |
995 class NegativeSubmatchSuccess: public EndNode { | 1120 class NegativeSubmatchSuccess: public EndNode { |
996 public: | 1121 public: |
997 NegativeSubmatchSuccess(int stack_pointer_reg, | 1122 NegativeSubmatchSuccess(int stack_pointer_reg, |
998 int position_reg, | 1123 int position_reg, |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1064 int recursion_depth, | 1189 int recursion_depth, |
1065 bool not_at_start); | 1190 bool not_at_start); |
1066 int EatsAtLeastHelper(int still_to_find, | 1191 int EatsAtLeastHelper(int still_to_find, |
1067 int recursion_depth, | 1192 int recursion_depth, |
1068 RegExpNode* ignore_this_node, | 1193 RegExpNode* ignore_this_node, |
1069 bool not_at_start); | 1194 bool not_at_start); |
1070 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1195 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
1071 RegExpCompiler* compiler, | 1196 RegExpCompiler* compiler, |
1072 int characters_filled_in, | 1197 int characters_filled_in, |
1073 bool not_at_start); | 1198 bool not_at_start); |
| 1199 virtual void FillInBMInfo( |
| 1200 int offset, BoyerMooreLookahead* bm, bool not_at_start); |
1074 virtual ChoiceNode* Clone() { return new ChoiceNode(*this); } | 1201 virtual ChoiceNode* Clone() { return new ChoiceNode(*this); } |
1075 | 1202 |
1076 bool being_calculated() { return being_calculated_; } | 1203 bool being_calculated() { return being_calculated_; } |
1077 bool not_at_start() { return not_at_start_; } | 1204 bool not_at_start() { return not_at_start_; } |
1078 void set_not_at_start() { not_at_start_ = true; } | 1205 void set_not_at_start() { not_at_start_ = true; } |
1079 void set_being_calculated(bool b) { being_calculated_ = b; } | 1206 void set_being_calculated(bool b) { being_calculated_ = b; } |
1080 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } | 1207 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } |
1081 | 1208 |
1082 protected: | 1209 protected: |
1083 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); | 1210 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); |
1084 ZoneList<GuardedAlternative>* alternatives_; | 1211 ZoneList<GuardedAlternative>* alternatives_; |
1085 | 1212 |
1086 private: | 1213 private: |
1087 friend class DispatchTableConstructor; | 1214 friend class DispatchTableConstructor; |
1088 friend class Analysis; | 1215 friend class Analysis; |
1089 void GenerateGuard(RegExpMacroAssembler* macro_assembler, | 1216 void GenerateGuard(RegExpMacroAssembler* macro_assembler, |
1090 Guard* guard, | 1217 Guard* guard, |
1091 Trace* trace); | 1218 Trace* trace); |
1092 int CalculatePreloadCharacters(RegExpCompiler* compiler, bool not_at_start); | 1219 int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least); |
1093 void EmitOutOfLineContinuation(RegExpCompiler* compiler, | 1220 void EmitOutOfLineContinuation(RegExpCompiler* compiler, |
1094 Trace* trace, | 1221 Trace* trace, |
1095 GuardedAlternative alternative, | 1222 GuardedAlternative alternative, |
1096 AlternativeGeneration* alt_gen, | 1223 AlternativeGeneration* alt_gen, |
1097 int preload_characters, | 1224 int preload_characters, |
1098 bool next_expects_preload); | 1225 bool next_expects_preload); |
1099 DispatchTable* table_; | 1226 DispatchTable* table_; |
1100 // If true, this node is never checked at the start of the input. | 1227 // If true, this node is never checked at the start of the input. |
1101 // Allows a new trace to start with at_start() set to false. | 1228 // Allows a new trace to start with at_start() set to false. |
1102 bool not_at_start_; | 1229 bool not_at_start_; |
1103 bool being_calculated_; | 1230 bool being_calculated_; |
1104 }; | 1231 }; |
1105 | 1232 |
1106 | 1233 |
1107 class NegativeLookaheadChoiceNode: public ChoiceNode { | 1234 class NegativeLookaheadChoiceNode: public ChoiceNode { |
1108 public: | 1235 public: |
1109 explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, | 1236 explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, |
1110 GuardedAlternative then_do_this) | 1237 GuardedAlternative then_do_this) |
1111 : ChoiceNode(2) { | 1238 : ChoiceNode(2) { |
1112 AddAlternative(this_must_fail); | 1239 AddAlternative(this_must_fail); |
1113 AddAlternative(then_do_this); | 1240 AddAlternative(then_do_this); |
1114 } | 1241 } |
1115 virtual int EatsAtLeast(int still_to_find, | 1242 virtual int EatsAtLeast(int still_to_find, |
1116 int recursion_depth, | 1243 int recursion_depth, |
1117 bool not_at_start); | 1244 bool not_at_start); |
1118 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1245 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
1119 RegExpCompiler* compiler, | 1246 RegExpCompiler* compiler, |
1120 int characters_filled_in, | 1247 int characters_filled_in, |
1121 bool not_at_start); | 1248 bool not_at_start); |
| 1249 virtual void FillInBMInfo( |
| 1250 int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
| 1251 alternatives_->at(1).node()->FillInBMInfo(offset, bm, not_at_start); |
| 1252 } |
1122 // For a negative lookahead we don't emit the quick check for the | 1253 // For a negative lookahead we don't emit the quick check for the |
1123 // alternative that is expected to fail. This is because quick check code | 1254 // alternative that is expected to fail. This is because quick check code |
1124 // starts by loading enough characters for the alternative that takes fewest | 1255 // starts by loading enough characters for the alternative that takes fewest |
1125 // characters, but on a negative lookahead the negative branch did not take | 1256 // characters, but on a negative lookahead the negative branch did not take |
1126 // part in that calculation (EatsAtLeast) so the assumptions don't hold. | 1257 // part in that calculation (EatsAtLeast) so the assumptions don't hold. |
1127 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } | 1258 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } |
1128 virtual int ComputeFirstCharacterSet(int budget); | 1259 virtual int ComputeFirstCharacterSet(int budget); |
1129 }; | 1260 }; |
1130 | 1261 |
1131 | 1262 |
1132 class LoopChoiceNode: public ChoiceNode { | 1263 class LoopChoiceNode: public ChoiceNode { |
1133 public: | 1264 public: |
1134 explicit LoopChoiceNode(bool body_can_be_zero_length) | 1265 explicit LoopChoiceNode(bool body_can_be_zero_length) |
1135 : ChoiceNode(2), | 1266 : ChoiceNode(2), |
1136 loop_node_(NULL), | 1267 loop_node_(NULL), |
1137 continue_node_(NULL), | 1268 continue_node_(NULL), |
1138 body_can_be_zero_length_(body_can_be_zero_length) { } | 1269 body_can_be_zero_length_(body_can_be_zero_length) { } |
1139 void AddLoopAlternative(GuardedAlternative alt); | 1270 void AddLoopAlternative(GuardedAlternative alt); |
1140 void AddContinueAlternative(GuardedAlternative alt); | 1271 void AddContinueAlternative(GuardedAlternative alt); |
1141 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1272 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
1142 virtual int EatsAtLeast(int still_to_find, | 1273 virtual int EatsAtLeast(int still_to_find, |
1143 int recursion_depth, | 1274 int recursion_depth, |
1144 bool not_at_start); | 1275 bool not_at_start); |
1145 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1276 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
1146 RegExpCompiler* compiler, | 1277 RegExpCompiler* compiler, |
1147 int characters_filled_in, | 1278 int characters_filled_in, |
1148 bool not_at_start); | 1279 bool not_at_start); |
| 1280 virtual void FillInBMInfo( |
| 1281 int offset, BoyerMooreLookahead* bm, bool not_at_start); |
1149 virtual int ComputeFirstCharacterSet(int budget); | 1282 virtual int ComputeFirstCharacterSet(int budget); |
1150 virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); } | 1283 virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); } |
1151 RegExpNode* loop_node() { return loop_node_; } | 1284 RegExpNode* loop_node() { return loop_node_; } |
1152 RegExpNode* continue_node() { return continue_node_; } | 1285 RegExpNode* continue_node() { return continue_node_; } |
1153 bool body_can_be_zero_length() { return body_can_be_zero_length_; } | 1286 bool body_can_be_zero_length() { return body_can_be_zero_length_; } |
1154 virtual void Accept(NodeVisitor* visitor); | 1287 virtual void Accept(NodeVisitor* visitor); |
1155 | 1288 |
1156 private: | 1289 private: |
1157 // AddAlternative is made private for loop nodes because alternatives | 1290 // AddAlternative is made private for loop nodes because alternatives |
1158 // should not be added freely, we need to keep track of which node | 1291 // should not be added freely, we need to keep track of which node |
(...skipping 292 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1451 num_registers(registers) {} | 1584 num_registers(registers) {} |
1452 const char* error_message; | 1585 const char* error_message; |
1453 Object* code; | 1586 Object* code; |
1454 int num_registers; | 1587 int num_registers; |
1455 }; | 1588 }; |
1456 | 1589 |
1457 static CompilationResult Compile(RegExpCompileData* input, | 1590 static CompilationResult Compile(RegExpCompileData* input, |
1458 bool ignore_case, | 1591 bool ignore_case, |
1459 bool multiline, | 1592 bool multiline, |
1460 Handle<String> pattern, | 1593 Handle<String> pattern, |
| 1594 Handle<String> sample_subject, |
1461 bool is_ascii); | 1595 bool is_ascii); |
1462 | 1596 |
1463 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1597 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
1464 }; | 1598 }; |
1465 | 1599 |
1466 | 1600 |
1467 class OffsetsVector { | 1601 class OffsetsVector { |
1468 public: | 1602 public: |
1469 inline OffsetsVector(int num_registers, Isolate* isolate) | 1603 inline OffsetsVector(int num_registers, Isolate* isolate) |
1470 : offsets_vector_length_(num_registers) { | 1604 : offsets_vector_length_(num_registers) { |
(...skipping 22 matching lines...) Expand all Loading... |
1493 int* vector_; | 1627 int* vector_; |
1494 int offsets_vector_length_; | 1628 int offsets_vector_length_; |
1495 | 1629 |
1496 friend class ExternalReference; | 1630 friend class ExternalReference; |
1497 }; | 1631 }; |
1498 | 1632 |
1499 | 1633 |
1500 } } // namespace v8::internal | 1634 } } // namespace v8::internal |
1501 | 1635 |
1502 #endif // V8_JSREGEXP_H_ | 1636 #endif // V8_JSREGEXP_H_ |
OLD | NEW |