| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 183 // is not tracked, however. As a conservative approximation we track the | 183 // is not tracked, however. As a conservative approximation we track the |
| 184 // total regexp code compiled including code that has subsequently been freed | 184 // total regexp code compiled including code that has subsequently been freed |
| 185 // and the total executable memory at any point. | 185 // and the total executable memory at any point. |
| 186 static const int kRegExpExecutableMemoryLimit = 16 * MB; | 186 static const int kRegExpExecutableMemoryLimit = 16 * MB; |
| 187 static const int kRegWxpCompiledLimit = 1 * MB; | 187 static const int kRegWxpCompiledLimit = 1 * MB; |
| 188 | 188 |
| 189 private: | 189 private: |
| 190 static String* last_ascii_string_; | 190 static String* last_ascii_string_; |
| 191 static String* two_byte_cached_string_; | 191 static String* two_byte_cached_string_; |
| 192 | 192 |
| 193 static bool CompileIrregexp(Handle<JSRegExp> re, bool is_ascii); | 193 static bool CompileIrregexp( |
| 194 static inline bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii); | 194 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); |
| 195 static inline bool EnsureCompiledIrregexp( |
| 196 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii); |
| 195 | 197 |
| 196 | 198 |
| 197 // Set the subject cache. The previous string buffer is not deleted, so the | 199 // Set the subject cache. The previous string buffer is not deleted, so the |
| 198 // caller should ensure that it doesn't leak. | 200 // caller should ensure that it doesn't leak. |
| 199 static void SetSubjectCache(String* subject, | 201 static void SetSubjectCache(String* subject, |
| 200 char* utf8_subject, | 202 char* utf8_subject, |
| 201 int uft8_length, | 203 int uft8_length, |
| 202 int character_position, | 204 int character_position, |
| 203 int utf8_position); | 205 int utf8_position); |
| 204 | 206 |
| (...skipping 207 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 412 private: | 414 private: |
| 413 // There can't be a static empty set since it allocates its | 415 // There can't be a static empty set since it allocates its |
| 414 // successors in a zone and caches them. | 416 // successors in a zone and caches them. |
| 415 OutSet* empty() { return &empty_; } | 417 OutSet* empty() { return &empty_; } |
| 416 OutSet empty_; | 418 OutSet empty_; |
| 417 ZoneSplayTree<Config>* tree() { return &tree_; } | 419 ZoneSplayTree<Config>* tree() { return &tree_; } |
| 418 ZoneSplayTree<Config> tree_; | 420 ZoneSplayTree<Config> tree_; |
| 419 }; | 421 }; |
| 420 | 422 |
| 421 | 423 |
| 424 // Improve the speed that we scan for an initial point where a non-anchored |
| 425 // regexp can match by using a Boyer-Moore-like table. This is done by |
| 426 // identifying non-greedy non-capturing loops in the nodes that eat any |
| 427 // character one at a time. For example in the middle of the regexp |
| 428 // /foo[\s\S]*?bar/ we find such a loop. There is also such a loop implicitly |
| 429 // inserted at the start of any non-anchored regexp. |
| 430 // |
| 431 // When we have found such a loop we look ahead in the nodes to find the set of |
| 432 // characters that can come at given distances. For example for the regexp |
| 433 // /.?foo/ we know that there are at least 3 characters ahead of us, and the |
| 434 // sets of characters that can occur are [any, [f, o], [o]]. We find a range in |
| 435 // the lookahead info where the set of characters is reasonably constrained. In |
| 436 // our example this is from index 1 to 2 (0 is not constrained). We can now |
| 437 // look 3 characters ahead and if we don't find one of [f, o] (the union of |
| 438 // [f, o] and [o]) then we can skip forwards by the range size (in this case 2). |
| 439 // |
| 440 // For Unicode input strings we do the same, but modulo 128. |
| 441 // |
| 442 // We also look at the first string fed to the regexp and use that to get a hint |
| 443 // of the character frequencies in the inputs. This affects the assessment of |
| 444 // whether the set of characters is 'reasonably constrained'. |
| 445 // |
| 446 // We also have another lookahead mechanism (called quick check in the code), |
| 447 // which uses a wide load of multiple characters followed by a mask and compare |
| 448 // to determine whether a match is possible at this point. |
| 449 class BoyerMooreLookahead { |
| 450 public: |
| 451 BoyerMooreLookahead(int length, int map_length, RegExpCompiler* compiler); |
| 452 |
| 453 int length() { return length_; } |
| 454 int max_char() { return max_char_; } |
| 455 RegExpCompiler* compiler() { return compiler_; } |
| 456 |
| 457 static const int kTooManyCharacters = 32; |
| 458 |
| 459 int Count(int map_number) { |
| 460 ZoneList<bool>* map = bitmaps_->at(map_number); |
| 461 if (map == NULL) return map_length_; |
| 462 int count = 0; |
| 463 for (int i = 0; i < map_length_; i++) { |
| 464 if (map->at(i)) count++; |
| 465 } |
| 466 return count; |
| 467 } |
| 468 |
| 469 void Set(int map_number, int character) { |
| 470 if (character > max_char_) return; |
| 471 ZoneList<bool>* map = bitmaps_->at(map_number); |
| 472 if (map == NULL) return; |
| 473 map->at(character & (map_length_ - 1)) = true; |
| 474 } |
| 475 |
| 476 void SetAll(int map_number) { |
| 477 bitmaps_->at(map_number) = NULL; |
| 478 } |
| 479 |
| 480 void SetRest(int from_map) { |
| 481 for (int i = from_map; i < length_; i++) SetAll(i); |
| 482 } |
| 483 bool EmitSkipInstructions(RegExpMacroAssembler* masm); |
| 484 |
| 485 private: |
| 486 // This is the value obtained by EatsAtLeast. If we do not have at least this |
| 487 // many characters left in the sample string then the match is bound to fail. |
| 488 // Therefore it is OK to read a character this far ahead of the current match |
| 489 // point. |
| 490 int length_; |
| 491 // We conservatively consider all character values modulo this length. For |
| 492 // ASCII there is no loss of precision, since this has a value of 128. |
| 493 int map_length_; |
| 494 RegExpCompiler* compiler_; |
| 495 // 0x7f for ASCII, 0xffff for UTF-16. |
| 496 int max_char_; |
| 497 ZoneList<ZoneList<bool>*>* bitmaps_; |
| 498 |
| 499 int GetSkipTable(int min_lookahead, |
| 500 int max_lookahead, |
| 501 Handle<ByteArray> boolean_skip_table); |
| 502 bool FindWorthwhileInterval(int* from, int* to); |
| 503 int FindBestInterval( |
| 504 int max_number_of_chars, int old_biggest_points, int* from, int* to); |
| 505 }; |
| 506 |
| 507 |
| 422 #define FOR_EACH_NODE_TYPE(VISIT) \ | 508 #define FOR_EACH_NODE_TYPE(VISIT) \ |
| 423 VISIT(End) \ | 509 VISIT(End) \ |
| 424 VISIT(Action) \ | 510 VISIT(Action) \ |
| 425 VISIT(Choice) \ | 511 VISIT(Choice) \ |
| 426 VISIT(BackReference) \ | 512 VISIT(BackReference) \ |
| 427 VISIT(Assertion) \ | 513 VISIT(Assertion) \ |
| 428 VISIT(Text) | 514 VISIT(Text) |
| 429 | 515 |
| 430 | 516 |
| 431 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ | 517 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ |
| (...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 628 // For a given number of characters this returns a mask and a value. The | 714 // For a given number of characters this returns a mask and a value. The |
| 629 // next n characters are anded with the mask and compared with the value. | 715 // next n characters are anded with the mask and compared with the value. |
| 630 // A comparison failure indicates the node cannot match the next n characters. | 716 // A comparison failure indicates the node cannot match the next n characters. |
| 631 // A comparison success indicates the node may match. | 717 // A comparison success indicates the node may match. |
| 632 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 718 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 633 RegExpCompiler* compiler, | 719 RegExpCompiler* compiler, |
| 634 int characters_filled_in, | 720 int characters_filled_in, |
| 635 bool not_at_start) = 0; | 721 bool not_at_start) = 0; |
| 636 static const int kNodeIsTooComplexForGreedyLoops = -1; | 722 static const int kNodeIsTooComplexForGreedyLoops = -1; |
| 637 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } | 723 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } |
| 724 // Only returns the successor for a text node of length 1 that matches any |
| 725 // character and that has no guards on it. |
| 726 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
| 727 RegExpCompiler* compiler) { |
| 728 return NULL; |
| 729 } |
| 730 |
| 731 // Collects information on the possible code units (mod 128) that can match if |
| 732 // we look forward. This is used for a Boyer-Moore-like string searching |
| 733 // implementation. TODO(erikcorry): This should share more code with |
| 734 // EatsAtLeast, GetQuickCheckDetails and ComputeFirstCharacterSet. |
| 735 virtual void FillInBMInfo( |
| 736 int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
| 737 UNREACHABLE(); |
| 738 } |
| 739 |
| 638 Label* label() { return &label_; } | 740 Label* label() { return &label_; } |
| 639 // If non-generic code is generated for a node (i.e. the node is not at the | 741 // If non-generic code is generated for a node (i.e. the node is not at the |
| 640 // start of the trace) then it cannot be reused. This variable sets a limit | 742 // start of the trace) then it cannot be reused. This variable sets a limit |
| 641 // on how often we allow that to happen before we insist on starting a new | 743 // on how often we allow that to happen before we insist on starting a new |
| 642 // trace and generating generic code for a node that can be reused by flushing | 744 // trace and generating generic code for a node that can be reused by flushing |
| 643 // the deferred actions in the current trace and generating a goto. | 745 // the deferred actions in the current trace and generating a goto. |
| 644 static const int kMaxCopiesCodeGenerated = 10; | 746 static const int kMaxCopiesCodeGenerated = 10; |
| 645 | 747 |
| 646 NodeInfo* info() { return &info_; } | 748 NodeInfo* info() { return &info_; } |
| 647 | 749 |
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 740 int to_; | 842 int to_; |
| 741 }; | 843 }; |
| 742 | 844 |
| 743 | 845 |
| 744 class SeqRegExpNode: public RegExpNode { | 846 class SeqRegExpNode: public RegExpNode { |
| 745 public: | 847 public: |
| 746 explicit SeqRegExpNode(RegExpNode* on_success) | 848 explicit SeqRegExpNode(RegExpNode* on_success) |
| 747 : on_success_(on_success) { } | 849 : on_success_(on_success) { } |
| 748 RegExpNode* on_success() { return on_success_; } | 850 RegExpNode* on_success() { return on_success_; } |
| 749 void set_on_success(RegExpNode* node) { on_success_ = node; } | 851 void set_on_success(RegExpNode* node) { on_success_ = node; } |
| 852 virtual void FillInBMInfo( |
| 853 int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
| 854 on_success_->FillInBMInfo(offset, bm, not_at_start); |
| 855 } |
| 750 private: | 856 private: |
| 751 RegExpNode* on_success_; | 857 RegExpNode* on_success_; |
| 752 }; | 858 }; |
| 753 | 859 |
| 754 | 860 |
| 755 class ActionNode: public SeqRegExpNode { | 861 class ActionNode: public SeqRegExpNode { |
| 756 public: | 862 public: |
| 757 enum Type { | 863 enum Type { |
| 758 SET_REGISTER, | 864 SET_REGISTER, |
| 759 INCREMENT_REGISTER, | 865 INCREMENT_REGISTER, |
| (...skipping 26 matching lines...) Expand all Loading... |
| 786 virtual int EatsAtLeast(int still_to_find, | 892 virtual int EatsAtLeast(int still_to_find, |
| 787 int recursion_depth, | 893 int recursion_depth, |
| 788 bool not_at_start); | 894 bool not_at_start); |
| 789 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 895 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 790 RegExpCompiler* compiler, | 896 RegExpCompiler* compiler, |
| 791 int filled_in, | 897 int filled_in, |
| 792 bool not_at_start) { | 898 bool not_at_start) { |
| 793 return on_success()->GetQuickCheckDetails( | 899 return on_success()->GetQuickCheckDetails( |
| 794 details, compiler, filled_in, not_at_start); | 900 details, compiler, filled_in, not_at_start); |
| 795 } | 901 } |
| 902 virtual void FillInBMInfo( |
| 903 int offset, BoyerMooreLookahead* bm, bool not_at_start); |
| 796 Type type() { return type_; } | 904 Type type() { return type_; } |
| 797 // TODO(erikcorry): We should allow some action nodes in greedy loops. | 905 // TODO(erikcorry): We should allow some action nodes in greedy loops. |
| 798 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } | 906 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } |
| 799 virtual ActionNode* Clone() { return new ActionNode(*this); } | 907 virtual ActionNode* Clone() { return new ActionNode(*this); } |
| 800 virtual int ComputeFirstCharacterSet(int budget); | 908 virtual int ComputeFirstCharacterSet(int budget); |
| 801 | 909 |
| 802 private: | 910 private: |
| 803 union { | 911 union { |
| 804 struct { | 912 struct { |
| 805 int reg; | 913 int reg; |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 853 virtual int EatsAtLeast(int still_to_find, | 961 virtual int EatsAtLeast(int still_to_find, |
| 854 int recursion_depth, | 962 int recursion_depth, |
| 855 bool not_at_start); | 963 bool not_at_start); |
| 856 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 964 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 857 RegExpCompiler* compiler, | 965 RegExpCompiler* compiler, |
| 858 int characters_filled_in, | 966 int characters_filled_in, |
| 859 bool not_at_start); | 967 bool not_at_start); |
| 860 ZoneList<TextElement>* elements() { return elms_; } | 968 ZoneList<TextElement>* elements() { return elms_; } |
| 861 void MakeCaseIndependent(bool is_ascii); | 969 void MakeCaseIndependent(bool is_ascii); |
| 862 virtual int GreedyLoopTextLength(); | 970 virtual int GreedyLoopTextLength(); |
| 971 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
| 972 RegExpCompiler* compiler); |
| 973 virtual void FillInBMInfo( |
| 974 int offset, BoyerMooreLookahead* bm, bool not_at_start); |
| 863 virtual TextNode* Clone() { | 975 virtual TextNode* Clone() { |
| 864 TextNode* result = new TextNode(*this); | 976 TextNode* result = new TextNode(*this); |
| 865 result->CalculateOffsets(); | 977 result->CalculateOffsets(); |
| 866 return result; | 978 return result; |
| 867 } | 979 } |
| 868 void CalculateOffsets(); | 980 void CalculateOffsets(); |
| 869 virtual int ComputeFirstCharacterSet(int budget); | 981 virtual int ComputeFirstCharacterSet(int budget); |
| 870 | 982 |
| 871 private: | 983 private: |
| 872 enum TextEmitPassType { | 984 enum TextEmitPassType { |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 921 } | 1033 } |
| 922 virtual void Accept(NodeVisitor* visitor); | 1034 virtual void Accept(NodeVisitor* visitor); |
| 923 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1035 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 924 virtual int EatsAtLeast(int still_to_find, | 1036 virtual int EatsAtLeast(int still_to_find, |
| 925 int recursion_depth, | 1037 int recursion_depth, |
| 926 bool not_at_start); | 1038 bool not_at_start); |
| 927 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1039 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 928 RegExpCompiler* compiler, | 1040 RegExpCompiler* compiler, |
| 929 int filled_in, | 1041 int filled_in, |
| 930 bool not_at_start); | 1042 bool not_at_start); |
| 1043 virtual void FillInBMInfo( |
| 1044 int offset, BoyerMooreLookahead* bm, bool not_at_start); |
| 931 virtual int ComputeFirstCharacterSet(int budget); | 1045 virtual int ComputeFirstCharacterSet(int budget); |
| 932 virtual AssertionNode* Clone() { return new AssertionNode(*this); } | 1046 virtual AssertionNode* Clone() { return new AssertionNode(*this); } |
| 933 AssertionNodeType type() { return type_; } | 1047 AssertionNodeType type() { return type_; } |
| 934 void set_type(AssertionNodeType type) { type_ = type; } | 1048 void set_type(AssertionNodeType type) { type_ = type; } |
| 935 | 1049 |
| 936 private: | 1050 private: |
| 937 AssertionNode(AssertionNodeType t, RegExpNode* on_success) | 1051 AssertionNode(AssertionNodeType t, RegExpNode* on_success) |
| 938 : SeqRegExpNode(on_success), type_(t) { } | 1052 : SeqRegExpNode(on_success), type_(t) { } |
| 939 AssertionNodeType type_; | 1053 AssertionNodeType type_; |
| 940 }; | 1054 }; |
| (...skipping 13 matching lines...) Expand all Loading... |
| 954 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1068 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 955 virtual int EatsAtLeast(int still_to_find, | 1069 virtual int EatsAtLeast(int still_to_find, |
| 956 int recursion_depth, | 1070 int recursion_depth, |
| 957 bool not_at_start); | 1071 bool not_at_start); |
| 958 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1072 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 959 RegExpCompiler* compiler, | 1073 RegExpCompiler* compiler, |
| 960 int characters_filled_in, | 1074 int characters_filled_in, |
| 961 bool not_at_start) { | 1075 bool not_at_start) { |
| 962 return; | 1076 return; |
| 963 } | 1077 } |
| 1078 virtual void FillInBMInfo( |
| 1079 int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
| 1080 // Working out the set of characters that a backreference can match is too |
| 1081 // hard, so we just say that any character can match. |
| 1082 bm->SetRest(offset); |
| 1083 } |
| 964 virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); } | 1084 virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); } |
| 965 virtual int ComputeFirstCharacterSet(int budget); | 1085 virtual int ComputeFirstCharacterSet(int budget); |
| 966 | 1086 |
| 967 private: | 1087 private: |
| 968 int start_reg_; | 1088 int start_reg_; |
| 969 int end_reg_; | 1089 int end_reg_; |
| 970 }; | 1090 }; |
| 971 | 1091 |
| 972 | 1092 |
| 973 class EndNode: public RegExpNode { | 1093 class EndNode: public RegExpNode { |
| 974 public: | 1094 public: |
| 975 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; | 1095 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; |
| 976 explicit EndNode(Action action) : action_(action) { } | 1096 explicit EndNode(Action action) : action_(action) { } |
| 977 virtual void Accept(NodeVisitor* visitor); | 1097 virtual void Accept(NodeVisitor* visitor); |
| 978 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1098 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 979 virtual int EatsAtLeast(int still_to_find, | 1099 virtual int EatsAtLeast(int still_to_find, |
| 980 int recursion_depth, | 1100 int recursion_depth, |
| 981 bool not_at_start) { return 0; } | 1101 bool not_at_start) { return 0; } |
| 982 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1102 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 983 RegExpCompiler* compiler, | 1103 RegExpCompiler* compiler, |
| 984 int characters_filled_in, | 1104 int characters_filled_in, |
| 985 bool not_at_start) { | 1105 bool not_at_start) { |
| 986 // Returning 0 from EatsAtLeast should ensure we never get here. | 1106 // Returning 0 from EatsAtLeast should ensure we never get here. |
| 987 UNREACHABLE(); | 1107 UNREACHABLE(); |
| 988 } | 1108 } |
| 1109 virtual void FillInBMInfo( |
| 1110 int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
| 1111 // Returning 0 from EatsAtLeast should ensure we never get here. |
| 1112 UNREACHABLE(); |
| 1113 } |
| 989 virtual EndNode* Clone() { return new EndNode(*this); } | 1114 virtual EndNode* Clone() { return new EndNode(*this); } |
| 990 private: | 1115 private: |
| 991 Action action_; | 1116 Action action_; |
| 992 }; | 1117 }; |
| 993 | 1118 |
| 994 | 1119 |
| 995 class NegativeSubmatchSuccess: public EndNode { | 1120 class NegativeSubmatchSuccess: public EndNode { |
| 996 public: | 1121 public: |
| 997 NegativeSubmatchSuccess(int stack_pointer_reg, | 1122 NegativeSubmatchSuccess(int stack_pointer_reg, |
| 998 int position_reg, | 1123 int position_reg, |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1064 int recursion_depth, | 1189 int recursion_depth, |
| 1065 bool not_at_start); | 1190 bool not_at_start); |
| 1066 int EatsAtLeastHelper(int still_to_find, | 1191 int EatsAtLeastHelper(int still_to_find, |
| 1067 int recursion_depth, | 1192 int recursion_depth, |
| 1068 RegExpNode* ignore_this_node, | 1193 RegExpNode* ignore_this_node, |
| 1069 bool not_at_start); | 1194 bool not_at_start); |
| 1070 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1195 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 1071 RegExpCompiler* compiler, | 1196 RegExpCompiler* compiler, |
| 1072 int characters_filled_in, | 1197 int characters_filled_in, |
| 1073 bool not_at_start); | 1198 bool not_at_start); |
| 1199 virtual void FillInBMInfo( |
| 1200 int offset, BoyerMooreLookahead* bm, bool not_at_start); |
| 1074 virtual ChoiceNode* Clone() { return new ChoiceNode(*this); } | 1201 virtual ChoiceNode* Clone() { return new ChoiceNode(*this); } |
| 1075 | 1202 |
| 1076 bool being_calculated() { return being_calculated_; } | 1203 bool being_calculated() { return being_calculated_; } |
| 1077 bool not_at_start() { return not_at_start_; } | 1204 bool not_at_start() { return not_at_start_; } |
| 1078 void set_not_at_start() { not_at_start_ = true; } | 1205 void set_not_at_start() { not_at_start_ = true; } |
| 1079 void set_being_calculated(bool b) { being_calculated_ = b; } | 1206 void set_being_calculated(bool b) { being_calculated_ = b; } |
| 1080 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } | 1207 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } |
| 1081 | 1208 |
| 1082 protected: | 1209 protected: |
| 1083 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); | 1210 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); |
| 1084 ZoneList<GuardedAlternative>* alternatives_; | 1211 ZoneList<GuardedAlternative>* alternatives_; |
| 1085 | 1212 |
| 1086 private: | 1213 private: |
| 1087 friend class DispatchTableConstructor; | 1214 friend class DispatchTableConstructor; |
| 1088 friend class Analysis; | 1215 friend class Analysis; |
| 1089 void GenerateGuard(RegExpMacroAssembler* macro_assembler, | 1216 void GenerateGuard(RegExpMacroAssembler* macro_assembler, |
| 1090 Guard* guard, | 1217 Guard* guard, |
| 1091 Trace* trace); | 1218 Trace* trace); |
| 1092 int CalculatePreloadCharacters(RegExpCompiler* compiler, bool not_at_start); | 1219 int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least); |
| 1093 void EmitOutOfLineContinuation(RegExpCompiler* compiler, | 1220 void EmitOutOfLineContinuation(RegExpCompiler* compiler, |
| 1094 Trace* trace, | 1221 Trace* trace, |
| 1095 GuardedAlternative alternative, | 1222 GuardedAlternative alternative, |
| 1096 AlternativeGeneration* alt_gen, | 1223 AlternativeGeneration* alt_gen, |
| 1097 int preload_characters, | 1224 int preload_characters, |
| 1098 bool next_expects_preload); | 1225 bool next_expects_preload); |
| 1099 DispatchTable* table_; | 1226 DispatchTable* table_; |
| 1100 // If true, this node is never checked at the start of the input. | 1227 // If true, this node is never checked at the start of the input. |
| 1101 // Allows a new trace to start with at_start() set to false. | 1228 // Allows a new trace to start with at_start() set to false. |
| 1102 bool not_at_start_; | 1229 bool not_at_start_; |
| 1103 bool being_calculated_; | 1230 bool being_calculated_; |
| 1104 }; | 1231 }; |
| 1105 | 1232 |
| 1106 | 1233 |
| 1107 class NegativeLookaheadChoiceNode: public ChoiceNode { | 1234 class NegativeLookaheadChoiceNode: public ChoiceNode { |
| 1108 public: | 1235 public: |
| 1109 explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, | 1236 explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, |
| 1110 GuardedAlternative then_do_this) | 1237 GuardedAlternative then_do_this) |
| 1111 : ChoiceNode(2) { | 1238 : ChoiceNode(2) { |
| 1112 AddAlternative(this_must_fail); | 1239 AddAlternative(this_must_fail); |
| 1113 AddAlternative(then_do_this); | 1240 AddAlternative(then_do_this); |
| 1114 } | 1241 } |
| 1115 virtual int EatsAtLeast(int still_to_find, | 1242 virtual int EatsAtLeast(int still_to_find, |
| 1116 int recursion_depth, | 1243 int recursion_depth, |
| 1117 bool not_at_start); | 1244 bool not_at_start); |
| 1118 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1245 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 1119 RegExpCompiler* compiler, | 1246 RegExpCompiler* compiler, |
| 1120 int characters_filled_in, | 1247 int characters_filled_in, |
| 1121 bool not_at_start); | 1248 bool not_at_start); |
| 1249 virtual void FillInBMInfo( |
| 1250 int offset, BoyerMooreLookahead* bm, bool not_at_start) { |
| 1251 alternatives_->at(1).node()->FillInBMInfo(offset, bm, not_at_start); |
| 1252 } |
| 1122 // For a negative lookahead we don't emit the quick check for the | 1253 // For a negative lookahead we don't emit the quick check for the |
| 1123 // alternative that is expected to fail. This is because quick check code | 1254 // alternative that is expected to fail. This is because quick check code |
| 1124 // starts by loading enough characters for the alternative that takes fewest | 1255 // starts by loading enough characters for the alternative that takes fewest |
| 1125 // characters, but on a negative lookahead the negative branch did not take | 1256 // characters, but on a negative lookahead the negative branch did not take |
| 1126 // part in that calculation (EatsAtLeast) so the assumptions don't hold. | 1257 // part in that calculation (EatsAtLeast) so the assumptions don't hold. |
| 1127 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } | 1258 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } |
| 1128 virtual int ComputeFirstCharacterSet(int budget); | 1259 virtual int ComputeFirstCharacterSet(int budget); |
| 1129 }; | 1260 }; |
| 1130 | 1261 |
| 1131 | 1262 |
| 1132 class LoopChoiceNode: public ChoiceNode { | 1263 class LoopChoiceNode: public ChoiceNode { |
| 1133 public: | 1264 public: |
| 1134 explicit LoopChoiceNode(bool body_can_be_zero_length) | 1265 explicit LoopChoiceNode(bool body_can_be_zero_length) |
| 1135 : ChoiceNode(2), | 1266 : ChoiceNode(2), |
| 1136 loop_node_(NULL), | 1267 loop_node_(NULL), |
| 1137 continue_node_(NULL), | 1268 continue_node_(NULL), |
| 1138 body_can_be_zero_length_(body_can_be_zero_length) { } | 1269 body_can_be_zero_length_(body_can_be_zero_length) { } |
| 1139 void AddLoopAlternative(GuardedAlternative alt); | 1270 void AddLoopAlternative(GuardedAlternative alt); |
| 1140 void AddContinueAlternative(GuardedAlternative alt); | 1271 void AddContinueAlternative(GuardedAlternative alt); |
| 1141 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1272 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 1142 virtual int EatsAtLeast(int still_to_find, | 1273 virtual int EatsAtLeast(int still_to_find, |
| 1143 int recursion_depth, | 1274 int recursion_depth, |
| 1144 bool not_at_start); | 1275 bool not_at_start); |
| 1145 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1276 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 1146 RegExpCompiler* compiler, | 1277 RegExpCompiler* compiler, |
| 1147 int characters_filled_in, | 1278 int characters_filled_in, |
| 1148 bool not_at_start); | 1279 bool not_at_start); |
| 1280 virtual void FillInBMInfo( |
| 1281 int offset, BoyerMooreLookahead* bm, bool not_at_start); |
| 1149 virtual int ComputeFirstCharacterSet(int budget); | 1282 virtual int ComputeFirstCharacterSet(int budget); |
| 1150 virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); } | 1283 virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); } |
| 1151 RegExpNode* loop_node() { return loop_node_; } | 1284 RegExpNode* loop_node() { return loop_node_; } |
| 1152 RegExpNode* continue_node() { return continue_node_; } | 1285 RegExpNode* continue_node() { return continue_node_; } |
| 1153 bool body_can_be_zero_length() { return body_can_be_zero_length_; } | 1286 bool body_can_be_zero_length() { return body_can_be_zero_length_; } |
| 1154 virtual void Accept(NodeVisitor* visitor); | 1287 virtual void Accept(NodeVisitor* visitor); |
| 1155 | 1288 |
| 1156 private: | 1289 private: |
| 1157 // AddAlternative is made private for loop nodes because alternatives | 1290 // AddAlternative is made private for loop nodes because alternatives |
| 1158 // should not be added freely, we need to keep track of which node | 1291 // should not be added freely, we need to keep track of which node |
| (...skipping 292 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1451 num_registers(registers) {} | 1584 num_registers(registers) {} |
| 1452 const char* error_message; | 1585 const char* error_message; |
| 1453 Object* code; | 1586 Object* code; |
| 1454 int num_registers; | 1587 int num_registers; |
| 1455 }; | 1588 }; |
| 1456 | 1589 |
| 1457 static CompilationResult Compile(RegExpCompileData* input, | 1590 static CompilationResult Compile(RegExpCompileData* input, |
| 1458 bool ignore_case, | 1591 bool ignore_case, |
| 1459 bool multiline, | 1592 bool multiline, |
| 1460 Handle<String> pattern, | 1593 Handle<String> pattern, |
| 1594 Handle<String> sample_subject, |
| 1461 bool is_ascii); | 1595 bool is_ascii); |
| 1462 | 1596 |
| 1463 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1597 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
| 1464 }; | 1598 }; |
| 1465 | 1599 |
| 1466 | 1600 |
| 1467 class OffsetsVector { | 1601 class OffsetsVector { |
| 1468 public: | 1602 public: |
| 1469 inline OffsetsVector(int num_registers, Isolate* isolate) | 1603 inline OffsetsVector(int num_registers, Isolate* isolate) |
| 1470 : offsets_vector_length_(num_registers) { | 1604 : offsets_vector_length_(num_registers) { |
| (...skipping 22 matching lines...) Expand all Loading... |
| 1493 int* vector_; | 1627 int* vector_; |
| 1494 int offsets_vector_length_; | 1628 int offsets_vector_length_; |
| 1495 | 1629 |
| 1496 friend class ExternalReference; | 1630 friend class ExternalReference; |
| 1497 }; | 1631 }; |
| 1498 | 1632 |
| 1499 | 1633 |
| 1500 } } // namespace v8::internal | 1634 } } // namespace v8::internal |
| 1501 | 1635 |
| 1502 #endif // V8_JSREGEXP_H_ | 1636 #endif // V8_JSREGEXP_H_ |
| OLD | NEW |