| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 436 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 447 RegExpCharacterClass* u_char_class; | 447 RegExpCharacterClass* u_char_class; |
| 448 } data; | 448 } data; |
| 449 int cp_offset; | 449 int cp_offset; |
| 450 }; | 450 }; |
| 451 | 451 |
| 452 | 452 |
| 453 class Trace; | 453 class Trace; |
| 454 | 454 |
| 455 | 455 |
| 456 struct NodeInfo { | 456 struct NodeInfo { |
| 457 enum TriBool { | |
| 458 UNKNOWN = -1, FALSE = 0, TRUE = 1 | |
| 459 }; | |
| 460 | |
| 461 NodeInfo() | 457 NodeInfo() |
| 462 : being_analyzed(false), | 458 : being_analyzed(false), |
| 463 been_analyzed(false), | 459 been_analyzed(false), |
| 464 follows_word_interest(false), | 460 follows_word_interest(false), |
| 465 follows_newline_interest(false), | 461 follows_newline_interest(false), |
| 466 follows_start_interest(false), | 462 follows_start_interest(false), |
| 467 at_end(false), | 463 at_end(false), |
| 468 visited(false) { } | 464 visited(false) { } |
| 469 | 465 |
| 470 // Returns true if the interests and assumptions of this node | 466 // Returns true if the interests and assumptions of this node |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 537 }; | 533 }; |
| 538 | 534 |
| 539 | 535 |
| 540 // Details of a quick mask-compare check that can look ahead in the | 536 // Details of a quick mask-compare check that can look ahead in the |
| 541 // input stream. | 537 // input stream. |
| 542 class QuickCheckDetails { | 538 class QuickCheckDetails { |
| 543 public: | 539 public: |
| 544 QuickCheckDetails() | 540 QuickCheckDetails() |
| 545 : characters_(0), | 541 : characters_(0), |
| 546 mask_(0), | 542 mask_(0), |
| 547 value_(0) { } | 543 value_(0), |
| 544 cannot_match_(false) { } |
| 548 explicit QuickCheckDetails(int characters) | 545 explicit QuickCheckDetails(int characters) |
| 549 : characters_(characters), | 546 : characters_(characters), |
| 550 mask_(0), | 547 mask_(0), |
| 551 value_(0) { } | 548 value_(0), |
| 549 cannot_match_(false) { } |
| 552 bool Rationalize(bool ascii); | 550 bool Rationalize(bool ascii); |
| 553 // Merge in the information from another branch of an alternation. | 551 // Merge in the information from another branch of an alternation. |
| 554 void Merge(QuickCheckDetails* other, int from_index); | 552 void Merge(QuickCheckDetails* other, int from_index); |
| 555 // Advance the current position by some amount. | 553 // Advance the current position by some amount. |
| 556 void Advance(int by, bool ascii); | 554 void Advance(int by, bool ascii); |
| 557 void Clear(); | 555 void Clear(); |
| 556 bool cannot_match() { return cannot_match_; } |
| 557 void set_cannot_match() { cannot_match_ = true; } |
| 558 struct Position { | 558 struct Position { |
| 559 Position() : mask(0), value(0), determines_perfectly(false) { } | 559 Position() : mask(0), value(0), determines_perfectly(false) { } |
| 560 uc16 mask; | 560 uc16 mask; |
| 561 uc16 value; | 561 uc16 value; |
| 562 bool determines_perfectly; | 562 bool determines_perfectly; |
| 563 }; | 563 }; |
| 564 int characters() { return characters_; } | 564 int characters() { return characters_; } |
| 565 void set_characters(int characters) { characters_ = characters; } | 565 void set_characters(int characters) { characters_ = characters; } |
| 566 Position* positions(int index) { | 566 Position* positions(int index) { |
| 567 ASSERT(index >= 0); | 567 ASSERT(index >= 0); |
| 568 ASSERT(index < characters_); | 568 ASSERT(index < characters_); |
| 569 return positions_ + index; | 569 return positions_ + index; |
| 570 } | 570 } |
| 571 uint32_t mask() { return mask_; } | 571 uint32_t mask() { return mask_; } |
| 572 uint32_t value() { return value_; } | 572 uint32_t value() { return value_; } |
| 573 | 573 |
| 574 private: | 574 private: |
| 575 // How many characters do we have quick check information from. This is | 575 // How many characters do we have quick check information from. This is |
| 576 // the same for all branches of a choice node. | 576 // the same for all branches of a choice node. |
| 577 int characters_; | 577 int characters_; |
| 578 Position positions_[4]; | 578 Position positions_[4]; |
| 579 // These values are the condensate of the above array after Rationalize(). | 579 // These values are the condensate of the above array after Rationalize(). |
| 580 uint32_t mask_; | 580 uint32_t mask_; |
| 581 uint32_t value_; | 581 uint32_t value_; |
| 582 // If set to true, there is no way this quick check can match at all. |
| 583 // E.g., if it requires to be at the start of the input, and isn't. |
| 584 bool cannot_match_; |
| 582 }; | 585 }; |
| 583 | 586 |
| 584 | 587 |
| 585 class RegExpNode: public ZoneObject { | 588 class RegExpNode: public ZoneObject { |
| 586 public: | 589 public: |
| 587 RegExpNode() : trace_count_(0) { } | 590 RegExpNode() : trace_count_(0) { } |
| 588 virtual ~RegExpNode(); | 591 virtual ~RegExpNode(); |
| 589 virtual void Accept(NodeVisitor* visitor) = 0; | 592 virtual void Accept(NodeVisitor* visitor) = 0; |
| 590 // Generates a goto to this node or actually generates the code at this point. | 593 // Generates a goto to this node or actually generates the code at this point. |
| 591 virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0; | 594 virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0; |
| (...skipping 10 matching lines...) Expand all Loading... |
| 602 bool preload_has_checked_bounds, | 605 bool preload_has_checked_bounds, |
| 603 Label* on_possible_success, | 606 Label* on_possible_success, |
| 604 QuickCheckDetails* details_return, | 607 QuickCheckDetails* details_return, |
| 605 bool fall_through_on_failure); | 608 bool fall_through_on_failure); |
| 606 // For a given number of characters this returns a mask and a value. The | 609 // For a given number of characters this returns a mask and a value. The |
| 607 // next n characters are anded with the mask and compared with the value. | 610 // next n characters are anded with the mask and compared with the value. |
| 608 // A comparison failure indicates the node cannot match the next n characters. | 611 // A comparison failure indicates the node cannot match the next n characters. |
| 609 // A comparison success indicates the node may match. | 612 // A comparison success indicates the node may match. |
| 610 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 613 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 611 RegExpCompiler* compiler, | 614 RegExpCompiler* compiler, |
| 612 int characters_filled_in) = 0; | 615 int characters_filled_in, |
| 616 bool not_at_start) = 0; |
| 613 static const int kNodeIsTooComplexForGreedyLoops = -1; | 617 static const int kNodeIsTooComplexForGreedyLoops = -1; |
| 614 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } | 618 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } |
| 615 Label* label() { return &label_; } | 619 Label* label() { return &label_; } |
| 616 // If non-generic code is generated for a node (ie the node is not at the | 620 // If non-generic code is generated for a node (ie the node is not at the |
| 617 // start of the trace) then it cannot be reused. This variable sets a limit | 621 // start of the trace) then it cannot be reused. This variable sets a limit |
| 618 // on how often we allow that to happen before we insist on starting a new | 622 // on how often we allow that to happen before we insist on starting a new |
| 619 // trace and generating generic code for a node that can be reused by flushing | 623 // trace and generating generic code for a node that can be reused by flushing |
| 620 // the deferred actions in the current trace and generating a goto. | 624 // the deferred actions in the current trace and generating a goto. |
| 621 static const int kMaxCopiesCodeGenerated = 10; | 625 static const int kMaxCopiesCodeGenerated = 10; |
| 622 | 626 |
| (...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 733 RegExpNode* on_success); | 737 RegExpNode* on_success); |
| 734 static ActionNode* EmptyMatchCheck(int start_register, | 738 static ActionNode* EmptyMatchCheck(int start_register, |
| 735 int repetition_register, | 739 int repetition_register, |
| 736 int repetition_limit, | 740 int repetition_limit, |
| 737 RegExpNode* on_success); | 741 RegExpNode* on_success); |
| 738 virtual void Accept(NodeVisitor* visitor); | 742 virtual void Accept(NodeVisitor* visitor); |
| 739 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 743 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 740 virtual int EatsAtLeast(int still_to_find, int recursion_depth); | 744 virtual int EatsAtLeast(int still_to_find, int recursion_depth); |
| 741 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 745 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 742 RegExpCompiler* compiler, | 746 RegExpCompiler* compiler, |
| 743 int filled_in) { | 747 int filled_in, |
| 744 return on_success()->GetQuickCheckDetails(details, compiler, filled_in); | 748 bool not_at_start) { |
| 749 return on_success()->GetQuickCheckDetails( |
| 750 details, compiler, filled_in, not_at_start); |
| 745 } | 751 } |
| 746 Type type() { return type_; } | 752 Type type() { return type_; } |
| 747 // TODO(erikcorry): We should allow some action nodes in greedy loops. | 753 // TODO(erikcorry): We should allow some action nodes in greedy loops. |
| 748 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } | 754 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } |
| 749 virtual ActionNode* Clone() { return new ActionNode(*this); } | 755 virtual ActionNode* Clone() { return new ActionNode(*this); } |
| 750 | 756 |
| 751 private: | 757 private: |
| 752 union { | 758 union { |
| 753 struct { | 759 struct { |
| 754 int reg; | 760 int reg; |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 795 RegExpNode* on_success) | 801 RegExpNode* on_success) |
| 796 : SeqRegExpNode(on_success), | 802 : SeqRegExpNode(on_success), |
| 797 elms_(new ZoneList<TextElement>(1)) { | 803 elms_(new ZoneList<TextElement>(1)) { |
| 798 elms_->Add(TextElement::CharClass(that)); | 804 elms_->Add(TextElement::CharClass(that)); |
| 799 } | 805 } |
| 800 virtual void Accept(NodeVisitor* visitor); | 806 virtual void Accept(NodeVisitor* visitor); |
| 801 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 807 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 802 virtual int EatsAtLeast(int still_to_find, int recursion_depth); | 808 virtual int EatsAtLeast(int still_to_find, int recursion_depth); |
| 803 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 809 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 804 RegExpCompiler* compiler, | 810 RegExpCompiler* compiler, |
| 805 int characters_filled_in); | 811 int characters_filled_in, |
| 812 bool not_at_start); |
| 806 ZoneList<TextElement>* elements() { return elms_; } | 813 ZoneList<TextElement>* elements() { return elms_; } |
| 807 void MakeCaseIndependent(); | 814 void MakeCaseIndependent(); |
| 808 virtual int GreedyLoopTextLength(); | 815 virtual int GreedyLoopTextLength(); |
| 809 virtual TextNode* Clone() { | 816 virtual TextNode* Clone() { |
| 810 TextNode* result = new TextNode(*this); | 817 TextNode* result = new TextNode(*this); |
| 811 result->CalculateOffsets(); | 818 result->CalculateOffsets(); |
| 812 return result; | 819 return result; |
| 813 } | 820 } |
| 814 void CalculateOffsets(); | 821 void CalculateOffsets(); |
| 815 | 822 |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 853 return new AssertionNode(AT_NON_BOUNDARY, on_success); | 860 return new AssertionNode(AT_NON_BOUNDARY, on_success); |
| 854 } | 861 } |
| 855 static AssertionNode* AfterNewline(RegExpNode* on_success) { | 862 static AssertionNode* AfterNewline(RegExpNode* on_success) { |
| 856 return new AssertionNode(AFTER_NEWLINE, on_success); | 863 return new AssertionNode(AFTER_NEWLINE, on_success); |
| 857 } | 864 } |
| 858 virtual void Accept(NodeVisitor* visitor); | 865 virtual void Accept(NodeVisitor* visitor); |
| 859 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 866 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 860 virtual int EatsAtLeast(int still_to_find, int recursion_depth); | 867 virtual int EatsAtLeast(int still_to_find, int recursion_depth); |
| 861 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 868 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 862 RegExpCompiler* compiler, | 869 RegExpCompiler* compiler, |
| 863 int filled_in) { | 870 int filled_in, |
| 864 return on_success()->GetQuickCheckDetails(details, compiler, filled_in); | 871 bool not_at_start); |
| 865 } | |
| 866 virtual AssertionNode* Clone() { return new AssertionNode(*this); } | 872 virtual AssertionNode* Clone() { return new AssertionNode(*this); } |
| 867 AssertionNodeType type() { return type_; } | 873 AssertionNodeType type() { return type_; } |
| 868 private: | 874 private: |
| 869 AssertionNode(AssertionNodeType t, RegExpNode* on_success) | 875 AssertionNode(AssertionNodeType t, RegExpNode* on_success) |
| 870 : SeqRegExpNode(on_success), type_(t) { } | 876 : SeqRegExpNode(on_success), type_(t) { } |
| 871 AssertionNodeType type_; | 877 AssertionNodeType type_; |
| 872 }; | 878 }; |
| 873 | 879 |
| 874 | 880 |
| 875 class BackReferenceNode: public SeqRegExpNode { | 881 class BackReferenceNode: public SeqRegExpNode { |
| 876 public: | 882 public: |
| 877 BackReferenceNode(int start_reg, | 883 BackReferenceNode(int start_reg, |
| 878 int end_reg, | 884 int end_reg, |
| 879 RegExpNode* on_success) | 885 RegExpNode* on_success) |
| 880 : SeqRegExpNode(on_success), | 886 : SeqRegExpNode(on_success), |
| 881 start_reg_(start_reg), | 887 start_reg_(start_reg), |
| 882 end_reg_(end_reg) { } | 888 end_reg_(end_reg) { } |
| 883 virtual void Accept(NodeVisitor* visitor); | 889 virtual void Accept(NodeVisitor* visitor); |
| 884 int start_register() { return start_reg_; } | 890 int start_register() { return start_reg_; } |
| 885 int end_register() { return end_reg_; } | 891 int end_register() { return end_reg_; } |
| 886 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 892 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 887 virtual int EatsAtLeast(int still_to_find, int recursion_depth); | 893 virtual int EatsAtLeast(int still_to_find, int recursion_depth); |
| 888 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 894 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 889 RegExpCompiler* compiler, | 895 RegExpCompiler* compiler, |
| 890 int characters_filled_in) { | 896 int characters_filled_in, |
| 897 bool not_at_start) { |
| 891 return; | 898 return; |
| 892 } | 899 } |
| 893 virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); } | 900 virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); } |
| 894 | 901 |
| 895 private: | 902 private: |
| 896 int start_reg_; | 903 int start_reg_; |
| 897 int end_reg_; | 904 int end_reg_; |
| 898 }; | 905 }; |
| 899 | 906 |
| 900 | 907 |
| 901 class EndNode: public RegExpNode { | 908 class EndNode: public RegExpNode { |
| 902 public: | 909 public: |
| 903 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; | 910 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; |
| 904 explicit EndNode(Action action) : action_(action) { } | 911 explicit EndNode(Action action) : action_(action) { } |
| 905 virtual void Accept(NodeVisitor* visitor); | 912 virtual void Accept(NodeVisitor* visitor); |
| 906 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 913 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 907 virtual int EatsAtLeast(int still_to_find, int recursion_depth) { return 0; } | 914 virtual int EatsAtLeast(int still_to_find, int recursion_depth) { return 0; } |
| 908 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 915 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 909 RegExpCompiler* compiler, | 916 RegExpCompiler* compiler, |
| 910 int characters_filled_in) { | 917 int characters_filled_in, |
| 918 bool not_at_start) { |
| 911 // Returning 0 from EatsAtLeast should ensure we never get here. | 919 // Returning 0 from EatsAtLeast should ensure we never get here. |
| 912 UNREACHABLE(); | 920 UNREACHABLE(); |
| 913 } | 921 } |
| 914 virtual EndNode* Clone() { return new EndNode(*this); } | 922 virtual EndNode* Clone() { return new EndNode(*this); } |
| 915 | 923 |
| 916 private: | 924 private: |
| 917 Action action_; | 925 Action action_; |
| 918 }; | 926 }; |
| 919 | 927 |
| 920 | 928 |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 972 | 980 |
| 973 | 981 |
| 974 class AlternativeGeneration; | 982 class AlternativeGeneration; |
| 975 | 983 |
| 976 | 984 |
| 977 class ChoiceNode: public RegExpNode { | 985 class ChoiceNode: public RegExpNode { |
| 978 public: | 986 public: |
| 979 explicit ChoiceNode(int expected_size) | 987 explicit ChoiceNode(int expected_size) |
| 980 : alternatives_(new ZoneList<GuardedAlternative>(expected_size)), | 988 : alternatives_(new ZoneList<GuardedAlternative>(expected_size)), |
| 981 table_(NULL), | 989 table_(NULL), |
| 990 not_at_start_(false), |
| 982 being_calculated_(false) { } | 991 being_calculated_(false) { } |
| 983 virtual void Accept(NodeVisitor* visitor); | 992 virtual void Accept(NodeVisitor* visitor); |
| 984 void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); } | 993 void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); } |
| 985 ZoneList<GuardedAlternative>* alternatives() { return alternatives_; } | 994 ZoneList<GuardedAlternative>* alternatives() { return alternatives_; } |
| 986 DispatchTable* GetTable(bool ignore_case); | 995 DispatchTable* GetTable(bool ignore_case); |
| 987 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 996 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 988 virtual int EatsAtLeast(int still_to_find, int recursion_depth); | 997 virtual int EatsAtLeast(int still_to_find, int recursion_depth); |
| 989 int EatsAtLeastHelper(int still_to_find, | 998 int EatsAtLeastHelper(int still_to_find, |
| 990 int recursion_depth, | 999 int recursion_depth, |
| 991 RegExpNode* ignore_this_node); | 1000 RegExpNode* ignore_this_node); |
| 992 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1001 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 993 RegExpCompiler* compiler, | 1002 RegExpCompiler* compiler, |
| 994 int characters_filled_in); | 1003 int characters_filled_in, |
| 1004 bool not_at_start); |
| 995 virtual ChoiceNode* Clone() { return new ChoiceNode(*this); } | 1005 virtual ChoiceNode* Clone() { return new ChoiceNode(*this); } |
| 996 | 1006 |
| 997 bool being_calculated() { return being_calculated_; } | 1007 bool being_calculated() { return being_calculated_; } |
| 1008 bool not_at_start() { return not_at_start_; } |
| 1009 void set_not_at_start() { not_at_start_ = true; } |
| 998 void set_being_calculated(bool b) { being_calculated_ = b; } | 1010 void set_being_calculated(bool b) { being_calculated_ = b; } |
| 999 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } | 1011 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } |
| 1000 | 1012 |
| 1001 protected: | 1013 protected: |
| 1002 int GreedyLoopTextLength(GuardedAlternative *alternative); | 1014 int GreedyLoopTextLength(GuardedAlternative *alternative); |
| 1003 ZoneList<GuardedAlternative>* alternatives_; | 1015 ZoneList<GuardedAlternative>* alternatives_; |
| 1004 | 1016 |
| 1005 private: | 1017 private: |
| 1006 friend class DispatchTableConstructor; | 1018 friend class DispatchTableConstructor; |
| 1007 friend class Analysis; | 1019 friend class Analysis; |
| 1008 void GenerateGuard(RegExpMacroAssembler* macro_assembler, | 1020 void GenerateGuard(RegExpMacroAssembler* macro_assembler, |
| 1009 Guard *guard, | 1021 Guard *guard, |
| 1010 Trace* trace); | 1022 Trace* trace); |
| 1011 int CalculatePreloadCharacters(RegExpCompiler* compiler); | 1023 int CalculatePreloadCharacters(RegExpCompiler* compiler); |
| 1012 void EmitOutOfLineContinuation(RegExpCompiler* compiler, | 1024 void EmitOutOfLineContinuation(RegExpCompiler* compiler, |
| 1013 Trace* trace, | 1025 Trace* trace, |
| 1014 GuardedAlternative alternative, | 1026 GuardedAlternative alternative, |
| 1015 AlternativeGeneration* alt_gen, | 1027 AlternativeGeneration* alt_gen, |
| 1016 int preload_characters, | 1028 int preload_characters, |
| 1017 bool next_expects_preload); | 1029 bool next_expects_preload); |
| 1018 DispatchTable* table_; | 1030 DispatchTable* table_; |
| 1031 // If true, this node is never checked at the start of the input. |
| 1032 // Allows a new trace to start with at_start() set to false. |
| 1033 bool not_at_start_; |
| 1019 bool being_calculated_; | 1034 bool being_calculated_; |
| 1020 }; | 1035 }; |
| 1021 | 1036 |
| 1022 | 1037 |
| 1023 class NegativeLookaheadChoiceNode: public ChoiceNode { | 1038 class NegativeLookaheadChoiceNode: public ChoiceNode { |
| 1024 public: | 1039 public: |
| 1025 explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, | 1040 explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, |
| 1026 GuardedAlternative then_do_this) | 1041 GuardedAlternative then_do_this) |
| 1027 : ChoiceNode(2) { | 1042 : ChoiceNode(2) { |
| 1028 AddAlternative(this_must_fail); | 1043 AddAlternative(this_must_fail); |
| 1029 AddAlternative(then_do_this); | 1044 AddAlternative(then_do_this); |
| 1030 } | 1045 } |
| 1031 virtual int EatsAtLeast(int still_to_find, int recursion_depth); | 1046 virtual int EatsAtLeast(int still_to_find, int recursion_depth); |
| 1032 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1047 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 1033 RegExpCompiler* compiler, | 1048 RegExpCompiler* compiler, |
| 1034 int characters_filled_in); | 1049 int characters_filled_in, |
| 1050 bool not_at_start); |
| 1035 // For a negative lookahead we don't emit the quick check for the | 1051 // For a negative lookahead we don't emit the quick check for the |
| 1036 // alternative that is expected to fail. This is because quick check code | 1052 // alternative that is expected to fail. This is because quick check code |
| 1037 // starts by loading enough characters for the alternative that takes fewest | 1053 // starts by loading enough characters for the alternative that takes fewest |
| 1038 // characters, but on a negative lookahead the negative branch did not take | 1054 // characters, but on a negative lookahead the negative branch did not take |
| 1039 // part in that calculation (EatsAtLeast) so the assumptions don't hold. | 1055 // part in that calculation (EatsAtLeast) so the assumptions don't hold. |
| 1040 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } | 1056 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } |
| 1041 }; | 1057 }; |
| 1042 | 1058 |
| 1043 | 1059 |
| 1044 class LoopChoiceNode: public ChoiceNode { | 1060 class LoopChoiceNode: public ChoiceNode { |
| 1045 public: | 1061 public: |
| 1046 explicit LoopChoiceNode(bool body_can_be_zero_length) | 1062 explicit LoopChoiceNode(bool body_can_be_zero_length) |
| 1047 : ChoiceNode(2), | 1063 : ChoiceNode(2), |
| 1048 loop_node_(NULL), | 1064 loop_node_(NULL), |
| 1049 continue_node_(NULL), | 1065 continue_node_(NULL), |
| 1050 body_can_be_zero_length_(body_can_be_zero_length) { } | 1066 body_can_be_zero_length_(body_can_be_zero_length) { } |
| 1051 void AddLoopAlternative(GuardedAlternative alt); | 1067 void AddLoopAlternative(GuardedAlternative alt); |
| 1052 void AddContinueAlternative(GuardedAlternative alt); | 1068 void AddContinueAlternative(GuardedAlternative alt); |
| 1053 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 1069 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 1054 virtual int EatsAtLeast(int still_to_find, int recursion_depth); | 1070 virtual int EatsAtLeast(int still_to_find, int recursion_depth); |
| 1055 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 1071 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 1056 RegExpCompiler* compiler, | 1072 RegExpCompiler* compiler, |
| 1057 int characters_filled_in); | 1073 int characters_filled_in, |
| 1074 bool not_at_start); |
| 1058 virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); } | 1075 virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); } |
| 1059 RegExpNode* loop_node() { return loop_node_; } | 1076 RegExpNode* loop_node() { return loop_node_; } |
| 1060 RegExpNode* continue_node() { return continue_node_; } | 1077 RegExpNode* continue_node() { return continue_node_; } |
| 1061 bool body_can_be_zero_length() { return body_can_be_zero_length_; } | 1078 bool body_can_be_zero_length() { return body_can_be_zero_length_; } |
| 1062 virtual void Accept(NodeVisitor* visitor); | 1079 virtual void Accept(NodeVisitor* visitor); |
| 1063 | 1080 |
| 1064 private: | 1081 private: |
| 1065 // AddAlternative is made private for loop nodes because alternatives | 1082 // AddAlternative is made private for loop nodes because alternatives |
| 1066 // should not be added freely, we need to keep track of which node | 1083 // should not be added freely, we need to keep track of which node |
| 1067 // goes back to the node itself. | 1084 // goes back to the node itself. |
| (...skipping 13 matching lines...) Expand all Loading... |
| 1081 // generate code for paths that the matcher can take through the regular | 1098 // generate code for paths that the matcher can take through the regular |
| 1082 // expression. A given node in the regexp can be code-generated several times | 1099 // expression. A given node in the regexp can be code-generated several times |
| 1083 // as it can be part of several traces. For example for the regexp: | 1100 // as it can be part of several traces. For example for the regexp: |
| 1084 // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part | 1101 // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part |
| 1085 // of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code | 1102 // of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code |
| 1086 // to match foo is generated only once (the traces have a common prefix). The | 1103 // to match foo is generated only once (the traces have a common prefix). The |
| 1087 // code to store the capture is deferred and generated (twice) after the places | 1104 // code to store the capture is deferred and generated (twice) after the places |
| 1088 // where baz has been matched. | 1105 // where baz has been matched. |
| 1089 class Trace { | 1106 class Trace { |
| 1090 public: | 1107 public: |
| 1108 // A value for a property that is either known to be true, know to be false, |
| 1109 // or not known. |
| 1110 enum TriBool { |
| 1111 UNKNOWN = -1, FALSE = 0, TRUE = 1 |
| 1112 }; |
| 1113 |
| 1091 class DeferredAction { | 1114 class DeferredAction { |
| 1092 public: | 1115 public: |
| 1093 DeferredAction(ActionNode::Type type, int reg) | 1116 DeferredAction(ActionNode::Type type, int reg) |
| 1094 : type_(type), reg_(reg), next_(NULL) { } | 1117 : type_(type), reg_(reg), next_(NULL) { } |
| 1095 DeferredAction* next() { return next_; } | 1118 DeferredAction* next() { return next_; } |
| 1096 bool Mentions(int reg); | 1119 bool Mentions(int reg); |
| 1097 int reg() { return reg_; } | 1120 int reg() { return reg_; } |
| 1098 ActionNode::Type type() { return type_; } | 1121 ActionNode::Type type() { return type_; } |
| 1099 private: | 1122 private: |
| 1100 ActionNode::Type type_; | 1123 ActionNode::Type type_; |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1143 : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { } | 1166 : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { } |
| 1144 }; | 1167 }; |
| 1145 | 1168 |
| 1146 Trace() | 1169 Trace() |
| 1147 : cp_offset_(0), | 1170 : cp_offset_(0), |
| 1148 actions_(NULL), | 1171 actions_(NULL), |
| 1149 backtrack_(NULL), | 1172 backtrack_(NULL), |
| 1150 stop_node_(NULL), | 1173 stop_node_(NULL), |
| 1151 loop_label_(NULL), | 1174 loop_label_(NULL), |
| 1152 characters_preloaded_(0), | 1175 characters_preloaded_(0), |
| 1153 bound_checked_up_to_(0) { } | 1176 bound_checked_up_to_(0), |
| 1177 at_start_(UNKNOWN) { } |
| 1178 |
| 1154 // End the trace. This involves flushing the deferred actions in the trace | 1179 // End the trace. This involves flushing the deferred actions in the trace |
| 1155 // and pushing a backtrack location onto the backtrack stack. Once this is | 1180 // and pushing a backtrack location onto the backtrack stack. Once this is |
| 1156 // done we can start a new trace or go to one that has already been | 1181 // done we can start a new trace or go to one that has already been |
| 1157 // generated. | 1182 // generated. |
| 1158 void Flush(RegExpCompiler* compiler, RegExpNode* successor); | 1183 void Flush(RegExpCompiler* compiler, RegExpNode* successor); |
| 1159 int cp_offset() { return cp_offset_; } | 1184 int cp_offset() { return cp_offset_; } |
| 1160 DeferredAction* actions() { return actions_; } | 1185 DeferredAction* actions() { return actions_; } |
| 1161 // A trivial trace is one that has no deferred actions or other state that | 1186 // A trivial trace is one that has no deferred actions or other state that |
| 1162 // affects the assumptions used when generating code. There is no recorded | 1187 // affects the assumptions used when generating code. There is no recorded |
| 1163 // backtrack location in a trivial trace, so with a trivial trace we will | 1188 // backtrack location in a trivial trace, so with a trivial trace we will |
| 1164 // generate code that, on a failure to match, gets the backtrack location | 1189 // generate code that, on a failure to match, gets the backtrack location |
| 1165 // from the backtrack stack rather than using a direct jump instruction. We | 1190 // from the backtrack stack rather than using a direct jump instruction. We |
| 1166 // always start code generation with a trivial trace and non-trivial traces | 1191 // always start code generation with a trivial trace and non-trivial traces |
| 1167 // are created as we emit code for nodes or add to the list of deferred | 1192 // are created as we emit code for nodes or add to the list of deferred |
| 1168 // actions in the trace. The location of the code generated for a node using | 1193 // actions in the trace. The location of the code generated for a node using |
| 1169 // a trivial trace is recorded in a label in the node so that gotos can be | 1194 // a trivial trace is recorded in a label in the node so that gotos can be |
| 1170 // generated to that code. | 1195 // generated to that code. |
| 1171 bool is_trivial() { | 1196 bool is_trivial() { |
| 1172 return backtrack_ == NULL && | 1197 return backtrack_ == NULL && |
| 1173 actions_ == NULL && | 1198 actions_ == NULL && |
| 1174 cp_offset_ == 0 && | 1199 cp_offset_ == 0 && |
| 1175 characters_preloaded_ == 0 && | 1200 characters_preloaded_ == 0 && |
| 1176 bound_checked_up_to_ == 0 && | 1201 bound_checked_up_to_ == 0 && |
| 1177 quick_check_performed_.characters() == 0; | 1202 quick_check_performed_.characters() == 0 && |
| 1203 at_start_ == UNKNOWN; |
| 1178 } | 1204 } |
| 1205 TriBool at_start() { return at_start_; } |
| 1206 void set_at_start(bool at_start) { at_start_ = at_start ? TRUE : FALSE; } |
| 1179 Label* backtrack() { return backtrack_; } | 1207 Label* backtrack() { return backtrack_; } |
| 1180 Label* loop_label() { return loop_label_; } | 1208 Label* loop_label() { return loop_label_; } |
| 1181 RegExpNode* stop_node() { return stop_node_; } | 1209 RegExpNode* stop_node() { return stop_node_; } |
| 1182 int characters_preloaded() { return characters_preloaded_; } | 1210 int characters_preloaded() { return characters_preloaded_; } |
| 1183 int bound_checked_up_to() { return bound_checked_up_to_; } | 1211 int bound_checked_up_to() { return bound_checked_up_to_; } |
| 1184 QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; } | 1212 QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; } |
| 1185 bool mentions_reg(int reg); | 1213 bool mentions_reg(int reg); |
| 1186 // Returns true if a deferred position store exists to the specified | 1214 // Returns true if a deferred position store exists to the specified |
| 1187 // register and stores the offset in the out-parameter. Otherwise | 1215 // register and stores the offset in the out-parameter. Otherwise |
| 1188 // returns false. | 1216 // returns false. |
| (...skipping 27 matching lines...) Expand all Loading... |
| 1216 OutSet& registers_to_pop, | 1244 OutSet& registers_to_pop, |
| 1217 OutSet& registers_to_clear); | 1245 OutSet& registers_to_clear); |
| 1218 int cp_offset_; | 1246 int cp_offset_; |
| 1219 DeferredAction* actions_; | 1247 DeferredAction* actions_; |
| 1220 Label* backtrack_; | 1248 Label* backtrack_; |
| 1221 RegExpNode* stop_node_; | 1249 RegExpNode* stop_node_; |
| 1222 Label* loop_label_; | 1250 Label* loop_label_; |
| 1223 int characters_preloaded_; | 1251 int characters_preloaded_; |
| 1224 int bound_checked_up_to_; | 1252 int bound_checked_up_to_; |
| 1225 QuickCheckDetails quick_check_performed_; | 1253 QuickCheckDetails quick_check_performed_; |
| 1254 TriBool at_start_; |
| 1226 }; | 1255 }; |
| 1227 | 1256 |
| 1228 | 1257 |
| 1229 class NodeVisitor { | 1258 class NodeVisitor { |
| 1230 public: | 1259 public: |
| 1231 virtual ~NodeVisitor() { } | 1260 virtual ~NodeVisitor() { } |
| 1232 #define DECLARE_VISIT(Type) \ | 1261 #define DECLARE_VISIT(Type) \ |
| 1233 virtual void Visit##Type(Type##Node* that) = 0; | 1262 virtual void Visit##Type(Type##Node* that) = 0; |
| 1234 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | 1263 FOR_EACH_NODE_TYPE(DECLARE_VISIT) |
| 1235 #undef DECLARE_VISIT | 1264 #undef DECLARE_VISIT |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1298 | 1327 |
| 1299 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); | 1328 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); |
| 1300 }; | 1329 }; |
| 1301 | 1330 |
| 1302 | 1331 |
| 1303 struct RegExpCompileData { | 1332 struct RegExpCompileData { |
| 1304 RegExpCompileData() | 1333 RegExpCompileData() |
| 1305 : tree(NULL), | 1334 : tree(NULL), |
| 1306 node(NULL), | 1335 node(NULL), |
| 1307 simple(true), | 1336 simple(true), |
| 1337 contains_anchor(false), |
| 1308 capture_count(0) { } | 1338 capture_count(0) { } |
| 1309 RegExpTree* tree; | 1339 RegExpTree* tree; |
| 1310 RegExpNode* node; | 1340 RegExpNode* node; |
| 1311 bool simple; | 1341 bool simple; |
| 1342 bool contains_anchor; |
| 1312 Handle<String> error; | 1343 Handle<String> error; |
| 1313 int capture_count; | 1344 int capture_count; |
| 1314 }; | 1345 }; |
| 1315 | 1346 |
| 1316 | 1347 |
| 1317 class RegExpEngine: public AllStatic { | 1348 class RegExpEngine: public AllStatic { |
| 1318 public: | 1349 public: |
| 1319 static Handle<FixedArray> Compile(RegExpCompileData* input, | 1350 static Handle<FixedArray> Compile(RegExpCompileData* input, |
| 1320 bool ignore_case, | 1351 bool ignore_case, |
| 1321 bool multiline, | 1352 bool multiline, |
| 1322 Handle<String> pattern, | 1353 Handle<String> pattern, |
| 1323 bool is_ascii); | 1354 bool is_ascii); |
| 1324 | 1355 |
| 1325 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1356 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
| 1326 }; | 1357 }; |
| 1327 | 1358 |
| 1328 | 1359 |
| 1329 } } // namespace v8::internal | 1360 } } // namespace v8::internal |
| 1330 | 1361 |
| 1331 #endif // V8_JSREGEXP_H_ | 1362 #endif // V8_JSREGEXP_H_ |
| OLD | NEW |