Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/jsregexp.cc

Issue 9638: More automaton translation (Closed)
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 186 matching lines...) Expand 10 before | Expand all | Expand 10 after
197 Handle<String> flag_str) { 197 Handle<String> flag_str) {
198 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str); 198 JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);
199 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags); 199 Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags);
200 bool in_cache = !cached.is_null(); 200 bool in_cache = !cached.is_null();
201 Handle<Object> result; 201 Handle<Object> result;
202 StringShape shape(*pattern); 202 StringShape shape(*pattern);
203 if (in_cache) { 203 if (in_cache) {
204 re->set_data(*cached); 204 re->set_data(*cached);
205 result = re; 205 result = re;
206 } else { 206 } else {
207 SafeStringInputBuffer buffer(pattern.location()); 207 SafeStringInputBuffer buffer(pattern.location());
208 Handle<String> error_text; 208 RegExpParseResult parse_result;
209 bool has_escapes; 209 if (!ParseRegExp(&buffer, &parse_result)) {
210 RegExpTree* ast = ParseRegExp(&buffer, &error_text, &has_escapes);
211 if (!error_text.is_null()) {
212 // Throw an exception if we fail to parse the pattern. 210 // Throw an exception if we fail to parse the pattern.
213 return CreateRegExpException(re, pattern, error_text, "malformed_regexp"); 211 return CreateRegExpException(re,
212 pattern,
213 parse_result.error,
214 "malformed_regexp");
214 } 215 }
215 RegExpAtom* atom = ast->AsAtom(); 216 RegExpAtom* atom = parse_result.tree->AsAtom();
216 if (atom != NULL && !flags.is_ignore_case()) { 217 if (atom != NULL && !flags.is_ignore_case()) {
217 if (has_escapes) { 218 if (parse_result.has_character_escapes) {
218 Vector<const uc16> atom_pattern = atom->data(); 219 Vector<const uc16> atom_pattern = atom->data();
219 Handle<String> atom_string = 220 Handle<String> atom_string =
220 Factory::NewStringFromTwoByte(atom_pattern); 221 Factory::NewStringFromTwoByte(atom_pattern);
221 result = AtomCompile(re, pattern, flags, atom_string); 222 result = AtomCompile(re, pattern, flags, atom_string);
222 } else { 223 } else {
223 result = AtomCompile(re, pattern, flags, pattern); 224 result = AtomCompile(re, pattern, flags, pattern);
224 } 225 }
225 } else { 226 } else {
226 result = JsrePrepare(re, pattern, flags); 227 result = JsrePrepare(re, pattern, flags);
227 } 228 }
(...skipping 394 matching lines...) Expand 10 before | Expand all | Expand 10 after
622 ByteArray* RegExpImpl::JsreInternal(Handle<JSRegExp> re) { 623 ByteArray* RegExpImpl::JsreInternal(Handle<JSRegExp> re) {
623 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); 624 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
624 return ByteArray::cast(value->get(INTERNAL_INDEX)); 625 return ByteArray::cast(value->get(INTERNAL_INDEX));
625 } 626 }
626 627
627 628
628 // ------------------------------------------------------------------- 629 // -------------------------------------------------------------------
629 // New regular expression engine 630 // New regular expression engine
630 631
631 632
632 template <typename Char> class ExecutionState; 633 class ExecutionState;
633 634
634 635
635 template <typename Char>
636 class DotPrinter { 636 class DotPrinter {
637 public: 637 public:
638 DotPrinter() : stream_(&alloc_) { } 638 DotPrinter() : stream_(&alloc_) { }
639 void PrintNode(RegExpNode<Char>* node); 639 void PrintNode(const char* label, RegExpNode* node);
640 void Visit(RegExpNode<Char>* node); 640 void Visit(RegExpNode* node);
641 StringStream* stream() { return &stream_; } 641 StringStream* stream() { return &stream_; }
642 private: 642 private:
643 HeapStringAllocator alloc_; 643 HeapStringAllocator alloc_;
644 StringStream stream_; 644 StringStream stream_;
645 std::set<RegExpNode<Char>*> seen_; 645 std::set<RegExpNode*> seen_;
646 }; 646 };
647 647
648 648
649 template <typename Char> 649 class RegExpCompiler {
650 class RegExpCompiler: public RegExpVisitor { 650 public:
651 public: 651 explicit RegExpCompiler(int capture_count)
652 RegExpCompiler() { } 652 : next_register_(2 * capture_count) { }
653 RegExpNode<Char>* Compile(RegExpTree* tree, RegExpNode<Char>* rest) { 653
654 return static_cast<RegExpNode<Char>*>(tree->Accept(this, rest)); 654 RegExpNode* Compile(RegExpTree* tree,
655 } 655 RegExpNode* on_success,
656 #define MAKE_CASE(Name) virtual void* Visit##Name(RegExp##Name*, void*); 656 RegExpNode* on_failure) {
657 FOR_EACH_REG_EXP_NODE_TYPE(MAKE_CASE) 657 return tree->ToNode(this, on_success, on_failure);
658 #undef MAKE_CASE 658 }
659 }; 659
660 660 int AllocateRegister() { return next_register_++; }
661 661
662 template <typename Char> 662 private:
663 int next_register_;
664 };
665
666
663 class RegExpNode: public ZoneObject { 667 class RegExpNode: public ZoneObject {
664 public: 668 public:
665 virtual ~RegExpNode() { } 669 virtual ~RegExpNode() { }
666 virtual void EmitDot(DotPrinter<Char>* out); 670 virtual void EmitDot(DotPrinter* out);
667 virtual bool Step(ExecutionState<Char>* state) = 0; 671 };
668 }; 672
669 673
670 674 class SeqRegExpNode: public RegExpNode {
671 template <typename Char> 675 public:
672 class SeqRegExpNode: public RegExpNode<Char> { 676 explicit SeqRegExpNode(RegExpNode* on_success)
673 public: 677 : on_success_(on_success) { }
674 explicit SeqRegExpNode(RegExpNode<Char>* next) : next_(next) { } 678 RegExpNode* on_success() { return on_success_; }
675 RegExpNode<Char>* next() { return next_; } 679 private:
676 private: 680 RegExpNode* on_success_;
677 RegExpNode<Char>* next_; 681 };
678 }; 682
679 683
680 684 class EndNode: public RegExpNode {
681 template <typename Char> 685 public:
682 class EndNode: public RegExpNode<Char> { 686 enum Action { ACCEPT, BACKTRACK };
683 public: 687 virtual void EmitDot(DotPrinter* out);
684 virtual void EmitDot(DotPrinter<Char>* out); 688 static EndNode* GetAccept() { return &kAccept; }
685 virtual bool Step(ExecutionState<Char>* state); 689 static EndNode* GetBacktrack() { return &kBacktrack; }
686 }; 690 private:
687 691 explicit EndNode(Action action) : action_(action) { }
688 692 Action action_;
689 template <typename Char> 693 static EndNode kAccept;
690 class AtomNode: public SeqRegExpNode<Char> { 694 static EndNode kBacktrack;
691 public: 695 };
692 AtomNode(Vector<const uc16> data, RegExpNode<Char>* next) 696
693 : SeqRegExpNode<Char>(next), 697
698 EndNode EndNode::kAccept(ACCEPT);
699 EndNode EndNode::kBacktrack(BACKTRACK);
700
701
702 class AtomNode: public SeqRegExpNode {
703 public:
704 AtomNode(Vector<const uc16> data,
705 RegExpNode* on_success,
706 RegExpNode* on_failure)
707 : SeqRegExpNode(on_success),
708 on_failure_(on_failure),
694 data_(data) { } 709 data_(data) { }
695 virtual void EmitDot(DotPrinter<Char>* out); 710 virtual void EmitDot(DotPrinter* out);
696 virtual bool Step(ExecutionState<Char>* state);
697 Vector<const uc16> data() { return data_; } 711 Vector<const uc16> data() { return data_; }
698 private: 712 RegExpNode* on_failure() { return on_failure_; }
713 private:
714 RegExpNode* on_failure_;
699 Vector<const uc16> data_; 715 Vector<const uc16> data_;
700 }; 716 };
701 717
702 718
703 template <typename Char> 719 class BackreferenceNode: public SeqRegExpNode {
704 class CharacterClassNode: public SeqRegExpNode<Char> { 720 public:
705 public: 721 BackreferenceNode(int start_reg,
706 CharacterClassNode(ZoneList<CharacterRange>* ranges, RegExpNode<Char>* next) 722 int end_reg,
707 : SeqRegExpNode<Char>(next), 723 RegExpNode* on_success,
724 RegExpNode* on_failure)
725 : SeqRegExpNode(on_success),
726 on_failure_(on_failure),
727 start_reg_(start_reg),
728 end_reg_(end_reg) { }
729 virtual void EmitDot(DotPrinter* out);
730 RegExpNode* on_failure() { return on_failure_; }
731 int start_register() { return start_reg_; }
732 int end_register() { return end_reg_; }
733 private:
734 RegExpNode* on_failure_;
735 int start_reg_;
736 int end_reg_;
737 };
738
739
740 class CharacterClassNode: public SeqRegExpNode {
741 public:
742 CharacterClassNode(ZoneList<CharacterRange>* ranges,
743 RegExpNode* on_success,
744 RegExpNode* on_failure)
745 : SeqRegExpNode(on_success),
746 on_failure_(on_failure),
708 ranges_(ranges) { } 747 ranges_(ranges) { }
709 virtual void EmitDot(DotPrinter<Char>* out); 748 virtual void EmitDot(DotPrinter* out);
710 virtual bool Step(ExecutionState<Char>* state);
711 ZoneList<CharacterRange>* ranges() { return ranges_; } 749 ZoneList<CharacterRange>* ranges() { return ranges_; }
712 private: 750 RegExpNode* on_failure() { return on_failure_; }
751 private:
752 RegExpNode* on_failure_;
713 ZoneList<CharacterRange>* ranges_; 753 ZoneList<CharacterRange>* ranges_;
714 }; 754 };
715 755
716 756
717 template <typename Char> 757 class Guard: public ZoneObject {
718 class ChoiceNode: public RegExpNode<Char> { 758 public:
719 public: 759 enum Relation { LT, GEQ };
720 explicit ChoiceNode(ZoneList<RegExpNode<Char>*>* choices) 760 Guard(int reg, Relation op, int value)
721 : choices_(choices) { } 761 : reg_(reg),
722 virtual void EmitDot(DotPrinter<Char>* out); 762 op_(op),
723 virtual bool Step(ExecutionState<Char>* state); 763 value_(value) { }
724 ZoneList<RegExpNode<Char>*>* choices() { return choices_; } 764 int reg() { return reg_; }
725 DispatchTable* table() { return table_; } 765 Relation op() { return op_; }
726 private: 766 int value() { return value_; }
727 ZoneList<RegExpNode<Char>*>* choices_; 767 private:
768 int reg_;
769 Relation op_;
770 int value_;
771 };
772
773
774 class GuardedAlternative {
775 public:
776 explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) { }
777 void AddGuard(Guard* guard);
778 RegExpNode* node() { return node_; }
779 ZoneList<Guard*>* guards() { return guards_; }
780 private:
781 RegExpNode* node_;
782 ZoneList<Guard*>* guards_;
783 };
784
785
786 void GuardedAlternative::AddGuard(Guard* guard) {
787 if (guards_ == NULL)
788 guards_ = new ZoneList<Guard*>(1);
789 guards_->Add(guard);
790 }
791
792
793 class ChoiceNode: public RegExpNode {
794 public:
795 explicit ChoiceNode(int expected_size, RegExpNode* on_failure)
796 : on_failure_(on_failure),
797 choices_(new ZoneList<GuardedAlternative>(expected_size)) { }
798 virtual void EmitDot(DotPrinter* out);
799 void AddChild(GuardedAlternative node) { choices()->Add(node); }
800 ZoneList<GuardedAlternative>* choices() { return choices_; }
801 DispatchTable* table() { return &table_; }
802 RegExpNode* on_failure() { return on_failure_; }
803 private:
804 RegExpNode* on_failure_;
805 ZoneList<GuardedAlternative>* choices_;
728 DispatchTable table_; 806 DispatchTable table_;
729 }; 807 };
730 808
731 809
810 class ActionNode: public SeqRegExpNode {
811 public:
812 enum Type {
813 STORE_REGISTER,
814 INCREMENT_REGISTER,
815 STORE_POSITION,
816 BEGIN_SUBMATCH,
817 ESCAPE_SUBMATCH,
818 END_SUBMATCH
819 };
820 static ActionNode* StoreRegister(int reg, int val, RegExpNode* on_success) {
821 ActionNode* result = new ActionNode(STORE_REGISTER, on_success);
822 result->data_.u_store_register.reg_ = reg;
823 result->data_.u_store_register.value_ = val;
824 return result;
825 }
826 static ActionNode* IncrementRegister(int reg, RegExpNode* on_success) {
827 ActionNode* result = new ActionNode(INCREMENT_REGISTER, on_success);
828 result->data_.u_increment_register.reg_ = reg;
829 return result;
830 }
831 static ActionNode* StorePosition(int reg, RegExpNode* on_success) {
832 ActionNode* result = new ActionNode(STORE_POSITION, on_success);
833 result->data_.u_store_position.reg_ = reg;
834 return result;
835 }
836 static ActionNode* BeginSubmatch(RegExpNode* on_success) {
837 return new ActionNode(BEGIN_SUBMATCH, on_success);
838 }
839 static ActionNode* EscapeSubmatch(RegExpNode* on_success) {
840 return new ActionNode(ESCAPE_SUBMATCH, on_success);
841 }
842 static ActionNode* EndSubmatch(RegExpNode* on_success) {
843 return new ActionNode(END_SUBMATCH, on_success);
844 }
845 virtual void EmitDot(DotPrinter* out);
846 Type type() { return type_; }
847 private:
848 ActionNode(Type type, RegExpNode* on_success)
849 : SeqRegExpNode(on_success),
850 type_(type) { }
851 Type type_;
852 union {
853 struct {
854 int reg_;
855 int value_;
856 } u_store_register;
857 struct {
858 int reg_;
859 } u_increment_register;
860 struct {
861 int reg_;
862 } u_store_position;
863 } data_;
864 };
865
866
732 // ------------------------------------------------------------------- 867 // -------------------------------------------------------------------
733 // Dot/dotty output 868 // Dot/dotty output
734 869
735 870 void DotPrinter::PrintNode(const char* label, RegExpNode* node) {
736 template <typename Char> 871 stream()->Add("digraph G {\n graph [label=\"");
737 void DotPrinter<Char>::PrintNode(RegExpNode<Char>* node) { 872 for (int i = 0; label[i]; i++) {
738 stream()->Add("digraph G {\n"); 873 switch (label[i]) {
874 case '\\':
875 stream()->Add("\\\\");
876 break;
877 case '"':
878 stream()->Add("\"");
879 break;
880 default:
881 stream()->Put(label[i]);
882 break;
883 }
884 }
885 stream()->Add("\"]; \n");
739 Visit(node); 886 Visit(node);
740 stream()->Add("}\n"); 887 stream()->Add("}\n");
741 printf("%s", *(stream()->ToCString())); 888 printf("%s", *(stream()->ToCString()));
742 } 889 }
743 890
744 891
745 template <typename Char> 892 void DotPrinter::Visit(RegExpNode* node) {
746 void DotPrinter<Char>::Visit(RegExpNode<Char>* node) {
747 if (seen_.find(node) != seen_.end()) 893 if (seen_.find(node) != seen_.end())
748 return; 894 return;
749 seen_.insert(node); 895 seen_.insert(node);
750 node->EmitDot(this); 896 node->EmitDot(this);
751 } 897 }
752 898
753 899
754 template <typename Char> 900 void RegExpNode::EmitDot(DotPrinter* out) {
755 void RegExpNode<Char>::EmitDot(DotPrinter<Char>* out) {
756 UNIMPLEMENTED(); 901 UNIMPLEMENTED();
757 } 902 }
758 903
759 904
760 template <typename Char> 905 static void PrintOnFailure(DotPrinter* out,
761 void ChoiceNode<Char>::EmitDot(DotPrinter<Char>* out) { 906 RegExpNode* from,
762 out->stream()->Add("n%p [label=\"?\"];\n", this); 907 RegExpNode* on_failure) {
908 if (on_failure == EndNode::GetBacktrack()) return;
909 out->stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure);
910 out->Visit(on_failure);
911 }
912
913
914 void ChoiceNode::EmitDot(DotPrinter* out) {
915 out->stream()->Add(" n%p [label=\"?\", shape=circle];\n", this);
916 PrintOnFailure(out, this, this->on_failure());
763 for (int i = 0; i < choices()->length(); i++) { 917 for (int i = 0; i < choices()->length(); i++) {
764 out->stream()->Add("n%p -> n%p [label=\"%i\"];\n", 918 GuardedAlternative alt = choices()->at(i);
919 out->stream()->Add(" n%p -> n%p [label=\"%i",
765 this, 920 this,
766 choices()->at(i), 921 alt.node(),
767 i); 922 i);
768 out->Visit(choices()->at(i)); 923 if (alt.guards() != NULL) {
924 out->stream()->Add(" [");
925 for (int j = 0; j < alt.guards()->length(); j++) {
926 if (j > 0) out->stream()->Add(" ");
927 Guard* guard = alt.guards()->at(j);
928 switch (guard->op()) {
929 case Guard::GEQ:
930 out->stream()->Add("$%i &#8805; %i", guard->reg(), guard->value());
931 break;
932 case Guard::LT:
933 out->stream()->Add("$%i < %i", guard->reg(), guard->value());
934 break;
935 }
936 }
937 out->stream()->Add("]");
938 }
939 out->stream()->Add("\"];\n");
940 out->Visit(choices()->at(i).node());
769 } 941 }
770 } 942 }
771 943
772 944
773 template <typename Char> 945 void AtomNode::EmitDot(DotPrinter* out) {
774 void AtomNode<Char>::EmitDot(DotPrinter<Char>* out) { 946 out->stream()->Add(" n%p [label=\"'%w'\", shape=doubleoctagon];\n",
775 out->stream()->Add("n%p [label=\"'%w'\"];\n", this, data()); 947 this,
776 out->stream()->Add("n%p -> n%p;\n", this, this->next()); 948 data());
777 out->Visit(this->next()); 949 out->stream()->Add(" n%p -> n%p;\n", this, this->on_success());
950 out->Visit(this->on_success());
951 PrintOnFailure(out, this, this->on_failure());
778 } 952 }
779 953
780 954
781 template <typename Char> 955 void BackreferenceNode::EmitDot(DotPrinter* out) {
782 void EndNode<Char>::EmitDot(DotPrinter<Char>* out) { 956 out->stream()->Add(" n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n",
783 out->stream()->Add("n%p [style=bold, label=\"done\"];\n", this); 957 this,
958 start_register(),
959 end_register());
960 out->stream()->Add(" n%p -> n%p;\n", this, this->on_success());
961 out->Visit(this->on_success());
962 PrintOnFailure(out, this, this->on_failure());
784 } 963 }
785 964
786 965
787 template <typename Char> 966 void EndNode::EmitDot(DotPrinter* out) {
788 void CharacterClassNode<Char>::EmitDot(DotPrinter<Char>* out) { 967 out->stream()->Add(" n%p [style=bold, shape=point];\n", this);
789 out->stream()->Add("n%p [label=\"[...]\"];\n", this);
790 out->stream()->Add("n%p -> n%p;\n", this, this->next());
791 out->Visit(this->next());
792 } 968 }
793 969
794 970
971 void CharacterClassNode::EmitDot(DotPrinter* out) {
972 out->stream()->Add(" n%p [label=\"[...]\"];\n", this);
973 out->stream()->Add(" n%p -> n%p;\n", this, this->on_success());
974 out->Visit(this->on_success());
975 PrintOnFailure(out, this, this->on_failure());
976 }
977
978
979 void ActionNode::EmitDot(DotPrinter* out) {
980 out->stream()->Add(" n%p [", this);
981 switch (type()) {
982 case STORE_REGISTER:
983 out->stream()->Add("label=\"$%i:=%i\", shape=box",
984 data_.u_store_register.reg_,
985 data_.u_store_register.value_);
986 break;
987 case INCREMENT_REGISTER:
988 out->stream()->Add("label=\"$%i++\", shape=box",
989 data_.u_increment_register.reg_);
990 break;
991 case STORE_POSITION:
992 out->stream()->Add("label=\"$%i:=$pos\", shape=box",
993 data_.u_store_position.reg_);
994 break;
995 case BEGIN_SUBMATCH:
996 out->stream()->Add("label=\"begin\", shape=septagon");
997 break;
998 case ESCAPE_SUBMATCH:
999 out->stream()->Add("label=\"escape\", shape=septagon");
1000 break;
1001 case END_SUBMATCH:
1002 out->stream()->Add("label=\"end\", shape=septagon");
1003 break;
1004 }
1005 out->stream()->Add("];\n");
1006 out->stream()->Add(" n%p -> n%p;\n", this, this->on_success());
1007 out->Visit(this->on_success());
1008 }
1009
1010
795 // ------------------------------------------------------------------- 1011 // -------------------------------------------------------------------
796 // Tree to graph conversion 1012 // Tree to graph conversion
797 1013
798 1014
799 template <typename Char> 1015 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
800 void* RegExpCompiler<Char>::VisitAtom(RegExpAtom* that, void* rest) { 1016 RegExpNode* on_success,
801 return new AtomNode<Char>(that->data(), 1017 RegExpNode* on_failure) {
802 static_cast<RegExpNode<Char>*>(rest)); 1018 return new AtomNode(data(), on_success, on_failure);
803 } 1019 }
804 1020
805 1021
806 template <typename Char> 1022 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
807 void* RegExpCompiler<Char>::VisitCharacterClass(RegExpCharacterClass* that, 1023 RegExpNode* on_success,
808 void* rest) { 1024 RegExpNode* on_failure) {
809 return new CharacterClassNode<Char>(that->ranges(), 1025 return new CharacterClassNode(ranges(), on_success, on_failure);
810 static_cast<RegExpNode<Char>*>(rest));
811 } 1026 }
812 1027
813 1028
814 template <typename Char> 1029 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
815 void* RegExpCompiler<Char>::VisitDisjunction(RegExpDisjunction* that, 1030 RegExpNode* on_success,
816 void* rest_ptr) { 1031 RegExpNode* on_failure) {
817 RegExpNode<Char>* rest = static_cast<RegExpNode<Char>*>(rest_ptr); 1032 ZoneList<RegExpTree*>* children = nodes();
818 ZoneList<RegExpTree*>* children = that->nodes();
819 int length = children->length(); 1033 int length = children->length();
820 ZoneList<RegExpNode<Char>*>* choices 1034 ChoiceNode* result = new ChoiceNode(length, on_failure);
821 = new ZoneList<RegExpNode<Char>*>(length); 1035 for (int i = 0; i < length; i++) {
822 for (int i = 0; i < length; i++) 1036 GuardedAlternative child(compiler->Compile(children->at(i),
823 choices->Add(Compile(children->at(i), rest)); 1037 on_success,
824 return new ChoiceNode<Char>(choices); 1038 on_failure));
1039 result->AddChild(child);
1040 }
1041 return result;
825 } 1042 }
826 1043
827 1044
828 template <typename Char> 1045 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
829 void* RegExpCompiler<Char>::VisitQuantifier(RegExpQuantifier* that, 1046 RegExpNode* on_success,
830 void* rest_ptr) { 1047 RegExpNode* on_failure) {
831 RegExpNode<Char>* rest = static_cast<RegExpNode<Char>*>(rest_ptr); 1048 // x{f, t} becomes this:
832 if (that->max() >= RegExpQuantifier::kInfinity) { 1049 //
833 // Don't try to count the number of iterations if the max it too 1050 // (r++)<-.
834 // large. 1051 // | `
835 if (that->min() != 0) { 1052 // | (x)
836 UNIMPLEMENTED(); 1053 // v ^
837 } 1054 // (r=0)-->(?)---/ [if r < t]
838 ZoneList<RegExpNode<Char>*>* loop_choices 1055 // |
839 = new ZoneList<RegExpNode<Char>*>(2); 1056 // [if r >= f] \----> ...
840 RegExpNode<Char>* loop_node = new ChoiceNode<Char>(loop_choices); 1057 //
Lasse Reichstein 2008/11/06 16:46:44 Do you test that x didn't match the empty string w
Christian Plesner Hansen 2008/11/06 17:34:42 Good points all -- I'm just adding TODOs for now.
841 RegExpNode<Char>* body_node = Compile(that->body(), loop_node); 1058 bool has_min = min() > 0;
842 if (that->is_greedy()) { 1059 bool has_max = max() < RegExpQuantifier::kInfinity;
843 loop_choices->Add(body_node); 1060 bool needs_counter = has_min || has_max;
844 loop_choices->Add(rest); 1061 int reg = needs_counter ? compiler->AllocateRegister() : -1;
Lasse Reichstein 2008/11/06 16:46:44 The case where min = 0 and max = 1 (i.e., the '?'
Christian Plesner Hansen 2008/11/06 17:34:42 Yes. My idea was to add a few transformations at
845 } else { 1062 ChoiceNode* center = new ChoiceNode(2, on_failure);
846 loop_choices->Add(rest); 1063 RegExpNode* loop_return = needs_counter
847 loop_choices->Add(body_node); 1064 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg, center))
848 } 1065 : static_cast<RegExpNode*>(center);
849 return loop_node; 1066 RegExpNode* body_node = compiler->Compile(body(), loop_return, on_failure);
1067 GuardedAlternative body_alt(body_node);
1068 if (has_max) {
1069 Guard* body_guard = new Guard(reg, Guard::LT, max());
1070 body_alt.AddGuard(body_guard);
1071 }
1072 GuardedAlternative rest_alt(on_success);
1073 if (has_min) {
1074 Guard* rest_guard = new Guard(reg, Guard::GEQ, min());
1075 rest_alt.AddGuard(rest_guard);
1076 }
1077 if (is_greedy()) {
1078 center->AddChild(body_alt);
1079 center->AddChild(rest_alt);
850 } else { 1080 } else {
851 UNIMPLEMENTED(); 1081 center->AddChild(rest_alt);
852 return NULL; 1082 center->AddChild(body_alt);
1083 }
1084 if (needs_counter) {
1085 return ActionNode::StoreRegister(reg, 0, center);
1086 } else {
1087 return center;
853 } 1088 }
854 } 1089 }
855 1090
856 1091
857 template <typename Char> 1092 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
858 void* RegExpCompiler<Char>::VisitAssertion(RegExpAssertion* that, 1093 RegExpNode* on_success,
859 void* rest) { 1094 RegExpNode* on_failure) {
860 UNIMPLEMENTED(); 1095 // TODO(self): implement assertions.
861 return NULL; 1096 return on_success;
862 } 1097 }
863 1098
864 1099
865 template <typename Char> 1100 RegExpNode* RegExpBackreference::ToNode(RegExpCompiler* compiler,
866 void* RegExpCompiler<Char>::VisitCapture(RegExpCapture* that, void* rest) { 1101 RegExpNode* on_success,
867 UNIMPLEMENTED(); 1102 RegExpNode* on_failure) {
868 return NULL; 1103 return new BackreferenceNode(RegExpCapture::StartRegister(index()),
1104 RegExpCapture::EndRegister(index()),
1105 on_success,
1106 on_failure);
869 } 1107 }
870 1108
871 1109
872 template <typename Char> 1110 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
873 void* RegExpCompiler<Char>::VisitLookahead(RegExpLookahead* that, 1111 RegExpNode* on_success,
874 void* rest) { 1112 RegExpNode* on_failure) {
875 UNIMPLEMENTED(); 1113 return on_success;
876 return NULL;
877 } 1114 }
878 1115
879 1116
880 template <typename Char> 1117 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
881 void* RegExpCompiler<Char>::VisitBackreference(RegExpBackreference* that, 1118 RegExpNode* on_success,
882 void* rest) { 1119 RegExpNode* on_failure) {
883 UNIMPLEMENTED(); 1120 if (is_positive()) {
884 return NULL; 1121 RegExpNode* proceed = ActionNode::EndSubmatch(on_success);
Lasse Reichstein 2008/11/06 16:46:44 What does the Submatch concept cover? Why isn't a
Christian Plesner Hansen 2008/11/06 17:34:42 This is based on a long discussion I had with Erik
1122 RegExpNode* escape = ActionNode::EscapeSubmatch(on_failure);
1123 RegExpNode* body_node = compiler->Compile(body(), proceed, escape);
1124 return ActionNode::BeginSubmatch(body_node);
1125 } else {
1126 RegExpNode* failed = ActionNode::EscapeSubmatch(on_success);
1127 RegExpNode* succeeded = ActionNode::EndSubmatch(on_failure);
1128 RegExpNode* body_node = compiler->Compile(body(), succeeded, failed);
1129 return ActionNode::BeginSubmatch(body_node);
1130 }
885 } 1131 }
886 1132
887 1133
888 template <typename Char> 1134 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
889 void* RegExpCompiler<Char>::VisitEmpty(RegExpEmpty* that, void* rest) { 1135 RegExpNode* on_success,
890 return rest; 1136 RegExpNode* on_failure) {
1137 int start_reg = RegExpCapture::StartRegister(index());
1138 int end_reg = RegExpCapture::EndRegister(index());
1139 RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success);
1140 RegExpNode* body_node = compiler->Compile(body(), store_end, on_failure);
1141 return ActionNode::StorePosition(start_reg, body_node);
891 } 1142 }
892 1143
893 1144
894 template <typename Char> 1145 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
895 void* RegExpCompiler<Char>::VisitAlternative(RegExpAlternative* that, 1146 RegExpNode* on_success,
896 void* rest) { 1147 RegExpNode* on_failure) {
897 ZoneList<RegExpTree*>* children = that->nodes(); 1148 ZoneList<RegExpTree*>* children = nodes();
898 RegExpNode<Char>* current = static_cast<RegExpNode<Char>*>(rest); 1149 RegExpNode* current = on_success;
899 for (int i = children->length() - 1; i >= 0; i--) { 1150 for (int i = children->length() - 1; i >= 0; i--) {
900 current = Compile(children->at(i), current); 1151 current = compiler->Compile(children->at(i), current, on_failure);
901 } 1152 }
902 return current; 1153 return current;
903 } 1154 }
904 1155
905 1156
906 static const int kSpaceRangeCount = 20; 1157 static const int kSpaceRangeCount = 20;
907 static const uc16 kSpaceRanges[kSpaceRangeCount] = { 1158 static const uc16 kSpaceRanges[kSpaceRangeCount] = {
908 0x0009, 0x0009, 0x000B, 0x000C, 0x0020, 0x0020, 0x00A0, 0x00A0, 1159 0x0009, 0x0009, 0x000B, 0x000C, 0x0020, 0x0020, 0x00A0, 0x00A0,
909 0x1680, 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x202F, 0x202F, 1160 0x1680, 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x202F, 0x202F,
910 0x205F, 0x205F, 0x3000, 0x3000 1161 0x205F, 0x205F, 0x3000, 0x3000
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after
1105 if (!tree()->FindGreatestLessThan(value, &loc)) 1356 if (!tree()->FindGreatestLessThan(value, &loc))
1106 return OutSet::empty(); 1357 return OutSet::empty();
1107 Entry* entry = &loc.value(); 1358 Entry* entry = &loc.value();
1108 if (value <= entry->to()) 1359 if (value <= entry->to())
1109 return entry->out_set(); 1360 return entry->out_set();
1110 else 1361 else
1111 return OutSet::empty(); 1362 return OutSet::empty();
1112 } 1363 }
1113 1364
1114 1365
1115 // ------------------------------------------------------------------- 1366 void RegExpEngine::DotPrint(const char* label, RegExpNode* node) {
1116 // Execution 1367 DotPrinter printer;
1117 1368 printer.PrintNode(label, node);
1118
1119 template <typename Char>
1120 class ExecutionState {
1121 public:
1122 ExecutionState(RegExpNode<Char>* start, Vector<Char> input)
1123 : current_(start),
1124 input_(input),
1125 pos_(0),
1126 backtrack_stack_(8),
1127 is_done_(false) { }
1128
1129 class BacktrackState {
1130 public:
1131 BacktrackState(ChoiceNode<Char>* choice, int next, int pos)
1132 : choice_(choice),
1133 next_(next),
1134 pos_(pos) { }
1135 ChoiceNode<Char>* choice() { return choice_; }
1136 int next() { return next_; }
1137 void set_next(int value) { next_ = value; }
1138 int pos() { return pos_; }
1139 private:
1140 ChoiceNode<Char>* choice_;
1141 int next_;
1142 int pos_;
1143 };
1144
1145 // Execute a single step, returning true if it succeeded
1146 inline bool Step() { return current()->Step(this); }
1147
1148 // Stores the given choice node and the execution state on the
1149 // backtrack stack.
1150 void SaveBacktrack(ChoiceNode<Char>* choice);
1151
1152 // Reverts to the next unused backtrack if there is one. Returns
1153 // false exactly if there was no backtrack to restore.
1154 bool Backtrack();
1155
1156 Char current_char() { return input()[pos()]; }
1157
1158 void Advance(int delta, RegExpNode<Char>* next) {
1159 pos_ += delta;
1160 current_ = next;
1161 }
1162
1163 bool AtEnd() { return pos_ >= input_.length(); }
1164
1165 bool is_done() { return is_done_; }
1166 void set_done() { is_done_ = true; }
1167
1168 List<BacktrackState>* backtrack_stack() { return &backtrack_stack_; }
1169 RegExpNode<Char>* current() { return current_; }
1170 void set_current(RegExpNode<Char>* value) { current_ = value; }
1171 Vector<Char> input() { return input_; }
1172 int pos() { return pos_; }
1173 private:
1174 RegExpNode<Char>* current_;
1175 Vector<Char> input_;
1176 int pos_;
1177 List<BacktrackState> backtrack_stack_;
1178 bool is_done_;
1179 };
1180
1181
1182 template <typename Char>
1183 void ExecutionState<Char>::SaveBacktrack(ChoiceNode<Char>* choice) {
1184 ASSERT(choice->choices()->length() > 1);
1185 if (FLAG_trace_regexps) {
1186 PrintF("Setting up backtrack on level %i for choice %p\n",
1187 backtrack_stack()->length(),
1188 choice);
1189 }
1190 backtrack_stack()->Add(BacktrackState(choice, 1, pos_));
1191 } 1369 }
1192 1370
1193 1371
1194 template <typename Char> 1372 RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) {
1195 bool ExecutionState<Char>::Backtrack() { 1373 RegExpCompiler compiler(input->capture_count);
1196 if (backtrack_stack()->is_empty()) return false; 1374 RegExpNode* node = compiler.Compile(input->tree,
1197 BacktrackState& top = backtrack_stack()->at(backtrack_stack()->length() - 1); 1375 EndNode::GetAccept(),
1198 ZoneList<RegExpNode<Char>*>* choices = top.choice()->choices(); 1376 EndNode::GetBacktrack());
1199 int next_index = top.next(); 1377 return node;
1200 current_ = choices->at(next_index);
1201 pos_ = top.pos();
1202 if (FLAG_trace_regexps) {
1203 PrintF("Backtracking to %p[%i] on level %i\n",
1204 top.choice(),
1205 next_index,
1206 backtrack_stack()->length() - 1);
1207 }
1208 if (next_index == choices->length() - 1) {
1209 if (FLAG_trace_regexps)
1210 PrintF("Popping backtrack on level %i\n",
1211 backtrack_stack()->length() - 1);
1212 // If this was the last alternative we're done with this backtrack
1213 // state and can pop it off the stack.
1214 backtrack_stack()->RemoveLast();
1215 } else {
1216 if (FLAG_trace_regexps)
1217 PrintF("Advancing backtrack on level %i\n",
1218 backtrack_stack()->length() - 1);
1219 // Otherwise we set the next choice to visit if this one fails.
1220 top.set_next(next_index + 1);
1221 }
1222 return true;
1223 } 1378 }
1224 1379
1225 1380
1226 template <typename Char>
1227 bool ChoiceNode<Char>::Step(ExecutionState<Char>* state) {
1228 state->SaveBacktrack(this);
1229 state->set_current(this->choices()->at(0));
1230 return true;
1231 }
1232
1233
1234 template <typename Char>
1235 bool AtomNode<Char>::Step(ExecutionState<Char>* state) {
1236 Vector<const uc16> data = this->data();
1237 int length = data.length();
1238 Vector<Char> input = state->input();
1239 int p = state->pos();
1240 if (p + length > input.length())
1241 return false;
1242 for (int i = 0; i < length; i++, p++) {
1243 if (data[i] != input[p])
1244 return false;
1245 }
1246 state->Advance(length, this->next());
1247 return true;
1248 }
1249
1250
1251 template <typename Char>
1252 bool CharacterClassNode<Char>::Step(ExecutionState<Char>* state) {
1253 if (state->AtEnd()) return false;
1254 ZoneList<CharacterRange>* ranges = this->ranges();
1255 unsigned current = state->current_char();
1256 for (int i = 0; i < ranges->length(); i++) {
1257 CharacterRange& range = ranges->at(i);
1258 if (range.from() <= current && current <= range.to()) {
1259 state->Advance(1, this->next());
1260 return true;
1261 }
1262 }
1263 return false;
1264 }
1265
1266
1267 template <typename Char>
1268 bool EndNode<Char>::Step(ExecutionState<Char>* state) {
1269 state->set_done();
1270 return false;
1271 }
1272
1273
1274 template <typename Char>
1275 bool RegExpEngine::Execute(RegExpNode<Char>* start, Vector<Char> input) {
1276 ExecutionState<Char> state(start, input);
1277 if (FLAG_trace_regexps) {
1278 PrintF("Beginning regexp execution\n");
1279 }
1280 while (state.Step() || (!state.is_done() && state.Backtrack()))
1281 ;
1282 if (FLAG_trace_regexps) {
1283 PrintF("Matching %s\n", state.is_done() ? "succeeded" : "failed");
1284 }
1285 return state.is_done();
1286 }
1287
1288
1289 template <typename Char>
1290 RegExpNode<Char>* RegExpEngine::Compile(RegExpTree* regexp) {
1291 RegExpNode<Char>* end = new EndNode<Char>();
1292 RegExpCompiler<Char> compiler;
1293 return compiler.Compile(regexp, end);
1294 }
1295
1296
1297 template
1298 RegExpNode<const char>* RegExpEngine::Compile<const char>(RegExpTree* regexp);
1299
1300 template
1301 RegExpNode<const uc16>* RegExpEngine::Compile<const uc16>(RegExpTree* regexp);
1302
1303 template
1304 bool RegExpEngine::Execute<const char>(RegExpNode<const char>* start,
1305 Vector<const char> input);
1306
1307 template
1308 bool RegExpEngine::Execute<const uc16>(RegExpNode<const uc16>* start,
1309 Vector<const uc16> input);
1310
1311
1312 }} // namespace v8::internal 1381 }} // namespace v8::internal
OLDNEW
« src/jsregexp.h ('K') | « src/jsregexp.h ('k') | src/parser.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698