| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 618 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 629 ByteArray* RegExpImpl::JsreInternal(Handle<JSRegExp> re) { | 629 ByteArray* RegExpImpl::JsreInternal(Handle<JSRegExp> re) { |
| 630 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); | 630 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); |
| 631 return ByteArray::cast(value->get(INTERNAL_INDEX)); | 631 return ByteArray::cast(value->get(INTERNAL_INDEX)); |
| 632 } | 632 } |
| 633 | 633 |
| 634 | 634 |
| 635 // ------------------------------------------------------------------- | 635 // ------------------------------------------------------------------- |
| 636 // New regular expression engine | 636 // New regular expression engine |
| 637 | 637 |
| 638 | 638 |
| 639 class DotPrinter; | |
| 640 | |
| 641 | |
| 642 class RegExpCompiler { | 639 class RegExpCompiler { |
| 643 public: | 640 public: |
| 644 explicit RegExpCompiler(int capture_count) | 641 explicit RegExpCompiler(int capture_count); |
| 645 : next_register_(2 * capture_count), | |
| 646 work_list_(NULL) { } | |
| 647 | 642 |
| 648 int AllocateRegister() { return next_register_++; } | 643 int AllocateRegister() { return next_register_++; } |
| 649 | 644 |
| 650 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, | 645 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, |
| 651 RegExpNode* start); | 646 RegExpNode* start); |
| 652 | 647 |
| 653 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } | 648 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } |
| 654 | 649 |
| 655 static const int kImplementationOffset = 0; | 650 static const int kImplementationOffset = 0; |
| 656 static const int kNumberOfRegistersOffset = 0; | 651 static const int kNumberOfRegistersOffset = 0; |
| 657 static const int kCodeOffset = 1; | 652 static const int kCodeOffset = 1; |
| 658 | 653 |
| 659 RegExpMacroAssembler* macro_assembler() { | 654 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } |
| 660 return macro_assembler_; | 655 EndNode* accept() { return accept_; } |
| 661 } | 656 EndNode* backtrack() { return backtrack_; } |
| 657 |
| 662 private: | 658 private: |
| 659 EndNode* accept_; |
| 660 EndNode* backtrack_; |
| 663 int next_register_; | 661 int next_register_; |
| 664 List<RegExpNode*>* work_list_; | 662 List<RegExpNode*>* work_list_; |
| 665 RegExpMacroAssembler* macro_assembler_; | 663 RegExpMacroAssembler* macro_assembler_; |
| 666 }; | 664 }; |
| 667 | 665 |
| 668 | 666 |
| 667 RegExpCompiler::RegExpCompiler(int capture_count) |
| 668 : next_register_(2 * capture_count), |
| 669 work_list_(NULL) { |
| 670 accept_ = new EndNode(EndNode::ACCEPT); |
| 671 backtrack_ = new EndNode(EndNode::BACKTRACK); |
| 672 } |
| 673 |
| 674 |
| 669 Handle<FixedArray> RegExpCompiler::Assemble( | 675 Handle<FixedArray> RegExpCompiler::Assemble( |
| 670 RegExpMacroAssembler* macro_assembler, | 676 RegExpMacroAssembler* macro_assembler, |
| 671 RegExpNode* start) { | 677 RegExpNode* start) { |
| 672 macro_assembler_ = macro_assembler; | 678 macro_assembler_ = macro_assembler; |
| 673 List <RegExpNode*> work_list(0); | 679 List <RegExpNode*> work_list(0); |
| 674 work_list_ = &work_list; | 680 work_list_ = &work_list; |
| 675 start->GoTo(this); | 681 start->GoTo(this); |
| 676 while (!work_list.is_empty()) { | 682 while (!work_list.is_empty()) { |
| 677 work_list.RemoveLast()->Emit(this); | 683 work_list.RemoveLast()->Emit(this); |
| 678 } | 684 } |
| (...skipping 17 matching lines...) Expand all Loading... |
| 696 Emit(compiler); | 702 Emit(compiler); |
| 697 } | 703 } |
| 698 } | 704 } |
| 699 | 705 |
| 700 | 706 |
| 701 void RegExpNode::EmitAddress(RegExpCompiler* compiler) { | 707 void RegExpNode::EmitAddress(RegExpCompiler* compiler) { |
| 702 compiler->macro_assembler()->EmitOrLink(&label); | 708 compiler->macro_assembler()->EmitOrLink(&label); |
| 703 } | 709 } |
| 704 | 710 |
| 705 | 711 |
| 706 EndNode EndNode::kAccept(ACCEPT); | |
| 707 EndNode EndNode::kBacktrack(BACKTRACK); | |
| 708 | |
| 709 | |
| 710 void GuardedAlternative::AddGuard(Guard* guard) { | 712 void GuardedAlternative::AddGuard(Guard* guard) { |
| 711 if (guards_ == NULL) | 713 if (guards_ == NULL) |
| 712 guards_ = new ZoneList<Guard*>(1); | 714 guards_ = new ZoneList<Guard*>(1); |
| 713 guards_->Add(guard); | 715 guards_->Add(guard); |
| 714 } | 716 } |
| 715 | 717 |
| 716 | 718 |
| 717 ActionNode* ActionNode::StoreRegister(int reg, | 719 ActionNode* ActionNode::StoreRegister(int reg, |
| 718 int val, | 720 int val, |
| 719 RegExpNode* on_success) { | 721 RegExpNode* on_success) { |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 753 ActionNode* ActionNode::EscapeSubmatch(RegExpNode* on_success) { | 755 ActionNode* ActionNode::EscapeSubmatch(RegExpNode* on_success) { |
| 754 return new ActionNode(ESCAPE_SUBMATCH, on_success); | 756 return new ActionNode(ESCAPE_SUBMATCH, on_success); |
| 755 } | 757 } |
| 756 | 758 |
| 757 | 759 |
| 758 ActionNode* ActionNode::EndSubmatch(RegExpNode* on_success) { | 760 ActionNode* ActionNode::EndSubmatch(RegExpNode* on_success) { |
| 759 return new ActionNode(END_SUBMATCH, on_success); | 761 return new ActionNode(END_SUBMATCH, on_success); |
| 760 } | 762 } |
| 761 | 763 |
| 762 | 764 |
| 763 class NodeVisitor { | |
| 764 public: | |
| 765 virtual ~NodeVisitor() { } | |
| 766 #define DECLARE_VISIT(Type) \ | |
| 767 virtual void Visit##Type(Type##Node* that) = 0; | |
| 768 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | |
| 769 #undef DECLARE_VISIT | |
| 770 }; | |
| 771 | |
| 772 | |
| 773 #define DEFINE_ACCEPT(Type) \ | 765 #define DEFINE_ACCEPT(Type) \ |
| 774 void Type##Node::Accept(NodeVisitor* visitor) { \ | 766 void Type##Node::Accept(NodeVisitor* visitor) { \ |
| 775 visitor->Visit##Type(this); \ | 767 visitor->Visit##Type(this); \ |
| 776 } | 768 } |
| 777 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) | 769 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) |
| 778 #undef DEFINE_ACCEPT | 770 #undef DEFINE_ACCEPT |
| 779 | 771 |
| 780 | 772 |
| 781 // ------------------------------------------------------------------- | 773 // ------------------------------------------------------------------- |
| 782 // Emit code. | 774 // Emit code. |
| (...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 853 stream()->Add("\\\\"); | 845 stream()->Add("\\\\"); |
| 854 break; | 846 break; |
| 855 case '"': | 847 case '"': |
| 856 stream()->Add("\""); | 848 stream()->Add("\""); |
| 857 break; | 849 break; |
| 858 default: | 850 default: |
| 859 stream()->Put(label[i]); | 851 stream()->Put(label[i]); |
| 860 break; | 852 break; |
| 861 } | 853 } |
| 862 } | 854 } |
| 863 stream()->Add("\"]; \n"); | 855 stream()->Add("\"];\n"); |
| 864 Visit(node); | 856 Visit(node); |
| 865 stream()->Add("}\n"); | 857 stream()->Add("}\n"); |
| 866 printf("%s", *(stream()->ToCString())); | 858 printf("%s", *(stream()->ToCString())); |
| 867 } | 859 } |
| 868 | 860 |
| 869 | 861 |
| 870 void DotPrinter::Visit(RegExpNode* node) { | 862 void DotPrinter::Visit(RegExpNode* node) { |
| 871 if (seen_.find(node) != seen_.end()) | 863 if (seen_.find(node) != seen_.end()) |
| 872 return; | 864 return; |
| 873 seen_.insert(node); | 865 seen_.insert(node); |
| 874 node->Accept(this); | 866 node->Accept(this); |
| 875 } | 867 } |
| 876 | 868 |
| 877 | 869 |
| 878 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { | 870 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { |
| 879 if (on_failure == EndNode::GetBacktrack()) return; | 871 if (on_failure->IsBacktrack()) return; |
| 880 stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure); | 872 stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure); |
| 881 Visit(on_failure); | 873 Visit(on_failure); |
| 882 } | 874 } |
| 883 | 875 |
| 884 | 876 |
| 877 class TableEntryBodyPrinter { |
| 878 public: |
| 879 TableEntryBodyPrinter(StringStream* stream, ChoiceNode* choice) |
| 880 : stream_(stream), choice_(choice) { } |
| 881 void Call(uc16 from, DispatchTable::Entry entry) { |
| 882 OutSet* out_set = entry.out_set(); |
| 883 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) { |
| 884 if (out_set->Get(i)) { |
| 885 stream()->Add(" n%p:s%io%i -> n%p;\n", |
| 886 choice(), |
| 887 from, |
| 888 i, |
| 889 choice()->alternatives()->at(i).node()); |
| 890 } |
| 891 } |
| 892 } |
| 893 private: |
| 894 StringStream* stream() { return stream_; } |
| 895 ChoiceNode* choice() { return choice_; } |
| 896 StringStream* stream_; |
| 897 ChoiceNode* choice_; |
| 898 }; |
| 899 |
| 900 |
| 901 class TableEntryHeaderPrinter { |
| 902 public: |
| 903 TableEntryHeaderPrinter(StringStream* stream) |
| 904 : first_(true), stream_(stream) { } |
| 905 void Call(uc16 from, DispatchTable::Entry entry) { |
| 906 if (first_) { |
| 907 first_ = false; |
| 908 } else { |
| 909 stream()->Add("|"); |
| 910 } |
| 911 stream()->Add("{%k-%k|{", from, from, entry.to()); |
| 912 OutSet* out_set = entry.out_set(); |
| 913 int priority = 0; |
| 914 for (unsigned i = 0; i < OutSet::kFirstLimit; i++) { |
| 915 if (out_set->Get(i)) { |
| 916 if (priority > 0) stream()->Add("|"); |
| 917 stream()->Add("<s%io%i> %i", from, i, priority); |
| 918 priority++; |
| 919 } |
| 920 } |
| 921 stream()->Add("}}"); |
| 922 } |
| 923 private: |
| 924 bool first_; |
| 925 StringStream* stream() { return stream_; } |
| 926 StringStream* stream_; |
| 927 }; |
| 928 |
| 929 |
| 885 void DotPrinter::VisitChoice(ChoiceNode* that) { | 930 void DotPrinter::VisitChoice(ChoiceNode* that) { |
| 886 stream()->Add(" n%p [label=\"? (%p)\"];\n", that, that); | 931 stream()->Add(" n%p [shape=Mrecord, label=\"", that); |
| 932 TableEntryHeaderPrinter header_printer(stream()); |
| 933 that->table()->ForEach(&header_printer); |
| 934 stream()->Add("\"]\n", that); |
| 935 TableEntryBodyPrinter body_printer(stream(), that); |
| 936 that->table()->ForEach(&body_printer); |
| 887 PrintOnFailure(that, that->on_failure()); | 937 PrintOnFailure(that, that->on_failure()); |
| 888 for (int i = 0; i < that->choices()->length(); i++) { | 938 for (int i = 0; i < that->alternatives()->length(); i++) { |
| 889 GuardedAlternative alt = that->choices()->at(i); | 939 GuardedAlternative alt = that->alternatives()->at(i); |
| 890 stream()->Add(" n%p -> n%p [label=\"%i", | 940 alt.node()->Accept(this); |
| 891 that, | |
| 892 alt.node(), | |
| 893 i); | |
| 894 if (alt.guards() != NULL) { | |
| 895 stream()->Add(" ["); | |
| 896 for (int j = 0; j < alt.guards()->length(); j++) { | |
| 897 if (j > 0) stream()->Add(" "); | |
| 898 Guard* guard = alt.guards()->at(j); | |
| 899 switch (guard->op()) { | |
| 900 case Guard::GEQ: | |
| 901 stream()->Add("$%i ≥ %i", guard->reg(), guard->value()); | |
| 902 break; | |
| 903 case Guard::LT: | |
| 904 stream()->Add("$%i < %i", guard->reg(), guard->value()); | |
| 905 break; | |
| 906 } | |
| 907 } | |
| 908 stream()->Add("]"); | |
| 909 } | |
| 910 stream()->Add("\"];\n"); | |
| 911 that->choices()->at(i).node()->Accept(this); | |
| 912 } | 941 } |
| 913 OS::PrintError("--- %p ---\n", static_cast<void*>(that)); | |
| 914 that->table()->Dump(); | |
| 915 } | 942 } |
| 916 | 943 |
| 917 | 944 |
| 918 void DotPrinter::VisitAtom(AtomNode* that) { | 945 void DotPrinter::VisitAtom(AtomNode* that) { |
| 919 stream()->Add(" n%p [label=\"'%w'\", shape=doubleoctagon];\n", | 946 stream()->Add(" n%p [label=\"'%w'\", shape=doubleoctagon];\n", |
| 920 that, | 947 that, |
| 921 that->data()); | 948 that->data()); |
| 922 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); | 949 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); |
| 923 Visit(that->on_success()); | 950 Visit(that->on_success()); |
| 924 PrintOnFailure(that, that->on_failure()); | 951 PrintOnFailure(that, that->on_failure()); |
| (...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1016 stream()->Add("%i", i); | 1043 stream()->Add("%i", i); |
| 1017 } | 1044 } |
| 1018 } | 1045 } |
| 1019 stream()->Add("}\n"); | 1046 stream()->Add("}\n"); |
| 1020 } | 1047 } |
| 1021 | 1048 |
| 1022 | 1049 |
| 1023 void DispatchTable::Dump() { | 1050 void DispatchTable::Dump() { |
| 1024 HeapStringAllocator alloc; | 1051 HeapStringAllocator alloc; |
| 1025 StringStream stream(&alloc); | 1052 StringStream stream(&alloc); |
| 1026 tree()->ForEach(DispatchTableDumper(&stream)); | 1053 DispatchTableDumper dumper(&stream); |
| 1054 tree()->ForEach(&dumper); |
| 1027 OS::PrintError("%s", *stream.ToCString()); | 1055 OS::PrintError("%s", *stream.ToCString()); |
| 1028 } | 1056 } |
| 1029 | 1057 |
| 1030 | 1058 |
| 1031 void RegExpEngine::DotPrint(const char* label, RegExpNode* node) { | 1059 void RegExpEngine::DotPrint(const char* label, RegExpNode* node) { |
| 1032 DotPrinter printer; | 1060 DotPrinter printer; |
| 1033 printer.PrintNode(label, node); | 1061 printer.PrintNode(label, node); |
| 1034 } | 1062 } |
| 1035 | 1063 |
| 1036 | 1064 |
| (...skipping 17 matching lines...) Expand all Loading... |
| 1054 return new CharacterClassNode(ranges(), | 1082 return new CharacterClassNode(ranges(), |
| 1055 is_negated(), | 1083 is_negated(), |
| 1056 on_success, | 1084 on_success, |
| 1057 on_failure); | 1085 on_failure); |
| 1058 } | 1086 } |
| 1059 | 1087 |
| 1060 | 1088 |
| 1061 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, | 1089 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, |
| 1062 RegExpNode* on_success, | 1090 RegExpNode* on_success, |
| 1063 RegExpNode* on_failure) { | 1091 RegExpNode* on_failure) { |
| 1064 ZoneList<RegExpTree*>* children = nodes(); | 1092 ZoneList<RegExpTree*>* alternatives = this->alternatives(); |
| 1065 int length = children->length(); | 1093 int length = alternatives->length(); |
| 1066 ChoiceNode* result = new ChoiceNode(length, on_failure); | 1094 ChoiceNode* result = new ChoiceNode(length, on_failure); |
| 1067 for (int i = 0; i < length; i++) { | 1095 for (int i = 0; i < length; i++) { |
| 1068 GuardedAlternative child(children->at(i)->ToNode(compiler, | 1096 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, |
| 1069 on_success, | 1097 on_success, |
| 1070 on_failure)); | 1098 on_failure)); |
| 1071 result->AddChild(child); | 1099 result->AddAlternative(alternative); |
| 1072 } | 1100 } |
| 1073 return result; | 1101 return result; |
| 1074 } | 1102 } |
| 1075 | 1103 |
| 1076 | 1104 |
| 1077 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, | 1105 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, |
| 1078 RegExpNode* on_success, | 1106 RegExpNode* on_success, |
| 1079 RegExpNode* on_failure) { | 1107 RegExpNode* on_failure) { |
| 1080 return ToNode(min(), | 1108 return ToNode(min(), |
| 1081 max(), | 1109 max(), |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1120 if (has_max) { | 1148 if (has_max) { |
| 1121 Guard* body_guard = new Guard(reg_ctr, Guard::LT, max); | 1149 Guard* body_guard = new Guard(reg_ctr, Guard::LT, max); |
| 1122 body_alt.AddGuard(body_guard); | 1150 body_alt.AddGuard(body_guard); |
| 1123 } | 1151 } |
| 1124 GuardedAlternative rest_alt(on_success); | 1152 GuardedAlternative rest_alt(on_success); |
| 1125 if (has_min) { | 1153 if (has_min) { |
| 1126 Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min); | 1154 Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min); |
| 1127 rest_alt.AddGuard(rest_guard); | 1155 rest_alt.AddGuard(rest_guard); |
| 1128 } | 1156 } |
| 1129 if (is_greedy) { | 1157 if (is_greedy) { |
| 1130 center->AddChild(body_alt); | 1158 center->AddAlternative(body_alt); |
| 1131 center->AddChild(rest_alt); | 1159 center->AddAlternative(rest_alt); |
| 1132 } else { | 1160 } else { |
| 1133 center->AddChild(rest_alt); | 1161 center->AddAlternative(rest_alt); |
| 1134 center->AddChild(body_alt); | 1162 center->AddAlternative(body_alt); |
| 1135 } | 1163 } |
| 1136 if (needs_counter) { | 1164 if (needs_counter) { |
| 1137 return ActionNode::StoreRegister(reg_ctr, 0, center); | 1165 return ActionNode::StoreRegister(reg_ctr, 0, center); |
| 1138 } else { | 1166 } else { |
| 1139 return center; | 1167 return center; |
| 1140 } | 1168 } |
| 1141 } | 1169 } |
| 1142 | 1170 |
| 1143 | 1171 |
| 1144 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, | 1172 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1195 // fail | 1223 // fail |
| 1196 // else | 1224 // else |
| 1197 // backtrack | 1225 // backtrack |
| 1198 // second | 1226 // second |
| 1199 // end submatch scope | 1227 // end submatch scope |
| 1200 // succeed | 1228 // succeed |
| 1201 ChoiceNode* try_node = | 1229 ChoiceNode* try_node = |
| 1202 new ChoiceNode(1, ActionNode::EndSubmatch(on_success)); | 1230 new ChoiceNode(1, ActionNode::EndSubmatch(on_success)); |
| 1203 RegExpNode* body_node = body()->ToNode(compiler, | 1231 RegExpNode* body_node = body()->ToNode(compiler, |
| 1204 ActionNode::EscapeSubmatch(on_failure), | 1232 ActionNode::EscapeSubmatch(on_failure), |
| 1205 EndNode::GetBacktrack()); | 1233 compiler->backtrack()); |
| 1206 GuardedAlternative body_alt(body_node); | 1234 GuardedAlternative body_alt(body_node); |
| 1207 try_node->AddChild(body_alt); | 1235 try_node->AddAlternative(body_alt); |
| 1208 return ActionNode::BeginSubmatch(try_node); | 1236 return ActionNode::BeginSubmatch(try_node); |
| 1209 } | 1237 } |
| 1210 } | 1238 } |
| 1211 | 1239 |
| 1212 | 1240 |
| 1213 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, | 1241 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, |
| 1214 RegExpNode* on_success, | 1242 RegExpNode* on_success, |
| 1215 RegExpNode* on_failure) { | 1243 RegExpNode* on_failure) { |
| 1216 return ToNode(body(), index(), compiler, on_success, on_failure); | 1244 return ToNode(body(), index(), compiler, on_success, on_failure); |
| 1217 } | 1245 } |
| (...skipping 246 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1464 return entry->out_set(); | 1492 return entry->out_set(); |
| 1465 else | 1493 else |
| 1466 return empty(); | 1494 return empty(); |
| 1467 } | 1495 } |
| 1468 | 1496 |
| 1469 | 1497 |
| 1470 // ------------------------------------------------------------------- | 1498 // ------------------------------------------------------------------- |
| 1471 // Analysis | 1499 // Analysis |
| 1472 | 1500 |
| 1473 | 1501 |
| 1474 class Analysis: public NodeVisitor { | 1502 void Analysis::EnsureAnalyzed(RegExpNode* that) { |
| 1475 public: | 1503 if (that->info()->been_analyzed || that->info()->being_analyzed) |
| 1476 explicit Analysis(RegExpCompiler* compiler) | 1504 return; |
| 1477 : compiler_(compiler), | 1505 that->info()->being_analyzed = true; |
| 1478 table_(NULL), | 1506 that->Accept(this); |
| 1479 choice_index_(-1) { } | 1507 that->info()->being_analyzed = false; |
| 1480 DispatchTable* table() { return table_; } | 1508 that->info()->been_analyzed = true; |
| 1481 RegExpCompiler* compiler() { return compiler_; } | 1509 } |
| 1482 int choice_index() { return choice_index_; } | |
| 1483 void Analyze(RegExpNode* node) { node->Accept(this); } | |
| 1484 #define DECLARE_VISIT(Type) \ | |
| 1485 virtual void Visit##Type(Type##Node* that); | |
| 1486 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | |
| 1487 #undef DECLARE_VISIT | |
| 1488 protected: | |
| 1489 explicit Analysis(Analysis* prev) { *this = *prev; } | |
| 1490 RegExpCompiler* compiler_; | |
| 1491 DispatchTable *table_; | |
| 1492 int choice_index_; | |
| 1493 }; | |
| 1494 | |
| 1495 | |
| 1496 // A subclass of analysis data that allows fields to be set. Anyone | |
| 1497 // who needs to create new analysis data creates an instance of this | |
| 1498 // class, configures it, and then passes it on as an AnalysisData | |
| 1499 // which doesn't allow its fields to be changed. | |
| 1500 class AnalysisBuilder: public Analysis { | |
| 1501 public: | |
| 1502 explicit AnalysisBuilder(Analysis* prev) : Analysis(prev) { } | |
| 1503 void set_table(DispatchTable* value) { table_ = value; } | |
| 1504 void set_choice_index(int value) { choice_index_ = value; } | |
| 1505 }; | |
| 1506 | 1510 |
| 1507 | 1511 |
| 1508 void Analysis::VisitEnd(EndNode* that) { | 1512 void Analysis::VisitEnd(EndNode* that) { |
| 1509 // nothing to do | 1513 // nothing to do |
| 1510 } | 1514 } |
| 1511 | 1515 |
| 1512 | 1516 |
| 1517 void Analysis::VisitAtom(AtomNode* that) { |
| 1518 EnsureAnalyzed(that->on_success()); |
| 1519 EnsureAnalyzed(that->on_failure()); |
| 1520 } |
| 1521 |
| 1522 |
| 1523 void Analysis::VisitAction(ActionNode* that) { |
| 1524 RegExpNode* next = that->on_success(); |
| 1525 EnsureAnalyzed(next); |
| 1526 that->info()->propagate_line = next->info()->propagate_line; |
| 1527 that->info()->propagate_word = next->info()->propagate_word; |
| 1528 } |
| 1529 |
| 1530 |
| 1531 void Analysis::VisitChoice(ChoiceNode* that) { |
| 1532 NodeInfo* info = that->info(); |
| 1533 for (int i = 0; i < that->alternatives()->length(); i++) { |
| 1534 RegExpNode* node = that->alternatives()->at(i).node(); |
| 1535 EnsureAnalyzed(node); |
| 1536 info->propagate_line |= node->info()->propagate_line; |
| 1537 info->propagate_word |= node->info()->propagate_word; |
| 1538 } |
| 1539 DispatchTableConstructor cons(that->table()); |
| 1540 cons.BuildTable(that); |
| 1541 EnsureAnalyzed(that->on_failure()); |
| 1542 } |
| 1543 |
| 1544 |
| 1545 void Analysis::VisitBackreference(BackreferenceNode* that) { |
| 1546 EnsureAnalyzed(that->on_success()); |
| 1547 EnsureAnalyzed(that->on_failure()); |
| 1548 } |
| 1549 |
| 1550 |
| 1551 void Analysis::VisitCharacterClass(CharacterClassNode* that) { |
| 1552 EnsureAnalyzed(that->on_success()); |
| 1553 EnsureAnalyzed(that->on_failure()); |
| 1554 } |
| 1555 |
| 1556 |
| 1557 // ------------------------------------------------------------------- |
| 1558 // Dispatch table construction |
| 1559 |
| 1560 |
| 1561 void DispatchTableConstructor::VisitEnd(EndNode* that) { |
| 1562 // nothing to do |
| 1563 } |
| 1564 |
| 1565 |
| 1566 void DispatchTableConstructor::BuildTable(ChoiceNode* node) { |
| 1567 ASSERT(!node->table_calculated()); |
| 1568 node->set_being_calculated(true); |
| 1569 ZoneList<GuardedAlternative>* alternatives = node->alternatives(); |
| 1570 for (int i = 0; i < alternatives->length(); i++) { |
| 1571 set_choice_index(i); |
| 1572 alternatives->at(i).node()->Accept(this); |
| 1573 } |
| 1574 node->set_being_calculated(false); |
| 1575 node->set_table_calculated(true); |
| 1576 } |
| 1577 |
| 1578 |
| 1513 class AddDispatchRange { | 1579 class AddDispatchRange { |
| 1514 public: | 1580 public: |
| 1515 explicit AddDispatchRange(Analysis* analysis) : analysis_(analysis) { } | 1581 explicit AddDispatchRange(DispatchTableConstructor* constructor) |
| 1582 : constructor_(constructor) { } |
| 1516 void Call(uc32 from, DispatchTable::Entry entry); | 1583 void Call(uc32 from, DispatchTable::Entry entry); |
| 1517 private: | 1584 private: |
| 1518 Analysis* analysis_; | 1585 DispatchTableConstructor* constructor_; |
| 1519 }; | 1586 }; |
| 1520 | 1587 |
| 1521 | 1588 |
| 1522 void AddDispatchRange::Call(uc32 from, DispatchTable::Entry entry) { | 1589 void AddDispatchRange::Call(uc32 from, DispatchTable::Entry entry) { |
| 1523 CharacterRange range(from, entry.to()); | 1590 CharacterRange range(from, entry.to()); |
| 1524 analysis_->table()->AddRange(range, analysis_->choice_index()); | 1591 constructor_->AddRange(range); |
| 1525 } | 1592 } |
| 1526 | 1593 |
| 1527 | 1594 |
| 1528 void Analysis::VisitChoice(ChoiceNode* node) { | 1595 void DispatchTableConstructor::VisitChoice(ChoiceNode* node) { |
| 1529 if (node->visited()) return; | 1596 if (node->being_calculated()) |
| 1530 node->set_visited(true); | 1597 return; |
| 1531 ZoneList<GuardedAlternative>* choices = node->choices(); | 1598 if (!node->table_calculated()) { |
| 1532 AnalysisBuilder data(this); | 1599 DispatchTableConstructor constructor(node->table()); |
| 1533 data.set_table(node->table()); | 1600 constructor.BuildTable(node); |
| 1534 for (int i = 0; i < choices->length(); i++) { | |
| 1535 data.set_choice_index(i); | |
| 1536 data.Analyze(choices->at(i).node()); | |
| 1537 } | 1601 } |
| 1538 node->set_visited(false); | 1602 ASSERT(node->table_calculated()); |
| 1539 if (table() != NULL) { | 1603 AddDispatchRange adder(this); |
| 1540 data.table()->ForEach(AddDispatchRange(this)); | 1604 node->table()->ForEach(&adder); |
| 1541 } | |
| 1542 } | 1605 } |
| 1543 | 1606 |
| 1544 | 1607 |
| 1545 void Analysis::VisitBackreference(BackreferenceNode* that) { | 1608 void DispatchTableConstructor::VisitBackreference(BackreferenceNode* that) { |
| 1546 UNIMPLEMENTED(); | 1609 UNIMPLEMENTED(); |
| 1547 } | 1610 } |
| 1548 | 1611 |
| 1549 | 1612 |
| 1550 | 1613 |
| 1551 static int CompareRangeByFrom(const CharacterRange* a, | 1614 static int CompareRangeByFrom(const CharacterRange* a, |
| 1552 const CharacterRange* b) { | 1615 const CharacterRange* b) { |
| 1553 return Spaceship<uc16>(a->from(), b->from()); | 1616 return Spaceship<uc16>(a->from(), b->from()); |
| 1554 } | 1617 } |
| 1555 | 1618 |
| 1556 | 1619 |
| 1557 void CharacterClassNode::AddInverseToTable(ZoneList<CharacterRange>* ranges, | 1620 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) { |
| 1558 DispatchTable* table, | |
| 1559 int index) { | |
| 1560 ranges->Sort(CompareRangeByFrom); | 1621 ranges->Sort(CompareRangeByFrom); |
| 1561 uc16 last = 0; | 1622 uc16 last = 0; |
| 1562 for (int i = 0; i < ranges->length(); i++) { | 1623 for (int i = 0; i < ranges->length(); i++) { |
| 1563 CharacterRange range = ranges->at(i); | 1624 CharacterRange range = ranges->at(i); |
| 1564 if (last < range.from()) | 1625 if (last < range.from()) |
| 1565 table->AddRange(CharacterRange(last, range.from() - 1), index); | 1626 AddRange(CharacterRange(last, range.from() - 1)); |
| 1566 if (range.to() >= last) { | 1627 if (range.to() >= last) { |
| 1567 if (range.to() == 0xFFFF) { | 1628 if (range.to() == 0xFFFF) { |
| 1568 return; | 1629 return; |
| 1569 } else { | 1630 } else { |
| 1570 last = range.to() + 1; | 1631 last = range.to() + 1; |
| 1571 } | 1632 } |
| 1572 } | 1633 } |
| 1573 } | 1634 } |
| 1574 table->AddRange(CharacterRange(last, 0xFFFF), index); | 1635 AddRange(CharacterRange(last, 0xFFFF)); |
| 1575 } | 1636 } |
| 1576 | 1637 |
| 1577 | 1638 |
| 1578 void Analysis::VisitCharacterClass(CharacterClassNode* that) { | 1639 void DispatchTableConstructor::VisitCharacterClass(CharacterClassNode* that) { |
| 1579 if (table() != NULL) { | 1640 ZoneList<CharacterRange>* ranges = that->ranges(); |
| 1580 int index = choice_index(); | 1641 if (that->is_negated()) { |
| 1581 ZoneList<CharacterRange>* ranges = that->ranges(); | 1642 AddInverse(ranges); |
| 1582 if (that->is_negated()) { | 1643 } else { |
| 1583 CharacterClassNode::AddInverseToTable(ranges, table(), index); | 1644 for (int i = 0; i < ranges->length(); i++) { |
| 1584 } else { | 1645 AddRange(ranges->at(i)); |
| 1585 for (int i = 0; i < ranges->length(); i++) { | |
| 1586 CharacterRange range = ranges->at(i); | |
| 1587 table()->AddRange(range, index); | |
| 1588 } | |
| 1589 } | 1646 } |
| 1590 } | 1647 } |
| 1591 AnalysisBuilder outgoing(this); | |
| 1592 outgoing.set_table(NULL); | |
| 1593 outgoing.Analyze(that->on_success()); | |
| 1594 } | 1648 } |
| 1595 | 1649 |
| 1596 | 1650 |
| 1597 void Analysis::VisitAtom(AtomNode* that) { | 1651 void DispatchTableConstructor::VisitAtom(AtomNode* that) { |
| 1598 if (table() != NULL) { | 1652 uc16 c = that->data()[0]; |
| 1599 uc16 c = that->data()[0]; | 1653 AddRange(CharacterRange(c, c)); |
| 1600 table()->AddRange(CharacterRange(c, c), choice_index()); | |
| 1601 } | |
| 1602 AnalysisBuilder outgoing(this); | |
| 1603 outgoing.set_table(NULL); | |
| 1604 outgoing.Analyze(that->on_success()); | |
| 1605 } | 1654 } |
| 1606 | 1655 |
| 1607 | 1656 |
| 1608 void Analysis::VisitAction(ActionNode* that) { | 1657 void DispatchTableConstructor::VisitAction(ActionNode* that) { |
| 1609 Analyze(that->on_success()); | 1658 that->on_success()->Accept(this); |
| 1610 } | 1659 } |
| 1611 | 1660 |
| 1612 | 1661 |
| 1613 RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) { | 1662 RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) { |
| 1614 RegExpCompiler compiler(input->capture_count); | 1663 RegExpCompiler compiler(input->capture_count); |
| 1615 // Wrap the body of the regexp in capture #0. | 1664 // Wrap the body of the regexp in capture #0. |
| 1616 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, | 1665 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, |
| 1617 0, | 1666 0, |
| 1618 &compiler, | 1667 &compiler, |
| 1619 EndNode::GetAccept(), | 1668 compiler.accept(), |
| 1620 EndNode::GetBacktrack()); | 1669 compiler.backtrack()); |
| 1621 // Add a .*? at the beginning, outside the body capture. | 1670 // Add a .*? at the beginning, outside the body capture. |
| 1622 // Note: We could choose to not add this if the regexp is anchored at | 1671 // Note: We could choose to not add this if the regexp is anchored at |
| 1623 // the start of the input but I'm not sure how best to do that and | 1672 // the start of the input but I'm not sure how best to do that and |
| 1624 // since we don't even handle ^ yet I'm saving that optimization for | 1673 // since we don't even handle ^ yet I'm saving that optimization for |
| 1625 // later. | 1674 // later. |
| 1626 RegExpNode* node = RegExpQuantifier::ToNode(0, | 1675 RegExpNode* result = RegExpQuantifier::ToNode(0, |
| 1627 RegExpQuantifier::kInfinity, | 1676 RegExpQuantifier::kInfinity, |
| 1628 false, | 1677 false, |
| 1629 new RegExpCharacterClass('.'), | 1678 new RegExpCharacterClass('.'), |
| 1630 &compiler, | 1679 &compiler, |
| 1631 captured_body, | 1680 captured_body, |
| 1632 EndNode::GetBacktrack()); | 1681 compiler.backtrack()); |
| 1633 Analysis analysis(&compiler); | 1682 Analysis analysis; |
| 1634 analysis.Analyze(node); | 1683 analysis.EnsureAnalyzed(result); |
| 1635 return node; | 1684 return result; |
| 1636 } | 1685 } |
| 1637 | 1686 |
| 1638 | 1687 |
| 1639 RegExpMacroAssembler::~RegExpMacroAssembler() { | 1688 RegExpMacroAssembler::~RegExpMacroAssembler() { |
| 1640 } | 1689 } |
| 1641 | 1690 |
| 1642 | 1691 |
| 1643 }} // namespace v8::internal | 1692 }} // namespace v8::internal |
| OLD | NEW |