| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 617 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 628 ByteArray* RegExpImpl::JsreInternal(Handle<JSRegExp> re) { | 628 ByteArray* RegExpImpl::JsreInternal(Handle<JSRegExp> re) { |
| 629 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); | 629 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); |
| 630 return ByteArray::cast(value->get(INTERNAL_INDEX)); | 630 return ByteArray::cast(value->get(INTERNAL_INDEX)); |
| 631 } | 631 } |
| 632 | 632 |
| 633 | 633 |
| 634 // ------------------------------------------------------------------- | 634 // ------------------------------------------------------------------- |
| 635 // New regular expression engine | 635 // New regular expression engine |
| 636 | 636 |
| 637 | 637 |
| 638 class RegExpCompiler; | |
| 639 class DotPrinter; | 638 class DotPrinter; |
| 640 | 639 |
| 641 | 640 |
| 642 class RegExpCompiler { | 641 class RegExpCompiler { |
| 643 public: | 642 public: |
| 644 explicit RegExpCompiler(int capture_count) | 643 explicit RegExpCompiler(int capture_count) |
| 645 : next_register_(2 * capture_count), | 644 : next_register_(2 * capture_count), |
| 646 work_list_(NULL) { } | 645 work_list_(NULL) { } |
| 647 | 646 |
| 648 RegExpNode* Compile(RegExpTree* tree, | |
| 649 RegExpNode* on_success, | |
| 650 RegExpNode* on_failure); | |
| 651 | |
| 652 int AllocateRegister() { return next_register_++; } | 647 int AllocateRegister() { return next_register_++; } |
| 653 | 648 |
| 654 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, | 649 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, |
| 655 RegExpNode* start); | 650 RegExpNode* start); |
| 656 | 651 |
| 657 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } | 652 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } |
| 658 | 653 |
| 659 static const int kImplementationOffset = 0; | 654 static const int kImplementationOffset = 0; |
| 660 static const int kNumberOfRegistersOffset = 0; | 655 static const int kNumberOfRegistersOffset = 0; |
| 661 static const int kCodeOffset = 1; | 656 static const int kCodeOffset = 1; |
| (...skipping 245 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 907 case Guard::LT: | 902 case Guard::LT: |
| 908 stream()->Add("$%i < %i", guard->reg(), guard->value()); | 903 stream()->Add("$%i < %i", guard->reg(), guard->value()); |
| 909 break; | 904 break; |
| 910 } | 905 } |
| 911 } | 906 } |
| 912 stream()->Add("]"); | 907 stream()->Add("]"); |
| 913 } | 908 } |
| 914 stream()->Add("\"];\n"); | 909 stream()->Add("\"];\n"); |
| 915 that->choices()->at(i).node()->Accept(this); | 910 that->choices()->at(i).node()->Accept(this); |
| 916 } | 911 } |
| 917 OS::PrintError("--- %p ---\n", static_cast<void*>(this)); | 912 OS::PrintError("--- %p ---\n", static_cast<void*>(that)); |
| 918 that->table()->Dump(); | 913 that->table()->Dump(); |
| 919 } | 914 } |
| 920 | 915 |
| 921 | 916 |
| 922 void DotPrinter::VisitAtom(AtomNode* that) { | 917 void DotPrinter::VisitAtom(AtomNode* that) { |
| 923 stream()->Add(" n%p [label=\"'%w'\", shape=doubleoctagon];\n", | 918 stream()->Add(" n%p [label=\"'%w'\", shape=doubleoctagon];\n", |
| 924 that, | 919 that, |
| 925 that->data()); | 920 that->data()); |
| 926 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); | 921 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); |
| 927 Visit(that->on_success()); | 922 Visit(that->on_success()); |
| (...skipping 11 matching lines...) Expand all Loading... |
| 939 PrintOnFailure(that, that->on_failure()); | 934 PrintOnFailure(that, that->on_failure()); |
| 940 } | 935 } |
| 941 | 936 |
| 942 | 937 |
| 943 void DotPrinter::VisitEnd(EndNode* that) { | 938 void DotPrinter::VisitEnd(EndNode* that) { |
| 944 stream()->Add(" n%p [style=bold, shape=point];\n", that); | 939 stream()->Add(" n%p [style=bold, shape=point];\n", that); |
| 945 } | 940 } |
| 946 | 941 |
| 947 | 942 |
| 948 void DotPrinter::VisitCharacterClass(CharacterClassNode* that) { | 943 void DotPrinter::VisitCharacterClass(CharacterClassNode* that) { |
| 949 stream()->Add(" n%p [label=\"[...]\"];\n", that); | 944 stream()->Add(" n%p [label=\"[", that); |
| 945 if (that->is_negated()) |
| 946 stream()->Add("^"); |
| 947 for (int i = 0; i < that->ranges()->length(); i++) { |
| 948 CharacterRange range = that->ranges()->at(i); |
| 949 stream()->Add("%k-%k", range.from(), range.to()); |
| 950 } |
| 951 stream()->Add("]\"];\n"); |
| 950 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); | 952 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); |
| 951 Visit(that->on_success()); | 953 Visit(that->on_success()); |
| 952 PrintOnFailure(that, that->on_failure()); | 954 PrintOnFailure(that, that->on_failure()); |
| 953 } | 955 } |
| 954 | 956 |
| 955 | 957 |
| 956 void DotPrinter::VisitAction(ActionNode* that) { | 958 void DotPrinter::VisitAction(ActionNode* that) { |
| 957 stream()->Add(" n%p [", that); | 959 stream()->Add(" n%p [", that); |
| 958 switch (that->type_) { | 960 switch (that->type_) { |
| 959 case ActionNode::STORE_REGISTER: | 961 case ActionNode::STORE_REGISTER: |
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1055 } | 1057 } |
| 1056 | 1058 |
| 1057 | 1059 |
| 1058 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, | 1060 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, |
| 1059 RegExpNode* on_success, | 1061 RegExpNode* on_success, |
| 1060 RegExpNode* on_failure) { | 1062 RegExpNode* on_failure) { |
| 1061 ZoneList<RegExpTree*>* children = nodes(); | 1063 ZoneList<RegExpTree*>* children = nodes(); |
| 1062 int length = children->length(); | 1064 int length = children->length(); |
| 1063 ChoiceNode* result = new ChoiceNode(length, on_failure); | 1065 ChoiceNode* result = new ChoiceNode(length, on_failure); |
| 1064 for (int i = 0; i < length; i++) { | 1066 for (int i = 0; i < length; i++) { |
| 1065 GuardedAlternative child(compiler->Compile(children->at(i), | 1067 GuardedAlternative child(children->at(i)->ToNode(compiler, |
| 1066 on_success, | 1068 on_success, |
| 1067 on_failure)); | 1069 on_failure)); |
| 1068 result->AddChild(child); | 1070 result->AddChild(child); |
| 1069 } | 1071 } |
| 1070 return result; | 1072 return result; |
| 1071 } | 1073 } |
| 1072 | 1074 |
| 1073 | 1075 |
| 1074 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, | 1076 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, |
| 1075 RegExpNode* on_success, | 1077 RegExpNode* on_success, |
| 1076 RegExpNode* on_failure) { | 1078 RegExpNode* on_failure) { |
| 1079 return ToNode(min(), |
| 1080 max(), |
| 1081 is_greedy(), |
| 1082 body(), |
| 1083 compiler, |
| 1084 on_success, |
| 1085 on_failure); |
| 1086 } |
| 1087 |
| 1088 |
| 1089 RegExpNode* RegExpQuantifier::ToNode(int min, |
| 1090 int max, |
| 1091 bool is_greedy, |
| 1092 RegExpTree* body, |
| 1093 RegExpCompiler* compiler, |
| 1094 RegExpNode* on_success, |
| 1095 RegExpNode* on_failure) { |
| 1077 // x{f, t} becomes this: | 1096 // x{f, t} becomes this: |
| 1078 // | 1097 // |
| 1079 // (r++)<-. | 1098 // (r++)<-. |
| 1080 // | ` | 1099 // | ` |
| 1081 // | (x) | 1100 // | (x) |
| 1082 // v ^ | 1101 // v ^ |
| 1083 // (r=0)-->(?)---/ [if r < t] | 1102 // (r=0)-->(?)---/ [if r < t] |
| 1084 // | | 1103 // | |
| 1085 // [if r >= f] \----> ... | 1104 // [if r >= f] \----> ... |
| 1086 // | 1105 // |
| 1087 // | 1106 // |
| 1088 // TODO(someone): clear captures on repetition and handle empty | 1107 // TODO(someone): clear captures on repetition and handle empty |
| 1089 // matches. | 1108 // matches. |
| 1090 bool has_min = min() > 0; | 1109 bool has_min = min > 0; |
| 1091 bool has_max = max() < RegExpQuantifier::kInfinity; | 1110 bool has_max = max < RegExpQuantifier::kInfinity; |
| 1092 bool needs_counter = has_min || has_max; | 1111 bool needs_counter = has_min || has_max; |
| 1093 int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1; | 1112 int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1; |
| 1094 ChoiceNode* center = new ChoiceNode(2, on_failure); | 1113 ChoiceNode* center = new ChoiceNode(2, on_failure); |
| 1095 RegExpNode* loop_return = needs_counter | 1114 RegExpNode* loop_return = needs_counter |
| 1096 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) | 1115 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) |
| 1097 : static_cast<RegExpNode*>(center); | 1116 : static_cast<RegExpNode*>(center); |
| 1098 RegExpNode* body_node = compiler->Compile(body(), loop_return, on_failure); | 1117 RegExpNode* body_node = body->ToNode(compiler, loop_return, on_failure); |
| 1099 GuardedAlternative body_alt(body_node); | 1118 GuardedAlternative body_alt(body_node); |
| 1100 if (has_max) { | 1119 if (has_max) { |
| 1101 Guard* body_guard = new Guard(reg_ctr, Guard::LT, max()); | 1120 Guard* body_guard = new Guard(reg_ctr, Guard::LT, max); |
| 1102 body_alt.AddGuard(body_guard); | 1121 body_alt.AddGuard(body_guard); |
| 1103 } | 1122 } |
| 1104 GuardedAlternative rest_alt(on_success); | 1123 GuardedAlternative rest_alt(on_success); |
| 1105 if (has_min) { | 1124 if (has_min) { |
| 1106 Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min()); | 1125 Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min); |
| 1107 rest_alt.AddGuard(rest_guard); | 1126 rest_alt.AddGuard(rest_guard); |
| 1108 } | 1127 } |
| 1109 if (is_greedy()) { | 1128 if (is_greedy) { |
| 1110 center->AddChild(body_alt); | 1129 center->AddChild(body_alt); |
| 1111 center->AddChild(rest_alt); | 1130 center->AddChild(rest_alt); |
| 1112 } else { | 1131 } else { |
| 1113 center->AddChild(rest_alt); | 1132 center->AddChild(rest_alt); |
| 1114 center->AddChild(body_alt); | 1133 center->AddChild(body_alt); |
| 1115 } | 1134 } |
| 1116 if (needs_counter) { | 1135 if (needs_counter) { |
| 1117 return ActionNode::StoreRegister(reg_ctr, 0, center); | 1136 return ActionNode::StoreRegister(reg_ctr, 0, center); |
| 1118 } else { | 1137 } else { |
| 1119 return center; | 1138 return center; |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1155 // $reg = $pos | 1174 // $reg = $pos |
| 1156 // if [body] | 1175 // if [body] |
| 1157 // then | 1176 // then |
| 1158 // $pos = $reg | 1177 // $pos = $reg |
| 1159 // escape submatch scope (drop all backtracks created in scope) | 1178 // escape submatch scope (drop all backtracks created in scope) |
| 1160 // succeed | 1179 // succeed |
| 1161 // else | 1180 // else |
| 1162 // end submatch scope (nothing to clean up, just exit the scope) | 1181 // end submatch scope (nothing to clean up, just exit the scope) |
| 1163 // fail | 1182 // fail |
| 1164 return ActionNode::BeginSubmatch(ActionNode::StorePosition( | 1183 return ActionNode::BeginSubmatch(ActionNode::StorePosition( |
| 1165 position_register, compiler->Compile(body(), | 1184 position_register, body()->ToNode(compiler, |
| 1166 ActionNode::RestorePosition(position_register, | 1185 ActionNode::RestorePosition(position_register, |
| 1167 ActionNode::EscapeSubmatch(on_success)), | 1186 ActionNode::EscapeSubmatch(on_success)), |
| 1168 ActionNode::EndSubmatch(on_failure)))); | 1187 ActionNode::EndSubmatch(on_failure)))); |
| 1169 } else { | 1188 } else { |
| 1170 // begin submatch scope | 1189 // begin submatch scope |
| 1171 // try | 1190 // try |
| 1172 // first if (body) | 1191 // first if (body) |
| 1173 // then | 1192 // then |
| 1174 // escape submatch scope | 1193 // escape submatch scope |
| 1175 // fail | 1194 // fail |
| 1176 // else | 1195 // else |
| 1177 // backtrack | 1196 // backtrack |
| 1178 // second | 1197 // second |
| 1179 // end submatch scope | 1198 // end submatch scope |
| 1180 // succeed | 1199 // succeed |
| 1181 ChoiceNode* try_node = | 1200 ChoiceNode* try_node = |
| 1182 new ChoiceNode(1, ActionNode::EndSubmatch(on_success)); | 1201 new ChoiceNode(1, ActionNode::EndSubmatch(on_success)); |
| 1183 RegExpNode* body_node = compiler->Compile(body(), | 1202 RegExpNode* body_node = body()->ToNode(compiler, |
| 1184 ActionNode::EscapeSubmatch(on_failure), | 1203 ActionNode::EscapeSubmatch(on_failure), |
| 1185 EndNode::GetBacktrack()); | 1204 EndNode::GetBacktrack()); |
| 1186 GuardedAlternative body_alt(body_node); | 1205 GuardedAlternative body_alt(body_node); |
| 1187 try_node->AddChild(body_alt); | 1206 try_node->AddChild(body_alt); |
| 1188 return ActionNode::BeginSubmatch(try_node); | 1207 return ActionNode::BeginSubmatch(try_node); |
| 1189 } | 1208 } |
| 1190 } | 1209 } |
| 1191 | 1210 |
| 1192 | 1211 |
| 1193 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, | 1212 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, |
| 1194 RegExpNode* on_success, | 1213 RegExpNode* on_success, |
| 1195 RegExpNode* on_failure) { | 1214 RegExpNode* on_failure) { |
| 1196 int start_reg = RegExpCapture::StartRegister(index()); | 1215 return ToNode(body(), index(), compiler, on_success, on_failure); |
| 1197 int end_reg = RegExpCapture::EndRegister(index()); | 1216 } |
| 1217 |
| 1218 |
| 1219 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, |
| 1220 int index, |
| 1221 RegExpCompiler* compiler, |
| 1222 RegExpNode* on_success, |
| 1223 RegExpNode* on_failure) { |
| 1224 int start_reg = RegExpCapture::StartRegister(index); |
| 1225 int end_reg = RegExpCapture::EndRegister(index); |
| 1198 RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success); | 1226 RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success); |
| 1199 RegExpNode* body_node = compiler->Compile(body(), store_end, on_failure); | 1227 RegExpNode* body_node = body->ToNode(compiler, store_end, on_failure); |
| 1200 return ActionNode::StorePosition(start_reg, body_node); | 1228 return ActionNode::StorePosition(start_reg, body_node); |
| 1201 } | 1229 } |
| 1202 | 1230 |
| 1203 | 1231 |
| 1204 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, | 1232 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, |
| 1205 RegExpNode* on_success, | 1233 RegExpNode* on_success, |
| 1206 RegExpNode* on_failure) { | 1234 RegExpNode* on_failure) { |
| 1207 ZoneList<RegExpTree*>* children = nodes(); | 1235 ZoneList<RegExpTree*>* children = nodes(); |
| 1208 RegExpNode* current = on_success; | 1236 RegExpNode* current = on_success; |
| 1209 for (int i = children->length() - 1; i >= 0; i--) { | 1237 for (int i = children->length() - 1; i >= 0; i--) { |
| 1210 current = compiler->Compile(children->at(i), current, on_failure); | 1238 current = children->at(i)->ToNode(compiler, current, on_failure); |
| 1211 } | 1239 } |
| 1212 return current; | 1240 return current; |
| 1213 } | 1241 } |
| 1214 | 1242 |
| 1215 | 1243 |
| 1216 static const int kSpaceRangeCount = 20; | 1244 static const int kSpaceRangeCount = 20; |
| 1217 static const uc16 kSpaceRanges[kSpaceRangeCount] = { | 1245 static const uc16 kSpaceRanges[kSpaceRangeCount] = { |
| 1218 0x0009, 0x0009, 0x000B, 0x000C, 0x0020, 0x0020, 0x00A0, 0x00A0, | 1246 0x0009, 0x0009, 0x000B, 0x000C, 0x0020, 0x0020, 0x00A0, 0x00A0, |
| 1219 0x1680, 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x202F, 0x202F, | 1247 0x1680, 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x202F, 0x202F, |
| 1220 0x205F, 0x205F, 0x3000, 0x3000 | 1248 0x205F, 0x205F, 0x3000, 0x3000 |
| (...skipping 353 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1574 outgoing.set_table(NULL); | 1602 outgoing.set_table(NULL); |
| 1575 outgoing.Analyze(that->on_success()); | 1603 outgoing.Analyze(that->on_success()); |
| 1576 } | 1604 } |
| 1577 | 1605 |
| 1578 | 1606 |
| 1579 void Analysis::VisitAction(ActionNode* that) { | 1607 void Analysis::VisitAction(ActionNode* that) { |
| 1580 Analyze(that->on_success()); | 1608 Analyze(that->on_success()); |
| 1581 } | 1609 } |
| 1582 | 1610 |
| 1583 | 1611 |
| 1584 RegExpNode* RegExpCompiler::Compile(RegExpTree* tree, | |
| 1585 RegExpNode* on_success, | |
| 1586 RegExpNode* on_failure) { | |
| 1587 return tree->ToNode(this, on_success, on_failure); | |
| 1588 } | |
| 1589 | |
| 1590 | |
| 1591 RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) { | 1612 RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) { |
| 1592 RegExpCompiler compiler(input->capture_count); | 1613 RegExpCompiler compiler(input->capture_count); |
| 1593 RegExpNode* node = compiler.Compile(input->tree, | 1614 // Wrap the body of the regexp in capture #0. |
| 1594 EndNode::GetAccept(), | 1615 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, |
| 1595 EndNode::GetBacktrack()); | 1616 0, |
| 1617 &compiler, |
| 1618 EndNode::GetAccept(), |
| 1619 EndNode::GetBacktrack()); |
| 1620 // Add a .*? at the beginning, outside the body capture. |
| 1621 // Note: We could choose to not add this if the regexp is anchored at |
| 1622 // the start of the input but I'm not sure how best to do that and |
| 1623 // since we don't even handle ^ yet I'm saving that optimization for |
| 1624 // later. |
| 1625 RegExpNode* node = RegExpQuantifier::ToNode(0, |
| 1626 RegExpQuantifier::kInfinity, |
| 1627 false, |
| 1628 new RegExpCharacterClass('.'), |
| 1629 &compiler, |
| 1630 captured_body, |
| 1631 EndNode::GetBacktrack()); |
| 1596 Analysis analysis(&compiler); | 1632 Analysis analysis(&compiler); |
| 1597 analysis.Analyze(node); | 1633 analysis.Analyze(node); |
| 1598 return node; | 1634 return node; |
| 1599 } | 1635 } |
| 1600 | 1636 |
| 1601 | 1637 |
| 1602 RegExpMacroAssembler::~RegExpMacroAssembler() { | 1638 RegExpMacroAssembler::~RegExpMacroAssembler() { |
| 1603 } | 1639 } |
| 1604 | 1640 |
| 1605 | 1641 |
| 1606 }} // namespace v8::internal | 1642 }} // namespace v8::internal |
| OLD | NEW |