Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/jsregexp.cc

Issue 10890: Add .*?(...) to regexps (Closed)
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/ast.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 617 matching lines...) Expand 10 before | Expand all | Expand 10 after
628 ByteArray* RegExpImpl::JsreInternal(Handle<JSRegExp> re) { 628 ByteArray* RegExpImpl::JsreInternal(Handle<JSRegExp> re) {
629 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex)); 629 FixedArray* value = FixedArray::cast(re->DataAt(JSRegExp::kJscreDataIndex));
630 return ByteArray::cast(value->get(INTERNAL_INDEX)); 630 return ByteArray::cast(value->get(INTERNAL_INDEX));
631 } 631 }
632 632
633 633
634 // ------------------------------------------------------------------- 634 // -------------------------------------------------------------------
635 // New regular expression engine 635 // New regular expression engine
636 636
637 637
638 class RegExpCompiler;
639 class DotPrinter; 638 class DotPrinter;
640 639
641 640
642 class RegExpCompiler { 641 class RegExpCompiler {
643 public: 642 public:
644 explicit RegExpCompiler(int capture_count) 643 explicit RegExpCompiler(int capture_count)
645 : next_register_(2 * capture_count), 644 : next_register_(2 * capture_count),
646 work_list_(NULL) { } 645 work_list_(NULL) { }
647 646
648 RegExpNode* Compile(RegExpTree* tree,
649 RegExpNode* on_success,
650 RegExpNode* on_failure);
651
652 int AllocateRegister() { return next_register_++; } 647 int AllocateRegister() { return next_register_++; }
653 648
654 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, 649 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
655 RegExpNode* start); 650 RegExpNode* start);
656 651
657 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } 652 inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
658 653
659 static const int kImplementationOffset = 0; 654 static const int kImplementationOffset = 0;
660 static const int kNumberOfRegistersOffset = 0; 655 static const int kNumberOfRegistersOffset = 0;
661 static const int kCodeOffset = 1; 656 static const int kCodeOffset = 1;
(...skipping 245 matching lines...) Expand 10 before | Expand all | Expand 10 after
907 case Guard::LT: 902 case Guard::LT:
908 stream()->Add("$%i < %i", guard->reg(), guard->value()); 903 stream()->Add("$%i < %i", guard->reg(), guard->value());
909 break; 904 break;
910 } 905 }
911 } 906 }
912 stream()->Add("]"); 907 stream()->Add("]");
913 } 908 }
914 stream()->Add("\"];\n"); 909 stream()->Add("\"];\n");
915 that->choices()->at(i).node()->Accept(this); 910 that->choices()->at(i).node()->Accept(this);
916 } 911 }
917 OS::PrintError("--- %p ---\n", static_cast<void*>(this)); 912 OS::PrintError("--- %p ---\n", static_cast<void*>(that));
918 that->table()->Dump(); 913 that->table()->Dump();
919 } 914 }
920 915
921 916
922 void DotPrinter::VisitAtom(AtomNode* that) { 917 void DotPrinter::VisitAtom(AtomNode* that) {
923 stream()->Add(" n%p [label=\"'%w'\", shape=doubleoctagon];\n", 918 stream()->Add(" n%p [label=\"'%w'\", shape=doubleoctagon];\n",
924 that, 919 that,
925 that->data()); 920 that->data());
926 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); 921 stream()->Add(" n%p -> n%p;\n", that, that->on_success());
927 Visit(that->on_success()); 922 Visit(that->on_success());
(...skipping 11 matching lines...) Expand all
939 PrintOnFailure(that, that->on_failure()); 934 PrintOnFailure(that, that->on_failure());
940 } 935 }
941 936
942 937
943 void DotPrinter::VisitEnd(EndNode* that) { 938 void DotPrinter::VisitEnd(EndNode* that) {
944 stream()->Add(" n%p [style=bold, shape=point];\n", that); 939 stream()->Add(" n%p [style=bold, shape=point];\n", that);
945 } 940 }
946 941
947 942
948 void DotPrinter::VisitCharacterClass(CharacterClassNode* that) { 943 void DotPrinter::VisitCharacterClass(CharacterClassNode* that) {
949 stream()->Add(" n%p [label=\"[...]\"];\n", that); 944 stream()->Add(" n%p [label=\"[", that);
945 if (that->is_negated())
946 stream()->Add("^");
947 for (int i = 0; i < that->ranges()->length(); i++) {
948 CharacterRange range = that->ranges()->at(i);
949 stream()->Add("%k-%k", range.from(), range.to());
950 }
951 stream()->Add("]\"];\n");
950 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); 952 stream()->Add(" n%p -> n%p;\n", that, that->on_success());
951 Visit(that->on_success()); 953 Visit(that->on_success());
952 PrintOnFailure(that, that->on_failure()); 954 PrintOnFailure(that, that->on_failure());
953 } 955 }
954 956
955 957
956 void DotPrinter::VisitAction(ActionNode* that) { 958 void DotPrinter::VisitAction(ActionNode* that) {
957 stream()->Add(" n%p [", that); 959 stream()->Add(" n%p [", that);
958 switch (that->type_) { 960 switch (that->type_) {
959 case ActionNode::STORE_REGISTER: 961 case ActionNode::STORE_REGISTER:
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
1055 } 1057 }
1056 1058
1057 1059
1058 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, 1060 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
1059 RegExpNode* on_success, 1061 RegExpNode* on_success,
1060 RegExpNode* on_failure) { 1062 RegExpNode* on_failure) {
1061 ZoneList<RegExpTree*>* children = nodes(); 1063 ZoneList<RegExpTree*>* children = nodes();
1062 int length = children->length(); 1064 int length = children->length();
1063 ChoiceNode* result = new ChoiceNode(length, on_failure); 1065 ChoiceNode* result = new ChoiceNode(length, on_failure);
1064 for (int i = 0; i < length; i++) { 1066 for (int i = 0; i < length; i++) {
1065 GuardedAlternative child(compiler->Compile(children->at(i), 1067 GuardedAlternative child(children->at(i)->ToNode(compiler,
1066 on_success, 1068 on_success,
1067 on_failure)); 1069 on_failure));
1068 result->AddChild(child); 1070 result->AddChild(child);
1069 } 1071 }
1070 return result; 1072 return result;
1071 } 1073 }
1072 1074
1073 1075
1074 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, 1076 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
1075 RegExpNode* on_success, 1077 RegExpNode* on_success,
1076 RegExpNode* on_failure) { 1078 RegExpNode* on_failure) {
1079 return ToNode(min(),
1080 max(),
1081 is_greedy(),
1082 body(),
1083 compiler,
1084 on_success,
1085 on_failure);
1086 }
1087
1088
1089 RegExpNode* RegExpQuantifier::ToNode(int min,
1090 int max,
1091 bool is_greedy,
1092 RegExpTree* body,
1093 RegExpCompiler* compiler,
1094 RegExpNode* on_success,
1095 RegExpNode* on_failure) {
1077 // x{f, t} becomes this: 1096 // x{f, t} becomes this:
1078 // 1097 //
1079 // (r++)<-. 1098 // (r++)<-.
1080 // | ` 1099 // | `
1081 // | (x) 1100 // | (x)
1082 // v ^ 1101 // v ^
1083 // (r=0)-->(?)---/ [if r < t] 1102 // (r=0)-->(?)---/ [if r < t]
1084 // | 1103 // |
1085 // [if r >= f] \----> ... 1104 // [if r >= f] \----> ...
1086 // 1105 //
1087 // 1106 //
1088 // TODO(someone): clear captures on repetition and handle empty 1107 // TODO(someone): clear captures on repetition and handle empty
1089 // matches. 1108 // matches.
1090 bool has_min = min() > 0; 1109 bool has_min = min > 0;
1091 bool has_max = max() < RegExpQuantifier::kInfinity; 1110 bool has_max = max < RegExpQuantifier::kInfinity;
1092 bool needs_counter = has_min || has_max; 1111 bool needs_counter = has_min || has_max;
1093 int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1; 1112 int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1;
1094 ChoiceNode* center = new ChoiceNode(2, on_failure); 1113 ChoiceNode* center = new ChoiceNode(2, on_failure);
1095 RegExpNode* loop_return = needs_counter 1114 RegExpNode* loop_return = needs_counter
1096 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) 1115 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
1097 : static_cast<RegExpNode*>(center); 1116 : static_cast<RegExpNode*>(center);
1098 RegExpNode* body_node = compiler->Compile(body(), loop_return, on_failure); 1117 RegExpNode* body_node = body->ToNode(compiler, loop_return, on_failure);
1099 GuardedAlternative body_alt(body_node); 1118 GuardedAlternative body_alt(body_node);
1100 if (has_max) { 1119 if (has_max) {
1101 Guard* body_guard = new Guard(reg_ctr, Guard::LT, max()); 1120 Guard* body_guard = new Guard(reg_ctr, Guard::LT, max);
1102 body_alt.AddGuard(body_guard); 1121 body_alt.AddGuard(body_guard);
1103 } 1122 }
1104 GuardedAlternative rest_alt(on_success); 1123 GuardedAlternative rest_alt(on_success);
1105 if (has_min) { 1124 if (has_min) {
1106 Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min()); 1125 Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min);
1107 rest_alt.AddGuard(rest_guard); 1126 rest_alt.AddGuard(rest_guard);
1108 } 1127 }
1109 if (is_greedy()) { 1128 if (is_greedy) {
1110 center->AddChild(body_alt); 1129 center->AddChild(body_alt);
1111 center->AddChild(rest_alt); 1130 center->AddChild(rest_alt);
1112 } else { 1131 } else {
1113 center->AddChild(rest_alt); 1132 center->AddChild(rest_alt);
1114 center->AddChild(body_alt); 1133 center->AddChild(body_alt);
1115 } 1134 }
1116 if (needs_counter) { 1135 if (needs_counter) {
1117 return ActionNode::StoreRegister(reg_ctr, 0, center); 1136 return ActionNode::StoreRegister(reg_ctr, 0, center);
1118 } else { 1137 } else {
1119 return center; 1138 return center;
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
1155 // $reg = $pos 1174 // $reg = $pos
1156 // if [body] 1175 // if [body]
1157 // then 1176 // then
1158 // $pos = $reg 1177 // $pos = $reg
1159 // escape submatch scope (drop all backtracks created in scope) 1178 // escape submatch scope (drop all backtracks created in scope)
1160 // succeed 1179 // succeed
1161 // else 1180 // else
1162 // end submatch scope (nothing to clean up, just exit the scope) 1181 // end submatch scope (nothing to clean up, just exit the scope)
1163 // fail 1182 // fail
1164 return ActionNode::BeginSubmatch(ActionNode::StorePosition( 1183 return ActionNode::BeginSubmatch(ActionNode::StorePosition(
1165 position_register, compiler->Compile(body(), 1184 position_register, body()->ToNode(compiler,
1166 ActionNode::RestorePosition(position_register, 1185 ActionNode::RestorePosition(position_register,
1167 ActionNode::EscapeSubmatch(on_success)), 1186 ActionNode::EscapeSubmatch(on_success)),
1168 ActionNode::EndSubmatch(on_failure)))); 1187 ActionNode::EndSubmatch(on_failure))));
1169 } else { 1188 } else {
1170 // begin submatch scope 1189 // begin submatch scope
1171 // try 1190 // try
1172 // first if (body) 1191 // first if (body)
1173 // then 1192 // then
1174 // escape submatch scope 1193 // escape submatch scope
1175 // fail 1194 // fail
1176 // else 1195 // else
1177 // backtrack 1196 // backtrack
1178 // second 1197 // second
1179 // end submatch scope 1198 // end submatch scope
1180 // succeed 1199 // succeed
1181 ChoiceNode* try_node = 1200 ChoiceNode* try_node =
1182 new ChoiceNode(1, ActionNode::EndSubmatch(on_success)); 1201 new ChoiceNode(1, ActionNode::EndSubmatch(on_success));
1183 RegExpNode* body_node = compiler->Compile(body(), 1202 RegExpNode* body_node = body()->ToNode(compiler,
1184 ActionNode::EscapeSubmatch(on_failure), 1203 ActionNode::EscapeSubmatch(on_failure),
1185 EndNode::GetBacktrack()); 1204 EndNode::GetBacktrack());
1186 GuardedAlternative body_alt(body_node); 1205 GuardedAlternative body_alt(body_node);
1187 try_node->AddChild(body_alt); 1206 try_node->AddChild(body_alt);
1188 return ActionNode::BeginSubmatch(try_node); 1207 return ActionNode::BeginSubmatch(try_node);
1189 } 1208 }
1190 } 1209 }
1191 1210
1192 1211
1193 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, 1212 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
1194 RegExpNode* on_success, 1213 RegExpNode* on_success,
1195 RegExpNode* on_failure) { 1214 RegExpNode* on_failure) {
1196 int start_reg = RegExpCapture::StartRegister(index()); 1215 return ToNode(body(), index(), compiler, on_success, on_failure);
1197 int end_reg = RegExpCapture::EndRegister(index()); 1216 }
1217
1218
1219 RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
1220 int index,
1221 RegExpCompiler* compiler,
1222 RegExpNode* on_success,
1223 RegExpNode* on_failure) {
1224 int start_reg = RegExpCapture::StartRegister(index);
1225 int end_reg = RegExpCapture::EndRegister(index);
1198 RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success); 1226 RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success);
1199 RegExpNode* body_node = compiler->Compile(body(), store_end, on_failure); 1227 RegExpNode* body_node = body->ToNode(compiler, store_end, on_failure);
1200 return ActionNode::StorePosition(start_reg, body_node); 1228 return ActionNode::StorePosition(start_reg, body_node);
1201 } 1229 }
1202 1230
1203 1231
1204 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, 1232 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
1205 RegExpNode* on_success, 1233 RegExpNode* on_success,
1206 RegExpNode* on_failure) { 1234 RegExpNode* on_failure) {
1207 ZoneList<RegExpTree*>* children = nodes(); 1235 ZoneList<RegExpTree*>* children = nodes();
1208 RegExpNode* current = on_success; 1236 RegExpNode* current = on_success;
1209 for (int i = children->length() - 1; i >= 0; i--) { 1237 for (int i = children->length() - 1; i >= 0; i--) {
1210 current = compiler->Compile(children->at(i), current, on_failure); 1238 current = children->at(i)->ToNode(compiler, current, on_failure);
1211 } 1239 }
1212 return current; 1240 return current;
1213 } 1241 }
1214 1242
1215 1243
1216 static const int kSpaceRangeCount = 20; 1244 static const int kSpaceRangeCount = 20;
1217 static const uc16 kSpaceRanges[kSpaceRangeCount] = { 1245 static const uc16 kSpaceRanges[kSpaceRangeCount] = {
1218 0x0009, 0x0009, 0x000B, 0x000C, 0x0020, 0x0020, 0x00A0, 0x00A0, 1246 0x0009, 0x0009, 0x000B, 0x000C, 0x0020, 0x0020, 0x00A0, 0x00A0,
1219 0x1680, 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x202F, 0x202F, 1247 0x1680, 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x202F, 0x202F,
1220 0x205F, 0x205F, 0x3000, 0x3000 1248 0x205F, 0x205F, 0x3000, 0x3000
(...skipping 353 matching lines...) Expand 10 before | Expand all | Expand 10 after
1574 outgoing.set_table(NULL); 1602 outgoing.set_table(NULL);
1575 outgoing.Analyze(that->on_success()); 1603 outgoing.Analyze(that->on_success());
1576 } 1604 }
1577 1605
1578 1606
1579 void Analysis::VisitAction(ActionNode* that) { 1607 void Analysis::VisitAction(ActionNode* that) {
1580 Analyze(that->on_success()); 1608 Analyze(that->on_success());
1581 } 1609 }
1582 1610
1583 1611
1584 RegExpNode* RegExpCompiler::Compile(RegExpTree* tree,
1585 RegExpNode* on_success,
1586 RegExpNode* on_failure) {
1587 return tree->ToNode(this, on_success, on_failure);
1588 }
1589
1590
1591 RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) { 1612 RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) {
1592 RegExpCompiler compiler(input->capture_count); 1613 RegExpCompiler compiler(input->capture_count);
1593 RegExpNode* node = compiler.Compile(input->tree, 1614 // Wrap the body of the regexp in capture #0.
1594 EndNode::GetAccept(), 1615 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
1595 EndNode::GetBacktrack()); 1616 0,
1617 &compiler,
1618 EndNode::GetAccept(),
1619 EndNode::GetBacktrack());
1620 // Add a .*? at the beginning, outside the body capture.
1621 // Note: We could choose to not add this if the regexp is anchored at
1622 // the start of the input but I'm not sure how best to do that and
1623 // since we don't even handle ^ yet I'm saving that optimization for
1624 // later.
1625 RegExpNode* node = RegExpQuantifier::ToNode(0,
1626 RegExpQuantifier::kInfinity,
1627 false,
1628 new RegExpCharacterClass('.'),
1629 &compiler,
1630 captured_body,
1631 EndNode::GetBacktrack());
1596 Analysis analysis(&compiler); 1632 Analysis analysis(&compiler);
1597 analysis.Analyze(node); 1633 analysis.Analyze(node);
1598 return node; 1634 return node;
1599 } 1635 }
1600 1636
1601 1637
1602 RegExpMacroAssembler::~RegExpMacroAssembler() { 1638 RegExpMacroAssembler::~RegExpMacroAssembler() {
1603 } 1639 }
1604 1640
1605 1641
1606 }} // namespace v8::internal 1642 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/ast.h ('k') | test/cctest/test-regexp.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698