Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(140)

Side by Side Diff: regexp2000/src/jsregexp.cc

Issue 9692: * Fix bug in regexp parser. (Closed)
Patch Set: Addressed review comments Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | regexp2000/src/parser.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 796 matching lines...) Expand 10 before | Expand all | Expand 10 after
807 DispatchTable table_; 807 DispatchTable table_;
808 }; 808 };
809 809
810 810
811 class ActionNode: public SeqRegExpNode { 811 class ActionNode: public SeqRegExpNode {
812 public: 812 public:
813 enum Type { 813 enum Type {
814 STORE_REGISTER, 814 STORE_REGISTER,
815 INCREMENT_REGISTER, 815 INCREMENT_REGISTER,
816 STORE_POSITION, 816 STORE_POSITION,
817 RESTORE_POSITION,
817 BEGIN_SUBMATCH, 818 BEGIN_SUBMATCH,
818 ESCAPE_SUBMATCH, 819 ESCAPE_SUBMATCH,
819 END_SUBMATCH 820 END_SUBMATCH
820 }; 821 };
821 static ActionNode* StoreRegister(int reg, int val, RegExpNode* on_success) { 822 static ActionNode* StoreRegister(int reg, int val, RegExpNode* on_success) {
822 ActionNode* result = new ActionNode(STORE_REGISTER, on_success); 823 ActionNode* result = new ActionNode(STORE_REGISTER, on_success);
823 result->data_.u_store_register.reg_ = reg; 824 result->data_.u_store_register.reg_ = reg;
824 result->data_.u_store_register.value_ = val; 825 result->data_.u_store_register.value_ = val;
825 return result; 826 return result;
826 } 827 }
827 static ActionNode* IncrementRegister(int reg, RegExpNode* on_success) { 828 static ActionNode* IncrementRegister(int reg, RegExpNode* on_success) {
828 ActionNode* result = new ActionNode(INCREMENT_REGISTER, on_success); 829 ActionNode* result = new ActionNode(INCREMENT_REGISTER, on_success);
829 result->data_.u_increment_register.reg_ = reg; 830 result->data_.u_increment_register.reg_ = reg;
830 return result; 831 return result;
831 } 832 }
832 static ActionNode* StorePosition(int reg, RegExpNode* on_success) { 833 static ActionNode* StorePosition(int reg, RegExpNode* on_success) {
833 ActionNode* result = new ActionNode(STORE_POSITION, on_success); 834 ActionNode* result = new ActionNode(STORE_POSITION, on_success);
834 result->data_.u_store_position.reg_ = reg; 835 result->data_.u_position_register.reg_ = reg;
836 return result;
837 }
838 static ActionNode* RestorePosition(int reg, RegExpNode* on_success) {
839 ActionNode* result = new ActionNode(RESTORE_POSITION, on_success);
840 result->data_.u_position_register.reg_ = reg;
835 return result; 841 return result;
836 } 842 }
837 static ActionNode* BeginSubmatch(RegExpNode* on_success) { 843 static ActionNode* BeginSubmatch(RegExpNode* on_success) {
838 return new ActionNode(BEGIN_SUBMATCH, on_success); 844 return new ActionNode(BEGIN_SUBMATCH, on_success);
839 } 845 }
840 static ActionNode* EscapeSubmatch(RegExpNode* on_success) { 846 static ActionNode* EscapeSubmatch(RegExpNode* on_success) {
841 return new ActionNode(ESCAPE_SUBMATCH, on_success); 847 return new ActionNode(ESCAPE_SUBMATCH, on_success);
842 } 848 }
843 static ActionNode* EndSubmatch(RegExpNode* on_success) { 849 static ActionNode* EndSubmatch(RegExpNode* on_success) {
844 return new ActionNode(END_SUBMATCH, on_success); 850 return new ActionNode(END_SUBMATCH, on_success);
845 } 851 }
846 virtual void EmitDot(DotPrinter* out); 852 virtual void EmitDot(DotPrinter* out);
847 Type type() { return type_; } 853 Type type() { return type_; }
848 private: 854 private:
849 ActionNode(Type type, RegExpNode* on_success) 855 ActionNode(Type type, RegExpNode* on_success)
850 : SeqRegExpNode(on_success), 856 : SeqRegExpNode(on_success),
851 type_(type) { } 857 type_(type) { }
852 Type type_; 858 Type type_;
853 union { 859 union {
854 struct { 860 struct {
855 int reg_; 861 int reg_;
856 int value_; 862 int value_;
857 } u_store_register; 863 } u_store_register;
858 struct { 864 struct {
859 int reg_; 865 int reg_;
860 } u_increment_register; 866 } u_increment_register;
861 struct { 867 struct {
862 int reg_; 868 int reg_;
863 } u_store_position; 869 } u_position_register;
864 } data_; 870 } data_;
865 }; 871 };
866 872
867 873
868 // ------------------------------------------------------------------- 874 // -------------------------------------------------------------------
869 // Dot/dotty output 875 // Dot/dotty output
870 876
871 void DotPrinter::PrintNode(const char* label, RegExpNode* node) { 877 void DotPrinter::PrintNode(const char* label, RegExpNode* node) {
872 stream()->Add("digraph G {\n graph [label=\""); 878 stream()->Add("digraph G {\n graph [label=\"");
873 for (int i = 0; label[i]; i++) { 879 for (int i = 0; label[i]; i++) {
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after
984 out->stream()->Add("label=\"$%i:=%i\", shape=box", 990 out->stream()->Add("label=\"$%i:=%i\", shape=box",
985 data_.u_store_register.reg_, 991 data_.u_store_register.reg_,
986 data_.u_store_register.value_); 992 data_.u_store_register.value_);
987 break; 993 break;
988 case INCREMENT_REGISTER: 994 case INCREMENT_REGISTER:
989 out->stream()->Add("label=\"$%i++\", shape=box", 995 out->stream()->Add("label=\"$%i++\", shape=box",
990 data_.u_increment_register.reg_); 996 data_.u_increment_register.reg_);
991 break; 997 break;
992 case STORE_POSITION: 998 case STORE_POSITION:
993 out->stream()->Add("label=\"$%i:=$pos\", shape=box", 999 out->stream()->Add("label=\"$%i:=$pos\", shape=box",
994 data_.u_store_position.reg_); 1000 data_.u_position_register.reg_);
1001 break;
1002 case RESTORE_POSITION:
1003 out->stream()->Add("label=\"$pos:=$%i\", shape=box",
1004 data_.u_position_register.reg_);
995 break; 1005 break;
996 case BEGIN_SUBMATCH: 1006 case BEGIN_SUBMATCH:
997 out->stream()->Add("label=\"begin\", shape=septagon"); 1007 out->stream()->Add("label=\"begin\", shape=septagon");
998 break; 1008 break;
999 case ESCAPE_SUBMATCH: 1009 case ESCAPE_SUBMATCH:
1000 out->stream()->Add("label=\"escape\", shape=septagon"); 1010 out->stream()->Add("label=\"escape\", shape=septagon");
1001 break; 1011 break;
1002 case END_SUBMATCH: 1012 case END_SUBMATCH:
1003 out->stream()->Add("label=\"end\", shape=septagon"); 1013 out->stream()->Add("label=\"end\", shape=septagon");
1004 break; 1014 break;
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
1055 // (r=0)-->(?)---/ [if r < t] 1065 // (r=0)-->(?)---/ [if r < t]
1056 // | 1066 // |
1057 // [if r >= f] \----> ... 1067 // [if r >= f] \----> ...
1058 // 1068 //
1059 // 1069 //
1060 // TODO(someone): clear captures on repetition and handle empty 1070 // TODO(someone): clear captures on repetition and handle empty
1061 // matches. 1071 // matches.
1062 bool has_min = min() > 0; 1072 bool has_min = min() > 0;
1063 bool has_max = max() < RegExpQuantifier::kInfinity; 1073 bool has_max = max() < RegExpQuantifier::kInfinity;
1064 bool needs_counter = has_min || has_max; 1074 bool needs_counter = has_min || has_max;
1065 int reg = needs_counter ? compiler->AllocateRegister() : -1; 1075 int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1;
1066 ChoiceNode* center = new ChoiceNode(2, on_failure); 1076 ChoiceNode* center = new ChoiceNode(2, on_failure);
1067 RegExpNode* loop_return = needs_counter 1077 RegExpNode* loop_return = needs_counter
1068 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg, center)) 1078 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
1069 : static_cast<RegExpNode*>(center); 1079 : static_cast<RegExpNode*>(center);
1070 RegExpNode* body_node = compiler->Compile(body(), loop_return, on_failure); 1080 RegExpNode* body_node = compiler->Compile(body(), loop_return, on_failure);
1071 GuardedAlternative body_alt(body_node); 1081 GuardedAlternative body_alt(body_node);
1072 if (has_max) { 1082 if (has_max) {
1073 Guard* body_guard = new Guard(reg, Guard::LT, max()); 1083 Guard* body_guard = new Guard(reg_ctr, Guard::LT, max());
1074 body_alt.AddGuard(body_guard); 1084 body_alt.AddGuard(body_guard);
1075 } 1085 }
1076 GuardedAlternative rest_alt(on_success); 1086 GuardedAlternative rest_alt(on_success);
1077 if (has_min) { 1087 if (has_min) {
1078 Guard* rest_guard = new Guard(reg, Guard::GEQ, min()); 1088 Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min());
1079 rest_alt.AddGuard(rest_guard); 1089 rest_alt.AddGuard(rest_guard);
1080 } 1090 }
1081 if (is_greedy()) { 1091 if (is_greedy()) {
1082 center->AddChild(body_alt); 1092 center->AddChild(body_alt);
1083 center->AddChild(rest_alt); 1093 center->AddChild(rest_alt);
1084 } else { 1094 } else {
1085 center->AddChild(rest_alt); 1095 center->AddChild(rest_alt);
1086 center->AddChild(body_alt); 1096 center->AddChild(body_alt);
1087 } 1097 }
1088 if (needs_counter) { 1098 if (needs_counter) {
1089 return ActionNode::StoreRegister(reg, 0, center); 1099 return ActionNode::StoreRegister(reg_ctr, 0, center);
1090 } else { 1100 } else {
1091 return center; 1101 return center;
1092 } 1102 }
1093 } 1103 }
1094 1104
1095 1105
1096 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, 1106 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
1097 RegExpNode* on_success, 1107 RegExpNode* on_success,
1098 RegExpNode* on_failure) { 1108 RegExpNode* on_failure) {
1099 // TODO(self): implement assertions. 1109 // TODO(self): implement assertions.
(...skipping 15 matching lines...) Expand all
1115 RegExpNode* on_success, 1125 RegExpNode* on_success,
1116 RegExpNode* on_failure) { 1126 RegExpNode* on_failure) {
1117 return on_success; 1127 return on_success;
1118 } 1128 }
1119 1129
1120 1130
1121 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, 1131 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
1122 RegExpNode* on_success, 1132 RegExpNode* on_success,
1123 RegExpNode* on_failure) { 1133 RegExpNode* on_failure) {
1124 if (is_positive()) { 1134 if (is_positive()) {
1125 RegExpNode* proceed = ActionNode::EndSubmatch(on_success); 1135 int position_register = compiler->AllocateRegister();
1126 RegExpNode* escape = ActionNode::EscapeSubmatch(on_failure); 1136 // begin submatch scope
1127 RegExpNode* body_node = compiler->Compile(body(), proceed, escape); 1137 // $reg = $pos
1128 return ActionNode::BeginSubmatch(body_node); 1138 // if [body]
1139 // then
1140 // $pos = $reg
1141 // escape submatch scope (drop all backtracks created in scope)
1142 // succeed
1143 // else
1144 // end submatch scope (nothing to clean up, just exit the scope)
1145 // fail
1146 return ActionNode::BeginSubmatch(ActionNode::StorePosition(
1147 position_register, compiler->Compile(body(),
1148 ActionNode::RestorePosition(position_register,
1149 ActionNode::EscapeSubmatch(on_success)),
1150 ActionNode::EndSubmatch(on_failure))));
1129 } else { 1151 } else {
1130 RegExpNode* failed = ActionNode::EscapeSubmatch(on_success); 1152 // begin submatch scope
1131 RegExpNode* succeeded = ActionNode::EndSubmatch(on_failure); 1153 // try
1132 RegExpNode* body_node = compiler->Compile(body(), succeeded, failed); 1154 // first if (body)
1133 return ActionNode::BeginSubmatch(body_node); 1155 // then
1156 // escape submatch scope
1157 // fail
1158 // else
1159 // backtrack
1160 // second
1161 // end submatch scope
1162 // succeed
1163 ChoiceNode* try_node =
1164 new ChoiceNode(1, ActionNode::EndSubmatch(on_success));
1165 RegExpNode* body_node = compiler->Compile(body(),
1166 ActionNode::EscapeSubmatch(on_failure),
1167 EndNode::GetBacktrack());
1168 GuardedAlternative body_alt(body_node);
1169 try_node->AddChild(body_alt);
1170 return ActionNode::BeginSubmatch(try_node);
1134 } 1171 }
1135 } 1172 }
1136 1173
1137 1174
1138 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, 1175 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
1139 RegExpNode* on_success, 1176 RegExpNode* on_success,
1140 RegExpNode* on_failure) { 1177 RegExpNode* on_failure) {
1141 int start_reg = RegExpCapture::StartRegister(index()); 1178 int start_reg = RegExpCapture::StartRegister(index());
1142 int end_reg = RegExpCapture::EndRegister(index()); 1179 int end_reg = RegExpCapture::EndRegister(index());
1143 RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success); 1180 RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success);
(...skipping 232 matching lines...) Expand 10 before | Expand all | Expand 10 after
1376 RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) { 1413 RegExpNode* RegExpEngine::Compile(RegExpParseResult* input) {
1377 RegExpCompiler compiler(input->capture_count); 1414 RegExpCompiler compiler(input->capture_count);
1378 RegExpNode* node = compiler.Compile(input->tree, 1415 RegExpNode* node = compiler.Compile(input->tree,
1379 EndNode::GetAccept(), 1416 EndNode::GetAccept(),
1380 EndNode::GetBacktrack()); 1417 EndNode::GetBacktrack());
1381 return node; 1418 return node;
1382 } 1419 }
1383 1420
1384 1421
1385 }} // namespace v8::internal 1422 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « no previous file | regexp2000/src/parser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698