Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/jsregexp.cc

Issue 12900: Irregexp:... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 12 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 224 matching lines...) Expand 10 before | Expand all | Expand 10 after
235 result = AtomCompile(re, pattern, flags, atom_string); 235 result = AtomCompile(re, pattern, flags, atom_string);
236 } else { 236 } else {
237 result = AtomCompile(re, pattern, flags, pattern); 237 result = AtomCompile(re, pattern, flags, pattern);
238 } 238 }
239 } else { 239 } else {
240 RegExpNode* node = NULL; 240 RegExpNode* node = NULL;
241 Handle<FixedArray> irregexp_data = 241 Handle<FixedArray> irregexp_data =
242 RegExpEngine::Compile(&parse_result, 242 RegExpEngine::Compile(&parse_result,
243 &node, 243 &node,
244 flags.is_ignore_case(), 244 flags.is_ignore_case(),
245 flags.is_multiline()); 245 flags.is_multiline(),
246 pattern);
246 if (irregexp_data.is_null()) { 247 if (irregexp_data.is_null()) {
247 if (FLAG_disable_jscre) { 248 if (FLAG_disable_jscre) {
248 UNIMPLEMENTED(); 249 UNIMPLEMENTED();
249 } 250 }
250 result = JscrePrepare(re, pattern, flags); 251 result = JscrePrepare(re, pattern, flags);
251 } else { 252 } else {
252 result = IrregexpPrepare(re, pattern, flags, irregexp_data); 253 result = IrregexpPrepare(re, pattern, flags, irregexp_data);
253 } 254 }
254 } 255 }
255 Object* data = re->data(); 256 Object* data = re->data();
(...skipping 595 matching lines...) Expand 10 before | Expand all | Expand 10 after
851 852
852 Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) { 853 Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) {
853 FixedArray* value = 854 FixedArray* value =
854 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)); 855 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex));
855 return Handle<ByteArray>(ByteArray::cast(value->get(kIrregexpCodeIndex))); 856 return Handle<ByteArray>(ByteArray::cast(value->get(kIrregexpCodeIndex)));
856 } 857 }
857 858
858 859
859 // ------------------------------------------------------------------- 860 // -------------------------------------------------------------------
860 // Implmentation of the Irregexp regular expression engine. 861 // Implmentation of the Irregexp regular expression engine.
862 //
863 // The Irregexp regular expression engine is intended to be a complete
864 // implementation of ECMAScript regular expressions. It generates either
865 // bytecodes or native code.
866
867 // The Irregexp regexp engine is structured in three steps.
868 // 1) The parser generates an abstract syntax tree. See ast.cc.
869 // 2) From the AST a node network is created. The nodes are all
870 // subclasses of RegExpNode. The nodes represent states when
871 // executing a regular expression. Several optimizations are
872 // performed on the node network.
873 // 3) From the nodes we generate either byte codes or native code
874 // that can actually execute the regular expression (perform
875 // the search). The code generation step is described in more
876 // detail below.
877
878 // Code generation.
879 //
880 // The nodes are divided into four main categories.
881 // * Choice nodes
882 // These represent places where the regular expression can
883 // match in more than one way. For example on entry to an
884 // alternation (foo|bar) or a repetition (*, +, ? or {}).
885 // * Action nodes
886 // These represent places where some action should be
887 // performed. Examples include recording the current position
888 // in the input string to a register (in order to implement
889 // captures) or other actions on register for example in order
890 // to implement the counters needed for {} repetitions.
891 // * Matching nodes
892 // These attempt to match some element part of the input string.
893 // Examples of elements include character classes, plain strings
894 // or back references.
895 // * End nodes
896 // These are used to implement the actions required on finding
897 // a successful match or failing to find a match.
898 //
899 // The code generated (whether as byte codes or native code) maintains
900 // some state as it runs. This consists of the following elements:
901 //
902 // * The capture registers. Used for string captures.
903 // * Other registers. Used for counters etc.
904 // * The current position.
905 // * The stack of backtracking information. Used when a matching node
906 // fails to find a match and needs to try an alternative.
907 //
908 // Conceptual regular expression execution model:
909 //
910 // There is a simple conceptual model of regular expression execution
911 // which will be presented first. The actual code generated is a more
912 // efficient simulation of the simple conceptual model:
913 //
914 // * Choice nodes are implemented as follows:
915 // For each choice except the last {
916 // push current position
917 // push backtrack code location
918 // <generate code to test for choice>
919 // backtrack code location:
920 // pop current position
921 // }
922 // <generate code to test for last choice>
923 //
924 // * Actions nodes are generated as follows
925 // <push affected registers on backtrack stack>
926 // <generate code to perform action>
927 // push backtrack code location
928 // <generate code to test for following nodes>
929 // backtrack code location:
930 // <pop affected registers to restore their state>
931 // <pop backtrack location from stack and go to it>
932 //
933 // * Matching nodes are generated as follows:
934 // if input string matches at current position
935 // update current position
936 // <generate code to test for following nodes>
937 // else
938 // <pop backtrack location from stack and go to it>
939 //
940 // Thus it can be seen that the current position is saved and restored
941 // by the choice nodes, whereas the registers are saved and restored by
942 // by the action nodes that manipulate them.
943 //
944 // The other interesting aspect of this model is that nodes are generated
945 // at the point where they are needed by a recursive call to Emit(). If
946 // the node has already been code generated then the Emit() call will
947 // generate a jump to the previously generated code instead. In order to
948 // limit recursion it is possible for the Emit() function to put the node
949 // on a work list for later generation and instead generate a jump. The
950 // destination of the jump is resolved later when the code is generated.
951 //
952 // Actual regular expression code generation.
953 //
954 // Code generation is actually more complicated than the above. In order
955 // to improve the efficiency of the generated code some optimizations are
956 // performed
957 //
958 // * Choice nodes have 1-character lookahead.
959 // A choice node looks at the following character and eliminates some of
960 // the choices immediately based on that character. This is not yet
961 // implemented.
962 // * Simple greedy loops store reduced backtracking information.
963 // A quantifier like /.*foo/m will greedily match the whole input. It will
964 // then need to backtrack to a point where it can match "foo". The naive
965 // implementation of this would push each character position onto the
966 // backtracking stack, then pop them off one by one. This would use space
967 // proportional to the length of the input string. However since the "."
968 // can only match in one way and always has a constant length (in this case
969 // of 1) it suffices to store the current position on the top of the stack
970 // once. Matching now becomes merely incrementing the current position and
971 // backtracking becomes decrementing the current position and checking the
972 // result against the stored current position. This is faster and saves
973 // space.
974 // * The current state is virtualized.
975 // This is used to defer expensive operations until it is clear that they
976 // are needed and to generate code for a node more than once, allowing
977 // specialized an efficient versions of the code to be created. This is
978 // explained in the section below.
979 //
980 // Execution state virtualization.
981 //
982 // Instead of emitting code, nodes that manipulate the state can record their
983 // manipulation in an object called the GenerationVariant. The
984 // GenerationVariant object can record a current position offset, an
985 // optional backtrack code location on the top of the virtualized backtrack
986 // stack and some register changes. When a node is to be emitted it can flush
987 // the GenerationVariant or update it. Flushing the GenerationVariant
988 // will emit code to bring the actual state into line with the virtual state.
989 // Avoiding flushing the state can postpone some work (eg updates of capture
990 // registers). Postponing work can save time when executing the regular
991 // expression since it may be found that the work never has to be done as a
992 // failure to match can occur. In addition it is much faster to jump to a
993 // known backtrack code location than it is to pop an unknown backtrack
994 // location from the stack and jump there.
995 //
996 // The virtual state found in the GenerationVariant affects code generation.
997 // For example the virtual state contains the difference between the actual
998 // current position and the virtual current position, and matching code needs
999 // to use this offset to attempt a match in the correct location of the input
1000 // string. Therefore code generated for a non-trivial GenerationVariant is
1001 // specialized to that GenerationVariant. The code generator therefore
1002 // has the ability to generate code for each node several times. In order to
1003 // limit the size of the generated code there is an arbitrary limit on how
1004 // many specialized sets of code may be generated for a given node. If the
1005 // limit is reached, the GenerationVariant is flushed and a generic version of
1006 // the code for a node is emitted. This is subsequently used for that node.
1007 // The code emitted for non-generic GenerationVariants is not recorded in the
1008 // node and so it cannot currently be reused in the event that code generation
1009 // is requested for an identical GenerationVariant.
Christian Plesner Hansen 2008/12/08 08:54:19 Most excellent!
861 1010
862 1011
863 void RegExpTree::AppendToText(RegExpText* text) { 1012 void RegExpTree::AppendToText(RegExpText* text) {
864 UNREACHABLE(); 1013 UNREACHABLE();
865 } 1014 }
866 1015
867 1016
868 void RegExpAtom::AppendToText(RegExpText* text) { 1017 void RegExpAtom::AppendToText(RegExpText* text) {
869 text->AddElement(TextElement::Atom(this)); 1018 text->AddElement(TextElement::Atom(this));
870 } 1019 }
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
907 1056
908 1057
909 class RegExpCompiler { 1058 class RegExpCompiler {
910 public: 1059 public:
911 RegExpCompiler(int capture_count, bool ignore_case); 1060 RegExpCompiler(int capture_count, bool ignore_case);
912 1061
913 int AllocateRegister() { return next_register_++; } 1062 int AllocateRegister() { return next_register_++; }
914 1063
915 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, 1064 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
916 RegExpNode* start, 1065 RegExpNode* start,
917 int capture_count); 1066 int capture_count,
1067 Handle<String> pattern);
918 1068
919 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } 1069 inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
920 1070
921 static const int kImplementationOffset = 0; 1071 static const int kImplementationOffset = 0;
922 static const int kNumberOfRegistersOffset = 0; 1072 static const int kNumberOfRegistersOffset = 0;
923 static const int kCodeOffset = 1; 1073 static const int kCodeOffset = 1;
924 1074
925 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } 1075 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
926 EndNode* accept() { return accept_; } 1076 EndNode* accept() { return accept_; }
927 EndNode* backtrack() { return backtrack_; }
928 1077
929 static const int kMaxRecursion = 100; 1078 static const int kMaxRecursion = 100;
930 inline int recursion_depth() { return recursion_depth_; } 1079 inline int recursion_depth() { return recursion_depth_; }
931 inline void IncrementRecursionDepth() { recursion_depth_++; } 1080 inline void IncrementRecursionDepth() { recursion_depth_++; }
932 inline void DecrementRecursionDepth() { recursion_depth_--; } 1081 inline void DecrementRecursionDepth() { recursion_depth_--; }
933 1082
934 inline bool ignore_case() { return ignore_case_; } 1083 inline bool ignore_case() { return ignore_case_; }
935 1084
936 private: 1085 private:
937 EndNode* accept_; 1086 EndNode* accept_;
938 EndNode* backtrack_;
939 int next_register_; 1087 int next_register_;
940 List<RegExpNode*>* work_list_; 1088 List<RegExpNode*>* work_list_;
941 int recursion_depth_; 1089 int recursion_depth_;
942 RegExpMacroAssembler* macro_assembler_; 1090 RegExpMacroAssembler* macro_assembler_;
943 bool ignore_case_; 1091 bool ignore_case_;
944 }; 1092 };
945 1093
946 1094
1095 class RecursionCheck {
1096 public:
1097 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
1098 compiler->IncrementRecursionDepth();
1099 }
1100 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
1101 private:
1102 RegExpCompiler* compiler_;
1103 };
1104
1105
947 // Attempts to compile the regexp using an Irregexp code generator. Returns 1106 // Attempts to compile the regexp using an Irregexp code generator. Returns
948 // a fixed array or a null handle depending on whether it succeeded. 1107 // a fixed array or a null handle depending on whether it succeeded.
949 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case) 1108 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case)
950 : next_register_(2 * (capture_count + 1)), 1109 : next_register_(2 * (capture_count + 1)),
951 work_list_(NULL), 1110 work_list_(NULL),
952 recursion_depth_(0), 1111 recursion_depth_(0),
953 ignore_case_(ignore_case) { 1112 ignore_case_(ignore_case) {
954 accept_ = new EndNode(EndNode::ACCEPT); 1113 accept_ = new EndNode(EndNode::ACCEPT);
955 backtrack_ = new EndNode(EndNode::BACKTRACK);
956 } 1114 }
957 1115
958 1116
959 Handle<FixedArray> RegExpCompiler::Assemble( 1117 Handle<FixedArray> RegExpCompiler::Assemble(
960 RegExpMacroAssembler* macro_assembler, 1118 RegExpMacroAssembler* macro_assembler,
961 RegExpNode* start, 1119 RegExpNode* start,
962 int capture_count) { 1120 int capture_count,
1121 Handle<String> pattern) {
963 #ifdef DEBUG 1122 #ifdef DEBUG
964 if (FLAG_trace_regexp_assembler) 1123 if (FLAG_trace_regexp_assembler)
965 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); 1124 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);
966 else 1125 else
967 #endif 1126 #endif
968 macro_assembler_ = macro_assembler; 1127 macro_assembler_ = macro_assembler;
969 List <RegExpNode*> work_list(0); 1128 List <RegExpNode*> work_list(0);
970 work_list_ = &work_list; 1129 work_list_ = &work_list;
971 Label fail; 1130 Label fail;
972 macro_assembler_->PushBacktrack(&fail); 1131 macro_assembler->PushBacktrack(&fail);
973 if (!start->GoTo(this)) { 1132 GenerationVariant generic_variant;
1133 if (!start->Emit(this, &generic_variant)) {
974 fail.Unuse(); 1134 fail.Unuse();
975 return Handle<FixedArray>::null(); 1135 return Handle<FixedArray>::null();
976 } 1136 }
1137 macro_assembler_->Bind(&fail);
1138 macro_assembler_->Fail();
977 while (!work_list.is_empty()) { 1139 while (!work_list.is_empty()) {
978 if (!work_list.RemoveLast()->GoTo(this)) { 1140 if (!work_list.RemoveLast()->Emit(this, &generic_variant)) {
979 fail.Unuse();
980 return Handle<FixedArray>::null(); 1141 return Handle<FixedArray>::null();
981 } 1142 }
982 } 1143 }
983 macro_assembler_->Bind(&fail);
984 macro_assembler_->Fail();
985 Handle<FixedArray> array = 1144 Handle<FixedArray> array =
986 Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength); 1145 Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength);
987 array->set(RegExpImpl::kIrregexpImplementationIndex, 1146 array->set(RegExpImpl::kIrregexpImplementationIndex,
988 Smi::FromInt(macro_assembler_->Implementation())); 1147 Smi::FromInt(macro_assembler_->Implementation()));
989 array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex, 1148 array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex,
990 Smi::FromInt(next_register_)); 1149 Smi::FromInt(next_register_));
991 array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex, 1150 array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex,
992 Smi::FromInt(capture_count)); 1151 Smi::FromInt(capture_count));
993 Handle<Object> code = macro_assembler_->GetCode(); 1152 Handle<Object> code = macro_assembler_->GetCode(pattern);
994 array->set(RegExpImpl::kIrregexpCodeIndex, *code); 1153 array->set(RegExpImpl::kIrregexpCodeIndex, *code);
995 work_list_ = NULL; 1154 work_list_ = NULL;
996 #ifdef DEBUG 1155 #ifdef DEBUG
997 if (FLAG_trace_regexp_assembler) { 1156 if (FLAG_trace_regexp_assembler) {
998 delete macro_assembler_; 1157 delete macro_assembler_;
999 } 1158 }
1000 #endif 1159 #endif
1001 return array; 1160 return array;
1002 } 1161 }
1003 1162
1004 1163
1005 bool RegExpNode::GoTo(RegExpCompiler* compiler) { 1164 bool GenerationVariant::mentions_reg(int reg) {
1006 // TODO(erikcorry): Implement support. 1165 for (DeferredAction* action = actions_;
1007 if (info_.follows_word_interest || 1166 action != NULL;
1008 info_.follows_newline_interest || 1167 action = action->next()) {
1009 info_.follows_start_interest) { 1168 if (reg == action->reg()) return true;
1010 return false; 1169 }
1011 } 1170 return false;
1012 if (label_.is_bound()) { 1171 }
1013 compiler->macro_assembler()->GoTo(&label_); 1172
1014 return true; 1173
1174 int GenerationVariant::FindAffectedRegisters(OutSet* affected_registers) {
1175 int max_register = -1;
1176 for (DeferredAction* action = actions_;
1177 action != NULL;
1178 action = action->next()) {
1179 affected_registers->Set(action->reg());
1180 if (action->reg() > max_register) max_register = action->reg();
1181 }
1182 return max_register;
1183 }
1184
1185
1186 void GenerationVariant::PushAffectedRegisters(RegExpMacroAssembler* macro,
1187 int max_register,
1188 OutSet& affected_registers) {
1189 for (int reg = 0; reg <= max_register; reg++) {
1190 if (affected_registers.Get(reg)) macro->PushRegister(reg);
1191 }
1192 }
1193
1194
1195 void GenerationVariant::RestoreAffectedRegisters(RegExpMacroAssembler* macro,
1196 int max_register,
1197 OutSet& affected_registers) {
1198 for (int reg = max_register; reg >= 0; reg--) {
1199 if (affected_registers.Get(reg)) macro->PopRegister(reg);
1200 }
1201 }
1202
1203
1204 void GenerationVariant::PerformDeferredActions(RegExpMacroAssembler* macro,
1205 int max_register,
Christian Plesner Hansen 2008/12/08 08:54:19 Indentation off.
1206 OutSet& affected_registers) {
1207 for (int reg = 0; reg <= max_register; reg++) {
1208 if (!affected_registers.Get(reg)) {
1209 continue;
1210 }
1211 int value = 0;
1212 bool absolute = false;
1213 int store_position = -1;
1214 // This is a little tricky because we are scanning the actions in reverse
1215 // historical order (newest first).
1216 for (DeferredAction* action = actions_;
1217 action != NULL;
1218 action = action->next()) {
1219 if (action->reg() == reg) {
1220 switch (action->type()) {
1221 case ActionNode::SET_REGISTER: {
1222 GenerationVariant::DeferredSetRegister* psr =
1223 static_cast<GenerationVariant::DeferredSetRegister*>(action);
1224 value += psr->value();
1225 absolute = true;
1226 ASSERT_EQ(store_position, -1);
1227 break;
1228 }
1229 case ActionNode::INCREMENT_REGISTER:
1230 if (!absolute) {
1231 value++;
1232 }
1233 ASSERT_EQ(store_position, -1);
1234 break;
1235 case ActionNode::STORE_POSITION: {
1236 GenerationVariant::DeferredCapture* pc =
1237 static_cast<GenerationVariant::DeferredCapture*>(action);
1238 if (store_position == -1) {
1239 store_position = pc->cp_offset();
1240 }
1241 ASSERT(!absolute);
1242 ASSERT_EQ(value, 0);
1243 break;
1244 }
1245 default:
1246 UNREACHABLE();
1247 break;
1248 }
1249 }
1250 }
1251 if (store_position != -1) {
1252 macro->WriteCurrentPositionToRegister(reg, store_position);
1253 } else {
1254 if (absolute) {
1255 macro->SetRegister(reg, value);
1256 } else {
1257 if (value != 0) {
1258 macro->AdvanceRegister(reg, value);
1259 }
1260 }
1261 }
1262 }
1263 }
1264
1265
1266 // This is called as we come into a loop choice node and some other tricky
1267 // nodes. It normalises the state of the code generator to ensure we can
1268 // generate generic code.
1269 bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
1270 RegExpMacroAssembler* macro = compiler->macro_assembler();
1271
1272 ASSERT(actions_ != NULL || cp_offset_ != 0 || backtrack() != NULL);
1273
1274 if (actions_ == NULL && backtrack() == NULL) {
1275 // Here we just have some deferred cp advances to fix and we are back to
1276 // a normal situation.
1277 macro->AdvanceCurrentPosition(cp_offset_);
1278 // Create a new trivial state and generate the node with that.
1279 GenerationVariant new_state;
1280 return successor->Emit(compiler, &new_state);
1281 }
1282
1283 // Generate deferred actions here along with code to undo them again.
1284 OutSet affected_registers;
1285 int max_register = FindAffectedRegisters(&affected_registers);
1286 PushAffectedRegisters(macro, max_register, affected_registers);
1287 PerformDeferredActions(macro, max_register, affected_registers);
1288 if (backtrack() != NULL) {
1289 // Here we have a concrete backtrack location. These are set up by choice
1290 // nodes and so they indicate that we have a deferred save of the current
1291 // position which we may need to emit here.
1292 macro->PushCurrentPosition();
1293 }
1294 if (cp_offset_ != 0) {
1295 macro->AdvanceCurrentPosition(cp_offset_);
1296 }
1297
1298 // Create a new trivial state and generate the node with that.
Christian Plesner Hansen 2008/12/08 08:54:19 trivial -> empty?
1299 Label undo;
1300 macro->PushBacktrack(&undo);
1301 GenerationVariant new_state;
1302 bool ok = successor->Emit(compiler, &new_state);
1303
1304 // On backtrack we need to restore state.
1305 macro->Bind(&undo);
1306 if (!ok) return false;
1307 if (backtrack() != NULL) {
1308 macro->PopCurrentPosition();
1309 }
1310 RestoreAffectedRegisters(macro, max_register, affected_registers);
1311 if (backtrack() == NULL) {
1312 macro->Backtrack();
1015 } else { 1313 } else {
1016 if (compiler->recursion_depth() > RegExpCompiler::kMaxRecursion) { 1314 macro->GoTo(backtrack());
1017 compiler->macro_assembler()->GoTo(&label_); 1315 }
1018 compiler->AddWork(this); 1316
1019 return true; 1317 return true;
1020 } else { 1318 }
1021 compiler->IncrementRecursionDepth(); 1319
1022 bool how_it_went = Emit(compiler); 1320
1023 compiler->DecrementRecursionDepth(); 1321 void EndNode::EmitInfoChecks(RegExpMacroAssembler* macro,
1024 return how_it_went; 1322 GenerationVariant* variant) {
1025 } 1323 if (info()->at_end) {
1026 } 1324 Label succeed;
1027 } 1325 // LoadCurrentCharacter will go to the label if we are at the end of the
1028 1326 // input string.
1029 1327 macro->LoadCurrentCharacter(0, &succeed);
1030 // EndNodes are special. Because they can be very common and they are very 1328 macro->GoTo(variant->backtrack());
1031 // short we normally inline them. That is, if we are asked to emit a GoTo 1329 macro->Bind(&succeed);
1032 // we just emit the entire node. Since they don't have successors this 1330 }
1033 // works. 1331 }
1034 bool EndNode::GoTo(RegExpCompiler* compiler) { 1332
1035 if (info()->follows_word_interest || 1333
1036 info()->follows_newline_interest || 1334 bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler,
1037 info()->follows_start_interest) { 1335 GenerationVariant* variant) {
1038 return false; 1336 if (!variant->is_trivial()) {
1039 } 1337 return variant->Flush(compiler, this);
1040 return Emit(compiler); 1338 }
1041 }
1042
1043
1044 Label* RegExpNode::label() {
1045 return &label_;
1046 }
1047
1048
1049 bool EndNode::Emit(RegExpCompiler* compiler) {
1050 RegExpMacroAssembler* macro = compiler->macro_assembler(); 1339 RegExpMacroAssembler* macro = compiler->macro_assembler();
1340 if (!label()->is_bound()) {
1341 macro->Bind(label());
1342 }
1343 EmitInfoChecks(macro, variant);
1344 macro->ReadCurrentPositionFromRegister(current_position_register_);
1345 macro->ReadStackPointerFromRegister(stack_pointer_register_);
1346 // Now that we have unwound the stack we find at the top of the stack the
1347 // backtrack that the BeginSubmatch node got.
1348 macro->Backtrack();
1349 return true;
1350 }
1351
1352
1353 bool EndNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
1354 if (!variant->is_trivial()) {
1355 return variant->Flush(compiler, this);
1356 }
1357 RegExpMacroAssembler* macro = compiler->macro_assembler();
1358 if (!label()->is_bound()) {
1359 macro->Bind(label());
1360 }
1051 switch (action_) { 1361 switch (action_) {
1052 case ACCEPT: 1362 case ACCEPT:
1053 if (!label()->is_bound()) Bind(macro); 1363 EmitInfoChecks(macro, variant);
1054 if (info()->at_end) {
1055 Label succeed;
1056 // LoadCurrentCharacter will go to the label if we are at the end of the
1057 // input string.
1058 macro->LoadCurrentCharacter(0, &succeed);
1059 macro->Backtrack();
1060 macro->Bind(&succeed);
1061 }
1062 macro->Succeed(); 1364 macro->Succeed();
1063 return true; 1365 return true;
1064 case BACKTRACK: 1366 case BACKTRACK:
1065 if (!label()->is_bound()) Bind(macro);
1066 ASSERT(!info()->at_end); 1367 ASSERT(!info()->at_end);
1067 macro->Backtrack(); 1368 macro->GoTo(variant->backtrack());
1068 return true; 1369 return true;
1069 } 1370 case NEGATIVE_SUBMATCH_SUCCESS:
1371 // This case is handled in a different virtual method.
1372 UNREACHABLE();
1373 }
1374 UNIMPLEMENTED();
1070 return false; 1375 return false;
1071 } 1376 }
1072 1377
1073 1378
1074 void GuardedAlternative::AddGuard(Guard* guard) { 1379 void GuardedAlternative::AddGuard(Guard* guard) {
1075 if (guards_ == NULL) 1380 if (guards_ == NULL)
1076 guards_ = new ZoneList<Guard*>(1); 1381 guards_ = new ZoneList<Guard*>(1);
1077 guards_->Add(guard); 1382 guards_->Add(guard);
1078 } 1383 }
1079 1384
1080 1385
1081 ActionNode* ActionNode::StoreRegister(int reg, 1386 ActionNode* ActionNode::SetRegister(int reg,
1082 int val, 1387 int val,
1083 RegExpNode* on_success) { 1388 RegExpNode* on_success) {
1084 ActionNode* result = new ActionNode(STORE_REGISTER, on_success); 1389 ActionNode* result = new ActionNode(SET_REGISTER, on_success);
1085 result->data_.u_store_register.reg = reg; 1390 result->data_.u_store_register.reg = reg;
1086 result->data_.u_store_register.value = val; 1391 result->data_.u_store_register.value = val;
1087 return result; 1392 return result;
1088 } 1393 }
1089 1394
1090 1395
1091 ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) { 1396 ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {
1092 ActionNode* result = new ActionNode(INCREMENT_REGISTER, on_success); 1397 ActionNode* result = new ActionNode(INCREMENT_REGISTER, on_success);
1093 result->data_.u_increment_register.reg = reg; 1398 result->data_.u_increment_register.reg = reg;
1094 return result; 1399 return result;
1095 } 1400 }
1096 1401
1097 1402
1098 ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) { 1403 ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) {
1099 ActionNode* result = new ActionNode(STORE_POSITION, on_success); 1404 ActionNode* result = new ActionNode(STORE_POSITION, on_success);
1100 result->data_.u_position_register.reg = reg; 1405 result->data_.u_position_register.reg = reg;
1101 return result; 1406 return result;
1102 } 1407 }
1103 1408
1104 1409
1105 ActionNode* ActionNode::RestorePosition(int reg, RegExpNode* on_success) {
1106 ActionNode* result = new ActionNode(RESTORE_POSITION, on_success);
1107 result->data_.u_position_register.reg = reg;
1108 return result;
1109 }
1110
1111
1112 ActionNode* ActionNode::BeginSubmatch(int stack_reg, 1410 ActionNode* ActionNode::BeginSubmatch(int stack_reg,
1113 int position_reg, 1411 int position_reg,
1114 RegExpNode* on_success) { 1412 RegExpNode* on_success) {
1115 ActionNode* result = new ActionNode(BEGIN_SUBMATCH, on_success); 1413 ActionNode* result = new ActionNode(BEGIN_SUBMATCH, on_success);
1116 result->data_.u_submatch.stack_pointer_register = stack_reg; 1414 result->data_.u_submatch.stack_pointer_register = stack_reg;
1117 result->data_.u_submatch.current_position_register = position_reg; 1415 result->data_.u_submatch.current_position_register = position_reg;
1118 return result; 1416 return result;
1119 } 1417 }
1120 1418
1121 1419
1122 ActionNode* ActionNode::EscapeSubmatch(int stack_reg, 1420 ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg,
1123 bool restore_position, 1421 int position_reg,
1124 int position_reg, 1422 RegExpNode* on_success) {
1125 RegExpNode* on_success) { 1423 ActionNode* result = new ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
1126 ActionNode* result = new ActionNode(ESCAPE_SUBMATCH, on_success);
1127 result->data_.u_submatch.stack_pointer_register = stack_reg; 1424 result->data_.u_submatch.stack_pointer_register = stack_reg;
1128 if (restore_position) { 1425 result->data_.u_submatch.current_position_register = position_reg;
1129 result->data_.u_submatch.current_position_register = position_reg;
1130 } else {
1131 result->data_.u_submatch.current_position_register = -1;
1132 }
1133 return result; 1426 return result;
1134 } 1427 }
1135 1428
1136 1429
1137 #define DEFINE_ACCEPT(Type) \ 1430 #define DEFINE_ACCEPT(Type) \
1138 void Type##Node::Accept(NodeVisitor* visitor) { \ 1431 void Type##Node::Accept(NodeVisitor* visitor) { \
1139 visitor->Visit##Type(this); \ 1432 visitor->Visit##Type(this); \
1140 } 1433 }
1141 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) 1434 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
1142 #undef DEFINE_ACCEPT 1435 #undef DEFINE_ACCEPT
1143 1436
1144 1437
1145 // ------------------------------------------------------------------- 1438 // -------------------------------------------------------------------
1146 // Emit code. 1439 // Emit code.
1147 1440
1148 1441
1149 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, 1442 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
1150 Guard* guard, 1443 Guard* guard,
Christian Plesner Hansen 2008/12/08 08:54:19 Indentation.
1151 Label* on_failure) { 1444 GenerationVariant* variant) {
1152 switch (guard->op()) { 1445 switch (guard->op()) {
1153 case Guard::LT: 1446 case Guard::LT:
1154 macro_assembler->IfRegisterGE(guard->reg(), guard->value(), on_failure); 1447 ASSERT(!variant->mentions_reg(guard->reg()));
1448 macro_assembler->IfRegisterGE(guard->reg(),
1449 guard->value(),
1450 variant->backtrack());
1155 break; 1451 break;
1156 case Guard::GEQ: 1452 case Guard::GEQ:
1157 macro_assembler->IfRegisterLT(guard->reg(), guard->value(), on_failure); 1453 ASSERT(!variant->mentions_reg(guard->reg()));
1454 macro_assembler->IfRegisterLT(guard->reg(),
1455 guard->value(),
1456 variant->backtrack());
1158 break; 1457 break;
1159 } 1458 }
1160 } 1459 }
1161 1460
1162 1461
1163 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize; 1462 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;
1164 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange; 1463 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;
1165 1464
1166 1465
1167 static inline void EmitAtomNonLetters( 1466 static inline void EmitAtomNonLetters(
1168 RegExpMacroAssembler* macro_assembler, 1467 RegExpMacroAssembler* macro_assembler,
1169 TextElement elm, 1468 TextElement elm,
1170 Vector<const uc16> quarks, 1469 Vector<const uc16> quarks,
1171 Label* on_failure, 1470 Label* on_failure,
1172 int cp_offset) { 1471 int cp_offset,
1472 bool check_offset) {
1173 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1473 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1474 // It is vital that this loop is backwards due to the unchecked character
1475 // load below.
1174 for (int i = quarks.length() - 1; i >= 0; i--) { 1476 for (int i = quarks.length() - 1; i >= 0; i--) {
1175 uc16 c = quarks[i]; 1477 uc16 c = quarks[i];
1176 int length = uncanonicalize.get(c, '\0', chars); 1478 int length = uncanonicalize.get(c, '\0', chars);
1177 if (length <= 1) { 1479 if (length <= 1) {
1178 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); 1480 if (check_offset && i == quarks.length() - 1) {
1481 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
1482 } else {
1483 // Here we don't need to check against the end of the input string
1484 // since this character lies before a character that matched.
1485 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i);
1486 }
1179 macro_assembler->CheckNotCharacter(c, on_failure); 1487 macro_assembler->CheckNotCharacter(c, on_failure);
1180 } 1488 }
1181 } 1489 }
1182 } 1490 }
1183 1491
1184 1492
1185 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, 1493 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
1186 uc16 c1, 1494 uc16 c1,
1187 uc16 c2, 1495 uc16 c2,
1188 Label* on_failure) { 1496 Label* on_failure) {
(...skipping 20 matching lines...) Expand all
1209 } 1517 }
1210 return false; 1518 return false;
1211 } 1519 }
1212 1520
1213 1521
1214 static inline void EmitAtomLetters( 1522 static inline void EmitAtomLetters(
1215 RegExpMacroAssembler* macro_assembler, 1523 RegExpMacroAssembler* macro_assembler,
1216 TextElement elm, 1524 TextElement elm,
1217 Vector<const uc16> quarks, 1525 Vector<const uc16> quarks,
1218 Label* on_failure, 1526 Label* on_failure,
1219 int cp_offset) { 1527 int cp_offset,
1528 bool check_offset) {
1220 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1529 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1530 // It is vital that this loop is backwards due to the unchecked character
1531 // load below.
1221 for (int i = quarks.length() - 1; i >= 0; i--) { 1532 for (int i = quarks.length() - 1; i >= 0; i--) {
1222 uc16 c = quarks[i]; 1533 uc16 c = quarks[i];
1223 int length = uncanonicalize.get(c, '\0', chars); 1534 int length = uncanonicalize.get(c, '\0', chars);
1224 if (length <= 1) continue; 1535 if (length <= 1) continue;
1225 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); 1536 if (check_offset && i == quarks.length() - 1) {
1537 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
1538 } else {
1539 // Here we don't need to check against the end of the input string
1540 // since this character lies before a character that matched.
1541 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i);
1542 }
1226 Label ok; 1543 Label ok;
1227 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); 1544 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
1228 switch (length) { 1545 switch (length) {
1229 case 2: { 1546 case 2: {
1230 if (ShortCutEmitCharacterPair(macro_assembler, 1547 if (ShortCutEmitCharacterPair(macro_assembler,
1231 chars[0], 1548 chars[0],
1232 chars[1], 1549 chars[1],
1233 on_failure)) { 1550 on_failure)) {
1234 ok.Unuse(); 1551 ok.Unuse();
1235 } else { 1552 } else {
(...skipping 16 matching lines...) Expand all
1252 UNREACHABLE(); 1569 UNREACHABLE();
1253 break; 1570 break;
1254 } 1571 }
1255 } 1572 }
1256 } 1573 }
1257 1574
1258 1575
1259 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, 1576 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
1260 RegExpCharacterClass* cc, 1577 RegExpCharacterClass* cc,
1261 int cp_offset, 1578 int cp_offset,
1262 Label* on_failure) { 1579 Label* on_failure,
1263 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); 1580 bool check_offset) {
1264 cp_offset++;
1265
1266 ZoneList<CharacterRange>* ranges = cc->ranges(); 1581 ZoneList<CharacterRange>* ranges = cc->ranges();
1267 1582
1268 Label success; 1583 Label success;
1269 1584
1270 Label* char_is_in_class = 1585 Label* char_is_in_class =
1271 cc->is_negated() ? on_failure : &success; 1586 cc->is_negated() ? on_failure : &success;
1272 1587
1273 int range_count = ranges->length(); 1588 int range_count = ranges->length();
1274 1589
1275 if (range_count == 0) { 1590 if (range_count == 0) {
1276 if (!cc->is_negated()) { 1591 if (!cc->is_negated()) {
1277 macro_assembler->GoTo(on_failure); 1592 macro_assembler->GoTo(on_failure);
1278 } 1593 }
1279 return; 1594 return;
1280 } 1595 }
1281 1596
1597 if (range_count == 1 &&
1598 !cc->is_negated() &&
1599 ranges->at(0).from() == 0 &&
Christian Plesner Hansen 2008/12/08 08:54:19 Maybe add an IsEverything method on CharacterRange
1600 ranges->at(0).to() == 0xffff) {
1601 // This is a common case hit by non-anchored expressions.
1602 // TODO(erikcorry): We should have a macro assembler instruction that just
1603 // checks for end of string without loading the character.
1604 if (check_offset) {
1605 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure);
1606 }
1607 return;
1608 }
1609
1610 if (check_offset) {
1611 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure);
1612 } else {
1613 // Here we don't need to check against the end of the input string
1614 // since this character lies before a character that matched.
1615 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset);
1616 }
1617
1282 for (int i = 0; i < range_count - 1; i++) { 1618 for (int i = 0; i < range_count - 1; i++) {
1283 CharacterRange& range = ranges->at(i); 1619 CharacterRange& range = ranges->at(i);
1284 Label next_range; 1620 Label next_range;
1285 uc16 from = range.from(); 1621 uc16 from = range.from();
1286 uc16 to = range.to(); 1622 uc16 to = range.to();
1287 if (to == from) { 1623 if (to == from) {
1288 macro_assembler->CheckCharacter(to, char_is_in_class); 1624 macro_assembler->CheckCharacter(to, char_is_in_class);
1289 } else { 1625 } else {
1290 if (from != 0) { 1626 if (from != 0) {
1291 macro_assembler->CheckCharacterLT(from, &next_range); 1627 macro_assembler->CheckCharacterLT(from, &next_range);
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
1326 } else { 1662 } else {
1327 if (cc->is_negated()) { 1663 if (cc->is_negated()) {
1328 macro_assembler->GoTo(on_failure); 1664 macro_assembler->GoTo(on_failure);
1329 } 1665 }
1330 } 1666 }
1331 } 1667 }
1332 macro_assembler->Bind(&success); 1668 macro_assembler->Bind(&success);
1333 } 1669 }
1334 1670
1335 1671
1672 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
1673 GenerationVariant* variant) {
1674 // TODO(erikcorry): Implement support.
1675 if (info_.follows_word_interest ||
1676 info_.follows_newline_interest ||
1677 info_.follows_start_interest) {
1678 return FAIL;
1679 }
1336 1680
1337 bool TextNode::Emit(RegExpCompiler* compiler) { 1681 // If we are generating a greedy loop then don't stop and don't reuse code.
1682 if (variant->stop_node() != NULL) {
1683 return CONTINUE;
1684 }
1685
1338 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1686 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1339 Bind(macro_assembler); 1687 if (variant->is_trivial()) {
1688 if (label_.is_bound()) {
1689 // We are being asked to generate a generic version, but that's already
1690 // been done so just go to it.
1691 macro_assembler->GoTo(&label_);
1692 return DONE;
1693 }
1694 if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) {
1695 // To avoid too deep recursion we push the node to the work queue and just
1696 // generate a goto here.
1697 compiler->AddWork(this);
1698 macro_assembler->GoTo(&label_);
1699 return DONE;
1700 }
1701 // Generate generic version of the node and bind the label for later use.
1702 macro_assembler->Bind(&label_);
1703 return CONTINUE;
1704 }
1705
1706 // We are being asked to make a non-generic version. Keep track of how many
1707 // non-generic versions we generate so as not to overdo it.
1708 variants_generated_++;
1709 if (variants_generated_ < kMaxVariantsGenerated &&
1710 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {
1711 return CONTINUE;
1712 }
1713
1714 // If we get here there have been too many variants generated or recursion
1715 // is too deep. Time to switch to a generic version. The code for
1716 // generic versions above can handle deep recursion properly.
1717 bool ok = variant->Flush(compiler, this);
1718 return ok ? DONE : FAIL;
1719 }
1720
1721
1722 // This generates the code to match a text node. A text node can contain
1723 // straight character sequences (possibly to be matched in a case-independent
1724 // way) and character classes. In order to be most efficient we test for the
1725 // simple things first and then move on to the more complicated things. The
1726 // simplest thing is a non-letter or a letter if we are matching case. The
1727 // next-most simple thing is a case-independent letter. The least simple is
1728 // a character class. Another optimization is that we test the last one first.
1729 // If that succeeds we don't need to test for the end of the string when we
1730 // load other characters.
1731 bool TextNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
1732 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1733 Label *backtrack = variant->backtrack();
1734 LimitResult limit_result = LimitVersions(compiler, variant);
1735 if (limit_result == FAIL) return false;
1736 if (limit_result == DONE) return true;
1737 ASSERT(limit_result == CONTINUE);
1738
1340 int element_count = elms_->length(); 1739 int element_count = elms_->length();
1341 ASSERT(element_count != 0); 1740 ASSERT(element_count != 0);
1342 int cp_offset = 0;
1343 if (info()->at_end) { 1741 if (info()->at_end) {
1344 macro_assembler->Backtrack(); 1742 macro_assembler->GoTo(backtrack);
1345 return true; 1743 return true;
1346 } 1744 }
1347 // First, handle straight character matches. 1745 // First, handle straight character matches.
1348 for (int i = 0; i < element_count; i++) { 1746 int checked_up_to = -1;
1747 for (int i = element_count - 1; i >= 0; i--) {
1349 TextElement elm = elms_->at(i); 1748 TextElement elm = elms_->at(i);
1749 ASSERT(elm.cp_offset >= 0);
1750 int cp_offset = variant->cp_offset() + elm.cp_offset;
1350 if (elm.type == TextElement::ATOM) { 1751 if (elm.type == TextElement::ATOM) {
1351 Vector<const uc16> quarks = elm.data.u_atom->data(); 1752 Vector<const uc16> quarks = elm.data.u_atom->data();
1753 int last_cp_offset = cp_offset + quarks.length();
1352 if (compiler->ignore_case()) { 1754 if (compiler->ignore_case()) {
1353 EmitAtomNonLetters(macro_assembler, 1755 EmitAtomNonLetters(macro_assembler,
1354 elm, 1756 elm,
1355 quarks, 1757 quarks,
1356 on_failure_->label(), 1758 backtrack,
1357 cp_offset); 1759 cp_offset,
1760 checked_up_to < last_cp_offset);
1358 } else { 1761 } else {
1359 macro_assembler->CheckCharacters(quarks, 1762 macro_assembler->CheckCharacters(quarks,
1360 cp_offset, 1763 cp_offset,
1361 on_failure_->label()); 1764 backtrack,
1765 checked_up_to < last_cp_offset);
1362 } 1766 }
1363 cp_offset += quarks.length(); 1767 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1;
1364 } else { 1768 } else {
1365 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); 1769 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
1366 cp_offset++;
1367 } 1770 }
1368 } 1771 }
1369 // Second, handle case independent letter matches if any. 1772 // Second, handle case independent letter matches if any.
1370 if (compiler->ignore_case()) { 1773 if (compiler->ignore_case()) {
1371 cp_offset = 0; 1774 for (int i = element_count - 1; i >= 0; i--) {
1372 for (int i = 0; i < element_count; i++) {
1373 TextElement elm = elms_->at(i); 1775 TextElement elm = elms_->at(i);
1776 int cp_offset = variant->cp_offset() + elm.cp_offset;
1374 if (elm.type == TextElement::ATOM) { 1777 if (elm.type == TextElement::ATOM) {
1375 Vector<const uc16> quarks = elm.data.u_atom->data(); 1778 Vector<const uc16> quarks = elm.data.u_atom->data();
1779 int last_cp_offset = cp_offset + quarks.length();
1376 EmitAtomLetters(macro_assembler, 1780 EmitAtomLetters(macro_assembler,
1377 elm, 1781 elm,
1378 quarks, 1782 quarks,
1379 on_failure_->label(), 1783 backtrack,
1380 cp_offset); 1784 cp_offset,
1381 cp_offset += quarks.length(); 1785 checked_up_to < last_cp_offset);
1382 } else { 1786 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1;
1383 cp_offset++;
1384 } 1787 }
1385 } 1788 }
1386 } 1789 }
1387 // If the fast character matches passed then do the character classes. 1790 // If the fast character matches passed then do the character classes.
1388 cp_offset = 0; 1791 for (int i = element_count - 1; i >= 0; i--) {
1389 for (int i = 0; i < element_count; i++) {
1390 TextElement elm = elms_->at(i); 1792 TextElement elm = elms_->at(i);
1793 int cp_offset = variant->cp_offset() + elm.cp_offset;
1391 if (elm.type == TextElement::CHAR_CLASS) { 1794 if (elm.type == TextElement::CHAR_CLASS) {
1392 RegExpCharacterClass* cc = elm.data.u_char_class; 1795 RegExpCharacterClass* cc = elm.data.u_char_class;
1393 EmitCharClass(macro_assembler, cc, cp_offset, on_failure_->label()); 1796 EmitCharClass(macro_assembler,
1394 cp_offset++; 1797 cc,
1395 } else { 1798 cp_offset,
1396 cp_offset += elm.data.u_atom->data().length(); 1799 backtrack,
1800 checked_up_to < cp_offset);
1801 if (cp_offset > checked_up_to) checked_up_to = cp_offset;
1397 } 1802 }
1398 } 1803 }
1399 1804
1400 compiler->AddWork(on_failure_); 1805 GenerationVariant new_variant(*variant);
1401 macro_assembler->AdvanceCurrentPosition(cp_offset); 1806 new_variant.set_cp_offset(checked_up_to + 1);
1402 return on_success()->GoTo(compiler); 1807 RecursionCheck rc(compiler);
1808 return on_success()->Emit(compiler, &new_variant);
1403 } 1809 }
1404 1810
1405 1811
1406 void TextNode::MakeCaseIndependent() { 1812 void TextNode::MakeCaseIndependent() {
1407 int element_count = elms_->length(); 1813 int element_count = elms_->length();
1408 for (int i = 0; i < element_count; i++) { 1814 for (int i = 0; i < element_count; i++) {
1409 TextElement elm = elms_->at(i); 1815 TextElement elm = elms_->at(i);
1410 if (elm.type == TextElement::CHAR_CLASS) { 1816 if (elm.type == TextElement::CHAR_CLASS) {
1411 RegExpCharacterClass* cc = elm.data.u_char_class; 1817 RegExpCharacterClass* cc = elm.data.u_char_class;
1412 ZoneList<CharacterRange>* ranges = cc->ranges(); 1818 ZoneList<CharacterRange>* ranges = cc->ranges();
1413 int range_count = ranges->length(); 1819 int range_count = ranges->length();
1414 for (int i = 0; i < range_count; i++) { 1820 for (int i = 0; i < range_count; i++) {
1415 ranges->at(i).AddCaseEquivalents(ranges); 1821 ranges->at(i).AddCaseEquivalents(ranges);
1416 } 1822 }
1417 } 1823 }
1418 } 1824 }
1419 } 1825 }
1420 1826
1421 1827
1422 bool ChoiceNode::Emit(RegExpCompiler* compiler) { 1828 int TextNode::GreedyLoopTextLength() {
1829 TextElement elm = elms_->at(elms_->length() - 1);
1830 if (elm.type == TextElement::CHAR_CLASS) {
1831 return elm.cp_offset + 1;
1832 } else {
1833 return elm.cp_offset + elm.data.u_atom->data().length();
1834 }
1835 }
1836
1837
1838 // Finds the fixed match length of a sequence of nodes that goes from
1839 // this alternative and back to this choice node. If there are variable
1840 // length nodes or other complications in the way then return a sentinel
1841 // value indicating that a greedy loop cannot be constructed.
1842 int ChoiceNode::GreedyLoopTextLength(GuardedAlternative* alternative) {
1843 int length = 0;
1844 RegExpNode* node = alternative->node();
1845 // Later we will generate code for all these text nodes using recursion
1846 // so we have to limit the max number.
1847 int recursion_depth = 0;
1848 while (node != this) {
1849 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
1850 return kNodeIsTooComplexForGreedyLoops;
1851 }
1852 NodeInfo* info = node->info();
1853 if (info->follows_word_interest ||
1854 info->follows_newline_interest ||
1855 info->follows_start_interest) {
1856 return kNodeIsTooComplexForGreedyLoops;
1857 }
1858 int node_length = node->GreedyLoopTextLength();
1859 if (node_length == kNodeIsTooComplexForGreedyLoops) {
1860 return kNodeIsTooComplexForGreedyLoops;
1861 }
1862 length += node_length;
1863 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
1864 node = seq_node->on_success();
1865 }
1866 return length;
1867 }
1868
1869
1870 bool LoopChoiceNode::Emit(RegExpCompiler* compiler,
1871 GenerationVariant* variant) {
1872 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1873 if (variant->stop_node() == this) {
1874 int text_length = GreedyLoopTextLength(&(alternatives_->at(0)));
1875 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops);
1876 // Update the counter-based backtracking info on the stack. This is an
1877 // optimization for greedy loops (see below).
1878 ASSERT(variant->cp_offset() == text_length);
1879 macro_assembler->AdvanceCurrentPosition(text_length);
1880 macro_assembler->GoTo(variant->loop_label());
1881 return true;
1882 }
1883 ASSERT(variant->stop_node() == NULL);
1884 if (!variant->is_trivial()) {
1885 return variant->Flush(compiler, this);
1886 }
1887 return ChoiceNode::Emit(compiler, variant);
1888 }
1889
1890
1891 bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
1892 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1423 int choice_count = alternatives_->length(); 1893 int choice_count = alternatives_->length();
1424 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1894 #ifdef DEBUG
1425 Bind(macro_assembler); 1895 for (int i = 0; i < choice_count - 1; i++) {
1896 GuardedAlternative alternative = alternatives_->at(i);
1897 ZoneList<Guard*>* guards = alternative.guards();
1898 int guard_count = (guards == NULL) ? 0 : guards->length();
1899 for (int j = 0; j < guard_count; j++) {
1900 ASSERT(!variant->mentions_reg(guards->at(j)->reg()));
1901 }
1902 }
1903 #endif
1904
1905 LimitResult limit_result = LimitVersions(compiler, variant);
1906 if (limit_result == DONE) return true;
1907 if (limit_result == FAIL) return false;
1908 ASSERT(limit_result == CONTINUE);
1909
1910 RecursionCheck rc(compiler);
1911
1912 GenerationVariant* current_variant = variant;
1913
1914 int text_length = GreedyLoopTextLength(&(alternatives_->at(0)));
1915 bool greedy_loop = false;
1916 Label greedy_loop_label;
1917 GenerationVariant counter_backtrack_variant(&greedy_loop_label);
1918 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
1919 // Here we have special handling for greedy loops containing only text nodes
1920 // and other simple nodes. These are handled by pushing the current
1921 // position on the stack and then incrementing the current position each
1922 // time around the switch. On backtrack we decrement the current position
1923 // and check it against the pushed value. This avoids pushing backtrack
1924 // information for each iteration of the loop, which could take up a lot of
1925 // space.
1926 greedy_loop = true;
1927 ASSERT(variant->stop_node() == NULL);
1928 macro_assembler->PushCurrentPosition();
1929 current_variant = &counter_backtrack_variant;
1930 Label greedy_match_failed;
1931 GenerationVariant greedy_match_variant(&greedy_match_failed);
1932 Label loop_label;
1933 macro_assembler->Bind(&loop_label);
1934 greedy_match_variant.set_stop_node(this);
1935 greedy_match_variant.set_loop_label(&loop_label);
1936 bool ok = alternatives_->at(0).node()->Emit(compiler,
1937 &greedy_match_variant);
1938 macro_assembler->Bind(&greedy_match_failed);
1939 if (!ok) {
1940 greedy_loop_label.Unuse();
1941 return false;
1942 }
1943 }
1944
1945 Label second_choice; // For use in greedy matches.
1946 macro_assembler->Bind(&second_choice);
1947
1426 // For now we just call all choices one after the other. The idea ultimately 1948 // For now we just call all choices one after the other. The idea ultimately
1427 // is to use the Dispatch table to try only the relevant ones. 1949 // is to use the Dispatch table to try only the relevant ones.
1428 for (int i = 0; i < choice_count - 1; i++) { 1950 for (int i = greedy_loop ? 1 : 0; i < choice_count - 1; i++) {
1429 GuardedAlternative alternative = alternatives_->at(i); 1951 GuardedAlternative alternative = alternatives_->at(i);
1430 Label after; 1952 Label after;
1431 Label after_no_pop_cp;
1432 ZoneList<Guard*>* guards = alternative.guards(); 1953 ZoneList<Guard*>* guards = alternative.guards();
1433 if (guards != NULL) { 1954 int guard_count = (guards == NULL) ? 0 : guards->length();
1434 int guard_count = guards->length(); 1955 GenerationVariant new_variant(*current_variant);
1435 for (int j = 0; j < guard_count; j++) { 1956 new_variant.set_backtrack(&after);
1436 GenerateGuard(macro_assembler, guards->at(j), &after_no_pop_cp); 1957 for (int j = 0; j < guard_count; j++) {
1437 } 1958 GenerateGuard(macro_assembler, guards->at(j), &new_variant);
1438 } 1959 }
1439 macro_assembler->PushCurrentPosition(); 1960 if (!alternative.node()->Emit(compiler, &new_variant)) {
1440 macro_assembler->PushBacktrack(&after);
1441 if (!alternative.node()->GoTo(compiler)) {
1442 after.Unuse(); 1961 after.Unuse();
1443 after_no_pop_cp.Unuse();
1444 return false; 1962 return false;
1445 } 1963 }
1446 macro_assembler->Bind(&after); 1964 macro_assembler->Bind(&after);
1447 macro_assembler->PopCurrentPosition();
1448 macro_assembler->Bind(&after_no_pop_cp);
1449 } 1965 }
1450 GuardedAlternative alternative = alternatives_->at(choice_count - 1); 1966 GuardedAlternative alternative = alternatives_->at(choice_count - 1);
1451 ZoneList<Guard*>* guards = alternative.guards(); 1967 ZoneList<Guard*>* guards = alternative.guards();
1452 if (guards != NULL) { 1968 int guard_count = (guards == NULL) ? 0 : guards->length();
1453 int guard_count = guards->length(); 1969 for (int j = 0; j < guard_count; j++) {
1454 for (int j = 0; j < guard_count; j++) { 1970 GenerateGuard(macro_assembler, guards->at(j), current_variant);
1455 GenerateGuard(macro_assembler, guards->at(j), on_failure_->label()); 1971 }
1456 } 1972 bool ok = alternative.node()->Emit(compiler, current_variant);
1457 } 1973 if (!ok) return false;
1458 if (!on_failure_->IsBacktrack()) { 1974 if (greedy_loop) {
1459 ASSERT_NOT_NULL(on_failure_ -> label()); 1975 macro_assembler->Bind(&greedy_loop_label);
1460 macro_assembler->PushBacktrack(on_failure_->label()); 1976 // If we have unwound to the bottom then backtrack.
1461 compiler->AddWork(on_failure_); 1977 macro_assembler->CheckGreedyLoop(variant->backtrack());
1462 } 1978 // Otherwise try the second priority at an earlier position.
1463 if (!alternative.node()->GoTo(compiler)) { 1979 macro_assembler->AdvanceCurrentPosition(-text_length);
1464 return false; 1980 macro_assembler->GoTo(&second_choice);
1465 } 1981 }
1466 return true; 1982 return true;
1467 } 1983 }
1468 1984
1469 1985
1470 bool ActionNode::Emit(RegExpCompiler* compiler) { 1986 bool ActionNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
1471 RegExpMacroAssembler* macro = compiler->macro_assembler(); 1987 RegExpMacroAssembler* macro = compiler->macro_assembler();
1472 Bind(macro); 1988 LimitResult limit_result = LimitVersions(compiler, variant);
1989 if (limit_result == DONE) return true;
1990 if (limit_result == FAIL) return false;
1991 ASSERT(limit_result == CONTINUE);
1992
1993 RecursionCheck rc(compiler);
1994
1473 switch (type_) { 1995 switch (type_) {
1474 case STORE_REGISTER: 1996 case STORE_POSITION: {
1475 macro->SetRegister(data_.u_store_register.reg, 1997 GenerationVariant::DeferredCapture
1476 data_.u_store_register.value); 1998 new_capture(data_.u_position_register.reg, variant);
1477 break; 1999 GenerationVariant new_variant = *variant;
2000 new_variant.add_action(&new_capture);
2001 return on_success()->Emit(compiler, &new_variant);
2002 }
1478 case INCREMENT_REGISTER: { 2003 case INCREMENT_REGISTER: {
1479 Label undo; 2004 GenerationVariant::DeferredIncrementRegister
1480 macro->PushBacktrack(&undo); 2005 new_increment(data_.u_increment_register.reg);
1481 macro->AdvanceRegister(data_.u_increment_register.reg, 1); 2006 GenerationVariant new_variant = *variant;
1482 bool ok = on_success()->GoTo(compiler); 2007 new_variant.add_action(&new_increment);
1483 if (!ok) { 2008 return on_success()->Emit(compiler, &new_variant);
1484 undo.Unuse(); 2009 }
2010 case SET_REGISTER: {
2011 GenerationVariant::DeferredSetRegister
2012 new_set(data_.u_store_register.reg, data_.u_store_register.value);
2013 GenerationVariant new_variant = *variant;
2014 new_variant.add_action(&new_set);
2015 return on_success()->Emit(compiler, &new_variant);
2016 }
2017 case BEGIN_SUBMATCH:
2018 if (!variant->is_trivial()) return variant->Flush(compiler, this);
2019 macro->WriteCurrentPositionToRegister(
2020 data_.u_submatch.current_position_register, 0);
2021 macro->WriteStackPointerToRegister(
2022 data_.u_submatch.stack_pointer_register);
2023 return on_success()->Emit(compiler, variant);
2024 case POSITIVE_SUBMATCH_SUCCESS:
2025 if (!variant->is_trivial()) return variant->Flush(compiler, this);
2026 // TODO(erikcorry): Implement support.
2027 if (info()->follows_word_interest ||
2028 info()->follows_newline_interest ||
2029 info()->follows_start_interest) {
1485 return false; 2030 return false;
1486 } 2031 }
1487 macro->Bind(&undo);
1488 macro->AdvanceRegister(data_.u_increment_register.reg, -1);
1489 macro->Backtrack();
1490 break;
1491 }
1492 case STORE_POSITION: {
1493 Label undo;
1494 macro->PushRegister(data_.u_position_register.reg);
1495 macro->PushBacktrack(&undo);
1496 macro->WriteCurrentPositionToRegister(data_.u_position_register.reg);
1497 bool ok = on_success()->GoTo(compiler);
1498 if (!ok) {
1499 undo.Unuse();
1500 return false;
1501 }
1502 macro->Bind(&undo);
1503 macro->PopRegister(data_.u_position_register.reg);
1504 macro->Backtrack();
1505 break;
1506 }
1507 case RESTORE_POSITION:
1508 macro->ReadCurrentPositionFromRegister(
1509 data_.u_position_register.reg);
1510 break;
1511 case BEGIN_SUBMATCH:
1512 macro->WriteCurrentPositionToRegister(
1513 data_.u_submatch.current_position_register);
1514 macro->WriteStackPointerToRegister(
1515 data_.u_submatch.stack_pointer_register);
1516 break;
1517 case ESCAPE_SUBMATCH:
1518 if (info()->at_end) { 2032 if (info()->at_end) {
1519 Label at_end; 2033 Label at_end;
1520 // Load current character jumps to the label if we are beyond the string 2034 // Load current character jumps to the label if we are beyond the string
1521 // end. 2035 // end.
1522 macro->LoadCurrentCharacter(0, &at_end); 2036 macro->LoadCurrentCharacter(0, &at_end);
1523 macro->Backtrack(); 2037 macro->GoTo(variant->backtrack());
1524 macro->Bind(&at_end); 2038 macro->Bind(&at_end);
1525 } 2039 }
1526 if (data_.u_submatch.current_position_register != -1) { 2040 macro->ReadCurrentPositionFromRegister(
1527 macro->ReadCurrentPositionFromRegister( 2041 data_.u_submatch.current_position_register);
1528 data_.u_submatch.current_position_register);
1529 }
1530 macro->ReadStackPointerFromRegister( 2042 macro->ReadStackPointerFromRegister(
1531 data_.u_submatch.stack_pointer_register); 2043 data_.u_submatch.stack_pointer_register);
1532 break; 2044 return on_success()->Emit(compiler, variant);
1533 default: 2045 default:
1534 UNREACHABLE(); 2046 UNREACHABLE();
1535 return false; 2047 return false;
1536 } 2048 }
1537 return on_success()->GoTo(compiler); 2049 }
1538 } 2050
1539 2051
1540 2052 bool BackReferenceNode::Emit(RegExpCompiler* compiler,
1541 bool BackReferenceNode::Emit(RegExpCompiler* compiler) { 2053 GenerationVariant* variant) {
1542 RegExpMacroAssembler* macro = compiler->macro_assembler(); 2054 RegExpMacroAssembler* macro = compiler->macro_assembler();
1543 Bind(macro); 2055 if (!variant->is_trivial()) {
2056 return variant->Flush(compiler, this);
2057 }
2058
2059 LimitResult limit_result = LimitVersions(compiler, variant);
2060 if (limit_result == DONE) return true;
2061 if (limit_result == FAIL) return false;
2062 ASSERT(limit_result == CONTINUE);
2063
2064 RecursionCheck rc(compiler);
2065
1544 ASSERT_EQ(start_reg_ + 1, end_reg_); 2066 ASSERT_EQ(start_reg_ + 1, end_reg_);
1545 if (info()->at_end) { 2067 if (info()->at_end) {
1546 // If we are constrained to match at the end of the input then succeed 2068 // If we are constrained to match at the end of the input then succeed
1547 // iff the back reference is empty. 2069 // iff the back reference is empty.
1548 macro->CheckNotRegistersEqual(start_reg_, end_reg_, on_failure_->label()); 2070 macro->CheckNotRegistersEqual(start_reg_, end_reg_, variant->backtrack());
1549 } else { 2071 } else {
1550 if (compiler->ignore_case()) { 2072 if (compiler->ignore_case()) {
1551 macro->CheckNotBackReferenceIgnoreCase(start_reg_, on_failure_->label()); 2073 macro->CheckNotBackReferenceIgnoreCase(start_reg_, variant->backtrack());
1552 } else { 2074 } else {
1553 macro->CheckNotBackReference(start_reg_, on_failure_->label()); 2075 macro->CheckNotBackReference(start_reg_, variant->backtrack());
1554 } 2076 }
1555 } 2077 }
1556 return on_success()->GoTo(compiler); 2078 return on_success()->Emit(compiler, variant);
1557 } 2079 }
1558 2080
1559 2081
1560 // ------------------------------------------------------------------- 2082 // -------------------------------------------------------------------
1561 // Dot/dotty output 2083 // Dot/dotty output
1562 2084
1563 2085
1564 #ifdef DEBUG 2086 #ifdef DEBUG
1565 2087
1566 2088
1567 class DotPrinter: public NodeVisitor { 2089 class DotPrinter: public NodeVisitor {
1568 public: 2090 public:
1569 explicit DotPrinter(bool ignore_case) 2091 explicit DotPrinter(bool ignore_case)
1570 : ignore_case_(ignore_case), 2092 : ignore_case_(ignore_case),
1571 stream_(&alloc_) { } 2093 stream_(&alloc_) { }
1572 void PrintNode(const char* label, RegExpNode* node); 2094 void PrintNode(const char* label, RegExpNode* node);
1573 void Visit(RegExpNode* node); 2095 void Visit(RegExpNode* node);
1574 void PrintOnFailure(RegExpNode* from, RegExpNode* on_failure);
1575 void PrintAttributes(RegExpNode* from); 2096 void PrintAttributes(RegExpNode* from);
1576 StringStream* stream() { return &stream_; } 2097 StringStream* stream() { return &stream_; }
2098 void PrintOnFailure(RegExpNode* from, RegExpNode* to);
1577 #define DECLARE_VISIT(Type) \ 2099 #define DECLARE_VISIT(Type) \
1578 virtual void Visit##Type(Type##Node* that); 2100 virtual void Visit##Type(Type##Node* that);
1579 FOR_EACH_NODE_TYPE(DECLARE_VISIT) 2101 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
1580 #undef DECLARE_VISIT 2102 #undef DECLARE_VISIT
1581 private: 2103 private:
1582 bool ignore_case_; 2104 bool ignore_case_;
1583 HeapStringAllocator alloc_; 2105 HeapStringAllocator alloc_;
1584 StringStream stream_; 2106 StringStream stream_;
1585 }; 2107 };
1586 2108
(...skipping 21 matching lines...) Expand all
1608 2130
1609 2131
1610 void DotPrinter::Visit(RegExpNode* node) { 2132 void DotPrinter::Visit(RegExpNode* node) {
1611 if (node->info()->visited) return; 2133 if (node->info()->visited) return;
1612 node->info()->visited = true; 2134 node->info()->visited = true;
1613 node->Accept(this); 2135 node->Accept(this);
1614 } 2136 }
1615 2137
1616 2138
1617 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { 2139 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {
1618 if (on_failure->IsBacktrack()) return;
1619 stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure); 2140 stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure);
1620 Visit(on_failure); 2141 Visit(on_failure);
1621 } 2142 }
1622 2143
1623 2144
1624 class TableEntryBodyPrinter { 2145 class TableEntryBodyPrinter {
1625 public: 2146 public:
1626 TableEntryBodyPrinter(StringStream* stream, ChoiceNode* choice) 2147 TableEntryBodyPrinter(StringStream* stream, ChoiceNode* choice)
1627 : stream_(stream), choice_(choice) { } 2148 : stream_(stream), choice_(choice) { }
1628 void Call(uc16 from, DispatchTable::Entry entry) { 2149 void Call(uc16 from, DispatchTable::Entry entry) {
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after
1733 static const bool kPrintDispatchTable = false; 2254 static const bool kPrintDispatchTable = false;
1734 void DotPrinter::VisitChoice(ChoiceNode* that) { 2255 void DotPrinter::VisitChoice(ChoiceNode* that) {
1735 if (kPrintDispatchTable) { 2256 if (kPrintDispatchTable) {
1736 stream()->Add(" n%p [shape=Mrecord, label=\"", that); 2257 stream()->Add(" n%p [shape=Mrecord, label=\"", that);
1737 TableEntryHeaderPrinter header_printer(stream()); 2258 TableEntryHeaderPrinter header_printer(stream());
1738 that->GetTable(ignore_case_)->ForEach(&header_printer); 2259 that->GetTable(ignore_case_)->ForEach(&header_printer);
1739 stream()->Add("\"]\n", that); 2260 stream()->Add("\"]\n", that);
1740 PrintAttributes(that); 2261 PrintAttributes(that);
1741 TableEntryBodyPrinter body_printer(stream(), that); 2262 TableEntryBodyPrinter body_printer(stream(), that);
1742 that->GetTable(ignore_case_)->ForEach(&body_printer); 2263 that->GetTable(ignore_case_)->ForEach(&body_printer);
1743 PrintOnFailure(that, that->on_failure());
1744 } else { 2264 } else {
1745 stream()->Add(" n%p [shape=Mrecord, label=\"?\"];\n", that); 2265 stream()->Add(" n%p [shape=Mrecord, label=\"?\"];\n", that);
1746 for (int i = 0; i < that->alternatives()->length(); i++) { 2266 for (int i = 0; i < that->alternatives()->length(); i++) {
1747 GuardedAlternative alt = that->alternatives()->at(i); 2267 GuardedAlternative alt = that->alternatives()->at(i);
1748 stream()->Add(" n%p -> n%p;\n", that, alt.node()); 2268 stream()->Add(" n%p -> n%p;\n", that, alt.node());
1749 } 2269 }
1750 } 2270 }
1751 for (int i = 0; i < that->alternatives()->length(); i++) { 2271 for (int i = 0; i < that->alternatives()->length(); i++) {
1752 GuardedAlternative alt = that->alternatives()->at(i); 2272 GuardedAlternative alt = that->alternatives()->at(i);
1753 alt.node()->Accept(this); 2273 alt.node()->Accept(this);
(...skipping 24 matching lines...) Expand all
1778 break; 2298 break;
1779 } 2299 }
1780 default: 2300 default:
1781 UNREACHABLE(); 2301 UNREACHABLE();
1782 } 2302 }
1783 } 2303 }
1784 stream()->Add("\", shape=box, peripheries=2];\n"); 2304 stream()->Add("\", shape=box, peripheries=2];\n");
1785 PrintAttributes(that); 2305 PrintAttributes(that);
1786 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); 2306 stream()->Add(" n%p -> n%p;\n", that, that->on_success());
1787 Visit(that->on_success()); 2307 Visit(that->on_success());
1788 PrintOnFailure(that, that->on_failure());
1789 } 2308 }
1790 2309
1791 2310
1792 void DotPrinter::VisitBackReference(BackReferenceNode* that) { 2311 void DotPrinter::VisitBackReference(BackReferenceNode* that) {
1793 stream()->Add(" n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n", 2312 stream()->Add(" n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n",
1794 that, 2313 that,
1795 that->start_register(), 2314 that->start_register(),
1796 that->end_register()); 2315 that->end_register());
1797 PrintAttributes(that); 2316 PrintAttributes(that);
1798 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); 2317 stream()->Add(" n%p -> n%p;\n", that, that->on_success());
1799 Visit(that->on_success()); 2318 Visit(that->on_success());
1800 PrintOnFailure(that, that->on_failure());
1801 } 2319 }
1802 2320
1803 2321
1804 void DotPrinter::VisitEnd(EndNode* that) { 2322 void DotPrinter::VisitEnd(EndNode* that) {
1805 stream()->Add(" n%p [style=bold, shape=point];\n", that); 2323 stream()->Add(" n%p [style=bold, shape=point];\n", that);
1806 PrintAttributes(that); 2324 PrintAttributes(that);
1807 } 2325 }
1808 2326
1809 2327
1810 void DotPrinter::VisitAction(ActionNode* that) { 2328 void DotPrinter::VisitAction(ActionNode* that) {
1811 stream()->Add(" n%p [", that); 2329 stream()->Add(" n%p [", that);
1812 switch (that->type_) { 2330 switch (that->type_) {
1813 case ActionNode::STORE_REGISTER: 2331 case ActionNode::SET_REGISTER:
1814 stream()->Add("label=\"$%i:=%i\", shape=octagon", 2332 stream()->Add("label=\"$%i:=%i\", shape=octagon",
1815 that->data_.u_store_register.reg, 2333 that->data_.u_store_register.reg,
1816 that->data_.u_store_register.value); 2334 that->data_.u_store_register.value);
1817 break; 2335 break;
1818 case ActionNode::INCREMENT_REGISTER: 2336 case ActionNode::INCREMENT_REGISTER:
1819 stream()->Add("label=\"$%i++\", shape=octagon", 2337 stream()->Add("label=\"$%i++\", shape=octagon",
1820 that->data_.u_increment_register.reg); 2338 that->data_.u_increment_register.reg);
1821 break; 2339 break;
1822 case ActionNode::STORE_POSITION: 2340 case ActionNode::STORE_POSITION:
1823 stream()->Add("label=\"$%i:=$pos\", shape=octagon", 2341 stream()->Add("label=\"$%i:=$pos\", shape=octagon",
1824 that->data_.u_position_register.reg); 2342 that->data_.u_position_register.reg);
1825 break; 2343 break;
1826 case ActionNode::RESTORE_POSITION:
1827 stream()->Add("label=\"$pos:=$%i\", shape=octagon",
1828 that->data_.u_position_register.reg);
1829 break;
1830 case ActionNode::BEGIN_SUBMATCH: 2344 case ActionNode::BEGIN_SUBMATCH:
1831 stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon", 2345 stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon",
1832 that->data_.u_submatch.current_position_register); 2346 that->data_.u_submatch.current_position_register);
1833 break; 2347 break;
1834 case ActionNode::ESCAPE_SUBMATCH: 2348 case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
1835 stream()->Add("label=\"escape\", shape=septagon"); 2349 stream()->Add("label=\"escape\", shape=septagon");
1836 break; 2350 break;
1837 } 2351 }
1838 stream()->Add("];\n"); 2352 stream()->Add("];\n");
1839 PrintAttributes(that); 2353 PrintAttributes(that);
1840 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); 2354 RegExpNode* successor = that->on_success();
1841 Visit(that->on_success()); 2355 stream()->Add(" n%p -> n%p;\n", that, successor);
2356 Visit(successor);
1842 } 2357 }
1843 2358
1844 2359
1845 class DispatchTableDumper { 2360 class DispatchTableDumper {
1846 public: 2361 public:
1847 explicit DispatchTableDumper(StringStream* stream) : stream_(stream) { } 2362 explicit DispatchTableDumper(StringStream* stream) : stream_(stream) { }
1848 void Call(uc16 key, DispatchTable::Entry entry); 2363 void Call(uc16 key, DispatchTable::Entry entry);
1849 StringStream* stream() { return stream_; } 2364 StringStream* stream() { return stream_; }
1850 private: 2365 private:
1851 StringStream* stream_; 2366 StringStream* stream_;
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
1888 2403
1889 2404
1890 #endif // DEBUG 2405 #endif // DEBUG
1891 2406
1892 2407
1893 // ------------------------------------------------------------------- 2408 // -------------------------------------------------------------------
1894 // Tree to graph conversion 2409 // Tree to graph conversion
1895 2410
1896 2411
1897 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, 2412 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
1898 RegExpNode* on_success, 2413 RegExpNode* on_success) {
1899 RegExpNode* on_failure) {
1900 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); 2414 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1);
1901 elms->Add(TextElement::Atom(this)); 2415 elms->Add(TextElement::Atom(this));
1902 return new TextNode(elms, on_success, on_failure); 2416 return new TextNode(elms, on_success);
1903 } 2417 }
1904 2418
1905 2419
1906 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, 2420 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
1907 RegExpNode* on_success, 2421 RegExpNode* on_success) {
1908 RegExpNode* on_failure) { 2422 return new TextNode(elements(), on_success);
1909 return new TextNode(elements(), on_success, on_failure);
1910 } 2423 }
1911 2424
1912 2425
1913 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, 2426 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
1914 RegExpNode* on_success, 2427 RegExpNode* on_success) {
1915 RegExpNode* on_failure) {
1916 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); 2428 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1);
1917 elms->Add(TextElement::CharClass(this)); 2429 elms->Add(TextElement::CharClass(this));
1918 return new TextNode(elms, on_success, on_failure); 2430 return new TextNode(elms, on_success);
1919 } 2431 }
1920 2432
1921 2433
1922 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, 2434 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
1923 RegExpNode* on_success, 2435 RegExpNode* on_success) {
1924 RegExpNode* on_failure) {
1925 ZoneList<RegExpTree*>* alternatives = this->alternatives(); 2436 ZoneList<RegExpTree*>* alternatives = this->alternatives();
1926 int length = alternatives->length(); 2437 int length = alternatives->length();
1927 ChoiceNode* result = new ChoiceNode(length, on_failure); 2438 ChoiceNode* result = new ChoiceNode(length);
1928 for (int i = 0; i < length; i++) { 2439 for (int i = 0; i < length; i++) {
1929 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, 2440 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler,
1930 on_success, 2441 on_success));
1931 on_failure));
1932 result->AddAlternative(alternative); 2442 result->AddAlternative(alternative);
1933 } 2443 }
1934 return result; 2444 return result;
1935 } 2445 }
1936 2446
1937 2447
1938 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, 2448 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
1939 RegExpNode* on_success, 2449 RegExpNode* on_success) {
1940 RegExpNode* on_failure) {
1941 return ToNode(min(), 2450 return ToNode(min(),
1942 max(), 2451 max(),
1943 is_greedy(), 2452 is_greedy(),
1944 body(), 2453 body(),
1945 compiler, 2454 compiler,
1946 on_success, 2455 on_success);
1947 on_failure);
1948 } 2456 }
1949 2457
1950 2458
1951 RegExpNode* RegExpQuantifier::ToNode(int min, 2459 RegExpNode* RegExpQuantifier::ToNode(int min,
1952 int max, 2460 int max,
1953 bool is_greedy, 2461 bool is_greedy,
1954 RegExpTree* body, 2462 RegExpTree* body,
1955 RegExpCompiler* compiler, 2463 RegExpCompiler* compiler,
1956 RegExpNode* on_success, 2464 RegExpNode* on_success) {
1957 RegExpNode* on_failure) {
1958 // x{f, t} becomes this: 2465 // x{f, t} becomes this:
1959 // 2466 //
1960 // (r++)<-. 2467 // (r++)<-.
1961 // | ` 2468 // | `
1962 // | (x) 2469 // | (x)
1963 // v ^ 2470 // v ^
1964 // (r=0)-->(?)---/ [if r < t] 2471 // (r=0)-->(?)---/ [if r < t]
1965 // | 2472 // |
1966 // [if r >= f] \----> ... 2473 // [if r >= f] \----> ...
1967 // 2474 //
1968 // 2475 //
1969 // TODO(someone): clear captures on repetition and handle empty 2476 // TODO(someone): clear captures on repetition and handle empty
1970 // matches. 2477 // matches.
1971 bool has_min = min > 0; 2478 bool has_min = min > 0;
1972 bool has_max = max < RegExpQuantifier::kInfinity; 2479 bool has_max = max < RegExpQuantifier::kInfinity;
1973 bool needs_counter = has_min || has_max; 2480 bool needs_counter = has_min || has_max;
1974 int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1; 2481 int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1;
1975 ChoiceNode* center = new ChoiceNode(2, on_failure); 2482 ChoiceNode* center = new LoopChoiceNode(2);
1976 RegExpNode* loop_return = needs_counter 2483 RegExpNode* loop_return = needs_counter
1977 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) 2484 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
1978 : static_cast<RegExpNode*>(center); 2485 : static_cast<RegExpNode*>(center);
1979 RegExpNode* body_node = body->ToNode(compiler, loop_return, on_failure); 2486 RegExpNode* body_node = body->ToNode(compiler, loop_return);
1980 GuardedAlternative body_alt(body_node); 2487 GuardedAlternative body_alt(body_node);
1981 if (has_max) { 2488 if (has_max) {
1982 Guard* body_guard = new Guard(reg_ctr, Guard::LT, max); 2489 Guard* body_guard = new Guard(reg_ctr, Guard::LT, max);
1983 body_alt.AddGuard(body_guard); 2490 body_alt.AddGuard(body_guard);
1984 } 2491 }
1985 GuardedAlternative rest_alt(on_success); 2492 GuardedAlternative rest_alt(on_success);
1986 if (has_min) { 2493 if (has_min) {
1987 Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min); 2494 Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min);
1988 rest_alt.AddGuard(rest_guard); 2495 rest_alt.AddGuard(rest_guard);
1989 } 2496 }
1990 if (is_greedy) { 2497 if (is_greedy) {
1991 center->AddAlternative(body_alt); 2498 center->AddAlternative(body_alt);
1992 center->AddAlternative(rest_alt); 2499 center->AddAlternative(rest_alt);
1993 } else { 2500 } else {
1994 center->AddAlternative(rest_alt); 2501 center->AddAlternative(rest_alt);
1995 center->AddAlternative(body_alt); 2502 center->AddAlternative(body_alt);
1996 } 2503 }
1997 if (needs_counter) { 2504 if (needs_counter) {
1998 return ActionNode::StoreRegister(reg_ctr, 0, center); 2505 return ActionNode::SetRegister(reg_ctr, 0, center);
1999 } else { 2506 } else {
2000 return center; 2507 return center;
2001 } 2508 }
2002 } 2509 }
2003 2510
2004 2511
2005 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, 2512 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
2006 RegExpNode* on_success, 2513 RegExpNode* on_success) {
2007 RegExpNode* on_failure) {
2008 NodeInfo info; 2514 NodeInfo info;
2009 switch (type()) { 2515 switch (type()) {
2010 case START_OF_LINE: 2516 case START_OF_LINE:
2011 info.follows_newline_interest = true; 2517 info.follows_newline_interest = true;
2012 break; 2518 break;
2013 case START_OF_INPUT: 2519 case START_OF_INPUT:
2014 info.follows_start_interest = true; 2520 info.follows_start_interest = true;
2015 break; 2521 break;
2016 case BOUNDARY: case NON_BOUNDARY: 2522 case BOUNDARY: case NON_BOUNDARY:
2017 info.follows_word_interest = true; 2523 info.follows_word_interest = true;
2018 break; 2524 break;
2019 case END_OF_INPUT: 2525 case END_OF_INPUT:
2020 info.at_end = true; 2526 info.at_end = true;
2021 break; 2527 break;
2022 case END_OF_LINE: 2528 case END_OF_LINE:
2023 // This is wrong but has the effect of making the compiler abort. 2529 // This is wrong but has the effect of making the compiler abort.
2024 info.at_end = true; 2530 info.at_end = true;
2025 } 2531 }
2026 return on_success->PropagateForward(&info); 2532 return on_success->PropagateForward(&info);
2027 } 2533 }
2028 2534
2029 2535
2030 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, 2536 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
2031 RegExpNode* on_success, 2537 RegExpNode* on_success) {
2032 RegExpNode* on_failure) {
2033 return new BackReferenceNode(RegExpCapture::StartRegister(index()), 2538 return new BackReferenceNode(RegExpCapture::StartRegister(index()),
2034 RegExpCapture::EndRegister(index()), 2539 RegExpCapture::EndRegister(index()),
2035 on_success, 2540 on_success);
2036 on_failure);
2037 } 2541 }
2038 2542
2039 2543
2040 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, 2544 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
2041 RegExpNode* on_success, 2545 RegExpNode* on_success) {
2042 RegExpNode* on_failure) {
2043 return on_success; 2546 return on_success;
2044 } 2547 }
2045 2548
2046 2549
2047 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, 2550 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
2048 RegExpNode* on_success, 2551 RegExpNode* on_success) {
2049 RegExpNode* on_failure) {
2050 int stack_pointer_register = compiler->AllocateRegister(); 2552 int stack_pointer_register = compiler->AllocateRegister();
2051 int position_register = compiler->AllocateRegister(); 2553 int position_register = compiler->AllocateRegister();
2554 RegExpNode* success;
2052 if (is_positive()) { 2555 if (is_positive()) {
2053 // begin submatch scope
2054 // $reg = $pos
2055 // if [body]
2056 // then
2057 // $pos = $reg
2058 // escape submatch scope (drop all backtracks created in scope)
2059 // succeed
2060 // else
2061 // end submatch scope (nothing to clean up, just exit the scope)
2062 // fail
2063 return ActionNode::BeginSubmatch( 2556 return ActionNode::BeginSubmatch(
2064 stack_pointer_register, 2557 stack_pointer_register,
2065 position_register, 2558 position_register,
2066 body()->ToNode( 2559 body()->ToNode(
2067 compiler, 2560 compiler,
2068 ActionNode::EscapeSubmatch( 2561 ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
2069 stack_pointer_register, 2562 position_register,
2070 true, // Also restore input position. 2563 on_success)));
2071 position_register,
2072 on_success),
2073 on_failure));
2074 } else { 2564 } else {
2075 // begin submatch scope 2565 // We use a ChoiceNode for a negative lookahead because it has most of
2076 // try 2566 // the characteristics we need. It has the body of the lookahead as its
2077 // first if (body) 2567 // first alternative and the expression after the lookahead of the second
2078 // then 2568 // alternative. If the first alternative succeeds then the
2079 // escape submatch scope 2569 // NegativeSubmatchSuccess will unwind the stack including everything the
2080 // fail 2570 // choice node set up and backtrack. If the first alternative fails then
2081 // else 2571 // the second alternative is tried, which is exactly the desired result
2082 // backtrack 2572 // for a negative lookahead. In the case where the dispatch table
2083 // second 2573 // determines that the first alternative cannot match we will save time
2084 // end submatch scope 2574 // by not trying it. Things are not quite so well-optimized if the
2085 // restore current position 2575 // dispatch table determines that the second alternative cannot match.
2086 // succeed 2576 // In this case we could optimize by immediately backtracking.
2087 ChoiceNode* try_node = 2577 ChoiceNode* choice_node = new ChoiceNode(2);
2088 new ChoiceNode(1, ActionNode::RestorePosition(position_register, 2578 GuardedAlternative body_alt(
2089 on_success)); 2579 body()->ToNode(
2090 RegExpNode* body_node = body()->ToNode( 2580 compiler,
2091 compiler, 2581 success = new NegativeSubmatchSuccess(stack_pointer_register,
2092 ActionNode::EscapeSubmatch(stack_pointer_register, 2582 position_register)));
2093 false, // Don't also restore position 2583 choice_node->AddAlternative(body_alt);
2094 0, // Unused arguments. 2584 choice_node->AddAlternative(GuardedAlternative(on_success));
2095 on_failure),
2096 compiler->backtrack());
2097 GuardedAlternative body_alt(body_node);
2098 try_node->AddAlternative(body_alt);
2099 return ActionNode::BeginSubmatch(stack_pointer_register, 2585 return ActionNode::BeginSubmatch(stack_pointer_register,
2100 position_register, 2586 position_register,
2101 try_node); 2587 choice_node);
2102 } 2588 }
2103 } 2589 }
2104 2590
2105 2591
2106 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, 2592 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
2107 RegExpNode* on_success, 2593 RegExpNode* on_success) {
2108 RegExpNode* on_failure) { 2594 return ToNode(body(), index(), compiler, on_success);
2109 return ToNode(body(), index(), compiler, on_success, on_failure);
2110 } 2595 }
2111 2596
2112 2597
2113 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, 2598 RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
2114 int index, 2599 int index,
2115 RegExpCompiler* compiler, 2600 RegExpCompiler* compiler,
2116 RegExpNode* on_success, 2601 RegExpNode* on_success) {
2117 RegExpNode* on_failure) {
2118 int start_reg = RegExpCapture::StartRegister(index); 2602 int start_reg = RegExpCapture::StartRegister(index);
2119 int end_reg = RegExpCapture::EndRegister(index); 2603 int end_reg = RegExpCapture::EndRegister(index);
2120 RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success); 2604 RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success);
2121 RegExpNode* body_node = body->ToNode(compiler, store_end, on_failure); 2605 RegExpNode* body_node = body->ToNode(compiler, store_end);
2122 return ActionNode::StorePosition(start_reg, body_node); 2606 return ActionNode::StorePosition(start_reg, body_node);
2123 } 2607 }
2124 2608
2125 2609
2126 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, 2610 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
2127 RegExpNode* on_success, 2611 RegExpNode* on_success) {
2128 RegExpNode* on_failure) {
2129 ZoneList<RegExpTree*>* children = nodes(); 2612 ZoneList<RegExpTree*>* children = nodes();
2130 RegExpNode* current = on_success; 2613 RegExpNode* current = on_success;
2131 for (int i = children->length() - 1; i >= 0; i--) { 2614 for (int i = children->length() - 1; i >= 0; i--) {
2132 current = children->at(i)->ToNode(compiler, current, on_failure); 2615 current = children->at(i)->ToNode(compiler, current);
2133 } 2616 }
2134 return current; 2617 return current;
2135 } 2618 }
2136 2619
2137 2620
2138 static const int kSpaceRangeCount = 20; 2621 static const int kSpaceRangeCount = 20;
2139 static const uc16 kSpaceRanges[kSpaceRangeCount] = { 2622 static const uc16 kSpaceRanges[kSpaceRangeCount] = {
2140 0x0009, 0x000D, 0x0020, 0x0020, 0x00A0, 0x00A0, 0x1680, 2623 0x0009, 0x000D, 0x0020, 0x0020, 0x00A0, 0x00A0, 0x1680,
2141 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029, 2624 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029,
2142 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000 2625 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000
(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after
2393 bool cloned = false; 2876 bool cloned = false;
2394 return RegExpNode::EnsureSibling(node, &full_info, &cloned); 2877 return RegExpNode::EnsureSibling(node, &full_info, &cloned);
2395 } 2878 }
2396 2879
2397 2880
2398 RegExpNode* ActionNode::PropagateForward(NodeInfo* info) { 2881 RegExpNode* ActionNode::PropagateForward(NodeInfo* info) {
2399 NodeInfo full_info(*this->info()); 2882 NodeInfo full_info(*this->info());
2400 full_info.AddFromPreceding(info); 2883 full_info.AddFromPreceding(info);
2401 bool cloned = false; 2884 bool cloned = false;
2402 ActionNode* action = EnsureSibling(this, &full_info, &cloned); 2885 ActionNode* action = EnsureSibling(this, &full_info, &cloned);
2403 if (cloned && type_ != ESCAPE_SUBMATCH) { 2886 action->set_on_success(action->on_success()->PropagateForward(info));
2404 action->set_on_success(action->on_success()->PropagateForward(info));
2405 }
2406 return action; 2887 return action;
2407 } 2888 }
2408 2889
2409 2890
2410 RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) { 2891 RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) {
2411 NodeInfo full_info(*this->info()); 2892 NodeInfo full_info(*this->info());
2412 full_info.AddFromPreceding(info); 2893 full_info.AddFromPreceding(info);
2413 bool cloned = false; 2894 bool cloned = false;
2414 ChoiceNode* choice = EnsureSibling(this, &full_info, &cloned); 2895 ChoiceNode* choice = EnsureSibling(this, &full_info, &cloned);
2415 if (cloned) { 2896 if (cloned) {
2416 ZoneList<GuardedAlternative>* old_alternatives = alternatives(); 2897 ZoneList<GuardedAlternative>* old_alternatives = alternatives();
2417 int count = old_alternatives->length(); 2898 int count = old_alternatives->length();
2418 choice->alternatives_ = new ZoneList<GuardedAlternative>(count); 2899 choice->alternatives_ = new ZoneList<GuardedAlternative>(count);
2419 for (int i = 0; i < count; i++) { 2900 for (int i = 0; i < count; i++) {
2420 GuardedAlternative alternative = old_alternatives->at(i); 2901 GuardedAlternative alternative = old_alternatives->at(i);
2421 alternative.set_node(alternative.node()->PropagateForward(info)); 2902 alternative.set_node(alternative.node()->PropagateForward(info));
2422 choice->alternatives()->Add(alternative); 2903 choice->alternatives()->Add(alternative);
2423 } 2904 }
2424 if (!choice->on_failure_->IsBacktrack()) {
2425 choice->on_failure_ = choice->on_failure_->PropagateForward(info);
2426 }
2427 } 2905 }
2428 return choice; 2906 return choice;
2429 } 2907 }
2430 2908
2431 2909
2432 RegExpNode* EndNode::PropagateForward(NodeInfo* info) { 2910 RegExpNode* EndNode::PropagateForward(NodeInfo* info) {
2433 return PropagateToEndpoint(this, info); 2911 return PropagateToEndpoint(this, info);
2434 } 2912 }
2435 2913
2436 2914
(...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after
2617 that->info()->being_analyzed = false; 3095 that->info()->being_analyzed = false;
2618 that->info()->been_analyzed = true; 3096 that->info()->been_analyzed = true;
2619 } 3097 }
2620 3098
2621 3099
2622 void Analysis::VisitEnd(EndNode* that) { 3100 void Analysis::VisitEnd(EndNode* that) {
2623 // nothing to do 3101 // nothing to do
2624 } 3102 }
2625 3103
2626 3104
3105 void TextNode::CalculateOffsets() {
3106 int element_count = elements()->length();
3107 // Set up the offsets of the elements relative to the start. This is a fixed
3108 // quantity since a TextNode can only contain fixed-width things.
3109 int cp_offset = 0;
3110 for (int i = 0; i < element_count; i++) {
3111 TextElement& elm = elements()->at(i);
3112 elm.cp_offset = cp_offset;
3113 if (elm.type == TextElement::ATOM) {
3114 cp_offset += elm.data.u_atom->data().length();
3115 } else {
3116 cp_offset++;
3117 Vector<const uc16> quarks = elm.data.u_atom->data();
3118 }
3119 }
3120 }
3121
3122
2627 void Analysis::VisitText(TextNode* that) { 3123 void Analysis::VisitText(TextNode* that) {
2628 if (ignore_case_) { 3124 if (ignore_case_) {
2629 that->MakeCaseIndependent(); 3125 that->MakeCaseIndependent();
2630 } 3126 }
2631 EnsureAnalyzed(that->on_success()); 3127 EnsureAnalyzed(that->on_success());
2632 EnsureAnalyzed(that->on_failure());
2633 NodeInfo* info = that->info(); 3128 NodeInfo* info = that->info();
2634 NodeInfo* next_info = that->on_success()->info(); 3129 NodeInfo* next_info = that->on_success()->info();
2635 // If the following node is interested in what it follows then this 3130 // If the following node is interested in what it follows then this
2636 // node must determine it. 3131 // node must determine it.
2637 info->determine_newline = next_info->follows_newline_interest; 3132 info->determine_newline = next_info->follows_newline_interest;
2638 info->determine_word = next_info->follows_word_interest; 3133 info->determine_word = next_info->follows_word_interest;
2639 info->determine_start = next_info->follows_start_interest; 3134 info->determine_start = next_info->follows_start_interest;
3135 that->CalculateOffsets();
2640 } 3136 }
2641 3137
2642 3138
2643 void Analysis::VisitAction(ActionNode* that) { 3139 void Analysis::VisitAction(ActionNode* that) {
2644 EnsureAnalyzed(that->on_success()); 3140 RegExpNode* target = that->on_success();
3141 EnsureAnalyzed(target);
2645 // If the next node is interested in what it follows then this node 3142 // If the next node is interested in what it follows then this node
2646 // has to be interested too so it can pass the information on. 3143 // has to be interested too so it can pass the information on.
2647 that->info()->AddFromFollowing(that->on_success()->info()); 3144 that->info()->AddFromFollowing(target->info());
2648 } 3145 }
2649 3146
2650 3147
2651 void Analysis::VisitChoice(ChoiceNode* that) { 3148 void Analysis::VisitChoice(ChoiceNode* that) {
2652 NodeInfo* info = that->info(); 3149 NodeInfo* info = that->info();
2653 for (int i = 0; i < that->alternatives()->length(); i++) { 3150 for (int i = 0; i < that->alternatives()->length(); i++) {
2654 RegExpNode* node = that->alternatives()->at(i).node(); 3151 RegExpNode* node = that->alternatives()->at(i).node();
2655 EnsureAnalyzed(node); 3152 EnsureAnalyzed(node);
2656 // Anything the following nodes need to know has to be known by 3153 // Anything the following nodes need to know has to be known by
2657 // this node also, so it can pass it on. 3154 // this node also, so it can pass it on.
2658 info->AddFromFollowing(node->info()); 3155 info->AddFromFollowing(node->info());
2659 } 3156 }
2660 EnsureAnalyzed(that->on_failure());
2661 } 3157 }
2662 3158
2663 3159
2664 void Analysis::VisitBackReference(BackReferenceNode* that) { 3160 void Analysis::VisitBackReference(BackReferenceNode* that) {
2665 EnsureAnalyzed(that->on_success()); 3161 EnsureAnalyzed(that->on_success());
2666 EnsureAnalyzed(that->on_failure());
2667 } 3162 }
2668 3163
2669 3164
2670 // ------------------------------------------------------------------- 3165 // -------------------------------------------------------------------
2671 // Assumption expansion 3166 // Assumption expansion
2672 3167
2673 3168
2674 RegExpNode* RegExpNode::EnsureExpanded(NodeInfo* info) { 3169 RegExpNode* RegExpNode::EnsureExpanded(NodeInfo* info) {
2675 siblings_.Ensure(this); 3170 siblings_.Ensure(this);
2676 NodeInfo new_info = *this->info(); 3171 NodeInfo new_info = *this->info();
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
2739 if (non_word == NULL) { 3234 if (non_word == NULL) {
2740 // This node contains no non-word characters so it must be 3235 // This node contains no non-word characters so it must be
2741 // all word. 3236 // all word.
2742 this->info()->is_word = NodeInfo::TRUE; 3237 this->info()->is_word = NodeInfo::TRUE;
2743 } else if (word == NULL) { 3238 } else if (word == NULL) {
2744 // Vice versa. 3239 // Vice versa.
2745 this->info()->is_word = NodeInfo::FALSE; 3240 this->info()->is_word = NodeInfo::FALSE;
2746 } else { 3241 } else {
2747 // If this character class contains both word and non-word 3242 // If this character class contains both word and non-word
2748 // characters we need to split it into two. 3243 // characters we need to split it into two.
2749 ChoiceNode* result = new ChoiceNode(2, on_failure()); 3244 ChoiceNode* result = new ChoiceNode(2);
2750 // Welcome to the family, son! 3245 // Welcome to the family, son!
2751 result->set_siblings(this->siblings()); 3246 result->set_siblings(this->siblings());
2752 *result->info() = *this->info(); 3247 *result->info() = *this->info();
2753 result->info()->ResetCompilationState(); 3248 result->info()->ResetCompilationState();
2754 result->info()->AddAssumptions(info); 3249 result->info()->AddAssumptions(info);
2755 RegExpNode* word_node 3250 RegExpNode* word_node
2756 = new TextNode(new RegExpCharacterClass(word, false), 3251 = new TextNode(new RegExpCharacterClass(word, false),
2757 on_success(), 3252 on_success());
2758 on_failure());
2759 word_node->info()->determine_word = true; 3253 word_node->info()->determine_word = true;
2760 word_node->info()->does_determine_word = true; 3254 word_node->info()->does_determine_word = true;
2761 word_node->info()->is_word = NodeInfo::TRUE; 3255 word_node->info()->is_word = NodeInfo::TRUE;
2762 result->alternatives()->Add(GuardedAlternative(word_node)); 3256 result->alternatives()->Add(GuardedAlternative(word_node));
2763 RegExpNode* non_word_node 3257 RegExpNode* non_word_node
2764 = new TextNode(new RegExpCharacterClass(non_word, false), 3258 = new TextNode(new RegExpCharacterClass(non_word, false),
2765 on_success(), 3259 on_success());
2766 on_failure());
2767 non_word_node->info()->determine_word = true; 3260 non_word_node->info()->determine_word = true;
2768 non_word_node->info()->does_determine_word = true; 3261 non_word_node->info()->does_determine_word = true;
2769 non_word_node->info()->is_word = NodeInfo::FALSE; 3262 non_word_node->info()->is_word = NodeInfo::FALSE;
2770 result->alternatives()->Add(GuardedAlternative(non_word_node)); 3263 result->alternatives()->Add(GuardedAlternative(non_word_node));
2771 return result; 3264 return result;
2772 } 3265 }
2773 } 3266 }
2774 } 3267 }
2775 TextNode* clone = this->Clone(); 3268 TextNode* clone = this->Clone();
2776 clone->info()->ResetCompilationState(); 3269 clone->info()->ResetCompilationState();
(...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after
2967 break; 3460 break;
2968 } 3461 }
2969 default: { 3462 default: {
2970 UNIMPLEMENTED(); 3463 UNIMPLEMENTED();
2971 } 3464 }
2972 } 3465 }
2973 } 3466 }
2974 3467
2975 3468
2976 void DispatchTableConstructor::VisitAction(ActionNode* that) { 3469 void DispatchTableConstructor::VisitAction(ActionNode* that) {
2977 that->on_success()->Accept(this); 3470 RegExpNode* target = that->on_success();
3471 target->Accept(this);
2978 } 3472 }
2979 3473
2980 3474
2981 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, 3475 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
2982 RegExpNode** node_return, 3476 RegExpNode** node_return,
2983 bool ignore_case, 3477 bool ignore_case,
2984 bool is_multiline) { 3478 bool is_multiline,
3479 Handle<String> pattern) {
2985 RegExpCompiler compiler(input->capture_count, ignore_case); 3480 RegExpCompiler compiler(input->capture_count, ignore_case);
2986 // Wrap the body of the regexp in capture #0. 3481 // Wrap the body of the regexp in capture #0.
2987 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, 3482 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
2988 0, 3483 0,
2989 &compiler, 3484 &compiler,
2990 compiler.accept(), 3485 compiler.accept());
2991 compiler.backtrack());
2992 // Add a .*? at the beginning, outside the body capture. 3486 // Add a .*? at the beginning, outside the body capture.
2993 // Note: We could choose to not add this if the regexp is anchored at 3487 // Note: We could choose to not add this if the regexp is anchored at
2994 // the start of the input but I'm not sure how best to do that and 3488 // the start of the input but I'm not sure how best to do that and
2995 // since we don't even handle ^ yet I'm saving that optimization for 3489 // since we don't even handle ^ yet I'm saving that optimization for
2996 // later. 3490 // later.
2997 RegExpNode* node = RegExpQuantifier::ToNode(0, 3491 RegExpNode* node = RegExpQuantifier::ToNode(0,
2998 RegExpQuantifier::kInfinity, 3492 RegExpQuantifier::kInfinity,
2999 false, 3493 false,
3000 new RegExpCharacterClass('*'), 3494 new RegExpCharacterClass('*'),
3001 &compiler, 3495 &compiler,
3002 captured_body, 3496 captured_body);
3003 compiler.backtrack());
3004 if (node_return != NULL) *node_return = node; 3497 if (node_return != NULL) *node_return = node;
3005 Analysis analysis(ignore_case); 3498 Analysis analysis(ignore_case);
3006 analysis.EnsureAnalyzed(node); 3499 analysis.EnsureAnalyzed(node);
3007 3500
3008 NodeInfo info = *node->info(); 3501 NodeInfo info = *node->info();
3009 node = node->EnsureExpanded(&info); 3502 node = node->EnsureExpanded(&info);
3010 3503
3011 if (!FLAG_irregexp) { 3504 if (!FLAG_irregexp) {
3012 return Handle<FixedArray>::null(); 3505 return Handle<FixedArray>::null();
3013 } 3506 }
3014 3507
3015 if (is_multiline && !FLAG_attempt_multiline_irregexp) { 3508 if (is_multiline && !FLAG_attempt_multiline_irregexp) {
3016 return Handle<FixedArray>::null(); 3509 return Handle<FixedArray>::null();
3017 } 3510 }
3018 3511
3019 if (FLAG_irregexp_native) { 3512 if (FLAG_irregexp_native) {
3020 #ifdef ARM 3513 #ifdef ARM
3021 // Unimplemented, fall-through to bytecode implementation. 3514 // Unimplemented, fall-through to bytecode implementation.
3022 #else // IA32 3515 #else // IA32
3023 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16, 3516 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16,
3024 (input->capture_count + 1) * 2); 3517 (input->capture_count + 1) * 2);
3025 return compiler.Assemble(&macro_assembler, 3518 return compiler.Assemble(&macro_assembler,
3026 node, 3519 node,
3027 input->capture_count); 3520 input->capture_count,
3521 pattern);
3028 #endif 3522 #endif
3029 } 3523 }
3030 EmbeddedVector<byte, 1024> codes; 3524 EmbeddedVector<byte, 1024> codes;
3031 RegExpMacroAssemblerIrregexp macro_assembler(codes); 3525 RegExpMacroAssemblerIrregexp macro_assembler(codes);
3032 return compiler.Assemble(&macro_assembler, 3526 return compiler.Assemble(&macro_assembler,
3033 node, 3527 node,
3034 input->capture_count); 3528 input->capture_count,
3529 pattern);
3035 } 3530 }
3036 3531
3037 3532
3038 }} // namespace v8::internal 3533 }} // namespace v8::internal
OLDNEW
« src/jsregexp.h ('K') | « src/jsregexp.h ('k') | src/jsregexp-inl.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698