OLD | NEW |
---|---|
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 224 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
235 result = AtomCompile(re, pattern, flags, atom_string); | 235 result = AtomCompile(re, pattern, flags, atom_string); |
236 } else { | 236 } else { |
237 result = AtomCompile(re, pattern, flags, pattern); | 237 result = AtomCompile(re, pattern, flags, pattern); |
238 } | 238 } |
239 } else { | 239 } else { |
240 RegExpNode* node = NULL; | 240 RegExpNode* node = NULL; |
241 Handle<FixedArray> irregexp_data = | 241 Handle<FixedArray> irregexp_data = |
242 RegExpEngine::Compile(&parse_result, | 242 RegExpEngine::Compile(&parse_result, |
243 &node, | 243 &node, |
244 flags.is_ignore_case(), | 244 flags.is_ignore_case(), |
245 flags.is_multiline()); | 245 flags.is_multiline(), |
246 pattern); | |
246 if (irregexp_data.is_null()) { | 247 if (irregexp_data.is_null()) { |
247 if (FLAG_disable_jscre) { | 248 if (FLAG_disable_jscre) { |
248 UNIMPLEMENTED(); | 249 UNIMPLEMENTED(); |
249 } | 250 } |
250 result = JscrePrepare(re, pattern, flags); | 251 result = JscrePrepare(re, pattern, flags); |
251 } else { | 252 } else { |
252 result = IrregexpPrepare(re, pattern, flags, irregexp_data); | 253 result = IrregexpPrepare(re, pattern, flags, irregexp_data); |
253 } | 254 } |
254 } | 255 } |
255 Object* data = re->data(); | 256 Object* data = re->data(); |
(...skipping 595 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
851 | 852 |
852 Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) { | 853 Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) { |
853 FixedArray* value = | 854 FixedArray* value = |
854 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)); | 855 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)); |
855 return Handle<ByteArray>(ByteArray::cast(value->get(kIrregexpCodeIndex))); | 856 return Handle<ByteArray>(ByteArray::cast(value->get(kIrregexpCodeIndex))); |
856 } | 857 } |
857 | 858 |
858 | 859 |
859 // ------------------------------------------------------------------- | 860 // ------------------------------------------------------------------- |
860 // Implmentation of the Irregexp regular expression engine. | 861 // Implmentation of the Irregexp regular expression engine. |
862 // | |
863 // The Irregexp regular expression engine is intended to be a complete | |
864 // implementation of ECMAScript regular expressions. It generates either | |
865 // bytecodes or native code. | |
866 | |
867 // The Irregexp regexp engine is structured in three steps. | |
868 // 1) The parser generates an abstract syntax tree. See ast.cc. | |
869 // 2) From the AST a node network is created. The nodes are all | |
870 // subclasses of RegExpNode. The nodes represent states when | |
871 // executing a regular expression. Several optimizations are | |
872 // performed on the node network. | |
873 // 3) From the nodes we generate either byte codes or native code | |
874 // that can actually execute the regular expression (perform | |
875 // the search). The code generation step is described in more | |
876 // detail below. | |
877 | |
878 // Code generation. | |
879 // | |
880 // The nodes are divided into four main categories. | |
881 // * Choice nodes | |
882 // These represent places where the regular expression can | |
883 // match in more than one way. For example on entry to an | |
884 // alternation (foo|bar) or a repetition (*, +, ? or {}). | |
885 // * Action nodes | |
886 // These represent places where some action should be | |
887 // performed. Examples include recording the current position | |
888 // in the input string to a register (in order to implement | |
889 // captures) or other actions on register for example in order | |
890 // to implement the counters needed for {} repetitions. | |
891 // * Matching nodes | |
892 // These attempt to match some element part of the input string. | |
893 // Examples of elements include character classes, plain strings | |
894 // or back references. | |
895 // * End nodes | |
896 // These are used to implement the actions required on finding | |
897 // a successful match or failing to find a match. | |
898 // | |
899 // The code generated (whether as byte codes or native code) maintains | |
900 // some state as it runs. This consists of the following elements: | |
901 // | |
902 // * The capture registers. Used for string captures. | |
903 // * Other registers. Used for counters etc. | |
904 // * The current position. | |
905 // * The stack of backtracking information. Used when a matching node | |
906 // fails to find a match and needs to try an alternative. | |
907 // | |
908 // Conceptual regular expression execution model: | |
909 // | |
910 // There is a simple conceptual model of regular expression execution | |
911 // which will be presented first. The actual code generated is a more | |
912 // efficient simulation of the simple conceptual model: | |
913 // | |
914 // * Choice nodes are implemented as follows: | |
915 // For each choice except the last { | |
916 // push current position | |
917 // push backtrack code location | |
918 // <generate code to test for choice> | |
919 // backtrack code location: | |
920 // pop current position | |
921 // } | |
922 // <generate code to test for last choice> | |
923 // | |
924 // * Actions nodes are generated as follows | |
925 // <push affected registers on backtrack stack> | |
926 // <generate code to perform action> | |
927 // push backtrack code location | |
928 // <generate code to test for following nodes> | |
929 // backtrack code location: | |
930 // <pop affected registers to restore their state> | |
931 // <pop backtrack location from stack and go to it> | |
932 // | |
933 // * Matching nodes are generated as follows: | |
934 // if input string matches at current position | |
935 // update current position | |
936 // <generate code to test for following nodes> | |
937 // else | |
938 // <pop backtrack location from stack and go to it> | |
939 // | |
940 // Thus it can be seen that the current position is saved and restored | |
941 // by the choice nodes, whereas the registers are saved and restored by | |
942 // by the action nodes that manipulate them. | |
943 // | |
944 // The other interesting aspect of this model is that nodes are generated | |
945 // at the point where they are needed by a recursive call to Emit(). If | |
946 // the node has already been code generated then the Emit() call will | |
947 // generate a jump to the previously generated code instead. In order to | |
948 // limit recursion it is possible for the Emit() function to put the node | |
949 // on a work list for later generation and instead generate a jump. The | |
950 // destination of the jump is resolved later when the code is generated. | |
951 // | |
952 // Actual regular expression code generation. | |
953 // | |
954 // Code generation is actually more complicated than the above. In order | |
955 // to improve the efficiency of the generated code some optimizations are | |
956 // performed | |
957 // | |
958 // * Choice nodes have 1-character lookahead. | |
959 // A choice node looks at the following character and eliminates some of | |
960 // the choices immediately based on that character. This is not yet | |
961 // implemented. | |
962 // * Simple greedy loops store reduced backtracking information. | |
963 // A quantifier like /.*foo/m will greedily match the whole input. It will | |
964 // then need to backtrack to a point where it can match "foo". The naive | |
965 // implementation of this would push each character position onto the | |
966 // backtracking stack, then pop them off one by one. This would use space | |
967 // proportional to the length of the input string. However since the "." | |
968 // can only match in one way and always has a constant length (in this case | |
969 // of 1) it suffices to store the current position on the top of the stack | |
970 // once. Matching now becomes merely incrementing the current position and | |
971 // backtracking becomes decrementing the current position and checking the | |
972 // result against the stored current position. This is faster and saves | |
973 // space. | |
974 // * The current state is virtualized. | |
975 // This is used to defer expensive operations until it is clear that they | |
976 // are needed and to generate code for a node more than once, allowing | |
977 // specialized an efficient versions of the code to be created. This is | |
978 // explained in the section below. | |
979 // | |
980 // Execution state virtualization. | |
981 // | |
982 // Instead of emitting code, nodes that manipulate the state can record their | |
983 // manipulation in an object called the GenerationVariant. The | |
984 // GenerationVariant object can record a current position offset, an | |
985 // optional backtrack code location on the top of the virtualized backtrack | |
986 // stack and some register changes. When a node is to be emitted it can flush | |
987 // the GenerationVariant or update it. Flushing the GenerationVariant | |
988 // will emit code to bring the actual state into line with the virtual state. | |
989 // Avoiding flushing the state can postpone some work (eg updates of capture | |
990 // registers). Postponing work can save time when executing the regular | |
991 // expression since it may be found that the work never has to be done as a | |
992 // failure to match can occur. In addition it is much faster to jump to a | |
993 // known backtrack code location than it is to pop an unknown backtrack | |
994 // location from the stack and jump there. | |
995 // | |
996 // The virtual state found in the GenerationVariant affects code generation. | |
997 // For example the virtual state contains the difference between the actual | |
998 // current position and the virtual current position, and matching code needs | |
999 // to use this offset to attempt a match in the correct location of the input | |
1000 // string. Therefore code generated for a non-trivial GenerationVariant is | |
1001 // specialized to that GenerationVariant. The code generator therefore | |
1002 // has the ability to generate code for each node several times. In order to | |
1003 // limit the size of the generated code there is an arbitrary limit on how | |
1004 // many specialized sets of code may be generated for a given node. If the | |
1005 // limit is reached, the GenerationVariant is flushed and a generic version of | |
1006 // the code for a node is emitted. This is subsequently used for that node. | |
1007 // The code emitted for non-generic GenerationVariants is not recorded in the | |
1008 // node and so it cannot currently be reused in the event that code generation | |
1009 // is requested for an identical GenerationVariant. | |
Christian Plesner Hansen
2008/12/08 08:54:19
Most excellent!
| |
861 | 1010 |
862 | 1011 |
863 void RegExpTree::AppendToText(RegExpText* text) { | 1012 void RegExpTree::AppendToText(RegExpText* text) { |
864 UNREACHABLE(); | 1013 UNREACHABLE(); |
865 } | 1014 } |
866 | 1015 |
867 | 1016 |
868 void RegExpAtom::AppendToText(RegExpText* text) { | 1017 void RegExpAtom::AppendToText(RegExpText* text) { |
869 text->AddElement(TextElement::Atom(this)); | 1018 text->AddElement(TextElement::Atom(this)); |
870 } | 1019 } |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
907 | 1056 |
908 | 1057 |
909 class RegExpCompiler { | 1058 class RegExpCompiler { |
910 public: | 1059 public: |
911 RegExpCompiler(int capture_count, bool ignore_case); | 1060 RegExpCompiler(int capture_count, bool ignore_case); |
912 | 1061 |
913 int AllocateRegister() { return next_register_++; } | 1062 int AllocateRegister() { return next_register_++; } |
914 | 1063 |
915 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, | 1064 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, |
916 RegExpNode* start, | 1065 RegExpNode* start, |
917 int capture_count); | 1066 int capture_count, |
1067 Handle<String> pattern); | |
918 | 1068 |
919 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } | 1069 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } |
920 | 1070 |
921 static const int kImplementationOffset = 0; | 1071 static const int kImplementationOffset = 0; |
922 static const int kNumberOfRegistersOffset = 0; | 1072 static const int kNumberOfRegistersOffset = 0; |
923 static const int kCodeOffset = 1; | 1073 static const int kCodeOffset = 1; |
924 | 1074 |
925 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } | 1075 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } |
926 EndNode* accept() { return accept_; } | 1076 EndNode* accept() { return accept_; } |
927 EndNode* backtrack() { return backtrack_; } | |
928 | 1077 |
929 static const int kMaxRecursion = 100; | 1078 static const int kMaxRecursion = 100; |
930 inline int recursion_depth() { return recursion_depth_; } | 1079 inline int recursion_depth() { return recursion_depth_; } |
931 inline void IncrementRecursionDepth() { recursion_depth_++; } | 1080 inline void IncrementRecursionDepth() { recursion_depth_++; } |
932 inline void DecrementRecursionDepth() { recursion_depth_--; } | 1081 inline void DecrementRecursionDepth() { recursion_depth_--; } |
933 | 1082 |
934 inline bool ignore_case() { return ignore_case_; } | 1083 inline bool ignore_case() { return ignore_case_; } |
935 | 1084 |
936 private: | 1085 private: |
937 EndNode* accept_; | 1086 EndNode* accept_; |
938 EndNode* backtrack_; | |
939 int next_register_; | 1087 int next_register_; |
940 List<RegExpNode*>* work_list_; | 1088 List<RegExpNode*>* work_list_; |
941 int recursion_depth_; | 1089 int recursion_depth_; |
942 RegExpMacroAssembler* macro_assembler_; | 1090 RegExpMacroAssembler* macro_assembler_; |
943 bool ignore_case_; | 1091 bool ignore_case_; |
944 }; | 1092 }; |
945 | 1093 |
946 | 1094 |
1095 class RecursionCheck { | |
1096 public: | |
1097 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { | |
1098 compiler->IncrementRecursionDepth(); | |
1099 } | |
1100 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } | |
1101 private: | |
1102 RegExpCompiler* compiler_; | |
1103 }; | |
1104 | |
1105 | |
947 // Attempts to compile the regexp using an Irregexp code generator. Returns | 1106 // Attempts to compile the regexp using an Irregexp code generator. Returns |
948 // a fixed array or a null handle depending on whether it succeeded. | 1107 // a fixed array or a null handle depending on whether it succeeded. |
949 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case) | 1108 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case) |
950 : next_register_(2 * (capture_count + 1)), | 1109 : next_register_(2 * (capture_count + 1)), |
951 work_list_(NULL), | 1110 work_list_(NULL), |
952 recursion_depth_(0), | 1111 recursion_depth_(0), |
953 ignore_case_(ignore_case) { | 1112 ignore_case_(ignore_case) { |
954 accept_ = new EndNode(EndNode::ACCEPT); | 1113 accept_ = new EndNode(EndNode::ACCEPT); |
955 backtrack_ = new EndNode(EndNode::BACKTRACK); | |
956 } | 1114 } |
957 | 1115 |
958 | 1116 |
959 Handle<FixedArray> RegExpCompiler::Assemble( | 1117 Handle<FixedArray> RegExpCompiler::Assemble( |
960 RegExpMacroAssembler* macro_assembler, | 1118 RegExpMacroAssembler* macro_assembler, |
961 RegExpNode* start, | 1119 RegExpNode* start, |
962 int capture_count) { | 1120 int capture_count, |
1121 Handle<String> pattern) { | |
963 #ifdef DEBUG | 1122 #ifdef DEBUG |
964 if (FLAG_trace_regexp_assembler) | 1123 if (FLAG_trace_regexp_assembler) |
965 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); | 1124 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); |
966 else | 1125 else |
967 #endif | 1126 #endif |
968 macro_assembler_ = macro_assembler; | 1127 macro_assembler_ = macro_assembler; |
969 List <RegExpNode*> work_list(0); | 1128 List <RegExpNode*> work_list(0); |
970 work_list_ = &work_list; | 1129 work_list_ = &work_list; |
971 Label fail; | 1130 Label fail; |
972 macro_assembler_->PushBacktrack(&fail); | 1131 macro_assembler->PushBacktrack(&fail); |
973 if (!start->GoTo(this)) { | 1132 GenerationVariant generic_variant; |
1133 if (!start->Emit(this, &generic_variant)) { | |
974 fail.Unuse(); | 1134 fail.Unuse(); |
975 return Handle<FixedArray>::null(); | 1135 return Handle<FixedArray>::null(); |
976 } | 1136 } |
1137 macro_assembler_->Bind(&fail); | |
1138 macro_assembler_->Fail(); | |
977 while (!work_list.is_empty()) { | 1139 while (!work_list.is_empty()) { |
978 if (!work_list.RemoveLast()->GoTo(this)) { | 1140 if (!work_list.RemoveLast()->Emit(this, &generic_variant)) { |
979 fail.Unuse(); | |
980 return Handle<FixedArray>::null(); | 1141 return Handle<FixedArray>::null(); |
981 } | 1142 } |
982 } | 1143 } |
983 macro_assembler_->Bind(&fail); | |
984 macro_assembler_->Fail(); | |
985 Handle<FixedArray> array = | 1144 Handle<FixedArray> array = |
986 Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength); | 1145 Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength); |
987 array->set(RegExpImpl::kIrregexpImplementationIndex, | 1146 array->set(RegExpImpl::kIrregexpImplementationIndex, |
988 Smi::FromInt(macro_assembler_->Implementation())); | 1147 Smi::FromInt(macro_assembler_->Implementation())); |
989 array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex, | 1148 array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex, |
990 Smi::FromInt(next_register_)); | 1149 Smi::FromInt(next_register_)); |
991 array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex, | 1150 array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex, |
992 Smi::FromInt(capture_count)); | 1151 Smi::FromInt(capture_count)); |
993 Handle<Object> code = macro_assembler_->GetCode(); | 1152 Handle<Object> code = macro_assembler_->GetCode(pattern); |
994 array->set(RegExpImpl::kIrregexpCodeIndex, *code); | 1153 array->set(RegExpImpl::kIrregexpCodeIndex, *code); |
995 work_list_ = NULL; | 1154 work_list_ = NULL; |
996 #ifdef DEBUG | 1155 #ifdef DEBUG |
997 if (FLAG_trace_regexp_assembler) { | 1156 if (FLAG_trace_regexp_assembler) { |
998 delete macro_assembler_; | 1157 delete macro_assembler_; |
999 } | 1158 } |
1000 #endif | 1159 #endif |
1001 return array; | 1160 return array; |
1002 } | 1161 } |
1003 | 1162 |
1004 | 1163 |
1005 bool RegExpNode::GoTo(RegExpCompiler* compiler) { | 1164 bool GenerationVariant::mentions_reg(int reg) { |
1006 // TODO(erikcorry): Implement support. | 1165 for (DeferredAction* action = actions_; |
1007 if (info_.follows_word_interest || | 1166 action != NULL; |
1008 info_.follows_newline_interest || | 1167 action = action->next()) { |
1009 info_.follows_start_interest) { | 1168 if (reg == action->reg()) return true; |
1010 return false; | 1169 } |
1011 } | 1170 return false; |
1012 if (label_.is_bound()) { | 1171 } |
1013 compiler->macro_assembler()->GoTo(&label_); | 1172 |
1014 return true; | 1173 |
1174 int GenerationVariant::FindAffectedRegisters(OutSet* affected_registers) { | |
1175 int max_register = -1; | |
1176 for (DeferredAction* action = actions_; | |
1177 action != NULL; | |
1178 action = action->next()) { | |
1179 affected_registers->Set(action->reg()); | |
1180 if (action->reg() > max_register) max_register = action->reg(); | |
1181 } | |
1182 return max_register; | |
1183 } | |
1184 | |
1185 | |
1186 void GenerationVariant::PushAffectedRegisters(RegExpMacroAssembler* macro, | |
1187 int max_register, | |
1188 OutSet& affected_registers) { | |
1189 for (int reg = 0; reg <= max_register; reg++) { | |
1190 if (affected_registers.Get(reg)) macro->PushRegister(reg); | |
1191 } | |
1192 } | |
1193 | |
1194 | |
1195 void GenerationVariant::RestoreAffectedRegisters(RegExpMacroAssembler* macro, | |
1196 int max_register, | |
1197 OutSet& affected_registers) { | |
1198 for (int reg = max_register; reg >= 0; reg--) { | |
1199 if (affected_registers.Get(reg)) macro->PopRegister(reg); | |
1200 } | |
1201 } | |
1202 | |
1203 | |
1204 void GenerationVariant::PerformDeferredActions(RegExpMacroAssembler* macro, | |
1205 int max_register, | |
Christian Plesner Hansen
2008/12/08 08:54:19
Indentation off.
| |
1206 OutSet& affected_registers) { | |
1207 for (int reg = 0; reg <= max_register; reg++) { | |
1208 if (!affected_registers.Get(reg)) { | |
1209 continue; | |
1210 } | |
1211 int value = 0; | |
1212 bool absolute = false; | |
1213 int store_position = -1; | |
1214 // This is a little tricky because we are scanning the actions in reverse | |
1215 // historical order (newest first). | |
1216 for (DeferredAction* action = actions_; | |
1217 action != NULL; | |
1218 action = action->next()) { | |
1219 if (action->reg() == reg) { | |
1220 switch (action->type()) { | |
1221 case ActionNode::SET_REGISTER: { | |
1222 GenerationVariant::DeferredSetRegister* psr = | |
1223 static_cast<GenerationVariant::DeferredSetRegister*>(action); | |
1224 value += psr->value(); | |
1225 absolute = true; | |
1226 ASSERT_EQ(store_position, -1); | |
1227 break; | |
1228 } | |
1229 case ActionNode::INCREMENT_REGISTER: | |
1230 if (!absolute) { | |
1231 value++; | |
1232 } | |
1233 ASSERT_EQ(store_position, -1); | |
1234 break; | |
1235 case ActionNode::STORE_POSITION: { | |
1236 GenerationVariant::DeferredCapture* pc = | |
1237 static_cast<GenerationVariant::DeferredCapture*>(action); | |
1238 if (store_position == -1) { | |
1239 store_position = pc->cp_offset(); | |
1240 } | |
1241 ASSERT(!absolute); | |
1242 ASSERT_EQ(value, 0); | |
1243 break; | |
1244 } | |
1245 default: | |
1246 UNREACHABLE(); | |
1247 break; | |
1248 } | |
1249 } | |
1250 } | |
1251 if (store_position != -1) { | |
1252 macro->WriteCurrentPositionToRegister(reg, store_position); | |
1253 } else { | |
1254 if (absolute) { | |
1255 macro->SetRegister(reg, value); | |
1256 } else { | |
1257 if (value != 0) { | |
1258 macro->AdvanceRegister(reg, value); | |
1259 } | |
1260 } | |
1261 } | |
1262 } | |
1263 } | |
1264 | |
1265 | |
1266 // This is called as we come into a loop choice node and some other tricky | |
1267 // nodes. It normalises the state of the code generator to ensure we can | |
1268 // generate generic code. | |
1269 bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) { | |
1270 RegExpMacroAssembler* macro = compiler->macro_assembler(); | |
1271 | |
1272 ASSERT(actions_ != NULL || cp_offset_ != 0 || backtrack() != NULL); | |
1273 | |
1274 if (actions_ == NULL && backtrack() == NULL) { | |
1275 // Here we just have some deferred cp advances to fix and we are back to | |
1276 // a normal situation. | |
1277 macro->AdvanceCurrentPosition(cp_offset_); | |
1278 // Create a new trivial state and generate the node with that. | |
1279 GenerationVariant new_state; | |
1280 return successor->Emit(compiler, &new_state); | |
1281 } | |
1282 | |
1283 // Generate deferred actions here along with code to undo them again. | |
1284 OutSet affected_registers; | |
1285 int max_register = FindAffectedRegisters(&affected_registers); | |
1286 PushAffectedRegisters(macro, max_register, affected_registers); | |
1287 PerformDeferredActions(macro, max_register, affected_registers); | |
1288 if (backtrack() != NULL) { | |
1289 // Here we have a concrete backtrack location. These are set up by choice | |
1290 // nodes and so they indicate that we have a deferred save of the current | |
1291 // position which we may need to emit here. | |
1292 macro->PushCurrentPosition(); | |
1293 } | |
1294 if (cp_offset_ != 0) { | |
1295 macro->AdvanceCurrentPosition(cp_offset_); | |
1296 } | |
1297 | |
1298 // Create a new trivial state and generate the node with that. | |
Christian Plesner Hansen
2008/12/08 08:54:19
trivial -> empty?
| |
1299 Label undo; | |
1300 macro->PushBacktrack(&undo); | |
1301 GenerationVariant new_state; | |
1302 bool ok = successor->Emit(compiler, &new_state); | |
1303 | |
1304 // On backtrack we need to restore state. | |
1305 macro->Bind(&undo); | |
1306 if (!ok) return false; | |
1307 if (backtrack() != NULL) { | |
1308 macro->PopCurrentPosition(); | |
1309 } | |
1310 RestoreAffectedRegisters(macro, max_register, affected_registers); | |
1311 if (backtrack() == NULL) { | |
1312 macro->Backtrack(); | |
1015 } else { | 1313 } else { |
1016 if (compiler->recursion_depth() > RegExpCompiler::kMaxRecursion) { | 1314 macro->GoTo(backtrack()); |
1017 compiler->macro_assembler()->GoTo(&label_); | 1315 } |
1018 compiler->AddWork(this); | 1316 |
1019 return true; | 1317 return true; |
1020 } else { | 1318 } |
1021 compiler->IncrementRecursionDepth(); | 1319 |
1022 bool how_it_went = Emit(compiler); | 1320 |
1023 compiler->DecrementRecursionDepth(); | 1321 void EndNode::EmitInfoChecks(RegExpMacroAssembler* macro, |
1024 return how_it_went; | 1322 GenerationVariant* variant) { |
1025 } | 1323 if (info()->at_end) { |
1026 } | 1324 Label succeed; |
1027 } | 1325 // LoadCurrentCharacter will go to the label if we are at the end of the |
1028 | 1326 // input string. |
1029 | 1327 macro->LoadCurrentCharacter(0, &succeed); |
1030 // EndNodes are special. Because they can be very common and they are very | 1328 macro->GoTo(variant->backtrack()); |
1031 // short we normally inline them. That is, if we are asked to emit a GoTo | 1329 macro->Bind(&succeed); |
1032 // we just emit the entire node. Since they don't have successors this | 1330 } |
1033 // works. | 1331 } |
1034 bool EndNode::GoTo(RegExpCompiler* compiler) { | 1332 |
1035 if (info()->follows_word_interest || | 1333 |
1036 info()->follows_newline_interest || | 1334 bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, |
1037 info()->follows_start_interest) { | 1335 GenerationVariant* variant) { |
1038 return false; | 1336 if (!variant->is_trivial()) { |
1039 } | 1337 return variant->Flush(compiler, this); |
1040 return Emit(compiler); | 1338 } |
1041 } | |
1042 | |
1043 | |
1044 Label* RegExpNode::label() { | |
1045 return &label_; | |
1046 } | |
1047 | |
1048 | |
1049 bool EndNode::Emit(RegExpCompiler* compiler) { | |
1050 RegExpMacroAssembler* macro = compiler->macro_assembler(); | 1339 RegExpMacroAssembler* macro = compiler->macro_assembler(); |
1340 if (!label()->is_bound()) { | |
1341 macro->Bind(label()); | |
1342 } | |
1343 EmitInfoChecks(macro, variant); | |
1344 macro->ReadCurrentPositionFromRegister(current_position_register_); | |
1345 macro->ReadStackPointerFromRegister(stack_pointer_register_); | |
1346 // Now that we have unwound the stack we find at the top of the stack the | |
1347 // backtrack that the BeginSubmatch node got. | |
1348 macro->Backtrack(); | |
1349 return true; | |
1350 } | |
1351 | |
1352 | |
1353 bool EndNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { | |
1354 if (!variant->is_trivial()) { | |
1355 return variant->Flush(compiler, this); | |
1356 } | |
1357 RegExpMacroAssembler* macro = compiler->macro_assembler(); | |
1358 if (!label()->is_bound()) { | |
1359 macro->Bind(label()); | |
1360 } | |
1051 switch (action_) { | 1361 switch (action_) { |
1052 case ACCEPT: | 1362 case ACCEPT: |
1053 if (!label()->is_bound()) Bind(macro); | 1363 EmitInfoChecks(macro, variant); |
1054 if (info()->at_end) { | |
1055 Label succeed; | |
1056 // LoadCurrentCharacter will go to the label if we are at the end of the | |
1057 // input string. | |
1058 macro->LoadCurrentCharacter(0, &succeed); | |
1059 macro->Backtrack(); | |
1060 macro->Bind(&succeed); | |
1061 } | |
1062 macro->Succeed(); | 1364 macro->Succeed(); |
1063 return true; | 1365 return true; |
1064 case BACKTRACK: | 1366 case BACKTRACK: |
1065 if (!label()->is_bound()) Bind(macro); | |
1066 ASSERT(!info()->at_end); | 1367 ASSERT(!info()->at_end); |
1067 macro->Backtrack(); | 1368 macro->GoTo(variant->backtrack()); |
1068 return true; | 1369 return true; |
1069 } | 1370 case NEGATIVE_SUBMATCH_SUCCESS: |
1371 // This case is handled in a different virtual method. | |
1372 UNREACHABLE(); | |
1373 } | |
1374 UNIMPLEMENTED(); | |
1070 return false; | 1375 return false; |
1071 } | 1376 } |
1072 | 1377 |
1073 | 1378 |
1074 void GuardedAlternative::AddGuard(Guard* guard) { | 1379 void GuardedAlternative::AddGuard(Guard* guard) { |
1075 if (guards_ == NULL) | 1380 if (guards_ == NULL) |
1076 guards_ = new ZoneList<Guard*>(1); | 1381 guards_ = new ZoneList<Guard*>(1); |
1077 guards_->Add(guard); | 1382 guards_->Add(guard); |
1078 } | 1383 } |
1079 | 1384 |
1080 | 1385 |
1081 ActionNode* ActionNode::StoreRegister(int reg, | 1386 ActionNode* ActionNode::SetRegister(int reg, |
1082 int val, | 1387 int val, |
1083 RegExpNode* on_success) { | 1388 RegExpNode* on_success) { |
1084 ActionNode* result = new ActionNode(STORE_REGISTER, on_success); | 1389 ActionNode* result = new ActionNode(SET_REGISTER, on_success); |
1085 result->data_.u_store_register.reg = reg; | 1390 result->data_.u_store_register.reg = reg; |
1086 result->data_.u_store_register.value = val; | 1391 result->data_.u_store_register.value = val; |
1087 return result; | 1392 return result; |
1088 } | 1393 } |
1089 | 1394 |
1090 | 1395 |
1091 ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) { | 1396 ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) { |
1092 ActionNode* result = new ActionNode(INCREMENT_REGISTER, on_success); | 1397 ActionNode* result = new ActionNode(INCREMENT_REGISTER, on_success); |
1093 result->data_.u_increment_register.reg = reg; | 1398 result->data_.u_increment_register.reg = reg; |
1094 return result; | 1399 return result; |
1095 } | 1400 } |
1096 | 1401 |
1097 | 1402 |
1098 ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) { | 1403 ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) { |
1099 ActionNode* result = new ActionNode(STORE_POSITION, on_success); | 1404 ActionNode* result = new ActionNode(STORE_POSITION, on_success); |
1100 result->data_.u_position_register.reg = reg; | 1405 result->data_.u_position_register.reg = reg; |
1101 return result; | 1406 return result; |
1102 } | 1407 } |
1103 | 1408 |
1104 | 1409 |
1105 ActionNode* ActionNode::RestorePosition(int reg, RegExpNode* on_success) { | |
1106 ActionNode* result = new ActionNode(RESTORE_POSITION, on_success); | |
1107 result->data_.u_position_register.reg = reg; | |
1108 return result; | |
1109 } | |
1110 | |
1111 | |
1112 ActionNode* ActionNode::BeginSubmatch(int stack_reg, | 1410 ActionNode* ActionNode::BeginSubmatch(int stack_reg, |
1113 int position_reg, | 1411 int position_reg, |
1114 RegExpNode* on_success) { | 1412 RegExpNode* on_success) { |
1115 ActionNode* result = new ActionNode(BEGIN_SUBMATCH, on_success); | 1413 ActionNode* result = new ActionNode(BEGIN_SUBMATCH, on_success); |
1116 result->data_.u_submatch.stack_pointer_register = stack_reg; | 1414 result->data_.u_submatch.stack_pointer_register = stack_reg; |
1117 result->data_.u_submatch.current_position_register = position_reg; | 1415 result->data_.u_submatch.current_position_register = position_reg; |
1118 return result; | 1416 return result; |
1119 } | 1417 } |
1120 | 1418 |
1121 | 1419 |
1122 ActionNode* ActionNode::EscapeSubmatch(int stack_reg, | 1420 ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg, |
1123 bool restore_position, | 1421 int position_reg, |
1124 int position_reg, | 1422 RegExpNode* on_success) { |
1125 RegExpNode* on_success) { | 1423 ActionNode* result = new ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success); |
1126 ActionNode* result = new ActionNode(ESCAPE_SUBMATCH, on_success); | |
1127 result->data_.u_submatch.stack_pointer_register = stack_reg; | 1424 result->data_.u_submatch.stack_pointer_register = stack_reg; |
1128 if (restore_position) { | 1425 result->data_.u_submatch.current_position_register = position_reg; |
1129 result->data_.u_submatch.current_position_register = position_reg; | |
1130 } else { | |
1131 result->data_.u_submatch.current_position_register = -1; | |
1132 } | |
1133 return result; | 1426 return result; |
1134 } | 1427 } |
1135 | 1428 |
1136 | 1429 |
1137 #define DEFINE_ACCEPT(Type) \ | 1430 #define DEFINE_ACCEPT(Type) \ |
1138 void Type##Node::Accept(NodeVisitor* visitor) { \ | 1431 void Type##Node::Accept(NodeVisitor* visitor) { \ |
1139 visitor->Visit##Type(this); \ | 1432 visitor->Visit##Type(this); \ |
1140 } | 1433 } |
1141 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) | 1434 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) |
1142 #undef DEFINE_ACCEPT | 1435 #undef DEFINE_ACCEPT |
1143 | 1436 |
1144 | 1437 |
1145 // ------------------------------------------------------------------- | 1438 // ------------------------------------------------------------------- |
1146 // Emit code. | 1439 // Emit code. |
1147 | 1440 |
1148 | 1441 |
1149 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, | 1442 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, |
1150 Guard* guard, | 1443 Guard* guard, |
Christian Plesner Hansen
2008/12/08 08:54:19
Indentation.
| |
1151 Label* on_failure) { | 1444 GenerationVariant* variant) { |
1152 switch (guard->op()) { | 1445 switch (guard->op()) { |
1153 case Guard::LT: | 1446 case Guard::LT: |
1154 macro_assembler->IfRegisterGE(guard->reg(), guard->value(), on_failure); | 1447 ASSERT(!variant->mentions_reg(guard->reg())); |
1448 macro_assembler->IfRegisterGE(guard->reg(), | |
1449 guard->value(), | |
1450 variant->backtrack()); | |
1155 break; | 1451 break; |
1156 case Guard::GEQ: | 1452 case Guard::GEQ: |
1157 macro_assembler->IfRegisterLT(guard->reg(), guard->value(), on_failure); | 1453 ASSERT(!variant->mentions_reg(guard->reg())); |
1454 macro_assembler->IfRegisterLT(guard->reg(), | |
1455 guard->value(), | |
1456 variant->backtrack()); | |
1158 break; | 1457 break; |
1159 } | 1458 } |
1160 } | 1459 } |
1161 | 1460 |
1162 | 1461 |
1163 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize; | 1462 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize; |
1164 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange; | 1463 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange; |
1165 | 1464 |
1166 | 1465 |
1167 static inline void EmitAtomNonLetters( | 1466 static inline void EmitAtomNonLetters( |
1168 RegExpMacroAssembler* macro_assembler, | 1467 RegExpMacroAssembler* macro_assembler, |
1169 TextElement elm, | 1468 TextElement elm, |
1170 Vector<const uc16> quarks, | 1469 Vector<const uc16> quarks, |
1171 Label* on_failure, | 1470 Label* on_failure, |
1172 int cp_offset) { | 1471 int cp_offset, |
1472 bool check_offset) { | |
1173 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1473 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
1474 // It is vital that this loop is backwards due to the unchecked character | |
1475 // load below. | |
1174 for (int i = quarks.length() - 1; i >= 0; i--) { | 1476 for (int i = quarks.length() - 1; i >= 0; i--) { |
1175 uc16 c = quarks[i]; | 1477 uc16 c = quarks[i]; |
1176 int length = uncanonicalize.get(c, '\0', chars); | 1478 int length = uncanonicalize.get(c, '\0', chars); |
1177 if (length <= 1) { | 1479 if (length <= 1) { |
1178 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); | 1480 if (check_offset && i == quarks.length() - 1) { |
1481 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); | |
1482 } else { | |
1483 // Here we don't need to check against the end of the input string | |
1484 // since this character lies before a character that matched. | |
1485 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i); | |
1486 } | |
1179 macro_assembler->CheckNotCharacter(c, on_failure); | 1487 macro_assembler->CheckNotCharacter(c, on_failure); |
1180 } | 1488 } |
1181 } | 1489 } |
1182 } | 1490 } |
1183 | 1491 |
1184 | 1492 |
1185 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, | 1493 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, |
1186 uc16 c1, | 1494 uc16 c1, |
1187 uc16 c2, | 1495 uc16 c2, |
1188 Label* on_failure) { | 1496 Label* on_failure) { |
(...skipping 20 matching lines...) Expand all Loading... | |
1209 } | 1517 } |
1210 return false; | 1518 return false; |
1211 } | 1519 } |
1212 | 1520 |
1213 | 1521 |
1214 static inline void EmitAtomLetters( | 1522 static inline void EmitAtomLetters( |
1215 RegExpMacroAssembler* macro_assembler, | 1523 RegExpMacroAssembler* macro_assembler, |
1216 TextElement elm, | 1524 TextElement elm, |
1217 Vector<const uc16> quarks, | 1525 Vector<const uc16> quarks, |
1218 Label* on_failure, | 1526 Label* on_failure, |
1219 int cp_offset) { | 1527 int cp_offset, |
1528 bool check_offset) { | |
1220 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1529 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
1530 // It is vital that this loop is backwards due to the unchecked character | |
1531 // load below. | |
1221 for (int i = quarks.length() - 1; i >= 0; i--) { | 1532 for (int i = quarks.length() - 1; i >= 0; i--) { |
1222 uc16 c = quarks[i]; | 1533 uc16 c = quarks[i]; |
1223 int length = uncanonicalize.get(c, '\0', chars); | 1534 int length = uncanonicalize.get(c, '\0', chars); |
1224 if (length <= 1) continue; | 1535 if (length <= 1) continue; |
1225 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); | 1536 if (check_offset && i == quarks.length() - 1) { |
1537 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); | |
1538 } else { | |
1539 // Here we don't need to check against the end of the input string | |
1540 // since this character lies before a character that matched. | |
1541 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i); | |
1542 } | |
1226 Label ok; | 1543 Label ok; |
1227 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); | 1544 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); |
1228 switch (length) { | 1545 switch (length) { |
1229 case 2: { | 1546 case 2: { |
1230 if (ShortCutEmitCharacterPair(macro_assembler, | 1547 if (ShortCutEmitCharacterPair(macro_assembler, |
1231 chars[0], | 1548 chars[0], |
1232 chars[1], | 1549 chars[1], |
1233 on_failure)) { | 1550 on_failure)) { |
1234 ok.Unuse(); | 1551 ok.Unuse(); |
1235 } else { | 1552 } else { |
(...skipping 16 matching lines...) Expand all Loading... | |
1252 UNREACHABLE(); | 1569 UNREACHABLE(); |
1253 break; | 1570 break; |
1254 } | 1571 } |
1255 } | 1572 } |
1256 } | 1573 } |
1257 | 1574 |
1258 | 1575 |
1259 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 1576 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, |
1260 RegExpCharacterClass* cc, | 1577 RegExpCharacterClass* cc, |
1261 int cp_offset, | 1578 int cp_offset, |
1262 Label* on_failure) { | 1579 Label* on_failure, |
1263 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); | 1580 bool check_offset) { |
1264 cp_offset++; | |
1265 | |
1266 ZoneList<CharacterRange>* ranges = cc->ranges(); | 1581 ZoneList<CharacterRange>* ranges = cc->ranges(); |
1267 | 1582 |
1268 Label success; | 1583 Label success; |
1269 | 1584 |
1270 Label* char_is_in_class = | 1585 Label* char_is_in_class = |
1271 cc->is_negated() ? on_failure : &success; | 1586 cc->is_negated() ? on_failure : &success; |
1272 | 1587 |
1273 int range_count = ranges->length(); | 1588 int range_count = ranges->length(); |
1274 | 1589 |
1275 if (range_count == 0) { | 1590 if (range_count == 0) { |
1276 if (!cc->is_negated()) { | 1591 if (!cc->is_negated()) { |
1277 macro_assembler->GoTo(on_failure); | 1592 macro_assembler->GoTo(on_failure); |
1278 } | 1593 } |
1279 return; | 1594 return; |
1280 } | 1595 } |
1281 | 1596 |
1597 if (range_count == 1 && | |
1598 !cc->is_negated() && | |
1599 ranges->at(0).from() == 0 && | |
Christian Plesner Hansen
2008/12/08 08:54:19
Maybe add an IsEverything method on CharacterRange
| |
1600 ranges->at(0).to() == 0xffff) { | |
1601 // This is a common case hit by non-anchored expressions. | |
1602 // TODO(erikcorry): We should have a macro assembler instruction that just | |
1603 // checks for end of string without loading the character. | |
1604 if (check_offset) { | |
1605 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); | |
1606 } | |
1607 return; | |
1608 } | |
1609 | |
1610 if (check_offset) { | |
1611 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); | |
1612 } else { | |
1613 // Here we don't need to check against the end of the input string | |
1614 // since this character lies before a character that matched. | |
1615 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset); | |
1616 } | |
1617 | |
1282 for (int i = 0; i < range_count - 1; i++) { | 1618 for (int i = 0; i < range_count - 1; i++) { |
1283 CharacterRange& range = ranges->at(i); | 1619 CharacterRange& range = ranges->at(i); |
1284 Label next_range; | 1620 Label next_range; |
1285 uc16 from = range.from(); | 1621 uc16 from = range.from(); |
1286 uc16 to = range.to(); | 1622 uc16 to = range.to(); |
1287 if (to == from) { | 1623 if (to == from) { |
1288 macro_assembler->CheckCharacter(to, char_is_in_class); | 1624 macro_assembler->CheckCharacter(to, char_is_in_class); |
1289 } else { | 1625 } else { |
1290 if (from != 0) { | 1626 if (from != 0) { |
1291 macro_assembler->CheckCharacterLT(from, &next_range); | 1627 macro_assembler->CheckCharacterLT(from, &next_range); |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1326 } else { | 1662 } else { |
1327 if (cc->is_negated()) { | 1663 if (cc->is_negated()) { |
1328 macro_assembler->GoTo(on_failure); | 1664 macro_assembler->GoTo(on_failure); |
1329 } | 1665 } |
1330 } | 1666 } |
1331 } | 1667 } |
1332 macro_assembler->Bind(&success); | 1668 macro_assembler->Bind(&success); |
1333 } | 1669 } |
1334 | 1670 |
1335 | 1671 |
1672 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, | |
1673 GenerationVariant* variant) { | |
1674 // TODO(erikcorry): Implement support. | |
1675 if (info_.follows_word_interest || | |
1676 info_.follows_newline_interest || | |
1677 info_.follows_start_interest) { | |
1678 return FAIL; | |
1679 } | |
1336 | 1680 |
1337 bool TextNode::Emit(RegExpCompiler* compiler) { | 1681 // If we are generating a greedy loop then don't stop and don't reuse code. |
1682 if (variant->stop_node() != NULL) { | |
1683 return CONTINUE; | |
1684 } | |
1685 | |
1338 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1686 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
1339 Bind(macro_assembler); | 1687 if (variant->is_trivial()) { |
1688 if (label_.is_bound()) { | |
1689 // We are being asked to generate a generic version, but that's already | |
1690 // been done so just go to it. | |
1691 macro_assembler->GoTo(&label_); | |
1692 return DONE; | |
1693 } | |
1694 if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) { | |
1695 // To avoid too deep recursion we push the node to the work queue and just | |
1696 // generate a goto here. | |
1697 compiler->AddWork(this); | |
1698 macro_assembler->GoTo(&label_); | |
1699 return DONE; | |
1700 } | |
1701 // Generate generic version of the node and bind the label for later use. | |
1702 macro_assembler->Bind(&label_); | |
1703 return CONTINUE; | |
1704 } | |
1705 | |
1706 // We are being asked to make a non-generic version. Keep track of how many | |
1707 // non-generic versions we generate so as not to overdo it. | |
1708 variants_generated_++; | |
1709 if (variants_generated_ < kMaxVariantsGenerated && | |
1710 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) { | |
1711 return CONTINUE; | |
1712 } | |
1713 | |
1714 // If we get here there have been too many variants generated or recursion | |
1715 // is too deep. Time to switch to a generic version. The code for | |
1716 // generic versions above can handle deep recursion properly. | |
1717 bool ok = variant->Flush(compiler, this); | |
1718 return ok ? DONE : FAIL; | |
1719 } | |
1720 | |
1721 | |
1722 // This generates the code to match a text node. A text node can contain | |
1723 // straight character sequences (possibly to be matched in a case-independent | |
1724 // way) and character classes. In order to be most efficient we test for the | |
1725 // simple things first and then move on to the more complicated things. The | |
1726 // simplest thing is a non-letter or a letter if we are matching case. The | |
1727 // next-most simple thing is a case-independent letter. The least simple is | |
1728 // a character class. Another optimization is that we test the last one first. | |
1729 // If that succeeds we don't need to test for the end of the string when we | |
1730 // load other characters. | |
1731 bool TextNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { | |
1732 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | |
1733 Label *backtrack = variant->backtrack(); | |
1734 LimitResult limit_result = LimitVersions(compiler, variant); | |
1735 if (limit_result == FAIL) return false; | |
1736 if (limit_result == DONE) return true; | |
1737 ASSERT(limit_result == CONTINUE); | |
1738 | |
1340 int element_count = elms_->length(); | 1739 int element_count = elms_->length(); |
1341 ASSERT(element_count != 0); | 1740 ASSERT(element_count != 0); |
1342 int cp_offset = 0; | |
1343 if (info()->at_end) { | 1741 if (info()->at_end) { |
1344 macro_assembler->Backtrack(); | 1742 macro_assembler->GoTo(backtrack); |
1345 return true; | 1743 return true; |
1346 } | 1744 } |
1347 // First, handle straight character matches. | 1745 // First, handle straight character matches. |
1348 for (int i = 0; i < element_count; i++) { | 1746 int checked_up_to = -1; |
1747 for (int i = element_count - 1; i >= 0; i--) { | |
1349 TextElement elm = elms_->at(i); | 1748 TextElement elm = elms_->at(i); |
1749 ASSERT(elm.cp_offset >= 0); | |
1750 int cp_offset = variant->cp_offset() + elm.cp_offset; | |
1350 if (elm.type == TextElement::ATOM) { | 1751 if (elm.type == TextElement::ATOM) { |
1351 Vector<const uc16> quarks = elm.data.u_atom->data(); | 1752 Vector<const uc16> quarks = elm.data.u_atom->data(); |
1753 int last_cp_offset = cp_offset + quarks.length(); | |
1352 if (compiler->ignore_case()) { | 1754 if (compiler->ignore_case()) { |
1353 EmitAtomNonLetters(macro_assembler, | 1755 EmitAtomNonLetters(macro_assembler, |
1354 elm, | 1756 elm, |
1355 quarks, | 1757 quarks, |
1356 on_failure_->label(), | 1758 backtrack, |
1357 cp_offset); | 1759 cp_offset, |
1760 checked_up_to < last_cp_offset); | |
1358 } else { | 1761 } else { |
1359 macro_assembler->CheckCharacters(quarks, | 1762 macro_assembler->CheckCharacters(quarks, |
1360 cp_offset, | 1763 cp_offset, |
1361 on_failure_->label()); | 1764 backtrack, |
1765 checked_up_to < last_cp_offset); | |
1362 } | 1766 } |
1363 cp_offset += quarks.length(); | 1767 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; |
1364 } else { | 1768 } else { |
1365 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); | 1769 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); |
1366 cp_offset++; | |
1367 } | 1770 } |
1368 } | 1771 } |
1369 // Second, handle case independent letter matches if any. | 1772 // Second, handle case independent letter matches if any. |
1370 if (compiler->ignore_case()) { | 1773 if (compiler->ignore_case()) { |
1371 cp_offset = 0; | 1774 for (int i = element_count - 1; i >= 0; i--) { |
1372 for (int i = 0; i < element_count; i++) { | |
1373 TextElement elm = elms_->at(i); | 1775 TextElement elm = elms_->at(i); |
1776 int cp_offset = variant->cp_offset() + elm.cp_offset; | |
1374 if (elm.type == TextElement::ATOM) { | 1777 if (elm.type == TextElement::ATOM) { |
1375 Vector<const uc16> quarks = elm.data.u_atom->data(); | 1778 Vector<const uc16> quarks = elm.data.u_atom->data(); |
1779 int last_cp_offset = cp_offset + quarks.length(); | |
1376 EmitAtomLetters(macro_assembler, | 1780 EmitAtomLetters(macro_assembler, |
1377 elm, | 1781 elm, |
1378 quarks, | 1782 quarks, |
1379 on_failure_->label(), | 1783 backtrack, |
1380 cp_offset); | 1784 cp_offset, |
1381 cp_offset += quarks.length(); | 1785 checked_up_to < last_cp_offset); |
1382 } else { | 1786 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; |
1383 cp_offset++; | |
1384 } | 1787 } |
1385 } | 1788 } |
1386 } | 1789 } |
1387 // If the fast character matches passed then do the character classes. | 1790 // If the fast character matches passed then do the character classes. |
1388 cp_offset = 0; | 1791 for (int i = element_count - 1; i >= 0; i--) { |
1389 for (int i = 0; i < element_count; i++) { | |
1390 TextElement elm = elms_->at(i); | 1792 TextElement elm = elms_->at(i); |
1793 int cp_offset = variant->cp_offset() + elm.cp_offset; | |
1391 if (elm.type == TextElement::CHAR_CLASS) { | 1794 if (elm.type == TextElement::CHAR_CLASS) { |
1392 RegExpCharacterClass* cc = elm.data.u_char_class; | 1795 RegExpCharacterClass* cc = elm.data.u_char_class; |
1393 EmitCharClass(macro_assembler, cc, cp_offset, on_failure_->label()); | 1796 EmitCharClass(macro_assembler, |
1394 cp_offset++; | 1797 cc, |
1395 } else { | 1798 cp_offset, |
1396 cp_offset += elm.data.u_atom->data().length(); | 1799 backtrack, |
1800 checked_up_to < cp_offset); | |
1801 if (cp_offset > checked_up_to) checked_up_to = cp_offset; | |
1397 } | 1802 } |
1398 } | 1803 } |
1399 | 1804 |
1400 compiler->AddWork(on_failure_); | 1805 GenerationVariant new_variant(*variant); |
1401 macro_assembler->AdvanceCurrentPosition(cp_offset); | 1806 new_variant.set_cp_offset(checked_up_to + 1); |
1402 return on_success()->GoTo(compiler); | 1807 RecursionCheck rc(compiler); |
1808 return on_success()->Emit(compiler, &new_variant); | |
1403 } | 1809 } |
1404 | 1810 |
1405 | 1811 |
1406 void TextNode::MakeCaseIndependent() { | 1812 void TextNode::MakeCaseIndependent() { |
1407 int element_count = elms_->length(); | 1813 int element_count = elms_->length(); |
1408 for (int i = 0; i < element_count; i++) { | 1814 for (int i = 0; i < element_count; i++) { |
1409 TextElement elm = elms_->at(i); | 1815 TextElement elm = elms_->at(i); |
1410 if (elm.type == TextElement::CHAR_CLASS) { | 1816 if (elm.type == TextElement::CHAR_CLASS) { |
1411 RegExpCharacterClass* cc = elm.data.u_char_class; | 1817 RegExpCharacterClass* cc = elm.data.u_char_class; |
1412 ZoneList<CharacterRange>* ranges = cc->ranges(); | 1818 ZoneList<CharacterRange>* ranges = cc->ranges(); |
1413 int range_count = ranges->length(); | 1819 int range_count = ranges->length(); |
1414 for (int i = 0; i < range_count; i++) { | 1820 for (int i = 0; i < range_count; i++) { |
1415 ranges->at(i).AddCaseEquivalents(ranges); | 1821 ranges->at(i).AddCaseEquivalents(ranges); |
1416 } | 1822 } |
1417 } | 1823 } |
1418 } | 1824 } |
1419 } | 1825 } |
1420 | 1826 |
1421 | 1827 |
1422 bool ChoiceNode::Emit(RegExpCompiler* compiler) { | 1828 int TextNode::GreedyLoopTextLength() { |
1829 TextElement elm = elms_->at(elms_->length() - 1); | |
1830 if (elm.type == TextElement::CHAR_CLASS) { | |
1831 return elm.cp_offset + 1; | |
1832 } else { | |
1833 return elm.cp_offset + elm.data.u_atom->data().length(); | |
1834 } | |
1835 } | |
1836 | |
1837 | |
1838 // Finds the fixed match length of a sequence of nodes that goes from | |
1839 // this alternative and back to this choice node. If there are variable | |
1840 // length nodes or other complications in the way then return a sentinel | |
1841 // value indicating that a greedy loop cannot be constructed. | |
1842 int ChoiceNode::GreedyLoopTextLength(GuardedAlternative* alternative) { | |
1843 int length = 0; | |
1844 RegExpNode* node = alternative->node(); | |
1845 // Later we will generate code for all these text nodes using recursion | |
1846 // so we have to limit the max number. | |
1847 int recursion_depth = 0; | |
1848 while (node != this) { | |
1849 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { | |
1850 return kNodeIsTooComplexForGreedyLoops; | |
1851 } | |
1852 NodeInfo* info = node->info(); | |
1853 if (info->follows_word_interest || | |
1854 info->follows_newline_interest || | |
1855 info->follows_start_interest) { | |
1856 return kNodeIsTooComplexForGreedyLoops; | |
1857 } | |
1858 int node_length = node->GreedyLoopTextLength(); | |
1859 if (node_length == kNodeIsTooComplexForGreedyLoops) { | |
1860 return kNodeIsTooComplexForGreedyLoops; | |
1861 } | |
1862 length += node_length; | |
1863 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); | |
1864 node = seq_node->on_success(); | |
1865 } | |
1866 return length; | |
1867 } | |
1868 | |
1869 | |
1870 bool LoopChoiceNode::Emit(RegExpCompiler* compiler, | |
1871 GenerationVariant* variant) { | |
1872 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | |
1873 if (variant->stop_node() == this) { | |
1874 int text_length = GreedyLoopTextLength(&(alternatives_->at(0))); | |
1875 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops); | |
1876 // Update the counter-based backtracking info on the stack. This is an | |
1877 // optimization for greedy loops (see below). | |
1878 ASSERT(variant->cp_offset() == text_length); | |
1879 macro_assembler->AdvanceCurrentPosition(text_length); | |
1880 macro_assembler->GoTo(variant->loop_label()); | |
1881 return true; | |
1882 } | |
1883 ASSERT(variant->stop_node() == NULL); | |
1884 if (!variant->is_trivial()) { | |
1885 return variant->Flush(compiler, this); | |
1886 } | |
1887 return ChoiceNode::Emit(compiler, variant); | |
1888 } | |
1889 | |
1890 | |
1891 bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { | |
1892 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | |
1423 int choice_count = alternatives_->length(); | 1893 int choice_count = alternatives_->length(); |
1424 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1894 #ifdef DEBUG |
1425 Bind(macro_assembler); | 1895 for (int i = 0; i < choice_count - 1; i++) { |
1896 GuardedAlternative alternative = alternatives_->at(i); | |
1897 ZoneList<Guard*>* guards = alternative.guards(); | |
1898 int guard_count = (guards == NULL) ? 0 : guards->length(); | |
1899 for (int j = 0; j < guard_count; j++) { | |
1900 ASSERT(!variant->mentions_reg(guards->at(j)->reg())); | |
1901 } | |
1902 } | |
1903 #endif | |
1904 | |
1905 LimitResult limit_result = LimitVersions(compiler, variant); | |
1906 if (limit_result == DONE) return true; | |
1907 if (limit_result == FAIL) return false; | |
1908 ASSERT(limit_result == CONTINUE); | |
1909 | |
1910 RecursionCheck rc(compiler); | |
1911 | |
1912 GenerationVariant* current_variant = variant; | |
1913 | |
1914 int text_length = GreedyLoopTextLength(&(alternatives_->at(0))); | |
1915 bool greedy_loop = false; | |
1916 Label greedy_loop_label; | |
1917 GenerationVariant counter_backtrack_variant(&greedy_loop_label); | |
1918 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { | |
1919 // Here we have special handling for greedy loops containing only text nodes | |
1920 // and other simple nodes. These are handled by pushing the current | |
1921 // position on the stack and then incrementing the current position each | |
1922 // time around the switch. On backtrack we decrement the current position | |
1923 // and check it against the pushed value. This avoids pushing backtrack | |
1924 // information for each iteration of the loop, which could take up a lot of | |
1925 // space. | |
1926 greedy_loop = true; | |
1927 ASSERT(variant->stop_node() == NULL); | |
1928 macro_assembler->PushCurrentPosition(); | |
1929 current_variant = &counter_backtrack_variant; | |
1930 Label greedy_match_failed; | |
1931 GenerationVariant greedy_match_variant(&greedy_match_failed); | |
1932 Label loop_label; | |
1933 macro_assembler->Bind(&loop_label); | |
1934 greedy_match_variant.set_stop_node(this); | |
1935 greedy_match_variant.set_loop_label(&loop_label); | |
1936 bool ok = alternatives_->at(0).node()->Emit(compiler, | |
1937 &greedy_match_variant); | |
1938 macro_assembler->Bind(&greedy_match_failed); | |
1939 if (!ok) { | |
1940 greedy_loop_label.Unuse(); | |
1941 return false; | |
1942 } | |
1943 } | |
1944 | |
1945 Label second_choice; // For use in greedy matches. | |
1946 macro_assembler->Bind(&second_choice); | |
1947 | |
1426 // For now we just call all choices one after the other. The idea ultimately | 1948 // For now we just call all choices one after the other. The idea ultimately |
1427 // is to use the Dispatch table to try only the relevant ones. | 1949 // is to use the Dispatch table to try only the relevant ones. |
1428 for (int i = 0; i < choice_count - 1; i++) { | 1950 for (int i = greedy_loop ? 1 : 0; i < choice_count - 1; i++) { |
1429 GuardedAlternative alternative = alternatives_->at(i); | 1951 GuardedAlternative alternative = alternatives_->at(i); |
1430 Label after; | 1952 Label after; |
1431 Label after_no_pop_cp; | |
1432 ZoneList<Guard*>* guards = alternative.guards(); | 1953 ZoneList<Guard*>* guards = alternative.guards(); |
1433 if (guards != NULL) { | 1954 int guard_count = (guards == NULL) ? 0 : guards->length(); |
1434 int guard_count = guards->length(); | 1955 GenerationVariant new_variant(*current_variant); |
1435 for (int j = 0; j < guard_count; j++) { | 1956 new_variant.set_backtrack(&after); |
1436 GenerateGuard(macro_assembler, guards->at(j), &after_no_pop_cp); | 1957 for (int j = 0; j < guard_count; j++) { |
1437 } | 1958 GenerateGuard(macro_assembler, guards->at(j), &new_variant); |
1438 } | 1959 } |
1439 macro_assembler->PushCurrentPosition(); | 1960 if (!alternative.node()->Emit(compiler, &new_variant)) { |
1440 macro_assembler->PushBacktrack(&after); | |
1441 if (!alternative.node()->GoTo(compiler)) { | |
1442 after.Unuse(); | 1961 after.Unuse(); |
1443 after_no_pop_cp.Unuse(); | |
1444 return false; | 1962 return false; |
1445 } | 1963 } |
1446 macro_assembler->Bind(&after); | 1964 macro_assembler->Bind(&after); |
1447 macro_assembler->PopCurrentPosition(); | |
1448 macro_assembler->Bind(&after_no_pop_cp); | |
1449 } | 1965 } |
1450 GuardedAlternative alternative = alternatives_->at(choice_count - 1); | 1966 GuardedAlternative alternative = alternatives_->at(choice_count - 1); |
1451 ZoneList<Guard*>* guards = alternative.guards(); | 1967 ZoneList<Guard*>* guards = alternative.guards(); |
1452 if (guards != NULL) { | 1968 int guard_count = (guards == NULL) ? 0 : guards->length(); |
1453 int guard_count = guards->length(); | 1969 for (int j = 0; j < guard_count; j++) { |
1454 for (int j = 0; j < guard_count; j++) { | 1970 GenerateGuard(macro_assembler, guards->at(j), current_variant); |
1455 GenerateGuard(macro_assembler, guards->at(j), on_failure_->label()); | 1971 } |
1456 } | 1972 bool ok = alternative.node()->Emit(compiler, current_variant); |
1457 } | 1973 if (!ok) return false; |
1458 if (!on_failure_->IsBacktrack()) { | 1974 if (greedy_loop) { |
1459 ASSERT_NOT_NULL(on_failure_ -> label()); | 1975 macro_assembler->Bind(&greedy_loop_label); |
1460 macro_assembler->PushBacktrack(on_failure_->label()); | 1976 // If we have unwound to the bottom then backtrack. |
1461 compiler->AddWork(on_failure_); | 1977 macro_assembler->CheckGreedyLoop(variant->backtrack()); |
1462 } | 1978 // Otherwise try the second priority at an earlier position. |
1463 if (!alternative.node()->GoTo(compiler)) { | 1979 macro_assembler->AdvanceCurrentPosition(-text_length); |
1464 return false; | 1980 macro_assembler->GoTo(&second_choice); |
1465 } | 1981 } |
1466 return true; | 1982 return true; |
1467 } | 1983 } |
1468 | 1984 |
1469 | 1985 |
1470 bool ActionNode::Emit(RegExpCompiler* compiler) { | 1986 bool ActionNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { |
1471 RegExpMacroAssembler* macro = compiler->macro_assembler(); | 1987 RegExpMacroAssembler* macro = compiler->macro_assembler(); |
1472 Bind(macro); | 1988 LimitResult limit_result = LimitVersions(compiler, variant); |
1989 if (limit_result == DONE) return true; | |
1990 if (limit_result == FAIL) return false; | |
1991 ASSERT(limit_result == CONTINUE); | |
1992 | |
1993 RecursionCheck rc(compiler); | |
1994 | |
1473 switch (type_) { | 1995 switch (type_) { |
1474 case STORE_REGISTER: | 1996 case STORE_POSITION: { |
1475 macro->SetRegister(data_.u_store_register.reg, | 1997 GenerationVariant::DeferredCapture |
1476 data_.u_store_register.value); | 1998 new_capture(data_.u_position_register.reg, variant); |
1477 break; | 1999 GenerationVariant new_variant = *variant; |
2000 new_variant.add_action(&new_capture); | |
2001 return on_success()->Emit(compiler, &new_variant); | |
2002 } | |
1478 case INCREMENT_REGISTER: { | 2003 case INCREMENT_REGISTER: { |
1479 Label undo; | 2004 GenerationVariant::DeferredIncrementRegister |
1480 macro->PushBacktrack(&undo); | 2005 new_increment(data_.u_increment_register.reg); |
1481 macro->AdvanceRegister(data_.u_increment_register.reg, 1); | 2006 GenerationVariant new_variant = *variant; |
1482 bool ok = on_success()->GoTo(compiler); | 2007 new_variant.add_action(&new_increment); |
1483 if (!ok) { | 2008 return on_success()->Emit(compiler, &new_variant); |
1484 undo.Unuse(); | 2009 } |
2010 case SET_REGISTER: { | |
2011 GenerationVariant::DeferredSetRegister | |
2012 new_set(data_.u_store_register.reg, data_.u_store_register.value); | |
2013 GenerationVariant new_variant = *variant; | |
2014 new_variant.add_action(&new_set); | |
2015 return on_success()->Emit(compiler, &new_variant); | |
2016 } | |
2017 case BEGIN_SUBMATCH: | |
2018 if (!variant->is_trivial()) return variant->Flush(compiler, this); | |
2019 macro->WriteCurrentPositionToRegister( | |
2020 data_.u_submatch.current_position_register, 0); | |
2021 macro->WriteStackPointerToRegister( | |
2022 data_.u_submatch.stack_pointer_register); | |
2023 return on_success()->Emit(compiler, variant); | |
2024 case POSITIVE_SUBMATCH_SUCCESS: | |
2025 if (!variant->is_trivial()) return variant->Flush(compiler, this); | |
2026 // TODO(erikcorry): Implement support. | |
2027 if (info()->follows_word_interest || | |
2028 info()->follows_newline_interest || | |
2029 info()->follows_start_interest) { | |
1485 return false; | 2030 return false; |
1486 } | 2031 } |
1487 macro->Bind(&undo); | |
1488 macro->AdvanceRegister(data_.u_increment_register.reg, -1); | |
1489 macro->Backtrack(); | |
1490 break; | |
1491 } | |
1492 case STORE_POSITION: { | |
1493 Label undo; | |
1494 macro->PushRegister(data_.u_position_register.reg); | |
1495 macro->PushBacktrack(&undo); | |
1496 macro->WriteCurrentPositionToRegister(data_.u_position_register.reg); | |
1497 bool ok = on_success()->GoTo(compiler); | |
1498 if (!ok) { | |
1499 undo.Unuse(); | |
1500 return false; | |
1501 } | |
1502 macro->Bind(&undo); | |
1503 macro->PopRegister(data_.u_position_register.reg); | |
1504 macro->Backtrack(); | |
1505 break; | |
1506 } | |
1507 case RESTORE_POSITION: | |
1508 macro->ReadCurrentPositionFromRegister( | |
1509 data_.u_position_register.reg); | |
1510 break; | |
1511 case BEGIN_SUBMATCH: | |
1512 macro->WriteCurrentPositionToRegister( | |
1513 data_.u_submatch.current_position_register); | |
1514 macro->WriteStackPointerToRegister( | |
1515 data_.u_submatch.stack_pointer_register); | |
1516 break; | |
1517 case ESCAPE_SUBMATCH: | |
1518 if (info()->at_end) { | 2032 if (info()->at_end) { |
1519 Label at_end; | 2033 Label at_end; |
1520 // Load current character jumps to the label if we are beyond the string | 2034 // Load current character jumps to the label if we are beyond the string |
1521 // end. | 2035 // end. |
1522 macro->LoadCurrentCharacter(0, &at_end); | 2036 macro->LoadCurrentCharacter(0, &at_end); |
1523 macro->Backtrack(); | 2037 macro->GoTo(variant->backtrack()); |
1524 macro->Bind(&at_end); | 2038 macro->Bind(&at_end); |
1525 } | 2039 } |
1526 if (data_.u_submatch.current_position_register != -1) { | 2040 macro->ReadCurrentPositionFromRegister( |
1527 macro->ReadCurrentPositionFromRegister( | 2041 data_.u_submatch.current_position_register); |
1528 data_.u_submatch.current_position_register); | |
1529 } | |
1530 macro->ReadStackPointerFromRegister( | 2042 macro->ReadStackPointerFromRegister( |
1531 data_.u_submatch.stack_pointer_register); | 2043 data_.u_submatch.stack_pointer_register); |
1532 break; | 2044 return on_success()->Emit(compiler, variant); |
1533 default: | 2045 default: |
1534 UNREACHABLE(); | 2046 UNREACHABLE(); |
1535 return false; | 2047 return false; |
1536 } | 2048 } |
1537 return on_success()->GoTo(compiler); | 2049 } |
1538 } | 2050 |
1539 | 2051 |
1540 | 2052 bool BackReferenceNode::Emit(RegExpCompiler* compiler, |
1541 bool BackReferenceNode::Emit(RegExpCompiler* compiler) { | 2053 GenerationVariant* variant) { |
1542 RegExpMacroAssembler* macro = compiler->macro_assembler(); | 2054 RegExpMacroAssembler* macro = compiler->macro_assembler(); |
1543 Bind(macro); | 2055 if (!variant->is_trivial()) { |
2056 return variant->Flush(compiler, this); | |
2057 } | |
2058 | |
2059 LimitResult limit_result = LimitVersions(compiler, variant); | |
2060 if (limit_result == DONE) return true; | |
2061 if (limit_result == FAIL) return false; | |
2062 ASSERT(limit_result == CONTINUE); | |
2063 | |
2064 RecursionCheck rc(compiler); | |
2065 | |
1544 ASSERT_EQ(start_reg_ + 1, end_reg_); | 2066 ASSERT_EQ(start_reg_ + 1, end_reg_); |
1545 if (info()->at_end) { | 2067 if (info()->at_end) { |
1546 // If we are constrained to match at the end of the input then succeed | 2068 // If we are constrained to match at the end of the input then succeed |
1547 // iff the back reference is empty. | 2069 // iff the back reference is empty. |
1548 macro->CheckNotRegistersEqual(start_reg_, end_reg_, on_failure_->label()); | 2070 macro->CheckNotRegistersEqual(start_reg_, end_reg_, variant->backtrack()); |
1549 } else { | 2071 } else { |
1550 if (compiler->ignore_case()) { | 2072 if (compiler->ignore_case()) { |
1551 macro->CheckNotBackReferenceIgnoreCase(start_reg_, on_failure_->label()); | 2073 macro->CheckNotBackReferenceIgnoreCase(start_reg_, variant->backtrack()); |
1552 } else { | 2074 } else { |
1553 macro->CheckNotBackReference(start_reg_, on_failure_->label()); | 2075 macro->CheckNotBackReference(start_reg_, variant->backtrack()); |
1554 } | 2076 } |
1555 } | 2077 } |
1556 return on_success()->GoTo(compiler); | 2078 return on_success()->Emit(compiler, variant); |
1557 } | 2079 } |
1558 | 2080 |
1559 | 2081 |
1560 // ------------------------------------------------------------------- | 2082 // ------------------------------------------------------------------- |
1561 // Dot/dotty output | 2083 // Dot/dotty output |
1562 | 2084 |
1563 | 2085 |
1564 #ifdef DEBUG | 2086 #ifdef DEBUG |
1565 | 2087 |
1566 | 2088 |
1567 class DotPrinter: public NodeVisitor { | 2089 class DotPrinter: public NodeVisitor { |
1568 public: | 2090 public: |
1569 explicit DotPrinter(bool ignore_case) | 2091 explicit DotPrinter(bool ignore_case) |
1570 : ignore_case_(ignore_case), | 2092 : ignore_case_(ignore_case), |
1571 stream_(&alloc_) { } | 2093 stream_(&alloc_) { } |
1572 void PrintNode(const char* label, RegExpNode* node); | 2094 void PrintNode(const char* label, RegExpNode* node); |
1573 void Visit(RegExpNode* node); | 2095 void Visit(RegExpNode* node); |
1574 void PrintOnFailure(RegExpNode* from, RegExpNode* on_failure); | |
1575 void PrintAttributes(RegExpNode* from); | 2096 void PrintAttributes(RegExpNode* from); |
1576 StringStream* stream() { return &stream_; } | 2097 StringStream* stream() { return &stream_; } |
2098 void PrintOnFailure(RegExpNode* from, RegExpNode* to); | |
1577 #define DECLARE_VISIT(Type) \ | 2099 #define DECLARE_VISIT(Type) \ |
1578 virtual void Visit##Type(Type##Node* that); | 2100 virtual void Visit##Type(Type##Node* that); |
1579 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | 2101 FOR_EACH_NODE_TYPE(DECLARE_VISIT) |
1580 #undef DECLARE_VISIT | 2102 #undef DECLARE_VISIT |
1581 private: | 2103 private: |
1582 bool ignore_case_; | 2104 bool ignore_case_; |
1583 HeapStringAllocator alloc_; | 2105 HeapStringAllocator alloc_; |
1584 StringStream stream_; | 2106 StringStream stream_; |
1585 }; | 2107 }; |
1586 | 2108 |
(...skipping 21 matching lines...) Expand all Loading... | |
1608 | 2130 |
1609 | 2131 |
1610 void DotPrinter::Visit(RegExpNode* node) { | 2132 void DotPrinter::Visit(RegExpNode* node) { |
1611 if (node->info()->visited) return; | 2133 if (node->info()->visited) return; |
1612 node->info()->visited = true; | 2134 node->info()->visited = true; |
1613 node->Accept(this); | 2135 node->Accept(this); |
1614 } | 2136 } |
1615 | 2137 |
1616 | 2138 |
1617 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { | 2139 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { |
1618 if (on_failure->IsBacktrack()) return; | |
1619 stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure); | 2140 stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure); |
1620 Visit(on_failure); | 2141 Visit(on_failure); |
1621 } | 2142 } |
1622 | 2143 |
1623 | 2144 |
1624 class TableEntryBodyPrinter { | 2145 class TableEntryBodyPrinter { |
1625 public: | 2146 public: |
1626 TableEntryBodyPrinter(StringStream* stream, ChoiceNode* choice) | 2147 TableEntryBodyPrinter(StringStream* stream, ChoiceNode* choice) |
1627 : stream_(stream), choice_(choice) { } | 2148 : stream_(stream), choice_(choice) { } |
1628 void Call(uc16 from, DispatchTable::Entry entry) { | 2149 void Call(uc16 from, DispatchTable::Entry entry) { |
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1733 static const bool kPrintDispatchTable = false; | 2254 static const bool kPrintDispatchTable = false; |
1734 void DotPrinter::VisitChoice(ChoiceNode* that) { | 2255 void DotPrinter::VisitChoice(ChoiceNode* that) { |
1735 if (kPrintDispatchTable) { | 2256 if (kPrintDispatchTable) { |
1736 stream()->Add(" n%p [shape=Mrecord, label=\"", that); | 2257 stream()->Add(" n%p [shape=Mrecord, label=\"", that); |
1737 TableEntryHeaderPrinter header_printer(stream()); | 2258 TableEntryHeaderPrinter header_printer(stream()); |
1738 that->GetTable(ignore_case_)->ForEach(&header_printer); | 2259 that->GetTable(ignore_case_)->ForEach(&header_printer); |
1739 stream()->Add("\"]\n", that); | 2260 stream()->Add("\"]\n", that); |
1740 PrintAttributes(that); | 2261 PrintAttributes(that); |
1741 TableEntryBodyPrinter body_printer(stream(), that); | 2262 TableEntryBodyPrinter body_printer(stream(), that); |
1742 that->GetTable(ignore_case_)->ForEach(&body_printer); | 2263 that->GetTable(ignore_case_)->ForEach(&body_printer); |
1743 PrintOnFailure(that, that->on_failure()); | |
1744 } else { | 2264 } else { |
1745 stream()->Add(" n%p [shape=Mrecord, label=\"?\"];\n", that); | 2265 stream()->Add(" n%p [shape=Mrecord, label=\"?\"];\n", that); |
1746 for (int i = 0; i < that->alternatives()->length(); i++) { | 2266 for (int i = 0; i < that->alternatives()->length(); i++) { |
1747 GuardedAlternative alt = that->alternatives()->at(i); | 2267 GuardedAlternative alt = that->alternatives()->at(i); |
1748 stream()->Add(" n%p -> n%p;\n", that, alt.node()); | 2268 stream()->Add(" n%p -> n%p;\n", that, alt.node()); |
1749 } | 2269 } |
1750 } | 2270 } |
1751 for (int i = 0; i < that->alternatives()->length(); i++) { | 2271 for (int i = 0; i < that->alternatives()->length(); i++) { |
1752 GuardedAlternative alt = that->alternatives()->at(i); | 2272 GuardedAlternative alt = that->alternatives()->at(i); |
1753 alt.node()->Accept(this); | 2273 alt.node()->Accept(this); |
(...skipping 24 matching lines...) Expand all Loading... | |
1778 break; | 2298 break; |
1779 } | 2299 } |
1780 default: | 2300 default: |
1781 UNREACHABLE(); | 2301 UNREACHABLE(); |
1782 } | 2302 } |
1783 } | 2303 } |
1784 stream()->Add("\", shape=box, peripheries=2];\n"); | 2304 stream()->Add("\", shape=box, peripheries=2];\n"); |
1785 PrintAttributes(that); | 2305 PrintAttributes(that); |
1786 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); | 2306 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); |
1787 Visit(that->on_success()); | 2307 Visit(that->on_success()); |
1788 PrintOnFailure(that, that->on_failure()); | |
1789 } | 2308 } |
1790 | 2309 |
1791 | 2310 |
1792 void DotPrinter::VisitBackReference(BackReferenceNode* that) { | 2311 void DotPrinter::VisitBackReference(BackReferenceNode* that) { |
1793 stream()->Add(" n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n", | 2312 stream()->Add(" n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n", |
1794 that, | 2313 that, |
1795 that->start_register(), | 2314 that->start_register(), |
1796 that->end_register()); | 2315 that->end_register()); |
1797 PrintAttributes(that); | 2316 PrintAttributes(that); |
1798 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); | 2317 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); |
1799 Visit(that->on_success()); | 2318 Visit(that->on_success()); |
1800 PrintOnFailure(that, that->on_failure()); | |
1801 } | 2319 } |
1802 | 2320 |
1803 | 2321 |
1804 void DotPrinter::VisitEnd(EndNode* that) { | 2322 void DotPrinter::VisitEnd(EndNode* that) { |
1805 stream()->Add(" n%p [style=bold, shape=point];\n", that); | 2323 stream()->Add(" n%p [style=bold, shape=point];\n", that); |
1806 PrintAttributes(that); | 2324 PrintAttributes(that); |
1807 } | 2325 } |
1808 | 2326 |
1809 | 2327 |
1810 void DotPrinter::VisitAction(ActionNode* that) { | 2328 void DotPrinter::VisitAction(ActionNode* that) { |
1811 stream()->Add(" n%p [", that); | 2329 stream()->Add(" n%p [", that); |
1812 switch (that->type_) { | 2330 switch (that->type_) { |
1813 case ActionNode::STORE_REGISTER: | 2331 case ActionNode::SET_REGISTER: |
1814 stream()->Add("label=\"$%i:=%i\", shape=octagon", | 2332 stream()->Add("label=\"$%i:=%i\", shape=octagon", |
1815 that->data_.u_store_register.reg, | 2333 that->data_.u_store_register.reg, |
1816 that->data_.u_store_register.value); | 2334 that->data_.u_store_register.value); |
1817 break; | 2335 break; |
1818 case ActionNode::INCREMENT_REGISTER: | 2336 case ActionNode::INCREMENT_REGISTER: |
1819 stream()->Add("label=\"$%i++\", shape=octagon", | 2337 stream()->Add("label=\"$%i++\", shape=octagon", |
1820 that->data_.u_increment_register.reg); | 2338 that->data_.u_increment_register.reg); |
1821 break; | 2339 break; |
1822 case ActionNode::STORE_POSITION: | 2340 case ActionNode::STORE_POSITION: |
1823 stream()->Add("label=\"$%i:=$pos\", shape=octagon", | 2341 stream()->Add("label=\"$%i:=$pos\", shape=octagon", |
1824 that->data_.u_position_register.reg); | 2342 that->data_.u_position_register.reg); |
1825 break; | 2343 break; |
1826 case ActionNode::RESTORE_POSITION: | |
1827 stream()->Add("label=\"$pos:=$%i\", shape=octagon", | |
1828 that->data_.u_position_register.reg); | |
1829 break; | |
1830 case ActionNode::BEGIN_SUBMATCH: | 2344 case ActionNode::BEGIN_SUBMATCH: |
1831 stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon", | 2345 stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon", |
1832 that->data_.u_submatch.current_position_register); | 2346 that->data_.u_submatch.current_position_register); |
1833 break; | 2347 break; |
1834 case ActionNode::ESCAPE_SUBMATCH: | 2348 case ActionNode::POSITIVE_SUBMATCH_SUCCESS: |
1835 stream()->Add("label=\"escape\", shape=septagon"); | 2349 stream()->Add("label=\"escape\", shape=septagon"); |
1836 break; | 2350 break; |
1837 } | 2351 } |
1838 stream()->Add("];\n"); | 2352 stream()->Add("];\n"); |
1839 PrintAttributes(that); | 2353 PrintAttributes(that); |
1840 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); | 2354 RegExpNode* successor = that->on_success(); |
1841 Visit(that->on_success()); | 2355 stream()->Add(" n%p -> n%p;\n", that, successor); |
2356 Visit(successor); | |
1842 } | 2357 } |
1843 | 2358 |
1844 | 2359 |
1845 class DispatchTableDumper { | 2360 class DispatchTableDumper { |
1846 public: | 2361 public: |
1847 explicit DispatchTableDumper(StringStream* stream) : stream_(stream) { } | 2362 explicit DispatchTableDumper(StringStream* stream) : stream_(stream) { } |
1848 void Call(uc16 key, DispatchTable::Entry entry); | 2363 void Call(uc16 key, DispatchTable::Entry entry); |
1849 StringStream* stream() { return stream_; } | 2364 StringStream* stream() { return stream_; } |
1850 private: | 2365 private: |
1851 StringStream* stream_; | 2366 StringStream* stream_; |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1888 | 2403 |
1889 | 2404 |
1890 #endif // DEBUG | 2405 #endif // DEBUG |
1891 | 2406 |
1892 | 2407 |
1893 // ------------------------------------------------------------------- | 2408 // ------------------------------------------------------------------- |
1894 // Tree to graph conversion | 2409 // Tree to graph conversion |
1895 | 2410 |
1896 | 2411 |
1897 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, | 2412 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, |
1898 RegExpNode* on_success, | 2413 RegExpNode* on_success) { |
1899 RegExpNode* on_failure) { | |
1900 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); | 2414 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); |
1901 elms->Add(TextElement::Atom(this)); | 2415 elms->Add(TextElement::Atom(this)); |
1902 return new TextNode(elms, on_success, on_failure); | 2416 return new TextNode(elms, on_success); |
1903 } | 2417 } |
1904 | 2418 |
1905 | 2419 |
1906 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, | 2420 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, |
1907 RegExpNode* on_success, | 2421 RegExpNode* on_success) { |
1908 RegExpNode* on_failure) { | 2422 return new TextNode(elements(), on_success); |
1909 return new TextNode(elements(), on_success, on_failure); | |
1910 } | 2423 } |
1911 | 2424 |
1912 | 2425 |
1913 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 2426 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
1914 RegExpNode* on_success, | 2427 RegExpNode* on_success) { |
1915 RegExpNode* on_failure) { | |
1916 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); | 2428 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); |
1917 elms->Add(TextElement::CharClass(this)); | 2429 elms->Add(TextElement::CharClass(this)); |
1918 return new TextNode(elms, on_success, on_failure); | 2430 return new TextNode(elms, on_success); |
1919 } | 2431 } |
1920 | 2432 |
1921 | 2433 |
1922 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, | 2434 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, |
1923 RegExpNode* on_success, | 2435 RegExpNode* on_success) { |
1924 RegExpNode* on_failure) { | |
1925 ZoneList<RegExpTree*>* alternatives = this->alternatives(); | 2436 ZoneList<RegExpTree*>* alternatives = this->alternatives(); |
1926 int length = alternatives->length(); | 2437 int length = alternatives->length(); |
1927 ChoiceNode* result = new ChoiceNode(length, on_failure); | 2438 ChoiceNode* result = new ChoiceNode(length); |
1928 for (int i = 0; i < length; i++) { | 2439 for (int i = 0; i < length; i++) { |
1929 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, | 2440 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, |
1930 on_success, | 2441 on_success)); |
1931 on_failure)); | |
1932 result->AddAlternative(alternative); | 2442 result->AddAlternative(alternative); |
1933 } | 2443 } |
1934 return result; | 2444 return result; |
1935 } | 2445 } |
1936 | 2446 |
1937 | 2447 |
1938 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, | 2448 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, |
1939 RegExpNode* on_success, | 2449 RegExpNode* on_success) { |
1940 RegExpNode* on_failure) { | |
1941 return ToNode(min(), | 2450 return ToNode(min(), |
1942 max(), | 2451 max(), |
1943 is_greedy(), | 2452 is_greedy(), |
1944 body(), | 2453 body(), |
1945 compiler, | 2454 compiler, |
1946 on_success, | 2455 on_success); |
1947 on_failure); | |
1948 } | 2456 } |
1949 | 2457 |
1950 | 2458 |
1951 RegExpNode* RegExpQuantifier::ToNode(int min, | 2459 RegExpNode* RegExpQuantifier::ToNode(int min, |
1952 int max, | 2460 int max, |
1953 bool is_greedy, | 2461 bool is_greedy, |
1954 RegExpTree* body, | 2462 RegExpTree* body, |
1955 RegExpCompiler* compiler, | 2463 RegExpCompiler* compiler, |
1956 RegExpNode* on_success, | 2464 RegExpNode* on_success) { |
1957 RegExpNode* on_failure) { | |
1958 // x{f, t} becomes this: | 2465 // x{f, t} becomes this: |
1959 // | 2466 // |
1960 // (r++)<-. | 2467 // (r++)<-. |
1961 // | ` | 2468 // | ` |
1962 // | (x) | 2469 // | (x) |
1963 // v ^ | 2470 // v ^ |
1964 // (r=0)-->(?)---/ [if r < t] | 2471 // (r=0)-->(?)---/ [if r < t] |
1965 // | | 2472 // | |
1966 // [if r >= f] \----> ... | 2473 // [if r >= f] \----> ... |
1967 // | 2474 // |
1968 // | 2475 // |
1969 // TODO(someone): clear captures on repetition and handle empty | 2476 // TODO(someone): clear captures on repetition and handle empty |
1970 // matches. | 2477 // matches. |
1971 bool has_min = min > 0; | 2478 bool has_min = min > 0; |
1972 bool has_max = max < RegExpQuantifier::kInfinity; | 2479 bool has_max = max < RegExpQuantifier::kInfinity; |
1973 bool needs_counter = has_min || has_max; | 2480 bool needs_counter = has_min || has_max; |
1974 int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1; | 2481 int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1; |
1975 ChoiceNode* center = new ChoiceNode(2, on_failure); | 2482 ChoiceNode* center = new LoopChoiceNode(2); |
1976 RegExpNode* loop_return = needs_counter | 2483 RegExpNode* loop_return = needs_counter |
1977 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) | 2484 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) |
1978 : static_cast<RegExpNode*>(center); | 2485 : static_cast<RegExpNode*>(center); |
1979 RegExpNode* body_node = body->ToNode(compiler, loop_return, on_failure); | 2486 RegExpNode* body_node = body->ToNode(compiler, loop_return); |
1980 GuardedAlternative body_alt(body_node); | 2487 GuardedAlternative body_alt(body_node); |
1981 if (has_max) { | 2488 if (has_max) { |
1982 Guard* body_guard = new Guard(reg_ctr, Guard::LT, max); | 2489 Guard* body_guard = new Guard(reg_ctr, Guard::LT, max); |
1983 body_alt.AddGuard(body_guard); | 2490 body_alt.AddGuard(body_guard); |
1984 } | 2491 } |
1985 GuardedAlternative rest_alt(on_success); | 2492 GuardedAlternative rest_alt(on_success); |
1986 if (has_min) { | 2493 if (has_min) { |
1987 Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min); | 2494 Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min); |
1988 rest_alt.AddGuard(rest_guard); | 2495 rest_alt.AddGuard(rest_guard); |
1989 } | 2496 } |
1990 if (is_greedy) { | 2497 if (is_greedy) { |
1991 center->AddAlternative(body_alt); | 2498 center->AddAlternative(body_alt); |
1992 center->AddAlternative(rest_alt); | 2499 center->AddAlternative(rest_alt); |
1993 } else { | 2500 } else { |
1994 center->AddAlternative(rest_alt); | 2501 center->AddAlternative(rest_alt); |
1995 center->AddAlternative(body_alt); | 2502 center->AddAlternative(body_alt); |
1996 } | 2503 } |
1997 if (needs_counter) { | 2504 if (needs_counter) { |
1998 return ActionNode::StoreRegister(reg_ctr, 0, center); | 2505 return ActionNode::SetRegister(reg_ctr, 0, center); |
1999 } else { | 2506 } else { |
2000 return center; | 2507 return center; |
2001 } | 2508 } |
2002 } | 2509 } |
2003 | 2510 |
2004 | 2511 |
2005 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, | 2512 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, |
2006 RegExpNode* on_success, | 2513 RegExpNode* on_success) { |
2007 RegExpNode* on_failure) { | |
2008 NodeInfo info; | 2514 NodeInfo info; |
2009 switch (type()) { | 2515 switch (type()) { |
2010 case START_OF_LINE: | 2516 case START_OF_LINE: |
2011 info.follows_newline_interest = true; | 2517 info.follows_newline_interest = true; |
2012 break; | 2518 break; |
2013 case START_OF_INPUT: | 2519 case START_OF_INPUT: |
2014 info.follows_start_interest = true; | 2520 info.follows_start_interest = true; |
2015 break; | 2521 break; |
2016 case BOUNDARY: case NON_BOUNDARY: | 2522 case BOUNDARY: case NON_BOUNDARY: |
2017 info.follows_word_interest = true; | 2523 info.follows_word_interest = true; |
2018 break; | 2524 break; |
2019 case END_OF_INPUT: | 2525 case END_OF_INPUT: |
2020 info.at_end = true; | 2526 info.at_end = true; |
2021 break; | 2527 break; |
2022 case END_OF_LINE: | 2528 case END_OF_LINE: |
2023 // This is wrong but has the effect of making the compiler abort. | 2529 // This is wrong but has the effect of making the compiler abort. |
2024 info.at_end = true; | 2530 info.at_end = true; |
2025 } | 2531 } |
2026 return on_success->PropagateForward(&info); | 2532 return on_success->PropagateForward(&info); |
2027 } | 2533 } |
2028 | 2534 |
2029 | 2535 |
2030 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, | 2536 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, |
2031 RegExpNode* on_success, | 2537 RegExpNode* on_success) { |
2032 RegExpNode* on_failure) { | |
2033 return new BackReferenceNode(RegExpCapture::StartRegister(index()), | 2538 return new BackReferenceNode(RegExpCapture::StartRegister(index()), |
2034 RegExpCapture::EndRegister(index()), | 2539 RegExpCapture::EndRegister(index()), |
2035 on_success, | 2540 on_success); |
2036 on_failure); | |
2037 } | 2541 } |
2038 | 2542 |
2039 | 2543 |
2040 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, | 2544 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, |
2041 RegExpNode* on_success, | 2545 RegExpNode* on_success) { |
2042 RegExpNode* on_failure) { | |
2043 return on_success; | 2546 return on_success; |
2044 } | 2547 } |
2045 | 2548 |
2046 | 2549 |
2047 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, | 2550 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, |
2048 RegExpNode* on_success, | 2551 RegExpNode* on_success) { |
2049 RegExpNode* on_failure) { | |
2050 int stack_pointer_register = compiler->AllocateRegister(); | 2552 int stack_pointer_register = compiler->AllocateRegister(); |
2051 int position_register = compiler->AllocateRegister(); | 2553 int position_register = compiler->AllocateRegister(); |
2554 RegExpNode* success; | |
2052 if (is_positive()) { | 2555 if (is_positive()) { |
2053 // begin submatch scope | |
2054 // $reg = $pos | |
2055 // if [body] | |
2056 // then | |
2057 // $pos = $reg | |
2058 // escape submatch scope (drop all backtracks created in scope) | |
2059 // succeed | |
2060 // else | |
2061 // end submatch scope (nothing to clean up, just exit the scope) | |
2062 // fail | |
2063 return ActionNode::BeginSubmatch( | 2556 return ActionNode::BeginSubmatch( |
2064 stack_pointer_register, | 2557 stack_pointer_register, |
2065 position_register, | 2558 position_register, |
2066 body()->ToNode( | 2559 body()->ToNode( |
2067 compiler, | 2560 compiler, |
2068 ActionNode::EscapeSubmatch( | 2561 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, |
2069 stack_pointer_register, | 2562 position_register, |
2070 true, // Also restore input position. | 2563 on_success))); |
2071 position_register, | |
2072 on_success), | |
2073 on_failure)); | |
2074 } else { | 2564 } else { |
2075 // begin submatch scope | 2565 // We use a ChoiceNode for a negative lookahead because it has most of |
2076 // try | 2566 // the characteristics we need. It has the body of the lookahead as its |
2077 // first if (body) | 2567 // first alternative and the expression after the lookahead of the second |
2078 // then | 2568 // alternative. If the first alternative succeeds then the |
2079 // escape submatch scope | 2569 // NegativeSubmatchSuccess will unwind the stack including everything the |
2080 // fail | 2570 // choice node set up and backtrack. If the first alternative fails then |
2081 // else | 2571 // the second alternative is tried, which is exactly the desired result |
2082 // backtrack | 2572 // for a negative lookahead. In the case where the dispatch table |
2083 // second | 2573 // determines that the first alternative cannot match we will save time |
2084 // end submatch scope | 2574 // by not trying it. Things are not quite so well-optimized if the |
2085 // restore current position | 2575 // dispatch table determines that the second alternative cannot match. |
2086 // succeed | 2576 // In this case we could optimize by immediately backtracking. |
2087 ChoiceNode* try_node = | 2577 ChoiceNode* choice_node = new ChoiceNode(2); |
2088 new ChoiceNode(1, ActionNode::RestorePosition(position_register, | 2578 GuardedAlternative body_alt( |
2089 on_success)); | 2579 body()->ToNode( |
2090 RegExpNode* body_node = body()->ToNode( | 2580 compiler, |
2091 compiler, | 2581 success = new NegativeSubmatchSuccess(stack_pointer_register, |
2092 ActionNode::EscapeSubmatch(stack_pointer_register, | 2582 position_register))); |
2093 false, // Don't also restore position | 2583 choice_node->AddAlternative(body_alt); |
2094 0, // Unused arguments. | 2584 choice_node->AddAlternative(GuardedAlternative(on_success)); |
2095 on_failure), | |
2096 compiler->backtrack()); | |
2097 GuardedAlternative body_alt(body_node); | |
2098 try_node->AddAlternative(body_alt); | |
2099 return ActionNode::BeginSubmatch(stack_pointer_register, | 2585 return ActionNode::BeginSubmatch(stack_pointer_register, |
2100 position_register, | 2586 position_register, |
2101 try_node); | 2587 choice_node); |
2102 } | 2588 } |
2103 } | 2589 } |
2104 | 2590 |
2105 | 2591 |
2106 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, | 2592 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, |
2107 RegExpNode* on_success, | 2593 RegExpNode* on_success) { |
2108 RegExpNode* on_failure) { | 2594 return ToNode(body(), index(), compiler, on_success); |
2109 return ToNode(body(), index(), compiler, on_success, on_failure); | |
2110 } | 2595 } |
2111 | 2596 |
2112 | 2597 |
2113 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, | 2598 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, |
2114 int index, | 2599 int index, |
2115 RegExpCompiler* compiler, | 2600 RegExpCompiler* compiler, |
2116 RegExpNode* on_success, | 2601 RegExpNode* on_success) { |
2117 RegExpNode* on_failure) { | |
2118 int start_reg = RegExpCapture::StartRegister(index); | 2602 int start_reg = RegExpCapture::StartRegister(index); |
2119 int end_reg = RegExpCapture::EndRegister(index); | 2603 int end_reg = RegExpCapture::EndRegister(index); |
2120 RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success); | 2604 RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success); |
2121 RegExpNode* body_node = body->ToNode(compiler, store_end, on_failure); | 2605 RegExpNode* body_node = body->ToNode(compiler, store_end); |
2122 return ActionNode::StorePosition(start_reg, body_node); | 2606 return ActionNode::StorePosition(start_reg, body_node); |
2123 } | 2607 } |
2124 | 2608 |
2125 | 2609 |
2126 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, | 2610 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, |
2127 RegExpNode* on_success, | 2611 RegExpNode* on_success) { |
2128 RegExpNode* on_failure) { | |
2129 ZoneList<RegExpTree*>* children = nodes(); | 2612 ZoneList<RegExpTree*>* children = nodes(); |
2130 RegExpNode* current = on_success; | 2613 RegExpNode* current = on_success; |
2131 for (int i = children->length() - 1; i >= 0; i--) { | 2614 for (int i = children->length() - 1; i >= 0; i--) { |
2132 current = children->at(i)->ToNode(compiler, current, on_failure); | 2615 current = children->at(i)->ToNode(compiler, current); |
2133 } | 2616 } |
2134 return current; | 2617 return current; |
2135 } | 2618 } |
2136 | 2619 |
2137 | 2620 |
2138 static const int kSpaceRangeCount = 20; | 2621 static const int kSpaceRangeCount = 20; |
2139 static const uc16 kSpaceRanges[kSpaceRangeCount] = { | 2622 static const uc16 kSpaceRanges[kSpaceRangeCount] = { |
2140 0x0009, 0x000D, 0x0020, 0x0020, 0x00A0, 0x00A0, 0x1680, | 2623 0x0009, 0x000D, 0x0020, 0x0020, 0x00A0, 0x00A0, 0x1680, |
2141 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029, | 2624 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029, |
2142 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000 | 2625 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000 |
(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2393 bool cloned = false; | 2876 bool cloned = false; |
2394 return RegExpNode::EnsureSibling(node, &full_info, &cloned); | 2877 return RegExpNode::EnsureSibling(node, &full_info, &cloned); |
2395 } | 2878 } |
2396 | 2879 |
2397 | 2880 |
2398 RegExpNode* ActionNode::PropagateForward(NodeInfo* info) { | 2881 RegExpNode* ActionNode::PropagateForward(NodeInfo* info) { |
2399 NodeInfo full_info(*this->info()); | 2882 NodeInfo full_info(*this->info()); |
2400 full_info.AddFromPreceding(info); | 2883 full_info.AddFromPreceding(info); |
2401 bool cloned = false; | 2884 bool cloned = false; |
2402 ActionNode* action = EnsureSibling(this, &full_info, &cloned); | 2885 ActionNode* action = EnsureSibling(this, &full_info, &cloned); |
2403 if (cloned && type_ != ESCAPE_SUBMATCH) { | 2886 action->set_on_success(action->on_success()->PropagateForward(info)); |
2404 action->set_on_success(action->on_success()->PropagateForward(info)); | |
2405 } | |
2406 return action; | 2887 return action; |
2407 } | 2888 } |
2408 | 2889 |
2409 | 2890 |
2410 RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) { | 2891 RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) { |
2411 NodeInfo full_info(*this->info()); | 2892 NodeInfo full_info(*this->info()); |
2412 full_info.AddFromPreceding(info); | 2893 full_info.AddFromPreceding(info); |
2413 bool cloned = false; | 2894 bool cloned = false; |
2414 ChoiceNode* choice = EnsureSibling(this, &full_info, &cloned); | 2895 ChoiceNode* choice = EnsureSibling(this, &full_info, &cloned); |
2415 if (cloned) { | 2896 if (cloned) { |
2416 ZoneList<GuardedAlternative>* old_alternatives = alternatives(); | 2897 ZoneList<GuardedAlternative>* old_alternatives = alternatives(); |
2417 int count = old_alternatives->length(); | 2898 int count = old_alternatives->length(); |
2418 choice->alternatives_ = new ZoneList<GuardedAlternative>(count); | 2899 choice->alternatives_ = new ZoneList<GuardedAlternative>(count); |
2419 for (int i = 0; i < count; i++) { | 2900 for (int i = 0; i < count; i++) { |
2420 GuardedAlternative alternative = old_alternatives->at(i); | 2901 GuardedAlternative alternative = old_alternatives->at(i); |
2421 alternative.set_node(alternative.node()->PropagateForward(info)); | 2902 alternative.set_node(alternative.node()->PropagateForward(info)); |
2422 choice->alternatives()->Add(alternative); | 2903 choice->alternatives()->Add(alternative); |
2423 } | 2904 } |
2424 if (!choice->on_failure_->IsBacktrack()) { | |
2425 choice->on_failure_ = choice->on_failure_->PropagateForward(info); | |
2426 } | |
2427 } | 2905 } |
2428 return choice; | 2906 return choice; |
2429 } | 2907 } |
2430 | 2908 |
2431 | 2909 |
2432 RegExpNode* EndNode::PropagateForward(NodeInfo* info) { | 2910 RegExpNode* EndNode::PropagateForward(NodeInfo* info) { |
2433 return PropagateToEndpoint(this, info); | 2911 return PropagateToEndpoint(this, info); |
2434 } | 2912 } |
2435 | 2913 |
2436 | 2914 |
(...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2617 that->info()->being_analyzed = false; | 3095 that->info()->being_analyzed = false; |
2618 that->info()->been_analyzed = true; | 3096 that->info()->been_analyzed = true; |
2619 } | 3097 } |
2620 | 3098 |
2621 | 3099 |
2622 void Analysis::VisitEnd(EndNode* that) { | 3100 void Analysis::VisitEnd(EndNode* that) { |
2623 // nothing to do | 3101 // nothing to do |
2624 } | 3102 } |
2625 | 3103 |
2626 | 3104 |
3105 void TextNode::CalculateOffsets() { | |
3106 int element_count = elements()->length(); | |
3107 // Set up the offsets of the elements relative to the start. This is a fixed | |
3108 // quantity since a TextNode can only contain fixed-width things. | |
3109 int cp_offset = 0; | |
3110 for (int i = 0; i < element_count; i++) { | |
3111 TextElement& elm = elements()->at(i); | |
3112 elm.cp_offset = cp_offset; | |
3113 if (elm.type == TextElement::ATOM) { | |
3114 cp_offset += elm.data.u_atom->data().length(); | |
3115 } else { | |
3116 cp_offset++; | |
3117 Vector<const uc16> quarks = elm.data.u_atom->data(); | |
3118 } | |
3119 } | |
3120 } | |
3121 | |
3122 | |
2627 void Analysis::VisitText(TextNode* that) { | 3123 void Analysis::VisitText(TextNode* that) { |
2628 if (ignore_case_) { | 3124 if (ignore_case_) { |
2629 that->MakeCaseIndependent(); | 3125 that->MakeCaseIndependent(); |
2630 } | 3126 } |
2631 EnsureAnalyzed(that->on_success()); | 3127 EnsureAnalyzed(that->on_success()); |
2632 EnsureAnalyzed(that->on_failure()); | |
2633 NodeInfo* info = that->info(); | 3128 NodeInfo* info = that->info(); |
2634 NodeInfo* next_info = that->on_success()->info(); | 3129 NodeInfo* next_info = that->on_success()->info(); |
2635 // If the following node is interested in what it follows then this | 3130 // If the following node is interested in what it follows then this |
2636 // node must determine it. | 3131 // node must determine it. |
2637 info->determine_newline = next_info->follows_newline_interest; | 3132 info->determine_newline = next_info->follows_newline_interest; |
2638 info->determine_word = next_info->follows_word_interest; | 3133 info->determine_word = next_info->follows_word_interest; |
2639 info->determine_start = next_info->follows_start_interest; | 3134 info->determine_start = next_info->follows_start_interest; |
3135 that->CalculateOffsets(); | |
2640 } | 3136 } |
2641 | 3137 |
2642 | 3138 |
2643 void Analysis::VisitAction(ActionNode* that) { | 3139 void Analysis::VisitAction(ActionNode* that) { |
2644 EnsureAnalyzed(that->on_success()); | 3140 RegExpNode* target = that->on_success(); |
3141 EnsureAnalyzed(target); | |
2645 // If the next node is interested in what it follows then this node | 3142 // If the next node is interested in what it follows then this node |
2646 // has to be interested too so it can pass the information on. | 3143 // has to be interested too so it can pass the information on. |
2647 that->info()->AddFromFollowing(that->on_success()->info()); | 3144 that->info()->AddFromFollowing(target->info()); |
2648 } | 3145 } |
2649 | 3146 |
2650 | 3147 |
2651 void Analysis::VisitChoice(ChoiceNode* that) { | 3148 void Analysis::VisitChoice(ChoiceNode* that) { |
2652 NodeInfo* info = that->info(); | 3149 NodeInfo* info = that->info(); |
2653 for (int i = 0; i < that->alternatives()->length(); i++) { | 3150 for (int i = 0; i < that->alternatives()->length(); i++) { |
2654 RegExpNode* node = that->alternatives()->at(i).node(); | 3151 RegExpNode* node = that->alternatives()->at(i).node(); |
2655 EnsureAnalyzed(node); | 3152 EnsureAnalyzed(node); |
2656 // Anything the following nodes need to know has to be known by | 3153 // Anything the following nodes need to know has to be known by |
2657 // this node also, so it can pass it on. | 3154 // this node also, so it can pass it on. |
2658 info->AddFromFollowing(node->info()); | 3155 info->AddFromFollowing(node->info()); |
2659 } | 3156 } |
2660 EnsureAnalyzed(that->on_failure()); | |
2661 } | 3157 } |
2662 | 3158 |
2663 | 3159 |
2664 void Analysis::VisitBackReference(BackReferenceNode* that) { | 3160 void Analysis::VisitBackReference(BackReferenceNode* that) { |
2665 EnsureAnalyzed(that->on_success()); | 3161 EnsureAnalyzed(that->on_success()); |
2666 EnsureAnalyzed(that->on_failure()); | |
2667 } | 3162 } |
2668 | 3163 |
2669 | 3164 |
2670 // ------------------------------------------------------------------- | 3165 // ------------------------------------------------------------------- |
2671 // Assumption expansion | 3166 // Assumption expansion |
2672 | 3167 |
2673 | 3168 |
2674 RegExpNode* RegExpNode::EnsureExpanded(NodeInfo* info) { | 3169 RegExpNode* RegExpNode::EnsureExpanded(NodeInfo* info) { |
2675 siblings_.Ensure(this); | 3170 siblings_.Ensure(this); |
2676 NodeInfo new_info = *this->info(); | 3171 NodeInfo new_info = *this->info(); |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2739 if (non_word == NULL) { | 3234 if (non_word == NULL) { |
2740 // This node contains no non-word characters so it must be | 3235 // This node contains no non-word characters so it must be |
2741 // all word. | 3236 // all word. |
2742 this->info()->is_word = NodeInfo::TRUE; | 3237 this->info()->is_word = NodeInfo::TRUE; |
2743 } else if (word == NULL) { | 3238 } else if (word == NULL) { |
2744 // Vice versa. | 3239 // Vice versa. |
2745 this->info()->is_word = NodeInfo::FALSE; | 3240 this->info()->is_word = NodeInfo::FALSE; |
2746 } else { | 3241 } else { |
2747 // If this character class contains both word and non-word | 3242 // If this character class contains both word and non-word |
2748 // characters we need to split it into two. | 3243 // characters we need to split it into two. |
2749 ChoiceNode* result = new ChoiceNode(2, on_failure()); | 3244 ChoiceNode* result = new ChoiceNode(2); |
2750 // Welcome to the family, son! | 3245 // Welcome to the family, son! |
2751 result->set_siblings(this->siblings()); | 3246 result->set_siblings(this->siblings()); |
2752 *result->info() = *this->info(); | 3247 *result->info() = *this->info(); |
2753 result->info()->ResetCompilationState(); | 3248 result->info()->ResetCompilationState(); |
2754 result->info()->AddAssumptions(info); | 3249 result->info()->AddAssumptions(info); |
2755 RegExpNode* word_node | 3250 RegExpNode* word_node |
2756 = new TextNode(new RegExpCharacterClass(word, false), | 3251 = new TextNode(new RegExpCharacterClass(word, false), |
2757 on_success(), | 3252 on_success()); |
2758 on_failure()); | |
2759 word_node->info()->determine_word = true; | 3253 word_node->info()->determine_word = true; |
2760 word_node->info()->does_determine_word = true; | 3254 word_node->info()->does_determine_word = true; |
2761 word_node->info()->is_word = NodeInfo::TRUE; | 3255 word_node->info()->is_word = NodeInfo::TRUE; |
2762 result->alternatives()->Add(GuardedAlternative(word_node)); | 3256 result->alternatives()->Add(GuardedAlternative(word_node)); |
2763 RegExpNode* non_word_node | 3257 RegExpNode* non_word_node |
2764 = new TextNode(new RegExpCharacterClass(non_word, false), | 3258 = new TextNode(new RegExpCharacterClass(non_word, false), |
2765 on_success(), | 3259 on_success()); |
2766 on_failure()); | |
2767 non_word_node->info()->determine_word = true; | 3260 non_word_node->info()->determine_word = true; |
2768 non_word_node->info()->does_determine_word = true; | 3261 non_word_node->info()->does_determine_word = true; |
2769 non_word_node->info()->is_word = NodeInfo::FALSE; | 3262 non_word_node->info()->is_word = NodeInfo::FALSE; |
2770 result->alternatives()->Add(GuardedAlternative(non_word_node)); | 3263 result->alternatives()->Add(GuardedAlternative(non_word_node)); |
2771 return result; | 3264 return result; |
2772 } | 3265 } |
2773 } | 3266 } |
2774 } | 3267 } |
2775 TextNode* clone = this->Clone(); | 3268 TextNode* clone = this->Clone(); |
2776 clone->info()->ResetCompilationState(); | 3269 clone->info()->ResetCompilationState(); |
(...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2967 break; | 3460 break; |
2968 } | 3461 } |
2969 default: { | 3462 default: { |
2970 UNIMPLEMENTED(); | 3463 UNIMPLEMENTED(); |
2971 } | 3464 } |
2972 } | 3465 } |
2973 } | 3466 } |
2974 | 3467 |
2975 | 3468 |
2976 void DispatchTableConstructor::VisitAction(ActionNode* that) { | 3469 void DispatchTableConstructor::VisitAction(ActionNode* that) { |
2977 that->on_success()->Accept(this); | 3470 RegExpNode* target = that->on_success(); |
3471 target->Accept(this); | |
2978 } | 3472 } |
2979 | 3473 |
2980 | 3474 |
2981 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, | 3475 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, |
2982 RegExpNode** node_return, | 3476 RegExpNode** node_return, |
2983 bool ignore_case, | 3477 bool ignore_case, |
2984 bool is_multiline) { | 3478 bool is_multiline, |
3479 Handle<String> pattern) { | |
2985 RegExpCompiler compiler(input->capture_count, ignore_case); | 3480 RegExpCompiler compiler(input->capture_count, ignore_case); |
2986 // Wrap the body of the regexp in capture #0. | 3481 // Wrap the body of the regexp in capture #0. |
2987 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, | 3482 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, |
2988 0, | 3483 0, |
2989 &compiler, | 3484 &compiler, |
2990 compiler.accept(), | 3485 compiler.accept()); |
2991 compiler.backtrack()); | |
2992 // Add a .*? at the beginning, outside the body capture. | 3486 // Add a .*? at the beginning, outside the body capture. |
2993 // Note: We could choose to not add this if the regexp is anchored at | 3487 // Note: We could choose to not add this if the regexp is anchored at |
2994 // the start of the input but I'm not sure how best to do that and | 3488 // the start of the input but I'm not sure how best to do that and |
2995 // since we don't even handle ^ yet I'm saving that optimization for | 3489 // since we don't even handle ^ yet I'm saving that optimization for |
2996 // later. | 3490 // later. |
2997 RegExpNode* node = RegExpQuantifier::ToNode(0, | 3491 RegExpNode* node = RegExpQuantifier::ToNode(0, |
2998 RegExpQuantifier::kInfinity, | 3492 RegExpQuantifier::kInfinity, |
2999 false, | 3493 false, |
3000 new RegExpCharacterClass('*'), | 3494 new RegExpCharacterClass('*'), |
3001 &compiler, | 3495 &compiler, |
3002 captured_body, | 3496 captured_body); |
3003 compiler.backtrack()); | |
3004 if (node_return != NULL) *node_return = node; | 3497 if (node_return != NULL) *node_return = node; |
3005 Analysis analysis(ignore_case); | 3498 Analysis analysis(ignore_case); |
3006 analysis.EnsureAnalyzed(node); | 3499 analysis.EnsureAnalyzed(node); |
3007 | 3500 |
3008 NodeInfo info = *node->info(); | 3501 NodeInfo info = *node->info(); |
3009 node = node->EnsureExpanded(&info); | 3502 node = node->EnsureExpanded(&info); |
3010 | 3503 |
3011 if (!FLAG_irregexp) { | 3504 if (!FLAG_irregexp) { |
3012 return Handle<FixedArray>::null(); | 3505 return Handle<FixedArray>::null(); |
3013 } | 3506 } |
3014 | 3507 |
3015 if (is_multiline && !FLAG_attempt_multiline_irregexp) { | 3508 if (is_multiline && !FLAG_attempt_multiline_irregexp) { |
3016 return Handle<FixedArray>::null(); | 3509 return Handle<FixedArray>::null(); |
3017 } | 3510 } |
3018 | 3511 |
3019 if (FLAG_irregexp_native) { | 3512 if (FLAG_irregexp_native) { |
3020 #ifdef ARM | 3513 #ifdef ARM |
3021 // Unimplemented, fall-through to bytecode implementation. | 3514 // Unimplemented, fall-through to bytecode implementation. |
3022 #else // IA32 | 3515 #else // IA32 |
3023 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16, | 3516 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16, |
3024 (input->capture_count + 1) * 2); | 3517 (input->capture_count + 1) * 2); |
3025 return compiler.Assemble(¯o_assembler, | 3518 return compiler.Assemble(¯o_assembler, |
3026 node, | 3519 node, |
3027 input->capture_count); | 3520 input->capture_count, |
3521 pattern); | |
3028 #endif | 3522 #endif |
3029 } | 3523 } |
3030 EmbeddedVector<byte, 1024> codes; | 3524 EmbeddedVector<byte, 1024> codes; |
3031 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 3525 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
3032 return compiler.Assemble(¯o_assembler, | 3526 return compiler.Assemble(¯o_assembler, |
3033 node, | 3527 node, |
3034 input->capture_count); | 3528 input->capture_count, |
3529 pattern); | |
3035 } | 3530 } |
3036 | 3531 |
3037 | 3532 |
3038 }} // namespace v8::internal | 3533 }} // namespace v8::internal |
OLD | NEW |