Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 224 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 235 result = AtomCompile(re, pattern, flags, atom_string); | 235 result = AtomCompile(re, pattern, flags, atom_string); |
| 236 } else { | 236 } else { |
| 237 result = AtomCompile(re, pattern, flags, pattern); | 237 result = AtomCompile(re, pattern, flags, pattern); |
| 238 } | 238 } |
| 239 } else { | 239 } else { |
| 240 RegExpNode* node = NULL; | 240 RegExpNode* node = NULL; |
| 241 Handle<FixedArray> irregexp_data = | 241 Handle<FixedArray> irregexp_data = |
| 242 RegExpEngine::Compile(&parse_result, | 242 RegExpEngine::Compile(&parse_result, |
| 243 &node, | 243 &node, |
| 244 flags.is_ignore_case(), | 244 flags.is_ignore_case(), |
| 245 flags.is_multiline()); | 245 flags.is_multiline(), |
| 246 pattern); | |
| 246 if (irregexp_data.is_null()) { | 247 if (irregexp_data.is_null()) { |
| 247 if (FLAG_disable_jscre) { | 248 if (FLAG_disable_jscre) { |
| 248 UNIMPLEMENTED(); | 249 UNIMPLEMENTED(); |
| 249 } | 250 } |
| 250 result = JscrePrepare(re, pattern, flags); | 251 result = JscrePrepare(re, pattern, flags); |
| 251 } else { | 252 } else { |
| 252 result = IrregexpPrepare(re, pattern, flags, irregexp_data); | 253 result = IrregexpPrepare(re, pattern, flags, irregexp_data); |
| 253 } | 254 } |
| 254 } | 255 } |
| 255 Object* data = re->data(); | 256 Object* data = re->data(); |
| (...skipping 595 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 851 | 852 |
| 852 Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) { | 853 Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) { |
| 853 FixedArray* value = | 854 FixedArray* value = |
| 854 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)); | 855 FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex)); |
| 855 return Handle<ByteArray>(ByteArray::cast(value->get(kIrregexpCodeIndex))); | 856 return Handle<ByteArray>(ByteArray::cast(value->get(kIrregexpCodeIndex))); |
| 856 } | 857 } |
| 857 | 858 |
| 858 | 859 |
| 859 // ------------------------------------------------------------------- | 860 // ------------------------------------------------------------------- |
| 860 // Implmentation of the Irregexp regular expression engine. | 861 // Implmentation of the Irregexp regular expression engine. |
| 862 // | |
| 863 // The Irregexp regular expression engine is intended to be a complete | |
| 864 // implementation of ECMAScript regular expressions. It generates either | |
| 865 // bytecodes or native code. | |
| 866 | |
| 867 // The Irregexp regexp engine is structured in three steps. | |
| 868 // 1) The parser generates an abstract syntax tree. See ast.cc. | |
| 869 // 2) From the AST a node network is created. The nodes are all | |
| 870 // subclasses of RegExpNode. The nodes represent states when | |
| 871 // executing a regular expression. Several optimizations are | |
| 872 // performed on the node network. | |
| 873 // 3) From the nodes we generate either byte codes or native code | |
| 874 // that can actually execute the regular expression (perform | |
| 875 // the search). The code generation step is described in more | |
| 876 // detail below. | |
| 877 | |
| 878 // Code generation. | |
| 879 // | |
| 880 // The nodes are divided into four main categories. | |
| 881 // * Choice nodes | |
| 882 // These represent places where the regular expression can | |
| 883 // match in more than one way. For example on entry to an | |
| 884 // alternation (foo|bar) or a repetition (*, +, ? or {}). | |
| 885 // * Action nodes | |
| 886 // These represent places where some action should be | |
| 887 // performed. Examples include recording the current position | |
| 888 // in the input string to a register (in order to implement | |
| 889 // captures) or other actions on register for example in order | |
| 890 // to implement the counters needed for {} repetitions. | |
| 891 // * Matching nodes | |
| 892 // These attempt to match some element part of the input string. | |
| 893 // Examples of elements include character classes, plain strings | |
| 894 // or back references. | |
| 895 // * End nodes | |
| 896 // These are used to implement the actions required on finding | |
| 897 // a successful match or failing to find a match. | |
| 898 // | |
| 899 // The code generated (whether as byte codes or native code) maintains | |
| 900 // some state as it runs. This consists of the following elements: | |
| 901 // | |
| 902 // * The capture registers. Used for string captures. | |
| 903 // * Other registers. Used for counters etc. | |
| 904 // * The current position. | |
| 905 // * The stack of backtracking information. Used when a matching node | |
| 906 // fails to find a match and needs to try an alternative. | |
| 907 // | |
| 908 // Conceptual regular expression execution model: | |
| 909 // | |
| 910 // There is a simple conceptual model of regular expression execution | |
| 911 // which will be presented first. The actual code generated is a more | |
| 912 // efficient simulation of the simple conceptual model: | |
| 913 // | |
| 914 // * Choice nodes are implemented as follows: | |
| 915 // For each choice except the last { | |
| 916 // push current position | |
| 917 // push backtrack code location | |
| 918 // <generate code to test for choice> | |
| 919 // backtrack code location: | |
| 920 // pop current position | |
| 921 // } | |
| 922 // <generate code to test for last choice> | |
| 923 // | |
| 924 // * Actions nodes are generated as follows | |
| 925 // <push affected registers on backtrack stack> | |
| 926 // <generate code to perform action> | |
| 927 // push backtrack code location | |
| 928 // <generate code to test for following nodes> | |
| 929 // backtrack code location: | |
| 930 // <pop affected registers to restore their state> | |
| 931 // <pop backtrack location from stack and go to it> | |
| 932 // | |
| 933 // * Matching nodes are generated as follows: | |
| 934 // if input string matches at current position | |
| 935 // update current position | |
| 936 // <generate code to test for following nodes> | |
| 937 // else | |
| 938 // <pop backtrack location from stack and go to it> | |
| 939 // | |
| 940 // Thus it can be seen that the current position is saved and restored | |
| 941 // by the choice nodes, whereas the registers are saved and restored by | |
| 942 // by the action nodes that manipulate them. | |
| 943 // | |
| 944 // The other interesting aspect of this model is that nodes are generated | |
| 945 // at the point where they are needed by a recursive call to Emit(). If | |
| 946 // the node has already been code generated then the Emit() call will | |
| 947 // generate a jump to the previously generated code instead. In order to | |
| 948 // limit recursion it is possible for the Emit() function to put the node | |
| 949 // on a work list for later generation and instead generate a jump. The | |
| 950 // destination of the jump is resolved later when the code is generated. | |
| 951 // | |
| 952 // Actual regular expression code generation. | |
| 953 // | |
| 954 // Code generation is actually more complicated than the above. In order | |
| 955 // to improve the efficiency of the generated code some optimizations are | |
| 956 // performed | |
| 957 // | |
| 958 // * Choice nodes have 1-character lookahead. | |
| 959 // A choice node looks at the following character and eliminates some of | |
| 960 // the choices immediately based on that character. This is not yet | |
| 961 // implemented. | |
| 962 // * Simple greedy loops store reduced backtracking information. | |
| 963 // A quantifier like /.*foo/m will greedily match the whole input. It will | |
| 964 // then need to backtrack to a point where it can match "foo". The naive | |
| 965 // implementation of this would push each character position onto the | |
| 966 // backtracking stack, then pop them off one by one. This would use space | |
| 967 // proportional to the length of the input string. However since the "." | |
| 968 // can only match in one way and always has a constant length (in this case | |
| 969 // of 1) it suffices to store the current position on the top of the stack | |
| 970 // once. Matching now becomes merely incrementing the current position and | |
| 971 // backtracking becomes decrementing the current position and checking the | |
| 972 // result against the stored current position. This is faster and saves | |
| 973 // space. | |
| 974 // * The current state is virtualized. | |
| 975 // This is used to defer expensive operations until it is clear that they | |
| 976 // are needed and to generate code for a node more than once, allowing | |
| 977 // specialized an efficient versions of the code to be created. This is | |
| 978 // explained in the section below. | |
| 979 // | |
| 980 // Execution state virtualization. | |
| 981 // | |
| 982 // Instead of emitting code, nodes that manipulate the state can record their | |
| 983 // manipulation in an object called the GenerationVariant. The | |
| 984 // GenerationVariant object can record a current position offset, an | |
| 985 // optional backtrack code location on the top of the virtualized backtrack | |
| 986 // stack and some register changes. When a node is to be emitted it can flush | |
| 987 // the GenerationVariant or update it. Flushing the GenerationVariant | |
| 988 // will emit code to bring the actual state into line with the virtual state. | |
| 989 // Avoiding flushing the state can postpone some work (eg updates of capture | |
| 990 // registers). Postponing work can save time when executing the regular | |
| 991 // expression since it may be found that the work never has to be done as a | |
| 992 // failure to match can occur. In addition it is much faster to jump to a | |
| 993 // known backtrack code location than it is to pop an unknown backtrack | |
| 994 // location from the stack and jump there. | |
| 995 // | |
| 996 // The virtual state found in the GenerationVariant affects code generation. | |
| 997 // For example the virtual state contains the difference between the actual | |
| 998 // current position and the virtual current position, and matching code needs | |
| 999 // to use this offset to attempt a match in the correct location of the input | |
| 1000 // string. Therefore code generated for a non-trivial GenerationVariant is | |
| 1001 // specialized to that GenerationVariant. The code generator therefore | |
| 1002 // has the ability to generate code for each node several times. In order to | |
| 1003 // limit the size of the generated code there is an arbitrary limit on how | |
| 1004 // many specialized sets of code may be generated for a given node. If the | |
| 1005 // limit is reached, the GenerationVariant is flushed and a generic version of | |
| 1006 // the code for a node is emitted. This is subsequently used for that node. | |
| 1007 // The code emitted for non-generic GenerationVariants is not recorded in the | |
| 1008 // node and so it cannot currently be reused in the event that code generation | |
| 1009 // is requested for an identical GenerationVariant. | |
|
Christian Plesner Hansen
2008/12/08 08:54:19
Most excellent!
| |
| 861 | 1010 |
| 862 | 1011 |
| 863 void RegExpTree::AppendToText(RegExpText* text) { | 1012 void RegExpTree::AppendToText(RegExpText* text) { |
| 864 UNREACHABLE(); | 1013 UNREACHABLE(); |
| 865 } | 1014 } |
| 866 | 1015 |
| 867 | 1016 |
| 868 void RegExpAtom::AppendToText(RegExpText* text) { | 1017 void RegExpAtom::AppendToText(RegExpText* text) { |
| 869 text->AddElement(TextElement::Atom(this)); | 1018 text->AddElement(TextElement::Atom(this)); |
| 870 } | 1019 } |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 907 | 1056 |
| 908 | 1057 |
| 909 class RegExpCompiler { | 1058 class RegExpCompiler { |
| 910 public: | 1059 public: |
| 911 RegExpCompiler(int capture_count, bool ignore_case); | 1060 RegExpCompiler(int capture_count, bool ignore_case); |
| 912 | 1061 |
| 913 int AllocateRegister() { return next_register_++; } | 1062 int AllocateRegister() { return next_register_++; } |
| 914 | 1063 |
| 915 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, | 1064 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, |
| 916 RegExpNode* start, | 1065 RegExpNode* start, |
| 917 int capture_count); | 1066 int capture_count, |
| 1067 Handle<String> pattern); | |
| 918 | 1068 |
| 919 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } | 1069 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } |
| 920 | 1070 |
| 921 static const int kImplementationOffset = 0; | 1071 static const int kImplementationOffset = 0; |
| 922 static const int kNumberOfRegistersOffset = 0; | 1072 static const int kNumberOfRegistersOffset = 0; |
| 923 static const int kCodeOffset = 1; | 1073 static const int kCodeOffset = 1; |
| 924 | 1074 |
| 925 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } | 1075 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } |
| 926 EndNode* accept() { return accept_; } | 1076 EndNode* accept() { return accept_; } |
| 927 EndNode* backtrack() { return backtrack_; } | |
| 928 | 1077 |
| 929 static const int kMaxRecursion = 100; | 1078 static const int kMaxRecursion = 100; |
| 930 inline int recursion_depth() { return recursion_depth_; } | 1079 inline int recursion_depth() { return recursion_depth_; } |
| 931 inline void IncrementRecursionDepth() { recursion_depth_++; } | 1080 inline void IncrementRecursionDepth() { recursion_depth_++; } |
| 932 inline void DecrementRecursionDepth() { recursion_depth_--; } | 1081 inline void DecrementRecursionDepth() { recursion_depth_--; } |
| 933 | 1082 |
| 934 inline bool ignore_case() { return ignore_case_; } | 1083 inline bool ignore_case() { return ignore_case_; } |
| 935 | 1084 |
| 936 private: | 1085 private: |
| 937 EndNode* accept_; | 1086 EndNode* accept_; |
| 938 EndNode* backtrack_; | |
| 939 int next_register_; | 1087 int next_register_; |
| 940 List<RegExpNode*>* work_list_; | 1088 List<RegExpNode*>* work_list_; |
| 941 int recursion_depth_; | 1089 int recursion_depth_; |
| 942 RegExpMacroAssembler* macro_assembler_; | 1090 RegExpMacroAssembler* macro_assembler_; |
| 943 bool ignore_case_; | 1091 bool ignore_case_; |
| 944 }; | 1092 }; |
| 945 | 1093 |
| 946 | 1094 |
| 1095 class RecursionCheck { | |
| 1096 public: | |
| 1097 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { | |
| 1098 compiler->IncrementRecursionDepth(); | |
| 1099 } | |
| 1100 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } | |
| 1101 private: | |
| 1102 RegExpCompiler* compiler_; | |
| 1103 }; | |
| 1104 | |
| 1105 | |
| 947 // Attempts to compile the regexp using an Irregexp code generator. Returns | 1106 // Attempts to compile the regexp using an Irregexp code generator. Returns |
| 948 // a fixed array or a null handle depending on whether it succeeded. | 1107 // a fixed array or a null handle depending on whether it succeeded. |
| 949 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case) | 1108 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case) |
| 950 : next_register_(2 * (capture_count + 1)), | 1109 : next_register_(2 * (capture_count + 1)), |
| 951 work_list_(NULL), | 1110 work_list_(NULL), |
| 952 recursion_depth_(0), | 1111 recursion_depth_(0), |
| 953 ignore_case_(ignore_case) { | 1112 ignore_case_(ignore_case) { |
| 954 accept_ = new EndNode(EndNode::ACCEPT); | 1113 accept_ = new EndNode(EndNode::ACCEPT); |
| 955 backtrack_ = new EndNode(EndNode::BACKTRACK); | |
| 956 } | 1114 } |
| 957 | 1115 |
| 958 | 1116 |
| 959 Handle<FixedArray> RegExpCompiler::Assemble( | 1117 Handle<FixedArray> RegExpCompiler::Assemble( |
| 960 RegExpMacroAssembler* macro_assembler, | 1118 RegExpMacroAssembler* macro_assembler, |
| 961 RegExpNode* start, | 1119 RegExpNode* start, |
| 962 int capture_count) { | 1120 int capture_count, |
| 1121 Handle<String> pattern) { | |
| 963 #ifdef DEBUG | 1122 #ifdef DEBUG |
| 964 if (FLAG_trace_regexp_assembler) | 1123 if (FLAG_trace_regexp_assembler) |
| 965 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); | 1124 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); |
| 966 else | 1125 else |
| 967 #endif | 1126 #endif |
| 968 macro_assembler_ = macro_assembler; | 1127 macro_assembler_ = macro_assembler; |
| 969 List <RegExpNode*> work_list(0); | 1128 List <RegExpNode*> work_list(0); |
| 970 work_list_ = &work_list; | 1129 work_list_ = &work_list; |
| 971 Label fail; | 1130 Label fail; |
| 972 macro_assembler_->PushBacktrack(&fail); | 1131 macro_assembler->PushBacktrack(&fail); |
| 973 if (!start->GoTo(this)) { | 1132 GenerationVariant generic_variant; |
| 1133 if (!start->Emit(this, &generic_variant)) { | |
| 974 fail.Unuse(); | 1134 fail.Unuse(); |
| 975 return Handle<FixedArray>::null(); | 1135 return Handle<FixedArray>::null(); |
| 976 } | 1136 } |
| 1137 macro_assembler_->Bind(&fail); | |
| 1138 macro_assembler_->Fail(); | |
| 977 while (!work_list.is_empty()) { | 1139 while (!work_list.is_empty()) { |
| 978 if (!work_list.RemoveLast()->GoTo(this)) { | 1140 if (!work_list.RemoveLast()->Emit(this, &generic_variant)) { |
| 979 fail.Unuse(); | |
| 980 return Handle<FixedArray>::null(); | 1141 return Handle<FixedArray>::null(); |
| 981 } | 1142 } |
| 982 } | 1143 } |
| 983 macro_assembler_->Bind(&fail); | |
| 984 macro_assembler_->Fail(); | |
| 985 Handle<FixedArray> array = | 1144 Handle<FixedArray> array = |
| 986 Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength); | 1145 Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength); |
| 987 array->set(RegExpImpl::kIrregexpImplementationIndex, | 1146 array->set(RegExpImpl::kIrregexpImplementationIndex, |
| 988 Smi::FromInt(macro_assembler_->Implementation())); | 1147 Smi::FromInt(macro_assembler_->Implementation())); |
| 989 array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex, | 1148 array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex, |
| 990 Smi::FromInt(next_register_)); | 1149 Smi::FromInt(next_register_)); |
| 991 array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex, | 1150 array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex, |
| 992 Smi::FromInt(capture_count)); | 1151 Smi::FromInt(capture_count)); |
| 993 Handle<Object> code = macro_assembler_->GetCode(); | 1152 Handle<Object> code = macro_assembler_->GetCode(pattern); |
| 994 array->set(RegExpImpl::kIrregexpCodeIndex, *code); | 1153 array->set(RegExpImpl::kIrregexpCodeIndex, *code); |
| 995 work_list_ = NULL; | 1154 work_list_ = NULL; |
| 996 #ifdef DEBUG | 1155 #ifdef DEBUG |
| 997 if (FLAG_trace_regexp_assembler) { | 1156 if (FLAG_trace_regexp_assembler) { |
| 998 delete macro_assembler_; | 1157 delete macro_assembler_; |
| 999 } | 1158 } |
| 1000 #endif | 1159 #endif |
| 1001 return array; | 1160 return array; |
| 1002 } | 1161 } |
| 1003 | 1162 |
| 1004 | 1163 |
| 1005 bool RegExpNode::GoTo(RegExpCompiler* compiler) { | 1164 bool GenerationVariant::mentions_reg(int reg) { |
| 1006 // TODO(erikcorry): Implement support. | 1165 for (DeferredAction* action = actions_; |
| 1007 if (info_.follows_word_interest || | 1166 action != NULL; |
| 1008 info_.follows_newline_interest || | 1167 action = action->next()) { |
| 1009 info_.follows_start_interest) { | 1168 if (reg == action->reg()) return true; |
| 1010 return false; | 1169 } |
| 1011 } | 1170 return false; |
| 1012 if (label_.is_bound()) { | 1171 } |
| 1013 compiler->macro_assembler()->GoTo(&label_); | 1172 |
| 1014 return true; | 1173 |
| 1174 int GenerationVariant::FindAffectedRegisters(OutSet* affected_registers) { | |
| 1175 int max_register = -1; | |
| 1176 for (DeferredAction* action = actions_; | |
| 1177 action != NULL; | |
| 1178 action = action->next()) { | |
| 1179 affected_registers->Set(action->reg()); | |
| 1180 if (action->reg() > max_register) max_register = action->reg(); | |
| 1181 } | |
| 1182 return max_register; | |
| 1183 } | |
| 1184 | |
| 1185 | |
| 1186 void GenerationVariant::PushAffectedRegisters(RegExpMacroAssembler* macro, | |
| 1187 int max_register, | |
| 1188 OutSet& affected_registers) { | |
| 1189 for (int reg = 0; reg <= max_register; reg++) { | |
| 1190 if (affected_registers.Get(reg)) macro->PushRegister(reg); | |
| 1191 } | |
| 1192 } | |
| 1193 | |
| 1194 | |
| 1195 void GenerationVariant::RestoreAffectedRegisters(RegExpMacroAssembler* macro, | |
| 1196 int max_register, | |
| 1197 OutSet& affected_registers) { | |
| 1198 for (int reg = max_register; reg >= 0; reg--) { | |
| 1199 if (affected_registers.Get(reg)) macro->PopRegister(reg); | |
| 1200 } | |
| 1201 } | |
| 1202 | |
| 1203 | |
| 1204 void GenerationVariant::PerformDeferredActions(RegExpMacroAssembler* macro, | |
| 1205 int max_register, | |
|
Christian Plesner Hansen
2008/12/08 08:54:19
Indentation off.
| |
| 1206 OutSet& affected_registers) { | |
| 1207 for (int reg = 0; reg <= max_register; reg++) { | |
| 1208 if (!affected_registers.Get(reg)) { | |
| 1209 continue; | |
| 1210 } | |
| 1211 int value = 0; | |
| 1212 bool absolute = false; | |
| 1213 int store_position = -1; | |
| 1214 // This is a little tricky because we are scanning the actions in reverse | |
| 1215 // historical order (newest first). | |
| 1216 for (DeferredAction* action = actions_; | |
| 1217 action != NULL; | |
| 1218 action = action->next()) { | |
| 1219 if (action->reg() == reg) { | |
| 1220 switch (action->type()) { | |
| 1221 case ActionNode::SET_REGISTER: { | |
| 1222 GenerationVariant::DeferredSetRegister* psr = | |
| 1223 static_cast<GenerationVariant::DeferredSetRegister*>(action); | |
| 1224 value += psr->value(); | |
| 1225 absolute = true; | |
| 1226 ASSERT_EQ(store_position, -1); | |
| 1227 break; | |
| 1228 } | |
| 1229 case ActionNode::INCREMENT_REGISTER: | |
| 1230 if (!absolute) { | |
| 1231 value++; | |
| 1232 } | |
| 1233 ASSERT_EQ(store_position, -1); | |
| 1234 break; | |
| 1235 case ActionNode::STORE_POSITION: { | |
| 1236 GenerationVariant::DeferredCapture* pc = | |
| 1237 static_cast<GenerationVariant::DeferredCapture*>(action); | |
| 1238 if (store_position == -1) { | |
| 1239 store_position = pc->cp_offset(); | |
| 1240 } | |
| 1241 ASSERT(!absolute); | |
| 1242 ASSERT_EQ(value, 0); | |
| 1243 break; | |
| 1244 } | |
| 1245 default: | |
| 1246 UNREACHABLE(); | |
| 1247 break; | |
| 1248 } | |
| 1249 } | |
| 1250 } | |
| 1251 if (store_position != -1) { | |
| 1252 macro->WriteCurrentPositionToRegister(reg, store_position); | |
| 1253 } else { | |
| 1254 if (absolute) { | |
| 1255 macro->SetRegister(reg, value); | |
| 1256 } else { | |
| 1257 if (value != 0) { | |
| 1258 macro->AdvanceRegister(reg, value); | |
| 1259 } | |
| 1260 } | |
| 1261 } | |
| 1262 } | |
| 1263 } | |
| 1264 | |
| 1265 | |
| 1266 // This is called as we come into a loop choice node and some other tricky | |
| 1267 // nodes. It normalises the state of the code generator to ensure we can | |
| 1268 // generate generic code. | |
| 1269 bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) { | |
| 1270 RegExpMacroAssembler* macro = compiler->macro_assembler(); | |
| 1271 | |
| 1272 ASSERT(actions_ != NULL || cp_offset_ != 0 || backtrack() != NULL); | |
| 1273 | |
| 1274 if (actions_ == NULL && backtrack() == NULL) { | |
| 1275 // Here we just have some deferred cp advances to fix and we are back to | |
| 1276 // a normal situation. | |
| 1277 macro->AdvanceCurrentPosition(cp_offset_); | |
| 1278 // Create a new trivial state and generate the node with that. | |
| 1279 GenerationVariant new_state; | |
| 1280 return successor->Emit(compiler, &new_state); | |
| 1281 } | |
| 1282 | |
| 1283 // Generate deferred actions here along with code to undo them again. | |
| 1284 OutSet affected_registers; | |
| 1285 int max_register = FindAffectedRegisters(&affected_registers); | |
| 1286 PushAffectedRegisters(macro, max_register, affected_registers); | |
| 1287 PerformDeferredActions(macro, max_register, affected_registers); | |
| 1288 if (backtrack() != NULL) { | |
| 1289 // Here we have a concrete backtrack location. These are set up by choice | |
| 1290 // nodes and so they indicate that we have a deferred save of the current | |
| 1291 // position which we may need to emit here. | |
| 1292 macro->PushCurrentPosition(); | |
| 1293 } | |
| 1294 if (cp_offset_ != 0) { | |
| 1295 macro->AdvanceCurrentPosition(cp_offset_); | |
| 1296 } | |
| 1297 | |
| 1298 // Create a new trivial state and generate the node with that. | |
|
Christian Plesner Hansen
2008/12/08 08:54:19
trivial -> empty?
| |
| 1299 Label undo; | |
| 1300 macro->PushBacktrack(&undo); | |
| 1301 GenerationVariant new_state; | |
| 1302 bool ok = successor->Emit(compiler, &new_state); | |
| 1303 | |
| 1304 // On backtrack we need to restore state. | |
| 1305 macro->Bind(&undo); | |
| 1306 if (!ok) return false; | |
| 1307 if (backtrack() != NULL) { | |
| 1308 macro->PopCurrentPosition(); | |
| 1309 } | |
| 1310 RestoreAffectedRegisters(macro, max_register, affected_registers); | |
| 1311 if (backtrack() == NULL) { | |
| 1312 macro->Backtrack(); | |
| 1015 } else { | 1313 } else { |
| 1016 if (compiler->recursion_depth() > RegExpCompiler::kMaxRecursion) { | 1314 macro->GoTo(backtrack()); |
| 1017 compiler->macro_assembler()->GoTo(&label_); | 1315 } |
| 1018 compiler->AddWork(this); | 1316 |
| 1019 return true; | 1317 return true; |
| 1020 } else { | 1318 } |
| 1021 compiler->IncrementRecursionDepth(); | 1319 |
| 1022 bool how_it_went = Emit(compiler); | 1320 |
| 1023 compiler->DecrementRecursionDepth(); | 1321 void EndNode::EmitInfoChecks(RegExpMacroAssembler* macro, |
| 1024 return how_it_went; | 1322 GenerationVariant* variant) { |
| 1025 } | 1323 if (info()->at_end) { |
| 1026 } | 1324 Label succeed; |
| 1027 } | 1325 // LoadCurrentCharacter will go to the label if we are at the end of the |
| 1028 | 1326 // input string. |
| 1029 | 1327 macro->LoadCurrentCharacter(0, &succeed); |
| 1030 // EndNodes are special. Because they can be very common and they are very | 1328 macro->GoTo(variant->backtrack()); |
| 1031 // short we normally inline them. That is, if we are asked to emit a GoTo | 1329 macro->Bind(&succeed); |
| 1032 // we just emit the entire node. Since they don't have successors this | 1330 } |
| 1033 // works. | 1331 } |
| 1034 bool EndNode::GoTo(RegExpCompiler* compiler) { | 1332 |
| 1035 if (info()->follows_word_interest || | 1333 |
| 1036 info()->follows_newline_interest || | 1334 bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, |
| 1037 info()->follows_start_interest) { | 1335 GenerationVariant* variant) { |
| 1038 return false; | 1336 if (!variant->is_trivial()) { |
| 1039 } | 1337 return variant->Flush(compiler, this); |
| 1040 return Emit(compiler); | 1338 } |
| 1041 } | |
| 1042 | |
| 1043 | |
| 1044 Label* RegExpNode::label() { | |
| 1045 return &label_; | |
| 1046 } | |
| 1047 | |
| 1048 | |
| 1049 bool EndNode::Emit(RegExpCompiler* compiler) { | |
| 1050 RegExpMacroAssembler* macro = compiler->macro_assembler(); | 1339 RegExpMacroAssembler* macro = compiler->macro_assembler(); |
| 1340 if (!label()->is_bound()) { | |
| 1341 macro->Bind(label()); | |
| 1342 } | |
| 1343 EmitInfoChecks(macro, variant); | |
| 1344 macro->ReadCurrentPositionFromRegister(current_position_register_); | |
| 1345 macro->ReadStackPointerFromRegister(stack_pointer_register_); | |
| 1346 // Now that we have unwound the stack we find at the top of the stack the | |
| 1347 // backtrack that the BeginSubmatch node got. | |
| 1348 macro->Backtrack(); | |
| 1349 return true; | |
| 1350 } | |
| 1351 | |
| 1352 | |
| 1353 bool EndNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { | |
| 1354 if (!variant->is_trivial()) { | |
| 1355 return variant->Flush(compiler, this); | |
| 1356 } | |
| 1357 RegExpMacroAssembler* macro = compiler->macro_assembler(); | |
| 1358 if (!label()->is_bound()) { | |
| 1359 macro->Bind(label()); | |
| 1360 } | |
| 1051 switch (action_) { | 1361 switch (action_) { |
| 1052 case ACCEPT: | 1362 case ACCEPT: |
| 1053 if (!label()->is_bound()) Bind(macro); | 1363 EmitInfoChecks(macro, variant); |
| 1054 if (info()->at_end) { | |
| 1055 Label succeed; | |
| 1056 // LoadCurrentCharacter will go to the label if we are at the end of the | |
| 1057 // input string. | |
| 1058 macro->LoadCurrentCharacter(0, &succeed); | |
| 1059 macro->Backtrack(); | |
| 1060 macro->Bind(&succeed); | |
| 1061 } | |
| 1062 macro->Succeed(); | 1364 macro->Succeed(); |
| 1063 return true; | 1365 return true; |
| 1064 case BACKTRACK: | 1366 case BACKTRACK: |
| 1065 if (!label()->is_bound()) Bind(macro); | |
| 1066 ASSERT(!info()->at_end); | 1367 ASSERT(!info()->at_end); |
| 1067 macro->Backtrack(); | 1368 macro->GoTo(variant->backtrack()); |
| 1068 return true; | 1369 return true; |
| 1069 } | 1370 case NEGATIVE_SUBMATCH_SUCCESS: |
| 1371 // This case is handled in a different virtual method. | |
| 1372 UNREACHABLE(); | |
| 1373 } | |
| 1374 UNIMPLEMENTED(); | |
| 1070 return false; | 1375 return false; |
| 1071 } | 1376 } |
| 1072 | 1377 |
| 1073 | 1378 |
| 1074 void GuardedAlternative::AddGuard(Guard* guard) { | 1379 void GuardedAlternative::AddGuard(Guard* guard) { |
| 1075 if (guards_ == NULL) | 1380 if (guards_ == NULL) |
| 1076 guards_ = new ZoneList<Guard*>(1); | 1381 guards_ = new ZoneList<Guard*>(1); |
| 1077 guards_->Add(guard); | 1382 guards_->Add(guard); |
| 1078 } | 1383 } |
| 1079 | 1384 |
| 1080 | 1385 |
| 1081 ActionNode* ActionNode::StoreRegister(int reg, | 1386 ActionNode* ActionNode::SetRegister(int reg, |
| 1082 int val, | 1387 int val, |
| 1083 RegExpNode* on_success) { | 1388 RegExpNode* on_success) { |
| 1084 ActionNode* result = new ActionNode(STORE_REGISTER, on_success); | 1389 ActionNode* result = new ActionNode(SET_REGISTER, on_success); |
| 1085 result->data_.u_store_register.reg = reg; | 1390 result->data_.u_store_register.reg = reg; |
| 1086 result->data_.u_store_register.value = val; | 1391 result->data_.u_store_register.value = val; |
| 1087 return result; | 1392 return result; |
| 1088 } | 1393 } |
| 1089 | 1394 |
| 1090 | 1395 |
| 1091 ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) { | 1396 ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) { |
| 1092 ActionNode* result = new ActionNode(INCREMENT_REGISTER, on_success); | 1397 ActionNode* result = new ActionNode(INCREMENT_REGISTER, on_success); |
| 1093 result->data_.u_increment_register.reg = reg; | 1398 result->data_.u_increment_register.reg = reg; |
| 1094 return result; | 1399 return result; |
| 1095 } | 1400 } |
| 1096 | 1401 |
| 1097 | 1402 |
| 1098 ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) { | 1403 ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) { |
| 1099 ActionNode* result = new ActionNode(STORE_POSITION, on_success); | 1404 ActionNode* result = new ActionNode(STORE_POSITION, on_success); |
| 1100 result->data_.u_position_register.reg = reg; | 1405 result->data_.u_position_register.reg = reg; |
| 1101 return result; | 1406 return result; |
| 1102 } | 1407 } |
| 1103 | 1408 |
| 1104 | 1409 |
| 1105 ActionNode* ActionNode::RestorePosition(int reg, RegExpNode* on_success) { | |
| 1106 ActionNode* result = new ActionNode(RESTORE_POSITION, on_success); | |
| 1107 result->data_.u_position_register.reg = reg; | |
| 1108 return result; | |
| 1109 } | |
| 1110 | |
| 1111 | |
| 1112 ActionNode* ActionNode::BeginSubmatch(int stack_reg, | 1410 ActionNode* ActionNode::BeginSubmatch(int stack_reg, |
| 1113 int position_reg, | 1411 int position_reg, |
| 1114 RegExpNode* on_success) { | 1412 RegExpNode* on_success) { |
| 1115 ActionNode* result = new ActionNode(BEGIN_SUBMATCH, on_success); | 1413 ActionNode* result = new ActionNode(BEGIN_SUBMATCH, on_success); |
| 1116 result->data_.u_submatch.stack_pointer_register = stack_reg; | 1414 result->data_.u_submatch.stack_pointer_register = stack_reg; |
| 1117 result->data_.u_submatch.current_position_register = position_reg; | 1415 result->data_.u_submatch.current_position_register = position_reg; |
| 1118 return result; | 1416 return result; |
| 1119 } | 1417 } |
| 1120 | 1418 |
| 1121 | 1419 |
| 1122 ActionNode* ActionNode::EscapeSubmatch(int stack_reg, | 1420 ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg, |
| 1123 bool restore_position, | 1421 int position_reg, |
| 1124 int position_reg, | 1422 RegExpNode* on_success) { |
| 1125 RegExpNode* on_success) { | 1423 ActionNode* result = new ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success); |
| 1126 ActionNode* result = new ActionNode(ESCAPE_SUBMATCH, on_success); | |
| 1127 result->data_.u_submatch.stack_pointer_register = stack_reg; | 1424 result->data_.u_submatch.stack_pointer_register = stack_reg; |
| 1128 if (restore_position) { | 1425 result->data_.u_submatch.current_position_register = position_reg; |
| 1129 result->data_.u_submatch.current_position_register = position_reg; | |
| 1130 } else { | |
| 1131 result->data_.u_submatch.current_position_register = -1; | |
| 1132 } | |
| 1133 return result; | 1426 return result; |
| 1134 } | 1427 } |
| 1135 | 1428 |
| 1136 | 1429 |
| 1137 #define DEFINE_ACCEPT(Type) \ | 1430 #define DEFINE_ACCEPT(Type) \ |
| 1138 void Type##Node::Accept(NodeVisitor* visitor) { \ | 1431 void Type##Node::Accept(NodeVisitor* visitor) { \ |
| 1139 visitor->Visit##Type(this); \ | 1432 visitor->Visit##Type(this); \ |
| 1140 } | 1433 } |
| 1141 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) | 1434 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) |
| 1142 #undef DEFINE_ACCEPT | 1435 #undef DEFINE_ACCEPT |
| 1143 | 1436 |
| 1144 | 1437 |
| 1145 // ------------------------------------------------------------------- | 1438 // ------------------------------------------------------------------- |
| 1146 // Emit code. | 1439 // Emit code. |
| 1147 | 1440 |
| 1148 | 1441 |
| 1149 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, | 1442 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, |
| 1150 Guard* guard, | 1443 Guard* guard, |
|
Christian Plesner Hansen
2008/12/08 08:54:19
Indentation.
| |
| 1151 Label* on_failure) { | 1444 GenerationVariant* variant) { |
| 1152 switch (guard->op()) { | 1445 switch (guard->op()) { |
| 1153 case Guard::LT: | 1446 case Guard::LT: |
| 1154 macro_assembler->IfRegisterGE(guard->reg(), guard->value(), on_failure); | 1447 ASSERT(!variant->mentions_reg(guard->reg())); |
| 1448 macro_assembler->IfRegisterGE(guard->reg(), | |
| 1449 guard->value(), | |
| 1450 variant->backtrack()); | |
| 1155 break; | 1451 break; |
| 1156 case Guard::GEQ: | 1452 case Guard::GEQ: |
| 1157 macro_assembler->IfRegisterLT(guard->reg(), guard->value(), on_failure); | 1453 ASSERT(!variant->mentions_reg(guard->reg())); |
| 1454 macro_assembler->IfRegisterLT(guard->reg(), | |
| 1455 guard->value(), | |
| 1456 variant->backtrack()); | |
| 1158 break; | 1457 break; |
| 1159 } | 1458 } |
| 1160 } | 1459 } |
| 1161 | 1460 |
| 1162 | 1461 |
| 1163 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize; | 1462 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize; |
| 1164 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange; | 1463 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange; |
| 1165 | 1464 |
| 1166 | 1465 |
| 1167 static inline void EmitAtomNonLetters( | 1466 static inline void EmitAtomNonLetters( |
| 1168 RegExpMacroAssembler* macro_assembler, | 1467 RegExpMacroAssembler* macro_assembler, |
| 1169 TextElement elm, | 1468 TextElement elm, |
| 1170 Vector<const uc16> quarks, | 1469 Vector<const uc16> quarks, |
| 1171 Label* on_failure, | 1470 Label* on_failure, |
| 1172 int cp_offset) { | 1471 int cp_offset, |
| 1472 bool check_offset) { | |
| 1173 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1473 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 1474 // It is vital that this loop is backwards due to the unchecked character | |
| 1475 // load below. | |
| 1174 for (int i = quarks.length() - 1; i >= 0; i--) { | 1476 for (int i = quarks.length() - 1; i >= 0; i--) { |
| 1175 uc16 c = quarks[i]; | 1477 uc16 c = quarks[i]; |
| 1176 int length = uncanonicalize.get(c, '\0', chars); | 1478 int length = uncanonicalize.get(c, '\0', chars); |
| 1177 if (length <= 1) { | 1479 if (length <= 1) { |
| 1178 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); | 1480 if (check_offset && i == quarks.length() - 1) { |
| 1481 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); | |
| 1482 } else { | |
| 1483 // Here we don't need to check against the end of the input string | |
| 1484 // since this character lies before a character that matched. | |
| 1485 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i); | |
| 1486 } | |
| 1179 macro_assembler->CheckNotCharacter(c, on_failure); | 1487 macro_assembler->CheckNotCharacter(c, on_failure); |
| 1180 } | 1488 } |
| 1181 } | 1489 } |
| 1182 } | 1490 } |
| 1183 | 1491 |
| 1184 | 1492 |
| 1185 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, | 1493 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, |
| 1186 uc16 c1, | 1494 uc16 c1, |
| 1187 uc16 c2, | 1495 uc16 c2, |
| 1188 Label* on_failure) { | 1496 Label* on_failure) { |
| (...skipping 20 matching lines...) Expand all Loading... | |
| 1209 } | 1517 } |
| 1210 return false; | 1518 return false; |
| 1211 } | 1519 } |
| 1212 | 1520 |
| 1213 | 1521 |
| 1214 static inline void EmitAtomLetters( | 1522 static inline void EmitAtomLetters( |
| 1215 RegExpMacroAssembler* macro_assembler, | 1523 RegExpMacroAssembler* macro_assembler, |
| 1216 TextElement elm, | 1524 TextElement elm, |
| 1217 Vector<const uc16> quarks, | 1525 Vector<const uc16> quarks, |
| 1218 Label* on_failure, | 1526 Label* on_failure, |
| 1219 int cp_offset) { | 1527 int cp_offset, |
| 1528 bool check_offset) { | |
| 1220 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1529 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 1530 // It is vital that this loop is backwards due to the unchecked character | |
| 1531 // load below. | |
| 1221 for (int i = quarks.length() - 1; i >= 0; i--) { | 1532 for (int i = quarks.length() - 1; i >= 0; i--) { |
| 1222 uc16 c = quarks[i]; | 1533 uc16 c = quarks[i]; |
| 1223 int length = uncanonicalize.get(c, '\0', chars); | 1534 int length = uncanonicalize.get(c, '\0', chars); |
| 1224 if (length <= 1) continue; | 1535 if (length <= 1) continue; |
| 1225 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); | 1536 if (check_offset && i == quarks.length() - 1) { |
| 1537 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); | |
| 1538 } else { | |
| 1539 // Here we don't need to check against the end of the input string | |
| 1540 // since this character lies before a character that matched. | |
| 1541 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i); | |
| 1542 } | |
| 1226 Label ok; | 1543 Label ok; |
| 1227 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); | 1544 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); |
| 1228 switch (length) { | 1545 switch (length) { |
| 1229 case 2: { | 1546 case 2: { |
| 1230 if (ShortCutEmitCharacterPair(macro_assembler, | 1547 if (ShortCutEmitCharacterPair(macro_assembler, |
| 1231 chars[0], | 1548 chars[0], |
| 1232 chars[1], | 1549 chars[1], |
| 1233 on_failure)) { | 1550 on_failure)) { |
| 1234 ok.Unuse(); | 1551 ok.Unuse(); |
| 1235 } else { | 1552 } else { |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 1252 UNREACHABLE(); | 1569 UNREACHABLE(); |
| 1253 break; | 1570 break; |
| 1254 } | 1571 } |
| 1255 } | 1572 } |
| 1256 } | 1573 } |
| 1257 | 1574 |
| 1258 | 1575 |
| 1259 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 1576 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, |
| 1260 RegExpCharacterClass* cc, | 1577 RegExpCharacterClass* cc, |
| 1261 int cp_offset, | 1578 int cp_offset, |
| 1262 Label* on_failure) { | 1579 Label* on_failure, |
| 1263 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); | 1580 bool check_offset) { |
| 1264 cp_offset++; | |
| 1265 | |
| 1266 ZoneList<CharacterRange>* ranges = cc->ranges(); | 1581 ZoneList<CharacterRange>* ranges = cc->ranges(); |
| 1267 | 1582 |
| 1268 Label success; | 1583 Label success; |
| 1269 | 1584 |
| 1270 Label* char_is_in_class = | 1585 Label* char_is_in_class = |
| 1271 cc->is_negated() ? on_failure : &success; | 1586 cc->is_negated() ? on_failure : &success; |
| 1272 | 1587 |
| 1273 int range_count = ranges->length(); | 1588 int range_count = ranges->length(); |
| 1274 | 1589 |
| 1275 if (range_count == 0) { | 1590 if (range_count == 0) { |
| 1276 if (!cc->is_negated()) { | 1591 if (!cc->is_negated()) { |
| 1277 macro_assembler->GoTo(on_failure); | 1592 macro_assembler->GoTo(on_failure); |
| 1278 } | 1593 } |
| 1279 return; | 1594 return; |
| 1280 } | 1595 } |
| 1281 | 1596 |
| 1597 if (range_count == 1 && | |
| 1598 !cc->is_negated() && | |
| 1599 ranges->at(0).from() == 0 && | |
|
Christian Plesner Hansen
2008/12/08 08:54:19
Maybe add an IsEverything method on CharacterRange
| |
| 1600 ranges->at(0).to() == 0xffff) { | |
| 1601 // This is a common case hit by non-anchored expressions. | |
| 1602 // TODO(erikcorry): We should have a macro assembler instruction that just | |
| 1603 // checks for end of string without loading the character. | |
| 1604 if (check_offset) { | |
| 1605 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); | |
| 1606 } | |
| 1607 return; | |
| 1608 } | |
| 1609 | |
| 1610 if (check_offset) { | |
| 1611 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); | |
| 1612 } else { | |
| 1613 // Here we don't need to check against the end of the input string | |
| 1614 // since this character lies before a character that matched. | |
| 1615 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset); | |
| 1616 } | |
| 1617 | |
| 1282 for (int i = 0; i < range_count - 1; i++) { | 1618 for (int i = 0; i < range_count - 1; i++) { |
| 1283 CharacterRange& range = ranges->at(i); | 1619 CharacterRange& range = ranges->at(i); |
| 1284 Label next_range; | 1620 Label next_range; |
| 1285 uc16 from = range.from(); | 1621 uc16 from = range.from(); |
| 1286 uc16 to = range.to(); | 1622 uc16 to = range.to(); |
| 1287 if (to == from) { | 1623 if (to == from) { |
| 1288 macro_assembler->CheckCharacter(to, char_is_in_class); | 1624 macro_assembler->CheckCharacter(to, char_is_in_class); |
| 1289 } else { | 1625 } else { |
| 1290 if (from != 0) { | 1626 if (from != 0) { |
| 1291 macro_assembler->CheckCharacterLT(from, &next_range); | 1627 macro_assembler->CheckCharacterLT(from, &next_range); |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1326 } else { | 1662 } else { |
| 1327 if (cc->is_negated()) { | 1663 if (cc->is_negated()) { |
| 1328 macro_assembler->GoTo(on_failure); | 1664 macro_assembler->GoTo(on_failure); |
| 1329 } | 1665 } |
| 1330 } | 1666 } |
| 1331 } | 1667 } |
| 1332 macro_assembler->Bind(&success); | 1668 macro_assembler->Bind(&success); |
| 1333 } | 1669 } |
| 1334 | 1670 |
| 1335 | 1671 |
| 1672 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, | |
| 1673 GenerationVariant* variant) { | |
| 1674 // TODO(erikcorry): Implement support. | |
| 1675 if (info_.follows_word_interest || | |
| 1676 info_.follows_newline_interest || | |
| 1677 info_.follows_start_interest) { | |
| 1678 return FAIL; | |
| 1679 } | |
| 1336 | 1680 |
| 1337 bool TextNode::Emit(RegExpCompiler* compiler) { | 1681 // If we are generating a greedy loop then don't stop and don't reuse code. |
| 1682 if (variant->stop_node() != NULL) { | |
| 1683 return CONTINUE; | |
| 1684 } | |
| 1685 | |
| 1338 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1686 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 1339 Bind(macro_assembler); | 1687 if (variant->is_trivial()) { |
| 1688 if (label_.is_bound()) { | |
| 1689 // We are being asked to generate a generic version, but that's already | |
| 1690 // been done so just go to it. | |
| 1691 macro_assembler->GoTo(&label_); | |
| 1692 return DONE; | |
| 1693 } | |
| 1694 if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) { | |
| 1695 // To avoid too deep recursion we push the node to the work queue and just | |
| 1696 // generate a goto here. | |
| 1697 compiler->AddWork(this); | |
| 1698 macro_assembler->GoTo(&label_); | |
| 1699 return DONE; | |
| 1700 } | |
| 1701 // Generate generic version of the node and bind the label for later use. | |
| 1702 macro_assembler->Bind(&label_); | |
| 1703 return CONTINUE; | |
| 1704 } | |
| 1705 | |
| 1706 // We are being asked to make a non-generic version. Keep track of how many | |
| 1707 // non-generic versions we generate so as not to overdo it. | |
| 1708 variants_generated_++; | |
| 1709 if (variants_generated_ < kMaxVariantsGenerated && | |
| 1710 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) { | |
| 1711 return CONTINUE; | |
| 1712 } | |
| 1713 | |
| 1714 // If we get here there have been too many variants generated or recursion | |
| 1715 // is too deep. Time to switch to a generic version. The code for | |
| 1716 // generic versions above can handle deep recursion properly. | |
| 1717 bool ok = variant->Flush(compiler, this); | |
| 1718 return ok ? DONE : FAIL; | |
| 1719 } | |
| 1720 | |
| 1721 | |
| 1722 // This generates the code to match a text node. A text node can contain | |
| 1723 // straight character sequences (possibly to be matched in a case-independent | |
| 1724 // way) and character classes. In order to be most efficient we test for the | |
| 1725 // simple things first and then move on to the more complicated things. The | |
| 1726 // simplest thing is a non-letter or a letter if we are matching case. The | |
| 1727 // next-most simple thing is a case-independent letter. The least simple is | |
| 1728 // a character class. Another optimization is that we test the last one first. | |
| 1729 // If that succeeds we don't need to test for the end of the string when we | |
| 1730 // load other characters. | |
| 1731 bool TextNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { | |
| 1732 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | |
| 1733 Label *backtrack = variant->backtrack(); | |
| 1734 LimitResult limit_result = LimitVersions(compiler, variant); | |
| 1735 if (limit_result == FAIL) return false; | |
| 1736 if (limit_result == DONE) return true; | |
| 1737 ASSERT(limit_result == CONTINUE); | |
| 1738 | |
| 1340 int element_count = elms_->length(); | 1739 int element_count = elms_->length(); |
| 1341 ASSERT(element_count != 0); | 1740 ASSERT(element_count != 0); |
| 1342 int cp_offset = 0; | |
| 1343 if (info()->at_end) { | 1741 if (info()->at_end) { |
| 1344 macro_assembler->Backtrack(); | 1742 macro_assembler->GoTo(backtrack); |
| 1345 return true; | 1743 return true; |
| 1346 } | 1744 } |
| 1347 // First, handle straight character matches. | 1745 // First, handle straight character matches. |
| 1348 for (int i = 0; i < element_count; i++) { | 1746 int checked_up_to = -1; |
| 1747 for (int i = element_count - 1; i >= 0; i--) { | |
| 1349 TextElement elm = elms_->at(i); | 1748 TextElement elm = elms_->at(i); |
| 1749 ASSERT(elm.cp_offset >= 0); | |
| 1750 int cp_offset = variant->cp_offset() + elm.cp_offset; | |
| 1350 if (elm.type == TextElement::ATOM) { | 1751 if (elm.type == TextElement::ATOM) { |
| 1351 Vector<const uc16> quarks = elm.data.u_atom->data(); | 1752 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| 1753 int last_cp_offset = cp_offset + quarks.length(); | |
| 1352 if (compiler->ignore_case()) { | 1754 if (compiler->ignore_case()) { |
| 1353 EmitAtomNonLetters(macro_assembler, | 1755 EmitAtomNonLetters(macro_assembler, |
| 1354 elm, | 1756 elm, |
| 1355 quarks, | 1757 quarks, |
| 1356 on_failure_->label(), | 1758 backtrack, |
| 1357 cp_offset); | 1759 cp_offset, |
| 1760 checked_up_to < last_cp_offset); | |
| 1358 } else { | 1761 } else { |
| 1359 macro_assembler->CheckCharacters(quarks, | 1762 macro_assembler->CheckCharacters(quarks, |
| 1360 cp_offset, | 1763 cp_offset, |
| 1361 on_failure_->label()); | 1764 backtrack, |
| 1765 checked_up_to < last_cp_offset); | |
| 1362 } | 1766 } |
| 1363 cp_offset += quarks.length(); | 1767 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; |
| 1364 } else { | 1768 } else { |
| 1365 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); | 1769 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); |
| 1366 cp_offset++; | |
| 1367 } | 1770 } |
| 1368 } | 1771 } |
| 1369 // Second, handle case independent letter matches if any. | 1772 // Second, handle case independent letter matches if any. |
| 1370 if (compiler->ignore_case()) { | 1773 if (compiler->ignore_case()) { |
| 1371 cp_offset = 0; | 1774 for (int i = element_count - 1; i >= 0; i--) { |
| 1372 for (int i = 0; i < element_count; i++) { | |
| 1373 TextElement elm = elms_->at(i); | 1775 TextElement elm = elms_->at(i); |
| 1776 int cp_offset = variant->cp_offset() + elm.cp_offset; | |
| 1374 if (elm.type == TextElement::ATOM) { | 1777 if (elm.type == TextElement::ATOM) { |
| 1375 Vector<const uc16> quarks = elm.data.u_atom->data(); | 1778 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| 1779 int last_cp_offset = cp_offset + quarks.length(); | |
| 1376 EmitAtomLetters(macro_assembler, | 1780 EmitAtomLetters(macro_assembler, |
| 1377 elm, | 1781 elm, |
| 1378 quarks, | 1782 quarks, |
| 1379 on_failure_->label(), | 1783 backtrack, |
| 1380 cp_offset); | 1784 cp_offset, |
| 1381 cp_offset += quarks.length(); | 1785 checked_up_to < last_cp_offset); |
| 1382 } else { | 1786 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; |
| 1383 cp_offset++; | |
| 1384 } | 1787 } |
| 1385 } | 1788 } |
| 1386 } | 1789 } |
| 1387 // If the fast character matches passed then do the character classes. | 1790 // If the fast character matches passed then do the character classes. |
| 1388 cp_offset = 0; | 1791 for (int i = element_count - 1; i >= 0; i--) { |
| 1389 for (int i = 0; i < element_count; i++) { | |
| 1390 TextElement elm = elms_->at(i); | 1792 TextElement elm = elms_->at(i); |
| 1793 int cp_offset = variant->cp_offset() + elm.cp_offset; | |
| 1391 if (elm.type == TextElement::CHAR_CLASS) { | 1794 if (elm.type == TextElement::CHAR_CLASS) { |
| 1392 RegExpCharacterClass* cc = elm.data.u_char_class; | 1795 RegExpCharacterClass* cc = elm.data.u_char_class; |
| 1393 EmitCharClass(macro_assembler, cc, cp_offset, on_failure_->label()); | 1796 EmitCharClass(macro_assembler, |
| 1394 cp_offset++; | 1797 cc, |
| 1395 } else { | 1798 cp_offset, |
| 1396 cp_offset += elm.data.u_atom->data().length(); | 1799 backtrack, |
| 1800 checked_up_to < cp_offset); | |
| 1801 if (cp_offset > checked_up_to) checked_up_to = cp_offset; | |
| 1397 } | 1802 } |
| 1398 } | 1803 } |
| 1399 | 1804 |
| 1400 compiler->AddWork(on_failure_); | 1805 GenerationVariant new_variant(*variant); |
| 1401 macro_assembler->AdvanceCurrentPosition(cp_offset); | 1806 new_variant.set_cp_offset(checked_up_to + 1); |
| 1402 return on_success()->GoTo(compiler); | 1807 RecursionCheck rc(compiler); |
| 1808 return on_success()->Emit(compiler, &new_variant); | |
| 1403 } | 1809 } |
| 1404 | 1810 |
| 1405 | 1811 |
| 1406 void TextNode::MakeCaseIndependent() { | 1812 void TextNode::MakeCaseIndependent() { |
| 1407 int element_count = elms_->length(); | 1813 int element_count = elms_->length(); |
| 1408 for (int i = 0; i < element_count; i++) { | 1814 for (int i = 0; i < element_count; i++) { |
| 1409 TextElement elm = elms_->at(i); | 1815 TextElement elm = elms_->at(i); |
| 1410 if (elm.type == TextElement::CHAR_CLASS) { | 1816 if (elm.type == TextElement::CHAR_CLASS) { |
| 1411 RegExpCharacterClass* cc = elm.data.u_char_class; | 1817 RegExpCharacterClass* cc = elm.data.u_char_class; |
| 1412 ZoneList<CharacterRange>* ranges = cc->ranges(); | 1818 ZoneList<CharacterRange>* ranges = cc->ranges(); |
| 1413 int range_count = ranges->length(); | 1819 int range_count = ranges->length(); |
| 1414 for (int i = 0; i < range_count; i++) { | 1820 for (int i = 0; i < range_count; i++) { |
| 1415 ranges->at(i).AddCaseEquivalents(ranges); | 1821 ranges->at(i).AddCaseEquivalents(ranges); |
| 1416 } | 1822 } |
| 1417 } | 1823 } |
| 1418 } | 1824 } |
| 1419 } | 1825 } |
| 1420 | 1826 |
| 1421 | 1827 |
| 1422 bool ChoiceNode::Emit(RegExpCompiler* compiler) { | 1828 int TextNode::GreedyLoopTextLength() { |
| 1829 TextElement elm = elms_->at(elms_->length() - 1); | |
| 1830 if (elm.type == TextElement::CHAR_CLASS) { | |
| 1831 return elm.cp_offset + 1; | |
| 1832 } else { | |
| 1833 return elm.cp_offset + elm.data.u_atom->data().length(); | |
| 1834 } | |
| 1835 } | |
| 1836 | |
| 1837 | |
| 1838 // Finds the fixed match length of a sequence of nodes that goes from | |
| 1839 // this alternative and back to this choice node. If there are variable | |
| 1840 // length nodes or other complications in the way then return a sentinel | |
| 1841 // value indicating that a greedy loop cannot be constructed. | |
| 1842 int ChoiceNode::GreedyLoopTextLength(GuardedAlternative* alternative) { | |
| 1843 int length = 0; | |
| 1844 RegExpNode* node = alternative->node(); | |
| 1845 // Later we will generate code for all these text nodes using recursion | |
| 1846 // so we have to limit the max number. | |
| 1847 int recursion_depth = 0; | |
| 1848 while (node != this) { | |
| 1849 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { | |
| 1850 return kNodeIsTooComplexForGreedyLoops; | |
| 1851 } | |
| 1852 NodeInfo* info = node->info(); | |
| 1853 if (info->follows_word_interest || | |
| 1854 info->follows_newline_interest || | |
| 1855 info->follows_start_interest) { | |
| 1856 return kNodeIsTooComplexForGreedyLoops; | |
| 1857 } | |
| 1858 int node_length = node->GreedyLoopTextLength(); | |
| 1859 if (node_length == kNodeIsTooComplexForGreedyLoops) { | |
| 1860 return kNodeIsTooComplexForGreedyLoops; | |
| 1861 } | |
| 1862 length += node_length; | |
| 1863 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); | |
| 1864 node = seq_node->on_success(); | |
| 1865 } | |
| 1866 return length; | |
| 1867 } | |
| 1868 | |
| 1869 | |
| 1870 bool LoopChoiceNode::Emit(RegExpCompiler* compiler, | |
| 1871 GenerationVariant* variant) { | |
| 1872 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | |
| 1873 if (variant->stop_node() == this) { | |
| 1874 int text_length = GreedyLoopTextLength(&(alternatives_->at(0))); | |
| 1875 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops); | |
| 1876 // Update the counter-based backtracking info on the stack. This is an | |
| 1877 // optimization for greedy loops (see below). | |
| 1878 ASSERT(variant->cp_offset() == text_length); | |
| 1879 macro_assembler->AdvanceCurrentPosition(text_length); | |
| 1880 macro_assembler->GoTo(variant->loop_label()); | |
| 1881 return true; | |
| 1882 } | |
| 1883 ASSERT(variant->stop_node() == NULL); | |
| 1884 if (!variant->is_trivial()) { | |
| 1885 return variant->Flush(compiler, this); | |
| 1886 } | |
| 1887 return ChoiceNode::Emit(compiler, variant); | |
| 1888 } | |
| 1889 | |
| 1890 | |
| 1891 bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { | |
| 1892 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | |
| 1423 int choice_count = alternatives_->length(); | 1893 int choice_count = alternatives_->length(); |
| 1424 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1894 #ifdef DEBUG |
| 1425 Bind(macro_assembler); | 1895 for (int i = 0; i < choice_count - 1; i++) { |
| 1896 GuardedAlternative alternative = alternatives_->at(i); | |
| 1897 ZoneList<Guard*>* guards = alternative.guards(); | |
| 1898 int guard_count = (guards == NULL) ? 0 : guards->length(); | |
| 1899 for (int j = 0; j < guard_count; j++) { | |
| 1900 ASSERT(!variant->mentions_reg(guards->at(j)->reg())); | |
| 1901 } | |
| 1902 } | |
| 1903 #endif | |
| 1904 | |
| 1905 LimitResult limit_result = LimitVersions(compiler, variant); | |
| 1906 if (limit_result == DONE) return true; | |
| 1907 if (limit_result == FAIL) return false; | |
| 1908 ASSERT(limit_result == CONTINUE); | |
| 1909 | |
| 1910 RecursionCheck rc(compiler); | |
| 1911 | |
| 1912 GenerationVariant* current_variant = variant; | |
| 1913 | |
| 1914 int text_length = GreedyLoopTextLength(&(alternatives_->at(0))); | |
| 1915 bool greedy_loop = false; | |
| 1916 Label greedy_loop_label; | |
| 1917 GenerationVariant counter_backtrack_variant(&greedy_loop_label); | |
| 1918 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { | |
| 1919 // Here we have special handling for greedy loops containing only text nodes | |
| 1920 // and other simple nodes. These are handled by pushing the current | |
| 1921 // position on the stack and then incrementing the current position each | |
| 1922 // time around the switch. On backtrack we decrement the current position | |
| 1923 // and check it against the pushed value. This avoids pushing backtrack | |
| 1924 // information for each iteration of the loop, which could take up a lot of | |
| 1925 // space. | |
| 1926 greedy_loop = true; | |
| 1927 ASSERT(variant->stop_node() == NULL); | |
| 1928 macro_assembler->PushCurrentPosition(); | |
| 1929 current_variant = &counter_backtrack_variant; | |
| 1930 Label greedy_match_failed; | |
| 1931 GenerationVariant greedy_match_variant(&greedy_match_failed); | |
| 1932 Label loop_label; | |
| 1933 macro_assembler->Bind(&loop_label); | |
| 1934 greedy_match_variant.set_stop_node(this); | |
| 1935 greedy_match_variant.set_loop_label(&loop_label); | |
| 1936 bool ok = alternatives_->at(0).node()->Emit(compiler, | |
| 1937 &greedy_match_variant); | |
| 1938 macro_assembler->Bind(&greedy_match_failed); | |
| 1939 if (!ok) { | |
| 1940 greedy_loop_label.Unuse(); | |
| 1941 return false; | |
| 1942 } | |
| 1943 } | |
| 1944 | |
| 1945 Label second_choice; // For use in greedy matches. | |
| 1946 macro_assembler->Bind(&second_choice); | |
| 1947 | |
| 1426 // For now we just call all choices one after the other. The idea ultimately | 1948 // For now we just call all choices one after the other. The idea ultimately |
| 1427 // is to use the Dispatch table to try only the relevant ones. | 1949 // is to use the Dispatch table to try only the relevant ones. |
| 1428 for (int i = 0; i < choice_count - 1; i++) { | 1950 for (int i = greedy_loop ? 1 : 0; i < choice_count - 1; i++) { |
| 1429 GuardedAlternative alternative = alternatives_->at(i); | 1951 GuardedAlternative alternative = alternatives_->at(i); |
| 1430 Label after; | 1952 Label after; |
| 1431 Label after_no_pop_cp; | |
| 1432 ZoneList<Guard*>* guards = alternative.guards(); | 1953 ZoneList<Guard*>* guards = alternative.guards(); |
| 1433 if (guards != NULL) { | 1954 int guard_count = (guards == NULL) ? 0 : guards->length(); |
| 1434 int guard_count = guards->length(); | 1955 GenerationVariant new_variant(*current_variant); |
| 1435 for (int j = 0; j < guard_count; j++) { | 1956 new_variant.set_backtrack(&after); |
| 1436 GenerateGuard(macro_assembler, guards->at(j), &after_no_pop_cp); | 1957 for (int j = 0; j < guard_count; j++) { |
| 1437 } | 1958 GenerateGuard(macro_assembler, guards->at(j), &new_variant); |
| 1438 } | 1959 } |
| 1439 macro_assembler->PushCurrentPosition(); | 1960 if (!alternative.node()->Emit(compiler, &new_variant)) { |
| 1440 macro_assembler->PushBacktrack(&after); | |
| 1441 if (!alternative.node()->GoTo(compiler)) { | |
| 1442 after.Unuse(); | 1961 after.Unuse(); |
| 1443 after_no_pop_cp.Unuse(); | |
| 1444 return false; | 1962 return false; |
| 1445 } | 1963 } |
| 1446 macro_assembler->Bind(&after); | 1964 macro_assembler->Bind(&after); |
| 1447 macro_assembler->PopCurrentPosition(); | |
| 1448 macro_assembler->Bind(&after_no_pop_cp); | |
| 1449 } | 1965 } |
| 1450 GuardedAlternative alternative = alternatives_->at(choice_count - 1); | 1966 GuardedAlternative alternative = alternatives_->at(choice_count - 1); |
| 1451 ZoneList<Guard*>* guards = alternative.guards(); | 1967 ZoneList<Guard*>* guards = alternative.guards(); |
| 1452 if (guards != NULL) { | 1968 int guard_count = (guards == NULL) ? 0 : guards->length(); |
| 1453 int guard_count = guards->length(); | 1969 for (int j = 0; j < guard_count; j++) { |
| 1454 for (int j = 0; j < guard_count; j++) { | 1970 GenerateGuard(macro_assembler, guards->at(j), current_variant); |
| 1455 GenerateGuard(macro_assembler, guards->at(j), on_failure_->label()); | 1971 } |
| 1456 } | 1972 bool ok = alternative.node()->Emit(compiler, current_variant); |
| 1457 } | 1973 if (!ok) return false; |
| 1458 if (!on_failure_->IsBacktrack()) { | 1974 if (greedy_loop) { |
| 1459 ASSERT_NOT_NULL(on_failure_ -> label()); | 1975 macro_assembler->Bind(&greedy_loop_label); |
| 1460 macro_assembler->PushBacktrack(on_failure_->label()); | 1976 // If we have unwound to the bottom then backtrack. |
| 1461 compiler->AddWork(on_failure_); | 1977 macro_assembler->CheckGreedyLoop(variant->backtrack()); |
| 1462 } | 1978 // Otherwise try the second priority at an earlier position. |
| 1463 if (!alternative.node()->GoTo(compiler)) { | 1979 macro_assembler->AdvanceCurrentPosition(-text_length); |
| 1464 return false; | 1980 macro_assembler->GoTo(&second_choice); |
| 1465 } | 1981 } |
| 1466 return true; | 1982 return true; |
| 1467 } | 1983 } |
| 1468 | 1984 |
| 1469 | 1985 |
| 1470 bool ActionNode::Emit(RegExpCompiler* compiler) { | 1986 bool ActionNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { |
| 1471 RegExpMacroAssembler* macro = compiler->macro_assembler(); | 1987 RegExpMacroAssembler* macro = compiler->macro_assembler(); |
| 1472 Bind(macro); | 1988 LimitResult limit_result = LimitVersions(compiler, variant); |
| 1989 if (limit_result == DONE) return true; | |
| 1990 if (limit_result == FAIL) return false; | |
| 1991 ASSERT(limit_result == CONTINUE); | |
| 1992 | |
| 1993 RecursionCheck rc(compiler); | |
| 1994 | |
| 1473 switch (type_) { | 1995 switch (type_) { |
| 1474 case STORE_REGISTER: | 1996 case STORE_POSITION: { |
| 1475 macro->SetRegister(data_.u_store_register.reg, | 1997 GenerationVariant::DeferredCapture |
| 1476 data_.u_store_register.value); | 1998 new_capture(data_.u_position_register.reg, variant); |
| 1477 break; | 1999 GenerationVariant new_variant = *variant; |
| 2000 new_variant.add_action(&new_capture); | |
| 2001 return on_success()->Emit(compiler, &new_variant); | |
| 2002 } | |
| 1478 case INCREMENT_REGISTER: { | 2003 case INCREMENT_REGISTER: { |
| 1479 Label undo; | 2004 GenerationVariant::DeferredIncrementRegister |
| 1480 macro->PushBacktrack(&undo); | 2005 new_increment(data_.u_increment_register.reg); |
| 1481 macro->AdvanceRegister(data_.u_increment_register.reg, 1); | 2006 GenerationVariant new_variant = *variant; |
| 1482 bool ok = on_success()->GoTo(compiler); | 2007 new_variant.add_action(&new_increment); |
| 1483 if (!ok) { | 2008 return on_success()->Emit(compiler, &new_variant); |
| 1484 undo.Unuse(); | 2009 } |
| 2010 case SET_REGISTER: { | |
| 2011 GenerationVariant::DeferredSetRegister | |
| 2012 new_set(data_.u_store_register.reg, data_.u_store_register.value); | |
| 2013 GenerationVariant new_variant = *variant; | |
| 2014 new_variant.add_action(&new_set); | |
| 2015 return on_success()->Emit(compiler, &new_variant); | |
| 2016 } | |
| 2017 case BEGIN_SUBMATCH: | |
| 2018 if (!variant->is_trivial()) return variant->Flush(compiler, this); | |
| 2019 macro->WriteCurrentPositionToRegister( | |
| 2020 data_.u_submatch.current_position_register, 0); | |
| 2021 macro->WriteStackPointerToRegister( | |
| 2022 data_.u_submatch.stack_pointer_register); | |
| 2023 return on_success()->Emit(compiler, variant); | |
| 2024 case POSITIVE_SUBMATCH_SUCCESS: | |
| 2025 if (!variant->is_trivial()) return variant->Flush(compiler, this); | |
| 2026 // TODO(erikcorry): Implement support. | |
| 2027 if (info()->follows_word_interest || | |
| 2028 info()->follows_newline_interest || | |
| 2029 info()->follows_start_interest) { | |
| 1485 return false; | 2030 return false; |
| 1486 } | 2031 } |
| 1487 macro->Bind(&undo); | |
| 1488 macro->AdvanceRegister(data_.u_increment_register.reg, -1); | |
| 1489 macro->Backtrack(); | |
| 1490 break; | |
| 1491 } | |
| 1492 case STORE_POSITION: { | |
| 1493 Label undo; | |
| 1494 macro->PushRegister(data_.u_position_register.reg); | |
| 1495 macro->PushBacktrack(&undo); | |
| 1496 macro->WriteCurrentPositionToRegister(data_.u_position_register.reg); | |
| 1497 bool ok = on_success()->GoTo(compiler); | |
| 1498 if (!ok) { | |
| 1499 undo.Unuse(); | |
| 1500 return false; | |
| 1501 } | |
| 1502 macro->Bind(&undo); | |
| 1503 macro->PopRegister(data_.u_position_register.reg); | |
| 1504 macro->Backtrack(); | |
| 1505 break; | |
| 1506 } | |
| 1507 case RESTORE_POSITION: | |
| 1508 macro->ReadCurrentPositionFromRegister( | |
| 1509 data_.u_position_register.reg); | |
| 1510 break; | |
| 1511 case BEGIN_SUBMATCH: | |
| 1512 macro->WriteCurrentPositionToRegister( | |
| 1513 data_.u_submatch.current_position_register); | |
| 1514 macro->WriteStackPointerToRegister( | |
| 1515 data_.u_submatch.stack_pointer_register); | |
| 1516 break; | |
| 1517 case ESCAPE_SUBMATCH: | |
| 1518 if (info()->at_end) { | 2032 if (info()->at_end) { |
| 1519 Label at_end; | 2033 Label at_end; |
| 1520 // Load current character jumps to the label if we are beyond the string | 2034 // Load current character jumps to the label if we are beyond the string |
| 1521 // end. | 2035 // end. |
| 1522 macro->LoadCurrentCharacter(0, &at_end); | 2036 macro->LoadCurrentCharacter(0, &at_end); |
| 1523 macro->Backtrack(); | 2037 macro->GoTo(variant->backtrack()); |
| 1524 macro->Bind(&at_end); | 2038 macro->Bind(&at_end); |
| 1525 } | 2039 } |
| 1526 if (data_.u_submatch.current_position_register != -1) { | 2040 macro->ReadCurrentPositionFromRegister( |
| 1527 macro->ReadCurrentPositionFromRegister( | 2041 data_.u_submatch.current_position_register); |
| 1528 data_.u_submatch.current_position_register); | |
| 1529 } | |
| 1530 macro->ReadStackPointerFromRegister( | 2042 macro->ReadStackPointerFromRegister( |
| 1531 data_.u_submatch.stack_pointer_register); | 2043 data_.u_submatch.stack_pointer_register); |
| 1532 break; | 2044 return on_success()->Emit(compiler, variant); |
| 1533 default: | 2045 default: |
| 1534 UNREACHABLE(); | 2046 UNREACHABLE(); |
| 1535 return false; | 2047 return false; |
| 1536 } | 2048 } |
| 1537 return on_success()->GoTo(compiler); | 2049 } |
| 1538 } | 2050 |
| 1539 | 2051 |
| 1540 | 2052 bool BackReferenceNode::Emit(RegExpCompiler* compiler, |
| 1541 bool BackReferenceNode::Emit(RegExpCompiler* compiler) { | 2053 GenerationVariant* variant) { |
| 1542 RegExpMacroAssembler* macro = compiler->macro_assembler(); | 2054 RegExpMacroAssembler* macro = compiler->macro_assembler(); |
| 1543 Bind(macro); | 2055 if (!variant->is_trivial()) { |
| 2056 return variant->Flush(compiler, this); | |
| 2057 } | |
| 2058 | |
| 2059 LimitResult limit_result = LimitVersions(compiler, variant); | |
| 2060 if (limit_result == DONE) return true; | |
| 2061 if (limit_result == FAIL) return false; | |
| 2062 ASSERT(limit_result == CONTINUE); | |
| 2063 | |
| 2064 RecursionCheck rc(compiler); | |
| 2065 | |
| 1544 ASSERT_EQ(start_reg_ + 1, end_reg_); | 2066 ASSERT_EQ(start_reg_ + 1, end_reg_); |
| 1545 if (info()->at_end) { | 2067 if (info()->at_end) { |
| 1546 // If we are constrained to match at the end of the input then succeed | 2068 // If we are constrained to match at the end of the input then succeed |
| 1547 // iff the back reference is empty. | 2069 // iff the back reference is empty. |
| 1548 macro->CheckNotRegistersEqual(start_reg_, end_reg_, on_failure_->label()); | 2070 macro->CheckNotRegistersEqual(start_reg_, end_reg_, variant->backtrack()); |
| 1549 } else { | 2071 } else { |
| 1550 if (compiler->ignore_case()) { | 2072 if (compiler->ignore_case()) { |
| 1551 macro->CheckNotBackReferenceIgnoreCase(start_reg_, on_failure_->label()); | 2073 macro->CheckNotBackReferenceIgnoreCase(start_reg_, variant->backtrack()); |
| 1552 } else { | 2074 } else { |
| 1553 macro->CheckNotBackReference(start_reg_, on_failure_->label()); | 2075 macro->CheckNotBackReference(start_reg_, variant->backtrack()); |
| 1554 } | 2076 } |
| 1555 } | 2077 } |
| 1556 return on_success()->GoTo(compiler); | 2078 return on_success()->Emit(compiler, variant); |
| 1557 } | 2079 } |
| 1558 | 2080 |
| 1559 | 2081 |
| 1560 // ------------------------------------------------------------------- | 2082 // ------------------------------------------------------------------- |
| 1561 // Dot/dotty output | 2083 // Dot/dotty output |
| 1562 | 2084 |
| 1563 | 2085 |
| 1564 #ifdef DEBUG | 2086 #ifdef DEBUG |
| 1565 | 2087 |
| 1566 | 2088 |
| 1567 class DotPrinter: public NodeVisitor { | 2089 class DotPrinter: public NodeVisitor { |
| 1568 public: | 2090 public: |
| 1569 explicit DotPrinter(bool ignore_case) | 2091 explicit DotPrinter(bool ignore_case) |
| 1570 : ignore_case_(ignore_case), | 2092 : ignore_case_(ignore_case), |
| 1571 stream_(&alloc_) { } | 2093 stream_(&alloc_) { } |
| 1572 void PrintNode(const char* label, RegExpNode* node); | 2094 void PrintNode(const char* label, RegExpNode* node); |
| 1573 void Visit(RegExpNode* node); | 2095 void Visit(RegExpNode* node); |
| 1574 void PrintOnFailure(RegExpNode* from, RegExpNode* on_failure); | |
| 1575 void PrintAttributes(RegExpNode* from); | 2096 void PrintAttributes(RegExpNode* from); |
| 1576 StringStream* stream() { return &stream_; } | 2097 StringStream* stream() { return &stream_; } |
| 2098 void PrintOnFailure(RegExpNode* from, RegExpNode* to); | |
| 1577 #define DECLARE_VISIT(Type) \ | 2099 #define DECLARE_VISIT(Type) \ |
| 1578 virtual void Visit##Type(Type##Node* that); | 2100 virtual void Visit##Type(Type##Node* that); |
| 1579 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | 2101 FOR_EACH_NODE_TYPE(DECLARE_VISIT) |
| 1580 #undef DECLARE_VISIT | 2102 #undef DECLARE_VISIT |
| 1581 private: | 2103 private: |
| 1582 bool ignore_case_; | 2104 bool ignore_case_; |
| 1583 HeapStringAllocator alloc_; | 2105 HeapStringAllocator alloc_; |
| 1584 StringStream stream_; | 2106 StringStream stream_; |
| 1585 }; | 2107 }; |
| 1586 | 2108 |
| (...skipping 21 matching lines...) Expand all Loading... | |
| 1608 | 2130 |
| 1609 | 2131 |
| 1610 void DotPrinter::Visit(RegExpNode* node) { | 2132 void DotPrinter::Visit(RegExpNode* node) { |
| 1611 if (node->info()->visited) return; | 2133 if (node->info()->visited) return; |
| 1612 node->info()->visited = true; | 2134 node->info()->visited = true; |
| 1613 node->Accept(this); | 2135 node->Accept(this); |
| 1614 } | 2136 } |
| 1615 | 2137 |
| 1616 | 2138 |
| 1617 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { | 2139 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { |
| 1618 if (on_failure->IsBacktrack()) return; | |
| 1619 stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure); | 2140 stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure); |
| 1620 Visit(on_failure); | 2141 Visit(on_failure); |
| 1621 } | 2142 } |
| 1622 | 2143 |
| 1623 | 2144 |
| 1624 class TableEntryBodyPrinter { | 2145 class TableEntryBodyPrinter { |
| 1625 public: | 2146 public: |
| 1626 TableEntryBodyPrinter(StringStream* stream, ChoiceNode* choice) | 2147 TableEntryBodyPrinter(StringStream* stream, ChoiceNode* choice) |
| 1627 : stream_(stream), choice_(choice) { } | 2148 : stream_(stream), choice_(choice) { } |
| 1628 void Call(uc16 from, DispatchTable::Entry entry) { | 2149 void Call(uc16 from, DispatchTable::Entry entry) { |
| (...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1733 static const bool kPrintDispatchTable = false; | 2254 static const bool kPrintDispatchTable = false; |
| 1734 void DotPrinter::VisitChoice(ChoiceNode* that) { | 2255 void DotPrinter::VisitChoice(ChoiceNode* that) { |
| 1735 if (kPrintDispatchTable) { | 2256 if (kPrintDispatchTable) { |
| 1736 stream()->Add(" n%p [shape=Mrecord, label=\"", that); | 2257 stream()->Add(" n%p [shape=Mrecord, label=\"", that); |
| 1737 TableEntryHeaderPrinter header_printer(stream()); | 2258 TableEntryHeaderPrinter header_printer(stream()); |
| 1738 that->GetTable(ignore_case_)->ForEach(&header_printer); | 2259 that->GetTable(ignore_case_)->ForEach(&header_printer); |
| 1739 stream()->Add("\"]\n", that); | 2260 stream()->Add("\"]\n", that); |
| 1740 PrintAttributes(that); | 2261 PrintAttributes(that); |
| 1741 TableEntryBodyPrinter body_printer(stream(), that); | 2262 TableEntryBodyPrinter body_printer(stream(), that); |
| 1742 that->GetTable(ignore_case_)->ForEach(&body_printer); | 2263 that->GetTable(ignore_case_)->ForEach(&body_printer); |
| 1743 PrintOnFailure(that, that->on_failure()); | |
| 1744 } else { | 2264 } else { |
| 1745 stream()->Add(" n%p [shape=Mrecord, label=\"?\"];\n", that); | 2265 stream()->Add(" n%p [shape=Mrecord, label=\"?\"];\n", that); |
| 1746 for (int i = 0; i < that->alternatives()->length(); i++) { | 2266 for (int i = 0; i < that->alternatives()->length(); i++) { |
| 1747 GuardedAlternative alt = that->alternatives()->at(i); | 2267 GuardedAlternative alt = that->alternatives()->at(i); |
| 1748 stream()->Add(" n%p -> n%p;\n", that, alt.node()); | 2268 stream()->Add(" n%p -> n%p;\n", that, alt.node()); |
| 1749 } | 2269 } |
| 1750 } | 2270 } |
| 1751 for (int i = 0; i < that->alternatives()->length(); i++) { | 2271 for (int i = 0; i < that->alternatives()->length(); i++) { |
| 1752 GuardedAlternative alt = that->alternatives()->at(i); | 2272 GuardedAlternative alt = that->alternatives()->at(i); |
| 1753 alt.node()->Accept(this); | 2273 alt.node()->Accept(this); |
| (...skipping 24 matching lines...) Expand all Loading... | |
| 1778 break; | 2298 break; |
| 1779 } | 2299 } |
| 1780 default: | 2300 default: |
| 1781 UNREACHABLE(); | 2301 UNREACHABLE(); |
| 1782 } | 2302 } |
| 1783 } | 2303 } |
| 1784 stream()->Add("\", shape=box, peripheries=2];\n"); | 2304 stream()->Add("\", shape=box, peripheries=2];\n"); |
| 1785 PrintAttributes(that); | 2305 PrintAttributes(that); |
| 1786 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); | 2306 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); |
| 1787 Visit(that->on_success()); | 2307 Visit(that->on_success()); |
| 1788 PrintOnFailure(that, that->on_failure()); | |
| 1789 } | 2308 } |
| 1790 | 2309 |
| 1791 | 2310 |
| 1792 void DotPrinter::VisitBackReference(BackReferenceNode* that) { | 2311 void DotPrinter::VisitBackReference(BackReferenceNode* that) { |
| 1793 stream()->Add(" n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n", | 2312 stream()->Add(" n%p [label=\"$%i..$%i\", shape=doubleoctagon];\n", |
| 1794 that, | 2313 that, |
| 1795 that->start_register(), | 2314 that->start_register(), |
| 1796 that->end_register()); | 2315 that->end_register()); |
| 1797 PrintAttributes(that); | 2316 PrintAttributes(that); |
| 1798 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); | 2317 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); |
| 1799 Visit(that->on_success()); | 2318 Visit(that->on_success()); |
| 1800 PrintOnFailure(that, that->on_failure()); | |
| 1801 } | 2319 } |
| 1802 | 2320 |
| 1803 | 2321 |
| 1804 void DotPrinter::VisitEnd(EndNode* that) { | 2322 void DotPrinter::VisitEnd(EndNode* that) { |
| 1805 stream()->Add(" n%p [style=bold, shape=point];\n", that); | 2323 stream()->Add(" n%p [style=bold, shape=point];\n", that); |
| 1806 PrintAttributes(that); | 2324 PrintAttributes(that); |
| 1807 } | 2325 } |
| 1808 | 2326 |
| 1809 | 2327 |
| 1810 void DotPrinter::VisitAction(ActionNode* that) { | 2328 void DotPrinter::VisitAction(ActionNode* that) { |
| 1811 stream()->Add(" n%p [", that); | 2329 stream()->Add(" n%p [", that); |
| 1812 switch (that->type_) { | 2330 switch (that->type_) { |
| 1813 case ActionNode::STORE_REGISTER: | 2331 case ActionNode::SET_REGISTER: |
| 1814 stream()->Add("label=\"$%i:=%i\", shape=octagon", | 2332 stream()->Add("label=\"$%i:=%i\", shape=octagon", |
| 1815 that->data_.u_store_register.reg, | 2333 that->data_.u_store_register.reg, |
| 1816 that->data_.u_store_register.value); | 2334 that->data_.u_store_register.value); |
| 1817 break; | 2335 break; |
| 1818 case ActionNode::INCREMENT_REGISTER: | 2336 case ActionNode::INCREMENT_REGISTER: |
| 1819 stream()->Add("label=\"$%i++\", shape=octagon", | 2337 stream()->Add("label=\"$%i++\", shape=octagon", |
| 1820 that->data_.u_increment_register.reg); | 2338 that->data_.u_increment_register.reg); |
| 1821 break; | 2339 break; |
| 1822 case ActionNode::STORE_POSITION: | 2340 case ActionNode::STORE_POSITION: |
| 1823 stream()->Add("label=\"$%i:=$pos\", shape=octagon", | 2341 stream()->Add("label=\"$%i:=$pos\", shape=octagon", |
| 1824 that->data_.u_position_register.reg); | 2342 that->data_.u_position_register.reg); |
| 1825 break; | 2343 break; |
| 1826 case ActionNode::RESTORE_POSITION: | |
| 1827 stream()->Add("label=\"$pos:=$%i\", shape=octagon", | |
| 1828 that->data_.u_position_register.reg); | |
| 1829 break; | |
| 1830 case ActionNode::BEGIN_SUBMATCH: | 2344 case ActionNode::BEGIN_SUBMATCH: |
| 1831 stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon", | 2345 stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon", |
| 1832 that->data_.u_submatch.current_position_register); | 2346 that->data_.u_submatch.current_position_register); |
| 1833 break; | 2347 break; |
| 1834 case ActionNode::ESCAPE_SUBMATCH: | 2348 case ActionNode::POSITIVE_SUBMATCH_SUCCESS: |
| 1835 stream()->Add("label=\"escape\", shape=septagon"); | 2349 stream()->Add("label=\"escape\", shape=septagon"); |
| 1836 break; | 2350 break; |
| 1837 } | 2351 } |
| 1838 stream()->Add("];\n"); | 2352 stream()->Add("];\n"); |
| 1839 PrintAttributes(that); | 2353 PrintAttributes(that); |
| 1840 stream()->Add(" n%p -> n%p;\n", that, that->on_success()); | 2354 RegExpNode* successor = that->on_success(); |
| 1841 Visit(that->on_success()); | 2355 stream()->Add(" n%p -> n%p;\n", that, successor); |
| 2356 Visit(successor); | |
| 1842 } | 2357 } |
| 1843 | 2358 |
| 1844 | 2359 |
| 1845 class DispatchTableDumper { | 2360 class DispatchTableDumper { |
| 1846 public: | 2361 public: |
| 1847 explicit DispatchTableDumper(StringStream* stream) : stream_(stream) { } | 2362 explicit DispatchTableDumper(StringStream* stream) : stream_(stream) { } |
| 1848 void Call(uc16 key, DispatchTable::Entry entry); | 2363 void Call(uc16 key, DispatchTable::Entry entry); |
| 1849 StringStream* stream() { return stream_; } | 2364 StringStream* stream() { return stream_; } |
| 1850 private: | 2365 private: |
| 1851 StringStream* stream_; | 2366 StringStream* stream_; |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1888 | 2403 |
| 1889 | 2404 |
| 1890 #endif // DEBUG | 2405 #endif // DEBUG |
| 1891 | 2406 |
| 1892 | 2407 |
| 1893 // ------------------------------------------------------------------- | 2408 // ------------------------------------------------------------------- |
| 1894 // Tree to graph conversion | 2409 // Tree to graph conversion |
| 1895 | 2410 |
| 1896 | 2411 |
| 1897 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, | 2412 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, |
| 1898 RegExpNode* on_success, | 2413 RegExpNode* on_success) { |
| 1899 RegExpNode* on_failure) { | |
| 1900 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); | 2414 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); |
| 1901 elms->Add(TextElement::Atom(this)); | 2415 elms->Add(TextElement::Atom(this)); |
| 1902 return new TextNode(elms, on_success, on_failure); | 2416 return new TextNode(elms, on_success); |
| 1903 } | 2417 } |
| 1904 | 2418 |
| 1905 | 2419 |
| 1906 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, | 2420 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, |
| 1907 RegExpNode* on_success, | 2421 RegExpNode* on_success) { |
| 1908 RegExpNode* on_failure) { | 2422 return new TextNode(elements(), on_success); |
| 1909 return new TextNode(elements(), on_success, on_failure); | |
| 1910 } | 2423 } |
| 1911 | 2424 |
| 1912 | 2425 |
| 1913 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 2426 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
| 1914 RegExpNode* on_success, | 2427 RegExpNode* on_success) { |
| 1915 RegExpNode* on_failure) { | |
| 1916 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); | 2428 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); |
| 1917 elms->Add(TextElement::CharClass(this)); | 2429 elms->Add(TextElement::CharClass(this)); |
| 1918 return new TextNode(elms, on_success, on_failure); | 2430 return new TextNode(elms, on_success); |
| 1919 } | 2431 } |
| 1920 | 2432 |
| 1921 | 2433 |
| 1922 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, | 2434 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, |
| 1923 RegExpNode* on_success, | 2435 RegExpNode* on_success) { |
| 1924 RegExpNode* on_failure) { | |
| 1925 ZoneList<RegExpTree*>* alternatives = this->alternatives(); | 2436 ZoneList<RegExpTree*>* alternatives = this->alternatives(); |
| 1926 int length = alternatives->length(); | 2437 int length = alternatives->length(); |
| 1927 ChoiceNode* result = new ChoiceNode(length, on_failure); | 2438 ChoiceNode* result = new ChoiceNode(length); |
| 1928 for (int i = 0; i < length; i++) { | 2439 for (int i = 0; i < length; i++) { |
| 1929 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, | 2440 GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, |
| 1930 on_success, | 2441 on_success)); |
| 1931 on_failure)); | |
| 1932 result->AddAlternative(alternative); | 2442 result->AddAlternative(alternative); |
| 1933 } | 2443 } |
| 1934 return result; | 2444 return result; |
| 1935 } | 2445 } |
| 1936 | 2446 |
| 1937 | 2447 |
| 1938 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, | 2448 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, |
| 1939 RegExpNode* on_success, | 2449 RegExpNode* on_success) { |
| 1940 RegExpNode* on_failure) { | |
| 1941 return ToNode(min(), | 2450 return ToNode(min(), |
| 1942 max(), | 2451 max(), |
| 1943 is_greedy(), | 2452 is_greedy(), |
| 1944 body(), | 2453 body(), |
| 1945 compiler, | 2454 compiler, |
| 1946 on_success, | 2455 on_success); |
| 1947 on_failure); | |
| 1948 } | 2456 } |
| 1949 | 2457 |
| 1950 | 2458 |
| 1951 RegExpNode* RegExpQuantifier::ToNode(int min, | 2459 RegExpNode* RegExpQuantifier::ToNode(int min, |
| 1952 int max, | 2460 int max, |
| 1953 bool is_greedy, | 2461 bool is_greedy, |
| 1954 RegExpTree* body, | 2462 RegExpTree* body, |
| 1955 RegExpCompiler* compiler, | 2463 RegExpCompiler* compiler, |
| 1956 RegExpNode* on_success, | 2464 RegExpNode* on_success) { |
| 1957 RegExpNode* on_failure) { | |
| 1958 // x{f, t} becomes this: | 2465 // x{f, t} becomes this: |
| 1959 // | 2466 // |
| 1960 // (r++)<-. | 2467 // (r++)<-. |
| 1961 // | ` | 2468 // | ` |
| 1962 // | (x) | 2469 // | (x) |
| 1963 // v ^ | 2470 // v ^ |
| 1964 // (r=0)-->(?)---/ [if r < t] | 2471 // (r=0)-->(?)---/ [if r < t] |
| 1965 // | | 2472 // | |
| 1966 // [if r >= f] \----> ... | 2473 // [if r >= f] \----> ... |
| 1967 // | 2474 // |
| 1968 // | 2475 // |
| 1969 // TODO(someone): clear captures on repetition and handle empty | 2476 // TODO(someone): clear captures on repetition and handle empty |
| 1970 // matches. | 2477 // matches. |
| 1971 bool has_min = min > 0; | 2478 bool has_min = min > 0; |
| 1972 bool has_max = max < RegExpQuantifier::kInfinity; | 2479 bool has_max = max < RegExpQuantifier::kInfinity; |
| 1973 bool needs_counter = has_min || has_max; | 2480 bool needs_counter = has_min || has_max; |
| 1974 int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1; | 2481 int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1; |
| 1975 ChoiceNode* center = new ChoiceNode(2, on_failure); | 2482 ChoiceNode* center = new LoopChoiceNode(2); |
| 1976 RegExpNode* loop_return = needs_counter | 2483 RegExpNode* loop_return = needs_counter |
| 1977 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) | 2484 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) |
| 1978 : static_cast<RegExpNode*>(center); | 2485 : static_cast<RegExpNode*>(center); |
| 1979 RegExpNode* body_node = body->ToNode(compiler, loop_return, on_failure); | 2486 RegExpNode* body_node = body->ToNode(compiler, loop_return); |
| 1980 GuardedAlternative body_alt(body_node); | 2487 GuardedAlternative body_alt(body_node); |
| 1981 if (has_max) { | 2488 if (has_max) { |
| 1982 Guard* body_guard = new Guard(reg_ctr, Guard::LT, max); | 2489 Guard* body_guard = new Guard(reg_ctr, Guard::LT, max); |
| 1983 body_alt.AddGuard(body_guard); | 2490 body_alt.AddGuard(body_guard); |
| 1984 } | 2491 } |
| 1985 GuardedAlternative rest_alt(on_success); | 2492 GuardedAlternative rest_alt(on_success); |
| 1986 if (has_min) { | 2493 if (has_min) { |
| 1987 Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min); | 2494 Guard* rest_guard = new Guard(reg_ctr, Guard::GEQ, min); |
| 1988 rest_alt.AddGuard(rest_guard); | 2495 rest_alt.AddGuard(rest_guard); |
| 1989 } | 2496 } |
| 1990 if (is_greedy) { | 2497 if (is_greedy) { |
| 1991 center->AddAlternative(body_alt); | 2498 center->AddAlternative(body_alt); |
| 1992 center->AddAlternative(rest_alt); | 2499 center->AddAlternative(rest_alt); |
| 1993 } else { | 2500 } else { |
| 1994 center->AddAlternative(rest_alt); | 2501 center->AddAlternative(rest_alt); |
| 1995 center->AddAlternative(body_alt); | 2502 center->AddAlternative(body_alt); |
| 1996 } | 2503 } |
| 1997 if (needs_counter) { | 2504 if (needs_counter) { |
| 1998 return ActionNode::StoreRegister(reg_ctr, 0, center); | 2505 return ActionNode::SetRegister(reg_ctr, 0, center); |
| 1999 } else { | 2506 } else { |
| 2000 return center; | 2507 return center; |
| 2001 } | 2508 } |
| 2002 } | 2509 } |
| 2003 | 2510 |
| 2004 | 2511 |
| 2005 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, | 2512 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, |
| 2006 RegExpNode* on_success, | 2513 RegExpNode* on_success) { |
| 2007 RegExpNode* on_failure) { | |
| 2008 NodeInfo info; | 2514 NodeInfo info; |
| 2009 switch (type()) { | 2515 switch (type()) { |
| 2010 case START_OF_LINE: | 2516 case START_OF_LINE: |
| 2011 info.follows_newline_interest = true; | 2517 info.follows_newline_interest = true; |
| 2012 break; | 2518 break; |
| 2013 case START_OF_INPUT: | 2519 case START_OF_INPUT: |
| 2014 info.follows_start_interest = true; | 2520 info.follows_start_interest = true; |
| 2015 break; | 2521 break; |
| 2016 case BOUNDARY: case NON_BOUNDARY: | 2522 case BOUNDARY: case NON_BOUNDARY: |
| 2017 info.follows_word_interest = true; | 2523 info.follows_word_interest = true; |
| 2018 break; | 2524 break; |
| 2019 case END_OF_INPUT: | 2525 case END_OF_INPUT: |
| 2020 info.at_end = true; | 2526 info.at_end = true; |
| 2021 break; | 2527 break; |
| 2022 case END_OF_LINE: | 2528 case END_OF_LINE: |
| 2023 // This is wrong but has the effect of making the compiler abort. | 2529 // This is wrong but has the effect of making the compiler abort. |
| 2024 info.at_end = true; | 2530 info.at_end = true; |
| 2025 } | 2531 } |
| 2026 return on_success->PropagateForward(&info); | 2532 return on_success->PropagateForward(&info); |
| 2027 } | 2533 } |
| 2028 | 2534 |
| 2029 | 2535 |
| 2030 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, | 2536 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, |
| 2031 RegExpNode* on_success, | 2537 RegExpNode* on_success) { |
| 2032 RegExpNode* on_failure) { | |
| 2033 return new BackReferenceNode(RegExpCapture::StartRegister(index()), | 2538 return new BackReferenceNode(RegExpCapture::StartRegister(index()), |
| 2034 RegExpCapture::EndRegister(index()), | 2539 RegExpCapture::EndRegister(index()), |
| 2035 on_success, | 2540 on_success); |
| 2036 on_failure); | |
| 2037 } | 2541 } |
| 2038 | 2542 |
| 2039 | 2543 |
| 2040 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, | 2544 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, |
| 2041 RegExpNode* on_success, | 2545 RegExpNode* on_success) { |
| 2042 RegExpNode* on_failure) { | |
| 2043 return on_success; | 2546 return on_success; |
| 2044 } | 2547 } |
| 2045 | 2548 |
| 2046 | 2549 |
| 2047 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, | 2550 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, |
| 2048 RegExpNode* on_success, | 2551 RegExpNode* on_success) { |
| 2049 RegExpNode* on_failure) { | |
| 2050 int stack_pointer_register = compiler->AllocateRegister(); | 2552 int stack_pointer_register = compiler->AllocateRegister(); |
| 2051 int position_register = compiler->AllocateRegister(); | 2553 int position_register = compiler->AllocateRegister(); |
| 2554 RegExpNode* success; | |
| 2052 if (is_positive()) { | 2555 if (is_positive()) { |
| 2053 // begin submatch scope | |
| 2054 // $reg = $pos | |
| 2055 // if [body] | |
| 2056 // then | |
| 2057 // $pos = $reg | |
| 2058 // escape submatch scope (drop all backtracks created in scope) | |
| 2059 // succeed | |
| 2060 // else | |
| 2061 // end submatch scope (nothing to clean up, just exit the scope) | |
| 2062 // fail | |
| 2063 return ActionNode::BeginSubmatch( | 2556 return ActionNode::BeginSubmatch( |
| 2064 stack_pointer_register, | 2557 stack_pointer_register, |
| 2065 position_register, | 2558 position_register, |
| 2066 body()->ToNode( | 2559 body()->ToNode( |
| 2067 compiler, | 2560 compiler, |
| 2068 ActionNode::EscapeSubmatch( | 2561 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, |
| 2069 stack_pointer_register, | 2562 position_register, |
| 2070 true, // Also restore input position. | 2563 on_success))); |
| 2071 position_register, | |
| 2072 on_success), | |
| 2073 on_failure)); | |
| 2074 } else { | 2564 } else { |
| 2075 // begin submatch scope | 2565 // We use a ChoiceNode for a negative lookahead because it has most of |
| 2076 // try | 2566 // the characteristics we need. It has the body of the lookahead as its |
| 2077 // first if (body) | 2567 // first alternative and the expression after the lookahead of the second |
| 2078 // then | 2568 // alternative. If the first alternative succeeds then the |
| 2079 // escape submatch scope | 2569 // NegativeSubmatchSuccess will unwind the stack including everything the |
| 2080 // fail | 2570 // choice node set up and backtrack. If the first alternative fails then |
| 2081 // else | 2571 // the second alternative is tried, which is exactly the desired result |
| 2082 // backtrack | 2572 // for a negative lookahead. In the case where the dispatch table |
| 2083 // second | 2573 // determines that the first alternative cannot match we will save time |
| 2084 // end submatch scope | 2574 // by not trying it. Things are not quite so well-optimized if the |
| 2085 // restore current position | 2575 // dispatch table determines that the second alternative cannot match. |
| 2086 // succeed | 2576 // In this case we could optimize by immediately backtracking. |
| 2087 ChoiceNode* try_node = | 2577 ChoiceNode* choice_node = new ChoiceNode(2); |
| 2088 new ChoiceNode(1, ActionNode::RestorePosition(position_register, | 2578 GuardedAlternative body_alt( |
| 2089 on_success)); | 2579 body()->ToNode( |
| 2090 RegExpNode* body_node = body()->ToNode( | 2580 compiler, |
| 2091 compiler, | 2581 success = new NegativeSubmatchSuccess(stack_pointer_register, |
| 2092 ActionNode::EscapeSubmatch(stack_pointer_register, | 2582 position_register))); |
| 2093 false, // Don't also restore position | 2583 choice_node->AddAlternative(body_alt); |
| 2094 0, // Unused arguments. | 2584 choice_node->AddAlternative(GuardedAlternative(on_success)); |
| 2095 on_failure), | |
| 2096 compiler->backtrack()); | |
| 2097 GuardedAlternative body_alt(body_node); | |
| 2098 try_node->AddAlternative(body_alt); | |
| 2099 return ActionNode::BeginSubmatch(stack_pointer_register, | 2585 return ActionNode::BeginSubmatch(stack_pointer_register, |
| 2100 position_register, | 2586 position_register, |
| 2101 try_node); | 2587 choice_node); |
| 2102 } | 2588 } |
| 2103 } | 2589 } |
| 2104 | 2590 |
| 2105 | 2591 |
| 2106 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, | 2592 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, |
| 2107 RegExpNode* on_success, | 2593 RegExpNode* on_success) { |
| 2108 RegExpNode* on_failure) { | 2594 return ToNode(body(), index(), compiler, on_success); |
| 2109 return ToNode(body(), index(), compiler, on_success, on_failure); | |
| 2110 } | 2595 } |
| 2111 | 2596 |
| 2112 | 2597 |
| 2113 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, | 2598 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, |
| 2114 int index, | 2599 int index, |
| 2115 RegExpCompiler* compiler, | 2600 RegExpCompiler* compiler, |
| 2116 RegExpNode* on_success, | 2601 RegExpNode* on_success) { |
| 2117 RegExpNode* on_failure) { | |
| 2118 int start_reg = RegExpCapture::StartRegister(index); | 2602 int start_reg = RegExpCapture::StartRegister(index); |
| 2119 int end_reg = RegExpCapture::EndRegister(index); | 2603 int end_reg = RegExpCapture::EndRegister(index); |
| 2120 RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success); | 2604 RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success); |
| 2121 RegExpNode* body_node = body->ToNode(compiler, store_end, on_failure); | 2605 RegExpNode* body_node = body->ToNode(compiler, store_end); |
| 2122 return ActionNode::StorePosition(start_reg, body_node); | 2606 return ActionNode::StorePosition(start_reg, body_node); |
| 2123 } | 2607 } |
| 2124 | 2608 |
| 2125 | 2609 |
| 2126 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, | 2610 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, |
| 2127 RegExpNode* on_success, | 2611 RegExpNode* on_success) { |
| 2128 RegExpNode* on_failure) { | |
| 2129 ZoneList<RegExpTree*>* children = nodes(); | 2612 ZoneList<RegExpTree*>* children = nodes(); |
| 2130 RegExpNode* current = on_success; | 2613 RegExpNode* current = on_success; |
| 2131 for (int i = children->length() - 1; i >= 0; i--) { | 2614 for (int i = children->length() - 1; i >= 0; i--) { |
| 2132 current = children->at(i)->ToNode(compiler, current, on_failure); | 2615 current = children->at(i)->ToNode(compiler, current); |
| 2133 } | 2616 } |
| 2134 return current; | 2617 return current; |
| 2135 } | 2618 } |
| 2136 | 2619 |
| 2137 | 2620 |
| 2138 static const int kSpaceRangeCount = 20; | 2621 static const int kSpaceRangeCount = 20; |
| 2139 static const uc16 kSpaceRanges[kSpaceRangeCount] = { | 2622 static const uc16 kSpaceRanges[kSpaceRangeCount] = { |
| 2140 0x0009, 0x000D, 0x0020, 0x0020, 0x00A0, 0x00A0, 0x1680, | 2623 0x0009, 0x000D, 0x0020, 0x0020, 0x00A0, 0x00A0, 0x1680, |
| 2141 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029, | 2624 0x1680, 0x180E, 0x180E, 0x2000, 0x200A, 0x2028, 0x2029, |
| 2142 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000 | 2625 0x202F, 0x202F, 0x205F, 0x205F, 0x3000, 0x3000 |
| (...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2393 bool cloned = false; | 2876 bool cloned = false; |
| 2394 return RegExpNode::EnsureSibling(node, &full_info, &cloned); | 2877 return RegExpNode::EnsureSibling(node, &full_info, &cloned); |
| 2395 } | 2878 } |
| 2396 | 2879 |
| 2397 | 2880 |
| 2398 RegExpNode* ActionNode::PropagateForward(NodeInfo* info) { | 2881 RegExpNode* ActionNode::PropagateForward(NodeInfo* info) { |
| 2399 NodeInfo full_info(*this->info()); | 2882 NodeInfo full_info(*this->info()); |
| 2400 full_info.AddFromPreceding(info); | 2883 full_info.AddFromPreceding(info); |
| 2401 bool cloned = false; | 2884 bool cloned = false; |
| 2402 ActionNode* action = EnsureSibling(this, &full_info, &cloned); | 2885 ActionNode* action = EnsureSibling(this, &full_info, &cloned); |
| 2403 if (cloned && type_ != ESCAPE_SUBMATCH) { | 2886 action->set_on_success(action->on_success()->PropagateForward(info)); |
| 2404 action->set_on_success(action->on_success()->PropagateForward(info)); | |
| 2405 } | |
| 2406 return action; | 2887 return action; |
| 2407 } | 2888 } |
| 2408 | 2889 |
| 2409 | 2890 |
| 2410 RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) { | 2891 RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) { |
| 2411 NodeInfo full_info(*this->info()); | 2892 NodeInfo full_info(*this->info()); |
| 2412 full_info.AddFromPreceding(info); | 2893 full_info.AddFromPreceding(info); |
| 2413 bool cloned = false; | 2894 bool cloned = false; |
| 2414 ChoiceNode* choice = EnsureSibling(this, &full_info, &cloned); | 2895 ChoiceNode* choice = EnsureSibling(this, &full_info, &cloned); |
| 2415 if (cloned) { | 2896 if (cloned) { |
| 2416 ZoneList<GuardedAlternative>* old_alternatives = alternatives(); | 2897 ZoneList<GuardedAlternative>* old_alternatives = alternatives(); |
| 2417 int count = old_alternatives->length(); | 2898 int count = old_alternatives->length(); |
| 2418 choice->alternatives_ = new ZoneList<GuardedAlternative>(count); | 2899 choice->alternatives_ = new ZoneList<GuardedAlternative>(count); |
| 2419 for (int i = 0; i < count; i++) { | 2900 for (int i = 0; i < count; i++) { |
| 2420 GuardedAlternative alternative = old_alternatives->at(i); | 2901 GuardedAlternative alternative = old_alternatives->at(i); |
| 2421 alternative.set_node(alternative.node()->PropagateForward(info)); | 2902 alternative.set_node(alternative.node()->PropagateForward(info)); |
| 2422 choice->alternatives()->Add(alternative); | 2903 choice->alternatives()->Add(alternative); |
| 2423 } | 2904 } |
| 2424 if (!choice->on_failure_->IsBacktrack()) { | |
| 2425 choice->on_failure_ = choice->on_failure_->PropagateForward(info); | |
| 2426 } | |
| 2427 } | 2905 } |
| 2428 return choice; | 2906 return choice; |
| 2429 } | 2907 } |
| 2430 | 2908 |
| 2431 | 2909 |
| 2432 RegExpNode* EndNode::PropagateForward(NodeInfo* info) { | 2910 RegExpNode* EndNode::PropagateForward(NodeInfo* info) { |
| 2433 return PropagateToEndpoint(this, info); | 2911 return PropagateToEndpoint(this, info); |
| 2434 } | 2912 } |
| 2435 | 2913 |
| 2436 | 2914 |
| (...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2617 that->info()->being_analyzed = false; | 3095 that->info()->being_analyzed = false; |
| 2618 that->info()->been_analyzed = true; | 3096 that->info()->been_analyzed = true; |
| 2619 } | 3097 } |
| 2620 | 3098 |
| 2621 | 3099 |
| 2622 void Analysis::VisitEnd(EndNode* that) { | 3100 void Analysis::VisitEnd(EndNode* that) { |
| 2623 // nothing to do | 3101 // nothing to do |
| 2624 } | 3102 } |
| 2625 | 3103 |
| 2626 | 3104 |
| 3105 void TextNode::CalculateOffsets() { | |
| 3106 int element_count = elements()->length(); | |
| 3107 // Set up the offsets of the elements relative to the start. This is a fixed | |
| 3108 // quantity since a TextNode can only contain fixed-width things. | |
| 3109 int cp_offset = 0; | |
| 3110 for (int i = 0; i < element_count; i++) { | |
| 3111 TextElement& elm = elements()->at(i); | |
| 3112 elm.cp_offset = cp_offset; | |
| 3113 if (elm.type == TextElement::ATOM) { | |
| 3114 cp_offset += elm.data.u_atom->data().length(); | |
| 3115 } else { | |
| 3116 cp_offset++; | |
| 3117 Vector<const uc16> quarks = elm.data.u_atom->data(); | |
| 3118 } | |
| 3119 } | |
| 3120 } | |
| 3121 | |
| 3122 | |
| 2627 void Analysis::VisitText(TextNode* that) { | 3123 void Analysis::VisitText(TextNode* that) { |
| 2628 if (ignore_case_) { | 3124 if (ignore_case_) { |
| 2629 that->MakeCaseIndependent(); | 3125 that->MakeCaseIndependent(); |
| 2630 } | 3126 } |
| 2631 EnsureAnalyzed(that->on_success()); | 3127 EnsureAnalyzed(that->on_success()); |
| 2632 EnsureAnalyzed(that->on_failure()); | |
| 2633 NodeInfo* info = that->info(); | 3128 NodeInfo* info = that->info(); |
| 2634 NodeInfo* next_info = that->on_success()->info(); | 3129 NodeInfo* next_info = that->on_success()->info(); |
| 2635 // If the following node is interested in what it follows then this | 3130 // If the following node is interested in what it follows then this |
| 2636 // node must determine it. | 3131 // node must determine it. |
| 2637 info->determine_newline = next_info->follows_newline_interest; | 3132 info->determine_newline = next_info->follows_newline_interest; |
| 2638 info->determine_word = next_info->follows_word_interest; | 3133 info->determine_word = next_info->follows_word_interest; |
| 2639 info->determine_start = next_info->follows_start_interest; | 3134 info->determine_start = next_info->follows_start_interest; |
| 3135 that->CalculateOffsets(); | |
| 2640 } | 3136 } |
| 2641 | 3137 |
| 2642 | 3138 |
| 2643 void Analysis::VisitAction(ActionNode* that) { | 3139 void Analysis::VisitAction(ActionNode* that) { |
| 2644 EnsureAnalyzed(that->on_success()); | 3140 RegExpNode* target = that->on_success(); |
| 3141 EnsureAnalyzed(target); | |
| 2645 // If the next node is interested in what it follows then this node | 3142 // If the next node is interested in what it follows then this node |
| 2646 // has to be interested too so it can pass the information on. | 3143 // has to be interested too so it can pass the information on. |
| 2647 that->info()->AddFromFollowing(that->on_success()->info()); | 3144 that->info()->AddFromFollowing(target->info()); |
| 2648 } | 3145 } |
| 2649 | 3146 |
| 2650 | 3147 |
| 2651 void Analysis::VisitChoice(ChoiceNode* that) { | 3148 void Analysis::VisitChoice(ChoiceNode* that) { |
| 2652 NodeInfo* info = that->info(); | 3149 NodeInfo* info = that->info(); |
| 2653 for (int i = 0; i < that->alternatives()->length(); i++) { | 3150 for (int i = 0; i < that->alternatives()->length(); i++) { |
| 2654 RegExpNode* node = that->alternatives()->at(i).node(); | 3151 RegExpNode* node = that->alternatives()->at(i).node(); |
| 2655 EnsureAnalyzed(node); | 3152 EnsureAnalyzed(node); |
| 2656 // Anything the following nodes need to know has to be known by | 3153 // Anything the following nodes need to know has to be known by |
| 2657 // this node also, so it can pass it on. | 3154 // this node also, so it can pass it on. |
| 2658 info->AddFromFollowing(node->info()); | 3155 info->AddFromFollowing(node->info()); |
| 2659 } | 3156 } |
| 2660 EnsureAnalyzed(that->on_failure()); | |
| 2661 } | 3157 } |
| 2662 | 3158 |
| 2663 | 3159 |
| 2664 void Analysis::VisitBackReference(BackReferenceNode* that) { | 3160 void Analysis::VisitBackReference(BackReferenceNode* that) { |
| 2665 EnsureAnalyzed(that->on_success()); | 3161 EnsureAnalyzed(that->on_success()); |
| 2666 EnsureAnalyzed(that->on_failure()); | |
| 2667 } | 3162 } |
| 2668 | 3163 |
| 2669 | 3164 |
| 2670 // ------------------------------------------------------------------- | 3165 // ------------------------------------------------------------------- |
| 2671 // Assumption expansion | 3166 // Assumption expansion |
| 2672 | 3167 |
| 2673 | 3168 |
| 2674 RegExpNode* RegExpNode::EnsureExpanded(NodeInfo* info) { | 3169 RegExpNode* RegExpNode::EnsureExpanded(NodeInfo* info) { |
| 2675 siblings_.Ensure(this); | 3170 siblings_.Ensure(this); |
| 2676 NodeInfo new_info = *this->info(); | 3171 NodeInfo new_info = *this->info(); |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2739 if (non_word == NULL) { | 3234 if (non_word == NULL) { |
| 2740 // This node contains no non-word characters so it must be | 3235 // This node contains no non-word characters so it must be |
| 2741 // all word. | 3236 // all word. |
| 2742 this->info()->is_word = NodeInfo::TRUE; | 3237 this->info()->is_word = NodeInfo::TRUE; |
| 2743 } else if (word == NULL) { | 3238 } else if (word == NULL) { |
| 2744 // Vice versa. | 3239 // Vice versa. |
| 2745 this->info()->is_word = NodeInfo::FALSE; | 3240 this->info()->is_word = NodeInfo::FALSE; |
| 2746 } else { | 3241 } else { |
| 2747 // If this character class contains both word and non-word | 3242 // If this character class contains both word and non-word |
| 2748 // characters we need to split it into two. | 3243 // characters we need to split it into two. |
| 2749 ChoiceNode* result = new ChoiceNode(2, on_failure()); | 3244 ChoiceNode* result = new ChoiceNode(2); |
| 2750 // Welcome to the family, son! | 3245 // Welcome to the family, son! |
| 2751 result->set_siblings(this->siblings()); | 3246 result->set_siblings(this->siblings()); |
| 2752 *result->info() = *this->info(); | 3247 *result->info() = *this->info(); |
| 2753 result->info()->ResetCompilationState(); | 3248 result->info()->ResetCompilationState(); |
| 2754 result->info()->AddAssumptions(info); | 3249 result->info()->AddAssumptions(info); |
| 2755 RegExpNode* word_node | 3250 RegExpNode* word_node |
| 2756 = new TextNode(new RegExpCharacterClass(word, false), | 3251 = new TextNode(new RegExpCharacterClass(word, false), |
| 2757 on_success(), | 3252 on_success()); |
| 2758 on_failure()); | |
| 2759 word_node->info()->determine_word = true; | 3253 word_node->info()->determine_word = true; |
| 2760 word_node->info()->does_determine_word = true; | 3254 word_node->info()->does_determine_word = true; |
| 2761 word_node->info()->is_word = NodeInfo::TRUE; | 3255 word_node->info()->is_word = NodeInfo::TRUE; |
| 2762 result->alternatives()->Add(GuardedAlternative(word_node)); | 3256 result->alternatives()->Add(GuardedAlternative(word_node)); |
| 2763 RegExpNode* non_word_node | 3257 RegExpNode* non_word_node |
| 2764 = new TextNode(new RegExpCharacterClass(non_word, false), | 3258 = new TextNode(new RegExpCharacterClass(non_word, false), |
| 2765 on_success(), | 3259 on_success()); |
| 2766 on_failure()); | |
| 2767 non_word_node->info()->determine_word = true; | 3260 non_word_node->info()->determine_word = true; |
| 2768 non_word_node->info()->does_determine_word = true; | 3261 non_word_node->info()->does_determine_word = true; |
| 2769 non_word_node->info()->is_word = NodeInfo::FALSE; | 3262 non_word_node->info()->is_word = NodeInfo::FALSE; |
| 2770 result->alternatives()->Add(GuardedAlternative(non_word_node)); | 3263 result->alternatives()->Add(GuardedAlternative(non_word_node)); |
| 2771 return result; | 3264 return result; |
| 2772 } | 3265 } |
| 2773 } | 3266 } |
| 2774 } | 3267 } |
| 2775 TextNode* clone = this->Clone(); | 3268 TextNode* clone = this->Clone(); |
| 2776 clone->info()->ResetCompilationState(); | 3269 clone->info()->ResetCompilationState(); |
| (...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2967 break; | 3460 break; |
| 2968 } | 3461 } |
| 2969 default: { | 3462 default: { |
| 2970 UNIMPLEMENTED(); | 3463 UNIMPLEMENTED(); |
| 2971 } | 3464 } |
| 2972 } | 3465 } |
| 2973 } | 3466 } |
| 2974 | 3467 |
| 2975 | 3468 |
| 2976 void DispatchTableConstructor::VisitAction(ActionNode* that) { | 3469 void DispatchTableConstructor::VisitAction(ActionNode* that) { |
| 2977 that->on_success()->Accept(this); | 3470 RegExpNode* target = that->on_success(); |
| 3471 target->Accept(this); | |
| 2978 } | 3472 } |
| 2979 | 3473 |
| 2980 | 3474 |
| 2981 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, | 3475 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, |
| 2982 RegExpNode** node_return, | 3476 RegExpNode** node_return, |
| 2983 bool ignore_case, | 3477 bool ignore_case, |
| 2984 bool is_multiline) { | 3478 bool is_multiline, |
| 3479 Handle<String> pattern) { | |
| 2985 RegExpCompiler compiler(input->capture_count, ignore_case); | 3480 RegExpCompiler compiler(input->capture_count, ignore_case); |
| 2986 // Wrap the body of the regexp in capture #0. | 3481 // Wrap the body of the regexp in capture #0. |
| 2987 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, | 3482 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, |
| 2988 0, | 3483 0, |
| 2989 &compiler, | 3484 &compiler, |
| 2990 compiler.accept(), | 3485 compiler.accept()); |
| 2991 compiler.backtrack()); | |
| 2992 // Add a .*? at the beginning, outside the body capture. | 3486 // Add a .*? at the beginning, outside the body capture. |
| 2993 // Note: We could choose to not add this if the regexp is anchored at | 3487 // Note: We could choose to not add this if the regexp is anchored at |
| 2994 // the start of the input but I'm not sure how best to do that and | 3488 // the start of the input but I'm not sure how best to do that and |
| 2995 // since we don't even handle ^ yet I'm saving that optimization for | 3489 // since we don't even handle ^ yet I'm saving that optimization for |
| 2996 // later. | 3490 // later. |
| 2997 RegExpNode* node = RegExpQuantifier::ToNode(0, | 3491 RegExpNode* node = RegExpQuantifier::ToNode(0, |
| 2998 RegExpQuantifier::kInfinity, | 3492 RegExpQuantifier::kInfinity, |
| 2999 false, | 3493 false, |
| 3000 new RegExpCharacterClass('*'), | 3494 new RegExpCharacterClass('*'), |
| 3001 &compiler, | 3495 &compiler, |
| 3002 captured_body, | 3496 captured_body); |
| 3003 compiler.backtrack()); | |
| 3004 if (node_return != NULL) *node_return = node; | 3497 if (node_return != NULL) *node_return = node; |
| 3005 Analysis analysis(ignore_case); | 3498 Analysis analysis(ignore_case); |
| 3006 analysis.EnsureAnalyzed(node); | 3499 analysis.EnsureAnalyzed(node); |
| 3007 | 3500 |
| 3008 NodeInfo info = *node->info(); | 3501 NodeInfo info = *node->info(); |
| 3009 node = node->EnsureExpanded(&info); | 3502 node = node->EnsureExpanded(&info); |
| 3010 | 3503 |
| 3011 if (!FLAG_irregexp) { | 3504 if (!FLAG_irregexp) { |
| 3012 return Handle<FixedArray>::null(); | 3505 return Handle<FixedArray>::null(); |
| 3013 } | 3506 } |
| 3014 | 3507 |
| 3015 if (is_multiline && !FLAG_attempt_multiline_irregexp) { | 3508 if (is_multiline && !FLAG_attempt_multiline_irregexp) { |
| 3016 return Handle<FixedArray>::null(); | 3509 return Handle<FixedArray>::null(); |
| 3017 } | 3510 } |
| 3018 | 3511 |
| 3019 if (FLAG_irregexp_native) { | 3512 if (FLAG_irregexp_native) { |
| 3020 #ifdef ARM | 3513 #ifdef ARM |
| 3021 // Unimplemented, fall-through to bytecode implementation. | 3514 // Unimplemented, fall-through to bytecode implementation. |
| 3022 #else // IA32 | 3515 #else // IA32 |
| 3023 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16, | 3516 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16, |
| 3024 (input->capture_count + 1) * 2); | 3517 (input->capture_count + 1) * 2); |
| 3025 return compiler.Assemble(¯o_assembler, | 3518 return compiler.Assemble(¯o_assembler, |
| 3026 node, | 3519 node, |
| 3027 input->capture_count); | 3520 input->capture_count, |
| 3521 pattern); | |
| 3028 #endif | 3522 #endif |
| 3029 } | 3523 } |
| 3030 EmbeddedVector<byte, 1024> codes; | 3524 EmbeddedVector<byte, 1024> codes; |
| 3031 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 3525 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
| 3032 return compiler.Assemble(¯o_assembler, | 3526 return compiler.Assemble(¯o_assembler, |
| 3033 node, | 3527 node, |
| 3034 input->capture_count); | 3528 input->capture_count, |
| 3529 pattern); | |
| 3035 } | 3530 } |
| 3036 | 3531 |
| 3037 | 3532 |
| 3038 }} // namespace v8::internal | 3533 }} // namespace v8::internal |
| OLD | NEW |