| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 857 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 868 TextElement TextElement::CharClass( | 868 TextElement TextElement::CharClass( |
| 869 RegExpCharacterClass* char_class) { | 869 RegExpCharacterClass* char_class) { |
| 870 TextElement result = TextElement(CHAR_CLASS); | 870 TextElement result = TextElement(CHAR_CLASS); |
| 871 result.data.u_char_class = char_class; | 871 result.data.u_char_class = char_class; |
| 872 return result; | 872 return result; |
| 873 } | 873 } |
| 874 | 874 |
| 875 | 875 |
| 876 class RegExpCompiler { | 876 class RegExpCompiler { |
| 877 public: | 877 public: |
| 878 explicit RegExpCompiler(int capture_count); | 878 RegExpCompiler(int capture_count, bool ignore_case); |
| 879 | 879 |
| 880 int AllocateRegister() { return next_register_++; } | 880 int AllocateRegister() { return next_register_++; } |
| 881 | 881 |
| 882 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, | 882 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, |
| 883 RegExpNode* start, | 883 RegExpNode* start, |
| 884 int capture_count, | 884 int capture_count); |
| 885 bool case_independent); | |
| 886 | 885 |
| 887 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } | 886 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } |
| 888 | 887 |
| 889 static const int kImplementationOffset = 0; | 888 static const int kImplementationOffset = 0; |
| 890 static const int kNumberOfRegistersOffset = 0; | 889 static const int kNumberOfRegistersOffset = 0; |
| 891 static const int kCodeOffset = 1; | 890 static const int kCodeOffset = 1; |
| 892 | 891 |
| 893 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } | 892 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } |
| 894 EndNode* accept() { return accept_; } | 893 EndNode* accept() { return accept_; } |
| 895 EndNode* backtrack() { return backtrack_; } | 894 EndNode* backtrack() { return backtrack_; } |
| 896 | 895 |
| 897 static const int kMaxRecursion = 100; | 896 static const int kMaxRecursion = 100; |
| 898 inline int recursion_depth() { return recursion_depth_; } | 897 inline int recursion_depth() { return recursion_depth_; } |
| 899 inline void IncrementRecursionDepth() { recursion_depth_++; } | 898 inline void IncrementRecursionDepth() { recursion_depth_++; } |
| 900 inline void DecrementRecursionDepth() { recursion_depth_--; } | 899 inline void DecrementRecursionDepth() { recursion_depth_--; } |
| 901 | 900 |
| 901 inline bool is_case_independent() { return is_case_independent_; } |
| 902 |
| 902 private: | 903 private: |
| 903 EndNode* accept_; | 904 EndNode* accept_; |
| 904 EndNode* backtrack_; | 905 EndNode* backtrack_; |
| 905 int next_register_; | 906 int next_register_; |
| 906 List<RegExpNode*>* work_list_; | 907 List<RegExpNode*>* work_list_; |
| 907 int recursion_depth_; | 908 int recursion_depth_; |
| 908 RegExpMacroAssembler* macro_assembler_; | 909 RegExpMacroAssembler* macro_assembler_; |
| 910 bool is_case_independent_; |
| 909 }; | 911 }; |
| 910 | 912 |
| 911 | 913 |
| 912 // Attempts to compile the regexp using a Regexp2000 code generator. Returns | 914 // Attempts to compile the regexp using a Regexp2000 code generator. Returns |
| 913 // a fixed array or a null handle depending on whether it succeeded. | 915 // a fixed array or a null handle depending on whether it succeeded. |
| 914 RegExpCompiler::RegExpCompiler(int capture_count) | 916 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case) |
| 915 : next_register_(2 * (capture_count + 1)), | 917 : next_register_(2 * (capture_count + 1)), |
| 916 work_list_(NULL), | 918 work_list_(NULL), |
| 917 recursion_depth_(0) { | 919 recursion_depth_(0), |
| 920 is_case_independent_(ignore_case) { |
| 918 accept_ = new EndNode(EndNode::ACCEPT); | 921 accept_ = new EndNode(EndNode::ACCEPT); |
| 919 backtrack_ = new EndNode(EndNode::BACKTRACK); | 922 backtrack_ = new EndNode(EndNode::BACKTRACK); |
| 920 } | 923 } |
| 921 | 924 |
| 922 | 925 |
| 923 Handle<FixedArray> RegExpCompiler::Assemble( | 926 Handle<FixedArray> RegExpCompiler::Assemble( |
| 924 RegExpMacroAssembler* macro_assembler, | 927 RegExpMacroAssembler* macro_assembler, |
| 925 RegExpNode* start, | 928 RegExpNode* start, |
| 926 int capture_count, | 929 int capture_count) { |
| 927 bool case_independent) { | 930 if (!FLAG_attempt_case_independent && is_case_independent_) { |
| 928 if (case_independent) return Handle<FixedArray>::null(); | 931 return Handle<FixedArray>::null(); |
| 932 } |
| 929 macro_assembler_ = macro_assembler; | 933 macro_assembler_ = macro_assembler; |
| 930 List <RegExpNode*> work_list(0); | 934 List <RegExpNode*> work_list(0); |
| 931 work_list_ = &work_list; | 935 work_list_ = &work_list; |
| 932 Label fail; | 936 Label fail; |
| 933 macro_assembler->PushBacktrack(&fail); | 937 macro_assembler->PushBacktrack(&fail); |
| 934 if (!start->GoTo(this)) { | 938 if (!start->GoTo(this)) { |
| 935 fail.Unuse(); | 939 fail.Unuse(); |
| 936 return Handle<FixedArray>::null(); | 940 return Handle<FixedArray>::null(); |
| 937 } | 941 } |
| 938 while (!work_list.is_empty()) { | 942 while (!work_list.is_empty()) { |
| (...skipping 164 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1103 case Guard::LT: | 1107 case Guard::LT: |
| 1104 macro_assembler->IfRegisterGE(guard->reg(), guard->value(), on_failure); | 1108 macro_assembler->IfRegisterGE(guard->reg(), guard->value(), on_failure); |
| 1105 break; | 1109 break; |
| 1106 case Guard::GEQ: | 1110 case Guard::GEQ: |
| 1107 macro_assembler->IfRegisterLT(guard->reg(), guard->value(), on_failure); | 1111 macro_assembler->IfRegisterLT(guard->reg(), guard->value(), on_failure); |
| 1108 break; | 1112 break; |
| 1109 } | 1113 } |
| 1110 } | 1114 } |
| 1111 | 1115 |
| 1112 | 1116 |
| 1117 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize; |
| 1118 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange; |
| 1119 |
| 1120 |
| 1121 static inline void EmitAtomNonLetters( |
| 1122 RegExpMacroAssembler* macro_assembler, |
| 1123 TextElement elm, |
| 1124 Vector<const uc16> quarks, |
| 1125 Label* on_failure, |
| 1126 int cp_offset) { |
| 1127 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 1128 for (int i = quarks.length() - 1; i >= 0; i--) { |
| 1129 uc16 c = quarks[i]; |
| 1130 int length = uncanonicalize.get(c, '\0', chars); |
| 1131 if (length <= 1) { |
| 1132 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); |
| 1133 macro_assembler->CheckNotCharacter(c, on_failure); |
| 1134 } |
| 1135 } |
| 1136 } |
| 1137 |
| 1138 |
| 1139 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, |
| 1140 uc16 c1, |
| 1141 uc16 c2, |
| 1142 Label* on_failure) { |
| 1143 uc16 exor = c1 ^ c2; |
| 1144 // Check whether exor has only one bit set. |
| 1145 if (((exor - 1) & exor) == 0) { |
| 1146 // If c1 and c2 differ only by one bit. |
| 1147 // Ecma262UnCanonicalize always gives the highest number last. |
| 1148 ASSERT(c2 > c1); |
| 1149 macro_assembler->CheckNotCharacterAfterOr(c2, exor, on_failure); |
| 1150 return true; |
| 1151 } else { |
| 1152 ASSERT(c2 > c1); |
| 1153 uc16 diff = c2 - c1; |
| 1154 if (((diff - 1) & diff) == 0 && c1 >= diff) { |
| 1155 // If the characters differ by 2^n but don't differ by one bit then |
| 1156 // subtract the difference from the found character, then do the or |
| 1157 // trick. We avoid the theoretical case where negative numbers are |
| 1158 // involved in order to simplify code generation. |
| 1159 macro_assembler->CheckNotCharacterAfterMinusOr(c2 - diff, |
| 1160 diff, |
| 1161 on_failure); |
| 1162 return true; |
| 1163 } |
| 1164 } |
| 1165 return false; |
| 1166 } |
| 1167 |
| 1168 |
| 1169 static inline void EmitAtomLetters( |
| 1170 RegExpMacroAssembler* macro_assembler, |
| 1171 TextElement elm, |
| 1172 Vector<const uc16> quarks, |
| 1173 Label* on_failure, |
| 1174 int cp_offset) { |
| 1175 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 1176 for (int i = quarks.length() - 1; i >= 0; i--) { |
| 1177 uc16 c = quarks[i]; |
| 1178 int length = uncanonicalize.get(c, '\0', chars); |
| 1179 if (length <= 1) continue; |
| 1180 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); |
| 1181 Label ok; |
| 1182 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); |
| 1183 switch (length) { |
| 1184 case 2: { |
| 1185 if (ShortCutEmitCharacterPair(macro_assembler, |
| 1186 chars[0], |
| 1187 chars[1], |
| 1188 on_failure)) { |
| 1189 ok.Unuse(); |
| 1190 } else { |
| 1191 macro_assembler->CheckCharacter(chars[0], &ok); |
| 1192 macro_assembler->CheckNotCharacter(chars[1], on_failure); |
| 1193 macro_assembler->Bind(&ok); |
| 1194 } |
| 1195 break; |
| 1196 } |
| 1197 case 4: |
| 1198 macro_assembler->CheckCharacter(chars[3], &ok); |
| 1199 // Fall through! |
| 1200 case 3: |
| 1201 macro_assembler->CheckCharacter(chars[0], &ok); |
| 1202 macro_assembler->CheckCharacter(chars[1], &ok); |
| 1203 macro_assembler->CheckNotCharacter(chars[2], on_failure); |
| 1204 macro_assembler->Bind(&ok); |
| 1205 break; |
| 1206 default: |
| 1207 UNREACHABLE(); |
| 1208 break; |
| 1209 } |
| 1210 } |
| 1211 } |
| 1212 |
| 1213 |
| 1214 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, |
| 1215 RegExpCharacterClass* cc, |
| 1216 int cp_offset, |
| 1217 Label* on_failure) { |
| 1218 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); |
| 1219 cp_offset++; |
| 1220 |
| 1221 ZoneList<CharacterRange>* ranges = cc->ranges(); |
| 1222 |
| 1223 Label success; |
| 1224 |
| 1225 Label *char_is_in_class = |
| 1226 cc->is_negated() ? on_failure : &success; |
| 1227 |
| 1228 int range_count = ranges->length(); |
| 1229 |
| 1230 if (range_count == 0) { |
| 1231 if (!cc->is_negated()) { |
| 1232 macro_assembler->GoTo(on_failure); |
| 1233 } |
| 1234 return; |
| 1235 } |
| 1236 |
| 1237 for (int i = 0; i < range_count - 1; i++) { |
| 1238 CharacterRange& range = ranges->at(i); |
| 1239 Label next_range; |
| 1240 uc16 from = range.from(); |
| 1241 uc16 to = range.to(); |
| 1242 if (to == from) { |
| 1243 macro_assembler->CheckCharacter(to, char_is_in_class); |
| 1244 } else { |
| 1245 if (from != 0) { |
| 1246 macro_assembler->CheckCharacterLT(from, &next_range); |
| 1247 } |
| 1248 if (to != 0xffff) { |
| 1249 macro_assembler->CheckCharacterLT(to + 1, char_is_in_class); |
| 1250 } else { |
| 1251 macro_assembler->GoTo(char_is_in_class); |
| 1252 } |
| 1253 } |
| 1254 macro_assembler->Bind(&next_range); |
| 1255 } |
| 1256 |
| 1257 CharacterRange& range = ranges->at(range_count - 1); |
| 1258 uc16 from = range.from(); |
| 1259 uc16 to = range.to(); |
| 1260 |
| 1261 if (to == from) { |
| 1262 if (cc->is_negated()) { |
| 1263 macro_assembler->CheckCharacter(to, on_failure); |
| 1264 } else { |
| 1265 macro_assembler->CheckNotCharacter(to, on_failure); |
| 1266 } |
| 1267 } else { |
| 1268 if (from != 0) { |
| 1269 if (!cc->is_negated()) { |
| 1270 macro_assembler->CheckCharacterLT(from, on_failure); |
| 1271 } else { |
| 1272 macro_assembler->CheckCharacterLT(from, &success); |
| 1273 } |
| 1274 } |
| 1275 if (to != 0xffff) { |
| 1276 if (!cc->is_negated()) { |
| 1277 macro_assembler->CheckCharacterGT(to, on_failure); |
| 1278 } else { |
| 1279 macro_assembler->CheckCharacterLT(to + 1, on_failure); |
| 1280 } |
| 1281 } else { |
| 1282 if (cc->is_negated()) { |
| 1283 macro_assembler->GoTo(on_failure); |
| 1284 } |
| 1285 } |
| 1286 } |
| 1287 macro_assembler->Bind(&success); |
| 1288 } |
| 1289 |
| 1290 |
| 1291 |
| 1113 bool TextNode::Emit(RegExpCompiler* compiler) { | 1292 bool TextNode::Emit(RegExpCompiler* compiler) { |
| 1114 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1293 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 1115 Bind(macro_assembler); | 1294 Bind(macro_assembler); |
| 1116 int element_count = elms_->length(); | 1295 int element_count = elms_->length(); |
| 1117 int cp_offset = 0; | 1296 int cp_offset = 0; |
| 1297 // First, handle straight character matches. |
| 1118 for (int i = 0; i < element_count; i++) { | 1298 for (int i = 0; i < element_count; i++) { |
| 1119 TextElement elm = (*elms_)[i]; | 1299 TextElement elm = elms_->at(i); |
| 1120 switch (elm.type) { | 1300 if (elm.type == TextElement::ATOM) { |
| 1121 case TextElement::ATOM: { | 1301 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| 1122 Vector<const uc16> quarks = elm.data.u_atom->data(); | 1302 if (!compiler->is_case_independent()) { |
| 1123 macro_assembler->CheckCharacters(quarks, | 1303 macro_assembler->CheckCharacters(quarks, |
| 1124 cp_offset, | 1304 cp_offset, |
| 1125 on_failure_->label()); | 1305 on_failure_->label()); |
| 1306 } else { |
| 1307 EmitAtomNonLetters(macro_assembler, elm, quarks, on_failure_->label(), cp_offset); |
| 1308 } |
| 1309 cp_offset += quarks.length(); |
| 1310 } else { |
| 1311 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); |
| 1312 cp_offset++; |
| 1313 } |
| 1314 } |
| 1315 // Second, handle case independent letter matches if any. |
| 1316 if (compiler->is_case_independent()) { |
| 1317 cp_offset = 0; |
| 1318 for (int i = 0; i < element_count; i++) { |
| 1319 TextElement elm = elms_->at(i); |
| 1320 if (elm.type == TextElement::ATOM) { |
| 1321 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| 1322 EmitAtomLetters(macro_assembler, elm, quarks, on_failure_->label(), cp_offset); |
| 1126 cp_offset += quarks.length(); | 1323 cp_offset += quarks.length(); |
| 1127 break; | 1324 } else { |
| 1128 } | |
| 1129 case TextElement::CHAR_CLASS: { | |
| 1130 RegExpCharacterClass* cc = elm.data.u_char_class; | |
| 1131 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure_->label()); | |
| 1132 cp_offset++; | 1325 cp_offset++; |
| 1133 | 1326 } |
| 1134 ZoneList<CharacterRange>* ranges = cc->ranges(); | 1327 } |
| 1135 | 1328 } |
| 1136 Label success; | 1329 // If the fast character matches passed then do the character classes. |
| 1137 | 1330 cp_offset = 0; |
| 1138 Label *char_is_in_class = | 1331 for (int i = 0; i < element_count; i++) { |
| 1139 cc->is_negated() ? on_failure_->label() : &success; | 1332 TextElement elm = elms_->at(i); |
| 1140 | 1333 if (elm.type == TextElement::CHAR_CLASS) { |
| 1141 int range_count = ranges->length(); | 1334 RegExpCharacterClass* cc = elm.data.u_char_class; |
| 1142 | 1335 EmitCharClass(macro_assembler, cc, cp_offset, on_failure_->label()); |
| 1143 if (range_count == 0) { | 1336 cp_offset ++; |
| 1144 if (!cc->is_negated()) { | 1337 } else { |
| 1145 on_failure()->GoTo(compiler); | 1338 cp_offset += elm.data.u_atom->data().length(); |
| 1146 } | 1339 } |
| 1147 break; | 1340 } |
| 1148 } | 1341 |
| 1149 | |
| 1150 for (int i = 0; i < range_count - 1; i++) { | |
| 1151 CharacterRange& range = (*ranges)[i]; | |
| 1152 Label next_range; | |
| 1153 uc16 from = range.from(); | |
| 1154 uc16 to = range.to(); | |
| 1155 if (to == from) { | |
| 1156 macro_assembler->CheckCharacter(to, char_is_in_class); | |
| 1157 } else { | |
| 1158 if (from != 0) { | |
| 1159 macro_assembler->CheckCharacterLT(from, &next_range); | |
| 1160 } | |
| 1161 if (to != 0xffff) { | |
| 1162 macro_assembler->CheckCharacterLT(to + 1, char_is_in_class); | |
| 1163 } else { | |
| 1164 macro_assembler->GoTo(char_is_in_class); | |
| 1165 } | |
| 1166 } | |
| 1167 macro_assembler->Bind(&next_range); | |
| 1168 } | |
| 1169 | |
| 1170 if (range_count != 0) { | |
| 1171 CharacterRange& range = (*ranges)[range_count - 1]; | |
| 1172 uc16 from = range.from(); | |
| 1173 uc16 to = range.to(); | |
| 1174 | |
| 1175 if (to == from) { | |
| 1176 if (cc->is_negated()) { | |
| 1177 macro_assembler->CheckCharacter(to, on_failure_->label()); | |
| 1178 } else { | |
| 1179 macro_assembler->CheckNotCharacter(to, on_failure_->label()); | |
| 1180 } | |
| 1181 } else { | |
| 1182 if (from != 0) { | |
| 1183 if (!cc->is_negated()) { | |
| 1184 macro_assembler->CheckCharacterLT(from, on_failure_->label()); | |
| 1185 } else { | |
| 1186 macro_assembler->CheckCharacterLT(from, &success); | |
| 1187 } | |
| 1188 } | |
| 1189 if (to != 0xffff) { | |
| 1190 if (!cc->is_negated()) { | |
| 1191 macro_assembler->CheckCharacterGT(to, on_failure_->label()); | |
| 1192 } else { | |
| 1193 macro_assembler->CheckCharacterLT(to + 1, on_failure_->label()); | |
| 1194 } | |
| 1195 } else { | |
| 1196 if (cc->is_negated()) { | |
| 1197 macro_assembler->GoTo(on_failure_->label()); | |
| 1198 } | |
| 1199 } | |
| 1200 } | |
| 1201 } else if (cc->is_negated()) { | |
| 1202 macro_assembler->GoTo(on_failure_->label()); | |
| 1203 } | |
| 1204 | |
| 1205 macro_assembler->Bind(&success); | |
| 1206 | |
| 1207 break; | |
| 1208 } | |
| 1209 default: | |
| 1210 UNREACHABLE(); | |
| 1211 return false; | |
| 1212 } | |
| 1213 } | |
| 1214 compiler->AddWork(on_failure_); | 1342 compiler->AddWork(on_failure_); |
| 1215 macro_assembler->AdvanceCurrentPosition(cp_offset); | 1343 macro_assembler->AdvanceCurrentPosition(cp_offset); |
| 1216 return on_success()->GoTo(compiler); | 1344 return on_success()->GoTo(compiler); |
| 1217 } | 1345 } |
| 1218 | 1346 |
| 1219 | 1347 |
| 1220 bool ChoiceNode::Emit(RegExpCompiler* compiler) { | 1348 bool ChoiceNode::Emit(RegExpCompiler* compiler) { |
| 1221 int choice_count = alternatives_->length(); | 1349 int choice_count = alternatives_->length(); |
| 1222 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1350 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 1223 Bind(macro_assembler); | 1351 Bind(macro_assembler); |
| 1224 // For now we just call all choices one after the other. The idea ultimately | 1352 // For now we just call all choices one after the other. The idea ultimately |
| 1225 // is to use the Dispatch table to try only the relevant ones. | 1353 // is to use the Dispatch table to try only the relevant ones. |
| 1226 int i; | 1354 int i; |
| 1227 for (i = 0; i < choice_count - 1; i++) { | 1355 for (i = 0; i < choice_count - 1; i++) { |
| 1228 GuardedAlternative alternative = (*alternatives_)[i]; | 1356 GuardedAlternative alternative = alternatives_->at(i); |
| 1229 Label after; | 1357 Label after; |
| 1230 Label after_no_pop_cp; | 1358 Label after_no_pop_cp; |
| 1231 ZoneList<Guard*>* guards = alternative.guards(); | 1359 ZoneList<Guard*>* guards = alternative.guards(); |
| 1232 if (guards != NULL) { | 1360 if (guards != NULL) { |
| 1233 int guard_count = guards->length(); | 1361 int guard_count = guards->length(); |
| 1234 for (int j = 0; j < guard_count; j++) { | 1362 for (int j = 0; j < guard_count; j++) { |
| 1235 GenerateGuard(macro_assembler, (*guards)[j], &after_no_pop_cp); | 1363 GenerateGuard(macro_assembler, guards->at(j), &after_no_pop_cp); |
| 1236 } | 1364 } |
| 1237 } | 1365 } |
| 1238 macro_assembler->PushCurrentPosition(); | 1366 macro_assembler->PushCurrentPosition(); |
| 1239 macro_assembler->PushBacktrack(&after); | 1367 macro_assembler->PushBacktrack(&after); |
| 1240 if (!alternative.node()->GoTo(compiler)) { | 1368 if (!alternative.node()->GoTo(compiler)) { |
| 1241 after.Unuse(); | 1369 after.Unuse(); |
| 1242 after_no_pop_cp.Unuse(); | 1370 after_no_pop_cp.Unuse(); |
| 1243 return false; | 1371 return false; |
| 1244 } | 1372 } |
| 1245 macro_assembler->Bind(&after); | 1373 macro_assembler->Bind(&after); |
| 1246 macro_assembler->PopCurrentPosition(); | 1374 macro_assembler->PopCurrentPosition(); |
| 1247 macro_assembler->Bind(&after_no_pop_cp); | 1375 macro_assembler->Bind(&after_no_pop_cp); |
| 1248 } | 1376 } |
| 1249 GuardedAlternative alternative = (*alternatives_)[i]; | 1377 GuardedAlternative alternative = alternatives_->at(i); |
| 1250 ZoneList<Guard*>* guards = alternative.guards(); | 1378 ZoneList<Guard*>* guards = alternative.guards(); |
| 1251 if (guards != NULL) { | 1379 if (guards != NULL) { |
| 1252 int guard_count = guards->length(); | 1380 int guard_count = guards->length(); |
| 1253 for (int j = 0; j < guard_count; j++) { | 1381 for (int j = 0; j < guard_count; j++) { |
| 1254 GenerateGuard(macro_assembler, (*guards)[j], on_failure_->label()); | 1382 GenerateGuard(macro_assembler, guards->at(j), on_failure_->label()); |
| 1255 } | 1383 } |
| 1256 } | 1384 } |
| 1257 if (!on_failure_->IsBacktrack()) { | 1385 if (!on_failure_->IsBacktrack()) { |
| 1258 ASSERT_NOT_NULL(on_failure_ -> label()); | 1386 ASSERT_NOT_NULL(on_failure_ -> label()); |
| 1259 macro_assembler->PushBacktrack(on_failure_->label()); | 1387 macro_assembler->PushBacktrack(on_failure_->label()); |
| 1260 compiler->AddWork(on_failure_); | 1388 compiler->AddWork(on_failure_); |
| 1261 } | 1389 } |
| 1262 if (!alternative.node()->GoTo(compiler)) { | 1390 if (!alternative.node()->GoTo(compiler)) { |
| 1263 return false; | 1391 return false; |
| 1264 } | 1392 } |
| (...skipping 660 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1925 // character. | 2053 // character. |
| 1926 case '*': | 2054 case '*': |
| 1927 ranges->Add(CharacterRange::Everything()); | 2055 ranges->Add(CharacterRange::Everything()); |
| 1928 break; | 2056 break; |
| 1929 default: | 2057 default: |
| 1930 UNREACHABLE(); | 2058 UNREACHABLE(); |
| 1931 } | 2059 } |
| 1932 } | 2060 } |
| 1933 | 2061 |
| 1934 | 2062 |
| 1935 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize; | |
| 1936 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange; | |
| 1937 | |
| 1938 | |
| 1939 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) { | 2063 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) { |
| 1940 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 2064 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 1941 if (IsSingleton()) { | 2065 if (IsSingleton()) { |
| 1942 // If this is a singleton we just expand the one character. | 2066 // If this is a singleton we just expand the one character. |
| 1943 int length = uncanonicalize.get(from(), '\0', chars); | 2067 int length = uncanonicalize.get(from(), '\0', chars); |
| 1944 for (int i = 0; i < length; i++) { | 2068 for (int i = 0; i < length; i++) { |
| 1945 uc32 chr = chars[i]; | 2069 uc32 chr = chars[i]; |
| 1946 if (chr != from()) { | 2070 if (chr != from()) { |
| 1947 ranges->Add(CharacterRange::Singleton(chars[i])); | 2071 ranges->Add(CharacterRange::Singleton(chars[i])); |
| 1948 } | 2072 } |
| (...skipping 456 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2405 | 2529 |
| 2406 | 2530 |
| 2407 void DispatchTableConstructor::VisitAction(ActionNode* that) { | 2531 void DispatchTableConstructor::VisitAction(ActionNode* that) { |
| 2408 that->on_success()->Accept(this); | 2532 that->on_success()->Accept(this); |
| 2409 } | 2533 } |
| 2410 | 2534 |
| 2411 | 2535 |
| 2412 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, | 2536 Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, |
| 2413 RegExpNode** node_return, | 2537 RegExpNode** node_return, |
| 2414 bool ignore_case) { | 2538 bool ignore_case) { |
| 2415 RegExpCompiler compiler(input->capture_count); | 2539 RegExpCompiler compiler(input->capture_count, ignore_case); |
| 2416 // Wrap the body of the regexp in capture #0. | 2540 // Wrap the body of the regexp in capture #0. |
| 2417 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, | 2541 RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, |
| 2418 0, | 2542 0, |
| 2419 &compiler, | 2543 &compiler, |
| 2420 compiler.accept(), | 2544 compiler.accept(), |
| 2421 compiler.backtrack()); | 2545 compiler.backtrack()); |
| 2422 // Add a .*? at the beginning, outside the body capture. | 2546 // Add a .*? at the beginning, outside the body capture. |
| 2423 // Note: We could choose to not add this if the regexp is anchored at | 2547 // Note: We could choose to not add this if the regexp is anchored at |
| 2424 // the start of the input but I'm not sure how best to do that and | 2548 // the start of the input but I'm not sure how best to do that and |
| 2425 // since we don't even handle ^ yet I'm saving that optimization for | 2549 // since we don't even handle ^ yet I'm saving that optimization for |
| (...skipping 12 matching lines...) Expand all Loading... |
| 2438 #if !(defined ARM || defined __arm__ || defined __thumb__) | 2562 #if !(defined ARM || defined __arm__ || defined __thumb__) |
| 2439 if (FLAG_re2k_native) { // Flag only checked in IA32 mode. | 2563 if (FLAG_re2k_native) { // Flag only checked in IA32 mode. |
| 2440 // TODO(lrn) Move compilation to a later point in the life-cycle | 2564 // TODO(lrn) Move compilation to a later point in the life-cycle |
| 2441 // of the RegExp. We don't know the type of input string yet. | 2565 // of the RegExp. We don't know the type of input string yet. |
| 2442 // For now, always assume two-byte strings. | 2566 // For now, always assume two-byte strings. |
| 2443 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16, | 2567 RegExpMacroAssemblerIA32 macro_assembler(RegExpMacroAssemblerIA32::UC16, |
| 2444 (input->capture_count + 1) * 2, | 2568 (input->capture_count + 1) * 2, |
| 2445 ignore_case); | 2569 ignore_case); |
| 2446 return compiler.Assemble(&macro_assembler, | 2570 return compiler.Assemble(&macro_assembler, |
| 2447 node, | 2571 node, |
| 2448 input->capture_count, | 2572 input->capture_count); |
| 2449 ignore_case); | |
| 2450 } | 2573 } |
| 2451 #endif | 2574 #endif |
| 2452 byte codes[1024]; | 2575 byte codes[1024]; |
| 2453 Re2kAssembler assembler(Vector<byte>(codes, 1024)); | 2576 Re2kAssembler assembler(Vector<byte>(codes, 1024)); |
| 2454 RegExpMacroAssemblerRe2k macro_assembler(&assembler); | 2577 RegExpMacroAssemblerRe2k macro_assembler(&assembler); |
| 2455 return compiler.Assemble(&macro_assembler, | 2579 return compiler.Assemble(&macro_assembler, |
| 2456 node, | 2579 node, |
| 2457 input->capture_count, | 2580 input->capture_count); |
| 2458 ignore_case); | |
| 2459 } | 2581 } |
| 2460 | 2582 |
| 2461 | 2583 |
| 2462 }} // namespace v8::internal | 2584 }} // namespace v8::internal |
| OLD | NEW |