OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/jsregexp.h" | 5 #include "src/regexp/jsregexp.h" |
6 | 6 |
7 #include "src/ast.h" | 7 #include "src/ast.h" |
8 #include "src/base/platform/platform.h" | 8 #include "src/base/platform/platform.h" |
9 #include "src/compilation-cache.h" | 9 #include "src/compilation-cache.h" |
10 #include "src/compiler.h" | 10 #include "src/compiler.h" |
(...skipping 984 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
995 void SetRegExpTooBig() { reg_exp_too_big_ = true; } | 995 void SetRegExpTooBig() { reg_exp_too_big_ = true; } |
996 | 996 |
997 inline bool ignore_case() { return ignore_case_; } | 997 inline bool ignore_case() { return ignore_case_; } |
998 inline bool one_byte() { return one_byte_; } | 998 inline bool one_byte() { return one_byte_; } |
999 inline bool optimize() { return optimize_; } | 999 inline bool optimize() { return optimize_; } |
1000 inline void set_optimize(bool value) { optimize_ = value; } | 1000 inline void set_optimize(bool value) { optimize_ = value; } |
1001 inline bool limiting_recursion() { return limiting_recursion_; } | 1001 inline bool limiting_recursion() { return limiting_recursion_; } |
1002 inline void set_limiting_recursion(bool value) { | 1002 inline void set_limiting_recursion(bool value) { |
1003 limiting_recursion_ = value; | 1003 limiting_recursion_ = value; |
1004 } | 1004 } |
1005 bool read_backward() { return read_backward_; } | |
1006 void set_read_backward(bool value) { read_backward_ = value; } | |
1007 FrequencyCollator* frequency_collator() { return &frequency_collator_; } | 1005 FrequencyCollator* frequency_collator() { return &frequency_collator_; } |
1008 | 1006 |
1009 int current_expansion_factor() { return current_expansion_factor_; } | 1007 int current_expansion_factor() { return current_expansion_factor_; } |
1010 void set_current_expansion_factor(int value) { | 1008 void set_current_expansion_factor(int value) { |
1011 current_expansion_factor_ = value; | 1009 current_expansion_factor_ = value; |
1012 } | 1010 } |
1013 | 1011 |
1014 Isolate* isolate() const { return isolate_; } | 1012 Isolate* isolate() const { return isolate_; } |
1015 Zone* zone() const { return zone_; } | 1013 Zone* zone() const { return zone_; } |
1016 | 1014 |
1017 static const int kNoRegister = -1; | 1015 static const int kNoRegister = -1; |
1018 | 1016 |
1019 private: | 1017 private: |
1020 EndNode* accept_; | 1018 EndNode* accept_; |
1021 int next_register_; | 1019 int next_register_; |
1022 List<RegExpNode*>* work_list_; | 1020 List<RegExpNode*>* work_list_; |
1023 int recursion_depth_; | 1021 int recursion_depth_; |
1024 RegExpMacroAssembler* macro_assembler_; | 1022 RegExpMacroAssembler* macro_assembler_; |
1025 bool ignore_case_; | 1023 bool ignore_case_; |
1026 bool one_byte_; | 1024 bool one_byte_; |
1027 bool reg_exp_too_big_; | 1025 bool reg_exp_too_big_; |
1028 bool limiting_recursion_; | 1026 bool limiting_recursion_; |
1029 bool optimize_; | 1027 bool optimize_; |
1030 bool read_backward_; | |
1031 int current_expansion_factor_; | 1028 int current_expansion_factor_; |
1032 FrequencyCollator frequency_collator_; | 1029 FrequencyCollator frequency_collator_; |
1033 Isolate* isolate_; | 1030 Isolate* isolate_; |
1034 Zone* zone_; | 1031 Zone* zone_; |
1035 }; | 1032 }; |
1036 | 1033 |
1037 | 1034 |
1038 class RecursionCheck { | 1035 class RecursionCheck { |
1039 public: | 1036 public: |
1040 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { | 1037 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { |
(...skipping 15 matching lines...) Expand all Loading... |
1056 RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count, | 1053 RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count, |
1057 bool ignore_case, bool one_byte) | 1054 bool ignore_case, bool one_byte) |
1058 : next_register_(2 * (capture_count + 1)), | 1055 : next_register_(2 * (capture_count + 1)), |
1059 work_list_(NULL), | 1056 work_list_(NULL), |
1060 recursion_depth_(0), | 1057 recursion_depth_(0), |
1061 ignore_case_(ignore_case), | 1058 ignore_case_(ignore_case), |
1062 one_byte_(one_byte), | 1059 one_byte_(one_byte), |
1063 reg_exp_too_big_(false), | 1060 reg_exp_too_big_(false), |
1064 limiting_recursion_(false), | 1061 limiting_recursion_(false), |
1065 optimize_(FLAG_regexp_optimization), | 1062 optimize_(FLAG_regexp_optimization), |
1066 read_backward_(false), | |
1067 current_expansion_factor_(1), | 1063 current_expansion_factor_(1), |
1068 frequency_collator_(), | 1064 frequency_collator_(), |
1069 isolate_(isolate), | 1065 isolate_(isolate), |
1070 zone_(zone) { | 1066 zone_(zone) { |
1071 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone); | 1067 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone); |
1072 DCHECK(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); | 1068 DCHECK(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); |
1073 } | 1069 } |
1074 | 1070 |
1075 | 1071 |
1076 RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 1072 RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
(...skipping 144 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1221 | 1217 |
1222 // The chronologically first deferred action in the trace | 1218 // The chronologically first deferred action in the trace |
1223 // is used to infer the action needed to restore a register | 1219 // is used to infer the action needed to restore a register |
1224 // to its previous state (or not, if it's safe to ignore it). | 1220 // to its previous state (or not, if it's safe to ignore it). |
1225 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR }; | 1221 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR }; |
1226 DeferredActionUndoType undo_action = IGNORE; | 1222 DeferredActionUndoType undo_action = IGNORE; |
1227 | 1223 |
1228 int value = 0; | 1224 int value = 0; |
1229 bool absolute = false; | 1225 bool absolute = false; |
1230 bool clear = false; | 1226 bool clear = false; |
1231 static const int kNoStore = kMinInt; | 1227 int store_position = -1; |
1232 int store_position = kNoStore; | |
1233 // This is a little tricky because we are scanning the actions in reverse | 1228 // This is a little tricky because we are scanning the actions in reverse |
1234 // historical order (newest first). | 1229 // historical order (newest first). |
1235 for (DeferredAction* action = actions_; | 1230 for (DeferredAction* action = actions_; |
1236 action != NULL; | 1231 action != NULL; |
1237 action = action->next()) { | 1232 action = action->next()) { |
1238 if (action->Mentions(reg)) { | 1233 if (action->Mentions(reg)) { |
1239 switch (action->action_type()) { | 1234 switch (action->action_type()) { |
1240 case ActionNode::SET_REGISTER: { | 1235 case ActionNode::SET_REGISTER: { |
1241 Trace::DeferredSetRegister* psr = | 1236 Trace::DeferredSetRegister* psr = |
1242 static_cast<Trace::DeferredSetRegister*>(action); | 1237 static_cast<Trace::DeferredSetRegister*>(action); |
1243 if (!absolute) { | 1238 if (!absolute) { |
1244 value += psr->value(); | 1239 value += psr->value(); |
1245 absolute = true; | 1240 absolute = true; |
1246 } | 1241 } |
1247 // SET_REGISTER is currently only used for newly introduced loop | 1242 // SET_REGISTER is currently only used for newly introduced loop |
1248 // counters. They can have a significant previous value if they | 1243 // counters. They can have a significant previous value if they |
1249 // occour in a loop. TODO(lrn): Propagate this information, so | 1244 // occour in a loop. TODO(lrn): Propagate this information, so |
1250 // we can set undo_action to IGNORE if we know there is no value to | 1245 // we can set undo_action to IGNORE if we know there is no value to |
1251 // restore. | 1246 // restore. |
1252 undo_action = RESTORE; | 1247 undo_action = RESTORE; |
1253 DCHECK_EQ(store_position, kNoStore); | 1248 DCHECK_EQ(store_position, -1); |
1254 DCHECK(!clear); | 1249 DCHECK(!clear); |
1255 break; | 1250 break; |
1256 } | 1251 } |
1257 case ActionNode::INCREMENT_REGISTER: | 1252 case ActionNode::INCREMENT_REGISTER: |
1258 if (!absolute) { | 1253 if (!absolute) { |
1259 value++; | 1254 value++; |
1260 } | 1255 } |
1261 DCHECK_EQ(store_position, kNoStore); | 1256 DCHECK_EQ(store_position, -1); |
1262 DCHECK(!clear); | 1257 DCHECK(!clear); |
1263 undo_action = RESTORE; | 1258 undo_action = RESTORE; |
1264 break; | 1259 break; |
1265 case ActionNode::STORE_POSITION: { | 1260 case ActionNode::STORE_POSITION: { |
1266 Trace::DeferredCapture* pc = | 1261 Trace::DeferredCapture* pc = |
1267 static_cast<Trace::DeferredCapture*>(action); | 1262 static_cast<Trace::DeferredCapture*>(action); |
1268 if (!clear && store_position == kNoStore) { | 1263 if (!clear && store_position == -1) { |
1269 store_position = pc->cp_offset(); | 1264 store_position = pc->cp_offset(); |
1270 } | 1265 } |
1271 | 1266 |
1272 // For captures we know that stores and clears alternate. | 1267 // For captures we know that stores and clears alternate. |
1273 // Other register, are never cleared, and if the occur | 1268 // Other register, are never cleared, and if the occur |
1274 // inside a loop, they might be assigned more than once. | 1269 // inside a loop, they might be assigned more than once. |
1275 if (reg <= 1) { | 1270 if (reg <= 1) { |
1276 // Registers zero and one, aka "capture zero", is | 1271 // Registers zero and one, aka "capture zero", is |
1277 // always set correctly if we succeed. There is no | 1272 // always set correctly if we succeed. There is no |
1278 // need to undo a setting on backtrack, because we | 1273 // need to undo a setting on backtrack, because we |
1279 // will set it again or fail. | 1274 // will set it again or fail. |
1280 undo_action = IGNORE; | 1275 undo_action = IGNORE; |
1281 } else { | 1276 } else { |
1282 undo_action = pc->is_capture() ? CLEAR : RESTORE; | 1277 undo_action = pc->is_capture() ? CLEAR : RESTORE; |
1283 } | 1278 } |
1284 DCHECK(!absolute); | 1279 DCHECK(!absolute); |
1285 DCHECK_EQ(value, 0); | 1280 DCHECK_EQ(value, 0); |
1286 break; | 1281 break; |
1287 } | 1282 } |
1288 case ActionNode::CLEAR_CAPTURES: { | 1283 case ActionNode::CLEAR_CAPTURES: { |
1289 // Since we're scanning in reverse order, if we've already | 1284 // Since we're scanning in reverse order, if we've already |
1290 // set the position we have to ignore historically earlier | 1285 // set the position we have to ignore historically earlier |
1291 // clearing operations. | 1286 // clearing operations. |
1292 if (store_position == kNoStore) { | 1287 if (store_position == -1) { |
1293 clear = true; | 1288 clear = true; |
1294 } | 1289 } |
1295 undo_action = RESTORE; | 1290 undo_action = RESTORE; |
1296 DCHECK(!absolute); | 1291 DCHECK(!absolute); |
1297 DCHECK_EQ(value, 0); | 1292 DCHECK_EQ(value, 0); |
1298 break; | 1293 break; |
1299 } | 1294 } |
1300 default: | 1295 default: |
1301 UNREACHABLE(); | 1296 UNREACHABLE(); |
1302 break; | 1297 break; |
(...skipping 10 matching lines...) Expand all Loading... |
1313 pushes = 0; | 1308 pushes = 0; |
1314 } | 1309 } |
1315 | 1310 |
1316 assembler->PushRegister(reg, stack_check); | 1311 assembler->PushRegister(reg, stack_check); |
1317 registers_to_pop->Set(reg, zone); | 1312 registers_to_pop->Set(reg, zone); |
1318 } else if (undo_action == CLEAR) { | 1313 } else if (undo_action == CLEAR) { |
1319 registers_to_clear->Set(reg, zone); | 1314 registers_to_clear->Set(reg, zone); |
1320 } | 1315 } |
1321 // Perform the chronologically last action (or accumulated increment) | 1316 // Perform the chronologically last action (or accumulated increment) |
1322 // for the register. | 1317 // for the register. |
1323 if (store_position != kNoStore) { | 1318 if (store_position != -1) { |
1324 assembler->WriteCurrentPositionToRegister(reg, store_position); | 1319 assembler->WriteCurrentPositionToRegister(reg, store_position); |
1325 } else if (clear) { | 1320 } else if (clear) { |
1326 assembler->ClearRegisters(reg, reg); | 1321 assembler->ClearRegisters(reg, reg); |
1327 } else if (absolute) { | 1322 } else if (absolute) { |
1328 assembler->SetRegister(reg, value); | 1323 assembler->SetRegister(reg, value); |
1329 } else if (value != 0) { | 1324 } else if (value != 0) { |
1330 assembler->AdvanceRegister(reg, value); | 1325 assembler->AdvanceRegister(reg, value); |
1331 } | 1326 } |
1332 } | 1327 } |
1333 } | 1328 } |
(...skipping 977 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2311 // Match the behaviour of EatsAtLeast on this node. | 2306 // Match the behaviour of EatsAtLeast on this node. |
2312 if (assertion_type() == AT_START && not_at_start) return; | 2307 if (assertion_type() == AT_START && not_at_start) return; |
2313 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); | 2308 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); |
2314 SaveBMInfo(bm, not_at_start, offset); | 2309 SaveBMInfo(bm, not_at_start, offset); |
2315 } | 2310 } |
2316 | 2311 |
2317 | 2312 |
2318 int BackReferenceNode::EatsAtLeast(int still_to_find, | 2313 int BackReferenceNode::EatsAtLeast(int still_to_find, |
2319 int budget, | 2314 int budget, |
2320 bool not_at_start) { | 2315 bool not_at_start) { |
2321 if (read_backward()) return 0; | |
2322 if (budget <= 0) return 0; | 2316 if (budget <= 0) return 0; |
2323 return on_success()->EatsAtLeast(still_to_find, | 2317 return on_success()->EatsAtLeast(still_to_find, |
2324 budget - 1, | 2318 budget - 1, |
2325 not_at_start); | 2319 not_at_start); |
2326 } | 2320 } |
2327 | 2321 |
2328 | 2322 |
2329 int TextNode::EatsAtLeast(int still_to_find, | 2323 int TextNode::EatsAtLeast(int still_to_find, |
2330 int budget, | 2324 int budget, |
2331 bool not_at_start) { | 2325 bool not_at_start) { |
2332 if (read_backward()) return 0; | |
2333 int answer = Length(); | 2326 int answer = Length(); |
2334 if (answer >= still_to_find) return answer; | 2327 if (answer >= still_to_find) return answer; |
2335 if (budget <= 0) return answer; | 2328 if (budget <= 0) return answer; |
2336 // We are not at start after this node so we set the last argument to 'true'. | 2329 // We are not at start after this node so we set the last argument to 'true'. |
2337 return answer + on_success()->EatsAtLeast(still_to_find - answer, | 2330 return answer + on_success()->EatsAtLeast(still_to_find - answer, |
2338 budget - 1, | 2331 budget - 1, |
2339 true); | 2332 true); |
2340 } | 2333 } |
2341 | 2334 |
2342 | 2335 |
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2526 bool not_at_start) { | 2519 bool not_at_start) { |
2527 Isolate* isolate = compiler->macro_assembler()->isolate(); | 2520 Isolate* isolate = compiler->macro_assembler()->isolate(); |
2528 DCHECK(characters_filled_in < details->characters()); | 2521 DCHECK(characters_filled_in < details->characters()); |
2529 int characters = details->characters(); | 2522 int characters = details->characters(); |
2530 int char_mask; | 2523 int char_mask; |
2531 if (compiler->one_byte()) { | 2524 if (compiler->one_byte()) { |
2532 char_mask = String::kMaxOneByteCharCode; | 2525 char_mask = String::kMaxOneByteCharCode; |
2533 } else { | 2526 } else { |
2534 char_mask = String::kMaxUtf16CodeUnit; | 2527 char_mask = String::kMaxUtf16CodeUnit; |
2535 } | 2528 } |
2536 for (int k = 0; k < elements()->length(); k++) { | 2529 for (int k = 0; k < elms_->length(); k++) { |
2537 TextElement elm = elements()->at(k); | 2530 TextElement elm = elms_->at(k); |
2538 if (elm.text_type() == TextElement::ATOM) { | 2531 if (elm.text_type() == TextElement::ATOM) { |
2539 Vector<const uc16> quarks = elm.atom()->data(); | 2532 Vector<const uc16> quarks = elm.atom()->data(); |
2540 for (int i = 0; i < characters && i < quarks.length(); i++) { | 2533 for (int i = 0; i < characters && i < quarks.length(); i++) { |
2541 QuickCheckDetails::Position* pos = | 2534 QuickCheckDetails::Position* pos = |
2542 details->positions(characters_filled_in); | 2535 details->positions(characters_filled_in); |
2543 uc16 c = quarks[i]; | 2536 uc16 c = quarks[i]; |
2544 if (compiler->ignore_case()) { | 2537 if (compiler->ignore_case()) { |
2545 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 2538 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
2546 int length = GetCaseIndependentLetters(isolate, c, | 2539 int length = GetCaseIndependentLetters(isolate, c, |
2547 compiler->one_byte(), chars); | 2540 compiler->one_byte(), chars); |
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2678 for (int i = 0; i < characters_; i++) { | 2671 for (int i = 0; i < characters_; i++) { |
2679 positions_[i].mask = 0; | 2672 positions_[i].mask = 0; |
2680 positions_[i].value = 0; | 2673 positions_[i].value = 0; |
2681 positions_[i].determines_perfectly = false; | 2674 positions_[i].determines_perfectly = false; |
2682 } | 2675 } |
2683 characters_ = 0; | 2676 characters_ = 0; |
2684 } | 2677 } |
2685 | 2678 |
2686 | 2679 |
2687 void QuickCheckDetails::Advance(int by, bool one_byte) { | 2680 void QuickCheckDetails::Advance(int by, bool one_byte) { |
| 2681 DCHECK(by >= 0); |
2688 if (by >= characters_) { | 2682 if (by >= characters_) { |
2689 Clear(); | 2683 Clear(); |
2690 return; | 2684 return; |
2691 } | 2685 } |
2692 for (int i = 0; i < characters_ - by; i++) { | 2686 for (int i = 0; i < characters_ - by; i++) { |
2693 positions_[i] = positions_[by + i]; | 2687 positions_[i] = positions_[by + i]; |
2694 } | 2688 } |
2695 for (int i = characters_ - by; i < characters_; i++) { | 2689 for (int i = characters_ - by; i < characters_; i++) { |
2696 positions_[i].mask = 0; | 2690 positions_[i].mask = 0; |
2697 positions_[i].value = 0; | 2691 positions_[i].value = 0; |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2779 } | 2773 } |
2780 return false; | 2774 return false; |
2781 } | 2775 } |
2782 | 2776 |
2783 | 2777 |
2784 RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) { | 2778 RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) { |
2785 if (info()->replacement_calculated) return replacement(); | 2779 if (info()->replacement_calculated) return replacement(); |
2786 if (depth < 0) return this; | 2780 if (depth < 0) return this; |
2787 DCHECK(!info()->visited); | 2781 DCHECK(!info()->visited); |
2788 VisitMarker marker(info()); | 2782 VisitMarker marker(info()); |
2789 int element_count = elements()->length(); | 2783 int element_count = elms_->length(); |
2790 for (int i = 0; i < element_count; i++) { | 2784 for (int i = 0; i < element_count; i++) { |
2791 TextElement elm = elements()->at(i); | 2785 TextElement elm = elms_->at(i); |
2792 if (elm.text_type() == TextElement::ATOM) { | 2786 if (elm.text_type() == TextElement::ATOM) { |
2793 Vector<const uc16> quarks = elm.atom()->data(); | 2787 Vector<const uc16> quarks = elm.atom()->data(); |
2794 for (int j = 0; j < quarks.length(); j++) { | 2788 for (int j = 0; j < quarks.length(); j++) { |
2795 uint16_t c = quarks[j]; | 2789 uint16_t c = quarks[j]; |
2796 if (c <= String::kMaxOneByteCharCode) continue; | 2790 if (c <= String::kMaxOneByteCharCode) continue; |
2797 if (!ignore_case) return set_replacement(NULL); | 2791 if (!ignore_case) return set_replacement(NULL); |
2798 // Here, we need to check for characters whose upper and lower cases | 2792 // Here, we need to check for characters whose upper and lower cases |
2799 // are outside the Latin-1 range. | 2793 // are outside the Latin-1 range. |
2800 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); | 2794 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); |
2801 // Character is outside Latin-1 completely | 2795 // Character is outside Latin-1 completely |
(...skipping 343 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3145 assembler->GoTo(trace->backtrack()); | 3139 assembler->GoTo(trace->backtrack()); |
3146 assembler->Bind(&ok); | 3140 assembler->Bind(&ok); |
3147 break; | 3141 break; |
3148 } | 3142 } |
3149 case AT_START: { | 3143 case AT_START: { |
3150 if (trace->at_start() == Trace::FALSE_VALUE) { | 3144 if (trace->at_start() == Trace::FALSE_VALUE) { |
3151 assembler->GoTo(trace->backtrack()); | 3145 assembler->GoTo(trace->backtrack()); |
3152 return; | 3146 return; |
3153 } | 3147 } |
3154 if (trace->at_start() == Trace::UNKNOWN) { | 3148 if (trace->at_start() == Trace::UNKNOWN) { |
3155 assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack()); | 3149 assembler->CheckNotAtStart(trace->backtrack()); |
3156 Trace at_start_trace = *trace; | 3150 Trace at_start_trace = *trace; |
3157 at_start_trace.set_at_start(Trace::TRUE_VALUE); | 3151 at_start_trace.set_at_start(true); |
3158 on_success()->Emit(compiler, &at_start_trace); | 3152 on_success()->Emit(compiler, &at_start_trace); |
3159 return; | 3153 return; |
3160 } | 3154 } |
3161 } | 3155 } |
3162 break; | 3156 break; |
3163 case AFTER_NEWLINE: | 3157 case AFTER_NEWLINE: |
3164 EmitHat(compiler, on_success(), trace); | 3158 EmitHat(compiler, on_success(), trace); |
3165 return; | 3159 return; |
3166 case AT_BOUNDARY: | 3160 case AT_BOUNDARY: |
3167 case AT_NON_BOUNDARY: { | 3161 case AT_NON_BOUNDARY: { |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3220 TextEmitPassType pass, | 3214 TextEmitPassType pass, |
3221 bool preloaded, | 3215 bool preloaded, |
3222 Trace* trace, | 3216 Trace* trace, |
3223 bool first_element_checked, | 3217 bool first_element_checked, |
3224 int* checked_up_to) { | 3218 int* checked_up_to) { |
3225 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3219 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
3226 Isolate* isolate = assembler->isolate(); | 3220 Isolate* isolate = assembler->isolate(); |
3227 bool one_byte = compiler->one_byte(); | 3221 bool one_byte = compiler->one_byte(); |
3228 Label* backtrack = trace->backtrack(); | 3222 Label* backtrack = trace->backtrack(); |
3229 QuickCheckDetails* quick_check = trace->quick_check_performed(); | 3223 QuickCheckDetails* quick_check = trace->quick_check_performed(); |
3230 int element_count = elements()->length(); | 3224 int element_count = elms_->length(); |
3231 int backward_offset = read_backward() ? -Length() : 0; | |
3232 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { | 3225 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { |
3233 TextElement elm = elements()->at(i); | 3226 TextElement elm = elms_->at(i); |
3234 int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset; | 3227 int cp_offset = trace->cp_offset() + elm.cp_offset(); |
3235 if (elm.text_type() == TextElement::ATOM) { | 3228 if (elm.text_type() == TextElement::ATOM) { |
3236 Vector<const uc16> quarks = elm.atom()->data(); | 3229 Vector<const uc16> quarks = elm.atom()->data(); |
3237 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { | 3230 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { |
3238 if (first_element_checked && i == 0 && j == 0) continue; | 3231 if (first_element_checked && i == 0 && j == 0) continue; |
3239 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; | 3232 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; |
3240 EmitCharacterFunction* emit_function = NULL; | 3233 EmitCharacterFunction* emit_function = NULL; |
3241 switch (pass) { | 3234 switch (pass) { |
3242 case NON_LATIN1_MATCH: | 3235 case NON_LATIN1_MATCH: |
3243 DCHECK(one_byte); | 3236 DCHECK(one_byte); |
3244 if (quarks[j] > String::kMaxOneByteCharCode) { | 3237 if (quarks[j] > String::kMaxOneByteCharCode) { |
3245 assembler->GoTo(backtrack); | 3238 assembler->GoTo(backtrack); |
3246 return; | 3239 return; |
3247 } | 3240 } |
3248 break; | 3241 break; |
3249 case NON_LETTER_CHARACTER_MATCH: | 3242 case NON_LETTER_CHARACTER_MATCH: |
3250 emit_function = &EmitAtomNonLetter; | 3243 emit_function = &EmitAtomNonLetter; |
3251 break; | 3244 break; |
3252 case SIMPLE_CHARACTER_MATCH: | 3245 case SIMPLE_CHARACTER_MATCH: |
3253 emit_function = &EmitSimpleCharacter; | 3246 emit_function = &EmitSimpleCharacter; |
3254 break; | 3247 break; |
3255 case CASE_CHARACTER_MATCH: | 3248 case CASE_CHARACTER_MATCH: |
3256 emit_function = &EmitAtomLetter; | 3249 emit_function = &EmitAtomLetter; |
3257 break; | 3250 break; |
3258 default: | 3251 default: |
3259 break; | 3252 break; |
3260 } | 3253 } |
3261 if (emit_function != NULL) { | 3254 if (emit_function != NULL) { |
3262 bool bounds_check = *checked_up_to < cp_offset + j || read_backward(); | 3255 bool bound_checked = emit_function(isolate, |
3263 bool bound_checked = | 3256 compiler, |
3264 emit_function(isolate, compiler, quarks[j], backtrack, | 3257 quarks[j], |
3265 cp_offset + j, bounds_check, preloaded); | 3258 backtrack, |
| 3259 cp_offset + j, |
| 3260 *checked_up_to < cp_offset + j, |
| 3261 preloaded); |
3266 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); | 3262 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); |
3267 } | 3263 } |
3268 } | 3264 } |
3269 } else { | 3265 } else { |
3270 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type()); | 3266 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type()); |
3271 if (pass == CHARACTER_CLASS_MATCH) { | 3267 if (pass == CHARACTER_CLASS_MATCH) { |
3272 if (first_element_checked && i == 0) continue; | 3268 if (first_element_checked && i == 0) continue; |
3273 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; | 3269 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; |
3274 RegExpCharacterClass* cc = elm.char_class(); | 3270 RegExpCharacterClass* cc = elm.char_class(); |
3275 bool bounds_check = *checked_up_to < cp_offset || read_backward(); | |
3276 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, | 3271 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, |
3277 bounds_check, preloaded, zone()); | 3272 *checked_up_to < cp_offset, preloaded, zone()); |
3278 UpdateBoundsCheck(cp_offset, checked_up_to); | 3273 UpdateBoundsCheck(cp_offset, checked_up_to); |
3279 } | 3274 } |
3280 } | 3275 } |
3281 } | 3276 } |
3282 } | 3277 } |
3283 | 3278 |
3284 | 3279 |
3285 int TextNode::Length() { | 3280 int TextNode::Length() { |
3286 TextElement elm = elements()->last(); | 3281 TextElement elm = elms_->last(); |
3287 DCHECK(elm.cp_offset() >= 0); | 3282 DCHECK(elm.cp_offset() >= 0); |
3288 return elm.cp_offset() + elm.length(); | 3283 return elm.cp_offset() + elm.length(); |
3289 } | 3284 } |
3290 | 3285 |
3291 | 3286 |
3292 bool TextNode::SkipPass(int int_pass, bool ignore_case) { | 3287 bool TextNode::SkipPass(int int_pass, bool ignore_case) { |
3293 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass); | 3288 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass); |
3294 if (ignore_case) { | 3289 if (ignore_case) { |
3295 return pass == SIMPLE_CHARACTER_MATCH; | 3290 return pass == SIMPLE_CHARACTER_MATCH; |
3296 } else { | 3291 } else { |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3345 TextEmitPass(compiler, | 3340 TextEmitPass(compiler, |
3346 static_cast<TextEmitPassType>(pass), | 3341 static_cast<TextEmitPassType>(pass), |
3347 false, | 3342 false, |
3348 trace, | 3343 trace, |
3349 first_elt_done, | 3344 first_elt_done, |
3350 &bound_checked_to); | 3345 &bound_checked_to); |
3351 } | 3346 } |
3352 } | 3347 } |
3353 | 3348 |
3354 Trace successor_trace(*trace); | 3349 Trace successor_trace(*trace); |
3355 // If we advance backward, we may end up at the start. | 3350 successor_trace.set_at_start(false); |
3356 successor_trace.AdvanceCurrentPositionInTrace( | 3351 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); |
3357 read_backward() ? -Length() : Length(), compiler); | |
3358 successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN | |
3359 : Trace::FALSE_VALUE); | |
3360 RecursionCheck rc(compiler); | 3352 RecursionCheck rc(compiler); |
3361 on_success()->Emit(compiler, &successor_trace); | 3353 on_success()->Emit(compiler, &successor_trace); |
3362 } | 3354 } |
3363 | 3355 |
3364 | 3356 |
3365 void Trace::InvalidateCurrentCharacter() { | 3357 void Trace::InvalidateCurrentCharacter() { |
3366 characters_preloaded_ = 0; | 3358 characters_preloaded_ = 0; |
3367 } | 3359 } |
3368 | 3360 |
3369 | 3361 |
3370 void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) { | 3362 void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) { |
| 3363 DCHECK(by > 0); |
3371 // We don't have an instruction for shifting the current character register | 3364 // We don't have an instruction for shifting the current character register |
3372 // down or for using a shifted value for anything so lets just forget that | 3365 // down or for using a shifted value for anything so lets just forget that |
3373 // we preloaded any characters into it. | 3366 // we preloaded any characters into it. |
3374 characters_preloaded_ = 0; | 3367 characters_preloaded_ = 0; |
3375 // Adjust the offsets of the quick check performed information. This | 3368 // Adjust the offsets of the quick check performed information. This |
3376 // information is used to find out what we already determined about the | 3369 // information is used to find out what we already determined about the |
3377 // characters by means of mask and compare. | 3370 // characters by means of mask and compare. |
3378 quick_check_performed_.Advance(by, compiler->one_byte()); | 3371 quick_check_performed_.Advance(by, compiler->one_byte()); |
3379 cp_offset_ += by; | 3372 cp_offset_ += by; |
3380 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { | 3373 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { |
3381 compiler->SetRegExpTooBig(); | 3374 compiler->SetRegExpTooBig(); |
3382 cp_offset_ = 0; | 3375 cp_offset_ = 0; |
3383 } | 3376 } |
3384 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); | 3377 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); |
3385 } | 3378 } |
3386 | 3379 |
3387 | 3380 |
3388 void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) { | 3381 void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) { |
3389 int element_count = elements()->length(); | 3382 int element_count = elms_->length(); |
3390 for (int i = 0; i < element_count; i++) { | 3383 for (int i = 0; i < element_count; i++) { |
3391 TextElement elm = elements()->at(i); | 3384 TextElement elm = elms_->at(i); |
3392 if (elm.text_type() == TextElement::CHAR_CLASS) { | 3385 if (elm.text_type() == TextElement::CHAR_CLASS) { |
3393 RegExpCharacterClass* cc = elm.char_class(); | 3386 RegExpCharacterClass* cc = elm.char_class(); |
3394 // None of the standard character classes is different in the case | 3387 // None of the standard character classes is different in the case |
3395 // independent case and it slows us down if we don't know that. | 3388 // independent case and it slows us down if we don't know that. |
3396 if (cc->is_standard(zone())) continue; | 3389 if (cc->is_standard(zone())) continue; |
3397 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); | 3390 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); |
3398 int range_count = ranges->length(); | 3391 int range_count = ranges->length(); |
3399 for (int j = 0; j < range_count; j++) { | 3392 for (int j = 0; j < range_count; j++) { |
3400 ranges->at(j).AddCaseEquivalents(isolate, zone(), ranges, is_one_byte); | 3393 ranges->at(j).AddCaseEquivalents(isolate, zone(), ranges, is_one_byte); |
3401 } | 3394 } |
3402 } | 3395 } |
3403 } | 3396 } |
3404 } | 3397 } |
3405 | 3398 |
3406 | 3399 |
3407 int TextNode::GreedyLoopTextLength() { return Length(); } | 3400 int TextNode::GreedyLoopTextLength() { |
| 3401 TextElement elm = elms_->at(elms_->length() - 1); |
| 3402 return elm.cp_offset() + elm.length(); |
| 3403 } |
3408 | 3404 |
3409 | 3405 |
3410 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( | 3406 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( |
3411 RegExpCompiler* compiler) { | 3407 RegExpCompiler* compiler) { |
3412 if (read_backward()) return NULL; | 3408 if (elms_->length() != 1) return NULL; |
3413 if (elements()->length() != 1) return NULL; | 3409 TextElement elm = elms_->at(0); |
3414 TextElement elm = elements()->at(0); | |
3415 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; | 3410 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; |
3416 RegExpCharacterClass* node = elm.char_class(); | 3411 RegExpCharacterClass* node = elm.char_class(); |
3417 ZoneList<CharacterRange>* ranges = node->ranges(zone()); | 3412 ZoneList<CharacterRange>* ranges = node->ranges(zone()); |
3418 if (!CharacterRange::IsCanonical(ranges)) { | 3413 if (!CharacterRange::IsCanonical(ranges)) { |
3419 CharacterRange::Canonicalize(ranges); | 3414 CharacterRange::Canonicalize(ranges); |
3420 } | 3415 } |
3421 if (node->is_negated()) { | 3416 if (node->is_negated()) { |
3422 return ranges->length() == 0 ? on_success() : NULL; | 3417 return ranges->length() == 0 ? on_success() : NULL; |
3423 } | 3418 } |
3424 if (ranges->length() != 1) return NULL; | 3419 if (ranges->length() != 1) return NULL; |
(...skipping 23 matching lines...) Expand all Loading... |
3448 return kNodeIsTooComplexForGreedyLoops; | 3443 return kNodeIsTooComplexForGreedyLoops; |
3449 } | 3444 } |
3450 int node_length = node->GreedyLoopTextLength(); | 3445 int node_length = node->GreedyLoopTextLength(); |
3451 if (node_length == kNodeIsTooComplexForGreedyLoops) { | 3446 if (node_length == kNodeIsTooComplexForGreedyLoops) { |
3452 return kNodeIsTooComplexForGreedyLoops; | 3447 return kNodeIsTooComplexForGreedyLoops; |
3453 } | 3448 } |
3454 length += node_length; | 3449 length += node_length; |
3455 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); | 3450 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); |
3456 node = seq_node->on_success(); | 3451 node = seq_node->on_success(); |
3457 } | 3452 } |
3458 return read_backward() ? -length : length; | 3453 return length; |
3459 } | 3454 } |
3460 | 3455 |
3461 | 3456 |
3462 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) { | 3457 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) { |
3463 DCHECK_NULL(loop_node_); | 3458 DCHECK_NULL(loop_node_); |
3464 AddAlternative(alt); | 3459 AddAlternative(alt); |
3465 loop_node_ = alt.node(); | 3460 loop_node_ = alt.node(); |
3466 } | 3461 } |
3467 | 3462 |
3468 | 3463 |
(...skipping 410 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3879 * V |S | 3874 * V |S |
3880 * Q2 ---> U----->backtrack | 3875 * Q2 ---> U----->backtrack |
3881 * | F / | 3876 * | F / |
3882 * S| / | 3877 * S| / |
3883 * V F / | 3878 * V F / |
3884 * S2--/ | 3879 * S2--/ |
3885 */ | 3880 */ |
3886 | 3881 |
3887 GreedyLoopState::GreedyLoopState(bool not_at_start) { | 3882 GreedyLoopState::GreedyLoopState(bool not_at_start) { |
3888 counter_backtrack_trace_.set_backtrack(&label_); | 3883 counter_backtrack_trace_.set_backtrack(&label_); |
3889 if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE); | 3884 if (not_at_start) counter_backtrack_trace_.set_at_start(false); |
3890 } | 3885 } |
3891 | 3886 |
3892 | 3887 |
3893 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { | 3888 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { |
3894 #ifdef DEBUG | 3889 #ifdef DEBUG |
3895 int choice_count = alternatives_->length(); | 3890 int choice_count = alternatives_->length(); |
3896 for (int i = 0; i < choice_count - 1; i++) { | 3891 for (int i = 0; i < choice_count - 1; i++) { |
3897 GuardedAlternative alternative = alternatives_->at(i); | 3892 GuardedAlternative alternative = alternatives_->at(i); |
3898 ZoneList<Guard*>* guards = alternative.guards(); | 3893 ZoneList<Guard*>* guards = alternative.guards(); |
3899 int guard_count = (guards == NULL) ? 0 : guards->length(); | 3894 int guard_count = (guards == NULL) ? 0 : guards->length(); |
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4006 // and other simple nodes. These are handled by pushing the current | 4001 // and other simple nodes. These are handled by pushing the current |
4007 // position on the stack and then incrementing the current position each | 4002 // position on the stack and then incrementing the current position each |
4008 // time around the switch. On backtrack we decrement the current position | 4003 // time around the switch. On backtrack we decrement the current position |
4009 // and check it against the pushed value. This avoids pushing backtrack | 4004 // and check it against the pushed value. This avoids pushing backtrack |
4010 // information for each iteration of the loop, which could take up a lot of | 4005 // information for each iteration of the loop, which could take up a lot of |
4011 // space. | 4006 // space. |
4012 DCHECK(trace->stop_node() == NULL); | 4007 DCHECK(trace->stop_node() == NULL); |
4013 macro_assembler->PushCurrentPosition(); | 4008 macro_assembler->PushCurrentPosition(); |
4014 Label greedy_match_failed; | 4009 Label greedy_match_failed; |
4015 Trace greedy_match_trace; | 4010 Trace greedy_match_trace; |
4016 if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE); | 4011 if (not_at_start()) greedy_match_trace.set_at_start(false); |
4017 greedy_match_trace.set_backtrack(&greedy_match_failed); | 4012 greedy_match_trace.set_backtrack(&greedy_match_failed); |
4018 Label loop_label; | 4013 Label loop_label; |
4019 macro_assembler->Bind(&loop_label); | 4014 macro_assembler->Bind(&loop_label); |
4020 greedy_match_trace.set_stop_node(this); | 4015 greedy_match_trace.set_stop_node(this); |
4021 greedy_match_trace.set_loop_label(&loop_label); | 4016 greedy_match_trace.set_loop_label(&loop_label); |
4022 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace); | 4017 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace); |
4023 macro_assembler->Bind(&greedy_match_failed); | 4018 macro_assembler->Bind(&greedy_match_failed); |
4024 | 4019 |
4025 Label second_choice; // For use in greedy matches. | 4020 Label second_choice; // For use in greedy matches. |
4026 macro_assembler->Bind(&second_choice); | 4021 macro_assembler->Bind(&second_choice); |
(...skipping 325 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4352 } | 4347 } |
4353 | 4348 |
4354 LimitResult limit_result = LimitVersions(compiler, trace); | 4349 LimitResult limit_result = LimitVersions(compiler, trace); |
4355 if (limit_result == DONE) return; | 4350 if (limit_result == DONE) return; |
4356 DCHECK(limit_result == CONTINUE); | 4351 DCHECK(limit_result == CONTINUE); |
4357 | 4352 |
4358 RecursionCheck rc(compiler); | 4353 RecursionCheck rc(compiler); |
4359 | 4354 |
4360 DCHECK_EQ(start_reg_ + 1, end_reg_); | 4355 DCHECK_EQ(start_reg_ + 1, end_reg_); |
4361 if (compiler->ignore_case()) { | 4356 if (compiler->ignore_case()) { |
4362 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(), | 4357 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, |
4363 trace->backtrack()); | 4358 trace->backtrack()); |
4364 } else { | 4359 } else { |
4365 assembler->CheckNotBackReference(start_reg_, read_backward(), | 4360 assembler->CheckNotBackReference(start_reg_, trace->backtrack()); |
4366 trace->backtrack()); | |
4367 } | 4361 } |
4368 // We are going to advance backward, so we may end up at the start. | |
4369 if (read_backward()) trace->set_at_start(Trace::UNKNOWN); | |
4370 on_success()->Emit(compiler, trace); | 4362 on_success()->Emit(compiler, trace); |
4371 } | 4363 } |
4372 | 4364 |
4373 | 4365 |
4374 // ------------------------------------------------------------------- | 4366 // ------------------------------------------------------------------- |
4375 // Dot/dotty output | 4367 // Dot/dotty output |
4376 | 4368 |
4377 | 4369 |
4378 #ifdef DEBUG | 4370 #ifdef DEBUG |
4379 | 4371 |
(...skipping 340 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4720 | 4712 |
4721 | 4713 |
4722 // ------------------------------------------------------------------- | 4714 // ------------------------------------------------------------------- |
4723 // Tree to graph conversion | 4715 // Tree to graph conversion |
4724 | 4716 |
4725 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, | 4717 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, |
4726 RegExpNode* on_success) { | 4718 RegExpNode* on_success) { |
4727 ZoneList<TextElement>* elms = | 4719 ZoneList<TextElement>* elms = |
4728 new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone()); | 4720 new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone()); |
4729 elms->Add(TextElement::Atom(this), compiler->zone()); | 4721 elms->Add(TextElement::Atom(this), compiler->zone()); |
4730 return new (compiler->zone()) | 4722 return new(compiler->zone()) TextNode(elms, on_success); |
4731 TextNode(elms, compiler->read_backward(), on_success); | |
4732 } | 4723 } |
4733 | 4724 |
4734 | 4725 |
4735 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, | 4726 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, |
4736 RegExpNode* on_success) { | 4727 RegExpNode* on_success) { |
4737 return new (compiler->zone()) | 4728 return new(compiler->zone()) TextNode(elements(), on_success); |
4738 TextNode(elements(), compiler->read_backward(), on_success); | |
4739 } | 4729 } |
4740 | 4730 |
4741 | 4731 |
4742 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges, | 4732 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges, |
4743 const int* special_class, | 4733 const int* special_class, |
4744 int length) { | 4734 int length) { |
4745 length--; // Remove final 0x10000. | 4735 length--; // Remove final 0x10000. |
4746 DCHECK(special_class[length] == 0x10000); | 4736 DCHECK(special_class[length] == 0x10000); |
4747 DCHECK(ranges->length() != 0); | 4737 DCHECK(ranges->length() != 0); |
4748 DCHECK(length != 0); | 4738 DCHECK(length != 0); |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4825 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) { | 4815 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) { |
4826 set_.set_standard_set_type('W'); | 4816 set_.set_standard_set_type('W'); |
4827 return true; | 4817 return true; |
4828 } | 4818 } |
4829 return false; | 4819 return false; |
4830 } | 4820 } |
4831 | 4821 |
4832 | 4822 |
4833 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 4823 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
4834 RegExpNode* on_success) { | 4824 RegExpNode* on_success) { |
4835 return new (compiler->zone()) | 4825 return new(compiler->zone()) TextNode(this, on_success); |
4836 TextNode(this, compiler->read_backward(), on_success); | |
4837 } | 4826 } |
4838 | 4827 |
4839 | 4828 |
4840 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { | 4829 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { |
4841 RegExpAtom* atom1 = (*a)->AsAtom(); | 4830 RegExpAtom* atom1 = (*a)->AsAtom(); |
4842 RegExpAtom* atom2 = (*b)->AsAtom(); | 4831 RegExpAtom* atom2 = (*b)->AsAtom(); |
4843 uc16 character1 = atom1->data().at(0); | 4832 uc16 character1 = atom1->data().at(0); |
4844 uc16 character2 = atom2->data().at(0); | 4833 uc16 character2 = atom2->data().at(0); |
4845 if (character1 < character2) return -1; | 4834 if (character1 < character2) return -1; |
4846 if (character1 > character2) return 1; | 4835 if (character1 > character2) return 1; |
(...skipping 361 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5208 if (is_greedy) { | 5197 if (is_greedy) { |
5209 alternation->AddAlternative( | 5198 alternation->AddAlternative( |
5210 GuardedAlternative(body->ToNode(compiler, answer))); | 5199 GuardedAlternative(body->ToNode(compiler, answer))); |
5211 alternation->AddAlternative(GuardedAlternative(on_success)); | 5200 alternation->AddAlternative(GuardedAlternative(on_success)); |
5212 } else { | 5201 } else { |
5213 alternation->AddAlternative(GuardedAlternative(on_success)); | 5202 alternation->AddAlternative(GuardedAlternative(on_success)); |
5214 alternation->AddAlternative( | 5203 alternation->AddAlternative( |
5215 GuardedAlternative(body->ToNode(compiler, answer))); | 5204 GuardedAlternative(body->ToNode(compiler, answer))); |
5216 } | 5205 } |
5217 answer = alternation; | 5206 answer = alternation; |
5218 if (not_at_start && !compiler->read_backward()) { | 5207 if (not_at_start) alternation->set_not_at_start(); |
5219 alternation->set_not_at_start(); | |
5220 } | |
5221 } | 5208 } |
5222 return answer; | 5209 return answer; |
5223 } | 5210 } |
5224 } | 5211 } |
5225 } | 5212 } |
5226 bool has_min = min > 0; | 5213 bool has_min = min > 0; |
5227 bool has_max = max < RegExpTree::kInfinity; | 5214 bool has_max = max < RegExpTree::kInfinity; |
5228 bool needs_counter = has_min || has_max; | 5215 bool needs_counter = has_min || has_max; |
5229 int reg_ctr = needs_counter | 5216 int reg_ctr = needs_counter |
5230 ? compiler->AllocateRegister() | 5217 ? compiler->AllocateRegister() |
5231 : RegExpCompiler::kNoRegister; | 5218 : RegExpCompiler::kNoRegister; |
5232 LoopChoiceNode* center = new (zone) | 5219 LoopChoiceNode* center = new(zone) LoopChoiceNode(body->min_match() == 0, |
5233 LoopChoiceNode(body->min_match() == 0, compiler->read_backward(), zone); | 5220 zone); |
5234 if (not_at_start && !compiler->read_backward()) center->set_not_at_start(); | 5221 if (not_at_start) center->set_not_at_start(); |
5235 RegExpNode* loop_return = needs_counter | 5222 RegExpNode* loop_return = needs_counter |
5236 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) | 5223 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) |
5237 : static_cast<RegExpNode*>(center); | 5224 : static_cast<RegExpNode*>(center); |
5238 if (body_can_be_empty) { | 5225 if (body_can_be_empty) { |
5239 // If the body can be empty we need to check if it was and then | 5226 // If the body can be empty we need to check if it was and then |
5240 // backtrack. | 5227 // backtrack. |
5241 loop_return = ActionNode::EmptyMatchCheck(body_start_reg, | 5228 loop_return = ActionNode::EmptyMatchCheck(body_start_reg, |
5242 reg_ctr, | 5229 reg_ctr, |
5243 min, | 5230 min, |
5244 loop_return); | 5231 loop_return); |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5300 // lookahead in one side and an end-of-input on the other side. | 5287 // lookahead in one side and an end-of-input on the other side. |
5301 // We need two registers for the lookahead. | 5288 // We need two registers for the lookahead. |
5302 int stack_pointer_register = compiler->AllocateRegister(); | 5289 int stack_pointer_register = compiler->AllocateRegister(); |
5303 int position_register = compiler->AllocateRegister(); | 5290 int position_register = compiler->AllocateRegister(); |
5304 // The ChoiceNode to distinguish between a newline and end-of-input. | 5291 // The ChoiceNode to distinguish between a newline and end-of-input. |
5305 ChoiceNode* result = new(zone) ChoiceNode(2, zone); | 5292 ChoiceNode* result = new(zone) ChoiceNode(2, zone); |
5306 // Create a newline atom. | 5293 // Create a newline atom. |
5307 ZoneList<CharacterRange>* newline_ranges = | 5294 ZoneList<CharacterRange>* newline_ranges = |
5308 new(zone) ZoneList<CharacterRange>(3, zone); | 5295 new(zone) ZoneList<CharacterRange>(3, zone); |
5309 CharacterRange::AddClassEscape('n', newline_ranges, zone); | 5296 CharacterRange::AddClassEscape('n', newline_ranges, zone); |
5310 RegExpCharacterClass* newline_atom = new (zone) RegExpCharacterClass('n'); | 5297 RegExpCharacterClass* newline_atom = new(zone) RegExpCharacterClass('n'); |
5311 TextNode* newline_matcher = new (zone) TextNode( | 5298 TextNode* newline_matcher = new(zone) TextNode( |
5312 newline_atom, false, ActionNode::PositiveSubmatchSuccess( | 5299 newline_atom, |
5313 stack_pointer_register, position_register, | 5300 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, |
5314 0, // No captures inside. | 5301 position_register, |
5315 -1, // Ignored if no captures. | 5302 0, // No captures inside. |
5316 on_success)); | 5303 -1, // Ignored if no captures. |
| 5304 on_success)); |
5317 // Create an end-of-input matcher. | 5305 // Create an end-of-input matcher. |
5318 RegExpNode* end_of_line = ActionNode::BeginSubmatch( | 5306 RegExpNode* end_of_line = ActionNode::BeginSubmatch( |
5319 stack_pointer_register, | 5307 stack_pointer_register, |
5320 position_register, | 5308 position_register, |
5321 newline_matcher); | 5309 newline_matcher); |
5322 // Add the two alternatives to the ChoiceNode. | 5310 // Add the two alternatives to the ChoiceNode. |
5323 GuardedAlternative eol_alternative(end_of_line); | 5311 GuardedAlternative eol_alternative(end_of_line); |
5324 result->AddAlternative(eol_alternative); | 5312 result->AddAlternative(eol_alternative); |
5325 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); | 5313 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); |
5326 result->AddAlternative(end_alternative); | 5314 result->AddAlternative(end_alternative); |
5327 return result; | 5315 return result; |
5328 } | 5316 } |
5329 default: | 5317 default: |
5330 UNREACHABLE(); | 5318 UNREACHABLE(); |
5331 } | 5319 } |
5332 return on_success; | 5320 return on_success; |
5333 } | 5321 } |
5334 | 5322 |
5335 | 5323 |
5336 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, | 5324 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, |
5337 RegExpNode* on_success) { | 5325 RegExpNode* on_success) { |
5338 return new (compiler->zone()) | 5326 return new(compiler->zone()) |
5339 BackReferenceNode(RegExpCapture::StartRegister(index()), | 5327 BackReferenceNode(RegExpCapture::StartRegister(index()), |
5340 RegExpCapture::EndRegister(index()), | 5328 RegExpCapture::EndRegister(index()), |
5341 compiler->read_backward(), on_success); | 5329 on_success); |
5342 } | 5330 } |
5343 | 5331 |
5344 | 5332 |
5345 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, | 5333 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, |
5346 RegExpNode* on_success) { | 5334 RegExpNode* on_success) { |
5347 return on_success; | 5335 return on_success; |
5348 } | 5336 } |
5349 | 5337 |
5350 | 5338 |
5351 RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler, | 5339 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, |
5352 RegExpNode* on_success) { | 5340 RegExpNode* on_success) { |
5353 int stack_pointer_register = compiler->AllocateRegister(); | 5341 int stack_pointer_register = compiler->AllocateRegister(); |
5354 int position_register = compiler->AllocateRegister(); | 5342 int position_register = compiler->AllocateRegister(); |
5355 | 5343 |
5356 const int registers_per_capture = 2; | 5344 const int registers_per_capture = 2; |
5357 const int register_of_first_capture = 2; | 5345 const int register_of_first_capture = 2; |
5358 int register_count = capture_count_ * registers_per_capture; | 5346 int register_count = capture_count_ * registers_per_capture; |
5359 int register_start = | 5347 int register_start = |
5360 register_of_first_capture + capture_from_ * registers_per_capture; | 5348 register_of_first_capture + capture_from_ * registers_per_capture; |
5361 | 5349 |
5362 RegExpNode* result; | 5350 RegExpNode* success; |
5363 bool was_reading_backward = compiler->read_backward(); | |
5364 compiler->set_read_backward(type() == LOOKBEHIND); | |
5365 if (is_positive()) { | 5351 if (is_positive()) { |
5366 result = ActionNode::BeginSubmatch( | 5352 RegExpNode* node = ActionNode::BeginSubmatch( |
5367 stack_pointer_register, position_register, | 5353 stack_pointer_register, |
5368 body()->ToNode(compiler, | 5354 position_register, |
5369 ActionNode::PositiveSubmatchSuccess( | 5355 body()->ToNode( |
5370 stack_pointer_register, position_register, | 5356 compiler, |
5371 register_count, register_start, on_success))); | 5357 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, |
| 5358 position_register, |
| 5359 register_count, |
| 5360 register_start, |
| 5361 on_success))); |
| 5362 return node; |
5372 } else { | 5363 } else { |
5373 // We use a ChoiceNode for a negative lookahead because it has most of | 5364 // We use a ChoiceNode for a negative lookahead because it has most of |
5374 // the characteristics we need. It has the body of the lookahead as its | 5365 // the characteristics we need. It has the body of the lookahead as its |
5375 // first alternative and the expression after the lookahead of the second | 5366 // first alternative and the expression after the lookahead of the second |
5376 // alternative. If the first alternative succeeds then the | 5367 // alternative. If the first alternative succeeds then the |
5377 // NegativeSubmatchSuccess will unwind the stack including everything the | 5368 // NegativeSubmatchSuccess will unwind the stack including everything the |
5378 // choice node set up and backtrack. If the first alternative fails then | 5369 // choice node set up and backtrack. If the first alternative fails then |
5379 // the second alternative is tried, which is exactly the desired result | 5370 // the second alternative is tried, which is exactly the desired result |
5380 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special | 5371 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special |
5381 // ChoiceNode that knows to ignore the first exit when calculating quick | 5372 // ChoiceNode that knows to ignore the first exit when calculating quick |
5382 // checks. | 5373 // checks. |
5383 Zone* zone = compiler->zone(); | 5374 Zone* zone = compiler->zone(); |
5384 | 5375 |
5385 GuardedAlternative body_alt( | 5376 GuardedAlternative body_alt( |
5386 body()->ToNode(compiler, new (zone) NegativeSubmatchSuccess( | 5377 body()->ToNode( |
5387 stack_pointer_register, position_register, | 5378 compiler, |
5388 register_count, register_start, zone))); | 5379 success = new(zone) NegativeSubmatchSuccess(stack_pointer_register, |
| 5380 position_register, |
| 5381 register_count, |
| 5382 register_start, |
| 5383 zone))); |
5389 ChoiceNode* choice_node = | 5384 ChoiceNode* choice_node = |
5390 new(zone) NegativeLookaheadChoiceNode(body_alt, | 5385 new(zone) NegativeLookaheadChoiceNode(body_alt, |
5391 GuardedAlternative(on_success), | 5386 GuardedAlternative(on_success), |
5392 zone); | 5387 zone); |
5393 result = ActionNode::BeginSubmatch(stack_pointer_register, | 5388 return ActionNode::BeginSubmatch(stack_pointer_register, |
5394 position_register, choice_node); | 5389 position_register, |
| 5390 choice_node); |
5395 } | 5391 } |
5396 compiler->set_read_backward(was_reading_backward); | |
5397 return result; | |
5398 } | 5392 } |
5399 | 5393 |
5400 | 5394 |
5401 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, | 5395 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, |
5402 RegExpNode* on_success) { | 5396 RegExpNode* on_success) { |
5403 return ToNode(body(), index(), compiler, on_success); | 5397 return ToNode(body(), index(), compiler, on_success); |
5404 } | 5398 } |
5405 | 5399 |
5406 | 5400 |
5407 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, | 5401 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, |
5408 int index, | 5402 int index, |
5409 RegExpCompiler* compiler, | 5403 RegExpCompiler* compiler, |
5410 RegExpNode* on_success) { | 5404 RegExpNode* on_success) { |
5411 DCHECK_NOT_NULL(body); | |
5412 int start_reg = RegExpCapture::StartRegister(index); | 5405 int start_reg = RegExpCapture::StartRegister(index); |
5413 int end_reg = RegExpCapture::EndRegister(index); | 5406 int end_reg = RegExpCapture::EndRegister(index); |
5414 if (compiler->read_backward()) std::swap(start_reg, end_reg); | |
5415 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); | 5407 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); |
5416 RegExpNode* body_node = body->ToNode(compiler, store_end); | 5408 RegExpNode* body_node = body->ToNode(compiler, store_end); |
5417 return ActionNode::StorePosition(start_reg, true, body_node); | 5409 return ActionNode::StorePosition(start_reg, true, body_node); |
5418 } | 5410 } |
5419 | 5411 |
5420 | 5412 |
5421 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, | 5413 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, |
5422 RegExpNode* on_success) { | 5414 RegExpNode* on_success) { |
5423 ZoneList<RegExpTree*>* children = nodes(); | 5415 ZoneList<RegExpTree*>* children = nodes(); |
5424 RegExpNode* current = on_success; | 5416 RegExpNode* current = on_success; |
5425 if (compiler->read_backward()) { | 5417 for (int i = children->length() - 1; i >= 0; i--) { |
5426 for (int i = 0; i < children->length(); i++) { | 5418 current = children->at(i)->ToNode(compiler, current); |
5427 current = children->at(i)->ToNode(compiler, current); | |
5428 } | |
5429 } else { | |
5430 for (int i = children->length() - 1; i >= 0; i--) { | |
5431 current = children->at(i)->ToNode(compiler, current); | |
5432 } | |
5433 } | 5419 } |
5434 return current; | 5420 return current; |
5435 } | 5421 } |
5436 | 5422 |
5437 | 5423 |
5438 static void AddClass(const int* elmv, | 5424 static void AddClass(const int* elmv, |
5439 int elmc, | 5425 int elmc, |
5440 ZoneList<CharacterRange>* ranges, | 5426 ZoneList<CharacterRange>* ranges, |
5441 Zone* zone) { | 5427 Zone* zone) { |
5442 elmc--; | 5428 elmc--; |
(...skipping 855 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6298 0, | 6284 0, |
6299 &compiler, | 6285 &compiler, |
6300 compiler.accept()); | 6286 compiler.accept()); |
6301 RegExpNode* node = captured_body; | 6287 RegExpNode* node = captured_body; |
6302 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); | 6288 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); |
6303 bool is_start_anchored = data->tree->IsAnchoredAtStart(); | 6289 bool is_start_anchored = data->tree->IsAnchoredAtStart(); |
6304 int max_length = data->tree->max_match(); | 6290 int max_length = data->tree->max_match(); |
6305 if (!is_start_anchored && !is_sticky) { | 6291 if (!is_start_anchored && !is_sticky) { |
6306 // Add a .*? at the beginning, outside the body capture, unless | 6292 // Add a .*? at the beginning, outside the body capture, unless |
6307 // this expression is anchored at the beginning or sticky. | 6293 // this expression is anchored at the beginning or sticky. |
6308 RegExpNode* loop_node = RegExpQuantifier::ToNode( | 6294 RegExpNode* loop_node = |
6309 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'), | 6295 RegExpQuantifier::ToNode(0, |
6310 &compiler, captured_body, data->contains_anchor); | 6296 RegExpTree::kInfinity, |
| 6297 false, |
| 6298 new(zone) RegExpCharacterClass('*'), |
| 6299 &compiler, |
| 6300 captured_body, |
| 6301 data->contains_anchor); |
6311 | 6302 |
6312 if (data->contains_anchor) { | 6303 if (data->contains_anchor) { |
6313 // Unroll loop once, to take care of the case that might start | 6304 // Unroll loop once, to take care of the case that might start |
6314 // at the start of input. | 6305 // at the start of input. |
6315 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); | 6306 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); |
6316 first_step_node->AddAlternative(GuardedAlternative(captured_body)); | 6307 first_step_node->AddAlternative(GuardedAlternative(captured_body)); |
6317 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( | 6308 first_step_node->AddAlternative(GuardedAlternative( |
6318 new (zone) RegExpCharacterClass('*'), false, loop_node))); | 6309 new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node))); |
6319 node = first_step_node; | 6310 node = first_step_node; |
6320 } else { | 6311 } else { |
6321 node = loop_node; | 6312 node = loop_node; |
6322 } | 6313 } |
6323 } | 6314 } |
6324 if (is_one_byte) { | 6315 if (is_one_byte) { |
6325 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 6316 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
6326 // Do it again to propagate the new nodes to places where they were not | 6317 // Do it again to propagate the new nodes to places where they were not |
6327 // put because they had not been calculated yet. | 6318 // put because they had not been calculated yet. |
6328 if (node != NULL) { | 6319 if (node != NULL) { |
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6512 | 6503 |
6513 | 6504 |
6514 void RegExpResultsCache::Clear(FixedArray* cache) { | 6505 void RegExpResultsCache::Clear(FixedArray* cache) { |
6515 for (int i = 0; i < kRegExpResultsCacheSize; i++) { | 6506 for (int i = 0; i < kRegExpResultsCacheSize; i++) { |
6516 cache->set(i, Smi::FromInt(0)); | 6507 cache->set(i, Smi::FromInt(0)); |
6517 } | 6508 } |
6518 } | 6509 } |
6519 | 6510 |
6520 } // namespace internal | 6511 } // namespace internal |
6521 } // namespace v8 | 6512 } // namespace v8 |
OLD | NEW |