Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/jsregexp.h" | 5 #include "src/regexp/jsregexp.h" |
| 6 | 6 |
| 7 #include "src/ast.h" | 7 #include "src/ast.h" |
| 8 #include "src/base/platform/platform.h" | 8 #include "src/base/platform/platform.h" |
| 9 #include "src/compilation-cache.h" | 9 #include "src/compilation-cache.h" |
| 10 #include "src/compiler.h" | 10 #include "src/compiler.h" |
| (...skipping 1206 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1217 | 1217 |
| 1218 // The chronologically first deferred action in the trace | 1218 // The chronologically first deferred action in the trace |
| 1219 // is used to infer the action needed to restore a register | 1219 // is used to infer the action needed to restore a register |
| 1220 // to its previous state (or not, if it's safe to ignore it). | 1220 // to its previous state (or not, if it's safe to ignore it). |
| 1221 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR }; | 1221 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR }; |
| 1222 DeferredActionUndoType undo_action = IGNORE; | 1222 DeferredActionUndoType undo_action = IGNORE; |
| 1223 | 1223 |
| 1224 int value = 0; | 1224 int value = 0; |
| 1225 bool absolute = false; | 1225 bool absolute = false; |
| 1226 bool clear = false; | 1226 bool clear = false; |
| 1227 int store_position = -1; | 1227 static const int kNoStore = kMinInt; |
| 1228 int store_position = kNoStore; | |
| 1228 // This is a little tricky because we are scanning the actions in reverse | 1229 // This is a little tricky because we are scanning the actions in reverse |
| 1229 // historical order (newest first). | 1230 // historical order (newest first). |
| 1230 for (DeferredAction* action = actions_; | 1231 for (DeferredAction* action = actions_; |
| 1231 action != NULL; | 1232 action != NULL; |
| 1232 action = action->next()) { | 1233 action = action->next()) { |
| 1233 if (action->Mentions(reg)) { | 1234 if (action->Mentions(reg)) { |
| 1234 switch (action->action_type()) { | 1235 switch (action->action_type()) { |
| 1235 case ActionNode::SET_REGISTER: { | 1236 case ActionNode::SET_REGISTER: { |
| 1236 Trace::DeferredSetRegister* psr = | 1237 Trace::DeferredSetRegister* psr = |
| 1237 static_cast<Trace::DeferredSetRegister*>(action); | 1238 static_cast<Trace::DeferredSetRegister*>(action); |
| 1238 if (!absolute) { | 1239 if (!absolute) { |
| 1239 value += psr->value(); | 1240 value += psr->value(); |
| 1240 absolute = true; | 1241 absolute = true; |
| 1241 } | 1242 } |
| 1242 // SET_REGISTER is currently only used for newly introduced loop | 1243 // SET_REGISTER is currently only used for newly introduced loop |
| 1243 // counters. They can have a significant previous value if they | 1244 // counters. They can have a significant previous value if they |
| 1244 // occour in a loop. TODO(lrn): Propagate this information, so | 1245 // occour in a loop. TODO(lrn): Propagate this information, so |
| 1245 // we can set undo_action to IGNORE if we know there is no value to | 1246 // we can set undo_action to IGNORE if we know there is no value to |
| 1246 // restore. | 1247 // restore. |
| 1247 undo_action = RESTORE; | 1248 undo_action = RESTORE; |
| 1248 DCHECK_EQ(store_position, -1); | 1249 DCHECK_EQ(store_position, kNoStore); |
| 1249 DCHECK(!clear); | 1250 DCHECK(!clear); |
| 1250 break; | 1251 break; |
| 1251 } | 1252 } |
| 1252 case ActionNode::INCREMENT_REGISTER: | 1253 case ActionNode::INCREMENT_REGISTER: |
| 1253 if (!absolute) { | 1254 if (!absolute) { |
| 1254 value++; | 1255 value++; |
| 1255 } | 1256 } |
| 1256 DCHECK_EQ(store_position, -1); | 1257 DCHECK_EQ(store_position, kNoStore); |
| 1257 DCHECK(!clear); | 1258 DCHECK(!clear); |
| 1258 undo_action = RESTORE; | 1259 undo_action = RESTORE; |
| 1259 break; | 1260 break; |
| 1260 case ActionNode::STORE_POSITION: { | 1261 case ActionNode::STORE_POSITION: { |
| 1261 Trace::DeferredCapture* pc = | 1262 Trace::DeferredCapture* pc = |
| 1262 static_cast<Trace::DeferredCapture*>(action); | 1263 static_cast<Trace::DeferredCapture*>(action); |
| 1263 if (!clear && store_position == -1) { | 1264 if (!clear && store_position == kNoStore) { |
| 1264 store_position = pc->cp_offset(); | 1265 store_position = pc->cp_offset(); |
| 1265 } | 1266 } |
| 1266 | 1267 |
| 1267 // For captures we know that stores and clears alternate. | 1268 // For captures we know that stores and clears alternate. |
| 1268 // Other register, are never cleared, and if the occur | 1269 // Other register, are never cleared, and if the occur |
| 1269 // inside a loop, they might be assigned more than once. | 1270 // inside a loop, they might be assigned more than once. |
| 1270 if (reg <= 1) { | 1271 if (reg <= 1) { |
| 1271 // Registers zero and one, aka "capture zero", is | 1272 // Registers zero and one, aka "capture zero", is |
| 1272 // always set correctly if we succeed. There is no | 1273 // always set correctly if we succeed. There is no |
| 1273 // need to undo a setting on backtrack, because we | 1274 // need to undo a setting on backtrack, because we |
| 1274 // will set it again or fail. | 1275 // will set it again or fail. |
| 1275 undo_action = IGNORE; | 1276 undo_action = IGNORE; |
| 1276 } else { | 1277 } else { |
| 1277 undo_action = pc->is_capture() ? CLEAR : RESTORE; | 1278 undo_action = pc->is_capture() ? CLEAR : RESTORE; |
| 1278 } | 1279 } |
| 1279 DCHECK(!absolute); | 1280 DCHECK(!absolute); |
| 1280 DCHECK_EQ(value, 0); | 1281 DCHECK_EQ(value, 0); |
| 1281 break; | 1282 break; |
| 1282 } | 1283 } |
| 1283 case ActionNode::CLEAR_CAPTURES: { | 1284 case ActionNode::CLEAR_CAPTURES: { |
| 1284 // Since we're scanning in reverse order, if we've already | 1285 // Since we're scanning in reverse order, if we've already |
| 1285 // set the position we have to ignore historically earlier | 1286 // set the position we have to ignore historically earlier |
| 1286 // clearing operations. | 1287 // clearing operations. |
| 1287 if (store_position == -1) { | 1288 if (store_position == kNoStore) { |
| 1288 clear = true; | 1289 clear = true; |
| 1289 } | 1290 } |
| 1290 undo_action = RESTORE; | 1291 undo_action = RESTORE; |
| 1291 DCHECK(!absolute); | 1292 DCHECK(!absolute); |
| 1292 DCHECK_EQ(value, 0); | 1293 DCHECK_EQ(value, 0); |
| 1293 break; | 1294 break; |
| 1294 } | 1295 } |
| 1295 default: | 1296 default: |
| 1296 UNREACHABLE(); | 1297 UNREACHABLE(); |
| 1297 break; | 1298 break; |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 1308 pushes = 0; | 1309 pushes = 0; |
| 1309 } | 1310 } |
| 1310 | 1311 |
| 1311 assembler->PushRegister(reg, stack_check); | 1312 assembler->PushRegister(reg, stack_check); |
| 1312 registers_to_pop->Set(reg, zone); | 1313 registers_to_pop->Set(reg, zone); |
| 1313 } else if (undo_action == CLEAR) { | 1314 } else if (undo_action == CLEAR) { |
| 1314 registers_to_clear->Set(reg, zone); | 1315 registers_to_clear->Set(reg, zone); |
| 1315 } | 1316 } |
| 1316 // Perform the chronologically last action (or accumulated increment) | 1317 // Perform the chronologically last action (or accumulated increment) |
| 1317 // for the register. | 1318 // for the register. |
| 1318 if (store_position != -1) { | 1319 if (store_position != kNoStore) { |
| 1319 assembler->WriteCurrentPositionToRegister(reg, store_position); | 1320 assembler->WriteCurrentPositionToRegister(reg, store_position); |
| 1320 } else if (clear) { | 1321 } else if (clear) { |
| 1321 assembler->ClearRegisters(reg, reg); | 1322 assembler->ClearRegisters(reg, reg); |
| 1322 } else if (absolute) { | 1323 } else if (absolute) { |
| 1323 assembler->SetRegister(reg, value); | 1324 assembler->SetRegister(reg, value); |
| 1324 } else if (value != 0) { | 1325 } else if (value != 0) { |
| 1325 assembler->AdvanceRegister(reg, value); | 1326 assembler->AdvanceRegister(reg, value); |
| 1326 } | 1327 } |
| 1327 } | 1328 } |
| 1328 } | 1329 } |
| (...skipping 977 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2306 // Match the behaviour of EatsAtLeast on this node. | 2307 // Match the behaviour of EatsAtLeast on this node. |
| 2307 if (assertion_type() == AT_START && not_at_start) return; | 2308 if (assertion_type() == AT_START && not_at_start) return; |
| 2308 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); | 2309 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); |
| 2309 SaveBMInfo(bm, not_at_start, offset); | 2310 SaveBMInfo(bm, not_at_start, offset); |
| 2310 } | 2311 } |
| 2311 | 2312 |
| 2312 | 2313 |
| 2313 int BackReferenceNode::EatsAtLeast(int still_to_find, | 2314 int BackReferenceNode::EatsAtLeast(int still_to_find, |
| 2314 int budget, | 2315 int budget, |
| 2315 bool not_at_start) { | 2316 bool not_at_start) { |
| 2317 if (read_backward()) return 0; | |
| 2316 if (budget <= 0) return 0; | 2318 if (budget <= 0) return 0; |
| 2317 return on_success()->EatsAtLeast(still_to_find, | 2319 return on_success()->EatsAtLeast(still_to_find, |
| 2318 budget - 1, | 2320 budget - 1, |
| 2319 not_at_start); | 2321 not_at_start); |
| 2320 } | 2322 } |
| 2321 | 2323 |
| 2322 | 2324 |
| 2323 int TextNode::EatsAtLeast(int still_to_find, | 2325 int TextNode::EatsAtLeast(int still_to_find, |
| 2324 int budget, | 2326 int budget, |
| 2325 bool not_at_start) { | 2327 bool not_at_start) { |
| 2328 if (read_backward()) return 0; | |
| 2326 int answer = Length(); | 2329 int answer = Length(); |
| 2327 if (answer >= still_to_find) return answer; | 2330 if (answer >= still_to_find) return answer; |
| 2328 if (budget <= 0) return answer; | 2331 if (budget <= 0) return answer; |
| 2329 // We are not at start after this node so we set the last argument to 'true'. | 2332 // We are not at start after this node so we set the last argument to 'true'. |
| 2330 return answer + on_success()->EatsAtLeast(still_to_find - answer, | 2333 return answer + on_success()->EatsAtLeast(still_to_find - answer, |
| 2331 budget - 1, | 2334 budget - 1, |
| 2332 true); | 2335 true); |
| 2333 } | 2336 } |
| 2334 | 2337 |
| 2335 | 2338 |
| (...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2519 bool not_at_start) { | 2522 bool not_at_start) { |
| 2520 Isolate* isolate = compiler->macro_assembler()->isolate(); | 2523 Isolate* isolate = compiler->macro_assembler()->isolate(); |
| 2521 DCHECK(characters_filled_in < details->characters()); | 2524 DCHECK(characters_filled_in < details->characters()); |
| 2522 int characters = details->characters(); | 2525 int characters = details->characters(); |
| 2523 int char_mask; | 2526 int char_mask; |
| 2524 if (compiler->one_byte()) { | 2527 if (compiler->one_byte()) { |
| 2525 char_mask = String::kMaxOneByteCharCode; | 2528 char_mask = String::kMaxOneByteCharCode; |
| 2526 } else { | 2529 } else { |
| 2527 char_mask = String::kMaxUtf16CodeUnit; | 2530 char_mask = String::kMaxUtf16CodeUnit; |
| 2528 } | 2531 } |
| 2529 for (int k = 0; k < elms_->length(); k++) { | 2532 for (int k = 0; k < elements()->length(); k++) { |
| 2530 TextElement elm = elms_->at(k); | 2533 TextElement elm = elements()->at(k); |
| 2531 if (elm.text_type() == TextElement::ATOM) { | 2534 if (elm.text_type() == TextElement::ATOM) { |
| 2532 Vector<const uc16> quarks = elm.atom()->data(); | 2535 Vector<const uc16> quarks = elm.atom()->data(); |
| 2533 for (int i = 0; i < characters && i < quarks.length(); i++) { | 2536 for (int i = 0; i < characters && i < quarks.length(); i++) { |
| 2534 QuickCheckDetails::Position* pos = | 2537 QuickCheckDetails::Position* pos = |
| 2535 details->positions(characters_filled_in); | 2538 details->positions(characters_filled_in); |
| 2536 uc16 c = quarks[i]; | 2539 uc16 c = quarks[i]; |
| 2537 if (compiler->ignore_case()) { | 2540 if (compiler->ignore_case()) { |
| 2538 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 2541 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 2539 int length = GetCaseIndependentLetters(isolate, c, | 2542 int length = GetCaseIndependentLetters(isolate, c, |
| 2540 compiler->one_byte(), chars); | 2543 compiler->one_byte(), chars); |
| (...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2671 for (int i = 0; i < characters_; i++) { | 2674 for (int i = 0; i < characters_; i++) { |
| 2672 positions_[i].mask = 0; | 2675 positions_[i].mask = 0; |
| 2673 positions_[i].value = 0; | 2676 positions_[i].value = 0; |
| 2674 positions_[i].determines_perfectly = false; | 2677 positions_[i].determines_perfectly = false; |
| 2675 } | 2678 } |
| 2676 characters_ = 0; | 2679 characters_ = 0; |
| 2677 } | 2680 } |
| 2678 | 2681 |
| 2679 | 2682 |
| 2680 void QuickCheckDetails::Advance(int by, bool one_byte) { | 2683 void QuickCheckDetails::Advance(int by, bool one_byte) { |
| 2681 DCHECK(by >= 0); | |
| 2682 if (by >= characters_) { | 2684 if (by >= characters_) { |
| 2683 Clear(); | 2685 Clear(); |
| 2684 return; | 2686 return; |
| 2685 } | 2687 } |
| 2686 for (int i = 0; i < characters_ - by; i++) { | 2688 for (int i = 0; i < characters_ - by; i++) { |
| 2687 positions_[i] = positions_[by + i]; | 2689 positions_[i] = positions_[by + i]; |
| 2688 } | 2690 } |
| 2689 for (int i = characters_ - by; i < characters_; i++) { | 2691 for (int i = characters_ - by; i < characters_; i++) { |
| 2690 positions_[i].mask = 0; | 2692 positions_[i].mask = 0; |
| 2691 positions_[i].value = 0; | 2693 positions_[i].value = 0; |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2773 } | 2775 } |
| 2774 return false; | 2776 return false; |
| 2775 } | 2777 } |
| 2776 | 2778 |
| 2777 | 2779 |
| 2778 RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) { | 2780 RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) { |
| 2779 if (info()->replacement_calculated) return replacement(); | 2781 if (info()->replacement_calculated) return replacement(); |
| 2780 if (depth < 0) return this; | 2782 if (depth < 0) return this; |
| 2781 DCHECK(!info()->visited); | 2783 DCHECK(!info()->visited); |
| 2782 VisitMarker marker(info()); | 2784 VisitMarker marker(info()); |
| 2783 int element_count = elms_->length(); | 2785 int element_count = elements()->length(); |
| 2784 for (int i = 0; i < element_count; i++) { | 2786 for (int i = 0; i < element_count; i++) { |
| 2785 TextElement elm = elms_->at(i); | 2787 TextElement elm = elements()->at(i); |
| 2786 if (elm.text_type() == TextElement::ATOM) { | 2788 if (elm.text_type() == TextElement::ATOM) { |
| 2787 Vector<const uc16> quarks = elm.atom()->data(); | 2789 Vector<const uc16> quarks = elm.atom()->data(); |
| 2788 for (int j = 0; j < quarks.length(); j++) { | 2790 for (int j = 0; j < quarks.length(); j++) { |
| 2789 uint16_t c = quarks[j]; | 2791 uint16_t c = quarks[j]; |
| 2790 if (c <= String::kMaxOneByteCharCode) continue; | 2792 if (c <= String::kMaxOneByteCharCode) continue; |
| 2791 if (!ignore_case) return set_replacement(NULL); | 2793 if (!ignore_case) return set_replacement(NULL); |
| 2792 // Here, we need to check for characters whose upper and lower cases | 2794 // Here, we need to check for characters whose upper and lower cases |
| 2793 // are outside the Latin-1 range. | 2795 // are outside the Latin-1 range. |
| 2794 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); | 2796 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); |
| 2795 // Character is outside Latin-1 completely | 2797 // Character is outside Latin-1 completely |
| (...skipping 343 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3139 assembler->GoTo(trace->backtrack()); | 3141 assembler->GoTo(trace->backtrack()); |
| 3140 assembler->Bind(&ok); | 3142 assembler->Bind(&ok); |
| 3141 break; | 3143 break; |
| 3142 } | 3144 } |
| 3143 case AT_START: { | 3145 case AT_START: { |
| 3144 if (trace->at_start() == Trace::FALSE_VALUE) { | 3146 if (trace->at_start() == Trace::FALSE_VALUE) { |
| 3145 assembler->GoTo(trace->backtrack()); | 3147 assembler->GoTo(trace->backtrack()); |
| 3146 return; | 3148 return; |
| 3147 } | 3149 } |
| 3148 if (trace->at_start() == Trace::UNKNOWN) { | 3150 if (trace->at_start() == Trace::UNKNOWN) { |
| 3149 assembler->CheckNotAtStart(trace->backtrack()); | 3151 assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack()); |
| 3150 Trace at_start_trace = *trace; | 3152 Trace at_start_trace = *trace; |
| 3151 at_start_trace.set_at_start(true); | 3153 at_start_trace.set_at_start(Trace::TRUE_VALUE); |
| 3152 on_success()->Emit(compiler, &at_start_trace); | 3154 on_success()->Emit(compiler, &at_start_trace); |
| 3153 return; | 3155 return; |
| 3154 } | 3156 } |
| 3155 } | 3157 } |
| 3156 break; | 3158 break; |
| 3157 case AFTER_NEWLINE: | 3159 case AFTER_NEWLINE: |
| 3158 EmitHat(compiler, on_success(), trace); | 3160 EmitHat(compiler, on_success(), trace); |
| 3159 return; | 3161 return; |
| 3160 case AT_BOUNDARY: | 3162 case AT_BOUNDARY: |
| 3161 case AT_NON_BOUNDARY: { | 3163 case AT_NON_BOUNDARY: { |
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3214 TextEmitPassType pass, | 3216 TextEmitPassType pass, |
| 3215 bool preloaded, | 3217 bool preloaded, |
| 3216 Trace* trace, | 3218 Trace* trace, |
| 3217 bool first_element_checked, | 3219 bool first_element_checked, |
| 3218 int* checked_up_to) { | 3220 int* checked_up_to) { |
| 3219 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3221 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 3220 Isolate* isolate = assembler->isolate(); | 3222 Isolate* isolate = assembler->isolate(); |
| 3221 bool one_byte = compiler->one_byte(); | 3223 bool one_byte = compiler->one_byte(); |
| 3222 Label* backtrack = trace->backtrack(); | 3224 Label* backtrack = trace->backtrack(); |
| 3223 QuickCheckDetails* quick_check = trace->quick_check_performed(); | 3225 QuickCheckDetails* quick_check = trace->quick_check_performed(); |
| 3224 int element_count = elms_->length(); | 3226 int element_count = elements()->length(); |
| 3227 int backward_offset = read_backward() ? -Length() : 0; | |
| 3225 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { | 3228 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { |
| 3226 TextElement elm = elms_->at(i); | 3229 TextElement elm = elements()->at(i); |
| 3227 int cp_offset = trace->cp_offset() + elm.cp_offset(); | 3230 int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset; |
| 3228 if (elm.text_type() == TextElement::ATOM) { | 3231 if (elm.text_type() == TextElement::ATOM) { |
| 3229 Vector<const uc16> quarks = elm.atom()->data(); | 3232 Vector<const uc16> quarks = elm.atom()->data(); |
| 3230 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { | 3233 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { |
| 3231 if (first_element_checked && i == 0 && j == 0) continue; | 3234 if (first_element_checked && i == 0 && j == 0) continue; |
| 3232 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; | 3235 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; |
| 3233 EmitCharacterFunction* emit_function = NULL; | 3236 EmitCharacterFunction* emit_function = NULL; |
| 3234 switch (pass) { | 3237 switch (pass) { |
| 3235 case NON_LATIN1_MATCH: | 3238 case NON_LATIN1_MATCH: |
| 3236 DCHECK(one_byte); | 3239 DCHECK(one_byte); |
| 3237 if (quarks[j] > String::kMaxOneByteCharCode) { | 3240 if (quarks[j] > String::kMaxOneByteCharCode) { |
| 3238 assembler->GoTo(backtrack); | 3241 assembler->GoTo(backtrack); |
| 3239 return; | 3242 return; |
| 3240 } | 3243 } |
| 3241 break; | 3244 break; |
| 3242 case NON_LETTER_CHARACTER_MATCH: | 3245 case NON_LETTER_CHARACTER_MATCH: |
| 3243 emit_function = &EmitAtomNonLetter; | 3246 emit_function = &EmitAtomNonLetter; |
| 3244 break; | 3247 break; |
| 3245 case SIMPLE_CHARACTER_MATCH: | 3248 case SIMPLE_CHARACTER_MATCH: |
| 3246 emit_function = &EmitSimpleCharacter; | 3249 emit_function = &EmitSimpleCharacter; |
| 3247 break; | 3250 break; |
| 3248 case CASE_CHARACTER_MATCH: | 3251 case CASE_CHARACTER_MATCH: |
| 3249 emit_function = &EmitAtomLetter; | 3252 emit_function = &EmitAtomLetter; |
| 3250 break; | 3253 break; |
| 3251 default: | 3254 default: |
| 3252 break; | 3255 break; |
| 3253 } | 3256 } |
| 3254 if (emit_function != NULL) { | 3257 if (emit_function != NULL) { |
| 3255 bool bound_checked = emit_function(isolate, | 3258 bool bounds_check = *checked_up_to < cp_offset + j || read_backward(); |
| 3256 compiler, | 3259 bool bound_checked = |
| 3257 quarks[j], | 3260 emit_function(isolate, compiler, quarks[j], backtrack, |
| 3258 backtrack, | 3261 cp_offset + j, bounds_check, preloaded); |
| 3259 cp_offset + j, | |
| 3260 *checked_up_to < cp_offset + j, | |
| 3261 preloaded); | |
| 3262 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); | 3262 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); |
| 3263 } | 3263 } |
| 3264 } | 3264 } |
| 3265 } else { | 3265 } else { |
| 3266 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type()); | 3266 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type()); |
| 3267 if (pass == CHARACTER_CLASS_MATCH) { | 3267 if (pass == CHARACTER_CLASS_MATCH) { |
| 3268 if (first_element_checked && i == 0) continue; | 3268 if (first_element_checked && i == 0) continue; |
| 3269 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; | 3269 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; |
| 3270 RegExpCharacterClass* cc = elm.char_class(); | 3270 RegExpCharacterClass* cc = elm.char_class(); |
| 3271 bool bounds_check = *checked_up_to < cp_offset || read_backward(); | |
| 3271 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, | 3272 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, |
| 3272 *checked_up_to < cp_offset, preloaded, zone()); | 3273 bounds_check, preloaded, zone()); |
| 3273 UpdateBoundsCheck(cp_offset, checked_up_to); | 3274 UpdateBoundsCheck(cp_offset, checked_up_to); |
| 3274 } | 3275 } |
| 3275 } | 3276 } |
| 3276 } | 3277 } |
| 3277 } | 3278 } |
| 3278 | 3279 |
| 3279 | 3280 |
| 3280 int TextNode::Length() { | 3281 int TextNode::Length() { |
| 3281 TextElement elm = elms_->last(); | 3282 TextElement elm = elements()->last(); |
| 3282 DCHECK(elm.cp_offset() >= 0); | 3283 DCHECK(elm.cp_offset() >= 0); |
| 3283 return elm.cp_offset() + elm.length(); | 3284 return elm.cp_offset() + elm.length(); |
| 3284 } | 3285 } |
| 3285 | 3286 |
| 3286 | 3287 |
| 3287 bool TextNode::SkipPass(int int_pass, bool ignore_case) { | 3288 bool TextNode::SkipPass(int int_pass, bool ignore_case) { |
| 3288 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass); | 3289 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass); |
| 3289 if (ignore_case) { | 3290 if (ignore_case) { |
| 3290 return pass == SIMPLE_CHARACTER_MATCH; | 3291 return pass == SIMPLE_CHARACTER_MATCH; |
| 3291 } else { | 3292 } else { |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3340 TextEmitPass(compiler, | 3341 TextEmitPass(compiler, |
| 3341 static_cast<TextEmitPassType>(pass), | 3342 static_cast<TextEmitPassType>(pass), |
| 3342 false, | 3343 false, |
| 3343 trace, | 3344 trace, |
| 3344 first_elt_done, | 3345 first_elt_done, |
| 3345 &bound_checked_to); | 3346 &bound_checked_to); |
| 3346 } | 3347 } |
| 3347 } | 3348 } |
| 3348 | 3349 |
| 3349 Trace successor_trace(*trace); | 3350 Trace successor_trace(*trace); |
| 3350 successor_trace.set_at_start(false); | 3351 // If we advance backward, we may end up at the start. |
| 3351 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); | 3352 successor_trace.AdvanceCurrentPositionInTrace( |
| 3353 read_backward() ? -Length() : Length(), compiler); | |
| 3354 successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN | |
| 3355 : Trace::FALSE_VALUE); | |
| 3352 RecursionCheck rc(compiler); | 3356 RecursionCheck rc(compiler); |
| 3353 on_success()->Emit(compiler, &successor_trace); | 3357 on_success()->Emit(compiler, &successor_trace); |
| 3354 } | 3358 } |
| 3355 | 3359 |
| 3356 | 3360 |
| 3357 void Trace::InvalidateCurrentCharacter() { | 3361 void Trace::InvalidateCurrentCharacter() { |
| 3358 characters_preloaded_ = 0; | 3362 characters_preloaded_ = 0; |
| 3359 } | 3363 } |
| 3360 | 3364 |
| 3361 | 3365 |
| 3362 void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) { | 3366 void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) { |
| 3363 DCHECK(by > 0); | |
| 3364 // We don't have an instruction for shifting the current character register | 3367 // We don't have an instruction for shifting the current character register |
| 3365 // down or for using a shifted value for anything so lets just forget that | 3368 // down or for using a shifted value for anything so lets just forget that |
| 3366 // we preloaded any characters into it. | 3369 // we preloaded any characters into it. |
| 3367 characters_preloaded_ = 0; | 3370 characters_preloaded_ = 0; |
| 3368 // Adjust the offsets of the quick check performed information. This | 3371 // Adjust the offsets of the quick check performed information. This |
| 3369 // information is used to find out what we already determined about the | 3372 // information is used to find out what we already determined about the |
| 3370 // characters by means of mask and compare. | 3373 // characters by means of mask and compare. |
| 3371 quick_check_performed_.Advance(by, compiler->one_byte()); | 3374 quick_check_performed_.Advance(by, compiler->one_byte()); |
| 3372 cp_offset_ += by; | 3375 cp_offset_ += by; |
| 3373 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { | 3376 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { |
| 3374 compiler->SetRegExpTooBig(); | 3377 compiler->SetRegExpTooBig(); |
| 3375 cp_offset_ = 0; | 3378 cp_offset_ = 0; |
| 3376 } | 3379 } |
| 3377 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); | 3380 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); |
| 3378 } | 3381 } |
| 3379 | 3382 |
| 3380 | 3383 |
| 3381 void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) { | 3384 void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) { |
| 3382 int element_count = elms_->length(); | 3385 int element_count = elements()->length(); |
| 3383 for (int i = 0; i < element_count; i++) { | 3386 for (int i = 0; i < element_count; i++) { |
| 3384 TextElement elm = elms_->at(i); | 3387 TextElement elm = elements()->at(i); |
| 3385 if (elm.text_type() == TextElement::CHAR_CLASS) { | 3388 if (elm.text_type() == TextElement::CHAR_CLASS) { |
| 3386 RegExpCharacterClass* cc = elm.char_class(); | 3389 RegExpCharacterClass* cc = elm.char_class(); |
| 3387 // None of the standard character classes is different in the case | 3390 // None of the standard character classes is different in the case |
| 3388 // independent case and it slows us down if we don't know that. | 3391 // independent case and it slows us down if we don't know that. |
| 3389 if (cc->is_standard(zone())) continue; | 3392 if (cc->is_standard(zone())) continue; |
| 3390 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); | 3393 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); |
| 3391 int range_count = ranges->length(); | 3394 int range_count = ranges->length(); |
| 3392 for (int j = 0; j < range_count; j++) { | 3395 for (int j = 0; j < range_count; j++) { |
| 3393 ranges->at(j).AddCaseEquivalents(isolate, zone(), ranges, is_one_byte); | 3396 ranges->at(j).AddCaseEquivalents(isolate, zone(), ranges, is_one_byte); |
| 3394 } | 3397 } |
| 3395 } | 3398 } |
| 3396 } | 3399 } |
| 3397 } | 3400 } |
| 3398 | 3401 |
| 3399 | 3402 |
| 3400 int TextNode::GreedyLoopTextLength() { | 3403 int TextNode::GreedyLoopTextLength() { |
| 3401 TextElement elm = elms_->at(elms_->length() - 1); | 3404 return read_backward() ? kNodeIsTooComplexForGreedyLoops : Length(); |
|
erikcorry
2015/11/11 15:39:04
This is a case where perhaps we do need a little o
Yang
2015/11/12 08:25:35
Done.
| |
| 3402 return elm.cp_offset() + elm.length(); | |
| 3403 } | 3405 } |
| 3404 | 3406 |
| 3405 | 3407 |
| 3406 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( | 3408 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( |
|
erikcorry
2015/11/11 15:39:04
Not quite sure, but this optimization may have to
Yang
2015/11/12 08:25:35
I haven't found a test case where this would becom
| |
| 3407 RegExpCompiler* compiler) { | 3409 RegExpCompiler* compiler) { |
| 3408 if (elms_->length() != 1) return NULL; | 3410 if (elements()->length() != 1) return NULL; |
| 3409 TextElement elm = elms_->at(0); | 3411 TextElement elm = elements()->at(0); |
| 3410 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; | 3412 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; |
| 3411 RegExpCharacterClass* node = elm.char_class(); | 3413 RegExpCharacterClass* node = elm.char_class(); |
| 3412 ZoneList<CharacterRange>* ranges = node->ranges(zone()); | 3414 ZoneList<CharacterRange>* ranges = node->ranges(zone()); |
| 3413 if (!CharacterRange::IsCanonical(ranges)) { | 3415 if (!CharacterRange::IsCanonical(ranges)) { |
| 3414 CharacterRange::Canonicalize(ranges); | 3416 CharacterRange::Canonicalize(ranges); |
| 3415 } | 3417 } |
| 3416 if (node->is_negated()) { | 3418 if (node->is_negated()) { |
| 3417 return ranges->length() == 0 ? on_success() : NULL; | 3419 return ranges->length() == 0 ? on_success() : NULL; |
| 3418 } | 3420 } |
| 3419 if (ranges->length() != 1) return NULL; | 3421 if (ranges->length() != 1) return NULL; |
| (...skipping 454 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3874 * V |S | 3876 * V |S |
| 3875 * Q2 ---> U----->backtrack | 3877 * Q2 ---> U----->backtrack |
| 3876 * | F / | 3878 * | F / |
| 3877 * S| / | 3879 * S| / |
| 3878 * V F / | 3880 * V F / |
| 3879 * S2--/ | 3881 * S2--/ |
| 3880 */ | 3882 */ |
| 3881 | 3883 |
| 3882 GreedyLoopState::GreedyLoopState(bool not_at_start) { | 3884 GreedyLoopState::GreedyLoopState(bool not_at_start) { |
| 3883 counter_backtrack_trace_.set_backtrack(&label_); | 3885 counter_backtrack_trace_.set_backtrack(&label_); |
| 3884 if (not_at_start) counter_backtrack_trace_.set_at_start(false); | 3886 if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE); |
| 3885 } | 3887 } |
| 3886 | 3888 |
| 3887 | 3889 |
| 3888 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { | 3890 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { |
| 3889 #ifdef DEBUG | 3891 #ifdef DEBUG |
| 3890 int choice_count = alternatives_->length(); | 3892 int choice_count = alternatives_->length(); |
| 3891 for (int i = 0; i < choice_count - 1; i++) { | 3893 for (int i = 0; i < choice_count - 1; i++) { |
| 3892 GuardedAlternative alternative = alternatives_->at(i); | 3894 GuardedAlternative alternative = alternatives_->at(i); |
| 3893 ZoneList<Guard*>* guards = alternative.guards(); | 3895 ZoneList<Guard*>* guards = alternative.guards(); |
| 3894 int guard_count = (guards == NULL) ? 0 : guards->length(); | 3896 int guard_count = (guards == NULL) ? 0 : guards->length(); |
| (...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4001 // and other simple nodes. These are handled by pushing the current | 4003 // and other simple nodes. These are handled by pushing the current |
| 4002 // position on the stack and then incrementing the current position each | 4004 // position on the stack and then incrementing the current position each |
| 4003 // time around the switch. On backtrack we decrement the current position | 4005 // time around the switch. On backtrack we decrement the current position |
| 4004 // and check it against the pushed value. This avoids pushing backtrack | 4006 // and check it against the pushed value. This avoids pushing backtrack |
| 4005 // information for each iteration of the loop, which could take up a lot of | 4007 // information for each iteration of the loop, which could take up a lot of |
| 4006 // space. | 4008 // space. |
| 4007 DCHECK(trace->stop_node() == NULL); | 4009 DCHECK(trace->stop_node() == NULL); |
| 4008 macro_assembler->PushCurrentPosition(); | 4010 macro_assembler->PushCurrentPosition(); |
| 4009 Label greedy_match_failed; | 4011 Label greedy_match_failed; |
| 4010 Trace greedy_match_trace; | 4012 Trace greedy_match_trace; |
| 4011 if (not_at_start()) greedy_match_trace.set_at_start(false); | 4013 if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE); |
| 4012 greedy_match_trace.set_backtrack(&greedy_match_failed); | 4014 greedy_match_trace.set_backtrack(&greedy_match_failed); |
| 4013 Label loop_label; | 4015 Label loop_label; |
| 4014 macro_assembler->Bind(&loop_label); | 4016 macro_assembler->Bind(&loop_label); |
| 4015 greedy_match_trace.set_stop_node(this); | 4017 greedy_match_trace.set_stop_node(this); |
| 4016 greedy_match_trace.set_loop_label(&loop_label); | 4018 greedy_match_trace.set_loop_label(&loop_label); |
| 4017 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace); | 4019 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace); |
| 4018 macro_assembler->Bind(&greedy_match_failed); | 4020 macro_assembler->Bind(&greedy_match_failed); |
| 4019 | 4021 |
| 4020 Label second_choice; // For use in greedy matches. | 4022 Label second_choice; // For use in greedy matches. |
| 4021 macro_assembler->Bind(&second_choice); | 4023 macro_assembler->Bind(&second_choice); |
| (...skipping 325 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4347 } | 4349 } |
| 4348 | 4350 |
| 4349 LimitResult limit_result = LimitVersions(compiler, trace); | 4351 LimitResult limit_result = LimitVersions(compiler, trace); |
| 4350 if (limit_result == DONE) return; | 4352 if (limit_result == DONE) return; |
| 4351 DCHECK(limit_result == CONTINUE); | 4353 DCHECK(limit_result == CONTINUE); |
| 4352 | 4354 |
| 4353 RecursionCheck rc(compiler); | 4355 RecursionCheck rc(compiler); |
| 4354 | 4356 |
| 4355 DCHECK_EQ(start_reg_ + 1, end_reg_); | 4357 DCHECK_EQ(start_reg_ + 1, end_reg_); |
| 4356 if (compiler->ignore_case()) { | 4358 if (compiler->ignore_case()) { |
| 4357 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, | 4359 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(), |
| 4358 trace->backtrack()); | 4360 trace->backtrack()); |
| 4359 } else { | 4361 } else { |
| 4360 assembler->CheckNotBackReference(start_reg_, trace->backtrack()); | 4362 assembler->CheckNotBackReference(start_reg_, read_backward(), |
| 4363 trace->backtrack()); | |
| 4361 } | 4364 } |
| 4365 // We are going to advance backward, so we may end up at the start. | |
| 4366 if (read_backward()) trace->set_at_start(Trace::UNKNOWN); | |
| 4362 on_success()->Emit(compiler, trace); | 4367 on_success()->Emit(compiler, trace); |
| 4363 } | 4368 } |
| 4364 | 4369 |
| 4365 | 4370 |
| 4366 // ------------------------------------------------------------------- | 4371 // ------------------------------------------------------------------- |
| 4367 // Dot/dotty output | 4372 // Dot/dotty output |
| 4368 | 4373 |
| 4369 | 4374 |
| 4370 #ifdef DEBUG | 4375 #ifdef DEBUG |
| 4371 | 4376 |
| (...skipping 340 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4712 | 4717 |
| 4713 | 4718 |
| 4714 // ------------------------------------------------------------------- | 4719 // ------------------------------------------------------------------- |
| 4715 // Tree to graph conversion | 4720 // Tree to graph conversion |
| 4716 | 4721 |
| 4717 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, | 4722 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, |
| 4718 RegExpNode* on_success) { | 4723 RegExpNode* on_success) { |
| 4719 ZoneList<TextElement>* elms = | 4724 ZoneList<TextElement>* elms = |
| 4720 new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone()); | 4725 new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone()); |
| 4721 elms->Add(TextElement::Atom(this), compiler->zone()); | 4726 elms->Add(TextElement::Atom(this), compiler->zone()); |
| 4722 return new(compiler->zone()) TextNode(elms, on_success); | 4727 return new (compiler->zone()) TextNode(elms, read_backward(), on_success); |
| 4723 } | 4728 } |
| 4724 | 4729 |
| 4725 | 4730 |
| 4726 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, | 4731 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, |
| 4727 RegExpNode* on_success) { | 4732 RegExpNode* on_success) { |
| 4728 return new(compiler->zone()) TextNode(elements(), on_success); | 4733 return new (compiler->zone()) |
| 4734 TextNode(elements(), read_backward(), on_success); | |
| 4729 } | 4735 } |
| 4730 | 4736 |
| 4731 | 4737 |
| 4732 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges, | 4738 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges, |
| 4733 const int* special_class, | 4739 const int* special_class, |
| 4734 int length) { | 4740 int length) { |
| 4735 length--; // Remove final 0x10000. | 4741 length--; // Remove final 0x10000. |
| 4736 DCHECK(special_class[length] == 0x10000); | 4742 DCHECK(special_class[length] == 0x10000); |
| 4737 DCHECK(ranges->length() != 0); | 4743 DCHECK(ranges->length() != 0); |
| 4738 DCHECK(length != 0); | 4744 DCHECK(length != 0); |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4815 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) { | 4821 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) { |
| 4816 set_.set_standard_set_type('W'); | 4822 set_.set_standard_set_type('W'); |
| 4817 return true; | 4823 return true; |
| 4818 } | 4824 } |
| 4819 return false; | 4825 return false; |
| 4820 } | 4826 } |
| 4821 | 4827 |
| 4822 | 4828 |
| 4823 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 4829 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
| 4824 RegExpNode* on_success) { | 4830 RegExpNode* on_success) { |
| 4825 return new(compiler->zone()) TextNode(this, on_success); | 4831 return new (compiler->zone()) TextNode(this, read_backward(), on_success); |
| 4826 } | 4832 } |
| 4827 | 4833 |
| 4828 | 4834 |
| 4829 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { | 4835 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { |
| 4830 RegExpAtom* atom1 = (*a)->AsAtom(); | 4836 RegExpAtom* atom1 = (*a)->AsAtom(); |
| 4831 RegExpAtom* atom2 = (*b)->AsAtom(); | 4837 RegExpAtom* atom2 = (*b)->AsAtom(); |
| 4832 uc16 character1 = atom1->data().at(0); | 4838 uc16 character1 = atom1->data().at(0); |
| 4833 uc16 character2 = atom2->data().at(0); | 4839 uc16 character2 = atom2->data().at(0); |
| 4834 if (character1 < character2) return -1; | 4840 if (character1 < character2) return -1; |
| 4835 if (character1 > character2) return 1; | 4841 if (character1 > character2) return 1; |
| (...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4960 for (int j = 1; j < run_length && prefix_length > 1; j++) { | 4966 for (int j = 1; j < run_length && prefix_length > 1; j++) { |
| 4961 RegExpAtom* old_atom = | 4967 RegExpAtom* old_atom = |
| 4962 alternatives->at(j + first_with_prefix)->AsAtom(); | 4968 alternatives->at(j + first_with_prefix)->AsAtom(); |
| 4963 for (int k = 1; k < prefix_length; k++) { | 4969 for (int k = 1; k < prefix_length; k++) { |
| 4964 if (atom->data().at(k) != old_atom->data().at(k)) { | 4970 if (atom->data().at(k) != old_atom->data().at(k)) { |
| 4965 prefix_length = k; | 4971 prefix_length = k; |
| 4966 break; | 4972 break; |
| 4967 } | 4973 } |
| 4968 } | 4974 } |
| 4969 } | 4975 } |
| 4970 RegExpAtom* prefix = | 4976 RegExpAtom* prefix = new (zone) RegExpAtom( |
| 4971 new (zone) RegExpAtom(atom->data().SubVector(0, prefix_length)); | 4977 atom->data().SubVector(0, prefix_length), read_direction()); |
| 4972 ZoneList<RegExpTree*>* pair = new (zone) ZoneList<RegExpTree*>(2, zone); | 4978 ZoneList<RegExpTree*>* pair = new (zone) ZoneList<RegExpTree*>(2, zone); |
| 4973 pair->Add(prefix, zone); | 4979 pair->Add(prefix, zone); |
| 4974 ZoneList<RegExpTree*>* suffixes = | 4980 ZoneList<RegExpTree*>* suffixes = |
| 4975 new (zone) ZoneList<RegExpTree*>(run_length, zone); | 4981 new (zone) ZoneList<RegExpTree*>(run_length, zone); |
| 4976 for (int j = 0; j < run_length; j++) { | 4982 for (int j = 0; j < run_length; j++) { |
| 4977 RegExpAtom* old_atom = | 4983 RegExpAtom* old_atom = |
| 4978 alternatives->at(j + first_with_prefix)->AsAtom(); | 4984 alternatives->at(j + first_with_prefix)->AsAtom(); |
| 4979 int len = old_atom->length(); | 4985 int len = old_atom->length(); |
| 4980 if (len == prefix_length) { | 4986 if (len == prefix_length) { |
| 4981 suffixes->Add(new (zone) RegExpEmpty(), zone); | 4987 suffixes->Add(new (zone) RegExpEmpty(), zone); |
| 4982 } else { | 4988 } else { |
| 4983 RegExpTree* suffix = new (zone) RegExpAtom( | 4989 RegExpTree* suffix = new (zone) RegExpAtom( |
| 4984 old_atom->data().SubVector(prefix_length, old_atom->length())); | 4990 old_atom->data().SubVector(prefix_length, old_atom->length()), |
| 4991 read_direction()); | |
| 4985 suffixes->Add(suffix, zone); | 4992 suffixes->Add(suffix, zone); |
| 4986 } | 4993 } |
| 4987 } | 4994 } |
| 4988 pair->Add(new (zone) RegExpDisjunction(suffixes), zone); | 4995 pair->Add(new (zone) RegExpDisjunction(suffixes, read_direction()), zone); |
| 4989 alternatives->at(write_posn++) = new (zone) RegExpAlternative(pair); | 4996 alternatives->at(write_posn++) = |
| 4997 new (zone) RegExpAlternative(pair, read_direction()); | |
| 4990 } else { | 4998 } else { |
| 4991 // Just copy any non-worthwhile alternatives. | 4999 // Just copy any non-worthwhile alternatives. |
| 4992 for (int j = first_with_prefix; j < i; j++) { | 5000 for (int j = first_with_prefix; j < i; j++) { |
| 4993 alternatives->at(write_posn++) = alternatives->at(j); | 5001 alternatives->at(write_posn++) = alternatives->at(j); |
| 4994 } | 5002 } |
| 4995 } | 5003 } |
| 4996 } | 5004 } |
| 4997 alternatives->Rewind(write_posn); // Trim end of array. | 5005 alternatives->Rewind(write_posn); // Trim end of array. |
| 4998 } | 5006 } |
| 4999 | 5007 |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5033 // Found non-trivial run of single-character alternatives. | 5041 // Found non-trivial run of single-character alternatives. |
| 5034 int run_length = i - first_in_run; | 5042 int run_length = i - first_in_run; |
| 5035 ZoneList<CharacterRange>* ranges = | 5043 ZoneList<CharacterRange>* ranges = |
| 5036 new (zone) ZoneList<CharacterRange>(2, zone); | 5044 new (zone) ZoneList<CharacterRange>(2, zone); |
| 5037 for (int j = 0; j < run_length; j++) { | 5045 for (int j = 0; j < run_length; j++) { |
| 5038 RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom(); | 5046 RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom(); |
| 5039 DCHECK_EQ(old_atom->length(), 1); | 5047 DCHECK_EQ(old_atom->length(), 1); |
| 5040 ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone); | 5048 ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone); |
| 5041 } | 5049 } |
| 5042 alternatives->at(write_posn++) = | 5050 alternatives->at(write_posn++) = |
| 5043 new (zone) RegExpCharacterClass(ranges, false); | 5051 new (zone) RegExpCharacterClass(ranges, false, read_direction()); |
| 5044 } else { | 5052 } else { |
| 5045 // Just copy any trivial alternatives. | 5053 // Just copy any trivial alternatives. |
| 5046 for (int j = first_in_run; j < i; j++) { | 5054 for (int j = first_in_run; j < i; j++) { |
| 5047 alternatives->at(write_posn++) = alternatives->at(j); | 5055 alternatives->at(write_posn++) = alternatives->at(j); |
| 5048 } | 5056 } |
| 5049 } | 5057 } |
| 5050 } | 5058 } |
| 5051 alternatives->Rewind(write_posn); // Trim end of array. | 5059 alternatives->Rewind(write_posn); // Trim end of array. |
| 5052 } | 5060 } |
| 5053 | 5061 |
| (...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5287 // lookahead in one side and an end-of-input on the other side. | 5295 // lookahead in one side and an end-of-input on the other side. |
| 5288 // We need two registers for the lookahead. | 5296 // We need two registers for the lookahead. |
| 5289 int stack_pointer_register = compiler->AllocateRegister(); | 5297 int stack_pointer_register = compiler->AllocateRegister(); |
| 5290 int position_register = compiler->AllocateRegister(); | 5298 int position_register = compiler->AllocateRegister(); |
| 5291 // The ChoiceNode to distinguish between a newline and end-of-input. | 5299 // The ChoiceNode to distinguish between a newline and end-of-input. |
| 5292 ChoiceNode* result = new(zone) ChoiceNode(2, zone); | 5300 ChoiceNode* result = new(zone) ChoiceNode(2, zone); |
| 5293 // Create a newline atom. | 5301 // Create a newline atom. |
| 5294 ZoneList<CharacterRange>* newline_ranges = | 5302 ZoneList<CharacterRange>* newline_ranges = |
| 5295 new(zone) ZoneList<CharacterRange>(3, zone); | 5303 new(zone) ZoneList<CharacterRange>(3, zone); |
| 5296 CharacterRange::AddClassEscape('n', newline_ranges, zone); | 5304 CharacterRange::AddClassEscape('n', newline_ranges, zone); |
| 5297 RegExpCharacterClass* newline_atom = new(zone) RegExpCharacterClass('n'); | 5305 RegExpCharacterClass* newline_atom = |
| 5298 TextNode* newline_matcher = new(zone) TextNode( | 5306 new (zone) RegExpCharacterClass('n', RegExpTree::READ_FORWARD); |
|
erikcorry
2015/11/11 15:39:04
Yes :-)
Do we have a test of $ and ^ inside lookb
Yang
2015/11/12 08:25:34
Added a couple of tests.
| |
| 5299 newline_atom, | 5307 TextNode* newline_matcher = new (zone) TextNode( |
| 5300 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, | 5308 newline_atom, false, ActionNode::PositiveSubmatchSuccess( |
| 5301 position_register, | 5309 stack_pointer_register, position_register, |
| 5302 0, // No captures inside. | 5310 0, // No captures inside. |
| 5303 -1, // Ignored if no captures. | 5311 -1, // Ignored if no captures. |
| 5304 on_success)); | 5312 on_success)); |
| 5305 // Create an end-of-input matcher. | 5313 // Create an end-of-input matcher. |
| 5306 RegExpNode* end_of_line = ActionNode::BeginSubmatch( | 5314 RegExpNode* end_of_line = ActionNode::BeginSubmatch( |
| 5307 stack_pointer_register, | 5315 stack_pointer_register, |
| 5308 position_register, | 5316 position_register, |
| 5309 newline_matcher); | 5317 newline_matcher); |
| 5310 // Add the two alternatives to the ChoiceNode. | 5318 // Add the two alternatives to the ChoiceNode. |
| 5311 GuardedAlternative eol_alternative(end_of_line); | 5319 GuardedAlternative eol_alternative(end_of_line); |
| 5312 result->AddAlternative(eol_alternative); | 5320 result->AddAlternative(eol_alternative); |
| 5313 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); | 5321 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); |
| 5314 result->AddAlternative(end_alternative); | 5322 result->AddAlternative(end_alternative); |
| 5315 return result; | 5323 return result; |
| 5316 } | 5324 } |
| 5317 default: | 5325 default: |
| 5318 UNREACHABLE(); | 5326 UNREACHABLE(); |
| 5319 } | 5327 } |
| 5320 return on_success; | 5328 return on_success; |
| 5321 } | 5329 } |
| 5322 | 5330 |
| 5323 | 5331 |
| 5324 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, | 5332 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, |
| 5325 RegExpNode* on_success) { | 5333 RegExpNode* on_success) { |
| 5326 return new(compiler->zone()) | 5334 return new (compiler->zone()) BackReferenceNode( |
| 5327 BackReferenceNode(RegExpCapture::StartRegister(index()), | 5335 RegExpCapture::StartRegister(index()), |
| 5328 RegExpCapture::EndRegister(index()), | 5336 RegExpCapture::EndRegister(index()), read_backward(), on_success); |
| 5329 on_success); | |
| 5330 } | 5337 } |
| 5331 | 5338 |
| 5332 | 5339 |
| 5333 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, | 5340 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, |
| 5334 RegExpNode* on_success) { | 5341 RegExpNode* on_success) { |
| 5335 return on_success; | 5342 return on_success; |
| 5336 } | 5343 } |
| 5337 | 5344 |
| 5338 | 5345 |
| 5339 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, | 5346 RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler, |
| 5340 RegExpNode* on_success) { | 5347 RegExpNode* on_success) { |
| 5341 int stack_pointer_register = compiler->AllocateRegister(); | 5348 int stack_pointer_register = compiler->AllocateRegister(); |
| 5342 int position_register = compiler->AllocateRegister(); | 5349 int position_register = compiler->AllocateRegister(); |
| 5343 | 5350 |
| 5344 const int registers_per_capture = 2; | 5351 const int registers_per_capture = 2; |
| 5345 const int register_of_first_capture = 2; | 5352 const int register_of_first_capture = 2; |
| 5346 int register_count = capture_count_ * registers_per_capture; | 5353 int register_count = capture_count_ * registers_per_capture; |
| 5347 int register_start = | 5354 int register_start = |
| 5348 register_of_first_capture + capture_from_ * registers_per_capture; | 5355 register_of_first_capture + capture_from_ * registers_per_capture; |
| 5349 | 5356 |
| 5350 RegExpNode* success; | |
| 5351 if (is_positive()) { | 5357 if (is_positive()) { |
| 5352 RegExpNode* node = ActionNode::BeginSubmatch( | 5358 RegExpNode* node = ActionNode::BeginSubmatch( |
| 5353 stack_pointer_register, | 5359 stack_pointer_register, |
| 5354 position_register, | 5360 position_register, |
| 5355 body()->ToNode( | 5361 body()->ToNode( |
| 5356 compiler, | 5362 compiler, |
| 5357 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, | 5363 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, |
| 5358 position_register, | 5364 position_register, |
| 5359 register_count, | 5365 register_count, |
| 5360 register_start, | 5366 register_start, |
| 5361 on_success))); | 5367 on_success))); |
| 5362 return node; | 5368 return node; |
| 5363 } else { | 5369 } else { |
| 5364 // We use a ChoiceNode for a negative lookahead because it has most of | 5370 // We use a ChoiceNode for a negative lookahead because it has most of |
| 5365 // the characteristics we need. It has the body of the lookahead as its | 5371 // the characteristics we need. It has the body of the lookahead as its |
| 5366 // first alternative and the expression after the lookahead of the second | 5372 // first alternative and the expression after the lookahead of the second |
| 5367 // alternative. If the first alternative succeeds then the | 5373 // alternative. If the first alternative succeeds then the |
| 5368 // NegativeSubmatchSuccess will unwind the stack including everything the | 5374 // NegativeSubmatchSuccess will unwind the stack including everything the |
| 5369 // choice node set up and backtrack. If the first alternative fails then | 5375 // choice node set up and backtrack. If the first alternative fails then |
| 5370 // the second alternative is tried, which is exactly the desired result | 5376 // the second alternative is tried, which is exactly the desired result |
| 5371 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special | 5377 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special |
| 5372 // ChoiceNode that knows to ignore the first exit when calculating quick | 5378 // ChoiceNode that knows to ignore the first exit when calculating quick |
| 5373 // checks. | 5379 // checks. |
| 5374 Zone* zone = compiler->zone(); | 5380 Zone* zone = compiler->zone(); |
| 5375 | 5381 |
| 5376 GuardedAlternative body_alt( | 5382 GuardedAlternative body_alt( |
| 5377 body()->ToNode( | 5383 body()->ToNode(compiler, new (zone) NegativeSubmatchSuccess( |
| 5378 compiler, | 5384 stack_pointer_register, position_register, |
| 5379 success = new(zone) NegativeSubmatchSuccess(stack_pointer_register, | 5385 register_count, register_start, zone))); |
| 5380 position_register, | |
| 5381 register_count, | |
| 5382 register_start, | |
| 5383 zone))); | |
| 5384 ChoiceNode* choice_node = | 5386 ChoiceNode* choice_node = |
| 5385 new(zone) NegativeLookaheadChoiceNode(body_alt, | 5387 new(zone) NegativeLookaheadChoiceNode(body_alt, |
| 5386 GuardedAlternative(on_success), | 5388 GuardedAlternative(on_success), |
| 5387 zone); | 5389 zone); |
| 5388 return ActionNode::BeginSubmatch(stack_pointer_register, | 5390 return ActionNode::BeginSubmatch(stack_pointer_register, |
| 5389 position_register, | 5391 position_register, |
| 5390 choice_node); | 5392 choice_node); |
| 5391 } | 5393 } |
| 5392 } | 5394 } |
| 5393 | 5395 |
| 5394 | 5396 |
| 5395 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, | 5397 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, |
| 5396 RegExpNode* on_success) { | 5398 RegExpNode* on_success) { |
| 5397 return ToNode(body(), index(), compiler, on_success); | 5399 return ToNode(body(), index(), compiler, on_success); |
| 5398 } | 5400 } |
| 5399 | 5401 |
| 5400 | 5402 |
| 5401 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, | 5403 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, |
| 5402 int index, | 5404 int index, |
| 5403 RegExpCompiler* compiler, | 5405 RegExpCompiler* compiler, |
| 5404 RegExpNode* on_success) { | 5406 RegExpNode* on_success) { |
| 5407 DCHECK_NOT_NULL(body); | |
| 5405 int start_reg = RegExpCapture::StartRegister(index); | 5408 int start_reg = RegExpCapture::StartRegister(index); |
| 5406 int end_reg = RegExpCapture::EndRegister(index); | 5409 int end_reg = RegExpCapture::EndRegister(index); |
| 5410 if (body->read_backward()) std::swap(start_reg, end_reg); | |
| 5407 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); | 5411 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); |
| 5408 RegExpNode* body_node = body->ToNode(compiler, store_end); | 5412 RegExpNode* body_node = body->ToNode(compiler, store_end); |
| 5409 return ActionNode::StorePosition(start_reg, true, body_node); | 5413 return ActionNode::StorePosition(start_reg, true, body_node); |
| 5410 } | 5414 } |
| 5411 | 5415 |
| 5412 | 5416 |
| 5413 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, | 5417 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, |
| 5414 RegExpNode* on_success) { | 5418 RegExpNode* on_success) { |
| 5415 ZoneList<RegExpTree*>* children = nodes(); | 5419 ZoneList<RegExpTree*>* children = nodes(); |
| 5416 RegExpNode* current = on_success; | 5420 RegExpNode* current = on_success; |
| 5417 for (int i = children->length() - 1; i >= 0; i--) { | 5421 if (read_backward()) { |
| 5418 current = children->at(i)->ToNode(compiler, current); | 5422 for (int i = 0; i < children->length(); i++) { |
| 5423 current = children->at(i)->ToNode(compiler, current); | |
| 5424 } | |
| 5425 } else { | |
| 5426 for (int i = children->length() - 1; i >= 0; i--) { | |
| 5427 current = children->at(i)->ToNode(compiler, current); | |
| 5428 } | |
| 5419 } | 5429 } |
| 5420 return current; | 5430 return current; |
| 5421 } | 5431 } |
| 5422 | 5432 |
| 5423 | 5433 |
| 5424 static void AddClass(const int* elmv, | 5434 static void AddClass(const int* elmv, |
| 5425 int elmc, | 5435 int elmc, |
| 5426 ZoneList<CharacterRange>* ranges, | 5436 ZoneList<CharacterRange>* ranges, |
| 5427 Zone* zone) { | 5437 Zone* zone) { |
| 5428 elmc--; | 5438 elmc--; |
| (...skipping 855 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6284 0, | 6294 0, |
| 6285 &compiler, | 6295 &compiler, |
| 6286 compiler.accept()); | 6296 compiler.accept()); |
| 6287 RegExpNode* node = captured_body; | 6297 RegExpNode* node = captured_body; |
| 6288 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); | 6298 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); |
| 6289 bool is_start_anchored = data->tree->IsAnchoredAtStart(); | 6299 bool is_start_anchored = data->tree->IsAnchoredAtStart(); |
| 6290 int max_length = data->tree->max_match(); | 6300 int max_length = data->tree->max_match(); |
| 6291 if (!is_start_anchored && !is_sticky) { | 6301 if (!is_start_anchored && !is_sticky) { |
| 6292 // Add a .*? at the beginning, outside the body capture, unless | 6302 // Add a .*? at the beginning, outside the body capture, unless |
| 6293 // this expression is anchored at the beginning or sticky. | 6303 // this expression is anchored at the beginning or sticky. |
| 6294 RegExpNode* loop_node = | 6304 RegExpNode* loop_node = RegExpQuantifier::ToNode( |
| 6295 RegExpQuantifier::ToNode(0, | 6305 0, RegExpTree::kInfinity, false, |
| 6296 RegExpTree::kInfinity, | 6306 new (zone) RegExpCharacterClass('*', RegExpTree::READ_FORWARD), |
| 6297 false, | 6307 &compiler, captured_body, data->contains_anchor); |
| 6298 new(zone) RegExpCharacterClass('*'), | |
| 6299 &compiler, | |
| 6300 captured_body, | |
| 6301 data->contains_anchor); | |
| 6302 | 6308 |
| 6303 if (data->contains_anchor) { | 6309 if (data->contains_anchor) { |
| 6304 // Unroll loop once, to take care of the case that might start | 6310 // Unroll loop once, to take care of the case that might start |
| 6305 // at the start of input. | 6311 // at the start of input. |
| 6306 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); | 6312 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); |
| 6307 first_step_node->AddAlternative(GuardedAlternative(captured_body)); | 6313 first_step_node->AddAlternative(GuardedAlternative(captured_body)); |
| 6308 first_step_node->AddAlternative(GuardedAlternative( | 6314 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( |
| 6309 new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node))); | 6315 new (zone) RegExpCharacterClass('*', RegExpTree::READ_FORWARD), false, |
| 6316 loop_node))); | |
| 6310 node = first_step_node; | 6317 node = first_step_node; |
| 6311 } else { | 6318 } else { |
| 6312 node = loop_node; | 6319 node = loop_node; |
| 6313 } | 6320 } |
| 6314 } | 6321 } |
| 6315 if (is_one_byte) { | 6322 if (is_one_byte) { |
| 6316 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 6323 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
| 6317 // Do it again to propagate the new nodes to places where they were not | 6324 // Do it again to propagate the new nodes to places where they were not |
| 6318 // put because they had not been calculated yet. | 6325 // put because they had not been calculated yet. |
| 6319 if (node != NULL) { | 6326 if (node != NULL) { |
| (...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6503 | 6510 |
| 6504 | 6511 |
| 6505 void RegExpResultsCache::Clear(FixedArray* cache) { | 6512 void RegExpResultsCache::Clear(FixedArray* cache) { |
| 6506 for (int i = 0; i < kRegExpResultsCacheSize; i++) { | 6513 for (int i = 0; i < kRegExpResultsCacheSize; i++) { |
| 6507 cache->set(i, Smi::FromInt(0)); | 6514 cache->set(i, Smi::FromInt(0)); |
| 6508 } | 6515 } |
| 6509 } | 6516 } |
| 6510 | 6517 |
| 6511 } // namespace internal | 6518 } // namespace internal |
| 6512 } // namespace v8 | 6519 } // namespace v8 |
| OLD | NEW |