Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(281)

Side by Side Diff: src/regexp/jsregexp.cc

Issue 1418963009: Experimental support for RegExp lookbehind. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: fixed test cases Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/jsregexp.h" 5 #include "src/regexp/jsregexp.h"
6 6
7 #include "src/ast.h" 7 #include "src/ast.h"
8 #include "src/base/platform/platform.h" 8 #include "src/base/platform/platform.h"
9 #include "src/compilation-cache.h" 9 #include "src/compilation-cache.h"
10 #include "src/compiler.h" 10 #include "src/compiler.h"
(...skipping 1206 matching lines...) Expand 10 before | Expand all | Expand 10 after
1217 1217
1218 // The chronologically first deferred action in the trace 1218 // The chronologically first deferred action in the trace
1219 // is used to infer the action needed to restore a register 1219 // is used to infer the action needed to restore a register
1220 // to its previous state (or not, if it's safe to ignore it). 1220 // to its previous state (or not, if it's safe to ignore it).
1221 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR }; 1221 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
1222 DeferredActionUndoType undo_action = IGNORE; 1222 DeferredActionUndoType undo_action = IGNORE;
1223 1223
1224 int value = 0; 1224 int value = 0;
1225 bool absolute = false; 1225 bool absolute = false;
1226 bool clear = false; 1226 bool clear = false;
1227 int store_position = -1; 1227 static const int kNoStore = kMinInt;
1228 int store_position = kNoStore;
1228 // This is a little tricky because we are scanning the actions in reverse 1229 // This is a little tricky because we are scanning the actions in reverse
1229 // historical order (newest first). 1230 // historical order (newest first).
1230 for (DeferredAction* action = actions_; 1231 for (DeferredAction* action = actions_;
1231 action != NULL; 1232 action != NULL;
1232 action = action->next()) { 1233 action = action->next()) {
1233 if (action->Mentions(reg)) { 1234 if (action->Mentions(reg)) {
1234 switch (action->action_type()) { 1235 switch (action->action_type()) {
1235 case ActionNode::SET_REGISTER: { 1236 case ActionNode::SET_REGISTER: {
1236 Trace::DeferredSetRegister* psr = 1237 Trace::DeferredSetRegister* psr =
1237 static_cast<Trace::DeferredSetRegister*>(action); 1238 static_cast<Trace::DeferredSetRegister*>(action);
1238 if (!absolute) { 1239 if (!absolute) {
1239 value += psr->value(); 1240 value += psr->value();
1240 absolute = true; 1241 absolute = true;
1241 } 1242 }
1242 // SET_REGISTER is currently only used for newly introduced loop 1243 // SET_REGISTER is currently only used for newly introduced loop
1243 // counters. They can have a significant previous value if they 1244 // counters. They can have a significant previous value if they
1244 // occour in a loop. TODO(lrn): Propagate this information, so 1245 // occour in a loop. TODO(lrn): Propagate this information, so
1245 // we can set undo_action to IGNORE if we know there is no value to 1246 // we can set undo_action to IGNORE if we know there is no value to
1246 // restore. 1247 // restore.
1247 undo_action = RESTORE; 1248 undo_action = RESTORE;
1248 DCHECK_EQ(store_position, -1); 1249 DCHECK_EQ(store_position, kNoStore);
1249 DCHECK(!clear); 1250 DCHECK(!clear);
1250 break; 1251 break;
1251 } 1252 }
1252 case ActionNode::INCREMENT_REGISTER: 1253 case ActionNode::INCREMENT_REGISTER:
1253 if (!absolute) { 1254 if (!absolute) {
1254 value++; 1255 value++;
1255 } 1256 }
1256 DCHECK_EQ(store_position, -1); 1257 DCHECK_EQ(store_position, kNoStore);
1257 DCHECK(!clear); 1258 DCHECK(!clear);
1258 undo_action = RESTORE; 1259 undo_action = RESTORE;
1259 break; 1260 break;
1260 case ActionNode::STORE_POSITION: { 1261 case ActionNode::STORE_POSITION: {
1261 Trace::DeferredCapture* pc = 1262 Trace::DeferredCapture* pc =
1262 static_cast<Trace::DeferredCapture*>(action); 1263 static_cast<Trace::DeferredCapture*>(action);
1263 if (!clear && store_position == -1) { 1264 if (!clear && store_position == kNoStore) {
1264 store_position = pc->cp_offset(); 1265 store_position = pc->cp_offset();
1265 } 1266 }
1266 1267
1267 // For captures we know that stores and clears alternate. 1268 // For captures we know that stores and clears alternate.
1268 // Other register, are never cleared, and if the occur 1269 // Other register, are never cleared, and if the occur
1269 // inside a loop, they might be assigned more than once. 1270 // inside a loop, they might be assigned more than once.
1270 if (reg <= 1) { 1271 if (reg <= 1) {
1271 // Registers zero and one, aka "capture zero", is 1272 // Registers zero and one, aka "capture zero", is
1272 // always set correctly if we succeed. There is no 1273 // always set correctly if we succeed. There is no
1273 // need to undo a setting on backtrack, because we 1274 // need to undo a setting on backtrack, because we
1274 // will set it again or fail. 1275 // will set it again or fail.
1275 undo_action = IGNORE; 1276 undo_action = IGNORE;
1276 } else { 1277 } else {
1277 undo_action = pc->is_capture() ? CLEAR : RESTORE; 1278 undo_action = pc->is_capture() ? CLEAR : RESTORE;
1278 } 1279 }
1279 DCHECK(!absolute); 1280 DCHECK(!absolute);
1280 DCHECK_EQ(value, 0); 1281 DCHECK_EQ(value, 0);
1281 break; 1282 break;
1282 } 1283 }
1283 case ActionNode::CLEAR_CAPTURES: { 1284 case ActionNode::CLEAR_CAPTURES: {
1284 // Since we're scanning in reverse order, if we've already 1285 // Since we're scanning in reverse order, if we've already
1285 // set the position we have to ignore historically earlier 1286 // set the position we have to ignore historically earlier
1286 // clearing operations. 1287 // clearing operations.
1287 if (store_position == -1) { 1288 if (store_position == kNoStore) {
1288 clear = true; 1289 clear = true;
1289 } 1290 }
1290 undo_action = RESTORE; 1291 undo_action = RESTORE;
1291 DCHECK(!absolute); 1292 DCHECK(!absolute);
1292 DCHECK_EQ(value, 0); 1293 DCHECK_EQ(value, 0);
1293 break; 1294 break;
1294 } 1295 }
1295 default: 1296 default:
1296 UNREACHABLE(); 1297 UNREACHABLE();
1297 break; 1298 break;
(...skipping 10 matching lines...) Expand all
1308 pushes = 0; 1309 pushes = 0;
1309 } 1310 }
1310 1311
1311 assembler->PushRegister(reg, stack_check); 1312 assembler->PushRegister(reg, stack_check);
1312 registers_to_pop->Set(reg, zone); 1313 registers_to_pop->Set(reg, zone);
1313 } else if (undo_action == CLEAR) { 1314 } else if (undo_action == CLEAR) {
1314 registers_to_clear->Set(reg, zone); 1315 registers_to_clear->Set(reg, zone);
1315 } 1316 }
1316 // Perform the chronologically last action (or accumulated increment) 1317 // Perform the chronologically last action (or accumulated increment)
1317 // for the register. 1318 // for the register.
1318 if (store_position != -1) { 1319 if (store_position != kNoStore) {
1319 assembler->WriteCurrentPositionToRegister(reg, store_position); 1320 assembler->WriteCurrentPositionToRegister(reg, store_position);
1320 } else if (clear) { 1321 } else if (clear) {
1321 assembler->ClearRegisters(reg, reg); 1322 assembler->ClearRegisters(reg, reg);
1322 } else if (absolute) { 1323 } else if (absolute) {
1323 assembler->SetRegister(reg, value); 1324 assembler->SetRegister(reg, value);
1324 } else if (value != 0) { 1325 } else if (value != 0) {
1325 assembler->AdvanceRegister(reg, value); 1326 assembler->AdvanceRegister(reg, value);
1326 } 1327 }
1327 } 1328 }
1328 } 1329 }
(...skipping 977 matching lines...) Expand 10 before | Expand all | Expand 10 after
2306 // Match the behaviour of EatsAtLeast on this node. 2307 // Match the behaviour of EatsAtLeast on this node.
2307 if (assertion_type() == AT_START && not_at_start) return; 2308 if (assertion_type() == AT_START && not_at_start) return;
2308 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); 2309 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
2309 SaveBMInfo(bm, not_at_start, offset); 2310 SaveBMInfo(bm, not_at_start, offset);
2310 } 2311 }
2311 2312
2312 2313
2313 int BackReferenceNode::EatsAtLeast(int still_to_find, 2314 int BackReferenceNode::EatsAtLeast(int still_to_find,
2314 int budget, 2315 int budget,
2315 bool not_at_start) { 2316 bool not_at_start) {
2317 if (read_backward()) return 0;
2316 if (budget <= 0) return 0; 2318 if (budget <= 0) return 0;
2317 return on_success()->EatsAtLeast(still_to_find, 2319 return on_success()->EatsAtLeast(still_to_find,
2318 budget - 1, 2320 budget - 1,
2319 not_at_start); 2321 not_at_start);
2320 } 2322 }
2321 2323
2322 2324
2323 int TextNode::EatsAtLeast(int still_to_find, 2325 int TextNode::EatsAtLeast(int still_to_find,
2324 int budget, 2326 int budget,
2325 bool not_at_start) { 2327 bool not_at_start) {
2328 if (read_backward()) return 0;
2326 int answer = Length(); 2329 int answer = Length();
2327 if (answer >= still_to_find) return answer; 2330 if (answer >= still_to_find) return answer;
2328 if (budget <= 0) return answer; 2331 if (budget <= 0) return answer;
2329 // We are not at start after this node so we set the last argument to 'true'. 2332 // We are not at start after this node so we set the last argument to 'true'.
2330 return answer + on_success()->EatsAtLeast(still_to_find - answer, 2333 return answer + on_success()->EatsAtLeast(still_to_find - answer,
2331 budget - 1, 2334 budget - 1,
2332 true); 2335 true);
2333 } 2336 }
2334 2337
2335 2338
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after
2519 bool not_at_start) { 2522 bool not_at_start) {
2520 Isolate* isolate = compiler->macro_assembler()->isolate(); 2523 Isolate* isolate = compiler->macro_assembler()->isolate();
2521 DCHECK(characters_filled_in < details->characters()); 2524 DCHECK(characters_filled_in < details->characters());
2522 int characters = details->characters(); 2525 int characters = details->characters();
2523 int char_mask; 2526 int char_mask;
2524 if (compiler->one_byte()) { 2527 if (compiler->one_byte()) {
2525 char_mask = String::kMaxOneByteCharCode; 2528 char_mask = String::kMaxOneByteCharCode;
2526 } else { 2529 } else {
2527 char_mask = String::kMaxUtf16CodeUnit; 2530 char_mask = String::kMaxUtf16CodeUnit;
2528 } 2531 }
2529 for (int k = 0; k < elms_->length(); k++) { 2532 for (int k = 0; k < elements()->length(); k++) {
2530 TextElement elm = elms_->at(k); 2533 TextElement elm = elements()->at(k);
2531 if (elm.text_type() == TextElement::ATOM) { 2534 if (elm.text_type() == TextElement::ATOM) {
2532 Vector<const uc16> quarks = elm.atom()->data(); 2535 Vector<const uc16> quarks = elm.atom()->data();
2533 for (int i = 0; i < characters && i < quarks.length(); i++) { 2536 for (int i = 0; i < characters && i < quarks.length(); i++) {
2534 QuickCheckDetails::Position* pos = 2537 QuickCheckDetails::Position* pos =
2535 details->positions(characters_filled_in); 2538 details->positions(characters_filled_in);
2536 uc16 c = quarks[i]; 2539 uc16 c = quarks[i];
2537 if (compiler->ignore_case()) { 2540 if (compiler->ignore_case()) {
2538 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 2541 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
2539 int length = GetCaseIndependentLetters(isolate, c, 2542 int length = GetCaseIndependentLetters(isolate, c,
2540 compiler->one_byte(), chars); 2543 compiler->one_byte(), chars);
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after
2671 for (int i = 0; i < characters_; i++) { 2674 for (int i = 0; i < characters_; i++) {
2672 positions_[i].mask = 0; 2675 positions_[i].mask = 0;
2673 positions_[i].value = 0; 2676 positions_[i].value = 0;
2674 positions_[i].determines_perfectly = false; 2677 positions_[i].determines_perfectly = false;
2675 } 2678 }
2676 characters_ = 0; 2679 characters_ = 0;
2677 } 2680 }
2678 2681
2679 2682
2680 void QuickCheckDetails::Advance(int by, bool one_byte) { 2683 void QuickCheckDetails::Advance(int by, bool one_byte) {
2681 DCHECK(by >= 0);
2682 if (by >= characters_) { 2684 if (by >= characters_) {
2683 Clear(); 2685 Clear();
2684 return; 2686 return;
2685 } 2687 }
2686 for (int i = 0; i < characters_ - by; i++) { 2688 for (int i = 0; i < characters_ - by; i++) {
2687 positions_[i] = positions_[by + i]; 2689 positions_[i] = positions_[by + i];
2688 } 2690 }
2689 for (int i = characters_ - by; i < characters_; i++) { 2691 for (int i = characters_ - by; i < characters_; i++) {
2690 positions_[i].mask = 0; 2692 positions_[i].mask = 0;
2691 positions_[i].value = 0; 2693 positions_[i].value = 0;
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
2773 } 2775 }
2774 return false; 2776 return false;
2775 } 2777 }
2776 2778
2777 2779
2778 RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) { 2780 RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {
2779 if (info()->replacement_calculated) return replacement(); 2781 if (info()->replacement_calculated) return replacement();
2780 if (depth < 0) return this; 2782 if (depth < 0) return this;
2781 DCHECK(!info()->visited); 2783 DCHECK(!info()->visited);
2782 VisitMarker marker(info()); 2784 VisitMarker marker(info());
2783 int element_count = elms_->length(); 2785 int element_count = elements()->length();
2784 for (int i = 0; i < element_count; i++) { 2786 for (int i = 0; i < element_count; i++) {
2785 TextElement elm = elms_->at(i); 2787 TextElement elm = elements()->at(i);
2786 if (elm.text_type() == TextElement::ATOM) { 2788 if (elm.text_type() == TextElement::ATOM) {
2787 Vector<const uc16> quarks = elm.atom()->data(); 2789 Vector<const uc16> quarks = elm.atom()->data();
2788 for (int j = 0; j < quarks.length(); j++) { 2790 for (int j = 0; j < quarks.length(); j++) {
2789 uint16_t c = quarks[j]; 2791 uint16_t c = quarks[j];
2790 if (c <= String::kMaxOneByteCharCode) continue; 2792 if (c <= String::kMaxOneByteCharCode) continue;
2791 if (!ignore_case) return set_replacement(NULL); 2793 if (!ignore_case) return set_replacement(NULL);
2792 // Here, we need to check for characters whose upper and lower cases 2794 // Here, we need to check for characters whose upper and lower cases
2793 // are outside the Latin-1 range. 2795 // are outside the Latin-1 range.
2794 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); 2796 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);
2795 // Character is outside Latin-1 completely 2797 // Character is outside Latin-1 completely
(...skipping 343 matching lines...) Expand 10 before | Expand all | Expand 10 after
3139 assembler->GoTo(trace->backtrack()); 3141 assembler->GoTo(trace->backtrack());
3140 assembler->Bind(&ok); 3142 assembler->Bind(&ok);
3141 break; 3143 break;
3142 } 3144 }
3143 case AT_START: { 3145 case AT_START: {
3144 if (trace->at_start() == Trace::FALSE_VALUE) { 3146 if (trace->at_start() == Trace::FALSE_VALUE) {
3145 assembler->GoTo(trace->backtrack()); 3147 assembler->GoTo(trace->backtrack());
3146 return; 3148 return;
3147 } 3149 }
3148 if (trace->at_start() == Trace::UNKNOWN) { 3150 if (trace->at_start() == Trace::UNKNOWN) {
3149 assembler->CheckNotAtStart(trace->backtrack()); 3151 assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack());
3150 Trace at_start_trace = *trace; 3152 Trace at_start_trace = *trace;
3151 at_start_trace.set_at_start(true); 3153 at_start_trace.set_at_start(Trace::TRUE_VALUE);
3152 on_success()->Emit(compiler, &at_start_trace); 3154 on_success()->Emit(compiler, &at_start_trace);
3153 return; 3155 return;
3154 } 3156 }
3155 } 3157 }
3156 break; 3158 break;
3157 case AFTER_NEWLINE: 3159 case AFTER_NEWLINE:
3158 EmitHat(compiler, on_success(), trace); 3160 EmitHat(compiler, on_success(), trace);
3159 return; 3161 return;
3160 case AT_BOUNDARY: 3162 case AT_BOUNDARY:
3161 case AT_NON_BOUNDARY: { 3163 case AT_NON_BOUNDARY: {
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
3214 TextEmitPassType pass, 3216 TextEmitPassType pass,
3215 bool preloaded, 3217 bool preloaded,
3216 Trace* trace, 3218 Trace* trace,
3217 bool first_element_checked, 3219 bool first_element_checked,
3218 int* checked_up_to) { 3220 int* checked_up_to) {
3219 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 3221 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3220 Isolate* isolate = assembler->isolate(); 3222 Isolate* isolate = assembler->isolate();
3221 bool one_byte = compiler->one_byte(); 3223 bool one_byte = compiler->one_byte();
3222 Label* backtrack = trace->backtrack(); 3224 Label* backtrack = trace->backtrack();
3223 QuickCheckDetails* quick_check = trace->quick_check_performed(); 3225 QuickCheckDetails* quick_check = trace->quick_check_performed();
3224 int element_count = elms_->length(); 3226 int element_count = elements()->length();
3227 int backward_offset = read_backward() ? -Length() : 0;
3225 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { 3228 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
3226 TextElement elm = elms_->at(i); 3229 TextElement elm = elements()->at(i);
3227 int cp_offset = trace->cp_offset() + elm.cp_offset(); 3230 int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
3228 if (elm.text_type() == TextElement::ATOM) { 3231 if (elm.text_type() == TextElement::ATOM) {
3229 Vector<const uc16> quarks = elm.atom()->data(); 3232 Vector<const uc16> quarks = elm.atom()->data();
3230 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { 3233 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
3231 if (first_element_checked && i == 0 && j == 0) continue; 3234 if (first_element_checked && i == 0 && j == 0) continue;
3232 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; 3235 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
3233 EmitCharacterFunction* emit_function = NULL; 3236 EmitCharacterFunction* emit_function = NULL;
3234 switch (pass) { 3237 switch (pass) {
3235 case NON_LATIN1_MATCH: 3238 case NON_LATIN1_MATCH:
3236 DCHECK(one_byte); 3239 DCHECK(one_byte);
3237 if (quarks[j] > String::kMaxOneByteCharCode) { 3240 if (quarks[j] > String::kMaxOneByteCharCode) {
3238 assembler->GoTo(backtrack); 3241 assembler->GoTo(backtrack);
3239 return; 3242 return;
3240 } 3243 }
3241 break; 3244 break;
3242 case NON_LETTER_CHARACTER_MATCH: 3245 case NON_LETTER_CHARACTER_MATCH:
3243 emit_function = &EmitAtomNonLetter; 3246 emit_function = &EmitAtomNonLetter;
3244 break; 3247 break;
3245 case SIMPLE_CHARACTER_MATCH: 3248 case SIMPLE_CHARACTER_MATCH:
3246 emit_function = &EmitSimpleCharacter; 3249 emit_function = &EmitSimpleCharacter;
3247 break; 3250 break;
3248 case CASE_CHARACTER_MATCH: 3251 case CASE_CHARACTER_MATCH:
3249 emit_function = &EmitAtomLetter; 3252 emit_function = &EmitAtomLetter;
3250 break; 3253 break;
3251 default: 3254 default:
3252 break; 3255 break;
3253 } 3256 }
3254 if (emit_function != NULL) { 3257 if (emit_function != NULL) {
3255 bool bound_checked = emit_function(isolate, 3258 bool bounds_check = *checked_up_to < cp_offset + j || read_backward();
3256 compiler, 3259 bool bound_checked =
3257 quarks[j], 3260 emit_function(isolate, compiler, quarks[j], backtrack,
3258 backtrack, 3261 cp_offset + j, bounds_check, preloaded);
3259 cp_offset + j,
3260 *checked_up_to < cp_offset + j,
3261 preloaded);
3262 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); 3262 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
3263 } 3263 }
3264 } 3264 }
3265 } else { 3265 } else {
3266 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type()); 3266 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type());
3267 if (pass == CHARACTER_CLASS_MATCH) { 3267 if (pass == CHARACTER_CLASS_MATCH) {
3268 if (first_element_checked && i == 0) continue; 3268 if (first_element_checked && i == 0) continue;
3269 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; 3269 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
3270 RegExpCharacterClass* cc = elm.char_class(); 3270 RegExpCharacterClass* cc = elm.char_class();
3271 bool bounds_check = *checked_up_to < cp_offset || read_backward();
3271 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, 3272 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,
3272 *checked_up_to < cp_offset, preloaded, zone()); 3273 bounds_check, preloaded, zone());
3273 UpdateBoundsCheck(cp_offset, checked_up_to); 3274 UpdateBoundsCheck(cp_offset, checked_up_to);
3274 } 3275 }
3275 } 3276 }
3276 } 3277 }
3277 } 3278 }
3278 3279
3279 3280
3280 int TextNode::Length() { 3281 int TextNode::Length() {
3281 TextElement elm = elms_->last(); 3282 TextElement elm = elements()->last();
3282 DCHECK(elm.cp_offset() >= 0); 3283 DCHECK(elm.cp_offset() >= 0);
3283 return elm.cp_offset() + elm.length(); 3284 return elm.cp_offset() + elm.length();
3284 } 3285 }
3285 3286
3286 3287
3287 bool TextNode::SkipPass(int int_pass, bool ignore_case) { 3288 bool TextNode::SkipPass(int int_pass, bool ignore_case) {
3288 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass); 3289 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);
3289 if (ignore_case) { 3290 if (ignore_case) {
3290 return pass == SIMPLE_CHARACTER_MATCH; 3291 return pass == SIMPLE_CHARACTER_MATCH;
3291 } else { 3292 } else {
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
3340 TextEmitPass(compiler, 3341 TextEmitPass(compiler,
3341 static_cast<TextEmitPassType>(pass), 3342 static_cast<TextEmitPassType>(pass),
3342 false, 3343 false,
3343 trace, 3344 trace,
3344 first_elt_done, 3345 first_elt_done,
3345 &bound_checked_to); 3346 &bound_checked_to);
3346 } 3347 }
3347 } 3348 }
3348 3349
3349 Trace successor_trace(*trace); 3350 Trace successor_trace(*trace);
3350 successor_trace.set_at_start(false); 3351 // If we advance backward, we may end up at the start.
3351 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); 3352 successor_trace.AdvanceCurrentPositionInTrace(
3353 read_backward() ? -Length() : Length(), compiler);
3354 successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN
3355 : Trace::FALSE_VALUE);
3352 RecursionCheck rc(compiler); 3356 RecursionCheck rc(compiler);
3353 on_success()->Emit(compiler, &successor_trace); 3357 on_success()->Emit(compiler, &successor_trace);
3354 } 3358 }
3355 3359
3356 3360
3357 void Trace::InvalidateCurrentCharacter() { 3361 void Trace::InvalidateCurrentCharacter() {
3358 characters_preloaded_ = 0; 3362 characters_preloaded_ = 0;
3359 } 3363 }
3360 3364
3361 3365
3362 void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) { 3366 void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
3363 DCHECK(by > 0);
3364 // We don't have an instruction for shifting the current character register 3367 // We don't have an instruction for shifting the current character register
3365 // down or for using a shifted value for anything so lets just forget that 3368 // down or for using a shifted value for anything so lets just forget that
3366 // we preloaded any characters into it. 3369 // we preloaded any characters into it.
3367 characters_preloaded_ = 0; 3370 characters_preloaded_ = 0;
3368 // Adjust the offsets of the quick check performed information. This 3371 // Adjust the offsets of the quick check performed information. This
3369 // information is used to find out what we already determined about the 3372 // information is used to find out what we already determined about the
3370 // characters by means of mask and compare. 3373 // characters by means of mask and compare.
3371 quick_check_performed_.Advance(by, compiler->one_byte()); 3374 quick_check_performed_.Advance(by, compiler->one_byte());
3372 cp_offset_ += by; 3375 cp_offset_ += by;
3373 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { 3376 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
3374 compiler->SetRegExpTooBig(); 3377 compiler->SetRegExpTooBig();
3375 cp_offset_ = 0; 3378 cp_offset_ = 0;
3376 } 3379 }
3377 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); 3380 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
3378 } 3381 }
3379 3382
3380 3383
3381 void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) { 3384 void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) {
3382 int element_count = elms_->length(); 3385 int element_count = elements()->length();
3383 for (int i = 0; i < element_count; i++) { 3386 for (int i = 0; i < element_count; i++) {
3384 TextElement elm = elms_->at(i); 3387 TextElement elm = elements()->at(i);
3385 if (elm.text_type() == TextElement::CHAR_CLASS) { 3388 if (elm.text_type() == TextElement::CHAR_CLASS) {
3386 RegExpCharacterClass* cc = elm.char_class(); 3389 RegExpCharacterClass* cc = elm.char_class();
3387 // None of the standard character classes is different in the case 3390 // None of the standard character classes is different in the case
3388 // independent case and it slows us down if we don't know that. 3391 // independent case and it slows us down if we don't know that.
3389 if (cc->is_standard(zone())) continue; 3392 if (cc->is_standard(zone())) continue;
3390 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); 3393 ZoneList<CharacterRange>* ranges = cc->ranges(zone());
3391 int range_count = ranges->length(); 3394 int range_count = ranges->length();
3392 for (int j = 0; j < range_count; j++) { 3395 for (int j = 0; j < range_count; j++) {
3393 ranges->at(j).AddCaseEquivalents(isolate, zone(), ranges, is_one_byte); 3396 ranges->at(j).AddCaseEquivalents(isolate, zone(), ranges, is_one_byte);
3394 } 3397 }
3395 } 3398 }
3396 } 3399 }
3397 } 3400 }
3398 3401
3399 3402
3400 int TextNode::GreedyLoopTextLength() { 3403 int TextNode::GreedyLoopTextLength() {
3401 TextElement elm = elms_->at(elms_->length() - 1); 3404 return read_backward() ? kNodeIsTooComplexForGreedyLoops : Length();
erikcorry 2015/11/11 15:39:04 This is a case where perhaps we do need a little o
Yang 2015/11/12 08:25:35 Done.
3402 return elm.cp_offset() + elm.length();
3403 } 3405 }
3404 3406
3405 3407
3406 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( 3408 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
erikcorry 2015/11/11 15:39:04 Not quite sure, but this optimization may have to
Yang 2015/11/12 08:25:35 I haven't found a test case where this would becom
3407 RegExpCompiler* compiler) { 3409 RegExpCompiler* compiler) {
3408 if (elms_->length() != 1) return NULL; 3410 if (elements()->length() != 1) return NULL;
3409 TextElement elm = elms_->at(0); 3411 TextElement elm = elements()->at(0);
3410 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; 3412 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;
3411 RegExpCharacterClass* node = elm.char_class(); 3413 RegExpCharacterClass* node = elm.char_class();
3412 ZoneList<CharacterRange>* ranges = node->ranges(zone()); 3414 ZoneList<CharacterRange>* ranges = node->ranges(zone());
3413 if (!CharacterRange::IsCanonical(ranges)) { 3415 if (!CharacterRange::IsCanonical(ranges)) {
3414 CharacterRange::Canonicalize(ranges); 3416 CharacterRange::Canonicalize(ranges);
3415 } 3417 }
3416 if (node->is_negated()) { 3418 if (node->is_negated()) {
3417 return ranges->length() == 0 ? on_success() : NULL; 3419 return ranges->length() == 0 ? on_success() : NULL;
3418 } 3420 }
3419 if (ranges->length() != 1) return NULL; 3421 if (ranges->length() != 1) return NULL;
(...skipping 454 matching lines...) Expand 10 before | Expand all | Expand 10 after
3874 * V |S 3876 * V |S
3875 * Q2 ---> U----->backtrack 3877 * Q2 ---> U----->backtrack
3876 * | F / 3878 * | F /
3877 * S| / 3879 * S| /
3878 * V F / 3880 * V F /
3879 * S2--/ 3881 * S2--/
3880 */ 3882 */
3881 3883
3882 GreedyLoopState::GreedyLoopState(bool not_at_start) { 3884 GreedyLoopState::GreedyLoopState(bool not_at_start) {
3883 counter_backtrack_trace_.set_backtrack(&label_); 3885 counter_backtrack_trace_.set_backtrack(&label_);
3884 if (not_at_start) counter_backtrack_trace_.set_at_start(false); 3886 if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE);
3885 } 3887 }
3886 3888
3887 3889
3888 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { 3890 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
3889 #ifdef DEBUG 3891 #ifdef DEBUG
3890 int choice_count = alternatives_->length(); 3892 int choice_count = alternatives_->length();
3891 for (int i = 0; i < choice_count - 1; i++) { 3893 for (int i = 0; i < choice_count - 1; i++) {
3892 GuardedAlternative alternative = alternatives_->at(i); 3894 GuardedAlternative alternative = alternatives_->at(i);
3893 ZoneList<Guard*>* guards = alternative.guards(); 3895 ZoneList<Guard*>* guards = alternative.guards();
3894 int guard_count = (guards == NULL) ? 0 : guards->length(); 3896 int guard_count = (guards == NULL) ? 0 : guards->length();
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
4001 // and other simple nodes. These are handled by pushing the current 4003 // and other simple nodes. These are handled by pushing the current
4002 // position on the stack and then incrementing the current position each 4004 // position on the stack and then incrementing the current position each
4003 // time around the switch. On backtrack we decrement the current position 4005 // time around the switch. On backtrack we decrement the current position
4004 // and check it against the pushed value. This avoids pushing backtrack 4006 // and check it against the pushed value. This avoids pushing backtrack
4005 // information for each iteration of the loop, which could take up a lot of 4007 // information for each iteration of the loop, which could take up a lot of
4006 // space. 4008 // space.
4007 DCHECK(trace->stop_node() == NULL); 4009 DCHECK(trace->stop_node() == NULL);
4008 macro_assembler->PushCurrentPosition(); 4010 macro_assembler->PushCurrentPosition();
4009 Label greedy_match_failed; 4011 Label greedy_match_failed;
4010 Trace greedy_match_trace; 4012 Trace greedy_match_trace;
4011 if (not_at_start()) greedy_match_trace.set_at_start(false); 4013 if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE);
4012 greedy_match_trace.set_backtrack(&greedy_match_failed); 4014 greedy_match_trace.set_backtrack(&greedy_match_failed);
4013 Label loop_label; 4015 Label loop_label;
4014 macro_assembler->Bind(&loop_label); 4016 macro_assembler->Bind(&loop_label);
4015 greedy_match_trace.set_stop_node(this); 4017 greedy_match_trace.set_stop_node(this);
4016 greedy_match_trace.set_loop_label(&loop_label); 4018 greedy_match_trace.set_loop_label(&loop_label);
4017 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace); 4019 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
4018 macro_assembler->Bind(&greedy_match_failed); 4020 macro_assembler->Bind(&greedy_match_failed);
4019 4021
4020 Label second_choice; // For use in greedy matches. 4022 Label second_choice; // For use in greedy matches.
4021 macro_assembler->Bind(&second_choice); 4023 macro_assembler->Bind(&second_choice);
(...skipping 325 matching lines...) Expand 10 before | Expand all | Expand 10 after
4347 } 4349 }
4348 4350
4349 LimitResult limit_result = LimitVersions(compiler, trace); 4351 LimitResult limit_result = LimitVersions(compiler, trace);
4350 if (limit_result == DONE) return; 4352 if (limit_result == DONE) return;
4351 DCHECK(limit_result == CONTINUE); 4353 DCHECK(limit_result == CONTINUE);
4352 4354
4353 RecursionCheck rc(compiler); 4355 RecursionCheck rc(compiler);
4354 4356
4355 DCHECK_EQ(start_reg_ + 1, end_reg_); 4357 DCHECK_EQ(start_reg_ + 1, end_reg_);
4356 if (compiler->ignore_case()) { 4358 if (compiler->ignore_case()) {
4357 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, 4359 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
4358 trace->backtrack()); 4360 trace->backtrack());
4359 } else { 4361 } else {
4360 assembler->CheckNotBackReference(start_reg_, trace->backtrack()); 4362 assembler->CheckNotBackReference(start_reg_, read_backward(),
4363 trace->backtrack());
4361 } 4364 }
4365 // We are going to advance backward, so we may end up at the start.
4366 if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
4362 on_success()->Emit(compiler, trace); 4367 on_success()->Emit(compiler, trace);
4363 } 4368 }
4364 4369
4365 4370
4366 // ------------------------------------------------------------------- 4371 // -------------------------------------------------------------------
4367 // Dot/dotty output 4372 // Dot/dotty output
4368 4373
4369 4374
4370 #ifdef DEBUG 4375 #ifdef DEBUG
4371 4376
(...skipping 340 matching lines...) Expand 10 before | Expand all | Expand 10 after
4712 4717
4713 4718
4714 // ------------------------------------------------------------------- 4719 // -------------------------------------------------------------------
4715 // Tree to graph conversion 4720 // Tree to graph conversion
4716 4721
4717 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, 4722 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
4718 RegExpNode* on_success) { 4723 RegExpNode* on_success) {
4719 ZoneList<TextElement>* elms = 4724 ZoneList<TextElement>* elms =
4720 new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone()); 4725 new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone());
4721 elms->Add(TextElement::Atom(this), compiler->zone()); 4726 elms->Add(TextElement::Atom(this), compiler->zone());
4722 return new(compiler->zone()) TextNode(elms, on_success); 4727 return new (compiler->zone()) TextNode(elms, read_backward(), on_success);
4723 } 4728 }
4724 4729
4725 4730
4726 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, 4731 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
4727 RegExpNode* on_success) { 4732 RegExpNode* on_success) {
4728 return new(compiler->zone()) TextNode(elements(), on_success); 4733 return new (compiler->zone())
4734 TextNode(elements(), read_backward(), on_success);
4729 } 4735 }
4730 4736
4731 4737
4732 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges, 4738 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
4733 const int* special_class, 4739 const int* special_class,
4734 int length) { 4740 int length) {
4735 length--; // Remove final 0x10000. 4741 length--; // Remove final 0x10000.
4736 DCHECK(special_class[length] == 0x10000); 4742 DCHECK(special_class[length] == 0x10000);
4737 DCHECK(ranges->length() != 0); 4743 DCHECK(ranges->length() != 0);
4738 DCHECK(length != 0); 4744 DCHECK(length != 0);
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
4815 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) { 4821 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
4816 set_.set_standard_set_type('W'); 4822 set_.set_standard_set_type('W');
4817 return true; 4823 return true;
4818 } 4824 }
4819 return false; 4825 return false;
4820 } 4826 }
4821 4827
4822 4828
4823 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, 4829 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
4824 RegExpNode* on_success) { 4830 RegExpNode* on_success) {
4825 return new(compiler->zone()) TextNode(this, on_success); 4831 return new (compiler->zone()) TextNode(this, read_backward(), on_success);
4826 } 4832 }
4827 4833
4828 4834
4829 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { 4835 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {
4830 RegExpAtom* atom1 = (*a)->AsAtom(); 4836 RegExpAtom* atom1 = (*a)->AsAtom();
4831 RegExpAtom* atom2 = (*b)->AsAtom(); 4837 RegExpAtom* atom2 = (*b)->AsAtom();
4832 uc16 character1 = atom1->data().at(0); 4838 uc16 character1 = atom1->data().at(0);
4833 uc16 character2 = atom2->data().at(0); 4839 uc16 character2 = atom2->data().at(0);
4834 if (character1 < character2) return -1; 4840 if (character1 < character2) return -1;
4835 if (character1 > character2) return 1; 4841 if (character1 > character2) return 1;
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after
4960 for (int j = 1; j < run_length && prefix_length > 1; j++) { 4966 for (int j = 1; j < run_length && prefix_length > 1; j++) {
4961 RegExpAtom* old_atom = 4967 RegExpAtom* old_atom =
4962 alternatives->at(j + first_with_prefix)->AsAtom(); 4968 alternatives->at(j + first_with_prefix)->AsAtom();
4963 for (int k = 1; k < prefix_length; k++) { 4969 for (int k = 1; k < prefix_length; k++) {
4964 if (atom->data().at(k) != old_atom->data().at(k)) { 4970 if (atom->data().at(k) != old_atom->data().at(k)) {
4965 prefix_length = k; 4971 prefix_length = k;
4966 break; 4972 break;
4967 } 4973 }
4968 } 4974 }
4969 } 4975 }
4970 RegExpAtom* prefix = 4976 RegExpAtom* prefix = new (zone) RegExpAtom(
4971 new (zone) RegExpAtom(atom->data().SubVector(0, prefix_length)); 4977 atom->data().SubVector(0, prefix_length), read_direction());
4972 ZoneList<RegExpTree*>* pair = new (zone) ZoneList<RegExpTree*>(2, zone); 4978 ZoneList<RegExpTree*>* pair = new (zone) ZoneList<RegExpTree*>(2, zone);
4973 pair->Add(prefix, zone); 4979 pair->Add(prefix, zone);
4974 ZoneList<RegExpTree*>* suffixes = 4980 ZoneList<RegExpTree*>* suffixes =
4975 new (zone) ZoneList<RegExpTree*>(run_length, zone); 4981 new (zone) ZoneList<RegExpTree*>(run_length, zone);
4976 for (int j = 0; j < run_length; j++) { 4982 for (int j = 0; j < run_length; j++) {
4977 RegExpAtom* old_atom = 4983 RegExpAtom* old_atom =
4978 alternatives->at(j + first_with_prefix)->AsAtom(); 4984 alternatives->at(j + first_with_prefix)->AsAtom();
4979 int len = old_atom->length(); 4985 int len = old_atom->length();
4980 if (len == prefix_length) { 4986 if (len == prefix_length) {
4981 suffixes->Add(new (zone) RegExpEmpty(), zone); 4987 suffixes->Add(new (zone) RegExpEmpty(), zone);
4982 } else { 4988 } else {
4983 RegExpTree* suffix = new (zone) RegExpAtom( 4989 RegExpTree* suffix = new (zone) RegExpAtom(
4984 old_atom->data().SubVector(prefix_length, old_atom->length())); 4990 old_atom->data().SubVector(prefix_length, old_atom->length()),
4991 read_direction());
4985 suffixes->Add(suffix, zone); 4992 suffixes->Add(suffix, zone);
4986 } 4993 }
4987 } 4994 }
4988 pair->Add(new (zone) RegExpDisjunction(suffixes), zone); 4995 pair->Add(new (zone) RegExpDisjunction(suffixes, read_direction()), zone);
4989 alternatives->at(write_posn++) = new (zone) RegExpAlternative(pair); 4996 alternatives->at(write_posn++) =
4997 new (zone) RegExpAlternative(pair, read_direction());
4990 } else { 4998 } else {
4991 // Just copy any non-worthwhile alternatives. 4999 // Just copy any non-worthwhile alternatives.
4992 for (int j = first_with_prefix; j < i; j++) { 5000 for (int j = first_with_prefix; j < i; j++) {
4993 alternatives->at(write_posn++) = alternatives->at(j); 5001 alternatives->at(write_posn++) = alternatives->at(j);
4994 } 5002 }
4995 } 5003 }
4996 } 5004 }
4997 alternatives->Rewind(write_posn); // Trim end of array. 5005 alternatives->Rewind(write_posn); // Trim end of array.
4998 } 5006 }
4999 5007
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
5033 // Found non-trivial run of single-character alternatives. 5041 // Found non-trivial run of single-character alternatives.
5034 int run_length = i - first_in_run; 5042 int run_length = i - first_in_run;
5035 ZoneList<CharacterRange>* ranges = 5043 ZoneList<CharacterRange>* ranges =
5036 new (zone) ZoneList<CharacterRange>(2, zone); 5044 new (zone) ZoneList<CharacterRange>(2, zone);
5037 for (int j = 0; j < run_length; j++) { 5045 for (int j = 0; j < run_length; j++) {
5038 RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom(); 5046 RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom();
5039 DCHECK_EQ(old_atom->length(), 1); 5047 DCHECK_EQ(old_atom->length(), 1);
5040 ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone); 5048 ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
5041 } 5049 }
5042 alternatives->at(write_posn++) = 5050 alternatives->at(write_posn++) =
5043 new (zone) RegExpCharacterClass(ranges, false); 5051 new (zone) RegExpCharacterClass(ranges, false, read_direction());
5044 } else { 5052 } else {
5045 // Just copy any trivial alternatives. 5053 // Just copy any trivial alternatives.
5046 for (int j = first_in_run; j < i; j++) { 5054 for (int j = first_in_run; j < i; j++) {
5047 alternatives->at(write_posn++) = alternatives->at(j); 5055 alternatives->at(write_posn++) = alternatives->at(j);
5048 } 5056 }
5049 } 5057 }
5050 } 5058 }
5051 alternatives->Rewind(write_posn); // Trim end of array. 5059 alternatives->Rewind(write_posn); // Trim end of array.
5052 } 5060 }
5053 5061
(...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after
5287 // lookahead in one side and an end-of-input on the other side. 5295 // lookahead in one side and an end-of-input on the other side.
5288 // We need two registers for the lookahead. 5296 // We need two registers for the lookahead.
5289 int stack_pointer_register = compiler->AllocateRegister(); 5297 int stack_pointer_register = compiler->AllocateRegister();
5290 int position_register = compiler->AllocateRegister(); 5298 int position_register = compiler->AllocateRegister();
5291 // The ChoiceNode to distinguish between a newline and end-of-input. 5299 // The ChoiceNode to distinguish between a newline and end-of-input.
5292 ChoiceNode* result = new(zone) ChoiceNode(2, zone); 5300 ChoiceNode* result = new(zone) ChoiceNode(2, zone);
5293 // Create a newline atom. 5301 // Create a newline atom.
5294 ZoneList<CharacterRange>* newline_ranges = 5302 ZoneList<CharacterRange>* newline_ranges =
5295 new(zone) ZoneList<CharacterRange>(3, zone); 5303 new(zone) ZoneList<CharacterRange>(3, zone);
5296 CharacterRange::AddClassEscape('n', newline_ranges, zone); 5304 CharacterRange::AddClassEscape('n', newline_ranges, zone);
5297 RegExpCharacterClass* newline_atom = new(zone) RegExpCharacterClass('n'); 5305 RegExpCharacterClass* newline_atom =
5298 TextNode* newline_matcher = new(zone) TextNode( 5306 new (zone) RegExpCharacterClass('n', RegExpTree::READ_FORWARD);
erikcorry 2015/11/11 15:39:04 Yes :-) Do we have a test of $ and ^ inside lookb
Yang 2015/11/12 08:25:34 Added a couple of tests.
5299 newline_atom, 5307 TextNode* newline_matcher = new (zone) TextNode(
5300 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, 5308 newline_atom, false, ActionNode::PositiveSubmatchSuccess(
5301 position_register, 5309 stack_pointer_register, position_register,
5302 0, // No captures inside. 5310 0, // No captures inside.
5303 -1, // Ignored if no captures. 5311 -1, // Ignored if no captures.
5304 on_success)); 5312 on_success));
5305 // Create an end-of-input matcher. 5313 // Create an end-of-input matcher.
5306 RegExpNode* end_of_line = ActionNode::BeginSubmatch( 5314 RegExpNode* end_of_line = ActionNode::BeginSubmatch(
5307 stack_pointer_register, 5315 stack_pointer_register,
5308 position_register, 5316 position_register,
5309 newline_matcher); 5317 newline_matcher);
5310 // Add the two alternatives to the ChoiceNode. 5318 // Add the two alternatives to the ChoiceNode.
5311 GuardedAlternative eol_alternative(end_of_line); 5319 GuardedAlternative eol_alternative(end_of_line);
5312 result->AddAlternative(eol_alternative); 5320 result->AddAlternative(eol_alternative);
5313 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); 5321 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
5314 result->AddAlternative(end_alternative); 5322 result->AddAlternative(end_alternative);
5315 return result; 5323 return result;
5316 } 5324 }
5317 default: 5325 default:
5318 UNREACHABLE(); 5326 UNREACHABLE();
5319 } 5327 }
5320 return on_success; 5328 return on_success;
5321 } 5329 }
5322 5330
5323 5331
5324 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, 5332 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
5325 RegExpNode* on_success) { 5333 RegExpNode* on_success) {
5326 return new(compiler->zone()) 5334 return new (compiler->zone()) BackReferenceNode(
5327 BackReferenceNode(RegExpCapture::StartRegister(index()), 5335 RegExpCapture::StartRegister(index()),
5328 RegExpCapture::EndRegister(index()), 5336 RegExpCapture::EndRegister(index()), read_backward(), on_success);
5329 on_success);
5330 } 5337 }
5331 5338
5332 5339
5333 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, 5340 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
5334 RegExpNode* on_success) { 5341 RegExpNode* on_success) {
5335 return on_success; 5342 return on_success;
5336 } 5343 }
5337 5344
5338 5345
5339 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, 5346 RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler,
5340 RegExpNode* on_success) { 5347 RegExpNode* on_success) {
5341 int stack_pointer_register = compiler->AllocateRegister(); 5348 int stack_pointer_register = compiler->AllocateRegister();
5342 int position_register = compiler->AllocateRegister(); 5349 int position_register = compiler->AllocateRegister();
5343 5350
5344 const int registers_per_capture = 2; 5351 const int registers_per_capture = 2;
5345 const int register_of_first_capture = 2; 5352 const int register_of_first_capture = 2;
5346 int register_count = capture_count_ * registers_per_capture; 5353 int register_count = capture_count_ * registers_per_capture;
5347 int register_start = 5354 int register_start =
5348 register_of_first_capture + capture_from_ * registers_per_capture; 5355 register_of_first_capture + capture_from_ * registers_per_capture;
5349 5356
5350 RegExpNode* success;
5351 if (is_positive()) { 5357 if (is_positive()) {
5352 RegExpNode* node = ActionNode::BeginSubmatch( 5358 RegExpNode* node = ActionNode::BeginSubmatch(
5353 stack_pointer_register, 5359 stack_pointer_register,
5354 position_register, 5360 position_register,
5355 body()->ToNode( 5361 body()->ToNode(
5356 compiler, 5362 compiler,
5357 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, 5363 ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
5358 position_register, 5364 position_register,
5359 register_count, 5365 register_count,
5360 register_start, 5366 register_start,
5361 on_success))); 5367 on_success)));
5362 return node; 5368 return node;
5363 } else { 5369 } else {
5364 // We use a ChoiceNode for a negative lookahead because it has most of 5370 // We use a ChoiceNode for a negative lookahead because it has most of
5365 // the characteristics we need. It has the body of the lookahead as its 5371 // the characteristics we need. It has the body of the lookahead as its
5366 // first alternative and the expression after the lookahead of the second 5372 // first alternative and the expression after the lookahead of the second
5367 // alternative. If the first alternative succeeds then the 5373 // alternative. If the first alternative succeeds then the
5368 // NegativeSubmatchSuccess will unwind the stack including everything the 5374 // NegativeSubmatchSuccess will unwind the stack including everything the
5369 // choice node set up and backtrack. If the first alternative fails then 5375 // choice node set up and backtrack. If the first alternative fails then
5370 // the second alternative is tried, which is exactly the desired result 5376 // the second alternative is tried, which is exactly the desired result
5371 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special 5377 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special
5372 // ChoiceNode that knows to ignore the first exit when calculating quick 5378 // ChoiceNode that knows to ignore the first exit when calculating quick
5373 // checks. 5379 // checks.
5374 Zone* zone = compiler->zone(); 5380 Zone* zone = compiler->zone();
5375 5381
5376 GuardedAlternative body_alt( 5382 GuardedAlternative body_alt(
5377 body()->ToNode( 5383 body()->ToNode(compiler, new (zone) NegativeSubmatchSuccess(
5378 compiler, 5384 stack_pointer_register, position_register,
5379 success = new(zone) NegativeSubmatchSuccess(stack_pointer_register, 5385 register_count, register_start, zone)));
5380 position_register,
5381 register_count,
5382 register_start,
5383 zone)));
5384 ChoiceNode* choice_node = 5386 ChoiceNode* choice_node =
5385 new(zone) NegativeLookaheadChoiceNode(body_alt, 5387 new(zone) NegativeLookaheadChoiceNode(body_alt,
5386 GuardedAlternative(on_success), 5388 GuardedAlternative(on_success),
5387 zone); 5389 zone);
5388 return ActionNode::BeginSubmatch(stack_pointer_register, 5390 return ActionNode::BeginSubmatch(stack_pointer_register,
5389 position_register, 5391 position_register,
5390 choice_node); 5392 choice_node);
5391 } 5393 }
5392 } 5394 }
5393 5395
5394 5396
5395 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, 5397 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
5396 RegExpNode* on_success) { 5398 RegExpNode* on_success) {
5397 return ToNode(body(), index(), compiler, on_success); 5399 return ToNode(body(), index(), compiler, on_success);
5398 } 5400 }
5399 5401
5400 5402
5401 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, 5403 RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
5402 int index, 5404 int index,
5403 RegExpCompiler* compiler, 5405 RegExpCompiler* compiler,
5404 RegExpNode* on_success) { 5406 RegExpNode* on_success) {
5407 DCHECK_NOT_NULL(body);
5405 int start_reg = RegExpCapture::StartRegister(index); 5408 int start_reg = RegExpCapture::StartRegister(index);
5406 int end_reg = RegExpCapture::EndRegister(index); 5409 int end_reg = RegExpCapture::EndRegister(index);
5410 if (body->read_backward()) std::swap(start_reg, end_reg);
5407 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); 5411 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
5408 RegExpNode* body_node = body->ToNode(compiler, store_end); 5412 RegExpNode* body_node = body->ToNode(compiler, store_end);
5409 return ActionNode::StorePosition(start_reg, true, body_node); 5413 return ActionNode::StorePosition(start_reg, true, body_node);
5410 } 5414 }
5411 5415
5412 5416
5413 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, 5417 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
5414 RegExpNode* on_success) { 5418 RegExpNode* on_success) {
5415 ZoneList<RegExpTree*>* children = nodes(); 5419 ZoneList<RegExpTree*>* children = nodes();
5416 RegExpNode* current = on_success; 5420 RegExpNode* current = on_success;
5417 for (int i = children->length() - 1; i >= 0; i--) { 5421 if (read_backward()) {
5418 current = children->at(i)->ToNode(compiler, current); 5422 for (int i = 0; i < children->length(); i++) {
5423 current = children->at(i)->ToNode(compiler, current);
5424 }
5425 } else {
5426 for (int i = children->length() - 1; i >= 0; i--) {
5427 current = children->at(i)->ToNode(compiler, current);
5428 }
5419 } 5429 }
5420 return current; 5430 return current;
5421 } 5431 }
5422 5432
5423 5433
5424 static void AddClass(const int* elmv, 5434 static void AddClass(const int* elmv,
5425 int elmc, 5435 int elmc,
5426 ZoneList<CharacterRange>* ranges, 5436 ZoneList<CharacterRange>* ranges,
5427 Zone* zone) { 5437 Zone* zone) {
5428 elmc--; 5438 elmc--;
(...skipping 855 matching lines...) Expand 10 before | Expand all | Expand 10 after
6284 0, 6294 0,
6285 &compiler, 6295 &compiler,
6286 compiler.accept()); 6296 compiler.accept());
6287 RegExpNode* node = captured_body; 6297 RegExpNode* node = captured_body;
6288 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); 6298 bool is_end_anchored = data->tree->IsAnchoredAtEnd();
6289 bool is_start_anchored = data->tree->IsAnchoredAtStart(); 6299 bool is_start_anchored = data->tree->IsAnchoredAtStart();
6290 int max_length = data->tree->max_match(); 6300 int max_length = data->tree->max_match();
6291 if (!is_start_anchored && !is_sticky) { 6301 if (!is_start_anchored && !is_sticky) {
6292 // Add a .*? at the beginning, outside the body capture, unless 6302 // Add a .*? at the beginning, outside the body capture, unless
6293 // this expression is anchored at the beginning or sticky. 6303 // this expression is anchored at the beginning or sticky.
6294 RegExpNode* loop_node = 6304 RegExpNode* loop_node = RegExpQuantifier::ToNode(
6295 RegExpQuantifier::ToNode(0, 6305 0, RegExpTree::kInfinity, false,
6296 RegExpTree::kInfinity, 6306 new (zone) RegExpCharacterClass('*', RegExpTree::READ_FORWARD),
6297 false, 6307 &compiler, captured_body, data->contains_anchor);
6298 new(zone) RegExpCharacterClass('*'),
6299 &compiler,
6300 captured_body,
6301 data->contains_anchor);
6302 6308
6303 if (data->contains_anchor) { 6309 if (data->contains_anchor) {
6304 // Unroll loop once, to take care of the case that might start 6310 // Unroll loop once, to take care of the case that might start
6305 // at the start of input. 6311 // at the start of input.
6306 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); 6312 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);
6307 first_step_node->AddAlternative(GuardedAlternative(captured_body)); 6313 first_step_node->AddAlternative(GuardedAlternative(captured_body));
6308 first_step_node->AddAlternative(GuardedAlternative( 6314 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode(
6309 new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node))); 6315 new (zone) RegExpCharacterClass('*', RegExpTree::READ_FORWARD), false,
6316 loop_node)));
6310 node = first_step_node; 6317 node = first_step_node;
6311 } else { 6318 } else {
6312 node = loop_node; 6319 node = loop_node;
6313 } 6320 }
6314 } 6321 }
6315 if (is_one_byte) { 6322 if (is_one_byte) {
6316 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); 6323 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
6317 // Do it again to propagate the new nodes to places where they were not 6324 // Do it again to propagate the new nodes to places where they were not
6318 // put because they had not been calculated yet. 6325 // put because they had not been calculated yet.
6319 if (node != NULL) { 6326 if (node != NULL) {
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after
6503 6510
6504 6511
6505 void RegExpResultsCache::Clear(FixedArray* cache) { 6512 void RegExpResultsCache::Clear(FixedArray* cache) {
6506 for (int i = 0; i < kRegExpResultsCacheSize; i++) { 6513 for (int i = 0; i < kRegExpResultsCacheSize; i++) {
6507 cache->set(i, Smi::FromInt(0)); 6514 cache->set(i, Smi::FromInt(0));
6508 } 6515 }
6509 } 6516 }
6510 6517
6511 } // namespace internal 6518 } // namespace internal
6512 } // namespace v8 6519 } // namespace v8
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698