src/regexp/jsregexp.cc - Issue 1418963009: Experimental support for RegExp lookbehind.

Side by Side Diff: src/regexp/jsregexp.cc

Issue 1418963009: Experimental support for RegExp lookbehind. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: fixed test cases Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/regexp/jsregexp.h"	5 #include "src/regexp/jsregexp.h"

6	6

7 #include "src/ast.h"	7 #include "src/ast.h"

8 #include "src/base/platform/platform.h"	8 #include "src/base/platform/platform.h"

9 #include "src/compilation-cache.h"	9 #include "src/compilation-cache.h"

10 #include "src/compiler.h"	10 #include "src/compiler.h"

(...skipping 1206 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1217	1217

1218 // The chronologically first deferred action in the trace	1218 // The chronologically first deferred action in the trace

1219 // is used to infer the action needed to restore a register	1219 // is used to infer the action needed to restore a register

1220 // to its previous state (or not, if it's safe to ignore it).	1220 // to its previous state (or not, if it's safe to ignore it).

1221 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };	1221 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };

1222 DeferredActionUndoType undo_action = IGNORE;	1222 DeferredActionUndoType undo_action = IGNORE;

1223	1223

1224 int value = 0;	1224 int value = 0;

1225 bool absolute = false;	1225 bool absolute = false;

1226 bool clear = false;	1226 bool clear = false;

1227 int store_position = -1;	1227 static const int kNoStore = kMinInt;

	1228 int store_position = kNoStore;

1228 // This is a little tricky because we are scanning the actions in reverse	1229 // This is a little tricky because we are scanning the actions in reverse

1229 // historical order (newest first).	1230 // historical order (newest first).

1230 for (DeferredAction* action = actions_;	1231 for (DeferredAction* action = actions_;

1231 action != NULL;	1232 action != NULL;

1232 action = action->next()) {	1233 action = action->next()) {

1233 if (action->Mentions(reg)) {	1234 if (action->Mentions(reg)) {

1234 switch (action->action_type()) {	1235 switch (action->action_type()) {

1235 case ActionNode::SET_REGISTER: {	1236 case ActionNode::SET_REGISTER: {

1236 Trace::DeferredSetRegister* psr =	1237 Trace::DeferredSetRegister* psr =

1237 static_cast<Trace::DeferredSetRegister*>(action);	1238 static_cast<Trace::DeferredSetRegister*>(action);

1238 if (!absolute) {	1239 if (!absolute) {

1239 value += psr->value();	1240 value += psr->value();

1240 absolute = true;	1241 absolute = true;

1241 }	1242 }

1242 // SET_REGISTER is currently only used for newly introduced loop	1243 // SET_REGISTER is currently only used for newly introduced loop

1243 // counters. They can have a significant previous value if they	1244 // counters. They can have a significant previous value if they

1244 // occour in a loop. TODO(lrn): Propagate this information, so	1245 // occour in a loop. TODO(lrn): Propagate this information, so

1245 // we can set undo_action to IGNORE if we know there is no value to	1246 // we can set undo_action to IGNORE if we know there is no value to

1246 // restore.	1247 // restore.

1247 undo_action = RESTORE;	1248 undo_action = RESTORE;

1248 DCHECK_EQ(store_position, -1);	1249 DCHECK_EQ(store_position, kNoStore);

1249 DCHECK(!clear);	1250 DCHECK(!clear);

1250 break;	1251 break;

1251 }	1252 }

1252 case ActionNode::INCREMENT_REGISTER:	1253 case ActionNode::INCREMENT_REGISTER:

1253 if (!absolute) {	1254 if (!absolute) {

1254 value++;	1255 value++;

1255 }	1256 }

1256 DCHECK_EQ(store_position, -1);	1257 DCHECK_EQ(store_position, kNoStore);

1257 DCHECK(!clear);	1258 DCHECK(!clear);

1258 undo_action = RESTORE;	1259 undo_action = RESTORE;

1259 break;	1260 break;

1260 case ActionNode::STORE_POSITION: {	1261 case ActionNode::STORE_POSITION: {

1261 Trace::DeferredCapture* pc =	1262 Trace::DeferredCapture* pc =

1262 static_cast<Trace::DeferredCapture*>(action);	1263 static_cast<Trace::DeferredCapture*>(action);

1263 if (!clear && store_position == -1) {	1264 if (!clear && store_position == kNoStore) {

1264 store_position = pc->cp_offset();	1265 store_position = pc->cp_offset();

1265 }	1266 }

1266	1267

1267 // For captures we know that stores and clears alternate.	1268 // For captures we know that stores and clears alternate.

1268 // Other register, are never cleared, and if the occur	1269 // Other register, are never cleared, and if the occur

1269 // inside a loop, they might be assigned more than once.	1270 // inside a loop, they might be assigned more than once.

1270 if (reg <= 1) {	1271 if (reg <= 1) {

1271 // Registers zero and one, aka "capture zero", is	1272 // Registers zero and one, aka "capture zero", is

1272 // always set correctly if we succeed. There is no	1273 // always set correctly if we succeed. There is no

1273 // need to undo a setting on backtrack, because we	1274 // need to undo a setting on backtrack, because we

1274 // will set it again or fail.	1275 // will set it again or fail.

1275 undo_action = IGNORE;	1276 undo_action = IGNORE;

1276 } else {	1277 } else {

1277 undo_action = pc->is_capture() ? CLEAR : RESTORE;	1278 undo_action = pc->is_capture() ? CLEAR : RESTORE;

1278 }	1279 }

1279 DCHECK(!absolute);	1280 DCHECK(!absolute);

1280 DCHECK_EQ(value, 0);	1281 DCHECK_EQ(value, 0);

1281 break;	1282 break;

1282 }	1283 }

1283 case ActionNode::CLEAR_CAPTURES: {	1284 case ActionNode::CLEAR_CAPTURES: {

1284 // Since we're scanning in reverse order, if we've already	1285 // Since we're scanning in reverse order, if we've already

1285 // set the position we have to ignore historically earlier	1286 // set the position we have to ignore historically earlier

1286 // clearing operations.	1287 // clearing operations.

1287 if (store_position == -1) {	1288 if (store_position == kNoStore) {

1288 clear = true;	1289 clear = true;

1289 }	1290 }

1290 undo_action = RESTORE;	1291 undo_action = RESTORE;

1291 DCHECK(!absolute);	1292 DCHECK(!absolute);

1292 DCHECK_EQ(value, 0);	1293 DCHECK_EQ(value, 0);

1293 break;	1294 break;

1294 }	1295 }

1295 default:	1296 default:

1296 UNREACHABLE();	1297 UNREACHABLE();

1297 break;	1298 break;

(...skipping 10 matching lines...) Expand all Loading...
1308 pushes = 0;	1309 pushes = 0;

1309 }	1310 }

1310	1311

1311 assembler->PushRegister(reg, stack_check);	1312 assembler->PushRegister(reg, stack_check);

1312 registers_to_pop->Set(reg, zone);	1313 registers_to_pop->Set(reg, zone);

1313 } else if (undo_action == CLEAR) {	1314 } else if (undo_action == CLEAR) {

1314 registers_to_clear->Set(reg, zone);	1315 registers_to_clear->Set(reg, zone);

1315 }	1316 }

1316 // Perform the chronologically last action (or accumulated increment)	1317 // Perform the chronologically last action (or accumulated increment)

1317 // for the register.	1318 // for the register.

1318 if (store_position != -1) {	1319 if (store_position != kNoStore) {

1319 assembler->WriteCurrentPositionToRegister(reg, store_position);	1320 assembler->WriteCurrentPositionToRegister(reg, store_position);

1320 } else if (clear) {	1321 } else if (clear) {

1321 assembler->ClearRegisters(reg, reg);	1322 assembler->ClearRegisters(reg, reg);

1322 } else if (absolute) {	1323 } else if (absolute) {

1323 assembler->SetRegister(reg, value);	1324 assembler->SetRegister(reg, value);

1324 } else if (value != 0) {	1325 } else if (value != 0) {

1325 assembler->AdvanceRegister(reg, value);	1326 assembler->AdvanceRegister(reg, value);

1326 }	1327 }

1327 }	1328 }

1328 }	1329 }

(...skipping 977 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2306 // Match the behaviour of EatsAtLeast on this node.	2307 // Match the behaviour of EatsAtLeast on this node.

2307 if (assertion_type() == AT_START && not_at_start) return;	2308 if (assertion_type() == AT_START && not_at_start) return;

2308 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);	2309 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);

2309 SaveBMInfo(bm, not_at_start, offset);	2310 SaveBMInfo(bm, not_at_start, offset);

2310 }	2311 }

2311	2312

2312	2313

2313 int BackReferenceNode::EatsAtLeast(int still_to_find,	2314 int BackReferenceNode::EatsAtLeast(int still_to_find,

2314 int budget,	2315 int budget,

2315 bool not_at_start) {	2316 bool not_at_start) {

	2317 if (read_backward()) return 0;

2316 if (budget <= 0) return 0;	2318 if (budget <= 0) return 0;

2317 return on_success()->EatsAtLeast(still_to_find,	2319 return on_success()->EatsAtLeast(still_to_find,

2318 budget - 1,	2320 budget - 1,

2319 not_at_start);	2321 not_at_start);

2320 }	2322 }

2321	2323

2322	2324

2323 int TextNode::EatsAtLeast(int still_to_find,	2325 int TextNode::EatsAtLeast(int still_to_find,

2324 int budget,	2326 int budget,

2325 bool not_at_start) {	2327 bool not_at_start) {

	2328 if (read_backward()) return 0;

2326 int answer = Length();	2329 int answer = Length();

2327 if (answer >= still_to_find) return answer;	2330 if (answer >= still_to_find) return answer;

2328 if (budget <= 0) return answer;	2331 if (budget <= 0) return answer;

2329 // We are not at start after this node so we set the last argument to 'true'.	2332 // We are not at start after this node so we set the last argument to 'true'.

2330 return answer + on_success()->EatsAtLeast(still_to_find - answer,	2333 return answer + on_success()->EatsAtLeast(still_to_find - answer,

2331 budget - 1,	2334 budget - 1,

2332 true);	2335 true);

2333 }	2336 }

2334	2337

2335	2338

(...skipping 183 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2519 bool not_at_start) {	2522 bool not_at_start) {

2520 Isolate* isolate = compiler->macro_assembler()->isolate();	2523 Isolate* isolate = compiler->macro_assembler()->isolate();

2521 DCHECK(characters_filled_in < details->characters());	2524 DCHECK(characters_filled_in < details->characters());

2522 int characters = details->characters();	2525 int characters = details->characters();

2523 int char_mask;	2526 int char_mask;

2524 if (compiler->one_byte()) {	2527 if (compiler->one_byte()) {

2525 char_mask = String::kMaxOneByteCharCode;	2528 char_mask = String::kMaxOneByteCharCode;

2526 } else {	2529 } else {

2527 char_mask = String::kMaxUtf16CodeUnit;	2530 char_mask = String::kMaxUtf16CodeUnit;

2528 }	2531 }

2529 for (int k = 0; k < elms_->length(); k++) {	2532 for (int k = 0; k < elements()->length(); k++) {

2530 TextElement elm = elms_->at(k);	2533 TextElement elm = elements()->at(k);

2531 if (elm.text_type() == TextElement::ATOM) {	2534 if (elm.text_type() == TextElement::ATOM) {

2532 Vector<const uc16> quarks = elm.atom()->data();	2535 Vector<const uc16> quarks = elm.atom()->data();

2533 for (int i = 0; i < characters && i < quarks.length(); i++) {	2536 for (int i = 0; i < characters && i < quarks.length(); i++) {

2534 QuickCheckDetails::Position* pos =	2537 QuickCheckDetails::Position* pos =

2535 details->positions(characters_filled_in);	2538 details->positions(characters_filled_in);

2536 uc16 c = quarks[i];	2539 uc16 c = quarks[i];

2537 if (compiler->ignore_case()) {	2540 if (compiler->ignore_case()) {

2538 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	2541 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

2539 int length = GetCaseIndependentLetters(isolate, c,	2542 int length = GetCaseIndependentLetters(isolate, c,

2540 compiler->one_byte(), chars);	2543 compiler->one_byte(), chars);

(...skipping 130 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2671 for (int i = 0; i < characters_; i++) {	2674 for (int i = 0; i < characters_; i++) {

2672 positions_[i].mask = 0;	2675 positions_[i].mask = 0;

2673 positions_[i].value = 0;	2676 positions_[i].value = 0;

2674 positions_[i].determines_perfectly = false;	2677 positions_[i].determines_perfectly = false;

2675 }	2678 }

2676 characters_ = 0;	2679 characters_ = 0;

2677 }	2680 }

2678	2681

2679	2682

2680 void QuickCheckDetails::Advance(int by, bool one_byte) {	2683 void QuickCheckDetails::Advance(int by, bool one_byte) {

2681 DCHECK(by >= 0);

2682 if (by >= characters_) {	2684 if (by >= characters_) {

2683 Clear();	2685 Clear();

2684 return;	2686 return;

2685 }	2687 }

2686 for (int i = 0; i < characters_ - by; i++) {	2688 for (int i = 0; i < characters_ - by; i++) {

2687 positions_[i] = positions_[by + i];	2689 positions_[i] = positions_[by + i];

2688 }	2690 }

2689 for (int i = characters_ - by; i < characters_; i++) {	2691 for (int i = characters_ - by; i < characters_; i++) {

2690 positions_[i].mask = 0;	2692 positions_[i].mask = 0;

2691 positions_[i].value = 0;	2693 positions_[i].value = 0;

(...skipping 81 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2773 }	2775 }

2774 return false;	2776 return false;

2775 }	2777 }

2776	2778

2777	2779

2778 RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {	2780 RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {

2779 if (info()->replacement_calculated) return replacement();	2781 if (info()->replacement_calculated) return replacement();

2780 if (depth < 0) return this;	2782 if (depth < 0) return this;

2781 DCHECK(!info()->visited);	2783 DCHECK(!info()->visited);

2782 VisitMarker marker(info());	2784 VisitMarker marker(info());

2783 int element_count = elms_->length();	2785 int element_count = elements()->length();

2784 for (int i = 0; i < element_count; i++) {	2786 for (int i = 0; i < element_count; i++) {

2785 TextElement elm = elms_->at(i);	2787 TextElement elm = elements()->at(i);

2786 if (elm.text_type() == TextElement::ATOM) {	2788 if (elm.text_type() == TextElement::ATOM) {

2787 Vector<const uc16> quarks = elm.atom()->data();	2789 Vector<const uc16> quarks = elm.atom()->data();

2788 for (int j = 0; j < quarks.length(); j++) {	2790 for (int j = 0; j < quarks.length(); j++) {

2789 uint16_t c = quarks[j];	2791 uint16_t c = quarks[j];

2790 if (c <= String::kMaxOneByteCharCode) continue;	2792 if (c <= String::kMaxOneByteCharCode) continue;

2791 if (!ignore_case) return set_replacement(NULL);	2793 if (!ignore_case) return set_replacement(NULL);

2792 // Here, we need to check for characters whose upper and lower cases	2794 // Here, we need to check for characters whose upper and lower cases

2793 // are outside the Latin-1 range.	2795 // are outside the Latin-1 range.

2794 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);	2796 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);

2795 // Character is outside Latin-1 completely	2797 // Character is outside Latin-1 completely

(...skipping 343 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3139 assembler->GoTo(trace->backtrack());	3141 assembler->GoTo(trace->backtrack());

3140 assembler->Bind(&ok);	3142 assembler->Bind(&ok);

3141 break;	3143 break;

3142 }	3144 }

3143 case AT_START: {	3145 case AT_START: {

3144 if (trace->at_start() == Trace::FALSE_VALUE) {	3146 if (trace->at_start() == Trace::FALSE_VALUE) {

3145 assembler->GoTo(trace->backtrack());	3147 assembler->GoTo(trace->backtrack());

3146 return;	3148 return;

3147 }	3149 }

3148 if (trace->at_start() == Trace::UNKNOWN) {	3150 if (trace->at_start() == Trace::UNKNOWN) {

3149 assembler->CheckNotAtStart(trace->backtrack());	3151 assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack());

3150 Trace at_start_trace = *trace;	3152 Trace at_start_trace = *trace;

3151 at_start_trace.set_at_start(true);	3153 at_start_trace.set_at_start(Trace::TRUE_VALUE);

3152 on_success()->Emit(compiler, &at_start_trace);	3154 on_success()->Emit(compiler, &at_start_trace);

3153 return;	3155 return;

3154 }	3156 }

3155 }	3157 }

3156 break;	3158 break;

3157 case AFTER_NEWLINE:	3159 case AFTER_NEWLINE:

3158 EmitHat(compiler, on_success(), trace);	3160 EmitHat(compiler, on_success(), trace);

3159 return;	3161 return;

3160 case AT_BOUNDARY:	3162 case AT_BOUNDARY:

3161 case AT_NON_BOUNDARY: {	3163 case AT_NON_BOUNDARY: {

(...skipping 52 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3214 TextEmitPassType pass,	3216 TextEmitPassType pass,

3215 bool preloaded,	3217 bool preloaded,

3216 Trace* trace,	3218 Trace* trace,

3217 bool first_element_checked,	3219 bool first_element_checked,

3218 int* checked_up_to) {	3220 int* checked_up_to) {

3219 RegExpMacroAssembler* assembler = compiler->macro_assembler();	3221 RegExpMacroAssembler* assembler = compiler->macro_assembler();

3220 Isolate* isolate = assembler->isolate();	3222 Isolate* isolate = assembler->isolate();

3221 bool one_byte = compiler->one_byte();	3223 bool one_byte = compiler->one_byte();

3222 Label* backtrack = trace->backtrack();	3224 Label* backtrack = trace->backtrack();

3223 QuickCheckDetails* quick_check = trace->quick_check_performed();	3225 QuickCheckDetails* quick_check = trace->quick_check_performed();

3224 int element_count = elms_->length();	3226 int element_count = elements()->length();

	3227 int backward_offset = read_backward() ? -Length() : 0;

3225 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {	3228 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {

3226 TextElement elm = elms_->at(i);	3229 TextElement elm = elements()->at(i);

3227 int cp_offset = trace->cp_offset() + elm.cp_offset();	3230 int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;

3228 if (elm.text_type() == TextElement::ATOM) {	3231 if (elm.text_type() == TextElement::ATOM) {

3229 Vector<const uc16> quarks = elm.atom()->data();	3232 Vector<const uc16> quarks = elm.atom()->data();

3230 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {	3233 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {

3231 if (first_element_checked && i == 0 && j == 0) continue;	3234 if (first_element_checked && i == 0 && j == 0) continue;

3232 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;	3235 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;

3233 EmitCharacterFunction* emit_function = NULL;	3236 EmitCharacterFunction* emit_function = NULL;

3234 switch (pass) {	3237 switch (pass) {

3235 case NON_LATIN1_MATCH:	3238 case NON_LATIN1_MATCH:

3236 DCHECK(one_byte);	3239 DCHECK(one_byte);

3237 if (quarks[j] > String::kMaxOneByteCharCode) {	3240 if (quarks[j] > String::kMaxOneByteCharCode) {

3238 assembler->GoTo(backtrack);	3241 assembler->GoTo(backtrack);

3239 return;	3242 return;

3240 }	3243 }

3241 break;	3244 break;

3242 case NON_LETTER_CHARACTER_MATCH:	3245 case NON_LETTER_CHARACTER_MATCH:

3243 emit_function = &EmitAtomNonLetter;	3246 emit_function = &EmitAtomNonLetter;

3244 break;	3247 break;

3245 case SIMPLE_CHARACTER_MATCH:	3248 case SIMPLE_CHARACTER_MATCH:

3246 emit_function = &EmitSimpleCharacter;	3249 emit_function = &EmitSimpleCharacter;

3247 break;	3250 break;

3248 case CASE_CHARACTER_MATCH:	3251 case CASE_CHARACTER_MATCH:

3249 emit_function = &EmitAtomLetter;	3252 emit_function = &EmitAtomLetter;

3250 break;	3253 break;

3251 default:	3254 default:

3252 break;	3255 break;

3253 }	3256 }

3254 if (emit_function != NULL) {	3257 if (emit_function != NULL) {

3255 bool bound_checked = emit_function(isolate,	3258 bool bounds_check = *checked_up_to < cp_offset + j \|\| read_backward();

3256 compiler,	3259 bool bound_checked =

3257 quarks[j],	3260 emit_function(isolate, compiler, quarks[j], backtrack,

3258 backtrack,	3261 cp_offset + j, bounds_check, preloaded);

3259 cp_offset + j,

3260 *checked_up_to < cp_offset + j,

3261 preloaded);

3262 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);	3262 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);

3263 }	3263 }

3264 }	3264 }

3265 } else {	3265 } else {

3266 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type());	3266 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type());

3267 if (pass == CHARACTER_CLASS_MATCH) {	3267 if (pass == CHARACTER_CLASS_MATCH) {

3268 if (first_element_checked && i == 0) continue;	3268 if (first_element_checked && i == 0) continue;

3269 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;	3269 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;

3270 RegExpCharacterClass* cc = elm.char_class();	3270 RegExpCharacterClass* cc = elm.char_class();

	3271 bool bounds_check = *checked_up_to < cp_offset \|\| read_backward();

3271 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,	3272 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,

3272 *checked_up_to < cp_offset, preloaded, zone());	3273 bounds_check, preloaded, zone());

3273 UpdateBoundsCheck(cp_offset, checked_up_to);	3274 UpdateBoundsCheck(cp_offset, checked_up_to);

3274 }	3275 }

3275 }	3276 }

3276 }	3277 }

3277 }	3278 }

3278	3279

3279	3280

3280 int TextNode::Length() {	3281 int TextNode::Length() {

3281 TextElement elm = elms_->last();	3282 TextElement elm = elements()->last();

3282 DCHECK(elm.cp_offset() >= 0);	3283 DCHECK(elm.cp_offset() >= 0);

3283 return elm.cp_offset() + elm.length();	3284 return elm.cp_offset() + elm.length();

3284 }	3285 }

3285	3286

3286	3287

3287 bool TextNode::SkipPass(int int_pass, bool ignore_case) {	3288 bool TextNode::SkipPass(int int_pass, bool ignore_case) {

3288 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);	3289 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);

3289 if (ignore_case) {	3290 if (ignore_case) {

3290 return pass == SIMPLE_CHARACTER_MATCH;	3291 return pass == SIMPLE_CHARACTER_MATCH;

3291 } else {	3292 } else {

(...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3340 TextEmitPass(compiler,	3341 TextEmitPass(compiler,

3341 static_cast<TextEmitPassType>(pass),	3342 static_cast<TextEmitPassType>(pass),

3342 false,	3343 false,

3343 trace,	3344 trace,

3344 first_elt_done,	3345 first_elt_done,

3345 &bound_checked_to);	3346 &bound_checked_to);

3346 }	3347 }

3347 }	3348 }

3348	3349

3349 Trace successor_trace(*trace);	3350 Trace successor_trace(*trace);

3350 successor_trace.set_at_start(false);	3351 // If we advance backward, we may end up at the start.

3351 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);	3352 successor_trace.AdvanceCurrentPositionInTrace(

	3353 read_backward() ? -Length() : Length(), compiler);

	3354 successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN

	3355 : Trace::FALSE_VALUE);

3352 RecursionCheck rc(compiler);	3356 RecursionCheck rc(compiler);

3353 on_success()->Emit(compiler, &successor_trace);	3357 on_success()->Emit(compiler, &successor_trace);

3354 }	3358 }

3355	3359

3356	3360

3357 void Trace::InvalidateCurrentCharacter() {	3361 void Trace::InvalidateCurrentCharacter() {

3358 characters_preloaded_ = 0;	3362 characters_preloaded_ = 0;

3359 }	3363 }

3360	3364

3361	3365

3362 void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {	3366 void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {

3363 DCHECK(by > 0);

3364 // We don't have an instruction for shifting the current character register	3367 // We don't have an instruction for shifting the current character register

3365 // down or for using a shifted value for anything so lets just forget that	3368 // down or for using a shifted value for anything so lets just forget that

3366 // we preloaded any characters into it.	3369 // we preloaded any characters into it.

3367 characters_preloaded_ = 0;	3370 characters_preloaded_ = 0;

3368 // Adjust the offsets of the quick check performed information. This	3371 // Adjust the offsets of the quick check performed information. This

3369 // information is used to find out what we already determined about the	3372 // information is used to find out what we already determined about the

3370 // characters by means of mask and compare.	3373 // characters by means of mask and compare.

3371 quick_check_performed_.Advance(by, compiler->one_byte());	3374 quick_check_performed_.Advance(by, compiler->one_byte());

3372 cp_offset_ += by;	3375 cp_offset_ += by;

3373 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {	3376 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {

3374 compiler->SetRegExpTooBig();	3377 compiler->SetRegExpTooBig();

3375 cp_offset_ = 0;	3378 cp_offset_ = 0;

3376 }	3379 }

3377 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);	3380 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);

3378 }	3381 }

3379	3382

3380	3383

3381 void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) {	3384 void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) {

3382 int element_count = elms_->length();	3385 int element_count = elements()->length();

3383 for (int i = 0; i < element_count; i++) {	3386 for (int i = 0; i < element_count; i++) {

3384 TextElement elm = elms_->at(i);	3387 TextElement elm = elements()->at(i);

3385 if (elm.text_type() == TextElement::CHAR_CLASS) {	3388 if (elm.text_type() == TextElement::CHAR_CLASS) {

3386 RegExpCharacterClass* cc = elm.char_class();	3389 RegExpCharacterClass* cc = elm.char_class();

3387 // None of the standard character classes is different in the case	3390 // None of the standard character classes is different in the case

3388 // independent case and it slows us down if we don't know that.	3391 // independent case and it slows us down if we don't know that.

3389 if (cc->is_standard(zone())) continue;	3392 if (cc->is_standard(zone())) continue;

3390 ZoneList<CharacterRange>* ranges = cc->ranges(zone());	3393 ZoneList<CharacterRange>* ranges = cc->ranges(zone());

3391 int range_count = ranges->length();	3394 int range_count = ranges->length();

3392 for (int j = 0; j < range_count; j++) {	3395 for (int j = 0; j < range_count; j++) {

3393 ranges->at(j).AddCaseEquivalents(isolate, zone(), ranges, is_one_byte);	3396 ranges->at(j).AddCaseEquivalents(isolate, zone(), ranges, is_one_byte);

3394 }	3397 }

3395 }	3398 }

3396 }	3399 }

3397 }	3400 }

3398	3401

3399	3402

3400 int TextNode::GreedyLoopTextLength() {	3403 int TextNode::GreedyLoopTextLength() {

3401 TextElement elm = elms_->at(elms_->length() - 1);	3404 return read_backward() ? kNodeIsTooComplexForGreedyLoops : Length();
	erikcorry 2015/11/11 15:39:04 This is a case where perhaps we do need a little o This is a case where perhaps we do need a little optimization. If we have a very simple loop like: foo.bar or foo(?:bar)baz then the naïve solution can use backtracking stack proportional to the length of the string. This is why we recognize greedy loops, which should really be called "greedy loops with a simple fixed width body". In that case we just use registers and position counting, instead of pushing a bunch of backtracking commands that all just decrement the current position. With this change you can use memory proportional to the string size if you search for: (?<=foo.)bar as it searches backwards from bar to the start of the string, looking for foo. This will overflow the backtracking stack for big strings. In the Fletch change the greedy loop optimization is taken care of even in backwards mode. See the Irregexp slide show for details. Slides 35 to 57 (skip the interlude in the middle). By the way in Unicode mode, . won't be an optimizable greedy loop in any direction :-/ Yang 2015/11/12 08:25:35 Done. Show quoted text On 2015/11/11 15:39:04, erikcorry wrote: > This is a case where perhaps we do need a little optimization. If we have a > very simple loop like: > > foo.bar > > or > > foo(?:bar)baz > > then the naïve solution can use backtracking stack proportional to the length of > the string. This is why we recognize greedy loops, which should really be > called "greedy loops with a simple fixed width body". In that case we just use > registers and position counting, instead of pushing a bunch of backtracking > commands that all just decrement the current position. > > With this change you can use memory proportional to the string size if you > search for: > > (?<=foo.)bar > > as it searches backwards from bar to the start of the string, looking for foo. > This will overflow the backtracking stack for big strings. > > In the Fletch change the greedy loop optimization is taken care of even in > backwards mode. > > See the Irregexp slide show for details. Slides 35 to 57 (skip the interlude in > the middle). > > By the way in Unicode mode, . won't be an optimizable greedy loop in any > direction :-/ Done.
3402 return elm.cp_offset() + elm.length();

3403 }	3405 }

3404	3406

3405	3407

3406 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(	3408 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
	erikcorry 2015/11/11 15:39:04 Not quite sure, but this optimization may have to Not quite sure, but this optimization may have to be switched off for backwards. It's basically there to handle the initial skipping forwards-to-find-the-match part, which will always be forwards. You can trigger it in the middle with [^]* though. Yang 2015/11/12 08:25:35 I haven't found a test case where this would becom Show quoted text On 2015/11/11 15:39:04, erikcorry wrote: > Not quite sure, but this optimization may have to be switched off for backwards. > It's basically there to handle the initial skipping forwards-to-find-the-match > part, which will always be forwards. You can trigger it in the middle with [^]* > though. I haven't found a test case where this would become an issue. The caller of this method uses the return value to determine what eats_at_least value to use. But we already return 0 for nodes that read backward. I added a line just to be sure.
3407 RegExpCompiler* compiler) {	3409 RegExpCompiler* compiler) {

3408 if (elms_->length() != 1) return NULL;	3410 if (elements()->length() != 1) return NULL;

3409 TextElement elm = elms_->at(0);	3411 TextElement elm = elements()->at(0);

3410 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;	3412 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;

3411 RegExpCharacterClass* node = elm.char_class();	3413 RegExpCharacterClass* node = elm.char_class();

3412 ZoneList<CharacterRange>* ranges = node->ranges(zone());	3414 ZoneList<CharacterRange>* ranges = node->ranges(zone());

3413 if (!CharacterRange::IsCanonical(ranges)) {	3415 if (!CharacterRange::IsCanonical(ranges)) {

3414 CharacterRange::Canonicalize(ranges);	3416 CharacterRange::Canonicalize(ranges);

3415 }	3417 }

3416 if (node->is_negated()) {	3418 if (node->is_negated()) {

3417 return ranges->length() == 0 ? on_success() : NULL;	3419 return ranges->length() == 0 ? on_success() : NULL;

3418 }	3420 }

3419 if (ranges->length() != 1) return NULL;	3421 if (ranges->length() != 1) return NULL;

(...skipping 454 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3874 * V \|S	3876 * V \|S

3875 * Q2 ---> U----->backtrack	3877 * Q2 ---> U----->backtrack

3876 * \| F /	3878 * \| F /

3877 * S\| /	3879 * S\| /

3878 * V F /	3880 * V F /

3879 * S2--/	3881 * S2--/

3880 */	3882 */

3881	3883

3882 GreedyLoopState::GreedyLoopState(bool not_at_start) {	3884 GreedyLoopState::GreedyLoopState(bool not_at_start) {

3883 counter_backtrack_trace_.set_backtrack(&label_);	3885 counter_backtrack_trace_.set_backtrack(&label_);

3884 if (not_at_start) counter_backtrack_trace_.set_at_start(false);	3886 if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE);

3885 }	3887 }

3886	3888

3887	3889

3888 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {	3890 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {

3889 #ifdef DEBUG	3891 #ifdef DEBUG

3890 int choice_count = alternatives_->length();	3892 int choice_count = alternatives_->length();

3891 for (int i = 0; i < choice_count - 1; i++) {	3893 for (int i = 0; i < choice_count - 1; i++) {

3892 GuardedAlternative alternative = alternatives_->at(i);	3894 GuardedAlternative alternative = alternatives_->at(i);

3893 ZoneList<Guard> guards = alternative.guards();	3895 ZoneList<Guard> guards = alternative.guards();

3894 int guard_count = (guards == NULL) ? 0 : guards->length();	3896 int guard_count = (guards == NULL) ? 0 : guards->length();

(...skipping 106 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4001 // and other simple nodes. These are handled by pushing the current	4003 // and other simple nodes. These are handled by pushing the current

4002 // position on the stack and then incrementing the current position each	4004 // position on the stack and then incrementing the current position each

4003 // time around the switch. On backtrack we decrement the current position	4005 // time around the switch. On backtrack we decrement the current position

4004 // and check it against the pushed value. This avoids pushing backtrack	4006 // and check it against the pushed value. This avoids pushing backtrack

4005 // information for each iteration of the loop, which could take up a lot of	4007 // information for each iteration of the loop, which could take up a lot of

4006 // space.	4008 // space.

4007 DCHECK(trace->stop_node() == NULL);	4009 DCHECK(trace->stop_node() == NULL);

4008 macro_assembler->PushCurrentPosition();	4010 macro_assembler->PushCurrentPosition();

4009 Label greedy_match_failed;	4011 Label greedy_match_failed;

4010 Trace greedy_match_trace;	4012 Trace greedy_match_trace;

4011 if (not_at_start()) greedy_match_trace.set_at_start(false);	4013 if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE);

4012 greedy_match_trace.set_backtrack(&greedy_match_failed);	4014 greedy_match_trace.set_backtrack(&greedy_match_failed);

4013 Label loop_label;	4015 Label loop_label;

4014 macro_assembler->Bind(&loop_label);	4016 macro_assembler->Bind(&loop_label);

4015 greedy_match_trace.set_stop_node(this);	4017 greedy_match_trace.set_stop_node(this);

4016 greedy_match_trace.set_loop_label(&loop_label);	4018 greedy_match_trace.set_loop_label(&loop_label);

4017 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);	4019 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);

4018 macro_assembler->Bind(&greedy_match_failed);	4020 macro_assembler->Bind(&greedy_match_failed);

4019	4021

4020 Label second_choice; // For use in greedy matches.	4022 Label second_choice; // For use in greedy matches.

4021 macro_assembler->Bind(&second_choice);	4023 macro_assembler->Bind(&second_choice);

(...skipping 325 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4347 }	4349 }

4348	4350

4349 LimitResult limit_result = LimitVersions(compiler, trace);	4351 LimitResult limit_result = LimitVersions(compiler, trace);

4350 if (limit_result == DONE) return;	4352 if (limit_result == DONE) return;

4351 DCHECK(limit_result == CONTINUE);	4353 DCHECK(limit_result == CONTINUE);

4352	4354

4353 RecursionCheck rc(compiler);	4355 RecursionCheck rc(compiler);

4354	4356

4355 DCHECK_EQ(start_reg_ + 1, end_reg_);	4357 DCHECK_EQ(start_reg_ + 1, end_reg_);

4356 if (compiler->ignore_case()) {	4358 if (compiler->ignore_case()) {

4357 assembler->CheckNotBackReferenceIgnoreCase(start_reg_,	4359 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),

4358 trace->backtrack());	4360 trace->backtrack());

4359 } else {	4361 } else {

4360 assembler->CheckNotBackReference(start_reg_, trace->backtrack());	4362 assembler->CheckNotBackReference(start_reg_, read_backward(),

	4363 trace->backtrack());

4361 }	4364 }

	4365 // We are going to advance backward, so we may end up at the start.

	4366 if (read_backward()) trace->set_at_start(Trace::UNKNOWN);

4362 on_success()->Emit(compiler, trace);	4367 on_success()->Emit(compiler, trace);

4363 }	4368 }

4364	4369

4365	4370

4366 // -------------------------------------------------------------------	4371 // -------------------------------------------------------------------

4367 // Dot/dotty output	4372 // Dot/dotty output

4368	4373

4369	4374

4370 #ifdef DEBUG	4375 #ifdef DEBUG

4371	4376

(...skipping 340 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4712	4717

4713	4718

4714 // -------------------------------------------------------------------	4719 // -------------------------------------------------------------------

4715 // Tree to graph conversion	4720 // Tree to graph conversion

4716	4721

4717 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,	4722 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,

4718 RegExpNode* on_success) {	4723 RegExpNode* on_success) {

4719 ZoneList<TextElement>* elms =	4724 ZoneList<TextElement>* elms =

4720 new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone());	4725 new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone());

4721 elms->Add(TextElement::Atom(this), compiler->zone());	4726 elms->Add(TextElement::Atom(this), compiler->zone());

4722 return new(compiler->zone()) TextNode(elms, on_success);	4727 return new (compiler->zone()) TextNode(elms, read_backward(), on_success);

4723 }	4728 }

4724	4729

4725	4730

4726 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,	4731 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,

4727 RegExpNode* on_success) {	4732 RegExpNode* on_success) {

4728 return new(compiler->zone()) TextNode(elements(), on_success);	4733 return new (compiler->zone())

	4734 TextNode(elements(), read_backward(), on_success);

4729 }	4735 }

4730	4736

4731	4737

4732 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,	4738 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,

4733 const int* special_class,	4739 const int* special_class,

4734 int length) {	4740 int length) {

4735 length--; // Remove final 0x10000.	4741 length--; // Remove final 0x10000.

4736 DCHECK(special_class[length] == 0x10000);	4742 DCHECK(special_class[length] == 0x10000);

4737 DCHECK(ranges->length() != 0);	4743 DCHECK(ranges->length() != 0);

4738 DCHECK(length != 0);	4744 DCHECK(length != 0);

(...skipping 76 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4815 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {	4821 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {

4816 set_.set_standard_set_type('W');	4822 set_.set_standard_set_type('W');

4817 return true;	4823 return true;

4818 }	4824 }

4819 return false;	4825 return false;

4820 }	4826 }

4821	4827

4822	4828

4823 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,	4829 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,

4824 RegExpNode* on_success) {	4830 RegExpNode* on_success) {

4825 return new(compiler->zone()) TextNode(this, on_success);	4831 return new (compiler->zone()) TextNode(this, read_backward(), on_success);

4826 }	4832 }

4827	4833

4828	4834

4829 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {	4835 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {

4830 RegExpAtom* atom1 = (*a)->AsAtom();	4836 RegExpAtom* atom1 = (*a)->AsAtom();

4831 RegExpAtom* atom2 = (*b)->AsAtom();	4837 RegExpAtom* atom2 = (*b)->AsAtom();

4832 uc16 character1 = atom1->data().at(0);	4838 uc16 character1 = atom1->data().at(0);

4833 uc16 character2 = atom2->data().at(0);	4839 uc16 character2 = atom2->data().at(0);

4834 if (character1 < character2) return -1;	4840 if (character1 < character2) return -1;

4835 if (character1 > character2) return 1;	4841 if (character1 > character2) return 1;

(...skipping 124 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4960 for (int j = 1; j < run_length && prefix_length > 1; j++) {	4966 for (int j = 1; j < run_length && prefix_length > 1; j++) {

4961 RegExpAtom* old_atom =	4967 RegExpAtom* old_atom =

4962 alternatives->at(j + first_with_prefix)->AsAtom();	4968 alternatives->at(j + first_with_prefix)->AsAtom();

4963 for (int k = 1; k < prefix_length; k++) {	4969 for (int k = 1; k < prefix_length; k++) {

4964 if (atom->data().at(k) != old_atom->data().at(k)) {	4970 if (atom->data().at(k) != old_atom->data().at(k)) {

4965 prefix_length = k;	4971 prefix_length = k;

4966 break;	4972 break;

4967 }	4973 }

4968 }	4974 }

4969 }	4975 }

4970 RegExpAtom* prefix =	4976 RegExpAtom* prefix = new (zone) RegExpAtom(

4971 new (zone) RegExpAtom(atom->data().SubVector(0, prefix_length));	4977 atom->data().SubVector(0, prefix_length), read_direction());

4972 ZoneList<RegExpTree> pair = new (zone) ZoneList<RegExpTree*>(2, zone);	4978 ZoneList<RegExpTree> pair = new (zone) ZoneList<RegExpTree*>(2, zone);

4973 pair->Add(prefix, zone);	4979 pair->Add(prefix, zone);

4974 ZoneList<RegExpTree> suffixes =	4980 ZoneList<RegExpTree> suffixes =

4975 new (zone) ZoneList<RegExpTree*>(run_length, zone);	4981 new (zone) ZoneList<RegExpTree*>(run_length, zone);

4976 for (int j = 0; j < run_length; j++) {	4982 for (int j = 0; j < run_length; j++) {

4977 RegExpAtom* old_atom =	4983 RegExpAtom* old_atom =

4978 alternatives->at(j + first_with_prefix)->AsAtom();	4984 alternatives->at(j + first_with_prefix)->AsAtom();

4979 int len = old_atom->length();	4985 int len = old_atom->length();

4980 if (len == prefix_length) {	4986 if (len == prefix_length) {

4981 suffixes->Add(new (zone) RegExpEmpty(), zone);	4987 suffixes->Add(new (zone) RegExpEmpty(), zone);

4982 } else {	4988 } else {

4983 RegExpTree* suffix = new (zone) RegExpAtom(	4989 RegExpTree* suffix = new (zone) RegExpAtom(

4984 old_atom->data().SubVector(prefix_length, old_atom->length()));	4990 old_atom->data().SubVector(prefix_length, old_atom->length()),

	4991 read_direction());

4985 suffixes->Add(suffix, zone);	4992 suffixes->Add(suffix, zone);

4986 }	4993 }

4987 }	4994 }

4988 pair->Add(new (zone) RegExpDisjunction(suffixes), zone);	4995 pair->Add(new (zone) RegExpDisjunction(suffixes, read_direction()), zone);

4989 alternatives->at(write_posn++) = new (zone) RegExpAlternative(pair);	4996 alternatives->at(write_posn++) =

	4997 new (zone) RegExpAlternative(pair, read_direction());

4990 } else {	4998 } else {

4991 // Just copy any non-worthwhile alternatives.	4999 // Just copy any non-worthwhile alternatives.

4992 for (int j = first_with_prefix; j < i; j++) {	5000 for (int j = first_with_prefix; j < i; j++) {

4993 alternatives->at(write_posn++) = alternatives->at(j);	5001 alternatives->at(write_posn++) = alternatives->at(j);

4994 }	5002 }

4995 }	5003 }

4996 }	5004 }

4997 alternatives->Rewind(write_posn); // Trim end of array.	5005 alternatives->Rewind(write_posn); // Trim end of array.

4998 }	5006 }

4999	5007

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5033 // Found non-trivial run of single-character alternatives.	5041 // Found non-trivial run of single-character alternatives.

5034 int run_length = i - first_in_run;	5042 int run_length = i - first_in_run;

5035 ZoneList<CharacterRange>* ranges =	5043 ZoneList<CharacterRange>* ranges =

5036 new (zone) ZoneList<CharacterRange>(2, zone);	5044 new (zone) ZoneList<CharacterRange>(2, zone);

5037 for (int j = 0; j < run_length; j++) {	5045 for (int j = 0; j < run_length; j++) {

5038 RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom();	5046 RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom();

5039 DCHECK_EQ(old_atom->length(), 1);	5047 DCHECK_EQ(old_atom->length(), 1);

5040 ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);	5048 ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);

5041 }	5049 }

5042 alternatives->at(write_posn++) =	5050 alternatives->at(write_posn++) =

5043 new (zone) RegExpCharacterClass(ranges, false);	5051 new (zone) RegExpCharacterClass(ranges, false, read_direction());

5044 } else {	5052 } else {

5045 // Just copy any trivial alternatives.	5053 // Just copy any trivial alternatives.

5046 for (int j = first_in_run; j < i; j++) {	5054 for (int j = first_in_run; j < i; j++) {

5047 alternatives->at(write_posn++) = alternatives->at(j);	5055 alternatives->at(write_posn++) = alternatives->at(j);

5048 }	5056 }

5049 }	5057 }

5050 }	5058 }

5051 alternatives->Rewind(write_posn); // Trim end of array.	5059 alternatives->Rewind(write_posn); // Trim end of array.

5052 }	5060 }

5053	5061

(...skipping 233 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5287 // lookahead in one side and an end-of-input on the other side.	5295 // lookahead in one side and an end-of-input on the other side.

5288 // We need two registers for the lookahead.	5296 // We need two registers for the lookahead.

5289 int stack_pointer_register = compiler->AllocateRegister();	5297 int stack_pointer_register = compiler->AllocateRegister();

5290 int position_register = compiler->AllocateRegister();	5298 int position_register = compiler->AllocateRegister();

5291 // The ChoiceNode to distinguish between a newline and end-of-input.	5299 // The ChoiceNode to distinguish between a newline and end-of-input.

5292 ChoiceNode* result = new(zone) ChoiceNode(2, zone);	5300 ChoiceNode* result = new(zone) ChoiceNode(2, zone);

5293 // Create a newline atom.	5301 // Create a newline atom.

5294 ZoneList<CharacterRange>* newline_ranges =	5302 ZoneList<CharacterRange>* newline_ranges =

5295 new(zone) ZoneList<CharacterRange>(3, zone);	5303 new(zone) ZoneList<CharacterRange>(3, zone);

5296 CharacterRange::AddClassEscape('n', newline_ranges, zone);	5304 CharacterRange::AddClassEscape('n', newline_ranges, zone);

5297 RegExpCharacterClass* newline_atom = new(zone) RegExpCharacterClass('n');	5305 RegExpCharacterClass* newline_atom =

5298 TextNode* newline_matcher = new(zone) TextNode(	5306 new (zone) RegExpCharacterClass('n', RegExpTree::READ_FORWARD);
	erikcorry 2015/11/11 15:39:04 Yes :-) Do we have a test of $ and ^ inside lookb Yes :-) Do we have a test of $ and ^ inside lookbehind in a /m regexp? Yang 2015/11/12 08:25:34 Added a couple of tests. Show quoted text On 2015/11/11 15:39:04, erikcorry wrote: > Yes :-) > > Do we have a test of $ and ^ inside lookbehind in a /m regexp? Added a couple of tests.
5299 newline_atom,	5307 TextNode* newline_matcher = new (zone) TextNode(

5300 ActionNode::PositiveSubmatchSuccess(stack_pointer_register,	5308 newline_atom, false, ActionNode::PositiveSubmatchSuccess(

5301 position_register,	5309 stack_pointer_register, position_register,

5302 0, // No captures inside.	5310 0, // No captures inside.

5303 -1, // Ignored if no captures.	5311 -1, // Ignored if no captures.

5304 on_success));	5312 on_success));

5305 // Create an end-of-input matcher.	5313 // Create an end-of-input matcher.

5306 RegExpNode* end_of_line = ActionNode::BeginSubmatch(	5314 RegExpNode* end_of_line = ActionNode::BeginSubmatch(

5307 stack_pointer_register,	5315 stack_pointer_register,

5308 position_register,	5316 position_register,

5309 newline_matcher);	5317 newline_matcher);

5310 // Add the two alternatives to the ChoiceNode.	5318 // Add the two alternatives to the ChoiceNode.

5311 GuardedAlternative eol_alternative(end_of_line);	5319 GuardedAlternative eol_alternative(end_of_line);

5312 result->AddAlternative(eol_alternative);	5320 result->AddAlternative(eol_alternative);

5313 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));	5321 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));

5314 result->AddAlternative(end_alternative);	5322 result->AddAlternative(end_alternative);

5315 return result;	5323 return result;

5316 }	5324 }

5317 default:	5325 default:

5318 UNREACHABLE();	5326 UNREACHABLE();

5319 }	5327 }

5320 return on_success;	5328 return on_success;

5321 }	5329 }

5322	5330

5323	5331

5324 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,	5332 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,

5325 RegExpNode* on_success) {	5333 RegExpNode* on_success) {

5326 return new(compiler->zone())	5334 return new (compiler->zone()) BackReferenceNode(

5327 BackReferenceNode(RegExpCapture::StartRegister(index()),	5335 RegExpCapture::StartRegister(index()),

5328 RegExpCapture::EndRegister(index()),	5336 RegExpCapture::EndRegister(index()), read_backward(), on_success);

5329 on_success);

5330 }	5337 }

5331	5338

5332	5339

5333 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,	5340 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,

5334 RegExpNode* on_success) {	5341 RegExpNode* on_success) {

5335 return on_success;	5342 return on_success;

5336 }	5343 }

5337	5344

5338	5345

5339 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,	5346 RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler,

5340 RegExpNode* on_success) {	5347 RegExpNode* on_success) {

5341 int stack_pointer_register = compiler->AllocateRegister();	5348 int stack_pointer_register = compiler->AllocateRegister();

5342 int position_register = compiler->AllocateRegister();	5349 int position_register = compiler->AllocateRegister();

5343	5350

5344 const int registers_per_capture = 2;	5351 const int registers_per_capture = 2;

5345 const int register_of_first_capture = 2;	5352 const int register_of_first_capture = 2;

5346 int register_count = capture_count_ * registers_per_capture;	5353 int register_count = capture_count_ * registers_per_capture;

5347 int register_start =	5354 int register_start =

5348 register_of_first_capture + capture_from_ * registers_per_capture;	5355 register_of_first_capture + capture_from_ * registers_per_capture;

5349	5356

5350 RegExpNode* success;

5351 if (is_positive()) {	5357 if (is_positive()) {

5352 RegExpNode* node = ActionNode::BeginSubmatch(	5358 RegExpNode* node = ActionNode::BeginSubmatch(

5353 stack_pointer_register,	5359 stack_pointer_register,

5354 position_register,	5360 position_register,

5355 body()->ToNode(	5361 body()->ToNode(

5356 compiler,	5362 compiler,

5357 ActionNode::PositiveSubmatchSuccess(stack_pointer_register,	5363 ActionNode::PositiveSubmatchSuccess(stack_pointer_register,

5358 position_register,	5364 position_register,

5359 register_count,	5365 register_count,

5360 register_start,	5366 register_start,

5361 on_success)));	5367 on_success)));

5362 return node;	5368 return node;

5363 } else {	5369 } else {

5364 // We use a ChoiceNode for a negative lookahead because it has most of	5370 // We use a ChoiceNode for a negative lookahead because it has most of

5365 // the characteristics we need. It has the body of the lookahead as its	5371 // the characteristics we need. It has the body of the lookahead as its

5366 // first alternative and the expression after the lookahead of the second	5372 // first alternative and the expression after the lookahead of the second

5367 // alternative. If the first alternative succeeds then the	5373 // alternative. If the first alternative succeeds then the

5368 // NegativeSubmatchSuccess will unwind the stack including everything the	5374 // NegativeSubmatchSuccess will unwind the stack including everything the

5369 // choice node set up and backtrack. If the first alternative fails then	5375 // choice node set up and backtrack. If the first alternative fails then

5370 // the second alternative is tried, which is exactly the desired result	5376 // the second alternative is tried, which is exactly the desired result

5371 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special	5377 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special

5372 // ChoiceNode that knows to ignore the first exit when calculating quick	5378 // ChoiceNode that knows to ignore the first exit when calculating quick

5373 // checks.	5379 // checks.

5374 Zone* zone = compiler->zone();	5380 Zone* zone = compiler->zone();

5375	5381

5376 GuardedAlternative body_alt(	5382 GuardedAlternative body_alt(

5377 body()->ToNode(	5383 body()->ToNode(compiler, new (zone) NegativeSubmatchSuccess(

5378 compiler,	5384 stack_pointer_register, position_register,

5379 success = new(zone) NegativeSubmatchSuccess(stack_pointer_register,	5385 register_count, register_start, zone)));

5380 position_register,

5381 register_count,

5382 register_start,

5383 zone)));

5384 ChoiceNode* choice_node =	5386 ChoiceNode* choice_node =

5385 new(zone) NegativeLookaheadChoiceNode(body_alt,	5387 new(zone) NegativeLookaheadChoiceNode(body_alt,

5386 GuardedAlternative(on_success),	5388 GuardedAlternative(on_success),

5387 zone);	5389 zone);

5388 return ActionNode::BeginSubmatch(stack_pointer_register,	5390 return ActionNode::BeginSubmatch(stack_pointer_register,

5389 position_register,	5391 position_register,

5390 choice_node);	5392 choice_node);

5391 }	5393 }

5392 }	5394 }

5393	5395

5394	5396

5395 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,	5397 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,

5396 RegExpNode* on_success) {	5398 RegExpNode* on_success) {

5397 return ToNode(body(), index(), compiler, on_success);	5399 return ToNode(body(), index(), compiler, on_success);

5398 }	5400 }

5399	5401

5400	5402

5401 RegExpNode* RegExpCapture::ToNode(RegExpTree* body,	5403 RegExpNode* RegExpCapture::ToNode(RegExpTree* body,

5402 int index,	5404 int index,

5403 RegExpCompiler* compiler,	5405 RegExpCompiler* compiler,

5404 RegExpNode* on_success) {	5406 RegExpNode* on_success) {

	5407 DCHECK_NOT_NULL(body);

5405 int start_reg = RegExpCapture::StartRegister(index);	5408 int start_reg = RegExpCapture::StartRegister(index);

5406 int end_reg = RegExpCapture::EndRegister(index);	5409 int end_reg = RegExpCapture::EndRegister(index);

	5410 if (body->read_backward()) std::swap(start_reg, end_reg);

5407 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);	5411 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);

5408 RegExpNode* body_node = body->ToNode(compiler, store_end);	5412 RegExpNode* body_node = body->ToNode(compiler, store_end);

5409 return ActionNode::StorePosition(start_reg, true, body_node);	5413 return ActionNode::StorePosition(start_reg, true, body_node);

5410 }	5414 }

5411	5415

5412	5416

5413 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,	5417 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,

5414 RegExpNode* on_success) {	5418 RegExpNode* on_success) {

5415 ZoneList<RegExpTree> children = nodes();	5419 ZoneList<RegExpTree> children = nodes();

5416 RegExpNode* current = on_success;	5420 RegExpNode* current = on_success;

5417 for (int i = children->length() - 1; i >= 0; i--) {	5421 if (read_backward()) {

5418 current = children->at(i)->ToNode(compiler, current);	5422 for (int i = 0; i < children->length(); i++) {

	5423 current = children->at(i)->ToNode(compiler, current);

	5424 }

	5425 } else {

	5426 for (int i = children->length() - 1; i >= 0; i--) {

	5427 current = children->at(i)->ToNode(compiler, current);

	5428 }

5419 }	5429 }

5420 return current;	5430 return current;

5421 }	5431 }

5422	5432

5423	5433

5424 static void AddClass(const int* elmv,	5434 static void AddClass(const int* elmv,

5425 int elmc,	5435 int elmc,

5426 ZoneList<CharacterRange>* ranges,	5436 ZoneList<CharacterRange>* ranges,

5427 Zone* zone) {	5437 Zone* zone) {

5428 elmc--;	5438 elmc--;

(...skipping 855 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6284 0,	6294 0,

6285 &compiler,	6295 &compiler,

6286 compiler.accept());	6296 compiler.accept());

6287 RegExpNode* node = captured_body;	6297 RegExpNode* node = captured_body;

6288 bool is_end_anchored = data->tree->IsAnchoredAtEnd();	6298 bool is_end_anchored = data->tree->IsAnchoredAtEnd();

6289 bool is_start_anchored = data->tree->IsAnchoredAtStart();	6299 bool is_start_anchored = data->tree->IsAnchoredAtStart();

6290 int max_length = data->tree->max_match();	6300 int max_length = data->tree->max_match();

6291 if (!is_start_anchored && !is_sticky) {	6301 if (!is_start_anchored && !is_sticky) {

6292 // Add a .*? at the beginning, outside the body capture, unless	6302 // Add a .*? at the beginning, outside the body capture, unless

6293 // this expression is anchored at the beginning or sticky.	6303 // this expression is anchored at the beginning or sticky.

6294 RegExpNode* loop_node =	6304 RegExpNode* loop_node = RegExpQuantifier::ToNode(

6295 RegExpQuantifier::ToNode(0,	6305 0, RegExpTree::kInfinity, false,

6296 RegExpTree::kInfinity,	6306 new (zone) RegExpCharacterClass('*', RegExpTree::READ_FORWARD),

6297 false,	6307 &compiler, captured_body, data->contains_anchor);

6298 new(zone) RegExpCharacterClass('*'),

6299 &compiler,

6300 captured_body,

6301 data->contains_anchor);

6302	6308

6303 if (data->contains_anchor) {	6309 if (data->contains_anchor) {

6304 // Unroll loop once, to take care of the case that might start	6310 // Unroll loop once, to take care of the case that might start

6305 // at the start of input.	6311 // at the start of input.

6306 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);	6312 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);

6307 first_step_node->AddAlternative(GuardedAlternative(captured_body));	6313 first_step_node->AddAlternative(GuardedAlternative(captured_body));

6308 first_step_node->AddAlternative(GuardedAlternative(	6314 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode(

6309 new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node)));	6315 new (zone) RegExpCharacterClass('*', RegExpTree::READ_FORWARD), false,

	6316 loop_node)));

6310 node = first_step_node;	6317 node = first_step_node;

6311 } else {	6318 } else {

6312 node = loop_node;	6319 node = loop_node;

6313 }	6320 }

6314 }	6321 }

6315 if (is_one_byte) {	6322 if (is_one_byte) {

6316 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);	6323 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);

6317 // Do it again to propagate the new nodes to places where they were not	6324 // Do it again to propagate the new nodes to places where they were not

6318 // put because they had not been calculated yet.	6325 // put because they had not been calculated yet.

6319 if (node != NULL) {	6326 if (node != NULL) {

(...skipping 183 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6503	6510

6504	6511

6505 void RegExpResultsCache::Clear(FixedArray* cache) {	6512 void RegExpResultsCache::Clear(FixedArray* cache) {

6506 for (int i = 0; i < kRegExpResultsCacheSize; i++) {	6513 for (int i = 0; i < kRegExpResultsCacheSize; i++) {

6507 cache->set(i, Smi::FromInt(0));	6514 cache->set(i, Smi::FromInt(0));

6508 }	6515 }

6509 }	6516 }

6510	6517

6511 } // namespace internal	6518 } // namespace internal

6512 } // namespace v8	6519 } // namespace v8

OLD	NEW

« no previous file with comments | « src/regexp/jsregexp.h ('k') | src/regexp/regexp-macro-assembler.h » ('j') | test/cctest/test-regexp.cc » ('J')