Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/regexp/jsregexp.cc

Issue 1451373003: Revert of Experimental support for RegExp lookbehind. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/regexp/jsregexp.h ('k') | src/regexp/mips/regexp-macro-assembler-mips.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/jsregexp.h" 5 #include "src/regexp/jsregexp.h"
6 6
7 #include "src/ast.h" 7 #include "src/ast.h"
8 #include "src/base/platform/platform.h" 8 #include "src/base/platform/platform.h"
9 #include "src/compilation-cache.h" 9 #include "src/compilation-cache.h"
10 #include "src/compiler.h" 10 #include "src/compiler.h"
(...skipping 984 matching lines...) Expand 10 before | Expand all | Expand 10 after
995 void SetRegExpTooBig() { reg_exp_too_big_ = true; } 995 void SetRegExpTooBig() { reg_exp_too_big_ = true; }
996 996
997 inline bool ignore_case() { return ignore_case_; } 997 inline bool ignore_case() { return ignore_case_; }
998 inline bool one_byte() { return one_byte_; } 998 inline bool one_byte() { return one_byte_; }
999 inline bool optimize() { return optimize_; } 999 inline bool optimize() { return optimize_; }
1000 inline void set_optimize(bool value) { optimize_ = value; } 1000 inline void set_optimize(bool value) { optimize_ = value; }
1001 inline bool limiting_recursion() { return limiting_recursion_; } 1001 inline bool limiting_recursion() { return limiting_recursion_; }
1002 inline void set_limiting_recursion(bool value) { 1002 inline void set_limiting_recursion(bool value) {
1003 limiting_recursion_ = value; 1003 limiting_recursion_ = value;
1004 } 1004 }
1005 bool read_backward() { return read_backward_; }
1006 void set_read_backward(bool value) { read_backward_ = value; }
1007 FrequencyCollator* frequency_collator() { return &frequency_collator_; } 1005 FrequencyCollator* frequency_collator() { return &frequency_collator_; }
1008 1006
1009 int current_expansion_factor() { return current_expansion_factor_; } 1007 int current_expansion_factor() { return current_expansion_factor_; }
1010 void set_current_expansion_factor(int value) { 1008 void set_current_expansion_factor(int value) {
1011 current_expansion_factor_ = value; 1009 current_expansion_factor_ = value;
1012 } 1010 }
1013 1011
1014 Isolate* isolate() const { return isolate_; } 1012 Isolate* isolate() const { return isolate_; }
1015 Zone* zone() const { return zone_; } 1013 Zone* zone() const { return zone_; }
1016 1014
1017 static const int kNoRegister = -1; 1015 static const int kNoRegister = -1;
1018 1016
1019 private: 1017 private:
1020 EndNode* accept_; 1018 EndNode* accept_;
1021 int next_register_; 1019 int next_register_;
1022 List<RegExpNode*>* work_list_; 1020 List<RegExpNode*>* work_list_;
1023 int recursion_depth_; 1021 int recursion_depth_;
1024 RegExpMacroAssembler* macro_assembler_; 1022 RegExpMacroAssembler* macro_assembler_;
1025 bool ignore_case_; 1023 bool ignore_case_;
1026 bool one_byte_; 1024 bool one_byte_;
1027 bool reg_exp_too_big_; 1025 bool reg_exp_too_big_;
1028 bool limiting_recursion_; 1026 bool limiting_recursion_;
1029 bool optimize_; 1027 bool optimize_;
1030 bool read_backward_;
1031 int current_expansion_factor_; 1028 int current_expansion_factor_;
1032 FrequencyCollator frequency_collator_; 1029 FrequencyCollator frequency_collator_;
1033 Isolate* isolate_; 1030 Isolate* isolate_;
1034 Zone* zone_; 1031 Zone* zone_;
1035 }; 1032 };
1036 1033
1037 1034
1038 class RecursionCheck { 1035 class RecursionCheck {
1039 public: 1036 public:
1040 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { 1037 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
(...skipping 15 matching lines...) Expand all
1056 RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count, 1053 RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
1057 bool ignore_case, bool one_byte) 1054 bool ignore_case, bool one_byte)
1058 : next_register_(2 * (capture_count + 1)), 1055 : next_register_(2 * (capture_count + 1)),
1059 work_list_(NULL), 1056 work_list_(NULL),
1060 recursion_depth_(0), 1057 recursion_depth_(0),
1061 ignore_case_(ignore_case), 1058 ignore_case_(ignore_case),
1062 one_byte_(one_byte), 1059 one_byte_(one_byte),
1063 reg_exp_too_big_(false), 1060 reg_exp_too_big_(false),
1064 limiting_recursion_(false), 1061 limiting_recursion_(false),
1065 optimize_(FLAG_regexp_optimization), 1062 optimize_(FLAG_regexp_optimization),
1066 read_backward_(false),
1067 current_expansion_factor_(1), 1063 current_expansion_factor_(1),
1068 frequency_collator_(), 1064 frequency_collator_(),
1069 isolate_(isolate), 1065 isolate_(isolate),
1070 zone_(zone) { 1066 zone_(zone) {
1071 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone); 1067 accept_ = new(zone) EndNode(EndNode::ACCEPT, zone);
1072 DCHECK(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); 1068 DCHECK(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
1073 } 1069 }
1074 1070
1075 1071
1076 RegExpEngine::CompilationResult RegExpCompiler::Assemble( 1072 RegExpEngine::CompilationResult RegExpCompiler::Assemble(
(...skipping 144 matching lines...) Expand 10 before | Expand all | Expand 10 after
1221 1217
1222 // The chronologically first deferred action in the trace 1218 // The chronologically first deferred action in the trace
1223 // is used to infer the action needed to restore a register 1219 // is used to infer the action needed to restore a register
1224 // to its previous state (or not, if it's safe to ignore it). 1220 // to its previous state (or not, if it's safe to ignore it).
1225 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR }; 1221 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
1226 DeferredActionUndoType undo_action = IGNORE; 1222 DeferredActionUndoType undo_action = IGNORE;
1227 1223
1228 int value = 0; 1224 int value = 0;
1229 bool absolute = false; 1225 bool absolute = false;
1230 bool clear = false; 1226 bool clear = false;
1231 static const int kNoStore = kMinInt; 1227 int store_position = -1;
1232 int store_position = kNoStore;
1233 // This is a little tricky because we are scanning the actions in reverse 1228 // This is a little tricky because we are scanning the actions in reverse
1234 // historical order (newest first). 1229 // historical order (newest first).
1235 for (DeferredAction* action = actions_; 1230 for (DeferredAction* action = actions_;
1236 action != NULL; 1231 action != NULL;
1237 action = action->next()) { 1232 action = action->next()) {
1238 if (action->Mentions(reg)) { 1233 if (action->Mentions(reg)) {
1239 switch (action->action_type()) { 1234 switch (action->action_type()) {
1240 case ActionNode::SET_REGISTER: { 1235 case ActionNode::SET_REGISTER: {
1241 Trace::DeferredSetRegister* psr = 1236 Trace::DeferredSetRegister* psr =
1242 static_cast<Trace::DeferredSetRegister*>(action); 1237 static_cast<Trace::DeferredSetRegister*>(action);
1243 if (!absolute) { 1238 if (!absolute) {
1244 value += psr->value(); 1239 value += psr->value();
1245 absolute = true; 1240 absolute = true;
1246 } 1241 }
1247 // SET_REGISTER is currently only used for newly introduced loop 1242 // SET_REGISTER is currently only used for newly introduced loop
1248 // counters. They can have a significant previous value if they 1243 // counters. They can have a significant previous value if they
1249 // occour in a loop. TODO(lrn): Propagate this information, so 1244 // occour in a loop. TODO(lrn): Propagate this information, so
1250 // we can set undo_action to IGNORE if we know there is no value to 1245 // we can set undo_action to IGNORE if we know there is no value to
1251 // restore. 1246 // restore.
1252 undo_action = RESTORE; 1247 undo_action = RESTORE;
1253 DCHECK_EQ(store_position, kNoStore); 1248 DCHECK_EQ(store_position, -1);
1254 DCHECK(!clear); 1249 DCHECK(!clear);
1255 break; 1250 break;
1256 } 1251 }
1257 case ActionNode::INCREMENT_REGISTER: 1252 case ActionNode::INCREMENT_REGISTER:
1258 if (!absolute) { 1253 if (!absolute) {
1259 value++; 1254 value++;
1260 } 1255 }
1261 DCHECK_EQ(store_position, kNoStore); 1256 DCHECK_EQ(store_position, -1);
1262 DCHECK(!clear); 1257 DCHECK(!clear);
1263 undo_action = RESTORE; 1258 undo_action = RESTORE;
1264 break; 1259 break;
1265 case ActionNode::STORE_POSITION: { 1260 case ActionNode::STORE_POSITION: {
1266 Trace::DeferredCapture* pc = 1261 Trace::DeferredCapture* pc =
1267 static_cast<Trace::DeferredCapture*>(action); 1262 static_cast<Trace::DeferredCapture*>(action);
1268 if (!clear && store_position == kNoStore) { 1263 if (!clear && store_position == -1) {
1269 store_position = pc->cp_offset(); 1264 store_position = pc->cp_offset();
1270 } 1265 }
1271 1266
1272 // For captures we know that stores and clears alternate. 1267 // For captures we know that stores and clears alternate.
1273 // Other register, are never cleared, and if the occur 1268 // Other register, are never cleared, and if the occur
1274 // inside a loop, they might be assigned more than once. 1269 // inside a loop, they might be assigned more than once.
1275 if (reg <= 1) { 1270 if (reg <= 1) {
1276 // Registers zero and one, aka "capture zero", is 1271 // Registers zero and one, aka "capture zero", is
1277 // always set correctly if we succeed. There is no 1272 // always set correctly if we succeed. There is no
1278 // need to undo a setting on backtrack, because we 1273 // need to undo a setting on backtrack, because we
1279 // will set it again or fail. 1274 // will set it again or fail.
1280 undo_action = IGNORE; 1275 undo_action = IGNORE;
1281 } else { 1276 } else {
1282 undo_action = pc->is_capture() ? CLEAR : RESTORE; 1277 undo_action = pc->is_capture() ? CLEAR : RESTORE;
1283 } 1278 }
1284 DCHECK(!absolute); 1279 DCHECK(!absolute);
1285 DCHECK_EQ(value, 0); 1280 DCHECK_EQ(value, 0);
1286 break; 1281 break;
1287 } 1282 }
1288 case ActionNode::CLEAR_CAPTURES: { 1283 case ActionNode::CLEAR_CAPTURES: {
1289 // Since we're scanning in reverse order, if we've already 1284 // Since we're scanning in reverse order, if we've already
1290 // set the position we have to ignore historically earlier 1285 // set the position we have to ignore historically earlier
1291 // clearing operations. 1286 // clearing operations.
1292 if (store_position == kNoStore) { 1287 if (store_position == -1) {
1293 clear = true; 1288 clear = true;
1294 } 1289 }
1295 undo_action = RESTORE; 1290 undo_action = RESTORE;
1296 DCHECK(!absolute); 1291 DCHECK(!absolute);
1297 DCHECK_EQ(value, 0); 1292 DCHECK_EQ(value, 0);
1298 break; 1293 break;
1299 } 1294 }
1300 default: 1295 default:
1301 UNREACHABLE(); 1296 UNREACHABLE();
1302 break; 1297 break;
(...skipping 10 matching lines...) Expand all
1313 pushes = 0; 1308 pushes = 0;
1314 } 1309 }
1315 1310
1316 assembler->PushRegister(reg, stack_check); 1311 assembler->PushRegister(reg, stack_check);
1317 registers_to_pop->Set(reg, zone); 1312 registers_to_pop->Set(reg, zone);
1318 } else if (undo_action == CLEAR) { 1313 } else if (undo_action == CLEAR) {
1319 registers_to_clear->Set(reg, zone); 1314 registers_to_clear->Set(reg, zone);
1320 } 1315 }
1321 // Perform the chronologically last action (or accumulated increment) 1316 // Perform the chronologically last action (or accumulated increment)
1322 // for the register. 1317 // for the register.
1323 if (store_position != kNoStore) { 1318 if (store_position != -1) {
1324 assembler->WriteCurrentPositionToRegister(reg, store_position); 1319 assembler->WriteCurrentPositionToRegister(reg, store_position);
1325 } else if (clear) { 1320 } else if (clear) {
1326 assembler->ClearRegisters(reg, reg); 1321 assembler->ClearRegisters(reg, reg);
1327 } else if (absolute) { 1322 } else if (absolute) {
1328 assembler->SetRegister(reg, value); 1323 assembler->SetRegister(reg, value);
1329 } else if (value != 0) { 1324 } else if (value != 0) {
1330 assembler->AdvanceRegister(reg, value); 1325 assembler->AdvanceRegister(reg, value);
1331 } 1326 }
1332 } 1327 }
1333 } 1328 }
(...skipping 977 matching lines...) Expand 10 before | Expand all | Expand 10 after
2311 // Match the behaviour of EatsAtLeast on this node. 2306 // Match the behaviour of EatsAtLeast on this node.
2312 if (assertion_type() == AT_START && not_at_start) return; 2307 if (assertion_type() == AT_START && not_at_start) return;
2313 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); 2308 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
2314 SaveBMInfo(bm, not_at_start, offset); 2309 SaveBMInfo(bm, not_at_start, offset);
2315 } 2310 }
2316 2311
2317 2312
2318 int BackReferenceNode::EatsAtLeast(int still_to_find, 2313 int BackReferenceNode::EatsAtLeast(int still_to_find,
2319 int budget, 2314 int budget,
2320 bool not_at_start) { 2315 bool not_at_start) {
2321 if (read_backward()) return 0;
2322 if (budget <= 0) return 0; 2316 if (budget <= 0) return 0;
2323 return on_success()->EatsAtLeast(still_to_find, 2317 return on_success()->EatsAtLeast(still_to_find,
2324 budget - 1, 2318 budget - 1,
2325 not_at_start); 2319 not_at_start);
2326 } 2320 }
2327 2321
2328 2322
2329 int TextNode::EatsAtLeast(int still_to_find, 2323 int TextNode::EatsAtLeast(int still_to_find,
2330 int budget, 2324 int budget,
2331 bool not_at_start) { 2325 bool not_at_start) {
2332 if (read_backward()) return 0;
2333 int answer = Length(); 2326 int answer = Length();
2334 if (answer >= still_to_find) return answer; 2327 if (answer >= still_to_find) return answer;
2335 if (budget <= 0) return answer; 2328 if (budget <= 0) return answer;
2336 // We are not at start after this node so we set the last argument to 'true'. 2329 // We are not at start after this node so we set the last argument to 'true'.
2337 return answer + on_success()->EatsAtLeast(still_to_find - answer, 2330 return answer + on_success()->EatsAtLeast(still_to_find - answer,
2338 budget - 1, 2331 budget - 1,
2339 true); 2332 true);
2340 } 2333 }
2341 2334
2342 2335
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after
2526 bool not_at_start) { 2519 bool not_at_start) {
2527 Isolate* isolate = compiler->macro_assembler()->isolate(); 2520 Isolate* isolate = compiler->macro_assembler()->isolate();
2528 DCHECK(characters_filled_in < details->characters()); 2521 DCHECK(characters_filled_in < details->characters());
2529 int characters = details->characters(); 2522 int characters = details->characters();
2530 int char_mask; 2523 int char_mask;
2531 if (compiler->one_byte()) { 2524 if (compiler->one_byte()) {
2532 char_mask = String::kMaxOneByteCharCode; 2525 char_mask = String::kMaxOneByteCharCode;
2533 } else { 2526 } else {
2534 char_mask = String::kMaxUtf16CodeUnit; 2527 char_mask = String::kMaxUtf16CodeUnit;
2535 } 2528 }
2536 for (int k = 0; k < elements()->length(); k++) { 2529 for (int k = 0; k < elms_->length(); k++) {
2537 TextElement elm = elements()->at(k); 2530 TextElement elm = elms_->at(k);
2538 if (elm.text_type() == TextElement::ATOM) { 2531 if (elm.text_type() == TextElement::ATOM) {
2539 Vector<const uc16> quarks = elm.atom()->data(); 2532 Vector<const uc16> quarks = elm.atom()->data();
2540 for (int i = 0; i < characters && i < quarks.length(); i++) { 2533 for (int i = 0; i < characters && i < quarks.length(); i++) {
2541 QuickCheckDetails::Position* pos = 2534 QuickCheckDetails::Position* pos =
2542 details->positions(characters_filled_in); 2535 details->positions(characters_filled_in);
2543 uc16 c = quarks[i]; 2536 uc16 c = quarks[i];
2544 if (compiler->ignore_case()) { 2537 if (compiler->ignore_case()) {
2545 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 2538 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
2546 int length = GetCaseIndependentLetters(isolate, c, 2539 int length = GetCaseIndependentLetters(isolate, c,
2547 compiler->one_byte(), chars); 2540 compiler->one_byte(), chars);
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after
2678 for (int i = 0; i < characters_; i++) { 2671 for (int i = 0; i < characters_; i++) {
2679 positions_[i].mask = 0; 2672 positions_[i].mask = 0;
2680 positions_[i].value = 0; 2673 positions_[i].value = 0;
2681 positions_[i].determines_perfectly = false; 2674 positions_[i].determines_perfectly = false;
2682 } 2675 }
2683 characters_ = 0; 2676 characters_ = 0;
2684 } 2677 }
2685 2678
2686 2679
2687 void QuickCheckDetails::Advance(int by, bool one_byte) { 2680 void QuickCheckDetails::Advance(int by, bool one_byte) {
2681 DCHECK(by >= 0);
2688 if (by >= characters_) { 2682 if (by >= characters_) {
2689 Clear(); 2683 Clear();
2690 return; 2684 return;
2691 } 2685 }
2692 for (int i = 0; i < characters_ - by; i++) { 2686 for (int i = 0; i < characters_ - by; i++) {
2693 positions_[i] = positions_[by + i]; 2687 positions_[i] = positions_[by + i];
2694 } 2688 }
2695 for (int i = characters_ - by; i < characters_; i++) { 2689 for (int i = characters_ - by; i < characters_; i++) {
2696 positions_[i].mask = 0; 2690 positions_[i].mask = 0;
2697 positions_[i].value = 0; 2691 positions_[i].value = 0;
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
2779 } 2773 }
2780 return false; 2774 return false;
2781 } 2775 }
2782 2776
2783 2777
2784 RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) { 2778 RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {
2785 if (info()->replacement_calculated) return replacement(); 2779 if (info()->replacement_calculated) return replacement();
2786 if (depth < 0) return this; 2780 if (depth < 0) return this;
2787 DCHECK(!info()->visited); 2781 DCHECK(!info()->visited);
2788 VisitMarker marker(info()); 2782 VisitMarker marker(info());
2789 int element_count = elements()->length(); 2783 int element_count = elms_->length();
2790 for (int i = 0; i < element_count; i++) { 2784 for (int i = 0; i < element_count; i++) {
2791 TextElement elm = elements()->at(i); 2785 TextElement elm = elms_->at(i);
2792 if (elm.text_type() == TextElement::ATOM) { 2786 if (elm.text_type() == TextElement::ATOM) {
2793 Vector<const uc16> quarks = elm.atom()->data(); 2787 Vector<const uc16> quarks = elm.atom()->data();
2794 for (int j = 0; j < quarks.length(); j++) { 2788 for (int j = 0; j < quarks.length(); j++) {
2795 uint16_t c = quarks[j]; 2789 uint16_t c = quarks[j];
2796 if (c <= String::kMaxOneByteCharCode) continue; 2790 if (c <= String::kMaxOneByteCharCode) continue;
2797 if (!ignore_case) return set_replacement(NULL); 2791 if (!ignore_case) return set_replacement(NULL);
2798 // Here, we need to check for characters whose upper and lower cases 2792 // Here, we need to check for characters whose upper and lower cases
2799 // are outside the Latin-1 range. 2793 // are outside the Latin-1 range.
2800 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); 2794 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);
2801 // Character is outside Latin-1 completely 2795 // Character is outside Latin-1 completely
(...skipping 343 matching lines...) Expand 10 before | Expand all | Expand 10 after
3145 assembler->GoTo(trace->backtrack()); 3139 assembler->GoTo(trace->backtrack());
3146 assembler->Bind(&ok); 3140 assembler->Bind(&ok);
3147 break; 3141 break;
3148 } 3142 }
3149 case AT_START: { 3143 case AT_START: {
3150 if (trace->at_start() == Trace::FALSE_VALUE) { 3144 if (trace->at_start() == Trace::FALSE_VALUE) {
3151 assembler->GoTo(trace->backtrack()); 3145 assembler->GoTo(trace->backtrack());
3152 return; 3146 return;
3153 } 3147 }
3154 if (trace->at_start() == Trace::UNKNOWN) { 3148 if (trace->at_start() == Trace::UNKNOWN) {
3155 assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack()); 3149 assembler->CheckNotAtStart(trace->backtrack());
3156 Trace at_start_trace = *trace; 3150 Trace at_start_trace = *trace;
3157 at_start_trace.set_at_start(Trace::TRUE_VALUE); 3151 at_start_trace.set_at_start(true);
3158 on_success()->Emit(compiler, &at_start_trace); 3152 on_success()->Emit(compiler, &at_start_trace);
3159 return; 3153 return;
3160 } 3154 }
3161 } 3155 }
3162 break; 3156 break;
3163 case AFTER_NEWLINE: 3157 case AFTER_NEWLINE:
3164 EmitHat(compiler, on_success(), trace); 3158 EmitHat(compiler, on_success(), trace);
3165 return; 3159 return;
3166 case AT_BOUNDARY: 3160 case AT_BOUNDARY:
3167 case AT_NON_BOUNDARY: { 3161 case AT_NON_BOUNDARY: {
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
3220 TextEmitPassType pass, 3214 TextEmitPassType pass,
3221 bool preloaded, 3215 bool preloaded,
3222 Trace* trace, 3216 Trace* trace,
3223 bool first_element_checked, 3217 bool first_element_checked,
3224 int* checked_up_to) { 3218 int* checked_up_to) {
3225 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 3219 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3226 Isolate* isolate = assembler->isolate(); 3220 Isolate* isolate = assembler->isolate();
3227 bool one_byte = compiler->one_byte(); 3221 bool one_byte = compiler->one_byte();
3228 Label* backtrack = trace->backtrack(); 3222 Label* backtrack = trace->backtrack();
3229 QuickCheckDetails* quick_check = trace->quick_check_performed(); 3223 QuickCheckDetails* quick_check = trace->quick_check_performed();
3230 int element_count = elements()->length(); 3224 int element_count = elms_->length();
3231 int backward_offset = read_backward() ? -Length() : 0;
3232 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { 3225 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
3233 TextElement elm = elements()->at(i); 3226 TextElement elm = elms_->at(i);
3234 int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset; 3227 int cp_offset = trace->cp_offset() + elm.cp_offset();
3235 if (elm.text_type() == TextElement::ATOM) { 3228 if (elm.text_type() == TextElement::ATOM) {
3236 Vector<const uc16> quarks = elm.atom()->data(); 3229 Vector<const uc16> quarks = elm.atom()->data();
3237 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { 3230 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
3238 if (first_element_checked && i == 0 && j == 0) continue; 3231 if (first_element_checked && i == 0 && j == 0) continue;
3239 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; 3232 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
3240 EmitCharacterFunction* emit_function = NULL; 3233 EmitCharacterFunction* emit_function = NULL;
3241 switch (pass) { 3234 switch (pass) {
3242 case NON_LATIN1_MATCH: 3235 case NON_LATIN1_MATCH:
3243 DCHECK(one_byte); 3236 DCHECK(one_byte);
3244 if (quarks[j] > String::kMaxOneByteCharCode) { 3237 if (quarks[j] > String::kMaxOneByteCharCode) {
3245 assembler->GoTo(backtrack); 3238 assembler->GoTo(backtrack);
3246 return; 3239 return;
3247 } 3240 }
3248 break; 3241 break;
3249 case NON_LETTER_CHARACTER_MATCH: 3242 case NON_LETTER_CHARACTER_MATCH:
3250 emit_function = &EmitAtomNonLetter; 3243 emit_function = &EmitAtomNonLetter;
3251 break; 3244 break;
3252 case SIMPLE_CHARACTER_MATCH: 3245 case SIMPLE_CHARACTER_MATCH:
3253 emit_function = &EmitSimpleCharacter; 3246 emit_function = &EmitSimpleCharacter;
3254 break; 3247 break;
3255 case CASE_CHARACTER_MATCH: 3248 case CASE_CHARACTER_MATCH:
3256 emit_function = &EmitAtomLetter; 3249 emit_function = &EmitAtomLetter;
3257 break; 3250 break;
3258 default: 3251 default:
3259 break; 3252 break;
3260 } 3253 }
3261 if (emit_function != NULL) { 3254 if (emit_function != NULL) {
3262 bool bounds_check = *checked_up_to < cp_offset + j || read_backward(); 3255 bool bound_checked = emit_function(isolate,
3263 bool bound_checked = 3256 compiler,
3264 emit_function(isolate, compiler, quarks[j], backtrack, 3257 quarks[j],
3265 cp_offset + j, bounds_check, preloaded); 3258 backtrack,
3259 cp_offset + j,
3260 *checked_up_to < cp_offset + j,
3261 preloaded);
3266 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); 3262 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
3267 } 3263 }
3268 } 3264 }
3269 } else { 3265 } else {
3270 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type()); 3266 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type());
3271 if (pass == CHARACTER_CLASS_MATCH) { 3267 if (pass == CHARACTER_CLASS_MATCH) {
3272 if (first_element_checked && i == 0) continue; 3268 if (first_element_checked && i == 0) continue;
3273 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; 3269 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
3274 RegExpCharacterClass* cc = elm.char_class(); 3270 RegExpCharacterClass* cc = elm.char_class();
3275 bool bounds_check = *checked_up_to < cp_offset || read_backward();
3276 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, 3271 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,
3277 bounds_check, preloaded, zone()); 3272 *checked_up_to < cp_offset, preloaded, zone());
3278 UpdateBoundsCheck(cp_offset, checked_up_to); 3273 UpdateBoundsCheck(cp_offset, checked_up_to);
3279 } 3274 }
3280 } 3275 }
3281 } 3276 }
3282 } 3277 }
3283 3278
3284 3279
3285 int TextNode::Length() { 3280 int TextNode::Length() {
3286 TextElement elm = elements()->last(); 3281 TextElement elm = elms_->last();
3287 DCHECK(elm.cp_offset() >= 0); 3282 DCHECK(elm.cp_offset() >= 0);
3288 return elm.cp_offset() + elm.length(); 3283 return elm.cp_offset() + elm.length();
3289 } 3284 }
3290 3285
3291 3286
3292 bool TextNode::SkipPass(int int_pass, bool ignore_case) { 3287 bool TextNode::SkipPass(int int_pass, bool ignore_case) {
3293 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass); 3288 TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);
3294 if (ignore_case) { 3289 if (ignore_case) {
3295 return pass == SIMPLE_CHARACTER_MATCH; 3290 return pass == SIMPLE_CHARACTER_MATCH;
3296 } else { 3291 } else {
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
3345 TextEmitPass(compiler, 3340 TextEmitPass(compiler,
3346 static_cast<TextEmitPassType>(pass), 3341 static_cast<TextEmitPassType>(pass),
3347 false, 3342 false,
3348 trace, 3343 trace,
3349 first_elt_done, 3344 first_elt_done,
3350 &bound_checked_to); 3345 &bound_checked_to);
3351 } 3346 }
3352 } 3347 }
3353 3348
3354 Trace successor_trace(*trace); 3349 Trace successor_trace(*trace);
3355 // If we advance backward, we may end up at the start. 3350 successor_trace.set_at_start(false);
3356 successor_trace.AdvanceCurrentPositionInTrace( 3351 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
3357 read_backward() ? -Length() : Length(), compiler);
3358 successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN
3359 : Trace::FALSE_VALUE);
3360 RecursionCheck rc(compiler); 3352 RecursionCheck rc(compiler);
3361 on_success()->Emit(compiler, &successor_trace); 3353 on_success()->Emit(compiler, &successor_trace);
3362 } 3354 }
3363 3355
3364 3356
3365 void Trace::InvalidateCurrentCharacter() { 3357 void Trace::InvalidateCurrentCharacter() {
3366 characters_preloaded_ = 0; 3358 characters_preloaded_ = 0;
3367 } 3359 }
3368 3360
3369 3361
3370 void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) { 3362 void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
3363 DCHECK(by > 0);
3371 // We don't have an instruction for shifting the current character register 3364 // We don't have an instruction for shifting the current character register
3372 // down or for using a shifted value for anything so lets just forget that 3365 // down or for using a shifted value for anything so lets just forget that
3373 // we preloaded any characters into it. 3366 // we preloaded any characters into it.
3374 characters_preloaded_ = 0; 3367 characters_preloaded_ = 0;
3375 // Adjust the offsets of the quick check performed information. This 3368 // Adjust the offsets of the quick check performed information. This
3376 // information is used to find out what we already determined about the 3369 // information is used to find out what we already determined about the
3377 // characters by means of mask and compare. 3370 // characters by means of mask and compare.
3378 quick_check_performed_.Advance(by, compiler->one_byte()); 3371 quick_check_performed_.Advance(by, compiler->one_byte());
3379 cp_offset_ += by; 3372 cp_offset_ += by;
3380 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { 3373 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
3381 compiler->SetRegExpTooBig(); 3374 compiler->SetRegExpTooBig();
3382 cp_offset_ = 0; 3375 cp_offset_ = 0;
3383 } 3376 }
3384 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); 3377 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
3385 } 3378 }
3386 3379
3387 3380
3388 void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) { 3381 void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) {
3389 int element_count = elements()->length(); 3382 int element_count = elms_->length();
3390 for (int i = 0; i < element_count; i++) { 3383 for (int i = 0; i < element_count; i++) {
3391 TextElement elm = elements()->at(i); 3384 TextElement elm = elms_->at(i);
3392 if (elm.text_type() == TextElement::CHAR_CLASS) { 3385 if (elm.text_type() == TextElement::CHAR_CLASS) {
3393 RegExpCharacterClass* cc = elm.char_class(); 3386 RegExpCharacterClass* cc = elm.char_class();
3394 // None of the standard character classes is different in the case 3387 // None of the standard character classes is different in the case
3395 // independent case and it slows us down if we don't know that. 3388 // independent case and it slows us down if we don't know that.
3396 if (cc->is_standard(zone())) continue; 3389 if (cc->is_standard(zone())) continue;
3397 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); 3390 ZoneList<CharacterRange>* ranges = cc->ranges(zone());
3398 int range_count = ranges->length(); 3391 int range_count = ranges->length();
3399 for (int j = 0; j < range_count; j++) { 3392 for (int j = 0; j < range_count; j++) {
3400 ranges->at(j).AddCaseEquivalents(isolate, zone(), ranges, is_one_byte); 3393 ranges->at(j).AddCaseEquivalents(isolate, zone(), ranges, is_one_byte);
3401 } 3394 }
3402 } 3395 }
3403 } 3396 }
3404 } 3397 }
3405 3398
3406 3399
3407 int TextNode::GreedyLoopTextLength() { return Length(); } 3400 int TextNode::GreedyLoopTextLength() {
3401 TextElement elm = elms_->at(elms_->length() - 1);
3402 return elm.cp_offset() + elm.length();
3403 }
3408 3404
3409 3405
3410 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( 3406 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
3411 RegExpCompiler* compiler) { 3407 RegExpCompiler* compiler) {
3412 if (read_backward()) return NULL; 3408 if (elms_->length() != 1) return NULL;
3413 if (elements()->length() != 1) return NULL; 3409 TextElement elm = elms_->at(0);
3414 TextElement elm = elements()->at(0);
3415 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; 3410 if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;
3416 RegExpCharacterClass* node = elm.char_class(); 3411 RegExpCharacterClass* node = elm.char_class();
3417 ZoneList<CharacterRange>* ranges = node->ranges(zone()); 3412 ZoneList<CharacterRange>* ranges = node->ranges(zone());
3418 if (!CharacterRange::IsCanonical(ranges)) { 3413 if (!CharacterRange::IsCanonical(ranges)) {
3419 CharacterRange::Canonicalize(ranges); 3414 CharacterRange::Canonicalize(ranges);
3420 } 3415 }
3421 if (node->is_negated()) { 3416 if (node->is_negated()) {
3422 return ranges->length() == 0 ? on_success() : NULL; 3417 return ranges->length() == 0 ? on_success() : NULL;
3423 } 3418 }
3424 if (ranges->length() != 1) return NULL; 3419 if (ranges->length() != 1) return NULL;
(...skipping 23 matching lines...) Expand all
3448 return kNodeIsTooComplexForGreedyLoops; 3443 return kNodeIsTooComplexForGreedyLoops;
3449 } 3444 }
3450 int node_length = node->GreedyLoopTextLength(); 3445 int node_length = node->GreedyLoopTextLength();
3451 if (node_length == kNodeIsTooComplexForGreedyLoops) { 3446 if (node_length == kNodeIsTooComplexForGreedyLoops) {
3452 return kNodeIsTooComplexForGreedyLoops; 3447 return kNodeIsTooComplexForGreedyLoops;
3453 } 3448 }
3454 length += node_length; 3449 length += node_length;
3455 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); 3450 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
3456 node = seq_node->on_success(); 3451 node = seq_node->on_success();
3457 } 3452 }
3458 return read_backward() ? -length : length; 3453 return length;
3459 } 3454 }
3460 3455
3461 3456
3462 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) { 3457 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {
3463 DCHECK_NULL(loop_node_); 3458 DCHECK_NULL(loop_node_);
3464 AddAlternative(alt); 3459 AddAlternative(alt);
3465 loop_node_ = alt.node(); 3460 loop_node_ = alt.node();
3466 } 3461 }
3467 3462
3468 3463
(...skipping 410 matching lines...) Expand 10 before | Expand all | Expand 10 after
3879 * V |S 3874 * V |S
3880 * Q2 ---> U----->backtrack 3875 * Q2 ---> U----->backtrack
3881 * | F / 3876 * | F /
3882 * S| / 3877 * S| /
3883 * V F / 3878 * V F /
3884 * S2--/ 3879 * S2--/
3885 */ 3880 */
3886 3881
3887 GreedyLoopState::GreedyLoopState(bool not_at_start) { 3882 GreedyLoopState::GreedyLoopState(bool not_at_start) {
3888 counter_backtrack_trace_.set_backtrack(&label_); 3883 counter_backtrack_trace_.set_backtrack(&label_);
3889 if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE); 3884 if (not_at_start) counter_backtrack_trace_.set_at_start(false);
3890 } 3885 }
3891 3886
3892 3887
3893 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { 3888 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
3894 #ifdef DEBUG 3889 #ifdef DEBUG
3895 int choice_count = alternatives_->length(); 3890 int choice_count = alternatives_->length();
3896 for (int i = 0; i < choice_count - 1; i++) { 3891 for (int i = 0; i < choice_count - 1; i++) {
3897 GuardedAlternative alternative = alternatives_->at(i); 3892 GuardedAlternative alternative = alternatives_->at(i);
3898 ZoneList<Guard*>* guards = alternative.guards(); 3893 ZoneList<Guard*>* guards = alternative.guards();
3899 int guard_count = (guards == NULL) ? 0 : guards->length(); 3894 int guard_count = (guards == NULL) ? 0 : guards->length();
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
4006 // and other simple nodes. These are handled by pushing the current 4001 // and other simple nodes. These are handled by pushing the current
4007 // position on the stack and then incrementing the current position each 4002 // position on the stack and then incrementing the current position each
4008 // time around the switch. On backtrack we decrement the current position 4003 // time around the switch. On backtrack we decrement the current position
4009 // and check it against the pushed value. This avoids pushing backtrack 4004 // and check it against the pushed value. This avoids pushing backtrack
4010 // information for each iteration of the loop, which could take up a lot of 4005 // information for each iteration of the loop, which could take up a lot of
4011 // space. 4006 // space.
4012 DCHECK(trace->stop_node() == NULL); 4007 DCHECK(trace->stop_node() == NULL);
4013 macro_assembler->PushCurrentPosition(); 4008 macro_assembler->PushCurrentPosition();
4014 Label greedy_match_failed; 4009 Label greedy_match_failed;
4015 Trace greedy_match_trace; 4010 Trace greedy_match_trace;
4016 if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE); 4011 if (not_at_start()) greedy_match_trace.set_at_start(false);
4017 greedy_match_trace.set_backtrack(&greedy_match_failed); 4012 greedy_match_trace.set_backtrack(&greedy_match_failed);
4018 Label loop_label; 4013 Label loop_label;
4019 macro_assembler->Bind(&loop_label); 4014 macro_assembler->Bind(&loop_label);
4020 greedy_match_trace.set_stop_node(this); 4015 greedy_match_trace.set_stop_node(this);
4021 greedy_match_trace.set_loop_label(&loop_label); 4016 greedy_match_trace.set_loop_label(&loop_label);
4022 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace); 4017 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
4023 macro_assembler->Bind(&greedy_match_failed); 4018 macro_assembler->Bind(&greedy_match_failed);
4024 4019
4025 Label second_choice; // For use in greedy matches. 4020 Label second_choice; // For use in greedy matches.
4026 macro_assembler->Bind(&second_choice); 4021 macro_assembler->Bind(&second_choice);
(...skipping 325 matching lines...) Expand 10 before | Expand all | Expand 10 after
4352 } 4347 }
4353 4348
4354 LimitResult limit_result = LimitVersions(compiler, trace); 4349 LimitResult limit_result = LimitVersions(compiler, trace);
4355 if (limit_result == DONE) return; 4350 if (limit_result == DONE) return;
4356 DCHECK(limit_result == CONTINUE); 4351 DCHECK(limit_result == CONTINUE);
4357 4352
4358 RecursionCheck rc(compiler); 4353 RecursionCheck rc(compiler);
4359 4354
4360 DCHECK_EQ(start_reg_ + 1, end_reg_); 4355 DCHECK_EQ(start_reg_ + 1, end_reg_);
4361 if (compiler->ignore_case()) { 4356 if (compiler->ignore_case()) {
4362 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(), 4357 assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
4363 trace->backtrack()); 4358 trace->backtrack());
4364 } else { 4359 } else {
4365 assembler->CheckNotBackReference(start_reg_, read_backward(), 4360 assembler->CheckNotBackReference(start_reg_, trace->backtrack());
4366 trace->backtrack());
4367 } 4361 }
4368 // We are going to advance backward, so we may end up at the start.
4369 if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
4370 on_success()->Emit(compiler, trace); 4362 on_success()->Emit(compiler, trace);
4371 } 4363 }
4372 4364
4373 4365
4374 // ------------------------------------------------------------------- 4366 // -------------------------------------------------------------------
4375 // Dot/dotty output 4367 // Dot/dotty output
4376 4368
4377 4369
4378 #ifdef DEBUG 4370 #ifdef DEBUG
4379 4371
(...skipping 340 matching lines...) Expand 10 before | Expand all | Expand 10 after
4720 4712
4721 4713
4722 // ------------------------------------------------------------------- 4714 // -------------------------------------------------------------------
4723 // Tree to graph conversion 4715 // Tree to graph conversion
4724 4716
4725 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, 4717 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
4726 RegExpNode* on_success) { 4718 RegExpNode* on_success) {
4727 ZoneList<TextElement>* elms = 4719 ZoneList<TextElement>* elms =
4728 new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone()); 4720 new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone());
4729 elms->Add(TextElement::Atom(this), compiler->zone()); 4721 elms->Add(TextElement::Atom(this), compiler->zone());
4730 return new (compiler->zone()) 4722 return new(compiler->zone()) TextNode(elms, on_success);
4731 TextNode(elms, compiler->read_backward(), on_success);
4732 } 4723 }
4733 4724
4734 4725
4735 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, 4726 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
4736 RegExpNode* on_success) { 4727 RegExpNode* on_success) {
4737 return new (compiler->zone()) 4728 return new(compiler->zone()) TextNode(elements(), on_success);
4738 TextNode(elements(), compiler->read_backward(), on_success);
4739 } 4729 }
4740 4730
4741 4731
4742 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges, 4732 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
4743 const int* special_class, 4733 const int* special_class,
4744 int length) { 4734 int length) {
4745 length--; // Remove final 0x10000. 4735 length--; // Remove final 0x10000.
4746 DCHECK(special_class[length] == 0x10000); 4736 DCHECK(special_class[length] == 0x10000);
4747 DCHECK(ranges->length() != 0); 4737 DCHECK(ranges->length() != 0);
4748 DCHECK(length != 0); 4738 DCHECK(length != 0);
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
4825 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) { 4815 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
4826 set_.set_standard_set_type('W'); 4816 set_.set_standard_set_type('W');
4827 return true; 4817 return true;
4828 } 4818 }
4829 return false; 4819 return false;
4830 } 4820 }
4831 4821
4832 4822
4833 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, 4823 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
4834 RegExpNode* on_success) { 4824 RegExpNode* on_success) {
4835 return new (compiler->zone()) 4825 return new(compiler->zone()) TextNode(this, on_success);
4836 TextNode(this, compiler->read_backward(), on_success);
4837 } 4826 }
4838 4827
4839 4828
4840 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { 4829 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {
4841 RegExpAtom* atom1 = (*a)->AsAtom(); 4830 RegExpAtom* atom1 = (*a)->AsAtom();
4842 RegExpAtom* atom2 = (*b)->AsAtom(); 4831 RegExpAtom* atom2 = (*b)->AsAtom();
4843 uc16 character1 = atom1->data().at(0); 4832 uc16 character1 = atom1->data().at(0);
4844 uc16 character2 = atom2->data().at(0); 4833 uc16 character2 = atom2->data().at(0);
4845 if (character1 < character2) return -1; 4834 if (character1 < character2) return -1;
4846 if (character1 > character2) return 1; 4835 if (character1 > character2) return 1;
(...skipping 361 matching lines...) Expand 10 before | Expand all | Expand 10 after
5208 if (is_greedy) { 5197 if (is_greedy) {
5209 alternation->AddAlternative( 5198 alternation->AddAlternative(
5210 GuardedAlternative(body->ToNode(compiler, answer))); 5199 GuardedAlternative(body->ToNode(compiler, answer)));
5211 alternation->AddAlternative(GuardedAlternative(on_success)); 5200 alternation->AddAlternative(GuardedAlternative(on_success));
5212 } else { 5201 } else {
5213 alternation->AddAlternative(GuardedAlternative(on_success)); 5202 alternation->AddAlternative(GuardedAlternative(on_success));
5214 alternation->AddAlternative( 5203 alternation->AddAlternative(
5215 GuardedAlternative(body->ToNode(compiler, answer))); 5204 GuardedAlternative(body->ToNode(compiler, answer)));
5216 } 5205 }
5217 answer = alternation; 5206 answer = alternation;
5218 if (not_at_start && !compiler->read_backward()) { 5207 if (not_at_start) alternation->set_not_at_start();
5219 alternation->set_not_at_start();
5220 }
5221 } 5208 }
5222 return answer; 5209 return answer;
5223 } 5210 }
5224 } 5211 }
5225 } 5212 }
5226 bool has_min = min > 0; 5213 bool has_min = min > 0;
5227 bool has_max = max < RegExpTree::kInfinity; 5214 bool has_max = max < RegExpTree::kInfinity;
5228 bool needs_counter = has_min || has_max; 5215 bool needs_counter = has_min || has_max;
5229 int reg_ctr = needs_counter 5216 int reg_ctr = needs_counter
5230 ? compiler->AllocateRegister() 5217 ? compiler->AllocateRegister()
5231 : RegExpCompiler::kNoRegister; 5218 : RegExpCompiler::kNoRegister;
5232 LoopChoiceNode* center = new (zone) 5219 LoopChoiceNode* center = new(zone) LoopChoiceNode(body->min_match() == 0,
5233 LoopChoiceNode(body->min_match() == 0, compiler->read_backward(), zone); 5220 zone);
5234 if (not_at_start && !compiler->read_backward()) center->set_not_at_start(); 5221 if (not_at_start) center->set_not_at_start();
5235 RegExpNode* loop_return = needs_counter 5222 RegExpNode* loop_return = needs_counter
5236 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) 5223 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
5237 : static_cast<RegExpNode*>(center); 5224 : static_cast<RegExpNode*>(center);
5238 if (body_can_be_empty) { 5225 if (body_can_be_empty) {
5239 // If the body can be empty we need to check if it was and then 5226 // If the body can be empty we need to check if it was and then
5240 // backtrack. 5227 // backtrack.
5241 loop_return = ActionNode::EmptyMatchCheck(body_start_reg, 5228 loop_return = ActionNode::EmptyMatchCheck(body_start_reg,
5242 reg_ctr, 5229 reg_ctr,
5243 min, 5230 min,
5244 loop_return); 5231 loop_return);
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
5300 // lookahead in one side and an end-of-input on the other side. 5287 // lookahead in one side and an end-of-input on the other side.
5301 // We need two registers for the lookahead. 5288 // We need two registers for the lookahead.
5302 int stack_pointer_register = compiler->AllocateRegister(); 5289 int stack_pointer_register = compiler->AllocateRegister();
5303 int position_register = compiler->AllocateRegister(); 5290 int position_register = compiler->AllocateRegister();
5304 // The ChoiceNode to distinguish between a newline and end-of-input. 5291 // The ChoiceNode to distinguish between a newline and end-of-input.
5305 ChoiceNode* result = new(zone) ChoiceNode(2, zone); 5292 ChoiceNode* result = new(zone) ChoiceNode(2, zone);
5306 // Create a newline atom. 5293 // Create a newline atom.
5307 ZoneList<CharacterRange>* newline_ranges = 5294 ZoneList<CharacterRange>* newline_ranges =
5308 new(zone) ZoneList<CharacterRange>(3, zone); 5295 new(zone) ZoneList<CharacterRange>(3, zone);
5309 CharacterRange::AddClassEscape('n', newline_ranges, zone); 5296 CharacterRange::AddClassEscape('n', newline_ranges, zone);
5310 RegExpCharacterClass* newline_atom = new (zone) RegExpCharacterClass('n'); 5297 RegExpCharacterClass* newline_atom = new(zone) RegExpCharacterClass('n');
5311 TextNode* newline_matcher = new (zone) TextNode( 5298 TextNode* newline_matcher = new(zone) TextNode(
5312 newline_atom, false, ActionNode::PositiveSubmatchSuccess( 5299 newline_atom,
5313 stack_pointer_register, position_register, 5300 ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
5314 0, // No captures inside. 5301 position_register,
5315 -1, // Ignored if no captures. 5302 0, // No captures inside.
5316 on_success)); 5303 -1, // Ignored if no captures.
5304 on_success));
5317 // Create an end-of-input matcher. 5305 // Create an end-of-input matcher.
5318 RegExpNode* end_of_line = ActionNode::BeginSubmatch( 5306 RegExpNode* end_of_line = ActionNode::BeginSubmatch(
5319 stack_pointer_register, 5307 stack_pointer_register,
5320 position_register, 5308 position_register,
5321 newline_matcher); 5309 newline_matcher);
5322 // Add the two alternatives to the ChoiceNode. 5310 // Add the two alternatives to the ChoiceNode.
5323 GuardedAlternative eol_alternative(end_of_line); 5311 GuardedAlternative eol_alternative(end_of_line);
5324 result->AddAlternative(eol_alternative); 5312 result->AddAlternative(eol_alternative);
5325 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); 5313 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
5326 result->AddAlternative(end_alternative); 5314 result->AddAlternative(end_alternative);
5327 return result; 5315 return result;
5328 } 5316 }
5329 default: 5317 default:
5330 UNREACHABLE(); 5318 UNREACHABLE();
5331 } 5319 }
5332 return on_success; 5320 return on_success;
5333 } 5321 }
5334 5322
5335 5323
5336 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, 5324 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
5337 RegExpNode* on_success) { 5325 RegExpNode* on_success) {
5338 return new (compiler->zone()) 5326 return new(compiler->zone())
5339 BackReferenceNode(RegExpCapture::StartRegister(index()), 5327 BackReferenceNode(RegExpCapture::StartRegister(index()),
5340 RegExpCapture::EndRegister(index()), 5328 RegExpCapture::EndRegister(index()),
5341 compiler->read_backward(), on_success); 5329 on_success);
5342 } 5330 }
5343 5331
5344 5332
5345 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, 5333 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
5346 RegExpNode* on_success) { 5334 RegExpNode* on_success) {
5347 return on_success; 5335 return on_success;
5348 } 5336 }
5349 5337
5350 5338
5351 RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler, 5339 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
5352 RegExpNode* on_success) { 5340 RegExpNode* on_success) {
5353 int stack_pointer_register = compiler->AllocateRegister(); 5341 int stack_pointer_register = compiler->AllocateRegister();
5354 int position_register = compiler->AllocateRegister(); 5342 int position_register = compiler->AllocateRegister();
5355 5343
5356 const int registers_per_capture = 2; 5344 const int registers_per_capture = 2;
5357 const int register_of_first_capture = 2; 5345 const int register_of_first_capture = 2;
5358 int register_count = capture_count_ * registers_per_capture; 5346 int register_count = capture_count_ * registers_per_capture;
5359 int register_start = 5347 int register_start =
5360 register_of_first_capture + capture_from_ * registers_per_capture; 5348 register_of_first_capture + capture_from_ * registers_per_capture;
5361 5349
5362 RegExpNode* result; 5350 RegExpNode* success;
5363 bool was_reading_backward = compiler->read_backward();
5364 compiler->set_read_backward(type() == LOOKBEHIND);
5365 if (is_positive()) { 5351 if (is_positive()) {
5366 result = ActionNode::BeginSubmatch( 5352 RegExpNode* node = ActionNode::BeginSubmatch(
5367 stack_pointer_register, position_register, 5353 stack_pointer_register,
5368 body()->ToNode(compiler, 5354 position_register,
5369 ActionNode::PositiveSubmatchSuccess( 5355 body()->ToNode(
5370 stack_pointer_register, position_register, 5356 compiler,
5371 register_count, register_start, on_success))); 5357 ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
5358 position_register,
5359 register_count,
5360 register_start,
5361 on_success)));
5362 return node;
5372 } else { 5363 } else {
5373 // We use a ChoiceNode for a negative lookahead because it has most of 5364 // We use a ChoiceNode for a negative lookahead because it has most of
5374 // the characteristics we need. It has the body of the lookahead as its 5365 // the characteristics we need. It has the body of the lookahead as its
5375 // first alternative and the expression after the lookahead of the second 5366 // first alternative and the expression after the lookahead of the second
5376 // alternative. If the first alternative succeeds then the 5367 // alternative. If the first alternative succeeds then the
5377 // NegativeSubmatchSuccess will unwind the stack including everything the 5368 // NegativeSubmatchSuccess will unwind the stack including everything the
5378 // choice node set up and backtrack. If the first alternative fails then 5369 // choice node set up and backtrack. If the first alternative fails then
5379 // the second alternative is tried, which is exactly the desired result 5370 // the second alternative is tried, which is exactly the desired result
5380 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special 5371 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special
5381 // ChoiceNode that knows to ignore the first exit when calculating quick 5372 // ChoiceNode that knows to ignore the first exit when calculating quick
5382 // checks. 5373 // checks.
5383 Zone* zone = compiler->zone(); 5374 Zone* zone = compiler->zone();
5384 5375
5385 GuardedAlternative body_alt( 5376 GuardedAlternative body_alt(
5386 body()->ToNode(compiler, new (zone) NegativeSubmatchSuccess( 5377 body()->ToNode(
5387 stack_pointer_register, position_register, 5378 compiler,
5388 register_count, register_start, zone))); 5379 success = new(zone) NegativeSubmatchSuccess(stack_pointer_register,
5380 position_register,
5381 register_count,
5382 register_start,
5383 zone)));
5389 ChoiceNode* choice_node = 5384 ChoiceNode* choice_node =
5390 new(zone) NegativeLookaheadChoiceNode(body_alt, 5385 new(zone) NegativeLookaheadChoiceNode(body_alt,
5391 GuardedAlternative(on_success), 5386 GuardedAlternative(on_success),
5392 zone); 5387 zone);
5393 result = ActionNode::BeginSubmatch(stack_pointer_register, 5388 return ActionNode::BeginSubmatch(stack_pointer_register,
5394 position_register, choice_node); 5389 position_register,
5390 choice_node);
5395 } 5391 }
5396 compiler->set_read_backward(was_reading_backward);
5397 return result;
5398 } 5392 }
5399 5393
5400 5394
5401 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, 5395 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
5402 RegExpNode* on_success) { 5396 RegExpNode* on_success) {
5403 return ToNode(body(), index(), compiler, on_success); 5397 return ToNode(body(), index(), compiler, on_success);
5404 } 5398 }
5405 5399
5406 5400
5407 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, 5401 RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
5408 int index, 5402 int index,
5409 RegExpCompiler* compiler, 5403 RegExpCompiler* compiler,
5410 RegExpNode* on_success) { 5404 RegExpNode* on_success) {
5411 DCHECK_NOT_NULL(body);
5412 int start_reg = RegExpCapture::StartRegister(index); 5405 int start_reg = RegExpCapture::StartRegister(index);
5413 int end_reg = RegExpCapture::EndRegister(index); 5406 int end_reg = RegExpCapture::EndRegister(index);
5414 if (compiler->read_backward()) std::swap(start_reg, end_reg);
5415 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); 5407 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
5416 RegExpNode* body_node = body->ToNode(compiler, store_end); 5408 RegExpNode* body_node = body->ToNode(compiler, store_end);
5417 return ActionNode::StorePosition(start_reg, true, body_node); 5409 return ActionNode::StorePosition(start_reg, true, body_node);
5418 } 5410 }
5419 5411
5420 5412
5421 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, 5413 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
5422 RegExpNode* on_success) { 5414 RegExpNode* on_success) {
5423 ZoneList<RegExpTree*>* children = nodes(); 5415 ZoneList<RegExpTree*>* children = nodes();
5424 RegExpNode* current = on_success; 5416 RegExpNode* current = on_success;
5425 if (compiler->read_backward()) { 5417 for (int i = children->length() - 1; i >= 0; i--) {
5426 for (int i = 0; i < children->length(); i++) { 5418 current = children->at(i)->ToNode(compiler, current);
5427 current = children->at(i)->ToNode(compiler, current);
5428 }
5429 } else {
5430 for (int i = children->length() - 1; i >= 0; i--) {
5431 current = children->at(i)->ToNode(compiler, current);
5432 }
5433 } 5419 }
5434 return current; 5420 return current;
5435 } 5421 }
5436 5422
5437 5423
5438 static void AddClass(const int* elmv, 5424 static void AddClass(const int* elmv,
5439 int elmc, 5425 int elmc,
5440 ZoneList<CharacterRange>* ranges, 5426 ZoneList<CharacterRange>* ranges,
5441 Zone* zone) { 5427 Zone* zone) {
5442 elmc--; 5428 elmc--;
(...skipping 855 matching lines...) Expand 10 before | Expand all | Expand 10 after
6298 0, 6284 0,
6299 &compiler, 6285 &compiler,
6300 compiler.accept()); 6286 compiler.accept());
6301 RegExpNode* node = captured_body; 6287 RegExpNode* node = captured_body;
6302 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); 6288 bool is_end_anchored = data->tree->IsAnchoredAtEnd();
6303 bool is_start_anchored = data->tree->IsAnchoredAtStart(); 6289 bool is_start_anchored = data->tree->IsAnchoredAtStart();
6304 int max_length = data->tree->max_match(); 6290 int max_length = data->tree->max_match();
6305 if (!is_start_anchored && !is_sticky) { 6291 if (!is_start_anchored && !is_sticky) {
6306 // Add a .*? at the beginning, outside the body capture, unless 6292 // Add a .*? at the beginning, outside the body capture, unless
6307 // this expression is anchored at the beginning or sticky. 6293 // this expression is anchored at the beginning or sticky.
6308 RegExpNode* loop_node = RegExpQuantifier::ToNode( 6294 RegExpNode* loop_node =
6309 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'), 6295 RegExpQuantifier::ToNode(0,
6310 &compiler, captured_body, data->contains_anchor); 6296 RegExpTree::kInfinity,
6297 false,
6298 new(zone) RegExpCharacterClass('*'),
6299 &compiler,
6300 captured_body,
6301 data->contains_anchor);
6311 6302
6312 if (data->contains_anchor) { 6303 if (data->contains_anchor) {
6313 // Unroll loop once, to take care of the case that might start 6304 // Unroll loop once, to take care of the case that might start
6314 // at the start of input. 6305 // at the start of input.
6315 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); 6306 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);
6316 first_step_node->AddAlternative(GuardedAlternative(captured_body)); 6307 first_step_node->AddAlternative(GuardedAlternative(captured_body));
6317 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( 6308 first_step_node->AddAlternative(GuardedAlternative(
6318 new (zone) RegExpCharacterClass('*'), false, loop_node))); 6309 new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node)));
6319 node = first_step_node; 6310 node = first_step_node;
6320 } else { 6311 } else {
6321 node = loop_node; 6312 node = loop_node;
6322 } 6313 }
6323 } 6314 }
6324 if (is_one_byte) { 6315 if (is_one_byte) {
6325 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); 6316 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
6326 // Do it again to propagate the new nodes to places where they were not 6317 // Do it again to propagate the new nodes to places where they were not
6327 // put because they had not been calculated yet. 6318 // put because they had not been calculated yet.
6328 if (node != NULL) { 6319 if (node != NULL) {
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after
6512 6503
6513 6504
6514 void RegExpResultsCache::Clear(FixedArray* cache) { 6505 void RegExpResultsCache::Clear(FixedArray* cache) {
6515 for (int i = 0; i < kRegExpResultsCacheSize; i++) { 6506 for (int i = 0; i < kRegExpResultsCacheSize; i++) {
6516 cache->set(i, Smi::FromInt(0)); 6507 cache->set(i, Smi::FromInt(0));
6517 } 6508 }
6518 } 6509 }
6519 6510
6520 } // namespace internal 6511 } // namespace internal
6521 } // namespace v8 6512 } // namespace v8
OLDNEW
« no previous file with comments | « src/regexp/jsregexp.h ('k') | src/regexp/mips/regexp-macro-assembler-mips.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698