Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(105)

Side by Side Diff: runtime/vm/regexp.cc

Issue 2481873005: clang-format runtime/vm (Closed)
Patch Set: Merge Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « runtime/vm/regexp.h ('k') | runtime/vm/regexp_assembler.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/regexp.h" 5 #include "vm/regexp.h"
6 6
7 #include "vm/dart_entry.h" 7 #include "vm/dart_entry.h"
8 #include "vm/regexp_assembler.h" 8 #include "vm/regexp_assembler.h"
9 #include "vm/regexp_assembler_bytecode.h" 9 #include "vm/regexp_assembler_bytecode.h"
10 #include "vm/regexp_assembler_ir.h" 10 #include "vm/regexp_assembler_ir.h"
(...skipping 252 matching lines...) Expand 10 before | Expand all | Expand 10 after
263 ASSERT((in_character & RegExpMacroAssembler::kTableMask) == in_character); 263 ASSERT((in_character & RegExpMacroAssembler::kTableMask) == in_character);
264 if (total_samples_ < 1) return 1; // Division by zero. 264 if (total_samples_ < 1) return 1; // Division by zero.
265 intptr_t freq_in_per128 = 265 intptr_t freq_in_per128 =
266 (frequencies_[in_character].counter() * 128) / total_samples_; 266 (frequencies_[in_character].counter() * 128) / total_samples_;
267 return freq_in_per128; 267 return freq_in_per128;
268 } 268 }
269 269
270 private: 270 private:
271 class CharacterFrequency { 271 class CharacterFrequency {
272 public: 272 public:
273 CharacterFrequency() : counter_(0), character_(-1) { } 273 CharacterFrequency() : counter_(0), character_(-1) {}
274 explicit CharacterFrequency(intptr_t character) 274 explicit CharacterFrequency(intptr_t character)
275 : counter_(0), character_(character) { } 275 : counter_(0), character_(character) {}
276 276
277 void Increment() { counter_++; } 277 void Increment() { counter_++; }
278 intptr_t counter() { return counter_; } 278 intptr_t counter() { return counter_; }
279 intptr_t character() { return character_; } 279 intptr_t character() { return character_; }
280 280
281 private: 281 private:
282 intptr_t counter_; 282 intptr_t counter_;
283 intptr_t character_; 283 intptr_t character_;
284 284
285 DISALLOW_ALLOCATION(); 285 DISALLOW_ALLOCATION();
286 }; 286 };
287 287
288 288
289 private: 289 private:
290 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; 290 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
291 intptr_t total_samples_; 291 intptr_t total_samples_;
292 }; 292 };
293 293
294 294
295 class RegExpCompiler : public ValueObject { 295 class RegExpCompiler : public ValueObject {
296 public: 296 public:
297 RegExpCompiler(intptr_t capture_count, 297 RegExpCompiler(intptr_t capture_count, bool ignore_case, bool is_one_byte);
298 bool ignore_case,
299 bool is_one_byte);
300 298
301 intptr_t AllocateRegister() { 299 intptr_t AllocateRegister() { return next_register_++; }
302 return next_register_++;
303 }
304 300
305 RegExpEngine::CompilationResult Assemble( 301 RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler,
306 IRRegExpMacroAssembler* assembler, 302 RegExpNode* start,
307 RegExpNode* start, 303 intptr_t capture_count,
308 intptr_t capture_count, 304 const String& pattern);
309 const String& pattern);
310 305
311 RegExpEngine::CompilationResult Assemble( 306 RegExpEngine::CompilationResult Assemble(
312 BytecodeRegExpMacroAssembler* assembler, 307 BytecodeRegExpMacroAssembler* assembler,
313 RegExpNode* start, 308 RegExpNode* start,
314 intptr_t capture_count, 309 intptr_t capture_count,
315 const String& pattern); 310 const String& pattern);
316 311
317 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } 312 inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
318 313
319 static const intptr_t kImplementationOffset = 0; 314 static const intptr_t kImplementationOffset = 0;
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
357 Zone* zone_; 352 Zone* zone_;
358 }; 353 };
359 354
360 355
361 class RecursionCheck : public ValueObject { 356 class RecursionCheck : public ValueObject {
362 public: 357 public:
363 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { 358 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
364 compiler->IncrementRecursionDepth(); 359 compiler->IncrementRecursionDepth();
365 } 360 }
366 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } 361 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
362
367 private: 363 private:
368 RegExpCompiler* compiler_; 364 RegExpCompiler* compiler_;
369 }; 365 };
370 366
371 367
372 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { 368 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() {
373 return RegExpEngine::CompilationResult("RegExp too big"); 369 return RegExpEngine::CompilationResult("RegExp too big");
374 } 370 }
375 371
376 372
377 // Attempts to compile the regexp using an Irregexp code generator. Returns 373 // Attempts to compile the regexp using an Irregexp code generator. Returns
378 // a fixed array or a null handle depending on whether it succeeded. 374 // a fixed array or a null handle depending on whether it succeeded.
379 RegExpCompiler::RegExpCompiler(intptr_t capture_count, 375 RegExpCompiler::RegExpCompiler(intptr_t capture_count,
380 bool ignore_case, 376 bool ignore_case,
381 bool is_one_byte) 377 bool is_one_byte)
382 : next_register_(2 * (capture_count + 1)), 378 : next_register_(2 * (capture_count + 1)),
383 work_list_(NULL), 379 work_list_(NULL),
384 recursion_depth_(0), 380 recursion_depth_(0),
385 ignore_case_(ignore_case), 381 ignore_case_(ignore_case),
386 is_one_byte_(is_one_byte), 382 is_one_byte_(is_one_byte),
387 reg_exp_too_big_(false), 383 reg_exp_too_big_(false),
388 current_expansion_factor_(1), 384 current_expansion_factor_(1),
389 zone_(Thread::Current()->zone()) { 385 zone_(Thread::Current()->zone()) {
390 accept_ = new(Z) EndNode(EndNode::ACCEPT, Z); 386 accept_ = new (Z) EndNode(EndNode::ACCEPT, Z);
391 } 387 }
392 388
393 389
394 RegExpEngine::CompilationResult RegExpCompiler::Assemble( 390 RegExpEngine::CompilationResult RegExpCompiler::Assemble(
395 IRRegExpMacroAssembler* macro_assembler, 391 IRRegExpMacroAssembler* macro_assembler,
396 RegExpNode* start, 392 RegExpNode* start,
397 intptr_t capture_count, 393 intptr_t capture_count,
398 const String& pattern) { 394 const String& pattern) {
399 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); 395 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */);
400 macro_assembler_ = macro_assembler; 396 macro_assembler_ = macro_assembler;
401 397
402 ZoneGrowableArray<RegExpNode*> work_list(0); 398 ZoneGrowableArray<RegExpNode*> work_list(0);
403 work_list_ = &work_list; 399 work_list_ = &work_list;
404 BlockLabel fail; 400 BlockLabel fail;
405 macro_assembler_->PushBacktrack(&fail); 401 macro_assembler_->PushBacktrack(&fail);
406 Trace new_trace; 402 Trace new_trace;
407 start->Emit(this, &new_trace); 403 start->Emit(this, &new_trace);
408 macro_assembler_->BindBlock(&fail); 404 macro_assembler_->BindBlock(&fail);
409 macro_assembler_->Fail(); 405 macro_assembler_->Fail();
410 while (!work_list.is_empty()) { 406 while (!work_list.is_empty()) {
411 work_list.RemoveLast()->Emit(this, &new_trace); 407 work_list.RemoveLast()->Emit(this, &new_trace);
412 } 408 }
413 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); 409 if (reg_exp_too_big_) return IrregexpRegExpTooBig();
414 410
415 macro_assembler->GenerateBacktrackBlock(); 411 macro_assembler->GenerateBacktrackBlock();
416 macro_assembler->FinalizeRegistersArray(); 412 macro_assembler->FinalizeRegistersArray();
417 413
418 return RegExpEngine::CompilationResult(macro_assembler->backtrack_goto(), 414 return RegExpEngine::CompilationResult(
419 macro_assembler->graph_entry(), 415 macro_assembler->backtrack_goto(), macro_assembler->graph_entry(),
420 macro_assembler->num_blocks(), 416 macro_assembler->num_blocks(), macro_assembler->num_stack_locals(),
421 macro_assembler->num_stack_locals(), 417 next_register_);
422 next_register_);
423 } 418 }
424 419
425 420
426 RegExpEngine::CompilationResult RegExpCompiler::Assemble( 421 RegExpEngine::CompilationResult RegExpCompiler::Assemble(
427 BytecodeRegExpMacroAssembler* macro_assembler, 422 BytecodeRegExpMacroAssembler* macro_assembler,
428 RegExpNode* start, 423 RegExpNode* start,
429 intptr_t capture_count, 424 intptr_t capture_count,
430 const String& pattern) { 425 const String& pattern) {
431 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); 426 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */);
432 macro_assembler_ = macro_assembler; 427 macro_assembler_ = macro_assembler;
(...skipping 20 matching lines...) Expand all
453 if (action_type() == ActionNode::CLEAR_CAPTURES) { 448 if (action_type() == ActionNode::CLEAR_CAPTURES) {
454 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); 449 Interval range = static_cast<DeferredClearCaptures*>(this)->range();
455 return range.Contains(that); 450 return range.Contains(that);
456 } else { 451 } else {
457 return reg() == that; 452 return reg() == that;
458 } 453 }
459 } 454 }
460 455
461 456
462 bool Trace::mentions_reg(intptr_t reg) { 457 bool Trace::mentions_reg(intptr_t reg) {
463 for (DeferredAction* action = actions_; 458 for (DeferredAction* action = actions_; action != NULL;
464 action != NULL;
465 action = action->next()) { 459 action = action->next()) {
466 if (action->Mentions(reg)) 460 if (action->Mentions(reg)) return true;
467 return true;
468 } 461 }
469 return false; 462 return false;
470 } 463 }
471 464
472 465
473 bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) { 466 bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) {
474 ASSERT(*cp_offset == 0); 467 ASSERT(*cp_offset == 0);
475 for (DeferredAction* action = actions_; 468 for (DeferredAction* action = actions_; action != NULL;
476 action != NULL;
477 action = action->next()) { 469 action = action->next()) {
478 if (action->Mentions(reg)) { 470 if (action->Mentions(reg)) {
479 if (action->action_type() == ActionNode::STORE_POSITION) { 471 if (action->action_type() == ActionNode::STORE_POSITION) {
480 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset(); 472 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
481 return true; 473 return true;
482 } else { 474 } else {
483 return false; 475 return false;
484 } 476 }
485 } 477 }
486 } 478 }
487 return false; 479 return false;
488 } 480 }
489 481
490 482
491 // This is called as we come into a loop choice node and some other tricky 483 // This is called as we come into a loop choice node and some other tricky
492 // nodes. It normalizes the state of the code generator to ensure we can 484 // nodes. It normalizes the state of the code generator to ensure we can
493 // generate generic code. 485 // generate generic code.
494 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers, 486 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers, Zone* zone) {
495 Zone* zone) {
496 intptr_t max_register = RegExpCompiler::kNoRegister; 487 intptr_t max_register = RegExpCompiler::kNoRegister;
497 for (DeferredAction* action = actions_; 488 for (DeferredAction* action = actions_; action != NULL;
498 action != NULL;
499 action = action->next()) { 489 action = action->next()) {
500 if (action->action_type() == ActionNode::CLEAR_CAPTURES) { 490 if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
501 Interval range = static_cast<DeferredClearCaptures*>(action)->range(); 491 Interval range = static_cast<DeferredClearCaptures*>(action)->range();
502 for (intptr_t i = range.from(); i <= range.to(); i++) 492 for (intptr_t i = range.from(); i <= range.to(); i++)
503 affected_registers->Set(i, zone); 493 affected_registers->Set(i, zone);
504 if (range.to() > max_register) max_register = range.to(); 494 if (range.to() > max_register) max_register = range.to();
505 } else { 495 } else {
506 affected_registers->Set(action->reg(), zone); 496 affected_registers->Set(action->reg(), zone);
507 if (action->reg() > max_register) max_register = action->reg(); 497 if (action->reg() > max_register) max_register = action->reg();
508 } 498 }
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
545 // to its previous state (or not, if it's safe to ignore it). 535 // to its previous state (or not, if it's safe to ignore it).
546 enum DeferredActionUndoType { ACTION_IGNORE, ACTION_RESTORE, ACTION_CLEAR }; 536 enum DeferredActionUndoType { ACTION_IGNORE, ACTION_RESTORE, ACTION_CLEAR };
547 DeferredActionUndoType undo_action = ACTION_IGNORE; 537 DeferredActionUndoType undo_action = ACTION_IGNORE;
548 538
549 intptr_t value = 0; 539 intptr_t value = 0;
550 bool absolute = false; 540 bool absolute = false;
551 bool clear = false; 541 bool clear = false;
552 intptr_t store_position = -1; 542 intptr_t store_position = -1;
553 // This is a little tricky because we are scanning the actions in reverse 543 // This is a little tricky because we are scanning the actions in reverse
554 // historical order (newest first). 544 // historical order (newest first).
555 for (DeferredAction* action = actions_; 545 for (DeferredAction* action = actions_; action != NULL;
556 action != NULL;
557 action = action->next()) { 546 action = action->next()) {
558 if (action->Mentions(reg)) { 547 if (action->Mentions(reg)) {
559 switch (action->action_type()) { 548 switch (action->action_type()) {
560 case ActionNode::SET_REGISTER: { 549 case ActionNode::SET_REGISTER: {
561 Trace::DeferredSetRegister* psr = 550 Trace::DeferredSetRegister* psr =
562 static_cast<Trace::DeferredSetRegister*>(action); 551 static_cast<Trace::DeferredSetRegister*>(action);
563 if (!absolute) { 552 if (!absolute) {
564 value += psr->value(); 553 value += psr->value();
565 absolute = true; 554 absolute = true;
566 } 555 }
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
670 if (backtrack() != NULL) { 659 if (backtrack() != NULL) {
671 // Here we have a concrete backtrack location. These are set up by choice 660 // Here we have a concrete backtrack location. These are set up by choice
672 // nodes and so they indicate that we have a deferred save of the current 661 // nodes and so they indicate that we have a deferred save of the current
673 // position which we may need to emit here. 662 // position which we may need to emit here.
674 assembler->PushCurrentPosition(); 663 assembler->PushCurrentPosition();
675 } 664 }
676 Zone* zone = successor->zone(); 665 Zone* zone = successor->zone();
677 intptr_t max_register = FindAffectedRegisters(&affected_registers, zone); 666 intptr_t max_register = FindAffectedRegisters(&affected_registers, zone);
678 OutSet registers_to_pop; 667 OutSet registers_to_pop;
679 OutSet registers_to_clear; 668 OutSet registers_to_clear;
680 PerformDeferredActions(assembler, 669 PerformDeferredActions(assembler, max_register, affected_registers,
681 max_register, 670 &registers_to_pop, &registers_to_clear, zone);
682 affected_registers,
683 &registers_to_pop,
684 &registers_to_clear,
685 zone);
686 if (cp_offset_ != 0) { 671 if (cp_offset_ != 0) {
687 assembler->AdvanceCurrentPosition(cp_offset_); 672 assembler->AdvanceCurrentPosition(cp_offset_);
688 } 673 }
689 674
690 // Create a new trivial state and generate the node with that. 675 // Create a new trivial state and generate the node with that.
691 BlockLabel undo; 676 BlockLabel undo;
692 assembler->PushBacktrack(&undo); 677 assembler->PushBacktrack(&undo);
693 Trace new_state; 678 Trace new_state;
694 successor->Emit(compiler, &new_state); 679 successor->Emit(compiler, &new_state);
695 680
696 // On backtrack we need to restore state. 681 // On backtrack we need to restore state.
697 assembler->BindBlock(&undo); 682 assembler->BindBlock(&undo);
698 RestoreAffectedRegisters(assembler, 683 RestoreAffectedRegisters(assembler, max_register, registers_to_pop,
699 max_register,
700 registers_to_pop,
701 registers_to_clear); 684 registers_to_clear);
702 if (backtrack() == NULL) { 685 if (backtrack() == NULL) {
703 assembler->Backtrack(); 686 assembler->Backtrack();
704 } else { 687 } else {
705 assembler->PopCurrentPosition(); 688 assembler->PopCurrentPosition();
706 assembler->GoTo(backtrack()); 689 assembler->GoTo(backtrack());
707 } 690 }
708 } 691 }
709 692
710 693
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
753 return; 736 return;
754 case NEGATIVE_SUBMATCH_SUCCESS: 737 case NEGATIVE_SUBMATCH_SUCCESS:
755 // This case is handled in a different virtual method. 738 // This case is handled in a different virtual method.
756 UNREACHABLE(); 739 UNREACHABLE();
757 } 740 }
758 UNIMPLEMENTED(); 741 UNIMPLEMENTED();
759 } 742 }
760 743
761 744
762 void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) { 745 void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) {
763 if (guards_ == NULL) 746 if (guards_ == NULL) guards_ = new (zone) ZoneGrowableArray<Guard*>(1);
764 guards_ = new(zone) ZoneGrowableArray<Guard*>(1);
765 guards_->Add(guard); 747 guards_->Add(guard);
766 } 748 }
767 749
768 750
769 ActionNode* ActionNode::SetRegister(intptr_t reg, 751 ActionNode* ActionNode::SetRegister(intptr_t reg,
770 intptr_t val, 752 intptr_t val,
771 RegExpNode* on_success) { 753 RegExpNode* on_success) {
772 ActionNode* result = 754 ActionNode* result =
773 new(on_success->zone()) ActionNode(SET_REGISTER, on_success); 755 new (on_success->zone()) ActionNode(SET_REGISTER, on_success);
774 result->data_.u_store_register.reg = reg; 756 result->data_.u_store_register.reg = reg;
775 result->data_.u_store_register.value = val; 757 result->data_.u_store_register.value = val;
776 return result; 758 return result;
777 } 759 }
778 760
779 761
780 ActionNode* ActionNode::IncrementRegister(intptr_t reg, 762 ActionNode* ActionNode::IncrementRegister(intptr_t reg,
781 RegExpNode* on_success) { 763 RegExpNode* on_success) {
782 ActionNode* result = 764 ActionNode* result =
783 new(on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success); 765 new (on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success);
784 result->data_.u_increment_register.reg = reg; 766 result->data_.u_increment_register.reg = reg;
785 return result; 767 return result;
786 } 768 }
787 769
788 770
789 ActionNode* ActionNode::StorePosition(intptr_t reg, 771 ActionNode* ActionNode::StorePosition(intptr_t reg,
790 bool is_capture, 772 bool is_capture,
791 RegExpNode* on_success) { 773 RegExpNode* on_success) {
792 ActionNode* result = 774 ActionNode* result =
793 new(on_success->zone()) ActionNode(STORE_POSITION, on_success); 775 new (on_success->zone()) ActionNode(STORE_POSITION, on_success);
794 result->data_.u_position_register.reg = reg; 776 result->data_.u_position_register.reg = reg;
795 result->data_.u_position_register.is_capture = is_capture; 777 result->data_.u_position_register.is_capture = is_capture;
796 return result; 778 return result;
797 } 779 }
798 780
799 781
800 ActionNode* ActionNode::ClearCaptures(Interval range, 782 ActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) {
801 RegExpNode* on_success) {
802 ActionNode* result = 783 ActionNode* result =
803 new(on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success); 784 new (on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success);
804 result->data_.u_clear_captures.range_from = range.from(); 785 result->data_.u_clear_captures.range_from = range.from();
805 result->data_.u_clear_captures.range_to = range.to(); 786 result->data_.u_clear_captures.range_to = range.to();
806 return result; 787 return result;
807 } 788 }
808 789
809 790
810 ActionNode* ActionNode::BeginSubmatch(intptr_t stack_reg, 791 ActionNode* ActionNode::BeginSubmatch(intptr_t stack_reg,
811 intptr_t position_reg, 792 intptr_t position_reg,
812 RegExpNode* on_success) { 793 RegExpNode* on_success) {
813 ActionNode* result = 794 ActionNode* result =
814 new(on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success); 795 new (on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success);
815 result->data_.u_submatch.stack_pointer_register = stack_reg; 796 result->data_.u_submatch.stack_pointer_register = stack_reg;
816 result->data_.u_submatch.current_position_register = position_reg; 797 result->data_.u_submatch.current_position_register = position_reg;
817 return result; 798 return result;
818 } 799 }
819 800
820 801
821 ActionNode* ActionNode::PositiveSubmatchSuccess(intptr_t stack_reg, 802 ActionNode* ActionNode::PositiveSubmatchSuccess(intptr_t stack_reg,
822 intptr_t position_reg, 803 intptr_t position_reg,
823 intptr_t clear_register_count, 804 intptr_t clear_register_count,
824 intptr_t clear_register_from, 805 intptr_t clear_register_from,
825 RegExpNode* on_success) { 806 RegExpNode* on_success) {
826 ActionNode* result = 807 ActionNode* result = new (on_success->zone())
827 new(on_success->zone()) ActionNode(POSITIVE_SUBMATCH_SUCCESS, 808 ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
828 on_success);
829 result->data_.u_submatch.stack_pointer_register = stack_reg; 809 result->data_.u_submatch.stack_pointer_register = stack_reg;
830 result->data_.u_submatch.current_position_register = position_reg; 810 result->data_.u_submatch.current_position_register = position_reg;
831 result->data_.u_submatch.clear_register_count = clear_register_count; 811 result->data_.u_submatch.clear_register_count = clear_register_count;
832 result->data_.u_submatch.clear_register_from = clear_register_from; 812 result->data_.u_submatch.clear_register_from = clear_register_from;
833 return result; 813 return result;
834 } 814 }
835 815
836 816
837 ActionNode* ActionNode::EmptyMatchCheck(intptr_t start_register, 817 ActionNode* ActionNode::EmptyMatchCheck(intptr_t start_register,
838 intptr_t repetition_register, 818 intptr_t repetition_register,
839 intptr_t repetition_limit, 819 intptr_t repetition_limit,
840 RegExpNode* on_success) { 820 RegExpNode* on_success) {
841 ActionNode* result = 821 ActionNode* result =
842 new(on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success); 822 new (on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success);
843 result->data_.u_empty_match_check.start_register = start_register; 823 result->data_.u_empty_match_check.start_register = start_register;
844 result->data_.u_empty_match_check.repetition_register = repetition_register; 824 result->data_.u_empty_match_check.repetition_register = repetition_register;
845 result->data_.u_empty_match_check.repetition_limit = repetition_limit; 825 result->data_.u_empty_match_check.repetition_limit = repetition_limit;
846 return result; 826 return result;
847 } 827 }
848 828
849 829
850 #define DEFINE_ACCEPT(Type) \ 830 #define DEFINE_ACCEPT(Type) \
851 void Type##Node::Accept(NodeVisitor* visitor) { \ 831 void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); }
852 visitor->Visit##Type(this); \
853 }
854 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) 832 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
855 #undef DEFINE_ACCEPT 833 #undef DEFINE_ACCEPT
856 834
857 835
858 void LoopChoiceNode::Accept(NodeVisitor* visitor) { 836 void LoopChoiceNode::Accept(NodeVisitor* visitor) {
859 visitor->VisitLoopChoice(this); 837 visitor->VisitLoopChoice(this);
860 } 838 }
861 839
862 840
863 // ------------------------------------------------------------------- 841 // -------------------------------------------------------------------
864 // Emit code. 842 // Emit code.
865 843
866 844
867 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, 845 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
868 Guard* guard, 846 Guard* guard,
869 Trace* trace) { 847 Trace* trace) {
870 switch (guard->op()) { 848 switch (guard->op()) {
871 case Guard::LT: 849 case Guard::LT:
872 ASSERT(!trace->mentions_reg(guard->reg())); 850 ASSERT(!trace->mentions_reg(guard->reg()));
873 macro_assembler->IfRegisterGE(guard->reg(), 851 macro_assembler->IfRegisterGE(guard->reg(), guard->value(),
874 guard->value(),
875 trace->backtrack()); 852 trace->backtrack());
876 break; 853 break;
877 case Guard::GEQ: 854 case Guard::GEQ:
878 ASSERT(!trace->mentions_reg(guard->reg())); 855 ASSERT(!trace->mentions_reg(guard->reg()));
879 macro_assembler->IfRegisterLT(guard->reg(), 856 macro_assembler->IfRegisterLT(guard->reg(), guard->value(),
880 guard->value(),
881 trace->backtrack()); 857 trace->backtrack());
882 break; 858 break;
883 } 859 }
884 } 860 }
885 861
886 862
887 // Returns the number of characters in the equivalence class, omitting those 863 // Returns the number of characters in the equivalence class, omitting those
888 // that cannot occur in the source string because it is ASCII. 864 // that cannot occur in the source string because it is ASCII.
889 static intptr_t GetCaseIndependentLetters(uint16_t character, 865 static intptr_t GetCaseIndependentLetters(uint16_t character,
890 bool one_byte_subject, 866 bool one_byte_subject,
(...skipping 21 matching lines...) Expand all
912 static inline bool EmitSimpleCharacter(Zone* zone, 888 static inline bool EmitSimpleCharacter(Zone* zone,
913 RegExpCompiler* compiler, 889 RegExpCompiler* compiler,
914 uint16_t c, 890 uint16_t c,
915 BlockLabel* on_failure, 891 BlockLabel* on_failure,
916 intptr_t cp_offset, 892 intptr_t cp_offset,
917 bool check, 893 bool check,
918 bool preloaded) { 894 bool preloaded) {
919 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 895 RegExpMacroAssembler* assembler = compiler->macro_assembler();
920 bool bound_checked = false; 896 bool bound_checked = false;
921 if (!preloaded) { 897 if (!preloaded) {
922 assembler->LoadCurrentCharacter( 898 assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
923 cp_offset,
924 on_failure,
925 check);
926 bound_checked = true; 899 bound_checked = true;
927 } 900 }
928 assembler->CheckNotCharacter(c, on_failure); 901 assembler->CheckNotCharacter(c, on_failure);
929 return bound_checked; 902 return bound_checked;
930 } 903 }
931 904
932 905
933 // Only emits non-letters (things that don't have case). Only used for case 906 // Only emits non-letters (things that don't have case). Only used for case
934 // independent matches. 907 // independent matches.
935 static inline bool EmitAtomNonLetter(Zone* zone, 908 static inline bool EmitAtomNonLetter(Zone* zone,
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
988 return true; 961 return true;
989 } 962 }
990 ASSERT(c2 > c1); 963 ASSERT(c2 > c1);
991 uint16_t diff = c2 - c1; 964 uint16_t diff = c2 - c1;
992 if (((diff - 1) & diff) == 0 && c1 >= diff) { 965 if (((diff - 1) & diff) == 0 && c1 >= diff) {
993 // If the characters differ by 2^n but don't differ by one bit then 966 // If the characters differ by 2^n but don't differ by one bit then
994 // subtract the difference from the found character, then do the or 967 // subtract the difference from the found character, then do the or
995 // trick. We avoid the theoretical case where negative numbers are 968 // trick. We avoid the theoretical case where negative numbers are
996 // involved in order to simplify code generation. 969 // involved in order to simplify code generation.
997 uint16_t mask = char_mask ^ diff; 970 uint16_t mask = char_mask ^ diff;
998 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, 971 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, diff, mask,
999 diff,
1000 mask,
1001 on_failure); 972 on_failure);
1002 return true; 973 return true;
1003 } 974 }
1004 return false; 975 return false;
1005 } 976 }
1006 977
1007 978
1008 typedef bool EmitCharacterFunction(Zone* zone, 979 typedef bool EmitCharacterFunction(Zone* zone,
1009 RegExpCompiler* compiler, 980 RegExpCompiler* compiler,
1010 uint16_t c, 981 uint16_t c,
(...skipping 18 matching lines...) Expand all
1029 if (length <= 1) return false; 1000 if (length <= 1) return false;
1030 // We may not need to check against the end of the input string 1001 // We may not need to check against the end of the input string
1031 // if this character lies before a character that matched. 1002 // if this character lies before a character that matched.
1032 if (!preloaded) { 1003 if (!preloaded) {
1033 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 1004 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1034 } 1005 }
1035 BlockLabel ok; 1006 BlockLabel ok;
1036 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); 1007 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
1037 switch (length) { 1008 switch (length) {
1038 case 2: { 1009 case 2: {
1039 if (ShortCutEmitCharacterPair(macro_assembler, 1010 if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],
1040 one_byte, 1011 chars[1], on_failure)) {
1041 chars[0],
1042 chars[1],
1043 on_failure)) {
1044 } else { 1012 } else {
1045 macro_assembler->CheckCharacter(chars[0], &ok); 1013 macro_assembler->CheckCharacter(chars[0], &ok);
1046 macro_assembler->CheckNotCharacter(chars[1], on_failure); 1014 macro_assembler->CheckNotCharacter(chars[1], on_failure);
1047 macro_assembler->BindBlock(&ok); 1015 macro_assembler->BindBlock(&ok);
1048 } 1016 }
1049 break; 1017 break;
1050 } 1018 }
1051 case 4: 1019 case 4:
1052 macro_assembler->CheckCharacter(chars[3], &ok); 1020 macro_assembler->CheckCharacter(chars[3], &ok);
1053 // Fall through! 1021 // Fall through!
1054 case 3: 1022 case 3:
1055 macro_assembler->CheckCharacter(chars[0], &ok); 1023 macro_assembler->CheckCharacter(chars[0], &ok);
1056 macro_assembler->CheckCharacter(chars[1], &ok); 1024 macro_assembler->CheckCharacter(chars[1], &ok);
1057 macro_assembler->CheckNotCharacter(chars[2], on_failure); 1025 macro_assembler->CheckNotCharacter(chars[2], on_failure);
1058 macro_assembler->BindBlock(&ok); 1026 macro_assembler->BindBlock(&ok);
1059 break; 1027 break;
1060 default: 1028 default:
1061 UNREACHABLE(); 1029 UNREACHABLE();
1062 break; 1030 break;
1063 } 1031 }
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
1097 } else { 1065 } else {
1098 masm->CheckCharacterInRange(first, last, in_range); 1066 masm->CheckCharacterInRange(first, last, in_range);
1099 } 1067 }
1100 if (out_of_range != fall_through) masm->GoTo(out_of_range); 1068 if (out_of_range != fall_through) masm->GoTo(out_of_range);
1101 } 1069 }
1102 } 1070 }
1103 1071
1104 1072
1105 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. 1073 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
1106 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. 1074 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
1107 static void EmitUseLookupTable( 1075 static void EmitUseLookupTable(RegExpMacroAssembler* masm,
1108 RegExpMacroAssembler* masm, 1076 ZoneGrowableArray<int>* ranges,
1109 ZoneGrowableArray<int>* ranges, 1077 intptr_t start_index,
1110 intptr_t start_index, 1078 intptr_t end_index,
1111 intptr_t end_index, 1079 intptr_t min_char,
1112 intptr_t min_char, 1080 BlockLabel* fall_through,
1113 BlockLabel* fall_through, 1081 BlockLabel* even_label,
1114 BlockLabel* even_label, 1082 BlockLabel* odd_label) {
1115 BlockLabel* odd_label) {
1116 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; 1083 static const intptr_t kSize = RegExpMacroAssembler::kTableSize;
1117 static const intptr_t kMask = RegExpMacroAssembler::kTableMask; 1084 static const intptr_t kMask = RegExpMacroAssembler::kTableMask;
1118 1085
1119 intptr_t base = (min_char & ~kMask); 1086 intptr_t base = (min_char & ~kMask);
1120 1087
1121 // Assert that everything is on one kTableSize page. 1088 // Assert that everything is on one kTableSize page.
1122 for (intptr_t i = start_index; i <= end_index; i++) { 1089 for (intptr_t i = start_index; i <= end_index; i++) {
1123 ASSERT((ranges->At(i) & ~kMask) == base); 1090 ASSERT((ranges->At(i) & ~kMask) == base);
1124 } 1091 }
1125 ASSERT(start_index == 0 || (ranges->At(start_index - 1) & ~kMask) <= base); 1092 ASSERT(start_index == 0 || (ranges->At(start_index - 1) & ~kMask) <= base);
(...skipping 21 matching lines...) Expand all
1147 for (j = (ranges->At(i) & kMask); j < (ranges->At(i + 1) & kMask); j++) { 1114 for (j = (ranges->At(i) & kMask); j < (ranges->At(i + 1) & kMask); j++) {
1148 templ[j] = bit; 1115 templ[j] = bit;
1149 } 1116 }
1150 bit ^= 1; 1117 bit ^= 1;
1151 } 1118 }
1152 for (intptr_t i = j; i < kSize; i++) { 1119 for (intptr_t i = j; i < kSize; i++) {
1153 templ[i] = bit; 1120 templ[i] = bit;
1154 } 1121 }
1155 // TODO(erikcorry): Cache these. 1122 // TODO(erikcorry): Cache these.
1156 const TypedData& ba = TypedData::ZoneHandle( 1123 const TypedData& ba = TypedData::ZoneHandle(
1157 masm->zone(), 1124 masm->zone(), TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld));
1158 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld));
1159 for (intptr_t i = 0; i < kSize; i++) { 1125 for (intptr_t i = 0; i < kSize; i++) {
1160 ba.SetUint8(i, templ[i]); 1126 ba.SetUint8(i, templ[i]);
1161 } 1127 }
1162 masm->CheckBitInTable(ba, on_bit_set); 1128 masm->CheckBitInTable(ba, on_bit_set);
1163 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); 1129 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);
1164 } 1130 }
1165 1131
1166 1132
1167 static void CutOutRange(RegExpMacroAssembler* masm, 1133 static void CutOutRange(RegExpMacroAssembler* masm,
1168 ZoneGrowableArray<int>* ranges, 1134 ZoneGrowableArray<int>* ranges,
1169 intptr_t start_index, 1135 intptr_t start_index,
1170 intptr_t end_index, 1136 intptr_t end_index,
1171 intptr_t cut_index, 1137 intptr_t cut_index,
1172 BlockLabel* even_label, 1138 BlockLabel* even_label,
1173 BlockLabel* odd_label) { 1139 BlockLabel* odd_label) {
1174 bool odd = (((cut_index - start_index) & 1) == 1); 1140 bool odd = (((cut_index - start_index) & 1) == 1);
1175 BlockLabel* in_range_label = odd ? odd_label : even_label; 1141 BlockLabel* in_range_label = odd ? odd_label : even_label;
1176 BlockLabel dummy; 1142 BlockLabel dummy;
1177 EmitDoubleBoundaryTest(masm, 1143 EmitDoubleBoundaryTest(masm, ranges->At(cut_index),
1178 ranges->At(cut_index), 1144 ranges->At(cut_index + 1) - 1, &dummy, in_range_label,
1179 ranges->At(cut_index + 1) - 1,
1180 &dummy,
1181 in_range_label,
1182 &dummy); 1145 &dummy);
1183 ASSERT(!dummy.IsLinked()); 1146 ASSERT(!dummy.IsLinked());
1184 // Cut out the single range by rewriting the array. This creates a new 1147 // Cut out the single range by rewriting the array. This creates a new
1185 // range that is a merger of the two ranges on either side of the one we 1148 // range that is a merger of the two ranges on either side of the one we
1186 // are cutting out. The oddity of the labels is preserved. 1149 // are cutting out. The oddity of the labels is preserved.
1187 for (intptr_t j = cut_index; j > start_index; j--) { 1150 for (intptr_t j = cut_index; j > start_index; j--) {
1188 (*ranges)[j] = ranges->At(j - 1); 1151 (*ranges)[j] = ranges->At(j - 1);
1189 } 1152 }
1190 for (intptr_t j = cut_index + 1; j < end_index; j++) { 1153 for (intptr_t j = cut_index + 1; j < end_index; j++) {
1191 (*ranges)[j] = ranges->At(j + 1); 1154 (*ranges)[j] = ranges->At(j + 1);
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
1224 // 128-character space can take up a lot of space in the ranges array if, 1187 // 128-character space can take up a lot of space in the ranges array if,
1225 // for example, we only want to match every second character (eg. the lower 1188 // for example, we only want to match every second character (eg. the lower
1226 // case characters on some Unicode pages). 1189 // case characters on some Unicode pages).
1227 intptr_t binary_chop_index = (end_index + start_index) / 2; 1190 intptr_t binary_chop_index = (end_index + start_index) / 2;
1228 // The first test ensures that we get to the code that handles the Latin1 1191 // The first test ensures that we get to the code that handles the Latin1
1229 // range with a single not-taken branch, speeding up this important 1192 // range with a single not-taken branch, speeding up this important
1230 // character range (even non-Latin1 charset-based text has spaces and 1193 // character range (even non-Latin1 charset-based text has spaces and
1231 // punctuation). 1194 // punctuation).
1232 if (*border - 1 > Symbols::kMaxOneCharCodeSymbol && // Latin1 case. 1195 if (*border - 1 > Symbols::kMaxOneCharCodeSymbol && // Latin1 case.
1233 end_index - start_index > (*new_start_index - start_index) * 2 && 1196 end_index - start_index > (*new_start_index - start_index) * 2 &&
1234 last - first > kSize * 2 && 1197 last - first > kSize * 2 && binary_chop_index > *new_start_index &&
1235 binary_chop_index > *new_start_index &&
1236 ranges->At(binary_chop_index) >= first + 2 * kSize) { 1198 ranges->At(binary_chop_index) >= first + 2 * kSize) {
1237 intptr_t scan_forward_for_section_border = binary_chop_index; 1199 intptr_t scan_forward_for_section_border = binary_chop_index;
1238 intptr_t new_border = (ranges->At(binary_chop_index) | kMask) + 1; 1200 intptr_t new_border = (ranges->At(binary_chop_index) | kMask) + 1;
1239 1201
1240 while (scan_forward_for_section_border < end_index) { 1202 while (scan_forward_for_section_border < end_index) {
1241 if (ranges->At(scan_forward_for_section_border) > new_border) { 1203 if (ranges->At(scan_forward_for_section_border) > new_border) {
1242 *new_start_index = scan_forward_for_section_border; 1204 *new_start_index = scan_forward_for_section_border;
1243 *border = new_border; 1205 *border = new_border;
1244 break; 1206 break;
1245 } 1207 }
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
1283 // Just need to test if the character is before or on-or-after 1245 // Just need to test if the character is before or on-or-after
1284 // a particular character. 1246 // a particular character.
1285 if (start_index == end_index) { 1247 if (start_index == end_index) {
1286 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label); 1248 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
1287 return; 1249 return;
1288 } 1250 }
1289 1251
1290 // Another almost trivial case: There is one interval in the middle that is 1252 // Another almost trivial case: There is one interval in the middle that is
1291 // different from the end intervals. 1253 // different from the end intervals.
1292 if (start_index + 1 == end_index) { 1254 if (start_index + 1 == end_index) {
1293 EmitDoubleBoundaryTest( 1255 EmitDoubleBoundaryTest(masm, first, last, fall_through, even_label,
1294 masm, first, last, fall_through, even_label, odd_label); 1256 odd_label);
1295 return; 1257 return;
1296 } 1258 }
1297 1259
1298 // It's not worth using table lookup if there are very few intervals in the 1260 // It's not worth using table lookup if there are very few intervals in the
1299 // character class. 1261 // character class.
1300 if (end_index - start_index <= 6) { 1262 if (end_index - start_index <= 6) {
1301 // It is faster to test for individual characters, so we look for those 1263 // It is faster to test for individual characters, so we look for those
1302 // first, then try arbitrary ranges in the second round. 1264 // first, then try arbitrary ranges in the second round.
1303 static intptr_t kNoCutIndex = -1; 1265 static intptr_t kNoCutIndex = -1;
1304 intptr_t cut = kNoCutIndex; 1266 intptr_t cut = kNoCutIndex;
1305 for (intptr_t i = start_index; i < end_index; i++) { 1267 for (intptr_t i = start_index; i < end_index; i++) {
1306 if (ranges->At(i) == ranges->At(i + 1) - 1) { 1268 if (ranges->At(i) == ranges->At(i + 1) - 1) {
1307 cut = i; 1269 cut = i;
1308 break; 1270 break;
1309 } 1271 }
1310 } 1272 }
1311 if (cut == kNoCutIndex) cut = start_index; 1273 if (cut == kNoCutIndex) cut = start_index;
1312 CutOutRange( 1274 CutOutRange(masm, ranges, start_index, end_index, cut, even_label,
1313 masm, ranges, start_index, end_index, cut, even_label, odd_label); 1275 odd_label);
1314 ASSERT(end_index - start_index >= 2); 1276 ASSERT(end_index - start_index >= 2);
1315 GenerateBranches(masm, 1277 GenerateBranches(masm, ranges, start_index + 1, end_index - 1, min_char,
1316 ranges, 1278 max_char, fall_through, even_label, odd_label);
1317 start_index + 1,
1318 end_index - 1,
1319 min_char,
1320 max_char,
1321 fall_through,
1322 even_label,
1323 odd_label);
1324 return; 1279 return;
1325 } 1280 }
1326 1281
1327 // If there are a lot of intervals in the regexp, then we will use tables to 1282 // If there are a lot of intervals in the regexp, then we will use tables to
1328 // determine whether the character is inside or outside the character class. 1283 // determine whether the character is inside or outside the character class.
1329 static const intptr_t kBits = RegExpMacroAssembler::kTableSizeBits; 1284 static const intptr_t kBits = RegExpMacroAssembler::kTableSizeBits;
1330 1285
1331 if ((max_char >> kBits) == (min_char >> kBits)) { 1286 if ((max_char >> kBits) == (min_char >> kBits)) {
1332 EmitUseLookupTable(masm, 1287 EmitUseLookupTable(masm, ranges, start_index, end_index, min_char,
1333 ranges, 1288 fall_through, even_label, odd_label);
1334 start_index,
1335 end_index,
1336 min_char,
1337 fall_through,
1338 even_label,
1339 odd_label);
1340 return; 1289 return;
1341 } 1290 }
1342 1291
1343 if ((min_char >> kBits) != (first >> kBits)) { 1292 if ((min_char >> kBits) != (first >> kBits)) {
1344 masm->CheckCharacterLT(first, odd_label); 1293 masm->CheckCharacterLT(first, odd_label);
1345 GenerateBranches(masm, 1294 GenerateBranches(masm, ranges, start_index + 1, end_index, first, max_char,
1346 ranges, 1295 fall_through, odd_label, even_label);
1347 start_index + 1,
1348 end_index,
1349 first,
1350 max_char,
1351 fall_through,
1352 odd_label,
1353 even_label);
1354 return; 1296 return;
1355 } 1297 }
1356 1298
1357 intptr_t new_start_index = 0; 1299 intptr_t new_start_index = 0;
1358 intptr_t new_end_index = 0; 1300 intptr_t new_end_index = 0;
1359 intptr_t border = 0; 1301 intptr_t border = 0;
1360 1302
1361 SplitSearchSpace(ranges, 1303 SplitSearchSpace(ranges, start_index, end_index, &new_start_index,
1362 start_index, 1304 &new_end_index, &border);
1363 end_index,
1364 &new_start_index,
1365 &new_end_index,
1366 &border);
1367 1305
1368 BlockLabel handle_rest; 1306 BlockLabel handle_rest;
1369 BlockLabel* above = &handle_rest; 1307 BlockLabel* above = &handle_rest;
1370 if (border == last + 1) { 1308 if (border == last + 1) {
1371 // We didn't find any section that started after the limit, so everything 1309 // We didn't find any section that started after the limit, so everything
1372 // above the border is one of the terminal labels. 1310 // above the border is one of the terminal labels.
1373 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label; 1311 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;
1374 ASSERT(new_end_index == end_index - 1); 1312 ASSERT(new_end_index == end_index - 1);
1375 } 1313 }
1376 1314
1377 ASSERT(start_index <= new_end_index); 1315 ASSERT(start_index <= new_end_index);
1378 ASSERT(new_start_index <= end_index); 1316 ASSERT(new_start_index <= end_index);
1379 ASSERT(start_index < new_start_index); 1317 ASSERT(start_index < new_start_index);
1380 ASSERT(new_end_index < end_index); 1318 ASSERT(new_end_index < end_index);
1381 ASSERT(new_end_index + 1 == new_start_index || 1319 ASSERT(new_end_index + 1 == new_start_index ||
1382 (new_end_index + 2 == new_start_index && 1320 (new_end_index + 2 == new_start_index &&
1383 border == ranges->At(new_end_index + 1))); 1321 border == ranges->At(new_end_index + 1)));
1384 ASSERT(min_char < border - 1); 1322 ASSERT(min_char < border - 1);
1385 ASSERT(border < max_char); 1323 ASSERT(border < max_char);
1386 ASSERT(ranges->At(new_end_index) < border); 1324 ASSERT(ranges->At(new_end_index) < border);
1387 ASSERT(border < ranges->At(new_start_index) || 1325 ASSERT(border < ranges->At(new_start_index) ||
1388 (border == ranges->At(new_start_index) && 1326 (border == ranges->At(new_start_index) &&
1389 new_start_index == end_index && 1327 new_start_index == end_index && new_end_index == end_index - 1 &&
1390 new_end_index == end_index - 1 &&
1391 border == last + 1)); 1328 border == last + 1));
1392 ASSERT(new_start_index == 0 || border >= ranges->At(new_start_index - 1)); 1329 ASSERT(new_start_index == 0 || border >= ranges->At(new_start_index - 1));
1393 1330
1394 masm->CheckCharacterGT(border - 1, above); 1331 masm->CheckCharacterGT(border - 1, above);
1395 BlockLabel dummy; 1332 BlockLabel dummy;
1396 GenerateBranches(masm, 1333 GenerateBranches(masm, ranges, start_index, new_end_index, min_char,
1397 ranges, 1334 border - 1, &dummy, even_label, odd_label);
1398 start_index,
1399 new_end_index,
1400 min_char,
1401 border - 1,
1402 &dummy,
1403 even_label,
1404 odd_label);
1405 1335
1406 if (handle_rest.IsLinked()) { 1336 if (handle_rest.IsLinked()) {
1407 masm->BindBlock(&handle_rest); 1337 masm->BindBlock(&handle_rest);
1408 bool flip = (new_start_index & 1) != (start_index & 1); 1338 bool flip = (new_start_index & 1) != (start_index & 1);
1409 GenerateBranches(masm, 1339 GenerateBranches(masm, ranges, new_start_index, end_index, border, max_char,
1410 ranges, 1340 &dummy, flip ? odd_label : even_label,
1411 new_start_index,
1412 end_index,
1413 border,
1414 max_char,
1415 &dummy,
1416 flip ? odd_label : even_label,
1417 flip ? even_label : odd_label); 1341 flip ? even_label : odd_label);
1418 } 1342 }
1419 } 1343 }
1420 1344
1421 1345
1422 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, 1346 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
1423 RegExpCharacterClass* cc, 1347 RegExpCharacterClass* cc,
1424 bool one_byte, 1348 bool one_byte,
1425 BlockLabel* on_failure, 1349 BlockLabel* on_failure,
1426 intptr_t cp_offset, 1350 intptr_t cp_offset,
(...skipping 26 matching lines...) Expand all
1453 if (last_valid_range < 0) { 1377 if (last_valid_range < 0) {
1454 if (!cc->is_negated()) { 1378 if (!cc->is_negated()) {
1455 macro_assembler->GoTo(on_failure); 1379 macro_assembler->GoTo(on_failure);
1456 } 1380 }
1457 if (check_offset) { 1381 if (check_offset) {
1458 macro_assembler->CheckPosition(cp_offset, on_failure); 1382 macro_assembler->CheckPosition(cp_offset, on_failure);
1459 } 1383 }
1460 return; 1384 return;
1461 } 1385 }
1462 1386
1463 if (last_valid_range == 0 && 1387 if (last_valid_range == 0 && ranges->At(0).IsEverything(max_char)) {
1464 ranges->At(0).IsEverything(max_char)) {
1465 if (cc->is_negated()) { 1388 if (cc->is_negated()) {
1466 macro_assembler->GoTo(on_failure); 1389 macro_assembler->GoTo(on_failure);
1467 } else { 1390 } else {
1468 // This is a common case hit by non-anchored expressions. 1391 // This is a common case hit by non-anchored expressions.
1469 if (check_offset) { 1392 if (check_offset) {
1470 macro_assembler->CheckPosition(cp_offset, on_failure); 1393 macro_assembler->CheckPosition(cp_offset, on_failure);
1471 } 1394 }
1472 } 1395 }
1473 return; 1396 return;
1474 } 1397 }
1475 if (last_valid_range == 0 && 1398 if (last_valid_range == 0 && !cc->is_negated() &&
1476 !cc->is_negated() &&
1477 ranges->At(0).IsEverything(max_char)) { 1399 ranges->At(0).IsEverything(max_char)) {
1478 // This is a common case hit by non-anchored expressions. 1400 // This is a common case hit by non-anchored expressions.
1479 if (check_offset) { 1401 if (check_offset) {
1480 macro_assembler->CheckPosition(cp_offset, on_failure); 1402 macro_assembler->CheckPosition(cp_offset, on_failure);
1481 } 1403 }
1482 return; 1404 return;
1483 } 1405 }
1484 1406
1485 if (!preloaded) { 1407 if (!preloaded) {
1486 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); 1408 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
1487 } 1409 }
1488 1410
1489 if (cc->is_standard() && 1411 if (cc->is_standard() &&
1490 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), 1412 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
1491 on_failure)) { 1413 on_failure)) {
1492 return; 1414 return;
1493 } 1415 }
1494 1416
1495 1417
1496 // A new list with ascending entries. Each entry is a code unit 1418 // A new list with ascending entries. Each entry is a code unit
1497 // where there is a boundary between code units that are part of 1419 // where there is a boundary between code units that are part of
1498 // the class and code units that are not. Normally we insert an 1420 // the class and code units that are not. Normally we insert an
1499 // entry at zero which goes to the failure label, but if there 1421 // entry at zero which goes to the failure label, but if there
1500 // was already one there we fall through for success on that entry. 1422 // was already one there we fall through for success on that entry.
1501 // Subsequent entries have alternating meaning (success/failure). 1423 // Subsequent entries have alternating meaning (success/failure).
1502 ZoneGrowableArray<int>* range_boundaries = 1424 ZoneGrowableArray<int>* range_boundaries =
1503 new(zone) ZoneGrowableArray<int>(last_valid_range); 1425 new (zone) ZoneGrowableArray<int>(last_valid_range);
1504 1426
1505 bool zeroth_entry_is_failure = !cc->is_negated(); 1427 bool zeroth_entry_is_failure = !cc->is_negated();
1506 1428
1507 for (intptr_t i = 0; i <= last_valid_range; i++) { 1429 for (intptr_t i = 0; i <= last_valid_range; i++) {
1508 CharacterRange& range = (*ranges)[i]; 1430 CharacterRange& range = (*ranges)[i];
1509 if (range.from() == 0) { 1431 if (range.from() == 0) {
1510 ASSERT(i == 0); 1432 ASSERT(i == 0);
1511 zeroth_entry_is_failure = !zeroth_entry_is_failure; 1433 zeroth_entry_is_failure = !zeroth_entry_is_failure;
1512 } else { 1434 } else {
1513 range_boundaries->Add(range.from()); 1435 range_boundaries->Add(range.from());
1514 } 1436 }
1515 range_boundaries->Add(range.to() + 1); 1437 range_boundaries->Add(range.to() + 1);
1516 } 1438 }
1517 intptr_t end_index = range_boundaries->length() - 1; 1439 intptr_t end_index = range_boundaries->length() - 1;
1518 if (range_boundaries->At(end_index) > max_char) { 1440 if (range_boundaries->At(end_index) > max_char) {
1519 end_index--; 1441 end_index--;
1520 } 1442 }
1521 1443
1522 BlockLabel fall_through; 1444 BlockLabel fall_through;
1523 GenerateBranches(macro_assembler, 1445 GenerateBranches(macro_assembler, range_boundaries,
1524 range_boundaries,
1525 0, // start_index. 1446 0, // start_index.
1526 end_index, 1447 end_index,
1527 0, // min_char. 1448 0, // min_char.
1528 max_char, 1449 max_char, &fall_through,
1529 &fall_through,
1530 zeroth_entry_is_failure ? &fall_through : on_failure, 1450 zeroth_entry_is_failure ? &fall_through : on_failure,
1531 zeroth_entry_is_failure ? on_failure : &fall_through); 1451 zeroth_entry_is_failure ? on_failure : &fall_through);
1532 macro_assembler->BindBlock(&fall_through); 1452 macro_assembler->BindBlock(&fall_through);
1533 } 1453 }
1534 1454
1535 1455
1536 RegExpNode::~RegExpNode() { 1456 RegExpNode::~RegExpNode() {}
1537 }
1538 1457
1539 1458
1540 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, 1459 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
1541 Trace* trace) { 1460 Trace* trace) {
1542 // If we are generating a greedy loop then don't stop and don't reuse code. 1461 // If we are generating a greedy loop then don't stop and don't reuse code.
1543 if (trace->stop_node() != NULL) { 1462 if (trace->stop_node() != NULL) {
1544 return CONTINUE; 1463 return CONTINUE;
1545 } 1464 }
1546 1465
1547 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1466 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
(...skipping 12 matching lines...) Expand all
1560 return DONE; 1479 return DONE;
1561 } 1480 }
1562 // Generate generic version of the node and bind the label for later use. 1481 // Generate generic version of the node and bind the label for later use.
1563 macro_assembler->BindBlock(&label_); 1482 macro_assembler->BindBlock(&label_);
1564 return CONTINUE; 1483 return CONTINUE;
1565 } 1484 }
1566 1485
1567 // We are being asked to make a non-generic version. Keep track of how many 1486 // We are being asked to make a non-generic version. Keep track of how many
1568 // non-generic versions we generate so as not to overdo it. 1487 // non-generic versions we generate so as not to overdo it.
1569 trace_count_++; 1488 trace_count_++;
1570 if (kRegexpOptimization && 1489 if (kRegexpOptimization && trace_count_ < kMaxCopiesCodeGenerated &&
1571 trace_count_ < kMaxCopiesCodeGenerated &&
1572 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) { 1490 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {
1573 return CONTINUE; 1491 return CONTINUE;
1574 } 1492 }
1575 1493
1576 // If we get here code has been generated for this node too many times or 1494 // If we get here code has been generated for this node too many times or
1577 // recursion is too deep. Time to switch to a generic version. The code for 1495 // recursion is too deep. Time to switch to a generic version. The code for
1578 // generic versions above can handle deep recursion properly. 1496 // generic versions above can handle deep recursion properly.
1579 trace->Flush(compiler, this); 1497 trace->Flush(compiler, this);
1580 return DONE; 1498 return DONE;
1581 } 1499 }
1582 1500
1583 1501
1584 intptr_t ActionNode::EatsAtLeast(intptr_t still_to_find, 1502 intptr_t ActionNode::EatsAtLeast(intptr_t still_to_find,
1585 intptr_t budget, 1503 intptr_t budget,
1586 bool not_at_start) { 1504 bool not_at_start) {
1587 if (budget <= 0) return 0; 1505 if (budget <= 0) return 0;
1588 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! 1506 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
1589 return on_success()->EatsAtLeast(still_to_find, 1507 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
1590 budget - 1,
1591 not_at_start);
1592 } 1508 }
1593 1509
1594 1510
1595 void ActionNode::FillInBMInfo(intptr_t offset, 1511 void ActionNode::FillInBMInfo(intptr_t offset,
1596 intptr_t budget, 1512 intptr_t budget,
1597 BoyerMooreLookahead* bm, 1513 BoyerMooreLookahead* bm,
1598 bool not_at_start) { 1514 bool not_at_start) {
1599 if (action_type_ == BEGIN_SUBMATCH) { 1515 if (action_type_ == BEGIN_SUBMATCH) {
1600 bm->SetRest(offset); 1516 bm->SetRest(offset);
1601 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { 1517 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {
1602 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); 1518 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);
1603 } 1519 }
1604 SaveBMInfo(bm, not_at_start, offset); 1520 SaveBMInfo(bm, not_at_start, offset);
1605 } 1521 }
1606 1522
1607 1523
1608 intptr_t AssertionNode::EatsAtLeast(intptr_t still_to_find, 1524 intptr_t AssertionNode::EatsAtLeast(intptr_t still_to_find,
1609 intptr_t budget, 1525 intptr_t budget,
1610 bool not_at_start) { 1526 bool not_at_start) {
1611 if (budget <= 0) return 0; 1527 if (budget <= 0) return 0;
1612 // If we know we are not at the start and we are asked "how many characters 1528 // If we know we are not at the start and we are asked "how many characters
1613 // will you match if you succeed?" then we can answer anything since false 1529 // will you match if you succeed?" then we can answer anything since false
1614 // implies false. So lets just return the max answer (still_to_find) since 1530 // implies false. So lets just return the max answer (still_to_find) since
1615 // that won't prevent us from preloading a lot of characters for the other 1531 // that won't prevent us from preloading a lot of characters for the other
1616 // branches in the node graph. 1532 // branches in the node graph.
1617 if (assertion_type() == AT_START && not_at_start) return still_to_find; 1533 if (assertion_type() == AT_START && not_at_start) return still_to_find;
1618 return on_success()->EatsAtLeast(still_to_find, 1534 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
1619 budget - 1,
1620 not_at_start);
1621 } 1535 }
1622 1536
1623 1537
1624 void AssertionNode::FillInBMInfo(intptr_t offset, 1538 void AssertionNode::FillInBMInfo(intptr_t offset,
1625 intptr_t budget, 1539 intptr_t budget,
1626 BoyerMooreLookahead* bm, 1540 BoyerMooreLookahead* bm,
1627 bool not_at_start) { 1541 bool not_at_start) {
1628 // Match the behaviour of EatsAtLeast on this node. 1542 // Match the behaviour of EatsAtLeast on this node.
1629 if (assertion_type() == AT_START && not_at_start) return; 1543 if (assertion_type() == AT_START && not_at_start) return;
1630 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); 1544 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);
1631 SaveBMInfo(bm, not_at_start, offset); 1545 SaveBMInfo(bm, not_at_start, offset);
1632 } 1546 }
1633 1547
1634 1548
1635 intptr_t BackReferenceNode::EatsAtLeast(intptr_t still_to_find, 1549 intptr_t BackReferenceNode::EatsAtLeast(intptr_t still_to_find,
1636 intptr_t budget, 1550 intptr_t budget,
1637 bool not_at_start) { 1551 bool not_at_start) {
1638 if (budget <= 0) return 0; 1552 if (budget <= 0) return 0;
1639 return on_success()->EatsAtLeast(still_to_find, 1553 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
1640 budget - 1,
1641 not_at_start);
1642 } 1554 }
1643 1555
1644 1556
1645 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find, 1557 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find,
1646 intptr_t budget, 1558 intptr_t budget,
1647 bool not_at_start) { 1559 bool not_at_start) {
1648 intptr_t answer = Length(); 1560 intptr_t answer = Length();
1649 if (answer >= still_to_find) return answer; 1561 if (answer >= still_to_find) return answer;
1650 if (budget <= 0) return answer; 1562 if (budget <= 0) return answer;
1651 // We are not at start after this node so we set the last argument to 'true'. 1563 // We are not at start after this node so we set the last argument to 'true'.
1652 return answer + on_success()->EatsAtLeast(still_to_find - answer, 1564 return answer +
1653 budget - 1, 1565 on_success()->EatsAtLeast(still_to_find - answer, budget - 1, true);
1654 true);
1655 } 1566 }
1656 1567
1657 1568
1658 intptr_t NegativeLookaheadChoiceNode::EatsAtLeast(intptr_t still_to_find, 1569 intptr_t NegativeLookaheadChoiceNode::EatsAtLeast(intptr_t still_to_find,
1659 intptr_t budget, 1570 intptr_t budget,
1660 bool not_at_start) { 1571 bool not_at_start) {
1661 if (budget <= 0) return 0; 1572 if (budget <= 0) return 0;
1662 // Alternative 0 is the negative lookahead, alternative 1 is what comes 1573 // Alternative 0 is the negative lookahead, alternative 1 is what comes
1663 // afterwards. 1574 // afterwards.
1664 RegExpNode* node = (*alternatives_)[1].node(); 1575 RegExpNode* node = (*alternatives_)[1].node();
(...skipping 29 matching lines...) Expand all
1694 if (node_eats_at_least < min) min = node_eats_at_least; 1605 if (node_eats_at_least < min) min = node_eats_at_least;
1695 if (min == 0) return 0; 1606 if (min == 0) return 0;
1696 } 1607 }
1697 return min; 1608 return min;
1698 } 1609 }
1699 1610
1700 1611
1701 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find, 1612 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find,
1702 intptr_t budget, 1613 intptr_t budget,
1703 bool not_at_start) { 1614 bool not_at_start) {
1704 return EatsAtLeastHelper(still_to_find, 1615 return EatsAtLeastHelper(still_to_find, budget - 1, loop_node_, not_at_start);
1705 budget - 1,
1706 loop_node_,
1707 not_at_start);
1708 } 1616 }
1709 1617
1710 1618
1711 intptr_t ChoiceNode::EatsAtLeast(intptr_t still_to_find, 1619 intptr_t ChoiceNode::EatsAtLeast(intptr_t still_to_find,
1712 intptr_t budget, 1620 intptr_t budget,
1713 bool not_at_start) { 1621 bool not_at_start) {
1714 return EatsAtLeastHelper(still_to_find, 1622 return EatsAtLeastHelper(still_to_find, budget, NULL, not_at_start);
1715 budget,
1716 NULL,
1717 not_at_start);
1718 } 1623 }
1719 1624
1720 1625
1721 // Takes the left-most 1-bit and smears it out, setting all bits to its right. 1626 // Takes the left-most 1-bit and smears it out, setting all bits to its right.
1722 static inline uint32_t SmearBitsRight(uint32_t v) { 1627 static inline uint32_t SmearBitsRight(uint32_t v) {
1723 v |= v >> 1; 1628 v |= v >> 1;
1724 v |= v >> 2; 1629 v |= v >> 2;
1725 v |= v >> 4; 1630 v |= v >> 4;
1726 v |= v >> 8; 1631 v |= v >> 8;
1727 v |= v >> 16; 1632 v |= v >> 16;
(...skipping 26 matching lines...) Expand all
1754 1659
1755 1660
1756 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, 1661 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
1757 Trace* bounds_check_trace, 1662 Trace* bounds_check_trace,
1758 Trace* trace, 1663 Trace* trace,
1759 bool preload_has_checked_bounds, 1664 bool preload_has_checked_bounds,
1760 BlockLabel* on_possible_success, 1665 BlockLabel* on_possible_success,
1761 QuickCheckDetails* details, 1666 QuickCheckDetails* details,
1762 bool fall_through_on_failure) { 1667 bool fall_through_on_failure) {
1763 if (details->characters() == 0) return false; 1668 if (details->characters() == 0) return false;
1764 GetQuickCheckDetails( 1669 GetQuickCheckDetails(details, compiler, 0,
1765 details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE); 1670 trace->at_start() == Trace::FALSE_VALUE);
1766 if (details->cannot_match()) return false; 1671 if (details->cannot_match()) return false;
1767 if (!details->Rationalize(compiler->one_byte())) return false; 1672 if (!details->Rationalize(compiler->one_byte())) return false;
1768 ASSERT(details->characters() == 1 || 1673 ASSERT(details->characters() == 1 ||
1769 compiler->macro_assembler()->CanReadUnaligned()); 1674 compiler->macro_assembler()->CanReadUnaligned());
1770 uint32_t mask = details->mask(); 1675 uint32_t mask = details->mask();
1771 uint32_t value = details->value(); 1676 uint32_t value = details->value();
1772 1677
1773 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 1678 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1774 1679
1775 if (trace->characters_preloaded() != details->characters()) { 1680 if (trace->characters_preloaded() != details->characters()) {
1776 ASSERT(trace->cp_offset() == bounds_check_trace->cp_offset()); 1681 ASSERT(trace->cp_offset() == bounds_check_trace->cp_offset());
1777 // We are attempting to preload the minimum number of characters 1682 // We are attempting to preload the minimum number of characters
1778 // any choice would eat, so if the bounds check fails, then none of the 1683 // any choice would eat, so if the bounds check fails, then none of the
1779 // choices can succeed, so we can just immediately backtrack, rather 1684 // choices can succeed, so we can just immediately backtrack, rather
1780 // than go to the next choice. 1685 // than go to the next choice.
1781 assembler->LoadCurrentCharacter(trace->cp_offset(), 1686 assembler->LoadCurrentCharacter(
1782 bounds_check_trace->backtrack(), 1687 trace->cp_offset(), bounds_check_trace->backtrack(),
1783 !preload_has_checked_bounds, 1688 !preload_has_checked_bounds, details->characters());
1784 details->characters());
1785 } 1689 }
1786 1690
1787 1691
1788 bool need_mask = true; 1692 bool need_mask = true;
1789 1693
1790 if (details->characters() == 1) { 1694 if (details->characters() == 1) {
1791 // If number of characters preloaded is 1 then we used a byte or 16 bit 1695 // If number of characters preloaded is 1 then we used a byte or 16 bit
1792 // load so the value is already masked down. 1696 // load so the value is already masked down.
1793 uint32_t char_mask; 1697 uint32_t char_mask;
1794 if (compiler->one_byte()) { 1698 if (compiler->one_byte()) {
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after
1941 CharacterRange range = ranges->At(first_range); 1845 CharacterRange range = ranges->At(first_range);
1942 uint16_t from = range.from(); 1846 uint16_t from = range.from();
1943 uint16_t to = range.to(); 1847 uint16_t to = range.to();
1944 if (to > char_mask) { 1848 if (to > char_mask) {
1945 to = char_mask; 1849 to = char_mask;
1946 } 1850 }
1947 uint32_t differing_bits = (from ^ to); 1851 uint32_t differing_bits = (from ^ to);
1948 // A mask and compare is only perfect if the differing bits form a 1852 // A mask and compare is only perfect if the differing bits form a
1949 // number like 00011111 with one single block of trailing 1s. 1853 // number like 00011111 with one single block of trailing 1s.
1950 if ((differing_bits & (differing_bits + 1)) == 0 && 1854 if ((differing_bits & (differing_bits + 1)) == 0 &&
1951 from + differing_bits == to) { 1855 from + differing_bits == to) {
1952 pos->determines_perfectly = true; 1856 pos->determines_perfectly = true;
1953 } 1857 }
1954 uint32_t common_bits = ~SmearBitsRight(differing_bits); 1858 uint32_t common_bits = ~SmearBitsRight(differing_bits);
1955 uint32_t bits = (from & common_bits); 1859 uint32_t bits = (from & common_bits);
1956 for (intptr_t i = first_range + 1; i < ranges->length(); i++) { 1860 for (intptr_t i = first_range + 1; i < ranges->length(); i++) {
1957 CharacterRange range = ranges->At(i); 1861 CharacterRange range = ranges->At(i);
1958 uint16_t from = range.from(); 1862 uint16_t from = range.from();
1959 uint16_t to = range.to(); 1863 uint16_t to = range.to();
1960 if (from > char_mask) continue; 1864 if (from > char_mask) continue;
1961 if (to > char_mask) to = char_mask; 1865 if (to > char_mask) to = char_mask;
(...skipping 16 matching lines...) Expand all
1978 } 1882 }
1979 characters_filled_in++; 1883 characters_filled_in++;
1980 ASSERT(characters_filled_in <= details->characters()); 1884 ASSERT(characters_filled_in <= details->characters());
1981 if (characters_filled_in == details->characters()) { 1885 if (characters_filled_in == details->characters()) {
1982 return; 1886 return;
1983 } 1887 }
1984 } 1888 }
1985 } 1889 }
1986 ASSERT(characters_filled_in != details->characters()); 1890 ASSERT(characters_filled_in != details->characters());
1987 if (!details->cannot_match()) { 1891 if (!details->cannot_match()) {
1988 on_success()-> GetQuickCheckDetails(details, 1892 on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in,
1989 compiler, 1893 true);
1990 characters_filled_in,
1991 true);
1992 } 1894 }
1993 } 1895 }
1994 1896
1995 1897
1996 void QuickCheckDetails::Clear() { 1898 void QuickCheckDetails::Clear() {
1997 for (int i = 0; i < characters_; i++) { 1899 for (int i = 0; i < characters_; i++) {
1998 positions_[i].mask = 0; 1900 positions_[i].mask = 0;
1999 positions_[i].value = 0; 1901 positions_[i].value = 0;
2000 positions_[i].determines_perfectly = false; 1902 positions_[i].determines_perfectly = false;
2001 } 1903 }
(...skipping 27 matching lines...) Expand all
2029 if (other->cannot_match_) { 1931 if (other->cannot_match_) {
2030 return; 1932 return;
2031 } 1933 }
2032 if (cannot_match_) { 1934 if (cannot_match_) {
2033 *this = *other; 1935 *this = *other;
2034 return; 1936 return;
2035 } 1937 }
2036 for (intptr_t i = from_index; i < characters_; i++) { 1938 for (intptr_t i = from_index; i < characters_; i++) {
2037 QuickCheckDetails::Position* pos = positions(i); 1939 QuickCheckDetails::Position* pos = positions(i);
2038 QuickCheckDetails::Position* other_pos = other->positions(i); 1940 QuickCheckDetails::Position* other_pos = other->positions(i);
2039 if (pos->mask != other_pos->mask || 1941 if (pos->mask != other_pos->mask || pos->value != other_pos->value ||
2040 pos->value != other_pos->value ||
2041 !other_pos->determines_perfectly) { 1942 !other_pos->determines_perfectly) {
2042 // Our mask-compare operation will be approximate unless we have the 1943 // Our mask-compare operation will be approximate unless we have the
2043 // exact same operation on both sides of the alternation. 1944 // exact same operation on both sides of the alternation.
2044 pos->determines_perfectly = false; 1945 pos->determines_perfectly = false;
2045 } 1946 }
2046 pos->mask &= other_pos->mask; 1947 pos->mask &= other_pos->mask;
2047 pos->value &= pos->mask; 1948 pos->value &= pos->mask;
2048 other_pos->value &= pos->mask; 1949 other_pos->value &= pos->mask;
2049 uint16_t differing_bits = (pos->value ^ other_pos->value); 1950 uint16_t differing_bits = (pos->value ^ other_pos->value);
2050 pos->mask &= ~differing_bits; 1951 pos->mask &= ~differing_bits;
2051 pos->value &= pos->mask; 1952 pos->value &= pos->mask;
2052 } 1953 }
2053 } 1954 }
2054 1955
2055 1956
2056 class VisitMarker : public ValueObject { 1957 class VisitMarker : public ValueObject {
2057 public: 1958 public:
2058 explicit VisitMarker(NodeInfo* info) : info_(info) { 1959 explicit VisitMarker(NodeInfo* info) : info_(info) {
2059 ASSERT(!info->visited); 1960 ASSERT(!info->visited);
2060 info->visited = true; 1961 info->visited = true;
2061 } 1962 }
2062 ~VisitMarker() { 1963 ~VisitMarker() { info_->visited = false; }
2063 info_->visited = false; 1964
2064 }
2065 private: 1965 private:
2066 NodeInfo* info_; 1966 NodeInfo* info_;
2067 }; 1967 };
2068 1968
2069 1969
2070 RegExpNode* SeqRegExpNode::FilterOneByte(intptr_t depth, bool ignore_case) { 1970 RegExpNode* SeqRegExpNode::FilterOneByte(intptr_t depth, bool ignore_case) {
2071 if (info()->replacement_calculated) return replacement(); 1971 if (info()->replacement_calculated) return replacement();
2072 if (depth < 0) return this; 1972 if (depth < 0) return this;
2073 ASSERT(!info()->visited); 1973 ASSERT(!info()->visited);
2074 VisitMarker marker(info()); 1974 VisitMarker marker(info());
2075 return FilterSuccessor(depth - 1, ignore_case); 1975 return FilterSuccessor(depth - 1, ignore_case);
2076 } 1976 }
2077 1977
2078 1978
2079 RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth, bool ignore_case) { 1979 RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth, bool ignore_case) {
2080 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); 1980 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case);
2081 if (next == NULL) return set_replacement(NULL); 1981 if (next == NULL) return set_replacement(NULL);
2082 on_success_ = next; 1982 on_success_ = next;
2083 return set_replacement(this); 1983 return set_replacement(this);
2084 } 1984 }
2085 1985
2086 1986
2087 // We need to check for the following characters: 0x39c 0x3bc 0x178. 1987 // We need to check for the following characters: 0x39c 0x3bc 0x178.
2088 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { 1988 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {
2089 // TODO(dcarney): this could be a lot more efficient. 1989 // TODO(dcarney): this could be a lot more efficient.
2090 return range.Contains(0x39c) || 1990 return range.Contains(0x39c) || range.Contains(0x3bc) ||
2091 range.Contains(0x3bc) || range.Contains(0x178); 1991 range.Contains(0x178);
2092 } 1992 }
2093 1993
2094 1994
2095 static bool RangesContainLatin1Equivalents( 1995 static bool RangesContainLatin1Equivalents(
2096 ZoneGrowableArray<CharacterRange>* ranges) { 1996 ZoneGrowableArray<CharacterRange>* ranges) {
2097 for (intptr_t i = 0; i < ranges->length(); i++) { 1997 for (intptr_t i = 0; i < ranges->length(); i++) {
2098 // TODO(dcarney): this could be a lot more efficient. 1998 // TODO(dcarney): this could be a lot more efficient.
2099 if (RangeContainsLatin1Equivalents(ranges->At(i))) return true; 1999 if (RangeContainsLatin1Equivalents(ranges->At(i))) return true;
2100 } 2000 }
2101 return false; 2001 return false;
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
2143 } else { 2043 } else {
2144 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); 2044 ASSERT(elm.text_type() == TextElement::CHAR_CLASS);
2145 RegExpCharacterClass* cc = elm.char_class(); 2045 RegExpCharacterClass* cc = elm.char_class();
2146 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); 2046 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges();
2147 if (!CharacterRange::IsCanonical(ranges)) { 2047 if (!CharacterRange::IsCanonical(ranges)) {
2148 CharacterRange::Canonicalize(ranges); 2048 CharacterRange::Canonicalize(ranges);
2149 } 2049 }
2150 // Now they are in order so we only need to look at the first. 2050 // Now they are in order so we only need to look at the first.
2151 intptr_t range_count = ranges->length(); 2051 intptr_t range_count = ranges->length();
2152 if (cc->is_negated()) { 2052 if (cc->is_negated()) {
2153 if (range_count != 0 && 2053 if (range_count != 0 && ranges->At(0).from() == 0 &&
2154 ranges->At(0).from() == 0 &&
2155 ranges->At(0).to() >= Symbols::kMaxOneCharCodeSymbol) { 2054 ranges->At(0).to() >= Symbols::kMaxOneCharCodeSymbol) {
2156 // This will be handled in a later filter. 2055 // This will be handled in a later filter.
2157 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; 2056 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
2158 return set_replacement(NULL); 2057 return set_replacement(NULL);
2159 } 2058 }
2160 } else { 2059 } else {
2161 if (range_count == 0 || 2060 if (range_count == 0 ||
2162 ranges->At(0).from() > Symbols::kMaxOneCharCodeSymbol) { 2061 ranges->At(0).from() > Symbols::kMaxOneCharCodeSymbol) {
2163 // This will be handled in a later filter. 2062 // This will be handled in a later filter.
2164 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; 2063 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
2219 } 2118 }
2220 if (surviving < 2) return set_replacement(survivor); 2119 if (surviving < 2) return set_replacement(survivor);
2221 2120
2222 set_replacement(this); 2121 set_replacement(this);
2223 if (surviving == choice_count) { 2122 if (surviving == choice_count) {
2224 return this; 2123 return this;
2225 } 2124 }
2226 // Only some of the nodes survived the filtering. We need to rebuild the 2125 // Only some of the nodes survived the filtering. We need to rebuild the
2227 // alternatives list. 2126 // alternatives list.
2228 ZoneGrowableArray<GuardedAlternative>* new_alternatives = 2127 ZoneGrowableArray<GuardedAlternative>* new_alternatives =
2229 new(Z) ZoneGrowableArray<GuardedAlternative>(surviving); 2128 new (Z) ZoneGrowableArray<GuardedAlternative>(surviving);
2230 for (intptr_t i = 0; i < choice_count; i++) { 2129 for (intptr_t i = 0; i < choice_count; i++) {
2231 RegExpNode* replacement = 2130 RegExpNode* replacement =
2232 (*alternatives_)[i].node()->FilterOneByte(depth - 1, ignore_case); 2131 (*alternatives_)[i].node()->FilterOneByte(depth - 1, ignore_case);
2233 if (replacement != NULL) { 2132 if (replacement != NULL) {
2234 (*alternatives_)[i].set_node(replacement); 2133 (*alternatives_)[i].set_node(replacement);
2235 new_alternatives->Add((*alternatives_)[i]); 2134 new_alternatives->Add((*alternatives_)[i]);
2236 } 2135 }
2237 } 2136 }
2238 alternatives_ = new_alternatives; 2137 alternatives_ = new_alternatives;
2239 return this; 2138 return this;
(...skipping 22 matching lines...) Expand all
2262 return set_replacement(this); 2161 return set_replacement(this);
2263 } 2162 }
2264 2163
2265 2164
2266 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, 2165 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2267 RegExpCompiler* compiler, 2166 RegExpCompiler* compiler,
2268 intptr_t characters_filled_in, 2167 intptr_t characters_filled_in,
2269 bool not_at_start) { 2168 bool not_at_start) {
2270 if (body_can_be_zero_length_ || info()->visited) return; 2169 if (body_can_be_zero_length_ || info()->visited) return;
2271 VisitMarker marker(info()); 2170 VisitMarker marker(info());
2272 return ChoiceNode::GetQuickCheckDetails(details, 2171 return ChoiceNode::GetQuickCheckDetails(details, compiler,
2273 compiler, 2172 characters_filled_in, not_at_start);
2274 characters_filled_in,
2275 not_at_start);
2276 } 2173 }
2277 2174
2278 2175
2279 void LoopChoiceNode::FillInBMInfo(intptr_t offset, 2176 void LoopChoiceNode::FillInBMInfo(intptr_t offset,
2280 intptr_t budget, 2177 intptr_t budget,
2281 BoyerMooreLookahead* bm, 2178 BoyerMooreLookahead* bm,
2282 bool not_at_start) { 2179 bool not_at_start) {
2283 if (body_can_be_zero_length_ || budget <= 0) { 2180 if (body_can_be_zero_length_ || budget <= 0) {
2284 bm->SetRest(offset); 2181 bm->SetRest(offset);
2285 SaveBMInfo(bm, not_at_start, offset); 2182 SaveBMInfo(bm, not_at_start, offset);
2286 return; 2183 return;
2287 } 2184 }
2288 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start); 2185 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start);
2289 SaveBMInfo(bm, not_at_start, offset); 2186 SaveBMInfo(bm, not_at_start, offset);
2290 } 2187 }
2291 2188
2292 2189
2293 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, 2190 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2294 RegExpCompiler* compiler, 2191 RegExpCompiler* compiler,
2295 intptr_t characters_filled_in, 2192 intptr_t characters_filled_in,
2296 bool not_at_start) { 2193 bool not_at_start) {
2297 not_at_start = (not_at_start || not_at_start_); 2194 not_at_start = (not_at_start || not_at_start_);
2298 intptr_t choice_count = alternatives_->length(); 2195 intptr_t choice_count = alternatives_->length();
2299 ASSERT(choice_count > 0); 2196 ASSERT(choice_count > 0);
2300 (*alternatives_)[0].node()->GetQuickCheckDetails(details, 2197 (*alternatives_)[0].node()->GetQuickCheckDetails(
2301 compiler, 2198 details, compiler, characters_filled_in, not_at_start);
2302 characters_filled_in,
2303 not_at_start);
2304 for (intptr_t i = 1; i < choice_count; i++) { 2199 for (intptr_t i = 1; i < choice_count; i++) {
2305 QuickCheckDetails new_details(details->characters()); 2200 QuickCheckDetails new_details(details->characters());
2306 RegExpNode* node = (*alternatives_)[i].node(); 2201 RegExpNode* node = (*alternatives_)[i].node();
2307 node->GetQuickCheckDetails(&new_details, compiler, 2202 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in,
2308 characters_filled_in,
2309 not_at_start); 2203 not_at_start);
2310 // Here we merge the quick match details of the two branches. 2204 // Here we merge the quick match details of the two branches.
2311 details->Merge(&new_details, characters_filled_in); 2205 details->Merge(&new_details, characters_filled_in);
2312 } 2206 }
2313 } 2207 }
2314 2208
2315 2209
2316 // Check for [0-9A-Z_a-z]. 2210 // Check for [0-9A-Z_a-z].
2317 static void EmitWordCheck(RegExpMacroAssembler* assembler, 2211 static void EmitWordCheck(RegExpMacroAssembler* assembler,
2318 BlockLabel* word, 2212 BlockLabel* word,
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
2350 new_trace.InvalidateCurrentCharacter(); 2244 new_trace.InvalidateCurrentCharacter();
2351 2245
2352 BlockLabel ok; 2246 BlockLabel ok;
2353 if (new_trace.cp_offset() == 0) { 2247 if (new_trace.cp_offset() == 0) {
2354 // The start of input counts as a newline in this context, so skip to 2248 // The start of input counts as a newline in this context, so skip to
2355 // ok if we are at the start. 2249 // ok if we are at the start.
2356 assembler->CheckAtStart(&ok); 2250 assembler->CheckAtStart(&ok);
2357 } 2251 }
2358 // We already checked that we are not at the start of input so it must be 2252 // We already checked that we are not at the start of input so it must be
2359 // OK to load the previous character. 2253 // OK to load the previous character.
2360 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, 2254 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
2361 new_trace.backtrack(), 2255 new_trace.backtrack(), false);
2362 false); 2256 if (!assembler->CheckSpecialCharacterClass('n', new_trace.backtrack())) {
2363 if (!assembler->CheckSpecialCharacterClass('n',
2364 new_trace.backtrack())) {
2365 // Newline means \n, \r, 0x2028 or 0x2029. 2257 // Newline means \n, \r, 0x2028 or 0x2029.
2366 if (!compiler->one_byte()) { 2258 if (!compiler->one_byte()) {
2367 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); 2259 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
2368 } 2260 }
2369 assembler->CheckCharacter('\n', &ok); 2261 assembler->CheckCharacter('\n', &ok);
2370 assembler->CheckNotCharacter('\r', new_trace.backtrack()); 2262 assembler->CheckNotCharacter('\r', new_trace.backtrack());
2371 } 2263 }
2372 assembler->BindBlock(&ok); 2264 assembler->BindBlock(&ok);
2373 on_success->Emit(compiler, &new_trace); 2265 on_success->Emit(compiler, &new_trace);
2374 } 2266 }
2375 2267
2376 2268
2377 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). 2269 // Emit the code to handle \b and \B (word-boundary or non-word-boundary).
2378 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { 2270 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
2379 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 2271 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2380 Trace::TriBool next_is_word_character = Trace::UNKNOWN; 2272 Trace::TriBool next_is_word_character = Trace::UNKNOWN;
2381 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); 2273 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
2382 BoyerMooreLookahead* lookahead = bm_info(not_at_start); 2274 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
2383 if (lookahead == NULL) { 2275 if (lookahead == NULL) {
2384 intptr_t eats_at_least = 2276 intptr_t eats_at_least =
2385 Utils::Minimum(kMaxLookaheadForBoyerMoore, 2277 Utils::Minimum(kMaxLookaheadForBoyerMoore,
2386 EatsAtLeast(kMaxLookaheadForBoyerMoore, 2278 EatsAtLeast(kMaxLookaheadForBoyerMoore, kRecursionBudget,
2387 kRecursionBudget,
2388 not_at_start)); 2279 not_at_start));
2389 if (eats_at_least >= 1) { 2280 if (eats_at_least >= 1) {
2390 BoyerMooreLookahead* bm = 2281 BoyerMooreLookahead* bm =
2391 new(Z) BoyerMooreLookahead(eats_at_least, compiler, Z); 2282 new (Z) BoyerMooreLookahead(eats_at_least, compiler, Z);
2392 FillInBMInfo(0, kRecursionBudget, bm, not_at_start); 2283 FillInBMInfo(0, kRecursionBudget, bm, not_at_start);
2393 if (bm->at(0)->is_non_word()) 2284 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE_VALUE;
2394 next_is_word_character = Trace::FALSE_VALUE;
2395 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; 2285 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
2396 } 2286 }
2397 } else { 2287 } else {
2398 if (lookahead->at(0)->is_non_word()) 2288 if (lookahead->at(0)->is_non_word())
2399 next_is_word_character = Trace::FALSE_VALUE; 2289 next_is_word_character = Trace::FALSE_VALUE;
2400 if (lookahead->at(0)->is_word()) 2290 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
2401 next_is_word_character = Trace::TRUE_VALUE;
2402 } 2291 }
2403 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY); 2292 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);
2404 if (next_is_word_character == Trace::UNKNOWN) { 2293 if (next_is_word_character == Trace::UNKNOWN) {
2405 BlockLabel before_non_word; 2294 BlockLabel before_non_word;
2406 BlockLabel before_word; 2295 BlockLabel before_word;
2407 if (trace->characters_preloaded() != 1) { 2296 if (trace->characters_preloaded() != 1) {
2408 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); 2297 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
2409 } 2298 }
2410 // Fall through on non-word. 2299 // Fall through on non-word.
2411 EmitWordCheck(assembler, &before_word, &before_non_word, false); 2300 EmitWordCheck(assembler, &before_word, &before_non_word, false);
(...skipping 23 matching lines...) Expand all
2435 void AssertionNode::BacktrackIfPrevious( 2324 void AssertionNode::BacktrackIfPrevious(
2436 RegExpCompiler* compiler, 2325 RegExpCompiler* compiler,
2437 Trace* trace, 2326 Trace* trace,
2438 AssertionNode::IfPrevious backtrack_if_previous) { 2327 AssertionNode::IfPrevious backtrack_if_previous) {
2439 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 2328 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2440 Trace new_trace(*trace); 2329 Trace new_trace(*trace);
2441 new_trace.InvalidateCurrentCharacter(); 2330 new_trace.InvalidateCurrentCharacter();
2442 2331
2443 BlockLabel fall_through, dummy; 2332 BlockLabel fall_through, dummy;
2444 2333
2445 BlockLabel* non_word = backtrack_if_previous == kIsNonWord ? 2334 BlockLabel* non_word = backtrack_if_previous == kIsNonWord
2446 new_trace.backtrack() : 2335 ? new_trace.backtrack()
2447 &fall_through; 2336 : &fall_through;
2448 BlockLabel* word = backtrack_if_previous == kIsNonWord ? 2337 BlockLabel* word = backtrack_if_previous == kIsNonWord
2449 &fall_through : 2338 ? &fall_through
2450 new_trace.backtrack(); 2339 : new_trace.backtrack();
2451 2340
2452 if (new_trace.cp_offset() == 0) { 2341 if (new_trace.cp_offset() == 0) {
2453 // The start of input counts as a non-word character, so the question is 2342 // The start of input counts as a non-word character, so the question is
2454 // decided if we are at the start. 2343 // decided if we are at the start.
2455 assembler->CheckAtStart(non_word); 2344 assembler->CheckAtStart(non_word);
2456 } 2345 }
2457 // We already checked that we are not at the start of input so it must be 2346 // We already checked that we are not at the start of input so it must be
2458 // OK to load the previous character. 2347 // OK to load the previous character.
2459 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false); 2348 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);
2460 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); 2349 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
2461 2350
2462 assembler->BindBlock(&fall_through); 2351 assembler->BindBlock(&fall_through);
2463 on_success()->Emit(compiler, &new_trace); 2352 on_success()->Emit(compiler, &new_trace);
2464 } 2353 }
2465 2354
2466 2355
2467 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details, 2356 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
2468 RegExpCompiler* compiler, 2357 RegExpCompiler* compiler,
2469 intptr_t filled_in, 2358 intptr_t filled_in,
2470 bool not_at_start) { 2359 bool not_at_start) {
2471 if (assertion_type_ == AT_START && not_at_start) { 2360 if (assertion_type_ == AT_START && not_at_start) {
2472 details->set_cannot_match(); 2361 details->set_cannot_match();
2473 return; 2362 return;
2474 } 2363 }
2475 return on_success()->GetQuickCheckDetails(details, 2364 return on_success()->GetQuickCheckDetails(details, compiler, filled_in,
2476 compiler,
2477 filled_in,
2478 not_at_start); 2365 not_at_start);
2479 } 2366 }
2480 2367
2481 2368
2482 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { 2369 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2483 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 2370 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2484 switch (assertion_type_) { 2371 switch (assertion_type_) {
2485 case AT_END: { 2372 case AT_END: {
2486 BlockLabel ok; 2373 BlockLabel ok;
2487 assembler->CheckPosition(trace->cp_offset(), &ok); 2374 assembler->CheckPosition(trace->cp_offset(), &ok);
2488 assembler->GoTo(trace->backtrack()); 2375 assembler->GoTo(trace->backtrack());
2489 assembler->BindBlock(&ok); 2376 assembler->BindBlock(&ok);
2490 break; 2377 break;
2491 } 2378 }
2492 case AT_START: { 2379 case AT_START: {
2493 if (trace->at_start() == Trace::FALSE_VALUE) { 2380 if (trace->at_start() == Trace::FALSE_VALUE) {
2494 assembler->GoTo(trace->backtrack()); 2381 assembler->GoTo(trace->backtrack());
2495 return; 2382 return;
2496 } 2383 }
2497 if (trace->at_start() == Trace::UNKNOWN) { 2384 if (trace->at_start() == Trace::UNKNOWN) {
2498 assembler->CheckNotAtStart(trace->backtrack()); 2385 assembler->CheckNotAtStart(trace->backtrack());
2499 Trace at_start_trace = *trace; 2386 Trace at_start_trace = *trace;
2500 at_start_trace.set_at_start(true); 2387 at_start_trace.set_at_start(true);
2501 on_success()->Emit(compiler, &at_start_trace); 2388 on_success()->Emit(compiler, &at_start_trace);
2502 return; 2389 return;
2503 } 2390 }
2504 } 2391 } break;
2505 break;
2506 case AFTER_NEWLINE: 2392 case AFTER_NEWLINE:
2507 EmitHat(compiler, on_success(), trace); 2393 EmitHat(compiler, on_success(), trace);
2508 return; 2394 return;
2509 case AT_BOUNDARY: 2395 case AT_BOUNDARY:
2510 case AT_NON_BOUNDARY: { 2396 case AT_NON_BOUNDARY: {
2511 EmitBoundaryCheck(compiler, trace); 2397 EmitBoundaryCheck(compiler, trace);
2512 return; 2398 return;
2513 } 2399 }
2514 } 2400 }
2515 on_success()->Emit(compiler, trace); 2401 on_success()->Emit(compiler, trace);
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
2593 case SIMPLE_CHARACTER_MATCH: 2479 case SIMPLE_CHARACTER_MATCH:
2594 emit_function = &EmitSimpleCharacter; 2480 emit_function = &EmitSimpleCharacter;
2595 break; 2481 break;
2596 case CASE_CHARACTER_MATCH: 2482 case CASE_CHARACTER_MATCH:
2597 emit_function = &EmitAtomLetter; 2483 emit_function = &EmitAtomLetter;
2598 break; 2484 break;
2599 default: 2485 default:
2600 break; 2486 break;
2601 } 2487 }
2602 if (emit_function != NULL) { 2488 if (emit_function != NULL) {
2603 bool bound_checked = emit_function(Z, 2489 bool bound_checked = emit_function(
2604 compiler, 2490 Z, compiler, quarks->At(j), backtrack, cp_offset + j,
2605 quarks->At(j), 2491 *checked_up_to < cp_offset + j, preloaded);
2606 backtrack,
2607 cp_offset + j,
2608 *checked_up_to < cp_offset + j,
2609 preloaded);
2610 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); 2492 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
2611 } 2493 }
2612 } 2494 }
2613 } else { 2495 } else {
2614 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); 2496 ASSERT(elm.text_type() == TextElement::CHAR_CLASS);
2615 if (pass == CHARACTER_CLASS_MATCH) { 2497 if (pass == CHARACTER_CLASS_MATCH) {
2616 if (first_element_checked && i == 0) continue; 2498 if (first_element_checked && i == 0) continue;
2617 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; 2499 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
2618 RegExpCharacterClass* cc = elm.char_class(); 2500 RegExpCharacterClass* cc = elm.char_class();
2619 EmitCharClass(assembler, 2501 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,
2620 cc, 2502 *checked_up_to < cp_offset, preloaded, Z);
2621 one_byte,
2622 backtrack,
2623 cp_offset,
2624 *checked_up_to < cp_offset,
2625 preloaded,
2626 Z);
2627 UpdateBoundsCheck(cp_offset, checked_up_to); 2503 UpdateBoundsCheck(cp_offset, checked_up_to);
2628 } 2504 }
2629 } 2505 }
2630 } 2506 }
2631 } 2507 }
2632 2508
2633 2509
2634 intptr_t TextNode::Length() { 2510 intptr_t TextNode::Length() {
2635 TextElement elm = elms_->Last(); 2511 TextElement elm = elms_->Last();
2636 ASSERT(elm.cp_offset() >= 0); 2512 ASSERT(elm.cp_offset() >= 0);
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
2671 2547
2672 bool first_elt_done = false; 2548 bool first_elt_done = false;
2673 intptr_t bound_checked_to = trace->cp_offset() - 1; 2549 intptr_t bound_checked_to = trace->cp_offset() - 1;
2674 bound_checked_to += trace->bound_checked_up_to(); 2550 bound_checked_to += trace->bound_checked_up_to();
2675 2551
2676 // If a character is preloaded into the current character register then 2552 // If a character is preloaded into the current character register then
2677 // check that now. 2553 // check that now.
2678 if (trace->characters_preloaded() == 1) { 2554 if (trace->characters_preloaded() == 1) {
2679 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) { 2555 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) {
2680 if (!SkipPass(pass, compiler->ignore_case())) { 2556 if (!SkipPass(pass, compiler->ignore_case())) {
2681 TextEmitPass(compiler, 2557 TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), true, trace,
2682 static_cast<TextEmitPassType>(pass), 2558 false, &bound_checked_to);
2683 true,
2684 trace,
2685 false,
2686 &bound_checked_to);
2687 } 2559 }
2688 } 2560 }
2689 first_elt_done = true; 2561 first_elt_done = true;
2690 } 2562 }
2691 2563
2692 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) { 2564 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) {
2693 if (!SkipPass(pass, compiler->ignore_case())) { 2565 if (!SkipPass(pass, compiler->ignore_case())) {
2694 TextEmitPass(compiler, 2566 TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), false, trace,
2695 static_cast<TextEmitPassType>(pass), 2567 first_elt_done, &bound_checked_to);
2696 false,
2697 trace,
2698 first_elt_done,
2699 &bound_checked_to);
2700 } 2568 }
2701 } 2569 }
2702 2570
2703 Trace successor_trace(*trace); 2571 Trace successor_trace(*trace);
2704 successor_trace.set_at_start(false); 2572 successor_trace.set_at_start(false);
2705 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); 2573 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
2706 RecursionCheck rc(compiler); 2574 RecursionCheck rc(compiler);
2707 on_success()->Emit(compiler, &successor_trace); 2575 on_success()->Emit(compiler, &successor_trace);
2708 } 2576 }
2709 2577
(...skipping 12 matching lines...) Expand all
2722 characters_preloaded_ = 0; 2590 characters_preloaded_ = 0;
2723 // Adjust the offsets of the quick check performed information. This 2591 // Adjust the offsets of the quick check performed information. This
2724 // information is used to find out what we already determined about the 2592 // information is used to find out what we already determined about the
2725 // characters by means of mask and compare. 2593 // characters by means of mask and compare.
2726 quick_check_performed_.Advance(by, compiler->one_byte()); 2594 quick_check_performed_.Advance(by, compiler->one_byte());
2727 cp_offset_ += by; 2595 cp_offset_ += by;
2728 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { 2596 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
2729 compiler->SetRegExpTooBig(); 2597 compiler->SetRegExpTooBig();
2730 cp_offset_ = 0; 2598 cp_offset_ = 0;
2731 } 2599 }
2732 bound_checked_up_to_ = Utils::Maximum(static_cast<intptr_t>(0), 2600 bound_checked_up_to_ =
2733 bound_checked_up_to_ - by); 2601 Utils::Maximum(static_cast<intptr_t>(0), bound_checked_up_to_ - by);
2734 } 2602 }
2735 2603
2736 2604
2737 void TextNode::MakeCaseIndependent(bool is_one_byte) { 2605 void TextNode::MakeCaseIndependent(bool is_one_byte) {
2738 intptr_t element_count = elms_->length(); 2606 intptr_t element_count = elms_->length();
2739 for (intptr_t i = 0; i < element_count; i++) { 2607 for (intptr_t i = 0; i < element_count; i++) {
2740 TextElement elm = elms_->At(i); 2608 TextElement elm = elms_->At(i);
2741 if (elm.text_type() == TextElement::CHAR_CLASS) { 2609 if (elm.text_type() == TextElement::CHAR_CLASS) {
2742 RegExpCharacterClass* cc = elm.char_class(); 2610 RegExpCharacterClass* cc = elm.char_class();
2743 // None of the standard character classes is different in the case 2611 // None of the standard character classes is different in the case
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after
2842 if (!trace->is_trivial()) { 2710 if (!trace->is_trivial()) {
2843 trace->Flush(compiler, this); 2711 trace->Flush(compiler, this);
2844 return; 2712 return;
2845 } 2713 }
2846 ChoiceNode::Emit(compiler, trace); 2714 ChoiceNode::Emit(compiler, trace);
2847 } 2715 }
2848 2716
2849 2717
2850 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, 2718 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
2851 intptr_t eats_at_least) { 2719 intptr_t eats_at_least) {
2852 intptr_t preload_characters = Utils::Minimum(static_cast<intptr_t>(4), 2720 intptr_t preload_characters =
2853 eats_at_least); 2721 Utils::Minimum(static_cast<intptr_t>(4), eats_at_least);
2854 if (compiler->macro_assembler()->CanReadUnaligned()) { 2722 if (compiler->macro_assembler()->CanReadUnaligned()) {
2855 bool one_byte = compiler->one_byte(); 2723 bool one_byte = compiler->one_byte();
2856 if (one_byte) { 2724 if (one_byte) {
2857 if (preload_characters > 4) preload_characters = 4; 2725 if (preload_characters > 4) preload_characters = 4;
2858 // We can't preload 3 characters because there is no machine instruction 2726 // We can't preload 3 characters because there is no machine instruction
2859 // to do that. We can't just load 4 because we could be reading 2727 // to do that. We can't just load 4 because we could be reading
2860 // beyond the end of the string, which could cause a memory fault. 2728 // beyond the end of the string, which could cause a memory fault.
2861 if (preload_characters == 3) preload_characters = 2; 2729 if (preload_characters == 3) preload_characters = 2;
2862 } else { 2730 } else {
2863 if (preload_characters > 2) preload_characters = 2; 2731 if (preload_characters > 2) preload_characters = 2;
2864 } 2732 }
2865 } else { 2733 } else {
2866 if (preload_characters > 1) preload_characters = 1; 2734 if (preload_characters > 1) preload_characters = 1;
2867 } 2735 }
2868 return preload_characters; 2736 return preload_characters;
2869 } 2737 }
2870 2738
2871 2739
2872 // This structure is used when generating the alternatives in a choice node. It 2740 // This structure is used when generating the alternatives in a choice node. It
2873 // records the way the alternative is being code generated. 2741 // records the way the alternative is being code generated.
2874 struct AlternativeGeneration { 2742 struct AlternativeGeneration {
2875 AlternativeGeneration() 2743 AlternativeGeneration()
2876 : possible_success(), 2744 : possible_success(),
2877 expects_preload(false), 2745 expects_preload(false),
2878 after(), 2746 after(),
2879 quick_check_details() { } 2747 quick_check_details() {}
2880 BlockLabel possible_success; 2748 BlockLabel possible_success;
2881 bool expects_preload; 2749 bool expects_preload;
2882 BlockLabel after; 2750 BlockLabel after;
2883 QuickCheckDetails quick_check_details; 2751 QuickCheckDetails quick_check_details;
2884 }; 2752 };
2885 2753
2886 2754
2887 // Creates a list of AlternativeGenerations. If the list has a reasonable 2755 // Creates a list of AlternativeGenerations. If the list has a reasonable
2888 // size then it is on the stack, otherwise the excess is on the heap. 2756 // size then it is on the stack, otherwise the excess is on the heap.
2889 class AlternativeGenerationList { 2757 class AlternativeGenerationList {
2890 public: 2758 public:
2891 explicit AlternativeGenerationList(intptr_t count) 2759 explicit AlternativeGenerationList(intptr_t count) : alt_gens_(count) {
2892 : alt_gens_(count) {
2893 for (intptr_t i = 0; i < count && i < kAFew; i++) { 2760 for (intptr_t i = 0; i < count && i < kAFew; i++) {
2894 alt_gens_.Add(a_few_alt_gens_ + i); 2761 alt_gens_.Add(a_few_alt_gens_ + i);
2895 } 2762 }
2896 for (intptr_t i = kAFew; i < count; i++) { 2763 for (intptr_t i = kAFew; i < count; i++) {
2897 alt_gens_.Add(new AlternativeGeneration()); 2764 alt_gens_.Add(new AlternativeGeneration());
2898 } 2765 }
2899 } 2766 }
2900 ~AlternativeGenerationList() { 2767 ~AlternativeGenerationList() {
2901 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) { 2768 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) {
2902 delete alt_gens_[i]; 2769 delete alt_gens_[i];
2903 alt_gens_[i] = NULL; 2770 alt_gens_[i] = NULL;
2904 } 2771 }
2905 } 2772 }
2906 2773
2907 AlternativeGeneration* at(intptr_t i) { 2774 AlternativeGeneration* at(intptr_t i) { return alt_gens_[i]; }
2908 return alt_gens_[i];
2909 }
2910 2775
2911 private: 2776 private:
2912 static const intptr_t kAFew = 10; 2777 static const intptr_t kAFew = 10;
2913 GrowableArray<AlternativeGeneration*> alt_gens_; 2778 GrowableArray<AlternativeGeneration*> alt_gens_;
2914 AlternativeGeneration a_few_alt_gens_[kAFew]; 2779 AlternativeGeneration a_few_alt_gens_[kAFew];
2915 2780
2916 DISALLOW_ALLOCATION(); 2781 DISALLOW_ALLOCATION();
2917 }; 2782 };
2918 2783
2919 2784
2920 // The '2' variant is inclusive from and exclusive to. 2785 // The '2' variant is inclusive from and exclusive to.
2921 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12, 2786 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12,
2922 // which include WhiteSpace (7.2) or LineTerminator (7.3) values. 2787 // which include WhiteSpace (7.2) or LineTerminator (7.3) values.
2923 static const intptr_t kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1, 2788 static const intptr_t kSpaceRanges[] = {
2924 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, 2789 '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, 0x00A1, 0x1680, 0x1681,
2925 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, 2790 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030,
2926 0xFEFF, 0xFF00, 0x10000 }; 2791 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000};
2927 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges); 2792 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);
2928 static const intptr_t kWordRanges[] = { 2793 static const intptr_t kWordRanges[] = {'0', '9' + 1, 'A', 'Z' + 1, '_',
2929 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 }; 2794 '_' + 1, 'a', 'z' + 1, 0x10000};
2930 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges); 2795 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges);
2931 static const intptr_t kDigitRanges[] = { '0', '9' + 1, 0x10000 }; 2796 static const intptr_t kDigitRanges[] = {'0', '9' + 1, 0x10000};
2932 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges); 2797 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges);
2933 static const intptr_t kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 }; 2798 static const intptr_t kSurrogateRanges[] = {0xd800, 0xe000, 0x10000};
2934 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges); 2799 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges);
2935 static const intptr_t kLineTerminatorRanges[] = { 2800 static const intptr_t kLineTerminatorRanges[] = {0x000A, 0x000B, 0x000D, 0x000E,
2936 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, 0x10000 }; 2801 0x2028, 0x202A, 0x10000};
2937 static const intptr_t kLineTerminatorRangeCount = 2802 static const intptr_t kLineTerminatorRangeCount =
2938 ARRAY_SIZE(kLineTerminatorRanges); 2803 ARRAY_SIZE(kLineTerminatorRanges);
2939 2804
2940 2805
2941 void BoyerMoorePositionInfo::Set(intptr_t character) { 2806 void BoyerMoorePositionInfo::Set(intptr_t character) {
2942 SetInterval(Interval(character, character)); 2807 SetInterval(Interval(character, character));
2943 } 2808 }
2944 2809
2945 2810
2946 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { 2811 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {
2947 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); 2812 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);
2948 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); 2813 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
2949 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); 2814 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);
2950 surrogate_ = 2815 surrogate_ =
2951 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval); 2816 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);
2952 if (interval.to() - interval.from() >= kMapSize - 1) { 2817 if (interval.to() - interval.from() >= kMapSize - 1) {
2953 if (map_count_ != kMapSize) { 2818 if (map_count_ != kMapSize) {
2954 map_count_ = kMapSize; 2819 map_count_ = kMapSize;
2955 for (intptr_t i = 0; i < kMapSize; i++) (*map_)[i] = true; 2820 for (intptr_t i = 0; i < kMapSize; i++)
2821 (*map_)[i] = true;
2956 } 2822 }
2957 return; 2823 return;
2958 } 2824 }
2959 for (intptr_t i = interval.from(); i <= interval.to(); i++) { 2825 for (intptr_t i = interval.from(); i <= interval.to(); i++) {
2960 intptr_t mod_character = (i & kMask); 2826 intptr_t mod_character = (i & kMask);
2961 if (!map_->At(mod_character)) { 2827 if (!map_->At(mod_character)) {
2962 map_count_++; 2828 map_count_++;
2963 (*map_)[mod_character] = true; 2829 (*map_)[mod_character] = true;
2964 } 2830 }
2965 if (map_count_ == kMapSize) return; 2831 if (map_count_ == kMapSize) return;
2966 } 2832 }
2967 } 2833 }
2968 2834
2969 2835
2970 void BoyerMoorePositionInfo::SetAll() { 2836 void BoyerMoorePositionInfo::SetAll() {
2971 s_ = w_ = d_ = kLatticeUnknown; 2837 s_ = w_ = d_ = kLatticeUnknown;
2972 if (map_count_ != kMapSize) { 2838 if (map_count_ != kMapSize) {
2973 map_count_ = kMapSize; 2839 map_count_ = kMapSize;
2974 for (intptr_t i = 0; i < kMapSize; i++) (*map_)[i] = true; 2840 for (intptr_t i = 0; i < kMapSize; i++)
2841 (*map_)[i] = true;
2975 } 2842 }
2976 } 2843 }
2977 2844
2978 2845
2979 BoyerMooreLookahead::BoyerMooreLookahead( 2846 BoyerMooreLookahead::BoyerMooreLookahead(intptr_t length,
2980 intptr_t length, RegExpCompiler* compiler, Zone* zone) 2847 RegExpCompiler* compiler,
2981 : length_(length), 2848 Zone* zone)
2982 compiler_(compiler) { 2849 : length_(length), compiler_(compiler) {
2983 if (compiler->one_byte()) { 2850 if (compiler->one_byte()) {
2984 max_char_ = Symbols::kMaxOneCharCodeSymbol; 2851 max_char_ = Symbols::kMaxOneCharCodeSymbol;
2985 } else { 2852 } else {
2986 max_char_ = Utf16::kMaxCodeUnit; 2853 max_char_ = Utf16::kMaxCodeUnit;
2987 } 2854 }
2988 bitmaps_ = new(zone) ZoneGrowableArray<BoyerMoorePositionInfo*>(length); 2855 bitmaps_ = new (zone) ZoneGrowableArray<BoyerMoorePositionInfo*>(length);
2989 for (intptr_t i = 0; i < length; i++) { 2856 for (intptr_t i = 0; i < length; i++) {
2990 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone)); 2857 bitmaps_->Add(new (zone) BoyerMoorePositionInfo(zone));
2991 } 2858 }
2992 } 2859 }
2993 2860
2994 2861
2995 // Find the longest range of lookahead that has the fewest number of different 2862 // Find the longest range of lookahead that has the fewest number of different
2996 // characters that can occur at a given position. Since we are optimizing two 2863 // characters that can occur at a given position. Since we are optimizing two
2997 // different parameters at once this is a tradeoff. 2864 // different parameters at once this is a tradeoff.
2998 bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) { 2865 bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) {
2999 intptr_t biggest_points = 0; 2866 intptr_t biggest_points = 0;
3000 // If more than 32 characters out of 128 can occur it is unlikely that we can 2867 // If more than 32 characters out of 128 can occur it is unlikely that we can
3001 // be lucky enough to step forwards much of the time. 2868 // be lucky enough to step forwards much of the time.
3002 const intptr_t kMaxMax = 32; 2869 const intptr_t kMaxMax = 32;
3003 for (intptr_t max_number_of_chars = 4; 2870 for (intptr_t max_number_of_chars = 4; max_number_of_chars < kMaxMax;
3004 max_number_of_chars < kMaxMax;
3005 max_number_of_chars *= 2) { 2871 max_number_of_chars *= 2) {
3006 biggest_points = 2872 biggest_points =
3007 FindBestInterval(max_number_of_chars, biggest_points, from, to); 2873 FindBestInterval(max_number_of_chars, biggest_points, from, to);
3008 } 2874 }
3009 if (biggest_points == 0) return false; 2875 if (biggest_points == 0) return false;
3010 return true; 2876 return true;
3011 } 2877 }
3012 2878
3013 2879
3014 // Find the highest-points range between 0 and length_ where the character 2880 // Find the highest-points range between 0 and length_ where the character
3015 // information is not too vague. 'Too vague' means that there are more than 2881 // information is not too vague. 'Too vague' means that there are more than
3016 // max_number_of_chars that can occur at this position. Calculates the number 2882 // max_number_of_chars that can occur at this position. Calculates the number
3017 // of points as the product of width-of-the-range and 2883 // of points as the product of width-of-the-range and
3018 // probability-of-finding-one-of-the-characters, where the probability is 2884 // probability-of-finding-one-of-the-characters, where the probability is
3019 // calculated using the frequency distribution of the sample subject string. 2885 // calculated using the frequency distribution of the sample subject string.
3020 intptr_t BoyerMooreLookahead::FindBestInterval( 2886 intptr_t BoyerMooreLookahead::FindBestInterval(intptr_t max_number_of_chars,
3021 intptr_t max_number_of_chars, 2887 intptr_t old_biggest_points,
3022 intptr_t old_biggest_points, 2888 intptr_t* from,
3023 intptr_t* from, 2889 intptr_t* to) {
3024 intptr_t* to) {
3025 intptr_t biggest_points = old_biggest_points; 2890 intptr_t biggest_points = old_biggest_points;
3026 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; 2891 static const intptr_t kSize = RegExpMacroAssembler::kTableSize;
3027 for (intptr_t i = 0; i < length_; ) { 2892 for (intptr_t i = 0; i < length_;) {
3028 while (i < length_ && Count(i) > max_number_of_chars) i++; 2893 while (i < length_ && Count(i) > max_number_of_chars)
2894 i++;
3029 if (i == length_) break; 2895 if (i == length_) break;
3030 intptr_t remembered_from = i; 2896 intptr_t remembered_from = i;
3031 bool union_map[kSize]; 2897 bool union_map[kSize];
3032 for (intptr_t j = 0; j < kSize; j++) union_map[j] = false; 2898 for (intptr_t j = 0; j < kSize; j++)
2899 union_map[j] = false;
3033 while (i < length_ && Count(i) <= max_number_of_chars) { 2900 while (i < length_ && Count(i) <= max_number_of_chars) {
3034 BoyerMoorePositionInfo* map = bitmaps_->At(i); 2901 BoyerMoorePositionInfo* map = bitmaps_->At(i);
3035 for (intptr_t j = 0; j < kSize; j++) union_map[j] |= map->at(j); 2902 for (intptr_t j = 0; j < kSize; j++)
2903 union_map[j] |= map->at(j);
3036 i++; 2904 i++;
3037 } 2905 }
3038 intptr_t frequency = 0; 2906 intptr_t frequency = 0;
3039 for (intptr_t j = 0; j < kSize; j++) { 2907 for (intptr_t j = 0; j < kSize; j++) {
3040 if (union_map[j]) { 2908 if (union_map[j]) {
3041 // Add 1 to the frequency to give a small per-character boost for 2909 // Add 1 to the frequency to give a small per-character boost for
3042 // the cases where our sampling is not good enough and many 2910 // the cases where our sampling is not good enough and many
3043 // characters have a frequency of zero. This means the frequency 2911 // characters have a frequency of zero. This means the frequency
3044 // can theoretically be up to 2*kSize though we treat it mostly as 2912 // can theoretically be up to 2*kSize though we treat it mostly as
3045 // a fraction of kSize. 2913 // a fraction of kSize.
3046 frequency += compiler_->frequency_collator()->Frequency(j) + 1; 2914 frequency += compiler_->frequency_collator()->Frequency(j) + 1;
3047 } 2915 }
3048 } 2916 }
3049 // We use the probability of skipping times the distance we are skipping to 2917 // We use the probability of skipping times the distance we are skipping to
3050 // judge the effectiveness of this. Actually we have a cut-off: By 2918 // judge the effectiveness of this. Actually we have a cut-off: By
3051 // dividing by 2 we switch off the skipping if the probability of skipping 2919 // dividing by 2 we switch off the skipping if the probability of skipping
3052 // is less than 50%. This is because the multibyte mask-and-compare 2920 // is less than 50%. This is because the multibyte mask-and-compare
3053 // skipping in quickcheck is more likely to do well on this case. 2921 // skipping in quickcheck is more likely to do well on this case.
3054 bool in_quickcheck_range = ((i - remembered_from < 4) || 2922 bool in_quickcheck_range =
3055 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2)); 2923 ((i - remembered_from < 4) ||
2924 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2));
3056 // Called 'probability' but it is only a rough estimate and can actually 2925 // Called 'probability' but it is only a rough estimate and can actually
3057 // be outside the 0-kSize range. 2926 // be outside the 0-kSize range.
3058 intptr_t probability = 2927 intptr_t probability =
3059 (in_quickcheck_range ? kSize / 2 : kSize) - frequency; 2928 (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
3060 intptr_t points = (i - remembered_from) * probability; 2929 intptr_t points = (i - remembered_from) * probability;
3061 if (points > biggest_points) { 2930 if (points > biggest_points) {
3062 *from = remembered_from; 2931 *from = remembered_from;
3063 *to = i - 1; 2932 *to = i - 1;
3064 biggest_points = points; 2933 biggest_points = points;
3065 } 2934 }
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
3133 // The mask-compare can probably handle this better. 3002 // The mask-compare can probably handle this better.
3134 return; 3003 return;
3135 } 3004 }
3136 3005
3137 if (found_single_character) { 3006 if (found_single_character) {
3138 BlockLabel cont, again; 3007 BlockLabel cont, again;
3139 masm->BindBlock(&again); 3008 masm->BindBlock(&again);
3140 masm->LoadCurrentCharacter(max_lookahead, &cont, true); 3009 masm->LoadCurrentCharacter(max_lookahead, &cont, true);
3141 if (max_char_ > kSize) { 3010 if (max_char_ > kSize) {
3142 masm->CheckCharacterAfterAnd(single_character, 3011 masm->CheckCharacterAfterAnd(single_character,
3143 RegExpMacroAssembler::kTableMask, 3012 RegExpMacroAssembler::kTableMask, &cont);
3144 &cont);
3145 } else { 3013 } else {
3146 masm->CheckCharacter(single_character, &cont); 3014 masm->CheckCharacter(single_character, &cont);
3147 } 3015 }
3148 masm->AdvanceCurrentPosition(lookahead_width); 3016 masm->AdvanceCurrentPosition(lookahead_width);
3149 masm->GoTo(&again); 3017 masm->GoTo(&again);
3150 masm->BindBlock(&cont); 3018 masm->BindBlock(&cont);
3151 return; 3019 return;
3152 } 3020 }
3153 3021
3154 const TypedData& boolean_skip_table = TypedData::ZoneHandle( 3022 const TypedData& boolean_skip_table = TypedData::ZoneHandle(
3155 compiler_->zone(), 3023 compiler_->zone(),
3156 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); 3024 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld));
3157 intptr_t skip_distance = GetSkipTable( 3025 intptr_t skip_distance =
3158 min_lookahead, max_lookahead, boolean_skip_table); 3026 GetSkipTable(min_lookahead, max_lookahead, boolean_skip_table);
3159 ASSERT(skip_distance != 0); 3027 ASSERT(skip_distance != 0);
3160 3028
3161 BlockLabel cont, again; 3029 BlockLabel cont, again;
3162 3030
3163 masm->BindBlock(&again); 3031 masm->BindBlock(&again);
3164 masm->LoadCurrentCharacter(max_lookahead, &cont, true); 3032 masm->LoadCurrentCharacter(max_lookahead, &cont, true);
3165 masm->CheckBitInTable(boolean_skip_table, &cont); 3033 masm->CheckBitInTable(boolean_skip_table, &cont);
3166 masm->AdvanceCurrentPosition(skip_distance); 3034 masm->AdvanceCurrentPosition(skip_distance);
3167 masm->GoTo(&again); 3035 masm->GoTo(&again);
3168 masm->BindBlock(&cont); 3036 masm->BindBlock(&cont);
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
3262 ASSERT(!trace->mentions_reg(guards->At(j)->reg())); 3130 ASSERT(!trace->mentions_reg(guards->At(j)->reg()));
3263 } 3131 }
3264 } 3132 }
3265 #endif 3133 #endif
3266 } 3134 }
3267 3135
3268 3136
3269 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, 3137 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler,
3270 Trace* current_trace, 3138 Trace* current_trace,
3271 PreloadState* state) { 3139 PreloadState* state) {
3272 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { 3140 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
3273 // Save some time by looking at most one machine word ahead. 3141 // Save some time by looking at most one machine word ahead.
3274 state->eats_at_least_ = 3142 state->eats_at_least_ =
3275 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget, 3143 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget,
3276 current_trace->at_start() == Trace::FALSE_VALUE); 3144 current_trace->at_start() == Trace::FALSE_VALUE);
3277 } 3145 }
3278 state->preload_characters_ = 3146 state->preload_characters_ =
3279 CalculatePreloadCharacters(compiler, state->eats_at_least_); 3147 CalculatePreloadCharacters(compiler, state->eats_at_least_);
3280 3148
3281 state->preload_is_current_ = 3149 state->preload_is_current_ =
3282 (current_trace->characters_preloaded() == state->preload_characters_); 3150 (current_trace->characters_preloaded() == state->preload_characters_);
3283 state->preload_has_checked_bounds_ = state->preload_is_current_; 3151 state->preload_has_checked_bounds_ = state->preload_is_current_;
3284 } 3152 }
3285 3153
3286 3154
3287 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3155 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3288 intptr_t choice_count = alternatives_->length(); 3156 intptr_t choice_count = alternatives_->length();
3289 3157
3290 AssertGuardsMentionRegisters(trace); 3158 AssertGuardsMentionRegisters(trace);
3291 3159
3292 LimitResult limit_result = LimitVersions(compiler, trace); 3160 LimitResult limit_result = LimitVersions(compiler, trace);
3293 if (limit_result == DONE) return; 3161 if (limit_result == DONE) return;
(...skipping 10 matching lines...) Expand all
3304 3172
3305 PreloadState preload; 3173 PreloadState preload;
3306 preload.init(); 3174 preload.init();
3307 GreedyLoopState greedy_loop_state(not_at_start()); 3175 GreedyLoopState greedy_loop_state(not_at_start());
3308 3176
3309 intptr_t text_length = 3177 intptr_t text_length =
3310 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0])); 3178 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0]));
3311 AlternativeGenerationList alt_gens(choice_count); 3179 AlternativeGenerationList alt_gens(choice_count);
3312 3180
3313 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { 3181 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
3314 trace = EmitGreedyLoop(compiler, 3182 trace = EmitGreedyLoop(compiler, trace, &alt_gens, &preload,
3315 trace, 3183 &greedy_loop_state, text_length);
3316 &alt_gens,
3317 &preload,
3318 &greedy_loop_state,
3319 text_length);
3320 } else { 3184 } else {
3321 // TODO(erikcorry): Delete this. We don't need this label, but it makes us 3185 // TODO(erikcorry): Delete this. We don't need this label, but it makes us
3322 // match the traces produced pre-cleanup. 3186 // match the traces produced pre-cleanup.
3323 BlockLabel second_choice; 3187 BlockLabel second_choice;
3324 compiler->macro_assembler()->BindBlock(&second_choice); 3188 compiler->macro_assembler()->BindBlock(&second_choice);
3325 3189
3326 preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace); 3190 preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace);
3327 3191
3328 EmitChoices(compiler, 3192 EmitChoices(compiler, &alt_gens, 0, trace, &preload);
3329 &alt_gens,
3330 0,
3331 trace,
3332 &preload);
3333 } 3193 }
3334 3194
3335 // At this point we need to generate slow checks for the alternatives where 3195 // At this point we need to generate slow checks for the alternatives where
3336 // the quick check was inlined. We can recognize these because the associated 3196 // the quick check was inlined. We can recognize these because the associated
3337 // label was bound. 3197 // label was bound.
3338 intptr_t new_flush_budget = trace->flush_budget() / choice_count; 3198 intptr_t new_flush_budget = trace->flush_budget() / choice_count;
3339 for (intptr_t i = 0; i < choice_count; i++) { 3199 for (intptr_t i = 0; i < choice_count; i++) {
3340 AlternativeGeneration* alt_gen = alt_gens.at(i); 3200 AlternativeGeneration* alt_gen = alt_gens.at(i);
3341 Trace new_trace(*trace); 3201 Trace new_trace(*trace);
3342 // If there are actions to be flushed we have to limit how many times 3202 // If there are actions to be flushed we have to limit how many times
3343 // they are flushed. Take the budget of the parent trace and distribute 3203 // they are flushed. Take the budget of the parent trace and distribute
3344 // it fairly amongst the children. 3204 // it fairly amongst the children.
3345 if (new_trace.actions() != NULL) { 3205 if (new_trace.actions() != NULL) {
3346 new_trace.set_flush_budget(new_flush_budget); 3206 new_trace.set_flush_budget(new_flush_budget);
3347 } 3207 }
3348 bool next_expects_preload = 3208 bool next_expects_preload =
3349 i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload; 3209 i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload;
3350 EmitOutOfLineContinuation(compiler, 3210 EmitOutOfLineContinuation(compiler, &new_trace, alternatives_->At(i),
3351 &new_trace, 3211 alt_gen, preload.preload_characters_,
3352 alternatives_->At(i),
3353 alt_gen,
3354 preload.preload_characters_,
3355 next_expects_preload); 3212 next_expects_preload);
3356 } 3213 }
3357 } 3214 }
3358 3215
3359 Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, 3216 Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler,
3360 Trace* trace, 3217 Trace* trace,
3361 AlternativeGenerationList* alt_gens, 3218 AlternativeGenerationList* alt_gens,
3362 PreloadState* preload, 3219 PreloadState* preload,
3363 GreedyLoopState* greedy_loop_state, 3220 GreedyLoopState* greedy_loop_state,
3364 intptr_t text_length) { 3221 intptr_t text_length) {
(...skipping 16 matching lines...) Expand all
3381 greedy_match_trace.set_stop_node(this); 3238 greedy_match_trace.set_stop_node(this);
3382 greedy_match_trace.set_loop_label(&loop_label); 3239 greedy_match_trace.set_loop_label(&loop_label);
3383 (*alternatives_)[0].node()->Emit(compiler, &greedy_match_trace); 3240 (*alternatives_)[0].node()->Emit(compiler, &greedy_match_trace);
3384 macro_assembler->BindBlock(&greedy_match_failed); 3241 macro_assembler->BindBlock(&greedy_match_failed);
3385 3242
3386 BlockLabel second_choice; // For use in greedy matches. 3243 BlockLabel second_choice; // For use in greedy matches.
3387 macro_assembler->BindBlock(&second_choice); 3244 macro_assembler->BindBlock(&second_choice);
3388 3245
3389 Trace* new_trace = greedy_loop_state->counter_backtrack_trace(); 3246 Trace* new_trace = greedy_loop_state->counter_backtrack_trace();
3390 3247
3391 EmitChoices(compiler, 3248 EmitChoices(compiler, alt_gens, 1, new_trace, preload);
3392 alt_gens,
3393 1,
3394 new_trace,
3395 preload);
3396 3249
3397 macro_assembler->BindBlock(greedy_loop_state->label()); 3250 macro_assembler->BindBlock(greedy_loop_state->label());
3398 // If we have unwound to the bottom then backtrack. 3251 // If we have unwound to the bottom then backtrack.
3399 macro_assembler->CheckGreedyLoop(trace->backtrack()); 3252 macro_assembler->CheckGreedyLoop(trace->backtrack());
3400 // Otherwise try the second priority at an earlier position. 3253 // Otherwise try the second priority at an earlier position.
3401 macro_assembler->AdvanceCurrentPosition(-text_length); 3254 macro_assembler->AdvanceCurrentPosition(-text_length);
3402 macro_assembler->GoTo(&second_choice); 3255 macro_assembler->GoTo(&second_choice);
3403 return new_trace; 3256 return new_trace;
3404 } 3257 }
3405 3258
(...skipping 23 matching lines...) Expand all
3429 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 3282 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3430 // At this point we know that we are at a non-greedy loop that will eat 3283 // At this point we know that we are at a non-greedy loop that will eat
3431 // any character one at a time. Any non-anchored regexp has such a 3284 // any character one at a time. Any non-anchored regexp has such a
3432 // loop prepended to it in order to find where it starts. We look for 3285 // loop prepended to it in order to find where it starts. We look for
3433 // a pattern of the form ...abc... where we can look 6 characters ahead 3286 // a pattern of the form ...abc... where we can look 6 characters ahead
3434 // and step forwards 3 if the character is not one of abc. Abc need 3287 // and step forwards 3 if the character is not one of abc. Abc need
3435 // not be atoms, they can be any reasonably limited character class or 3288 // not be atoms, they can be any reasonably limited character class or
3436 // small alternation. 3289 // small alternation.
3437 BoyerMooreLookahead* bm = bm_info(false); 3290 BoyerMooreLookahead* bm = bm_info(false);
3438 if (bm == NULL) { 3291 if (bm == NULL) {
3439 eats_at_least = Utils::Minimum(kMaxLookaheadForBoyerMoore, 3292 eats_at_least = Utils::Minimum(
3440 EatsAtLeast(kMaxLookaheadForBoyerMoore, 3293 kMaxLookaheadForBoyerMoore,
3441 kRecursionBudget, 3294 EatsAtLeast(kMaxLookaheadForBoyerMoore, kRecursionBudget, false));
3442 false));
3443 if (eats_at_least >= 1) { 3295 if (eats_at_least >= 1) {
3444 bm = new(Z) BoyerMooreLookahead(eats_at_least, compiler, Z); 3296 bm = new (Z) BoyerMooreLookahead(eats_at_least, compiler, Z);
3445 GuardedAlternative alt0 = alternatives_->At(0); 3297 GuardedAlternative alt0 = alternatives_->At(0);
3446 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false); 3298 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false);
3447 } 3299 }
3448 } 3300 }
3449 if (bm != NULL) { 3301 if (bm != NULL) {
3450 bm->EmitSkipInstructions(macro_assembler); 3302 bm->EmitSkipInstructions(macro_assembler);
3451 } 3303 }
3452 return eats_at_least; 3304 return eats_at_least;
3453 } 3305 }
3454 3306
(...skipping 14 matching lines...) Expand all
3469 3321
3470 for (intptr_t i = first_choice; i < choice_count; i++) { 3322 for (intptr_t i = first_choice; i < choice_count; i++) {
3471 bool is_last = i == choice_count - 1; 3323 bool is_last = i == choice_count - 1;
3472 bool fall_through_on_failure = !is_last; 3324 bool fall_through_on_failure = !is_last;
3473 GuardedAlternative alternative = alternatives_->At(i); 3325 GuardedAlternative alternative = alternatives_->At(i);
3474 AlternativeGeneration* alt_gen = alt_gens->at(i); 3326 AlternativeGeneration* alt_gen = alt_gens->at(i);
3475 alt_gen->quick_check_details.set_characters(preload->preload_characters_); 3327 alt_gen->quick_check_details.set_characters(preload->preload_characters_);
3476 ZoneGrowableArray<Guard*>* guards = alternative.guards(); 3328 ZoneGrowableArray<Guard*>* guards = alternative.guards();
3477 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); 3329 intptr_t guard_count = (guards == NULL) ? 0 : guards->length();
3478 Trace new_trace(*trace); 3330 Trace new_trace(*trace);
3479 new_trace.set_characters_preloaded(preload->preload_is_current_ ? 3331 new_trace.set_characters_preloaded(
3480 preload->preload_characters_ : 3332 preload->preload_is_current_ ? preload->preload_characters_ : 0);
3481 0);
3482 if (preload->preload_has_checked_bounds_) { 3333 if (preload->preload_has_checked_bounds_) {
3483 new_trace.set_bound_checked_up_to(preload->preload_characters_); 3334 new_trace.set_bound_checked_up_to(preload->preload_characters_);
3484 } 3335 }
3485 new_trace.quick_check_performed()->Clear(); 3336 new_trace.quick_check_performed()->Clear();
3486 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE); 3337 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
3487 if (!is_last) { 3338 if (!is_last) {
3488 new_trace.set_backtrack(&alt_gen->after); 3339 new_trace.set_backtrack(&alt_gen->after);
3489 } 3340 }
3490 alt_gen->expects_preload = preload->preload_is_current_; 3341 alt_gen->expects_preload = preload->preload_is_current_;
3491 bool generate_full_check_inline = false; 3342 bool generate_full_check_inline = false;
3492 if (kRegexpOptimization && 3343 if (kRegexpOptimization &&
3493 try_to_emit_quick_check_for_alternative(i == 0) && 3344 try_to_emit_quick_check_for_alternative(i == 0) &&
3494 alternative.node()->EmitQuickCheck(compiler, 3345 alternative.node()->EmitQuickCheck(
3495 trace, 3346 compiler, trace, &new_trace, preload->preload_has_checked_bounds_,
3496 &new_trace, 3347 &alt_gen->possible_success, &alt_gen->quick_check_details,
3497 preload->preload_has_checked_bounds_, 3348 fall_through_on_failure)) {
3498 &alt_gen->possible_success,
3499 &alt_gen->quick_check_details,
3500 fall_through_on_failure)) {
3501 // Quick check was generated for this choice. 3349 // Quick check was generated for this choice.
3502 preload->preload_is_current_ = true; 3350 preload->preload_is_current_ = true;
3503 preload->preload_has_checked_bounds_ = true; 3351 preload->preload_has_checked_bounds_ = true;
3504 // If we generated the quick check to fall through on possible success, 3352 // If we generated the quick check to fall through on possible success,
3505 // we now need to generate the full check inline. 3353 // we now need to generate the full check inline.
3506 if (!fall_through_on_failure) { 3354 if (!fall_through_on_failure) {
3507 macro_assembler->BindBlock(&alt_gen->possible_success); 3355 macro_assembler->BindBlock(&alt_gen->possible_success);
3508 new_trace.set_quick_check_performed(&alt_gen->quick_check_details); 3356 new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
3509 new_trace.set_characters_preloaded(preload->preload_characters_); 3357 new_trace.set_characters_preloaded(preload->preload_characters_);
3510 new_trace.set_bound_checked_up_to(preload->preload_characters_); 3358 new_trace.set_bound_checked_up_to(preload->preload_characters_);
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
3562 BlockLabel reload_current_char; 3410 BlockLabel reload_current_char;
3563 out_of_line_trace.set_backtrack(&reload_current_char); 3411 out_of_line_trace.set_backtrack(&reload_current_char);
3564 for (intptr_t j = 0; j < guard_count; j++) { 3412 for (intptr_t j = 0; j < guard_count; j++) {
3565 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); 3413 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace);
3566 } 3414 }
3567 alternative.node()->Emit(compiler, &out_of_line_trace); 3415 alternative.node()->Emit(compiler, &out_of_line_trace);
3568 macro_assembler->BindBlock(&reload_current_char); 3416 macro_assembler->BindBlock(&reload_current_char);
3569 // Reload the current character, since the next quick check expects that. 3417 // Reload the current character, since the next quick check expects that.
3570 // We don't need to check bounds here because we only get into this 3418 // We don't need to check bounds here because we only get into this
3571 // code through a quick check which already did the checked load. 3419 // code through a quick check which already did the checked load.
3572 macro_assembler->LoadCurrentCharacter(trace->cp_offset(), 3420 macro_assembler->LoadCurrentCharacter(trace->cp_offset(), NULL, false,
3573 NULL,
3574 false,
3575 preload_characters); 3421 preload_characters);
3576 macro_assembler->GoTo(&(alt_gen->after)); 3422 macro_assembler->GoTo(&(alt_gen->after));
3577 } else { 3423 } else {
3578 out_of_line_trace.set_backtrack(&(alt_gen->after)); 3424 out_of_line_trace.set_backtrack(&(alt_gen->after));
3579 for (intptr_t j = 0; j < guard_count; j++) { 3425 for (intptr_t j = 0; j < guard_count; j++) {
3580 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); 3426 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace);
3581 } 3427 }
3582 alternative.node()->Emit(compiler, &out_of_line_trace); 3428 alternative.node()->Emit(compiler, &out_of_line_trace);
3583 } 3429 }
3584 } 3430 }
3585 3431
3586 3432
3587 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3433 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3588 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 3434 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3589 LimitResult limit_result = LimitVersions(compiler, trace); 3435 LimitResult limit_result = LimitVersions(compiler, trace);
3590 if (limit_result == DONE) return; 3436 if (limit_result == DONE) return;
3591 ASSERT(limit_result == CONTINUE); 3437 ASSERT(limit_result == CONTINUE);
3592 3438
3593 RecursionCheck rc(compiler); 3439 RecursionCheck rc(compiler);
3594 3440
3595 switch (action_type_) { 3441 switch (action_type_) {
3596 case STORE_POSITION: { 3442 case STORE_POSITION: {
3597 Trace::DeferredCapture 3443 Trace::DeferredCapture new_capture(data_.u_position_register.reg,
3598 new_capture(data_.u_position_register.reg, 3444 data_.u_position_register.is_capture,
3599 data_.u_position_register.is_capture, 3445 trace);
3600 trace);
3601 Trace new_trace = *trace; 3446 Trace new_trace = *trace;
3602 new_trace.add_action(&new_capture); 3447 new_trace.add_action(&new_capture);
3603 on_success()->Emit(compiler, &new_trace); 3448 on_success()->Emit(compiler, &new_trace);
3604 break; 3449 break;
3605 } 3450 }
3606 case INCREMENT_REGISTER: { 3451 case INCREMENT_REGISTER: {
3607 Trace::DeferredIncrementRegister 3452 Trace::DeferredIncrementRegister new_increment(
3608 new_increment(data_.u_increment_register.reg); 3453 data_.u_increment_register.reg);
3609 Trace new_trace = *trace; 3454 Trace new_trace = *trace;
3610 new_trace.add_action(&new_increment); 3455 new_trace.add_action(&new_increment);
3611 on_success()->Emit(compiler, &new_trace); 3456 on_success()->Emit(compiler, &new_trace);
3612 break; 3457 break;
3613 } 3458 }
3614 case SET_REGISTER: { 3459 case SET_REGISTER: {
3615 Trace::DeferredSetRegister 3460 Trace::DeferredSetRegister new_set(data_.u_store_register.reg,
3616 new_set(data_.u_store_register.reg, data_.u_store_register.value); 3461 data_.u_store_register.value);
3617 Trace new_trace = *trace; 3462 Trace new_trace = *trace;
3618 new_trace.add_action(&new_set); 3463 new_trace.add_action(&new_set);
3619 on_success()->Emit(compiler, &new_trace); 3464 on_success()->Emit(compiler, &new_trace);
3620 break; 3465 break;
3621 } 3466 }
3622 case CLEAR_CAPTURES: { 3467 case CLEAR_CAPTURES: {
3623 Trace::DeferredClearCaptures 3468 Trace::DeferredClearCaptures new_capture(Interval(
3624 new_capture(Interval(data_.u_clear_captures.range_from, 3469 data_.u_clear_captures.range_from, data_.u_clear_captures.range_to));
3625 data_.u_clear_captures.range_to));
3626 Trace new_trace = *trace; 3470 Trace new_trace = *trace;
3627 new_trace.add_action(&new_capture); 3471 new_trace.add_action(&new_capture);
3628 on_success()->Emit(compiler, &new_trace); 3472 on_success()->Emit(compiler, &new_trace);
3629 break; 3473 break;
3630 } 3474 }
3631 case BEGIN_SUBMATCH: 3475 case BEGIN_SUBMATCH:
3632 if (!trace->is_trivial()) { 3476 if (!trace->is_trivial()) {
3633 trace->Flush(compiler, this); 3477 trace->Flush(compiler, this);
3634 } else { 3478 } else {
3635 assembler->WriteCurrentPositionToRegister( 3479 assembler->WriteCurrentPositionToRegister(
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
3715 } 3559 }
3716 3560
3717 LimitResult limit_result = LimitVersions(compiler, trace); 3561 LimitResult limit_result = LimitVersions(compiler, trace);
3718 if (limit_result == DONE) return; 3562 if (limit_result == DONE) return;
3719 ASSERT(limit_result == CONTINUE); 3563 ASSERT(limit_result == CONTINUE);
3720 3564
3721 RecursionCheck rc(compiler); 3565 RecursionCheck rc(compiler);
3722 3566
3723 ASSERT(start_reg_ + 1 == end_reg_); 3567 ASSERT(start_reg_ + 1 == end_reg_);
3724 if (compiler->ignore_case()) { 3568 if (compiler->ignore_case()) {
3725 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, 3569 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, trace->backtrack());
3726 trace->backtrack());
3727 } else { 3570 } else {
3728 assembler->CheckNotBackReference(start_reg_, trace->backtrack()); 3571 assembler->CheckNotBackReference(start_reg_, trace->backtrack());
3729 } 3572 }
3730 on_success()->Emit(compiler, trace); 3573 on_success()->Emit(compiler, trace);
3731 } 3574 }
3732 3575
3733 3576
3734 // ------------------------------------------------------------------- 3577 // -------------------------------------------------------------------
3735 // Dot/dotty output 3578 // Dot/dotty output
3736 3579
3737 3580
3738 #ifdef DEBUG 3581 #ifdef DEBUG
3739 3582
3740 3583
3741 class DotPrinter: public NodeVisitor { 3584 class DotPrinter : public NodeVisitor {
3742 public: 3585 public:
3743 explicit DotPrinter(bool ignore_case) {} 3586 explicit DotPrinter(bool ignore_case) {}
3744 void PrintNode(const char* label, RegExpNode* node); 3587 void PrintNode(const char* label, RegExpNode* node);
3745 void Visit(RegExpNode* node); 3588 void Visit(RegExpNode* node);
3746 void PrintAttributes(RegExpNode* from); 3589 void PrintAttributes(RegExpNode* from);
3747 void PrintOnFailure(RegExpNode* from, RegExpNode* to); 3590 void PrintOnFailure(RegExpNode* from, RegExpNode* to);
3748 #define DECLARE_VISIT(Type) \ 3591 #define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that);
3749 virtual void Visit##Type(Type##Node* that); 3592 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
3750 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
3751 #undef DECLARE_VISIT 3593 #undef DECLARE_VISIT
3752 }; 3594 };
3753 3595
3754 3596
3755 void DotPrinter::PrintNode(const char* label, RegExpNode* node) { 3597 void DotPrinter::PrintNode(const char* label, RegExpNode* node) {
3756 OS::Print("digraph G {\n graph [label=\""); 3598 OS::Print("digraph G {\n graph [label=\"");
3757 for (intptr_t i = 0; label[i]; i++) { 3599 for (intptr_t i = 0; label[i]; i++) {
3758 switch (label[i]) { 3600 switch (label[i]) {
3759 case '\\': 3601 case '\\':
3760 OS::Print("\\\\"); 3602 OS::Print("\\\\");
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
3806 PrintSeparator(); 3648 PrintSeparator();
3807 OS::Print("{%s|%" Pd "}", name, value); 3649 OS::Print("{%s|%" Pd "}", name, value);
3808 } 3650 }
3809 3651
3810 private: 3652 private:
3811 bool first_; 3653 bool first_;
3812 }; 3654 };
3813 3655
3814 3656
3815 void DotPrinter::PrintAttributes(RegExpNode* that) { 3657 void DotPrinter::PrintAttributes(RegExpNode* that) {
3816 OS::Print(" a%p [shape=Mrecord, color=grey, fontcolor=grey, " 3658 OS::Print(
3817 "margin=0.1, fontsize=10, label=\"{", that); 3659 " a%p [shape=Mrecord, color=grey, fontcolor=grey, "
3660 "margin=0.1, fontsize=10, label=\"{",
3661 that);
3818 AttributePrinter printer; 3662 AttributePrinter printer;
3819 NodeInfo* info = that->info(); 3663 NodeInfo* info = that->info();
3820 printer.PrintBit("NI", info->follows_newline_interest); 3664 printer.PrintBit("NI", info->follows_newline_interest);
3821 printer.PrintBit("WI", info->follows_word_interest); 3665 printer.PrintBit("WI", info->follows_word_interest);
3822 printer.PrintBit("SI", info->follows_start_interest); 3666 printer.PrintBit("SI", info->follows_start_interest);
3823 BlockLabel* label = that->label(); 3667 BlockLabel* label = that->label();
3824 if (label->IsBound()) 3668 if (label->IsBound()) printer.PrintPositive("@", label->Position());
3825 printer.PrintPositive("@", label->Position()); 3669 OS::Print(
3826 OS::Print("}\"];\n" 3670 "}\"];\n"
3827 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n", 3671 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n",
3828 that, that); 3672 that, that);
3829 } 3673 }
3830 3674
3831 3675
3832 void DotPrinter::VisitChoice(ChoiceNode* that) { 3676 void DotPrinter::VisitChoice(ChoiceNode* that) {
3833 OS::Print(" n%p [shape=Mrecord, label=\"?\"];\n", that); 3677 OS::Print(" n%p [shape=Mrecord, label=\"?\"];\n", that);
3834 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { 3678 for (intptr_t i = 0; i < that->alternatives()->length(); i++) {
3835 GuardedAlternative alt = that->alternatives()->At(i); 3679 GuardedAlternative alt = that->alternatives()->At(i);
3836 OS::Print(" n%p -> n%p", that, alt.node()); 3680 OS::Print(" n%p -> n%p", that, alt.node());
3837 } 3681 }
3838 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { 3682 for (intptr_t i = 0; i < that->alternatives()->length(); i++) {
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after
3978 3822
3979 // ------------------------------------------------------------------- 3823 // -------------------------------------------------------------------
3980 // Tree to graph conversion 3824 // Tree to graph conversion
3981 3825
3982 // The zone in which we allocate graph nodes. 3826 // The zone in which we allocate graph nodes.
3983 #define OZ (on_success->zone()) 3827 #define OZ (on_success->zone())
3984 3828
3985 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, 3829 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
3986 RegExpNode* on_success) { 3830 RegExpNode* on_success) {
3987 ZoneGrowableArray<TextElement>* elms = 3831 ZoneGrowableArray<TextElement>* elms =
3988 new(OZ) ZoneGrowableArray<TextElement>(1); 3832 new (OZ) ZoneGrowableArray<TextElement>(1);
3989 elms->Add(TextElement::Atom(this)); 3833 elms->Add(TextElement::Atom(this));
3990 return new(OZ) TextNode(elms, on_success); 3834 return new (OZ) TextNode(elms, on_success);
3991 } 3835 }
3992 3836
3993 3837
3994 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, 3838 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
3995 RegExpNode* on_success) { 3839 RegExpNode* on_success) {
3996 ZoneGrowableArray<TextElement>* elms = 3840 ZoneGrowableArray<TextElement>* elms =
3997 new(OZ) ZoneGrowableArray<TextElement>(1); 3841 new (OZ) ZoneGrowableArray<TextElement>(1);
3998 for (intptr_t i = 0; i < elements()->length(); i++) { 3842 for (intptr_t i = 0; i < elements()->length(); i++) {
3999 elms->Add(elements()->At(i)); 3843 elms->Add(elements()->At(i));
4000 } 3844 }
4001 return new(OZ) TextNode(elms, on_success); 3845 return new (OZ) TextNode(elms, on_success);
4002 } 3846 }
4003 3847
4004 3848
4005 static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges, 3849 static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges,
4006 const intptr_t* special_class, 3850 const intptr_t* special_class,
4007 intptr_t length) { 3851 intptr_t length) {
4008 length--; // Remove final 0x10000. 3852 length--; // Remove final 0x10000.
4009 ASSERT(special_class[length] == 0x10000); 3853 ASSERT(special_class[length] == 0x10000);
4010 ASSERT(ranges->length() != 0); 3854 ASSERT(ranges->length() != 0);
4011 ASSERT(length != 0); 3855 ASSERT(length != 0);
4012 ASSERT(special_class[0] != 0); 3856 ASSERT(special_class[0] != 0);
4013 if (ranges->length() != (length >> 1) + 1) { 3857 if (ranges->length() != (length >> 1) + 1) {
4014 return false; 3858 return false;
4015 } 3859 }
4016 CharacterRange range = ranges->At(0); 3860 CharacterRange range = ranges->At(0);
4017 if (range.from() != 0) { 3861 if (range.from() != 0) {
4018 return false; 3862 return false;
4019 } 3863 }
4020 for (intptr_t i = 0; i < length; i += 2) { 3864 for (intptr_t i = 0; i < length; i += 2) {
4021 if (special_class[i] != (range.to() + 1)) { 3865 if (special_class[i] != (range.to() + 1)) {
4022 return false; 3866 return false;
4023 } 3867 }
4024 range = ranges->At((i >> 1) + 1); 3868 range = ranges->At((i >> 1) + 1);
4025 if (special_class[i+1] != range.from()) { 3869 if (special_class[i + 1] != range.from()) {
4026 return false; 3870 return false;
4027 } 3871 }
4028 } 3872 }
4029 if (range.to() != 0xffff) { 3873 if (range.to() != 0xffff) {
4030 return false; 3874 return false;
4031 } 3875 }
4032 return true; 3876 return true;
4033 } 3877 }
4034 3878
4035 3879
(...skipping 26 matching lines...) Expand all
4062 return true; 3906 return true;
4063 } 3907 }
4064 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { 3908 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) {
4065 set_.set_standard_set_type('s'); 3909 set_.set_standard_set_type('s');
4066 return true; 3910 return true;
4067 } 3911 }
4068 if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { 3912 if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) {
4069 set_.set_standard_set_type('S'); 3913 set_.set_standard_set_type('S');
4070 return true; 3914 return true;
4071 } 3915 }
4072 if (CompareInverseRanges(set_.ranges(), 3916 if (CompareInverseRanges(set_.ranges(), kLineTerminatorRanges,
4073 kLineTerminatorRanges,
4074 kLineTerminatorRangeCount)) { 3917 kLineTerminatorRangeCount)) {
4075 set_.set_standard_set_type('.'); 3918 set_.set_standard_set_type('.');
4076 return true; 3919 return true;
4077 } 3920 }
4078 if (CompareRanges(set_.ranges(), 3921 if (CompareRanges(set_.ranges(), kLineTerminatorRanges,
4079 kLineTerminatorRanges,
4080 kLineTerminatorRangeCount)) { 3922 kLineTerminatorRangeCount)) {
4081 set_.set_standard_set_type('n'); 3923 set_.set_standard_set_type('n');
4082 return true; 3924 return true;
4083 } 3925 }
4084 if (CompareRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { 3926 if (CompareRanges(set_.ranges(), kWordRanges, kWordRangeCount)) {
4085 set_.set_standard_set_type('w'); 3927 set_.set_standard_set_type('w');
4086 return true; 3928 return true;
4087 } 3929 }
4088 if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { 3930 if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) {
4089 set_.set_standard_set_type('W'); 3931 set_.set_standard_set_type('W');
4090 return true; 3932 return true;
4091 } 3933 }
4092 return false; 3934 return false;
4093 } 3935 }
4094 3936
4095 3937
4096 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, 3938 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
4097 RegExpNode* on_success) { 3939 RegExpNode* on_success) {
4098 return new(OZ) TextNode(this, on_success); 3940 return new (OZ) TextNode(this, on_success);
4099 } 3941 }
4100 3942
4101 3943
4102 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, 3944 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
4103 RegExpNode* on_success) { 3945 RegExpNode* on_success) {
4104 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); 3946 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives();
4105 intptr_t length = alternatives->length(); 3947 intptr_t length = alternatives->length();
4106 ChoiceNode* result = 3948 ChoiceNode* result = new (OZ) ChoiceNode(length, OZ);
4107 new(OZ) ChoiceNode(length, OZ);
4108 for (intptr_t i = 0; i < length; i++) { 3949 for (intptr_t i = 0; i < length; i++) {
4109 GuardedAlternative alternative(alternatives->At(i)->ToNode(compiler, 3950 GuardedAlternative alternative(
4110 on_success)); 3951 alternatives->At(i)->ToNode(compiler, on_success));
4111 result->AddAlternative(alternative); 3952 result->AddAlternative(alternative);
4112 } 3953 }
4113 return result; 3954 return result;
4114 } 3955 }
4115 3956
4116 3957
4117 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, 3958 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
4118 RegExpNode* on_success) { 3959 RegExpNode* on_success) {
4119 return ToNode(min(), 3960 return ToNode(min(), max(), is_greedy(), body(), compiler, on_success);
4120 max(),
4121 is_greedy(),
4122 body(),
4123 compiler,
4124 on_success);
4125 } 3961 }
4126 3962
4127 3963
4128 // Scoped object to keep track of how much we unroll quantifier loops in the 3964 // Scoped object to keep track of how much we unroll quantifier loops in the
4129 // regexp graph generator. 3965 // regexp graph generator.
4130 class RegExpExpansionLimiter : public ValueObject { 3966 class RegExpExpansionLimiter : public ValueObject {
4131 public: 3967 public:
4132 static const intptr_t kMaxExpansionFactor = 6; 3968 static const intptr_t kMaxExpansionFactor = 6;
4133 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor) 3969 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor)
4134 : compiler_(compiler), 3970 : compiler_(compiler),
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
4200 Interval capture_registers = body->CaptureRegisters(); 4036 Interval capture_registers = body->CaptureRegisters();
4201 bool needs_capture_clearing = !capture_registers.is_empty(); 4037 bool needs_capture_clearing = !capture_registers.is_empty();
4202 Zone* zone = compiler->zone(); 4038 Zone* zone = compiler->zone();
4203 4039
4204 if (body_can_be_empty) { 4040 if (body_can_be_empty) {
4205 body_start_reg = compiler->AllocateRegister(); 4041 body_start_reg = compiler->AllocateRegister();
4206 } else if (kRegexpOptimization && !needs_capture_clearing) { 4042 } else if (kRegexpOptimization && !needs_capture_clearing) {
4207 // Only unroll if there are no captures and the body can't be 4043 // Only unroll if there are no captures and the body can't be
4208 // empty. 4044 // empty.
4209 { 4045 {
4210 RegExpExpansionLimiter limiter( 4046 RegExpExpansionLimiter limiter(compiler, min + ((max != min) ? 1 : 0));
4211 compiler, min + ((max != min) ? 1 : 0));
4212 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { 4047 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
4213 intptr_t new_max = (max == kInfinity) ? max : max - min; 4048 intptr_t new_max = (max == kInfinity) ? max : max - min;
4214 // Recurse once to get the loop or optional matches after the fixed 4049 // Recurse once to get the loop or optional matches after the fixed
4215 // ones. 4050 // ones.
4216 RegExpNode* answer = ToNode( 4051 RegExpNode* answer =
4217 0, new_max, is_greedy, body, compiler, on_success, true); 4052 ToNode(0, new_max, is_greedy, body, compiler, on_success, true);
4218 // Unroll the forced matches from 0 to min. This can cause chains of 4053 // Unroll the forced matches from 0 to min. This can cause chains of
4219 // TextNodes (which the parser does not generate). These should be 4054 // TextNodes (which the parser does not generate). These should be
4220 // combined if it turns out they hinder good code generation. 4055 // combined if it turns out they hinder good code generation.
4221 for (intptr_t i = 0; i < min; i++) { 4056 for (intptr_t i = 0; i < min; i++) {
4222 answer = body->ToNode(compiler, answer); 4057 answer = body->ToNode(compiler, answer);
4223 } 4058 }
4224 return answer; 4059 return answer;
4225 } 4060 }
4226 } 4061 }
4227 if (max <= kMaxUnrolledMaxMatches && min == 0) { 4062 if (max <= kMaxUnrolledMaxMatches && min == 0) {
4228 ASSERT(max > 0); // Due to the 'if' above. 4063 ASSERT(max > 0); // Due to the 'if' above.
4229 RegExpExpansionLimiter limiter(compiler, max); 4064 RegExpExpansionLimiter limiter(compiler, max);
4230 if (limiter.ok_to_expand()) { 4065 if (limiter.ok_to_expand()) {
4231 // Unroll the optional matches up to max. 4066 // Unroll the optional matches up to max.
4232 RegExpNode* answer = on_success; 4067 RegExpNode* answer = on_success;
4233 for (intptr_t i = 0; i < max; i++) { 4068 for (intptr_t i = 0; i < max; i++) {
4234 ChoiceNode* alternation = new(zone) ChoiceNode(2, zone); 4069 ChoiceNode* alternation = new (zone) ChoiceNode(2, zone);
4235 if (is_greedy) { 4070 if (is_greedy) {
4236 alternation->AddAlternative( 4071 alternation->AddAlternative(
4237 GuardedAlternative(body->ToNode(compiler, answer))); 4072 GuardedAlternative(body->ToNode(compiler, answer)));
4238 alternation->AddAlternative(GuardedAlternative(on_success)); 4073 alternation->AddAlternative(GuardedAlternative(on_success));
4239 } else { 4074 } else {
4240 alternation->AddAlternative(GuardedAlternative(on_success)); 4075 alternation->AddAlternative(GuardedAlternative(on_success));
4241 alternation->AddAlternative( 4076 alternation->AddAlternative(
4242 GuardedAlternative(body->ToNode(compiler, answer))); 4077 GuardedAlternative(body->ToNode(compiler, answer)));
4243 } 4078 }
4244 answer = alternation; 4079 answer = alternation;
4245 if (not_at_start) alternation->set_not_at_start(); 4080 if (not_at_start) alternation->set_not_at_start();
4246 } 4081 }
4247 return answer; 4082 return answer;
4248 } 4083 }
4249 } 4084 }
4250 } 4085 }
4251 bool has_min = min > 0; 4086 bool has_min = min > 0;
4252 bool has_max = max < RegExpTree::kInfinity; 4087 bool has_max = max < RegExpTree::kInfinity;
4253 bool needs_counter = has_min || has_max; 4088 bool needs_counter = has_min || has_max;
4254 intptr_t reg_ctr = needs_counter 4089 intptr_t reg_ctr = needs_counter ? compiler->AllocateRegister()
4255 ? compiler->AllocateRegister() 4090 : RegExpCompiler::kNoRegister;
4256 : RegExpCompiler::kNoRegister; 4091 LoopChoiceNode* center =
4257 LoopChoiceNode* center = new(zone) LoopChoiceNode(body->min_match() == 0, 4092 new (zone) LoopChoiceNode(body->min_match() == 0, zone);
4258 zone);
4259 if (not_at_start) center->set_not_at_start(); 4093 if (not_at_start) center->set_not_at_start();
4260 RegExpNode* loop_return = needs_counter 4094 RegExpNode* loop_return =
4261 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) 4095 needs_counter ? static_cast<RegExpNode*>(
4262 : static_cast<RegExpNode*>(center); 4096 ActionNode::IncrementRegister(reg_ctr, center))
4097 : static_cast<RegExpNode*>(center);
4263 if (body_can_be_empty) { 4098 if (body_can_be_empty) {
4264 // If the body can be empty we need to check if it was and then 4099 // If the body can be empty we need to check if it was and then
4265 // backtrack. 4100 // backtrack.
4266 loop_return = ActionNode::EmptyMatchCheck(body_start_reg, 4101 loop_return =
4267 reg_ctr, 4102 ActionNode::EmptyMatchCheck(body_start_reg, reg_ctr, min, loop_return);
4268 min,
4269 loop_return);
4270 } 4103 }
4271 RegExpNode* body_node = body->ToNode(compiler, loop_return); 4104 RegExpNode* body_node = body->ToNode(compiler, loop_return);
4272 if (body_can_be_empty) { 4105 if (body_can_be_empty) {
4273 // If the body can be empty we need to store the start position 4106 // If the body can be empty we need to store the start position
4274 // so we can bail out if it was empty. 4107 // so we can bail out if it was empty.
4275 body_node = ActionNode::StorePosition(body_start_reg, false, body_node); 4108 body_node = ActionNode::StorePosition(body_start_reg, false, body_node);
4276 } 4109 }
4277 if (needs_capture_clearing) { 4110 if (needs_capture_clearing) {
4278 // Before entering the body of this loop we need to clear captures. 4111 // Before entering the body of this loop we need to clear captures.
4279 body_node = ActionNode::ClearCaptures(capture_registers, body_node); 4112 body_node = ActionNode::ClearCaptures(capture_registers, body_node);
4280 } 4113 }
4281 GuardedAlternative body_alt(body_node); 4114 GuardedAlternative body_alt(body_node);
4282 if (has_max) { 4115 if (has_max) {
4283 Guard* body_guard = 4116 Guard* body_guard = new (zone) Guard(reg_ctr, Guard::LT, max);
4284 new(zone) Guard(reg_ctr, Guard::LT, max);
4285 body_alt.AddGuard(body_guard, zone); 4117 body_alt.AddGuard(body_guard, zone);
4286 } 4118 }
4287 GuardedAlternative rest_alt(on_success); 4119 GuardedAlternative rest_alt(on_success);
4288 if (has_min) { 4120 if (has_min) {
4289 Guard* rest_guard = new(zone) Guard(reg_ctr, Guard::GEQ, min); 4121 Guard* rest_guard = new (zone) Guard(reg_ctr, Guard::GEQ, min);
4290 rest_alt.AddGuard(rest_guard, zone); 4122 rest_alt.AddGuard(rest_guard, zone);
4291 } 4123 }
4292 if (is_greedy) { 4124 if (is_greedy) {
4293 center->AddLoopAlternative(body_alt); 4125 center->AddLoopAlternative(body_alt);
4294 center->AddContinueAlternative(rest_alt); 4126 center->AddContinueAlternative(rest_alt);
4295 } else { 4127 } else {
4296 center->AddContinueAlternative(rest_alt); 4128 center->AddContinueAlternative(rest_alt);
4297 center->AddLoopAlternative(body_alt); 4129 center->AddLoopAlternative(body_alt);
4298 } 4130 }
4299 if (needs_counter) { 4131 if (needs_counter) {
(...skipping 24 matching lines...) Expand all
4324 intptr_t stack_pointer_register = compiler->AllocateRegister(); 4156 intptr_t stack_pointer_register = compiler->AllocateRegister();
4325 intptr_t position_register = compiler->AllocateRegister(); 4157 intptr_t position_register = compiler->AllocateRegister();
4326 // The ChoiceNode to distinguish between a newline and end-of-input. 4158 // The ChoiceNode to distinguish between a newline and end-of-input.
4327 ChoiceNode* result = new ChoiceNode(2, on_success->zone()); 4159 ChoiceNode* result = new ChoiceNode(2, on_success->zone());
4328 // Create a newline atom. 4160 // Create a newline atom.
4329 ZoneGrowableArray<CharacterRange>* newline_ranges = 4161 ZoneGrowableArray<CharacterRange>* newline_ranges =
4330 new ZoneGrowableArray<CharacterRange>(3); 4162 new ZoneGrowableArray<CharacterRange>(3);
4331 CharacterRange::AddClassEscape('n', newline_ranges); 4163 CharacterRange::AddClassEscape('n', newline_ranges);
4332 RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n'); 4164 RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n');
4333 TextNode* newline_matcher = new TextNode( 4165 TextNode* newline_matcher = new TextNode(
4334 newline_atom, 4166 newline_atom, ActionNode::PositiveSubmatchSuccess(
4335 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, 4167 stack_pointer_register, position_register,
4336 position_register, 4168 0, // No captures inside.
4337 0, // No captures inside. 4169 -1, // Ignored if no captures.
4338 -1, // Ignored if no captures. 4170 on_success));
4339 on_success));
4340 // Create an end-of-input matcher. 4171 // Create an end-of-input matcher.
4341 RegExpNode* end_of_line = ActionNode::BeginSubmatch( 4172 RegExpNode* end_of_line = ActionNode::BeginSubmatch(
4342 stack_pointer_register, 4173 stack_pointer_register, position_register, newline_matcher);
4343 position_register,
4344 newline_matcher);
4345 // Add the two alternatives to the ChoiceNode. 4174 // Add the two alternatives to the ChoiceNode.
4346 GuardedAlternative eol_alternative(end_of_line); 4175 GuardedAlternative eol_alternative(end_of_line);
4347 result->AddAlternative(eol_alternative); 4176 result->AddAlternative(eol_alternative);
4348 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); 4177 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
4349 result->AddAlternative(end_alternative); 4178 result->AddAlternative(end_alternative);
4350 return result; 4179 return result;
4351 } 4180 }
4352 default: 4181 default:
4353 UNREACHABLE(); 4182 UNREACHABLE();
4354 } 4183 }
4355 return on_success; 4184 return on_success;
4356 } 4185 }
4357 4186
4358 4187
4359 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, 4188 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
4360 RegExpNode* on_success) { 4189 RegExpNode* on_success) {
4361 return new(OZ) 4190 return new (OZ)
4362 BackReferenceNode(RegExpCapture::StartRegister(index()), 4191 BackReferenceNode(RegExpCapture::StartRegister(index()),
4363 RegExpCapture::EndRegister(index()), 4192 RegExpCapture::EndRegister(index()), on_success);
4364 on_success);
4365 } 4193 }
4366 4194
4367 4195
4368 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, 4196 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
4369 RegExpNode* on_success) { 4197 RegExpNode* on_success) {
4370 return on_success; 4198 return on_success;
4371 } 4199 }
4372 4200
4373 4201
4374 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, 4202 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
4375 RegExpNode* on_success) { 4203 RegExpNode* on_success) {
4376 intptr_t stack_pointer_register = compiler->AllocateRegister(); 4204 intptr_t stack_pointer_register = compiler->AllocateRegister();
4377 intptr_t position_register = compiler->AllocateRegister(); 4205 intptr_t position_register = compiler->AllocateRegister();
4378 4206
4379 const intptr_t registers_per_capture = 2; 4207 const intptr_t registers_per_capture = 2;
4380 const intptr_t register_of_first_capture = 2; 4208 const intptr_t register_of_first_capture = 2;
4381 intptr_t register_count = capture_count_ * registers_per_capture; 4209 intptr_t register_count = capture_count_ * registers_per_capture;
4382 intptr_t register_start = 4210 intptr_t register_start =
4383 register_of_first_capture + capture_from_ * registers_per_capture; 4211 register_of_first_capture + capture_from_ * registers_per_capture;
4384 4212
4385 RegExpNode* success; 4213 RegExpNode* success;
4386 if (is_positive()) { 4214 if (is_positive()) {
4387 RegExpNode* node = ActionNode::BeginSubmatch( 4215 RegExpNode* node = ActionNode::BeginSubmatch(
4388 stack_pointer_register, 4216 stack_pointer_register, position_register,
4389 position_register, 4217 body()->ToNode(compiler,
4390 body()->ToNode( 4218 ActionNode::PositiveSubmatchSuccess(
4391 compiler, 4219 stack_pointer_register, position_register,
4392 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, 4220 register_count, register_start, on_success)));
4393 position_register,
4394 register_count,
4395 register_start,
4396 on_success)));
4397 return node; 4221 return node;
4398 } else { 4222 } else {
4399 // We use a ChoiceNode for a negative lookahead because it has most of 4223 // We use a ChoiceNode for a negative lookahead because it has most of
4400 // the characteristics we need. It has the body of the lookahead as its 4224 // the characteristics we need. It has the body of the lookahead as its
4401 // first alternative and the expression after the lookahead of the second 4225 // first alternative and the expression after the lookahead of the second
4402 // alternative. If the first alternative succeeds then the 4226 // alternative. If the first alternative succeeds then the
4403 // NegativeSubmatchSuccess will unwind the stack including everything the 4227 // NegativeSubmatchSuccess will unwind the stack including everything the
4404 // choice node set up and backtrack. If the first alternative fails then 4228 // choice node set up and backtrack. If the first alternative fails then
4405 // the second alternative is tried, which is exactly the desired result 4229 // the second alternative is tried, which is exactly the desired result
4406 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special 4230 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special
4407 // ChoiceNode that knows to ignore the first exit when calculating quick 4231 // ChoiceNode that knows to ignore the first exit when calculating quick
4408 // checks. 4232 // checks.
4409 4233
4410 GuardedAlternative body_alt( 4234 GuardedAlternative body_alt(
4411 body()->ToNode( 4235 body()->ToNode(compiler, success = new (OZ) NegativeSubmatchSuccess(
4412 compiler, 4236 stack_pointer_register, position_register,
4413 success = new(OZ) NegativeSubmatchSuccess(stack_pointer_register, 4237 register_count, register_start, OZ)));
4414 position_register, 4238 ChoiceNode* choice_node = new (OZ) NegativeLookaheadChoiceNode(
4415 register_count, 4239 body_alt, GuardedAlternative(on_success), OZ);
4416 register_start, 4240 return ActionNode::BeginSubmatch(stack_pointer_register, position_register,
4417 OZ)));
4418 ChoiceNode* choice_node =
4419 new(OZ) NegativeLookaheadChoiceNode(body_alt,
4420 GuardedAlternative(on_success),
4421 OZ);
4422 return ActionNode::BeginSubmatch(stack_pointer_register,
4423 position_register,
4424 choice_node); 4241 choice_node);
4425 } 4242 }
4426 } 4243 }
4427 4244
4428 4245
4429 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, 4246 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
4430 RegExpNode* on_success) { 4247 RegExpNode* on_success) {
4431 return ToNode(body(), index(), compiler, on_success); 4248 return ToNode(body(), index(), compiler, on_success);
4432 } 4249 }
4433 4250
(...skipping 26 matching lines...) Expand all
4460 ZoneGrowableArray<CharacterRange>* ranges) { 4277 ZoneGrowableArray<CharacterRange>* ranges) {
4461 elmc--; 4278 elmc--;
4462 ASSERT(elmv[elmc] == 0x10000); 4279 ASSERT(elmv[elmc] == 0x10000);
4463 for (intptr_t i = 0; i < elmc; i += 2) { 4280 for (intptr_t i = 0; i < elmc; i += 2) {
4464 ASSERT(elmv[i] < elmv[i + 1]); 4281 ASSERT(elmv[i] < elmv[i + 1]);
4465 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1)); 4282 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1));
4466 } 4283 }
4467 } 4284 }
4468 4285
4469 4286
4470 static void AddClassNegated(const intptr_t *elmv, 4287 static void AddClassNegated(const intptr_t* elmv,
4471 intptr_t elmc, 4288 intptr_t elmc,
4472 ZoneGrowableArray<CharacterRange>* ranges) { 4289 ZoneGrowableArray<CharacterRange>* ranges) {
4473 elmc--; 4290 elmc--;
4474 ASSERT(elmv[elmc] == 0x10000); 4291 ASSERT(elmv[elmc] == 0x10000);
4475 ASSERT(elmv[0] != 0x0000); 4292 ASSERT(elmv[0] != 0x0000);
4476 ASSERT(elmv[elmc-1] != Utf16::kMaxCodeUnit); 4293 ASSERT(elmv[elmc - 1] != Utf16::kMaxCodeUnit);
4477 uint16_t last = 0x0000; 4294 uint16_t last = 0x0000;
4478 for (intptr_t i = 0; i < elmc; i += 2) { 4295 for (intptr_t i = 0; i < elmc; i += 2) {
4479 ASSERT(last <= elmv[i] - 1); 4296 ASSERT(last <= elmv[i] - 1);
4480 ASSERT(elmv[i] < elmv[i + 1]); 4297 ASSERT(elmv[i] < elmv[i + 1]);
4481 ranges->Add(CharacterRange(last, elmv[i] - 1)); 4298 ranges->Add(CharacterRange(last, elmv[i] - 1));
4482 last = elmv[i + 1]; 4299 last = elmv[i + 1];
4483 } 4300 }
4484 ranges->Add(CharacterRange(last, Utf16::kMaxCodeUnit)); 4301 ranges->Add(CharacterRange(last, Utf16::kMaxCodeUnit));
4485 } 4302 }
4486 4303
(...skipping 13 matching lines...) Expand all
4500 case 'W': 4317 case 'W':
4501 AddClassNegated(kWordRanges, kWordRangeCount, ranges); 4318 AddClassNegated(kWordRanges, kWordRangeCount, ranges);
4502 break; 4319 break;
4503 case 'd': 4320 case 'd':
4504 AddClass(kDigitRanges, kDigitRangeCount, ranges); 4321 AddClass(kDigitRanges, kDigitRangeCount, ranges);
4505 break; 4322 break;
4506 case 'D': 4323 case 'D':
4507 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges); 4324 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges);
4508 break; 4325 break;
4509 case '.': 4326 case '.':
4510 AddClassNegated(kLineTerminatorRanges, 4327 AddClassNegated(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges);
4511 kLineTerminatorRangeCount,
4512 ranges);
4513 break; 4328 break;
4514 // This is not a character range as defined by the spec but a 4329 // This is not a character range as defined by the spec but a
4515 // convenient shorthand for a character class that matches any 4330 // convenient shorthand for a character class that matches any
4516 // character. 4331 // character.
4517 case '*': 4332 case '*':
4518 ranges->Add(CharacterRange::Everything()); 4333 ranges->Add(CharacterRange::Everything());
4519 break; 4334 break;
4520 // This is the set of characters matched by the $ and ^ symbols 4335 // This is the set of characters matched by the $ and ^ symbols
4521 // in multiline mode. 4336 // in multiline mode.
4522 case 'n': 4337 case 'n':
4523 AddClass(kLineTerminatorRanges, 4338 AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges);
4524 kLineTerminatorRangeCount,
4525 ranges);
4526 break; 4339 break;
4527 default: 4340 default:
4528 UNREACHABLE(); 4341 UNREACHABLE();
4529 } 4342 }
4530 } 4343 }
4531 4344
4532 4345
4533 void CharacterRange::AddCaseEquivalents( 4346 void CharacterRange::AddCaseEquivalents(
4534 ZoneGrowableArray<CharacterRange>* ranges, 4347 ZoneGrowableArray<CharacterRange>* ranges,
4535 bool is_one_byte, 4348 bool is_one_byte,
4536 Zone* zone) { 4349 Zone* zone) {
4537 uint16_t bottom = from(); 4350 uint16_t bottom = from();
4538 uint16_t top = to(); 4351 uint16_t top = to();
4539 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) { 4352 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) {
4540 if (bottom > Symbols::kMaxOneCharCodeSymbol) return; 4353 if (bottom > Symbols::kMaxOneCharCodeSymbol) return;
4541 if (top > Symbols::kMaxOneCharCodeSymbol) { 4354 if (top > Symbols::kMaxOneCharCodeSymbol) {
4542 top = Symbols::kMaxOneCharCodeSymbol; 4355 top = Symbols::kMaxOneCharCodeSymbol;
4543 } 4356 }
4544 } 4357 }
4545 4358
4546 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize; 4359 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize;
4547 unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange; 4360 unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange;
4548 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 4361 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
4549 if (top == bottom) { 4362 if (top == bottom) {
4550 // If this is a singleton we just expand the one character. 4363 // If this is a singleton we just expand the one character.
4551 intptr_t length = jsregexp_uncanonicalize.get(bottom, '\0', chars); // NOLIN T 4364 intptr_t length =
4365 jsregexp_uncanonicalize.get(bottom, '\0', chars); // NOLINT
4552 for (intptr_t i = 0; i < length; i++) { 4366 for (intptr_t i = 0; i < length; i++) {
4553 uint32_t chr = chars[i]; 4367 uint32_t chr = chars[i];
4554 if (chr != bottom) { 4368 if (chr != bottom) {
4555 ranges->Add(CharacterRange::Singleton(chars[i])); 4369 ranges->Add(CharacterRange::Singleton(chars[i]));
4556 } 4370 }
4557 } 4371 }
4558 } else { 4372 } else {
4559 // If this is a range we expand the characters block by block, 4373 // If this is a range we expand the characters block by block,
4560 // expanding contiguous subranges (blocks) one at a time. 4374 // expanding contiguous subranges (blocks) one at a time.
4561 // The approach is as follows. For a given start character we 4375 // The approach is as follows. For a given start character we
(...skipping 17 matching lines...) Expand all
4579 while (pos <= top) { 4393 while (pos <= top) {
4580 intptr_t length = jsregexp_canonrange.get(pos, '\0', range); 4394 intptr_t length = jsregexp_canonrange.get(pos, '\0', range);
4581 uint16_t block_end; 4395 uint16_t block_end;
4582 if (length == 0) { 4396 if (length == 0) {
4583 block_end = pos; 4397 block_end = pos;
4584 } else { 4398 } else {
4585 ASSERT(length == 1); 4399 ASSERT(length == 1);
4586 block_end = range[0]; 4400 block_end = range[0];
4587 } 4401 }
4588 intptr_t end = (block_end > top) ? top : block_end; 4402 intptr_t end = (block_end > top) ? top : block_end;
4589 length = jsregexp_uncanonicalize.get(block_end, '\0', range); // NOLINT 4403 length = jsregexp_uncanonicalize.get(block_end, '\0', range); // NOLINT
4590 for (intptr_t i = 0; i < length; i++) { 4404 for (intptr_t i = 0; i < length; i++) {
4591 uint32_t c = range[i]; 4405 uint32_t c = range[i];
4592 uint16_t range_from = c - (block_end - pos); 4406 uint16_t range_from = c - (block_end - pos);
4593 uint16_t range_to = c - (block_end - end); 4407 uint16_t range_to = c - (block_end - end);
4594 if (!(bottom <= range_from && range_to <= top)) { 4408 if (!(bottom <= range_from && range_to <= top)) {
4595 ranges->Add(CharacterRange(range_from, range_to)); 4409 ranges->Add(CharacterRange(range_from, range_to));
4596 } 4410 }
4597 } 4411 }
4598 pos = end + 1; 4412 pos = end + 1;
4599 } 4413 }
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
4637 } 4451 }
4638 } else { 4452 } else {
4639 for (intptr_t i = 0; i < count; i++) { 4453 for (intptr_t i = 0; i < count; i++) {
4640 (*list)[to + i] = list->At(from + i); 4454 (*list)[to + i] = list->At(from + i);
4641 } 4455 }
4642 } 4456 }
4643 } 4457 }
4644 4458
4645 4459
4646 static intptr_t InsertRangeInCanonicalList( 4460 static intptr_t InsertRangeInCanonicalList(
4647 ZoneGrowableArray<CharacterRange>* list, 4461 ZoneGrowableArray<CharacterRange>* list,
4648 intptr_t count, 4462 intptr_t count,
4649 CharacterRange insert) { 4463 CharacterRange insert) {
4650 // Inserts a range into list[0..count[, which must be sorted 4464 // Inserts a range into list[0..count[, which must be sorted
4651 // by from value and non-overlapping and non-adjacent, using at most 4465 // by from value and non-overlapping and non-adjacent, using at most
4652 // list[0..count] for the result. Returns the number of resulting 4466 // list[0..count] for the result. Returns the number of resulting
4653 // canonicalized ranges. Inserting a range may collapse existing ranges into 4467 // canonicalized ranges. Inserting a range may collapse existing ranges into
4654 // fewer ranges, so the return value can be anything in the range 1..count+1. 4468 // fewer ranges, so the return value can be anything in the range 1..count+1.
4655 uint16_t from = insert.from(); 4469 uint16_t from = insert.from();
4656 uint16_t to = insert.to(); 4470 uint16_t to = insert.to();
4657 intptr_t start_pos = 0; 4471 intptr_t start_pos = 0;
4658 intptr_t end_pos = count; 4472 intptr_t end_pos = count;
4659 for (intptr_t i = count - 1; i >= 0; i--) { 4473 for (intptr_t i = count - 1; i >= 0; i--) {
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
4727 i++; 4541 i++;
4728 } 4542 }
4729 // Canonical until the i'th range. If that's all of them, we are done. 4543 // Canonical until the i'th range. If that's all of them, we are done.
4730 if (i == n) return; 4544 if (i == n) return;
4731 4545
4732 // The ranges at index i and forward are not canonicalized. Make them so by 4546 // The ranges at index i and forward are not canonicalized. Make them so by
4733 // doing the equivalent of insertion sort (inserting each into the previous 4547 // doing the equivalent of insertion sort (inserting each into the previous
4734 // list, in order). 4548 // list, in order).
4735 // Notice that inserting a range can reduce the number of ranges in the 4549 // Notice that inserting a range can reduce the number of ranges in the
4736 // result due to combining of adjacent and overlapping ranges. 4550 // result due to combining of adjacent and overlapping ranges.
4737 intptr_t read = i; // Range to insert. 4551 intptr_t read = i; // Range to insert.
4738 intptr_t num_canonical = i; // Length of canonicalized part of list. 4552 intptr_t num_canonical = i; // Length of canonicalized part of list.
4739 do { 4553 do {
4740 num_canonical = InsertRangeInCanonicalList(character_ranges, 4554 num_canonical = InsertRangeInCanonicalList(character_ranges, num_canonical,
4741 num_canonical,
4742 character_ranges->At(read)); 4555 character_ranges->At(read));
4743 read++; 4556 read++;
4744 } while (read < n); 4557 } while (read < n);
4745 character_ranges->TruncateTo(num_canonical); 4558 character_ranges->TruncateTo(num_canonical);
4746 4559
4747 ASSERT(CharacterRange::IsCanonical(character_ranges)); 4560 ASSERT(CharacterRange::IsCanonical(character_ranges));
4748 } 4561 }
4749 4562
4750 4563
4751 void CharacterRange::Negate(ZoneGrowableArray<CharacterRange>* ranges, 4564 void CharacterRange::Negate(ZoneGrowableArray<CharacterRange>* ranges,
(...skipping 17 matching lines...) Expand all
4769 negated_ranges->Add(CharacterRange(from + 1, Utf16::kMaxCodeUnit)); 4582 negated_ranges->Add(CharacterRange(from + 1, Utf16::kMaxCodeUnit));
4770 } 4583 }
4771 } 4584 }
4772 4585
4773 4586
4774 // ------------------------------------------------------------------- 4587 // -------------------------------------------------------------------
4775 // Splay tree 4588 // Splay tree
4776 4589
4777 4590
4778 // Workaround for the fact that ZoneGrowableArray does not have contains(). 4591 // Workaround for the fact that ZoneGrowableArray does not have contains().
4779 static bool ArrayContains(ZoneGrowableArray<unsigned>* array, 4592 static bool ArrayContains(ZoneGrowableArray<unsigned>* array, unsigned value) {
4780 unsigned value) {
4781 for (intptr_t i = 0; i < array->length(); i++) { 4593 for (intptr_t i = 0; i < array->length(); i++) {
4782 if (array->At(i) == value) { 4594 if (array->At(i) == value) {
4783 return true; 4595 return true;
4784 } 4596 }
4785 } 4597 }
4786 return false; 4598 return false;
4787 } 4599 }
4788 4600
4789 4601
4790 void OutSet::Set(unsigned value, Zone* zone) { 4602 void OutSet::Set(unsigned value, Zone* zone) {
4791 if (value < kFirstLimit) { 4603 if (value < kFirstLimit) {
4792 first_ |= (1 << value); 4604 first_ |= (1 << value);
4793 } else { 4605 } else {
4794 if (remaining_ == NULL) 4606 if (remaining_ == NULL)
4795 remaining_ = new(zone) ZoneGrowableArray<unsigned>(1); 4607 remaining_ = new (zone) ZoneGrowableArray<unsigned>(1);
4796 4608
4797 bool remaining_contains_value = ArrayContains(remaining_, value); 4609 bool remaining_contains_value = ArrayContains(remaining_, value);
4798 if (remaining_->is_empty() || !remaining_contains_value) { 4610 if (remaining_->is_empty() || !remaining_contains_value) {
4799 remaining_->Add(value); 4611 remaining_->Add(value);
4800 } 4612 }
4801 } 4613 }
4802 } 4614 }
4803 4615
4804 4616
4805 bool OutSet::Get(unsigned value) const { 4617 bool OutSet::Get(unsigned value) const {
4806 if (value < kFirstLimit) { 4618 if (value < kFirstLimit) {
4807 return (first_ & (1 << value)) != 0; 4619 return (first_ & (1 << value)) != 0;
4808 } else if (remaining_ == NULL) { 4620 } else if (remaining_ == NULL) {
4809 return false; 4621 return false;
4810 } else { 4622 } else {
4811 return ArrayContains(remaining_, value); 4623 return ArrayContains(remaining_, value);
4812 } 4624 }
4813 } 4625 }
4814 4626
4815 4627
4816 // ------------------------------------------------------------------- 4628 // -------------------------------------------------------------------
4817 // Analysis 4629 // Analysis
4818 4630
4819 4631
4820 void Analysis::EnsureAnalyzed(RegExpNode* that) { 4632 void Analysis::EnsureAnalyzed(RegExpNode* that) {
4821 if (that->info()->been_analyzed || that->info()->being_analyzed) 4633 if (that->info()->been_analyzed || that->info()->being_analyzed) return;
4822 return;
4823 that->info()->being_analyzed = true; 4634 that->info()->being_analyzed = true;
4824 that->Accept(this); 4635 that->Accept(this);
4825 that->info()->being_analyzed = false; 4636 that->info()->being_analyzed = false;
4826 that->info()->been_analyzed = true; 4637 that->info()->been_analyzed = true;
4827 } 4638 }
4828 4639
4829 4640
4830 void Analysis::VisitEnd(EndNode* that) { 4641 void Analysis::VisitEnd(EndNode* that) {
4831 // nothing to do 4642 // nothing to do
4832 } 4643 }
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after
4960 RegExpAtom* atom = text.atom(); 4771 RegExpAtom* atom = text.atom();
4961 for (intptr_t j = 0; j < atom->length(); j++, offset++) { 4772 for (intptr_t j = 0; j < atom->length(); j++, offset++) {
4962 if (offset >= bm->length()) { 4773 if (offset >= bm->length()) {
4963 if (initial_offset == 0) set_bm_info(not_at_start, bm); 4774 if (initial_offset == 0) set_bm_info(not_at_start, bm);
4964 return; 4775 return;
4965 } 4776 }
4966 uint16_t character = atom->data()->At(j); 4777 uint16_t character = atom->data()->At(j);
4967 if (bm->compiler()->ignore_case()) { 4778 if (bm->compiler()->ignore_case()) {
4968 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 4779 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
4969 intptr_t length = GetCaseIndependentLetters( 4780 intptr_t length = GetCaseIndependentLetters(
4970 character, 4781 character, bm->max_char() == Symbols::kMaxOneCharCodeSymbol,
4971 bm->max_char() == Symbols::kMaxOneCharCodeSymbol,
4972 chars); 4782 chars);
4973 for (intptr_t j = 0; j < length; j++) { 4783 for (intptr_t j = 0; j < length; j++) {
4974 bm->Set(offset, chars[j]); 4784 bm->Set(offset, chars[j]);
4975 } 4785 }
4976 } else { 4786 } else {
4977 if (character <= max_char) bm->Set(offset, character); 4787 if (character <= max_char) bm->Set(offset, character);
4978 } 4788 }
4979 } 4789 }
4980 } else { 4790 } else {
4981 ASSERT(text.text_type() == TextElement::CHAR_CLASS); 4791 ASSERT(text.text_type() == TextElement::CHAR_CLASS);
4982 RegExpCharacterClass* char_class = text.char_class(); 4792 RegExpCharacterClass* char_class = text.char_class();
4983 ZoneGrowableArray<CharacterRange>* ranges = char_class->ranges(); 4793 ZoneGrowableArray<CharacterRange>* ranges = char_class->ranges();
4984 if (char_class->is_negated()) { 4794 if (char_class->is_negated()) {
4985 bm->SetAll(offset); 4795 bm->SetAll(offset);
4986 } else { 4796 } else {
4987 for (intptr_t k = 0; k < ranges->length(); k++) { 4797 for (intptr_t k = 0; k < ranges->length(); k++) {
4988 CharacterRange& range = (*ranges)[k]; 4798 CharacterRange& range = (*ranges)[k];
4989 if (range.from() > max_char) continue; 4799 if (range.from() > max_char) continue;
4990 intptr_t to = Utils::Minimum(max_char, 4800 intptr_t to =
4991 static_cast<intptr_t>(range.to())); 4801 Utils::Minimum(max_char, static_cast<intptr_t>(range.to()));
4992 bm->SetInterval(offset, Interval(range.from(), to)); 4802 bm->SetInterval(offset, Interval(range.from(), to));
4993 } 4803 }
4994 } 4804 }
4995 offset++; 4805 offset++;
4996 } 4806 }
4997 } 4807 }
4998 if (offset >= bm->length()) { 4808 if (offset >= bm->length()) {
4999 if (initial_offset == 0) set_bm_info(not_at_start, bm); 4809 if (initial_offset == 0) set_bm_info(not_at_start, bm);
5000 return; 4810 return;
5001 } 4811 }
5002 on_success()->FillInBMInfo(offset, 4812 on_success()->FillInBMInfo(offset, budget - 1, bm,
5003 budget - 1,
5004 bm,
5005 true); // Not at start after a text node. 4813 true); // Not at start after a text node.
5006 if (initial_offset == 0) set_bm_info(not_at_start, bm); 4814 if (initial_offset == 0) set_bm_info(not_at_start, bm);
5007 } 4815 }
5008 4816
5009 4817
5010 RegExpEngine::CompilationResult RegExpEngine::CompileIR( 4818 RegExpEngine::CompilationResult RegExpEngine::CompileIR(
5011 RegExpCompileData* data, 4819 RegExpCompileData* data,
5012 const ParsedFunction* parsed_function, 4820 const ParsedFunction* parsed_function,
5013 const ZoneGrowableArray<const ICData*>& ic_data_array) { 4821 const ZoneGrowableArray<const ICData*>& ic_data_array) {
5014 ASSERT(!FLAG_interpret_irregexp); 4822 ASSERT(!FLAG_interpret_irregexp);
(...skipping 17 matching lines...) Expand all
5032 // TODO(zerny): Frequency sampling is currently disabled because of several 4840 // TODO(zerny): Frequency sampling is currently disabled because of several
5033 // issues. We do not want to store subject strings in the regexp object since 4841 // issues. We do not want to store subject strings in the regexp object since
5034 // they might be long and we should not prevent their garbage collection. 4842 // they might be long and we should not prevent their garbage collection.
5035 // Passing them to this function explicitly does not help, since we must 4843 // Passing them to this function explicitly does not help, since we must
5036 // generate exactly the same IR for both the unoptimizing and optimizing 4844 // generate exactly the same IR for both the unoptimizing and optimizing
5037 // pipelines (otherwise it gets confused when i.e. deopt id's differ). 4845 // pipelines (otherwise it gets confused when i.e. deopt id's differ).
5038 // An option would be to store sampling results in the regexp object, but 4846 // An option would be to store sampling results in the regexp object, but
5039 // I'm not sure the performance gains are relevant enough. 4847 // I'm not sure the performance gains are relevant enough.
5040 4848
5041 // Wrap the body of the regexp in capture #0. 4849 // Wrap the body of the regexp in capture #0.
5042 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, 4850 RegExpNode* captured_body =
5043 0, 4851 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept());
5044 &compiler,
5045 compiler.accept());
5046 4852
5047 RegExpNode* node = captured_body; 4853 RegExpNode* node = captured_body;
5048 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); 4854 bool is_end_anchored = data->tree->IsAnchoredAtEnd();
5049 bool is_start_anchored = data->tree->IsAnchoredAtStart(); 4855 bool is_start_anchored = data->tree->IsAnchoredAtStart();
5050 intptr_t max_length = data->tree->max_match(); 4856 intptr_t max_length = data->tree->max_match();
5051 if (!is_start_anchored) { 4857 if (!is_start_anchored) {
5052 // Add a .*? at the beginning, outside the body capture, unless 4858 // Add a .*? at the beginning, outside the body capture, unless
5053 // this expression is anchored at the beginning. 4859 // this expression is anchored at the beginning.
5054 RegExpNode* loop_node = 4860 RegExpNode* loop_node = RegExpQuantifier::ToNode(
5055 RegExpQuantifier::ToNode(0, 4861 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'),
5056 RegExpTree::kInfinity, 4862 &compiler, captured_body, data->contains_anchor);
5057 false,
5058 new(zone) RegExpCharacterClass('*'),
5059 &compiler,
5060 captured_body,
5061 data->contains_anchor);
5062 4863
5063 if (data->contains_anchor) { 4864 if (data->contains_anchor) {
5064 // Unroll loop once, to take care of the case that might start 4865 // Unroll loop once, to take care of the case that might start
5065 // at the start of input. 4866 // at the start of input.
5066 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); 4867 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone);
5067 first_step_node->AddAlternative(GuardedAlternative(captured_body)); 4868 first_step_node->AddAlternative(GuardedAlternative(captured_body));
5068 first_step_node->AddAlternative(GuardedAlternative( 4869 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode(
5069 new(zone) TextNode( 4870 new (zone) RegExpCharacterClass('*'), loop_node)));
5070 new(zone) RegExpCharacterClass('*'), loop_node)));
5071 node = first_step_node; 4871 node = first_step_node;
5072 } else { 4872 } else {
5073 node = loop_node; 4873 node = loop_node;
5074 } 4874 }
5075 } 4875 }
5076 if (is_one_byte) { 4876 if (is_one_byte) {
5077 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); 4877 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
5078 // Do it again to propagate the new nodes to places where they were not 4878 // Do it again to propagate the new nodes to places where they were not
5079 // put because they had not been calculated yet. 4879 // put because they had not been calculated yet.
5080 if (node != NULL) { 4880 if (node != NULL) {
5081 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); 4881 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
5082 } 4882 }
5083 } 4883 }
5084 4884
5085 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); 4885 if (node == NULL) node = new (zone) EndNode(EndNode::BACKTRACK, zone);
5086 data->node = node; 4886 data->node = node;
5087 Analysis analysis(ignore_case, is_one_byte); 4887 Analysis analysis(ignore_case, is_one_byte);
5088 analysis.EnsureAnalyzed(node); 4888 analysis.EnsureAnalyzed(node);
5089 if (analysis.has_failed()) { 4889 if (analysis.has_failed()) {
5090 const char* error_message = analysis.error_message(); 4890 const char* error_message = analysis.error_message();
5091 return CompilationResult(error_message); 4891 return CompilationResult(error_message);
5092 } 4892 }
5093 4893
5094 // Native regexp implementation. 4894 // Native regexp implementation.
5095 4895
5096 IRRegExpMacroAssembler* macro_assembler = 4896 IRRegExpMacroAssembler* macro_assembler =
5097 new(zone) IRRegExpMacroAssembler(specialization_cid, 4897 new (zone) IRRegExpMacroAssembler(specialization_cid, data->capture_count,
5098 data->capture_count, 4898 parsed_function, ic_data_array, zone);
5099 parsed_function,
5100 ic_data_array,
5101 zone);
5102 4899
5103 // Inserted here, instead of in Assembler, because it depends on information 4900 // Inserted here, instead of in Assembler, because it depends on information
5104 // in the AST that isn't replicated in the Node structure. 4901 // in the AST that isn't replicated in the Node structure.
5105 static const intptr_t kMaxBacksearchLimit = 1024; 4902 static const intptr_t kMaxBacksearchLimit = 1024;
5106 if (is_end_anchored && 4903 if (is_end_anchored && !is_start_anchored &&
5107 !is_start_anchored &&
5108 max_length < kMaxBacksearchLimit) { 4904 max_length < kMaxBacksearchLimit) {
5109 macro_assembler->SetCurrentPositionFromEnd(max_length); 4905 macro_assembler->SetCurrentPositionFromEnd(max_length);
5110 } 4906 }
5111 4907
5112 if (is_global) { 4908 if (is_global) {
5113 macro_assembler->set_global_mode( 4909 macro_assembler->set_global_mode(
5114 (data->tree->min_match() > 0) 4910 (data->tree->min_match() > 0)
5115 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK 4911 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK
5116 : RegExpMacroAssembler::GLOBAL); 4912 : RegExpMacroAssembler::GLOBAL);
5117 } 4913 }
5118 4914
5119 RegExpEngine::CompilationResult result = 4915 RegExpEngine::CompilationResult result =
5120 compiler.Assemble(macro_assembler, 4916 compiler.Assemble(macro_assembler, node, data->capture_count, pattern);
5121 node,
5122 data->capture_count,
5123 pattern);
5124 4917
5125 if (FLAG_trace_irregexp) { 4918 if (FLAG_trace_irregexp) {
5126 macro_assembler->PrintBlocks(); 4919 macro_assembler->PrintBlocks();
5127 } 4920 }
5128 4921
5129 return result; 4922 return result;
5130 } 4923 }
5131 4924
5132 4925
5133 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode( 4926 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode(
(...skipping 15 matching lines...) Expand all
5149 // TODO(zerny): Frequency sampling is currently disabled because of several 4942 // TODO(zerny): Frequency sampling is currently disabled because of several
5150 // issues. We do not want to store subject strings in the regexp object since 4943 // issues. We do not want to store subject strings in the regexp object since
5151 // they might be long and we should not prevent their garbage collection. 4944 // they might be long and we should not prevent their garbage collection.
5152 // Passing them to this function explicitly does not help, since we must 4945 // Passing them to this function explicitly does not help, since we must
5153 // generate exactly the same IR for both the unoptimizing and optimizing 4946 // generate exactly the same IR for both the unoptimizing and optimizing
5154 // pipelines (otherwise it gets confused when i.e. deopt id's differ). 4947 // pipelines (otherwise it gets confused when i.e. deopt id's differ).
5155 // An option would be to store sampling results in the regexp object, but 4948 // An option would be to store sampling results in the regexp object, but
5156 // I'm not sure the performance gains are relevant enough. 4949 // I'm not sure the performance gains are relevant enough.
5157 4950
5158 // Wrap the body of the regexp in capture #0. 4951 // Wrap the body of the regexp in capture #0.
5159 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, 4952 RegExpNode* captured_body =
5160 0, 4953 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept());
5161 &compiler,
5162 compiler.accept());
5163 4954
5164 RegExpNode* node = captured_body; 4955 RegExpNode* node = captured_body;
5165 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); 4956 bool is_end_anchored = data->tree->IsAnchoredAtEnd();
5166 bool is_start_anchored = data->tree->IsAnchoredAtStart(); 4957 bool is_start_anchored = data->tree->IsAnchoredAtStart();
5167 intptr_t max_length = data->tree->max_match(); 4958 intptr_t max_length = data->tree->max_match();
5168 if (!is_start_anchored) { 4959 if (!is_start_anchored) {
5169 // Add a .*? at the beginning, outside the body capture, unless 4960 // Add a .*? at the beginning, outside the body capture, unless
5170 // this expression is anchored at the beginning. 4961 // this expression is anchored at the beginning.
5171 RegExpNode* loop_node = 4962 RegExpNode* loop_node = RegExpQuantifier::ToNode(
5172 RegExpQuantifier::ToNode(0, 4963 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'),
5173 RegExpTree::kInfinity, 4964 &compiler, captured_body, data->contains_anchor);
5174 false,
5175 new(zone) RegExpCharacterClass('*'),
5176 &compiler,
5177 captured_body,
5178 data->contains_anchor);
5179 4965
5180 if (data->contains_anchor) { 4966 if (data->contains_anchor) {
5181 // Unroll loop once, to take care of the case that might start 4967 // Unroll loop once, to take care of the case that might start
5182 // at the start of input. 4968 // at the start of input.
5183 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); 4969 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone);
5184 first_step_node->AddAlternative(GuardedAlternative(captured_body)); 4970 first_step_node->AddAlternative(GuardedAlternative(captured_body));
5185 first_step_node->AddAlternative(GuardedAlternative( 4971 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode(
5186 new(zone) TextNode( 4972 new (zone) RegExpCharacterClass('*'), loop_node)));
5187 new(zone) RegExpCharacterClass('*'), loop_node)));
5188 node = first_step_node; 4973 node = first_step_node;
5189 } else { 4974 } else {
5190 node = loop_node; 4975 node = loop_node;
5191 } 4976 }
5192 } 4977 }
5193 if (is_one_byte) { 4978 if (is_one_byte) {
5194 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); 4979 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
5195 // Do it again to propagate the new nodes to places where they were not 4980 // Do it again to propagate the new nodes to places where they were not
5196 // put because they had not been calculated yet. 4981 // put because they had not been calculated yet.
5197 if (node != NULL) { 4982 if (node != NULL) {
5198 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); 4983 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
5199 } 4984 }
5200 } 4985 }
5201 4986
5202 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); 4987 if (node == NULL) node = new (zone) EndNode(EndNode::BACKTRACK, zone);
5203 data->node = node; 4988 data->node = node;
5204 Analysis analysis(ignore_case, is_one_byte); 4989 Analysis analysis(ignore_case, is_one_byte);
5205 analysis.EnsureAnalyzed(node); 4990 analysis.EnsureAnalyzed(node);
5206 if (analysis.has_failed()) { 4991 if (analysis.has_failed()) {
5207 const char* error_message = analysis.error_message(); 4992 const char* error_message = analysis.error_message();
5208 return CompilationResult(error_message); 4993 return CompilationResult(error_message);
5209 } 4994 }
5210 4995
5211 // Bytecode regexp implementation. 4996 // Bytecode regexp implementation.
5212 4997
5213 ZoneGrowableArray<uint8_t> buffer(zone, 1024); 4998 ZoneGrowableArray<uint8_t> buffer(zone, 1024);
5214 BytecodeRegExpMacroAssembler* macro_assembler = 4999 BytecodeRegExpMacroAssembler* macro_assembler =
5215 new(zone) BytecodeRegExpMacroAssembler(&buffer, zone); 5000 new (zone) BytecodeRegExpMacroAssembler(&buffer, zone);
5216 5001
5217 // Inserted here, instead of in Assembler, because it depends on information 5002 // Inserted here, instead of in Assembler, because it depends on information
5218 // in the AST that isn't replicated in the Node structure. 5003 // in the AST that isn't replicated in the Node structure.
5219 static const intptr_t kMaxBacksearchLimit = 1024; 5004 static const intptr_t kMaxBacksearchLimit = 1024;
5220 if (is_end_anchored && 5005 if (is_end_anchored && !is_start_anchored &&
5221 !is_start_anchored &&
5222 max_length < kMaxBacksearchLimit) { 5006 max_length < kMaxBacksearchLimit) {
5223 macro_assembler->SetCurrentPositionFromEnd(max_length); 5007 macro_assembler->SetCurrentPositionFromEnd(max_length);
5224 } 5008 }
5225 5009
5226 if (is_global) { 5010 if (is_global) {
5227 macro_assembler->set_global_mode( 5011 macro_assembler->set_global_mode(
5228 (data->tree->min_match() > 0) 5012 (data->tree->min_match() > 0)
5229 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK 5013 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK
5230 : RegExpMacroAssembler::GLOBAL); 5014 : RegExpMacroAssembler::GLOBAL);
5231 } 5015 }
5232 5016
5233 RegExpEngine::CompilationResult result = 5017 RegExpEngine::CompilationResult result =
5234 compiler.Assemble(macro_assembler, 5018 compiler.Assemble(macro_assembler, node, data->capture_count, pattern);
5235 node,
5236 data->capture_count,
5237 pattern);
5238 5019
5239 if (FLAG_trace_irregexp) { 5020 if (FLAG_trace_irregexp) {
5240 macro_assembler->PrintBlocks(); 5021 macro_assembler->PrintBlocks();
5241 } 5022 }
5242 5023
5243 return result; 5024 return result;
5244 } 5025 }
5245 5026
5246 5027
5247 static void CreateSpecializedFunction(Thread* thread, Zone* zone, 5028 static void CreateSpecializedFunction(Thread* thread,
5029 Zone* zone,
5248 const RegExp& regexp, 5030 const RegExp& regexp,
5249 intptr_t specialization_cid, 5031 intptr_t specialization_cid,
5250 const Object& owner) { 5032 const Object& owner) {
5251 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount; 5033 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount;
5252 5034
5253 Function& fn = Function::Handle(zone, Function::New( 5035 Function& fn =
5254 Symbols::ColonMatcher(), 5036 Function::Handle(zone, Function::New(Symbols::ColonMatcher(),
5255 RawFunction::kIrregexpFunction, 5037 RawFunction::kIrregexpFunction,
5256 true, // Static. 5038 true, // Static.
5257 false, // Not const. 5039 false, // Not const.
5258 false, // Not abstract. 5040 false, // Not abstract.
5259 false, // Not external. 5041 false, // Not external.
5260 false, // Not native. 5042 false, // Not native.
5261 owner, 5043 owner, TokenPosition::kMinSource));
5262 TokenPosition::kMinSource));
5263 5044
5264 // TODO(zerny): Share these arrays between all irregexp functions. 5045 // TODO(zerny): Share these arrays between all irregexp functions.
5265 fn.set_num_fixed_parameters(kParamCount); 5046 fn.set_num_fixed_parameters(kParamCount);
5266 fn.set_parameter_types(Array::Handle(zone, Array::New(kParamCount, 5047 fn.set_parameter_types(
5267 Heap::kOld))); 5048 Array::Handle(zone, Array::New(kParamCount, Heap::kOld)));
5268 fn.set_parameter_names(Array::Handle(zone, Array::New(kParamCount, 5049 fn.set_parameter_names(
5269 Heap::kOld))); 5050 Array::Handle(zone, Array::New(kParamCount, Heap::kOld)));
5270 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamRegExpIndex, 5051 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamRegExpIndex,
5271 Object::dynamic_type()); 5052 Object::dynamic_type());
5272 fn.SetParameterNameAt(RegExpMacroAssembler::kParamRegExpIndex, 5053 fn.SetParameterNameAt(RegExpMacroAssembler::kParamRegExpIndex,
5273 Symbols::This()); 5054 Symbols::This());
5274 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamStringIndex, 5055 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamStringIndex,
5275 Object::dynamic_type()); 5056 Object::dynamic_type());
5276 fn.SetParameterNameAt(RegExpMacroAssembler::kParamStringIndex, 5057 fn.SetParameterNameAt(RegExpMacroAssembler::kParamStringIndex,
5277 Symbols::string_param()); 5058 Symbols::string_param());
5278 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamStartOffsetIndex, 5059 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamStartOffsetIndex,
5279 Object::dynamic_type()); 5060 Object::dynamic_type());
(...skipping 23 matching lines...) Expand all
5303 if (multi_line) { 5084 if (multi_line) {
5304 regexp.set_is_multi_line(); 5085 regexp.set_is_multi_line();
5305 } 5086 }
5306 if (ignore_case) { 5087 if (ignore_case) {
5307 regexp.set_is_ignore_case(); 5088 regexp.set_is_ignore_case();
5308 } 5089 }
5309 5090
5310 // TODO(zerny): We might want to use normal string searching algorithms 5091 // TODO(zerny): We might want to use normal string searching algorithms
5311 // for simple patterns. 5092 // for simple patterns.
5312 regexp.set_is_complex(); 5093 regexp.set_is_complex();
5313 regexp.set_is_global(); // All dart regexps are global. 5094 regexp.set_is_global(); // All dart regexps are global.
5314 5095
5315 if (!FLAG_interpret_irregexp) { 5096 if (!FLAG_interpret_irregexp) {
5316 const Library& lib = Library::Handle(zone, Library::CoreLibrary()); 5097 const Library& lib = Library::Handle(zone, Library::CoreLibrary());
5317 const Class& owner = Class::Handle(zone, 5098 const Class& owner =
5318 lib.LookupClass(Symbols::RegExp())); 5099 Class::Handle(zone, lib.LookupClass(Symbols::RegExp()));
5319 5100
5320 CreateSpecializedFunction(thread, zone, regexp, kOneByteStringCid, owner); 5101 CreateSpecializedFunction(thread, zone, regexp, kOneByteStringCid, owner);
5321 CreateSpecializedFunction(thread, zone, regexp, kTwoByteStringCid, owner); 5102 CreateSpecializedFunction(thread, zone, regexp, kTwoByteStringCid, owner);
5322 CreateSpecializedFunction(thread, zone, 5103 CreateSpecializedFunction(thread, zone, regexp, kExternalOneByteStringCid,
5323 regexp, kExternalOneByteStringCid, owner); 5104 owner);
5324 CreateSpecializedFunction(thread, zone, 5105 CreateSpecializedFunction(thread, zone, regexp, kExternalTwoByteStringCid,
5325 regexp, kExternalTwoByteStringCid, owner); 5106 owner);
5326 } 5107 }
5327 5108
5328 return regexp.raw(); 5109 return regexp.raw();
5329 } 5110 }
5330 5111
5331 5112
5332 } // namespace dart 5113 } // namespace dart
OLDNEW
« no previous file with comments | « runtime/vm/regexp.h ('k') | runtime/vm/regexp_assembler.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698