| OLD | NEW |
| 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #include "vm/regexp.h" | 5 #include "vm/regexp.h" |
| 6 | 6 |
| 7 #include "vm/dart_entry.h" | 7 #include "vm/dart_entry.h" |
| 8 #include "vm/regexp_assembler.h" | 8 #include "vm/regexp_assembler.h" |
| 9 #include "vm/regexp_assembler_bytecode.h" | 9 #include "vm/regexp_assembler_bytecode.h" |
| 10 #include "vm/regexp_assembler_ir.h" | 10 #include "vm/regexp_assembler_ir.h" |
| (...skipping 252 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 263 ASSERT((in_character & RegExpMacroAssembler::kTableMask) == in_character); | 263 ASSERT((in_character & RegExpMacroAssembler::kTableMask) == in_character); |
| 264 if (total_samples_ < 1) return 1; // Division by zero. | 264 if (total_samples_ < 1) return 1; // Division by zero. |
| 265 intptr_t freq_in_per128 = | 265 intptr_t freq_in_per128 = |
| 266 (frequencies_[in_character].counter() * 128) / total_samples_; | 266 (frequencies_[in_character].counter() * 128) / total_samples_; |
| 267 return freq_in_per128; | 267 return freq_in_per128; |
| 268 } | 268 } |
| 269 | 269 |
| 270 private: | 270 private: |
| 271 class CharacterFrequency { | 271 class CharacterFrequency { |
| 272 public: | 272 public: |
| 273 CharacterFrequency() : counter_(0), character_(-1) { } | 273 CharacterFrequency() : counter_(0), character_(-1) {} |
| 274 explicit CharacterFrequency(intptr_t character) | 274 explicit CharacterFrequency(intptr_t character) |
| 275 : counter_(0), character_(character) { } | 275 : counter_(0), character_(character) {} |
| 276 | 276 |
| 277 void Increment() { counter_++; } | 277 void Increment() { counter_++; } |
| 278 intptr_t counter() { return counter_; } | 278 intptr_t counter() { return counter_; } |
| 279 intptr_t character() { return character_; } | 279 intptr_t character() { return character_; } |
| 280 | 280 |
| 281 private: | 281 private: |
| 282 intptr_t counter_; | 282 intptr_t counter_; |
| 283 intptr_t character_; | 283 intptr_t character_; |
| 284 | 284 |
| 285 DISALLOW_ALLOCATION(); | 285 DISALLOW_ALLOCATION(); |
| 286 }; | 286 }; |
| 287 | 287 |
| 288 | 288 |
| 289 private: | 289 private: |
| 290 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; | 290 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; |
| 291 intptr_t total_samples_; | 291 intptr_t total_samples_; |
| 292 }; | 292 }; |
| 293 | 293 |
| 294 | 294 |
| 295 class RegExpCompiler : public ValueObject { | 295 class RegExpCompiler : public ValueObject { |
| 296 public: | 296 public: |
| 297 RegExpCompiler(intptr_t capture_count, | 297 RegExpCompiler(intptr_t capture_count, bool ignore_case, bool is_one_byte); |
| 298 bool ignore_case, | |
| 299 bool is_one_byte); | |
| 300 | 298 |
| 301 intptr_t AllocateRegister() { | 299 intptr_t AllocateRegister() { return next_register_++; } |
| 302 return next_register_++; | |
| 303 } | |
| 304 | 300 |
| 305 RegExpEngine::CompilationResult Assemble( | 301 RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler, |
| 306 IRRegExpMacroAssembler* assembler, | 302 RegExpNode* start, |
| 307 RegExpNode* start, | 303 intptr_t capture_count, |
| 308 intptr_t capture_count, | 304 const String& pattern); |
| 309 const String& pattern); | |
| 310 | 305 |
| 311 RegExpEngine::CompilationResult Assemble( | 306 RegExpEngine::CompilationResult Assemble( |
| 312 BytecodeRegExpMacroAssembler* assembler, | 307 BytecodeRegExpMacroAssembler* assembler, |
| 313 RegExpNode* start, | 308 RegExpNode* start, |
| 314 intptr_t capture_count, | 309 intptr_t capture_count, |
| 315 const String& pattern); | 310 const String& pattern); |
| 316 | 311 |
| 317 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } | 312 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } |
| 318 | 313 |
| 319 static const intptr_t kImplementationOffset = 0; | 314 static const intptr_t kImplementationOffset = 0; |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 357 Zone* zone_; | 352 Zone* zone_; |
| 358 }; | 353 }; |
| 359 | 354 |
| 360 | 355 |
| 361 class RecursionCheck : public ValueObject { | 356 class RecursionCheck : public ValueObject { |
| 362 public: | 357 public: |
| 363 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { | 358 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { |
| 364 compiler->IncrementRecursionDepth(); | 359 compiler->IncrementRecursionDepth(); |
| 365 } | 360 } |
| 366 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } | 361 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } |
| 362 |
| 367 private: | 363 private: |
| 368 RegExpCompiler* compiler_; | 364 RegExpCompiler* compiler_; |
| 369 }; | 365 }; |
| 370 | 366 |
| 371 | 367 |
| 372 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { | 368 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { |
| 373 return RegExpEngine::CompilationResult("RegExp too big"); | 369 return RegExpEngine::CompilationResult("RegExp too big"); |
| 374 } | 370 } |
| 375 | 371 |
| 376 | 372 |
| 377 // Attempts to compile the regexp using an Irregexp code generator. Returns | 373 // Attempts to compile the regexp using an Irregexp code generator. Returns |
| 378 // a fixed array or a null handle depending on whether it succeeded. | 374 // a fixed array or a null handle depending on whether it succeeded. |
| 379 RegExpCompiler::RegExpCompiler(intptr_t capture_count, | 375 RegExpCompiler::RegExpCompiler(intptr_t capture_count, |
| 380 bool ignore_case, | 376 bool ignore_case, |
| 381 bool is_one_byte) | 377 bool is_one_byte) |
| 382 : next_register_(2 * (capture_count + 1)), | 378 : next_register_(2 * (capture_count + 1)), |
| 383 work_list_(NULL), | 379 work_list_(NULL), |
| 384 recursion_depth_(0), | 380 recursion_depth_(0), |
| 385 ignore_case_(ignore_case), | 381 ignore_case_(ignore_case), |
| 386 is_one_byte_(is_one_byte), | 382 is_one_byte_(is_one_byte), |
| 387 reg_exp_too_big_(false), | 383 reg_exp_too_big_(false), |
| 388 current_expansion_factor_(1), | 384 current_expansion_factor_(1), |
| 389 zone_(Thread::Current()->zone()) { | 385 zone_(Thread::Current()->zone()) { |
| 390 accept_ = new(Z) EndNode(EndNode::ACCEPT, Z); | 386 accept_ = new (Z) EndNode(EndNode::ACCEPT, Z); |
| 391 } | 387 } |
| 392 | 388 |
| 393 | 389 |
| 394 RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 390 RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
| 395 IRRegExpMacroAssembler* macro_assembler, | 391 IRRegExpMacroAssembler* macro_assembler, |
| 396 RegExpNode* start, | 392 RegExpNode* start, |
| 397 intptr_t capture_count, | 393 intptr_t capture_count, |
| 398 const String& pattern) { | 394 const String& pattern) { |
| 399 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); | 395 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); |
| 400 macro_assembler_ = macro_assembler; | 396 macro_assembler_ = macro_assembler; |
| 401 | 397 |
| 402 ZoneGrowableArray<RegExpNode*> work_list(0); | 398 ZoneGrowableArray<RegExpNode*> work_list(0); |
| 403 work_list_ = &work_list; | 399 work_list_ = &work_list; |
| 404 BlockLabel fail; | 400 BlockLabel fail; |
| 405 macro_assembler_->PushBacktrack(&fail); | 401 macro_assembler_->PushBacktrack(&fail); |
| 406 Trace new_trace; | 402 Trace new_trace; |
| 407 start->Emit(this, &new_trace); | 403 start->Emit(this, &new_trace); |
| 408 macro_assembler_->BindBlock(&fail); | 404 macro_assembler_->BindBlock(&fail); |
| 409 macro_assembler_->Fail(); | 405 macro_assembler_->Fail(); |
| 410 while (!work_list.is_empty()) { | 406 while (!work_list.is_empty()) { |
| 411 work_list.RemoveLast()->Emit(this, &new_trace); | 407 work_list.RemoveLast()->Emit(this, &new_trace); |
| 412 } | 408 } |
| 413 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); | 409 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); |
| 414 | 410 |
| 415 macro_assembler->GenerateBacktrackBlock(); | 411 macro_assembler->GenerateBacktrackBlock(); |
| 416 macro_assembler->FinalizeRegistersArray(); | 412 macro_assembler->FinalizeRegistersArray(); |
| 417 | 413 |
| 418 return RegExpEngine::CompilationResult(macro_assembler->backtrack_goto(), | 414 return RegExpEngine::CompilationResult( |
| 419 macro_assembler->graph_entry(), | 415 macro_assembler->backtrack_goto(), macro_assembler->graph_entry(), |
| 420 macro_assembler->num_blocks(), | 416 macro_assembler->num_blocks(), macro_assembler->num_stack_locals(), |
| 421 macro_assembler->num_stack_locals(), | 417 next_register_); |
| 422 next_register_); | |
| 423 } | 418 } |
| 424 | 419 |
| 425 | 420 |
| 426 RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 421 RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
| 427 BytecodeRegExpMacroAssembler* macro_assembler, | 422 BytecodeRegExpMacroAssembler* macro_assembler, |
| 428 RegExpNode* start, | 423 RegExpNode* start, |
| 429 intptr_t capture_count, | 424 intptr_t capture_count, |
| 430 const String& pattern) { | 425 const String& pattern) { |
| 431 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); | 426 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); |
| 432 macro_assembler_ = macro_assembler; | 427 macro_assembler_ = macro_assembler; |
| (...skipping 20 matching lines...) Expand all Loading... |
| 453 if (action_type() == ActionNode::CLEAR_CAPTURES) { | 448 if (action_type() == ActionNode::CLEAR_CAPTURES) { |
| 454 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); | 449 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); |
| 455 return range.Contains(that); | 450 return range.Contains(that); |
| 456 } else { | 451 } else { |
| 457 return reg() == that; | 452 return reg() == that; |
| 458 } | 453 } |
| 459 } | 454 } |
| 460 | 455 |
| 461 | 456 |
| 462 bool Trace::mentions_reg(intptr_t reg) { | 457 bool Trace::mentions_reg(intptr_t reg) { |
| 463 for (DeferredAction* action = actions_; | 458 for (DeferredAction* action = actions_; action != NULL; |
| 464 action != NULL; | |
| 465 action = action->next()) { | 459 action = action->next()) { |
| 466 if (action->Mentions(reg)) | 460 if (action->Mentions(reg)) return true; |
| 467 return true; | |
| 468 } | 461 } |
| 469 return false; | 462 return false; |
| 470 } | 463 } |
| 471 | 464 |
| 472 | 465 |
| 473 bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) { | 466 bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) { |
| 474 ASSERT(*cp_offset == 0); | 467 ASSERT(*cp_offset == 0); |
| 475 for (DeferredAction* action = actions_; | 468 for (DeferredAction* action = actions_; action != NULL; |
| 476 action != NULL; | |
| 477 action = action->next()) { | 469 action = action->next()) { |
| 478 if (action->Mentions(reg)) { | 470 if (action->Mentions(reg)) { |
| 479 if (action->action_type() == ActionNode::STORE_POSITION) { | 471 if (action->action_type() == ActionNode::STORE_POSITION) { |
| 480 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset(); | 472 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset(); |
| 481 return true; | 473 return true; |
| 482 } else { | 474 } else { |
| 483 return false; | 475 return false; |
| 484 } | 476 } |
| 485 } | 477 } |
| 486 } | 478 } |
| 487 return false; | 479 return false; |
| 488 } | 480 } |
| 489 | 481 |
| 490 | 482 |
| 491 // This is called as we come into a loop choice node and some other tricky | 483 // This is called as we come into a loop choice node and some other tricky |
| 492 // nodes. It normalizes the state of the code generator to ensure we can | 484 // nodes. It normalizes the state of the code generator to ensure we can |
| 493 // generate generic code. | 485 // generate generic code. |
| 494 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers, | 486 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers, Zone* zone) { |
| 495 Zone* zone) { | |
| 496 intptr_t max_register = RegExpCompiler::kNoRegister; | 487 intptr_t max_register = RegExpCompiler::kNoRegister; |
| 497 for (DeferredAction* action = actions_; | 488 for (DeferredAction* action = actions_; action != NULL; |
| 498 action != NULL; | |
| 499 action = action->next()) { | 489 action = action->next()) { |
| 500 if (action->action_type() == ActionNode::CLEAR_CAPTURES) { | 490 if (action->action_type() == ActionNode::CLEAR_CAPTURES) { |
| 501 Interval range = static_cast<DeferredClearCaptures*>(action)->range(); | 491 Interval range = static_cast<DeferredClearCaptures*>(action)->range(); |
| 502 for (intptr_t i = range.from(); i <= range.to(); i++) | 492 for (intptr_t i = range.from(); i <= range.to(); i++) |
| 503 affected_registers->Set(i, zone); | 493 affected_registers->Set(i, zone); |
| 504 if (range.to() > max_register) max_register = range.to(); | 494 if (range.to() > max_register) max_register = range.to(); |
| 505 } else { | 495 } else { |
| 506 affected_registers->Set(action->reg(), zone); | 496 affected_registers->Set(action->reg(), zone); |
| 507 if (action->reg() > max_register) max_register = action->reg(); | 497 if (action->reg() > max_register) max_register = action->reg(); |
| 508 } | 498 } |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 545 // to its previous state (or not, if it's safe to ignore it). | 535 // to its previous state (or not, if it's safe to ignore it). |
| 546 enum DeferredActionUndoType { ACTION_IGNORE, ACTION_RESTORE, ACTION_CLEAR }; | 536 enum DeferredActionUndoType { ACTION_IGNORE, ACTION_RESTORE, ACTION_CLEAR }; |
| 547 DeferredActionUndoType undo_action = ACTION_IGNORE; | 537 DeferredActionUndoType undo_action = ACTION_IGNORE; |
| 548 | 538 |
| 549 intptr_t value = 0; | 539 intptr_t value = 0; |
| 550 bool absolute = false; | 540 bool absolute = false; |
| 551 bool clear = false; | 541 bool clear = false; |
| 552 intptr_t store_position = -1; | 542 intptr_t store_position = -1; |
| 553 // This is a little tricky because we are scanning the actions in reverse | 543 // This is a little tricky because we are scanning the actions in reverse |
| 554 // historical order (newest first). | 544 // historical order (newest first). |
| 555 for (DeferredAction* action = actions_; | 545 for (DeferredAction* action = actions_; action != NULL; |
| 556 action != NULL; | |
| 557 action = action->next()) { | 546 action = action->next()) { |
| 558 if (action->Mentions(reg)) { | 547 if (action->Mentions(reg)) { |
| 559 switch (action->action_type()) { | 548 switch (action->action_type()) { |
| 560 case ActionNode::SET_REGISTER: { | 549 case ActionNode::SET_REGISTER: { |
| 561 Trace::DeferredSetRegister* psr = | 550 Trace::DeferredSetRegister* psr = |
| 562 static_cast<Trace::DeferredSetRegister*>(action); | 551 static_cast<Trace::DeferredSetRegister*>(action); |
| 563 if (!absolute) { | 552 if (!absolute) { |
| 564 value += psr->value(); | 553 value += psr->value(); |
| 565 absolute = true; | 554 absolute = true; |
| 566 } | 555 } |
| (...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 670 if (backtrack() != NULL) { | 659 if (backtrack() != NULL) { |
| 671 // Here we have a concrete backtrack location. These are set up by choice | 660 // Here we have a concrete backtrack location. These are set up by choice |
| 672 // nodes and so they indicate that we have a deferred save of the current | 661 // nodes and so they indicate that we have a deferred save of the current |
| 673 // position which we may need to emit here. | 662 // position which we may need to emit here. |
| 674 assembler->PushCurrentPosition(); | 663 assembler->PushCurrentPosition(); |
| 675 } | 664 } |
| 676 Zone* zone = successor->zone(); | 665 Zone* zone = successor->zone(); |
| 677 intptr_t max_register = FindAffectedRegisters(&affected_registers, zone); | 666 intptr_t max_register = FindAffectedRegisters(&affected_registers, zone); |
| 678 OutSet registers_to_pop; | 667 OutSet registers_to_pop; |
| 679 OutSet registers_to_clear; | 668 OutSet registers_to_clear; |
| 680 PerformDeferredActions(assembler, | 669 PerformDeferredActions(assembler, max_register, affected_registers, |
| 681 max_register, | 670 ®isters_to_pop, ®isters_to_clear, zone); |
| 682 affected_registers, | |
| 683 ®isters_to_pop, | |
| 684 ®isters_to_clear, | |
| 685 zone); | |
| 686 if (cp_offset_ != 0) { | 671 if (cp_offset_ != 0) { |
| 687 assembler->AdvanceCurrentPosition(cp_offset_); | 672 assembler->AdvanceCurrentPosition(cp_offset_); |
| 688 } | 673 } |
| 689 | 674 |
| 690 // Create a new trivial state and generate the node with that. | 675 // Create a new trivial state and generate the node with that. |
| 691 BlockLabel undo; | 676 BlockLabel undo; |
| 692 assembler->PushBacktrack(&undo); | 677 assembler->PushBacktrack(&undo); |
| 693 Trace new_state; | 678 Trace new_state; |
| 694 successor->Emit(compiler, &new_state); | 679 successor->Emit(compiler, &new_state); |
| 695 | 680 |
| 696 // On backtrack we need to restore state. | 681 // On backtrack we need to restore state. |
| 697 assembler->BindBlock(&undo); | 682 assembler->BindBlock(&undo); |
| 698 RestoreAffectedRegisters(assembler, | 683 RestoreAffectedRegisters(assembler, max_register, registers_to_pop, |
| 699 max_register, | |
| 700 registers_to_pop, | |
| 701 registers_to_clear); | 684 registers_to_clear); |
| 702 if (backtrack() == NULL) { | 685 if (backtrack() == NULL) { |
| 703 assembler->Backtrack(); | 686 assembler->Backtrack(); |
| 704 } else { | 687 } else { |
| 705 assembler->PopCurrentPosition(); | 688 assembler->PopCurrentPosition(); |
| 706 assembler->GoTo(backtrack()); | 689 assembler->GoTo(backtrack()); |
| 707 } | 690 } |
| 708 } | 691 } |
| 709 | 692 |
| 710 | 693 |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 753 return; | 736 return; |
| 754 case NEGATIVE_SUBMATCH_SUCCESS: | 737 case NEGATIVE_SUBMATCH_SUCCESS: |
| 755 // This case is handled in a different virtual method. | 738 // This case is handled in a different virtual method. |
| 756 UNREACHABLE(); | 739 UNREACHABLE(); |
| 757 } | 740 } |
| 758 UNIMPLEMENTED(); | 741 UNIMPLEMENTED(); |
| 759 } | 742 } |
| 760 | 743 |
| 761 | 744 |
| 762 void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) { | 745 void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) { |
| 763 if (guards_ == NULL) | 746 if (guards_ == NULL) guards_ = new (zone) ZoneGrowableArray<Guard*>(1); |
| 764 guards_ = new(zone) ZoneGrowableArray<Guard*>(1); | |
| 765 guards_->Add(guard); | 747 guards_->Add(guard); |
| 766 } | 748 } |
| 767 | 749 |
| 768 | 750 |
| 769 ActionNode* ActionNode::SetRegister(intptr_t reg, | 751 ActionNode* ActionNode::SetRegister(intptr_t reg, |
| 770 intptr_t val, | 752 intptr_t val, |
| 771 RegExpNode* on_success) { | 753 RegExpNode* on_success) { |
| 772 ActionNode* result = | 754 ActionNode* result = |
| 773 new(on_success->zone()) ActionNode(SET_REGISTER, on_success); | 755 new (on_success->zone()) ActionNode(SET_REGISTER, on_success); |
| 774 result->data_.u_store_register.reg = reg; | 756 result->data_.u_store_register.reg = reg; |
| 775 result->data_.u_store_register.value = val; | 757 result->data_.u_store_register.value = val; |
| 776 return result; | 758 return result; |
| 777 } | 759 } |
| 778 | 760 |
| 779 | 761 |
| 780 ActionNode* ActionNode::IncrementRegister(intptr_t reg, | 762 ActionNode* ActionNode::IncrementRegister(intptr_t reg, |
| 781 RegExpNode* on_success) { | 763 RegExpNode* on_success) { |
| 782 ActionNode* result = | 764 ActionNode* result = |
| 783 new(on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success); | 765 new (on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success); |
| 784 result->data_.u_increment_register.reg = reg; | 766 result->data_.u_increment_register.reg = reg; |
| 785 return result; | 767 return result; |
| 786 } | 768 } |
| 787 | 769 |
| 788 | 770 |
| 789 ActionNode* ActionNode::StorePosition(intptr_t reg, | 771 ActionNode* ActionNode::StorePosition(intptr_t reg, |
| 790 bool is_capture, | 772 bool is_capture, |
| 791 RegExpNode* on_success) { | 773 RegExpNode* on_success) { |
| 792 ActionNode* result = | 774 ActionNode* result = |
| 793 new(on_success->zone()) ActionNode(STORE_POSITION, on_success); | 775 new (on_success->zone()) ActionNode(STORE_POSITION, on_success); |
| 794 result->data_.u_position_register.reg = reg; | 776 result->data_.u_position_register.reg = reg; |
| 795 result->data_.u_position_register.is_capture = is_capture; | 777 result->data_.u_position_register.is_capture = is_capture; |
| 796 return result; | 778 return result; |
| 797 } | 779 } |
| 798 | 780 |
| 799 | 781 |
| 800 ActionNode* ActionNode::ClearCaptures(Interval range, | 782 ActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) { |
| 801 RegExpNode* on_success) { | |
| 802 ActionNode* result = | 783 ActionNode* result = |
| 803 new(on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success); | 784 new (on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success); |
| 804 result->data_.u_clear_captures.range_from = range.from(); | 785 result->data_.u_clear_captures.range_from = range.from(); |
| 805 result->data_.u_clear_captures.range_to = range.to(); | 786 result->data_.u_clear_captures.range_to = range.to(); |
| 806 return result; | 787 return result; |
| 807 } | 788 } |
| 808 | 789 |
| 809 | 790 |
| 810 ActionNode* ActionNode::BeginSubmatch(intptr_t stack_reg, | 791 ActionNode* ActionNode::BeginSubmatch(intptr_t stack_reg, |
| 811 intptr_t position_reg, | 792 intptr_t position_reg, |
| 812 RegExpNode* on_success) { | 793 RegExpNode* on_success) { |
| 813 ActionNode* result = | 794 ActionNode* result = |
| 814 new(on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success); | 795 new (on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success); |
| 815 result->data_.u_submatch.stack_pointer_register = stack_reg; | 796 result->data_.u_submatch.stack_pointer_register = stack_reg; |
| 816 result->data_.u_submatch.current_position_register = position_reg; | 797 result->data_.u_submatch.current_position_register = position_reg; |
| 817 return result; | 798 return result; |
| 818 } | 799 } |
| 819 | 800 |
| 820 | 801 |
| 821 ActionNode* ActionNode::PositiveSubmatchSuccess(intptr_t stack_reg, | 802 ActionNode* ActionNode::PositiveSubmatchSuccess(intptr_t stack_reg, |
| 822 intptr_t position_reg, | 803 intptr_t position_reg, |
| 823 intptr_t clear_register_count, | 804 intptr_t clear_register_count, |
| 824 intptr_t clear_register_from, | 805 intptr_t clear_register_from, |
| 825 RegExpNode* on_success) { | 806 RegExpNode* on_success) { |
| 826 ActionNode* result = | 807 ActionNode* result = new (on_success->zone()) |
| 827 new(on_success->zone()) ActionNode(POSITIVE_SUBMATCH_SUCCESS, | 808 ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success); |
| 828 on_success); | |
| 829 result->data_.u_submatch.stack_pointer_register = stack_reg; | 809 result->data_.u_submatch.stack_pointer_register = stack_reg; |
| 830 result->data_.u_submatch.current_position_register = position_reg; | 810 result->data_.u_submatch.current_position_register = position_reg; |
| 831 result->data_.u_submatch.clear_register_count = clear_register_count; | 811 result->data_.u_submatch.clear_register_count = clear_register_count; |
| 832 result->data_.u_submatch.clear_register_from = clear_register_from; | 812 result->data_.u_submatch.clear_register_from = clear_register_from; |
| 833 return result; | 813 return result; |
| 834 } | 814 } |
| 835 | 815 |
| 836 | 816 |
| 837 ActionNode* ActionNode::EmptyMatchCheck(intptr_t start_register, | 817 ActionNode* ActionNode::EmptyMatchCheck(intptr_t start_register, |
| 838 intptr_t repetition_register, | 818 intptr_t repetition_register, |
| 839 intptr_t repetition_limit, | 819 intptr_t repetition_limit, |
| 840 RegExpNode* on_success) { | 820 RegExpNode* on_success) { |
| 841 ActionNode* result = | 821 ActionNode* result = |
| 842 new(on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success); | 822 new (on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success); |
| 843 result->data_.u_empty_match_check.start_register = start_register; | 823 result->data_.u_empty_match_check.start_register = start_register; |
| 844 result->data_.u_empty_match_check.repetition_register = repetition_register; | 824 result->data_.u_empty_match_check.repetition_register = repetition_register; |
| 845 result->data_.u_empty_match_check.repetition_limit = repetition_limit; | 825 result->data_.u_empty_match_check.repetition_limit = repetition_limit; |
| 846 return result; | 826 return result; |
| 847 } | 827 } |
| 848 | 828 |
| 849 | 829 |
| 850 #define DEFINE_ACCEPT(Type) \ | 830 #define DEFINE_ACCEPT(Type) \ |
| 851 void Type##Node::Accept(NodeVisitor* visitor) { \ | 831 void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); } |
| 852 visitor->Visit##Type(this); \ | |
| 853 } | |
| 854 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) | 832 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) |
| 855 #undef DEFINE_ACCEPT | 833 #undef DEFINE_ACCEPT |
| 856 | 834 |
| 857 | 835 |
| 858 void LoopChoiceNode::Accept(NodeVisitor* visitor) { | 836 void LoopChoiceNode::Accept(NodeVisitor* visitor) { |
| 859 visitor->VisitLoopChoice(this); | 837 visitor->VisitLoopChoice(this); |
| 860 } | 838 } |
| 861 | 839 |
| 862 | 840 |
| 863 // ------------------------------------------------------------------- | 841 // ------------------------------------------------------------------- |
| 864 // Emit code. | 842 // Emit code. |
| 865 | 843 |
| 866 | 844 |
| 867 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, | 845 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, |
| 868 Guard* guard, | 846 Guard* guard, |
| 869 Trace* trace) { | 847 Trace* trace) { |
| 870 switch (guard->op()) { | 848 switch (guard->op()) { |
| 871 case Guard::LT: | 849 case Guard::LT: |
| 872 ASSERT(!trace->mentions_reg(guard->reg())); | 850 ASSERT(!trace->mentions_reg(guard->reg())); |
| 873 macro_assembler->IfRegisterGE(guard->reg(), | 851 macro_assembler->IfRegisterGE(guard->reg(), guard->value(), |
| 874 guard->value(), | |
| 875 trace->backtrack()); | 852 trace->backtrack()); |
| 876 break; | 853 break; |
| 877 case Guard::GEQ: | 854 case Guard::GEQ: |
| 878 ASSERT(!trace->mentions_reg(guard->reg())); | 855 ASSERT(!trace->mentions_reg(guard->reg())); |
| 879 macro_assembler->IfRegisterLT(guard->reg(), | 856 macro_assembler->IfRegisterLT(guard->reg(), guard->value(), |
| 880 guard->value(), | |
| 881 trace->backtrack()); | 857 trace->backtrack()); |
| 882 break; | 858 break; |
| 883 } | 859 } |
| 884 } | 860 } |
| 885 | 861 |
| 886 | 862 |
| 887 // Returns the number of characters in the equivalence class, omitting those | 863 // Returns the number of characters in the equivalence class, omitting those |
| 888 // that cannot occur in the source string because it is ASCII. | 864 // that cannot occur in the source string because it is ASCII. |
| 889 static intptr_t GetCaseIndependentLetters(uint16_t character, | 865 static intptr_t GetCaseIndependentLetters(uint16_t character, |
| 890 bool one_byte_subject, | 866 bool one_byte_subject, |
| (...skipping 21 matching lines...) Expand all Loading... |
| 912 static inline bool EmitSimpleCharacter(Zone* zone, | 888 static inline bool EmitSimpleCharacter(Zone* zone, |
| 913 RegExpCompiler* compiler, | 889 RegExpCompiler* compiler, |
| 914 uint16_t c, | 890 uint16_t c, |
| 915 BlockLabel* on_failure, | 891 BlockLabel* on_failure, |
| 916 intptr_t cp_offset, | 892 intptr_t cp_offset, |
| 917 bool check, | 893 bool check, |
| 918 bool preloaded) { | 894 bool preloaded) { |
| 919 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 895 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 920 bool bound_checked = false; | 896 bool bound_checked = false; |
| 921 if (!preloaded) { | 897 if (!preloaded) { |
| 922 assembler->LoadCurrentCharacter( | 898 assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
| 923 cp_offset, | |
| 924 on_failure, | |
| 925 check); | |
| 926 bound_checked = true; | 899 bound_checked = true; |
| 927 } | 900 } |
| 928 assembler->CheckNotCharacter(c, on_failure); | 901 assembler->CheckNotCharacter(c, on_failure); |
| 929 return bound_checked; | 902 return bound_checked; |
| 930 } | 903 } |
| 931 | 904 |
| 932 | 905 |
| 933 // Only emits non-letters (things that don't have case). Only used for case | 906 // Only emits non-letters (things that don't have case). Only used for case |
| 934 // independent matches. | 907 // independent matches. |
| 935 static inline bool EmitAtomNonLetter(Zone* zone, | 908 static inline bool EmitAtomNonLetter(Zone* zone, |
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 988 return true; | 961 return true; |
| 989 } | 962 } |
| 990 ASSERT(c2 > c1); | 963 ASSERT(c2 > c1); |
| 991 uint16_t diff = c2 - c1; | 964 uint16_t diff = c2 - c1; |
| 992 if (((diff - 1) & diff) == 0 && c1 >= diff) { | 965 if (((diff - 1) & diff) == 0 && c1 >= diff) { |
| 993 // If the characters differ by 2^n but don't differ by one bit then | 966 // If the characters differ by 2^n but don't differ by one bit then |
| 994 // subtract the difference from the found character, then do the or | 967 // subtract the difference from the found character, then do the or |
| 995 // trick. We avoid the theoretical case where negative numbers are | 968 // trick. We avoid the theoretical case where negative numbers are |
| 996 // involved in order to simplify code generation. | 969 // involved in order to simplify code generation. |
| 997 uint16_t mask = char_mask ^ diff; | 970 uint16_t mask = char_mask ^ diff; |
| 998 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, | 971 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, diff, mask, |
| 999 diff, | |
| 1000 mask, | |
| 1001 on_failure); | 972 on_failure); |
| 1002 return true; | 973 return true; |
| 1003 } | 974 } |
| 1004 return false; | 975 return false; |
| 1005 } | 976 } |
| 1006 | 977 |
| 1007 | 978 |
| 1008 typedef bool EmitCharacterFunction(Zone* zone, | 979 typedef bool EmitCharacterFunction(Zone* zone, |
| 1009 RegExpCompiler* compiler, | 980 RegExpCompiler* compiler, |
| 1010 uint16_t c, | 981 uint16_t c, |
| (...skipping 18 matching lines...) Expand all Loading... |
| 1029 if (length <= 1) return false; | 1000 if (length <= 1) return false; |
| 1030 // We may not need to check against the end of the input string | 1001 // We may not need to check against the end of the input string |
| 1031 // if this character lies before a character that matched. | 1002 // if this character lies before a character that matched. |
| 1032 if (!preloaded) { | 1003 if (!preloaded) { |
| 1033 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 1004 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
| 1034 } | 1005 } |
| 1035 BlockLabel ok; | 1006 BlockLabel ok; |
| 1036 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); | 1007 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); |
| 1037 switch (length) { | 1008 switch (length) { |
| 1038 case 2: { | 1009 case 2: { |
| 1039 if (ShortCutEmitCharacterPair(macro_assembler, | 1010 if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0], |
| 1040 one_byte, | 1011 chars[1], on_failure)) { |
| 1041 chars[0], | |
| 1042 chars[1], | |
| 1043 on_failure)) { | |
| 1044 } else { | 1012 } else { |
| 1045 macro_assembler->CheckCharacter(chars[0], &ok); | 1013 macro_assembler->CheckCharacter(chars[0], &ok); |
| 1046 macro_assembler->CheckNotCharacter(chars[1], on_failure); | 1014 macro_assembler->CheckNotCharacter(chars[1], on_failure); |
| 1047 macro_assembler->BindBlock(&ok); | 1015 macro_assembler->BindBlock(&ok); |
| 1048 } | 1016 } |
| 1049 break; | 1017 break; |
| 1050 } | 1018 } |
| 1051 case 4: | 1019 case 4: |
| 1052 macro_assembler->CheckCharacter(chars[3], &ok); | 1020 macro_assembler->CheckCharacter(chars[3], &ok); |
| 1053 // Fall through! | 1021 // Fall through! |
| 1054 case 3: | 1022 case 3: |
| 1055 macro_assembler->CheckCharacter(chars[0], &ok); | 1023 macro_assembler->CheckCharacter(chars[0], &ok); |
| 1056 macro_assembler->CheckCharacter(chars[1], &ok); | 1024 macro_assembler->CheckCharacter(chars[1], &ok); |
| 1057 macro_assembler->CheckNotCharacter(chars[2], on_failure); | 1025 macro_assembler->CheckNotCharacter(chars[2], on_failure); |
| 1058 macro_assembler->BindBlock(&ok); | 1026 macro_assembler->BindBlock(&ok); |
| 1059 break; | 1027 break; |
| 1060 default: | 1028 default: |
| 1061 UNREACHABLE(); | 1029 UNREACHABLE(); |
| 1062 break; | 1030 break; |
| 1063 } | 1031 } |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1097 } else { | 1065 } else { |
| 1098 masm->CheckCharacterInRange(first, last, in_range); | 1066 masm->CheckCharacterInRange(first, last, in_range); |
| 1099 } | 1067 } |
| 1100 if (out_of_range != fall_through) masm->GoTo(out_of_range); | 1068 if (out_of_range != fall_through) masm->GoTo(out_of_range); |
| 1101 } | 1069 } |
| 1102 } | 1070 } |
| 1103 | 1071 |
| 1104 | 1072 |
| 1105 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. | 1073 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. |
| 1106 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. | 1074 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. |
| 1107 static void EmitUseLookupTable( | 1075 static void EmitUseLookupTable(RegExpMacroAssembler* masm, |
| 1108 RegExpMacroAssembler* masm, | 1076 ZoneGrowableArray<int>* ranges, |
| 1109 ZoneGrowableArray<int>* ranges, | 1077 intptr_t start_index, |
| 1110 intptr_t start_index, | 1078 intptr_t end_index, |
| 1111 intptr_t end_index, | 1079 intptr_t min_char, |
| 1112 intptr_t min_char, | 1080 BlockLabel* fall_through, |
| 1113 BlockLabel* fall_through, | 1081 BlockLabel* even_label, |
| 1114 BlockLabel* even_label, | 1082 BlockLabel* odd_label) { |
| 1115 BlockLabel* odd_label) { | |
| 1116 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; | 1083 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; |
| 1117 static const intptr_t kMask = RegExpMacroAssembler::kTableMask; | 1084 static const intptr_t kMask = RegExpMacroAssembler::kTableMask; |
| 1118 | 1085 |
| 1119 intptr_t base = (min_char & ~kMask); | 1086 intptr_t base = (min_char & ~kMask); |
| 1120 | 1087 |
| 1121 // Assert that everything is on one kTableSize page. | 1088 // Assert that everything is on one kTableSize page. |
| 1122 for (intptr_t i = start_index; i <= end_index; i++) { | 1089 for (intptr_t i = start_index; i <= end_index; i++) { |
| 1123 ASSERT((ranges->At(i) & ~kMask) == base); | 1090 ASSERT((ranges->At(i) & ~kMask) == base); |
| 1124 } | 1091 } |
| 1125 ASSERT(start_index == 0 || (ranges->At(start_index - 1) & ~kMask) <= base); | 1092 ASSERT(start_index == 0 || (ranges->At(start_index - 1) & ~kMask) <= base); |
| (...skipping 21 matching lines...) Expand all Loading... |
| 1147 for (j = (ranges->At(i) & kMask); j < (ranges->At(i + 1) & kMask); j++) { | 1114 for (j = (ranges->At(i) & kMask); j < (ranges->At(i + 1) & kMask); j++) { |
| 1148 templ[j] = bit; | 1115 templ[j] = bit; |
| 1149 } | 1116 } |
| 1150 bit ^= 1; | 1117 bit ^= 1; |
| 1151 } | 1118 } |
| 1152 for (intptr_t i = j; i < kSize; i++) { | 1119 for (intptr_t i = j; i < kSize; i++) { |
| 1153 templ[i] = bit; | 1120 templ[i] = bit; |
| 1154 } | 1121 } |
| 1155 // TODO(erikcorry): Cache these. | 1122 // TODO(erikcorry): Cache these. |
| 1156 const TypedData& ba = TypedData::ZoneHandle( | 1123 const TypedData& ba = TypedData::ZoneHandle( |
| 1157 masm->zone(), | 1124 masm->zone(), TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); |
| 1158 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); | |
| 1159 for (intptr_t i = 0; i < kSize; i++) { | 1125 for (intptr_t i = 0; i < kSize; i++) { |
| 1160 ba.SetUint8(i, templ[i]); | 1126 ba.SetUint8(i, templ[i]); |
| 1161 } | 1127 } |
| 1162 masm->CheckBitInTable(ba, on_bit_set); | 1128 masm->CheckBitInTable(ba, on_bit_set); |
| 1163 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); | 1129 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); |
| 1164 } | 1130 } |
| 1165 | 1131 |
| 1166 | 1132 |
| 1167 static void CutOutRange(RegExpMacroAssembler* masm, | 1133 static void CutOutRange(RegExpMacroAssembler* masm, |
| 1168 ZoneGrowableArray<int>* ranges, | 1134 ZoneGrowableArray<int>* ranges, |
| 1169 intptr_t start_index, | 1135 intptr_t start_index, |
| 1170 intptr_t end_index, | 1136 intptr_t end_index, |
| 1171 intptr_t cut_index, | 1137 intptr_t cut_index, |
| 1172 BlockLabel* even_label, | 1138 BlockLabel* even_label, |
| 1173 BlockLabel* odd_label) { | 1139 BlockLabel* odd_label) { |
| 1174 bool odd = (((cut_index - start_index) & 1) == 1); | 1140 bool odd = (((cut_index - start_index) & 1) == 1); |
| 1175 BlockLabel* in_range_label = odd ? odd_label : even_label; | 1141 BlockLabel* in_range_label = odd ? odd_label : even_label; |
| 1176 BlockLabel dummy; | 1142 BlockLabel dummy; |
| 1177 EmitDoubleBoundaryTest(masm, | 1143 EmitDoubleBoundaryTest(masm, ranges->At(cut_index), |
| 1178 ranges->At(cut_index), | 1144 ranges->At(cut_index + 1) - 1, &dummy, in_range_label, |
| 1179 ranges->At(cut_index + 1) - 1, | |
| 1180 &dummy, | |
| 1181 in_range_label, | |
| 1182 &dummy); | 1145 &dummy); |
| 1183 ASSERT(!dummy.IsLinked()); | 1146 ASSERT(!dummy.IsLinked()); |
| 1184 // Cut out the single range by rewriting the array. This creates a new | 1147 // Cut out the single range by rewriting the array. This creates a new |
| 1185 // range that is a merger of the two ranges on either side of the one we | 1148 // range that is a merger of the two ranges on either side of the one we |
| 1186 // are cutting out. The oddity of the labels is preserved. | 1149 // are cutting out. The oddity of the labels is preserved. |
| 1187 for (intptr_t j = cut_index; j > start_index; j--) { | 1150 for (intptr_t j = cut_index; j > start_index; j--) { |
| 1188 (*ranges)[j] = ranges->At(j - 1); | 1151 (*ranges)[j] = ranges->At(j - 1); |
| 1189 } | 1152 } |
| 1190 for (intptr_t j = cut_index + 1; j < end_index; j++) { | 1153 for (intptr_t j = cut_index + 1; j < end_index; j++) { |
| 1191 (*ranges)[j] = ranges->At(j + 1); | 1154 (*ranges)[j] = ranges->At(j + 1); |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1224 // 128-character space can take up a lot of space in the ranges array if, | 1187 // 128-character space can take up a lot of space in the ranges array if, |
| 1225 // for example, we only want to match every second character (eg. the lower | 1188 // for example, we only want to match every second character (eg. the lower |
| 1226 // case characters on some Unicode pages). | 1189 // case characters on some Unicode pages). |
| 1227 intptr_t binary_chop_index = (end_index + start_index) / 2; | 1190 intptr_t binary_chop_index = (end_index + start_index) / 2; |
| 1228 // The first test ensures that we get to the code that handles the Latin1 | 1191 // The first test ensures that we get to the code that handles the Latin1 |
| 1229 // range with a single not-taken branch, speeding up this important | 1192 // range with a single not-taken branch, speeding up this important |
| 1230 // character range (even non-Latin1 charset-based text has spaces and | 1193 // character range (even non-Latin1 charset-based text has spaces and |
| 1231 // punctuation). | 1194 // punctuation). |
| 1232 if (*border - 1 > Symbols::kMaxOneCharCodeSymbol && // Latin1 case. | 1195 if (*border - 1 > Symbols::kMaxOneCharCodeSymbol && // Latin1 case. |
| 1233 end_index - start_index > (*new_start_index - start_index) * 2 && | 1196 end_index - start_index > (*new_start_index - start_index) * 2 && |
| 1234 last - first > kSize * 2 && | 1197 last - first > kSize * 2 && binary_chop_index > *new_start_index && |
| 1235 binary_chop_index > *new_start_index && | |
| 1236 ranges->At(binary_chop_index) >= first + 2 * kSize) { | 1198 ranges->At(binary_chop_index) >= first + 2 * kSize) { |
| 1237 intptr_t scan_forward_for_section_border = binary_chop_index; | 1199 intptr_t scan_forward_for_section_border = binary_chop_index; |
| 1238 intptr_t new_border = (ranges->At(binary_chop_index) | kMask) + 1; | 1200 intptr_t new_border = (ranges->At(binary_chop_index) | kMask) + 1; |
| 1239 | 1201 |
| 1240 while (scan_forward_for_section_border < end_index) { | 1202 while (scan_forward_for_section_border < end_index) { |
| 1241 if (ranges->At(scan_forward_for_section_border) > new_border) { | 1203 if (ranges->At(scan_forward_for_section_border) > new_border) { |
| 1242 *new_start_index = scan_forward_for_section_border; | 1204 *new_start_index = scan_forward_for_section_border; |
| 1243 *border = new_border; | 1205 *border = new_border; |
| 1244 break; | 1206 break; |
| 1245 } | 1207 } |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1283 // Just need to test if the character is before or on-or-after | 1245 // Just need to test if the character is before or on-or-after |
| 1284 // a particular character. | 1246 // a particular character. |
| 1285 if (start_index == end_index) { | 1247 if (start_index == end_index) { |
| 1286 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label); | 1248 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label); |
| 1287 return; | 1249 return; |
| 1288 } | 1250 } |
| 1289 | 1251 |
| 1290 // Another almost trivial case: There is one interval in the middle that is | 1252 // Another almost trivial case: There is one interval in the middle that is |
| 1291 // different from the end intervals. | 1253 // different from the end intervals. |
| 1292 if (start_index + 1 == end_index) { | 1254 if (start_index + 1 == end_index) { |
| 1293 EmitDoubleBoundaryTest( | 1255 EmitDoubleBoundaryTest(masm, first, last, fall_through, even_label, |
| 1294 masm, first, last, fall_through, even_label, odd_label); | 1256 odd_label); |
| 1295 return; | 1257 return; |
| 1296 } | 1258 } |
| 1297 | 1259 |
| 1298 // It's not worth using table lookup if there are very few intervals in the | 1260 // It's not worth using table lookup if there are very few intervals in the |
| 1299 // character class. | 1261 // character class. |
| 1300 if (end_index - start_index <= 6) { | 1262 if (end_index - start_index <= 6) { |
| 1301 // It is faster to test for individual characters, so we look for those | 1263 // It is faster to test for individual characters, so we look for those |
| 1302 // first, then try arbitrary ranges in the second round. | 1264 // first, then try arbitrary ranges in the second round. |
| 1303 static intptr_t kNoCutIndex = -1; | 1265 static intptr_t kNoCutIndex = -1; |
| 1304 intptr_t cut = kNoCutIndex; | 1266 intptr_t cut = kNoCutIndex; |
| 1305 for (intptr_t i = start_index; i < end_index; i++) { | 1267 for (intptr_t i = start_index; i < end_index; i++) { |
| 1306 if (ranges->At(i) == ranges->At(i + 1) - 1) { | 1268 if (ranges->At(i) == ranges->At(i + 1) - 1) { |
| 1307 cut = i; | 1269 cut = i; |
| 1308 break; | 1270 break; |
| 1309 } | 1271 } |
| 1310 } | 1272 } |
| 1311 if (cut == kNoCutIndex) cut = start_index; | 1273 if (cut == kNoCutIndex) cut = start_index; |
| 1312 CutOutRange( | 1274 CutOutRange(masm, ranges, start_index, end_index, cut, even_label, |
| 1313 masm, ranges, start_index, end_index, cut, even_label, odd_label); | 1275 odd_label); |
| 1314 ASSERT(end_index - start_index >= 2); | 1276 ASSERT(end_index - start_index >= 2); |
| 1315 GenerateBranches(masm, | 1277 GenerateBranches(masm, ranges, start_index + 1, end_index - 1, min_char, |
| 1316 ranges, | 1278 max_char, fall_through, even_label, odd_label); |
| 1317 start_index + 1, | |
| 1318 end_index - 1, | |
| 1319 min_char, | |
| 1320 max_char, | |
| 1321 fall_through, | |
| 1322 even_label, | |
| 1323 odd_label); | |
| 1324 return; | 1279 return; |
| 1325 } | 1280 } |
| 1326 | 1281 |
| 1327 // If there are a lot of intervals in the regexp, then we will use tables to | 1282 // If there are a lot of intervals in the regexp, then we will use tables to |
| 1328 // determine whether the character is inside or outside the character class. | 1283 // determine whether the character is inside or outside the character class. |
| 1329 static const intptr_t kBits = RegExpMacroAssembler::kTableSizeBits; | 1284 static const intptr_t kBits = RegExpMacroAssembler::kTableSizeBits; |
| 1330 | 1285 |
| 1331 if ((max_char >> kBits) == (min_char >> kBits)) { | 1286 if ((max_char >> kBits) == (min_char >> kBits)) { |
| 1332 EmitUseLookupTable(masm, | 1287 EmitUseLookupTable(masm, ranges, start_index, end_index, min_char, |
| 1333 ranges, | 1288 fall_through, even_label, odd_label); |
| 1334 start_index, | |
| 1335 end_index, | |
| 1336 min_char, | |
| 1337 fall_through, | |
| 1338 even_label, | |
| 1339 odd_label); | |
| 1340 return; | 1289 return; |
| 1341 } | 1290 } |
| 1342 | 1291 |
| 1343 if ((min_char >> kBits) != (first >> kBits)) { | 1292 if ((min_char >> kBits) != (first >> kBits)) { |
| 1344 masm->CheckCharacterLT(first, odd_label); | 1293 masm->CheckCharacterLT(first, odd_label); |
| 1345 GenerateBranches(masm, | 1294 GenerateBranches(masm, ranges, start_index + 1, end_index, first, max_char, |
| 1346 ranges, | 1295 fall_through, odd_label, even_label); |
| 1347 start_index + 1, | |
| 1348 end_index, | |
| 1349 first, | |
| 1350 max_char, | |
| 1351 fall_through, | |
| 1352 odd_label, | |
| 1353 even_label); | |
| 1354 return; | 1296 return; |
| 1355 } | 1297 } |
| 1356 | 1298 |
| 1357 intptr_t new_start_index = 0; | 1299 intptr_t new_start_index = 0; |
| 1358 intptr_t new_end_index = 0; | 1300 intptr_t new_end_index = 0; |
| 1359 intptr_t border = 0; | 1301 intptr_t border = 0; |
| 1360 | 1302 |
| 1361 SplitSearchSpace(ranges, | 1303 SplitSearchSpace(ranges, start_index, end_index, &new_start_index, |
| 1362 start_index, | 1304 &new_end_index, &border); |
| 1363 end_index, | |
| 1364 &new_start_index, | |
| 1365 &new_end_index, | |
| 1366 &border); | |
| 1367 | 1305 |
| 1368 BlockLabel handle_rest; | 1306 BlockLabel handle_rest; |
| 1369 BlockLabel* above = &handle_rest; | 1307 BlockLabel* above = &handle_rest; |
| 1370 if (border == last + 1) { | 1308 if (border == last + 1) { |
| 1371 // We didn't find any section that started after the limit, so everything | 1309 // We didn't find any section that started after the limit, so everything |
| 1372 // above the border is one of the terminal labels. | 1310 // above the border is one of the terminal labels. |
| 1373 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label; | 1311 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label; |
| 1374 ASSERT(new_end_index == end_index - 1); | 1312 ASSERT(new_end_index == end_index - 1); |
| 1375 } | 1313 } |
| 1376 | 1314 |
| 1377 ASSERT(start_index <= new_end_index); | 1315 ASSERT(start_index <= new_end_index); |
| 1378 ASSERT(new_start_index <= end_index); | 1316 ASSERT(new_start_index <= end_index); |
| 1379 ASSERT(start_index < new_start_index); | 1317 ASSERT(start_index < new_start_index); |
| 1380 ASSERT(new_end_index < end_index); | 1318 ASSERT(new_end_index < end_index); |
| 1381 ASSERT(new_end_index + 1 == new_start_index || | 1319 ASSERT(new_end_index + 1 == new_start_index || |
| 1382 (new_end_index + 2 == new_start_index && | 1320 (new_end_index + 2 == new_start_index && |
| 1383 border == ranges->At(new_end_index + 1))); | 1321 border == ranges->At(new_end_index + 1))); |
| 1384 ASSERT(min_char < border - 1); | 1322 ASSERT(min_char < border - 1); |
| 1385 ASSERT(border < max_char); | 1323 ASSERT(border < max_char); |
| 1386 ASSERT(ranges->At(new_end_index) < border); | 1324 ASSERT(ranges->At(new_end_index) < border); |
| 1387 ASSERT(border < ranges->At(new_start_index) || | 1325 ASSERT(border < ranges->At(new_start_index) || |
| 1388 (border == ranges->At(new_start_index) && | 1326 (border == ranges->At(new_start_index) && |
| 1389 new_start_index == end_index && | 1327 new_start_index == end_index && new_end_index == end_index - 1 && |
| 1390 new_end_index == end_index - 1 && | |
| 1391 border == last + 1)); | 1328 border == last + 1)); |
| 1392 ASSERT(new_start_index == 0 || border >= ranges->At(new_start_index - 1)); | 1329 ASSERT(new_start_index == 0 || border >= ranges->At(new_start_index - 1)); |
| 1393 | 1330 |
| 1394 masm->CheckCharacterGT(border - 1, above); | 1331 masm->CheckCharacterGT(border - 1, above); |
| 1395 BlockLabel dummy; | 1332 BlockLabel dummy; |
| 1396 GenerateBranches(masm, | 1333 GenerateBranches(masm, ranges, start_index, new_end_index, min_char, |
| 1397 ranges, | 1334 border - 1, &dummy, even_label, odd_label); |
| 1398 start_index, | |
| 1399 new_end_index, | |
| 1400 min_char, | |
| 1401 border - 1, | |
| 1402 &dummy, | |
| 1403 even_label, | |
| 1404 odd_label); | |
| 1405 | 1335 |
| 1406 if (handle_rest.IsLinked()) { | 1336 if (handle_rest.IsLinked()) { |
| 1407 masm->BindBlock(&handle_rest); | 1337 masm->BindBlock(&handle_rest); |
| 1408 bool flip = (new_start_index & 1) != (start_index & 1); | 1338 bool flip = (new_start_index & 1) != (start_index & 1); |
| 1409 GenerateBranches(masm, | 1339 GenerateBranches(masm, ranges, new_start_index, end_index, border, max_char, |
| 1410 ranges, | 1340 &dummy, flip ? odd_label : even_label, |
| 1411 new_start_index, | |
| 1412 end_index, | |
| 1413 border, | |
| 1414 max_char, | |
| 1415 &dummy, | |
| 1416 flip ? odd_label : even_label, | |
| 1417 flip ? even_label : odd_label); | 1341 flip ? even_label : odd_label); |
| 1418 } | 1342 } |
| 1419 } | 1343 } |
| 1420 | 1344 |
| 1421 | 1345 |
| 1422 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 1346 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, |
| 1423 RegExpCharacterClass* cc, | 1347 RegExpCharacterClass* cc, |
| 1424 bool one_byte, | 1348 bool one_byte, |
| 1425 BlockLabel* on_failure, | 1349 BlockLabel* on_failure, |
| 1426 intptr_t cp_offset, | 1350 intptr_t cp_offset, |
| (...skipping 26 matching lines...) Expand all Loading... |
| 1453 if (last_valid_range < 0) { | 1377 if (last_valid_range < 0) { |
| 1454 if (!cc->is_negated()) { | 1378 if (!cc->is_negated()) { |
| 1455 macro_assembler->GoTo(on_failure); | 1379 macro_assembler->GoTo(on_failure); |
| 1456 } | 1380 } |
| 1457 if (check_offset) { | 1381 if (check_offset) { |
| 1458 macro_assembler->CheckPosition(cp_offset, on_failure); | 1382 macro_assembler->CheckPosition(cp_offset, on_failure); |
| 1459 } | 1383 } |
| 1460 return; | 1384 return; |
| 1461 } | 1385 } |
| 1462 | 1386 |
| 1463 if (last_valid_range == 0 && | 1387 if (last_valid_range == 0 && ranges->At(0).IsEverything(max_char)) { |
| 1464 ranges->At(0).IsEverything(max_char)) { | |
| 1465 if (cc->is_negated()) { | 1388 if (cc->is_negated()) { |
| 1466 macro_assembler->GoTo(on_failure); | 1389 macro_assembler->GoTo(on_failure); |
| 1467 } else { | 1390 } else { |
| 1468 // This is a common case hit by non-anchored expressions. | 1391 // This is a common case hit by non-anchored expressions. |
| 1469 if (check_offset) { | 1392 if (check_offset) { |
| 1470 macro_assembler->CheckPosition(cp_offset, on_failure); | 1393 macro_assembler->CheckPosition(cp_offset, on_failure); |
| 1471 } | 1394 } |
| 1472 } | 1395 } |
| 1473 return; | 1396 return; |
| 1474 } | 1397 } |
| 1475 if (last_valid_range == 0 && | 1398 if (last_valid_range == 0 && !cc->is_negated() && |
| 1476 !cc->is_negated() && | |
| 1477 ranges->At(0).IsEverything(max_char)) { | 1399 ranges->At(0).IsEverything(max_char)) { |
| 1478 // This is a common case hit by non-anchored expressions. | 1400 // This is a common case hit by non-anchored expressions. |
| 1479 if (check_offset) { | 1401 if (check_offset) { |
| 1480 macro_assembler->CheckPosition(cp_offset, on_failure); | 1402 macro_assembler->CheckPosition(cp_offset, on_failure); |
| 1481 } | 1403 } |
| 1482 return; | 1404 return; |
| 1483 } | 1405 } |
| 1484 | 1406 |
| 1485 if (!preloaded) { | 1407 if (!preloaded) { |
| 1486 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); | 1408 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); |
| 1487 } | 1409 } |
| 1488 | 1410 |
| 1489 if (cc->is_standard() && | 1411 if (cc->is_standard() && |
| 1490 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), | 1412 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), |
| 1491 on_failure)) { | 1413 on_failure)) { |
| 1492 return; | 1414 return; |
| 1493 } | 1415 } |
| 1494 | 1416 |
| 1495 | 1417 |
| 1496 // A new list with ascending entries. Each entry is a code unit | 1418 // A new list with ascending entries. Each entry is a code unit |
| 1497 // where there is a boundary between code units that are part of | 1419 // where there is a boundary between code units that are part of |
| 1498 // the class and code units that are not. Normally we insert an | 1420 // the class and code units that are not. Normally we insert an |
| 1499 // entry at zero which goes to the failure label, but if there | 1421 // entry at zero which goes to the failure label, but if there |
| 1500 // was already one there we fall through for success on that entry. | 1422 // was already one there we fall through for success on that entry. |
| 1501 // Subsequent entries have alternating meaning (success/failure). | 1423 // Subsequent entries have alternating meaning (success/failure). |
| 1502 ZoneGrowableArray<int>* range_boundaries = | 1424 ZoneGrowableArray<int>* range_boundaries = |
| 1503 new(zone) ZoneGrowableArray<int>(last_valid_range); | 1425 new (zone) ZoneGrowableArray<int>(last_valid_range); |
| 1504 | 1426 |
| 1505 bool zeroth_entry_is_failure = !cc->is_negated(); | 1427 bool zeroth_entry_is_failure = !cc->is_negated(); |
| 1506 | 1428 |
| 1507 for (intptr_t i = 0; i <= last_valid_range; i++) { | 1429 for (intptr_t i = 0; i <= last_valid_range; i++) { |
| 1508 CharacterRange& range = (*ranges)[i]; | 1430 CharacterRange& range = (*ranges)[i]; |
| 1509 if (range.from() == 0) { | 1431 if (range.from() == 0) { |
| 1510 ASSERT(i == 0); | 1432 ASSERT(i == 0); |
| 1511 zeroth_entry_is_failure = !zeroth_entry_is_failure; | 1433 zeroth_entry_is_failure = !zeroth_entry_is_failure; |
| 1512 } else { | 1434 } else { |
| 1513 range_boundaries->Add(range.from()); | 1435 range_boundaries->Add(range.from()); |
| 1514 } | 1436 } |
| 1515 range_boundaries->Add(range.to() + 1); | 1437 range_boundaries->Add(range.to() + 1); |
| 1516 } | 1438 } |
| 1517 intptr_t end_index = range_boundaries->length() - 1; | 1439 intptr_t end_index = range_boundaries->length() - 1; |
| 1518 if (range_boundaries->At(end_index) > max_char) { | 1440 if (range_boundaries->At(end_index) > max_char) { |
| 1519 end_index--; | 1441 end_index--; |
| 1520 } | 1442 } |
| 1521 | 1443 |
| 1522 BlockLabel fall_through; | 1444 BlockLabel fall_through; |
| 1523 GenerateBranches(macro_assembler, | 1445 GenerateBranches(macro_assembler, range_boundaries, |
| 1524 range_boundaries, | |
| 1525 0, // start_index. | 1446 0, // start_index. |
| 1526 end_index, | 1447 end_index, |
| 1527 0, // min_char. | 1448 0, // min_char. |
| 1528 max_char, | 1449 max_char, &fall_through, |
| 1529 &fall_through, | |
| 1530 zeroth_entry_is_failure ? &fall_through : on_failure, | 1450 zeroth_entry_is_failure ? &fall_through : on_failure, |
| 1531 zeroth_entry_is_failure ? on_failure : &fall_through); | 1451 zeroth_entry_is_failure ? on_failure : &fall_through); |
| 1532 macro_assembler->BindBlock(&fall_through); | 1452 macro_assembler->BindBlock(&fall_through); |
| 1533 } | 1453 } |
| 1534 | 1454 |
| 1535 | 1455 |
| 1536 RegExpNode::~RegExpNode() { | 1456 RegExpNode::~RegExpNode() {} |
| 1537 } | |
| 1538 | 1457 |
| 1539 | 1458 |
| 1540 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, | 1459 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, |
| 1541 Trace* trace) { | 1460 Trace* trace) { |
| 1542 // If we are generating a greedy loop then don't stop and don't reuse code. | 1461 // If we are generating a greedy loop then don't stop and don't reuse code. |
| 1543 if (trace->stop_node() != NULL) { | 1462 if (trace->stop_node() != NULL) { |
| 1544 return CONTINUE; | 1463 return CONTINUE; |
| 1545 } | 1464 } |
| 1546 | 1465 |
| 1547 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1466 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| (...skipping 12 matching lines...) Expand all Loading... |
| 1560 return DONE; | 1479 return DONE; |
| 1561 } | 1480 } |
| 1562 // Generate generic version of the node and bind the label for later use. | 1481 // Generate generic version of the node and bind the label for later use. |
| 1563 macro_assembler->BindBlock(&label_); | 1482 macro_assembler->BindBlock(&label_); |
| 1564 return CONTINUE; | 1483 return CONTINUE; |
| 1565 } | 1484 } |
| 1566 | 1485 |
| 1567 // We are being asked to make a non-generic version. Keep track of how many | 1486 // We are being asked to make a non-generic version. Keep track of how many |
| 1568 // non-generic versions we generate so as not to overdo it. | 1487 // non-generic versions we generate so as not to overdo it. |
| 1569 trace_count_++; | 1488 trace_count_++; |
| 1570 if (kRegexpOptimization && | 1489 if (kRegexpOptimization && trace_count_ < kMaxCopiesCodeGenerated && |
| 1571 trace_count_ < kMaxCopiesCodeGenerated && | |
| 1572 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) { | 1490 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) { |
| 1573 return CONTINUE; | 1491 return CONTINUE; |
| 1574 } | 1492 } |
| 1575 | 1493 |
| 1576 // If we get here code has been generated for this node too many times or | 1494 // If we get here code has been generated for this node too many times or |
| 1577 // recursion is too deep. Time to switch to a generic version. The code for | 1495 // recursion is too deep. Time to switch to a generic version. The code for |
| 1578 // generic versions above can handle deep recursion properly. | 1496 // generic versions above can handle deep recursion properly. |
| 1579 trace->Flush(compiler, this); | 1497 trace->Flush(compiler, this); |
| 1580 return DONE; | 1498 return DONE; |
| 1581 } | 1499 } |
| 1582 | 1500 |
| 1583 | 1501 |
| 1584 intptr_t ActionNode::EatsAtLeast(intptr_t still_to_find, | 1502 intptr_t ActionNode::EatsAtLeast(intptr_t still_to_find, |
| 1585 intptr_t budget, | 1503 intptr_t budget, |
| 1586 bool not_at_start) { | 1504 bool not_at_start) { |
| 1587 if (budget <= 0) return 0; | 1505 if (budget <= 0) return 0; |
| 1588 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! | 1506 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! |
| 1589 return on_success()->EatsAtLeast(still_to_find, | 1507 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); |
| 1590 budget - 1, | |
| 1591 not_at_start); | |
| 1592 } | 1508 } |
| 1593 | 1509 |
| 1594 | 1510 |
| 1595 void ActionNode::FillInBMInfo(intptr_t offset, | 1511 void ActionNode::FillInBMInfo(intptr_t offset, |
| 1596 intptr_t budget, | 1512 intptr_t budget, |
| 1597 BoyerMooreLookahead* bm, | 1513 BoyerMooreLookahead* bm, |
| 1598 bool not_at_start) { | 1514 bool not_at_start) { |
| 1599 if (action_type_ == BEGIN_SUBMATCH) { | 1515 if (action_type_ == BEGIN_SUBMATCH) { |
| 1600 bm->SetRest(offset); | 1516 bm->SetRest(offset); |
| 1601 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { | 1517 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { |
| 1602 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); | 1518 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); |
| 1603 } | 1519 } |
| 1604 SaveBMInfo(bm, not_at_start, offset); | 1520 SaveBMInfo(bm, not_at_start, offset); |
| 1605 } | 1521 } |
| 1606 | 1522 |
| 1607 | 1523 |
| 1608 intptr_t AssertionNode::EatsAtLeast(intptr_t still_to_find, | 1524 intptr_t AssertionNode::EatsAtLeast(intptr_t still_to_find, |
| 1609 intptr_t budget, | 1525 intptr_t budget, |
| 1610 bool not_at_start) { | 1526 bool not_at_start) { |
| 1611 if (budget <= 0) return 0; | 1527 if (budget <= 0) return 0; |
| 1612 // If we know we are not at the start and we are asked "how many characters | 1528 // If we know we are not at the start and we are asked "how many characters |
| 1613 // will you match if you succeed?" then we can answer anything since false | 1529 // will you match if you succeed?" then we can answer anything since false |
| 1614 // implies false. So lets just return the max answer (still_to_find) since | 1530 // implies false. So lets just return the max answer (still_to_find) since |
| 1615 // that won't prevent us from preloading a lot of characters for the other | 1531 // that won't prevent us from preloading a lot of characters for the other |
| 1616 // branches in the node graph. | 1532 // branches in the node graph. |
| 1617 if (assertion_type() == AT_START && not_at_start) return still_to_find; | 1533 if (assertion_type() == AT_START && not_at_start) return still_to_find; |
| 1618 return on_success()->EatsAtLeast(still_to_find, | 1534 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); |
| 1619 budget - 1, | |
| 1620 not_at_start); | |
| 1621 } | 1535 } |
| 1622 | 1536 |
| 1623 | 1537 |
| 1624 void AssertionNode::FillInBMInfo(intptr_t offset, | 1538 void AssertionNode::FillInBMInfo(intptr_t offset, |
| 1625 intptr_t budget, | 1539 intptr_t budget, |
| 1626 BoyerMooreLookahead* bm, | 1540 BoyerMooreLookahead* bm, |
| 1627 bool not_at_start) { | 1541 bool not_at_start) { |
| 1628 // Match the behaviour of EatsAtLeast on this node. | 1542 // Match the behaviour of EatsAtLeast on this node. |
| 1629 if (assertion_type() == AT_START && not_at_start) return; | 1543 if (assertion_type() == AT_START && not_at_start) return; |
| 1630 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); | 1544 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); |
| 1631 SaveBMInfo(bm, not_at_start, offset); | 1545 SaveBMInfo(bm, not_at_start, offset); |
| 1632 } | 1546 } |
| 1633 | 1547 |
| 1634 | 1548 |
| 1635 intptr_t BackReferenceNode::EatsAtLeast(intptr_t still_to_find, | 1549 intptr_t BackReferenceNode::EatsAtLeast(intptr_t still_to_find, |
| 1636 intptr_t budget, | 1550 intptr_t budget, |
| 1637 bool not_at_start) { | 1551 bool not_at_start) { |
| 1638 if (budget <= 0) return 0; | 1552 if (budget <= 0) return 0; |
| 1639 return on_success()->EatsAtLeast(still_to_find, | 1553 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); |
| 1640 budget - 1, | |
| 1641 not_at_start); | |
| 1642 } | 1554 } |
| 1643 | 1555 |
| 1644 | 1556 |
| 1645 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find, | 1557 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find, |
| 1646 intptr_t budget, | 1558 intptr_t budget, |
| 1647 bool not_at_start) { | 1559 bool not_at_start) { |
| 1648 intptr_t answer = Length(); | 1560 intptr_t answer = Length(); |
| 1649 if (answer >= still_to_find) return answer; | 1561 if (answer >= still_to_find) return answer; |
| 1650 if (budget <= 0) return answer; | 1562 if (budget <= 0) return answer; |
| 1651 // We are not at start after this node so we set the last argument to 'true'. | 1563 // We are not at start after this node so we set the last argument to 'true'. |
| 1652 return answer + on_success()->EatsAtLeast(still_to_find - answer, | 1564 return answer + |
| 1653 budget - 1, | 1565 on_success()->EatsAtLeast(still_to_find - answer, budget - 1, true); |
| 1654 true); | |
| 1655 } | 1566 } |
| 1656 | 1567 |
| 1657 | 1568 |
| 1658 intptr_t NegativeLookaheadChoiceNode::EatsAtLeast(intptr_t still_to_find, | 1569 intptr_t NegativeLookaheadChoiceNode::EatsAtLeast(intptr_t still_to_find, |
| 1659 intptr_t budget, | 1570 intptr_t budget, |
| 1660 bool not_at_start) { | 1571 bool not_at_start) { |
| 1661 if (budget <= 0) return 0; | 1572 if (budget <= 0) return 0; |
| 1662 // Alternative 0 is the negative lookahead, alternative 1 is what comes | 1573 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
| 1663 // afterwards. | 1574 // afterwards. |
| 1664 RegExpNode* node = (*alternatives_)[1].node(); | 1575 RegExpNode* node = (*alternatives_)[1].node(); |
| (...skipping 29 matching lines...) Expand all Loading... |
| 1694 if (node_eats_at_least < min) min = node_eats_at_least; | 1605 if (node_eats_at_least < min) min = node_eats_at_least; |
| 1695 if (min == 0) return 0; | 1606 if (min == 0) return 0; |
| 1696 } | 1607 } |
| 1697 return min; | 1608 return min; |
| 1698 } | 1609 } |
| 1699 | 1610 |
| 1700 | 1611 |
| 1701 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find, | 1612 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find, |
| 1702 intptr_t budget, | 1613 intptr_t budget, |
| 1703 bool not_at_start) { | 1614 bool not_at_start) { |
| 1704 return EatsAtLeastHelper(still_to_find, | 1615 return EatsAtLeastHelper(still_to_find, budget - 1, loop_node_, not_at_start); |
| 1705 budget - 1, | |
| 1706 loop_node_, | |
| 1707 not_at_start); | |
| 1708 } | 1616 } |
| 1709 | 1617 |
| 1710 | 1618 |
| 1711 intptr_t ChoiceNode::EatsAtLeast(intptr_t still_to_find, | 1619 intptr_t ChoiceNode::EatsAtLeast(intptr_t still_to_find, |
| 1712 intptr_t budget, | 1620 intptr_t budget, |
| 1713 bool not_at_start) { | 1621 bool not_at_start) { |
| 1714 return EatsAtLeastHelper(still_to_find, | 1622 return EatsAtLeastHelper(still_to_find, budget, NULL, not_at_start); |
| 1715 budget, | |
| 1716 NULL, | |
| 1717 not_at_start); | |
| 1718 } | 1623 } |
| 1719 | 1624 |
| 1720 | 1625 |
| 1721 // Takes the left-most 1-bit and smears it out, setting all bits to its right. | 1626 // Takes the left-most 1-bit and smears it out, setting all bits to its right. |
| 1722 static inline uint32_t SmearBitsRight(uint32_t v) { | 1627 static inline uint32_t SmearBitsRight(uint32_t v) { |
| 1723 v |= v >> 1; | 1628 v |= v >> 1; |
| 1724 v |= v >> 2; | 1629 v |= v >> 2; |
| 1725 v |= v >> 4; | 1630 v |= v >> 4; |
| 1726 v |= v >> 8; | 1631 v |= v >> 8; |
| 1727 v |= v >> 16; | 1632 v |= v >> 16; |
| (...skipping 26 matching lines...) Expand all Loading... |
| 1754 | 1659 |
| 1755 | 1660 |
| 1756 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, | 1661 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, |
| 1757 Trace* bounds_check_trace, | 1662 Trace* bounds_check_trace, |
| 1758 Trace* trace, | 1663 Trace* trace, |
| 1759 bool preload_has_checked_bounds, | 1664 bool preload_has_checked_bounds, |
| 1760 BlockLabel* on_possible_success, | 1665 BlockLabel* on_possible_success, |
| 1761 QuickCheckDetails* details, | 1666 QuickCheckDetails* details, |
| 1762 bool fall_through_on_failure) { | 1667 bool fall_through_on_failure) { |
| 1763 if (details->characters() == 0) return false; | 1668 if (details->characters() == 0) return false; |
| 1764 GetQuickCheckDetails( | 1669 GetQuickCheckDetails(details, compiler, 0, |
| 1765 details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE); | 1670 trace->at_start() == Trace::FALSE_VALUE); |
| 1766 if (details->cannot_match()) return false; | 1671 if (details->cannot_match()) return false; |
| 1767 if (!details->Rationalize(compiler->one_byte())) return false; | 1672 if (!details->Rationalize(compiler->one_byte())) return false; |
| 1768 ASSERT(details->characters() == 1 || | 1673 ASSERT(details->characters() == 1 || |
| 1769 compiler->macro_assembler()->CanReadUnaligned()); | 1674 compiler->macro_assembler()->CanReadUnaligned()); |
| 1770 uint32_t mask = details->mask(); | 1675 uint32_t mask = details->mask(); |
| 1771 uint32_t value = details->value(); | 1676 uint32_t value = details->value(); |
| 1772 | 1677 |
| 1773 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 1678 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 1774 | 1679 |
| 1775 if (trace->characters_preloaded() != details->characters()) { | 1680 if (trace->characters_preloaded() != details->characters()) { |
| 1776 ASSERT(trace->cp_offset() == bounds_check_trace->cp_offset()); | 1681 ASSERT(trace->cp_offset() == bounds_check_trace->cp_offset()); |
| 1777 // We are attempting to preload the minimum number of characters | 1682 // We are attempting to preload the minimum number of characters |
| 1778 // any choice would eat, so if the bounds check fails, then none of the | 1683 // any choice would eat, so if the bounds check fails, then none of the |
| 1779 // choices can succeed, so we can just immediately backtrack, rather | 1684 // choices can succeed, so we can just immediately backtrack, rather |
| 1780 // than go to the next choice. | 1685 // than go to the next choice. |
| 1781 assembler->LoadCurrentCharacter(trace->cp_offset(), | 1686 assembler->LoadCurrentCharacter( |
| 1782 bounds_check_trace->backtrack(), | 1687 trace->cp_offset(), bounds_check_trace->backtrack(), |
| 1783 !preload_has_checked_bounds, | 1688 !preload_has_checked_bounds, details->characters()); |
| 1784 details->characters()); | |
| 1785 } | 1689 } |
| 1786 | 1690 |
| 1787 | 1691 |
| 1788 bool need_mask = true; | 1692 bool need_mask = true; |
| 1789 | 1693 |
| 1790 if (details->characters() == 1) { | 1694 if (details->characters() == 1) { |
| 1791 // If number of characters preloaded is 1 then we used a byte or 16 bit | 1695 // If number of characters preloaded is 1 then we used a byte or 16 bit |
| 1792 // load so the value is already masked down. | 1696 // load so the value is already masked down. |
| 1793 uint32_t char_mask; | 1697 uint32_t char_mask; |
| 1794 if (compiler->one_byte()) { | 1698 if (compiler->one_byte()) { |
| (...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1941 CharacterRange range = ranges->At(first_range); | 1845 CharacterRange range = ranges->At(first_range); |
| 1942 uint16_t from = range.from(); | 1846 uint16_t from = range.from(); |
| 1943 uint16_t to = range.to(); | 1847 uint16_t to = range.to(); |
| 1944 if (to > char_mask) { | 1848 if (to > char_mask) { |
| 1945 to = char_mask; | 1849 to = char_mask; |
| 1946 } | 1850 } |
| 1947 uint32_t differing_bits = (from ^ to); | 1851 uint32_t differing_bits = (from ^ to); |
| 1948 // A mask and compare is only perfect if the differing bits form a | 1852 // A mask and compare is only perfect if the differing bits form a |
| 1949 // number like 00011111 with one single block of trailing 1s. | 1853 // number like 00011111 with one single block of trailing 1s. |
| 1950 if ((differing_bits & (differing_bits + 1)) == 0 && | 1854 if ((differing_bits & (differing_bits + 1)) == 0 && |
| 1951 from + differing_bits == to) { | 1855 from + differing_bits == to) { |
| 1952 pos->determines_perfectly = true; | 1856 pos->determines_perfectly = true; |
| 1953 } | 1857 } |
| 1954 uint32_t common_bits = ~SmearBitsRight(differing_bits); | 1858 uint32_t common_bits = ~SmearBitsRight(differing_bits); |
| 1955 uint32_t bits = (from & common_bits); | 1859 uint32_t bits = (from & common_bits); |
| 1956 for (intptr_t i = first_range + 1; i < ranges->length(); i++) { | 1860 for (intptr_t i = first_range + 1; i < ranges->length(); i++) { |
| 1957 CharacterRange range = ranges->At(i); | 1861 CharacterRange range = ranges->At(i); |
| 1958 uint16_t from = range.from(); | 1862 uint16_t from = range.from(); |
| 1959 uint16_t to = range.to(); | 1863 uint16_t to = range.to(); |
| 1960 if (from > char_mask) continue; | 1864 if (from > char_mask) continue; |
| 1961 if (to > char_mask) to = char_mask; | 1865 if (to > char_mask) to = char_mask; |
| (...skipping 16 matching lines...) Expand all Loading... |
| 1978 } | 1882 } |
| 1979 characters_filled_in++; | 1883 characters_filled_in++; |
| 1980 ASSERT(characters_filled_in <= details->characters()); | 1884 ASSERT(characters_filled_in <= details->characters()); |
| 1981 if (characters_filled_in == details->characters()) { | 1885 if (characters_filled_in == details->characters()) { |
| 1982 return; | 1886 return; |
| 1983 } | 1887 } |
| 1984 } | 1888 } |
| 1985 } | 1889 } |
| 1986 ASSERT(characters_filled_in != details->characters()); | 1890 ASSERT(characters_filled_in != details->characters()); |
| 1987 if (!details->cannot_match()) { | 1891 if (!details->cannot_match()) { |
| 1988 on_success()-> GetQuickCheckDetails(details, | 1892 on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in, |
| 1989 compiler, | 1893 true); |
| 1990 characters_filled_in, | |
| 1991 true); | |
| 1992 } | 1894 } |
| 1993 } | 1895 } |
| 1994 | 1896 |
| 1995 | 1897 |
| 1996 void QuickCheckDetails::Clear() { | 1898 void QuickCheckDetails::Clear() { |
| 1997 for (int i = 0; i < characters_; i++) { | 1899 for (int i = 0; i < characters_; i++) { |
| 1998 positions_[i].mask = 0; | 1900 positions_[i].mask = 0; |
| 1999 positions_[i].value = 0; | 1901 positions_[i].value = 0; |
| 2000 positions_[i].determines_perfectly = false; | 1902 positions_[i].determines_perfectly = false; |
| 2001 } | 1903 } |
| (...skipping 27 matching lines...) Expand all Loading... |
| 2029 if (other->cannot_match_) { | 1931 if (other->cannot_match_) { |
| 2030 return; | 1932 return; |
| 2031 } | 1933 } |
| 2032 if (cannot_match_) { | 1934 if (cannot_match_) { |
| 2033 *this = *other; | 1935 *this = *other; |
| 2034 return; | 1936 return; |
| 2035 } | 1937 } |
| 2036 for (intptr_t i = from_index; i < characters_; i++) { | 1938 for (intptr_t i = from_index; i < characters_; i++) { |
| 2037 QuickCheckDetails::Position* pos = positions(i); | 1939 QuickCheckDetails::Position* pos = positions(i); |
| 2038 QuickCheckDetails::Position* other_pos = other->positions(i); | 1940 QuickCheckDetails::Position* other_pos = other->positions(i); |
| 2039 if (pos->mask != other_pos->mask || | 1941 if (pos->mask != other_pos->mask || pos->value != other_pos->value || |
| 2040 pos->value != other_pos->value || | |
| 2041 !other_pos->determines_perfectly) { | 1942 !other_pos->determines_perfectly) { |
| 2042 // Our mask-compare operation will be approximate unless we have the | 1943 // Our mask-compare operation will be approximate unless we have the |
| 2043 // exact same operation on both sides of the alternation. | 1944 // exact same operation on both sides of the alternation. |
| 2044 pos->determines_perfectly = false; | 1945 pos->determines_perfectly = false; |
| 2045 } | 1946 } |
| 2046 pos->mask &= other_pos->mask; | 1947 pos->mask &= other_pos->mask; |
| 2047 pos->value &= pos->mask; | 1948 pos->value &= pos->mask; |
| 2048 other_pos->value &= pos->mask; | 1949 other_pos->value &= pos->mask; |
| 2049 uint16_t differing_bits = (pos->value ^ other_pos->value); | 1950 uint16_t differing_bits = (pos->value ^ other_pos->value); |
| 2050 pos->mask &= ~differing_bits; | 1951 pos->mask &= ~differing_bits; |
| 2051 pos->value &= pos->mask; | 1952 pos->value &= pos->mask; |
| 2052 } | 1953 } |
| 2053 } | 1954 } |
| 2054 | 1955 |
| 2055 | 1956 |
| 2056 class VisitMarker : public ValueObject { | 1957 class VisitMarker : public ValueObject { |
| 2057 public: | 1958 public: |
| 2058 explicit VisitMarker(NodeInfo* info) : info_(info) { | 1959 explicit VisitMarker(NodeInfo* info) : info_(info) { |
| 2059 ASSERT(!info->visited); | 1960 ASSERT(!info->visited); |
| 2060 info->visited = true; | 1961 info->visited = true; |
| 2061 } | 1962 } |
| 2062 ~VisitMarker() { | 1963 ~VisitMarker() { info_->visited = false; } |
| 2063 info_->visited = false; | 1964 |
| 2064 } | |
| 2065 private: | 1965 private: |
| 2066 NodeInfo* info_; | 1966 NodeInfo* info_; |
| 2067 }; | 1967 }; |
| 2068 | 1968 |
| 2069 | 1969 |
| 2070 RegExpNode* SeqRegExpNode::FilterOneByte(intptr_t depth, bool ignore_case) { | 1970 RegExpNode* SeqRegExpNode::FilterOneByte(intptr_t depth, bool ignore_case) { |
| 2071 if (info()->replacement_calculated) return replacement(); | 1971 if (info()->replacement_calculated) return replacement(); |
| 2072 if (depth < 0) return this; | 1972 if (depth < 0) return this; |
| 2073 ASSERT(!info()->visited); | 1973 ASSERT(!info()->visited); |
| 2074 VisitMarker marker(info()); | 1974 VisitMarker marker(info()); |
| 2075 return FilterSuccessor(depth - 1, ignore_case); | 1975 return FilterSuccessor(depth - 1, ignore_case); |
| 2076 } | 1976 } |
| 2077 | 1977 |
| 2078 | 1978 |
| 2079 RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth, bool ignore_case) { | 1979 RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth, bool ignore_case) { |
| 2080 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); | 1980 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); |
| 2081 if (next == NULL) return set_replacement(NULL); | 1981 if (next == NULL) return set_replacement(NULL); |
| 2082 on_success_ = next; | 1982 on_success_ = next; |
| 2083 return set_replacement(this); | 1983 return set_replacement(this); |
| 2084 } | 1984 } |
| 2085 | 1985 |
| 2086 | 1986 |
| 2087 // We need to check for the following characters: 0x39c 0x3bc 0x178. | 1987 // We need to check for the following characters: 0x39c 0x3bc 0x178. |
| 2088 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { | 1988 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { |
| 2089 // TODO(dcarney): this could be a lot more efficient. | 1989 // TODO(dcarney): this could be a lot more efficient. |
| 2090 return range.Contains(0x39c) || | 1990 return range.Contains(0x39c) || range.Contains(0x3bc) || |
| 2091 range.Contains(0x3bc) || range.Contains(0x178); | 1991 range.Contains(0x178); |
| 2092 } | 1992 } |
| 2093 | 1993 |
| 2094 | 1994 |
| 2095 static bool RangesContainLatin1Equivalents( | 1995 static bool RangesContainLatin1Equivalents( |
| 2096 ZoneGrowableArray<CharacterRange>* ranges) { | 1996 ZoneGrowableArray<CharacterRange>* ranges) { |
| 2097 for (intptr_t i = 0; i < ranges->length(); i++) { | 1997 for (intptr_t i = 0; i < ranges->length(); i++) { |
| 2098 // TODO(dcarney): this could be a lot more efficient. | 1998 // TODO(dcarney): this could be a lot more efficient. |
| 2099 if (RangeContainsLatin1Equivalents(ranges->At(i))) return true; | 1999 if (RangeContainsLatin1Equivalents(ranges->At(i))) return true; |
| 2100 } | 2000 } |
| 2101 return false; | 2001 return false; |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2143 } else { | 2043 } else { |
| 2144 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); | 2044 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); |
| 2145 RegExpCharacterClass* cc = elm.char_class(); | 2045 RegExpCharacterClass* cc = elm.char_class(); |
| 2146 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); | 2046 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); |
| 2147 if (!CharacterRange::IsCanonical(ranges)) { | 2047 if (!CharacterRange::IsCanonical(ranges)) { |
| 2148 CharacterRange::Canonicalize(ranges); | 2048 CharacterRange::Canonicalize(ranges); |
| 2149 } | 2049 } |
| 2150 // Now they are in order so we only need to look at the first. | 2050 // Now they are in order so we only need to look at the first. |
| 2151 intptr_t range_count = ranges->length(); | 2051 intptr_t range_count = ranges->length(); |
| 2152 if (cc->is_negated()) { | 2052 if (cc->is_negated()) { |
| 2153 if (range_count != 0 && | 2053 if (range_count != 0 && ranges->At(0).from() == 0 && |
| 2154 ranges->At(0).from() == 0 && | |
| 2155 ranges->At(0).to() >= Symbols::kMaxOneCharCodeSymbol) { | 2054 ranges->At(0).to() >= Symbols::kMaxOneCharCodeSymbol) { |
| 2156 // This will be handled in a later filter. | 2055 // This will be handled in a later filter. |
| 2157 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; | 2056 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
| 2158 return set_replacement(NULL); | 2057 return set_replacement(NULL); |
| 2159 } | 2058 } |
| 2160 } else { | 2059 } else { |
| 2161 if (range_count == 0 || | 2060 if (range_count == 0 || |
| 2162 ranges->At(0).from() > Symbols::kMaxOneCharCodeSymbol) { | 2061 ranges->At(0).from() > Symbols::kMaxOneCharCodeSymbol) { |
| 2163 // This will be handled in a later filter. | 2062 // This will be handled in a later filter. |
| 2164 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; | 2063 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2219 } | 2118 } |
| 2220 if (surviving < 2) return set_replacement(survivor); | 2119 if (surviving < 2) return set_replacement(survivor); |
| 2221 | 2120 |
| 2222 set_replacement(this); | 2121 set_replacement(this); |
| 2223 if (surviving == choice_count) { | 2122 if (surviving == choice_count) { |
| 2224 return this; | 2123 return this; |
| 2225 } | 2124 } |
| 2226 // Only some of the nodes survived the filtering. We need to rebuild the | 2125 // Only some of the nodes survived the filtering. We need to rebuild the |
| 2227 // alternatives list. | 2126 // alternatives list. |
| 2228 ZoneGrowableArray<GuardedAlternative>* new_alternatives = | 2127 ZoneGrowableArray<GuardedAlternative>* new_alternatives = |
| 2229 new(Z) ZoneGrowableArray<GuardedAlternative>(surviving); | 2128 new (Z) ZoneGrowableArray<GuardedAlternative>(surviving); |
| 2230 for (intptr_t i = 0; i < choice_count; i++) { | 2129 for (intptr_t i = 0; i < choice_count; i++) { |
| 2231 RegExpNode* replacement = | 2130 RegExpNode* replacement = |
| 2232 (*alternatives_)[i].node()->FilterOneByte(depth - 1, ignore_case); | 2131 (*alternatives_)[i].node()->FilterOneByte(depth - 1, ignore_case); |
| 2233 if (replacement != NULL) { | 2132 if (replacement != NULL) { |
| 2234 (*alternatives_)[i].set_node(replacement); | 2133 (*alternatives_)[i].set_node(replacement); |
| 2235 new_alternatives->Add((*alternatives_)[i]); | 2134 new_alternatives->Add((*alternatives_)[i]); |
| 2236 } | 2135 } |
| 2237 } | 2136 } |
| 2238 alternatives_ = new_alternatives; | 2137 alternatives_ = new_alternatives; |
| 2239 return this; | 2138 return this; |
| (...skipping 22 matching lines...) Expand all Loading... |
| 2262 return set_replacement(this); | 2161 return set_replacement(this); |
| 2263 } | 2162 } |
| 2264 | 2163 |
| 2265 | 2164 |
| 2266 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2165 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| 2267 RegExpCompiler* compiler, | 2166 RegExpCompiler* compiler, |
| 2268 intptr_t characters_filled_in, | 2167 intptr_t characters_filled_in, |
| 2269 bool not_at_start) { | 2168 bool not_at_start) { |
| 2270 if (body_can_be_zero_length_ || info()->visited) return; | 2169 if (body_can_be_zero_length_ || info()->visited) return; |
| 2271 VisitMarker marker(info()); | 2170 VisitMarker marker(info()); |
| 2272 return ChoiceNode::GetQuickCheckDetails(details, | 2171 return ChoiceNode::GetQuickCheckDetails(details, compiler, |
| 2273 compiler, | 2172 characters_filled_in, not_at_start); |
| 2274 characters_filled_in, | |
| 2275 not_at_start); | |
| 2276 } | 2173 } |
| 2277 | 2174 |
| 2278 | 2175 |
| 2279 void LoopChoiceNode::FillInBMInfo(intptr_t offset, | 2176 void LoopChoiceNode::FillInBMInfo(intptr_t offset, |
| 2280 intptr_t budget, | 2177 intptr_t budget, |
| 2281 BoyerMooreLookahead* bm, | 2178 BoyerMooreLookahead* bm, |
| 2282 bool not_at_start) { | 2179 bool not_at_start) { |
| 2283 if (body_can_be_zero_length_ || budget <= 0) { | 2180 if (body_can_be_zero_length_ || budget <= 0) { |
| 2284 bm->SetRest(offset); | 2181 bm->SetRest(offset); |
| 2285 SaveBMInfo(bm, not_at_start, offset); | 2182 SaveBMInfo(bm, not_at_start, offset); |
| 2286 return; | 2183 return; |
| 2287 } | 2184 } |
| 2288 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start); | 2185 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start); |
| 2289 SaveBMInfo(bm, not_at_start, offset); | 2186 SaveBMInfo(bm, not_at_start, offset); |
| 2290 } | 2187 } |
| 2291 | 2188 |
| 2292 | 2189 |
| 2293 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2190 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| 2294 RegExpCompiler* compiler, | 2191 RegExpCompiler* compiler, |
| 2295 intptr_t characters_filled_in, | 2192 intptr_t characters_filled_in, |
| 2296 bool not_at_start) { | 2193 bool not_at_start) { |
| 2297 not_at_start = (not_at_start || not_at_start_); | 2194 not_at_start = (not_at_start || not_at_start_); |
| 2298 intptr_t choice_count = alternatives_->length(); | 2195 intptr_t choice_count = alternatives_->length(); |
| 2299 ASSERT(choice_count > 0); | 2196 ASSERT(choice_count > 0); |
| 2300 (*alternatives_)[0].node()->GetQuickCheckDetails(details, | 2197 (*alternatives_)[0].node()->GetQuickCheckDetails( |
| 2301 compiler, | 2198 details, compiler, characters_filled_in, not_at_start); |
| 2302 characters_filled_in, | |
| 2303 not_at_start); | |
| 2304 for (intptr_t i = 1; i < choice_count; i++) { | 2199 for (intptr_t i = 1; i < choice_count; i++) { |
| 2305 QuickCheckDetails new_details(details->characters()); | 2200 QuickCheckDetails new_details(details->characters()); |
| 2306 RegExpNode* node = (*alternatives_)[i].node(); | 2201 RegExpNode* node = (*alternatives_)[i].node(); |
| 2307 node->GetQuickCheckDetails(&new_details, compiler, | 2202 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in, |
| 2308 characters_filled_in, | |
| 2309 not_at_start); | 2203 not_at_start); |
| 2310 // Here we merge the quick match details of the two branches. | 2204 // Here we merge the quick match details of the two branches. |
| 2311 details->Merge(&new_details, characters_filled_in); | 2205 details->Merge(&new_details, characters_filled_in); |
| 2312 } | 2206 } |
| 2313 } | 2207 } |
| 2314 | 2208 |
| 2315 | 2209 |
| 2316 // Check for [0-9A-Z_a-z]. | 2210 // Check for [0-9A-Z_a-z]. |
| 2317 static void EmitWordCheck(RegExpMacroAssembler* assembler, | 2211 static void EmitWordCheck(RegExpMacroAssembler* assembler, |
| 2318 BlockLabel* word, | 2212 BlockLabel* word, |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2350 new_trace.InvalidateCurrentCharacter(); | 2244 new_trace.InvalidateCurrentCharacter(); |
| 2351 | 2245 |
| 2352 BlockLabel ok; | 2246 BlockLabel ok; |
| 2353 if (new_trace.cp_offset() == 0) { | 2247 if (new_trace.cp_offset() == 0) { |
| 2354 // The start of input counts as a newline in this context, so skip to | 2248 // The start of input counts as a newline in this context, so skip to |
| 2355 // ok if we are at the start. | 2249 // ok if we are at the start. |
| 2356 assembler->CheckAtStart(&ok); | 2250 assembler->CheckAtStart(&ok); |
| 2357 } | 2251 } |
| 2358 // We already checked that we are not at the start of input so it must be | 2252 // We already checked that we are not at the start of input so it must be |
| 2359 // OK to load the previous character. | 2253 // OK to load the previous character. |
| 2360 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, | 2254 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, |
| 2361 new_trace.backtrack(), | 2255 new_trace.backtrack(), false); |
| 2362 false); | 2256 if (!assembler->CheckSpecialCharacterClass('n', new_trace.backtrack())) { |
| 2363 if (!assembler->CheckSpecialCharacterClass('n', | |
| 2364 new_trace.backtrack())) { | |
| 2365 // Newline means \n, \r, 0x2028 or 0x2029. | 2257 // Newline means \n, \r, 0x2028 or 0x2029. |
| 2366 if (!compiler->one_byte()) { | 2258 if (!compiler->one_byte()) { |
| 2367 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); | 2259 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); |
| 2368 } | 2260 } |
| 2369 assembler->CheckCharacter('\n', &ok); | 2261 assembler->CheckCharacter('\n', &ok); |
| 2370 assembler->CheckNotCharacter('\r', new_trace.backtrack()); | 2262 assembler->CheckNotCharacter('\r', new_trace.backtrack()); |
| 2371 } | 2263 } |
| 2372 assembler->BindBlock(&ok); | 2264 assembler->BindBlock(&ok); |
| 2373 on_success->Emit(compiler, &new_trace); | 2265 on_success->Emit(compiler, &new_trace); |
| 2374 } | 2266 } |
| 2375 | 2267 |
| 2376 | 2268 |
| 2377 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). | 2269 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). |
| 2378 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { | 2270 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { |
| 2379 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2271 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 2380 Trace::TriBool next_is_word_character = Trace::UNKNOWN; | 2272 Trace::TriBool next_is_word_character = Trace::UNKNOWN; |
| 2381 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); | 2273 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); |
| 2382 BoyerMooreLookahead* lookahead = bm_info(not_at_start); | 2274 BoyerMooreLookahead* lookahead = bm_info(not_at_start); |
| 2383 if (lookahead == NULL) { | 2275 if (lookahead == NULL) { |
| 2384 intptr_t eats_at_least = | 2276 intptr_t eats_at_least = |
| 2385 Utils::Minimum(kMaxLookaheadForBoyerMoore, | 2277 Utils::Minimum(kMaxLookaheadForBoyerMoore, |
| 2386 EatsAtLeast(kMaxLookaheadForBoyerMoore, | 2278 EatsAtLeast(kMaxLookaheadForBoyerMoore, kRecursionBudget, |
| 2387 kRecursionBudget, | |
| 2388 not_at_start)); | 2279 not_at_start)); |
| 2389 if (eats_at_least >= 1) { | 2280 if (eats_at_least >= 1) { |
| 2390 BoyerMooreLookahead* bm = | 2281 BoyerMooreLookahead* bm = |
| 2391 new(Z) BoyerMooreLookahead(eats_at_least, compiler, Z); | 2282 new (Z) BoyerMooreLookahead(eats_at_least, compiler, Z); |
| 2392 FillInBMInfo(0, kRecursionBudget, bm, not_at_start); | 2283 FillInBMInfo(0, kRecursionBudget, bm, not_at_start); |
| 2393 if (bm->at(0)->is_non_word()) | 2284 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE_VALUE; |
| 2394 next_is_word_character = Trace::FALSE_VALUE; | |
| 2395 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; | 2285 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; |
| 2396 } | 2286 } |
| 2397 } else { | 2287 } else { |
| 2398 if (lookahead->at(0)->is_non_word()) | 2288 if (lookahead->at(0)->is_non_word()) |
| 2399 next_is_word_character = Trace::FALSE_VALUE; | 2289 next_is_word_character = Trace::FALSE_VALUE; |
| 2400 if (lookahead->at(0)->is_word()) | 2290 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; |
| 2401 next_is_word_character = Trace::TRUE_VALUE; | |
| 2402 } | 2291 } |
| 2403 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY); | 2292 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY); |
| 2404 if (next_is_word_character == Trace::UNKNOWN) { | 2293 if (next_is_word_character == Trace::UNKNOWN) { |
| 2405 BlockLabel before_non_word; | 2294 BlockLabel before_non_word; |
| 2406 BlockLabel before_word; | 2295 BlockLabel before_word; |
| 2407 if (trace->characters_preloaded() != 1) { | 2296 if (trace->characters_preloaded() != 1) { |
| 2408 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); | 2297 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); |
| 2409 } | 2298 } |
| 2410 // Fall through on non-word. | 2299 // Fall through on non-word. |
| 2411 EmitWordCheck(assembler, &before_word, &before_non_word, false); | 2300 EmitWordCheck(assembler, &before_word, &before_non_word, false); |
| (...skipping 23 matching lines...) Expand all Loading... |
| 2435 void AssertionNode::BacktrackIfPrevious( | 2324 void AssertionNode::BacktrackIfPrevious( |
| 2436 RegExpCompiler* compiler, | 2325 RegExpCompiler* compiler, |
| 2437 Trace* trace, | 2326 Trace* trace, |
| 2438 AssertionNode::IfPrevious backtrack_if_previous) { | 2327 AssertionNode::IfPrevious backtrack_if_previous) { |
| 2439 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2328 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 2440 Trace new_trace(*trace); | 2329 Trace new_trace(*trace); |
| 2441 new_trace.InvalidateCurrentCharacter(); | 2330 new_trace.InvalidateCurrentCharacter(); |
| 2442 | 2331 |
| 2443 BlockLabel fall_through, dummy; | 2332 BlockLabel fall_through, dummy; |
| 2444 | 2333 |
| 2445 BlockLabel* non_word = backtrack_if_previous == kIsNonWord ? | 2334 BlockLabel* non_word = backtrack_if_previous == kIsNonWord |
| 2446 new_trace.backtrack() : | 2335 ? new_trace.backtrack() |
| 2447 &fall_through; | 2336 : &fall_through; |
| 2448 BlockLabel* word = backtrack_if_previous == kIsNonWord ? | 2337 BlockLabel* word = backtrack_if_previous == kIsNonWord |
| 2449 &fall_through : | 2338 ? &fall_through |
| 2450 new_trace.backtrack(); | 2339 : new_trace.backtrack(); |
| 2451 | 2340 |
| 2452 if (new_trace.cp_offset() == 0) { | 2341 if (new_trace.cp_offset() == 0) { |
| 2453 // The start of input counts as a non-word character, so the question is | 2342 // The start of input counts as a non-word character, so the question is |
| 2454 // decided if we are at the start. | 2343 // decided if we are at the start. |
| 2455 assembler->CheckAtStart(non_word); | 2344 assembler->CheckAtStart(non_word); |
| 2456 } | 2345 } |
| 2457 // We already checked that we are not at the start of input so it must be | 2346 // We already checked that we are not at the start of input so it must be |
| 2458 // OK to load the previous character. | 2347 // OK to load the previous character. |
| 2459 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false); | 2348 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false); |
| 2460 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); | 2349 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); |
| 2461 | 2350 |
| 2462 assembler->BindBlock(&fall_through); | 2351 assembler->BindBlock(&fall_through); |
| 2463 on_success()->Emit(compiler, &new_trace); | 2352 on_success()->Emit(compiler, &new_trace); |
| 2464 } | 2353 } |
| 2465 | 2354 |
| 2466 | 2355 |
| 2467 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2356 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| 2468 RegExpCompiler* compiler, | 2357 RegExpCompiler* compiler, |
| 2469 intptr_t filled_in, | 2358 intptr_t filled_in, |
| 2470 bool not_at_start) { | 2359 bool not_at_start) { |
| 2471 if (assertion_type_ == AT_START && not_at_start) { | 2360 if (assertion_type_ == AT_START && not_at_start) { |
| 2472 details->set_cannot_match(); | 2361 details->set_cannot_match(); |
| 2473 return; | 2362 return; |
| 2474 } | 2363 } |
| 2475 return on_success()->GetQuickCheckDetails(details, | 2364 return on_success()->GetQuickCheckDetails(details, compiler, filled_in, |
| 2476 compiler, | |
| 2477 filled_in, | |
| 2478 not_at_start); | 2365 not_at_start); |
| 2479 } | 2366 } |
| 2480 | 2367 |
| 2481 | 2368 |
| 2482 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 2369 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
| 2483 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2370 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 2484 switch (assertion_type_) { | 2371 switch (assertion_type_) { |
| 2485 case AT_END: { | 2372 case AT_END: { |
| 2486 BlockLabel ok; | 2373 BlockLabel ok; |
| 2487 assembler->CheckPosition(trace->cp_offset(), &ok); | 2374 assembler->CheckPosition(trace->cp_offset(), &ok); |
| 2488 assembler->GoTo(trace->backtrack()); | 2375 assembler->GoTo(trace->backtrack()); |
| 2489 assembler->BindBlock(&ok); | 2376 assembler->BindBlock(&ok); |
| 2490 break; | 2377 break; |
| 2491 } | 2378 } |
| 2492 case AT_START: { | 2379 case AT_START: { |
| 2493 if (trace->at_start() == Trace::FALSE_VALUE) { | 2380 if (trace->at_start() == Trace::FALSE_VALUE) { |
| 2494 assembler->GoTo(trace->backtrack()); | 2381 assembler->GoTo(trace->backtrack()); |
| 2495 return; | 2382 return; |
| 2496 } | 2383 } |
| 2497 if (trace->at_start() == Trace::UNKNOWN) { | 2384 if (trace->at_start() == Trace::UNKNOWN) { |
| 2498 assembler->CheckNotAtStart(trace->backtrack()); | 2385 assembler->CheckNotAtStart(trace->backtrack()); |
| 2499 Trace at_start_trace = *trace; | 2386 Trace at_start_trace = *trace; |
| 2500 at_start_trace.set_at_start(true); | 2387 at_start_trace.set_at_start(true); |
| 2501 on_success()->Emit(compiler, &at_start_trace); | 2388 on_success()->Emit(compiler, &at_start_trace); |
| 2502 return; | 2389 return; |
| 2503 } | 2390 } |
| 2504 } | 2391 } break; |
| 2505 break; | |
| 2506 case AFTER_NEWLINE: | 2392 case AFTER_NEWLINE: |
| 2507 EmitHat(compiler, on_success(), trace); | 2393 EmitHat(compiler, on_success(), trace); |
| 2508 return; | 2394 return; |
| 2509 case AT_BOUNDARY: | 2395 case AT_BOUNDARY: |
| 2510 case AT_NON_BOUNDARY: { | 2396 case AT_NON_BOUNDARY: { |
| 2511 EmitBoundaryCheck(compiler, trace); | 2397 EmitBoundaryCheck(compiler, trace); |
| 2512 return; | 2398 return; |
| 2513 } | 2399 } |
| 2514 } | 2400 } |
| 2515 on_success()->Emit(compiler, trace); | 2401 on_success()->Emit(compiler, trace); |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2593 case SIMPLE_CHARACTER_MATCH: | 2479 case SIMPLE_CHARACTER_MATCH: |
| 2594 emit_function = &EmitSimpleCharacter; | 2480 emit_function = &EmitSimpleCharacter; |
| 2595 break; | 2481 break; |
| 2596 case CASE_CHARACTER_MATCH: | 2482 case CASE_CHARACTER_MATCH: |
| 2597 emit_function = &EmitAtomLetter; | 2483 emit_function = &EmitAtomLetter; |
| 2598 break; | 2484 break; |
| 2599 default: | 2485 default: |
| 2600 break; | 2486 break; |
| 2601 } | 2487 } |
| 2602 if (emit_function != NULL) { | 2488 if (emit_function != NULL) { |
| 2603 bool bound_checked = emit_function(Z, | 2489 bool bound_checked = emit_function( |
| 2604 compiler, | 2490 Z, compiler, quarks->At(j), backtrack, cp_offset + j, |
| 2605 quarks->At(j), | 2491 *checked_up_to < cp_offset + j, preloaded); |
| 2606 backtrack, | |
| 2607 cp_offset + j, | |
| 2608 *checked_up_to < cp_offset + j, | |
| 2609 preloaded); | |
| 2610 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); | 2492 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); |
| 2611 } | 2493 } |
| 2612 } | 2494 } |
| 2613 } else { | 2495 } else { |
| 2614 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); | 2496 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); |
| 2615 if (pass == CHARACTER_CLASS_MATCH) { | 2497 if (pass == CHARACTER_CLASS_MATCH) { |
| 2616 if (first_element_checked && i == 0) continue; | 2498 if (first_element_checked && i == 0) continue; |
| 2617 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; | 2499 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; |
| 2618 RegExpCharacterClass* cc = elm.char_class(); | 2500 RegExpCharacterClass* cc = elm.char_class(); |
| 2619 EmitCharClass(assembler, | 2501 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, |
| 2620 cc, | 2502 *checked_up_to < cp_offset, preloaded, Z); |
| 2621 one_byte, | |
| 2622 backtrack, | |
| 2623 cp_offset, | |
| 2624 *checked_up_to < cp_offset, | |
| 2625 preloaded, | |
| 2626 Z); | |
| 2627 UpdateBoundsCheck(cp_offset, checked_up_to); | 2503 UpdateBoundsCheck(cp_offset, checked_up_to); |
| 2628 } | 2504 } |
| 2629 } | 2505 } |
| 2630 } | 2506 } |
| 2631 } | 2507 } |
| 2632 | 2508 |
| 2633 | 2509 |
| 2634 intptr_t TextNode::Length() { | 2510 intptr_t TextNode::Length() { |
| 2635 TextElement elm = elms_->Last(); | 2511 TextElement elm = elms_->Last(); |
| 2636 ASSERT(elm.cp_offset() >= 0); | 2512 ASSERT(elm.cp_offset() >= 0); |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2671 | 2547 |
| 2672 bool first_elt_done = false; | 2548 bool first_elt_done = false; |
| 2673 intptr_t bound_checked_to = trace->cp_offset() - 1; | 2549 intptr_t bound_checked_to = trace->cp_offset() - 1; |
| 2674 bound_checked_to += trace->bound_checked_up_to(); | 2550 bound_checked_to += trace->bound_checked_up_to(); |
| 2675 | 2551 |
| 2676 // If a character is preloaded into the current character register then | 2552 // If a character is preloaded into the current character register then |
| 2677 // check that now. | 2553 // check that now. |
| 2678 if (trace->characters_preloaded() == 1) { | 2554 if (trace->characters_preloaded() == 1) { |
| 2679 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) { | 2555 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) { |
| 2680 if (!SkipPass(pass, compiler->ignore_case())) { | 2556 if (!SkipPass(pass, compiler->ignore_case())) { |
| 2681 TextEmitPass(compiler, | 2557 TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), true, trace, |
| 2682 static_cast<TextEmitPassType>(pass), | 2558 false, &bound_checked_to); |
| 2683 true, | |
| 2684 trace, | |
| 2685 false, | |
| 2686 &bound_checked_to); | |
| 2687 } | 2559 } |
| 2688 } | 2560 } |
| 2689 first_elt_done = true; | 2561 first_elt_done = true; |
| 2690 } | 2562 } |
| 2691 | 2563 |
| 2692 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) { | 2564 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) { |
| 2693 if (!SkipPass(pass, compiler->ignore_case())) { | 2565 if (!SkipPass(pass, compiler->ignore_case())) { |
| 2694 TextEmitPass(compiler, | 2566 TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), false, trace, |
| 2695 static_cast<TextEmitPassType>(pass), | 2567 first_elt_done, &bound_checked_to); |
| 2696 false, | |
| 2697 trace, | |
| 2698 first_elt_done, | |
| 2699 &bound_checked_to); | |
| 2700 } | 2568 } |
| 2701 } | 2569 } |
| 2702 | 2570 |
| 2703 Trace successor_trace(*trace); | 2571 Trace successor_trace(*trace); |
| 2704 successor_trace.set_at_start(false); | 2572 successor_trace.set_at_start(false); |
| 2705 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); | 2573 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); |
| 2706 RecursionCheck rc(compiler); | 2574 RecursionCheck rc(compiler); |
| 2707 on_success()->Emit(compiler, &successor_trace); | 2575 on_success()->Emit(compiler, &successor_trace); |
| 2708 } | 2576 } |
| 2709 | 2577 |
| (...skipping 12 matching lines...) Expand all Loading... |
| 2722 characters_preloaded_ = 0; | 2590 characters_preloaded_ = 0; |
| 2723 // Adjust the offsets of the quick check performed information. This | 2591 // Adjust the offsets of the quick check performed information. This |
| 2724 // information is used to find out what we already determined about the | 2592 // information is used to find out what we already determined about the |
| 2725 // characters by means of mask and compare. | 2593 // characters by means of mask and compare. |
| 2726 quick_check_performed_.Advance(by, compiler->one_byte()); | 2594 quick_check_performed_.Advance(by, compiler->one_byte()); |
| 2727 cp_offset_ += by; | 2595 cp_offset_ += by; |
| 2728 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { | 2596 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { |
| 2729 compiler->SetRegExpTooBig(); | 2597 compiler->SetRegExpTooBig(); |
| 2730 cp_offset_ = 0; | 2598 cp_offset_ = 0; |
| 2731 } | 2599 } |
| 2732 bound_checked_up_to_ = Utils::Maximum(static_cast<intptr_t>(0), | 2600 bound_checked_up_to_ = |
| 2733 bound_checked_up_to_ - by); | 2601 Utils::Maximum(static_cast<intptr_t>(0), bound_checked_up_to_ - by); |
| 2734 } | 2602 } |
| 2735 | 2603 |
| 2736 | 2604 |
| 2737 void TextNode::MakeCaseIndependent(bool is_one_byte) { | 2605 void TextNode::MakeCaseIndependent(bool is_one_byte) { |
| 2738 intptr_t element_count = elms_->length(); | 2606 intptr_t element_count = elms_->length(); |
| 2739 for (intptr_t i = 0; i < element_count; i++) { | 2607 for (intptr_t i = 0; i < element_count; i++) { |
| 2740 TextElement elm = elms_->At(i); | 2608 TextElement elm = elms_->At(i); |
| 2741 if (elm.text_type() == TextElement::CHAR_CLASS) { | 2609 if (elm.text_type() == TextElement::CHAR_CLASS) { |
| 2742 RegExpCharacterClass* cc = elm.char_class(); | 2610 RegExpCharacterClass* cc = elm.char_class(); |
| 2743 // None of the standard character classes is different in the case | 2611 // None of the standard character classes is different in the case |
| (...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2842 if (!trace->is_trivial()) { | 2710 if (!trace->is_trivial()) { |
| 2843 trace->Flush(compiler, this); | 2711 trace->Flush(compiler, this); |
| 2844 return; | 2712 return; |
| 2845 } | 2713 } |
| 2846 ChoiceNode::Emit(compiler, trace); | 2714 ChoiceNode::Emit(compiler, trace); |
| 2847 } | 2715 } |
| 2848 | 2716 |
| 2849 | 2717 |
| 2850 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, | 2718 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, |
| 2851 intptr_t eats_at_least) { | 2719 intptr_t eats_at_least) { |
| 2852 intptr_t preload_characters = Utils::Minimum(static_cast<intptr_t>(4), | 2720 intptr_t preload_characters = |
| 2853 eats_at_least); | 2721 Utils::Minimum(static_cast<intptr_t>(4), eats_at_least); |
| 2854 if (compiler->macro_assembler()->CanReadUnaligned()) { | 2722 if (compiler->macro_assembler()->CanReadUnaligned()) { |
| 2855 bool one_byte = compiler->one_byte(); | 2723 bool one_byte = compiler->one_byte(); |
| 2856 if (one_byte) { | 2724 if (one_byte) { |
| 2857 if (preload_characters > 4) preload_characters = 4; | 2725 if (preload_characters > 4) preload_characters = 4; |
| 2858 // We can't preload 3 characters because there is no machine instruction | 2726 // We can't preload 3 characters because there is no machine instruction |
| 2859 // to do that. We can't just load 4 because we could be reading | 2727 // to do that. We can't just load 4 because we could be reading |
| 2860 // beyond the end of the string, which could cause a memory fault. | 2728 // beyond the end of the string, which could cause a memory fault. |
| 2861 if (preload_characters == 3) preload_characters = 2; | 2729 if (preload_characters == 3) preload_characters = 2; |
| 2862 } else { | 2730 } else { |
| 2863 if (preload_characters > 2) preload_characters = 2; | 2731 if (preload_characters > 2) preload_characters = 2; |
| 2864 } | 2732 } |
| 2865 } else { | 2733 } else { |
| 2866 if (preload_characters > 1) preload_characters = 1; | 2734 if (preload_characters > 1) preload_characters = 1; |
| 2867 } | 2735 } |
| 2868 return preload_characters; | 2736 return preload_characters; |
| 2869 } | 2737 } |
| 2870 | 2738 |
| 2871 | 2739 |
| 2872 // This structure is used when generating the alternatives in a choice node. It | 2740 // This structure is used when generating the alternatives in a choice node. It |
| 2873 // records the way the alternative is being code generated. | 2741 // records the way the alternative is being code generated. |
| 2874 struct AlternativeGeneration { | 2742 struct AlternativeGeneration { |
| 2875 AlternativeGeneration() | 2743 AlternativeGeneration() |
| 2876 : possible_success(), | 2744 : possible_success(), |
| 2877 expects_preload(false), | 2745 expects_preload(false), |
| 2878 after(), | 2746 after(), |
| 2879 quick_check_details() { } | 2747 quick_check_details() {} |
| 2880 BlockLabel possible_success; | 2748 BlockLabel possible_success; |
| 2881 bool expects_preload; | 2749 bool expects_preload; |
| 2882 BlockLabel after; | 2750 BlockLabel after; |
| 2883 QuickCheckDetails quick_check_details; | 2751 QuickCheckDetails quick_check_details; |
| 2884 }; | 2752 }; |
| 2885 | 2753 |
| 2886 | 2754 |
| 2887 // Creates a list of AlternativeGenerations. If the list has a reasonable | 2755 // Creates a list of AlternativeGenerations. If the list has a reasonable |
| 2888 // size then it is on the stack, otherwise the excess is on the heap. | 2756 // size then it is on the stack, otherwise the excess is on the heap. |
| 2889 class AlternativeGenerationList { | 2757 class AlternativeGenerationList { |
| 2890 public: | 2758 public: |
| 2891 explicit AlternativeGenerationList(intptr_t count) | 2759 explicit AlternativeGenerationList(intptr_t count) : alt_gens_(count) { |
| 2892 : alt_gens_(count) { | |
| 2893 for (intptr_t i = 0; i < count && i < kAFew; i++) { | 2760 for (intptr_t i = 0; i < count && i < kAFew; i++) { |
| 2894 alt_gens_.Add(a_few_alt_gens_ + i); | 2761 alt_gens_.Add(a_few_alt_gens_ + i); |
| 2895 } | 2762 } |
| 2896 for (intptr_t i = kAFew; i < count; i++) { | 2763 for (intptr_t i = kAFew; i < count; i++) { |
| 2897 alt_gens_.Add(new AlternativeGeneration()); | 2764 alt_gens_.Add(new AlternativeGeneration()); |
| 2898 } | 2765 } |
| 2899 } | 2766 } |
| 2900 ~AlternativeGenerationList() { | 2767 ~AlternativeGenerationList() { |
| 2901 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) { | 2768 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) { |
| 2902 delete alt_gens_[i]; | 2769 delete alt_gens_[i]; |
| 2903 alt_gens_[i] = NULL; | 2770 alt_gens_[i] = NULL; |
| 2904 } | 2771 } |
| 2905 } | 2772 } |
| 2906 | 2773 |
| 2907 AlternativeGeneration* at(intptr_t i) { | 2774 AlternativeGeneration* at(intptr_t i) { return alt_gens_[i]; } |
| 2908 return alt_gens_[i]; | |
| 2909 } | |
| 2910 | 2775 |
| 2911 private: | 2776 private: |
| 2912 static const intptr_t kAFew = 10; | 2777 static const intptr_t kAFew = 10; |
| 2913 GrowableArray<AlternativeGeneration*> alt_gens_; | 2778 GrowableArray<AlternativeGeneration*> alt_gens_; |
| 2914 AlternativeGeneration a_few_alt_gens_[kAFew]; | 2779 AlternativeGeneration a_few_alt_gens_[kAFew]; |
| 2915 | 2780 |
| 2916 DISALLOW_ALLOCATION(); | 2781 DISALLOW_ALLOCATION(); |
| 2917 }; | 2782 }; |
| 2918 | 2783 |
| 2919 | 2784 |
| 2920 // The '2' variant is inclusive from and exclusive to. | 2785 // The '2' variant is inclusive from and exclusive to. |
| 2921 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12, | 2786 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12, |
| 2922 // which include WhiteSpace (7.2) or LineTerminator (7.3) values. | 2787 // which include WhiteSpace (7.2) or LineTerminator (7.3) values. |
| 2923 static const intptr_t kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1, | 2788 static const intptr_t kSpaceRanges[] = { |
| 2924 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, | 2789 '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, 0x00A1, 0x1680, 0x1681, |
| 2925 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, | 2790 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030, |
| 2926 0xFEFF, 0xFF00, 0x10000 }; | 2791 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000}; |
| 2927 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges); | 2792 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges); |
| 2928 static const intptr_t kWordRanges[] = { | 2793 static const intptr_t kWordRanges[] = {'0', '9' + 1, 'A', 'Z' + 1, '_', |
| 2929 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 }; | 2794 '_' + 1, 'a', 'z' + 1, 0x10000}; |
| 2930 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges); | 2795 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges); |
| 2931 static const intptr_t kDigitRanges[] = { '0', '9' + 1, 0x10000 }; | 2796 static const intptr_t kDigitRanges[] = {'0', '9' + 1, 0x10000}; |
| 2932 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges); | 2797 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges); |
| 2933 static const intptr_t kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 }; | 2798 static const intptr_t kSurrogateRanges[] = {0xd800, 0xe000, 0x10000}; |
| 2934 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges); | 2799 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges); |
| 2935 static const intptr_t kLineTerminatorRanges[] = { | 2800 static const intptr_t kLineTerminatorRanges[] = {0x000A, 0x000B, 0x000D, 0x000E, |
| 2936 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, 0x10000 }; | 2801 0x2028, 0x202A, 0x10000}; |
| 2937 static const intptr_t kLineTerminatorRangeCount = | 2802 static const intptr_t kLineTerminatorRangeCount = |
| 2938 ARRAY_SIZE(kLineTerminatorRanges); | 2803 ARRAY_SIZE(kLineTerminatorRanges); |
| 2939 | 2804 |
| 2940 | 2805 |
| 2941 void BoyerMoorePositionInfo::Set(intptr_t character) { | 2806 void BoyerMoorePositionInfo::Set(intptr_t character) { |
| 2942 SetInterval(Interval(character, character)); | 2807 SetInterval(Interval(character, character)); |
| 2943 } | 2808 } |
| 2944 | 2809 |
| 2945 | 2810 |
| 2946 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { | 2811 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { |
| 2947 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); | 2812 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); |
| 2948 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); | 2813 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); |
| 2949 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); | 2814 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); |
| 2950 surrogate_ = | 2815 surrogate_ = |
| 2951 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval); | 2816 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval); |
| 2952 if (interval.to() - interval.from() >= kMapSize - 1) { | 2817 if (interval.to() - interval.from() >= kMapSize - 1) { |
| 2953 if (map_count_ != kMapSize) { | 2818 if (map_count_ != kMapSize) { |
| 2954 map_count_ = kMapSize; | 2819 map_count_ = kMapSize; |
| 2955 for (intptr_t i = 0; i < kMapSize; i++) (*map_)[i] = true; | 2820 for (intptr_t i = 0; i < kMapSize; i++) |
| 2821 (*map_)[i] = true; |
| 2956 } | 2822 } |
| 2957 return; | 2823 return; |
| 2958 } | 2824 } |
| 2959 for (intptr_t i = interval.from(); i <= interval.to(); i++) { | 2825 for (intptr_t i = interval.from(); i <= interval.to(); i++) { |
| 2960 intptr_t mod_character = (i & kMask); | 2826 intptr_t mod_character = (i & kMask); |
| 2961 if (!map_->At(mod_character)) { | 2827 if (!map_->At(mod_character)) { |
| 2962 map_count_++; | 2828 map_count_++; |
| 2963 (*map_)[mod_character] = true; | 2829 (*map_)[mod_character] = true; |
| 2964 } | 2830 } |
| 2965 if (map_count_ == kMapSize) return; | 2831 if (map_count_ == kMapSize) return; |
| 2966 } | 2832 } |
| 2967 } | 2833 } |
| 2968 | 2834 |
| 2969 | 2835 |
| 2970 void BoyerMoorePositionInfo::SetAll() { | 2836 void BoyerMoorePositionInfo::SetAll() { |
| 2971 s_ = w_ = d_ = kLatticeUnknown; | 2837 s_ = w_ = d_ = kLatticeUnknown; |
| 2972 if (map_count_ != kMapSize) { | 2838 if (map_count_ != kMapSize) { |
| 2973 map_count_ = kMapSize; | 2839 map_count_ = kMapSize; |
| 2974 for (intptr_t i = 0; i < kMapSize; i++) (*map_)[i] = true; | 2840 for (intptr_t i = 0; i < kMapSize; i++) |
| 2841 (*map_)[i] = true; |
| 2975 } | 2842 } |
| 2976 } | 2843 } |
| 2977 | 2844 |
| 2978 | 2845 |
| 2979 BoyerMooreLookahead::BoyerMooreLookahead( | 2846 BoyerMooreLookahead::BoyerMooreLookahead(intptr_t length, |
| 2980 intptr_t length, RegExpCompiler* compiler, Zone* zone) | 2847 RegExpCompiler* compiler, |
| 2981 : length_(length), | 2848 Zone* zone) |
| 2982 compiler_(compiler) { | 2849 : length_(length), compiler_(compiler) { |
| 2983 if (compiler->one_byte()) { | 2850 if (compiler->one_byte()) { |
| 2984 max_char_ = Symbols::kMaxOneCharCodeSymbol; | 2851 max_char_ = Symbols::kMaxOneCharCodeSymbol; |
| 2985 } else { | 2852 } else { |
| 2986 max_char_ = Utf16::kMaxCodeUnit; | 2853 max_char_ = Utf16::kMaxCodeUnit; |
| 2987 } | 2854 } |
| 2988 bitmaps_ = new(zone) ZoneGrowableArray<BoyerMoorePositionInfo*>(length); | 2855 bitmaps_ = new (zone) ZoneGrowableArray<BoyerMoorePositionInfo*>(length); |
| 2989 for (intptr_t i = 0; i < length; i++) { | 2856 for (intptr_t i = 0; i < length; i++) { |
| 2990 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone)); | 2857 bitmaps_->Add(new (zone) BoyerMoorePositionInfo(zone)); |
| 2991 } | 2858 } |
| 2992 } | 2859 } |
| 2993 | 2860 |
| 2994 | 2861 |
| 2995 // Find the longest range of lookahead that has the fewest number of different | 2862 // Find the longest range of lookahead that has the fewest number of different |
| 2996 // characters that can occur at a given position. Since we are optimizing two | 2863 // characters that can occur at a given position. Since we are optimizing two |
| 2997 // different parameters at once this is a tradeoff. | 2864 // different parameters at once this is a tradeoff. |
| 2998 bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) { | 2865 bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) { |
| 2999 intptr_t biggest_points = 0; | 2866 intptr_t biggest_points = 0; |
| 3000 // If more than 32 characters out of 128 can occur it is unlikely that we can | 2867 // If more than 32 characters out of 128 can occur it is unlikely that we can |
| 3001 // be lucky enough to step forwards much of the time. | 2868 // be lucky enough to step forwards much of the time. |
| 3002 const intptr_t kMaxMax = 32; | 2869 const intptr_t kMaxMax = 32; |
| 3003 for (intptr_t max_number_of_chars = 4; | 2870 for (intptr_t max_number_of_chars = 4; max_number_of_chars < kMaxMax; |
| 3004 max_number_of_chars < kMaxMax; | |
| 3005 max_number_of_chars *= 2) { | 2871 max_number_of_chars *= 2) { |
| 3006 biggest_points = | 2872 biggest_points = |
| 3007 FindBestInterval(max_number_of_chars, biggest_points, from, to); | 2873 FindBestInterval(max_number_of_chars, biggest_points, from, to); |
| 3008 } | 2874 } |
| 3009 if (biggest_points == 0) return false; | 2875 if (biggest_points == 0) return false; |
| 3010 return true; | 2876 return true; |
| 3011 } | 2877 } |
| 3012 | 2878 |
| 3013 | 2879 |
| 3014 // Find the highest-points range between 0 and length_ where the character | 2880 // Find the highest-points range between 0 and length_ where the character |
| 3015 // information is not too vague. 'Too vague' means that there are more than | 2881 // information is not too vague. 'Too vague' means that there are more than |
| 3016 // max_number_of_chars that can occur at this position. Calculates the number | 2882 // max_number_of_chars that can occur at this position. Calculates the number |
| 3017 // of points as the product of width-of-the-range and | 2883 // of points as the product of width-of-the-range and |
| 3018 // probability-of-finding-one-of-the-characters, where the probability is | 2884 // probability-of-finding-one-of-the-characters, where the probability is |
| 3019 // calculated using the frequency distribution of the sample subject string. | 2885 // calculated using the frequency distribution of the sample subject string. |
| 3020 intptr_t BoyerMooreLookahead::FindBestInterval( | 2886 intptr_t BoyerMooreLookahead::FindBestInterval(intptr_t max_number_of_chars, |
| 3021 intptr_t max_number_of_chars, | 2887 intptr_t old_biggest_points, |
| 3022 intptr_t old_biggest_points, | 2888 intptr_t* from, |
| 3023 intptr_t* from, | 2889 intptr_t* to) { |
| 3024 intptr_t* to) { | |
| 3025 intptr_t biggest_points = old_biggest_points; | 2890 intptr_t biggest_points = old_biggest_points; |
| 3026 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; | 2891 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; |
| 3027 for (intptr_t i = 0; i < length_; ) { | 2892 for (intptr_t i = 0; i < length_;) { |
| 3028 while (i < length_ && Count(i) > max_number_of_chars) i++; | 2893 while (i < length_ && Count(i) > max_number_of_chars) |
| 2894 i++; |
| 3029 if (i == length_) break; | 2895 if (i == length_) break; |
| 3030 intptr_t remembered_from = i; | 2896 intptr_t remembered_from = i; |
| 3031 bool union_map[kSize]; | 2897 bool union_map[kSize]; |
| 3032 for (intptr_t j = 0; j < kSize; j++) union_map[j] = false; | 2898 for (intptr_t j = 0; j < kSize; j++) |
| 2899 union_map[j] = false; |
| 3033 while (i < length_ && Count(i) <= max_number_of_chars) { | 2900 while (i < length_ && Count(i) <= max_number_of_chars) { |
| 3034 BoyerMoorePositionInfo* map = bitmaps_->At(i); | 2901 BoyerMoorePositionInfo* map = bitmaps_->At(i); |
| 3035 for (intptr_t j = 0; j < kSize; j++) union_map[j] |= map->at(j); | 2902 for (intptr_t j = 0; j < kSize; j++) |
| 2903 union_map[j] |= map->at(j); |
| 3036 i++; | 2904 i++; |
| 3037 } | 2905 } |
| 3038 intptr_t frequency = 0; | 2906 intptr_t frequency = 0; |
| 3039 for (intptr_t j = 0; j < kSize; j++) { | 2907 for (intptr_t j = 0; j < kSize; j++) { |
| 3040 if (union_map[j]) { | 2908 if (union_map[j]) { |
| 3041 // Add 1 to the frequency to give a small per-character boost for | 2909 // Add 1 to the frequency to give a small per-character boost for |
| 3042 // the cases where our sampling is not good enough and many | 2910 // the cases where our sampling is not good enough and many |
| 3043 // characters have a frequency of zero. This means the frequency | 2911 // characters have a frequency of zero. This means the frequency |
| 3044 // can theoretically be up to 2*kSize though we treat it mostly as | 2912 // can theoretically be up to 2*kSize though we treat it mostly as |
| 3045 // a fraction of kSize. | 2913 // a fraction of kSize. |
| 3046 frequency += compiler_->frequency_collator()->Frequency(j) + 1; | 2914 frequency += compiler_->frequency_collator()->Frequency(j) + 1; |
| 3047 } | 2915 } |
| 3048 } | 2916 } |
| 3049 // We use the probability of skipping times the distance we are skipping to | 2917 // We use the probability of skipping times the distance we are skipping to |
| 3050 // judge the effectiveness of this. Actually we have a cut-off: By | 2918 // judge the effectiveness of this. Actually we have a cut-off: By |
| 3051 // dividing by 2 we switch off the skipping if the probability of skipping | 2919 // dividing by 2 we switch off the skipping if the probability of skipping |
| 3052 // is less than 50%. This is because the multibyte mask-and-compare | 2920 // is less than 50%. This is because the multibyte mask-and-compare |
| 3053 // skipping in quickcheck is more likely to do well on this case. | 2921 // skipping in quickcheck is more likely to do well on this case. |
| 3054 bool in_quickcheck_range = ((i - remembered_from < 4) || | 2922 bool in_quickcheck_range = |
| 3055 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2)); | 2923 ((i - remembered_from < 4) || |
| 2924 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2)); |
| 3056 // Called 'probability' but it is only a rough estimate and can actually | 2925 // Called 'probability' but it is only a rough estimate and can actually |
| 3057 // be outside the 0-kSize range. | 2926 // be outside the 0-kSize range. |
| 3058 intptr_t probability = | 2927 intptr_t probability = |
| 3059 (in_quickcheck_range ? kSize / 2 : kSize) - frequency; | 2928 (in_quickcheck_range ? kSize / 2 : kSize) - frequency; |
| 3060 intptr_t points = (i - remembered_from) * probability; | 2929 intptr_t points = (i - remembered_from) * probability; |
| 3061 if (points > biggest_points) { | 2930 if (points > biggest_points) { |
| 3062 *from = remembered_from; | 2931 *from = remembered_from; |
| 3063 *to = i - 1; | 2932 *to = i - 1; |
| 3064 biggest_points = points; | 2933 biggest_points = points; |
| 3065 } | 2934 } |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3133 // The mask-compare can probably handle this better. | 3002 // The mask-compare can probably handle this better. |
| 3134 return; | 3003 return; |
| 3135 } | 3004 } |
| 3136 | 3005 |
| 3137 if (found_single_character) { | 3006 if (found_single_character) { |
| 3138 BlockLabel cont, again; | 3007 BlockLabel cont, again; |
| 3139 masm->BindBlock(&again); | 3008 masm->BindBlock(&again); |
| 3140 masm->LoadCurrentCharacter(max_lookahead, &cont, true); | 3009 masm->LoadCurrentCharacter(max_lookahead, &cont, true); |
| 3141 if (max_char_ > kSize) { | 3010 if (max_char_ > kSize) { |
| 3142 masm->CheckCharacterAfterAnd(single_character, | 3011 masm->CheckCharacterAfterAnd(single_character, |
| 3143 RegExpMacroAssembler::kTableMask, | 3012 RegExpMacroAssembler::kTableMask, &cont); |
| 3144 &cont); | |
| 3145 } else { | 3013 } else { |
| 3146 masm->CheckCharacter(single_character, &cont); | 3014 masm->CheckCharacter(single_character, &cont); |
| 3147 } | 3015 } |
| 3148 masm->AdvanceCurrentPosition(lookahead_width); | 3016 masm->AdvanceCurrentPosition(lookahead_width); |
| 3149 masm->GoTo(&again); | 3017 masm->GoTo(&again); |
| 3150 masm->BindBlock(&cont); | 3018 masm->BindBlock(&cont); |
| 3151 return; | 3019 return; |
| 3152 } | 3020 } |
| 3153 | 3021 |
| 3154 const TypedData& boolean_skip_table = TypedData::ZoneHandle( | 3022 const TypedData& boolean_skip_table = TypedData::ZoneHandle( |
| 3155 compiler_->zone(), | 3023 compiler_->zone(), |
| 3156 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); | 3024 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); |
| 3157 intptr_t skip_distance = GetSkipTable( | 3025 intptr_t skip_distance = |
| 3158 min_lookahead, max_lookahead, boolean_skip_table); | 3026 GetSkipTable(min_lookahead, max_lookahead, boolean_skip_table); |
| 3159 ASSERT(skip_distance != 0); | 3027 ASSERT(skip_distance != 0); |
| 3160 | 3028 |
| 3161 BlockLabel cont, again; | 3029 BlockLabel cont, again; |
| 3162 | 3030 |
| 3163 masm->BindBlock(&again); | 3031 masm->BindBlock(&again); |
| 3164 masm->LoadCurrentCharacter(max_lookahead, &cont, true); | 3032 masm->LoadCurrentCharacter(max_lookahead, &cont, true); |
| 3165 masm->CheckBitInTable(boolean_skip_table, &cont); | 3033 masm->CheckBitInTable(boolean_skip_table, &cont); |
| 3166 masm->AdvanceCurrentPosition(skip_distance); | 3034 masm->AdvanceCurrentPosition(skip_distance); |
| 3167 masm->GoTo(&again); | 3035 masm->GoTo(&again); |
| 3168 masm->BindBlock(&cont); | 3036 masm->BindBlock(&cont); |
| (...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3262 ASSERT(!trace->mentions_reg(guards->At(j)->reg())); | 3130 ASSERT(!trace->mentions_reg(guards->At(j)->reg())); |
| 3263 } | 3131 } |
| 3264 } | 3132 } |
| 3265 #endif | 3133 #endif |
| 3266 } | 3134 } |
| 3267 | 3135 |
| 3268 | 3136 |
| 3269 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, | 3137 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, |
| 3270 Trace* current_trace, | 3138 Trace* current_trace, |
| 3271 PreloadState* state) { | 3139 PreloadState* state) { |
| 3272 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { | 3140 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { |
| 3273 // Save some time by looking at most one machine word ahead. | 3141 // Save some time by looking at most one machine word ahead. |
| 3274 state->eats_at_least_ = | 3142 state->eats_at_least_ = |
| 3275 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget, | 3143 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget, |
| 3276 current_trace->at_start() == Trace::FALSE_VALUE); | 3144 current_trace->at_start() == Trace::FALSE_VALUE); |
| 3277 } | 3145 } |
| 3278 state->preload_characters_ = | 3146 state->preload_characters_ = |
| 3279 CalculatePreloadCharacters(compiler, state->eats_at_least_); | 3147 CalculatePreloadCharacters(compiler, state->eats_at_least_); |
| 3280 | 3148 |
| 3281 state->preload_is_current_ = | 3149 state->preload_is_current_ = |
| 3282 (current_trace->characters_preloaded() == state->preload_characters_); | 3150 (current_trace->characters_preloaded() == state->preload_characters_); |
| 3283 state->preload_has_checked_bounds_ = state->preload_is_current_; | 3151 state->preload_has_checked_bounds_ = state->preload_is_current_; |
| 3284 } | 3152 } |
| 3285 | 3153 |
| 3286 | 3154 |
| 3287 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3155 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
| 3288 intptr_t choice_count = alternatives_->length(); | 3156 intptr_t choice_count = alternatives_->length(); |
| 3289 | 3157 |
| 3290 AssertGuardsMentionRegisters(trace); | 3158 AssertGuardsMentionRegisters(trace); |
| 3291 | 3159 |
| 3292 LimitResult limit_result = LimitVersions(compiler, trace); | 3160 LimitResult limit_result = LimitVersions(compiler, trace); |
| 3293 if (limit_result == DONE) return; | 3161 if (limit_result == DONE) return; |
| (...skipping 10 matching lines...) Expand all Loading... |
| 3304 | 3172 |
| 3305 PreloadState preload; | 3173 PreloadState preload; |
| 3306 preload.init(); | 3174 preload.init(); |
| 3307 GreedyLoopState greedy_loop_state(not_at_start()); | 3175 GreedyLoopState greedy_loop_state(not_at_start()); |
| 3308 | 3176 |
| 3309 intptr_t text_length = | 3177 intptr_t text_length = |
| 3310 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0])); | 3178 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0])); |
| 3311 AlternativeGenerationList alt_gens(choice_count); | 3179 AlternativeGenerationList alt_gens(choice_count); |
| 3312 | 3180 |
| 3313 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { | 3181 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { |
| 3314 trace = EmitGreedyLoop(compiler, | 3182 trace = EmitGreedyLoop(compiler, trace, &alt_gens, &preload, |
| 3315 trace, | 3183 &greedy_loop_state, text_length); |
| 3316 &alt_gens, | |
| 3317 &preload, | |
| 3318 &greedy_loop_state, | |
| 3319 text_length); | |
| 3320 } else { | 3184 } else { |
| 3321 // TODO(erikcorry): Delete this. We don't need this label, but it makes us | 3185 // TODO(erikcorry): Delete this. We don't need this label, but it makes us |
| 3322 // match the traces produced pre-cleanup. | 3186 // match the traces produced pre-cleanup. |
| 3323 BlockLabel second_choice; | 3187 BlockLabel second_choice; |
| 3324 compiler->macro_assembler()->BindBlock(&second_choice); | 3188 compiler->macro_assembler()->BindBlock(&second_choice); |
| 3325 | 3189 |
| 3326 preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace); | 3190 preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace); |
| 3327 | 3191 |
| 3328 EmitChoices(compiler, | 3192 EmitChoices(compiler, &alt_gens, 0, trace, &preload); |
| 3329 &alt_gens, | |
| 3330 0, | |
| 3331 trace, | |
| 3332 &preload); | |
| 3333 } | 3193 } |
| 3334 | 3194 |
| 3335 // At this point we need to generate slow checks for the alternatives where | 3195 // At this point we need to generate slow checks for the alternatives where |
| 3336 // the quick check was inlined. We can recognize these because the associated | 3196 // the quick check was inlined. We can recognize these because the associated |
| 3337 // label was bound. | 3197 // label was bound. |
| 3338 intptr_t new_flush_budget = trace->flush_budget() / choice_count; | 3198 intptr_t new_flush_budget = trace->flush_budget() / choice_count; |
| 3339 for (intptr_t i = 0; i < choice_count; i++) { | 3199 for (intptr_t i = 0; i < choice_count; i++) { |
| 3340 AlternativeGeneration* alt_gen = alt_gens.at(i); | 3200 AlternativeGeneration* alt_gen = alt_gens.at(i); |
| 3341 Trace new_trace(*trace); | 3201 Trace new_trace(*trace); |
| 3342 // If there are actions to be flushed we have to limit how many times | 3202 // If there are actions to be flushed we have to limit how many times |
| 3343 // they are flushed. Take the budget of the parent trace and distribute | 3203 // they are flushed. Take the budget of the parent trace and distribute |
| 3344 // it fairly amongst the children. | 3204 // it fairly amongst the children. |
| 3345 if (new_trace.actions() != NULL) { | 3205 if (new_trace.actions() != NULL) { |
| 3346 new_trace.set_flush_budget(new_flush_budget); | 3206 new_trace.set_flush_budget(new_flush_budget); |
| 3347 } | 3207 } |
| 3348 bool next_expects_preload = | 3208 bool next_expects_preload = |
| 3349 i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload; | 3209 i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload; |
| 3350 EmitOutOfLineContinuation(compiler, | 3210 EmitOutOfLineContinuation(compiler, &new_trace, alternatives_->At(i), |
| 3351 &new_trace, | 3211 alt_gen, preload.preload_characters_, |
| 3352 alternatives_->At(i), | |
| 3353 alt_gen, | |
| 3354 preload.preload_characters_, | |
| 3355 next_expects_preload); | 3212 next_expects_preload); |
| 3356 } | 3213 } |
| 3357 } | 3214 } |
| 3358 | 3215 |
| 3359 Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, | 3216 Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, |
| 3360 Trace* trace, | 3217 Trace* trace, |
| 3361 AlternativeGenerationList* alt_gens, | 3218 AlternativeGenerationList* alt_gens, |
| 3362 PreloadState* preload, | 3219 PreloadState* preload, |
| 3363 GreedyLoopState* greedy_loop_state, | 3220 GreedyLoopState* greedy_loop_state, |
| 3364 intptr_t text_length) { | 3221 intptr_t text_length) { |
| (...skipping 16 matching lines...) Expand all Loading... |
| 3381 greedy_match_trace.set_stop_node(this); | 3238 greedy_match_trace.set_stop_node(this); |
| 3382 greedy_match_trace.set_loop_label(&loop_label); | 3239 greedy_match_trace.set_loop_label(&loop_label); |
| 3383 (*alternatives_)[0].node()->Emit(compiler, &greedy_match_trace); | 3240 (*alternatives_)[0].node()->Emit(compiler, &greedy_match_trace); |
| 3384 macro_assembler->BindBlock(&greedy_match_failed); | 3241 macro_assembler->BindBlock(&greedy_match_failed); |
| 3385 | 3242 |
| 3386 BlockLabel second_choice; // For use in greedy matches. | 3243 BlockLabel second_choice; // For use in greedy matches. |
| 3387 macro_assembler->BindBlock(&second_choice); | 3244 macro_assembler->BindBlock(&second_choice); |
| 3388 | 3245 |
| 3389 Trace* new_trace = greedy_loop_state->counter_backtrack_trace(); | 3246 Trace* new_trace = greedy_loop_state->counter_backtrack_trace(); |
| 3390 | 3247 |
| 3391 EmitChoices(compiler, | 3248 EmitChoices(compiler, alt_gens, 1, new_trace, preload); |
| 3392 alt_gens, | |
| 3393 1, | |
| 3394 new_trace, | |
| 3395 preload); | |
| 3396 | 3249 |
| 3397 macro_assembler->BindBlock(greedy_loop_state->label()); | 3250 macro_assembler->BindBlock(greedy_loop_state->label()); |
| 3398 // If we have unwound to the bottom then backtrack. | 3251 // If we have unwound to the bottom then backtrack. |
| 3399 macro_assembler->CheckGreedyLoop(trace->backtrack()); | 3252 macro_assembler->CheckGreedyLoop(trace->backtrack()); |
| 3400 // Otherwise try the second priority at an earlier position. | 3253 // Otherwise try the second priority at an earlier position. |
| 3401 macro_assembler->AdvanceCurrentPosition(-text_length); | 3254 macro_assembler->AdvanceCurrentPosition(-text_length); |
| 3402 macro_assembler->GoTo(&second_choice); | 3255 macro_assembler->GoTo(&second_choice); |
| 3403 return new_trace; | 3256 return new_trace; |
| 3404 } | 3257 } |
| 3405 | 3258 |
| (...skipping 23 matching lines...) Expand all Loading... |
| 3429 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 3282 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 3430 // At this point we know that we are at a non-greedy loop that will eat | 3283 // At this point we know that we are at a non-greedy loop that will eat |
| 3431 // any character one at a time. Any non-anchored regexp has such a | 3284 // any character one at a time. Any non-anchored regexp has such a |
| 3432 // loop prepended to it in order to find where it starts. We look for | 3285 // loop prepended to it in order to find where it starts. We look for |
| 3433 // a pattern of the form ...abc... where we can look 6 characters ahead | 3286 // a pattern of the form ...abc... where we can look 6 characters ahead |
| 3434 // and step forwards 3 if the character is not one of abc. Abc need | 3287 // and step forwards 3 if the character is not one of abc. Abc need |
| 3435 // not be atoms, they can be any reasonably limited character class or | 3288 // not be atoms, they can be any reasonably limited character class or |
| 3436 // small alternation. | 3289 // small alternation. |
| 3437 BoyerMooreLookahead* bm = bm_info(false); | 3290 BoyerMooreLookahead* bm = bm_info(false); |
| 3438 if (bm == NULL) { | 3291 if (bm == NULL) { |
| 3439 eats_at_least = Utils::Minimum(kMaxLookaheadForBoyerMoore, | 3292 eats_at_least = Utils::Minimum( |
| 3440 EatsAtLeast(kMaxLookaheadForBoyerMoore, | 3293 kMaxLookaheadForBoyerMoore, |
| 3441 kRecursionBudget, | 3294 EatsAtLeast(kMaxLookaheadForBoyerMoore, kRecursionBudget, false)); |
| 3442 false)); | |
| 3443 if (eats_at_least >= 1) { | 3295 if (eats_at_least >= 1) { |
| 3444 bm = new(Z) BoyerMooreLookahead(eats_at_least, compiler, Z); | 3296 bm = new (Z) BoyerMooreLookahead(eats_at_least, compiler, Z); |
| 3445 GuardedAlternative alt0 = alternatives_->At(0); | 3297 GuardedAlternative alt0 = alternatives_->At(0); |
| 3446 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false); | 3298 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false); |
| 3447 } | 3299 } |
| 3448 } | 3300 } |
| 3449 if (bm != NULL) { | 3301 if (bm != NULL) { |
| 3450 bm->EmitSkipInstructions(macro_assembler); | 3302 bm->EmitSkipInstructions(macro_assembler); |
| 3451 } | 3303 } |
| 3452 return eats_at_least; | 3304 return eats_at_least; |
| 3453 } | 3305 } |
| 3454 | 3306 |
| (...skipping 14 matching lines...) Expand all Loading... |
| 3469 | 3321 |
| 3470 for (intptr_t i = first_choice; i < choice_count; i++) { | 3322 for (intptr_t i = first_choice; i < choice_count; i++) { |
| 3471 bool is_last = i == choice_count - 1; | 3323 bool is_last = i == choice_count - 1; |
| 3472 bool fall_through_on_failure = !is_last; | 3324 bool fall_through_on_failure = !is_last; |
| 3473 GuardedAlternative alternative = alternatives_->At(i); | 3325 GuardedAlternative alternative = alternatives_->At(i); |
| 3474 AlternativeGeneration* alt_gen = alt_gens->at(i); | 3326 AlternativeGeneration* alt_gen = alt_gens->at(i); |
| 3475 alt_gen->quick_check_details.set_characters(preload->preload_characters_); | 3327 alt_gen->quick_check_details.set_characters(preload->preload_characters_); |
| 3476 ZoneGrowableArray<Guard*>* guards = alternative.guards(); | 3328 ZoneGrowableArray<Guard*>* guards = alternative.guards(); |
| 3477 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); | 3329 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); |
| 3478 Trace new_trace(*trace); | 3330 Trace new_trace(*trace); |
| 3479 new_trace.set_characters_preloaded(preload->preload_is_current_ ? | 3331 new_trace.set_characters_preloaded( |
| 3480 preload->preload_characters_ : | 3332 preload->preload_is_current_ ? preload->preload_characters_ : 0); |
| 3481 0); | |
| 3482 if (preload->preload_has_checked_bounds_) { | 3333 if (preload->preload_has_checked_bounds_) { |
| 3483 new_trace.set_bound_checked_up_to(preload->preload_characters_); | 3334 new_trace.set_bound_checked_up_to(preload->preload_characters_); |
| 3484 } | 3335 } |
| 3485 new_trace.quick_check_performed()->Clear(); | 3336 new_trace.quick_check_performed()->Clear(); |
| 3486 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE); | 3337 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE); |
| 3487 if (!is_last) { | 3338 if (!is_last) { |
| 3488 new_trace.set_backtrack(&alt_gen->after); | 3339 new_trace.set_backtrack(&alt_gen->after); |
| 3489 } | 3340 } |
| 3490 alt_gen->expects_preload = preload->preload_is_current_; | 3341 alt_gen->expects_preload = preload->preload_is_current_; |
| 3491 bool generate_full_check_inline = false; | 3342 bool generate_full_check_inline = false; |
| 3492 if (kRegexpOptimization && | 3343 if (kRegexpOptimization && |
| 3493 try_to_emit_quick_check_for_alternative(i == 0) && | 3344 try_to_emit_quick_check_for_alternative(i == 0) && |
| 3494 alternative.node()->EmitQuickCheck(compiler, | 3345 alternative.node()->EmitQuickCheck( |
| 3495 trace, | 3346 compiler, trace, &new_trace, preload->preload_has_checked_bounds_, |
| 3496 &new_trace, | 3347 &alt_gen->possible_success, &alt_gen->quick_check_details, |
| 3497 preload->preload_has_checked_bounds_, | 3348 fall_through_on_failure)) { |
| 3498 &alt_gen->possible_success, | |
| 3499 &alt_gen->quick_check_details, | |
| 3500 fall_through_on_failure)) { | |
| 3501 // Quick check was generated for this choice. | 3349 // Quick check was generated for this choice. |
| 3502 preload->preload_is_current_ = true; | 3350 preload->preload_is_current_ = true; |
| 3503 preload->preload_has_checked_bounds_ = true; | 3351 preload->preload_has_checked_bounds_ = true; |
| 3504 // If we generated the quick check to fall through on possible success, | 3352 // If we generated the quick check to fall through on possible success, |
| 3505 // we now need to generate the full check inline. | 3353 // we now need to generate the full check inline. |
| 3506 if (!fall_through_on_failure) { | 3354 if (!fall_through_on_failure) { |
| 3507 macro_assembler->BindBlock(&alt_gen->possible_success); | 3355 macro_assembler->BindBlock(&alt_gen->possible_success); |
| 3508 new_trace.set_quick_check_performed(&alt_gen->quick_check_details); | 3356 new_trace.set_quick_check_performed(&alt_gen->quick_check_details); |
| 3509 new_trace.set_characters_preloaded(preload->preload_characters_); | 3357 new_trace.set_characters_preloaded(preload->preload_characters_); |
| 3510 new_trace.set_bound_checked_up_to(preload->preload_characters_); | 3358 new_trace.set_bound_checked_up_to(preload->preload_characters_); |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3562 BlockLabel reload_current_char; | 3410 BlockLabel reload_current_char; |
| 3563 out_of_line_trace.set_backtrack(&reload_current_char); | 3411 out_of_line_trace.set_backtrack(&reload_current_char); |
| 3564 for (intptr_t j = 0; j < guard_count; j++) { | 3412 for (intptr_t j = 0; j < guard_count; j++) { |
| 3565 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); | 3413 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); |
| 3566 } | 3414 } |
| 3567 alternative.node()->Emit(compiler, &out_of_line_trace); | 3415 alternative.node()->Emit(compiler, &out_of_line_trace); |
| 3568 macro_assembler->BindBlock(&reload_current_char); | 3416 macro_assembler->BindBlock(&reload_current_char); |
| 3569 // Reload the current character, since the next quick check expects that. | 3417 // Reload the current character, since the next quick check expects that. |
| 3570 // We don't need to check bounds here because we only get into this | 3418 // We don't need to check bounds here because we only get into this |
| 3571 // code through a quick check which already did the checked load. | 3419 // code through a quick check which already did the checked load. |
| 3572 macro_assembler->LoadCurrentCharacter(trace->cp_offset(), | 3420 macro_assembler->LoadCurrentCharacter(trace->cp_offset(), NULL, false, |
| 3573 NULL, | |
| 3574 false, | |
| 3575 preload_characters); | 3421 preload_characters); |
| 3576 macro_assembler->GoTo(&(alt_gen->after)); | 3422 macro_assembler->GoTo(&(alt_gen->after)); |
| 3577 } else { | 3423 } else { |
| 3578 out_of_line_trace.set_backtrack(&(alt_gen->after)); | 3424 out_of_line_trace.set_backtrack(&(alt_gen->after)); |
| 3579 for (intptr_t j = 0; j < guard_count; j++) { | 3425 for (intptr_t j = 0; j < guard_count; j++) { |
| 3580 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); | 3426 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); |
| 3581 } | 3427 } |
| 3582 alternative.node()->Emit(compiler, &out_of_line_trace); | 3428 alternative.node()->Emit(compiler, &out_of_line_trace); |
| 3583 } | 3429 } |
| 3584 } | 3430 } |
| 3585 | 3431 |
| 3586 | 3432 |
| 3587 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3433 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
| 3588 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3434 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 3589 LimitResult limit_result = LimitVersions(compiler, trace); | 3435 LimitResult limit_result = LimitVersions(compiler, trace); |
| 3590 if (limit_result == DONE) return; | 3436 if (limit_result == DONE) return; |
| 3591 ASSERT(limit_result == CONTINUE); | 3437 ASSERT(limit_result == CONTINUE); |
| 3592 | 3438 |
| 3593 RecursionCheck rc(compiler); | 3439 RecursionCheck rc(compiler); |
| 3594 | 3440 |
| 3595 switch (action_type_) { | 3441 switch (action_type_) { |
| 3596 case STORE_POSITION: { | 3442 case STORE_POSITION: { |
| 3597 Trace::DeferredCapture | 3443 Trace::DeferredCapture new_capture(data_.u_position_register.reg, |
| 3598 new_capture(data_.u_position_register.reg, | 3444 data_.u_position_register.is_capture, |
| 3599 data_.u_position_register.is_capture, | 3445 trace); |
| 3600 trace); | |
| 3601 Trace new_trace = *trace; | 3446 Trace new_trace = *trace; |
| 3602 new_trace.add_action(&new_capture); | 3447 new_trace.add_action(&new_capture); |
| 3603 on_success()->Emit(compiler, &new_trace); | 3448 on_success()->Emit(compiler, &new_trace); |
| 3604 break; | 3449 break; |
| 3605 } | 3450 } |
| 3606 case INCREMENT_REGISTER: { | 3451 case INCREMENT_REGISTER: { |
| 3607 Trace::DeferredIncrementRegister | 3452 Trace::DeferredIncrementRegister new_increment( |
| 3608 new_increment(data_.u_increment_register.reg); | 3453 data_.u_increment_register.reg); |
| 3609 Trace new_trace = *trace; | 3454 Trace new_trace = *trace; |
| 3610 new_trace.add_action(&new_increment); | 3455 new_trace.add_action(&new_increment); |
| 3611 on_success()->Emit(compiler, &new_trace); | 3456 on_success()->Emit(compiler, &new_trace); |
| 3612 break; | 3457 break; |
| 3613 } | 3458 } |
| 3614 case SET_REGISTER: { | 3459 case SET_REGISTER: { |
| 3615 Trace::DeferredSetRegister | 3460 Trace::DeferredSetRegister new_set(data_.u_store_register.reg, |
| 3616 new_set(data_.u_store_register.reg, data_.u_store_register.value); | 3461 data_.u_store_register.value); |
| 3617 Trace new_trace = *trace; | 3462 Trace new_trace = *trace; |
| 3618 new_trace.add_action(&new_set); | 3463 new_trace.add_action(&new_set); |
| 3619 on_success()->Emit(compiler, &new_trace); | 3464 on_success()->Emit(compiler, &new_trace); |
| 3620 break; | 3465 break; |
| 3621 } | 3466 } |
| 3622 case CLEAR_CAPTURES: { | 3467 case CLEAR_CAPTURES: { |
| 3623 Trace::DeferredClearCaptures | 3468 Trace::DeferredClearCaptures new_capture(Interval( |
| 3624 new_capture(Interval(data_.u_clear_captures.range_from, | 3469 data_.u_clear_captures.range_from, data_.u_clear_captures.range_to)); |
| 3625 data_.u_clear_captures.range_to)); | |
| 3626 Trace new_trace = *trace; | 3470 Trace new_trace = *trace; |
| 3627 new_trace.add_action(&new_capture); | 3471 new_trace.add_action(&new_capture); |
| 3628 on_success()->Emit(compiler, &new_trace); | 3472 on_success()->Emit(compiler, &new_trace); |
| 3629 break; | 3473 break; |
| 3630 } | 3474 } |
| 3631 case BEGIN_SUBMATCH: | 3475 case BEGIN_SUBMATCH: |
| 3632 if (!trace->is_trivial()) { | 3476 if (!trace->is_trivial()) { |
| 3633 trace->Flush(compiler, this); | 3477 trace->Flush(compiler, this); |
| 3634 } else { | 3478 } else { |
| 3635 assembler->WriteCurrentPositionToRegister( | 3479 assembler->WriteCurrentPositionToRegister( |
| (...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3715 } | 3559 } |
| 3716 | 3560 |
| 3717 LimitResult limit_result = LimitVersions(compiler, trace); | 3561 LimitResult limit_result = LimitVersions(compiler, trace); |
| 3718 if (limit_result == DONE) return; | 3562 if (limit_result == DONE) return; |
| 3719 ASSERT(limit_result == CONTINUE); | 3563 ASSERT(limit_result == CONTINUE); |
| 3720 | 3564 |
| 3721 RecursionCheck rc(compiler); | 3565 RecursionCheck rc(compiler); |
| 3722 | 3566 |
| 3723 ASSERT(start_reg_ + 1 == end_reg_); | 3567 ASSERT(start_reg_ + 1 == end_reg_); |
| 3724 if (compiler->ignore_case()) { | 3568 if (compiler->ignore_case()) { |
| 3725 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, | 3569 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, trace->backtrack()); |
| 3726 trace->backtrack()); | |
| 3727 } else { | 3570 } else { |
| 3728 assembler->CheckNotBackReference(start_reg_, trace->backtrack()); | 3571 assembler->CheckNotBackReference(start_reg_, trace->backtrack()); |
| 3729 } | 3572 } |
| 3730 on_success()->Emit(compiler, trace); | 3573 on_success()->Emit(compiler, trace); |
| 3731 } | 3574 } |
| 3732 | 3575 |
| 3733 | 3576 |
| 3734 // ------------------------------------------------------------------- | 3577 // ------------------------------------------------------------------- |
| 3735 // Dot/dotty output | 3578 // Dot/dotty output |
| 3736 | 3579 |
| 3737 | 3580 |
| 3738 #ifdef DEBUG | 3581 #ifdef DEBUG |
| 3739 | 3582 |
| 3740 | 3583 |
| 3741 class DotPrinter: public NodeVisitor { | 3584 class DotPrinter : public NodeVisitor { |
| 3742 public: | 3585 public: |
| 3743 explicit DotPrinter(bool ignore_case) {} | 3586 explicit DotPrinter(bool ignore_case) {} |
| 3744 void PrintNode(const char* label, RegExpNode* node); | 3587 void PrintNode(const char* label, RegExpNode* node); |
| 3745 void Visit(RegExpNode* node); | 3588 void Visit(RegExpNode* node); |
| 3746 void PrintAttributes(RegExpNode* from); | 3589 void PrintAttributes(RegExpNode* from); |
| 3747 void PrintOnFailure(RegExpNode* from, RegExpNode* to); | 3590 void PrintOnFailure(RegExpNode* from, RegExpNode* to); |
| 3748 #define DECLARE_VISIT(Type) \ | 3591 #define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that); |
| 3749 virtual void Visit##Type(Type##Node* that); | 3592 FOR_EACH_NODE_TYPE(DECLARE_VISIT) |
| 3750 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | |
| 3751 #undef DECLARE_VISIT | 3593 #undef DECLARE_VISIT |
| 3752 }; | 3594 }; |
| 3753 | 3595 |
| 3754 | 3596 |
| 3755 void DotPrinter::PrintNode(const char* label, RegExpNode* node) { | 3597 void DotPrinter::PrintNode(const char* label, RegExpNode* node) { |
| 3756 OS::Print("digraph G {\n graph [label=\""); | 3598 OS::Print("digraph G {\n graph [label=\""); |
| 3757 for (intptr_t i = 0; label[i]; i++) { | 3599 for (intptr_t i = 0; label[i]; i++) { |
| 3758 switch (label[i]) { | 3600 switch (label[i]) { |
| 3759 case '\\': | 3601 case '\\': |
| 3760 OS::Print("\\\\"); | 3602 OS::Print("\\\\"); |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3806 PrintSeparator(); | 3648 PrintSeparator(); |
| 3807 OS::Print("{%s|%" Pd "}", name, value); | 3649 OS::Print("{%s|%" Pd "}", name, value); |
| 3808 } | 3650 } |
| 3809 | 3651 |
| 3810 private: | 3652 private: |
| 3811 bool first_; | 3653 bool first_; |
| 3812 }; | 3654 }; |
| 3813 | 3655 |
| 3814 | 3656 |
| 3815 void DotPrinter::PrintAttributes(RegExpNode* that) { | 3657 void DotPrinter::PrintAttributes(RegExpNode* that) { |
| 3816 OS::Print(" a%p [shape=Mrecord, color=grey, fontcolor=grey, " | 3658 OS::Print( |
| 3817 "margin=0.1, fontsize=10, label=\"{", that); | 3659 " a%p [shape=Mrecord, color=grey, fontcolor=grey, " |
| 3660 "margin=0.1, fontsize=10, label=\"{", |
| 3661 that); |
| 3818 AttributePrinter printer; | 3662 AttributePrinter printer; |
| 3819 NodeInfo* info = that->info(); | 3663 NodeInfo* info = that->info(); |
| 3820 printer.PrintBit("NI", info->follows_newline_interest); | 3664 printer.PrintBit("NI", info->follows_newline_interest); |
| 3821 printer.PrintBit("WI", info->follows_word_interest); | 3665 printer.PrintBit("WI", info->follows_word_interest); |
| 3822 printer.PrintBit("SI", info->follows_start_interest); | 3666 printer.PrintBit("SI", info->follows_start_interest); |
| 3823 BlockLabel* label = that->label(); | 3667 BlockLabel* label = that->label(); |
| 3824 if (label->IsBound()) | 3668 if (label->IsBound()) printer.PrintPositive("@", label->Position()); |
| 3825 printer.PrintPositive("@", label->Position()); | 3669 OS::Print( |
| 3826 OS::Print("}\"];\n" | 3670 "}\"];\n" |
| 3827 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n", | 3671 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n", |
| 3828 that, that); | 3672 that, that); |
| 3829 } | 3673 } |
| 3830 | 3674 |
| 3831 | 3675 |
| 3832 void DotPrinter::VisitChoice(ChoiceNode* that) { | 3676 void DotPrinter::VisitChoice(ChoiceNode* that) { |
| 3833 OS::Print(" n%p [shape=Mrecord, label=\"?\"];\n", that); | 3677 OS::Print(" n%p [shape=Mrecord, label=\"?\"];\n", that); |
| 3834 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { | 3678 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
| 3835 GuardedAlternative alt = that->alternatives()->At(i); | 3679 GuardedAlternative alt = that->alternatives()->At(i); |
| 3836 OS::Print(" n%p -> n%p", that, alt.node()); | 3680 OS::Print(" n%p -> n%p", that, alt.node()); |
| 3837 } | 3681 } |
| 3838 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { | 3682 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
| (...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3978 | 3822 |
| 3979 // ------------------------------------------------------------------- | 3823 // ------------------------------------------------------------------- |
| 3980 // Tree to graph conversion | 3824 // Tree to graph conversion |
| 3981 | 3825 |
| 3982 // The zone in which we allocate graph nodes. | 3826 // The zone in which we allocate graph nodes. |
| 3983 #define OZ (on_success->zone()) | 3827 #define OZ (on_success->zone()) |
| 3984 | 3828 |
| 3985 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, | 3829 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, |
| 3986 RegExpNode* on_success) { | 3830 RegExpNode* on_success) { |
| 3987 ZoneGrowableArray<TextElement>* elms = | 3831 ZoneGrowableArray<TextElement>* elms = |
| 3988 new(OZ) ZoneGrowableArray<TextElement>(1); | 3832 new (OZ) ZoneGrowableArray<TextElement>(1); |
| 3989 elms->Add(TextElement::Atom(this)); | 3833 elms->Add(TextElement::Atom(this)); |
| 3990 return new(OZ) TextNode(elms, on_success); | 3834 return new (OZ) TextNode(elms, on_success); |
| 3991 } | 3835 } |
| 3992 | 3836 |
| 3993 | 3837 |
| 3994 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, | 3838 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, |
| 3995 RegExpNode* on_success) { | 3839 RegExpNode* on_success) { |
| 3996 ZoneGrowableArray<TextElement>* elms = | 3840 ZoneGrowableArray<TextElement>* elms = |
| 3997 new(OZ) ZoneGrowableArray<TextElement>(1); | 3841 new (OZ) ZoneGrowableArray<TextElement>(1); |
| 3998 for (intptr_t i = 0; i < elements()->length(); i++) { | 3842 for (intptr_t i = 0; i < elements()->length(); i++) { |
| 3999 elms->Add(elements()->At(i)); | 3843 elms->Add(elements()->At(i)); |
| 4000 } | 3844 } |
| 4001 return new(OZ) TextNode(elms, on_success); | 3845 return new (OZ) TextNode(elms, on_success); |
| 4002 } | 3846 } |
| 4003 | 3847 |
| 4004 | 3848 |
| 4005 static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges, | 3849 static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges, |
| 4006 const intptr_t* special_class, | 3850 const intptr_t* special_class, |
| 4007 intptr_t length) { | 3851 intptr_t length) { |
| 4008 length--; // Remove final 0x10000. | 3852 length--; // Remove final 0x10000. |
| 4009 ASSERT(special_class[length] == 0x10000); | 3853 ASSERT(special_class[length] == 0x10000); |
| 4010 ASSERT(ranges->length() != 0); | 3854 ASSERT(ranges->length() != 0); |
| 4011 ASSERT(length != 0); | 3855 ASSERT(length != 0); |
| 4012 ASSERT(special_class[0] != 0); | 3856 ASSERT(special_class[0] != 0); |
| 4013 if (ranges->length() != (length >> 1) + 1) { | 3857 if (ranges->length() != (length >> 1) + 1) { |
| 4014 return false; | 3858 return false; |
| 4015 } | 3859 } |
| 4016 CharacterRange range = ranges->At(0); | 3860 CharacterRange range = ranges->At(0); |
| 4017 if (range.from() != 0) { | 3861 if (range.from() != 0) { |
| 4018 return false; | 3862 return false; |
| 4019 } | 3863 } |
| 4020 for (intptr_t i = 0; i < length; i += 2) { | 3864 for (intptr_t i = 0; i < length; i += 2) { |
| 4021 if (special_class[i] != (range.to() + 1)) { | 3865 if (special_class[i] != (range.to() + 1)) { |
| 4022 return false; | 3866 return false; |
| 4023 } | 3867 } |
| 4024 range = ranges->At((i >> 1) + 1); | 3868 range = ranges->At((i >> 1) + 1); |
| 4025 if (special_class[i+1] != range.from()) { | 3869 if (special_class[i + 1] != range.from()) { |
| 4026 return false; | 3870 return false; |
| 4027 } | 3871 } |
| 4028 } | 3872 } |
| 4029 if (range.to() != 0xffff) { | 3873 if (range.to() != 0xffff) { |
| 4030 return false; | 3874 return false; |
| 4031 } | 3875 } |
| 4032 return true; | 3876 return true; |
| 4033 } | 3877 } |
| 4034 | 3878 |
| 4035 | 3879 |
| (...skipping 26 matching lines...) Expand all Loading... |
| 4062 return true; | 3906 return true; |
| 4063 } | 3907 } |
| 4064 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { | 3908 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { |
| 4065 set_.set_standard_set_type('s'); | 3909 set_.set_standard_set_type('s'); |
| 4066 return true; | 3910 return true; |
| 4067 } | 3911 } |
| 4068 if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { | 3912 if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { |
| 4069 set_.set_standard_set_type('S'); | 3913 set_.set_standard_set_type('S'); |
| 4070 return true; | 3914 return true; |
| 4071 } | 3915 } |
| 4072 if (CompareInverseRanges(set_.ranges(), | 3916 if (CompareInverseRanges(set_.ranges(), kLineTerminatorRanges, |
| 4073 kLineTerminatorRanges, | |
| 4074 kLineTerminatorRangeCount)) { | 3917 kLineTerminatorRangeCount)) { |
| 4075 set_.set_standard_set_type('.'); | 3918 set_.set_standard_set_type('.'); |
| 4076 return true; | 3919 return true; |
| 4077 } | 3920 } |
| 4078 if (CompareRanges(set_.ranges(), | 3921 if (CompareRanges(set_.ranges(), kLineTerminatorRanges, |
| 4079 kLineTerminatorRanges, | |
| 4080 kLineTerminatorRangeCount)) { | 3922 kLineTerminatorRangeCount)) { |
| 4081 set_.set_standard_set_type('n'); | 3923 set_.set_standard_set_type('n'); |
| 4082 return true; | 3924 return true; |
| 4083 } | 3925 } |
| 4084 if (CompareRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { | 3926 if (CompareRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { |
| 4085 set_.set_standard_set_type('w'); | 3927 set_.set_standard_set_type('w'); |
| 4086 return true; | 3928 return true; |
| 4087 } | 3929 } |
| 4088 if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { | 3930 if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { |
| 4089 set_.set_standard_set_type('W'); | 3931 set_.set_standard_set_type('W'); |
| 4090 return true; | 3932 return true; |
| 4091 } | 3933 } |
| 4092 return false; | 3934 return false; |
| 4093 } | 3935 } |
| 4094 | 3936 |
| 4095 | 3937 |
| 4096 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 3938 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
| 4097 RegExpNode* on_success) { | 3939 RegExpNode* on_success) { |
| 4098 return new(OZ) TextNode(this, on_success); | 3940 return new (OZ) TextNode(this, on_success); |
| 4099 } | 3941 } |
| 4100 | 3942 |
| 4101 | 3943 |
| 4102 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, | 3944 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, |
| 4103 RegExpNode* on_success) { | 3945 RegExpNode* on_success) { |
| 4104 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); | 3946 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); |
| 4105 intptr_t length = alternatives->length(); | 3947 intptr_t length = alternatives->length(); |
| 4106 ChoiceNode* result = | 3948 ChoiceNode* result = new (OZ) ChoiceNode(length, OZ); |
| 4107 new(OZ) ChoiceNode(length, OZ); | |
| 4108 for (intptr_t i = 0; i < length; i++) { | 3949 for (intptr_t i = 0; i < length; i++) { |
| 4109 GuardedAlternative alternative(alternatives->At(i)->ToNode(compiler, | 3950 GuardedAlternative alternative( |
| 4110 on_success)); | 3951 alternatives->At(i)->ToNode(compiler, on_success)); |
| 4111 result->AddAlternative(alternative); | 3952 result->AddAlternative(alternative); |
| 4112 } | 3953 } |
| 4113 return result; | 3954 return result; |
| 4114 } | 3955 } |
| 4115 | 3956 |
| 4116 | 3957 |
| 4117 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, | 3958 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, |
| 4118 RegExpNode* on_success) { | 3959 RegExpNode* on_success) { |
| 4119 return ToNode(min(), | 3960 return ToNode(min(), max(), is_greedy(), body(), compiler, on_success); |
| 4120 max(), | |
| 4121 is_greedy(), | |
| 4122 body(), | |
| 4123 compiler, | |
| 4124 on_success); | |
| 4125 } | 3961 } |
| 4126 | 3962 |
| 4127 | 3963 |
| 4128 // Scoped object to keep track of how much we unroll quantifier loops in the | 3964 // Scoped object to keep track of how much we unroll quantifier loops in the |
| 4129 // regexp graph generator. | 3965 // regexp graph generator. |
| 4130 class RegExpExpansionLimiter : public ValueObject { | 3966 class RegExpExpansionLimiter : public ValueObject { |
| 4131 public: | 3967 public: |
| 4132 static const intptr_t kMaxExpansionFactor = 6; | 3968 static const intptr_t kMaxExpansionFactor = 6; |
| 4133 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor) | 3969 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor) |
| 4134 : compiler_(compiler), | 3970 : compiler_(compiler), |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4200 Interval capture_registers = body->CaptureRegisters(); | 4036 Interval capture_registers = body->CaptureRegisters(); |
| 4201 bool needs_capture_clearing = !capture_registers.is_empty(); | 4037 bool needs_capture_clearing = !capture_registers.is_empty(); |
| 4202 Zone* zone = compiler->zone(); | 4038 Zone* zone = compiler->zone(); |
| 4203 | 4039 |
| 4204 if (body_can_be_empty) { | 4040 if (body_can_be_empty) { |
| 4205 body_start_reg = compiler->AllocateRegister(); | 4041 body_start_reg = compiler->AllocateRegister(); |
| 4206 } else if (kRegexpOptimization && !needs_capture_clearing) { | 4042 } else if (kRegexpOptimization && !needs_capture_clearing) { |
| 4207 // Only unroll if there are no captures and the body can't be | 4043 // Only unroll if there are no captures and the body can't be |
| 4208 // empty. | 4044 // empty. |
| 4209 { | 4045 { |
| 4210 RegExpExpansionLimiter limiter( | 4046 RegExpExpansionLimiter limiter(compiler, min + ((max != min) ? 1 : 0)); |
| 4211 compiler, min + ((max != min) ? 1 : 0)); | |
| 4212 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { | 4047 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { |
| 4213 intptr_t new_max = (max == kInfinity) ? max : max - min; | 4048 intptr_t new_max = (max == kInfinity) ? max : max - min; |
| 4214 // Recurse once to get the loop or optional matches after the fixed | 4049 // Recurse once to get the loop or optional matches after the fixed |
| 4215 // ones. | 4050 // ones. |
| 4216 RegExpNode* answer = ToNode( | 4051 RegExpNode* answer = |
| 4217 0, new_max, is_greedy, body, compiler, on_success, true); | 4052 ToNode(0, new_max, is_greedy, body, compiler, on_success, true); |
| 4218 // Unroll the forced matches from 0 to min. This can cause chains of | 4053 // Unroll the forced matches from 0 to min. This can cause chains of |
| 4219 // TextNodes (which the parser does not generate). These should be | 4054 // TextNodes (which the parser does not generate). These should be |
| 4220 // combined if it turns out they hinder good code generation. | 4055 // combined if it turns out they hinder good code generation. |
| 4221 for (intptr_t i = 0; i < min; i++) { | 4056 for (intptr_t i = 0; i < min; i++) { |
| 4222 answer = body->ToNode(compiler, answer); | 4057 answer = body->ToNode(compiler, answer); |
| 4223 } | 4058 } |
| 4224 return answer; | 4059 return answer; |
| 4225 } | 4060 } |
| 4226 } | 4061 } |
| 4227 if (max <= kMaxUnrolledMaxMatches && min == 0) { | 4062 if (max <= kMaxUnrolledMaxMatches && min == 0) { |
| 4228 ASSERT(max > 0); // Due to the 'if' above. | 4063 ASSERT(max > 0); // Due to the 'if' above. |
| 4229 RegExpExpansionLimiter limiter(compiler, max); | 4064 RegExpExpansionLimiter limiter(compiler, max); |
| 4230 if (limiter.ok_to_expand()) { | 4065 if (limiter.ok_to_expand()) { |
| 4231 // Unroll the optional matches up to max. | 4066 // Unroll the optional matches up to max. |
| 4232 RegExpNode* answer = on_success; | 4067 RegExpNode* answer = on_success; |
| 4233 for (intptr_t i = 0; i < max; i++) { | 4068 for (intptr_t i = 0; i < max; i++) { |
| 4234 ChoiceNode* alternation = new(zone) ChoiceNode(2, zone); | 4069 ChoiceNode* alternation = new (zone) ChoiceNode(2, zone); |
| 4235 if (is_greedy) { | 4070 if (is_greedy) { |
| 4236 alternation->AddAlternative( | 4071 alternation->AddAlternative( |
| 4237 GuardedAlternative(body->ToNode(compiler, answer))); | 4072 GuardedAlternative(body->ToNode(compiler, answer))); |
| 4238 alternation->AddAlternative(GuardedAlternative(on_success)); | 4073 alternation->AddAlternative(GuardedAlternative(on_success)); |
| 4239 } else { | 4074 } else { |
| 4240 alternation->AddAlternative(GuardedAlternative(on_success)); | 4075 alternation->AddAlternative(GuardedAlternative(on_success)); |
| 4241 alternation->AddAlternative( | 4076 alternation->AddAlternative( |
| 4242 GuardedAlternative(body->ToNode(compiler, answer))); | 4077 GuardedAlternative(body->ToNode(compiler, answer))); |
| 4243 } | 4078 } |
| 4244 answer = alternation; | 4079 answer = alternation; |
| 4245 if (not_at_start) alternation->set_not_at_start(); | 4080 if (not_at_start) alternation->set_not_at_start(); |
| 4246 } | 4081 } |
| 4247 return answer; | 4082 return answer; |
| 4248 } | 4083 } |
| 4249 } | 4084 } |
| 4250 } | 4085 } |
| 4251 bool has_min = min > 0; | 4086 bool has_min = min > 0; |
| 4252 bool has_max = max < RegExpTree::kInfinity; | 4087 bool has_max = max < RegExpTree::kInfinity; |
| 4253 bool needs_counter = has_min || has_max; | 4088 bool needs_counter = has_min || has_max; |
| 4254 intptr_t reg_ctr = needs_counter | 4089 intptr_t reg_ctr = needs_counter ? compiler->AllocateRegister() |
| 4255 ? compiler->AllocateRegister() | 4090 : RegExpCompiler::kNoRegister; |
| 4256 : RegExpCompiler::kNoRegister; | 4091 LoopChoiceNode* center = |
| 4257 LoopChoiceNode* center = new(zone) LoopChoiceNode(body->min_match() == 0, | 4092 new (zone) LoopChoiceNode(body->min_match() == 0, zone); |
| 4258 zone); | |
| 4259 if (not_at_start) center->set_not_at_start(); | 4093 if (not_at_start) center->set_not_at_start(); |
| 4260 RegExpNode* loop_return = needs_counter | 4094 RegExpNode* loop_return = |
| 4261 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) | 4095 needs_counter ? static_cast<RegExpNode*>( |
| 4262 : static_cast<RegExpNode*>(center); | 4096 ActionNode::IncrementRegister(reg_ctr, center)) |
| 4097 : static_cast<RegExpNode*>(center); |
| 4263 if (body_can_be_empty) { | 4098 if (body_can_be_empty) { |
| 4264 // If the body can be empty we need to check if it was and then | 4099 // If the body can be empty we need to check if it was and then |
| 4265 // backtrack. | 4100 // backtrack. |
| 4266 loop_return = ActionNode::EmptyMatchCheck(body_start_reg, | 4101 loop_return = |
| 4267 reg_ctr, | 4102 ActionNode::EmptyMatchCheck(body_start_reg, reg_ctr, min, loop_return); |
| 4268 min, | |
| 4269 loop_return); | |
| 4270 } | 4103 } |
| 4271 RegExpNode* body_node = body->ToNode(compiler, loop_return); | 4104 RegExpNode* body_node = body->ToNode(compiler, loop_return); |
| 4272 if (body_can_be_empty) { | 4105 if (body_can_be_empty) { |
| 4273 // If the body can be empty we need to store the start position | 4106 // If the body can be empty we need to store the start position |
| 4274 // so we can bail out if it was empty. | 4107 // so we can bail out if it was empty. |
| 4275 body_node = ActionNode::StorePosition(body_start_reg, false, body_node); | 4108 body_node = ActionNode::StorePosition(body_start_reg, false, body_node); |
| 4276 } | 4109 } |
| 4277 if (needs_capture_clearing) { | 4110 if (needs_capture_clearing) { |
| 4278 // Before entering the body of this loop we need to clear captures. | 4111 // Before entering the body of this loop we need to clear captures. |
| 4279 body_node = ActionNode::ClearCaptures(capture_registers, body_node); | 4112 body_node = ActionNode::ClearCaptures(capture_registers, body_node); |
| 4280 } | 4113 } |
| 4281 GuardedAlternative body_alt(body_node); | 4114 GuardedAlternative body_alt(body_node); |
| 4282 if (has_max) { | 4115 if (has_max) { |
| 4283 Guard* body_guard = | 4116 Guard* body_guard = new (zone) Guard(reg_ctr, Guard::LT, max); |
| 4284 new(zone) Guard(reg_ctr, Guard::LT, max); | |
| 4285 body_alt.AddGuard(body_guard, zone); | 4117 body_alt.AddGuard(body_guard, zone); |
| 4286 } | 4118 } |
| 4287 GuardedAlternative rest_alt(on_success); | 4119 GuardedAlternative rest_alt(on_success); |
| 4288 if (has_min) { | 4120 if (has_min) { |
| 4289 Guard* rest_guard = new(zone) Guard(reg_ctr, Guard::GEQ, min); | 4121 Guard* rest_guard = new (zone) Guard(reg_ctr, Guard::GEQ, min); |
| 4290 rest_alt.AddGuard(rest_guard, zone); | 4122 rest_alt.AddGuard(rest_guard, zone); |
| 4291 } | 4123 } |
| 4292 if (is_greedy) { | 4124 if (is_greedy) { |
| 4293 center->AddLoopAlternative(body_alt); | 4125 center->AddLoopAlternative(body_alt); |
| 4294 center->AddContinueAlternative(rest_alt); | 4126 center->AddContinueAlternative(rest_alt); |
| 4295 } else { | 4127 } else { |
| 4296 center->AddContinueAlternative(rest_alt); | 4128 center->AddContinueAlternative(rest_alt); |
| 4297 center->AddLoopAlternative(body_alt); | 4129 center->AddLoopAlternative(body_alt); |
| 4298 } | 4130 } |
| 4299 if (needs_counter) { | 4131 if (needs_counter) { |
| (...skipping 24 matching lines...) Expand all Loading... |
| 4324 intptr_t stack_pointer_register = compiler->AllocateRegister(); | 4156 intptr_t stack_pointer_register = compiler->AllocateRegister(); |
| 4325 intptr_t position_register = compiler->AllocateRegister(); | 4157 intptr_t position_register = compiler->AllocateRegister(); |
| 4326 // The ChoiceNode to distinguish between a newline and end-of-input. | 4158 // The ChoiceNode to distinguish between a newline and end-of-input. |
| 4327 ChoiceNode* result = new ChoiceNode(2, on_success->zone()); | 4159 ChoiceNode* result = new ChoiceNode(2, on_success->zone()); |
| 4328 // Create a newline atom. | 4160 // Create a newline atom. |
| 4329 ZoneGrowableArray<CharacterRange>* newline_ranges = | 4161 ZoneGrowableArray<CharacterRange>* newline_ranges = |
| 4330 new ZoneGrowableArray<CharacterRange>(3); | 4162 new ZoneGrowableArray<CharacterRange>(3); |
| 4331 CharacterRange::AddClassEscape('n', newline_ranges); | 4163 CharacterRange::AddClassEscape('n', newline_ranges); |
| 4332 RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n'); | 4164 RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n'); |
| 4333 TextNode* newline_matcher = new TextNode( | 4165 TextNode* newline_matcher = new TextNode( |
| 4334 newline_atom, | 4166 newline_atom, ActionNode::PositiveSubmatchSuccess( |
| 4335 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, | 4167 stack_pointer_register, position_register, |
| 4336 position_register, | 4168 0, // No captures inside. |
| 4337 0, // No captures inside. | 4169 -1, // Ignored if no captures. |
| 4338 -1, // Ignored if no captures. | 4170 on_success)); |
| 4339 on_success)); | |
| 4340 // Create an end-of-input matcher. | 4171 // Create an end-of-input matcher. |
| 4341 RegExpNode* end_of_line = ActionNode::BeginSubmatch( | 4172 RegExpNode* end_of_line = ActionNode::BeginSubmatch( |
| 4342 stack_pointer_register, | 4173 stack_pointer_register, position_register, newline_matcher); |
| 4343 position_register, | |
| 4344 newline_matcher); | |
| 4345 // Add the two alternatives to the ChoiceNode. | 4174 // Add the two alternatives to the ChoiceNode. |
| 4346 GuardedAlternative eol_alternative(end_of_line); | 4175 GuardedAlternative eol_alternative(end_of_line); |
| 4347 result->AddAlternative(eol_alternative); | 4176 result->AddAlternative(eol_alternative); |
| 4348 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); | 4177 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); |
| 4349 result->AddAlternative(end_alternative); | 4178 result->AddAlternative(end_alternative); |
| 4350 return result; | 4179 return result; |
| 4351 } | 4180 } |
| 4352 default: | 4181 default: |
| 4353 UNREACHABLE(); | 4182 UNREACHABLE(); |
| 4354 } | 4183 } |
| 4355 return on_success; | 4184 return on_success; |
| 4356 } | 4185 } |
| 4357 | 4186 |
| 4358 | 4187 |
| 4359 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, | 4188 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, |
| 4360 RegExpNode* on_success) { | 4189 RegExpNode* on_success) { |
| 4361 return new(OZ) | 4190 return new (OZ) |
| 4362 BackReferenceNode(RegExpCapture::StartRegister(index()), | 4191 BackReferenceNode(RegExpCapture::StartRegister(index()), |
| 4363 RegExpCapture::EndRegister(index()), | 4192 RegExpCapture::EndRegister(index()), on_success); |
| 4364 on_success); | |
| 4365 } | 4193 } |
| 4366 | 4194 |
| 4367 | 4195 |
| 4368 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, | 4196 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, |
| 4369 RegExpNode* on_success) { | 4197 RegExpNode* on_success) { |
| 4370 return on_success; | 4198 return on_success; |
| 4371 } | 4199 } |
| 4372 | 4200 |
| 4373 | 4201 |
| 4374 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, | 4202 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, |
| 4375 RegExpNode* on_success) { | 4203 RegExpNode* on_success) { |
| 4376 intptr_t stack_pointer_register = compiler->AllocateRegister(); | 4204 intptr_t stack_pointer_register = compiler->AllocateRegister(); |
| 4377 intptr_t position_register = compiler->AllocateRegister(); | 4205 intptr_t position_register = compiler->AllocateRegister(); |
| 4378 | 4206 |
| 4379 const intptr_t registers_per_capture = 2; | 4207 const intptr_t registers_per_capture = 2; |
| 4380 const intptr_t register_of_first_capture = 2; | 4208 const intptr_t register_of_first_capture = 2; |
| 4381 intptr_t register_count = capture_count_ * registers_per_capture; | 4209 intptr_t register_count = capture_count_ * registers_per_capture; |
| 4382 intptr_t register_start = | 4210 intptr_t register_start = |
| 4383 register_of_first_capture + capture_from_ * registers_per_capture; | 4211 register_of_first_capture + capture_from_ * registers_per_capture; |
| 4384 | 4212 |
| 4385 RegExpNode* success; | 4213 RegExpNode* success; |
| 4386 if (is_positive()) { | 4214 if (is_positive()) { |
| 4387 RegExpNode* node = ActionNode::BeginSubmatch( | 4215 RegExpNode* node = ActionNode::BeginSubmatch( |
| 4388 stack_pointer_register, | 4216 stack_pointer_register, position_register, |
| 4389 position_register, | 4217 body()->ToNode(compiler, |
| 4390 body()->ToNode( | 4218 ActionNode::PositiveSubmatchSuccess( |
| 4391 compiler, | 4219 stack_pointer_register, position_register, |
| 4392 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, | 4220 register_count, register_start, on_success))); |
| 4393 position_register, | |
| 4394 register_count, | |
| 4395 register_start, | |
| 4396 on_success))); | |
| 4397 return node; | 4221 return node; |
| 4398 } else { | 4222 } else { |
| 4399 // We use a ChoiceNode for a negative lookahead because it has most of | 4223 // We use a ChoiceNode for a negative lookahead because it has most of |
| 4400 // the characteristics we need. It has the body of the lookahead as its | 4224 // the characteristics we need. It has the body of the lookahead as its |
| 4401 // first alternative and the expression after the lookahead of the second | 4225 // first alternative and the expression after the lookahead of the second |
| 4402 // alternative. If the first alternative succeeds then the | 4226 // alternative. If the first alternative succeeds then the |
| 4403 // NegativeSubmatchSuccess will unwind the stack including everything the | 4227 // NegativeSubmatchSuccess will unwind the stack including everything the |
| 4404 // choice node set up and backtrack. If the first alternative fails then | 4228 // choice node set up and backtrack. If the first alternative fails then |
| 4405 // the second alternative is tried, which is exactly the desired result | 4229 // the second alternative is tried, which is exactly the desired result |
| 4406 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special | 4230 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special |
| 4407 // ChoiceNode that knows to ignore the first exit when calculating quick | 4231 // ChoiceNode that knows to ignore the first exit when calculating quick |
| 4408 // checks. | 4232 // checks. |
| 4409 | 4233 |
| 4410 GuardedAlternative body_alt( | 4234 GuardedAlternative body_alt( |
| 4411 body()->ToNode( | 4235 body()->ToNode(compiler, success = new (OZ) NegativeSubmatchSuccess( |
| 4412 compiler, | 4236 stack_pointer_register, position_register, |
| 4413 success = new(OZ) NegativeSubmatchSuccess(stack_pointer_register, | 4237 register_count, register_start, OZ))); |
| 4414 position_register, | 4238 ChoiceNode* choice_node = new (OZ) NegativeLookaheadChoiceNode( |
| 4415 register_count, | 4239 body_alt, GuardedAlternative(on_success), OZ); |
| 4416 register_start, | 4240 return ActionNode::BeginSubmatch(stack_pointer_register, position_register, |
| 4417 OZ))); | |
| 4418 ChoiceNode* choice_node = | |
| 4419 new(OZ) NegativeLookaheadChoiceNode(body_alt, | |
| 4420 GuardedAlternative(on_success), | |
| 4421 OZ); | |
| 4422 return ActionNode::BeginSubmatch(stack_pointer_register, | |
| 4423 position_register, | |
| 4424 choice_node); | 4241 choice_node); |
| 4425 } | 4242 } |
| 4426 } | 4243 } |
| 4427 | 4244 |
| 4428 | 4245 |
| 4429 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, | 4246 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, |
| 4430 RegExpNode* on_success) { | 4247 RegExpNode* on_success) { |
| 4431 return ToNode(body(), index(), compiler, on_success); | 4248 return ToNode(body(), index(), compiler, on_success); |
| 4432 } | 4249 } |
| 4433 | 4250 |
| (...skipping 26 matching lines...) Expand all Loading... |
| 4460 ZoneGrowableArray<CharacterRange>* ranges) { | 4277 ZoneGrowableArray<CharacterRange>* ranges) { |
| 4461 elmc--; | 4278 elmc--; |
| 4462 ASSERT(elmv[elmc] == 0x10000); | 4279 ASSERT(elmv[elmc] == 0x10000); |
| 4463 for (intptr_t i = 0; i < elmc; i += 2) { | 4280 for (intptr_t i = 0; i < elmc; i += 2) { |
| 4464 ASSERT(elmv[i] < elmv[i + 1]); | 4281 ASSERT(elmv[i] < elmv[i + 1]); |
| 4465 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1)); | 4282 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1)); |
| 4466 } | 4283 } |
| 4467 } | 4284 } |
| 4468 | 4285 |
| 4469 | 4286 |
| 4470 static void AddClassNegated(const intptr_t *elmv, | 4287 static void AddClassNegated(const intptr_t* elmv, |
| 4471 intptr_t elmc, | 4288 intptr_t elmc, |
| 4472 ZoneGrowableArray<CharacterRange>* ranges) { | 4289 ZoneGrowableArray<CharacterRange>* ranges) { |
| 4473 elmc--; | 4290 elmc--; |
| 4474 ASSERT(elmv[elmc] == 0x10000); | 4291 ASSERT(elmv[elmc] == 0x10000); |
| 4475 ASSERT(elmv[0] != 0x0000); | 4292 ASSERT(elmv[0] != 0x0000); |
| 4476 ASSERT(elmv[elmc-1] != Utf16::kMaxCodeUnit); | 4293 ASSERT(elmv[elmc - 1] != Utf16::kMaxCodeUnit); |
| 4477 uint16_t last = 0x0000; | 4294 uint16_t last = 0x0000; |
| 4478 for (intptr_t i = 0; i < elmc; i += 2) { | 4295 for (intptr_t i = 0; i < elmc; i += 2) { |
| 4479 ASSERT(last <= elmv[i] - 1); | 4296 ASSERT(last <= elmv[i] - 1); |
| 4480 ASSERT(elmv[i] < elmv[i + 1]); | 4297 ASSERT(elmv[i] < elmv[i + 1]); |
| 4481 ranges->Add(CharacterRange(last, elmv[i] - 1)); | 4298 ranges->Add(CharacterRange(last, elmv[i] - 1)); |
| 4482 last = elmv[i + 1]; | 4299 last = elmv[i + 1]; |
| 4483 } | 4300 } |
| 4484 ranges->Add(CharacterRange(last, Utf16::kMaxCodeUnit)); | 4301 ranges->Add(CharacterRange(last, Utf16::kMaxCodeUnit)); |
| 4485 } | 4302 } |
| 4486 | 4303 |
| (...skipping 13 matching lines...) Expand all Loading... |
| 4500 case 'W': | 4317 case 'W': |
| 4501 AddClassNegated(kWordRanges, kWordRangeCount, ranges); | 4318 AddClassNegated(kWordRanges, kWordRangeCount, ranges); |
| 4502 break; | 4319 break; |
| 4503 case 'd': | 4320 case 'd': |
| 4504 AddClass(kDigitRanges, kDigitRangeCount, ranges); | 4321 AddClass(kDigitRanges, kDigitRangeCount, ranges); |
| 4505 break; | 4322 break; |
| 4506 case 'D': | 4323 case 'D': |
| 4507 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges); | 4324 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges); |
| 4508 break; | 4325 break; |
| 4509 case '.': | 4326 case '.': |
| 4510 AddClassNegated(kLineTerminatorRanges, | 4327 AddClassNegated(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges); |
| 4511 kLineTerminatorRangeCount, | |
| 4512 ranges); | |
| 4513 break; | 4328 break; |
| 4514 // This is not a character range as defined by the spec but a | 4329 // This is not a character range as defined by the spec but a |
| 4515 // convenient shorthand for a character class that matches any | 4330 // convenient shorthand for a character class that matches any |
| 4516 // character. | 4331 // character. |
| 4517 case '*': | 4332 case '*': |
| 4518 ranges->Add(CharacterRange::Everything()); | 4333 ranges->Add(CharacterRange::Everything()); |
| 4519 break; | 4334 break; |
| 4520 // This is the set of characters matched by the $ and ^ symbols | 4335 // This is the set of characters matched by the $ and ^ symbols |
| 4521 // in multiline mode. | 4336 // in multiline mode. |
| 4522 case 'n': | 4337 case 'n': |
| 4523 AddClass(kLineTerminatorRanges, | 4338 AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges); |
| 4524 kLineTerminatorRangeCount, | |
| 4525 ranges); | |
| 4526 break; | 4339 break; |
| 4527 default: | 4340 default: |
| 4528 UNREACHABLE(); | 4341 UNREACHABLE(); |
| 4529 } | 4342 } |
| 4530 } | 4343 } |
| 4531 | 4344 |
| 4532 | 4345 |
| 4533 void CharacterRange::AddCaseEquivalents( | 4346 void CharacterRange::AddCaseEquivalents( |
| 4534 ZoneGrowableArray<CharacterRange>* ranges, | 4347 ZoneGrowableArray<CharacterRange>* ranges, |
| 4535 bool is_one_byte, | 4348 bool is_one_byte, |
| 4536 Zone* zone) { | 4349 Zone* zone) { |
| 4537 uint16_t bottom = from(); | 4350 uint16_t bottom = from(); |
| 4538 uint16_t top = to(); | 4351 uint16_t top = to(); |
| 4539 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) { | 4352 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) { |
| 4540 if (bottom > Symbols::kMaxOneCharCodeSymbol) return; | 4353 if (bottom > Symbols::kMaxOneCharCodeSymbol) return; |
| 4541 if (top > Symbols::kMaxOneCharCodeSymbol) { | 4354 if (top > Symbols::kMaxOneCharCodeSymbol) { |
| 4542 top = Symbols::kMaxOneCharCodeSymbol; | 4355 top = Symbols::kMaxOneCharCodeSymbol; |
| 4543 } | 4356 } |
| 4544 } | 4357 } |
| 4545 | 4358 |
| 4546 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize; | 4359 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize; |
| 4547 unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange; | 4360 unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange; |
| 4548 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 4361 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 4549 if (top == bottom) { | 4362 if (top == bottom) { |
| 4550 // If this is a singleton we just expand the one character. | 4363 // If this is a singleton we just expand the one character. |
| 4551 intptr_t length = jsregexp_uncanonicalize.get(bottom, '\0', chars); // NOLIN
T | 4364 intptr_t length = |
| 4365 jsregexp_uncanonicalize.get(bottom, '\0', chars); // NOLINT |
| 4552 for (intptr_t i = 0; i < length; i++) { | 4366 for (intptr_t i = 0; i < length; i++) { |
| 4553 uint32_t chr = chars[i]; | 4367 uint32_t chr = chars[i]; |
| 4554 if (chr != bottom) { | 4368 if (chr != bottom) { |
| 4555 ranges->Add(CharacterRange::Singleton(chars[i])); | 4369 ranges->Add(CharacterRange::Singleton(chars[i])); |
| 4556 } | 4370 } |
| 4557 } | 4371 } |
| 4558 } else { | 4372 } else { |
| 4559 // If this is a range we expand the characters block by block, | 4373 // If this is a range we expand the characters block by block, |
| 4560 // expanding contiguous subranges (blocks) one at a time. | 4374 // expanding contiguous subranges (blocks) one at a time. |
| 4561 // The approach is as follows. For a given start character we | 4375 // The approach is as follows. For a given start character we |
| (...skipping 17 matching lines...) Expand all Loading... |
| 4579 while (pos <= top) { | 4393 while (pos <= top) { |
| 4580 intptr_t length = jsregexp_canonrange.get(pos, '\0', range); | 4394 intptr_t length = jsregexp_canonrange.get(pos, '\0', range); |
| 4581 uint16_t block_end; | 4395 uint16_t block_end; |
| 4582 if (length == 0) { | 4396 if (length == 0) { |
| 4583 block_end = pos; | 4397 block_end = pos; |
| 4584 } else { | 4398 } else { |
| 4585 ASSERT(length == 1); | 4399 ASSERT(length == 1); |
| 4586 block_end = range[0]; | 4400 block_end = range[0]; |
| 4587 } | 4401 } |
| 4588 intptr_t end = (block_end > top) ? top : block_end; | 4402 intptr_t end = (block_end > top) ? top : block_end; |
| 4589 length = jsregexp_uncanonicalize.get(block_end, '\0', range); // NOLINT | 4403 length = jsregexp_uncanonicalize.get(block_end, '\0', range); // NOLINT |
| 4590 for (intptr_t i = 0; i < length; i++) { | 4404 for (intptr_t i = 0; i < length; i++) { |
| 4591 uint32_t c = range[i]; | 4405 uint32_t c = range[i]; |
| 4592 uint16_t range_from = c - (block_end - pos); | 4406 uint16_t range_from = c - (block_end - pos); |
| 4593 uint16_t range_to = c - (block_end - end); | 4407 uint16_t range_to = c - (block_end - end); |
| 4594 if (!(bottom <= range_from && range_to <= top)) { | 4408 if (!(bottom <= range_from && range_to <= top)) { |
| 4595 ranges->Add(CharacterRange(range_from, range_to)); | 4409 ranges->Add(CharacterRange(range_from, range_to)); |
| 4596 } | 4410 } |
| 4597 } | 4411 } |
| 4598 pos = end + 1; | 4412 pos = end + 1; |
| 4599 } | 4413 } |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4637 } | 4451 } |
| 4638 } else { | 4452 } else { |
| 4639 for (intptr_t i = 0; i < count; i++) { | 4453 for (intptr_t i = 0; i < count; i++) { |
| 4640 (*list)[to + i] = list->At(from + i); | 4454 (*list)[to + i] = list->At(from + i); |
| 4641 } | 4455 } |
| 4642 } | 4456 } |
| 4643 } | 4457 } |
| 4644 | 4458 |
| 4645 | 4459 |
| 4646 static intptr_t InsertRangeInCanonicalList( | 4460 static intptr_t InsertRangeInCanonicalList( |
| 4647 ZoneGrowableArray<CharacterRange>* list, | 4461 ZoneGrowableArray<CharacterRange>* list, |
| 4648 intptr_t count, | 4462 intptr_t count, |
| 4649 CharacterRange insert) { | 4463 CharacterRange insert) { |
| 4650 // Inserts a range into list[0..count[, which must be sorted | 4464 // Inserts a range into list[0..count[, which must be sorted |
| 4651 // by from value and non-overlapping and non-adjacent, using at most | 4465 // by from value and non-overlapping and non-adjacent, using at most |
| 4652 // list[0..count] for the result. Returns the number of resulting | 4466 // list[0..count] for the result. Returns the number of resulting |
| 4653 // canonicalized ranges. Inserting a range may collapse existing ranges into | 4467 // canonicalized ranges. Inserting a range may collapse existing ranges into |
| 4654 // fewer ranges, so the return value can be anything in the range 1..count+1. | 4468 // fewer ranges, so the return value can be anything in the range 1..count+1. |
| 4655 uint16_t from = insert.from(); | 4469 uint16_t from = insert.from(); |
| 4656 uint16_t to = insert.to(); | 4470 uint16_t to = insert.to(); |
| 4657 intptr_t start_pos = 0; | 4471 intptr_t start_pos = 0; |
| 4658 intptr_t end_pos = count; | 4472 intptr_t end_pos = count; |
| 4659 for (intptr_t i = count - 1; i >= 0; i--) { | 4473 for (intptr_t i = count - 1; i >= 0; i--) { |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4727 i++; | 4541 i++; |
| 4728 } | 4542 } |
| 4729 // Canonical until the i'th range. If that's all of them, we are done. | 4543 // Canonical until the i'th range. If that's all of them, we are done. |
| 4730 if (i == n) return; | 4544 if (i == n) return; |
| 4731 | 4545 |
| 4732 // The ranges at index i and forward are not canonicalized. Make them so by | 4546 // The ranges at index i and forward are not canonicalized. Make them so by |
| 4733 // doing the equivalent of insertion sort (inserting each into the previous | 4547 // doing the equivalent of insertion sort (inserting each into the previous |
| 4734 // list, in order). | 4548 // list, in order). |
| 4735 // Notice that inserting a range can reduce the number of ranges in the | 4549 // Notice that inserting a range can reduce the number of ranges in the |
| 4736 // result due to combining of adjacent and overlapping ranges. | 4550 // result due to combining of adjacent and overlapping ranges. |
| 4737 intptr_t read = i; // Range to insert. | 4551 intptr_t read = i; // Range to insert. |
| 4738 intptr_t num_canonical = i; // Length of canonicalized part of list. | 4552 intptr_t num_canonical = i; // Length of canonicalized part of list. |
| 4739 do { | 4553 do { |
| 4740 num_canonical = InsertRangeInCanonicalList(character_ranges, | 4554 num_canonical = InsertRangeInCanonicalList(character_ranges, num_canonical, |
| 4741 num_canonical, | |
| 4742 character_ranges->At(read)); | 4555 character_ranges->At(read)); |
| 4743 read++; | 4556 read++; |
| 4744 } while (read < n); | 4557 } while (read < n); |
| 4745 character_ranges->TruncateTo(num_canonical); | 4558 character_ranges->TruncateTo(num_canonical); |
| 4746 | 4559 |
| 4747 ASSERT(CharacterRange::IsCanonical(character_ranges)); | 4560 ASSERT(CharacterRange::IsCanonical(character_ranges)); |
| 4748 } | 4561 } |
| 4749 | 4562 |
| 4750 | 4563 |
| 4751 void CharacterRange::Negate(ZoneGrowableArray<CharacterRange>* ranges, | 4564 void CharacterRange::Negate(ZoneGrowableArray<CharacterRange>* ranges, |
| (...skipping 17 matching lines...) Expand all Loading... |
| 4769 negated_ranges->Add(CharacterRange(from + 1, Utf16::kMaxCodeUnit)); | 4582 negated_ranges->Add(CharacterRange(from + 1, Utf16::kMaxCodeUnit)); |
| 4770 } | 4583 } |
| 4771 } | 4584 } |
| 4772 | 4585 |
| 4773 | 4586 |
| 4774 // ------------------------------------------------------------------- | 4587 // ------------------------------------------------------------------- |
| 4775 // Splay tree | 4588 // Splay tree |
| 4776 | 4589 |
| 4777 | 4590 |
| 4778 // Workaround for the fact that ZoneGrowableArray does not have contains(). | 4591 // Workaround for the fact that ZoneGrowableArray does not have contains(). |
| 4779 static bool ArrayContains(ZoneGrowableArray<unsigned>* array, | 4592 static bool ArrayContains(ZoneGrowableArray<unsigned>* array, unsigned value) { |
| 4780 unsigned value) { | |
| 4781 for (intptr_t i = 0; i < array->length(); i++) { | 4593 for (intptr_t i = 0; i < array->length(); i++) { |
| 4782 if (array->At(i) == value) { | 4594 if (array->At(i) == value) { |
| 4783 return true; | 4595 return true; |
| 4784 } | 4596 } |
| 4785 } | 4597 } |
| 4786 return false; | 4598 return false; |
| 4787 } | 4599 } |
| 4788 | 4600 |
| 4789 | 4601 |
| 4790 void OutSet::Set(unsigned value, Zone* zone) { | 4602 void OutSet::Set(unsigned value, Zone* zone) { |
| 4791 if (value < kFirstLimit) { | 4603 if (value < kFirstLimit) { |
| 4792 first_ |= (1 << value); | 4604 first_ |= (1 << value); |
| 4793 } else { | 4605 } else { |
| 4794 if (remaining_ == NULL) | 4606 if (remaining_ == NULL) |
| 4795 remaining_ = new(zone) ZoneGrowableArray<unsigned>(1); | 4607 remaining_ = new (zone) ZoneGrowableArray<unsigned>(1); |
| 4796 | 4608 |
| 4797 bool remaining_contains_value = ArrayContains(remaining_, value); | 4609 bool remaining_contains_value = ArrayContains(remaining_, value); |
| 4798 if (remaining_->is_empty() || !remaining_contains_value) { | 4610 if (remaining_->is_empty() || !remaining_contains_value) { |
| 4799 remaining_->Add(value); | 4611 remaining_->Add(value); |
| 4800 } | 4612 } |
| 4801 } | 4613 } |
| 4802 } | 4614 } |
| 4803 | 4615 |
| 4804 | 4616 |
| 4805 bool OutSet::Get(unsigned value) const { | 4617 bool OutSet::Get(unsigned value) const { |
| 4806 if (value < kFirstLimit) { | 4618 if (value < kFirstLimit) { |
| 4807 return (first_ & (1 << value)) != 0; | 4619 return (first_ & (1 << value)) != 0; |
| 4808 } else if (remaining_ == NULL) { | 4620 } else if (remaining_ == NULL) { |
| 4809 return false; | 4621 return false; |
| 4810 } else { | 4622 } else { |
| 4811 return ArrayContains(remaining_, value); | 4623 return ArrayContains(remaining_, value); |
| 4812 } | 4624 } |
| 4813 } | 4625 } |
| 4814 | 4626 |
| 4815 | 4627 |
| 4816 // ------------------------------------------------------------------- | 4628 // ------------------------------------------------------------------- |
| 4817 // Analysis | 4629 // Analysis |
| 4818 | 4630 |
| 4819 | 4631 |
| 4820 void Analysis::EnsureAnalyzed(RegExpNode* that) { | 4632 void Analysis::EnsureAnalyzed(RegExpNode* that) { |
| 4821 if (that->info()->been_analyzed || that->info()->being_analyzed) | 4633 if (that->info()->been_analyzed || that->info()->being_analyzed) return; |
| 4822 return; | |
| 4823 that->info()->being_analyzed = true; | 4634 that->info()->being_analyzed = true; |
| 4824 that->Accept(this); | 4635 that->Accept(this); |
| 4825 that->info()->being_analyzed = false; | 4636 that->info()->being_analyzed = false; |
| 4826 that->info()->been_analyzed = true; | 4637 that->info()->been_analyzed = true; |
| 4827 } | 4638 } |
| 4828 | 4639 |
| 4829 | 4640 |
| 4830 void Analysis::VisitEnd(EndNode* that) { | 4641 void Analysis::VisitEnd(EndNode* that) { |
| 4831 // nothing to do | 4642 // nothing to do |
| 4832 } | 4643 } |
| (...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4960 RegExpAtom* atom = text.atom(); | 4771 RegExpAtom* atom = text.atom(); |
| 4961 for (intptr_t j = 0; j < atom->length(); j++, offset++) { | 4772 for (intptr_t j = 0; j < atom->length(); j++, offset++) { |
| 4962 if (offset >= bm->length()) { | 4773 if (offset >= bm->length()) { |
| 4963 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 4774 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| 4964 return; | 4775 return; |
| 4965 } | 4776 } |
| 4966 uint16_t character = atom->data()->At(j); | 4777 uint16_t character = atom->data()->At(j); |
| 4967 if (bm->compiler()->ignore_case()) { | 4778 if (bm->compiler()->ignore_case()) { |
| 4968 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 4779 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 4969 intptr_t length = GetCaseIndependentLetters( | 4780 intptr_t length = GetCaseIndependentLetters( |
| 4970 character, | 4781 character, bm->max_char() == Symbols::kMaxOneCharCodeSymbol, |
| 4971 bm->max_char() == Symbols::kMaxOneCharCodeSymbol, | |
| 4972 chars); | 4782 chars); |
| 4973 for (intptr_t j = 0; j < length; j++) { | 4783 for (intptr_t j = 0; j < length; j++) { |
| 4974 bm->Set(offset, chars[j]); | 4784 bm->Set(offset, chars[j]); |
| 4975 } | 4785 } |
| 4976 } else { | 4786 } else { |
| 4977 if (character <= max_char) bm->Set(offset, character); | 4787 if (character <= max_char) bm->Set(offset, character); |
| 4978 } | 4788 } |
| 4979 } | 4789 } |
| 4980 } else { | 4790 } else { |
| 4981 ASSERT(text.text_type() == TextElement::CHAR_CLASS); | 4791 ASSERT(text.text_type() == TextElement::CHAR_CLASS); |
| 4982 RegExpCharacterClass* char_class = text.char_class(); | 4792 RegExpCharacterClass* char_class = text.char_class(); |
| 4983 ZoneGrowableArray<CharacterRange>* ranges = char_class->ranges(); | 4793 ZoneGrowableArray<CharacterRange>* ranges = char_class->ranges(); |
| 4984 if (char_class->is_negated()) { | 4794 if (char_class->is_negated()) { |
| 4985 bm->SetAll(offset); | 4795 bm->SetAll(offset); |
| 4986 } else { | 4796 } else { |
| 4987 for (intptr_t k = 0; k < ranges->length(); k++) { | 4797 for (intptr_t k = 0; k < ranges->length(); k++) { |
| 4988 CharacterRange& range = (*ranges)[k]; | 4798 CharacterRange& range = (*ranges)[k]; |
| 4989 if (range.from() > max_char) continue; | 4799 if (range.from() > max_char) continue; |
| 4990 intptr_t to = Utils::Minimum(max_char, | 4800 intptr_t to = |
| 4991 static_cast<intptr_t>(range.to())); | 4801 Utils::Minimum(max_char, static_cast<intptr_t>(range.to())); |
| 4992 bm->SetInterval(offset, Interval(range.from(), to)); | 4802 bm->SetInterval(offset, Interval(range.from(), to)); |
| 4993 } | 4803 } |
| 4994 } | 4804 } |
| 4995 offset++; | 4805 offset++; |
| 4996 } | 4806 } |
| 4997 } | 4807 } |
| 4998 if (offset >= bm->length()) { | 4808 if (offset >= bm->length()) { |
| 4999 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 4809 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| 5000 return; | 4810 return; |
| 5001 } | 4811 } |
| 5002 on_success()->FillInBMInfo(offset, | 4812 on_success()->FillInBMInfo(offset, budget - 1, bm, |
| 5003 budget - 1, | |
| 5004 bm, | |
| 5005 true); // Not at start after a text node. | 4813 true); // Not at start after a text node. |
| 5006 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 4814 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| 5007 } | 4815 } |
| 5008 | 4816 |
| 5009 | 4817 |
| 5010 RegExpEngine::CompilationResult RegExpEngine::CompileIR( | 4818 RegExpEngine::CompilationResult RegExpEngine::CompileIR( |
| 5011 RegExpCompileData* data, | 4819 RegExpCompileData* data, |
| 5012 const ParsedFunction* parsed_function, | 4820 const ParsedFunction* parsed_function, |
| 5013 const ZoneGrowableArray<const ICData*>& ic_data_array) { | 4821 const ZoneGrowableArray<const ICData*>& ic_data_array) { |
| 5014 ASSERT(!FLAG_interpret_irregexp); | 4822 ASSERT(!FLAG_interpret_irregexp); |
| (...skipping 17 matching lines...) Expand all Loading... |
| 5032 // TODO(zerny): Frequency sampling is currently disabled because of several | 4840 // TODO(zerny): Frequency sampling is currently disabled because of several |
| 5033 // issues. We do not want to store subject strings in the regexp object since | 4841 // issues. We do not want to store subject strings in the regexp object since |
| 5034 // they might be long and we should not prevent their garbage collection. | 4842 // they might be long and we should not prevent their garbage collection. |
| 5035 // Passing them to this function explicitly does not help, since we must | 4843 // Passing them to this function explicitly does not help, since we must |
| 5036 // generate exactly the same IR for both the unoptimizing and optimizing | 4844 // generate exactly the same IR for both the unoptimizing and optimizing |
| 5037 // pipelines (otherwise it gets confused when i.e. deopt id's differ). | 4845 // pipelines (otherwise it gets confused when i.e. deopt id's differ). |
| 5038 // An option would be to store sampling results in the regexp object, but | 4846 // An option would be to store sampling results in the regexp object, but |
| 5039 // I'm not sure the performance gains are relevant enough. | 4847 // I'm not sure the performance gains are relevant enough. |
| 5040 | 4848 |
| 5041 // Wrap the body of the regexp in capture #0. | 4849 // Wrap the body of the regexp in capture #0. |
| 5042 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, | 4850 RegExpNode* captured_body = |
| 5043 0, | 4851 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept()); |
| 5044 &compiler, | |
| 5045 compiler.accept()); | |
| 5046 | 4852 |
| 5047 RegExpNode* node = captured_body; | 4853 RegExpNode* node = captured_body; |
| 5048 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); | 4854 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); |
| 5049 bool is_start_anchored = data->tree->IsAnchoredAtStart(); | 4855 bool is_start_anchored = data->tree->IsAnchoredAtStart(); |
| 5050 intptr_t max_length = data->tree->max_match(); | 4856 intptr_t max_length = data->tree->max_match(); |
| 5051 if (!is_start_anchored) { | 4857 if (!is_start_anchored) { |
| 5052 // Add a .*? at the beginning, outside the body capture, unless | 4858 // Add a .*? at the beginning, outside the body capture, unless |
| 5053 // this expression is anchored at the beginning. | 4859 // this expression is anchored at the beginning. |
| 5054 RegExpNode* loop_node = | 4860 RegExpNode* loop_node = RegExpQuantifier::ToNode( |
| 5055 RegExpQuantifier::ToNode(0, | 4861 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'), |
| 5056 RegExpTree::kInfinity, | 4862 &compiler, captured_body, data->contains_anchor); |
| 5057 false, | |
| 5058 new(zone) RegExpCharacterClass('*'), | |
| 5059 &compiler, | |
| 5060 captured_body, | |
| 5061 data->contains_anchor); | |
| 5062 | 4863 |
| 5063 if (data->contains_anchor) { | 4864 if (data->contains_anchor) { |
| 5064 // Unroll loop once, to take care of the case that might start | 4865 // Unroll loop once, to take care of the case that might start |
| 5065 // at the start of input. | 4866 // at the start of input. |
| 5066 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); | 4867 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone); |
| 5067 first_step_node->AddAlternative(GuardedAlternative(captured_body)); | 4868 first_step_node->AddAlternative(GuardedAlternative(captured_body)); |
| 5068 first_step_node->AddAlternative(GuardedAlternative( | 4869 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( |
| 5069 new(zone) TextNode( | 4870 new (zone) RegExpCharacterClass('*'), loop_node))); |
| 5070 new(zone) RegExpCharacterClass('*'), loop_node))); | |
| 5071 node = first_step_node; | 4871 node = first_step_node; |
| 5072 } else { | 4872 } else { |
| 5073 node = loop_node; | 4873 node = loop_node; |
| 5074 } | 4874 } |
| 5075 } | 4875 } |
| 5076 if (is_one_byte) { | 4876 if (is_one_byte) { |
| 5077 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 4877 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
| 5078 // Do it again to propagate the new nodes to places where they were not | 4878 // Do it again to propagate the new nodes to places where they were not |
| 5079 // put because they had not been calculated yet. | 4879 // put because they had not been calculated yet. |
| 5080 if (node != NULL) { | 4880 if (node != NULL) { |
| 5081 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 4881 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
| 5082 } | 4882 } |
| 5083 } | 4883 } |
| 5084 | 4884 |
| 5085 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); | 4885 if (node == NULL) node = new (zone) EndNode(EndNode::BACKTRACK, zone); |
| 5086 data->node = node; | 4886 data->node = node; |
| 5087 Analysis analysis(ignore_case, is_one_byte); | 4887 Analysis analysis(ignore_case, is_one_byte); |
| 5088 analysis.EnsureAnalyzed(node); | 4888 analysis.EnsureAnalyzed(node); |
| 5089 if (analysis.has_failed()) { | 4889 if (analysis.has_failed()) { |
| 5090 const char* error_message = analysis.error_message(); | 4890 const char* error_message = analysis.error_message(); |
| 5091 return CompilationResult(error_message); | 4891 return CompilationResult(error_message); |
| 5092 } | 4892 } |
| 5093 | 4893 |
| 5094 // Native regexp implementation. | 4894 // Native regexp implementation. |
| 5095 | 4895 |
| 5096 IRRegExpMacroAssembler* macro_assembler = | 4896 IRRegExpMacroAssembler* macro_assembler = |
| 5097 new(zone) IRRegExpMacroAssembler(specialization_cid, | 4897 new (zone) IRRegExpMacroAssembler(specialization_cid, data->capture_count, |
| 5098 data->capture_count, | 4898 parsed_function, ic_data_array, zone); |
| 5099 parsed_function, | |
| 5100 ic_data_array, | |
| 5101 zone); | |
| 5102 | 4899 |
| 5103 // Inserted here, instead of in Assembler, because it depends on information | 4900 // Inserted here, instead of in Assembler, because it depends on information |
| 5104 // in the AST that isn't replicated in the Node structure. | 4901 // in the AST that isn't replicated in the Node structure. |
| 5105 static const intptr_t kMaxBacksearchLimit = 1024; | 4902 static const intptr_t kMaxBacksearchLimit = 1024; |
| 5106 if (is_end_anchored && | 4903 if (is_end_anchored && !is_start_anchored && |
| 5107 !is_start_anchored && | |
| 5108 max_length < kMaxBacksearchLimit) { | 4904 max_length < kMaxBacksearchLimit) { |
| 5109 macro_assembler->SetCurrentPositionFromEnd(max_length); | 4905 macro_assembler->SetCurrentPositionFromEnd(max_length); |
| 5110 } | 4906 } |
| 5111 | 4907 |
| 5112 if (is_global) { | 4908 if (is_global) { |
| 5113 macro_assembler->set_global_mode( | 4909 macro_assembler->set_global_mode( |
| 5114 (data->tree->min_match() > 0) | 4910 (data->tree->min_match() > 0) |
| 5115 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK | 4911 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK |
| 5116 : RegExpMacroAssembler::GLOBAL); | 4912 : RegExpMacroAssembler::GLOBAL); |
| 5117 } | 4913 } |
| 5118 | 4914 |
| 5119 RegExpEngine::CompilationResult result = | 4915 RegExpEngine::CompilationResult result = |
| 5120 compiler.Assemble(macro_assembler, | 4916 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); |
| 5121 node, | |
| 5122 data->capture_count, | |
| 5123 pattern); | |
| 5124 | 4917 |
| 5125 if (FLAG_trace_irregexp) { | 4918 if (FLAG_trace_irregexp) { |
| 5126 macro_assembler->PrintBlocks(); | 4919 macro_assembler->PrintBlocks(); |
| 5127 } | 4920 } |
| 5128 | 4921 |
| 5129 return result; | 4922 return result; |
| 5130 } | 4923 } |
| 5131 | 4924 |
| 5132 | 4925 |
| 5133 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode( | 4926 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode( |
| (...skipping 15 matching lines...) Expand all Loading... |
| 5149 // TODO(zerny): Frequency sampling is currently disabled because of several | 4942 // TODO(zerny): Frequency sampling is currently disabled because of several |
| 5150 // issues. We do not want to store subject strings in the regexp object since | 4943 // issues. We do not want to store subject strings in the regexp object since |
| 5151 // they might be long and we should not prevent their garbage collection. | 4944 // they might be long and we should not prevent their garbage collection. |
| 5152 // Passing them to this function explicitly does not help, since we must | 4945 // Passing them to this function explicitly does not help, since we must |
| 5153 // generate exactly the same IR for both the unoptimizing and optimizing | 4946 // generate exactly the same IR for both the unoptimizing and optimizing |
| 5154 // pipelines (otherwise it gets confused when i.e. deopt id's differ). | 4947 // pipelines (otherwise it gets confused when i.e. deopt id's differ). |
| 5155 // An option would be to store sampling results in the regexp object, but | 4948 // An option would be to store sampling results in the regexp object, but |
| 5156 // I'm not sure the performance gains are relevant enough. | 4949 // I'm not sure the performance gains are relevant enough. |
| 5157 | 4950 |
| 5158 // Wrap the body of the regexp in capture #0. | 4951 // Wrap the body of the regexp in capture #0. |
| 5159 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, | 4952 RegExpNode* captured_body = |
| 5160 0, | 4953 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept()); |
| 5161 &compiler, | |
| 5162 compiler.accept()); | |
| 5163 | 4954 |
| 5164 RegExpNode* node = captured_body; | 4955 RegExpNode* node = captured_body; |
| 5165 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); | 4956 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); |
| 5166 bool is_start_anchored = data->tree->IsAnchoredAtStart(); | 4957 bool is_start_anchored = data->tree->IsAnchoredAtStart(); |
| 5167 intptr_t max_length = data->tree->max_match(); | 4958 intptr_t max_length = data->tree->max_match(); |
| 5168 if (!is_start_anchored) { | 4959 if (!is_start_anchored) { |
| 5169 // Add a .*? at the beginning, outside the body capture, unless | 4960 // Add a .*? at the beginning, outside the body capture, unless |
| 5170 // this expression is anchored at the beginning. | 4961 // this expression is anchored at the beginning. |
| 5171 RegExpNode* loop_node = | 4962 RegExpNode* loop_node = RegExpQuantifier::ToNode( |
| 5172 RegExpQuantifier::ToNode(0, | 4963 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'), |
| 5173 RegExpTree::kInfinity, | 4964 &compiler, captured_body, data->contains_anchor); |
| 5174 false, | |
| 5175 new(zone) RegExpCharacterClass('*'), | |
| 5176 &compiler, | |
| 5177 captured_body, | |
| 5178 data->contains_anchor); | |
| 5179 | 4965 |
| 5180 if (data->contains_anchor) { | 4966 if (data->contains_anchor) { |
| 5181 // Unroll loop once, to take care of the case that might start | 4967 // Unroll loop once, to take care of the case that might start |
| 5182 // at the start of input. | 4968 // at the start of input. |
| 5183 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); | 4969 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone); |
| 5184 first_step_node->AddAlternative(GuardedAlternative(captured_body)); | 4970 first_step_node->AddAlternative(GuardedAlternative(captured_body)); |
| 5185 first_step_node->AddAlternative(GuardedAlternative( | 4971 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( |
| 5186 new(zone) TextNode( | 4972 new (zone) RegExpCharacterClass('*'), loop_node))); |
| 5187 new(zone) RegExpCharacterClass('*'), loop_node))); | |
| 5188 node = first_step_node; | 4973 node = first_step_node; |
| 5189 } else { | 4974 } else { |
| 5190 node = loop_node; | 4975 node = loop_node; |
| 5191 } | 4976 } |
| 5192 } | 4977 } |
| 5193 if (is_one_byte) { | 4978 if (is_one_byte) { |
| 5194 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 4979 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
| 5195 // Do it again to propagate the new nodes to places where they were not | 4980 // Do it again to propagate the new nodes to places where they were not |
| 5196 // put because they had not been calculated yet. | 4981 // put because they had not been calculated yet. |
| 5197 if (node != NULL) { | 4982 if (node != NULL) { |
| 5198 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 4983 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
| 5199 } | 4984 } |
| 5200 } | 4985 } |
| 5201 | 4986 |
| 5202 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); | 4987 if (node == NULL) node = new (zone) EndNode(EndNode::BACKTRACK, zone); |
| 5203 data->node = node; | 4988 data->node = node; |
| 5204 Analysis analysis(ignore_case, is_one_byte); | 4989 Analysis analysis(ignore_case, is_one_byte); |
| 5205 analysis.EnsureAnalyzed(node); | 4990 analysis.EnsureAnalyzed(node); |
| 5206 if (analysis.has_failed()) { | 4991 if (analysis.has_failed()) { |
| 5207 const char* error_message = analysis.error_message(); | 4992 const char* error_message = analysis.error_message(); |
| 5208 return CompilationResult(error_message); | 4993 return CompilationResult(error_message); |
| 5209 } | 4994 } |
| 5210 | 4995 |
| 5211 // Bytecode regexp implementation. | 4996 // Bytecode regexp implementation. |
| 5212 | 4997 |
| 5213 ZoneGrowableArray<uint8_t> buffer(zone, 1024); | 4998 ZoneGrowableArray<uint8_t> buffer(zone, 1024); |
| 5214 BytecodeRegExpMacroAssembler* macro_assembler = | 4999 BytecodeRegExpMacroAssembler* macro_assembler = |
| 5215 new(zone) BytecodeRegExpMacroAssembler(&buffer, zone); | 5000 new (zone) BytecodeRegExpMacroAssembler(&buffer, zone); |
| 5216 | 5001 |
| 5217 // Inserted here, instead of in Assembler, because it depends on information | 5002 // Inserted here, instead of in Assembler, because it depends on information |
| 5218 // in the AST that isn't replicated in the Node structure. | 5003 // in the AST that isn't replicated in the Node structure. |
| 5219 static const intptr_t kMaxBacksearchLimit = 1024; | 5004 static const intptr_t kMaxBacksearchLimit = 1024; |
| 5220 if (is_end_anchored && | 5005 if (is_end_anchored && !is_start_anchored && |
| 5221 !is_start_anchored && | |
| 5222 max_length < kMaxBacksearchLimit) { | 5006 max_length < kMaxBacksearchLimit) { |
| 5223 macro_assembler->SetCurrentPositionFromEnd(max_length); | 5007 macro_assembler->SetCurrentPositionFromEnd(max_length); |
| 5224 } | 5008 } |
| 5225 | 5009 |
| 5226 if (is_global) { | 5010 if (is_global) { |
| 5227 macro_assembler->set_global_mode( | 5011 macro_assembler->set_global_mode( |
| 5228 (data->tree->min_match() > 0) | 5012 (data->tree->min_match() > 0) |
| 5229 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK | 5013 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK |
| 5230 : RegExpMacroAssembler::GLOBAL); | 5014 : RegExpMacroAssembler::GLOBAL); |
| 5231 } | 5015 } |
| 5232 | 5016 |
| 5233 RegExpEngine::CompilationResult result = | 5017 RegExpEngine::CompilationResult result = |
| 5234 compiler.Assemble(macro_assembler, | 5018 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); |
| 5235 node, | |
| 5236 data->capture_count, | |
| 5237 pattern); | |
| 5238 | 5019 |
| 5239 if (FLAG_trace_irregexp) { | 5020 if (FLAG_trace_irregexp) { |
| 5240 macro_assembler->PrintBlocks(); | 5021 macro_assembler->PrintBlocks(); |
| 5241 } | 5022 } |
| 5242 | 5023 |
| 5243 return result; | 5024 return result; |
| 5244 } | 5025 } |
| 5245 | 5026 |
| 5246 | 5027 |
| 5247 static void CreateSpecializedFunction(Thread* thread, Zone* zone, | 5028 static void CreateSpecializedFunction(Thread* thread, |
| 5029 Zone* zone, |
| 5248 const RegExp& regexp, | 5030 const RegExp& regexp, |
| 5249 intptr_t specialization_cid, | 5031 intptr_t specialization_cid, |
| 5250 const Object& owner) { | 5032 const Object& owner) { |
| 5251 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount; | 5033 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount; |
| 5252 | 5034 |
| 5253 Function& fn = Function::Handle(zone, Function::New( | 5035 Function& fn = |
| 5254 Symbols::ColonMatcher(), | 5036 Function::Handle(zone, Function::New(Symbols::ColonMatcher(), |
| 5255 RawFunction::kIrregexpFunction, | 5037 RawFunction::kIrregexpFunction, |
| 5256 true, // Static. | 5038 true, // Static. |
| 5257 false, // Not const. | 5039 false, // Not const. |
| 5258 false, // Not abstract. | 5040 false, // Not abstract. |
| 5259 false, // Not external. | 5041 false, // Not external. |
| 5260 false, // Not native. | 5042 false, // Not native. |
| 5261 owner, | 5043 owner, TokenPosition::kMinSource)); |
| 5262 TokenPosition::kMinSource)); | |
| 5263 | 5044 |
| 5264 // TODO(zerny): Share these arrays between all irregexp functions. | 5045 // TODO(zerny): Share these arrays between all irregexp functions. |
| 5265 fn.set_num_fixed_parameters(kParamCount); | 5046 fn.set_num_fixed_parameters(kParamCount); |
| 5266 fn.set_parameter_types(Array::Handle(zone, Array::New(kParamCount, | 5047 fn.set_parameter_types( |
| 5267 Heap::kOld))); | 5048 Array::Handle(zone, Array::New(kParamCount, Heap::kOld))); |
| 5268 fn.set_parameter_names(Array::Handle(zone, Array::New(kParamCount, | 5049 fn.set_parameter_names( |
| 5269 Heap::kOld))); | 5050 Array::Handle(zone, Array::New(kParamCount, Heap::kOld))); |
| 5270 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamRegExpIndex, | 5051 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamRegExpIndex, |
| 5271 Object::dynamic_type()); | 5052 Object::dynamic_type()); |
| 5272 fn.SetParameterNameAt(RegExpMacroAssembler::kParamRegExpIndex, | 5053 fn.SetParameterNameAt(RegExpMacroAssembler::kParamRegExpIndex, |
| 5273 Symbols::This()); | 5054 Symbols::This()); |
| 5274 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamStringIndex, | 5055 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamStringIndex, |
| 5275 Object::dynamic_type()); | 5056 Object::dynamic_type()); |
| 5276 fn.SetParameterNameAt(RegExpMacroAssembler::kParamStringIndex, | 5057 fn.SetParameterNameAt(RegExpMacroAssembler::kParamStringIndex, |
| 5277 Symbols::string_param()); | 5058 Symbols::string_param()); |
| 5278 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamStartOffsetIndex, | 5059 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamStartOffsetIndex, |
| 5279 Object::dynamic_type()); | 5060 Object::dynamic_type()); |
| (...skipping 23 matching lines...) Expand all Loading... |
| 5303 if (multi_line) { | 5084 if (multi_line) { |
| 5304 regexp.set_is_multi_line(); | 5085 regexp.set_is_multi_line(); |
| 5305 } | 5086 } |
| 5306 if (ignore_case) { | 5087 if (ignore_case) { |
| 5307 regexp.set_is_ignore_case(); | 5088 regexp.set_is_ignore_case(); |
| 5308 } | 5089 } |
| 5309 | 5090 |
| 5310 // TODO(zerny): We might want to use normal string searching algorithms | 5091 // TODO(zerny): We might want to use normal string searching algorithms |
| 5311 // for simple patterns. | 5092 // for simple patterns. |
| 5312 regexp.set_is_complex(); | 5093 regexp.set_is_complex(); |
| 5313 regexp.set_is_global(); // All dart regexps are global. | 5094 regexp.set_is_global(); // All dart regexps are global. |
| 5314 | 5095 |
| 5315 if (!FLAG_interpret_irregexp) { | 5096 if (!FLAG_interpret_irregexp) { |
| 5316 const Library& lib = Library::Handle(zone, Library::CoreLibrary()); | 5097 const Library& lib = Library::Handle(zone, Library::CoreLibrary()); |
| 5317 const Class& owner = Class::Handle(zone, | 5098 const Class& owner = |
| 5318 lib.LookupClass(Symbols::RegExp())); | 5099 Class::Handle(zone, lib.LookupClass(Symbols::RegExp())); |
| 5319 | 5100 |
| 5320 CreateSpecializedFunction(thread, zone, regexp, kOneByteStringCid, owner); | 5101 CreateSpecializedFunction(thread, zone, regexp, kOneByteStringCid, owner); |
| 5321 CreateSpecializedFunction(thread, zone, regexp, kTwoByteStringCid, owner); | 5102 CreateSpecializedFunction(thread, zone, regexp, kTwoByteStringCid, owner); |
| 5322 CreateSpecializedFunction(thread, zone, | 5103 CreateSpecializedFunction(thread, zone, regexp, kExternalOneByteStringCid, |
| 5323 regexp, kExternalOneByteStringCid, owner); | 5104 owner); |
| 5324 CreateSpecializedFunction(thread, zone, | 5105 CreateSpecializedFunction(thread, zone, regexp, kExternalTwoByteStringCid, |
| 5325 regexp, kExternalTwoByteStringCid, owner); | 5106 owner); |
| 5326 } | 5107 } |
| 5327 | 5108 |
| 5328 return regexp.raw(); | 5109 return regexp.raw(); |
| 5329 } | 5110 } |
| 5330 | 5111 |
| 5331 | 5112 |
| 5332 } // namespace dart | 5113 } // namespace dart |
| OLD | NEW |