OLD | NEW |
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #include "vm/regexp.h" | 5 #include "vm/regexp.h" |
6 | 6 |
7 #include "vm/dart_entry.h" | 7 #include "vm/dart_entry.h" |
8 #include "vm/regexp_assembler.h" | 8 #include "vm/regexp_assembler.h" |
9 #include "vm/regexp_assembler_bytecode.h" | 9 #include "vm/regexp_assembler_bytecode.h" |
10 #include "vm/regexp_assembler_ir.h" | 10 #include "vm/regexp_assembler_ir.h" |
(...skipping 252 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
263 ASSERT((in_character & RegExpMacroAssembler::kTableMask) == in_character); | 263 ASSERT((in_character & RegExpMacroAssembler::kTableMask) == in_character); |
264 if (total_samples_ < 1) return 1; // Division by zero. | 264 if (total_samples_ < 1) return 1; // Division by zero. |
265 intptr_t freq_in_per128 = | 265 intptr_t freq_in_per128 = |
266 (frequencies_[in_character].counter() * 128) / total_samples_; | 266 (frequencies_[in_character].counter() * 128) / total_samples_; |
267 return freq_in_per128; | 267 return freq_in_per128; |
268 } | 268 } |
269 | 269 |
270 private: | 270 private: |
271 class CharacterFrequency { | 271 class CharacterFrequency { |
272 public: | 272 public: |
273 CharacterFrequency() : counter_(0), character_(-1) { } | 273 CharacterFrequency() : counter_(0), character_(-1) {} |
274 explicit CharacterFrequency(intptr_t character) | 274 explicit CharacterFrequency(intptr_t character) |
275 : counter_(0), character_(character) { } | 275 : counter_(0), character_(character) {} |
276 | 276 |
277 void Increment() { counter_++; } | 277 void Increment() { counter_++; } |
278 intptr_t counter() { return counter_; } | 278 intptr_t counter() { return counter_; } |
279 intptr_t character() { return character_; } | 279 intptr_t character() { return character_; } |
280 | 280 |
281 private: | 281 private: |
282 intptr_t counter_; | 282 intptr_t counter_; |
283 intptr_t character_; | 283 intptr_t character_; |
284 | 284 |
285 DISALLOW_ALLOCATION(); | 285 DISALLOW_ALLOCATION(); |
286 }; | 286 }; |
287 | 287 |
288 | 288 |
289 private: | 289 private: |
290 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; | 290 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; |
291 intptr_t total_samples_; | 291 intptr_t total_samples_; |
292 }; | 292 }; |
293 | 293 |
294 | 294 |
295 class RegExpCompiler : public ValueObject { | 295 class RegExpCompiler : public ValueObject { |
296 public: | 296 public: |
297 RegExpCompiler(intptr_t capture_count, | 297 RegExpCompiler(intptr_t capture_count, bool ignore_case, bool is_one_byte); |
298 bool ignore_case, | |
299 bool is_one_byte); | |
300 | 298 |
301 intptr_t AllocateRegister() { | 299 intptr_t AllocateRegister() { return next_register_++; } |
302 return next_register_++; | |
303 } | |
304 | 300 |
305 RegExpEngine::CompilationResult Assemble( | 301 RegExpEngine::CompilationResult Assemble(IRRegExpMacroAssembler* assembler, |
306 IRRegExpMacroAssembler* assembler, | 302 RegExpNode* start, |
307 RegExpNode* start, | 303 intptr_t capture_count, |
308 intptr_t capture_count, | 304 const String& pattern); |
309 const String& pattern); | |
310 | 305 |
311 RegExpEngine::CompilationResult Assemble( | 306 RegExpEngine::CompilationResult Assemble( |
312 BytecodeRegExpMacroAssembler* assembler, | 307 BytecodeRegExpMacroAssembler* assembler, |
313 RegExpNode* start, | 308 RegExpNode* start, |
314 intptr_t capture_count, | 309 intptr_t capture_count, |
315 const String& pattern); | 310 const String& pattern); |
316 | 311 |
317 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } | 312 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } |
318 | 313 |
319 static const intptr_t kImplementationOffset = 0; | 314 static const intptr_t kImplementationOffset = 0; |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
357 Zone* zone_; | 352 Zone* zone_; |
358 }; | 353 }; |
359 | 354 |
360 | 355 |
361 class RecursionCheck : public ValueObject { | 356 class RecursionCheck : public ValueObject { |
362 public: | 357 public: |
363 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { | 358 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { |
364 compiler->IncrementRecursionDepth(); | 359 compiler->IncrementRecursionDepth(); |
365 } | 360 } |
366 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } | 361 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } |
| 362 |
367 private: | 363 private: |
368 RegExpCompiler* compiler_; | 364 RegExpCompiler* compiler_; |
369 }; | 365 }; |
370 | 366 |
371 | 367 |
372 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { | 368 static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { |
373 return RegExpEngine::CompilationResult("RegExp too big"); | 369 return RegExpEngine::CompilationResult("RegExp too big"); |
374 } | 370 } |
375 | 371 |
376 | 372 |
377 // Attempts to compile the regexp using an Irregexp code generator. Returns | 373 // Attempts to compile the regexp using an Irregexp code generator. Returns |
378 // a fixed array or a null handle depending on whether it succeeded. | 374 // a fixed array or a null handle depending on whether it succeeded. |
379 RegExpCompiler::RegExpCompiler(intptr_t capture_count, | 375 RegExpCompiler::RegExpCompiler(intptr_t capture_count, |
380 bool ignore_case, | 376 bool ignore_case, |
381 bool is_one_byte) | 377 bool is_one_byte) |
382 : next_register_(2 * (capture_count + 1)), | 378 : next_register_(2 * (capture_count + 1)), |
383 work_list_(NULL), | 379 work_list_(NULL), |
384 recursion_depth_(0), | 380 recursion_depth_(0), |
385 ignore_case_(ignore_case), | 381 ignore_case_(ignore_case), |
386 is_one_byte_(is_one_byte), | 382 is_one_byte_(is_one_byte), |
387 reg_exp_too_big_(false), | 383 reg_exp_too_big_(false), |
388 current_expansion_factor_(1), | 384 current_expansion_factor_(1), |
389 zone_(Thread::Current()->zone()) { | 385 zone_(Thread::Current()->zone()) { |
390 accept_ = new(Z) EndNode(EndNode::ACCEPT, Z); | 386 accept_ = new (Z) EndNode(EndNode::ACCEPT, Z); |
391 } | 387 } |
392 | 388 |
393 | 389 |
394 RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 390 RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
395 IRRegExpMacroAssembler* macro_assembler, | 391 IRRegExpMacroAssembler* macro_assembler, |
396 RegExpNode* start, | 392 RegExpNode* start, |
397 intptr_t capture_count, | 393 intptr_t capture_count, |
398 const String& pattern) { | 394 const String& pattern) { |
399 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); | 395 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); |
400 macro_assembler_ = macro_assembler; | 396 macro_assembler_ = macro_assembler; |
401 | 397 |
402 ZoneGrowableArray<RegExpNode*> work_list(0); | 398 ZoneGrowableArray<RegExpNode*> work_list(0); |
403 work_list_ = &work_list; | 399 work_list_ = &work_list; |
404 BlockLabel fail; | 400 BlockLabel fail; |
405 macro_assembler_->PushBacktrack(&fail); | 401 macro_assembler_->PushBacktrack(&fail); |
406 Trace new_trace; | 402 Trace new_trace; |
407 start->Emit(this, &new_trace); | 403 start->Emit(this, &new_trace); |
408 macro_assembler_->BindBlock(&fail); | 404 macro_assembler_->BindBlock(&fail); |
409 macro_assembler_->Fail(); | 405 macro_assembler_->Fail(); |
410 while (!work_list.is_empty()) { | 406 while (!work_list.is_empty()) { |
411 work_list.RemoveLast()->Emit(this, &new_trace); | 407 work_list.RemoveLast()->Emit(this, &new_trace); |
412 } | 408 } |
413 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); | 409 if (reg_exp_too_big_) return IrregexpRegExpTooBig(); |
414 | 410 |
415 macro_assembler->GenerateBacktrackBlock(); | 411 macro_assembler->GenerateBacktrackBlock(); |
416 macro_assembler->FinalizeRegistersArray(); | 412 macro_assembler->FinalizeRegistersArray(); |
417 | 413 |
418 return RegExpEngine::CompilationResult(macro_assembler->backtrack_goto(), | 414 return RegExpEngine::CompilationResult( |
419 macro_assembler->graph_entry(), | 415 macro_assembler->backtrack_goto(), macro_assembler->graph_entry(), |
420 macro_assembler->num_blocks(), | 416 macro_assembler->num_blocks(), macro_assembler->num_stack_locals(), |
421 macro_assembler->num_stack_locals(), | 417 next_register_); |
422 next_register_); | |
423 } | 418 } |
424 | 419 |
425 | 420 |
426 RegExpEngine::CompilationResult RegExpCompiler::Assemble( | 421 RegExpEngine::CompilationResult RegExpCompiler::Assemble( |
427 BytecodeRegExpMacroAssembler* macro_assembler, | 422 BytecodeRegExpMacroAssembler* macro_assembler, |
428 RegExpNode* start, | 423 RegExpNode* start, |
429 intptr_t capture_count, | 424 intptr_t capture_count, |
430 const String& pattern) { | 425 const String& pattern) { |
431 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); | 426 macro_assembler->set_slow_safe(false /* use_slow_safe_regexp_compiler */); |
432 macro_assembler_ = macro_assembler; | 427 macro_assembler_ = macro_assembler; |
(...skipping 20 matching lines...) Expand all Loading... |
453 if (action_type() == ActionNode::CLEAR_CAPTURES) { | 448 if (action_type() == ActionNode::CLEAR_CAPTURES) { |
454 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); | 449 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); |
455 return range.Contains(that); | 450 return range.Contains(that); |
456 } else { | 451 } else { |
457 return reg() == that; | 452 return reg() == that; |
458 } | 453 } |
459 } | 454 } |
460 | 455 |
461 | 456 |
462 bool Trace::mentions_reg(intptr_t reg) { | 457 bool Trace::mentions_reg(intptr_t reg) { |
463 for (DeferredAction* action = actions_; | 458 for (DeferredAction* action = actions_; action != NULL; |
464 action != NULL; | |
465 action = action->next()) { | 459 action = action->next()) { |
466 if (action->Mentions(reg)) | 460 if (action->Mentions(reg)) return true; |
467 return true; | |
468 } | 461 } |
469 return false; | 462 return false; |
470 } | 463 } |
471 | 464 |
472 | 465 |
473 bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) { | 466 bool Trace::GetStoredPosition(intptr_t reg, intptr_t* cp_offset) { |
474 ASSERT(*cp_offset == 0); | 467 ASSERT(*cp_offset == 0); |
475 for (DeferredAction* action = actions_; | 468 for (DeferredAction* action = actions_; action != NULL; |
476 action != NULL; | |
477 action = action->next()) { | 469 action = action->next()) { |
478 if (action->Mentions(reg)) { | 470 if (action->Mentions(reg)) { |
479 if (action->action_type() == ActionNode::STORE_POSITION) { | 471 if (action->action_type() == ActionNode::STORE_POSITION) { |
480 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset(); | 472 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset(); |
481 return true; | 473 return true; |
482 } else { | 474 } else { |
483 return false; | 475 return false; |
484 } | 476 } |
485 } | 477 } |
486 } | 478 } |
487 return false; | 479 return false; |
488 } | 480 } |
489 | 481 |
490 | 482 |
491 // This is called as we come into a loop choice node and some other tricky | 483 // This is called as we come into a loop choice node and some other tricky |
492 // nodes. It normalizes the state of the code generator to ensure we can | 484 // nodes. It normalizes the state of the code generator to ensure we can |
493 // generate generic code. | 485 // generate generic code. |
494 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers, | 486 intptr_t Trace::FindAffectedRegisters(OutSet* affected_registers, Zone* zone) { |
495 Zone* zone) { | |
496 intptr_t max_register = RegExpCompiler::kNoRegister; | 487 intptr_t max_register = RegExpCompiler::kNoRegister; |
497 for (DeferredAction* action = actions_; | 488 for (DeferredAction* action = actions_; action != NULL; |
498 action != NULL; | |
499 action = action->next()) { | 489 action = action->next()) { |
500 if (action->action_type() == ActionNode::CLEAR_CAPTURES) { | 490 if (action->action_type() == ActionNode::CLEAR_CAPTURES) { |
501 Interval range = static_cast<DeferredClearCaptures*>(action)->range(); | 491 Interval range = static_cast<DeferredClearCaptures*>(action)->range(); |
502 for (intptr_t i = range.from(); i <= range.to(); i++) | 492 for (intptr_t i = range.from(); i <= range.to(); i++) |
503 affected_registers->Set(i, zone); | 493 affected_registers->Set(i, zone); |
504 if (range.to() > max_register) max_register = range.to(); | 494 if (range.to() > max_register) max_register = range.to(); |
505 } else { | 495 } else { |
506 affected_registers->Set(action->reg(), zone); | 496 affected_registers->Set(action->reg(), zone); |
507 if (action->reg() > max_register) max_register = action->reg(); | 497 if (action->reg() > max_register) max_register = action->reg(); |
508 } | 498 } |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
545 // to its previous state (or not, if it's safe to ignore it). | 535 // to its previous state (or not, if it's safe to ignore it). |
546 enum DeferredActionUndoType { ACTION_IGNORE, ACTION_RESTORE, ACTION_CLEAR }; | 536 enum DeferredActionUndoType { ACTION_IGNORE, ACTION_RESTORE, ACTION_CLEAR }; |
547 DeferredActionUndoType undo_action = ACTION_IGNORE; | 537 DeferredActionUndoType undo_action = ACTION_IGNORE; |
548 | 538 |
549 intptr_t value = 0; | 539 intptr_t value = 0; |
550 bool absolute = false; | 540 bool absolute = false; |
551 bool clear = false; | 541 bool clear = false; |
552 intptr_t store_position = -1; | 542 intptr_t store_position = -1; |
553 // This is a little tricky because we are scanning the actions in reverse | 543 // This is a little tricky because we are scanning the actions in reverse |
554 // historical order (newest first). | 544 // historical order (newest first). |
555 for (DeferredAction* action = actions_; | 545 for (DeferredAction* action = actions_; action != NULL; |
556 action != NULL; | |
557 action = action->next()) { | 546 action = action->next()) { |
558 if (action->Mentions(reg)) { | 547 if (action->Mentions(reg)) { |
559 switch (action->action_type()) { | 548 switch (action->action_type()) { |
560 case ActionNode::SET_REGISTER: { | 549 case ActionNode::SET_REGISTER: { |
561 Trace::DeferredSetRegister* psr = | 550 Trace::DeferredSetRegister* psr = |
562 static_cast<Trace::DeferredSetRegister*>(action); | 551 static_cast<Trace::DeferredSetRegister*>(action); |
563 if (!absolute) { | 552 if (!absolute) { |
564 value += psr->value(); | 553 value += psr->value(); |
565 absolute = true; | 554 absolute = true; |
566 } | 555 } |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
670 if (backtrack() != NULL) { | 659 if (backtrack() != NULL) { |
671 // Here we have a concrete backtrack location. These are set up by choice | 660 // Here we have a concrete backtrack location. These are set up by choice |
672 // nodes and so they indicate that we have a deferred save of the current | 661 // nodes and so they indicate that we have a deferred save of the current |
673 // position which we may need to emit here. | 662 // position which we may need to emit here. |
674 assembler->PushCurrentPosition(); | 663 assembler->PushCurrentPosition(); |
675 } | 664 } |
676 Zone* zone = successor->zone(); | 665 Zone* zone = successor->zone(); |
677 intptr_t max_register = FindAffectedRegisters(&affected_registers, zone); | 666 intptr_t max_register = FindAffectedRegisters(&affected_registers, zone); |
678 OutSet registers_to_pop; | 667 OutSet registers_to_pop; |
679 OutSet registers_to_clear; | 668 OutSet registers_to_clear; |
680 PerformDeferredActions(assembler, | 669 PerformDeferredActions(assembler, max_register, affected_registers, |
681 max_register, | 670 ®isters_to_pop, ®isters_to_clear, zone); |
682 affected_registers, | |
683 ®isters_to_pop, | |
684 ®isters_to_clear, | |
685 zone); | |
686 if (cp_offset_ != 0) { | 671 if (cp_offset_ != 0) { |
687 assembler->AdvanceCurrentPosition(cp_offset_); | 672 assembler->AdvanceCurrentPosition(cp_offset_); |
688 } | 673 } |
689 | 674 |
690 // Create a new trivial state and generate the node with that. | 675 // Create a new trivial state and generate the node with that. |
691 BlockLabel undo; | 676 BlockLabel undo; |
692 assembler->PushBacktrack(&undo); | 677 assembler->PushBacktrack(&undo); |
693 Trace new_state; | 678 Trace new_state; |
694 successor->Emit(compiler, &new_state); | 679 successor->Emit(compiler, &new_state); |
695 | 680 |
696 // On backtrack we need to restore state. | 681 // On backtrack we need to restore state. |
697 assembler->BindBlock(&undo); | 682 assembler->BindBlock(&undo); |
698 RestoreAffectedRegisters(assembler, | 683 RestoreAffectedRegisters(assembler, max_register, registers_to_pop, |
699 max_register, | |
700 registers_to_pop, | |
701 registers_to_clear); | 684 registers_to_clear); |
702 if (backtrack() == NULL) { | 685 if (backtrack() == NULL) { |
703 assembler->Backtrack(); | 686 assembler->Backtrack(); |
704 } else { | 687 } else { |
705 assembler->PopCurrentPosition(); | 688 assembler->PopCurrentPosition(); |
706 assembler->GoTo(backtrack()); | 689 assembler->GoTo(backtrack()); |
707 } | 690 } |
708 } | 691 } |
709 | 692 |
710 | 693 |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
753 return; | 736 return; |
754 case NEGATIVE_SUBMATCH_SUCCESS: | 737 case NEGATIVE_SUBMATCH_SUCCESS: |
755 // This case is handled in a different virtual method. | 738 // This case is handled in a different virtual method. |
756 UNREACHABLE(); | 739 UNREACHABLE(); |
757 } | 740 } |
758 UNIMPLEMENTED(); | 741 UNIMPLEMENTED(); |
759 } | 742 } |
760 | 743 |
761 | 744 |
762 void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) { | 745 void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) { |
763 if (guards_ == NULL) | 746 if (guards_ == NULL) guards_ = new (zone) ZoneGrowableArray<Guard*>(1); |
764 guards_ = new(zone) ZoneGrowableArray<Guard*>(1); | |
765 guards_->Add(guard); | 747 guards_->Add(guard); |
766 } | 748 } |
767 | 749 |
768 | 750 |
769 ActionNode* ActionNode::SetRegister(intptr_t reg, | 751 ActionNode* ActionNode::SetRegister(intptr_t reg, |
770 intptr_t val, | 752 intptr_t val, |
771 RegExpNode* on_success) { | 753 RegExpNode* on_success) { |
772 ActionNode* result = | 754 ActionNode* result = |
773 new(on_success->zone()) ActionNode(SET_REGISTER, on_success); | 755 new (on_success->zone()) ActionNode(SET_REGISTER, on_success); |
774 result->data_.u_store_register.reg = reg; | 756 result->data_.u_store_register.reg = reg; |
775 result->data_.u_store_register.value = val; | 757 result->data_.u_store_register.value = val; |
776 return result; | 758 return result; |
777 } | 759 } |
778 | 760 |
779 | 761 |
780 ActionNode* ActionNode::IncrementRegister(intptr_t reg, | 762 ActionNode* ActionNode::IncrementRegister(intptr_t reg, |
781 RegExpNode* on_success) { | 763 RegExpNode* on_success) { |
782 ActionNode* result = | 764 ActionNode* result = |
783 new(on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success); | 765 new (on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success); |
784 result->data_.u_increment_register.reg = reg; | 766 result->data_.u_increment_register.reg = reg; |
785 return result; | 767 return result; |
786 } | 768 } |
787 | 769 |
788 | 770 |
789 ActionNode* ActionNode::StorePosition(intptr_t reg, | 771 ActionNode* ActionNode::StorePosition(intptr_t reg, |
790 bool is_capture, | 772 bool is_capture, |
791 RegExpNode* on_success) { | 773 RegExpNode* on_success) { |
792 ActionNode* result = | 774 ActionNode* result = |
793 new(on_success->zone()) ActionNode(STORE_POSITION, on_success); | 775 new (on_success->zone()) ActionNode(STORE_POSITION, on_success); |
794 result->data_.u_position_register.reg = reg; | 776 result->data_.u_position_register.reg = reg; |
795 result->data_.u_position_register.is_capture = is_capture; | 777 result->data_.u_position_register.is_capture = is_capture; |
796 return result; | 778 return result; |
797 } | 779 } |
798 | 780 |
799 | 781 |
800 ActionNode* ActionNode::ClearCaptures(Interval range, | 782 ActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) { |
801 RegExpNode* on_success) { | |
802 ActionNode* result = | 783 ActionNode* result = |
803 new(on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success); | 784 new (on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success); |
804 result->data_.u_clear_captures.range_from = range.from(); | 785 result->data_.u_clear_captures.range_from = range.from(); |
805 result->data_.u_clear_captures.range_to = range.to(); | 786 result->data_.u_clear_captures.range_to = range.to(); |
806 return result; | 787 return result; |
807 } | 788 } |
808 | 789 |
809 | 790 |
810 ActionNode* ActionNode::BeginSubmatch(intptr_t stack_reg, | 791 ActionNode* ActionNode::BeginSubmatch(intptr_t stack_reg, |
811 intptr_t position_reg, | 792 intptr_t position_reg, |
812 RegExpNode* on_success) { | 793 RegExpNode* on_success) { |
813 ActionNode* result = | 794 ActionNode* result = |
814 new(on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success); | 795 new (on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success); |
815 result->data_.u_submatch.stack_pointer_register = stack_reg; | 796 result->data_.u_submatch.stack_pointer_register = stack_reg; |
816 result->data_.u_submatch.current_position_register = position_reg; | 797 result->data_.u_submatch.current_position_register = position_reg; |
817 return result; | 798 return result; |
818 } | 799 } |
819 | 800 |
820 | 801 |
821 ActionNode* ActionNode::PositiveSubmatchSuccess(intptr_t stack_reg, | 802 ActionNode* ActionNode::PositiveSubmatchSuccess(intptr_t stack_reg, |
822 intptr_t position_reg, | 803 intptr_t position_reg, |
823 intptr_t clear_register_count, | 804 intptr_t clear_register_count, |
824 intptr_t clear_register_from, | 805 intptr_t clear_register_from, |
825 RegExpNode* on_success) { | 806 RegExpNode* on_success) { |
826 ActionNode* result = | 807 ActionNode* result = new (on_success->zone()) |
827 new(on_success->zone()) ActionNode(POSITIVE_SUBMATCH_SUCCESS, | 808 ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success); |
828 on_success); | |
829 result->data_.u_submatch.stack_pointer_register = stack_reg; | 809 result->data_.u_submatch.stack_pointer_register = stack_reg; |
830 result->data_.u_submatch.current_position_register = position_reg; | 810 result->data_.u_submatch.current_position_register = position_reg; |
831 result->data_.u_submatch.clear_register_count = clear_register_count; | 811 result->data_.u_submatch.clear_register_count = clear_register_count; |
832 result->data_.u_submatch.clear_register_from = clear_register_from; | 812 result->data_.u_submatch.clear_register_from = clear_register_from; |
833 return result; | 813 return result; |
834 } | 814 } |
835 | 815 |
836 | 816 |
837 ActionNode* ActionNode::EmptyMatchCheck(intptr_t start_register, | 817 ActionNode* ActionNode::EmptyMatchCheck(intptr_t start_register, |
838 intptr_t repetition_register, | 818 intptr_t repetition_register, |
839 intptr_t repetition_limit, | 819 intptr_t repetition_limit, |
840 RegExpNode* on_success) { | 820 RegExpNode* on_success) { |
841 ActionNode* result = | 821 ActionNode* result = |
842 new(on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success); | 822 new (on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success); |
843 result->data_.u_empty_match_check.start_register = start_register; | 823 result->data_.u_empty_match_check.start_register = start_register; |
844 result->data_.u_empty_match_check.repetition_register = repetition_register; | 824 result->data_.u_empty_match_check.repetition_register = repetition_register; |
845 result->data_.u_empty_match_check.repetition_limit = repetition_limit; | 825 result->data_.u_empty_match_check.repetition_limit = repetition_limit; |
846 return result; | 826 return result; |
847 } | 827 } |
848 | 828 |
849 | 829 |
850 #define DEFINE_ACCEPT(Type) \ | 830 #define DEFINE_ACCEPT(Type) \ |
851 void Type##Node::Accept(NodeVisitor* visitor) { \ | 831 void Type##Node::Accept(NodeVisitor* visitor) { visitor->Visit##Type(this); } |
852 visitor->Visit##Type(this); \ | |
853 } | |
854 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) | 832 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) |
855 #undef DEFINE_ACCEPT | 833 #undef DEFINE_ACCEPT |
856 | 834 |
857 | 835 |
858 void LoopChoiceNode::Accept(NodeVisitor* visitor) { | 836 void LoopChoiceNode::Accept(NodeVisitor* visitor) { |
859 visitor->VisitLoopChoice(this); | 837 visitor->VisitLoopChoice(this); |
860 } | 838 } |
861 | 839 |
862 | 840 |
863 // ------------------------------------------------------------------- | 841 // ------------------------------------------------------------------- |
864 // Emit code. | 842 // Emit code. |
865 | 843 |
866 | 844 |
867 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, | 845 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, |
868 Guard* guard, | 846 Guard* guard, |
869 Trace* trace) { | 847 Trace* trace) { |
870 switch (guard->op()) { | 848 switch (guard->op()) { |
871 case Guard::LT: | 849 case Guard::LT: |
872 ASSERT(!trace->mentions_reg(guard->reg())); | 850 ASSERT(!trace->mentions_reg(guard->reg())); |
873 macro_assembler->IfRegisterGE(guard->reg(), | 851 macro_assembler->IfRegisterGE(guard->reg(), guard->value(), |
874 guard->value(), | |
875 trace->backtrack()); | 852 trace->backtrack()); |
876 break; | 853 break; |
877 case Guard::GEQ: | 854 case Guard::GEQ: |
878 ASSERT(!trace->mentions_reg(guard->reg())); | 855 ASSERT(!trace->mentions_reg(guard->reg())); |
879 macro_assembler->IfRegisterLT(guard->reg(), | 856 macro_assembler->IfRegisterLT(guard->reg(), guard->value(), |
880 guard->value(), | |
881 trace->backtrack()); | 857 trace->backtrack()); |
882 break; | 858 break; |
883 } | 859 } |
884 } | 860 } |
885 | 861 |
886 | 862 |
887 // Returns the number of characters in the equivalence class, omitting those | 863 // Returns the number of characters in the equivalence class, omitting those |
888 // that cannot occur in the source string because it is ASCII. | 864 // that cannot occur in the source string because it is ASCII. |
889 static intptr_t GetCaseIndependentLetters(uint16_t character, | 865 static intptr_t GetCaseIndependentLetters(uint16_t character, |
890 bool one_byte_subject, | 866 bool one_byte_subject, |
(...skipping 21 matching lines...) Expand all Loading... |
912 static inline bool EmitSimpleCharacter(Zone* zone, | 888 static inline bool EmitSimpleCharacter(Zone* zone, |
913 RegExpCompiler* compiler, | 889 RegExpCompiler* compiler, |
914 uint16_t c, | 890 uint16_t c, |
915 BlockLabel* on_failure, | 891 BlockLabel* on_failure, |
916 intptr_t cp_offset, | 892 intptr_t cp_offset, |
917 bool check, | 893 bool check, |
918 bool preloaded) { | 894 bool preloaded) { |
919 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 895 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
920 bool bound_checked = false; | 896 bool bound_checked = false; |
921 if (!preloaded) { | 897 if (!preloaded) { |
922 assembler->LoadCurrentCharacter( | 898 assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
923 cp_offset, | |
924 on_failure, | |
925 check); | |
926 bound_checked = true; | 899 bound_checked = true; |
927 } | 900 } |
928 assembler->CheckNotCharacter(c, on_failure); | 901 assembler->CheckNotCharacter(c, on_failure); |
929 return bound_checked; | 902 return bound_checked; |
930 } | 903 } |
931 | 904 |
932 | 905 |
933 // Only emits non-letters (things that don't have case). Only used for case | 906 // Only emits non-letters (things that don't have case). Only used for case |
934 // independent matches. | 907 // independent matches. |
935 static inline bool EmitAtomNonLetter(Zone* zone, | 908 static inline bool EmitAtomNonLetter(Zone* zone, |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
988 return true; | 961 return true; |
989 } | 962 } |
990 ASSERT(c2 > c1); | 963 ASSERT(c2 > c1); |
991 uint16_t diff = c2 - c1; | 964 uint16_t diff = c2 - c1; |
992 if (((diff - 1) & diff) == 0 && c1 >= diff) { | 965 if (((diff - 1) & diff) == 0 && c1 >= diff) { |
993 // If the characters differ by 2^n but don't differ by one bit then | 966 // If the characters differ by 2^n but don't differ by one bit then |
994 // subtract the difference from the found character, then do the or | 967 // subtract the difference from the found character, then do the or |
995 // trick. We avoid the theoretical case where negative numbers are | 968 // trick. We avoid the theoretical case where negative numbers are |
996 // involved in order to simplify code generation. | 969 // involved in order to simplify code generation. |
997 uint16_t mask = char_mask ^ diff; | 970 uint16_t mask = char_mask ^ diff; |
998 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, | 971 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, diff, mask, |
999 diff, | |
1000 mask, | |
1001 on_failure); | 972 on_failure); |
1002 return true; | 973 return true; |
1003 } | 974 } |
1004 return false; | 975 return false; |
1005 } | 976 } |
1006 | 977 |
1007 | 978 |
1008 typedef bool EmitCharacterFunction(Zone* zone, | 979 typedef bool EmitCharacterFunction(Zone* zone, |
1009 RegExpCompiler* compiler, | 980 RegExpCompiler* compiler, |
1010 uint16_t c, | 981 uint16_t c, |
(...skipping 18 matching lines...) Expand all Loading... |
1029 if (length <= 1) return false; | 1000 if (length <= 1) return false; |
1030 // We may not need to check against the end of the input string | 1001 // We may not need to check against the end of the input string |
1031 // if this character lies before a character that matched. | 1002 // if this character lies before a character that matched. |
1032 if (!preloaded) { | 1003 if (!preloaded) { |
1033 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 1004 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
1034 } | 1005 } |
1035 BlockLabel ok; | 1006 BlockLabel ok; |
1036 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); | 1007 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); |
1037 switch (length) { | 1008 switch (length) { |
1038 case 2: { | 1009 case 2: { |
1039 if (ShortCutEmitCharacterPair(macro_assembler, | 1010 if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0], |
1040 one_byte, | 1011 chars[1], on_failure)) { |
1041 chars[0], | |
1042 chars[1], | |
1043 on_failure)) { | |
1044 } else { | 1012 } else { |
1045 macro_assembler->CheckCharacter(chars[0], &ok); | 1013 macro_assembler->CheckCharacter(chars[0], &ok); |
1046 macro_assembler->CheckNotCharacter(chars[1], on_failure); | 1014 macro_assembler->CheckNotCharacter(chars[1], on_failure); |
1047 macro_assembler->BindBlock(&ok); | 1015 macro_assembler->BindBlock(&ok); |
1048 } | 1016 } |
1049 break; | 1017 break; |
1050 } | 1018 } |
1051 case 4: | 1019 case 4: |
1052 macro_assembler->CheckCharacter(chars[3], &ok); | 1020 macro_assembler->CheckCharacter(chars[3], &ok); |
1053 // Fall through! | 1021 // Fall through! |
1054 case 3: | 1022 case 3: |
1055 macro_assembler->CheckCharacter(chars[0], &ok); | 1023 macro_assembler->CheckCharacter(chars[0], &ok); |
1056 macro_assembler->CheckCharacter(chars[1], &ok); | 1024 macro_assembler->CheckCharacter(chars[1], &ok); |
1057 macro_assembler->CheckNotCharacter(chars[2], on_failure); | 1025 macro_assembler->CheckNotCharacter(chars[2], on_failure); |
1058 macro_assembler->BindBlock(&ok); | 1026 macro_assembler->BindBlock(&ok); |
1059 break; | 1027 break; |
1060 default: | 1028 default: |
1061 UNREACHABLE(); | 1029 UNREACHABLE(); |
1062 break; | 1030 break; |
1063 } | 1031 } |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1097 } else { | 1065 } else { |
1098 masm->CheckCharacterInRange(first, last, in_range); | 1066 masm->CheckCharacterInRange(first, last, in_range); |
1099 } | 1067 } |
1100 if (out_of_range != fall_through) masm->GoTo(out_of_range); | 1068 if (out_of_range != fall_through) masm->GoTo(out_of_range); |
1101 } | 1069 } |
1102 } | 1070 } |
1103 | 1071 |
1104 | 1072 |
1105 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. | 1073 // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. |
1106 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. | 1074 // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. |
1107 static void EmitUseLookupTable( | 1075 static void EmitUseLookupTable(RegExpMacroAssembler* masm, |
1108 RegExpMacroAssembler* masm, | 1076 ZoneGrowableArray<int>* ranges, |
1109 ZoneGrowableArray<int>* ranges, | 1077 intptr_t start_index, |
1110 intptr_t start_index, | 1078 intptr_t end_index, |
1111 intptr_t end_index, | 1079 intptr_t min_char, |
1112 intptr_t min_char, | 1080 BlockLabel* fall_through, |
1113 BlockLabel* fall_through, | 1081 BlockLabel* even_label, |
1114 BlockLabel* even_label, | 1082 BlockLabel* odd_label) { |
1115 BlockLabel* odd_label) { | |
1116 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; | 1083 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; |
1117 static const intptr_t kMask = RegExpMacroAssembler::kTableMask; | 1084 static const intptr_t kMask = RegExpMacroAssembler::kTableMask; |
1118 | 1085 |
1119 intptr_t base = (min_char & ~kMask); | 1086 intptr_t base = (min_char & ~kMask); |
1120 | 1087 |
1121 // Assert that everything is on one kTableSize page. | 1088 // Assert that everything is on one kTableSize page. |
1122 for (intptr_t i = start_index; i <= end_index; i++) { | 1089 for (intptr_t i = start_index; i <= end_index; i++) { |
1123 ASSERT((ranges->At(i) & ~kMask) == base); | 1090 ASSERT((ranges->At(i) & ~kMask) == base); |
1124 } | 1091 } |
1125 ASSERT(start_index == 0 || (ranges->At(start_index - 1) & ~kMask) <= base); | 1092 ASSERT(start_index == 0 || (ranges->At(start_index - 1) & ~kMask) <= base); |
(...skipping 21 matching lines...) Expand all Loading... |
1147 for (j = (ranges->At(i) & kMask); j < (ranges->At(i + 1) & kMask); j++) { | 1114 for (j = (ranges->At(i) & kMask); j < (ranges->At(i + 1) & kMask); j++) { |
1148 templ[j] = bit; | 1115 templ[j] = bit; |
1149 } | 1116 } |
1150 bit ^= 1; | 1117 bit ^= 1; |
1151 } | 1118 } |
1152 for (intptr_t i = j; i < kSize; i++) { | 1119 for (intptr_t i = j; i < kSize; i++) { |
1153 templ[i] = bit; | 1120 templ[i] = bit; |
1154 } | 1121 } |
1155 // TODO(erikcorry): Cache these. | 1122 // TODO(erikcorry): Cache these. |
1156 const TypedData& ba = TypedData::ZoneHandle( | 1123 const TypedData& ba = TypedData::ZoneHandle( |
1157 masm->zone(), | 1124 masm->zone(), TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); |
1158 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); | |
1159 for (intptr_t i = 0; i < kSize; i++) { | 1125 for (intptr_t i = 0; i < kSize; i++) { |
1160 ba.SetUint8(i, templ[i]); | 1126 ba.SetUint8(i, templ[i]); |
1161 } | 1127 } |
1162 masm->CheckBitInTable(ba, on_bit_set); | 1128 masm->CheckBitInTable(ba, on_bit_set); |
1163 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); | 1129 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); |
1164 } | 1130 } |
1165 | 1131 |
1166 | 1132 |
1167 static void CutOutRange(RegExpMacroAssembler* masm, | 1133 static void CutOutRange(RegExpMacroAssembler* masm, |
1168 ZoneGrowableArray<int>* ranges, | 1134 ZoneGrowableArray<int>* ranges, |
1169 intptr_t start_index, | 1135 intptr_t start_index, |
1170 intptr_t end_index, | 1136 intptr_t end_index, |
1171 intptr_t cut_index, | 1137 intptr_t cut_index, |
1172 BlockLabel* even_label, | 1138 BlockLabel* even_label, |
1173 BlockLabel* odd_label) { | 1139 BlockLabel* odd_label) { |
1174 bool odd = (((cut_index - start_index) & 1) == 1); | 1140 bool odd = (((cut_index - start_index) & 1) == 1); |
1175 BlockLabel* in_range_label = odd ? odd_label : even_label; | 1141 BlockLabel* in_range_label = odd ? odd_label : even_label; |
1176 BlockLabel dummy; | 1142 BlockLabel dummy; |
1177 EmitDoubleBoundaryTest(masm, | 1143 EmitDoubleBoundaryTest(masm, ranges->At(cut_index), |
1178 ranges->At(cut_index), | 1144 ranges->At(cut_index + 1) - 1, &dummy, in_range_label, |
1179 ranges->At(cut_index + 1) - 1, | |
1180 &dummy, | |
1181 in_range_label, | |
1182 &dummy); | 1145 &dummy); |
1183 ASSERT(!dummy.IsLinked()); | 1146 ASSERT(!dummy.IsLinked()); |
1184 // Cut out the single range by rewriting the array. This creates a new | 1147 // Cut out the single range by rewriting the array. This creates a new |
1185 // range that is a merger of the two ranges on either side of the one we | 1148 // range that is a merger of the two ranges on either side of the one we |
1186 // are cutting out. The oddity of the labels is preserved. | 1149 // are cutting out. The oddity of the labels is preserved. |
1187 for (intptr_t j = cut_index; j > start_index; j--) { | 1150 for (intptr_t j = cut_index; j > start_index; j--) { |
1188 (*ranges)[j] = ranges->At(j - 1); | 1151 (*ranges)[j] = ranges->At(j - 1); |
1189 } | 1152 } |
1190 for (intptr_t j = cut_index + 1; j < end_index; j++) { | 1153 for (intptr_t j = cut_index + 1; j < end_index; j++) { |
1191 (*ranges)[j] = ranges->At(j + 1); | 1154 (*ranges)[j] = ranges->At(j + 1); |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1224 // 128-character space can take up a lot of space in the ranges array if, | 1187 // 128-character space can take up a lot of space in the ranges array if, |
1225 // for example, we only want to match every second character (eg. the lower | 1188 // for example, we only want to match every second character (eg. the lower |
1226 // case characters on some Unicode pages). | 1189 // case characters on some Unicode pages). |
1227 intptr_t binary_chop_index = (end_index + start_index) / 2; | 1190 intptr_t binary_chop_index = (end_index + start_index) / 2; |
1228 // The first test ensures that we get to the code that handles the Latin1 | 1191 // The first test ensures that we get to the code that handles the Latin1 |
1229 // range with a single not-taken branch, speeding up this important | 1192 // range with a single not-taken branch, speeding up this important |
1230 // character range (even non-Latin1 charset-based text has spaces and | 1193 // character range (even non-Latin1 charset-based text has spaces and |
1231 // punctuation). | 1194 // punctuation). |
1232 if (*border - 1 > Symbols::kMaxOneCharCodeSymbol && // Latin1 case. | 1195 if (*border - 1 > Symbols::kMaxOneCharCodeSymbol && // Latin1 case. |
1233 end_index - start_index > (*new_start_index - start_index) * 2 && | 1196 end_index - start_index > (*new_start_index - start_index) * 2 && |
1234 last - first > kSize * 2 && | 1197 last - first > kSize * 2 && binary_chop_index > *new_start_index && |
1235 binary_chop_index > *new_start_index && | |
1236 ranges->At(binary_chop_index) >= first + 2 * kSize) { | 1198 ranges->At(binary_chop_index) >= first + 2 * kSize) { |
1237 intptr_t scan_forward_for_section_border = binary_chop_index; | 1199 intptr_t scan_forward_for_section_border = binary_chop_index; |
1238 intptr_t new_border = (ranges->At(binary_chop_index) | kMask) + 1; | 1200 intptr_t new_border = (ranges->At(binary_chop_index) | kMask) + 1; |
1239 | 1201 |
1240 while (scan_forward_for_section_border < end_index) { | 1202 while (scan_forward_for_section_border < end_index) { |
1241 if (ranges->At(scan_forward_for_section_border) > new_border) { | 1203 if (ranges->At(scan_forward_for_section_border) > new_border) { |
1242 *new_start_index = scan_forward_for_section_border; | 1204 *new_start_index = scan_forward_for_section_border; |
1243 *border = new_border; | 1205 *border = new_border; |
1244 break; | 1206 break; |
1245 } | 1207 } |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1283 // Just need to test if the character is before or on-or-after | 1245 // Just need to test if the character is before or on-or-after |
1284 // a particular character. | 1246 // a particular character. |
1285 if (start_index == end_index) { | 1247 if (start_index == end_index) { |
1286 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label); | 1248 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label); |
1287 return; | 1249 return; |
1288 } | 1250 } |
1289 | 1251 |
1290 // Another almost trivial case: There is one interval in the middle that is | 1252 // Another almost trivial case: There is one interval in the middle that is |
1291 // different from the end intervals. | 1253 // different from the end intervals. |
1292 if (start_index + 1 == end_index) { | 1254 if (start_index + 1 == end_index) { |
1293 EmitDoubleBoundaryTest( | 1255 EmitDoubleBoundaryTest(masm, first, last, fall_through, even_label, |
1294 masm, first, last, fall_through, even_label, odd_label); | 1256 odd_label); |
1295 return; | 1257 return; |
1296 } | 1258 } |
1297 | 1259 |
1298 // It's not worth using table lookup if there are very few intervals in the | 1260 // It's not worth using table lookup if there are very few intervals in the |
1299 // character class. | 1261 // character class. |
1300 if (end_index - start_index <= 6) { | 1262 if (end_index - start_index <= 6) { |
1301 // It is faster to test for individual characters, so we look for those | 1263 // It is faster to test for individual characters, so we look for those |
1302 // first, then try arbitrary ranges in the second round. | 1264 // first, then try arbitrary ranges in the second round. |
1303 static intptr_t kNoCutIndex = -1; | 1265 static intptr_t kNoCutIndex = -1; |
1304 intptr_t cut = kNoCutIndex; | 1266 intptr_t cut = kNoCutIndex; |
1305 for (intptr_t i = start_index; i < end_index; i++) { | 1267 for (intptr_t i = start_index; i < end_index; i++) { |
1306 if (ranges->At(i) == ranges->At(i + 1) - 1) { | 1268 if (ranges->At(i) == ranges->At(i + 1) - 1) { |
1307 cut = i; | 1269 cut = i; |
1308 break; | 1270 break; |
1309 } | 1271 } |
1310 } | 1272 } |
1311 if (cut == kNoCutIndex) cut = start_index; | 1273 if (cut == kNoCutIndex) cut = start_index; |
1312 CutOutRange( | 1274 CutOutRange(masm, ranges, start_index, end_index, cut, even_label, |
1313 masm, ranges, start_index, end_index, cut, even_label, odd_label); | 1275 odd_label); |
1314 ASSERT(end_index - start_index >= 2); | 1276 ASSERT(end_index - start_index >= 2); |
1315 GenerateBranches(masm, | 1277 GenerateBranches(masm, ranges, start_index + 1, end_index - 1, min_char, |
1316 ranges, | 1278 max_char, fall_through, even_label, odd_label); |
1317 start_index + 1, | |
1318 end_index - 1, | |
1319 min_char, | |
1320 max_char, | |
1321 fall_through, | |
1322 even_label, | |
1323 odd_label); | |
1324 return; | 1279 return; |
1325 } | 1280 } |
1326 | 1281 |
1327 // If there are a lot of intervals in the regexp, then we will use tables to | 1282 // If there are a lot of intervals in the regexp, then we will use tables to |
1328 // determine whether the character is inside or outside the character class. | 1283 // determine whether the character is inside or outside the character class. |
1329 static const intptr_t kBits = RegExpMacroAssembler::kTableSizeBits; | 1284 static const intptr_t kBits = RegExpMacroAssembler::kTableSizeBits; |
1330 | 1285 |
1331 if ((max_char >> kBits) == (min_char >> kBits)) { | 1286 if ((max_char >> kBits) == (min_char >> kBits)) { |
1332 EmitUseLookupTable(masm, | 1287 EmitUseLookupTable(masm, ranges, start_index, end_index, min_char, |
1333 ranges, | 1288 fall_through, even_label, odd_label); |
1334 start_index, | |
1335 end_index, | |
1336 min_char, | |
1337 fall_through, | |
1338 even_label, | |
1339 odd_label); | |
1340 return; | 1289 return; |
1341 } | 1290 } |
1342 | 1291 |
1343 if ((min_char >> kBits) != (first >> kBits)) { | 1292 if ((min_char >> kBits) != (first >> kBits)) { |
1344 masm->CheckCharacterLT(first, odd_label); | 1293 masm->CheckCharacterLT(first, odd_label); |
1345 GenerateBranches(masm, | 1294 GenerateBranches(masm, ranges, start_index + 1, end_index, first, max_char, |
1346 ranges, | 1295 fall_through, odd_label, even_label); |
1347 start_index + 1, | |
1348 end_index, | |
1349 first, | |
1350 max_char, | |
1351 fall_through, | |
1352 odd_label, | |
1353 even_label); | |
1354 return; | 1296 return; |
1355 } | 1297 } |
1356 | 1298 |
1357 intptr_t new_start_index = 0; | 1299 intptr_t new_start_index = 0; |
1358 intptr_t new_end_index = 0; | 1300 intptr_t new_end_index = 0; |
1359 intptr_t border = 0; | 1301 intptr_t border = 0; |
1360 | 1302 |
1361 SplitSearchSpace(ranges, | 1303 SplitSearchSpace(ranges, start_index, end_index, &new_start_index, |
1362 start_index, | 1304 &new_end_index, &border); |
1363 end_index, | |
1364 &new_start_index, | |
1365 &new_end_index, | |
1366 &border); | |
1367 | 1305 |
1368 BlockLabel handle_rest; | 1306 BlockLabel handle_rest; |
1369 BlockLabel* above = &handle_rest; | 1307 BlockLabel* above = &handle_rest; |
1370 if (border == last + 1) { | 1308 if (border == last + 1) { |
1371 // We didn't find any section that started after the limit, so everything | 1309 // We didn't find any section that started after the limit, so everything |
1372 // above the border is one of the terminal labels. | 1310 // above the border is one of the terminal labels. |
1373 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label; | 1311 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label; |
1374 ASSERT(new_end_index == end_index - 1); | 1312 ASSERT(new_end_index == end_index - 1); |
1375 } | 1313 } |
1376 | 1314 |
1377 ASSERT(start_index <= new_end_index); | 1315 ASSERT(start_index <= new_end_index); |
1378 ASSERT(new_start_index <= end_index); | 1316 ASSERT(new_start_index <= end_index); |
1379 ASSERT(start_index < new_start_index); | 1317 ASSERT(start_index < new_start_index); |
1380 ASSERT(new_end_index < end_index); | 1318 ASSERT(new_end_index < end_index); |
1381 ASSERT(new_end_index + 1 == new_start_index || | 1319 ASSERT(new_end_index + 1 == new_start_index || |
1382 (new_end_index + 2 == new_start_index && | 1320 (new_end_index + 2 == new_start_index && |
1383 border == ranges->At(new_end_index + 1))); | 1321 border == ranges->At(new_end_index + 1))); |
1384 ASSERT(min_char < border - 1); | 1322 ASSERT(min_char < border - 1); |
1385 ASSERT(border < max_char); | 1323 ASSERT(border < max_char); |
1386 ASSERT(ranges->At(new_end_index) < border); | 1324 ASSERT(ranges->At(new_end_index) < border); |
1387 ASSERT(border < ranges->At(new_start_index) || | 1325 ASSERT(border < ranges->At(new_start_index) || |
1388 (border == ranges->At(new_start_index) && | 1326 (border == ranges->At(new_start_index) && |
1389 new_start_index == end_index && | 1327 new_start_index == end_index && new_end_index == end_index - 1 && |
1390 new_end_index == end_index - 1 && | |
1391 border == last + 1)); | 1328 border == last + 1)); |
1392 ASSERT(new_start_index == 0 || border >= ranges->At(new_start_index - 1)); | 1329 ASSERT(new_start_index == 0 || border >= ranges->At(new_start_index - 1)); |
1393 | 1330 |
1394 masm->CheckCharacterGT(border - 1, above); | 1331 masm->CheckCharacterGT(border - 1, above); |
1395 BlockLabel dummy; | 1332 BlockLabel dummy; |
1396 GenerateBranches(masm, | 1333 GenerateBranches(masm, ranges, start_index, new_end_index, min_char, |
1397 ranges, | 1334 border - 1, &dummy, even_label, odd_label); |
1398 start_index, | |
1399 new_end_index, | |
1400 min_char, | |
1401 border - 1, | |
1402 &dummy, | |
1403 even_label, | |
1404 odd_label); | |
1405 | 1335 |
1406 if (handle_rest.IsLinked()) { | 1336 if (handle_rest.IsLinked()) { |
1407 masm->BindBlock(&handle_rest); | 1337 masm->BindBlock(&handle_rest); |
1408 bool flip = (new_start_index & 1) != (start_index & 1); | 1338 bool flip = (new_start_index & 1) != (start_index & 1); |
1409 GenerateBranches(masm, | 1339 GenerateBranches(masm, ranges, new_start_index, end_index, border, max_char, |
1410 ranges, | 1340 &dummy, flip ? odd_label : even_label, |
1411 new_start_index, | |
1412 end_index, | |
1413 border, | |
1414 max_char, | |
1415 &dummy, | |
1416 flip ? odd_label : even_label, | |
1417 flip ? even_label : odd_label); | 1341 flip ? even_label : odd_label); |
1418 } | 1342 } |
1419 } | 1343 } |
1420 | 1344 |
1421 | 1345 |
1422 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 1346 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, |
1423 RegExpCharacterClass* cc, | 1347 RegExpCharacterClass* cc, |
1424 bool one_byte, | 1348 bool one_byte, |
1425 BlockLabel* on_failure, | 1349 BlockLabel* on_failure, |
1426 intptr_t cp_offset, | 1350 intptr_t cp_offset, |
(...skipping 26 matching lines...) Expand all Loading... |
1453 if (last_valid_range < 0) { | 1377 if (last_valid_range < 0) { |
1454 if (!cc->is_negated()) { | 1378 if (!cc->is_negated()) { |
1455 macro_assembler->GoTo(on_failure); | 1379 macro_assembler->GoTo(on_failure); |
1456 } | 1380 } |
1457 if (check_offset) { | 1381 if (check_offset) { |
1458 macro_assembler->CheckPosition(cp_offset, on_failure); | 1382 macro_assembler->CheckPosition(cp_offset, on_failure); |
1459 } | 1383 } |
1460 return; | 1384 return; |
1461 } | 1385 } |
1462 | 1386 |
1463 if (last_valid_range == 0 && | 1387 if (last_valid_range == 0 && ranges->At(0).IsEverything(max_char)) { |
1464 ranges->At(0).IsEverything(max_char)) { | |
1465 if (cc->is_negated()) { | 1388 if (cc->is_negated()) { |
1466 macro_assembler->GoTo(on_failure); | 1389 macro_assembler->GoTo(on_failure); |
1467 } else { | 1390 } else { |
1468 // This is a common case hit by non-anchored expressions. | 1391 // This is a common case hit by non-anchored expressions. |
1469 if (check_offset) { | 1392 if (check_offset) { |
1470 macro_assembler->CheckPosition(cp_offset, on_failure); | 1393 macro_assembler->CheckPosition(cp_offset, on_failure); |
1471 } | 1394 } |
1472 } | 1395 } |
1473 return; | 1396 return; |
1474 } | 1397 } |
1475 if (last_valid_range == 0 && | 1398 if (last_valid_range == 0 && !cc->is_negated() && |
1476 !cc->is_negated() && | |
1477 ranges->At(0).IsEverything(max_char)) { | 1399 ranges->At(0).IsEverything(max_char)) { |
1478 // This is a common case hit by non-anchored expressions. | 1400 // This is a common case hit by non-anchored expressions. |
1479 if (check_offset) { | 1401 if (check_offset) { |
1480 macro_assembler->CheckPosition(cp_offset, on_failure); | 1402 macro_assembler->CheckPosition(cp_offset, on_failure); |
1481 } | 1403 } |
1482 return; | 1404 return; |
1483 } | 1405 } |
1484 | 1406 |
1485 if (!preloaded) { | 1407 if (!preloaded) { |
1486 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); | 1408 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); |
1487 } | 1409 } |
1488 | 1410 |
1489 if (cc->is_standard() && | 1411 if (cc->is_standard() && |
1490 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), | 1412 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), |
1491 on_failure)) { | 1413 on_failure)) { |
1492 return; | 1414 return; |
1493 } | 1415 } |
1494 | 1416 |
1495 | 1417 |
1496 // A new list with ascending entries. Each entry is a code unit | 1418 // A new list with ascending entries. Each entry is a code unit |
1497 // where there is a boundary between code units that are part of | 1419 // where there is a boundary between code units that are part of |
1498 // the class and code units that are not. Normally we insert an | 1420 // the class and code units that are not. Normally we insert an |
1499 // entry at zero which goes to the failure label, but if there | 1421 // entry at zero which goes to the failure label, but if there |
1500 // was already one there we fall through for success on that entry. | 1422 // was already one there we fall through for success on that entry. |
1501 // Subsequent entries have alternating meaning (success/failure). | 1423 // Subsequent entries have alternating meaning (success/failure). |
1502 ZoneGrowableArray<int>* range_boundaries = | 1424 ZoneGrowableArray<int>* range_boundaries = |
1503 new(zone) ZoneGrowableArray<int>(last_valid_range); | 1425 new (zone) ZoneGrowableArray<int>(last_valid_range); |
1504 | 1426 |
1505 bool zeroth_entry_is_failure = !cc->is_negated(); | 1427 bool zeroth_entry_is_failure = !cc->is_negated(); |
1506 | 1428 |
1507 for (intptr_t i = 0; i <= last_valid_range; i++) { | 1429 for (intptr_t i = 0; i <= last_valid_range; i++) { |
1508 CharacterRange& range = (*ranges)[i]; | 1430 CharacterRange& range = (*ranges)[i]; |
1509 if (range.from() == 0) { | 1431 if (range.from() == 0) { |
1510 ASSERT(i == 0); | 1432 ASSERT(i == 0); |
1511 zeroth_entry_is_failure = !zeroth_entry_is_failure; | 1433 zeroth_entry_is_failure = !zeroth_entry_is_failure; |
1512 } else { | 1434 } else { |
1513 range_boundaries->Add(range.from()); | 1435 range_boundaries->Add(range.from()); |
1514 } | 1436 } |
1515 range_boundaries->Add(range.to() + 1); | 1437 range_boundaries->Add(range.to() + 1); |
1516 } | 1438 } |
1517 intptr_t end_index = range_boundaries->length() - 1; | 1439 intptr_t end_index = range_boundaries->length() - 1; |
1518 if (range_boundaries->At(end_index) > max_char) { | 1440 if (range_boundaries->At(end_index) > max_char) { |
1519 end_index--; | 1441 end_index--; |
1520 } | 1442 } |
1521 | 1443 |
1522 BlockLabel fall_through; | 1444 BlockLabel fall_through; |
1523 GenerateBranches(macro_assembler, | 1445 GenerateBranches(macro_assembler, range_boundaries, |
1524 range_boundaries, | |
1525 0, // start_index. | 1446 0, // start_index. |
1526 end_index, | 1447 end_index, |
1527 0, // min_char. | 1448 0, // min_char. |
1528 max_char, | 1449 max_char, &fall_through, |
1529 &fall_through, | |
1530 zeroth_entry_is_failure ? &fall_through : on_failure, | 1450 zeroth_entry_is_failure ? &fall_through : on_failure, |
1531 zeroth_entry_is_failure ? on_failure : &fall_through); | 1451 zeroth_entry_is_failure ? on_failure : &fall_through); |
1532 macro_assembler->BindBlock(&fall_through); | 1452 macro_assembler->BindBlock(&fall_through); |
1533 } | 1453 } |
1534 | 1454 |
1535 | 1455 |
1536 RegExpNode::~RegExpNode() { | 1456 RegExpNode::~RegExpNode() {} |
1537 } | |
1538 | 1457 |
1539 | 1458 |
1540 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, | 1459 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, |
1541 Trace* trace) { | 1460 Trace* trace) { |
1542 // If we are generating a greedy loop then don't stop and don't reuse code. | 1461 // If we are generating a greedy loop then don't stop and don't reuse code. |
1543 if (trace->stop_node() != NULL) { | 1462 if (trace->stop_node() != NULL) { |
1544 return CONTINUE; | 1463 return CONTINUE; |
1545 } | 1464 } |
1546 | 1465 |
1547 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1466 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
(...skipping 12 matching lines...) Expand all Loading... |
1560 return DONE; | 1479 return DONE; |
1561 } | 1480 } |
1562 // Generate generic version of the node and bind the label for later use. | 1481 // Generate generic version of the node and bind the label for later use. |
1563 macro_assembler->BindBlock(&label_); | 1482 macro_assembler->BindBlock(&label_); |
1564 return CONTINUE; | 1483 return CONTINUE; |
1565 } | 1484 } |
1566 | 1485 |
1567 // We are being asked to make a non-generic version. Keep track of how many | 1486 // We are being asked to make a non-generic version. Keep track of how many |
1568 // non-generic versions we generate so as not to overdo it. | 1487 // non-generic versions we generate so as not to overdo it. |
1569 trace_count_++; | 1488 trace_count_++; |
1570 if (kRegexpOptimization && | 1489 if (kRegexpOptimization && trace_count_ < kMaxCopiesCodeGenerated && |
1571 trace_count_ < kMaxCopiesCodeGenerated && | |
1572 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) { | 1490 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) { |
1573 return CONTINUE; | 1491 return CONTINUE; |
1574 } | 1492 } |
1575 | 1493 |
1576 // If we get here code has been generated for this node too many times or | 1494 // If we get here code has been generated for this node too many times or |
1577 // recursion is too deep. Time to switch to a generic version. The code for | 1495 // recursion is too deep. Time to switch to a generic version. The code for |
1578 // generic versions above can handle deep recursion properly. | 1496 // generic versions above can handle deep recursion properly. |
1579 trace->Flush(compiler, this); | 1497 trace->Flush(compiler, this); |
1580 return DONE; | 1498 return DONE; |
1581 } | 1499 } |
1582 | 1500 |
1583 | 1501 |
1584 intptr_t ActionNode::EatsAtLeast(intptr_t still_to_find, | 1502 intptr_t ActionNode::EatsAtLeast(intptr_t still_to_find, |
1585 intptr_t budget, | 1503 intptr_t budget, |
1586 bool not_at_start) { | 1504 bool not_at_start) { |
1587 if (budget <= 0) return 0; | 1505 if (budget <= 0) return 0; |
1588 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! | 1506 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! |
1589 return on_success()->EatsAtLeast(still_to_find, | 1507 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); |
1590 budget - 1, | |
1591 not_at_start); | |
1592 } | 1508 } |
1593 | 1509 |
1594 | 1510 |
1595 void ActionNode::FillInBMInfo(intptr_t offset, | 1511 void ActionNode::FillInBMInfo(intptr_t offset, |
1596 intptr_t budget, | 1512 intptr_t budget, |
1597 BoyerMooreLookahead* bm, | 1513 BoyerMooreLookahead* bm, |
1598 bool not_at_start) { | 1514 bool not_at_start) { |
1599 if (action_type_ == BEGIN_SUBMATCH) { | 1515 if (action_type_ == BEGIN_SUBMATCH) { |
1600 bm->SetRest(offset); | 1516 bm->SetRest(offset); |
1601 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { | 1517 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { |
1602 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); | 1518 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); |
1603 } | 1519 } |
1604 SaveBMInfo(bm, not_at_start, offset); | 1520 SaveBMInfo(bm, not_at_start, offset); |
1605 } | 1521 } |
1606 | 1522 |
1607 | 1523 |
1608 intptr_t AssertionNode::EatsAtLeast(intptr_t still_to_find, | 1524 intptr_t AssertionNode::EatsAtLeast(intptr_t still_to_find, |
1609 intptr_t budget, | 1525 intptr_t budget, |
1610 bool not_at_start) { | 1526 bool not_at_start) { |
1611 if (budget <= 0) return 0; | 1527 if (budget <= 0) return 0; |
1612 // If we know we are not at the start and we are asked "how many characters | 1528 // If we know we are not at the start and we are asked "how many characters |
1613 // will you match if you succeed?" then we can answer anything since false | 1529 // will you match if you succeed?" then we can answer anything since false |
1614 // implies false. So lets just return the max answer (still_to_find) since | 1530 // implies false. So lets just return the max answer (still_to_find) since |
1615 // that won't prevent us from preloading a lot of characters for the other | 1531 // that won't prevent us from preloading a lot of characters for the other |
1616 // branches in the node graph. | 1532 // branches in the node graph. |
1617 if (assertion_type() == AT_START && not_at_start) return still_to_find; | 1533 if (assertion_type() == AT_START && not_at_start) return still_to_find; |
1618 return on_success()->EatsAtLeast(still_to_find, | 1534 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); |
1619 budget - 1, | |
1620 not_at_start); | |
1621 } | 1535 } |
1622 | 1536 |
1623 | 1537 |
1624 void AssertionNode::FillInBMInfo(intptr_t offset, | 1538 void AssertionNode::FillInBMInfo(intptr_t offset, |
1625 intptr_t budget, | 1539 intptr_t budget, |
1626 BoyerMooreLookahead* bm, | 1540 BoyerMooreLookahead* bm, |
1627 bool not_at_start) { | 1541 bool not_at_start) { |
1628 // Match the behaviour of EatsAtLeast on this node. | 1542 // Match the behaviour of EatsAtLeast on this node. |
1629 if (assertion_type() == AT_START && not_at_start) return; | 1543 if (assertion_type() == AT_START && not_at_start) return; |
1630 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); | 1544 on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start); |
1631 SaveBMInfo(bm, not_at_start, offset); | 1545 SaveBMInfo(bm, not_at_start, offset); |
1632 } | 1546 } |
1633 | 1547 |
1634 | 1548 |
1635 intptr_t BackReferenceNode::EatsAtLeast(intptr_t still_to_find, | 1549 intptr_t BackReferenceNode::EatsAtLeast(intptr_t still_to_find, |
1636 intptr_t budget, | 1550 intptr_t budget, |
1637 bool not_at_start) { | 1551 bool not_at_start) { |
1638 if (budget <= 0) return 0; | 1552 if (budget <= 0) return 0; |
1639 return on_success()->EatsAtLeast(still_to_find, | 1553 return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); |
1640 budget - 1, | |
1641 not_at_start); | |
1642 } | 1554 } |
1643 | 1555 |
1644 | 1556 |
1645 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find, | 1557 intptr_t TextNode::EatsAtLeast(intptr_t still_to_find, |
1646 intptr_t budget, | 1558 intptr_t budget, |
1647 bool not_at_start) { | 1559 bool not_at_start) { |
1648 intptr_t answer = Length(); | 1560 intptr_t answer = Length(); |
1649 if (answer >= still_to_find) return answer; | 1561 if (answer >= still_to_find) return answer; |
1650 if (budget <= 0) return answer; | 1562 if (budget <= 0) return answer; |
1651 // We are not at start after this node so we set the last argument to 'true'. | 1563 // We are not at start after this node so we set the last argument to 'true'. |
1652 return answer + on_success()->EatsAtLeast(still_to_find - answer, | 1564 return answer + |
1653 budget - 1, | 1565 on_success()->EatsAtLeast(still_to_find - answer, budget - 1, true); |
1654 true); | |
1655 } | 1566 } |
1656 | 1567 |
1657 | 1568 |
1658 intptr_t NegativeLookaheadChoiceNode::EatsAtLeast(intptr_t still_to_find, | 1569 intptr_t NegativeLookaheadChoiceNode::EatsAtLeast(intptr_t still_to_find, |
1659 intptr_t budget, | 1570 intptr_t budget, |
1660 bool not_at_start) { | 1571 bool not_at_start) { |
1661 if (budget <= 0) return 0; | 1572 if (budget <= 0) return 0; |
1662 // Alternative 0 is the negative lookahead, alternative 1 is what comes | 1573 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
1663 // afterwards. | 1574 // afterwards. |
1664 RegExpNode* node = (*alternatives_)[1].node(); | 1575 RegExpNode* node = (*alternatives_)[1].node(); |
(...skipping 29 matching lines...) Expand all Loading... |
1694 if (node_eats_at_least < min) min = node_eats_at_least; | 1605 if (node_eats_at_least < min) min = node_eats_at_least; |
1695 if (min == 0) return 0; | 1606 if (min == 0) return 0; |
1696 } | 1607 } |
1697 return min; | 1608 return min; |
1698 } | 1609 } |
1699 | 1610 |
1700 | 1611 |
1701 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find, | 1612 intptr_t LoopChoiceNode::EatsAtLeast(intptr_t still_to_find, |
1702 intptr_t budget, | 1613 intptr_t budget, |
1703 bool not_at_start) { | 1614 bool not_at_start) { |
1704 return EatsAtLeastHelper(still_to_find, | 1615 return EatsAtLeastHelper(still_to_find, budget - 1, loop_node_, not_at_start); |
1705 budget - 1, | |
1706 loop_node_, | |
1707 not_at_start); | |
1708 } | 1616 } |
1709 | 1617 |
1710 | 1618 |
1711 intptr_t ChoiceNode::EatsAtLeast(intptr_t still_to_find, | 1619 intptr_t ChoiceNode::EatsAtLeast(intptr_t still_to_find, |
1712 intptr_t budget, | 1620 intptr_t budget, |
1713 bool not_at_start) { | 1621 bool not_at_start) { |
1714 return EatsAtLeastHelper(still_to_find, | 1622 return EatsAtLeastHelper(still_to_find, budget, NULL, not_at_start); |
1715 budget, | |
1716 NULL, | |
1717 not_at_start); | |
1718 } | 1623 } |
1719 | 1624 |
1720 | 1625 |
1721 // Takes the left-most 1-bit and smears it out, setting all bits to its right. | 1626 // Takes the left-most 1-bit and smears it out, setting all bits to its right. |
1722 static inline uint32_t SmearBitsRight(uint32_t v) { | 1627 static inline uint32_t SmearBitsRight(uint32_t v) { |
1723 v |= v >> 1; | 1628 v |= v >> 1; |
1724 v |= v >> 2; | 1629 v |= v >> 2; |
1725 v |= v >> 4; | 1630 v |= v >> 4; |
1726 v |= v >> 8; | 1631 v |= v >> 8; |
1727 v |= v >> 16; | 1632 v |= v >> 16; |
(...skipping 26 matching lines...) Expand all Loading... |
1754 | 1659 |
1755 | 1660 |
1756 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, | 1661 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, |
1757 Trace* bounds_check_trace, | 1662 Trace* bounds_check_trace, |
1758 Trace* trace, | 1663 Trace* trace, |
1759 bool preload_has_checked_bounds, | 1664 bool preload_has_checked_bounds, |
1760 BlockLabel* on_possible_success, | 1665 BlockLabel* on_possible_success, |
1761 QuickCheckDetails* details, | 1666 QuickCheckDetails* details, |
1762 bool fall_through_on_failure) { | 1667 bool fall_through_on_failure) { |
1763 if (details->characters() == 0) return false; | 1668 if (details->characters() == 0) return false; |
1764 GetQuickCheckDetails( | 1669 GetQuickCheckDetails(details, compiler, 0, |
1765 details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE); | 1670 trace->at_start() == Trace::FALSE_VALUE); |
1766 if (details->cannot_match()) return false; | 1671 if (details->cannot_match()) return false; |
1767 if (!details->Rationalize(compiler->one_byte())) return false; | 1672 if (!details->Rationalize(compiler->one_byte())) return false; |
1768 ASSERT(details->characters() == 1 || | 1673 ASSERT(details->characters() == 1 || |
1769 compiler->macro_assembler()->CanReadUnaligned()); | 1674 compiler->macro_assembler()->CanReadUnaligned()); |
1770 uint32_t mask = details->mask(); | 1675 uint32_t mask = details->mask(); |
1771 uint32_t value = details->value(); | 1676 uint32_t value = details->value(); |
1772 | 1677 |
1773 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 1678 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
1774 | 1679 |
1775 if (trace->characters_preloaded() != details->characters()) { | 1680 if (trace->characters_preloaded() != details->characters()) { |
1776 ASSERT(trace->cp_offset() == bounds_check_trace->cp_offset()); | 1681 ASSERT(trace->cp_offset() == bounds_check_trace->cp_offset()); |
1777 // We are attempting to preload the minimum number of characters | 1682 // We are attempting to preload the minimum number of characters |
1778 // any choice would eat, so if the bounds check fails, then none of the | 1683 // any choice would eat, so if the bounds check fails, then none of the |
1779 // choices can succeed, so we can just immediately backtrack, rather | 1684 // choices can succeed, so we can just immediately backtrack, rather |
1780 // than go to the next choice. | 1685 // than go to the next choice. |
1781 assembler->LoadCurrentCharacter(trace->cp_offset(), | 1686 assembler->LoadCurrentCharacter( |
1782 bounds_check_trace->backtrack(), | 1687 trace->cp_offset(), bounds_check_trace->backtrack(), |
1783 !preload_has_checked_bounds, | 1688 !preload_has_checked_bounds, details->characters()); |
1784 details->characters()); | |
1785 } | 1689 } |
1786 | 1690 |
1787 | 1691 |
1788 bool need_mask = true; | 1692 bool need_mask = true; |
1789 | 1693 |
1790 if (details->characters() == 1) { | 1694 if (details->characters() == 1) { |
1791 // If number of characters preloaded is 1 then we used a byte or 16 bit | 1695 // If number of characters preloaded is 1 then we used a byte or 16 bit |
1792 // load so the value is already masked down. | 1696 // load so the value is already masked down. |
1793 uint32_t char_mask; | 1697 uint32_t char_mask; |
1794 if (compiler->one_byte()) { | 1698 if (compiler->one_byte()) { |
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1941 CharacterRange range = ranges->At(first_range); | 1845 CharacterRange range = ranges->At(first_range); |
1942 uint16_t from = range.from(); | 1846 uint16_t from = range.from(); |
1943 uint16_t to = range.to(); | 1847 uint16_t to = range.to(); |
1944 if (to > char_mask) { | 1848 if (to > char_mask) { |
1945 to = char_mask; | 1849 to = char_mask; |
1946 } | 1850 } |
1947 uint32_t differing_bits = (from ^ to); | 1851 uint32_t differing_bits = (from ^ to); |
1948 // A mask and compare is only perfect if the differing bits form a | 1852 // A mask and compare is only perfect if the differing bits form a |
1949 // number like 00011111 with one single block of trailing 1s. | 1853 // number like 00011111 with one single block of trailing 1s. |
1950 if ((differing_bits & (differing_bits + 1)) == 0 && | 1854 if ((differing_bits & (differing_bits + 1)) == 0 && |
1951 from + differing_bits == to) { | 1855 from + differing_bits == to) { |
1952 pos->determines_perfectly = true; | 1856 pos->determines_perfectly = true; |
1953 } | 1857 } |
1954 uint32_t common_bits = ~SmearBitsRight(differing_bits); | 1858 uint32_t common_bits = ~SmearBitsRight(differing_bits); |
1955 uint32_t bits = (from & common_bits); | 1859 uint32_t bits = (from & common_bits); |
1956 for (intptr_t i = first_range + 1; i < ranges->length(); i++) { | 1860 for (intptr_t i = first_range + 1; i < ranges->length(); i++) { |
1957 CharacterRange range = ranges->At(i); | 1861 CharacterRange range = ranges->At(i); |
1958 uint16_t from = range.from(); | 1862 uint16_t from = range.from(); |
1959 uint16_t to = range.to(); | 1863 uint16_t to = range.to(); |
1960 if (from > char_mask) continue; | 1864 if (from > char_mask) continue; |
1961 if (to > char_mask) to = char_mask; | 1865 if (to > char_mask) to = char_mask; |
(...skipping 16 matching lines...) Expand all Loading... |
1978 } | 1882 } |
1979 characters_filled_in++; | 1883 characters_filled_in++; |
1980 ASSERT(characters_filled_in <= details->characters()); | 1884 ASSERT(characters_filled_in <= details->characters()); |
1981 if (characters_filled_in == details->characters()) { | 1885 if (characters_filled_in == details->characters()) { |
1982 return; | 1886 return; |
1983 } | 1887 } |
1984 } | 1888 } |
1985 } | 1889 } |
1986 ASSERT(characters_filled_in != details->characters()); | 1890 ASSERT(characters_filled_in != details->characters()); |
1987 if (!details->cannot_match()) { | 1891 if (!details->cannot_match()) { |
1988 on_success()-> GetQuickCheckDetails(details, | 1892 on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in, |
1989 compiler, | 1893 true); |
1990 characters_filled_in, | |
1991 true); | |
1992 } | 1894 } |
1993 } | 1895 } |
1994 | 1896 |
1995 | 1897 |
1996 void QuickCheckDetails::Clear() { | 1898 void QuickCheckDetails::Clear() { |
1997 for (int i = 0; i < characters_; i++) { | 1899 for (int i = 0; i < characters_; i++) { |
1998 positions_[i].mask = 0; | 1900 positions_[i].mask = 0; |
1999 positions_[i].value = 0; | 1901 positions_[i].value = 0; |
2000 positions_[i].determines_perfectly = false; | 1902 positions_[i].determines_perfectly = false; |
2001 } | 1903 } |
(...skipping 27 matching lines...) Expand all Loading... |
2029 if (other->cannot_match_) { | 1931 if (other->cannot_match_) { |
2030 return; | 1932 return; |
2031 } | 1933 } |
2032 if (cannot_match_) { | 1934 if (cannot_match_) { |
2033 *this = *other; | 1935 *this = *other; |
2034 return; | 1936 return; |
2035 } | 1937 } |
2036 for (intptr_t i = from_index; i < characters_; i++) { | 1938 for (intptr_t i = from_index; i < characters_; i++) { |
2037 QuickCheckDetails::Position* pos = positions(i); | 1939 QuickCheckDetails::Position* pos = positions(i); |
2038 QuickCheckDetails::Position* other_pos = other->positions(i); | 1940 QuickCheckDetails::Position* other_pos = other->positions(i); |
2039 if (pos->mask != other_pos->mask || | 1941 if (pos->mask != other_pos->mask || pos->value != other_pos->value || |
2040 pos->value != other_pos->value || | |
2041 !other_pos->determines_perfectly) { | 1942 !other_pos->determines_perfectly) { |
2042 // Our mask-compare operation will be approximate unless we have the | 1943 // Our mask-compare operation will be approximate unless we have the |
2043 // exact same operation on both sides of the alternation. | 1944 // exact same operation on both sides of the alternation. |
2044 pos->determines_perfectly = false; | 1945 pos->determines_perfectly = false; |
2045 } | 1946 } |
2046 pos->mask &= other_pos->mask; | 1947 pos->mask &= other_pos->mask; |
2047 pos->value &= pos->mask; | 1948 pos->value &= pos->mask; |
2048 other_pos->value &= pos->mask; | 1949 other_pos->value &= pos->mask; |
2049 uint16_t differing_bits = (pos->value ^ other_pos->value); | 1950 uint16_t differing_bits = (pos->value ^ other_pos->value); |
2050 pos->mask &= ~differing_bits; | 1951 pos->mask &= ~differing_bits; |
2051 pos->value &= pos->mask; | 1952 pos->value &= pos->mask; |
2052 } | 1953 } |
2053 } | 1954 } |
2054 | 1955 |
2055 | 1956 |
2056 class VisitMarker : public ValueObject { | 1957 class VisitMarker : public ValueObject { |
2057 public: | 1958 public: |
2058 explicit VisitMarker(NodeInfo* info) : info_(info) { | 1959 explicit VisitMarker(NodeInfo* info) : info_(info) { |
2059 ASSERT(!info->visited); | 1960 ASSERT(!info->visited); |
2060 info->visited = true; | 1961 info->visited = true; |
2061 } | 1962 } |
2062 ~VisitMarker() { | 1963 ~VisitMarker() { info_->visited = false; } |
2063 info_->visited = false; | 1964 |
2064 } | |
2065 private: | 1965 private: |
2066 NodeInfo* info_; | 1966 NodeInfo* info_; |
2067 }; | 1967 }; |
2068 | 1968 |
2069 | 1969 |
2070 RegExpNode* SeqRegExpNode::FilterOneByte(intptr_t depth, bool ignore_case) { | 1970 RegExpNode* SeqRegExpNode::FilterOneByte(intptr_t depth, bool ignore_case) { |
2071 if (info()->replacement_calculated) return replacement(); | 1971 if (info()->replacement_calculated) return replacement(); |
2072 if (depth < 0) return this; | 1972 if (depth < 0) return this; |
2073 ASSERT(!info()->visited); | 1973 ASSERT(!info()->visited); |
2074 VisitMarker marker(info()); | 1974 VisitMarker marker(info()); |
2075 return FilterSuccessor(depth - 1, ignore_case); | 1975 return FilterSuccessor(depth - 1, ignore_case); |
2076 } | 1976 } |
2077 | 1977 |
2078 | 1978 |
2079 RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth, bool ignore_case) { | 1979 RegExpNode* SeqRegExpNode::FilterSuccessor(intptr_t depth, bool ignore_case) { |
2080 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); | 1980 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); |
2081 if (next == NULL) return set_replacement(NULL); | 1981 if (next == NULL) return set_replacement(NULL); |
2082 on_success_ = next; | 1982 on_success_ = next; |
2083 return set_replacement(this); | 1983 return set_replacement(this); |
2084 } | 1984 } |
2085 | 1985 |
2086 | 1986 |
2087 // We need to check for the following characters: 0x39c 0x3bc 0x178. | 1987 // We need to check for the following characters: 0x39c 0x3bc 0x178. |
2088 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { | 1988 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { |
2089 // TODO(dcarney): this could be a lot more efficient. | 1989 // TODO(dcarney): this could be a lot more efficient. |
2090 return range.Contains(0x39c) || | 1990 return range.Contains(0x39c) || range.Contains(0x3bc) || |
2091 range.Contains(0x3bc) || range.Contains(0x178); | 1991 range.Contains(0x178); |
2092 } | 1992 } |
2093 | 1993 |
2094 | 1994 |
2095 static bool RangesContainLatin1Equivalents( | 1995 static bool RangesContainLatin1Equivalents( |
2096 ZoneGrowableArray<CharacterRange>* ranges) { | 1996 ZoneGrowableArray<CharacterRange>* ranges) { |
2097 for (intptr_t i = 0; i < ranges->length(); i++) { | 1997 for (intptr_t i = 0; i < ranges->length(); i++) { |
2098 // TODO(dcarney): this could be a lot more efficient. | 1998 // TODO(dcarney): this could be a lot more efficient. |
2099 if (RangeContainsLatin1Equivalents(ranges->At(i))) return true; | 1999 if (RangeContainsLatin1Equivalents(ranges->At(i))) return true; |
2100 } | 2000 } |
2101 return false; | 2001 return false; |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2143 } else { | 2043 } else { |
2144 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); | 2044 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); |
2145 RegExpCharacterClass* cc = elm.char_class(); | 2045 RegExpCharacterClass* cc = elm.char_class(); |
2146 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); | 2046 ZoneGrowableArray<CharacterRange>* ranges = cc->ranges(); |
2147 if (!CharacterRange::IsCanonical(ranges)) { | 2047 if (!CharacterRange::IsCanonical(ranges)) { |
2148 CharacterRange::Canonicalize(ranges); | 2048 CharacterRange::Canonicalize(ranges); |
2149 } | 2049 } |
2150 // Now they are in order so we only need to look at the first. | 2050 // Now they are in order so we only need to look at the first. |
2151 intptr_t range_count = ranges->length(); | 2051 intptr_t range_count = ranges->length(); |
2152 if (cc->is_negated()) { | 2052 if (cc->is_negated()) { |
2153 if (range_count != 0 && | 2053 if (range_count != 0 && ranges->At(0).from() == 0 && |
2154 ranges->At(0).from() == 0 && | |
2155 ranges->At(0).to() >= Symbols::kMaxOneCharCodeSymbol) { | 2054 ranges->At(0).to() >= Symbols::kMaxOneCharCodeSymbol) { |
2156 // This will be handled in a later filter. | 2055 // This will be handled in a later filter. |
2157 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; | 2056 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
2158 return set_replacement(NULL); | 2057 return set_replacement(NULL); |
2159 } | 2058 } |
2160 } else { | 2059 } else { |
2161 if (range_count == 0 || | 2060 if (range_count == 0 || |
2162 ranges->At(0).from() > Symbols::kMaxOneCharCodeSymbol) { | 2061 ranges->At(0).from() > Symbols::kMaxOneCharCodeSymbol) { |
2163 // This will be handled in a later filter. | 2062 // This will be handled in a later filter. |
2164 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; | 2063 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2219 } | 2118 } |
2220 if (surviving < 2) return set_replacement(survivor); | 2119 if (surviving < 2) return set_replacement(survivor); |
2221 | 2120 |
2222 set_replacement(this); | 2121 set_replacement(this); |
2223 if (surviving == choice_count) { | 2122 if (surviving == choice_count) { |
2224 return this; | 2123 return this; |
2225 } | 2124 } |
2226 // Only some of the nodes survived the filtering. We need to rebuild the | 2125 // Only some of the nodes survived the filtering. We need to rebuild the |
2227 // alternatives list. | 2126 // alternatives list. |
2228 ZoneGrowableArray<GuardedAlternative>* new_alternatives = | 2127 ZoneGrowableArray<GuardedAlternative>* new_alternatives = |
2229 new(Z) ZoneGrowableArray<GuardedAlternative>(surviving); | 2128 new (Z) ZoneGrowableArray<GuardedAlternative>(surviving); |
2230 for (intptr_t i = 0; i < choice_count; i++) { | 2129 for (intptr_t i = 0; i < choice_count; i++) { |
2231 RegExpNode* replacement = | 2130 RegExpNode* replacement = |
2232 (*alternatives_)[i].node()->FilterOneByte(depth - 1, ignore_case); | 2131 (*alternatives_)[i].node()->FilterOneByte(depth - 1, ignore_case); |
2233 if (replacement != NULL) { | 2132 if (replacement != NULL) { |
2234 (*alternatives_)[i].set_node(replacement); | 2133 (*alternatives_)[i].set_node(replacement); |
2235 new_alternatives->Add((*alternatives_)[i]); | 2134 new_alternatives->Add((*alternatives_)[i]); |
2236 } | 2135 } |
2237 } | 2136 } |
2238 alternatives_ = new_alternatives; | 2137 alternatives_ = new_alternatives; |
2239 return this; | 2138 return this; |
(...skipping 22 matching lines...) Expand all Loading... |
2262 return set_replacement(this); | 2161 return set_replacement(this); |
2263 } | 2162 } |
2264 | 2163 |
2265 | 2164 |
2266 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2165 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
2267 RegExpCompiler* compiler, | 2166 RegExpCompiler* compiler, |
2268 intptr_t characters_filled_in, | 2167 intptr_t characters_filled_in, |
2269 bool not_at_start) { | 2168 bool not_at_start) { |
2270 if (body_can_be_zero_length_ || info()->visited) return; | 2169 if (body_can_be_zero_length_ || info()->visited) return; |
2271 VisitMarker marker(info()); | 2170 VisitMarker marker(info()); |
2272 return ChoiceNode::GetQuickCheckDetails(details, | 2171 return ChoiceNode::GetQuickCheckDetails(details, compiler, |
2273 compiler, | 2172 characters_filled_in, not_at_start); |
2274 characters_filled_in, | |
2275 not_at_start); | |
2276 } | 2173 } |
2277 | 2174 |
2278 | 2175 |
2279 void LoopChoiceNode::FillInBMInfo(intptr_t offset, | 2176 void LoopChoiceNode::FillInBMInfo(intptr_t offset, |
2280 intptr_t budget, | 2177 intptr_t budget, |
2281 BoyerMooreLookahead* bm, | 2178 BoyerMooreLookahead* bm, |
2282 bool not_at_start) { | 2179 bool not_at_start) { |
2283 if (body_can_be_zero_length_ || budget <= 0) { | 2180 if (body_can_be_zero_length_ || budget <= 0) { |
2284 bm->SetRest(offset); | 2181 bm->SetRest(offset); |
2285 SaveBMInfo(bm, not_at_start, offset); | 2182 SaveBMInfo(bm, not_at_start, offset); |
2286 return; | 2183 return; |
2287 } | 2184 } |
2288 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start); | 2185 ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start); |
2289 SaveBMInfo(bm, not_at_start, offset); | 2186 SaveBMInfo(bm, not_at_start, offset); |
2290 } | 2187 } |
2291 | 2188 |
2292 | 2189 |
2293 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2190 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
2294 RegExpCompiler* compiler, | 2191 RegExpCompiler* compiler, |
2295 intptr_t characters_filled_in, | 2192 intptr_t characters_filled_in, |
2296 bool not_at_start) { | 2193 bool not_at_start) { |
2297 not_at_start = (not_at_start || not_at_start_); | 2194 not_at_start = (not_at_start || not_at_start_); |
2298 intptr_t choice_count = alternatives_->length(); | 2195 intptr_t choice_count = alternatives_->length(); |
2299 ASSERT(choice_count > 0); | 2196 ASSERT(choice_count > 0); |
2300 (*alternatives_)[0].node()->GetQuickCheckDetails(details, | 2197 (*alternatives_)[0].node()->GetQuickCheckDetails( |
2301 compiler, | 2198 details, compiler, characters_filled_in, not_at_start); |
2302 characters_filled_in, | |
2303 not_at_start); | |
2304 for (intptr_t i = 1; i < choice_count; i++) { | 2199 for (intptr_t i = 1; i < choice_count; i++) { |
2305 QuickCheckDetails new_details(details->characters()); | 2200 QuickCheckDetails new_details(details->characters()); |
2306 RegExpNode* node = (*alternatives_)[i].node(); | 2201 RegExpNode* node = (*alternatives_)[i].node(); |
2307 node->GetQuickCheckDetails(&new_details, compiler, | 2202 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in, |
2308 characters_filled_in, | |
2309 not_at_start); | 2203 not_at_start); |
2310 // Here we merge the quick match details of the two branches. | 2204 // Here we merge the quick match details of the two branches. |
2311 details->Merge(&new_details, characters_filled_in); | 2205 details->Merge(&new_details, characters_filled_in); |
2312 } | 2206 } |
2313 } | 2207 } |
2314 | 2208 |
2315 | 2209 |
2316 // Check for [0-9A-Z_a-z]. | 2210 // Check for [0-9A-Z_a-z]. |
2317 static void EmitWordCheck(RegExpMacroAssembler* assembler, | 2211 static void EmitWordCheck(RegExpMacroAssembler* assembler, |
2318 BlockLabel* word, | 2212 BlockLabel* word, |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2350 new_trace.InvalidateCurrentCharacter(); | 2244 new_trace.InvalidateCurrentCharacter(); |
2351 | 2245 |
2352 BlockLabel ok; | 2246 BlockLabel ok; |
2353 if (new_trace.cp_offset() == 0) { | 2247 if (new_trace.cp_offset() == 0) { |
2354 // The start of input counts as a newline in this context, so skip to | 2248 // The start of input counts as a newline in this context, so skip to |
2355 // ok if we are at the start. | 2249 // ok if we are at the start. |
2356 assembler->CheckAtStart(&ok); | 2250 assembler->CheckAtStart(&ok); |
2357 } | 2251 } |
2358 // We already checked that we are not at the start of input so it must be | 2252 // We already checked that we are not at the start of input so it must be |
2359 // OK to load the previous character. | 2253 // OK to load the previous character. |
2360 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, | 2254 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, |
2361 new_trace.backtrack(), | 2255 new_trace.backtrack(), false); |
2362 false); | 2256 if (!assembler->CheckSpecialCharacterClass('n', new_trace.backtrack())) { |
2363 if (!assembler->CheckSpecialCharacterClass('n', | |
2364 new_trace.backtrack())) { | |
2365 // Newline means \n, \r, 0x2028 or 0x2029. | 2257 // Newline means \n, \r, 0x2028 or 0x2029. |
2366 if (!compiler->one_byte()) { | 2258 if (!compiler->one_byte()) { |
2367 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); | 2259 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); |
2368 } | 2260 } |
2369 assembler->CheckCharacter('\n', &ok); | 2261 assembler->CheckCharacter('\n', &ok); |
2370 assembler->CheckNotCharacter('\r', new_trace.backtrack()); | 2262 assembler->CheckNotCharacter('\r', new_trace.backtrack()); |
2371 } | 2263 } |
2372 assembler->BindBlock(&ok); | 2264 assembler->BindBlock(&ok); |
2373 on_success->Emit(compiler, &new_trace); | 2265 on_success->Emit(compiler, &new_trace); |
2374 } | 2266 } |
2375 | 2267 |
2376 | 2268 |
2377 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). | 2269 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). |
2378 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { | 2270 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { |
2379 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2271 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2380 Trace::TriBool next_is_word_character = Trace::UNKNOWN; | 2272 Trace::TriBool next_is_word_character = Trace::UNKNOWN; |
2381 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); | 2273 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); |
2382 BoyerMooreLookahead* lookahead = bm_info(not_at_start); | 2274 BoyerMooreLookahead* lookahead = bm_info(not_at_start); |
2383 if (lookahead == NULL) { | 2275 if (lookahead == NULL) { |
2384 intptr_t eats_at_least = | 2276 intptr_t eats_at_least = |
2385 Utils::Minimum(kMaxLookaheadForBoyerMoore, | 2277 Utils::Minimum(kMaxLookaheadForBoyerMoore, |
2386 EatsAtLeast(kMaxLookaheadForBoyerMoore, | 2278 EatsAtLeast(kMaxLookaheadForBoyerMoore, kRecursionBudget, |
2387 kRecursionBudget, | |
2388 not_at_start)); | 2279 not_at_start)); |
2389 if (eats_at_least >= 1) { | 2280 if (eats_at_least >= 1) { |
2390 BoyerMooreLookahead* bm = | 2281 BoyerMooreLookahead* bm = |
2391 new(Z) BoyerMooreLookahead(eats_at_least, compiler, Z); | 2282 new (Z) BoyerMooreLookahead(eats_at_least, compiler, Z); |
2392 FillInBMInfo(0, kRecursionBudget, bm, not_at_start); | 2283 FillInBMInfo(0, kRecursionBudget, bm, not_at_start); |
2393 if (bm->at(0)->is_non_word()) | 2284 if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE_VALUE; |
2394 next_is_word_character = Trace::FALSE_VALUE; | |
2395 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; | 2285 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; |
2396 } | 2286 } |
2397 } else { | 2287 } else { |
2398 if (lookahead->at(0)->is_non_word()) | 2288 if (lookahead->at(0)->is_non_word()) |
2399 next_is_word_character = Trace::FALSE_VALUE; | 2289 next_is_word_character = Trace::FALSE_VALUE; |
2400 if (lookahead->at(0)->is_word()) | 2290 if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; |
2401 next_is_word_character = Trace::TRUE_VALUE; | |
2402 } | 2291 } |
2403 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY); | 2292 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY); |
2404 if (next_is_word_character == Trace::UNKNOWN) { | 2293 if (next_is_word_character == Trace::UNKNOWN) { |
2405 BlockLabel before_non_word; | 2294 BlockLabel before_non_word; |
2406 BlockLabel before_word; | 2295 BlockLabel before_word; |
2407 if (trace->characters_preloaded() != 1) { | 2296 if (trace->characters_preloaded() != 1) { |
2408 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); | 2297 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); |
2409 } | 2298 } |
2410 // Fall through on non-word. | 2299 // Fall through on non-word. |
2411 EmitWordCheck(assembler, &before_word, &before_non_word, false); | 2300 EmitWordCheck(assembler, &before_word, &before_non_word, false); |
(...skipping 23 matching lines...) Expand all Loading... |
2435 void AssertionNode::BacktrackIfPrevious( | 2324 void AssertionNode::BacktrackIfPrevious( |
2436 RegExpCompiler* compiler, | 2325 RegExpCompiler* compiler, |
2437 Trace* trace, | 2326 Trace* trace, |
2438 AssertionNode::IfPrevious backtrack_if_previous) { | 2327 AssertionNode::IfPrevious backtrack_if_previous) { |
2439 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2328 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2440 Trace new_trace(*trace); | 2329 Trace new_trace(*trace); |
2441 new_trace.InvalidateCurrentCharacter(); | 2330 new_trace.InvalidateCurrentCharacter(); |
2442 | 2331 |
2443 BlockLabel fall_through, dummy; | 2332 BlockLabel fall_through, dummy; |
2444 | 2333 |
2445 BlockLabel* non_word = backtrack_if_previous == kIsNonWord ? | 2334 BlockLabel* non_word = backtrack_if_previous == kIsNonWord |
2446 new_trace.backtrack() : | 2335 ? new_trace.backtrack() |
2447 &fall_through; | 2336 : &fall_through; |
2448 BlockLabel* word = backtrack_if_previous == kIsNonWord ? | 2337 BlockLabel* word = backtrack_if_previous == kIsNonWord |
2449 &fall_through : | 2338 ? &fall_through |
2450 new_trace.backtrack(); | 2339 : new_trace.backtrack(); |
2451 | 2340 |
2452 if (new_trace.cp_offset() == 0) { | 2341 if (new_trace.cp_offset() == 0) { |
2453 // The start of input counts as a non-word character, so the question is | 2342 // The start of input counts as a non-word character, so the question is |
2454 // decided if we are at the start. | 2343 // decided if we are at the start. |
2455 assembler->CheckAtStart(non_word); | 2344 assembler->CheckAtStart(non_word); |
2456 } | 2345 } |
2457 // We already checked that we are not at the start of input so it must be | 2346 // We already checked that we are not at the start of input so it must be |
2458 // OK to load the previous character. | 2347 // OK to load the previous character. |
2459 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false); | 2348 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false); |
2460 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); | 2349 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); |
2461 | 2350 |
2462 assembler->BindBlock(&fall_through); | 2351 assembler->BindBlock(&fall_through); |
2463 on_success()->Emit(compiler, &new_trace); | 2352 on_success()->Emit(compiler, &new_trace); |
2464 } | 2353 } |
2465 | 2354 |
2466 | 2355 |
2467 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2356 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details, |
2468 RegExpCompiler* compiler, | 2357 RegExpCompiler* compiler, |
2469 intptr_t filled_in, | 2358 intptr_t filled_in, |
2470 bool not_at_start) { | 2359 bool not_at_start) { |
2471 if (assertion_type_ == AT_START && not_at_start) { | 2360 if (assertion_type_ == AT_START && not_at_start) { |
2472 details->set_cannot_match(); | 2361 details->set_cannot_match(); |
2473 return; | 2362 return; |
2474 } | 2363 } |
2475 return on_success()->GetQuickCheckDetails(details, | 2364 return on_success()->GetQuickCheckDetails(details, compiler, filled_in, |
2476 compiler, | |
2477 filled_in, | |
2478 not_at_start); | 2365 not_at_start); |
2479 } | 2366 } |
2480 | 2367 |
2481 | 2368 |
2482 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 2369 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
2483 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2370 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2484 switch (assertion_type_) { | 2371 switch (assertion_type_) { |
2485 case AT_END: { | 2372 case AT_END: { |
2486 BlockLabel ok; | 2373 BlockLabel ok; |
2487 assembler->CheckPosition(trace->cp_offset(), &ok); | 2374 assembler->CheckPosition(trace->cp_offset(), &ok); |
2488 assembler->GoTo(trace->backtrack()); | 2375 assembler->GoTo(trace->backtrack()); |
2489 assembler->BindBlock(&ok); | 2376 assembler->BindBlock(&ok); |
2490 break; | 2377 break; |
2491 } | 2378 } |
2492 case AT_START: { | 2379 case AT_START: { |
2493 if (trace->at_start() == Trace::FALSE_VALUE) { | 2380 if (trace->at_start() == Trace::FALSE_VALUE) { |
2494 assembler->GoTo(trace->backtrack()); | 2381 assembler->GoTo(trace->backtrack()); |
2495 return; | 2382 return; |
2496 } | 2383 } |
2497 if (trace->at_start() == Trace::UNKNOWN) { | 2384 if (trace->at_start() == Trace::UNKNOWN) { |
2498 assembler->CheckNotAtStart(trace->backtrack()); | 2385 assembler->CheckNotAtStart(trace->backtrack()); |
2499 Trace at_start_trace = *trace; | 2386 Trace at_start_trace = *trace; |
2500 at_start_trace.set_at_start(true); | 2387 at_start_trace.set_at_start(true); |
2501 on_success()->Emit(compiler, &at_start_trace); | 2388 on_success()->Emit(compiler, &at_start_trace); |
2502 return; | 2389 return; |
2503 } | 2390 } |
2504 } | 2391 } break; |
2505 break; | |
2506 case AFTER_NEWLINE: | 2392 case AFTER_NEWLINE: |
2507 EmitHat(compiler, on_success(), trace); | 2393 EmitHat(compiler, on_success(), trace); |
2508 return; | 2394 return; |
2509 case AT_BOUNDARY: | 2395 case AT_BOUNDARY: |
2510 case AT_NON_BOUNDARY: { | 2396 case AT_NON_BOUNDARY: { |
2511 EmitBoundaryCheck(compiler, trace); | 2397 EmitBoundaryCheck(compiler, trace); |
2512 return; | 2398 return; |
2513 } | 2399 } |
2514 } | 2400 } |
2515 on_success()->Emit(compiler, trace); | 2401 on_success()->Emit(compiler, trace); |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2593 case SIMPLE_CHARACTER_MATCH: | 2479 case SIMPLE_CHARACTER_MATCH: |
2594 emit_function = &EmitSimpleCharacter; | 2480 emit_function = &EmitSimpleCharacter; |
2595 break; | 2481 break; |
2596 case CASE_CHARACTER_MATCH: | 2482 case CASE_CHARACTER_MATCH: |
2597 emit_function = &EmitAtomLetter; | 2483 emit_function = &EmitAtomLetter; |
2598 break; | 2484 break; |
2599 default: | 2485 default: |
2600 break; | 2486 break; |
2601 } | 2487 } |
2602 if (emit_function != NULL) { | 2488 if (emit_function != NULL) { |
2603 bool bound_checked = emit_function(Z, | 2489 bool bound_checked = emit_function( |
2604 compiler, | 2490 Z, compiler, quarks->At(j), backtrack, cp_offset + j, |
2605 quarks->At(j), | 2491 *checked_up_to < cp_offset + j, preloaded); |
2606 backtrack, | |
2607 cp_offset + j, | |
2608 *checked_up_to < cp_offset + j, | |
2609 preloaded); | |
2610 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); | 2492 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); |
2611 } | 2493 } |
2612 } | 2494 } |
2613 } else { | 2495 } else { |
2614 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); | 2496 ASSERT(elm.text_type() == TextElement::CHAR_CLASS); |
2615 if (pass == CHARACTER_CLASS_MATCH) { | 2497 if (pass == CHARACTER_CLASS_MATCH) { |
2616 if (first_element_checked && i == 0) continue; | 2498 if (first_element_checked && i == 0) continue; |
2617 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; | 2499 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; |
2618 RegExpCharacterClass* cc = elm.char_class(); | 2500 RegExpCharacterClass* cc = elm.char_class(); |
2619 EmitCharClass(assembler, | 2501 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, |
2620 cc, | 2502 *checked_up_to < cp_offset, preloaded, Z); |
2621 one_byte, | |
2622 backtrack, | |
2623 cp_offset, | |
2624 *checked_up_to < cp_offset, | |
2625 preloaded, | |
2626 Z); | |
2627 UpdateBoundsCheck(cp_offset, checked_up_to); | 2503 UpdateBoundsCheck(cp_offset, checked_up_to); |
2628 } | 2504 } |
2629 } | 2505 } |
2630 } | 2506 } |
2631 } | 2507 } |
2632 | 2508 |
2633 | 2509 |
2634 intptr_t TextNode::Length() { | 2510 intptr_t TextNode::Length() { |
2635 TextElement elm = elms_->Last(); | 2511 TextElement elm = elms_->Last(); |
2636 ASSERT(elm.cp_offset() >= 0); | 2512 ASSERT(elm.cp_offset() >= 0); |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2671 | 2547 |
2672 bool first_elt_done = false; | 2548 bool first_elt_done = false; |
2673 intptr_t bound_checked_to = trace->cp_offset() - 1; | 2549 intptr_t bound_checked_to = trace->cp_offset() - 1; |
2674 bound_checked_to += trace->bound_checked_up_to(); | 2550 bound_checked_to += trace->bound_checked_up_to(); |
2675 | 2551 |
2676 // If a character is preloaded into the current character register then | 2552 // If a character is preloaded into the current character register then |
2677 // check that now. | 2553 // check that now. |
2678 if (trace->characters_preloaded() == 1) { | 2554 if (trace->characters_preloaded() == 1) { |
2679 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) { | 2555 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) { |
2680 if (!SkipPass(pass, compiler->ignore_case())) { | 2556 if (!SkipPass(pass, compiler->ignore_case())) { |
2681 TextEmitPass(compiler, | 2557 TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), true, trace, |
2682 static_cast<TextEmitPassType>(pass), | 2558 false, &bound_checked_to); |
2683 true, | |
2684 trace, | |
2685 false, | |
2686 &bound_checked_to); | |
2687 } | 2559 } |
2688 } | 2560 } |
2689 first_elt_done = true; | 2561 first_elt_done = true; |
2690 } | 2562 } |
2691 | 2563 |
2692 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) { | 2564 for (intptr_t pass = kFirstRealPass; pass <= kLastPass; pass++) { |
2693 if (!SkipPass(pass, compiler->ignore_case())) { | 2565 if (!SkipPass(pass, compiler->ignore_case())) { |
2694 TextEmitPass(compiler, | 2566 TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), false, trace, |
2695 static_cast<TextEmitPassType>(pass), | 2567 first_elt_done, &bound_checked_to); |
2696 false, | |
2697 trace, | |
2698 first_elt_done, | |
2699 &bound_checked_to); | |
2700 } | 2568 } |
2701 } | 2569 } |
2702 | 2570 |
2703 Trace successor_trace(*trace); | 2571 Trace successor_trace(*trace); |
2704 successor_trace.set_at_start(false); | 2572 successor_trace.set_at_start(false); |
2705 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); | 2573 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); |
2706 RecursionCheck rc(compiler); | 2574 RecursionCheck rc(compiler); |
2707 on_success()->Emit(compiler, &successor_trace); | 2575 on_success()->Emit(compiler, &successor_trace); |
2708 } | 2576 } |
2709 | 2577 |
(...skipping 12 matching lines...) Expand all Loading... |
2722 characters_preloaded_ = 0; | 2590 characters_preloaded_ = 0; |
2723 // Adjust the offsets of the quick check performed information. This | 2591 // Adjust the offsets of the quick check performed information. This |
2724 // information is used to find out what we already determined about the | 2592 // information is used to find out what we already determined about the |
2725 // characters by means of mask and compare. | 2593 // characters by means of mask and compare. |
2726 quick_check_performed_.Advance(by, compiler->one_byte()); | 2594 quick_check_performed_.Advance(by, compiler->one_byte()); |
2727 cp_offset_ += by; | 2595 cp_offset_ += by; |
2728 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { | 2596 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { |
2729 compiler->SetRegExpTooBig(); | 2597 compiler->SetRegExpTooBig(); |
2730 cp_offset_ = 0; | 2598 cp_offset_ = 0; |
2731 } | 2599 } |
2732 bound_checked_up_to_ = Utils::Maximum(static_cast<intptr_t>(0), | 2600 bound_checked_up_to_ = |
2733 bound_checked_up_to_ - by); | 2601 Utils::Maximum(static_cast<intptr_t>(0), bound_checked_up_to_ - by); |
2734 } | 2602 } |
2735 | 2603 |
2736 | 2604 |
2737 void TextNode::MakeCaseIndependent(bool is_one_byte) { | 2605 void TextNode::MakeCaseIndependent(bool is_one_byte) { |
2738 intptr_t element_count = elms_->length(); | 2606 intptr_t element_count = elms_->length(); |
2739 for (intptr_t i = 0; i < element_count; i++) { | 2607 for (intptr_t i = 0; i < element_count; i++) { |
2740 TextElement elm = elms_->At(i); | 2608 TextElement elm = elms_->At(i); |
2741 if (elm.text_type() == TextElement::CHAR_CLASS) { | 2609 if (elm.text_type() == TextElement::CHAR_CLASS) { |
2742 RegExpCharacterClass* cc = elm.char_class(); | 2610 RegExpCharacterClass* cc = elm.char_class(); |
2743 // None of the standard character classes is different in the case | 2611 // None of the standard character classes is different in the case |
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2842 if (!trace->is_trivial()) { | 2710 if (!trace->is_trivial()) { |
2843 trace->Flush(compiler, this); | 2711 trace->Flush(compiler, this); |
2844 return; | 2712 return; |
2845 } | 2713 } |
2846 ChoiceNode::Emit(compiler, trace); | 2714 ChoiceNode::Emit(compiler, trace); |
2847 } | 2715 } |
2848 | 2716 |
2849 | 2717 |
2850 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, | 2718 intptr_t ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, |
2851 intptr_t eats_at_least) { | 2719 intptr_t eats_at_least) { |
2852 intptr_t preload_characters = Utils::Minimum(static_cast<intptr_t>(4), | 2720 intptr_t preload_characters = |
2853 eats_at_least); | 2721 Utils::Minimum(static_cast<intptr_t>(4), eats_at_least); |
2854 if (compiler->macro_assembler()->CanReadUnaligned()) { | 2722 if (compiler->macro_assembler()->CanReadUnaligned()) { |
2855 bool one_byte = compiler->one_byte(); | 2723 bool one_byte = compiler->one_byte(); |
2856 if (one_byte) { | 2724 if (one_byte) { |
2857 if (preload_characters > 4) preload_characters = 4; | 2725 if (preload_characters > 4) preload_characters = 4; |
2858 // We can't preload 3 characters because there is no machine instruction | 2726 // We can't preload 3 characters because there is no machine instruction |
2859 // to do that. We can't just load 4 because we could be reading | 2727 // to do that. We can't just load 4 because we could be reading |
2860 // beyond the end of the string, which could cause a memory fault. | 2728 // beyond the end of the string, which could cause a memory fault. |
2861 if (preload_characters == 3) preload_characters = 2; | 2729 if (preload_characters == 3) preload_characters = 2; |
2862 } else { | 2730 } else { |
2863 if (preload_characters > 2) preload_characters = 2; | 2731 if (preload_characters > 2) preload_characters = 2; |
2864 } | 2732 } |
2865 } else { | 2733 } else { |
2866 if (preload_characters > 1) preload_characters = 1; | 2734 if (preload_characters > 1) preload_characters = 1; |
2867 } | 2735 } |
2868 return preload_characters; | 2736 return preload_characters; |
2869 } | 2737 } |
2870 | 2738 |
2871 | 2739 |
2872 // This structure is used when generating the alternatives in a choice node. It | 2740 // This structure is used when generating the alternatives in a choice node. It |
2873 // records the way the alternative is being code generated. | 2741 // records the way the alternative is being code generated. |
2874 struct AlternativeGeneration { | 2742 struct AlternativeGeneration { |
2875 AlternativeGeneration() | 2743 AlternativeGeneration() |
2876 : possible_success(), | 2744 : possible_success(), |
2877 expects_preload(false), | 2745 expects_preload(false), |
2878 after(), | 2746 after(), |
2879 quick_check_details() { } | 2747 quick_check_details() {} |
2880 BlockLabel possible_success; | 2748 BlockLabel possible_success; |
2881 bool expects_preload; | 2749 bool expects_preload; |
2882 BlockLabel after; | 2750 BlockLabel after; |
2883 QuickCheckDetails quick_check_details; | 2751 QuickCheckDetails quick_check_details; |
2884 }; | 2752 }; |
2885 | 2753 |
2886 | 2754 |
2887 // Creates a list of AlternativeGenerations. If the list has a reasonable | 2755 // Creates a list of AlternativeGenerations. If the list has a reasonable |
2888 // size then it is on the stack, otherwise the excess is on the heap. | 2756 // size then it is on the stack, otherwise the excess is on the heap. |
2889 class AlternativeGenerationList { | 2757 class AlternativeGenerationList { |
2890 public: | 2758 public: |
2891 explicit AlternativeGenerationList(intptr_t count) | 2759 explicit AlternativeGenerationList(intptr_t count) : alt_gens_(count) { |
2892 : alt_gens_(count) { | |
2893 for (intptr_t i = 0; i < count && i < kAFew; i++) { | 2760 for (intptr_t i = 0; i < count && i < kAFew; i++) { |
2894 alt_gens_.Add(a_few_alt_gens_ + i); | 2761 alt_gens_.Add(a_few_alt_gens_ + i); |
2895 } | 2762 } |
2896 for (intptr_t i = kAFew; i < count; i++) { | 2763 for (intptr_t i = kAFew; i < count; i++) { |
2897 alt_gens_.Add(new AlternativeGeneration()); | 2764 alt_gens_.Add(new AlternativeGeneration()); |
2898 } | 2765 } |
2899 } | 2766 } |
2900 ~AlternativeGenerationList() { | 2767 ~AlternativeGenerationList() { |
2901 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) { | 2768 for (intptr_t i = kAFew; i < alt_gens_.length(); i++) { |
2902 delete alt_gens_[i]; | 2769 delete alt_gens_[i]; |
2903 alt_gens_[i] = NULL; | 2770 alt_gens_[i] = NULL; |
2904 } | 2771 } |
2905 } | 2772 } |
2906 | 2773 |
2907 AlternativeGeneration* at(intptr_t i) { | 2774 AlternativeGeneration* at(intptr_t i) { return alt_gens_[i]; } |
2908 return alt_gens_[i]; | |
2909 } | |
2910 | 2775 |
2911 private: | 2776 private: |
2912 static const intptr_t kAFew = 10; | 2777 static const intptr_t kAFew = 10; |
2913 GrowableArray<AlternativeGeneration*> alt_gens_; | 2778 GrowableArray<AlternativeGeneration*> alt_gens_; |
2914 AlternativeGeneration a_few_alt_gens_[kAFew]; | 2779 AlternativeGeneration a_few_alt_gens_[kAFew]; |
2915 | 2780 |
2916 DISALLOW_ALLOCATION(); | 2781 DISALLOW_ALLOCATION(); |
2917 }; | 2782 }; |
2918 | 2783 |
2919 | 2784 |
2920 // The '2' variant is inclusive from and exclusive to. | 2785 // The '2' variant is inclusive from and exclusive to. |
2921 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12, | 2786 // This covers \s as defined in ECMA-262 5.1, 15.10.2.12, |
2922 // which include WhiteSpace (7.2) or LineTerminator (7.3) values. | 2787 // which include WhiteSpace (7.2) or LineTerminator (7.3) values. |
2923 static const intptr_t kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1, | 2788 static const intptr_t kSpaceRanges[] = { |
2924 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, | 2789 '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, 0x00A1, 0x1680, 0x1681, |
2925 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, | 2790 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030, |
2926 0xFEFF, 0xFF00, 0x10000 }; | 2791 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000}; |
2927 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges); | 2792 static const intptr_t kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges); |
2928 static const intptr_t kWordRanges[] = { | 2793 static const intptr_t kWordRanges[] = {'0', '9' + 1, 'A', 'Z' + 1, '_', |
2929 '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 }; | 2794 '_' + 1, 'a', 'z' + 1, 0x10000}; |
2930 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges); | 2795 static const intptr_t kWordRangeCount = ARRAY_SIZE(kWordRanges); |
2931 static const intptr_t kDigitRanges[] = { '0', '9' + 1, 0x10000 }; | 2796 static const intptr_t kDigitRanges[] = {'0', '9' + 1, 0x10000}; |
2932 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges); | 2797 static const intptr_t kDigitRangeCount = ARRAY_SIZE(kDigitRanges); |
2933 static const intptr_t kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 }; | 2798 static const intptr_t kSurrogateRanges[] = {0xd800, 0xe000, 0x10000}; |
2934 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges); | 2799 static const intptr_t kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges); |
2935 static const intptr_t kLineTerminatorRanges[] = { | 2800 static const intptr_t kLineTerminatorRanges[] = {0x000A, 0x000B, 0x000D, 0x000E, |
2936 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, 0x10000 }; | 2801 0x2028, 0x202A, 0x10000}; |
2937 static const intptr_t kLineTerminatorRangeCount = | 2802 static const intptr_t kLineTerminatorRangeCount = |
2938 ARRAY_SIZE(kLineTerminatorRanges); | 2803 ARRAY_SIZE(kLineTerminatorRanges); |
2939 | 2804 |
2940 | 2805 |
2941 void BoyerMoorePositionInfo::Set(intptr_t character) { | 2806 void BoyerMoorePositionInfo::Set(intptr_t character) { |
2942 SetInterval(Interval(character, character)); | 2807 SetInterval(Interval(character, character)); |
2943 } | 2808 } |
2944 | 2809 |
2945 | 2810 |
2946 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { | 2811 void BoyerMoorePositionInfo::SetInterval(const Interval& interval) { |
2947 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); | 2812 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); |
2948 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); | 2813 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); |
2949 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); | 2814 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); |
2950 surrogate_ = | 2815 surrogate_ = |
2951 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval); | 2816 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval); |
2952 if (interval.to() - interval.from() >= kMapSize - 1) { | 2817 if (interval.to() - interval.from() >= kMapSize - 1) { |
2953 if (map_count_ != kMapSize) { | 2818 if (map_count_ != kMapSize) { |
2954 map_count_ = kMapSize; | 2819 map_count_ = kMapSize; |
2955 for (intptr_t i = 0; i < kMapSize; i++) (*map_)[i] = true; | 2820 for (intptr_t i = 0; i < kMapSize; i++) |
| 2821 (*map_)[i] = true; |
2956 } | 2822 } |
2957 return; | 2823 return; |
2958 } | 2824 } |
2959 for (intptr_t i = interval.from(); i <= interval.to(); i++) { | 2825 for (intptr_t i = interval.from(); i <= interval.to(); i++) { |
2960 intptr_t mod_character = (i & kMask); | 2826 intptr_t mod_character = (i & kMask); |
2961 if (!map_->At(mod_character)) { | 2827 if (!map_->At(mod_character)) { |
2962 map_count_++; | 2828 map_count_++; |
2963 (*map_)[mod_character] = true; | 2829 (*map_)[mod_character] = true; |
2964 } | 2830 } |
2965 if (map_count_ == kMapSize) return; | 2831 if (map_count_ == kMapSize) return; |
2966 } | 2832 } |
2967 } | 2833 } |
2968 | 2834 |
2969 | 2835 |
2970 void BoyerMoorePositionInfo::SetAll() { | 2836 void BoyerMoorePositionInfo::SetAll() { |
2971 s_ = w_ = d_ = kLatticeUnknown; | 2837 s_ = w_ = d_ = kLatticeUnknown; |
2972 if (map_count_ != kMapSize) { | 2838 if (map_count_ != kMapSize) { |
2973 map_count_ = kMapSize; | 2839 map_count_ = kMapSize; |
2974 for (intptr_t i = 0; i < kMapSize; i++) (*map_)[i] = true; | 2840 for (intptr_t i = 0; i < kMapSize; i++) |
| 2841 (*map_)[i] = true; |
2975 } | 2842 } |
2976 } | 2843 } |
2977 | 2844 |
2978 | 2845 |
2979 BoyerMooreLookahead::BoyerMooreLookahead( | 2846 BoyerMooreLookahead::BoyerMooreLookahead(intptr_t length, |
2980 intptr_t length, RegExpCompiler* compiler, Zone* zone) | 2847 RegExpCompiler* compiler, |
2981 : length_(length), | 2848 Zone* zone) |
2982 compiler_(compiler) { | 2849 : length_(length), compiler_(compiler) { |
2983 if (compiler->one_byte()) { | 2850 if (compiler->one_byte()) { |
2984 max_char_ = Symbols::kMaxOneCharCodeSymbol; | 2851 max_char_ = Symbols::kMaxOneCharCodeSymbol; |
2985 } else { | 2852 } else { |
2986 max_char_ = Utf16::kMaxCodeUnit; | 2853 max_char_ = Utf16::kMaxCodeUnit; |
2987 } | 2854 } |
2988 bitmaps_ = new(zone) ZoneGrowableArray<BoyerMoorePositionInfo*>(length); | 2855 bitmaps_ = new (zone) ZoneGrowableArray<BoyerMoorePositionInfo*>(length); |
2989 for (intptr_t i = 0; i < length; i++) { | 2856 for (intptr_t i = 0; i < length; i++) { |
2990 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone)); | 2857 bitmaps_->Add(new (zone) BoyerMoorePositionInfo(zone)); |
2991 } | 2858 } |
2992 } | 2859 } |
2993 | 2860 |
2994 | 2861 |
2995 // Find the longest range of lookahead that has the fewest number of different | 2862 // Find the longest range of lookahead that has the fewest number of different |
2996 // characters that can occur at a given position. Since we are optimizing two | 2863 // characters that can occur at a given position. Since we are optimizing two |
2997 // different parameters at once this is a tradeoff. | 2864 // different parameters at once this is a tradeoff. |
2998 bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) { | 2865 bool BoyerMooreLookahead::FindWorthwhileInterval(intptr_t* from, intptr_t* to) { |
2999 intptr_t biggest_points = 0; | 2866 intptr_t biggest_points = 0; |
3000 // If more than 32 characters out of 128 can occur it is unlikely that we can | 2867 // If more than 32 characters out of 128 can occur it is unlikely that we can |
3001 // be lucky enough to step forwards much of the time. | 2868 // be lucky enough to step forwards much of the time. |
3002 const intptr_t kMaxMax = 32; | 2869 const intptr_t kMaxMax = 32; |
3003 for (intptr_t max_number_of_chars = 4; | 2870 for (intptr_t max_number_of_chars = 4; max_number_of_chars < kMaxMax; |
3004 max_number_of_chars < kMaxMax; | |
3005 max_number_of_chars *= 2) { | 2871 max_number_of_chars *= 2) { |
3006 biggest_points = | 2872 biggest_points = |
3007 FindBestInterval(max_number_of_chars, biggest_points, from, to); | 2873 FindBestInterval(max_number_of_chars, biggest_points, from, to); |
3008 } | 2874 } |
3009 if (biggest_points == 0) return false; | 2875 if (biggest_points == 0) return false; |
3010 return true; | 2876 return true; |
3011 } | 2877 } |
3012 | 2878 |
3013 | 2879 |
3014 // Find the highest-points range between 0 and length_ where the character | 2880 // Find the highest-points range between 0 and length_ where the character |
3015 // information is not too vague. 'Too vague' means that there are more than | 2881 // information is not too vague. 'Too vague' means that there are more than |
3016 // max_number_of_chars that can occur at this position. Calculates the number | 2882 // max_number_of_chars that can occur at this position. Calculates the number |
3017 // of points as the product of width-of-the-range and | 2883 // of points as the product of width-of-the-range and |
3018 // probability-of-finding-one-of-the-characters, where the probability is | 2884 // probability-of-finding-one-of-the-characters, where the probability is |
3019 // calculated using the frequency distribution of the sample subject string. | 2885 // calculated using the frequency distribution of the sample subject string. |
3020 intptr_t BoyerMooreLookahead::FindBestInterval( | 2886 intptr_t BoyerMooreLookahead::FindBestInterval(intptr_t max_number_of_chars, |
3021 intptr_t max_number_of_chars, | 2887 intptr_t old_biggest_points, |
3022 intptr_t old_biggest_points, | 2888 intptr_t* from, |
3023 intptr_t* from, | 2889 intptr_t* to) { |
3024 intptr_t* to) { | |
3025 intptr_t biggest_points = old_biggest_points; | 2890 intptr_t biggest_points = old_biggest_points; |
3026 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; | 2891 static const intptr_t kSize = RegExpMacroAssembler::kTableSize; |
3027 for (intptr_t i = 0; i < length_; ) { | 2892 for (intptr_t i = 0; i < length_;) { |
3028 while (i < length_ && Count(i) > max_number_of_chars) i++; | 2893 while (i < length_ && Count(i) > max_number_of_chars) |
| 2894 i++; |
3029 if (i == length_) break; | 2895 if (i == length_) break; |
3030 intptr_t remembered_from = i; | 2896 intptr_t remembered_from = i; |
3031 bool union_map[kSize]; | 2897 bool union_map[kSize]; |
3032 for (intptr_t j = 0; j < kSize; j++) union_map[j] = false; | 2898 for (intptr_t j = 0; j < kSize; j++) |
| 2899 union_map[j] = false; |
3033 while (i < length_ && Count(i) <= max_number_of_chars) { | 2900 while (i < length_ && Count(i) <= max_number_of_chars) { |
3034 BoyerMoorePositionInfo* map = bitmaps_->At(i); | 2901 BoyerMoorePositionInfo* map = bitmaps_->At(i); |
3035 for (intptr_t j = 0; j < kSize; j++) union_map[j] |= map->at(j); | 2902 for (intptr_t j = 0; j < kSize; j++) |
| 2903 union_map[j] |= map->at(j); |
3036 i++; | 2904 i++; |
3037 } | 2905 } |
3038 intptr_t frequency = 0; | 2906 intptr_t frequency = 0; |
3039 for (intptr_t j = 0; j < kSize; j++) { | 2907 for (intptr_t j = 0; j < kSize; j++) { |
3040 if (union_map[j]) { | 2908 if (union_map[j]) { |
3041 // Add 1 to the frequency to give a small per-character boost for | 2909 // Add 1 to the frequency to give a small per-character boost for |
3042 // the cases where our sampling is not good enough and many | 2910 // the cases where our sampling is not good enough and many |
3043 // characters have a frequency of zero. This means the frequency | 2911 // characters have a frequency of zero. This means the frequency |
3044 // can theoretically be up to 2*kSize though we treat it mostly as | 2912 // can theoretically be up to 2*kSize though we treat it mostly as |
3045 // a fraction of kSize. | 2913 // a fraction of kSize. |
3046 frequency += compiler_->frequency_collator()->Frequency(j) + 1; | 2914 frequency += compiler_->frequency_collator()->Frequency(j) + 1; |
3047 } | 2915 } |
3048 } | 2916 } |
3049 // We use the probability of skipping times the distance we are skipping to | 2917 // We use the probability of skipping times the distance we are skipping to |
3050 // judge the effectiveness of this. Actually we have a cut-off: By | 2918 // judge the effectiveness of this. Actually we have a cut-off: By |
3051 // dividing by 2 we switch off the skipping if the probability of skipping | 2919 // dividing by 2 we switch off the skipping if the probability of skipping |
3052 // is less than 50%. This is because the multibyte mask-and-compare | 2920 // is less than 50%. This is because the multibyte mask-and-compare |
3053 // skipping in quickcheck is more likely to do well on this case. | 2921 // skipping in quickcheck is more likely to do well on this case. |
3054 bool in_quickcheck_range = ((i - remembered_from < 4) || | 2922 bool in_quickcheck_range = |
3055 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2)); | 2923 ((i - remembered_from < 4) || |
| 2924 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2)); |
3056 // Called 'probability' but it is only a rough estimate and can actually | 2925 // Called 'probability' but it is only a rough estimate and can actually |
3057 // be outside the 0-kSize range. | 2926 // be outside the 0-kSize range. |
3058 intptr_t probability = | 2927 intptr_t probability = |
3059 (in_quickcheck_range ? kSize / 2 : kSize) - frequency; | 2928 (in_quickcheck_range ? kSize / 2 : kSize) - frequency; |
3060 intptr_t points = (i - remembered_from) * probability; | 2929 intptr_t points = (i - remembered_from) * probability; |
3061 if (points > biggest_points) { | 2930 if (points > biggest_points) { |
3062 *from = remembered_from; | 2931 *from = remembered_from; |
3063 *to = i - 1; | 2932 *to = i - 1; |
3064 biggest_points = points; | 2933 biggest_points = points; |
3065 } | 2934 } |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3133 // The mask-compare can probably handle this better. | 3002 // The mask-compare can probably handle this better. |
3134 return; | 3003 return; |
3135 } | 3004 } |
3136 | 3005 |
3137 if (found_single_character) { | 3006 if (found_single_character) { |
3138 BlockLabel cont, again; | 3007 BlockLabel cont, again; |
3139 masm->BindBlock(&again); | 3008 masm->BindBlock(&again); |
3140 masm->LoadCurrentCharacter(max_lookahead, &cont, true); | 3009 masm->LoadCurrentCharacter(max_lookahead, &cont, true); |
3141 if (max_char_ > kSize) { | 3010 if (max_char_ > kSize) { |
3142 masm->CheckCharacterAfterAnd(single_character, | 3011 masm->CheckCharacterAfterAnd(single_character, |
3143 RegExpMacroAssembler::kTableMask, | 3012 RegExpMacroAssembler::kTableMask, &cont); |
3144 &cont); | |
3145 } else { | 3013 } else { |
3146 masm->CheckCharacter(single_character, &cont); | 3014 masm->CheckCharacter(single_character, &cont); |
3147 } | 3015 } |
3148 masm->AdvanceCurrentPosition(lookahead_width); | 3016 masm->AdvanceCurrentPosition(lookahead_width); |
3149 masm->GoTo(&again); | 3017 masm->GoTo(&again); |
3150 masm->BindBlock(&cont); | 3018 masm->BindBlock(&cont); |
3151 return; | 3019 return; |
3152 } | 3020 } |
3153 | 3021 |
3154 const TypedData& boolean_skip_table = TypedData::ZoneHandle( | 3022 const TypedData& boolean_skip_table = TypedData::ZoneHandle( |
3155 compiler_->zone(), | 3023 compiler_->zone(), |
3156 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); | 3024 TypedData::New(kTypedDataUint8ArrayCid, kSize, Heap::kOld)); |
3157 intptr_t skip_distance = GetSkipTable( | 3025 intptr_t skip_distance = |
3158 min_lookahead, max_lookahead, boolean_skip_table); | 3026 GetSkipTable(min_lookahead, max_lookahead, boolean_skip_table); |
3159 ASSERT(skip_distance != 0); | 3027 ASSERT(skip_distance != 0); |
3160 | 3028 |
3161 BlockLabel cont, again; | 3029 BlockLabel cont, again; |
3162 | 3030 |
3163 masm->BindBlock(&again); | 3031 masm->BindBlock(&again); |
3164 masm->LoadCurrentCharacter(max_lookahead, &cont, true); | 3032 masm->LoadCurrentCharacter(max_lookahead, &cont, true); |
3165 masm->CheckBitInTable(boolean_skip_table, &cont); | 3033 masm->CheckBitInTable(boolean_skip_table, &cont); |
3166 masm->AdvanceCurrentPosition(skip_distance); | 3034 masm->AdvanceCurrentPosition(skip_distance); |
3167 masm->GoTo(&again); | 3035 masm->GoTo(&again); |
3168 masm->BindBlock(&cont); | 3036 masm->BindBlock(&cont); |
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3262 ASSERT(!trace->mentions_reg(guards->At(j)->reg())); | 3130 ASSERT(!trace->mentions_reg(guards->At(j)->reg())); |
3263 } | 3131 } |
3264 } | 3132 } |
3265 #endif | 3133 #endif |
3266 } | 3134 } |
3267 | 3135 |
3268 | 3136 |
3269 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, | 3137 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, |
3270 Trace* current_trace, | 3138 Trace* current_trace, |
3271 PreloadState* state) { | 3139 PreloadState* state) { |
3272 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { | 3140 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { |
3273 // Save some time by looking at most one machine word ahead. | 3141 // Save some time by looking at most one machine word ahead. |
3274 state->eats_at_least_ = | 3142 state->eats_at_least_ = |
3275 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget, | 3143 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget, |
3276 current_trace->at_start() == Trace::FALSE_VALUE); | 3144 current_trace->at_start() == Trace::FALSE_VALUE); |
3277 } | 3145 } |
3278 state->preload_characters_ = | 3146 state->preload_characters_ = |
3279 CalculatePreloadCharacters(compiler, state->eats_at_least_); | 3147 CalculatePreloadCharacters(compiler, state->eats_at_least_); |
3280 | 3148 |
3281 state->preload_is_current_ = | 3149 state->preload_is_current_ = |
3282 (current_trace->characters_preloaded() == state->preload_characters_); | 3150 (current_trace->characters_preloaded() == state->preload_characters_); |
3283 state->preload_has_checked_bounds_ = state->preload_is_current_; | 3151 state->preload_has_checked_bounds_ = state->preload_is_current_; |
3284 } | 3152 } |
3285 | 3153 |
3286 | 3154 |
3287 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3155 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
3288 intptr_t choice_count = alternatives_->length(); | 3156 intptr_t choice_count = alternatives_->length(); |
3289 | 3157 |
3290 AssertGuardsMentionRegisters(trace); | 3158 AssertGuardsMentionRegisters(trace); |
3291 | 3159 |
3292 LimitResult limit_result = LimitVersions(compiler, trace); | 3160 LimitResult limit_result = LimitVersions(compiler, trace); |
3293 if (limit_result == DONE) return; | 3161 if (limit_result == DONE) return; |
(...skipping 10 matching lines...) Expand all Loading... |
3304 | 3172 |
3305 PreloadState preload; | 3173 PreloadState preload; |
3306 preload.init(); | 3174 preload.init(); |
3307 GreedyLoopState greedy_loop_state(not_at_start()); | 3175 GreedyLoopState greedy_loop_state(not_at_start()); |
3308 | 3176 |
3309 intptr_t text_length = | 3177 intptr_t text_length = |
3310 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0])); | 3178 GreedyLoopTextLengthForAlternative(&((*alternatives_)[0])); |
3311 AlternativeGenerationList alt_gens(choice_count); | 3179 AlternativeGenerationList alt_gens(choice_count); |
3312 | 3180 |
3313 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { | 3181 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { |
3314 trace = EmitGreedyLoop(compiler, | 3182 trace = EmitGreedyLoop(compiler, trace, &alt_gens, &preload, |
3315 trace, | 3183 &greedy_loop_state, text_length); |
3316 &alt_gens, | |
3317 &preload, | |
3318 &greedy_loop_state, | |
3319 text_length); | |
3320 } else { | 3184 } else { |
3321 // TODO(erikcorry): Delete this. We don't need this label, but it makes us | 3185 // TODO(erikcorry): Delete this. We don't need this label, but it makes us |
3322 // match the traces produced pre-cleanup. | 3186 // match the traces produced pre-cleanup. |
3323 BlockLabel second_choice; | 3187 BlockLabel second_choice; |
3324 compiler->macro_assembler()->BindBlock(&second_choice); | 3188 compiler->macro_assembler()->BindBlock(&second_choice); |
3325 | 3189 |
3326 preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace); | 3190 preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace); |
3327 | 3191 |
3328 EmitChoices(compiler, | 3192 EmitChoices(compiler, &alt_gens, 0, trace, &preload); |
3329 &alt_gens, | |
3330 0, | |
3331 trace, | |
3332 &preload); | |
3333 } | 3193 } |
3334 | 3194 |
3335 // At this point we need to generate slow checks for the alternatives where | 3195 // At this point we need to generate slow checks for the alternatives where |
3336 // the quick check was inlined. We can recognize these because the associated | 3196 // the quick check was inlined. We can recognize these because the associated |
3337 // label was bound. | 3197 // label was bound. |
3338 intptr_t new_flush_budget = trace->flush_budget() / choice_count; | 3198 intptr_t new_flush_budget = trace->flush_budget() / choice_count; |
3339 for (intptr_t i = 0; i < choice_count; i++) { | 3199 for (intptr_t i = 0; i < choice_count; i++) { |
3340 AlternativeGeneration* alt_gen = alt_gens.at(i); | 3200 AlternativeGeneration* alt_gen = alt_gens.at(i); |
3341 Trace new_trace(*trace); | 3201 Trace new_trace(*trace); |
3342 // If there are actions to be flushed we have to limit how many times | 3202 // If there are actions to be flushed we have to limit how many times |
3343 // they are flushed. Take the budget of the parent trace and distribute | 3203 // they are flushed. Take the budget of the parent trace and distribute |
3344 // it fairly amongst the children. | 3204 // it fairly amongst the children. |
3345 if (new_trace.actions() != NULL) { | 3205 if (new_trace.actions() != NULL) { |
3346 new_trace.set_flush_budget(new_flush_budget); | 3206 new_trace.set_flush_budget(new_flush_budget); |
3347 } | 3207 } |
3348 bool next_expects_preload = | 3208 bool next_expects_preload = |
3349 i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload; | 3209 i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload; |
3350 EmitOutOfLineContinuation(compiler, | 3210 EmitOutOfLineContinuation(compiler, &new_trace, alternatives_->At(i), |
3351 &new_trace, | 3211 alt_gen, preload.preload_characters_, |
3352 alternatives_->At(i), | |
3353 alt_gen, | |
3354 preload.preload_characters_, | |
3355 next_expects_preload); | 3212 next_expects_preload); |
3356 } | 3213 } |
3357 } | 3214 } |
3358 | 3215 |
3359 Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, | 3216 Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, |
3360 Trace* trace, | 3217 Trace* trace, |
3361 AlternativeGenerationList* alt_gens, | 3218 AlternativeGenerationList* alt_gens, |
3362 PreloadState* preload, | 3219 PreloadState* preload, |
3363 GreedyLoopState* greedy_loop_state, | 3220 GreedyLoopState* greedy_loop_state, |
3364 intptr_t text_length) { | 3221 intptr_t text_length) { |
(...skipping 16 matching lines...) Expand all Loading... |
3381 greedy_match_trace.set_stop_node(this); | 3238 greedy_match_trace.set_stop_node(this); |
3382 greedy_match_trace.set_loop_label(&loop_label); | 3239 greedy_match_trace.set_loop_label(&loop_label); |
3383 (*alternatives_)[0].node()->Emit(compiler, &greedy_match_trace); | 3240 (*alternatives_)[0].node()->Emit(compiler, &greedy_match_trace); |
3384 macro_assembler->BindBlock(&greedy_match_failed); | 3241 macro_assembler->BindBlock(&greedy_match_failed); |
3385 | 3242 |
3386 BlockLabel second_choice; // For use in greedy matches. | 3243 BlockLabel second_choice; // For use in greedy matches. |
3387 macro_assembler->BindBlock(&second_choice); | 3244 macro_assembler->BindBlock(&second_choice); |
3388 | 3245 |
3389 Trace* new_trace = greedy_loop_state->counter_backtrack_trace(); | 3246 Trace* new_trace = greedy_loop_state->counter_backtrack_trace(); |
3390 | 3247 |
3391 EmitChoices(compiler, | 3248 EmitChoices(compiler, alt_gens, 1, new_trace, preload); |
3392 alt_gens, | |
3393 1, | |
3394 new_trace, | |
3395 preload); | |
3396 | 3249 |
3397 macro_assembler->BindBlock(greedy_loop_state->label()); | 3250 macro_assembler->BindBlock(greedy_loop_state->label()); |
3398 // If we have unwound to the bottom then backtrack. | 3251 // If we have unwound to the bottom then backtrack. |
3399 macro_assembler->CheckGreedyLoop(trace->backtrack()); | 3252 macro_assembler->CheckGreedyLoop(trace->backtrack()); |
3400 // Otherwise try the second priority at an earlier position. | 3253 // Otherwise try the second priority at an earlier position. |
3401 macro_assembler->AdvanceCurrentPosition(-text_length); | 3254 macro_assembler->AdvanceCurrentPosition(-text_length); |
3402 macro_assembler->GoTo(&second_choice); | 3255 macro_assembler->GoTo(&second_choice); |
3403 return new_trace; | 3256 return new_trace; |
3404 } | 3257 } |
3405 | 3258 |
(...skipping 23 matching lines...) Expand all Loading... |
3429 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 3282 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
3430 // At this point we know that we are at a non-greedy loop that will eat | 3283 // At this point we know that we are at a non-greedy loop that will eat |
3431 // any character one at a time. Any non-anchored regexp has such a | 3284 // any character one at a time. Any non-anchored regexp has such a |
3432 // loop prepended to it in order to find where it starts. We look for | 3285 // loop prepended to it in order to find where it starts. We look for |
3433 // a pattern of the form ...abc... where we can look 6 characters ahead | 3286 // a pattern of the form ...abc... where we can look 6 characters ahead |
3434 // and step forwards 3 if the character is not one of abc. Abc need | 3287 // and step forwards 3 if the character is not one of abc. Abc need |
3435 // not be atoms, they can be any reasonably limited character class or | 3288 // not be atoms, they can be any reasonably limited character class or |
3436 // small alternation. | 3289 // small alternation. |
3437 BoyerMooreLookahead* bm = bm_info(false); | 3290 BoyerMooreLookahead* bm = bm_info(false); |
3438 if (bm == NULL) { | 3291 if (bm == NULL) { |
3439 eats_at_least = Utils::Minimum(kMaxLookaheadForBoyerMoore, | 3292 eats_at_least = Utils::Minimum( |
3440 EatsAtLeast(kMaxLookaheadForBoyerMoore, | 3293 kMaxLookaheadForBoyerMoore, |
3441 kRecursionBudget, | 3294 EatsAtLeast(kMaxLookaheadForBoyerMoore, kRecursionBudget, false)); |
3442 false)); | |
3443 if (eats_at_least >= 1) { | 3295 if (eats_at_least >= 1) { |
3444 bm = new(Z) BoyerMooreLookahead(eats_at_least, compiler, Z); | 3296 bm = new (Z) BoyerMooreLookahead(eats_at_least, compiler, Z); |
3445 GuardedAlternative alt0 = alternatives_->At(0); | 3297 GuardedAlternative alt0 = alternatives_->At(0); |
3446 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false); | 3298 alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false); |
3447 } | 3299 } |
3448 } | 3300 } |
3449 if (bm != NULL) { | 3301 if (bm != NULL) { |
3450 bm->EmitSkipInstructions(macro_assembler); | 3302 bm->EmitSkipInstructions(macro_assembler); |
3451 } | 3303 } |
3452 return eats_at_least; | 3304 return eats_at_least; |
3453 } | 3305 } |
3454 | 3306 |
(...skipping 14 matching lines...) Expand all Loading... |
3469 | 3321 |
3470 for (intptr_t i = first_choice; i < choice_count; i++) { | 3322 for (intptr_t i = first_choice; i < choice_count; i++) { |
3471 bool is_last = i == choice_count - 1; | 3323 bool is_last = i == choice_count - 1; |
3472 bool fall_through_on_failure = !is_last; | 3324 bool fall_through_on_failure = !is_last; |
3473 GuardedAlternative alternative = alternatives_->At(i); | 3325 GuardedAlternative alternative = alternatives_->At(i); |
3474 AlternativeGeneration* alt_gen = alt_gens->at(i); | 3326 AlternativeGeneration* alt_gen = alt_gens->at(i); |
3475 alt_gen->quick_check_details.set_characters(preload->preload_characters_); | 3327 alt_gen->quick_check_details.set_characters(preload->preload_characters_); |
3476 ZoneGrowableArray<Guard*>* guards = alternative.guards(); | 3328 ZoneGrowableArray<Guard*>* guards = alternative.guards(); |
3477 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); | 3329 intptr_t guard_count = (guards == NULL) ? 0 : guards->length(); |
3478 Trace new_trace(*trace); | 3330 Trace new_trace(*trace); |
3479 new_trace.set_characters_preloaded(preload->preload_is_current_ ? | 3331 new_trace.set_characters_preloaded( |
3480 preload->preload_characters_ : | 3332 preload->preload_is_current_ ? preload->preload_characters_ : 0); |
3481 0); | |
3482 if (preload->preload_has_checked_bounds_) { | 3333 if (preload->preload_has_checked_bounds_) { |
3483 new_trace.set_bound_checked_up_to(preload->preload_characters_); | 3334 new_trace.set_bound_checked_up_to(preload->preload_characters_); |
3484 } | 3335 } |
3485 new_trace.quick_check_performed()->Clear(); | 3336 new_trace.quick_check_performed()->Clear(); |
3486 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE); | 3337 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE); |
3487 if (!is_last) { | 3338 if (!is_last) { |
3488 new_trace.set_backtrack(&alt_gen->after); | 3339 new_trace.set_backtrack(&alt_gen->after); |
3489 } | 3340 } |
3490 alt_gen->expects_preload = preload->preload_is_current_; | 3341 alt_gen->expects_preload = preload->preload_is_current_; |
3491 bool generate_full_check_inline = false; | 3342 bool generate_full_check_inline = false; |
3492 if (kRegexpOptimization && | 3343 if (kRegexpOptimization && |
3493 try_to_emit_quick_check_for_alternative(i == 0) && | 3344 try_to_emit_quick_check_for_alternative(i == 0) && |
3494 alternative.node()->EmitQuickCheck(compiler, | 3345 alternative.node()->EmitQuickCheck( |
3495 trace, | 3346 compiler, trace, &new_trace, preload->preload_has_checked_bounds_, |
3496 &new_trace, | 3347 &alt_gen->possible_success, &alt_gen->quick_check_details, |
3497 preload->preload_has_checked_bounds_, | 3348 fall_through_on_failure)) { |
3498 &alt_gen->possible_success, | |
3499 &alt_gen->quick_check_details, | |
3500 fall_through_on_failure)) { | |
3501 // Quick check was generated for this choice. | 3349 // Quick check was generated for this choice. |
3502 preload->preload_is_current_ = true; | 3350 preload->preload_is_current_ = true; |
3503 preload->preload_has_checked_bounds_ = true; | 3351 preload->preload_has_checked_bounds_ = true; |
3504 // If we generated the quick check to fall through on possible success, | 3352 // If we generated the quick check to fall through on possible success, |
3505 // we now need to generate the full check inline. | 3353 // we now need to generate the full check inline. |
3506 if (!fall_through_on_failure) { | 3354 if (!fall_through_on_failure) { |
3507 macro_assembler->BindBlock(&alt_gen->possible_success); | 3355 macro_assembler->BindBlock(&alt_gen->possible_success); |
3508 new_trace.set_quick_check_performed(&alt_gen->quick_check_details); | 3356 new_trace.set_quick_check_performed(&alt_gen->quick_check_details); |
3509 new_trace.set_characters_preloaded(preload->preload_characters_); | 3357 new_trace.set_characters_preloaded(preload->preload_characters_); |
3510 new_trace.set_bound_checked_up_to(preload->preload_characters_); | 3358 new_trace.set_bound_checked_up_to(preload->preload_characters_); |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3562 BlockLabel reload_current_char; | 3410 BlockLabel reload_current_char; |
3563 out_of_line_trace.set_backtrack(&reload_current_char); | 3411 out_of_line_trace.set_backtrack(&reload_current_char); |
3564 for (intptr_t j = 0; j < guard_count; j++) { | 3412 for (intptr_t j = 0; j < guard_count; j++) { |
3565 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); | 3413 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); |
3566 } | 3414 } |
3567 alternative.node()->Emit(compiler, &out_of_line_trace); | 3415 alternative.node()->Emit(compiler, &out_of_line_trace); |
3568 macro_assembler->BindBlock(&reload_current_char); | 3416 macro_assembler->BindBlock(&reload_current_char); |
3569 // Reload the current character, since the next quick check expects that. | 3417 // Reload the current character, since the next quick check expects that. |
3570 // We don't need to check bounds here because we only get into this | 3418 // We don't need to check bounds here because we only get into this |
3571 // code through a quick check which already did the checked load. | 3419 // code through a quick check which already did the checked load. |
3572 macro_assembler->LoadCurrentCharacter(trace->cp_offset(), | 3420 macro_assembler->LoadCurrentCharacter(trace->cp_offset(), NULL, false, |
3573 NULL, | |
3574 false, | |
3575 preload_characters); | 3421 preload_characters); |
3576 macro_assembler->GoTo(&(alt_gen->after)); | 3422 macro_assembler->GoTo(&(alt_gen->after)); |
3577 } else { | 3423 } else { |
3578 out_of_line_trace.set_backtrack(&(alt_gen->after)); | 3424 out_of_line_trace.set_backtrack(&(alt_gen->after)); |
3579 for (intptr_t j = 0; j < guard_count; j++) { | 3425 for (intptr_t j = 0; j < guard_count; j++) { |
3580 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); | 3426 GenerateGuard(macro_assembler, guards->At(j), &out_of_line_trace); |
3581 } | 3427 } |
3582 alternative.node()->Emit(compiler, &out_of_line_trace); | 3428 alternative.node()->Emit(compiler, &out_of_line_trace); |
3583 } | 3429 } |
3584 } | 3430 } |
3585 | 3431 |
3586 | 3432 |
3587 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3433 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
3588 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3434 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
3589 LimitResult limit_result = LimitVersions(compiler, trace); | 3435 LimitResult limit_result = LimitVersions(compiler, trace); |
3590 if (limit_result == DONE) return; | 3436 if (limit_result == DONE) return; |
3591 ASSERT(limit_result == CONTINUE); | 3437 ASSERT(limit_result == CONTINUE); |
3592 | 3438 |
3593 RecursionCheck rc(compiler); | 3439 RecursionCheck rc(compiler); |
3594 | 3440 |
3595 switch (action_type_) { | 3441 switch (action_type_) { |
3596 case STORE_POSITION: { | 3442 case STORE_POSITION: { |
3597 Trace::DeferredCapture | 3443 Trace::DeferredCapture new_capture(data_.u_position_register.reg, |
3598 new_capture(data_.u_position_register.reg, | 3444 data_.u_position_register.is_capture, |
3599 data_.u_position_register.is_capture, | 3445 trace); |
3600 trace); | |
3601 Trace new_trace = *trace; | 3446 Trace new_trace = *trace; |
3602 new_trace.add_action(&new_capture); | 3447 new_trace.add_action(&new_capture); |
3603 on_success()->Emit(compiler, &new_trace); | 3448 on_success()->Emit(compiler, &new_trace); |
3604 break; | 3449 break; |
3605 } | 3450 } |
3606 case INCREMENT_REGISTER: { | 3451 case INCREMENT_REGISTER: { |
3607 Trace::DeferredIncrementRegister | 3452 Trace::DeferredIncrementRegister new_increment( |
3608 new_increment(data_.u_increment_register.reg); | 3453 data_.u_increment_register.reg); |
3609 Trace new_trace = *trace; | 3454 Trace new_trace = *trace; |
3610 new_trace.add_action(&new_increment); | 3455 new_trace.add_action(&new_increment); |
3611 on_success()->Emit(compiler, &new_trace); | 3456 on_success()->Emit(compiler, &new_trace); |
3612 break; | 3457 break; |
3613 } | 3458 } |
3614 case SET_REGISTER: { | 3459 case SET_REGISTER: { |
3615 Trace::DeferredSetRegister | 3460 Trace::DeferredSetRegister new_set(data_.u_store_register.reg, |
3616 new_set(data_.u_store_register.reg, data_.u_store_register.value); | 3461 data_.u_store_register.value); |
3617 Trace new_trace = *trace; | 3462 Trace new_trace = *trace; |
3618 new_trace.add_action(&new_set); | 3463 new_trace.add_action(&new_set); |
3619 on_success()->Emit(compiler, &new_trace); | 3464 on_success()->Emit(compiler, &new_trace); |
3620 break; | 3465 break; |
3621 } | 3466 } |
3622 case CLEAR_CAPTURES: { | 3467 case CLEAR_CAPTURES: { |
3623 Trace::DeferredClearCaptures | 3468 Trace::DeferredClearCaptures new_capture(Interval( |
3624 new_capture(Interval(data_.u_clear_captures.range_from, | 3469 data_.u_clear_captures.range_from, data_.u_clear_captures.range_to)); |
3625 data_.u_clear_captures.range_to)); | |
3626 Trace new_trace = *trace; | 3470 Trace new_trace = *trace; |
3627 new_trace.add_action(&new_capture); | 3471 new_trace.add_action(&new_capture); |
3628 on_success()->Emit(compiler, &new_trace); | 3472 on_success()->Emit(compiler, &new_trace); |
3629 break; | 3473 break; |
3630 } | 3474 } |
3631 case BEGIN_SUBMATCH: | 3475 case BEGIN_SUBMATCH: |
3632 if (!trace->is_trivial()) { | 3476 if (!trace->is_trivial()) { |
3633 trace->Flush(compiler, this); | 3477 trace->Flush(compiler, this); |
3634 } else { | 3478 } else { |
3635 assembler->WriteCurrentPositionToRegister( | 3479 assembler->WriteCurrentPositionToRegister( |
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3715 } | 3559 } |
3716 | 3560 |
3717 LimitResult limit_result = LimitVersions(compiler, trace); | 3561 LimitResult limit_result = LimitVersions(compiler, trace); |
3718 if (limit_result == DONE) return; | 3562 if (limit_result == DONE) return; |
3719 ASSERT(limit_result == CONTINUE); | 3563 ASSERT(limit_result == CONTINUE); |
3720 | 3564 |
3721 RecursionCheck rc(compiler); | 3565 RecursionCheck rc(compiler); |
3722 | 3566 |
3723 ASSERT(start_reg_ + 1 == end_reg_); | 3567 ASSERT(start_reg_ + 1 == end_reg_); |
3724 if (compiler->ignore_case()) { | 3568 if (compiler->ignore_case()) { |
3725 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, | 3569 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, trace->backtrack()); |
3726 trace->backtrack()); | |
3727 } else { | 3570 } else { |
3728 assembler->CheckNotBackReference(start_reg_, trace->backtrack()); | 3571 assembler->CheckNotBackReference(start_reg_, trace->backtrack()); |
3729 } | 3572 } |
3730 on_success()->Emit(compiler, trace); | 3573 on_success()->Emit(compiler, trace); |
3731 } | 3574 } |
3732 | 3575 |
3733 | 3576 |
3734 // ------------------------------------------------------------------- | 3577 // ------------------------------------------------------------------- |
3735 // Dot/dotty output | 3578 // Dot/dotty output |
3736 | 3579 |
3737 | 3580 |
3738 #ifdef DEBUG | 3581 #ifdef DEBUG |
3739 | 3582 |
3740 | 3583 |
3741 class DotPrinter: public NodeVisitor { | 3584 class DotPrinter : public NodeVisitor { |
3742 public: | 3585 public: |
3743 explicit DotPrinter(bool ignore_case) {} | 3586 explicit DotPrinter(bool ignore_case) {} |
3744 void PrintNode(const char* label, RegExpNode* node); | 3587 void PrintNode(const char* label, RegExpNode* node); |
3745 void Visit(RegExpNode* node); | 3588 void Visit(RegExpNode* node); |
3746 void PrintAttributes(RegExpNode* from); | 3589 void PrintAttributes(RegExpNode* from); |
3747 void PrintOnFailure(RegExpNode* from, RegExpNode* to); | 3590 void PrintOnFailure(RegExpNode* from, RegExpNode* to); |
3748 #define DECLARE_VISIT(Type) \ | 3591 #define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that); |
3749 virtual void Visit##Type(Type##Node* that); | 3592 FOR_EACH_NODE_TYPE(DECLARE_VISIT) |
3750 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | |
3751 #undef DECLARE_VISIT | 3593 #undef DECLARE_VISIT |
3752 }; | 3594 }; |
3753 | 3595 |
3754 | 3596 |
3755 void DotPrinter::PrintNode(const char* label, RegExpNode* node) { | 3597 void DotPrinter::PrintNode(const char* label, RegExpNode* node) { |
3756 OS::Print("digraph G {\n graph [label=\""); | 3598 OS::Print("digraph G {\n graph [label=\""); |
3757 for (intptr_t i = 0; label[i]; i++) { | 3599 for (intptr_t i = 0; label[i]; i++) { |
3758 switch (label[i]) { | 3600 switch (label[i]) { |
3759 case '\\': | 3601 case '\\': |
3760 OS::Print("\\\\"); | 3602 OS::Print("\\\\"); |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3806 PrintSeparator(); | 3648 PrintSeparator(); |
3807 OS::Print("{%s|%" Pd "}", name, value); | 3649 OS::Print("{%s|%" Pd "}", name, value); |
3808 } | 3650 } |
3809 | 3651 |
3810 private: | 3652 private: |
3811 bool first_; | 3653 bool first_; |
3812 }; | 3654 }; |
3813 | 3655 |
3814 | 3656 |
3815 void DotPrinter::PrintAttributes(RegExpNode* that) { | 3657 void DotPrinter::PrintAttributes(RegExpNode* that) { |
3816 OS::Print(" a%p [shape=Mrecord, color=grey, fontcolor=grey, " | 3658 OS::Print( |
3817 "margin=0.1, fontsize=10, label=\"{", that); | 3659 " a%p [shape=Mrecord, color=grey, fontcolor=grey, " |
| 3660 "margin=0.1, fontsize=10, label=\"{", |
| 3661 that); |
3818 AttributePrinter printer; | 3662 AttributePrinter printer; |
3819 NodeInfo* info = that->info(); | 3663 NodeInfo* info = that->info(); |
3820 printer.PrintBit("NI", info->follows_newline_interest); | 3664 printer.PrintBit("NI", info->follows_newline_interest); |
3821 printer.PrintBit("WI", info->follows_word_interest); | 3665 printer.PrintBit("WI", info->follows_word_interest); |
3822 printer.PrintBit("SI", info->follows_start_interest); | 3666 printer.PrintBit("SI", info->follows_start_interest); |
3823 BlockLabel* label = that->label(); | 3667 BlockLabel* label = that->label(); |
3824 if (label->IsBound()) | 3668 if (label->IsBound()) printer.PrintPositive("@", label->Position()); |
3825 printer.PrintPositive("@", label->Position()); | 3669 OS::Print( |
3826 OS::Print("}\"];\n" | 3670 "}\"];\n" |
3827 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n", | 3671 " a%p -> n%p [style=dashed, color=grey, arrowhead=none];\n", |
3828 that, that); | 3672 that, that); |
3829 } | 3673 } |
3830 | 3674 |
3831 | 3675 |
3832 void DotPrinter::VisitChoice(ChoiceNode* that) { | 3676 void DotPrinter::VisitChoice(ChoiceNode* that) { |
3833 OS::Print(" n%p [shape=Mrecord, label=\"?\"];\n", that); | 3677 OS::Print(" n%p [shape=Mrecord, label=\"?\"];\n", that); |
3834 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { | 3678 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
3835 GuardedAlternative alt = that->alternatives()->At(i); | 3679 GuardedAlternative alt = that->alternatives()->At(i); |
3836 OS::Print(" n%p -> n%p", that, alt.node()); | 3680 OS::Print(" n%p -> n%p", that, alt.node()); |
3837 } | 3681 } |
3838 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { | 3682 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3978 | 3822 |
3979 // ------------------------------------------------------------------- | 3823 // ------------------------------------------------------------------- |
3980 // Tree to graph conversion | 3824 // Tree to graph conversion |
3981 | 3825 |
3982 // The zone in which we allocate graph nodes. | 3826 // The zone in which we allocate graph nodes. |
3983 #define OZ (on_success->zone()) | 3827 #define OZ (on_success->zone()) |
3984 | 3828 |
3985 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, | 3829 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, |
3986 RegExpNode* on_success) { | 3830 RegExpNode* on_success) { |
3987 ZoneGrowableArray<TextElement>* elms = | 3831 ZoneGrowableArray<TextElement>* elms = |
3988 new(OZ) ZoneGrowableArray<TextElement>(1); | 3832 new (OZ) ZoneGrowableArray<TextElement>(1); |
3989 elms->Add(TextElement::Atom(this)); | 3833 elms->Add(TextElement::Atom(this)); |
3990 return new(OZ) TextNode(elms, on_success); | 3834 return new (OZ) TextNode(elms, on_success); |
3991 } | 3835 } |
3992 | 3836 |
3993 | 3837 |
3994 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, | 3838 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, |
3995 RegExpNode* on_success) { | 3839 RegExpNode* on_success) { |
3996 ZoneGrowableArray<TextElement>* elms = | 3840 ZoneGrowableArray<TextElement>* elms = |
3997 new(OZ) ZoneGrowableArray<TextElement>(1); | 3841 new (OZ) ZoneGrowableArray<TextElement>(1); |
3998 for (intptr_t i = 0; i < elements()->length(); i++) { | 3842 for (intptr_t i = 0; i < elements()->length(); i++) { |
3999 elms->Add(elements()->At(i)); | 3843 elms->Add(elements()->At(i)); |
4000 } | 3844 } |
4001 return new(OZ) TextNode(elms, on_success); | 3845 return new (OZ) TextNode(elms, on_success); |
4002 } | 3846 } |
4003 | 3847 |
4004 | 3848 |
4005 static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges, | 3849 static bool CompareInverseRanges(ZoneGrowableArray<CharacterRange>* ranges, |
4006 const intptr_t* special_class, | 3850 const intptr_t* special_class, |
4007 intptr_t length) { | 3851 intptr_t length) { |
4008 length--; // Remove final 0x10000. | 3852 length--; // Remove final 0x10000. |
4009 ASSERT(special_class[length] == 0x10000); | 3853 ASSERT(special_class[length] == 0x10000); |
4010 ASSERT(ranges->length() != 0); | 3854 ASSERT(ranges->length() != 0); |
4011 ASSERT(length != 0); | 3855 ASSERT(length != 0); |
4012 ASSERT(special_class[0] != 0); | 3856 ASSERT(special_class[0] != 0); |
4013 if (ranges->length() != (length >> 1) + 1) { | 3857 if (ranges->length() != (length >> 1) + 1) { |
4014 return false; | 3858 return false; |
4015 } | 3859 } |
4016 CharacterRange range = ranges->At(0); | 3860 CharacterRange range = ranges->At(0); |
4017 if (range.from() != 0) { | 3861 if (range.from() != 0) { |
4018 return false; | 3862 return false; |
4019 } | 3863 } |
4020 for (intptr_t i = 0; i < length; i += 2) { | 3864 for (intptr_t i = 0; i < length; i += 2) { |
4021 if (special_class[i] != (range.to() + 1)) { | 3865 if (special_class[i] != (range.to() + 1)) { |
4022 return false; | 3866 return false; |
4023 } | 3867 } |
4024 range = ranges->At((i >> 1) + 1); | 3868 range = ranges->At((i >> 1) + 1); |
4025 if (special_class[i+1] != range.from()) { | 3869 if (special_class[i + 1] != range.from()) { |
4026 return false; | 3870 return false; |
4027 } | 3871 } |
4028 } | 3872 } |
4029 if (range.to() != 0xffff) { | 3873 if (range.to() != 0xffff) { |
4030 return false; | 3874 return false; |
4031 } | 3875 } |
4032 return true; | 3876 return true; |
4033 } | 3877 } |
4034 | 3878 |
4035 | 3879 |
(...skipping 26 matching lines...) Expand all Loading... |
4062 return true; | 3906 return true; |
4063 } | 3907 } |
4064 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { | 3908 if (CompareRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { |
4065 set_.set_standard_set_type('s'); | 3909 set_.set_standard_set_type('s'); |
4066 return true; | 3910 return true; |
4067 } | 3911 } |
4068 if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { | 3912 if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { |
4069 set_.set_standard_set_type('S'); | 3913 set_.set_standard_set_type('S'); |
4070 return true; | 3914 return true; |
4071 } | 3915 } |
4072 if (CompareInverseRanges(set_.ranges(), | 3916 if (CompareInverseRanges(set_.ranges(), kLineTerminatorRanges, |
4073 kLineTerminatorRanges, | |
4074 kLineTerminatorRangeCount)) { | 3917 kLineTerminatorRangeCount)) { |
4075 set_.set_standard_set_type('.'); | 3918 set_.set_standard_set_type('.'); |
4076 return true; | 3919 return true; |
4077 } | 3920 } |
4078 if (CompareRanges(set_.ranges(), | 3921 if (CompareRanges(set_.ranges(), kLineTerminatorRanges, |
4079 kLineTerminatorRanges, | |
4080 kLineTerminatorRangeCount)) { | 3922 kLineTerminatorRangeCount)) { |
4081 set_.set_standard_set_type('n'); | 3923 set_.set_standard_set_type('n'); |
4082 return true; | 3924 return true; |
4083 } | 3925 } |
4084 if (CompareRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { | 3926 if (CompareRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { |
4085 set_.set_standard_set_type('w'); | 3927 set_.set_standard_set_type('w'); |
4086 return true; | 3928 return true; |
4087 } | 3929 } |
4088 if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { | 3930 if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { |
4089 set_.set_standard_set_type('W'); | 3931 set_.set_standard_set_type('W'); |
4090 return true; | 3932 return true; |
4091 } | 3933 } |
4092 return false; | 3934 return false; |
4093 } | 3935 } |
4094 | 3936 |
4095 | 3937 |
4096 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 3938 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
4097 RegExpNode* on_success) { | 3939 RegExpNode* on_success) { |
4098 return new(OZ) TextNode(this, on_success); | 3940 return new (OZ) TextNode(this, on_success); |
4099 } | 3941 } |
4100 | 3942 |
4101 | 3943 |
4102 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, | 3944 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, |
4103 RegExpNode* on_success) { | 3945 RegExpNode* on_success) { |
4104 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); | 3946 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); |
4105 intptr_t length = alternatives->length(); | 3947 intptr_t length = alternatives->length(); |
4106 ChoiceNode* result = | 3948 ChoiceNode* result = new (OZ) ChoiceNode(length, OZ); |
4107 new(OZ) ChoiceNode(length, OZ); | |
4108 for (intptr_t i = 0; i < length; i++) { | 3949 for (intptr_t i = 0; i < length; i++) { |
4109 GuardedAlternative alternative(alternatives->At(i)->ToNode(compiler, | 3950 GuardedAlternative alternative( |
4110 on_success)); | 3951 alternatives->At(i)->ToNode(compiler, on_success)); |
4111 result->AddAlternative(alternative); | 3952 result->AddAlternative(alternative); |
4112 } | 3953 } |
4113 return result; | 3954 return result; |
4114 } | 3955 } |
4115 | 3956 |
4116 | 3957 |
4117 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, | 3958 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, |
4118 RegExpNode* on_success) { | 3959 RegExpNode* on_success) { |
4119 return ToNode(min(), | 3960 return ToNode(min(), max(), is_greedy(), body(), compiler, on_success); |
4120 max(), | |
4121 is_greedy(), | |
4122 body(), | |
4123 compiler, | |
4124 on_success); | |
4125 } | 3961 } |
4126 | 3962 |
4127 | 3963 |
4128 // Scoped object to keep track of how much we unroll quantifier loops in the | 3964 // Scoped object to keep track of how much we unroll quantifier loops in the |
4129 // regexp graph generator. | 3965 // regexp graph generator. |
4130 class RegExpExpansionLimiter : public ValueObject { | 3966 class RegExpExpansionLimiter : public ValueObject { |
4131 public: | 3967 public: |
4132 static const intptr_t kMaxExpansionFactor = 6; | 3968 static const intptr_t kMaxExpansionFactor = 6; |
4133 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor) | 3969 RegExpExpansionLimiter(RegExpCompiler* compiler, intptr_t factor) |
4134 : compiler_(compiler), | 3970 : compiler_(compiler), |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4200 Interval capture_registers = body->CaptureRegisters(); | 4036 Interval capture_registers = body->CaptureRegisters(); |
4201 bool needs_capture_clearing = !capture_registers.is_empty(); | 4037 bool needs_capture_clearing = !capture_registers.is_empty(); |
4202 Zone* zone = compiler->zone(); | 4038 Zone* zone = compiler->zone(); |
4203 | 4039 |
4204 if (body_can_be_empty) { | 4040 if (body_can_be_empty) { |
4205 body_start_reg = compiler->AllocateRegister(); | 4041 body_start_reg = compiler->AllocateRegister(); |
4206 } else if (kRegexpOptimization && !needs_capture_clearing) { | 4042 } else if (kRegexpOptimization && !needs_capture_clearing) { |
4207 // Only unroll if there are no captures and the body can't be | 4043 // Only unroll if there are no captures and the body can't be |
4208 // empty. | 4044 // empty. |
4209 { | 4045 { |
4210 RegExpExpansionLimiter limiter( | 4046 RegExpExpansionLimiter limiter(compiler, min + ((max != min) ? 1 : 0)); |
4211 compiler, min + ((max != min) ? 1 : 0)); | |
4212 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { | 4047 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { |
4213 intptr_t new_max = (max == kInfinity) ? max : max - min; | 4048 intptr_t new_max = (max == kInfinity) ? max : max - min; |
4214 // Recurse once to get the loop or optional matches after the fixed | 4049 // Recurse once to get the loop or optional matches after the fixed |
4215 // ones. | 4050 // ones. |
4216 RegExpNode* answer = ToNode( | 4051 RegExpNode* answer = |
4217 0, new_max, is_greedy, body, compiler, on_success, true); | 4052 ToNode(0, new_max, is_greedy, body, compiler, on_success, true); |
4218 // Unroll the forced matches from 0 to min. This can cause chains of | 4053 // Unroll the forced matches from 0 to min. This can cause chains of |
4219 // TextNodes (which the parser does not generate). These should be | 4054 // TextNodes (which the parser does not generate). These should be |
4220 // combined if it turns out they hinder good code generation. | 4055 // combined if it turns out they hinder good code generation. |
4221 for (intptr_t i = 0; i < min; i++) { | 4056 for (intptr_t i = 0; i < min; i++) { |
4222 answer = body->ToNode(compiler, answer); | 4057 answer = body->ToNode(compiler, answer); |
4223 } | 4058 } |
4224 return answer; | 4059 return answer; |
4225 } | 4060 } |
4226 } | 4061 } |
4227 if (max <= kMaxUnrolledMaxMatches && min == 0) { | 4062 if (max <= kMaxUnrolledMaxMatches && min == 0) { |
4228 ASSERT(max > 0); // Due to the 'if' above. | 4063 ASSERT(max > 0); // Due to the 'if' above. |
4229 RegExpExpansionLimiter limiter(compiler, max); | 4064 RegExpExpansionLimiter limiter(compiler, max); |
4230 if (limiter.ok_to_expand()) { | 4065 if (limiter.ok_to_expand()) { |
4231 // Unroll the optional matches up to max. | 4066 // Unroll the optional matches up to max. |
4232 RegExpNode* answer = on_success; | 4067 RegExpNode* answer = on_success; |
4233 for (intptr_t i = 0; i < max; i++) { | 4068 for (intptr_t i = 0; i < max; i++) { |
4234 ChoiceNode* alternation = new(zone) ChoiceNode(2, zone); | 4069 ChoiceNode* alternation = new (zone) ChoiceNode(2, zone); |
4235 if (is_greedy) { | 4070 if (is_greedy) { |
4236 alternation->AddAlternative( | 4071 alternation->AddAlternative( |
4237 GuardedAlternative(body->ToNode(compiler, answer))); | 4072 GuardedAlternative(body->ToNode(compiler, answer))); |
4238 alternation->AddAlternative(GuardedAlternative(on_success)); | 4073 alternation->AddAlternative(GuardedAlternative(on_success)); |
4239 } else { | 4074 } else { |
4240 alternation->AddAlternative(GuardedAlternative(on_success)); | 4075 alternation->AddAlternative(GuardedAlternative(on_success)); |
4241 alternation->AddAlternative( | 4076 alternation->AddAlternative( |
4242 GuardedAlternative(body->ToNode(compiler, answer))); | 4077 GuardedAlternative(body->ToNode(compiler, answer))); |
4243 } | 4078 } |
4244 answer = alternation; | 4079 answer = alternation; |
4245 if (not_at_start) alternation->set_not_at_start(); | 4080 if (not_at_start) alternation->set_not_at_start(); |
4246 } | 4081 } |
4247 return answer; | 4082 return answer; |
4248 } | 4083 } |
4249 } | 4084 } |
4250 } | 4085 } |
4251 bool has_min = min > 0; | 4086 bool has_min = min > 0; |
4252 bool has_max = max < RegExpTree::kInfinity; | 4087 bool has_max = max < RegExpTree::kInfinity; |
4253 bool needs_counter = has_min || has_max; | 4088 bool needs_counter = has_min || has_max; |
4254 intptr_t reg_ctr = needs_counter | 4089 intptr_t reg_ctr = needs_counter ? compiler->AllocateRegister() |
4255 ? compiler->AllocateRegister() | 4090 : RegExpCompiler::kNoRegister; |
4256 : RegExpCompiler::kNoRegister; | 4091 LoopChoiceNode* center = |
4257 LoopChoiceNode* center = new(zone) LoopChoiceNode(body->min_match() == 0, | 4092 new (zone) LoopChoiceNode(body->min_match() == 0, zone); |
4258 zone); | |
4259 if (not_at_start) center->set_not_at_start(); | 4093 if (not_at_start) center->set_not_at_start(); |
4260 RegExpNode* loop_return = needs_counter | 4094 RegExpNode* loop_return = |
4261 ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) | 4095 needs_counter ? static_cast<RegExpNode*>( |
4262 : static_cast<RegExpNode*>(center); | 4096 ActionNode::IncrementRegister(reg_ctr, center)) |
| 4097 : static_cast<RegExpNode*>(center); |
4263 if (body_can_be_empty) { | 4098 if (body_can_be_empty) { |
4264 // If the body can be empty we need to check if it was and then | 4099 // If the body can be empty we need to check if it was and then |
4265 // backtrack. | 4100 // backtrack. |
4266 loop_return = ActionNode::EmptyMatchCheck(body_start_reg, | 4101 loop_return = |
4267 reg_ctr, | 4102 ActionNode::EmptyMatchCheck(body_start_reg, reg_ctr, min, loop_return); |
4268 min, | |
4269 loop_return); | |
4270 } | 4103 } |
4271 RegExpNode* body_node = body->ToNode(compiler, loop_return); | 4104 RegExpNode* body_node = body->ToNode(compiler, loop_return); |
4272 if (body_can_be_empty) { | 4105 if (body_can_be_empty) { |
4273 // If the body can be empty we need to store the start position | 4106 // If the body can be empty we need to store the start position |
4274 // so we can bail out if it was empty. | 4107 // so we can bail out if it was empty. |
4275 body_node = ActionNode::StorePosition(body_start_reg, false, body_node); | 4108 body_node = ActionNode::StorePosition(body_start_reg, false, body_node); |
4276 } | 4109 } |
4277 if (needs_capture_clearing) { | 4110 if (needs_capture_clearing) { |
4278 // Before entering the body of this loop we need to clear captures. | 4111 // Before entering the body of this loop we need to clear captures. |
4279 body_node = ActionNode::ClearCaptures(capture_registers, body_node); | 4112 body_node = ActionNode::ClearCaptures(capture_registers, body_node); |
4280 } | 4113 } |
4281 GuardedAlternative body_alt(body_node); | 4114 GuardedAlternative body_alt(body_node); |
4282 if (has_max) { | 4115 if (has_max) { |
4283 Guard* body_guard = | 4116 Guard* body_guard = new (zone) Guard(reg_ctr, Guard::LT, max); |
4284 new(zone) Guard(reg_ctr, Guard::LT, max); | |
4285 body_alt.AddGuard(body_guard, zone); | 4117 body_alt.AddGuard(body_guard, zone); |
4286 } | 4118 } |
4287 GuardedAlternative rest_alt(on_success); | 4119 GuardedAlternative rest_alt(on_success); |
4288 if (has_min) { | 4120 if (has_min) { |
4289 Guard* rest_guard = new(zone) Guard(reg_ctr, Guard::GEQ, min); | 4121 Guard* rest_guard = new (zone) Guard(reg_ctr, Guard::GEQ, min); |
4290 rest_alt.AddGuard(rest_guard, zone); | 4122 rest_alt.AddGuard(rest_guard, zone); |
4291 } | 4123 } |
4292 if (is_greedy) { | 4124 if (is_greedy) { |
4293 center->AddLoopAlternative(body_alt); | 4125 center->AddLoopAlternative(body_alt); |
4294 center->AddContinueAlternative(rest_alt); | 4126 center->AddContinueAlternative(rest_alt); |
4295 } else { | 4127 } else { |
4296 center->AddContinueAlternative(rest_alt); | 4128 center->AddContinueAlternative(rest_alt); |
4297 center->AddLoopAlternative(body_alt); | 4129 center->AddLoopAlternative(body_alt); |
4298 } | 4130 } |
4299 if (needs_counter) { | 4131 if (needs_counter) { |
(...skipping 24 matching lines...) Expand all Loading... |
4324 intptr_t stack_pointer_register = compiler->AllocateRegister(); | 4156 intptr_t stack_pointer_register = compiler->AllocateRegister(); |
4325 intptr_t position_register = compiler->AllocateRegister(); | 4157 intptr_t position_register = compiler->AllocateRegister(); |
4326 // The ChoiceNode to distinguish between a newline and end-of-input. | 4158 // The ChoiceNode to distinguish between a newline and end-of-input. |
4327 ChoiceNode* result = new ChoiceNode(2, on_success->zone()); | 4159 ChoiceNode* result = new ChoiceNode(2, on_success->zone()); |
4328 // Create a newline atom. | 4160 // Create a newline atom. |
4329 ZoneGrowableArray<CharacterRange>* newline_ranges = | 4161 ZoneGrowableArray<CharacterRange>* newline_ranges = |
4330 new ZoneGrowableArray<CharacterRange>(3); | 4162 new ZoneGrowableArray<CharacterRange>(3); |
4331 CharacterRange::AddClassEscape('n', newline_ranges); | 4163 CharacterRange::AddClassEscape('n', newline_ranges); |
4332 RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n'); | 4164 RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n'); |
4333 TextNode* newline_matcher = new TextNode( | 4165 TextNode* newline_matcher = new TextNode( |
4334 newline_atom, | 4166 newline_atom, ActionNode::PositiveSubmatchSuccess( |
4335 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, | 4167 stack_pointer_register, position_register, |
4336 position_register, | 4168 0, // No captures inside. |
4337 0, // No captures inside. | 4169 -1, // Ignored if no captures. |
4338 -1, // Ignored if no captures. | 4170 on_success)); |
4339 on_success)); | |
4340 // Create an end-of-input matcher. | 4171 // Create an end-of-input matcher. |
4341 RegExpNode* end_of_line = ActionNode::BeginSubmatch( | 4172 RegExpNode* end_of_line = ActionNode::BeginSubmatch( |
4342 stack_pointer_register, | 4173 stack_pointer_register, position_register, newline_matcher); |
4343 position_register, | |
4344 newline_matcher); | |
4345 // Add the two alternatives to the ChoiceNode. | 4174 // Add the two alternatives to the ChoiceNode. |
4346 GuardedAlternative eol_alternative(end_of_line); | 4175 GuardedAlternative eol_alternative(end_of_line); |
4347 result->AddAlternative(eol_alternative); | 4176 result->AddAlternative(eol_alternative); |
4348 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); | 4177 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); |
4349 result->AddAlternative(end_alternative); | 4178 result->AddAlternative(end_alternative); |
4350 return result; | 4179 return result; |
4351 } | 4180 } |
4352 default: | 4181 default: |
4353 UNREACHABLE(); | 4182 UNREACHABLE(); |
4354 } | 4183 } |
4355 return on_success; | 4184 return on_success; |
4356 } | 4185 } |
4357 | 4186 |
4358 | 4187 |
4359 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, | 4188 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, |
4360 RegExpNode* on_success) { | 4189 RegExpNode* on_success) { |
4361 return new(OZ) | 4190 return new (OZ) |
4362 BackReferenceNode(RegExpCapture::StartRegister(index()), | 4191 BackReferenceNode(RegExpCapture::StartRegister(index()), |
4363 RegExpCapture::EndRegister(index()), | 4192 RegExpCapture::EndRegister(index()), on_success); |
4364 on_success); | |
4365 } | 4193 } |
4366 | 4194 |
4367 | 4195 |
4368 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, | 4196 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, |
4369 RegExpNode* on_success) { | 4197 RegExpNode* on_success) { |
4370 return on_success; | 4198 return on_success; |
4371 } | 4199 } |
4372 | 4200 |
4373 | 4201 |
4374 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, | 4202 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, |
4375 RegExpNode* on_success) { | 4203 RegExpNode* on_success) { |
4376 intptr_t stack_pointer_register = compiler->AllocateRegister(); | 4204 intptr_t stack_pointer_register = compiler->AllocateRegister(); |
4377 intptr_t position_register = compiler->AllocateRegister(); | 4205 intptr_t position_register = compiler->AllocateRegister(); |
4378 | 4206 |
4379 const intptr_t registers_per_capture = 2; | 4207 const intptr_t registers_per_capture = 2; |
4380 const intptr_t register_of_first_capture = 2; | 4208 const intptr_t register_of_first_capture = 2; |
4381 intptr_t register_count = capture_count_ * registers_per_capture; | 4209 intptr_t register_count = capture_count_ * registers_per_capture; |
4382 intptr_t register_start = | 4210 intptr_t register_start = |
4383 register_of_first_capture + capture_from_ * registers_per_capture; | 4211 register_of_first_capture + capture_from_ * registers_per_capture; |
4384 | 4212 |
4385 RegExpNode* success; | 4213 RegExpNode* success; |
4386 if (is_positive()) { | 4214 if (is_positive()) { |
4387 RegExpNode* node = ActionNode::BeginSubmatch( | 4215 RegExpNode* node = ActionNode::BeginSubmatch( |
4388 stack_pointer_register, | 4216 stack_pointer_register, position_register, |
4389 position_register, | 4217 body()->ToNode(compiler, |
4390 body()->ToNode( | 4218 ActionNode::PositiveSubmatchSuccess( |
4391 compiler, | 4219 stack_pointer_register, position_register, |
4392 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, | 4220 register_count, register_start, on_success))); |
4393 position_register, | |
4394 register_count, | |
4395 register_start, | |
4396 on_success))); | |
4397 return node; | 4221 return node; |
4398 } else { | 4222 } else { |
4399 // We use a ChoiceNode for a negative lookahead because it has most of | 4223 // We use a ChoiceNode for a negative lookahead because it has most of |
4400 // the characteristics we need. It has the body of the lookahead as its | 4224 // the characteristics we need. It has the body of the lookahead as its |
4401 // first alternative and the expression after the lookahead of the second | 4225 // first alternative and the expression after the lookahead of the second |
4402 // alternative. If the first alternative succeeds then the | 4226 // alternative. If the first alternative succeeds then the |
4403 // NegativeSubmatchSuccess will unwind the stack including everything the | 4227 // NegativeSubmatchSuccess will unwind the stack including everything the |
4404 // choice node set up and backtrack. If the first alternative fails then | 4228 // choice node set up and backtrack. If the first alternative fails then |
4405 // the second alternative is tried, which is exactly the desired result | 4229 // the second alternative is tried, which is exactly the desired result |
4406 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special | 4230 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special |
4407 // ChoiceNode that knows to ignore the first exit when calculating quick | 4231 // ChoiceNode that knows to ignore the first exit when calculating quick |
4408 // checks. | 4232 // checks. |
4409 | 4233 |
4410 GuardedAlternative body_alt( | 4234 GuardedAlternative body_alt( |
4411 body()->ToNode( | 4235 body()->ToNode(compiler, success = new (OZ) NegativeSubmatchSuccess( |
4412 compiler, | 4236 stack_pointer_register, position_register, |
4413 success = new(OZ) NegativeSubmatchSuccess(stack_pointer_register, | 4237 register_count, register_start, OZ))); |
4414 position_register, | 4238 ChoiceNode* choice_node = new (OZ) NegativeLookaheadChoiceNode( |
4415 register_count, | 4239 body_alt, GuardedAlternative(on_success), OZ); |
4416 register_start, | 4240 return ActionNode::BeginSubmatch(stack_pointer_register, position_register, |
4417 OZ))); | |
4418 ChoiceNode* choice_node = | |
4419 new(OZ) NegativeLookaheadChoiceNode(body_alt, | |
4420 GuardedAlternative(on_success), | |
4421 OZ); | |
4422 return ActionNode::BeginSubmatch(stack_pointer_register, | |
4423 position_register, | |
4424 choice_node); | 4241 choice_node); |
4425 } | 4242 } |
4426 } | 4243 } |
4427 | 4244 |
4428 | 4245 |
4429 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, | 4246 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, |
4430 RegExpNode* on_success) { | 4247 RegExpNode* on_success) { |
4431 return ToNode(body(), index(), compiler, on_success); | 4248 return ToNode(body(), index(), compiler, on_success); |
4432 } | 4249 } |
4433 | 4250 |
(...skipping 26 matching lines...) Expand all Loading... |
4460 ZoneGrowableArray<CharacterRange>* ranges) { | 4277 ZoneGrowableArray<CharacterRange>* ranges) { |
4461 elmc--; | 4278 elmc--; |
4462 ASSERT(elmv[elmc] == 0x10000); | 4279 ASSERT(elmv[elmc] == 0x10000); |
4463 for (intptr_t i = 0; i < elmc; i += 2) { | 4280 for (intptr_t i = 0; i < elmc; i += 2) { |
4464 ASSERT(elmv[i] < elmv[i + 1]); | 4281 ASSERT(elmv[i] < elmv[i + 1]); |
4465 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1)); | 4282 ranges->Add(CharacterRange(elmv[i], elmv[i + 1] - 1)); |
4466 } | 4283 } |
4467 } | 4284 } |
4468 | 4285 |
4469 | 4286 |
4470 static void AddClassNegated(const intptr_t *elmv, | 4287 static void AddClassNegated(const intptr_t* elmv, |
4471 intptr_t elmc, | 4288 intptr_t elmc, |
4472 ZoneGrowableArray<CharacterRange>* ranges) { | 4289 ZoneGrowableArray<CharacterRange>* ranges) { |
4473 elmc--; | 4290 elmc--; |
4474 ASSERT(elmv[elmc] == 0x10000); | 4291 ASSERT(elmv[elmc] == 0x10000); |
4475 ASSERT(elmv[0] != 0x0000); | 4292 ASSERT(elmv[0] != 0x0000); |
4476 ASSERT(elmv[elmc-1] != Utf16::kMaxCodeUnit); | 4293 ASSERT(elmv[elmc - 1] != Utf16::kMaxCodeUnit); |
4477 uint16_t last = 0x0000; | 4294 uint16_t last = 0x0000; |
4478 for (intptr_t i = 0; i < elmc; i += 2) { | 4295 for (intptr_t i = 0; i < elmc; i += 2) { |
4479 ASSERT(last <= elmv[i] - 1); | 4296 ASSERT(last <= elmv[i] - 1); |
4480 ASSERT(elmv[i] < elmv[i + 1]); | 4297 ASSERT(elmv[i] < elmv[i + 1]); |
4481 ranges->Add(CharacterRange(last, elmv[i] - 1)); | 4298 ranges->Add(CharacterRange(last, elmv[i] - 1)); |
4482 last = elmv[i + 1]; | 4299 last = elmv[i + 1]; |
4483 } | 4300 } |
4484 ranges->Add(CharacterRange(last, Utf16::kMaxCodeUnit)); | 4301 ranges->Add(CharacterRange(last, Utf16::kMaxCodeUnit)); |
4485 } | 4302 } |
4486 | 4303 |
(...skipping 13 matching lines...) Expand all Loading... |
4500 case 'W': | 4317 case 'W': |
4501 AddClassNegated(kWordRanges, kWordRangeCount, ranges); | 4318 AddClassNegated(kWordRanges, kWordRangeCount, ranges); |
4502 break; | 4319 break; |
4503 case 'd': | 4320 case 'd': |
4504 AddClass(kDigitRanges, kDigitRangeCount, ranges); | 4321 AddClass(kDigitRanges, kDigitRangeCount, ranges); |
4505 break; | 4322 break; |
4506 case 'D': | 4323 case 'D': |
4507 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges); | 4324 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges); |
4508 break; | 4325 break; |
4509 case '.': | 4326 case '.': |
4510 AddClassNegated(kLineTerminatorRanges, | 4327 AddClassNegated(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges); |
4511 kLineTerminatorRangeCount, | |
4512 ranges); | |
4513 break; | 4328 break; |
4514 // This is not a character range as defined by the spec but a | 4329 // This is not a character range as defined by the spec but a |
4515 // convenient shorthand for a character class that matches any | 4330 // convenient shorthand for a character class that matches any |
4516 // character. | 4331 // character. |
4517 case '*': | 4332 case '*': |
4518 ranges->Add(CharacterRange::Everything()); | 4333 ranges->Add(CharacterRange::Everything()); |
4519 break; | 4334 break; |
4520 // This is the set of characters matched by the $ and ^ symbols | 4335 // This is the set of characters matched by the $ and ^ symbols |
4521 // in multiline mode. | 4336 // in multiline mode. |
4522 case 'n': | 4337 case 'n': |
4523 AddClass(kLineTerminatorRanges, | 4338 AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges); |
4524 kLineTerminatorRangeCount, | |
4525 ranges); | |
4526 break; | 4339 break; |
4527 default: | 4340 default: |
4528 UNREACHABLE(); | 4341 UNREACHABLE(); |
4529 } | 4342 } |
4530 } | 4343 } |
4531 | 4344 |
4532 | 4345 |
4533 void CharacterRange::AddCaseEquivalents( | 4346 void CharacterRange::AddCaseEquivalents( |
4534 ZoneGrowableArray<CharacterRange>* ranges, | 4347 ZoneGrowableArray<CharacterRange>* ranges, |
4535 bool is_one_byte, | 4348 bool is_one_byte, |
4536 Zone* zone) { | 4349 Zone* zone) { |
4537 uint16_t bottom = from(); | 4350 uint16_t bottom = from(); |
4538 uint16_t top = to(); | 4351 uint16_t top = to(); |
4539 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) { | 4352 if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) { |
4540 if (bottom > Symbols::kMaxOneCharCodeSymbol) return; | 4353 if (bottom > Symbols::kMaxOneCharCodeSymbol) return; |
4541 if (top > Symbols::kMaxOneCharCodeSymbol) { | 4354 if (top > Symbols::kMaxOneCharCodeSymbol) { |
4542 top = Symbols::kMaxOneCharCodeSymbol; | 4355 top = Symbols::kMaxOneCharCodeSymbol; |
4543 } | 4356 } |
4544 } | 4357 } |
4545 | 4358 |
4546 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize; | 4359 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize; |
4547 unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange; | 4360 unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange; |
4548 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 4361 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
4549 if (top == bottom) { | 4362 if (top == bottom) { |
4550 // If this is a singleton we just expand the one character. | 4363 // If this is a singleton we just expand the one character. |
4551 intptr_t length = jsregexp_uncanonicalize.get(bottom, '\0', chars); // NOLIN
T | 4364 intptr_t length = |
| 4365 jsregexp_uncanonicalize.get(bottom, '\0', chars); // NOLINT |
4552 for (intptr_t i = 0; i < length; i++) { | 4366 for (intptr_t i = 0; i < length; i++) { |
4553 uint32_t chr = chars[i]; | 4367 uint32_t chr = chars[i]; |
4554 if (chr != bottom) { | 4368 if (chr != bottom) { |
4555 ranges->Add(CharacterRange::Singleton(chars[i])); | 4369 ranges->Add(CharacterRange::Singleton(chars[i])); |
4556 } | 4370 } |
4557 } | 4371 } |
4558 } else { | 4372 } else { |
4559 // If this is a range we expand the characters block by block, | 4373 // If this is a range we expand the characters block by block, |
4560 // expanding contiguous subranges (blocks) one at a time. | 4374 // expanding contiguous subranges (blocks) one at a time. |
4561 // The approach is as follows. For a given start character we | 4375 // The approach is as follows. For a given start character we |
(...skipping 17 matching lines...) Expand all Loading... |
4579 while (pos <= top) { | 4393 while (pos <= top) { |
4580 intptr_t length = jsregexp_canonrange.get(pos, '\0', range); | 4394 intptr_t length = jsregexp_canonrange.get(pos, '\0', range); |
4581 uint16_t block_end; | 4395 uint16_t block_end; |
4582 if (length == 0) { | 4396 if (length == 0) { |
4583 block_end = pos; | 4397 block_end = pos; |
4584 } else { | 4398 } else { |
4585 ASSERT(length == 1); | 4399 ASSERT(length == 1); |
4586 block_end = range[0]; | 4400 block_end = range[0]; |
4587 } | 4401 } |
4588 intptr_t end = (block_end > top) ? top : block_end; | 4402 intptr_t end = (block_end > top) ? top : block_end; |
4589 length = jsregexp_uncanonicalize.get(block_end, '\0', range); // NOLINT | 4403 length = jsregexp_uncanonicalize.get(block_end, '\0', range); // NOLINT |
4590 for (intptr_t i = 0; i < length; i++) { | 4404 for (intptr_t i = 0; i < length; i++) { |
4591 uint32_t c = range[i]; | 4405 uint32_t c = range[i]; |
4592 uint16_t range_from = c - (block_end - pos); | 4406 uint16_t range_from = c - (block_end - pos); |
4593 uint16_t range_to = c - (block_end - end); | 4407 uint16_t range_to = c - (block_end - end); |
4594 if (!(bottom <= range_from && range_to <= top)) { | 4408 if (!(bottom <= range_from && range_to <= top)) { |
4595 ranges->Add(CharacterRange(range_from, range_to)); | 4409 ranges->Add(CharacterRange(range_from, range_to)); |
4596 } | 4410 } |
4597 } | 4411 } |
4598 pos = end + 1; | 4412 pos = end + 1; |
4599 } | 4413 } |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4637 } | 4451 } |
4638 } else { | 4452 } else { |
4639 for (intptr_t i = 0; i < count; i++) { | 4453 for (intptr_t i = 0; i < count; i++) { |
4640 (*list)[to + i] = list->At(from + i); | 4454 (*list)[to + i] = list->At(from + i); |
4641 } | 4455 } |
4642 } | 4456 } |
4643 } | 4457 } |
4644 | 4458 |
4645 | 4459 |
4646 static intptr_t InsertRangeInCanonicalList( | 4460 static intptr_t InsertRangeInCanonicalList( |
4647 ZoneGrowableArray<CharacterRange>* list, | 4461 ZoneGrowableArray<CharacterRange>* list, |
4648 intptr_t count, | 4462 intptr_t count, |
4649 CharacterRange insert) { | 4463 CharacterRange insert) { |
4650 // Inserts a range into list[0..count[, which must be sorted | 4464 // Inserts a range into list[0..count[, which must be sorted |
4651 // by from value and non-overlapping and non-adjacent, using at most | 4465 // by from value and non-overlapping and non-adjacent, using at most |
4652 // list[0..count] for the result. Returns the number of resulting | 4466 // list[0..count] for the result. Returns the number of resulting |
4653 // canonicalized ranges. Inserting a range may collapse existing ranges into | 4467 // canonicalized ranges. Inserting a range may collapse existing ranges into |
4654 // fewer ranges, so the return value can be anything in the range 1..count+1. | 4468 // fewer ranges, so the return value can be anything in the range 1..count+1. |
4655 uint16_t from = insert.from(); | 4469 uint16_t from = insert.from(); |
4656 uint16_t to = insert.to(); | 4470 uint16_t to = insert.to(); |
4657 intptr_t start_pos = 0; | 4471 intptr_t start_pos = 0; |
4658 intptr_t end_pos = count; | 4472 intptr_t end_pos = count; |
4659 for (intptr_t i = count - 1; i >= 0; i--) { | 4473 for (intptr_t i = count - 1; i >= 0; i--) { |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4727 i++; | 4541 i++; |
4728 } | 4542 } |
4729 // Canonical until the i'th range. If that's all of them, we are done. | 4543 // Canonical until the i'th range. If that's all of them, we are done. |
4730 if (i == n) return; | 4544 if (i == n) return; |
4731 | 4545 |
4732 // The ranges at index i and forward are not canonicalized. Make them so by | 4546 // The ranges at index i and forward are not canonicalized. Make them so by |
4733 // doing the equivalent of insertion sort (inserting each into the previous | 4547 // doing the equivalent of insertion sort (inserting each into the previous |
4734 // list, in order). | 4548 // list, in order). |
4735 // Notice that inserting a range can reduce the number of ranges in the | 4549 // Notice that inserting a range can reduce the number of ranges in the |
4736 // result due to combining of adjacent and overlapping ranges. | 4550 // result due to combining of adjacent and overlapping ranges. |
4737 intptr_t read = i; // Range to insert. | 4551 intptr_t read = i; // Range to insert. |
4738 intptr_t num_canonical = i; // Length of canonicalized part of list. | 4552 intptr_t num_canonical = i; // Length of canonicalized part of list. |
4739 do { | 4553 do { |
4740 num_canonical = InsertRangeInCanonicalList(character_ranges, | 4554 num_canonical = InsertRangeInCanonicalList(character_ranges, num_canonical, |
4741 num_canonical, | |
4742 character_ranges->At(read)); | 4555 character_ranges->At(read)); |
4743 read++; | 4556 read++; |
4744 } while (read < n); | 4557 } while (read < n); |
4745 character_ranges->TruncateTo(num_canonical); | 4558 character_ranges->TruncateTo(num_canonical); |
4746 | 4559 |
4747 ASSERT(CharacterRange::IsCanonical(character_ranges)); | 4560 ASSERT(CharacterRange::IsCanonical(character_ranges)); |
4748 } | 4561 } |
4749 | 4562 |
4750 | 4563 |
4751 void CharacterRange::Negate(ZoneGrowableArray<CharacterRange>* ranges, | 4564 void CharacterRange::Negate(ZoneGrowableArray<CharacterRange>* ranges, |
(...skipping 17 matching lines...) Expand all Loading... |
4769 negated_ranges->Add(CharacterRange(from + 1, Utf16::kMaxCodeUnit)); | 4582 negated_ranges->Add(CharacterRange(from + 1, Utf16::kMaxCodeUnit)); |
4770 } | 4583 } |
4771 } | 4584 } |
4772 | 4585 |
4773 | 4586 |
4774 // ------------------------------------------------------------------- | 4587 // ------------------------------------------------------------------- |
4775 // Splay tree | 4588 // Splay tree |
4776 | 4589 |
4777 | 4590 |
4778 // Workaround for the fact that ZoneGrowableArray does not have contains(). | 4591 // Workaround for the fact that ZoneGrowableArray does not have contains(). |
4779 static bool ArrayContains(ZoneGrowableArray<unsigned>* array, | 4592 static bool ArrayContains(ZoneGrowableArray<unsigned>* array, unsigned value) { |
4780 unsigned value) { | |
4781 for (intptr_t i = 0; i < array->length(); i++) { | 4593 for (intptr_t i = 0; i < array->length(); i++) { |
4782 if (array->At(i) == value) { | 4594 if (array->At(i) == value) { |
4783 return true; | 4595 return true; |
4784 } | 4596 } |
4785 } | 4597 } |
4786 return false; | 4598 return false; |
4787 } | 4599 } |
4788 | 4600 |
4789 | 4601 |
4790 void OutSet::Set(unsigned value, Zone* zone) { | 4602 void OutSet::Set(unsigned value, Zone* zone) { |
4791 if (value < kFirstLimit) { | 4603 if (value < kFirstLimit) { |
4792 first_ |= (1 << value); | 4604 first_ |= (1 << value); |
4793 } else { | 4605 } else { |
4794 if (remaining_ == NULL) | 4606 if (remaining_ == NULL) |
4795 remaining_ = new(zone) ZoneGrowableArray<unsigned>(1); | 4607 remaining_ = new (zone) ZoneGrowableArray<unsigned>(1); |
4796 | 4608 |
4797 bool remaining_contains_value = ArrayContains(remaining_, value); | 4609 bool remaining_contains_value = ArrayContains(remaining_, value); |
4798 if (remaining_->is_empty() || !remaining_contains_value) { | 4610 if (remaining_->is_empty() || !remaining_contains_value) { |
4799 remaining_->Add(value); | 4611 remaining_->Add(value); |
4800 } | 4612 } |
4801 } | 4613 } |
4802 } | 4614 } |
4803 | 4615 |
4804 | 4616 |
4805 bool OutSet::Get(unsigned value) const { | 4617 bool OutSet::Get(unsigned value) const { |
4806 if (value < kFirstLimit) { | 4618 if (value < kFirstLimit) { |
4807 return (first_ & (1 << value)) != 0; | 4619 return (first_ & (1 << value)) != 0; |
4808 } else if (remaining_ == NULL) { | 4620 } else if (remaining_ == NULL) { |
4809 return false; | 4621 return false; |
4810 } else { | 4622 } else { |
4811 return ArrayContains(remaining_, value); | 4623 return ArrayContains(remaining_, value); |
4812 } | 4624 } |
4813 } | 4625 } |
4814 | 4626 |
4815 | 4627 |
4816 // ------------------------------------------------------------------- | 4628 // ------------------------------------------------------------------- |
4817 // Analysis | 4629 // Analysis |
4818 | 4630 |
4819 | 4631 |
4820 void Analysis::EnsureAnalyzed(RegExpNode* that) { | 4632 void Analysis::EnsureAnalyzed(RegExpNode* that) { |
4821 if (that->info()->been_analyzed || that->info()->being_analyzed) | 4633 if (that->info()->been_analyzed || that->info()->being_analyzed) return; |
4822 return; | |
4823 that->info()->being_analyzed = true; | 4634 that->info()->being_analyzed = true; |
4824 that->Accept(this); | 4635 that->Accept(this); |
4825 that->info()->being_analyzed = false; | 4636 that->info()->being_analyzed = false; |
4826 that->info()->been_analyzed = true; | 4637 that->info()->been_analyzed = true; |
4827 } | 4638 } |
4828 | 4639 |
4829 | 4640 |
4830 void Analysis::VisitEnd(EndNode* that) { | 4641 void Analysis::VisitEnd(EndNode* that) { |
4831 // nothing to do | 4642 // nothing to do |
4832 } | 4643 } |
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4960 RegExpAtom* atom = text.atom(); | 4771 RegExpAtom* atom = text.atom(); |
4961 for (intptr_t j = 0; j < atom->length(); j++, offset++) { | 4772 for (intptr_t j = 0; j < atom->length(); j++, offset++) { |
4962 if (offset >= bm->length()) { | 4773 if (offset >= bm->length()) { |
4963 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 4774 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
4964 return; | 4775 return; |
4965 } | 4776 } |
4966 uint16_t character = atom->data()->At(j); | 4777 uint16_t character = atom->data()->At(j); |
4967 if (bm->compiler()->ignore_case()) { | 4778 if (bm->compiler()->ignore_case()) { |
4968 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 4779 int32_t chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
4969 intptr_t length = GetCaseIndependentLetters( | 4780 intptr_t length = GetCaseIndependentLetters( |
4970 character, | 4781 character, bm->max_char() == Symbols::kMaxOneCharCodeSymbol, |
4971 bm->max_char() == Symbols::kMaxOneCharCodeSymbol, | |
4972 chars); | 4782 chars); |
4973 for (intptr_t j = 0; j < length; j++) { | 4783 for (intptr_t j = 0; j < length; j++) { |
4974 bm->Set(offset, chars[j]); | 4784 bm->Set(offset, chars[j]); |
4975 } | 4785 } |
4976 } else { | 4786 } else { |
4977 if (character <= max_char) bm->Set(offset, character); | 4787 if (character <= max_char) bm->Set(offset, character); |
4978 } | 4788 } |
4979 } | 4789 } |
4980 } else { | 4790 } else { |
4981 ASSERT(text.text_type() == TextElement::CHAR_CLASS); | 4791 ASSERT(text.text_type() == TextElement::CHAR_CLASS); |
4982 RegExpCharacterClass* char_class = text.char_class(); | 4792 RegExpCharacterClass* char_class = text.char_class(); |
4983 ZoneGrowableArray<CharacterRange>* ranges = char_class->ranges(); | 4793 ZoneGrowableArray<CharacterRange>* ranges = char_class->ranges(); |
4984 if (char_class->is_negated()) { | 4794 if (char_class->is_negated()) { |
4985 bm->SetAll(offset); | 4795 bm->SetAll(offset); |
4986 } else { | 4796 } else { |
4987 for (intptr_t k = 0; k < ranges->length(); k++) { | 4797 for (intptr_t k = 0; k < ranges->length(); k++) { |
4988 CharacterRange& range = (*ranges)[k]; | 4798 CharacterRange& range = (*ranges)[k]; |
4989 if (range.from() > max_char) continue; | 4799 if (range.from() > max_char) continue; |
4990 intptr_t to = Utils::Minimum(max_char, | 4800 intptr_t to = |
4991 static_cast<intptr_t>(range.to())); | 4801 Utils::Minimum(max_char, static_cast<intptr_t>(range.to())); |
4992 bm->SetInterval(offset, Interval(range.from(), to)); | 4802 bm->SetInterval(offset, Interval(range.from(), to)); |
4993 } | 4803 } |
4994 } | 4804 } |
4995 offset++; | 4805 offset++; |
4996 } | 4806 } |
4997 } | 4807 } |
4998 if (offset >= bm->length()) { | 4808 if (offset >= bm->length()) { |
4999 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 4809 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
5000 return; | 4810 return; |
5001 } | 4811 } |
5002 on_success()->FillInBMInfo(offset, | 4812 on_success()->FillInBMInfo(offset, budget - 1, bm, |
5003 budget - 1, | |
5004 bm, | |
5005 true); // Not at start after a text node. | 4813 true); // Not at start after a text node. |
5006 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 4814 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
5007 } | 4815 } |
5008 | 4816 |
5009 | 4817 |
5010 RegExpEngine::CompilationResult RegExpEngine::CompileIR( | 4818 RegExpEngine::CompilationResult RegExpEngine::CompileIR( |
5011 RegExpCompileData* data, | 4819 RegExpCompileData* data, |
5012 const ParsedFunction* parsed_function, | 4820 const ParsedFunction* parsed_function, |
5013 const ZoneGrowableArray<const ICData*>& ic_data_array) { | 4821 const ZoneGrowableArray<const ICData*>& ic_data_array) { |
5014 ASSERT(!FLAG_interpret_irregexp); | 4822 ASSERT(!FLAG_interpret_irregexp); |
(...skipping 17 matching lines...) Expand all Loading... |
5032 // TODO(zerny): Frequency sampling is currently disabled because of several | 4840 // TODO(zerny): Frequency sampling is currently disabled because of several |
5033 // issues. We do not want to store subject strings in the regexp object since | 4841 // issues. We do not want to store subject strings in the regexp object since |
5034 // they might be long and we should not prevent their garbage collection. | 4842 // they might be long and we should not prevent their garbage collection. |
5035 // Passing them to this function explicitly does not help, since we must | 4843 // Passing them to this function explicitly does not help, since we must |
5036 // generate exactly the same IR for both the unoptimizing and optimizing | 4844 // generate exactly the same IR for both the unoptimizing and optimizing |
5037 // pipelines (otherwise it gets confused when i.e. deopt id's differ). | 4845 // pipelines (otherwise it gets confused when i.e. deopt id's differ). |
5038 // An option would be to store sampling results in the regexp object, but | 4846 // An option would be to store sampling results in the regexp object, but |
5039 // I'm not sure the performance gains are relevant enough. | 4847 // I'm not sure the performance gains are relevant enough. |
5040 | 4848 |
5041 // Wrap the body of the regexp in capture #0. | 4849 // Wrap the body of the regexp in capture #0. |
5042 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, | 4850 RegExpNode* captured_body = |
5043 0, | 4851 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept()); |
5044 &compiler, | |
5045 compiler.accept()); | |
5046 | 4852 |
5047 RegExpNode* node = captured_body; | 4853 RegExpNode* node = captured_body; |
5048 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); | 4854 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); |
5049 bool is_start_anchored = data->tree->IsAnchoredAtStart(); | 4855 bool is_start_anchored = data->tree->IsAnchoredAtStart(); |
5050 intptr_t max_length = data->tree->max_match(); | 4856 intptr_t max_length = data->tree->max_match(); |
5051 if (!is_start_anchored) { | 4857 if (!is_start_anchored) { |
5052 // Add a .*? at the beginning, outside the body capture, unless | 4858 // Add a .*? at the beginning, outside the body capture, unless |
5053 // this expression is anchored at the beginning. | 4859 // this expression is anchored at the beginning. |
5054 RegExpNode* loop_node = | 4860 RegExpNode* loop_node = RegExpQuantifier::ToNode( |
5055 RegExpQuantifier::ToNode(0, | 4861 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'), |
5056 RegExpTree::kInfinity, | 4862 &compiler, captured_body, data->contains_anchor); |
5057 false, | |
5058 new(zone) RegExpCharacterClass('*'), | |
5059 &compiler, | |
5060 captured_body, | |
5061 data->contains_anchor); | |
5062 | 4863 |
5063 if (data->contains_anchor) { | 4864 if (data->contains_anchor) { |
5064 // Unroll loop once, to take care of the case that might start | 4865 // Unroll loop once, to take care of the case that might start |
5065 // at the start of input. | 4866 // at the start of input. |
5066 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); | 4867 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone); |
5067 first_step_node->AddAlternative(GuardedAlternative(captured_body)); | 4868 first_step_node->AddAlternative(GuardedAlternative(captured_body)); |
5068 first_step_node->AddAlternative(GuardedAlternative( | 4869 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( |
5069 new(zone) TextNode( | 4870 new (zone) RegExpCharacterClass('*'), loop_node))); |
5070 new(zone) RegExpCharacterClass('*'), loop_node))); | |
5071 node = first_step_node; | 4871 node = first_step_node; |
5072 } else { | 4872 } else { |
5073 node = loop_node; | 4873 node = loop_node; |
5074 } | 4874 } |
5075 } | 4875 } |
5076 if (is_one_byte) { | 4876 if (is_one_byte) { |
5077 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 4877 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
5078 // Do it again to propagate the new nodes to places where they were not | 4878 // Do it again to propagate the new nodes to places where they were not |
5079 // put because they had not been calculated yet. | 4879 // put because they had not been calculated yet. |
5080 if (node != NULL) { | 4880 if (node != NULL) { |
5081 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 4881 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
5082 } | 4882 } |
5083 } | 4883 } |
5084 | 4884 |
5085 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); | 4885 if (node == NULL) node = new (zone) EndNode(EndNode::BACKTRACK, zone); |
5086 data->node = node; | 4886 data->node = node; |
5087 Analysis analysis(ignore_case, is_one_byte); | 4887 Analysis analysis(ignore_case, is_one_byte); |
5088 analysis.EnsureAnalyzed(node); | 4888 analysis.EnsureAnalyzed(node); |
5089 if (analysis.has_failed()) { | 4889 if (analysis.has_failed()) { |
5090 const char* error_message = analysis.error_message(); | 4890 const char* error_message = analysis.error_message(); |
5091 return CompilationResult(error_message); | 4891 return CompilationResult(error_message); |
5092 } | 4892 } |
5093 | 4893 |
5094 // Native regexp implementation. | 4894 // Native regexp implementation. |
5095 | 4895 |
5096 IRRegExpMacroAssembler* macro_assembler = | 4896 IRRegExpMacroAssembler* macro_assembler = |
5097 new(zone) IRRegExpMacroAssembler(specialization_cid, | 4897 new (zone) IRRegExpMacroAssembler(specialization_cid, data->capture_count, |
5098 data->capture_count, | 4898 parsed_function, ic_data_array, zone); |
5099 parsed_function, | |
5100 ic_data_array, | |
5101 zone); | |
5102 | 4899 |
5103 // Inserted here, instead of in Assembler, because it depends on information | 4900 // Inserted here, instead of in Assembler, because it depends on information |
5104 // in the AST that isn't replicated in the Node structure. | 4901 // in the AST that isn't replicated in the Node structure. |
5105 static const intptr_t kMaxBacksearchLimit = 1024; | 4902 static const intptr_t kMaxBacksearchLimit = 1024; |
5106 if (is_end_anchored && | 4903 if (is_end_anchored && !is_start_anchored && |
5107 !is_start_anchored && | |
5108 max_length < kMaxBacksearchLimit) { | 4904 max_length < kMaxBacksearchLimit) { |
5109 macro_assembler->SetCurrentPositionFromEnd(max_length); | 4905 macro_assembler->SetCurrentPositionFromEnd(max_length); |
5110 } | 4906 } |
5111 | 4907 |
5112 if (is_global) { | 4908 if (is_global) { |
5113 macro_assembler->set_global_mode( | 4909 macro_assembler->set_global_mode( |
5114 (data->tree->min_match() > 0) | 4910 (data->tree->min_match() > 0) |
5115 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK | 4911 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK |
5116 : RegExpMacroAssembler::GLOBAL); | 4912 : RegExpMacroAssembler::GLOBAL); |
5117 } | 4913 } |
5118 | 4914 |
5119 RegExpEngine::CompilationResult result = | 4915 RegExpEngine::CompilationResult result = |
5120 compiler.Assemble(macro_assembler, | 4916 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); |
5121 node, | |
5122 data->capture_count, | |
5123 pattern); | |
5124 | 4917 |
5125 if (FLAG_trace_irregexp) { | 4918 if (FLAG_trace_irregexp) { |
5126 macro_assembler->PrintBlocks(); | 4919 macro_assembler->PrintBlocks(); |
5127 } | 4920 } |
5128 | 4921 |
5129 return result; | 4922 return result; |
5130 } | 4923 } |
5131 | 4924 |
5132 | 4925 |
5133 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode( | 4926 RegExpEngine::CompilationResult RegExpEngine::CompileBytecode( |
(...skipping 15 matching lines...) Expand all Loading... |
5149 // TODO(zerny): Frequency sampling is currently disabled because of several | 4942 // TODO(zerny): Frequency sampling is currently disabled because of several |
5150 // issues. We do not want to store subject strings in the regexp object since | 4943 // issues. We do not want to store subject strings in the regexp object since |
5151 // they might be long and we should not prevent their garbage collection. | 4944 // they might be long and we should not prevent their garbage collection. |
5152 // Passing them to this function explicitly does not help, since we must | 4945 // Passing them to this function explicitly does not help, since we must |
5153 // generate exactly the same IR for both the unoptimizing and optimizing | 4946 // generate exactly the same IR for both the unoptimizing and optimizing |
5154 // pipelines (otherwise it gets confused when i.e. deopt id's differ). | 4947 // pipelines (otherwise it gets confused when i.e. deopt id's differ). |
5155 // An option would be to store sampling results in the regexp object, but | 4948 // An option would be to store sampling results in the regexp object, but |
5156 // I'm not sure the performance gains are relevant enough. | 4949 // I'm not sure the performance gains are relevant enough. |
5157 | 4950 |
5158 // Wrap the body of the regexp in capture #0. | 4951 // Wrap the body of the regexp in capture #0. |
5159 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, | 4952 RegExpNode* captured_body = |
5160 0, | 4953 RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept()); |
5161 &compiler, | |
5162 compiler.accept()); | |
5163 | 4954 |
5164 RegExpNode* node = captured_body; | 4955 RegExpNode* node = captured_body; |
5165 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); | 4956 bool is_end_anchored = data->tree->IsAnchoredAtEnd(); |
5166 bool is_start_anchored = data->tree->IsAnchoredAtStart(); | 4957 bool is_start_anchored = data->tree->IsAnchoredAtStart(); |
5167 intptr_t max_length = data->tree->max_match(); | 4958 intptr_t max_length = data->tree->max_match(); |
5168 if (!is_start_anchored) { | 4959 if (!is_start_anchored) { |
5169 // Add a .*? at the beginning, outside the body capture, unless | 4960 // Add a .*? at the beginning, outside the body capture, unless |
5170 // this expression is anchored at the beginning. | 4961 // this expression is anchored at the beginning. |
5171 RegExpNode* loop_node = | 4962 RegExpNode* loop_node = RegExpQuantifier::ToNode( |
5172 RegExpQuantifier::ToNode(0, | 4963 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'), |
5173 RegExpTree::kInfinity, | 4964 &compiler, captured_body, data->contains_anchor); |
5174 false, | |
5175 new(zone) RegExpCharacterClass('*'), | |
5176 &compiler, | |
5177 captured_body, | |
5178 data->contains_anchor); | |
5179 | 4965 |
5180 if (data->contains_anchor) { | 4966 if (data->contains_anchor) { |
5181 // Unroll loop once, to take care of the case that might start | 4967 // Unroll loop once, to take care of the case that might start |
5182 // at the start of input. | 4968 // at the start of input. |
5183 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); | 4969 ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone); |
5184 first_step_node->AddAlternative(GuardedAlternative(captured_body)); | 4970 first_step_node->AddAlternative(GuardedAlternative(captured_body)); |
5185 first_step_node->AddAlternative(GuardedAlternative( | 4971 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( |
5186 new(zone) TextNode( | 4972 new (zone) RegExpCharacterClass('*'), loop_node))); |
5187 new(zone) RegExpCharacterClass('*'), loop_node))); | |
5188 node = first_step_node; | 4973 node = first_step_node; |
5189 } else { | 4974 } else { |
5190 node = loop_node; | 4975 node = loop_node; |
5191 } | 4976 } |
5192 } | 4977 } |
5193 if (is_one_byte) { | 4978 if (is_one_byte) { |
5194 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 4979 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
5195 // Do it again to propagate the new nodes to places where they were not | 4980 // Do it again to propagate the new nodes to places where they were not |
5196 // put because they had not been calculated yet. | 4981 // put because they had not been calculated yet. |
5197 if (node != NULL) { | 4982 if (node != NULL) { |
5198 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 4983 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
5199 } | 4984 } |
5200 } | 4985 } |
5201 | 4986 |
5202 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); | 4987 if (node == NULL) node = new (zone) EndNode(EndNode::BACKTRACK, zone); |
5203 data->node = node; | 4988 data->node = node; |
5204 Analysis analysis(ignore_case, is_one_byte); | 4989 Analysis analysis(ignore_case, is_one_byte); |
5205 analysis.EnsureAnalyzed(node); | 4990 analysis.EnsureAnalyzed(node); |
5206 if (analysis.has_failed()) { | 4991 if (analysis.has_failed()) { |
5207 const char* error_message = analysis.error_message(); | 4992 const char* error_message = analysis.error_message(); |
5208 return CompilationResult(error_message); | 4993 return CompilationResult(error_message); |
5209 } | 4994 } |
5210 | 4995 |
5211 // Bytecode regexp implementation. | 4996 // Bytecode regexp implementation. |
5212 | 4997 |
5213 ZoneGrowableArray<uint8_t> buffer(zone, 1024); | 4998 ZoneGrowableArray<uint8_t> buffer(zone, 1024); |
5214 BytecodeRegExpMacroAssembler* macro_assembler = | 4999 BytecodeRegExpMacroAssembler* macro_assembler = |
5215 new(zone) BytecodeRegExpMacroAssembler(&buffer, zone); | 5000 new (zone) BytecodeRegExpMacroAssembler(&buffer, zone); |
5216 | 5001 |
5217 // Inserted here, instead of in Assembler, because it depends on information | 5002 // Inserted here, instead of in Assembler, because it depends on information |
5218 // in the AST that isn't replicated in the Node structure. | 5003 // in the AST that isn't replicated in the Node structure. |
5219 static const intptr_t kMaxBacksearchLimit = 1024; | 5004 static const intptr_t kMaxBacksearchLimit = 1024; |
5220 if (is_end_anchored && | 5005 if (is_end_anchored && !is_start_anchored && |
5221 !is_start_anchored && | |
5222 max_length < kMaxBacksearchLimit) { | 5006 max_length < kMaxBacksearchLimit) { |
5223 macro_assembler->SetCurrentPositionFromEnd(max_length); | 5007 macro_assembler->SetCurrentPositionFromEnd(max_length); |
5224 } | 5008 } |
5225 | 5009 |
5226 if (is_global) { | 5010 if (is_global) { |
5227 macro_assembler->set_global_mode( | 5011 macro_assembler->set_global_mode( |
5228 (data->tree->min_match() > 0) | 5012 (data->tree->min_match() > 0) |
5229 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK | 5013 ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK |
5230 : RegExpMacroAssembler::GLOBAL); | 5014 : RegExpMacroAssembler::GLOBAL); |
5231 } | 5015 } |
5232 | 5016 |
5233 RegExpEngine::CompilationResult result = | 5017 RegExpEngine::CompilationResult result = |
5234 compiler.Assemble(macro_assembler, | 5018 compiler.Assemble(macro_assembler, node, data->capture_count, pattern); |
5235 node, | |
5236 data->capture_count, | |
5237 pattern); | |
5238 | 5019 |
5239 if (FLAG_trace_irregexp) { | 5020 if (FLAG_trace_irregexp) { |
5240 macro_assembler->PrintBlocks(); | 5021 macro_assembler->PrintBlocks(); |
5241 } | 5022 } |
5242 | 5023 |
5243 return result; | 5024 return result; |
5244 } | 5025 } |
5245 | 5026 |
5246 | 5027 |
5247 static void CreateSpecializedFunction(Thread* thread, Zone* zone, | 5028 static void CreateSpecializedFunction(Thread* thread, |
| 5029 Zone* zone, |
5248 const RegExp& regexp, | 5030 const RegExp& regexp, |
5249 intptr_t specialization_cid, | 5031 intptr_t specialization_cid, |
5250 const Object& owner) { | 5032 const Object& owner) { |
5251 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount; | 5033 const intptr_t kParamCount = RegExpMacroAssembler::kParamCount; |
5252 | 5034 |
5253 Function& fn = Function::Handle(zone, Function::New( | 5035 Function& fn = |
5254 Symbols::ColonMatcher(), | 5036 Function::Handle(zone, Function::New(Symbols::ColonMatcher(), |
5255 RawFunction::kIrregexpFunction, | 5037 RawFunction::kIrregexpFunction, |
5256 true, // Static. | 5038 true, // Static. |
5257 false, // Not const. | 5039 false, // Not const. |
5258 false, // Not abstract. | 5040 false, // Not abstract. |
5259 false, // Not external. | 5041 false, // Not external. |
5260 false, // Not native. | 5042 false, // Not native. |
5261 owner, | 5043 owner, TokenPosition::kMinSource)); |
5262 TokenPosition::kMinSource)); | |
5263 | 5044 |
5264 // TODO(zerny): Share these arrays between all irregexp functions. | 5045 // TODO(zerny): Share these arrays between all irregexp functions. |
5265 fn.set_num_fixed_parameters(kParamCount); | 5046 fn.set_num_fixed_parameters(kParamCount); |
5266 fn.set_parameter_types(Array::Handle(zone, Array::New(kParamCount, | 5047 fn.set_parameter_types( |
5267 Heap::kOld))); | 5048 Array::Handle(zone, Array::New(kParamCount, Heap::kOld))); |
5268 fn.set_parameter_names(Array::Handle(zone, Array::New(kParamCount, | 5049 fn.set_parameter_names( |
5269 Heap::kOld))); | 5050 Array::Handle(zone, Array::New(kParamCount, Heap::kOld))); |
5270 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamRegExpIndex, | 5051 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamRegExpIndex, |
5271 Object::dynamic_type()); | 5052 Object::dynamic_type()); |
5272 fn.SetParameterNameAt(RegExpMacroAssembler::kParamRegExpIndex, | 5053 fn.SetParameterNameAt(RegExpMacroAssembler::kParamRegExpIndex, |
5273 Symbols::This()); | 5054 Symbols::This()); |
5274 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamStringIndex, | 5055 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamStringIndex, |
5275 Object::dynamic_type()); | 5056 Object::dynamic_type()); |
5276 fn.SetParameterNameAt(RegExpMacroAssembler::kParamStringIndex, | 5057 fn.SetParameterNameAt(RegExpMacroAssembler::kParamStringIndex, |
5277 Symbols::string_param()); | 5058 Symbols::string_param()); |
5278 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamStartOffsetIndex, | 5059 fn.SetParameterTypeAt(RegExpMacroAssembler::kParamStartOffsetIndex, |
5279 Object::dynamic_type()); | 5060 Object::dynamic_type()); |
(...skipping 23 matching lines...) Expand all Loading... |
5303 if (multi_line) { | 5084 if (multi_line) { |
5304 regexp.set_is_multi_line(); | 5085 regexp.set_is_multi_line(); |
5305 } | 5086 } |
5306 if (ignore_case) { | 5087 if (ignore_case) { |
5307 regexp.set_is_ignore_case(); | 5088 regexp.set_is_ignore_case(); |
5308 } | 5089 } |
5309 | 5090 |
5310 // TODO(zerny): We might want to use normal string searching algorithms | 5091 // TODO(zerny): We might want to use normal string searching algorithms |
5311 // for simple patterns. | 5092 // for simple patterns. |
5312 regexp.set_is_complex(); | 5093 regexp.set_is_complex(); |
5313 regexp.set_is_global(); // All dart regexps are global. | 5094 regexp.set_is_global(); // All dart regexps are global. |
5314 | 5095 |
5315 if (!FLAG_interpret_irregexp) { | 5096 if (!FLAG_interpret_irregexp) { |
5316 const Library& lib = Library::Handle(zone, Library::CoreLibrary()); | 5097 const Library& lib = Library::Handle(zone, Library::CoreLibrary()); |
5317 const Class& owner = Class::Handle(zone, | 5098 const Class& owner = |
5318 lib.LookupClass(Symbols::RegExp())); | 5099 Class::Handle(zone, lib.LookupClass(Symbols::RegExp())); |
5319 | 5100 |
5320 CreateSpecializedFunction(thread, zone, regexp, kOneByteStringCid, owner); | 5101 CreateSpecializedFunction(thread, zone, regexp, kOneByteStringCid, owner); |
5321 CreateSpecializedFunction(thread, zone, regexp, kTwoByteStringCid, owner); | 5102 CreateSpecializedFunction(thread, zone, regexp, kTwoByteStringCid, owner); |
5322 CreateSpecializedFunction(thread, zone, | 5103 CreateSpecializedFunction(thread, zone, regexp, kExternalOneByteStringCid, |
5323 regexp, kExternalOneByteStringCid, owner); | 5104 owner); |
5324 CreateSpecializedFunction(thread, zone, | 5105 CreateSpecializedFunction(thread, zone, regexp, kExternalTwoByteStringCid, |
5325 regexp, kExternalTwoByteStringCid, owner); | 5106 owner); |
5326 } | 5107 } |
5327 | 5108 |
5328 return regexp.raw(); | 5109 return regexp.raw(); |
5329 } | 5110 } |
5330 | 5111 |
5331 | 5112 |
5332 } // namespace dart | 5113 } // namespace dart |
OLD | NEW |