| Index: src/jsregexp.cc
|
| diff --git a/src/jsregexp.cc b/src/jsregexp.cc
|
| index 7891cd8744bbba2fe168f2b78f739632e5248f24..8a94e819e2c1cbbc04e79e904b80b2e6fef4acdf 100644
|
| --- a/src/jsregexp.cc
|
| +++ b/src/jsregexp.cc
|
| @@ -3493,6 +3493,7 @@ void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
|
| void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
| RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
|
| if (trace->stop_node() == this) {
|
| + // Back edge of greedy optimized loop node graph.
|
| int text_length =
|
| GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
|
| DCHECK(text_length != kNodeIsTooComplexForGreedyLoops);
|
| @@ -3759,13 +3760,13 @@ int BoyerMooreLookahead::GetSkipTable(int min_lookahead,
|
|
|
|
|
| // See comment above on the implementation of GetSkipTable.
|
| -bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
|
| +void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
|
| const int kSize = RegExpMacroAssembler::kTableSize;
|
|
|
| int min_lookahead = 0;
|
| int max_lookahead = 0;
|
|
|
| - if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return false;
|
| + if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return;
|
|
|
| bool found_single_character = false;
|
| int single_character = 0;
|
| @@ -3789,7 +3790,7 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
|
|
|
| if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
|
| // The mask-compare can probably handle this better.
|
| - return false;
|
| + return;
|
| }
|
|
|
| if (found_single_character) {
|
| @@ -3806,7 +3807,7 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
|
| masm->AdvanceCurrentPosition(lookahead_width);
|
| masm->GoTo(&again);
|
| masm->Bind(&cont);
|
| - return true;
|
| + return;
|
| }
|
|
|
| Factory* factory = masm->zone()->isolate()->factory();
|
| @@ -3822,8 +3823,6 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
|
| masm->AdvanceCurrentPosition(skip_distance);
|
| masm->GoTo(&again);
|
| masm->Bind(&cont);
|
| -
|
| - return true;
|
| }
|
|
|
|
|
| @@ -3905,10 +3904,15 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
|
| * \______________/
|
| */
|
|
|
| -void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
| - RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
|
| - int choice_count = alternatives_->length();
|
| +GreedyLoopState::GreedyLoopState(bool not_at_start) {
|
| + counter_backtrack_trace_.set_backtrack(&label_);
|
| + if (not_at_start) counter_backtrack_trace_.set_at_start(false);
|
| +}
|
| +
|
| +
|
| +void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
|
| #ifdef DEBUG
|
| + int choice_count = alternatives_->length();
|
| for (int i = 0; i < choice_count - 1; i++) {
|
| GuardedAlternative alternative = alternatives_->at(i);
|
| ZoneList<Guard*>* guards = alternative.guards();
|
| @@ -3918,12 +3922,39 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
| }
|
| }
|
| #endif
|
| +}
|
| +
|
| +
|
| +void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler,
|
| + Trace* current_trace,
|
| + PreloadState* state) {
|
| + if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
|
| + // Save some time by looking at most one machine word ahead.
|
| + state->eats_at_least_ =
|
| + EatsAtLeast(compiler->ascii() ? 4 : 2,
|
| + kRecursionBudget,
|
| + current_trace->at_start() == Trace::FALSE_VALUE);
|
| + }
|
| + state->preload_characters_ =
|
| + CalculatePreloadCharacters(compiler, state->eats_at_least_);
|
| +
|
| + state->preload_is_current_ =
|
| + (current_trace->characters_preloaded() == state->preload_characters_);
|
| + state->preload_has_checked_bounds_ = state->preload_is_current_;
|
| +}
|
| +
|
| +
|
| +void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
| + int choice_count = alternatives_->length();
|
| +
|
| + AssertGuardsMentionRegisters(trace);
|
|
|
| LimitResult limit_result = LimitVersions(compiler, trace);
|
| if (limit_result == DONE) return;
|
| DCHECK(limit_result == CONTINUE);
|
|
|
| - int new_flush_budget = trace->flush_budget() / choice_count;
|
| + // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for
|
| + // other choice nodes we only flush if we are out of code size budget.
|
| if (trace->flush_budget() == 0 && trace->actions() != NULL) {
|
| trace->Flush(compiler, this);
|
| return;
|
| @@ -3931,143 +3962,216 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
|
|
| RecursionCheck rc(compiler);
|
|
|
| - Trace* current_trace = trace;
|
| + PreloadState preload;
|
| + preload.init();
|
| + GreedyLoopState greedy_loop_state(not_at_start());
|
|
|
| - int text_length = GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
|
| - bool greedy_loop = false;
|
| - Label greedy_loop_label;
|
| - Trace counter_backtrack_trace;
|
| - counter_backtrack_trace.set_backtrack(&greedy_loop_label);
|
| - if (not_at_start()) counter_backtrack_trace.set_at_start(false);
|
| + int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0));
|
| + AlternativeGenerationList alt_gens(choice_count, zone());
|
|
|
| if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
|
| - // Here we have special handling for greedy loops containing only text nodes
|
| - // and other simple nodes. These are handled by pushing the current
|
| - // position on the stack and then incrementing the current position each
|
| - // time around the switch. On backtrack we decrement the current position
|
| - // and check it against the pushed value. This avoids pushing backtrack
|
| - // information for each iteration of the loop, which could take up a lot of
|
| - // space.
|
| - greedy_loop = true;
|
| - DCHECK(trace->stop_node() == NULL);
|
| - macro_assembler->PushCurrentPosition();
|
| - current_trace = &counter_backtrack_trace;
|
| - Label greedy_match_failed;
|
| - Trace greedy_match_trace;
|
| - if (not_at_start()) greedy_match_trace.set_at_start(false);
|
| - greedy_match_trace.set_backtrack(&greedy_match_failed);
|
| - Label loop_label;
|
| - macro_assembler->Bind(&loop_label);
|
| - greedy_match_trace.set_stop_node(this);
|
| - greedy_match_trace.set_loop_label(&loop_label);
|
| - alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
|
| - macro_assembler->Bind(&greedy_match_failed);
|
| + trace = EmitGreedyLoop(compiler,
|
| + trace,
|
| + &alt_gens,
|
| + &preload,
|
| + &greedy_loop_state,
|
| + text_length);
|
| + } else {
|
| + // TODO(erikcorry): Delete this. We don't need this label, but it makes us
|
| + // match the traces produced pre-cleanup.
|
| + Label second_choice;
|
| + compiler->macro_assembler()->Bind(&second_choice);
|
| +
|
| + preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace);
|
| +
|
| + EmitChoices(compiler,
|
| + &alt_gens,
|
| + 0,
|
| + trace,
|
| + false, // Not greedy loop.
|
| + &preload);
|
| + }
|
| +
|
| + // At this point we need to generate slow checks for the alternatives where
|
| + // the quick check was inlined. We can recognize these because the associated
|
| + // label was bound.
|
| + int new_flush_budget = trace->flush_budget() / choice_count;
|
| + for (int i = 0; i < choice_count - 1; i++) {
|
| + AlternativeGeneration* alt_gen = alt_gens.at(i);
|
| + Trace new_trace(*trace);
|
| + // If there are actions to be flushed we have to limit how many times
|
| + // they are flushed. Take the budget of the parent trace and distribute
|
| + // it fairly amongst the children.
|
| + if (new_trace.actions() != NULL) {
|
| + new_trace.set_flush_budget(new_flush_budget);
|
| + }
|
| + EmitOutOfLineContinuation(compiler,
|
| + &new_trace,
|
| + alternatives_->at(i),
|
| + alt_gen,
|
| + preload.preload_characters_,
|
| + alt_gens.at(i + 1)->expects_preload);
|
| }
|
| +}
|
| +
|
| +
|
| +Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler,
|
| + Trace* trace,
|
| + AlternativeGenerationList* alt_gens,
|
| + PreloadState* preload,
|
| + GreedyLoopState* greedy_loop_state,
|
| + int text_length) {
|
| + RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
|
| + // Here we have special handling for greedy loops containing only text nodes
|
| + // and other simple nodes. These are handled by pushing the current
|
| + // position on the stack and then incrementing the current position each
|
| + // time around the switch. On backtrack we decrement the current position
|
| + // and check it against the pushed value. This avoids pushing backtrack
|
| + // information for each iteration of the loop, which could take up a lot of
|
| + // space.
|
| + DCHECK(trace->stop_node() == NULL);
|
| + macro_assembler->PushCurrentPosition();
|
| + Label greedy_match_failed;
|
| + Trace greedy_match_trace;
|
| + if (not_at_start()) greedy_match_trace.set_at_start(false);
|
| + greedy_match_trace.set_backtrack(&greedy_match_failed);
|
| + Label loop_label;
|
| + macro_assembler->Bind(&loop_label);
|
| + greedy_match_trace.set_stop_node(this);
|
| + greedy_match_trace.set_loop_label(&loop_label);
|
| + alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
|
| + macro_assembler->Bind(&greedy_match_failed);
|
|
|
| Label second_choice; // For use in greedy matches.
|
| macro_assembler->Bind(&second_choice);
|
|
|
| - int first_normal_choice = greedy_loop ? 1 : 0;
|
| -
|
| - bool not_at_start = current_trace->at_start() == Trace::FALSE_VALUE;
|
| - const int kEatsAtLeastNotYetInitialized = -1;
|
| - int eats_at_least = kEatsAtLeastNotYetInitialized;
|
| -
|
| - bool skip_was_emitted = false;
|
| -
|
| - if (!greedy_loop && choice_count == 2) {
|
| - GuardedAlternative alt1 = alternatives_->at(1);
|
| - if (alt1.guards() == NULL || alt1.guards()->length() == 0) {
|
| - RegExpNode* eats_anything_node = alt1.node();
|
| - if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) ==
|
| - this) {
|
| - // At this point we know that we are at a non-greedy loop that will eat
|
| - // any character one at a time. Any non-anchored regexp has such a
|
| - // loop prepended to it in order to find where it starts. We look for
|
| - // a pattern of the form ...abc... where we can look 6 characters ahead
|
| - // and step forwards 3 if the character is not one of abc. Abc need
|
| - // not be atoms, they can be any reasonably limited character class or
|
| - // small alternation.
|
| - DCHECK(trace->is_trivial()); // This is the case on LoopChoiceNodes.
|
| - BoyerMooreLookahead* lookahead = bm_info(not_at_start);
|
| - if (lookahead == NULL) {
|
| - eats_at_least = Min(kMaxLookaheadForBoyerMoore,
|
| - EatsAtLeast(kMaxLookaheadForBoyerMoore,
|
| - kRecursionBudget,
|
| - not_at_start));
|
| - if (eats_at_least >= 1) {
|
| - BoyerMooreLookahead* bm =
|
| - new(zone()) BoyerMooreLookahead(eats_at_least,
|
| - compiler,
|
| - zone());
|
| - GuardedAlternative alt0 = alternatives_->at(0);
|
| - alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, not_at_start);
|
| - skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
|
| - }
|
| - } else {
|
| - skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);
|
| - }
|
| - }
|
| - }
|
| + Trace* new_trace = greedy_loop_state->counter_backtrack_trace();
|
| +
|
| + EmitChoices(compiler,
|
| + alt_gens,
|
| + 1,
|
| + new_trace,
|
| + true, // Is greedy loop.
|
| + preload);
|
| +
|
| + macro_assembler->Bind(greedy_loop_state->label());
|
| + // If we have unwound to the bottom then backtrack.
|
| + macro_assembler->CheckGreedyLoop(trace->backtrack());
|
| + // Otherwise try the second priority at an earlier position.
|
| + macro_assembler->AdvanceCurrentPosition(-text_length);
|
| + macro_assembler->GoTo(&second_choice);
|
| + return new_trace;
|
| +}
|
| +
|
| +int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
|
| + Trace* trace) {
|
| + int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized;
|
| + if (alternatives_->length() != 2) return eats_at_least;
|
| +
|
| + GuardedAlternative alt1 = alternatives_->at(1);
|
| + if (alt1.guards() != NULL && alt1.guards()->length() != 0) {
|
| + return eats_at_least;
|
| }
|
| + RegExpNode* eats_anything_node = alt1.node();
|
| + if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) {
|
| + return eats_at_least;
|
| + }
|
| +
|
| + // Really we should be creating a new trace when we execute this function,
|
| + // but there is no need, because the code it generates cannot backtrack, and
|
| + // we always arrive here with a trivial trace (since it's the entry to a
|
| + // loop. That also implies that there are no preloaded characters, which is
|
| + // good, because it means we won't be violating any assumptions by
|
| + // overwriting those characters with new load instructions.
|
| + DCHECK(trace->is_trivial());
|
|
|
| - if (eats_at_least == kEatsAtLeastNotYetInitialized) {
|
| - // Save some time by looking at most one machine word ahead.
|
| - eats_at_least =
|
| - EatsAtLeast(compiler->ascii() ? 4 : 2, kRecursionBudget, not_at_start);
|
| + RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
|
| + // At this point we know that we are at a non-greedy loop that will eat
|
| + // any character one at a time. Any non-anchored regexp has such a
|
| + // loop prepended to it in order to find where it starts. We look for
|
| + // a pattern of the form ...abc... where we can look 6 characters ahead
|
| + // and step forwards 3 if the character is not one of abc. Abc need
|
| + // not be atoms, they can be any reasonably limited character class or
|
| + // small alternation.
|
| + BoyerMooreLookahead* bm = bm_info(false);
|
| + if (bm == NULL) {
|
| + eats_at_least = Min(kMaxLookaheadForBoyerMoore,
|
| + EatsAtLeast(kMaxLookaheadForBoyerMoore,
|
| + kRecursionBudget,
|
| + false));
|
| + if (eats_at_least >= 1) {
|
| + bm = new(zone()) BoyerMooreLookahead(eats_at_least,
|
| + compiler,
|
| + zone());
|
| + GuardedAlternative alt0 = alternatives_->at(0);
|
| + alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false);
|
| + }
|
| + }
|
| + if (bm != NULL) {
|
| + bm->EmitSkipInstructions(macro_assembler);
|
| }
|
| - int preload_characters = CalculatePreloadCharacters(compiler, eats_at_least);
|
| + return eats_at_least;
|
| +}
|
|
|
| - bool preload_is_current = !skip_was_emitted &&
|
| - (current_trace->characters_preloaded() == preload_characters);
|
| - bool preload_has_checked_bounds = preload_is_current;
|
|
|
| - AlternativeGenerationList alt_gens(choice_count, zone());
|
| +void ChoiceNode::EmitChoices(RegExpCompiler* compiler,
|
| + AlternativeGenerationList* alt_gens,
|
| + int first_choice,
|
| + Trace* trace,
|
| + bool is_greedy_loop,
|
| + PreloadState* preload) {
|
| + RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
|
| + SetUpPreLoad(compiler, trace, preload);
|
|
|
| // For now we just call all choices one after the other. The idea ultimately
|
| // is to use the Dispatch table to try only the relevant ones.
|
| - for (int i = first_normal_choice; i < choice_count; i++) {
|
| + int choice_count = alternatives_->length();
|
| +
|
| + int new_flush_budget = trace->flush_budget() / choice_count;
|
| +
|
| + for (int i = first_choice; i < choice_count; i++) {
|
| + bool is_last = i == choice_count - 1;
|
| GuardedAlternative alternative = alternatives_->at(i);
|
| - AlternativeGeneration* alt_gen = alt_gens.at(i);
|
| - alt_gen->quick_check_details.set_characters(preload_characters);
|
| + AlternativeGeneration* alt_gen = alt_gens->at(i);
|
| + alt_gen->quick_check_details.set_characters(preload->preload_characters_);
|
| ZoneList<Guard*>* guards = alternative.guards();
|
| int guard_count = (guards == NULL) ? 0 : guards->length();
|
| - Trace new_trace(*current_trace);
|
| - new_trace.set_characters_preloaded(preload_is_current ?
|
| - preload_characters :
|
| + Trace new_trace(*trace);
|
| + new_trace.set_characters_preloaded(preload->preload_is_current_ ?
|
| + preload->preload_characters_ :
|
| 0);
|
| - if (preload_has_checked_bounds) {
|
| - new_trace.set_bound_checked_up_to(preload_characters);
|
| + if (preload->preload_has_checked_bounds_) {
|
| + new_trace.set_bound_checked_up_to(preload->preload_characters_);
|
| }
|
| new_trace.quick_check_performed()->Clear();
|
| if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
|
| - alt_gen->expects_preload = preload_is_current;
|
| + alt_gen->expects_preload = preload->preload_is_current_;
|
| bool generate_full_check_inline = false;
|
| if (FLAG_regexp_optimization &&
|
| - try_to_emit_quick_check_for_alternative(i) &&
|
| + try_to_emit_quick_check_for_alternative(i == 0) &&
|
| alternative.node()->EmitQuickCheck(compiler,
|
| &new_trace,
|
| - preload_has_checked_bounds,
|
| + preload->preload_has_checked_bounds_,
|
| &alt_gen->possible_success,
|
| &alt_gen->quick_check_details,
|
| - i < choice_count - 1)) {
|
| + !is_last)) {
|
| // Quick check was generated for this choice.
|
| - preload_is_current = true;
|
| - preload_has_checked_bounds = true;
|
| + preload->preload_is_current_ = true;
|
| + preload->preload_has_checked_bounds_ = true;
|
| // On the last choice in the ChoiceNode we generated the quick
|
| // check to fall through on possible success. So now we need to
|
| // generate the full check inline.
|
| - if (i == choice_count - 1) {
|
| + if (is_last) {
|
| macro_assembler->Bind(&alt_gen->possible_success);
|
| new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
|
| - new_trace.set_characters_preloaded(preload_characters);
|
| - new_trace.set_bound_checked_up_to(preload_characters);
|
| + new_trace.set_characters_preloaded(preload->preload_characters_);
|
| + new_trace.set_bound_checked_up_to(preload->preload_characters_);
|
| generate_full_check_inline = true;
|
| }
|
| } else if (alt_gen->quick_check_details.cannot_match()) {
|
| - if (i == choice_count - 1 && !greedy_loop) {
|
| + if (is_last && !is_greedy_loop) {
|
| macro_assembler->GoTo(trace->backtrack());
|
| - }
|
| + } // Else just fall through to the next test.
|
| continue;
|
| } else {
|
| // No quick check was generated. Put the full code here.
|
| @@ -4075,11 +4179,11 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
| // previous cases that go here when they fail. There's no reason to
|
| // insist that they preload characters since the slow check we are about
|
| // to generate probably can't use it.
|
| - if (i != first_normal_choice) {
|
| + if (i != first_choice) {
|
| alt_gen->expects_preload = false;
|
| new_trace.InvalidateCurrentCharacter();
|
| }
|
| - if (i < choice_count - 1) {
|
| + if (!is_last) {
|
| new_trace.set_backtrack(&alt_gen->after);
|
| }
|
| generate_full_check_inline = true;
|
| @@ -4092,38 +4196,10 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
| GenerateGuard(macro_assembler, guards->at(j), &new_trace);
|
| }
|
| alternative.node()->Emit(compiler, &new_trace);
|
| - preload_is_current = false;
|
| + preload->preload_is_current_ = false;
|
| }
|
| macro_assembler->Bind(&alt_gen->after);
|
| }
|
| - if (greedy_loop) {
|
| - macro_assembler->Bind(&greedy_loop_label);
|
| - // If we have unwound to the bottom then backtrack.
|
| - macro_assembler->CheckGreedyLoop(trace->backtrack());
|
| - // Otherwise try the second priority at an earlier position.
|
| - macro_assembler->AdvanceCurrentPosition(-text_length);
|
| - macro_assembler->GoTo(&second_choice);
|
| - }
|
| -
|
| - // At this point we need to generate slow checks for the alternatives where
|
| - // the quick check was inlined. We can recognize these because the associated
|
| - // label was bound.
|
| - for (int i = first_normal_choice; i < choice_count - 1; i++) {
|
| - AlternativeGeneration* alt_gen = alt_gens.at(i);
|
| - Trace new_trace(*current_trace);
|
| - // If there are actions to be flushed we have to limit how many times
|
| - // they are flushed. Take the budget of the parent trace and distribute
|
| - // it fairly amongst the children.
|
| - if (new_trace.actions() != NULL) {
|
| - new_trace.set_flush_budget(new_flush_budget);
|
| - }
|
| - EmitOutOfLineContinuation(compiler,
|
| - &new_trace,
|
| - alternatives_->at(i),
|
| - alt_gen,
|
| - preload_characters,
|
| - alt_gens.at(i + 1)->expects_preload);
|
| - }
|
| }
|
|
|
|
|
|
|