Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(140)

Unified Diff: src/jsregexp.cc

Issue 537913002: Regexp: Refactor ChoiceNode::Emit (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Nixed 'not' nit Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/jsregexp.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/jsregexp.cc
diff --git a/src/jsregexp.cc b/src/jsregexp.cc
index 7891cd8744bbba2fe168f2b78f739632e5248f24..8a94e819e2c1cbbc04e79e904b80b2e6fef4acdf 100644
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc
@@ -3493,6 +3493,7 @@ void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
if (trace->stop_node() == this) {
+ // Back edge of greedy optimized loop node graph.
int text_length =
GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
DCHECK(text_length != kNodeIsTooComplexForGreedyLoops);
@@ -3759,13 +3760,13 @@ int BoyerMooreLookahead::GetSkipTable(int min_lookahead,
// See comment above on the implementation of GetSkipTable.
-bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
+void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
const int kSize = RegExpMacroAssembler::kTableSize;
int min_lookahead = 0;
int max_lookahead = 0;
- if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return false;
+ if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return;
bool found_single_character = false;
int single_character = 0;
@@ -3789,7 +3790,7 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
// The mask-compare can probably handle this better.
- return false;
+ return;
}
if (found_single_character) {
@@ -3806,7 +3807,7 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
masm->AdvanceCurrentPosition(lookahead_width);
masm->GoTo(&again);
masm->Bind(&cont);
- return true;
+ return;
}
Factory* factory = masm->zone()->isolate()->factory();
@@ -3822,8 +3823,6 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
masm->AdvanceCurrentPosition(skip_distance);
masm->GoTo(&again);
masm->Bind(&cont);
-
- return true;
}
@@ -3905,10 +3904,15 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
* \______________/
*/
-void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
- RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
- int choice_count = alternatives_->length();
+GreedyLoopState::GreedyLoopState(bool not_at_start) {
+ counter_backtrack_trace_.set_backtrack(&label_);
+ if (not_at_start) counter_backtrack_trace_.set_at_start(false);
+}
+
+
+void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
#ifdef DEBUG
+ int choice_count = alternatives_->length();
for (int i = 0; i < choice_count - 1; i++) {
GuardedAlternative alternative = alternatives_->at(i);
ZoneList<Guard*>* guards = alternative.guards();
@@ -3918,12 +3922,39 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
}
}
#endif
+}
+
+
+void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler,
+ Trace* current_trace,
+ PreloadState* state) {
+ if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
+ // Save some time by looking at most one machine word ahead.
+ state->eats_at_least_ =
+ EatsAtLeast(compiler->ascii() ? 4 : 2,
+ kRecursionBudget,
+ current_trace->at_start() == Trace::FALSE_VALUE);
+ }
+ state->preload_characters_ =
+ CalculatePreloadCharacters(compiler, state->eats_at_least_);
+
+ state->preload_is_current_ =
+ (current_trace->characters_preloaded() == state->preload_characters_);
+ state->preload_has_checked_bounds_ = state->preload_is_current_;
+}
+
+
+void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
+ int choice_count = alternatives_->length();
+
+ AssertGuardsMentionRegisters(trace);
LimitResult limit_result = LimitVersions(compiler, trace);
if (limit_result == DONE) return;
DCHECK(limit_result == CONTINUE);
- int new_flush_budget = trace->flush_budget() / choice_count;
+ // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for
+ // other choice nodes we only flush if we are out of code size budget.
if (trace->flush_budget() == 0 && trace->actions() != NULL) {
trace->Flush(compiler, this);
return;
@@ -3931,143 +3962,216 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
RecursionCheck rc(compiler);
- Trace* current_trace = trace;
+ PreloadState preload;
+ preload.init();
+ GreedyLoopState greedy_loop_state(not_at_start());
- int text_length = GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
- bool greedy_loop = false;
- Label greedy_loop_label;
- Trace counter_backtrack_trace;
- counter_backtrack_trace.set_backtrack(&greedy_loop_label);
- if (not_at_start()) counter_backtrack_trace.set_at_start(false);
+ int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0));
+ AlternativeGenerationList alt_gens(choice_count, zone());
if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
- // Here we have special handling for greedy loops containing only text nodes
- // and other simple nodes. These are handled by pushing the current
- // position on the stack and then incrementing the current position each
- // time around the switch. On backtrack we decrement the current position
- // and check it against the pushed value. This avoids pushing backtrack
- // information for each iteration of the loop, which could take up a lot of
- // space.
- greedy_loop = true;
- DCHECK(trace->stop_node() == NULL);
- macro_assembler->PushCurrentPosition();
- current_trace = &counter_backtrack_trace;
- Label greedy_match_failed;
- Trace greedy_match_trace;
- if (not_at_start()) greedy_match_trace.set_at_start(false);
- greedy_match_trace.set_backtrack(&greedy_match_failed);
- Label loop_label;
- macro_assembler->Bind(&loop_label);
- greedy_match_trace.set_stop_node(this);
- greedy_match_trace.set_loop_label(&loop_label);
- alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
- macro_assembler->Bind(&greedy_match_failed);
+ trace = EmitGreedyLoop(compiler,
+ trace,
+ &alt_gens,
+ &preload,
+ &greedy_loop_state,
+ text_length);
+ } else {
+ // TODO(erikcorry): Delete this. We don't need this label, but it makes us
+ // match the traces produced pre-cleanup.
+ Label second_choice;
+ compiler->macro_assembler()->Bind(&second_choice);
+
+ preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace);
+
+ EmitChoices(compiler,
+ &alt_gens,
+ 0,
+ trace,
+ false, // Not greedy loop.
+ &preload);
+ }
+
+ // At this point we need to generate slow checks for the alternatives where
+ // the quick check was inlined. We can recognize these because the associated
+ // label was bound.
+ int new_flush_budget = trace->flush_budget() / choice_count;
+ for (int i = 0; i < choice_count - 1; i++) {
+ AlternativeGeneration* alt_gen = alt_gens.at(i);
+ Trace new_trace(*trace);
+ // If there are actions to be flushed we have to limit how many times
+ // they are flushed. Take the budget of the parent trace and distribute
+ // it fairly amongst the children.
+ if (new_trace.actions() != NULL) {
+ new_trace.set_flush_budget(new_flush_budget);
+ }
+ EmitOutOfLineContinuation(compiler,
+ &new_trace,
+ alternatives_->at(i),
+ alt_gen,
+ preload.preload_characters_,
+ alt_gens.at(i + 1)->expects_preload);
}
+}
+
+
+Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler,
+ Trace* trace,
+ AlternativeGenerationList* alt_gens,
+ PreloadState* preload,
+ GreedyLoopState* greedy_loop_state,
+ int text_length) {
+ RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
+ // Here we have special handling for greedy loops containing only text nodes
+ // and other simple nodes. These are handled by pushing the current
+ // position on the stack and then incrementing the current position each
+ // time around the switch. On backtrack we decrement the current position
+ // and check it against the pushed value. This avoids pushing backtrack
+ // information for each iteration of the loop, which could take up a lot of
+ // space.
+ DCHECK(trace->stop_node() == NULL);
+ macro_assembler->PushCurrentPosition();
+ Label greedy_match_failed;
+ Trace greedy_match_trace;
+ if (not_at_start()) greedy_match_trace.set_at_start(false);
+ greedy_match_trace.set_backtrack(&greedy_match_failed);
+ Label loop_label;
+ macro_assembler->Bind(&loop_label);
+ greedy_match_trace.set_stop_node(this);
+ greedy_match_trace.set_loop_label(&loop_label);
+ alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
+ macro_assembler->Bind(&greedy_match_failed);
Label second_choice; // For use in greedy matches.
macro_assembler->Bind(&second_choice);
- int first_normal_choice = greedy_loop ? 1 : 0;
-
- bool not_at_start = current_trace->at_start() == Trace::FALSE_VALUE;
- const int kEatsAtLeastNotYetInitialized = -1;
- int eats_at_least = kEatsAtLeastNotYetInitialized;
-
- bool skip_was_emitted = false;
-
- if (!greedy_loop && choice_count == 2) {
- GuardedAlternative alt1 = alternatives_->at(1);
- if (alt1.guards() == NULL || alt1.guards()->length() == 0) {
- RegExpNode* eats_anything_node = alt1.node();
- if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) ==
- this) {
- // At this point we know that we are at a non-greedy loop that will eat
- // any character one at a time. Any non-anchored regexp has such a
- // loop prepended to it in order to find where it starts. We look for
- // a pattern of the form ...abc... where we can look 6 characters ahead
- // and step forwards 3 if the character is not one of abc. Abc need
- // not be atoms, they can be any reasonably limited character class or
- // small alternation.
- DCHECK(trace->is_trivial()); // This is the case on LoopChoiceNodes.
- BoyerMooreLookahead* lookahead = bm_info(not_at_start);
- if (lookahead == NULL) {
- eats_at_least = Min(kMaxLookaheadForBoyerMoore,
- EatsAtLeast(kMaxLookaheadForBoyerMoore,
- kRecursionBudget,
- not_at_start));
- if (eats_at_least >= 1) {
- BoyerMooreLookahead* bm =
- new(zone()) BoyerMooreLookahead(eats_at_least,
- compiler,
- zone());
- GuardedAlternative alt0 = alternatives_->at(0);
- alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, not_at_start);
- skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
- }
- } else {
- skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);
- }
- }
- }
+ Trace* new_trace = greedy_loop_state->counter_backtrack_trace();
+
+ EmitChoices(compiler,
+ alt_gens,
+ 1,
+ new_trace,
+ true, // Is greedy loop.
+ preload);
+
+ macro_assembler->Bind(greedy_loop_state->label());
+ // If we have unwound to the bottom then backtrack.
+ macro_assembler->CheckGreedyLoop(trace->backtrack());
+ // Otherwise try the second priority at an earlier position.
+ macro_assembler->AdvanceCurrentPosition(-text_length);
+ macro_assembler->GoTo(&second_choice);
+ return new_trace;
+}
+
+int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
+ Trace* trace) {
+ int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized;
+ if (alternatives_->length() != 2) return eats_at_least;
+
+ GuardedAlternative alt1 = alternatives_->at(1);
+ if (alt1.guards() != NULL && alt1.guards()->length() != 0) {
+ return eats_at_least;
}
+ RegExpNode* eats_anything_node = alt1.node();
+ if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) {
+ return eats_at_least;
+ }
+
+ // Really we should be creating a new trace when we execute this function,
+ // but there is no need, because the code it generates cannot backtrack, and
+ // we always arrive here with a trivial trace (since it's the entry to a
+ // loop. That also implies that there are no preloaded characters, which is
+ // good, because it means we won't be violating any assumptions by
+ // overwriting those characters with new load instructions.
+ DCHECK(trace->is_trivial());
- if (eats_at_least == kEatsAtLeastNotYetInitialized) {
- // Save some time by looking at most one machine word ahead.
- eats_at_least =
- EatsAtLeast(compiler->ascii() ? 4 : 2, kRecursionBudget, not_at_start);
+ RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
+ // At this point we know that we are at a non-greedy loop that will eat
+ // any character one at a time. Any non-anchored regexp has such a
+ // loop prepended to it in order to find where it starts. We look for
+ // a pattern of the form ...abc... where we can look 6 characters ahead
+ // and step forwards 3 if the character is not one of abc. Abc need
+ // not be atoms, they can be any reasonably limited character class or
+ // small alternation.
+ BoyerMooreLookahead* bm = bm_info(false);
+ if (bm == NULL) {
+ eats_at_least = Min(kMaxLookaheadForBoyerMoore,
+ EatsAtLeast(kMaxLookaheadForBoyerMoore,
+ kRecursionBudget,
+ false));
+ if (eats_at_least >= 1) {
+ bm = new(zone()) BoyerMooreLookahead(eats_at_least,
+ compiler,
+ zone());
+ GuardedAlternative alt0 = alternatives_->at(0);
+ alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false);
+ }
+ }
+ if (bm != NULL) {
+ bm->EmitSkipInstructions(macro_assembler);
}
- int preload_characters = CalculatePreloadCharacters(compiler, eats_at_least);
+ return eats_at_least;
+}
- bool preload_is_current = !skip_was_emitted &&
- (current_trace->characters_preloaded() == preload_characters);
- bool preload_has_checked_bounds = preload_is_current;
- AlternativeGenerationList alt_gens(choice_count, zone());
+void ChoiceNode::EmitChoices(RegExpCompiler* compiler,
+ AlternativeGenerationList* alt_gens,
+ int first_choice,
+ Trace* trace,
+ bool is_greedy_loop,
+ PreloadState* preload) {
+ RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
+ SetUpPreLoad(compiler, trace, preload);
// For now we just call all choices one after the other. The idea ultimately
// is to use the Dispatch table to try only the relevant ones.
- for (int i = first_normal_choice; i < choice_count; i++) {
+ int choice_count = alternatives_->length();
+
+ int new_flush_budget = trace->flush_budget() / choice_count;
+
+ for (int i = first_choice; i < choice_count; i++) {
+ bool is_last = i == choice_count - 1;
GuardedAlternative alternative = alternatives_->at(i);
- AlternativeGeneration* alt_gen = alt_gens.at(i);
- alt_gen->quick_check_details.set_characters(preload_characters);
+ AlternativeGeneration* alt_gen = alt_gens->at(i);
+ alt_gen->quick_check_details.set_characters(preload->preload_characters_);
ZoneList<Guard*>* guards = alternative.guards();
int guard_count = (guards == NULL) ? 0 : guards->length();
- Trace new_trace(*current_trace);
- new_trace.set_characters_preloaded(preload_is_current ?
- preload_characters :
+ Trace new_trace(*trace);
+ new_trace.set_characters_preloaded(preload->preload_is_current_ ?
+ preload->preload_characters_ :
0);
- if (preload_has_checked_bounds) {
- new_trace.set_bound_checked_up_to(preload_characters);
+ if (preload->preload_has_checked_bounds_) {
+ new_trace.set_bound_checked_up_to(preload->preload_characters_);
}
new_trace.quick_check_performed()->Clear();
if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
- alt_gen->expects_preload = preload_is_current;
+ alt_gen->expects_preload = preload->preload_is_current_;
bool generate_full_check_inline = false;
if (FLAG_regexp_optimization &&
- try_to_emit_quick_check_for_alternative(i) &&
+ try_to_emit_quick_check_for_alternative(i == 0) &&
alternative.node()->EmitQuickCheck(compiler,
&new_trace,
- preload_has_checked_bounds,
+ preload->preload_has_checked_bounds_,
&alt_gen->possible_success,
&alt_gen->quick_check_details,
- i < choice_count - 1)) {
+ !is_last)) {
// Quick check was generated for this choice.
- preload_is_current = true;
- preload_has_checked_bounds = true;
+ preload->preload_is_current_ = true;
+ preload->preload_has_checked_bounds_ = true;
// On the last choice in the ChoiceNode we generated the quick
// check to fall through on possible success. So now we need to
// generate the full check inline.
- if (i == choice_count - 1) {
+ if (is_last) {
macro_assembler->Bind(&alt_gen->possible_success);
new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
- new_trace.set_characters_preloaded(preload_characters);
- new_trace.set_bound_checked_up_to(preload_characters);
+ new_trace.set_characters_preloaded(preload->preload_characters_);
+ new_trace.set_bound_checked_up_to(preload->preload_characters_);
generate_full_check_inline = true;
}
} else if (alt_gen->quick_check_details.cannot_match()) {
- if (i == choice_count - 1 && !greedy_loop) {
+ if (is_last && !is_greedy_loop) {
macro_assembler->GoTo(trace->backtrack());
- }
+ } // Else just fall through to the next test.
continue;
} else {
// No quick check was generated. Put the full code here.
@@ -4075,11 +4179,11 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
// previous cases that go here when they fail. There's no reason to
// insist that they preload characters since the slow check we are about
// to generate probably can't use it.
- if (i != first_normal_choice) {
+ if (i != first_choice) {
alt_gen->expects_preload = false;
new_trace.InvalidateCurrentCharacter();
}
- if (i < choice_count - 1) {
+ if (!is_last) {
new_trace.set_backtrack(&alt_gen->after);
}
generate_full_check_inline = true;
@@ -4092,38 +4196,10 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
GenerateGuard(macro_assembler, guards->at(j), &new_trace);
}
alternative.node()->Emit(compiler, &new_trace);
- preload_is_current = false;
+ preload->preload_is_current_ = false;
}
macro_assembler->Bind(&alt_gen->after);
}
- if (greedy_loop) {
- macro_assembler->Bind(&greedy_loop_label);
- // If we have unwound to the bottom then backtrack.
- macro_assembler->CheckGreedyLoop(trace->backtrack());
- // Otherwise try the second priority at an earlier position.
- macro_assembler->AdvanceCurrentPosition(-text_length);
- macro_assembler->GoTo(&second_choice);
- }
-
- // At this point we need to generate slow checks for the alternatives where
- // the quick check was inlined. We can recognize these because the associated
- // label was bound.
- for (int i = first_normal_choice; i < choice_count - 1; i++) {
- AlternativeGeneration* alt_gen = alt_gens.at(i);
- Trace new_trace(*current_trace);
- // If there are actions to be flushed we have to limit how many times
- // they are flushed. Take the budget of the parent trace and distribute
- // it fairly amongst the children.
- if (new_trace.actions() != NULL) {
- new_trace.set_flush_budget(new_flush_budget);
- }
- EmitOutOfLineContinuation(compiler,
- &new_trace,
- alternatives_->at(i),
- alt_gen,
- preload_characters,
- alt_gens.at(i + 1)->expects_preload);
- }
}
« no previous file with comments | « src/jsregexp.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698