Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(433)

Unified Diff: src/jsregexp.cc

Issue 19539: Irregexp: Added derived knowledge of whether we are at the start of input. (Closed)
Patch Set: Fixed lint issues. Created 11 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/jsregexp.h ('k') | src/parser.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/jsregexp.cc
diff --git a/src/jsregexp.cc b/src/jsregexp.cc
index d15a3b60a7cda29c51becc233bd43912b8e86149..d7149cabfc54820627925c56b476b9b3766f56a4 100644
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc
@@ -1269,7 +1269,7 @@ Handle<FixedArray> RegExpCompiler::Assemble(
List <RegExpNode*> work_list(0);
work_list_ = &work_list;
Label fail;
- macro_assembler->PushBacktrack(&fail);
+ macro_assembler_->PushBacktrack(&fail);
Trace new_trace;
start->Emit(this, &new_trace);
macro_assembler_->Bind(&fail);
@@ -2068,11 +2068,12 @@ int NegativeLookaheadChoiceNode:: EatsAtLeast(int still_to_find,
void NegativeLookaheadChoiceNode::GetQuickCheckDetails(
QuickCheckDetails* details,
RegExpCompiler* compiler,
- int filled_in) {
+ int filled_in,
+ bool not_at_start) {
// Alternative 0 is the negative lookahead, alternative 1 is what comes
// afterwards.
RegExpNode* node = alternatives_->at(1).node();
- return node->GetQuickCheckDetails(details, compiler, filled_in);
+ return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
}
@@ -2145,7 +2146,8 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
QuickCheckDetails* details,
bool fall_through_on_failure) {
if (details->characters() == 0) return false;
- GetQuickCheckDetails(details, compiler, 0);
+ GetQuickCheckDetails(details, compiler, 0, trace->at_start() == Trace::FALSE);
+ if (details->cannot_match()) return false;
if (!details->Rationalize(compiler->ascii())) return false;
uint32_t mask = details->mask();
uint32_t value = details->value();
@@ -2210,7 +2212,8 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
// generating a quick check.
void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler,
- int characters_filled_in) {
+ int characters_filled_in,
+ bool not_at_start) {
ASSERT(characters_filled_in < details->characters());
int characters = details->characters();
int char_mask;
@@ -2322,7 +2325,10 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
}
}
ASSERT(characters_filled_in != details->characters());
- on_success()-> GetQuickCheckDetails(details, compiler, characters_filled_in);
+ on_success()-> GetQuickCheckDetails(details,
+ compiler,
+ characters_filled_in,
+ true);
}
@@ -2359,6 +2365,13 @@ void QuickCheckDetails::Advance(int by, bool ascii) {
void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
ASSERT(characters_ == other->characters_);
+ if (other->cannot_match_) {
+ return;
+ }
+ if (cannot_match_) {
+ *this = *other;
+ return;
+ }
for (int i = from_index; i < characters_; i++) {
QuickCheckDetails::Position* pos = positions(i);
QuickCheckDetails::Position* other_pos = other->positions(i);
@@ -2395,27 +2408,34 @@ class VisitMarker {
void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler,
- int characters_filled_in) {
+ int characters_filled_in,
+ bool not_at_start) {
if (body_can_be_zero_length_ || info()->visited) return;
VisitMarker marker(info());
return ChoiceNode::GetQuickCheckDetails(details,
compiler,
- characters_filled_in);
+ characters_filled_in,
+ not_at_start);
}
void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler,
- int characters_filled_in) {
+ int characters_filled_in,
+ bool not_at_start) {
+ not_at_start = not_at_start || not_at_start_;
int choice_count = alternatives_->length();
ASSERT(choice_count > 0);
alternatives_->at(0).node()->GetQuickCheckDetails(details,
compiler,
- characters_filled_in);
+ characters_filled_in,
+ not_at_start);
for (int i = 1; i < choice_count; i++) {
QuickCheckDetails new_details(details->characters());
RegExpNode* node = alternatives_->at(i).node();
- node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in);
+ node->GetQuickCheckDetails(&new_details, compiler,
+ characters_filled_in,
+ not_at_start);
// Here we merge the quick match details of the two branches.
details->Merge(&new_details, characters_filled_in);
}
@@ -2541,6 +2561,21 @@ static void EmitBoundaryCheck(AssertionNode::AssertionNodeType type,
}
+void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
+ RegExpCompiler* compiler,
+ int filled_in,
+ bool not_at_start) {
+ if (type_ == AT_START && not_at_start) {
+ details->set_cannot_match();
+ return;
+ }
+ return on_success()->GetQuickCheckDetails(details,
+ compiler,
+ filled_in,
+ not_at_start);
+}
+
+
void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
RegExpMacroAssembler* assembler = compiler->macro_assembler();
switch (type_) {
@@ -2551,9 +2586,20 @@ void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
assembler->Bind(&ok);
break;
}
- case AT_START:
- assembler->CheckNotAtStart(trace->backtrack());
- break;
+ case AT_START: {
+ if (trace->at_start() == Trace::FALSE) {
+ assembler->GoTo(trace->backtrack());
+ return;
+ }
+ if (trace->at_start() == Trace::UNKNOWN) {
+ assembler->CheckNotAtStart(trace->backtrack());
+ Trace at_start_trace = *trace;
+ at_start_trace.set_at_start(true);
+ on_success()->Emit(compiler, &at_start_trace);
+ return;
+ }
+ }
+ break;
case AFTER_NEWLINE:
EmitHat(compiler, on_success(), trace);
return;
@@ -2772,6 +2818,7 @@ void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
&bound_checked_to);
Trace successor_trace(*trace);
+ successor_trace.set_at_start(false);
successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
RecursionCheck rc(compiler);
on_success()->Emit(compiler, &successor_trace);
@@ -3065,6 +3112,8 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
Label greedy_loop_label;
Trace counter_backtrack_trace;
counter_backtrack_trace.set_backtrack(&greedy_loop_label);
+ if (not_at_start()) counter_backtrack_trace.set_at_start(false);
+
if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
// Here we have special handling for greedy loops containing only text nodes
// and other simple nodes. These are handled by pushing the current
@@ -3079,6 +3128,7 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
current_trace = &counter_backtrack_trace;
Label greedy_match_failed;
Trace greedy_match_trace;
+ if (not_at_start()) greedy_match_trace.set_at_start(false);
greedy_match_trace.set_backtrack(&greedy_match_failed);
Label loop_label;
macro_assembler->Bind(&loop_label);
@@ -3139,6 +3189,11 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
new_trace.set_bound_checked_up_to(preload_characters);
generate_full_check_inline = true;
}
+ } else if (alt_gen->quick_check_details.cannot_match()) {
+ if (i == choice_count - 1 && !greedy_loop) {
+ macro_assembler->GoTo(trace->backtrack());
+ }
+ continue;
} else {
// No quick check was generated. Put the full code here.
// If this is not the first choice then there could be slow checks from
@@ -3868,7 +3923,8 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
bool is_greedy,
RegExpTree* body,
RegExpCompiler* compiler,
- RegExpNode* on_success) {
+ RegExpNode* on_success,
+ bool not_at_start) {
// x{f, t} becomes this:
//
// (r++)<-.
@@ -3907,8 +3963,8 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
if (min > 0 && min <= kMaxUnrolledMinMatches) {
int new_max = (max == kInfinity) ? max : max - min;
// Recurse once to get the loop or optional matches after the fixed ones.
- RegExpNode* answer =
- ToNode(0, new_max, is_greedy, body, compiler, on_success);
+ RegExpNode* answer = ToNode(
+ 0, new_max, is_greedy, body, compiler, on_success, true);
// Unroll the forced matches from 0 to min. This can cause chains of
// TextNodes (which the parser does not generate). These should be
// combined if it turns out they hinder good code generation.
@@ -3933,6 +3989,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
answer)));
}
answer = alternation;
+ if (not_at_start) alternation->set_not_at_start();
}
return answer;
}
@@ -3944,6 +4001,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
? compiler->AllocateRegister()
: RegExpCompiler::kNoRegister;
LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0);
+ if (not_at_start) center->set_not_at_start();
RegExpNode* loop_return = needs_counter
? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
: static_cast<RegExpNode*>(center);
@@ -4764,12 +4822,25 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data,
if (!data->tree->IsAnchored()) {
// Add a .*? at the beginning, outside the body capture, unless
// this expression is anchored at the beginning.
- node = RegExpQuantifier::ToNode(0,
- RegExpTree::kInfinity,
- false,
- new RegExpCharacterClass('*'),
- &compiler,
- captured_body);
+ RegExpNode* loop_node =
+ RegExpQuantifier::ToNode(0,
+ RegExpTree::kInfinity,
+ false,
+ new RegExpCharacterClass('*'),
+ &compiler,
+ captured_body,
+ data->contains_anchor);
+
+ if (data->contains_anchor) {
+ // Unroll once, and tell the loop that it's not at the start of the input.
+ ChoiceNode* first_step_node = new ChoiceNode(2);
+ first_step_node->AddAlternative(GuardedAlternative(captured_body));
+ first_step_node->AddAlternative(GuardedAlternative(
+ new TextNode(new RegExpCharacterClass('*'), loop_node)));
+ node = first_step_node;
+ } else {
+ node = loop_node;
+ }
}
data->node = node;
Analysis analysis(ignore_case);
« no previous file with comments | « src/jsregexp.h ('k') | src/parser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698