| Index: src/jsregexp.cc
|
| diff --git a/src/jsregexp.cc b/src/jsregexp.cc
|
| index d15a3b60a7cda29c51becc233bd43912b8e86149..d7149cabfc54820627925c56b476b9b3766f56a4 100644
|
| --- a/src/jsregexp.cc
|
| +++ b/src/jsregexp.cc
|
| @@ -1269,7 +1269,7 @@ Handle<FixedArray> RegExpCompiler::Assemble(
|
| List <RegExpNode*> work_list(0);
|
| work_list_ = &work_list;
|
| Label fail;
|
| - macro_assembler->PushBacktrack(&fail);
|
| + macro_assembler_->PushBacktrack(&fail);
|
| Trace new_trace;
|
| start->Emit(this, &new_trace);
|
| macro_assembler_->Bind(&fail);
|
| @@ -2068,11 +2068,12 @@ int NegativeLookaheadChoiceNode:: EatsAtLeast(int still_to_find,
|
| void NegativeLookaheadChoiceNode::GetQuickCheckDetails(
|
| QuickCheckDetails* details,
|
| RegExpCompiler* compiler,
|
| - int filled_in) {
|
| + int filled_in,
|
| + bool not_at_start) {
|
| // Alternative 0 is the negative lookahead, alternative 1 is what comes
|
| // afterwards.
|
| RegExpNode* node = alternatives_->at(1).node();
|
| - return node->GetQuickCheckDetails(details, compiler, filled_in);
|
| + return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
|
| }
|
|
|
|
|
| @@ -2145,7 +2146,8 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
|
| QuickCheckDetails* details,
|
| bool fall_through_on_failure) {
|
| if (details->characters() == 0) return false;
|
| - GetQuickCheckDetails(details, compiler, 0);
|
| + GetQuickCheckDetails(details, compiler, 0, trace->at_start() == Trace::FALSE);
|
| + if (details->cannot_match()) return false;
|
| if (!details->Rationalize(compiler->ascii())) return false;
|
| uint32_t mask = details->mask();
|
| uint32_t value = details->value();
|
| @@ -2210,7 +2212,8 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
|
| // generating a quick check.
|
| void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
|
| RegExpCompiler* compiler,
|
| - int characters_filled_in) {
|
| + int characters_filled_in,
|
| + bool not_at_start) {
|
| ASSERT(characters_filled_in < details->characters());
|
| int characters = details->characters();
|
| int char_mask;
|
| @@ -2322,7 +2325,10 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
|
| }
|
| }
|
| ASSERT(characters_filled_in != details->characters());
|
| - on_success()-> GetQuickCheckDetails(details, compiler, characters_filled_in);
|
| + on_success()-> GetQuickCheckDetails(details,
|
| + compiler,
|
| + characters_filled_in,
|
| + true);
|
| }
|
|
|
|
|
| @@ -2359,6 +2365,13 @@ void QuickCheckDetails::Advance(int by, bool ascii) {
|
|
|
| void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
|
| ASSERT(characters_ == other->characters_);
|
| + if (other->cannot_match_) {
|
| + return;
|
| + }
|
| + if (cannot_match_) {
|
| + *this = *other;
|
| + return;
|
| + }
|
| for (int i = from_index; i < characters_; i++) {
|
| QuickCheckDetails::Position* pos = positions(i);
|
| QuickCheckDetails::Position* other_pos = other->positions(i);
|
| @@ -2395,27 +2408,34 @@ class VisitMarker {
|
|
|
| void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
|
| RegExpCompiler* compiler,
|
| - int characters_filled_in) {
|
| + int characters_filled_in,
|
| + bool not_at_start) {
|
| if (body_can_be_zero_length_ || info()->visited) return;
|
| VisitMarker marker(info());
|
| return ChoiceNode::GetQuickCheckDetails(details,
|
| compiler,
|
| - characters_filled_in);
|
| + characters_filled_in,
|
| + not_at_start);
|
| }
|
|
|
|
|
| void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
|
| RegExpCompiler* compiler,
|
| - int characters_filled_in) {
|
| + int characters_filled_in,
|
| + bool not_at_start) {
|
| + not_at_start = not_at_start || not_at_start_;
|
| int choice_count = alternatives_->length();
|
| ASSERT(choice_count > 0);
|
| alternatives_->at(0).node()->GetQuickCheckDetails(details,
|
| compiler,
|
| - characters_filled_in);
|
| + characters_filled_in,
|
| + not_at_start);
|
| for (int i = 1; i < choice_count; i++) {
|
| QuickCheckDetails new_details(details->characters());
|
| RegExpNode* node = alternatives_->at(i).node();
|
| - node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in);
|
| + node->GetQuickCheckDetails(&new_details, compiler,
|
| + characters_filled_in,
|
| + not_at_start);
|
| // Here we merge the quick match details of the two branches.
|
| details->Merge(&new_details, characters_filled_in);
|
| }
|
| @@ -2541,6 +2561,21 @@ static void EmitBoundaryCheck(AssertionNode::AssertionNodeType type,
|
| }
|
|
|
|
|
| +void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
|
| + RegExpCompiler* compiler,
|
| + int filled_in,
|
| + bool not_at_start) {
|
| + if (type_ == AT_START && not_at_start) {
|
| + details->set_cannot_match();
|
| + return;
|
| + }
|
| + return on_success()->GetQuickCheckDetails(details,
|
| + compiler,
|
| + filled_in,
|
| + not_at_start);
|
| +}
|
| +
|
| +
|
| void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
| RegExpMacroAssembler* assembler = compiler->macro_assembler();
|
| switch (type_) {
|
| @@ -2551,9 +2586,20 @@ void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
| assembler->Bind(&ok);
|
| break;
|
| }
|
| - case AT_START:
|
| - assembler->CheckNotAtStart(trace->backtrack());
|
| - break;
|
| + case AT_START: {
|
| + if (trace->at_start() == Trace::FALSE) {
|
| + assembler->GoTo(trace->backtrack());
|
| + return;
|
| + }
|
| + if (trace->at_start() == Trace::UNKNOWN) {
|
| + assembler->CheckNotAtStart(trace->backtrack());
|
| + Trace at_start_trace = *trace;
|
| + at_start_trace.set_at_start(true);
|
| + on_success()->Emit(compiler, &at_start_trace);
|
| + return;
|
| + }
|
| + }
|
| + break;
|
| case AFTER_NEWLINE:
|
| EmitHat(compiler, on_success(), trace);
|
| return;
|
| @@ -2772,6 +2818,7 @@ void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
| &bound_checked_to);
|
|
|
| Trace successor_trace(*trace);
|
| + successor_trace.set_at_start(false);
|
| successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
|
| RecursionCheck rc(compiler);
|
| on_success()->Emit(compiler, &successor_trace);
|
| @@ -3065,6 +3112,8 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
| Label greedy_loop_label;
|
| Trace counter_backtrack_trace;
|
| counter_backtrack_trace.set_backtrack(&greedy_loop_label);
|
| + if (not_at_start()) counter_backtrack_trace.set_at_start(false);
|
| +
|
| if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
|
| // Here we have special handling for greedy loops containing only text nodes
|
| // and other simple nodes. These are handled by pushing the current
|
| @@ -3079,6 +3128,7 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
| current_trace = &counter_backtrack_trace;
|
| Label greedy_match_failed;
|
| Trace greedy_match_trace;
|
| + if (not_at_start()) greedy_match_trace.set_at_start(false);
|
| greedy_match_trace.set_backtrack(&greedy_match_failed);
|
| Label loop_label;
|
| macro_assembler->Bind(&loop_label);
|
| @@ -3139,6 +3189,11 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
| new_trace.set_bound_checked_up_to(preload_characters);
|
| generate_full_check_inline = true;
|
| }
|
| + } else if (alt_gen->quick_check_details.cannot_match()) {
|
| + if (i == choice_count - 1 && !greedy_loop) {
|
| + macro_assembler->GoTo(trace->backtrack());
|
| + }
|
| + continue;
|
| } else {
|
| // No quick check was generated. Put the full code here.
|
| // If this is not the first choice then there could be slow checks from
|
| @@ -3868,7 +3923,8 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
|
| bool is_greedy,
|
| RegExpTree* body,
|
| RegExpCompiler* compiler,
|
| - RegExpNode* on_success) {
|
| + RegExpNode* on_success,
|
| + bool not_at_start) {
|
| // x{f, t} becomes this:
|
| //
|
| // (r++)<-.
|
| @@ -3907,8 +3963,8 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
|
| if (min > 0 && min <= kMaxUnrolledMinMatches) {
|
| int new_max = (max == kInfinity) ? max : max - min;
|
| // Recurse once to get the loop or optional matches after the fixed ones.
|
| - RegExpNode* answer =
|
| - ToNode(0, new_max, is_greedy, body, compiler, on_success);
|
| + RegExpNode* answer = ToNode(
|
| + 0, new_max, is_greedy, body, compiler, on_success, true);
|
| // Unroll the forced matches from 0 to min. This can cause chains of
|
| // TextNodes (which the parser does not generate). These should be
|
| // combined if it turns out they hinder good code generation.
|
| @@ -3933,6 +3989,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
|
| answer)));
|
| }
|
| answer = alternation;
|
| + if (not_at_start) alternation->set_not_at_start();
|
| }
|
| return answer;
|
| }
|
| @@ -3944,6 +4001,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
|
| ? compiler->AllocateRegister()
|
| : RegExpCompiler::kNoRegister;
|
| LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0);
|
| + if (not_at_start) center->set_not_at_start();
|
| RegExpNode* loop_return = needs_counter
|
| ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
|
| : static_cast<RegExpNode*>(center);
|
| @@ -4764,12 +4822,25 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data,
|
| if (!data->tree->IsAnchored()) {
|
| // Add a .*? at the beginning, outside the body capture, unless
|
| // this expression is anchored at the beginning.
|
| - node = RegExpQuantifier::ToNode(0,
|
| - RegExpTree::kInfinity,
|
| - false,
|
| - new RegExpCharacterClass('*'),
|
| - &compiler,
|
| - captured_body);
|
| + RegExpNode* loop_node =
|
| + RegExpQuantifier::ToNode(0,
|
| + RegExpTree::kInfinity,
|
| + false,
|
| + new RegExpCharacterClass('*'),
|
| + &compiler,
|
| + captured_body,
|
| + data->contains_anchor);
|
| +
|
| + if (data->contains_anchor) {
|
| + // Unroll once, and tell the loop that it's not at the start of the input.
|
| + ChoiceNode* first_step_node = new ChoiceNode(2);
|
| + first_step_node->AddAlternative(GuardedAlternative(captured_body));
|
| + first_step_node->AddAlternative(GuardedAlternative(
|
| + new TextNode(new RegExpCharacterClass('*'), loop_node)));
|
| + node = first_step_node;
|
| + } else {
|
| + node = loop_node;
|
| + }
|
| }
|
| data->node = node;
|
| Analysis analysis(ignore_case);
|
|
|