Chromium Code Reviews| Index: src/jsregexp.cc |
| =================================================================== |
| --- src/jsregexp.cc (revision 1094) |
| +++ src/jsregexp.cc (working copy) |
| @@ -1522,18 +1522,6 @@ |
| } |
| -void EndNode::EmitInfoChecks(RegExpMacroAssembler* assembler, Trace* trace) { |
| - if (info()->at_end) { |
| - Label succeed; |
| - // LoadCurrentCharacter will go to the label if we are at the end of the |
| - // input string. |
| - assembler->LoadCurrentCharacter(0, &succeed); |
| - assembler->GoTo(trace->backtrack()); |
| - assembler->Bind(&succeed); |
| - } |
| -} |
| - |
| - |
| bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) { |
| if (!trace->is_trivial()) { |
| return trace->Flush(compiler, this); |
| @@ -1542,7 +1530,6 @@ |
| if (!label()->is_bound()) { |
| assembler->Bind(label()); |
| } |
| - EmitInfoChecks(assembler, trace); |
| assembler->ReadCurrentPositionFromRegister(current_position_register_); |
| assembler->ReadStackPointerFromRegister(stack_pointer_register_); |
| // Now that we have unwound the stack we find at the top of the stack the |
| @@ -1562,11 +1549,9 @@ |
| } |
| switch (action_) { |
| case ACCEPT: |
| - EmitInfoChecks(assembler, trace); |
| assembler->Succeed(); |
| return true; |
| case BACKTRACK: |
| - ASSERT(!info()->at_end); |
| assembler->GoTo(trace->backtrack()); |
| return true; |
| case NEGATIVE_SUBMATCH_SUCCESS: |
| @@ -1935,13 +1920,6 @@ |
| RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, |
| Trace* trace) { |
| - // TODO(erikcorry): Implement support. |
| - if (info_.follows_word_interest || |
| - info_.follows_newline_interest || |
| - info_.follows_start_interest) { |
| - return FAIL; |
| - } |
| - |
| // If we are generating a greedy loop then don't stop and don't reuse code. |
| if (trace->stop_node() != NULL) { |
| return CONTINUE; |
| @@ -1990,6 +1968,19 @@ |
| } |
| +int AssertionNode::EatsAtLeast(int recursion_depth) { |
| + if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; |
| + return on_success()->EatsAtLeast(recursion_depth + 1); |
| +} |
| + |
| + |
| +int BackReferenceNode::EatsAtLeast(int recursion_depth) { |
| + if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; |
| + return on_success()->EatsAtLeast(recursion_depth + 1); |
| +} |
| + |
| + |
| + |
| int TextNode::EatsAtLeast(int recursion_depth) { |
| int answer = Length(); |
| if (answer >= 4) return answer; |
| @@ -2257,7 +2248,7 @@ |
| void QuickCheckDetails::Advance(int by, bool ascii) { |
| - ASSERT(by > 0); |
| + ASSERT(by >= 0); |
| if (by >= characters_) { |
| Clear(); |
| return; |
| @@ -2342,6 +2333,116 @@ |
| } |
| +// Check for [0-9A-Z_a-z]. |
| +static void EmitWordCheck(RegExpMacroAssembler* assembler, |
| + Label* word, |
| + Label* non_word, |
| + bool fall_through_on_word) { |
| + assembler->CheckCharacterGT('z', non_word); |
| + assembler->CheckCharacterLT('0', non_word); |
| + assembler->CheckCharacterGT('a' - 1, word); |
| + assembler->CheckCharacterLT('9' + 1, word); |
| + assembler->CheckCharacterLT('A', non_word); |
| + assembler->CheckCharacterLT('Z' + 1, word); |
| + if (fall_through_on_word) { |
| + assembler->CheckNotCharacter('_', non_word); |
| + } else { |
| + assembler->CheckCharacter('_', word); |
| + } |
| +} |
| + |
| + |
| +bool AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
| + RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| + switch (type_) { |
| + case AT_END: { |
| + Label ok; |
| + assembler->LoadCurrentCharacter(trace->cp_offset(), &ok); |
| + assembler->GoTo(trace->backtrack()); |
| + assembler->Bind(&ok); |
| + break; |
| + } |
| + case AT_START: |
| + assembler->CheckNotAtStart(trace->backtrack()); |
| + break; |
| + case AFTER_NEWLINE: { |
| + // Check whether we are either at the start or immediately after a |
| + // newline. |
| + Trace new_trace(*trace); |
| + // Invalidate the current character register. |
| + new_trace.AdvanceCurrentPositionInTrace(0, compiler->ascii()); |
| + |
| + Label ok; |
| + if (new_trace.cp_offset() == 0) { |
| + assembler->CheckAtStart(&ok); |
| + } |
| + assembler->LoadCurrentCharacter(-1, new_trace.backtrack(), false); |
|
Christian Plesner Hansen
2009/01/19 10:28:59
Maybe a comment about why this works when you're a
|
| + // Newline means \n, \r, 0x2028 or 0x2029. |
| + if (!compiler->ascii()) { |
| + assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); |
| + } |
| + assembler->CheckCharacter('\n', &ok); |
| + assembler->CheckNotCharacter('\r', new_trace.backtrack()); |
| + assembler->Bind(&ok); |
| + return on_success()->Emit(compiler, &new_trace); |
| + } |
| + case AT_NON_BOUNDARY: |
| + case AT_BOUNDARY: { |
| + Label before_non_word; |
|
Christian Plesner Hansen
2009/01/19 10:28:59
Maybe you could split these huge cases out into se
|
| + Label before_word; |
| + if (trace->characters_preloaded() != 1) { |
| + assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); |
| + } |
| + // Fall through on non-word. |
| + EmitWordCheck(assembler, &before_word, &before_non_word, false); |
| + |
| + // Invalidate the current character register. |
| + Trace new_trace(*trace); |
| + new_trace.AdvanceCurrentPositionInTrace(0, compiler->ascii()); |
| + |
| + Label ok; |
| + Label* boundary; |
| + Label* not_boundary; |
| + if (type_ == AT_BOUNDARY) { |
| + boundary = &ok; |
| + not_boundary = new_trace.backtrack(); |
| + } else { |
| + not_boundary = &ok; |
| + boundary = new_trace.backtrack(); |
| + } |
| + |
| + // Next character is not a word character. |
| + assembler->Bind(&before_non_word); |
| + if (new_trace.cp_offset() == 0) { |
| + assembler->CheckAtStart(not_boundary); |
| + } |
| + assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, |
|
Christian Plesner Hansen
2009/01/19 10:28:59
What happens here if we're right at the beginning
|
| + &ok, // Unused dummy label in this call. |
| + false); |
| + // Fall through on non-word. |
| + EmitWordCheck(assembler, boundary, not_boundary, false); |
| + assembler->GoTo(not_boundary); |
| + |
| + // Next character is a word character. |
| + assembler->Bind(&before_word); |
| + if (new_trace.cp_offset() == 0) { |
| + assembler->CheckAtStart(boundary); |
| + } |
| + assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, |
| + &ok, // Unused dummy label in this call. |
| + false); |
| + bool fall_through_on_word = (type_ == AT_NON_BOUNDARY); |
| + EmitWordCheck(assembler, not_boundary, boundary, fall_through_on_word); |
| + |
| + assembler->Bind(&ok); |
| + |
| + return on_success()->Emit(compiler, &new_trace); |
| + } |
| + } |
| + return on_success()->Emit(compiler, trace); |
| +} |
| + |
| + |
| // We call this repeatedly to generate code for each pass over the text node. |
| // The passes are in increasing order of difficulty because we hope one |
| // of the first passes will fail in which case we are saved the work of the |
| @@ -2487,17 +2588,6 @@ |
| if (limit_result == DONE) return true; |
| ASSERT(limit_result == CONTINUE); |
| - if (info()->follows_word_interest || |
| - info()->follows_newline_interest || |
| - info()->follows_start_interest) { |
| - return false; |
| - } |
| - |
| - if (info()->at_end) { |
| - compiler->macro_assembler()->GoTo(trace->backtrack()); |
| - return true; |
| - } |
| - |
| if (compiler->ascii()) { |
| int dummy = 0; |
| TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy); |
| @@ -2562,7 +2652,9 @@ |
| void Trace::AdvanceCurrentPositionInTrace(int by, bool ascii) { |
| - ASSERT(by > 0); |
| + // We can call this with by == 0. In that case it just means that the |
|
Christian Plesner Hansen
2009/01/19 10:28:59
Maybe split invalidation out into a separate metho
|
| + // current position register has been invalidated. |
| + |
| // We don't have an instruction for shifting the current character register |
| // down or for using a shifted value for anything so lets just forget that |
| // we preloaded any characters into it. |
| @@ -2616,12 +2708,6 @@ |
| if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { |
| return kNodeIsTooComplexForGreedyLoops; |
| } |
| - NodeInfo* info = node->info(); |
| - if (info->follows_word_interest || |
| - info->follows_newline_interest || |
| - info->follows_start_interest) { |
| - return kNodeIsTooComplexForGreedyLoops; |
| - } |
| int node_length = node->GreedyLoopTextLength(); |
| if (node_length == kNodeIsTooComplexForGreedyLoops) { |
| return kNodeIsTooComplexForGreedyLoops; |
| @@ -3096,20 +3182,6 @@ |
| } |
| case POSITIVE_SUBMATCH_SUCCESS: |
| if (!trace->is_trivial()) return trace->Flush(compiler, this); |
| - // TODO(erikcorry): Implement support. |
| - if (info()->follows_word_interest || |
| - info()->follows_newline_interest || |
| - info()->follows_start_interest) { |
| - return false; |
| - } |
| - if (info()->at_end) { |
| - Label at_end; |
| - // Load current character jumps to the label if we are beyond the string |
| - // end. |
| - assembler->LoadCurrentCharacter(0, &at_end); |
| - assembler->GoTo(trace->backtrack()); |
| - assembler->Bind(&at_end); |
| - } |
| assembler->ReadCurrentPositionFromRegister( |
| data_.u_submatch.current_position_register); |
| assembler->ReadStackPointerFromRegister( |
| @@ -3136,19 +3208,11 @@ |
| RecursionCheck rc(compiler); |
| ASSERT_EQ(start_reg_ + 1, end_reg_); |
| - if (info()->at_end) { |
| - // If we are constrained to match at the end of the input then succeed |
| - // iff the back reference is empty. |
| - assembler->CheckNotRegistersEqual(start_reg_, |
| - end_reg_, |
| - trace->backtrack()); |
| + if (compiler->ignore_case()) { |
| + assembler->CheckNotBackReferenceIgnoreCase(start_reg_, |
| + trace->backtrack()); |
| } else { |
| - if (compiler->ignore_case()) { |
| - assembler->CheckNotBackReferenceIgnoreCase(start_reg_, |
| - trace->backtrack()); |
| - } else { |
| - assembler->CheckNotBackReference(start_reg_, trace->backtrack()); |
| - } |
| + assembler->CheckNotBackReference(start_reg_, trace->backtrack()); |
| } |
| return on_success()->Emit(compiler, trace); |
| } |
| @@ -3389,6 +3453,33 @@ |
| } |
| +void DotPrinter::VisitAssertion(AssertionNode* that) { |
| + stream()->Add(" n%p [", that); |
| + switch (that->type()) { |
| + case AssertionNode::AT_END: |
| + stream()->Add("label=\"$\", shape=septagon"); |
| + break; |
| + case AssertionNode::AT_START: |
| + stream()->Add("label=\"^\", shape=septagon"); |
| + break; |
| + case AssertionNode::AT_BOUNDARY: |
| + stream()->Add("label=\"\\b\", shape=septagon"); |
| + break; |
| + case AssertionNode::AT_NON_BOUNDARY: |
| + stream()->Add("label=\"\\B\", shape=septagon"); |
| + break; |
| + case AssertionNode::AFTER_NEWLINE: |
| + stream()->Add("label=\"(?<=\\n)\", shape=septagon"); |
| + break; |
| + } |
| + stream()->Add("];\n"); |
| + PrintAttributes(that); |
| + RegExpNode* successor = that->on_success(); |
| + stream()->Add(" n%p -> n%p;\n", that, successor); |
| + Visit(successor); |
| +} |
| + |
| + |
| void DotPrinter::VisitAction(ActionNode* that) { |
| stream()->Add(" n%p [", that); |
| switch (that->type_) { |
| @@ -3749,22 +3840,50 @@ |
| NodeInfo info; |
| switch (type()) { |
| case START_OF_LINE: |
| - info.follows_newline_interest = true; |
| - break; |
| + return AssertionNode::AfterNewline(on_success); |
| case START_OF_INPUT: |
| - info.follows_start_interest = true; |
| - break; |
| - case BOUNDARY: case NON_BOUNDARY: |
| - info.follows_word_interest = true; |
| - break; |
| + return AssertionNode::AtStart(on_success); |
| + case BOUNDARY: |
| + return AssertionNode::AtBoundary(on_success); |
| + case NON_BOUNDARY: |
| + return AssertionNode::AtNonBoundary(on_success); |
| case END_OF_INPUT: |
| - info.at_end = true; |
| - break; |
| + return AssertionNode::AtEnd(on_success); |
| case END_OF_LINE: |
| - // This is wrong but has the effect of making the compiler abort. |
| - info.at_end = true; |
| + // Compile $ in multiline regexps as an alternation with a positive |
| + // lookahead in one side and an end-of-input on the other side. |
| + // We need two registers for the lookahead. |
| + int stack_pointer_register = compiler->AllocateRegister(); |
| + int position_register = compiler->AllocateRegister(); |
| + // The ChoiceNode to distinguish between a newline and end-of-input. |
| + ChoiceNode* result = new ChoiceNode(2); |
| + // Create a newline atom. |
| + ZoneList<CharacterRange>* newline_ranges = |
| + new ZoneList<CharacterRange>(3); |
| + CharacterRange::AddClassEscape('n', newline_ranges); |
| + RegExpCharacterClass* newline_atom = |
| + new RegExpCharacterClass(newline_ranges, false); |
|
Christian Plesner Hansen
2009/01/19 10:28:59
You should be able to just use "new RegExpCharacte
|
| + ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); |
| + elms->Add(TextElement::CharClass(newline_atom)); |
| + TextNode* newline_matcher = new TextNode( |
|
Christian Plesner Hansen
2009/01/19 10:28:59
You should be able to use "new TextNode(newline_at
|
| + elms, |
| + ActionNode::PositiveSubmatchSuccess(stack_pointer_register, |
| + position_register, |
| + on_success)); |
| + // Create an end-of-input matcher. |
| + RegExpNode* end_of_line = ActionNode::BeginSubmatch( |
| + stack_pointer_register, |
| + position_register, |
| + newline_matcher); |
| + // Add the two alternatives to the ChoiceNode. |
| + GuardedAlternative eol_alternative(end_of_line); |
| + result->AddAlternative(eol_alternative); |
| + GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); |
| + result->AddAlternative(end_alternative); |
| + return result; |
| } |
| - return on_success->PropagateForward(&info); |
| + UNREACHABLE(); |
|
Christian Plesner Hansen
2009/01/19 10:28:59
Maybe have this in the 'default' case, that way th
|
| + return on_success; |
| } |
| @@ -3911,6 +4030,13 @@ |
| case '*': |
| ranges->Add(CharacterRange::Everything()); |
| break; |
| + // This is the set of characters matched by the $ and ^ symbols |
| + // in multiline mode. |
| + case 'n': |
| + AddClass(kLineTerminatorRanges, |
| + kLineTerminatorRangeCount, |
| + ranges); |
| + break; |
| default: |
| UNREACHABLE(); |
| } |
| @@ -4096,62 +4222,6 @@ |
| } |
| -RegExpNode* ActionNode::PropagateForward(NodeInfo* info) { |
|
Christian Plesner Hansen
2009/01/19 10:28:59
O joy, we can get rid of all this cra..., I mean s
|
| - NodeInfo full_info(*this->info()); |
| - full_info.AddFromPreceding(info); |
| - bool cloned = false; |
| - ActionNode* action = EnsureSibling(this, &full_info, &cloned); |
| - action->set_on_success(action->on_success()->PropagateForward(info)); |
| - return action; |
| -} |
| - |
| - |
| -RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) { |
| - NodeInfo full_info(*this->info()); |
| - full_info.AddFromPreceding(info); |
| - bool cloned = false; |
| - ChoiceNode* choice = EnsureSibling(this, &full_info, &cloned); |
| - if (cloned) { |
| - ZoneList<GuardedAlternative>* old_alternatives = alternatives(); |
| - int count = old_alternatives->length(); |
| - choice->alternatives_ = new ZoneList<GuardedAlternative>(count); |
| - for (int i = 0; i < count; i++) { |
| - GuardedAlternative alternative = old_alternatives->at(i); |
| - alternative.set_node(alternative.node()->PropagateForward(info)); |
| - choice->alternatives()->Add(alternative); |
| - } |
| - } |
| - return choice; |
| -} |
| - |
| - |
| -RegExpNode* EndNode::PropagateForward(NodeInfo* info) { |
| - return PropagateToEndpoint(this, info); |
| -} |
| - |
| - |
| -RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) { |
| - NodeInfo full_info(*this->info()); |
| - full_info.AddFromPreceding(info); |
| - bool cloned = false; |
| - BackReferenceNode* back_ref = EnsureSibling(this, &full_info, &cloned); |
| - if (cloned) { |
| - // TODO(erikcorry): A back reference has to have two successors (by default |
| - // the same node). The first is used if the back reference matches a non- |
| - // empty back reference, the second if it matches an empty one. This |
| - // doesn't matter for at_end, which is the only one implemented right now, |
| - // but it will matter for other pieces of info. |
| - back_ref->set_on_success(back_ref->on_success()->PropagateForward(info)); |
| - } |
| - return back_ref; |
| -} |
| - |
| - |
| -RegExpNode* TextNode::PropagateForward(NodeInfo* info) { |
| - return PropagateToEndpoint(this, info); |
| -} |
| - |
| - |
| // ------------------------------------------------------------------- |
| // Splay tree |
| @@ -4389,6 +4459,11 @@ |
| } |
| +void Analysis::VisitAssertion(AssertionNode* that) { |
| + EnsureAnalyzed(that->on_success()); |
| +} |
| + |
| + |
| // ------------------------------------------------------------------- |
| // Dispatch table construction |
| @@ -4441,7 +4516,13 @@ |
| } |
| +void DispatchTableConstructor::VisitAssertion(AssertionNode* that) { |
| + RegExpNode* target = that->on_success(); |
| + target->Accept(this); |
| +} |
| + |
| + |
| static int CompareRangeByFrom(const CharacterRange* a, |
| const CharacterRange* b) { |
| return Compare<uc16>(a->from(), b->from()); |
| @@ -4527,10 +4608,6 @@ |
| NodeInfo info = *node->info(); |
| - if (is_multiline && !FLAG_attempt_multiline_irregexp) { |
| - return Handle<FixedArray>::null(); |
| - } |
| - |
| if (FLAG_irregexp_native) { |
| #ifdef ARM |
| // Unimplemented, fall-through to bytecode implementation. |