Chromium Code Reviews| Index: src/jsregexp.cc | 
| =================================================================== | 
| --- src/jsregexp.cc (revision 1094) | 
| +++ src/jsregexp.cc (working copy) | 
| @@ -1522,18 +1522,6 @@ | 
| } | 
| -void EndNode::EmitInfoChecks(RegExpMacroAssembler* assembler, Trace* trace) { | 
| - if (info()->at_end) { | 
| - Label succeed; | 
| - // LoadCurrentCharacter will go to the label if we are at the end of the | 
| - // input string. | 
| - assembler->LoadCurrentCharacter(0, &succeed); | 
| - assembler->GoTo(trace->backtrack()); | 
| - assembler->Bind(&succeed); | 
| - } | 
| -} | 
| - | 
| - | 
| bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) { | 
| if (!trace->is_trivial()) { | 
| return trace->Flush(compiler, this); | 
| @@ -1542,7 +1530,6 @@ | 
| if (!label()->is_bound()) { | 
| assembler->Bind(label()); | 
| } | 
| - EmitInfoChecks(assembler, trace); | 
| assembler->ReadCurrentPositionFromRegister(current_position_register_); | 
| assembler->ReadStackPointerFromRegister(stack_pointer_register_); | 
| // Now that we have unwound the stack we find at the top of the stack the | 
| @@ -1562,11 +1549,9 @@ | 
| } | 
| switch (action_) { | 
| case ACCEPT: | 
| - EmitInfoChecks(assembler, trace); | 
| assembler->Succeed(); | 
| return true; | 
| case BACKTRACK: | 
| - ASSERT(!info()->at_end); | 
| assembler->GoTo(trace->backtrack()); | 
| return true; | 
| case NEGATIVE_SUBMATCH_SUCCESS: | 
| @@ -1935,13 +1920,6 @@ | 
| RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, | 
| Trace* trace) { | 
| - // TODO(erikcorry): Implement support. | 
| - if (info_.follows_word_interest || | 
| - info_.follows_newline_interest || | 
| - info_.follows_start_interest) { | 
| - return FAIL; | 
| - } | 
| - | 
| // If we are generating a greedy loop then don't stop and don't reuse code. | 
| if (trace->stop_node() != NULL) { | 
| return CONTINUE; | 
| @@ -1990,6 +1968,19 @@ | 
| } | 
| +int AssertionNode::EatsAtLeast(int recursion_depth) { | 
| + if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; | 
| + return on_success()->EatsAtLeast(recursion_depth + 1); | 
| +} | 
| + | 
| + | 
| +int BackReferenceNode::EatsAtLeast(int recursion_depth) { | 
| + if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; | 
| + return on_success()->EatsAtLeast(recursion_depth + 1); | 
| +} | 
| + | 
| + | 
| + | 
| int TextNode::EatsAtLeast(int recursion_depth) { | 
| int answer = Length(); | 
| if (answer >= 4) return answer; | 
| @@ -2257,7 +2248,7 @@ | 
| void QuickCheckDetails::Advance(int by, bool ascii) { | 
| - ASSERT(by > 0); | 
| + ASSERT(by >= 0); | 
| if (by >= characters_) { | 
| Clear(); | 
| return; | 
| @@ -2342,6 +2333,116 @@ | 
| } | 
| +// Check for [0-9A-Z_a-z]. | 
| +static void EmitWordCheck(RegExpMacroAssembler* assembler, | 
| + Label* word, | 
| + Label* non_word, | 
| + bool fall_through_on_word) { | 
| + assembler->CheckCharacterGT('z', non_word); | 
| + assembler->CheckCharacterLT('0', non_word); | 
| + assembler->CheckCharacterGT('a' - 1, word); | 
| + assembler->CheckCharacterLT('9' + 1, word); | 
| + assembler->CheckCharacterLT('A', non_word); | 
| + assembler->CheckCharacterLT('Z' + 1, word); | 
| + if (fall_through_on_word) { | 
| + assembler->CheckNotCharacter('_', non_word); | 
| + } else { | 
| + assembler->CheckCharacter('_', word); | 
| + } | 
| +} | 
| + | 
| + | 
| +bool AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 
| + RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 
| + switch (type_) { | 
| + case AT_END: { | 
| + Label ok; | 
| + assembler->LoadCurrentCharacter(trace->cp_offset(), &ok); | 
| + assembler->GoTo(trace->backtrack()); | 
| + assembler->Bind(&ok); | 
| + break; | 
| + } | 
| + case AT_START: | 
| + assembler->CheckNotAtStart(trace->backtrack()); | 
| + break; | 
| + case AFTER_NEWLINE: { | 
| + // Check whether we are either at the start or immediately after a | 
| + // newline. | 
| + Trace new_trace(*trace); | 
| + // Invalidate the current character register. | 
| + new_trace.AdvanceCurrentPositionInTrace(0, compiler->ascii()); | 
| + | 
| + Label ok; | 
| + if (new_trace.cp_offset() == 0) { | 
| + assembler->CheckAtStart(&ok); | 
| + } | 
| + assembler->LoadCurrentCharacter(-1, new_trace.backtrack(), false); | 
| 
 
Christian Plesner Hansen
2009/01/19 10:28:59
Maybe a comment about why this works when you're a
 
 | 
| + // Newline means \n, \r, 0x2028 or 0x2029. | 
| + if (!compiler->ascii()) { | 
| + assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); | 
| + } | 
| + assembler->CheckCharacter('\n', &ok); | 
| + assembler->CheckNotCharacter('\r', new_trace.backtrack()); | 
| + assembler->Bind(&ok); | 
| + return on_success()->Emit(compiler, &new_trace); | 
| + } | 
| + case AT_NON_BOUNDARY: | 
| + case AT_BOUNDARY: { | 
| + Label before_non_word; | 
| 
 
Christian Plesner Hansen
2009/01/19 10:28:59
Maybe you could split these huge cases out into se
 
 | 
| + Label before_word; | 
| + if (trace->characters_preloaded() != 1) { | 
| + assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); | 
| + } | 
| + // Fall through on non-word. | 
| + EmitWordCheck(assembler, &before_word, &before_non_word, false); | 
| + | 
| + // Invalidate the current character register. | 
| + Trace new_trace(*trace); | 
| + new_trace.AdvanceCurrentPositionInTrace(0, compiler->ascii()); | 
| + | 
| + Label ok; | 
| + Label* boundary; | 
| + Label* not_boundary; | 
| + if (type_ == AT_BOUNDARY) { | 
| + boundary = &ok; | 
| + not_boundary = new_trace.backtrack(); | 
| + } else { | 
| + not_boundary = &ok; | 
| + boundary = new_trace.backtrack(); | 
| + } | 
| + | 
| + // Next character is not a word character. | 
| + assembler->Bind(&before_non_word); | 
| + if (new_trace.cp_offset() == 0) { | 
| + assembler->CheckAtStart(not_boundary); | 
| + } | 
| + assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, | 
| 
 
Christian Plesner Hansen
2009/01/19 10:28:59
What happens here if we're right at the beginning
 
 | 
| + &ok, // Unused dummy label in this call. | 
| + false); | 
| + // Fall through on non-word. | 
| + EmitWordCheck(assembler, boundary, not_boundary, false); | 
| + assembler->GoTo(not_boundary); | 
| + | 
| + // Next character is a word character. | 
| + assembler->Bind(&before_word); | 
| + if (new_trace.cp_offset() == 0) { | 
| + assembler->CheckAtStart(boundary); | 
| + } | 
| + assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, | 
| + &ok, // Unused dummy label in this call. | 
| + false); | 
| + bool fall_through_on_word = (type_ == AT_NON_BOUNDARY); | 
| + EmitWordCheck(assembler, not_boundary, boundary, fall_through_on_word); | 
| + | 
| + assembler->Bind(&ok); | 
| + | 
| + return on_success()->Emit(compiler, &new_trace); | 
| + } | 
| + } | 
| + return on_success()->Emit(compiler, trace); | 
| +} | 
| + | 
| + | 
| // We call this repeatedly to generate code for each pass over the text node. | 
| // The passes are in increasing order of difficulty because we hope one | 
| // of the first passes will fail in which case we are saved the work of the | 
| @@ -2487,17 +2588,6 @@ | 
| if (limit_result == DONE) return true; | 
| ASSERT(limit_result == CONTINUE); | 
| - if (info()->follows_word_interest || | 
| - info()->follows_newline_interest || | 
| - info()->follows_start_interest) { | 
| - return false; | 
| - } | 
| - | 
| - if (info()->at_end) { | 
| - compiler->macro_assembler()->GoTo(trace->backtrack()); | 
| - return true; | 
| - } | 
| - | 
| if (compiler->ascii()) { | 
| int dummy = 0; | 
| TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy); | 
| @@ -2562,7 +2652,9 @@ | 
| void Trace::AdvanceCurrentPositionInTrace(int by, bool ascii) { | 
| - ASSERT(by > 0); | 
| + // We can call this with by == 0. In that case it just means that the | 
| 
 
Christian Plesner Hansen
2009/01/19 10:28:59
Maybe split invalidation out into a separate metho
 
 | 
| + // current position register has been invalidated. | 
| + | 
| // We don't have an instruction for shifting the current character register | 
| // down or for using a shifted value for anything so lets just forget that | 
| // we preloaded any characters into it. | 
| @@ -2616,12 +2708,6 @@ | 
| if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { | 
| return kNodeIsTooComplexForGreedyLoops; | 
| } | 
| - NodeInfo* info = node->info(); | 
| - if (info->follows_word_interest || | 
| - info->follows_newline_interest || | 
| - info->follows_start_interest) { | 
| - return kNodeIsTooComplexForGreedyLoops; | 
| - } | 
| int node_length = node->GreedyLoopTextLength(); | 
| if (node_length == kNodeIsTooComplexForGreedyLoops) { | 
| return kNodeIsTooComplexForGreedyLoops; | 
| @@ -3096,20 +3182,6 @@ | 
| } | 
| case POSITIVE_SUBMATCH_SUCCESS: | 
| if (!trace->is_trivial()) return trace->Flush(compiler, this); | 
| - // TODO(erikcorry): Implement support. | 
| - if (info()->follows_word_interest || | 
| - info()->follows_newline_interest || | 
| - info()->follows_start_interest) { | 
| - return false; | 
| - } | 
| - if (info()->at_end) { | 
| - Label at_end; | 
| - // Load current character jumps to the label if we are beyond the string | 
| - // end. | 
| - assembler->LoadCurrentCharacter(0, &at_end); | 
| - assembler->GoTo(trace->backtrack()); | 
| - assembler->Bind(&at_end); | 
| - } | 
| assembler->ReadCurrentPositionFromRegister( | 
| data_.u_submatch.current_position_register); | 
| assembler->ReadStackPointerFromRegister( | 
| @@ -3136,19 +3208,11 @@ | 
| RecursionCheck rc(compiler); | 
| ASSERT_EQ(start_reg_ + 1, end_reg_); | 
| - if (info()->at_end) { | 
| - // If we are constrained to match at the end of the input then succeed | 
| - // iff the back reference is empty. | 
| - assembler->CheckNotRegistersEqual(start_reg_, | 
| - end_reg_, | 
| - trace->backtrack()); | 
| + if (compiler->ignore_case()) { | 
| + assembler->CheckNotBackReferenceIgnoreCase(start_reg_, | 
| + trace->backtrack()); | 
| } else { | 
| - if (compiler->ignore_case()) { | 
| - assembler->CheckNotBackReferenceIgnoreCase(start_reg_, | 
| - trace->backtrack()); | 
| - } else { | 
| - assembler->CheckNotBackReference(start_reg_, trace->backtrack()); | 
| - } | 
| + assembler->CheckNotBackReference(start_reg_, trace->backtrack()); | 
| } | 
| return on_success()->Emit(compiler, trace); | 
| } | 
| @@ -3389,6 +3453,33 @@ | 
| } | 
| +void DotPrinter::VisitAssertion(AssertionNode* that) { | 
| + stream()->Add(" n%p [", that); | 
| + switch (that->type()) { | 
| + case AssertionNode::AT_END: | 
| + stream()->Add("label=\"$\", shape=septagon"); | 
| + break; | 
| + case AssertionNode::AT_START: | 
| + stream()->Add("label=\"^\", shape=septagon"); | 
| + break; | 
| + case AssertionNode::AT_BOUNDARY: | 
| + stream()->Add("label=\"\\b\", shape=septagon"); | 
| + break; | 
| + case AssertionNode::AT_NON_BOUNDARY: | 
| + stream()->Add("label=\"\\B\", shape=septagon"); | 
| + break; | 
| + case AssertionNode::AFTER_NEWLINE: | 
| + stream()->Add("label=\"(?<=\\n)\", shape=septagon"); | 
| + break; | 
| + } | 
| + stream()->Add("];\n"); | 
| + PrintAttributes(that); | 
| + RegExpNode* successor = that->on_success(); | 
| + stream()->Add(" n%p -> n%p;\n", that, successor); | 
| + Visit(successor); | 
| +} | 
| + | 
| + | 
| void DotPrinter::VisitAction(ActionNode* that) { | 
| stream()->Add(" n%p [", that); | 
| switch (that->type_) { | 
| @@ -3749,22 +3840,50 @@ | 
| NodeInfo info; | 
| switch (type()) { | 
| case START_OF_LINE: | 
| - info.follows_newline_interest = true; | 
| - break; | 
| + return AssertionNode::AfterNewline(on_success); | 
| case START_OF_INPUT: | 
| - info.follows_start_interest = true; | 
| - break; | 
| - case BOUNDARY: case NON_BOUNDARY: | 
| - info.follows_word_interest = true; | 
| - break; | 
| + return AssertionNode::AtStart(on_success); | 
| + case BOUNDARY: | 
| + return AssertionNode::AtBoundary(on_success); | 
| + case NON_BOUNDARY: | 
| + return AssertionNode::AtNonBoundary(on_success); | 
| case END_OF_INPUT: | 
| - info.at_end = true; | 
| - break; | 
| + return AssertionNode::AtEnd(on_success); | 
| case END_OF_LINE: | 
| - // This is wrong but has the effect of making the compiler abort. | 
| - info.at_end = true; | 
| + // Compile $ in multiline regexps as an alternation with a positive | 
| + // lookahead in one side and an end-of-input on the other side. | 
| + // We need two registers for the lookahead. | 
| + int stack_pointer_register = compiler->AllocateRegister(); | 
| + int position_register = compiler->AllocateRegister(); | 
| + // The ChoiceNode to distinguish between a newline and end-of-input. | 
| + ChoiceNode* result = new ChoiceNode(2); | 
| + // Create a newline atom. | 
| + ZoneList<CharacterRange>* newline_ranges = | 
| + new ZoneList<CharacterRange>(3); | 
| + CharacterRange::AddClassEscape('n', newline_ranges); | 
| + RegExpCharacterClass* newline_atom = | 
| + new RegExpCharacterClass(newline_ranges, false); | 
| 
 
Christian Plesner Hansen
2009/01/19 10:28:59
You should be able to just use "new RegExpCharacte
 
 | 
| + ZoneList<TextElement>* elms = new ZoneList<TextElement>(1); | 
| + elms->Add(TextElement::CharClass(newline_atom)); | 
| + TextNode* newline_matcher = new TextNode( | 
| 
 
Christian Plesner Hansen
2009/01/19 10:28:59
You should be able to use "new TextNode(newline_at
 
 | 
| + elms, | 
| + ActionNode::PositiveSubmatchSuccess(stack_pointer_register, | 
| + position_register, | 
| + on_success)); | 
| + // Create an end-of-input matcher. | 
| + RegExpNode* end_of_line = ActionNode::BeginSubmatch( | 
| + stack_pointer_register, | 
| + position_register, | 
| + newline_matcher); | 
| + // Add the two alternatives to the ChoiceNode. | 
| + GuardedAlternative eol_alternative(end_of_line); | 
| + result->AddAlternative(eol_alternative); | 
| + GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); | 
| + result->AddAlternative(end_alternative); | 
| + return result; | 
| } | 
| - return on_success->PropagateForward(&info); | 
| + UNREACHABLE(); | 
| 
 
Christian Plesner Hansen
2009/01/19 10:28:59
Maybe have this in the 'default' case, that way th
 
 | 
| + return on_success; | 
| } | 
| @@ -3911,6 +4030,13 @@ | 
| case '*': | 
| ranges->Add(CharacterRange::Everything()); | 
| break; | 
| + // This is the set of characters matched by the $ and ^ symbols | 
| + // in multiline mode. | 
| + case 'n': | 
| + AddClass(kLineTerminatorRanges, | 
| + kLineTerminatorRangeCount, | 
| + ranges); | 
| + break; | 
| default: | 
| UNREACHABLE(); | 
| } | 
| @@ -4096,62 +4222,6 @@ | 
| } | 
| -RegExpNode* ActionNode::PropagateForward(NodeInfo* info) { | 
| 
 
Christian Plesner Hansen
2009/01/19 10:28:59
O joy, we can get rid of all this cra..., I mean s
 
 | 
| - NodeInfo full_info(*this->info()); | 
| - full_info.AddFromPreceding(info); | 
| - bool cloned = false; | 
| - ActionNode* action = EnsureSibling(this, &full_info, &cloned); | 
| - action->set_on_success(action->on_success()->PropagateForward(info)); | 
| - return action; | 
| -} | 
| - | 
| - | 
| -RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) { | 
| - NodeInfo full_info(*this->info()); | 
| - full_info.AddFromPreceding(info); | 
| - bool cloned = false; | 
| - ChoiceNode* choice = EnsureSibling(this, &full_info, &cloned); | 
| - if (cloned) { | 
| - ZoneList<GuardedAlternative>* old_alternatives = alternatives(); | 
| - int count = old_alternatives->length(); | 
| - choice->alternatives_ = new ZoneList<GuardedAlternative>(count); | 
| - for (int i = 0; i < count; i++) { | 
| - GuardedAlternative alternative = old_alternatives->at(i); | 
| - alternative.set_node(alternative.node()->PropagateForward(info)); | 
| - choice->alternatives()->Add(alternative); | 
| - } | 
| - } | 
| - return choice; | 
| -} | 
| - | 
| - | 
| -RegExpNode* EndNode::PropagateForward(NodeInfo* info) { | 
| - return PropagateToEndpoint(this, info); | 
| -} | 
| - | 
| - | 
| -RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) { | 
| - NodeInfo full_info(*this->info()); | 
| - full_info.AddFromPreceding(info); | 
| - bool cloned = false; | 
| - BackReferenceNode* back_ref = EnsureSibling(this, &full_info, &cloned); | 
| - if (cloned) { | 
| - // TODO(erikcorry): A back reference has to have two successors (by default | 
| - // the same node). The first is used if the back reference matches a non- | 
| - // empty back reference, the second if it matches an empty one. This | 
| - // doesn't matter for at_end, which is the only one implemented right now, | 
| - // but it will matter for other pieces of info. | 
| - back_ref->set_on_success(back_ref->on_success()->PropagateForward(info)); | 
| - } | 
| - return back_ref; | 
| -} | 
| - | 
| - | 
| -RegExpNode* TextNode::PropagateForward(NodeInfo* info) { | 
| - return PropagateToEndpoint(this, info); | 
| -} | 
| - | 
| - | 
| // ------------------------------------------------------------------- | 
| // Splay tree | 
| @@ -4389,6 +4459,11 @@ | 
| } | 
| +void Analysis::VisitAssertion(AssertionNode* that) { | 
| + EnsureAnalyzed(that->on_success()); | 
| +} | 
| + | 
| + | 
| // ------------------------------------------------------------------- | 
| // Dispatch table construction | 
| @@ -4441,7 +4516,13 @@ | 
| } | 
| +void DispatchTableConstructor::VisitAssertion(AssertionNode* that) { | 
| + RegExpNode* target = that->on_success(); | 
| + target->Accept(this); | 
| +} | 
| + | 
| + | 
| static int CompareRangeByFrom(const CharacterRange* a, | 
| const CharacterRange* b) { | 
| return Compare<uc16>(a->from(), b->from()); | 
| @@ -4527,10 +4608,6 @@ | 
| NodeInfo info = *node->info(); | 
| - if (is_multiline && !FLAG_attempt_multiline_irregexp) { | 
| - return Handle<FixedArray>::null(); | 
| - } | 
| - | 
| if (FLAG_irregexp_native) { | 
| #ifdef ARM | 
| // Unimplemented, fall-through to bytecode implementation. |