Index: src/jsregexp.cc |
=================================================================== |
--- src/jsregexp.cc (revision 860) |
+++ src/jsregexp.cc (working copy) |
@@ -992,8 +992,7 @@ |
// TODO(erikcorry): Implement support. |
if (info_.follows_word_interest || |
info_.follows_newline_interest || |
- info_.follows_start_interest || |
- info_.at_end) { |
+ info_.follows_start_interest) { |
return false; |
} |
if (label_.is_bound()) { |
@@ -1014,25 +1013,17 @@ |
} |
+// EndNodes are special. Because they can be very common and they are very |
+// short, we normally inline them. That is, if we are asked to emit a GoTo, |
+// we just emit the entire node. This works because EndNodes have no |
+// successors. |
bool EndNode::GoTo(RegExpCompiler* compiler) { |
if (info()->follows_word_interest || |
info()->follows_newline_interest || |
- info()->follows_start_interest || |
- info()->at_end) { |
+ info()->follows_start_interest) { |
return false; |
} |
- if (!label()->is_bound()) { |
- Bind(compiler->macro_assembler()); |
- } |
- switch (action_) { |
- case ACCEPT: |
- compiler->macro_assembler()->Succeed(); |
- break; |
- case BACKTRACK: |
- compiler->macro_assembler()->Backtrack(); |
- break; |
- } |
- return true; |
+ return Emit(compiler); |
} |
@@ -1045,11 +1036,20 @@ |
RegExpMacroAssembler* macro = compiler->macro_assembler(); |
switch (action_) { |
case ACCEPT: |
- Bind(macro); |
+ if (!label()->is_bound()) Bind(macro); |
+ if (info()->at_end) { |
+ Label succeed; |
+ // LoadCurrentCharacter will go to the label if we are at the end of the |
+ // input string. |
+ macro->LoadCurrentCharacter(0, &succeed); |
+ macro->Backtrack(); |
+ macro->Bind(&succeed); |
+ } |
macro->Succeed(); |
return true; |
case BACKTRACK: |
- Bind(macro); |
+ if (!label()->is_bound()) Bind(macro); |
+ ASSERT(!info()->at_end); |
macro->Backtrack(); |
return true; |
} |
@@ -1088,13 +1088,6 @@ |
} |
-ActionNode* ActionNode::SavePosition(int reg, RegExpNode* on_success) { |
- ActionNode* result = new ActionNode(SAVE_POSITION, on_success); |
- result->data_.u_position_register.reg = reg; |
- return result; |
-} |
- |
- |
ActionNode* ActionNode::RestorePosition(int reg, RegExpNode* on_success) { |
ActionNode* result = new ActionNode(RESTORE_POSITION, on_success); |
result->data_.u_position_register.reg = reg; |
@@ -1102,16 +1095,27 @@ |
} |
-ActionNode* ActionNode::BeginSubmatch(int reg, RegExpNode* on_success) { |
+ActionNode* ActionNode::BeginSubmatch(int stack_reg, |
+ int position_reg, |
+ RegExpNode* on_success) { |
ActionNode* result = new ActionNode(BEGIN_SUBMATCH, on_success); |
- result->data_.u_submatch_stack_pointer_register.reg = reg; |
+ result->data_.u_submatch.stack_pointer_register = stack_reg; |
+ result->data_.u_submatch.current_position_register = position_reg; |
return result; |
} |
-ActionNode* ActionNode::EscapeSubmatch(int reg, RegExpNode* on_success) { |
+ActionNode* ActionNode::EscapeSubmatch(int stack_reg, |
+ bool restore_position, |
+ int position_reg, |
+ RegExpNode* on_success) { |
ActionNode* result = new ActionNode(ESCAPE_SUBMATCH, on_success); |
- result->data_.u_submatch_stack_pointer_register.reg = reg; |
+ result->data_.u_submatch.stack_pointer_register = stack_reg; |
+ if (restore_position) { |
+ result->data_.u_submatch.current_position_register = position_reg; |
+ } else { |
+ result->data_.u_submatch.current_position_register = -1; |
+ } |
return result; |
} |
@@ -1320,7 +1324,12 @@ |
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
Bind(macro_assembler); |
int element_count = elms_->length(); |
+ ASSERT(element_count != 0); |
int cp_offset = 0; |
+ if (info()->at_end) { |
+ macro_assembler->Backtrack(); |
+ return true; |
+ } |
// First, handle straight character matches. |
for (int i = 0; i < element_count; i++) { |
TextElement elm = elms_->at(i); |
@@ -1481,21 +1490,31 @@ |
macro->Backtrack(); |
break; |
} |
- case SAVE_POSITION: |
- macro->WriteCurrentPositionToRegister( |
- data_.u_position_register.reg); |
- break; |
case RESTORE_POSITION: |
macro->ReadCurrentPositionFromRegister( |
data_.u_position_register.reg); |
break; |
case BEGIN_SUBMATCH: |
+ macro->WriteCurrentPositionToRegister( |
+ data_.u_submatch.current_position_register); |
macro->WriteStackPointerToRegister( |
- data_.u_submatch_stack_pointer_register.reg); |
+ data_.u_submatch.stack_pointer_register); |
break; |
case ESCAPE_SUBMATCH: |
+ if (info()->at_end) { |
+ Label at_end; |
+ // LoadCurrentCharacter jumps to the label if we are beyond the string |
+ // end. |
+ macro->LoadCurrentCharacter(0, &at_end); |
+ macro->Backtrack(); |
+ macro->Bind(&at_end); |
+ } |
+ if (data_.u_submatch.current_position_register != -1) { |
+ macro->ReadCurrentPositionFromRegister( |
+ data_.u_submatch.current_position_register); |
+ } |
macro->ReadStackPointerFromRegister( |
- data_.u_submatch_stack_pointer_register.reg); |
+ data_.u_submatch.stack_pointer_register); |
break; |
default: |
UNREACHABLE(); |
@@ -1513,10 +1532,16 @@ |
macro->IfRegisterLT(start_reg_, 0, on_success()->label()); |
macro->IfRegisterLT(end_reg_, 0, on_success()->label()); |
ASSERT_EQ(start_reg_ + 1, end_reg_); |
- if (compiler->ignore_case()) { |
- macro->CheckNotBackReferenceIgnoreCase(start_reg_, on_failure_->label()); |
+ if (info()->at_end) { |
+ // If we are constrained to match at the end of the input then succeed |
+ // iff the back reference is empty. |
+ macro->CheckNotRegistersEqual(start_reg_, end_reg_, on_failure_->label()); |
} else { |
- macro->CheckNotBackReference(start_reg_, on_failure_->label()); |
+ if (compiler->ignore_case()) { |
+ macro->CheckNotBackReferenceIgnoreCase(start_reg_, on_failure_->label()); |
+ } else { |
+ macro->CheckNotBackReference(start_reg_, on_failure_->label()); |
+ } |
} |
return on_success()->GoTo(compiler); |
} |
@@ -1751,16 +1776,13 @@ |
stream()->Add("label=\"$%i:=$pos\", shape=octagon", |
that->data_.u_position_register.reg); |
break; |
- case ActionNode::SAVE_POSITION: |
- stream()->Add("label=\"$%i:=$pos\", shape=octagon", |
- that->data_.u_position_register.reg); |
- break; |
case ActionNode::RESTORE_POSITION: |
stream()->Add("label=\"$pos:=$%i\", shape=octagon", |
that->data_.u_position_register.reg); |
break; |
case ActionNode::BEGIN_SUBMATCH: |
- stream()->Add("label=\"begin\", shape=septagon"); |
+ stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon", |
+ that->data_.u_submatch.current_position_register); |
break; |
case ActionNode::ESCAPE_SUBMATCH: |
stream()->Add("label=\"escape\", shape=septagon"); |
@@ -1991,15 +2013,15 @@ |
// fail |
return ActionNode::BeginSubmatch( |
stack_pointer_register, |
- ActionNode::SavePosition( |
- position_register, |
- body()->ToNode( |
- compiler, |
- ActionNode::RestorePosition( |
- position_register, |
- ActionNode::EscapeSubmatch(stack_pointer_register, |
- on_success)), |
- on_failure))); |
+ position_register, |
+ body()->ToNode( |
+ compiler, |
+ ActionNode::EscapeSubmatch( |
+ stack_pointer_register, |
+ true, // Also restore input position. |
+ position_register, |
+ on_success), |
+ on_failure)); |
} else { |
// begin submatch scope |
// try |
@@ -2018,14 +2040,16 @@ |
on_success)); |
RegExpNode* body_node = body()->ToNode( |
compiler, |
- ActionNode::EscapeSubmatch(stack_pointer_register, on_failure), |
+ ActionNode::EscapeSubmatch(stack_pointer_register, |
+ false, // Don't also restore position. |
+ 0, // Position register is unused. |
+ on_failure), |
compiler->backtrack()); |
GuardedAlternative body_alt(body_node); |
try_node->AddAlternative(body_alt); |
return ActionNode::BeginSubmatch(stack_pointer_register, |
- ActionNode::SavePosition( |
- position_register, |
- try_node)); |
+ position_register, |
+ try_node); |
} |
} |
@@ -2270,7 +2294,9 @@ |
ActionNode* action = new ActionNode(*this); |
action->info()->AddFromPreceding(&full_info); |
AddSibling(action); |
- action->set_on_success(action->on_success()->PropagateForward(info)); |
+ if (type_ != ESCAPE_SUBMATCH) { |
+ action->set_on_success(action->on_success()->PropagateForward(info)); |
+ } |
return action; |
} |
@@ -2292,6 +2318,9 @@ |
alternative.set_node(alternative.node()->PropagateForward(info)); |
choice->alternatives()->Add(alternative); |
} |
+ if (!choice->on_failure_->IsBacktrack()) { |
+ choice->on_failure_ = choice->on_failure_->PropagateForward(info); |
+ } |
return choice; |
} |
@@ -2302,7 +2331,21 @@ |
RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) { |
- return PropagateToEndpoint(this, info); |
+ NodeInfo full_info(*this->info()); |
+ full_info.AddFromPreceding(info); |
+ RegExpNode* sibling = GetSibling(&full_info); |
+ if (sibling != NULL) return sibling; |
+ EnsureSiblings(); |
+ BackReferenceNode* back_ref = new BackReferenceNode(*this); |
+ back_ref->info()->AddFromPreceding(&full_info); |
+ AddSibling(back_ref); |
+ // TODO(erikcorry): A back reference has to have two successors (by default |
Christian Plesner Hansen
2008/11/28 08:44:53
I wonder if maybe we always have to propagate to t
|
+ // the same node). The first is used if the back reference matches a non- |
+ // empty capture, the second if it matches an empty one. This doesn't |
+ // matter for at_end, which is the only info implemented right now, but it |
+ // will matter for other pieces of info. |
+ back_ref->set_on_success(back_ref->on_success()->PropagateForward(info)); |
+ return back_ref; |
} |
@@ -2672,6 +2715,10 @@ |
return Handle<FixedArray>::null(); |
} |
+ if (is_multiline && !FLAG_attempt_multiline_irregexp) { |
+ return Handle<FixedArray>::null(); |
+ } |
+ |
if (FLAG_irregexp_native) { |
#ifdef ARM |
UNIMPLEMENTED(); |