Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(675)

Unified Diff: src/jsregexp.cc

Issue 10992: Implement $ for non-multiline. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 12 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/jsregexp.h ('k') | src/regexp-macro-assembler.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/jsregexp.cc
===================================================================
--- src/jsregexp.cc (revision 860)
+++ src/jsregexp.cc (working copy)
@@ -992,8 +992,7 @@
// TODO(erikcorry): Implement support.
if (info_.follows_word_interest ||
info_.follows_newline_interest ||
- info_.follows_start_interest ||
- info_.at_end) {
+ info_.follows_start_interest) {
return false;
}
if (label_.is_bound()) {
@@ -1014,25 +1013,17 @@
}
+// EndNodes are special. Because they can be very common and they are very
+// short we normally inline them. That is, if we are asked to emit a GoTo
+// we just emit the entire node. Since they don't have successors this
+// works.
bool EndNode::GoTo(RegExpCompiler* compiler) {
if (info()->follows_word_interest ||
info()->follows_newline_interest ||
- info()->follows_start_interest ||
- info()->at_end) {
+ info()->follows_start_interest) {
return false;
}
- if (!label()->is_bound()) {
- Bind(compiler->macro_assembler());
- }
- switch (action_) {
- case ACCEPT:
- compiler->macro_assembler()->Succeed();
- break;
- case BACKTRACK:
- compiler->macro_assembler()->Backtrack();
- break;
- }
- return true;
+ return Emit(compiler);
}
@@ -1045,11 +1036,20 @@
RegExpMacroAssembler* macro = compiler->macro_assembler();
switch (action_) {
case ACCEPT:
- Bind(macro);
+ if (!label()->is_bound()) Bind(macro);
+ if (info()->at_end) {
+ Label succeed;
+ // LoadCurrentCharacter will go to the label if we are at the end of the
+ // input string.
+ macro->LoadCurrentCharacter(0, &succeed);
+ macro->Backtrack();
+ macro->Bind(&succeed);
+ }
macro->Succeed();
return true;
case BACKTRACK:
- Bind(macro);
+ if (!label()->is_bound()) Bind(macro);
+ ASSERT(!info()->at_end);
macro->Backtrack();
return true;
}
@@ -1088,13 +1088,6 @@
}
-ActionNode* ActionNode::SavePosition(int reg, RegExpNode* on_success) {
- ActionNode* result = new ActionNode(SAVE_POSITION, on_success);
- result->data_.u_position_register.reg = reg;
- return result;
-}
-
-
ActionNode* ActionNode::RestorePosition(int reg, RegExpNode* on_success) {
ActionNode* result = new ActionNode(RESTORE_POSITION, on_success);
result->data_.u_position_register.reg = reg;
@@ -1102,16 +1095,27 @@
}
-ActionNode* ActionNode::BeginSubmatch(int reg, RegExpNode* on_success) {
+ActionNode* ActionNode::BeginSubmatch(int stack_reg,
+ int position_reg,
+ RegExpNode* on_success) {
ActionNode* result = new ActionNode(BEGIN_SUBMATCH, on_success);
- result->data_.u_submatch_stack_pointer_register.reg = reg;
+ result->data_.u_submatch.stack_pointer_register = stack_reg;
+ result->data_.u_submatch.current_position_register = position_reg;
return result;
}
-ActionNode* ActionNode::EscapeSubmatch(int reg, RegExpNode* on_success) {
+ActionNode* ActionNode::EscapeSubmatch(int stack_reg,
+ bool restore_position,
+ int position_reg,
+ RegExpNode* on_success) {
ActionNode* result = new ActionNode(ESCAPE_SUBMATCH, on_success);
- result->data_.u_submatch_stack_pointer_register.reg = reg;
+ result->data_.u_submatch.stack_pointer_register = stack_reg;
+ if (restore_position) {
+ result->data_.u_submatch.current_position_register = position_reg;
+ } else {
+ result->data_.u_submatch.current_position_register = -1;
+ }
return result;
}
@@ -1320,7 +1324,12 @@
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
Bind(macro_assembler);
int element_count = elms_->length();
+ ASSERT(element_count != 0);
int cp_offset = 0;
+ if (info()->at_end) {
+ macro_assembler->Backtrack();
+ return true;
+ }
// First, handle straight character matches.
for (int i = 0; i < element_count; i++) {
TextElement elm = elms_->at(i);
@@ -1481,21 +1490,31 @@
macro->Backtrack();
break;
}
- case SAVE_POSITION:
- macro->WriteCurrentPositionToRegister(
- data_.u_position_register.reg);
- break;
case RESTORE_POSITION:
macro->ReadCurrentPositionFromRegister(
data_.u_position_register.reg);
break;
case BEGIN_SUBMATCH:
+ macro->WriteCurrentPositionToRegister(
+ data_.u_submatch.current_position_register);
macro->WriteStackPointerToRegister(
- data_.u_submatch_stack_pointer_register.reg);
+ data_.u_submatch.stack_pointer_register);
break;
case ESCAPE_SUBMATCH:
+ if (info()->at_end) {
+ Label at_end;
+ // Load current character jumps to the label if we are beyond the string
+ // end.
+ macro->LoadCurrentCharacter(0, &at_end);
+ macro->Backtrack();
+ macro->Bind(&at_end);
+ }
+ if (data_.u_submatch.current_position_register != -1) {
+ macro->ReadCurrentPositionFromRegister(
+ data_.u_submatch.current_position_register);
+ }
macro->ReadStackPointerFromRegister(
- data_.u_submatch_stack_pointer_register.reg);
+ data_.u_submatch.stack_pointer_register);
break;
default:
UNREACHABLE();
@@ -1513,10 +1532,16 @@
macro->IfRegisterLT(start_reg_, 0, on_success()->label());
macro->IfRegisterLT(end_reg_, 0, on_success()->label());
ASSERT_EQ(start_reg_ + 1, end_reg_);
- if (compiler->ignore_case()) {
- macro->CheckNotBackReferenceIgnoreCase(start_reg_, on_failure_->label());
+ if (info()->at_end) {
+ // If we are constrained to match at the end of the input then succeed
+ // iff the back reference is empty.
+ macro->CheckNotRegistersEqual(start_reg_, end_reg_, on_failure_->label());
} else {
- macro->CheckNotBackReference(start_reg_, on_failure_->label());
+ if (compiler->ignore_case()) {
+ macro->CheckNotBackReferenceIgnoreCase(start_reg_, on_failure_->label());
+ } else {
+ macro->CheckNotBackReference(start_reg_, on_failure_->label());
+ }
}
return on_success()->GoTo(compiler);
}
@@ -1751,16 +1776,13 @@
stream()->Add("label=\"$%i:=$pos\", shape=octagon",
that->data_.u_position_register.reg);
break;
- case ActionNode::SAVE_POSITION:
- stream()->Add("label=\"$%i:=$pos\", shape=octagon",
- that->data_.u_position_register.reg);
- break;
case ActionNode::RESTORE_POSITION:
stream()->Add("label=\"$pos:=$%i\", shape=octagon",
that->data_.u_position_register.reg);
break;
case ActionNode::BEGIN_SUBMATCH:
- stream()->Add("label=\"begin\", shape=septagon");
+ stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon",
+ that->data_.u_submatch.current_position_register);
break;
case ActionNode::ESCAPE_SUBMATCH:
stream()->Add("label=\"escape\", shape=septagon");
@@ -1991,15 +2013,15 @@
// fail
return ActionNode::BeginSubmatch(
stack_pointer_register,
- ActionNode::SavePosition(
- position_register,
- body()->ToNode(
- compiler,
- ActionNode::RestorePosition(
- position_register,
- ActionNode::EscapeSubmatch(stack_pointer_register,
- on_success)),
- on_failure)));
+ position_register,
+ body()->ToNode(
+ compiler,
+ ActionNode::EscapeSubmatch(
+ stack_pointer_register,
+ true, // Also restore input position.
+ position_register,
+ on_success),
+ on_failure));
} else {
// begin submatch scope
// try
@@ -2018,14 +2040,16 @@
on_success));
RegExpNode* body_node = body()->ToNode(
compiler,
- ActionNode::EscapeSubmatch(stack_pointer_register, on_failure),
+ ActionNode::EscapeSubmatch(stack_pointer_register,
+ false, // Don't also restore position
+ 0, // Unused arguments.
+ on_failure),
compiler->backtrack());
GuardedAlternative body_alt(body_node);
try_node->AddAlternative(body_alt);
return ActionNode::BeginSubmatch(stack_pointer_register,
- ActionNode::SavePosition(
- position_register,
- try_node));
+ position_register,
+ try_node);
}
}
@@ -2270,7 +2294,9 @@
ActionNode* action = new ActionNode(*this);
action->info()->AddFromPreceding(&full_info);
AddSibling(action);
- action->set_on_success(action->on_success()->PropagateForward(info));
+ if (type_ != ESCAPE_SUBMATCH) {
+ action->set_on_success(action->on_success()->PropagateForward(info));
+ }
return action;
}
@@ -2292,6 +2318,9 @@
alternative.set_node(alternative.node()->PropagateForward(info));
choice->alternatives()->Add(alternative);
}
+ if (!choice->on_failure_->IsBacktrack()) {
+ choice->on_failure_ = choice->on_failure_->PropagateForward(info);
+ }
return choice;
}
@@ -2302,7 +2331,21 @@
RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) {
- return PropagateToEndpoint(this, info);
+ NodeInfo full_info(*this->info());
+ full_info.AddFromPreceding(info);
+ RegExpNode* sibling = GetSibling(&full_info);
+ if (sibling != NULL) return sibling;
+ EnsureSiblings();
+ BackReferenceNode* back_ref = new BackReferenceNode(*this);
+ back_ref->info()->AddFromPreceding(&full_info);
+ AddSibling(back_ref);
+ // TODO(erikcorry): A back reference has to have two successors (by default
Christian Plesner Hansen 2008/11/28 08:44:53 I wonder if maybe we always have to propagate to t
+ // the same node). The first is used if the back reference matches a non-
+ // empty back reference, the second if it matches an empty one. This doesn't
+ // matter for at_end, which is the only one implemented right now, but it will
+ // matter for other pieces of info.
+ back_ref->set_on_success(back_ref->on_success()->PropagateForward(info));
+ return back_ref;
}
@@ -2672,6 +2715,10 @@
return Handle<FixedArray>::null();
}
+ if (is_multiline && !FLAG_attempt_multiline_irregexp) {
+ return Handle<FixedArray>::null();
+ }
+
if (FLAG_irregexp_native) {
#ifdef ARM
UNIMPLEMENTED();
« no previous file with comments | « src/jsregexp.h ('k') | src/regexp-macro-assembler.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698