Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(646)

Unified Diff: src/jsregexp.cc

Issue 18842: Experimental: periodic merge of the bleeding_edge branch to the... (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/toiger/
Patch Set: Created 11 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/jsregexp.h ('k') | src/macro-assembler-arm.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/jsregexp.cc
===================================================================
--- src/jsregexp.cc (revision 1168)
+++ src/jsregexp.cc (working copy)
@@ -298,21 +298,10 @@
return AtomExec(regexp, subject, index);
case JSRegExp::IRREGEXP: {
Handle<Object> result = IrregexpExec(regexp, subject, index);
- if (!result.is_null() || Top::has_pending_exception()) {
- return result;
- }
- // We couldn't handle the regexp using Irregexp, so fall back
- // on JSCRE.
- // Reset the JSRegExp to use JSCRE.
- JscrePrepare(regexp,
- Handle<String>(regexp->Pattern()),
- regexp->GetFlags());
- // Fall-through to JSCRE.
+ if (result.is_null()) ASSERT(Top::has_pending_exception());
+ return result;
}
case JSRegExp::JSCRE:
- if (FLAG_disable_jscre) {
- UNIMPLEMENTED();
- }
return JscreExec(regexp, subject, index);
default:
UNREACHABLE();
@@ -328,22 +317,10 @@
return AtomExecGlobal(regexp, subject);
case JSRegExp::IRREGEXP: {
Handle<Object> result = IrregexpExecGlobal(regexp, subject);
- if (!result.is_null() || Top::has_pending_exception()) {
- return result;
- }
- // Empty handle as result but no exception thrown means that
- // the regexp contains features not yet handled by the irregexp
- // compiler.
- // We have to fall back on JSCRE. Reset the JSRegExp to use JSCRE.
- JscrePrepare(regexp,
- Handle<String>(regexp->Pattern()),
- regexp->GetFlags());
- // Fall-through to JSCRE.
+ if (result.is_null()) ASSERT(Top::has_pending_exception());
+ return result;
}
case JSRegExp::JSCRE:
- if (FLAG_disable_jscre) {
- UNIMPLEMENTED();
- }
return JscreExecGlobal(regexp, subject);
default:
UNREACHABLE();
@@ -460,7 +437,7 @@
&JSREMalloc,
&JSREFree);
if (*code == NULL && (malloc_failure->IsRetryAfterGC() ||
- malloc_failure->IsOutOfMemoryFailure())) {
+ malloc_failure->IsOutOfMemoryFailure())) {
return malloc_failure;
} else {
// It doesn't matter which object we return here, we just need to return
@@ -697,7 +674,7 @@
Handle<String> pattern(re->Pattern());
StringShape shape(*pattern);
if (!pattern->IsFlat(shape)) {
- pattern->Flatten(shape);
+ FlattenString(pattern);
}
RegExpCompileData compile_data;
@@ -824,7 +801,7 @@
Handle<Object> matches;
if (!subject->IsFlat(shape)) {
- subject->Flatten(shape);
+ FlattenString(subject);
}
while (true) {
@@ -920,6 +897,7 @@
offsets_vector,
previous_index == 0);
} else { // Sequential string
+ ASSERT(StringShape(*subject).IsSequential());
Address char_address =
is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
: SeqTwoByteString::cast(*subject)->GetCharsAddress();
@@ -1197,7 +1175,13 @@
public:
RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii);
- int AllocateRegister() { return next_register_++; }
+ int AllocateRegister() {
+ if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
+ reg_exp_too_big_ = true;
+ return next_register_;
+ }
+ return next_register_++;
+ }
Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
RegExpNode* start,
@@ -1218,6 +1202,8 @@
inline void IncrementRecursionDepth() { recursion_depth_++; }
inline void DecrementRecursionDepth() { recursion_depth_--; }
+ void SetRegExpTooBig() { reg_exp_too_big_ = true; }
+
inline bool ignore_case() { return ignore_case_; }
inline bool ascii() { return ascii_; }
@@ -1230,6 +1216,7 @@
RegExpMacroAssembler* macro_assembler_;
bool ignore_case_;
bool ascii_;
+ bool reg_exp_too_big_;
};
@@ -1244,6 +1231,18 @@
};
+static Handle<FixedArray> IrregexpRegExpTooBig(Handle<String> pattern) {
+ Handle<JSArray> array = Factory::NewJSArray(2);
+ SetElement(array, 0, pattern);
+ const char* message = "RegExp too big";
+ SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message)));
+ Handle<Object> regexp_err =
+ Factory::NewSyntaxError("malformed_regexp", array);
+ Top::Throw(*regexp_err);
+ return Handle<FixedArray>();
+}
+
+
// Attempts to compile the regexp using an Irregexp code generator. Returns
// a fixed array or a null handle depending on whether it succeeded.
RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii)
@@ -1251,8 +1250,10 @@
work_list_(NULL),
recursion_depth_(0),
ignore_case_(ignore_case),
- ascii_(ascii) {
+ ascii_(ascii),
+ reg_exp_too_big_(false) {
accept_ = new EndNode(EndNode::ACCEPT);
+ ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
}
@@ -1272,17 +1273,13 @@
Label fail;
macro_assembler->PushBacktrack(&fail);
Trace new_trace;
- if (!start->Emit(this, &new_trace)) {
- fail.Unuse();
- return Handle<FixedArray>::null();
- }
+ start->Emit(this, &new_trace);
macro_assembler_->Bind(&fail);
macro_assembler_->Fail();
while (!work_list.is_empty()) {
- if (!work_list.RemoveLast()->Emit(this, &new_trace)) {
- return Handle<FixedArray>::null();
- }
+ work_list.RemoveLast()->Emit(this, &new_trace);
}
+ if (reg_exp_too_big_) return IrregexpRegExpTooBig(pattern);
Handle<FixedArray> array =
Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength);
array->set(RegExpImpl::kIrregexpImplementationIndex,
@@ -1302,6 +1299,7 @@
return array;
}
+
bool Trace::DeferredAction::Mentions(int that) {
if (type() == ActionNode::CLEAR_CAPTURES) {
Interval range = static_cast<DeferredClearCaptures*>(this)->range();
@@ -1360,41 +1358,44 @@
}
-void Trace::PushAffectedRegisters(RegExpMacroAssembler* assembler,
- int max_register,
- OutSet& affected_registers) {
- // Stay safe and check every half times the limit.
- // (Round up in case the limit is 1).
- int push_limit = (assembler->stack_limit_slack() + 1) / 2;
- for (int reg = 0, pushes = 0; reg <= max_register; reg++) {
- if (affected_registers.Get(reg)) {
- pushes++;
- RegExpMacroAssembler::StackCheckFlag check_stack_limit =
- (pushes % push_limit) == 0 ?
- RegExpMacroAssembler::kCheckStackLimit :
- RegExpMacroAssembler::kNoStackLimitCheck;
- assembler->PushRegister(reg, check_stack_limit);
- }
- }
-}
-
-
void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
int max_register,
- OutSet& affected_registers) {
+ OutSet& registers_to_pop,
+ OutSet& registers_to_clear) {
for (int reg = max_register; reg >= 0; reg--) {
- if (affected_registers.Get(reg)) assembler->PopRegister(reg);
+ if (registers_to_pop.Get(reg)) assembler->PopRegister(reg);
+ else if (registers_to_clear.Get(reg)) {
+ int clear_to = reg;
+ while (reg > 0 && registers_to_clear.Get(reg - 1)) {
+ reg--;
+ }
+ assembler->ClearRegisters(reg, clear_to);
+ }
}
}
void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
int max_register,
- OutSet& affected_registers) {
+ OutSet& affected_registers,
+ OutSet* registers_to_pop,
+ OutSet* registers_to_clear) {
+ // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
+ const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
+
for (int reg = 0; reg <= max_register; reg++) {
if (!affected_registers.Get(reg)) {
continue;
}
+ // Count pushes performed to force a stack limit check occasionally.
+ int pushes = 0;
+
+ // The chronologically first deferred action in the trace
+ // is used to infer the action needed to restore a register
+ // to its previous state (or not, if it's safe to ignore it).
+ enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
+ DeferredActionUndoType undo_action = IGNORE;
+
int value = 0;
bool absolute = false;
bool clear = false;
@@ -1409,8 +1410,16 @@
case ActionNode::SET_REGISTER: {
Trace::DeferredSetRegister* psr =
static_cast<Trace::DeferredSetRegister*>(action);
- value += psr->value();
- absolute = true;
+ if (!absolute) {
+ value += psr->value();
+ absolute = true;
+ }
+ // SET_REGISTER is currently only used for newly introduced loop
+ // counters. They can have a significant previous value if they
+ // occour in a loop. TODO(lrn): Propagate this information, so
+ // we can set undo_action to IGNORE if we know there is no value to
+ // restore.
+ undo_action = RESTORE;
ASSERT_EQ(store_position, -1);
ASSERT(!clear);
break;
@@ -1421,6 +1430,7 @@
}
ASSERT_EQ(store_position, -1);
ASSERT(!clear);
+ undo_action = RESTORE;
break;
case ActionNode::STORE_POSITION: {
Trace::DeferredCapture* pc =
@@ -1428,6 +1438,19 @@
if (!clear && store_position == -1) {
store_position = pc->cp_offset();
}
+
+ // For captures we know that stores and clears alternate.
+ // Other register, are never cleared, and if the occur
+ // inside a loop, they might be assigned more than once.
+ if (reg <= 1) {
+ // Registers zero and one, aka "capture zero", is
+ // always set correctly if we succeed. There is no
+ // need to undo a setting on backtrack, because we
+ // will set it again or fail.
+ undo_action = IGNORE;
+ } else {
+ undo_action = pc->is_capture() ? CLEAR : RESTORE;
+ }
ASSERT(!absolute);
ASSERT_EQ(value, 0);
break;
@@ -1436,8 +1459,10 @@
// Since we're scanning in reverse order, if we've already
// set the position we have to ignore historically earlier
// clearing operations.
- if (store_position == -1)
+ if (store_position == -1) {
clear = true;
+ }
+ undo_action = RESTORE;
ASSERT(!absolute);
ASSERT_EQ(value, 0);
break;
@@ -1448,10 +1473,27 @@
}
}
}
+ // Prepare for the undo-action (e.g., push if it's going to be popped).
+ if (undo_action == RESTORE) {
+ pushes++;
+ RegExpMacroAssembler::StackCheckFlag stack_check =
+ RegExpMacroAssembler::kNoStackLimitCheck;
+ if (pushes == push_limit) {
+ stack_check = RegExpMacroAssembler::kCheckStackLimit;
+ pushes = 0;
+ }
+
+ assembler->PushRegister(reg, stack_check);
+ registers_to_pop->Set(reg);
+ } else if (undo_action == CLEAR) {
+ registers_to_clear->Set(reg);
+ }
+ // Perform the chronologically last action (or accumulated increment)
+ // for the register.
if (store_position != -1) {
assembler->WriteCurrentPositionToRegister(reg, store_position);
} else if (clear) {
- assembler->ClearRegister(reg);
+ assembler->ClearRegisters(reg, reg);
} else if (absolute) {
assembler->SetRegister(reg, value);
} else if (value != 0) {
@@ -1462,9 +1504,9 @@
// This is called as we come into a loop choice node and some other tricky
-// nodes. It normalises the state of the code generator to ensure we can
+// nodes. It normalizes the state of the code generator to ensure we can
// generate generic code.
-bool Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
+void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
RegExpMacroAssembler* assembler = compiler->macro_assembler();
ASSERT(actions_ != NULL ||
@@ -1481,14 +1523,21 @@
if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
// Create a new trivial state and generate the node with that.
Trace new_state;
- return successor->Emit(compiler, &new_state);
+ successor->Emit(compiler, &new_state);
+ return;
}
// Generate deferred actions here along with code to undo them again.
OutSet affected_registers;
+
int max_register = FindAffectedRegisters(&affected_registers);
- PushAffectedRegisters(assembler, max_register, affected_registers);
- PerformDeferredActions(assembler, max_register, affected_registers);
+ OutSet registers_to_pop;
+ OutSet registers_to_clear;
+ PerformDeferredActions(assembler,
+ max_register,
+ affected_registers,
+ &registers_to_pop,
+ &registers_to_clear);
if (backtrack() != NULL) {
// Here we have a concrete backtrack location. These are set up by choice
// nodes and so they indicate that we have a deferred save of the current
@@ -1503,58 +1552,56 @@
Label undo;
assembler->PushBacktrack(&undo);
Trace new_state;
- bool ok = successor->Emit(compiler, &new_state);
+ successor->Emit(compiler, &new_state);
// On backtrack we need to restore state.
assembler->Bind(&undo);
- if (!ok) return false;
if (backtrack() != NULL) {
assembler->PopCurrentPosition();
}
- RestoreAffectedRegisters(assembler, max_register, affected_registers);
+ RestoreAffectedRegisters(assembler,
+ max_register,
+ registers_to_pop,
+ registers_to_clear);
if (backtrack() == NULL) {
assembler->Backtrack();
} else {
assembler->GoTo(backtrack());
}
-
- return true;
}
-void EndNode::EmitInfoChecks(RegExpMacroAssembler* assembler, Trace* trace) {
- if (info()->at_end) {
- Label succeed;
- // LoadCurrentCharacter will go to the label if we are at the end of the
- // input string.
- assembler->LoadCurrentCharacter(0, &succeed);
- assembler->GoTo(trace->backtrack());
- assembler->Bind(&succeed);
- }
-}
+void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
+ RegExpMacroAssembler* assembler = compiler->macro_assembler();
+ // Omit flushing the trace. We discard the entire stack frame anyway.
-bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
- if (!trace->is_trivial()) {
- return trace->Flush(compiler, this);
- }
- RegExpMacroAssembler* assembler = compiler->macro_assembler();
if (!label()->is_bound()) {
+ // We are completely independent of the trace, since we ignore it,
+ // so this code can be used as the generic version.
assembler->Bind(label());
}
- EmitInfoChecks(assembler, trace);
+
+ // Throw away everything on the backtrack stack since the start
+ // of the negative submatch and restore the character position.
assembler->ReadCurrentPositionFromRegister(current_position_register_);
assembler->ReadStackPointerFromRegister(stack_pointer_register_);
+ if (clear_capture_count_ > 0) {
+ // Clear any captures that might have been performed during the success
+ // of the body of the negative look-ahead.
+ int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
+ assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
+ }
// Now that we have unwound the stack we find at the top of the stack the
// backtrack that the BeginSubmatch node got.
assembler->Backtrack();
- return true;
}
-bool EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
+void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
if (!trace->is_trivial()) {
- return trace->Flush(compiler, this);
+ trace->Flush(compiler, this);
+ return;
}
RegExpMacroAssembler* assembler = compiler->macro_assembler();
if (!label()->is_bound()) {
@@ -1562,19 +1609,16 @@
}
switch (action_) {
case ACCEPT:
- EmitInfoChecks(assembler, trace);
assembler->Succeed();
- return true;
+ return;
case BACKTRACK:
- ASSERT(!info()->at_end);
assembler->GoTo(trace->backtrack());
- return true;
+ return;
case NEGATIVE_SUBMATCH_SUCCESS:
// This case is handled in a different virtual method.
UNREACHABLE();
}
UNIMPLEMENTED();
- return false;
}
@@ -1602,9 +1646,12 @@
}
-ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) {
+ActionNode* ActionNode::StorePosition(int reg,
+ bool is_capture,
+ RegExpNode* on_success) {
ActionNode* result = new ActionNode(STORE_POSITION, on_success);
result->data_.u_position_register.reg = reg;
+ result->data_.u_position_register.is_capture = is_capture;
return result;
}
@@ -1630,10 +1677,14 @@
ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg,
int position_reg,
+ int clear_register_count,
+ int clear_register_from,
RegExpNode* on_success) {
ActionNode* result = new ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
result->data_.u_submatch.stack_pointer_register = stack_reg;
result->data_.u_submatch.current_position_register = position_reg;
+ result->data_.u_submatch.clear_register_count = clear_register_count;
+ result->data_.u_submatch.clear_register_from = clear_register_from;
return result;
}
@@ -1849,6 +1900,9 @@
// ASCII optimizations for us.
macro_assembler->GoTo(on_failure);
}
+ if (check_offset) {
+ macro_assembler->CheckPosition(cp_offset, on_failure);
+ }
return;
}
@@ -1856,10 +1910,8 @@
!cc->is_negated() &&
ranges->at(0).IsEverything(max_char)) {
// This is a common case hit by non-anchored expressions.
- // TODO(erikcorry): We should have a macro assembler instruction that just
- // checks for end of string without loading the character.
if (check_offset) {
- macro_assembler->LoadCurrentCharacter(cp_offset, on_failure);
+ macro_assembler->CheckPosition(cp_offset, on_failure);
}
return;
}
@@ -1935,13 +1987,6 @@
RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
Trace* trace) {
- // TODO(erikcorry): Implement support.
- if (info_.follows_word_interest ||
- info_.follows_newline_interest ||
- info_.follows_start_interest) {
- return FAIL;
- }
-
// If we are generating a greedy loop then don't stop and don't reuse code.
if (trace->stop_node() != NULL) {
return CONTINUE;
@@ -1978,27 +2023,62 @@
// If we get here code has been generated for this node too many times or
// recursion is too deep. Time to switch to a generic version. The code for
// generic versions above can handle deep recursion properly.
- bool ok = trace->Flush(compiler, this);
- return ok ? DONE : FAIL;
+ trace->Flush(compiler, this);
+ return DONE;
}
-int ActionNode::EatsAtLeast(int recursion_depth) {
+int ActionNode::EatsAtLeast(int still_to_find, int recursion_depth) {
if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
- return on_success()->EatsAtLeast(recursion_depth + 1);
+ return on_success()->EatsAtLeast(still_to_find, recursion_depth + 1);
}
-int TextNode::EatsAtLeast(int recursion_depth) {
+int AssertionNode::EatsAtLeast(int still_to_find, int recursion_depth) {
+ if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
+ return on_success()->EatsAtLeast(still_to_find, recursion_depth + 1);
+}
+
+
+int BackReferenceNode::EatsAtLeast(int still_to_find, int recursion_depth) {
+ if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
+ return on_success()->EatsAtLeast(still_to_find, recursion_depth + 1);
+}
+
+
+int TextNode::EatsAtLeast(int still_to_find, int recursion_depth) {
int answer = Length();
- if (answer >= 4) return answer;
+ if (answer >= still_to_find) return answer;
if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer;
- return answer + on_success()->EatsAtLeast(recursion_depth + 1);
+ return answer + on_success()->EatsAtLeast(still_to_find - answer,
+ recursion_depth + 1);
}
-int ChoiceNode::EatsAtLeastHelper(int recursion_depth,
+int NegativeLookaheadChoiceNode:: EatsAtLeast(int still_to_find,
+ int recursion_depth) {
+ if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
+ // Alternative 0 is the negative lookahead, alternative 1 is what comes
+ // afterwards.
+ RegExpNode* node = alternatives_->at(1).node();
+ return node->EatsAtLeast(still_to_find, recursion_depth + 1);
+}
+
+
+void NegativeLookaheadChoiceNode::GetQuickCheckDetails(
+ QuickCheckDetails* details,
+ RegExpCompiler* compiler,
+ int filled_in) {
+ // Alternative 0 is the negative lookahead, alternative 1 is what comes
+ // afterwards.
+ RegExpNode* node = alternatives_->at(1).node();
+ return node->GetQuickCheckDetails(details, compiler, filled_in);
+}
+
+
+int ChoiceNode::EatsAtLeastHelper(int still_to_find,
+ int recursion_depth,
RegExpNode* ignore_this_node) {
if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
int min = 100;
@@ -2006,20 +2086,21 @@
for (int i = 0; i < choice_count; i++) {
RegExpNode* node = alternatives_->at(i).node();
if (node == ignore_this_node) continue;
- int node_eats_at_least = node->EatsAtLeast(recursion_depth + 1);
+ int node_eats_at_least = node->EatsAtLeast(still_to_find,
+ recursion_depth + 1);
if (node_eats_at_least < min) min = node_eats_at_least;
}
return min;
}
-int LoopChoiceNode::EatsAtLeast(int recursion_depth) {
- return EatsAtLeastHelper(recursion_depth, loop_node_);
+int LoopChoiceNode::EatsAtLeast(int still_to_find, int recursion_depth) {
+ return EatsAtLeastHelper(still_to_find, recursion_depth, loop_node_);
}
-int ChoiceNode::EatsAtLeast(int recursion_depth) {
- return EatsAtLeastHelper(recursion_depth, NULL);
+int ChoiceNode::EatsAtLeast(int still_to_find, int recursion_depth) {
+ return EatsAtLeastHelper(still_to_find, recursion_depth, NULL);
}
@@ -2257,7 +2338,7 @@
void QuickCheckDetails::Advance(int by, bool ascii) {
- ASSERT(by > 0);
+ ASSERT(by >= 0);
if (by >= characters_) {
Clear();
return;
@@ -2342,6 +2423,150 @@
}
+// Check for [0-9A-Z_a-z].
+static void EmitWordCheck(RegExpMacroAssembler* assembler,
+ Label* word,
+ Label* non_word,
+ bool fall_through_on_word) {
+ assembler->CheckCharacterGT('z', non_word);
+ assembler->CheckCharacterLT('0', non_word);
+ assembler->CheckCharacterGT('a' - 1, word);
+ assembler->CheckCharacterLT('9' + 1, word);
+ assembler->CheckCharacterLT('A', non_word);
+ assembler->CheckCharacterLT('Z' + 1, word);
+ if (fall_through_on_word) {
+ assembler->CheckNotCharacter('_', non_word);
+ } else {
+ assembler->CheckCharacter('_', word);
+ }
+}
+
+
+// Emit the code to check for a ^ in multiline mode (1-character lookbehind
+// that matches newline or the start of input).
+static void EmitHat(RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ Trace* trace) {
+ RegExpMacroAssembler* assembler = compiler->macro_assembler();
+ // We will be loading the previous character into the current character
+ // register.
+ Trace new_trace(*trace);
+ new_trace.InvalidateCurrentCharacter();
+
+ Label ok;
+ if (new_trace.cp_offset() == 0) {
+ // The start of input counts as a newline in this context, so skip to
+ // ok if we are at the start.
+ assembler->CheckAtStart(&ok);
+ }
+ // We already checked that we are not at the start of input so it must be
+ // OK to load the previous character.
+ assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,
+ new_trace.backtrack(),
+ false);
+ // Newline means \n, \r, 0x2028 or 0x2029.
+ if (!compiler->ascii()) {
+ assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
+ }
+ assembler->CheckCharacter('\n', &ok);
+ assembler->CheckNotCharacter('\r', new_trace.backtrack());
+ assembler->Bind(&ok);
+ on_success->Emit(compiler, &new_trace);
+}
+
+
+// Emit the code to handle \b and \B (word-boundary or non-word-boundary).
+static void EmitBoundaryCheck(AssertionNode::AssertionNodeType type,
+ RegExpCompiler* compiler,
+ RegExpNode* on_success,
+ Trace* trace) {
+ RegExpMacroAssembler* assembler = compiler->macro_assembler();
+ Label before_non_word;
+ Label before_word;
+ if (trace->characters_preloaded() != 1) {
+ assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
+ }
+ // Fall through on non-word.
+ EmitWordCheck(assembler, &before_word, &before_non_word, false);
+
+ // We will be loading the previous character into the current character
+ // register.
+ Trace new_trace(*trace);
+ new_trace.InvalidateCurrentCharacter();
+
+ Label ok;
+ Label* boundary;
+ Label* not_boundary;
+ if (type == AssertionNode::AT_BOUNDARY) {
+ boundary = &ok;
+ not_boundary = new_trace.backtrack();
+ } else {
+ not_boundary = &ok;
+ boundary = new_trace.backtrack();
+ }
+
+ // Next character is not a word character.
+ assembler->Bind(&before_non_word);
+ if (new_trace.cp_offset() == 0) {
+ // The start of input counts as a non-word character, so the question is
+ // decided if we are at the start.
+ assembler->CheckAtStart(not_boundary);
+ }
+ // We already checked that we are not at the start of input so it must be
+ // OK to load the previous character.
+ assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
+ &ok, // Unused dummy label in this call.
+ false);
+ // Fall through on non-word.
+ EmitWordCheck(assembler, boundary, not_boundary, false);
+ assembler->GoTo(not_boundary);
+
+ // Next character is a word character.
+ assembler->Bind(&before_word);
+ if (new_trace.cp_offset() == 0) {
+ // The start of input counts as a non-word character, so the question is
+ // decided if we are at the start.
+ assembler->CheckAtStart(boundary);
+ }
+ // We already checked that we are not at the start of input so it must be
+ // OK to load the previous character.
+ assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
+ &ok, // Unused dummy label in this call.
+ false);
+ bool fall_through_on_word = (type == AssertionNode::AT_NON_BOUNDARY);
+ EmitWordCheck(assembler, not_boundary, boundary, fall_through_on_word);
+
+ assembler->Bind(&ok);
+
+ on_success->Emit(compiler, &new_trace);
+}
+
+
+void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
+ RegExpMacroAssembler* assembler = compiler->macro_assembler();
+ switch (type_) {
+ case AT_END: {
+ Label ok;
+ assembler->CheckPosition(trace->cp_offset(), &ok);
+ assembler->GoTo(trace->backtrack());
+ assembler->Bind(&ok);
+ break;
+ }
+ case AT_START:
+ assembler->CheckNotAtStart(trace->backtrack());
+ break;
+ case AFTER_NEWLINE:
+ EmitHat(compiler, on_success(), trace);
+ return;
+ case AT_NON_BOUNDARY:
+ case AT_BOUNDARY:
+ EmitBoundaryCheck(type_, compiler, on_success(), trace);
+ return;
+ }
+ on_success()->Emit(compiler, trace);
+}
+
+
// We call this repeatedly to generate code for each pass over the text node.
// The passes are in increasing order of difficulty because we hope one
// of the first passes will fail in which case we are saved the work of the
@@ -2481,23 +2706,16 @@
// pass from left to right. Instead we pass over the text node several times,
// emitting code for some character positions every time. See the comment on
// TextEmitPass for details.
-bool TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
+void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
LimitResult limit_result = LimitVersions(compiler, trace);
- if (limit_result == FAIL) return false;
- if (limit_result == DONE) return true;
+ if (limit_result == DONE) return;
ASSERT(limit_result == CONTINUE);
- if (info()->follows_word_interest ||
- info()->follows_newline_interest ||
- info()->follows_start_interest) {
- return false;
+ if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
+ compiler->SetRegExpTooBig();
+ return;
}
- if (info()->at_end) {
- compiler->macro_assembler()->GoTo(trace->backtrack());
- return true;
- }
-
if (compiler->ascii()) {
int dummy = 0;
TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);
@@ -2555,13 +2773,18 @@
&bound_checked_to);
Trace successor_trace(*trace);
- successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler->ascii());
+ successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
RecursionCheck rc(compiler);
- return on_success()->Emit(compiler, &successor_trace);
+ on_success()->Emit(compiler, &successor_trace);
}
-void Trace::AdvanceCurrentPositionInTrace(int by, bool ascii) {
+void Trace::InvalidateCurrentCharacter() {
+ characters_preloaded_ = 0;
+}
+
+
+void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
ASSERT(by > 0);
// We don't have an instruction for shifting the current character register
// down or for using a shifted value for anything so lets just forget that
@@ -2570,8 +2793,12 @@
// Adjust the offsets of the quick check performed information. This
// information is used to find out what we already determined about the
// characters by means of mask and compare.
- quick_check_performed_.Advance(by, ascii);
+ quick_check_performed_.Advance(by, compiler->ascii());
cp_offset_ += by;
+ if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
+ compiler->SetRegExpTooBig();
+ cp_offset_ = 0;
+ }
bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
}
@@ -2616,12 +2843,6 @@
if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
return kNodeIsTooComplexForGreedyLoops;
}
- NodeInfo* info = node->info();
- if (info->follows_word_interest ||
- info->follows_newline_interest ||
- info->follows_start_interest) {
- return kNodeIsTooComplexForGreedyLoops;
- }
int node_length = node->GreedyLoopTextLength();
if (node_length == kNodeIsTooComplexForGreedyLoops) {
return kNodeIsTooComplexForGreedyLoops;
@@ -2648,7 +2869,7 @@
}
-bool LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
+void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
if (trace->stop_node() == this) {
int text_length = GreedyLoopTextLength(&(alternatives_->at(0)));
@@ -2658,18 +2879,19 @@
ASSERT(trace->cp_offset() == text_length);
macro_assembler->AdvanceCurrentPosition(text_length);
macro_assembler->GoTo(trace->loop_label());
- return true;
+ return;
}
ASSERT(trace->stop_node() == NULL);
if (!trace->is_trivial()) {
- return trace->Flush(compiler, this);
+ trace->Flush(compiler, this);
+ return;
}
- return ChoiceNode::Emit(compiler, trace);
+ ChoiceNode::Emit(compiler, trace);
}
int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler) {
- int preload_characters = EatsAtLeast(0);
+ int preload_characters = EatsAtLeast(4, 0);
#ifdef CAN_READ_UNALIGNED
bool ascii = compiler->ascii();
if (ascii) {
@@ -2817,7 +3039,7 @@
*/
-bool ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
+void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
int choice_count = alternatives_->length();
#ifdef DEBUG
@@ -2832,8 +3054,7 @@
#endif
LimitResult limit_result = LimitVersions(compiler, trace);
- if (limit_result == DONE) return true;
- if (limit_result == FAIL) return false;
+ if (limit_result == DONE) return;
ASSERT(limit_result == CONTINUE);
RecursionCheck rc(compiler);
@@ -2864,13 +3085,8 @@
macro_assembler->Bind(&loop_label);
greedy_match_trace.set_stop_node(this);
greedy_match_trace.set_loop_label(&loop_label);
- bool ok = alternatives_->at(0).node()->Emit(compiler,
- &greedy_match_trace);
+ alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
macro_assembler->Bind(&greedy_match_failed);
- if (!ok) {
- greedy_loop_label.Unuse();
- return false;
- }
}
Label second_choice; // For use in greedy matches.
@@ -2903,7 +3119,8 @@
new_trace.quick_check_performed()->Clear();
alt_gen->expects_preload = preload_is_current;
bool generate_full_check_inline = false;
- if (alternative.node()->EmitQuickCheck(compiler,
+ if (try_to_emit_quick_check_for_alternative(i) &&
+ alternative.node()->EmitQuickCheck(compiler,
&new_trace,
preload_has_checked_bounds,
&alt_gen->possible_success,
@@ -2941,10 +3158,7 @@
for (int j = 0; j < guard_count; j++) {
GenerateGuard(macro_assembler, guards->at(j), &new_trace);
}
- if (!alternative.node()->Emit(compiler, &new_trace)) {
- greedy_loop_label.Unuse();
- return false;
- }
+ alternative.node()->Emit(compiler, &new_trace);
preload_is_current = false;
}
macro_assembler->Bind(&alt_gen->after);
@@ -2962,26 +3176,23 @@
// label was bound.
for (int i = first_normal_choice; i < choice_count - 1; i++) {
AlternativeGeneration* alt_gen = alt_gens.at(i);
- if (!EmitOutOfLineContinuation(compiler,
- current_trace,
- alternatives_->at(i),
- alt_gen,
- preload_characters,
- alt_gens.at(i + 1)->expects_preload)) {
- return false;
- }
+ EmitOutOfLineContinuation(compiler,
+ current_trace,
+ alternatives_->at(i),
+ alt_gen,
+ preload_characters,
+ alt_gens.at(i + 1)->expects_preload);
}
- return true;
}
-bool ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
+void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
Trace* trace,
GuardedAlternative alternative,
AlternativeGeneration* alt_gen,
int preload_characters,
bool next_expects_preload) {
- if (!alt_gen->possible_success.is_linked()) return true;
+ if (!alt_gen->possible_success.is_linked()) return;
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
macro_assembler->Bind(&alt_gen->possible_success);
@@ -2996,7 +3207,7 @@
for (int j = 0; j < guard_count; j++) {
GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
}
- bool ok = alternative.node()->Emit(compiler, &out_of_line_trace);
+ alternative.node()->Emit(compiler, &out_of_line_trace);
macro_assembler->Bind(&reload_current_char);
// Reload the current character, since the next quick check expects that.
// We don't need to check bounds here because we only get into this
@@ -3006,22 +3217,20 @@
false,
preload_characters);
macro_assembler->GoTo(&(alt_gen->after));
- return ok;
} else {
out_of_line_trace.set_backtrack(&(alt_gen->after));
for (int j = 0; j < guard_count; j++) {
GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
}
- return alternative.node()->Emit(compiler, &out_of_line_trace);
+ alternative.node()->Emit(compiler, &out_of_line_trace);
}
}
-bool ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
+void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
RegExpMacroAssembler* assembler = compiler->macro_assembler();
LimitResult limit_result = LimitVersions(compiler, trace);
- if (limit_result == DONE) return true;
- if (limit_result == FAIL) return false;
+ if (limit_result == DONE) return;
ASSERT(limit_result == CONTINUE);
RecursionCheck rc(compiler);
@@ -3029,24 +3238,29 @@
switch (type_) {
case STORE_POSITION: {
Trace::DeferredCapture
- new_capture(data_.u_position_register.reg, trace);
+ new_capture(data_.u_position_register.reg,
+ data_.u_position_register.is_capture,
+ trace);
Trace new_trace = *trace;
new_trace.add_action(&new_capture);
- return on_success()->Emit(compiler, &new_trace);
+ on_success()->Emit(compiler, &new_trace);
+ break;
}
case INCREMENT_REGISTER: {
Trace::DeferredIncrementRegister
new_increment(data_.u_increment_register.reg);
Trace new_trace = *trace;
new_trace.add_action(&new_increment);
- return on_success()->Emit(compiler, &new_trace);
+ on_success()->Emit(compiler, &new_trace);
+ break;
}
case SET_REGISTER: {
Trace::DeferredSetRegister
new_set(data_.u_store_register.reg, data_.u_store_register.value);
Trace new_trace = *trace;
new_trace.add_action(&new_set);
- return on_success()->Emit(compiler, &new_trace);
+ on_success()->Emit(compiler, &new_trace);
+ break;
}
case CLEAR_CAPTURES: {
Trace::DeferredClearCaptures
@@ -3054,15 +3268,20 @@
data_.u_clear_captures.range_to));
Trace new_trace = *trace;
new_trace.add_action(&new_capture);
- return on_success()->Emit(compiler, &new_trace);
+ on_success()->Emit(compiler, &new_trace);
+ break;
}
case BEGIN_SUBMATCH:
- if (!trace->is_trivial()) return trace->Flush(compiler, this);
- assembler->WriteCurrentPositionToRegister(
- data_.u_submatch.current_position_register, 0);
- assembler->WriteStackPointerToRegister(
- data_.u_submatch.stack_pointer_register);
- return on_success()->Emit(compiler, trace);
+ if (!trace->is_trivial()) {
+ trace->Flush(compiler, this);
+ } else {
+ assembler->WriteCurrentPositionToRegister(
+ data_.u_submatch.current_position_register, 0);
+ assembler->WriteStackPointerToRegister(
+ data_.u_submatch.stack_pointer_register);
+ on_success()->Emit(compiler, trace);
+ }
+ break;
case EMPTY_MATCH_CHECK: {
int start_pos_reg = data_.u_empty_match_check.start_register;
int stored_pos = 0;
@@ -3073,84 +3292,84 @@
// If we know we haven't advanced and there is no minimum we
// can just backtrack immediately.
assembler->GoTo(trace->backtrack());
- return true;
} else if (know_dist && stored_pos < trace->cp_offset()) {
// If we know we've advanced we can generate the continuation
// immediately.
- return on_success()->Emit(compiler, trace);
+ on_success()->Emit(compiler, trace);
+ } else if (!trace->is_trivial()) {
+ trace->Flush(compiler, this);
+ } else {
+ Label skip_empty_check;
+ // If we have a minimum number of repetitions we check the current
+ // number first and skip the empty check if it's not enough.
+ if (has_minimum) {
+ int limit = data_.u_empty_match_check.repetition_limit;
+ assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
+ }
+ // If the match is empty we bail out, otherwise we fall through
+ // to the on-success continuation.
+ assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
+ trace->backtrack());
+ assembler->Bind(&skip_empty_check);
+ on_success()->Emit(compiler, trace);
}
- if (!trace->is_trivial()) return trace->Flush(compiler, this);
- Label skip_empty_check;
- // If we have a minimum number of repetitions we check the current
- // number first and skip the empty check if it's not enough.
- if (has_minimum) {
- int limit = data_.u_empty_match_check.repetition_limit;
- assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
- }
- // If the match is empty we bail out, otherwise we fall through
- // to the on-success continuation.
- assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
- trace->backtrack());
- assembler->Bind(&skip_empty_check);
- return on_success()->Emit(compiler, trace);
+ break;
}
- case POSITIVE_SUBMATCH_SUCCESS:
- if (!trace->is_trivial()) return trace->Flush(compiler, this);
- // TODO(erikcorry): Implement support.
- if (info()->follows_word_interest ||
- info()->follows_newline_interest ||
- info()->follows_start_interest) {
- return false;
+ case POSITIVE_SUBMATCH_SUCCESS: {
+ if (!trace->is_trivial()) {
+ trace->Flush(compiler, this);
+ return;
}
- if (info()->at_end) {
- Label at_end;
- // Load current character jumps to the label if we are beyond the string
- // end.
- assembler->LoadCurrentCharacter(0, &at_end);
- assembler->GoTo(trace->backtrack());
- assembler->Bind(&at_end);
- }
assembler->ReadCurrentPositionFromRegister(
data_.u_submatch.current_position_register);
assembler->ReadStackPointerFromRegister(
data_.u_submatch.stack_pointer_register);
- return on_success()->Emit(compiler, trace);
+ int clear_register_count = data_.u_submatch.clear_register_count;
+ if (clear_register_count == 0) {
+ on_success()->Emit(compiler, trace);
+ return;
+ }
+ int clear_registers_from = data_.u_submatch.clear_register_from;
+ Label clear_registers_backtrack;
+ Trace new_trace = *trace;
+ new_trace.set_backtrack(&clear_registers_backtrack);
+ on_success()->Emit(compiler, &new_trace);
+
+ assembler->Bind(&clear_registers_backtrack);
+ int clear_registers_to = clear_registers_from + clear_register_count - 1;
+ assembler->ClearRegisters(clear_registers_from, clear_registers_to);
+
+ ASSERT(trace->backtrack() == NULL);
+ assembler->Backtrack();
+ return;
+ }
default:
UNREACHABLE();
- return false;
}
}
-bool BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
+void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
RegExpMacroAssembler* assembler = compiler->macro_assembler();
if (!trace->is_trivial()) {
- return trace->Flush(compiler, this);
+ trace->Flush(compiler, this);
+ return;
}
LimitResult limit_result = LimitVersions(compiler, trace);
- if (limit_result == DONE) return true;
- if (limit_result == FAIL) return false;
+ if (limit_result == DONE) return;
ASSERT(limit_result == CONTINUE);
RecursionCheck rc(compiler);
ASSERT_EQ(start_reg_ + 1, end_reg_);
- if (info()->at_end) {
- // If we are constrained to match at the end of the input then succeed
- // iff the back reference is empty.
- assembler->CheckNotRegistersEqual(start_reg_,
- end_reg_,
- trace->backtrack());
+ if (compiler->ignore_case()) {
+ assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
+ trace->backtrack());
} else {
- if (compiler->ignore_case()) {
- assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
- trace->backtrack());
- } else {
- assembler->CheckNotBackReference(start_reg_, trace->backtrack());
- }
+ assembler->CheckNotBackReference(start_reg_, trace->backtrack());
}
- return on_success()->Emit(compiler, trace);
+ on_success()->Emit(compiler, trace);
}
@@ -3389,6 +3608,33 @@
}
+void DotPrinter::VisitAssertion(AssertionNode* that) {
+ stream()->Add(" n%p [", that);
+ switch (that->type()) {
+ case AssertionNode::AT_END:
+ stream()->Add("label=\"$\", shape=septagon");
+ break;
+ case AssertionNode::AT_START:
+ stream()->Add("label=\"^\", shape=septagon");
+ break;
+ case AssertionNode::AT_BOUNDARY:
+ stream()->Add("label=\"\\b\", shape=septagon");
+ break;
+ case AssertionNode::AT_NON_BOUNDARY:
+ stream()->Add("label=\"\\B\", shape=septagon");
+ break;
+ case AssertionNode::AFTER_NEWLINE:
+ stream()->Add("label=\"(?<=\\n)\", shape=septagon");
+ break;
+ }
+ stream()->Add("];\n");
+ PrintAttributes(that);
+ RegExpNode* successor = that->on_success();
+ stream()->Add(" n%p -> n%p;\n", that, successor);
+ Visit(successor);
+}
+
+
void DotPrinter::VisitAction(ActionNode* that) {
stream()->Add(" n%p [", that);
switch (that->type_) {
@@ -3713,7 +3959,7 @@
if (body_can_be_empty) {
// If the body can be empty we need to store the start position
// so we can bail out if it was empty.
- body_node = ActionNode::StorePosition(body_start_reg, body_node);
+ body_node = ActionNode::StorePosition(body_start_reg, false, body_node);
}
if (needs_capture_clearing) {
// Before entering the body of this loop we need to clear captures.
@@ -3749,22 +3995,51 @@
NodeInfo info;
switch (type()) {
case START_OF_LINE:
- info.follows_newline_interest = true;
- break;
+ return AssertionNode::AfterNewline(on_success);
case START_OF_INPUT:
- info.follows_start_interest = true;
- break;
- case BOUNDARY: case NON_BOUNDARY:
- info.follows_word_interest = true;
- break;
+ return AssertionNode::AtStart(on_success);
+ case BOUNDARY:
+ return AssertionNode::AtBoundary(on_success);
+ case NON_BOUNDARY:
+ return AssertionNode::AtNonBoundary(on_success);
case END_OF_INPUT:
- info.at_end = true;
- break;
- case END_OF_LINE:
- // This is wrong but has the effect of making the compiler abort.
- info.at_end = true;
+ return AssertionNode::AtEnd(on_success);
+ case END_OF_LINE: {
+ // Compile $ in multiline regexps as an alternation with a positive
+ // lookahead in one side and an end-of-input on the other side.
+ // We need two registers for the lookahead.
+ int stack_pointer_register = compiler->AllocateRegister();
+ int position_register = compiler->AllocateRegister();
+ // The ChoiceNode to distinguish between a newline and end-of-input.
+ ChoiceNode* result = new ChoiceNode(2);
+ // Create a newline atom.
+ ZoneList<CharacterRange>* newline_ranges =
+ new ZoneList<CharacterRange>(3);
+ CharacterRange::AddClassEscape('n', newline_ranges);
+ RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n');
+ TextNode* newline_matcher = new TextNode(
+ newline_atom,
+ ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
+ position_register,
+ 0, // No captures inside.
+ -1, // Ignored if no captures.
+ on_success));
+ // Create an end-of-input matcher.
+ RegExpNode* end_of_line = ActionNode::BeginSubmatch(
+ stack_pointer_register,
+ position_register,
+ newline_matcher);
+ // Add the two alternatives to the ChoiceNode.
+ GuardedAlternative eol_alternative(end_of_line);
+ result->AddAlternative(eol_alternative);
+ GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
+ result->AddAlternative(end_alternative);
+ return result;
+ }
+ default:
+ UNREACHABLE();
}
- return on_success->PropagateForward(&info);
+ return on_success;
}
@@ -3786,16 +4061,26 @@
RegExpNode* on_success) {
int stack_pointer_register = compiler->AllocateRegister();
int position_register = compiler->AllocateRegister();
+
+ const int registers_per_capture = 2;
+ const int register_of_first_capture = 2;
+ int register_count = capture_count_ * registers_per_capture;
+ int register_start =
+ register_of_first_capture + capture_from_ * registers_per_capture;
+
RegExpNode* success;
if (is_positive()) {
- return ActionNode::BeginSubmatch(
+ RegExpNode* node = ActionNode::BeginSubmatch(
stack_pointer_register,
position_register,
body()->ToNode(
compiler,
ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
position_register,
+ register_count,
+ register_start,
on_success)));
+ return node;
} else {
// We use a ChoiceNode for a negative lookahead because it has most of
// the characteristics we need. It has the body of the lookahead as its
@@ -3804,19 +4089,19 @@
// NegativeSubmatchSuccess will unwind the stack including everything the
// choice node set up and backtrack. If the first alternative fails then
// the second alternative is tried, which is exactly the desired result
- // for a negative lookahead. In the case where the dispatch table
- // determines that the first alternative cannot match we will save time
- // by not trying it. Things are not quite so well-optimized if the
- // dispatch table determines that the second alternative cannot match.
- // In this case we could optimize by immediately backtracking.
- ChoiceNode* choice_node = new ChoiceNode(2);
+ // for a negative lookahead. The NegativeLookaheadChoiceNode is a special
+ // ChoiceNode that knows to ignore the first exit when calculating quick
+ // checks.
GuardedAlternative body_alt(
body()->ToNode(
compiler,
success = new NegativeSubmatchSuccess(stack_pointer_register,
- position_register)));
- choice_node->AddAlternative(body_alt);
- choice_node->AddAlternative(GuardedAlternative(on_success));
+ position_register,
+ register_count,
+ register_start)));
+ ChoiceNode* choice_node =
+ new NegativeLookaheadChoiceNode(body_alt,
+ GuardedAlternative(on_success));
return ActionNode::BeginSubmatch(stack_pointer_register,
position_register,
choice_node);
@@ -3836,9 +4121,9 @@
RegExpNode* on_success) {
int start_reg = RegExpCapture::StartRegister(index);
int end_reg = RegExpCapture::EndRegister(index);
- RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success);
+ RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
RegExpNode* body_node = body->ToNode(compiler, store_end);
- return ActionNode::StorePosition(start_reg, body_node);
+ return ActionNode::StorePosition(start_reg, true, body_node);
}
@@ -3911,6 +4196,13 @@
case '*':
ranges->Add(CharacterRange::Everything());
break;
+ // This is the set of characters matched by the $ and ^ symbols
+ // in multiline mode.
+ case 'n':
+ AddClass(kLineTerminatorRanges,
+ kLineTerminatorRangeCount,
+ ranges);
+ break;
default:
UNREACHABLE();
}
@@ -4096,62 +4388,6 @@
}
-RegExpNode* ActionNode::PropagateForward(NodeInfo* info) {
- NodeInfo full_info(*this->info());
- full_info.AddFromPreceding(info);
- bool cloned = false;
- ActionNode* action = EnsureSibling(this, &full_info, &cloned);
- action->set_on_success(action->on_success()->PropagateForward(info));
- return action;
-}
-
-
-RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) {
- NodeInfo full_info(*this->info());
- full_info.AddFromPreceding(info);
- bool cloned = false;
- ChoiceNode* choice = EnsureSibling(this, &full_info, &cloned);
- if (cloned) {
- ZoneList<GuardedAlternative>* old_alternatives = alternatives();
- int count = old_alternatives->length();
- choice->alternatives_ = new ZoneList<GuardedAlternative>(count);
- for (int i = 0; i < count; i++) {
- GuardedAlternative alternative = old_alternatives->at(i);
- alternative.set_node(alternative.node()->PropagateForward(info));
- choice->alternatives()->Add(alternative);
- }
- }
- return choice;
-}
-
-
-RegExpNode* EndNode::PropagateForward(NodeInfo* info) {
- return PropagateToEndpoint(this, info);
-}
-
-
-RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) {
- NodeInfo full_info(*this->info());
- full_info.AddFromPreceding(info);
- bool cloned = false;
- BackReferenceNode* back_ref = EnsureSibling(this, &full_info, &cloned);
- if (cloned) {
- // TODO(erikcorry): A back reference has to have two successors (by default
- // the same node). The first is used if the back reference matches a non-
- // empty back reference, the second if it matches an empty one. This
- // doesn't matter for at_end, which is the only one implemented right now,
- // but it will matter for other pieces of info.
- back_ref->set_on_success(back_ref->on_success()->PropagateForward(info));
- }
- return back_ref;
-}
-
-
-RegExpNode* TextNode::PropagateForward(NodeInfo* info) {
- return PropagateToEndpoint(this, info);
-}
-
-
// -------------------------------------------------------------------
// Splay tree
@@ -4389,6 +4625,11 @@
}
+void Analysis::VisitAssertion(AssertionNode* that) {
+ EnsureAnalyzed(that->on_success());
+}
+
+
// -------------------------------------------------------------------
// Dispatch table construction
@@ -4441,7 +4682,13 @@
}
+void DispatchTableConstructor::VisitAssertion(AssertionNode* that) {
+ RegExpNode* target = that->on_success();
+ target->Accept(this);
+}
+
+
static int CompareRangeByFrom(const CharacterRange* a,
const CharacterRange* b) {
return Compare<uc16>(a->from(), b->from());
@@ -4504,33 +4751,32 @@
bool is_multiline,
Handle<String> pattern,
bool is_ascii) {
+ if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
+ return IrregexpRegExpTooBig(pattern);
+ }
RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii);
// Wrap the body of the regexp in capture #0.
RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
0,
&compiler,
compiler.accept());
- // Add a .*? at the beginning, outside the body capture.
- // Note: We could choose to not add this if the regexp is anchored at
- // the start of the input but I'm not sure how best to do that and
- // since we don't even handle ^ yet I'm saving that optimization for
- // later.
- RegExpNode* node = RegExpQuantifier::ToNode(0,
- RegExpTree::kInfinity,
- false,
- new RegExpCharacterClass('*'),
- &compiler,
- captured_body);
+ RegExpNode* node = captured_body;
+ if (!data->tree->IsAnchored()) {
+ // Add a .*? at the beginning, outside the body capture, unless
+ // this expression is anchored at the beginning.
+ node = RegExpQuantifier::ToNode(0,
+ RegExpTree::kInfinity,
+ false,
+ new RegExpCharacterClass('*'),
+ &compiler,
+ captured_body);
+ }
data->node = node;
Analysis analysis(ignore_case);
analysis.EnsureAnalyzed(node);
NodeInfo info = *node->info();
- if (is_multiline && !FLAG_attempt_multiline_irregexp) {
- return Handle<FixedArray>::null();
- }
-
if (FLAG_irregexp_native) {
#ifdef ARM
// Unimplemented, fall-through to bytecode implementation.
« no previous file with comments | « src/jsregexp.h ('k') | src/macro-assembler-arm.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698