src/jsregexp.cc - Issue 18842: Experimental: periodic merge of the bleeding_edge branch to the...

Unified Diff: src/jsregexp.cc

Issue 18842: Experimental: periodic merge of the bleeding_edge branch to the... (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/toiger/

Patch Set: Created 11 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/jsregexp.cc

===================================================================

--- src/jsregexp.cc (revision 1168)

+++ src/jsregexp.cc (working copy)

@@ -298,21 +298,10 @@

return AtomExec(regexp, subject, index);

case JSRegExp::IRREGEXP: {

Handle<Object> result = IrregexpExec(regexp, subject, index);

- if (!result.is_null() || Top::has_pending_exception()) {

- return result;

- }

- // We couldn't handle the regexp using Irregexp, so fall back

- // on JSCRE.

- // Reset the JSRegExp to use JSCRE.

- JscrePrepare(regexp,

- Handle<String>(regexp->Pattern()),

- regexp->GetFlags());

- // Fall-through to JSCRE.

+ if (result.is_null()) ASSERT(Top::has_pending_exception());

+ return result;

}

case JSRegExp::JSCRE:

- if (FLAG_disable_jscre) {

- UNIMPLEMENTED();

- }

return JscreExec(regexp, subject, index);

default:

UNREACHABLE();

@@ -328,22 +317,10 @@

return AtomExecGlobal(regexp, subject);

case JSRegExp::IRREGEXP: {

Handle<Object> result = IrregexpExecGlobal(regexp, subject);

- if (!result.is_null() || Top::has_pending_exception()) {

- return result;

- }

- // Empty handle as result but no exception thrown means that

- // the regexp contains features not yet handled by the irregexp

- // compiler.

- // We have to fall back on JSCRE. Reset the JSRegExp to use JSCRE.

- JscrePrepare(regexp,

- Handle<String>(regexp->Pattern()),

- regexp->GetFlags());

- // Fall-through to JSCRE.

+ if (result.is_null()) ASSERT(Top::has_pending_exception());

+ return result;

}

case JSRegExp::JSCRE:

- if (FLAG_disable_jscre) {

- UNIMPLEMENTED();

- }

return JscreExecGlobal(regexp, subject);

default:

UNREACHABLE();

@@ -460,7 +437,7 @@

&JSREMalloc,

&JSREFree);

if (*code == NULL && (malloc_failure->IsRetryAfterGC() ||

- malloc_failure->IsOutOfMemoryFailure())) {

+ malloc_failure->IsOutOfMemoryFailure())) {

return malloc_failure;

} else {

// It doesn't matter which object we return here, we just need to return

@@ -697,7 +674,7 @@

Handle<String> pattern(re->Pattern());

StringShape shape(*pattern);

if (!pattern->IsFlat(shape)) {

- pattern->Flatten(shape);

+ FlattenString(pattern);

}

RegExpCompileData compile_data;

@@ -824,7 +801,7 @@

Handle<Object> matches;

if (!subject->IsFlat(shape)) {

- subject->Flatten(shape);

+ FlattenString(subject);

}

while (true) {

@@ -920,6 +897,7 @@

offsets_vector,

previous_index == 0);

} else { // Sequential string

+ ASSERT(StringShape(*subject).IsSequential());

Address char_address =

is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()

: SeqTwoByteString::cast(*subject)->GetCharsAddress();

@@ -1197,7 +1175,13 @@

public:

RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii);

- int AllocateRegister() { return next_register_++; }

+ int AllocateRegister() {

+ if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {

+ reg_exp_too_big_ = true;

+ return next_register_;

+ }

+ return next_register_++;

+ }

Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,

RegExpNode* start,

@@ -1218,6 +1202,8 @@

inline void IncrementRecursionDepth() { recursion_depth_++; }

inline void DecrementRecursionDepth() { recursion_depth_--; }

+ void SetRegExpTooBig() { reg_exp_too_big_ = true; }

inline bool ignore_case() { return ignore_case_; }

inline bool ascii() { return ascii_; }

@@ -1230,6 +1216,7 @@

RegExpMacroAssembler* macro_assembler_;

bool ignore_case_;

bool ascii_;

+ bool reg_exp_too_big_;

};

@@ -1244,6 +1231,18 @@

};

+static Handle<FixedArray> IrregexpRegExpTooBig(Handle<String> pattern) {

+ Handle<JSArray> array = Factory::NewJSArray(2);

+ SetElement(array, 0, pattern);

+ const char* message = "RegExp too big";

+ SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message)));

+ Handle<Object> regexp_err =

+ Factory::NewSyntaxError("malformed_regexp", array);

+ Top::Throw(*regexp_err);

+ return Handle<FixedArray>();

// Attempts to compile the regexp using an Irregexp code generator. Returns

// a fixed array or a null handle depending on whether it succeeded.

RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii)

@@ -1251,8 +1250,10 @@

work_list_(NULL),

recursion_depth_(0),

ignore_case_(ignore_case),

- ascii_(ascii) {

+ ascii_(ascii),

+ reg_exp_too_big_(false) {

accept_ = new EndNode(EndNode::ACCEPT);

+ ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);

}

@@ -1272,17 +1273,13 @@

Label fail;

macro_assembler->PushBacktrack(&fail);

Trace new_trace;

- if (!start->Emit(this, &new_trace)) {

- fail.Unuse();

- return Handle<FixedArray>::null();

- }

+ start->Emit(this, &new_trace);

macro_assembler_->Bind(&fail);

macro_assembler_->Fail();

while (!work_list.is_empty()) {

- if (!work_list.RemoveLast()->Emit(this, &new_trace)) {

- return Handle<FixedArray>::null();

- }

+ work_list.RemoveLast()->Emit(this, &new_trace);

}

+ if (reg_exp_too_big_) return IrregexpRegExpTooBig(pattern);

Handle<FixedArray> array =

Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength);

array->set(RegExpImpl::kIrregexpImplementationIndex,

@@ -1302,6 +1299,7 @@

return array;

}

bool Trace::DeferredAction::Mentions(int that) {

if (type() == ActionNode::CLEAR_CAPTURES) {

Interval range = static_cast<DeferredClearCaptures*>(this)->range();

@@ -1360,41 +1358,44 @@

}

-void Trace::PushAffectedRegisters(RegExpMacroAssembler* assembler,

- int max_register,

- OutSet& affected_registers) {

- // Stay safe and check every half times the limit.

- // (Round up in case the limit is 1).

- int push_limit = (assembler->stack_limit_slack() + 1) / 2;

- for (int reg = 0, pushes = 0; reg <= max_register; reg++) {

- if (affected_registers.Get(reg)) {

- pushes++;

- RegExpMacroAssembler::StackCheckFlag check_stack_limit =

- (pushes % push_limit) == 0 ?

- RegExpMacroAssembler::kCheckStackLimit :

- RegExpMacroAssembler::kNoStackLimitCheck;

- assembler->PushRegister(reg, check_stack_limit);

- }

void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,

int max_register,

- OutSet& affected_registers) {

+ OutSet& registers_to_pop,

+ OutSet& registers_to_clear) {

for (int reg = max_register; reg >= 0; reg--) {

- if (affected_registers.Get(reg)) assembler->PopRegister(reg);

+ if (registers_to_pop.Get(reg)) assembler->PopRegister(reg);

+ else if (registers_to_clear.Get(reg)) {

+ int clear_to = reg;

+ while (reg > 0 && registers_to_clear.Get(reg - 1)) {

+ reg--;

+ }

+ assembler->ClearRegisters(reg, clear_to);

+ }

}

void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,

int max_register,

- OutSet& affected_registers) {

+ OutSet& affected_registers,

+ OutSet* registers_to_pop,

+ OutSet* registers_to_clear) {

+ // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.

+ const int push_limit = (assembler->stack_limit_slack() + 1) / 2;

for (int reg = 0; reg <= max_register; reg++) {

if (!affected_registers.Get(reg)) {

continue;

}

+ // Count pushes performed to force a stack limit check occasionally.

+ int pushes = 0;

+ // The chronologically first deferred action in the trace

+ // is used to infer the action needed to restore a register

+ // to its previous state (or not, if it's safe to ignore it).

+ enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };

+ DeferredActionUndoType undo_action = IGNORE;

int value = 0;

bool absolute = false;

bool clear = false;

@@ -1409,8 +1410,16 @@

case ActionNode::SET_REGISTER: {

Trace::DeferredSetRegister* psr =

static_cast<Trace::DeferredSetRegister*>(action);

- value += psr->value();

- absolute = true;

+ if (!absolute) {

+ value += psr->value();

+ absolute = true;

+ }

+ // SET_REGISTER is currently only used for newly introduced loop

+ // counters. They can have a significant previous value if they

+ // occour in a loop. TODO(lrn): Propagate this information, so

+ // we can set undo_action to IGNORE if we know there is no value to

+ // restore.

+ undo_action = RESTORE;

ASSERT_EQ(store_position, -1);

ASSERT(!clear);

break;

@@ -1421,6 +1430,7 @@

}

ASSERT_EQ(store_position, -1);

ASSERT(!clear);

+ undo_action = RESTORE;

break;

case ActionNode::STORE_POSITION: {

Trace::DeferredCapture* pc =

@@ -1428,6 +1438,19 @@

if (!clear && store_position == -1) {

store_position = pc->cp_offset();

}

+ // For captures we know that stores and clears alternate.

+ // Other register, are never cleared, and if the occur

+ // inside a loop, they might be assigned more than once.

+ if (reg <= 1) {

+ // Registers zero and one, aka "capture zero", is

+ // always set correctly if we succeed. There is no

+ // need to undo a setting on backtrack, because we

+ // will set it again or fail.

+ undo_action = IGNORE;

+ } else {

+ undo_action = pc->is_capture() ? CLEAR : RESTORE;

+ }

ASSERT(!absolute);

ASSERT_EQ(value, 0);

break;

@@ -1436,8 +1459,10 @@

// Since we're scanning in reverse order, if we've already

// set the position we have to ignore historically earlier

// clearing operations.

- if (store_position == -1)

+ if (store_position == -1) {

clear = true;

+ }

+ undo_action = RESTORE;

ASSERT(!absolute);

ASSERT_EQ(value, 0);

break;

@@ -1448,10 +1473,27 @@

}

+ // Prepare for the undo-action (e.g., push if it's going to be popped).

+ if (undo_action == RESTORE) {

+ pushes++;

+ RegExpMacroAssembler::StackCheckFlag stack_check =

+ RegExpMacroAssembler::kNoStackLimitCheck;

+ if (pushes == push_limit) {

+ stack_check = RegExpMacroAssembler::kCheckStackLimit;

+ pushes = 0;

+ }

+ assembler->PushRegister(reg, stack_check);

+ registers_to_pop->Set(reg);

+ } else if (undo_action == CLEAR) {

+ registers_to_clear->Set(reg);

+ }

+ // Perform the chronologically last action (or accumulated increment)

+ // for the register.

if (store_position != -1) {

assembler->WriteCurrentPositionToRegister(reg, store_position);

} else if (clear) {

- assembler->ClearRegister(reg);

+ assembler->ClearRegisters(reg, reg);

} else if (absolute) {

assembler->SetRegister(reg, value);

} else if (value != 0) {

@@ -1462,9 +1504,9 @@

// This is called as we come into a loop choice node and some other tricky

-// nodes. It normalises the state of the code generator to ensure we can

+// nodes. It normalizes the state of the code generator to ensure we can

// generate generic code.

-bool Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {

+void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {

RegExpMacroAssembler* assembler = compiler->macro_assembler();

ASSERT(actions_ != NULL ||

@@ -1481,14 +1523,21 @@

if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);

// Create a new trivial state and generate the node with that.

Trace new_state;

- return successor->Emit(compiler, &new_state);

+ successor->Emit(compiler, &new_state);

+ return;

}

// Generate deferred actions here along with code to undo them again.

OutSet affected_registers;

int max_register = FindAffectedRegisters(&affected_registers);

- PushAffectedRegisters(assembler, max_register, affected_registers);

- PerformDeferredActions(assembler, max_register, affected_registers);

+ OutSet registers_to_pop;

+ OutSet registers_to_clear;

+ PerformDeferredActions(assembler,

+ max_register,

+ affected_registers,

+ &registers_to_pop,

+ &registers_to_clear);

if (backtrack() != NULL) {

// Here we have a concrete backtrack location. These are set up by choice

// nodes and so they indicate that we have a deferred save of the current

@@ -1503,58 +1552,56 @@

Label undo;

assembler->PushBacktrack(&undo);

Trace new_state;

- bool ok = successor->Emit(compiler, &new_state);

+ successor->Emit(compiler, &new_state);

// On backtrack we need to restore state.

assembler->Bind(&undo);

- if (!ok) return false;

if (backtrack() != NULL) {

assembler->PopCurrentPosition();

}

- RestoreAffectedRegisters(assembler, max_register, affected_registers);

+ RestoreAffectedRegisters(assembler,

+ max_register,

+ registers_to_pop,

+ registers_to_clear);

if (backtrack() == NULL) {

assembler->Backtrack();

} else {

assembler->GoTo(backtrack());

}

- return true;

}

-void EndNode::EmitInfoChecks(RegExpMacroAssembler* assembler, Trace* trace) {

- if (info()->at_end) {

- Label succeed;

- // LoadCurrentCharacter will go to the label if we are at the end of the

- // input string.

- assembler->LoadCurrentCharacter(0, &succeed);

- assembler->GoTo(trace->backtrack());

- assembler->Bind(&succeed);

- }

+void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {

+ RegExpMacroAssembler* assembler = compiler->macro_assembler();

+ // Omit flushing the trace. We discard the entire stack frame anyway.

-bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {

- if (!trace->is_trivial()) {

- return trace->Flush(compiler, this);

- }

- RegExpMacroAssembler* assembler = compiler->macro_assembler();

if (!label()->is_bound()) {

+ // We are completely independent of the trace, since we ignore it,

+ // so this code can be used as the generic version.

assembler->Bind(label());

}

- EmitInfoChecks(assembler, trace);

+ // Throw away everything on the backtrack stack since the start

+ // of the negative submatch and restore the character position.

assembler->ReadCurrentPositionFromRegister(current_position_register_);

assembler->ReadStackPointerFromRegister(stack_pointer_register_);

+ if (clear_capture_count_ > 0) {

+ // Clear any captures that might have been performed during the success

+ // of the body of the negative look-ahead.

+ int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;

+ assembler->ClearRegisters(clear_capture_start_, clear_capture_end);

+ }

// Now that we have unwound the stack we find at the top of the stack the

// backtrack that the BeginSubmatch node got.

assembler->Backtrack();

- return true;

}

-bool EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {

+void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {

if (!trace->is_trivial()) {

- return trace->Flush(compiler, this);

+ trace->Flush(compiler, this);

+ return;

}

RegExpMacroAssembler* assembler = compiler->macro_assembler();

if (!label()->is_bound()) {

@@ -1562,19 +1609,16 @@

}

switch (action_) {

case ACCEPT:

- EmitInfoChecks(assembler, trace);

assembler->Succeed();

- return true;

+ return;

case BACKTRACK:

- ASSERT(!info()->at_end);

assembler->GoTo(trace->backtrack());

- return true;

+ return;

case NEGATIVE_SUBMATCH_SUCCESS:

// This case is handled in a different virtual method.

UNREACHABLE();

}

UNIMPLEMENTED();

- return false;

}

@@ -1602,9 +1646,12 @@

}

-ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) {

+ActionNode* ActionNode::StorePosition(int reg,

+ bool is_capture,

+ RegExpNode* on_success) {

ActionNode* result = new ActionNode(STORE_POSITION, on_success);

result->data_.u_position_register.reg = reg;

+ result->data_.u_position_register.is_capture = is_capture;

return result;

}

@@ -1630,10 +1677,14 @@

ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg,

int position_reg,

+ int clear_register_count,

+ int clear_register_from,

RegExpNode* on_success) {

ActionNode* result = new ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);

result->data_.u_submatch.stack_pointer_register = stack_reg;

result->data_.u_submatch.current_position_register = position_reg;

+ result->data_.u_submatch.clear_register_count = clear_register_count;

+ result->data_.u_submatch.clear_register_from = clear_register_from;

return result;

}

@@ -1849,6 +1900,9 @@

// ASCII optimizations for us.

macro_assembler->GoTo(on_failure);

}

+ if (check_offset) {

+ macro_assembler->CheckPosition(cp_offset, on_failure);

+ }

return;

}

@@ -1856,10 +1910,8 @@

!cc->is_negated() &&

ranges->at(0).IsEverything(max_char)) {

// This is a common case hit by non-anchored expressions.

- // TODO(erikcorry): We should have a macro assembler instruction that just

- // checks for end of string without loading the character.

if (check_offset) {

- macro_assembler->LoadCurrentCharacter(cp_offset, on_failure);

+ macro_assembler->CheckPosition(cp_offset, on_failure);

}

return;

}

@@ -1935,13 +1987,6 @@

RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,

Trace* trace) {

- // TODO(erikcorry): Implement support.

- if (info_.follows_word_interest ||

- info_.follows_newline_interest ||

- info_.follows_start_interest) {

- return FAIL;

- }

// If we are generating a greedy loop then don't stop and don't reuse code.

if (trace->stop_node() != NULL) {

return CONTINUE;

@@ -1978,27 +2023,62 @@

// If we get here code has been generated for this node too many times or

// recursion is too deep. Time to switch to a generic version. The code for

// generic versions above can handle deep recursion properly.

- bool ok = trace->Flush(compiler, this);

- return ok ? DONE : FAIL;

+ trace->Flush(compiler, this);

+ return DONE;

}

-int ActionNode::EatsAtLeast(int recursion_depth) {

+int ActionNode::EatsAtLeast(int still_to_find, int recursion_depth) {

if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;

if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!

- return on_success()->EatsAtLeast(recursion_depth + 1);

+ return on_success()->EatsAtLeast(still_to_find, recursion_depth + 1);

}

-int TextNode::EatsAtLeast(int recursion_depth) {

+int AssertionNode::EatsAtLeast(int still_to_find, int recursion_depth) {

+ if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;

+ return on_success()->EatsAtLeast(still_to_find, recursion_depth + 1);

+int BackReferenceNode::EatsAtLeast(int still_to_find, int recursion_depth) {

+ if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;

+ return on_success()->EatsAtLeast(still_to_find, recursion_depth + 1);

+int TextNode::EatsAtLeast(int still_to_find, int recursion_depth) {

int answer = Length();

- if (answer >= 4) return answer;

+ if (answer >= still_to_find) return answer;

if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer;

- return answer + on_success()->EatsAtLeast(recursion_depth + 1);

+ return answer + on_success()->EatsAtLeast(still_to_find - answer,

+ recursion_depth + 1);

}

-int ChoiceNode::EatsAtLeastHelper(int recursion_depth,

+int NegativeLookaheadChoiceNode:: EatsAtLeast(int still_to_find,

+ int recursion_depth) {

+ if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;

+ // Alternative 0 is the negative lookahead, alternative 1 is what comes

+ // afterwards.

+ RegExpNode* node = alternatives_->at(1).node();

+ return node->EatsAtLeast(still_to_find, recursion_depth + 1);

+void NegativeLookaheadChoiceNode::GetQuickCheckDetails(

+ QuickCheckDetails* details,

+ RegExpCompiler* compiler,

+ int filled_in) {

+ // Alternative 0 is the negative lookahead, alternative 1 is what comes

+ // afterwards.

+ RegExpNode* node = alternatives_->at(1).node();

+ return node->GetQuickCheckDetails(details, compiler, filled_in);

+int ChoiceNode::EatsAtLeastHelper(int still_to_find,

+ int recursion_depth,

RegExpNode* ignore_this_node) {

if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;

int min = 100;

@@ -2006,20 +2086,21 @@

for (int i = 0; i < choice_count; i++) {

RegExpNode* node = alternatives_->at(i).node();

if (node == ignore_this_node) continue;

- int node_eats_at_least = node->EatsAtLeast(recursion_depth + 1);

+ int node_eats_at_least = node->EatsAtLeast(still_to_find,

+ recursion_depth + 1);

if (node_eats_at_least < min) min = node_eats_at_least;

}

return min;

}

-int LoopChoiceNode::EatsAtLeast(int recursion_depth) {

- return EatsAtLeastHelper(recursion_depth, loop_node_);

+int LoopChoiceNode::EatsAtLeast(int still_to_find, int recursion_depth) {

+ return EatsAtLeastHelper(still_to_find, recursion_depth, loop_node_);

}

-int ChoiceNode::EatsAtLeast(int recursion_depth) {

- return EatsAtLeastHelper(recursion_depth, NULL);

+int ChoiceNode::EatsAtLeast(int still_to_find, int recursion_depth) {

+ return EatsAtLeastHelper(still_to_find, recursion_depth, NULL);

}

@@ -2257,7 +2338,7 @@

void QuickCheckDetails::Advance(int by, bool ascii) {

- ASSERT(by > 0);

+ ASSERT(by >= 0);

if (by >= characters_) {

Clear();

return;

@@ -2342,6 +2423,150 @@

}

+// Check for [0-9A-Z_a-z].

+static void EmitWordCheck(RegExpMacroAssembler* assembler,

+ Label* word,

+ Label* non_word,

+ bool fall_through_on_word) {

+ assembler->CheckCharacterGT('z', non_word);

+ assembler->CheckCharacterLT('0', non_word);

+ assembler->CheckCharacterGT('a' - 1, word);

+ assembler->CheckCharacterLT('9' + 1, word);

+ assembler->CheckCharacterLT('A', non_word);

+ assembler->CheckCharacterLT('Z' + 1, word);

+ if (fall_through_on_word) {

+ assembler->CheckNotCharacter('_', non_word);

+ } else {

+ assembler->CheckCharacter('_', word);

+ }

+// Emit the code to check for a ^ in multiline mode (1-character lookbehind

+// that matches newline or the start of input).

+static void EmitHat(RegExpCompiler* compiler,

+ RegExpNode* on_success,

+ Trace* trace) {

+ RegExpMacroAssembler* assembler = compiler->macro_assembler();

+ // We will be loading the previous character into the current character

+ // register.

+ Trace new_trace(*trace);

+ new_trace.InvalidateCurrentCharacter();

+ Label ok;

+ if (new_trace.cp_offset() == 0) {

+ // The start of input counts as a newline in this context, so skip to

+ // ok if we are at the start.

+ assembler->CheckAtStart(&ok);

+ }

+ // We already checked that we are not at the start of input so it must be

+ // OK to load the previous character.

+ assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,

+ new_trace.backtrack(),

+ false);

+ // Newline means \n, \r, 0x2028 or 0x2029.

+ if (!compiler->ascii()) {

+ assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);

+ }

+ assembler->CheckCharacter('\n', &ok);

+ assembler->CheckNotCharacter('\r', new_trace.backtrack());

+ assembler->Bind(&ok);

+ on_success->Emit(compiler, &new_trace);

+// Emit the code to handle \b and \B (word-boundary or non-word-boundary).

+static void EmitBoundaryCheck(AssertionNode::AssertionNodeType type,

+ RegExpCompiler* compiler,

+ RegExpNode* on_success,

+ Trace* trace) {

+ RegExpMacroAssembler* assembler = compiler->macro_assembler();

+ Label before_non_word;

+ Label before_word;

+ if (trace->characters_preloaded() != 1) {

+ assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);

+ }

+ // Fall through on non-word.

+ EmitWordCheck(assembler, &before_word, &before_non_word, false);

+ // We will be loading the previous character into the current character

+ // register.

+ Trace new_trace(*trace);

+ new_trace.InvalidateCurrentCharacter();

+ Label ok;

+ Label* boundary;

+ Label* not_boundary;

+ if (type == AssertionNode::AT_BOUNDARY) {

+ boundary = &ok;

+ not_boundary = new_trace.backtrack();

+ } else {

+ not_boundary = &ok;

+ boundary = new_trace.backtrack();

+ }

+ // Next character is not a word character.

+ assembler->Bind(&before_non_word);

+ if (new_trace.cp_offset() == 0) {

+ // The start of input counts as a non-word character, so the question is

+ // decided if we are at the start.

+ assembler->CheckAtStart(not_boundary);

+ }

+ // We already checked that we are not at the start of input so it must be

+ // OK to load the previous character.

+ assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,

+ &ok, // Unused dummy label in this call.

+ false);

+ // Fall through on non-word.

+ EmitWordCheck(assembler, boundary, not_boundary, false);

+ assembler->GoTo(not_boundary);

+ // Next character is a word character.

+ assembler->Bind(&before_word);

+ if (new_trace.cp_offset() == 0) {

+ // The start of input counts as a non-word character, so the question is

+ // decided if we are at the start.

+ assembler->CheckAtStart(boundary);

+ }

+ // We already checked that we are not at the start of input so it must be

+ // OK to load the previous character.

+ assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,

+ &ok, // Unused dummy label in this call.

+ false);

+ bool fall_through_on_word = (type == AssertionNode::AT_NON_BOUNDARY);

+ EmitWordCheck(assembler, not_boundary, boundary, fall_through_on_word);

+ assembler->Bind(&ok);

+ on_success->Emit(compiler, &new_trace);

+void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {

+ RegExpMacroAssembler* assembler = compiler->macro_assembler();

+ switch (type_) {

+ case AT_END: {

+ Label ok;

+ assembler->CheckPosition(trace->cp_offset(), &ok);

+ assembler->GoTo(trace->backtrack());

+ assembler->Bind(&ok);

+ break;

+ }

+ case AT_START:

+ assembler->CheckNotAtStart(trace->backtrack());

+ break;

+ case AFTER_NEWLINE:

+ EmitHat(compiler, on_success(), trace);

+ return;

+ case AT_NON_BOUNDARY:

+ case AT_BOUNDARY:

+ EmitBoundaryCheck(type_, compiler, on_success(), trace);

+ return;

+ }

+ on_success()->Emit(compiler, trace);

// We call this repeatedly to generate code for each pass over the text node.

// The passes are in increasing order of difficulty because we hope one

// of the first passes will fail in which case we are saved the work of the

@@ -2481,23 +2706,16 @@

// pass from left to right. Instead we pass over the text node several times,

// emitting code for some character positions every time. See the comment on

// TextEmitPass for details.

-bool TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {

+void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {

LimitResult limit_result = LimitVersions(compiler, trace);

- if (limit_result == FAIL) return false;

- if (limit_result == DONE) return true;

+ if (limit_result == DONE) return;

ASSERT(limit_result == CONTINUE);

- if (info()->follows_word_interest ||

- info()->follows_newline_interest ||

- info()->follows_start_interest) {

- return false;

+ if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {

+ compiler->SetRegExpTooBig();

+ return;

}

- if (info()->at_end) {

- compiler->macro_assembler()->GoTo(trace->backtrack());

- return true;

- }

if (compiler->ascii()) {

int dummy = 0;

TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);

@@ -2555,13 +2773,18 @@

&bound_checked_to);

Trace successor_trace(*trace);

- successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler->ascii());

+ successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);

RecursionCheck rc(compiler);

- return on_success()->Emit(compiler, &successor_trace);

+ on_success()->Emit(compiler, &successor_trace);

}

-void Trace::AdvanceCurrentPositionInTrace(int by, bool ascii) {

+void Trace::InvalidateCurrentCharacter() {

+ characters_preloaded_ = 0;

+void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {

ASSERT(by > 0);

// We don't have an instruction for shifting the current character register

// down or for using a shifted value for anything so lets just forget that

@@ -2570,8 +2793,12 @@

// Adjust the offsets of the quick check performed information. This

// information is used to find out what we already determined about the

// characters by means of mask and compare.

- quick_check_performed_.Advance(by, ascii);

+ quick_check_performed_.Advance(by, compiler->ascii());

cp_offset_ += by;

+ if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {

+ compiler->SetRegExpTooBig();

+ cp_offset_ = 0;

+ }

bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);

}

@@ -2616,12 +2843,6 @@

if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {

return kNodeIsTooComplexForGreedyLoops;

}

- NodeInfo* info = node->info();

- if (info->follows_word_interest ||

- info->follows_newline_interest ||

- info->follows_start_interest) {

- return kNodeIsTooComplexForGreedyLoops;

- }

int node_length = node->GreedyLoopTextLength();

if (node_length == kNodeIsTooComplexForGreedyLoops) {

return kNodeIsTooComplexForGreedyLoops;

@@ -2648,7 +2869,7 @@

}

-bool LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

+void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

if (trace->stop_node() == this) {

int text_length = GreedyLoopTextLength(&(alternatives_->at(0)));

@@ -2658,18 +2879,19 @@

ASSERT(trace->cp_offset() == text_length);

macro_assembler->AdvanceCurrentPosition(text_length);

macro_assembler->GoTo(trace->loop_label());

- return true;

+ return;

}

ASSERT(trace->stop_node() == NULL);

if (!trace->is_trivial()) {

- return trace->Flush(compiler, this);

+ trace->Flush(compiler, this);

+ return;

}

- return ChoiceNode::Emit(compiler, trace);

+ ChoiceNode::Emit(compiler, trace);

}

int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler) {

- int preload_characters = EatsAtLeast(0);

+ int preload_characters = EatsAtLeast(4, 0);

#ifdef CAN_READ_UNALIGNED

bool ascii = compiler->ascii();

if (ascii) {

@@ -2817,7 +3039,7 @@

-bool ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

+void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

int choice_count = alternatives_->length();

#ifdef DEBUG

@@ -2832,8 +3054,7 @@

#endif

LimitResult limit_result = LimitVersions(compiler, trace);

- if (limit_result == DONE) return true;

- if (limit_result == FAIL) return false;

+ if (limit_result == DONE) return;

ASSERT(limit_result == CONTINUE);

RecursionCheck rc(compiler);

@@ -2864,13 +3085,8 @@

macro_assembler->Bind(&loop_label);

greedy_match_trace.set_stop_node(this);

greedy_match_trace.set_loop_label(&loop_label);

- bool ok = alternatives_->at(0).node()->Emit(compiler,

- &greedy_match_trace);

+ alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);

macro_assembler->Bind(&greedy_match_failed);

- if (!ok) {

- greedy_loop_label.Unuse();

- return false;

- }

}

Label second_choice; // For use in greedy matches.

@@ -2903,7 +3119,8 @@

new_trace.quick_check_performed()->Clear();

alt_gen->expects_preload = preload_is_current;

bool generate_full_check_inline = false;

- if (alternative.node()->EmitQuickCheck(compiler,

+ if (try_to_emit_quick_check_for_alternative(i) &&

+ alternative.node()->EmitQuickCheck(compiler,

&new_trace,

preload_has_checked_bounds,

&alt_gen->possible_success,

@@ -2941,10 +3158,7 @@

for (int j = 0; j < guard_count; j++) {

GenerateGuard(macro_assembler, guards->at(j), &new_trace);

}

- if (!alternative.node()->Emit(compiler, &new_trace)) {

- greedy_loop_label.Unuse();

- return false;

- }

+ alternative.node()->Emit(compiler, &new_trace);

preload_is_current = false;

}

macro_assembler->Bind(&alt_gen->after);

@@ -2962,26 +3176,23 @@

// label was bound.

for (int i = first_normal_choice; i < choice_count - 1; i++) {

AlternativeGeneration* alt_gen = alt_gens.at(i);

- if (!EmitOutOfLineContinuation(compiler,

- current_trace,

- alternatives_->at(i),

- alt_gen,

- preload_characters,

- alt_gens.at(i + 1)->expects_preload)) {

- return false;

- }

+ EmitOutOfLineContinuation(compiler,

+ current_trace,

+ alternatives_->at(i),

+ alt_gen,

+ preload_characters,

+ alt_gens.at(i + 1)->expects_preload);

}

- return true;

}

-bool ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,

+void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,

Trace* trace,

GuardedAlternative alternative,

AlternativeGeneration* alt_gen,

int preload_characters,

bool next_expects_preload) {

- if (!alt_gen->possible_success.is_linked()) return true;

+ if (!alt_gen->possible_success.is_linked()) return;

RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();

macro_assembler->Bind(&alt_gen->possible_success);

@@ -2996,7 +3207,7 @@

for (int j = 0; j < guard_count; j++) {

GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);

}

- bool ok = alternative.node()->Emit(compiler, &out_of_line_trace);

+ alternative.node()->Emit(compiler, &out_of_line_trace);

macro_assembler->Bind(&reload_current_char);

// Reload the current character, since the next quick check expects that.

// We don't need to check bounds here because we only get into this

@@ -3006,22 +3217,20 @@

false,

preload_characters);

macro_assembler->GoTo(&(alt_gen->after));

- return ok;

} else {

out_of_line_trace.set_backtrack(&(alt_gen->after));

for (int j = 0; j < guard_count; j++) {

GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);

}

- return alternative.node()->Emit(compiler, &out_of_line_trace);

+ alternative.node()->Emit(compiler, &out_of_line_trace);

}

-bool ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {

+void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {

RegExpMacroAssembler* assembler = compiler->macro_assembler();

LimitResult limit_result = LimitVersions(compiler, trace);

- if (limit_result == DONE) return true;

- if (limit_result == FAIL) return false;

+ if (limit_result == DONE) return;

ASSERT(limit_result == CONTINUE);

RecursionCheck rc(compiler);

@@ -3029,24 +3238,29 @@

switch (type_) {

case STORE_POSITION: {

Trace::DeferredCapture

- new_capture(data_.u_position_register.reg, trace);

+ new_capture(data_.u_position_register.reg,

+ data_.u_position_register.is_capture,

+ trace);

Trace new_trace = *trace;

new_trace.add_action(&new_capture);

- return on_success()->Emit(compiler, &new_trace);

+ on_success()->Emit(compiler, &new_trace);

+ break;

}

case INCREMENT_REGISTER: {

Trace::DeferredIncrementRegister

new_increment(data_.u_increment_register.reg);

Trace new_trace = *trace;

new_trace.add_action(&new_increment);

- return on_success()->Emit(compiler, &new_trace);

+ on_success()->Emit(compiler, &new_trace);

+ break;

}

case SET_REGISTER: {

Trace::DeferredSetRegister

new_set(data_.u_store_register.reg, data_.u_store_register.value);

Trace new_trace = *trace;

new_trace.add_action(&new_set);

- return on_success()->Emit(compiler, &new_trace);

+ on_success()->Emit(compiler, &new_trace);

+ break;

}

case CLEAR_CAPTURES: {

Trace::DeferredClearCaptures

@@ -3054,15 +3268,20 @@

data_.u_clear_captures.range_to));

Trace new_trace = *trace;

new_trace.add_action(&new_capture);

- return on_success()->Emit(compiler, &new_trace);

+ on_success()->Emit(compiler, &new_trace);

+ break;

}

case BEGIN_SUBMATCH:

- if (!trace->is_trivial()) return trace->Flush(compiler, this);

- assembler->WriteCurrentPositionToRegister(

- data_.u_submatch.current_position_register, 0);

- assembler->WriteStackPointerToRegister(

- data_.u_submatch.stack_pointer_register);

- return on_success()->Emit(compiler, trace);

+ if (!trace->is_trivial()) {

+ trace->Flush(compiler, this);

+ } else {

+ assembler->WriteCurrentPositionToRegister(

+ data_.u_submatch.current_position_register, 0);

+ assembler->WriteStackPointerToRegister(

+ data_.u_submatch.stack_pointer_register);

+ on_success()->Emit(compiler, trace);

+ }

+ break;

case EMPTY_MATCH_CHECK: {

int start_pos_reg = data_.u_empty_match_check.start_register;

int stored_pos = 0;

@@ -3073,84 +3292,84 @@

// If we know we haven't advanced and there is no minimum we

// can just backtrack immediately.

assembler->GoTo(trace->backtrack());

- return true;

} else if (know_dist && stored_pos < trace->cp_offset()) {

// If we know we've advanced we can generate the continuation

// immediately.

- return on_success()->Emit(compiler, trace);

+ on_success()->Emit(compiler, trace);

+ } else if (!trace->is_trivial()) {

+ trace->Flush(compiler, this);

+ } else {

+ Label skip_empty_check;

+ // If we have a minimum number of repetitions we check the current

+ // number first and skip the empty check if it's not enough.

+ if (has_minimum) {

+ int limit = data_.u_empty_match_check.repetition_limit;

+ assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);

+ }

+ // If the match is empty we bail out, otherwise we fall through

+ // to the on-success continuation.

+ assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,

+ trace->backtrack());

+ assembler->Bind(&skip_empty_check);

+ on_success()->Emit(compiler, trace);

}

- if (!trace->is_trivial()) return trace->Flush(compiler, this);

- Label skip_empty_check;

- // If we have a minimum number of repetitions we check the current

- // number first and skip the empty check if it's not enough.

- if (has_minimum) {

- int limit = data_.u_empty_match_check.repetition_limit;

- assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);

- }

- // If the match is empty we bail out, otherwise we fall through

- // to the on-success continuation.

- assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,

- trace->backtrack());

- assembler->Bind(&skip_empty_check);

- return on_success()->Emit(compiler, trace);

+ break;

}

- case POSITIVE_SUBMATCH_SUCCESS:

- if (!trace->is_trivial()) return trace->Flush(compiler, this);

- // TODO(erikcorry): Implement support.

- if (info()->follows_word_interest ||

- info()->follows_newline_interest ||

- info()->follows_start_interest) {

- return false;

+ case POSITIVE_SUBMATCH_SUCCESS: {

+ if (!trace->is_trivial()) {

+ trace->Flush(compiler, this);

+ return;

}

- if (info()->at_end) {

- Label at_end;

- // Load current character jumps to the label if we are beyond the string

- // end.

- assembler->LoadCurrentCharacter(0, &at_end);

- assembler->GoTo(trace->backtrack());

- assembler->Bind(&at_end);

- }

assembler->ReadCurrentPositionFromRegister(

data_.u_submatch.current_position_register);

assembler->ReadStackPointerFromRegister(

data_.u_submatch.stack_pointer_register);

- return on_success()->Emit(compiler, trace);

+ int clear_register_count = data_.u_submatch.clear_register_count;

+ if (clear_register_count == 0) {

+ on_success()->Emit(compiler, trace);

+ return;

+ }

+ int clear_registers_from = data_.u_submatch.clear_register_from;

+ Label clear_registers_backtrack;

+ Trace new_trace = *trace;

+ new_trace.set_backtrack(&clear_registers_backtrack);

+ on_success()->Emit(compiler, &new_trace);

+ assembler->Bind(&clear_registers_backtrack);

+ int clear_registers_to = clear_registers_from + clear_register_count - 1;

+ assembler->ClearRegisters(clear_registers_from, clear_registers_to);

+ ASSERT(trace->backtrack() == NULL);

+ assembler->Backtrack();

+ return;

+ }

default:

UNREACHABLE();

- return false;

}

-bool BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

+void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

RegExpMacroAssembler* assembler = compiler->macro_assembler();

if (!trace->is_trivial()) {

- return trace->Flush(compiler, this);

+ trace->Flush(compiler, this);

+ return;

}

LimitResult limit_result = LimitVersions(compiler, trace);

- if (limit_result == DONE) return true;

- if (limit_result == FAIL) return false;

+ if (limit_result == DONE) return;

ASSERT(limit_result == CONTINUE);

RecursionCheck rc(compiler);

ASSERT_EQ(start_reg_ + 1, end_reg_);

- if (info()->at_end) {

- // If we are constrained to match at the end of the input then succeed

- // iff the back reference is empty.

- assembler->CheckNotRegistersEqual(start_reg_,

- end_reg_,

- trace->backtrack());

+ if (compiler->ignore_case()) {

+ assembler->CheckNotBackReferenceIgnoreCase(start_reg_,

+ trace->backtrack());

} else {

- if (compiler->ignore_case()) {

- assembler->CheckNotBackReferenceIgnoreCase(start_reg_,

- trace->backtrack());

- } else {

- assembler->CheckNotBackReference(start_reg_, trace->backtrack());

- }

+ assembler->CheckNotBackReference(start_reg_, trace->backtrack());

}

- return on_success()->Emit(compiler, trace);

+ on_success()->Emit(compiler, trace);

}

@@ -3389,6 +3608,33 @@

}

+void DotPrinter::VisitAssertion(AssertionNode* that) {

+ stream()->Add(" n%p [", that);

+ switch (that->type()) {

+ case AssertionNode::AT_END:

+ stream()->Add("label=\"$\", shape=septagon");

+ break;

+ case AssertionNode::AT_START:

+ stream()->Add("label=\"^\", shape=septagon");

+ break;

+ case AssertionNode::AT_BOUNDARY:

+ stream()->Add("label=\"\\b\", shape=septagon");

+ break;

+ case AssertionNode::AT_NON_BOUNDARY:

+ stream()->Add("label=\"\\B\", shape=septagon");

+ break;

+ case AssertionNode::AFTER_NEWLINE:

+ stream()->Add("label=\"(?<=\\n)\", shape=septagon");

+ break;

+ }

+ stream()->Add("];\n");

+ PrintAttributes(that);

+ RegExpNode* successor = that->on_success();

+ stream()->Add(" n%p -> n%p;\n", that, successor);

+ Visit(successor);

void DotPrinter::VisitAction(ActionNode* that) {

stream()->Add(" n%p [", that);

switch (that->type_) {

@@ -3713,7 +3959,7 @@

if (body_can_be_empty) {

// If the body can be empty we need to store the start position

// so we can bail out if it was empty.

- body_node = ActionNode::StorePosition(body_start_reg, body_node);

+ body_node = ActionNode::StorePosition(body_start_reg, false, body_node);

}

if (needs_capture_clearing) {

// Before entering the body of this loop we need to clear captures.

@@ -3749,22 +3995,51 @@

NodeInfo info;

switch (type()) {

case START_OF_LINE:

- info.follows_newline_interest = true;

- break;

+ return AssertionNode::AfterNewline(on_success);

case START_OF_INPUT:

- info.follows_start_interest = true;

- break;

- case BOUNDARY: case NON_BOUNDARY:

- info.follows_word_interest = true;

- break;

+ return AssertionNode::AtStart(on_success);

+ case BOUNDARY:

+ return AssertionNode::AtBoundary(on_success);

+ case NON_BOUNDARY:

+ return AssertionNode::AtNonBoundary(on_success);

case END_OF_INPUT:

- info.at_end = true;

- break;

- case END_OF_LINE:

- // This is wrong but has the effect of making the compiler abort.

- info.at_end = true;

+ return AssertionNode::AtEnd(on_success);

+ case END_OF_LINE: {

+ // Compile $ in multiline regexps as an alternation with a positive

+ // lookahead in one side and an end-of-input on the other side.

+ // We need two registers for the lookahead.

+ int stack_pointer_register = compiler->AllocateRegister();

+ int position_register = compiler->AllocateRegister();

+ // The ChoiceNode to distinguish between a newline and end-of-input.

+ ChoiceNode* result = new ChoiceNode(2);

+ // Create a newline atom.

+ ZoneList<CharacterRange>* newline_ranges =

+ new ZoneList<CharacterRange>(3);

+ CharacterRange::AddClassEscape('n', newline_ranges);

+ RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n');

+ TextNode* newline_matcher = new TextNode(

+ newline_atom,

+ ActionNode::PositiveSubmatchSuccess(stack_pointer_register,

+ position_register,

+ 0, // No captures inside.

+ -1, // Ignored if no captures.

+ on_success));

+ // Create an end-of-input matcher.

+ RegExpNode* end_of_line = ActionNode::BeginSubmatch(

+ stack_pointer_register,

+ position_register,

+ newline_matcher);

+ // Add the two alternatives to the ChoiceNode.

+ GuardedAlternative eol_alternative(end_of_line);

+ result->AddAlternative(eol_alternative);

+ GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));

+ result->AddAlternative(end_alternative);

+ return result;

+ }

+ default:

+ UNREACHABLE();

}

- return on_success->PropagateForward(&info);

+ return on_success;

}

@@ -3786,16 +4061,26 @@

RegExpNode* on_success) {

int stack_pointer_register = compiler->AllocateRegister();

int position_register = compiler->AllocateRegister();

+ const int registers_per_capture = 2;

+ const int register_of_first_capture = 2;

+ int register_count = capture_count_ * registers_per_capture;

+ int register_start =

+ register_of_first_capture + capture_from_ * registers_per_capture;

RegExpNode* success;

if (is_positive()) {

- return ActionNode::BeginSubmatch(

+ RegExpNode* node = ActionNode::BeginSubmatch(

stack_pointer_register,

position_register,

body()->ToNode(

compiler,

ActionNode::PositiveSubmatchSuccess(stack_pointer_register,

position_register,

+ register_count,

+ register_start,

on_success)));

+ return node;

} else {

// We use a ChoiceNode for a negative lookahead because it has most of

// the characteristics we need. It has the body of the lookahead as its

@@ -3804,19 +4089,19 @@

// NegativeSubmatchSuccess will unwind the stack including everything the

// choice node set up and backtrack. If the first alternative fails then

// the second alternative is tried, which is exactly the desired result

- // for a negative lookahead. In the case where the dispatch table

- // determines that the first alternative cannot match we will save time

- // by not trying it. Things are not quite so well-optimized if the

- // dispatch table determines that the second alternative cannot match.

- // In this case we could optimize by immediately backtracking.

- ChoiceNode* choice_node = new ChoiceNode(2);

+ // for a negative lookahead. The NegativeLookaheadChoiceNode is a special

+ // ChoiceNode that knows to ignore the first exit when calculating quick

+ // checks.

GuardedAlternative body_alt(

body()->ToNode(

compiler,

success = new NegativeSubmatchSuccess(stack_pointer_register,

- position_register)));

- choice_node->AddAlternative(body_alt);

- choice_node->AddAlternative(GuardedAlternative(on_success));

+ position_register,

+ register_count,

+ register_start)));

+ ChoiceNode* choice_node =

+ new NegativeLookaheadChoiceNode(body_alt,

+ GuardedAlternative(on_success));

return ActionNode::BeginSubmatch(stack_pointer_register,

position_register,

choice_node);

@@ -3836,9 +4121,9 @@

RegExpNode* on_success) {

int start_reg = RegExpCapture::StartRegister(index);

int end_reg = RegExpCapture::EndRegister(index);

- RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success);

+ RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);

RegExpNode* body_node = body->ToNode(compiler, store_end);

- return ActionNode::StorePosition(start_reg, body_node);

+ return ActionNode::StorePosition(start_reg, true, body_node);

}

@@ -3911,6 +4196,13 @@

case '*':

ranges->Add(CharacterRange::Everything());

break;

+ // This is the set of characters matched by the $ and ^ symbols

+ // in multiline mode.

+ case 'n':

+ AddClass(kLineTerminatorRanges,

+ kLineTerminatorRangeCount,

+ ranges);

+ break;

default:

UNREACHABLE();

}

@@ -4096,62 +4388,6 @@

}

-RegExpNode* ActionNode::PropagateForward(NodeInfo* info) {

- NodeInfo full_info(*this->info());

- full_info.AddFromPreceding(info);

- bool cloned = false;

- ActionNode* action = EnsureSibling(this, &full_info, &cloned);

- action->set_on_success(action->on_success()->PropagateForward(info));

- return action;

-RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) {

- NodeInfo full_info(*this->info());

- full_info.AddFromPreceding(info);

- bool cloned = false;

- ChoiceNode* choice = EnsureSibling(this, &full_info, &cloned);

- if (cloned) {

- ZoneList<GuardedAlternative>* old_alternatives = alternatives();

- int count = old_alternatives->length();

- choice->alternatives_ = new ZoneList<GuardedAlternative>(count);

- for (int i = 0; i < count; i++) {

- GuardedAlternative alternative = old_alternatives->at(i);

- alternative.set_node(alternative.node()->PropagateForward(info));

- choice->alternatives()->Add(alternative);

- }

- return choice;

-RegExpNode* EndNode::PropagateForward(NodeInfo* info) {

- return PropagateToEndpoint(this, info);

-RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) {

- NodeInfo full_info(*this->info());

- full_info.AddFromPreceding(info);

- bool cloned = false;

- BackReferenceNode* back_ref = EnsureSibling(this, &full_info, &cloned);

- if (cloned) {

- // TODO(erikcorry): A back reference has to have two successors (by default

- // the same node). The first is used if the back reference matches a non-

- // empty back reference, the second if it matches an empty one. This

- // doesn't matter for at_end, which is the only one implemented right now,

- // but it will matter for other pieces of info.

- back_ref->set_on_success(back_ref->on_success()->PropagateForward(info));

- }

- return back_ref;

-RegExpNode* TextNode::PropagateForward(NodeInfo* info) {

- return PropagateToEndpoint(this, info);

// -------------------------------------------------------------------

// Splay tree

@@ -4389,6 +4625,11 @@

}

+void Analysis::VisitAssertion(AssertionNode* that) {

+ EnsureAnalyzed(that->on_success());

// -------------------------------------------------------------------

// Dispatch table construction

@@ -4441,7 +4682,13 @@

}

+void DispatchTableConstructor::VisitAssertion(AssertionNode* that) {

+ RegExpNode* target = that->on_success();

+ target->Accept(this);

static int CompareRangeByFrom(const CharacterRange* a,

const CharacterRange* b) {

return Compare<uc16>(a->from(), b->from());

@@ -4504,33 +4751,32 @@

bool is_multiline,

Handle<String> pattern,

bool is_ascii) {

+ if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {

+ return IrregexpRegExpTooBig(pattern);

+ }

RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii);

// Wrap the body of the regexp in capture #0.

RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,

&compiler,

compiler.accept());

- // Add a .*? at the beginning, outside the body capture.

- // Note: We could choose to not add this if the regexp is anchored at

- // the start of the input but I'm not sure how best to do that and

- // since we don't even handle ^ yet I'm saving that optimization for

- // later.

- RegExpNode* node = RegExpQuantifier::ToNode(0,

- RegExpTree::kInfinity,

- false,

- new RegExpCharacterClass('*'),

- &compiler,

- captured_body);

+ RegExpNode* node = captured_body;

+ if (!data->tree->IsAnchored()) {

+ // Add a .*? at the beginning, outside the body capture, unless

+ // this expression is anchored at the beginning.

+ node = RegExpQuantifier::ToNode(0,

+ RegExpTree::kInfinity,

+ false,

+ new RegExpCharacterClass('*'),

+ &compiler,

+ captured_body);

+ }

data->node = node;

Analysis analysis(ignore_case);

analysis.EnsureAnalyzed(node);

NodeInfo info = *node->info();

- if (is_multiline && !FLAG_attempt_multiline_irregexp) {

- return Handle<FixedArray>::null();

- }

if (FLAG_irregexp_native) {

#ifdef ARM

// Unimplemented, fall-through to bytecode implementation.

« no previous file with comments | « src/jsregexp.h ('k') | src/macro-assembler-arm.cc » ('j') | no next file with comments »