Index: src/regexp/jsregexp.cc |
diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc |
index 225ad73c4e27c499196ec0504b3eab9b3e5c355f..a41ab90b1e295d508c0f87abbfe166836e1aab97 100644 |
--- a/src/regexp/jsregexp.cc |
+++ b/src/regexp/jsregexp.cc |
@@ -1224,7 +1224,8 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, |
int value = 0; |
bool absolute = false; |
bool clear = false; |
- int store_position = -1; |
+ static const int kNoStore = kMinInt; |
+ int store_position = kNoStore; |
// This is a little tricky because we are scanning the actions in reverse |
// historical order (newest first). |
for (DeferredAction* action = actions_; |
@@ -1245,7 +1246,7 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, |
// we can set undo_action to IGNORE if we know there is no value to |
// restore. |
undo_action = RESTORE; |
- DCHECK_EQ(store_position, -1); |
+ DCHECK_EQ(store_position, kNoStore); |
DCHECK(!clear); |
break; |
} |
@@ -1253,14 +1254,14 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, |
if (!absolute) { |
value++; |
} |
- DCHECK_EQ(store_position, -1); |
+ DCHECK_EQ(store_position, kNoStore); |
DCHECK(!clear); |
undo_action = RESTORE; |
break; |
case ActionNode::STORE_POSITION: { |
Trace::DeferredCapture* pc = |
static_cast<Trace::DeferredCapture*>(action); |
- if (!clear && store_position == -1) { |
+ if (!clear && store_position == kNoStore) { |
store_position = pc->cp_offset(); |
} |
@@ -1284,7 +1285,7 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, |
// Since we're scanning in reverse order, if we've already |
// set the position we have to ignore historically earlier |
// clearing operations. |
- if (store_position == -1) { |
+ if (store_position == kNoStore) { |
clear = true; |
} |
undo_action = RESTORE; |
@@ -1315,7 +1316,7 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, |
} |
// Perform the chronologically last action (or accumulated increment) |
// for the register. |
- if (store_position != -1) { |
+ if (store_position != kNoStore) { |
assembler->WriteCurrentPositionToRegister(reg, store_position); |
} else if (clear) { |
assembler->ClearRegisters(reg, reg); |
@@ -2313,6 +2314,7 @@ void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget, |
int BackReferenceNode::EatsAtLeast(int still_to_find, |
int budget, |
bool not_at_start) { |
+ if (read_backward()) return 0; |
if (budget <= 0) return 0; |
return on_success()->EatsAtLeast(still_to_find, |
budget - 1, |
@@ -2323,6 +2325,7 @@ int BackReferenceNode::EatsAtLeast(int still_to_find, |
int TextNode::EatsAtLeast(int still_to_find, |
int budget, |
bool not_at_start) { |
+ if (read_backward()) return 0; |
int answer = Length(); |
if (answer >= still_to_find) return answer; |
if (budget <= 0) return answer; |
@@ -2526,8 +2529,8 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, |
} else { |
char_mask = String::kMaxUtf16CodeUnit; |
} |
- for (int k = 0; k < elms_->length(); k++) { |
- TextElement elm = elms_->at(k); |
+ for (int k = 0; k < elements()->length(); k++) { |
+ TextElement elm = elements()->at(k); |
if (elm.text_type() == TextElement::ATOM) { |
Vector<const uc16> quarks = elm.atom()->data(); |
for (int i = 0; i < characters && i < quarks.length(); i++) { |
@@ -2678,7 +2681,6 @@ void QuickCheckDetails::Clear() { |
void QuickCheckDetails::Advance(int by, bool one_byte) { |
- DCHECK(by >= 0); |
if (by >= characters_) { |
Clear(); |
return; |
@@ -2780,9 +2782,9 @@ RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) { |
if (depth < 0) return this; |
DCHECK(!info()->visited); |
VisitMarker marker(info()); |
- int element_count = elms_->length(); |
+ int element_count = elements()->length(); |
for (int i = 0; i < element_count; i++) { |
- TextElement elm = elms_->at(i); |
+ TextElement elm = elements()->at(i); |
if (elm.text_type() == TextElement::ATOM) { |
Vector<const uc16> quarks = elm.atom()->data(); |
for (int j = 0; j < quarks.length(); j++) { |
@@ -3146,9 +3148,9 @@ void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
return; |
} |
if (trace->at_start() == Trace::UNKNOWN) { |
- assembler->CheckNotAtStart(trace->backtrack()); |
+ assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack()); |
Trace at_start_trace = *trace; |
- at_start_trace.set_at_start(true); |
+ at_start_trace.set_at_start(Trace::TRUE_VALUE); |
on_success()->Emit(compiler, &at_start_trace); |
return; |
} |
@@ -3221,10 +3223,11 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, |
bool one_byte = compiler->one_byte(); |
Label* backtrack = trace->backtrack(); |
QuickCheckDetails* quick_check = trace->quick_check_performed(); |
- int element_count = elms_->length(); |
+ int element_count = elements()->length(); |
+ int backward_offset = read_backward() ? -Length() : 0; |
for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { |
- TextElement elm = elms_->at(i); |
- int cp_offset = trace->cp_offset() + elm.cp_offset(); |
+ TextElement elm = elements()->at(i); |
+ int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset; |
if (elm.text_type() == TextElement::ATOM) { |
Vector<const uc16> quarks = elm.atom()->data(); |
for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { |
@@ -3252,13 +3255,10 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, |
break; |
} |
if (emit_function != NULL) { |
- bool bound_checked = emit_function(isolate, |
- compiler, |
- quarks[j], |
- backtrack, |
- cp_offset + j, |
- *checked_up_to < cp_offset + j, |
- preloaded); |
+ bool bounds_check = *checked_up_to < cp_offset + j || read_backward(); |
+ bool bound_checked = |
+ emit_function(isolate, compiler, quarks[j], backtrack, |
+ cp_offset + j, bounds_check, preloaded); |
if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); |
} |
} |
@@ -3268,8 +3268,9 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, |
if (first_element_checked && i == 0) continue; |
if (DeterminedAlready(quick_check, elm.cp_offset())) continue; |
RegExpCharacterClass* cc = elm.char_class(); |
+ bool bounds_check = *checked_up_to < cp_offset || read_backward(); |
EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, |
- *checked_up_to < cp_offset, preloaded, zone()); |
+ bounds_check, preloaded, zone()); |
UpdateBoundsCheck(cp_offset, checked_up_to); |
} |
} |
@@ -3278,7 +3279,7 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, |
int TextNode::Length() { |
- TextElement elm = elms_->last(); |
+ TextElement elm = elements()->last(); |
DCHECK(elm.cp_offset() >= 0); |
return elm.cp_offset() + elm.length(); |
} |
@@ -3347,8 +3348,11 @@ void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
} |
Trace successor_trace(*trace); |
- successor_trace.set_at_start(false); |
- successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); |
+ // If we advance backward, we may end up at the start. |
+ successor_trace.AdvanceCurrentPositionInTrace( |
+ read_backward() ? -Length() : Length(), compiler); |
+ successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN |
+ : Trace::FALSE_VALUE); |
RecursionCheck rc(compiler); |
on_success()->Emit(compiler, &successor_trace); |
} |
@@ -3360,7 +3364,6 @@ void Trace::InvalidateCurrentCharacter() { |
void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) { |
- DCHECK(by > 0); |
// We don't have an instruction for shifting the current character register |
// down or for using a shifted value for anything so lets just forget that |
// we preloaded any characters into it. |
@@ -3379,9 +3382,9 @@ void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) { |
void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) { |
- int element_count = elms_->length(); |
+ int element_count = elements()->length(); |
for (int i = 0; i < element_count; i++) { |
- TextElement elm = elms_->at(i); |
+ TextElement elm = elements()->at(i); |
if (elm.text_type() == TextElement::CHAR_CLASS) { |
RegExpCharacterClass* cc = elm.char_class(); |
// None of the standard character classes is different in the case |
@@ -3398,15 +3401,14 @@ void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) { |
int TextNode::GreedyLoopTextLength() { |
- TextElement elm = elms_->at(elms_->length() - 1); |
- return elm.cp_offset() + elm.length(); |
+ return read_backward() ? kNodeIsTooComplexForGreedyLoops : Length(); |
erikcorry
2015/11/11 15:39:04
This is a case where perhaps we do need a little o
Yang
2015/11/12 08:25:35
Done.
|
} |
RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( |
erikcorry
2015/11/11 15:39:04
Not quite sure, but this optimization may have to
Yang
2015/11/12 08:25:35
I haven't found a test case where this would becom
|
RegExpCompiler* compiler) { |
- if (elms_->length() != 1) return NULL; |
- TextElement elm = elms_->at(0); |
+ if (elements()->length() != 1) return NULL; |
+ TextElement elm = elements()->at(0); |
if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; |
RegExpCharacterClass* node = elm.char_class(); |
ZoneList<CharacterRange>* ranges = node->ranges(zone()); |
@@ -3881,7 +3883,7 @@ void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) { |
GreedyLoopState::GreedyLoopState(bool not_at_start) { |
counter_backtrack_trace_.set_backtrack(&label_); |
- if (not_at_start) counter_backtrack_trace_.set_at_start(false); |
+ if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE); |
} |
@@ -4008,7 +4010,7 @@ Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, |
macro_assembler->PushCurrentPosition(); |
Label greedy_match_failed; |
Trace greedy_match_trace; |
- if (not_at_start()) greedy_match_trace.set_at_start(false); |
+ if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE); |
greedy_match_trace.set_backtrack(&greedy_match_failed); |
Label loop_label; |
macro_assembler->Bind(&loop_label); |
@@ -4354,11 +4356,14 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
DCHECK_EQ(start_reg_ + 1, end_reg_); |
if (compiler->ignore_case()) { |
- assembler->CheckNotBackReferenceIgnoreCase(start_reg_, |
+ assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(), |
trace->backtrack()); |
} else { |
- assembler->CheckNotBackReference(start_reg_, trace->backtrack()); |
+ assembler->CheckNotBackReference(start_reg_, read_backward(), |
+ trace->backtrack()); |
} |
+ // We are going to advance backward, so we may end up at the start. |
+ if (read_backward()) trace->set_at_start(Trace::UNKNOWN); |
on_success()->Emit(compiler, trace); |
} |
@@ -4719,13 +4724,14 @@ RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, |
ZoneList<TextElement>* elms = |
new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone()); |
elms->Add(TextElement::Atom(this), compiler->zone()); |
- return new(compiler->zone()) TextNode(elms, on_success); |
+ return new (compiler->zone()) TextNode(elms, read_backward(), on_success); |
} |
RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, |
RegExpNode* on_success) { |
- return new(compiler->zone()) TextNode(elements(), on_success); |
+ return new (compiler->zone()) |
+ TextNode(elements(), read_backward(), on_success); |
} |
@@ -4822,7 +4828,7 @@ bool RegExpCharacterClass::is_standard(Zone* zone) { |
RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
RegExpNode* on_success) { |
- return new(compiler->zone()) TextNode(this, on_success); |
+ return new (compiler->zone()) TextNode(this, read_backward(), on_success); |
} |
@@ -4967,8 +4973,8 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) { |
} |
} |
} |
- RegExpAtom* prefix = |
- new (zone) RegExpAtom(atom->data().SubVector(0, prefix_length)); |
+ RegExpAtom* prefix = new (zone) RegExpAtom( |
+ atom->data().SubVector(0, prefix_length), read_direction()); |
ZoneList<RegExpTree*>* pair = new (zone) ZoneList<RegExpTree*>(2, zone); |
pair->Add(prefix, zone); |
ZoneList<RegExpTree*>* suffixes = |
@@ -4981,12 +4987,14 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) { |
suffixes->Add(new (zone) RegExpEmpty(), zone); |
} else { |
RegExpTree* suffix = new (zone) RegExpAtom( |
- old_atom->data().SubVector(prefix_length, old_atom->length())); |
+ old_atom->data().SubVector(prefix_length, old_atom->length()), |
+ read_direction()); |
suffixes->Add(suffix, zone); |
} |
} |
- pair->Add(new (zone) RegExpDisjunction(suffixes), zone); |
- alternatives->at(write_posn++) = new (zone) RegExpAlternative(pair); |
+ pair->Add(new (zone) RegExpDisjunction(suffixes, read_direction()), zone); |
+ alternatives->at(write_posn++) = |
+ new (zone) RegExpAlternative(pair, read_direction()); |
} else { |
// Just copy any non-worthwhile alternatives. |
for (int j = first_with_prefix; j < i; j++) { |
@@ -5040,7 +5048,7 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions( |
ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone); |
} |
alternatives->at(write_posn++) = |
- new (zone) RegExpCharacterClass(ranges, false); |
+ new (zone) RegExpCharacterClass(ranges, false, read_direction()); |
} else { |
// Just copy any trivial alternatives. |
for (int j = first_in_run; j < i; j++) { |
@@ -5294,14 +5302,14 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, |
ZoneList<CharacterRange>* newline_ranges = |
new(zone) ZoneList<CharacterRange>(3, zone); |
CharacterRange::AddClassEscape('n', newline_ranges, zone); |
- RegExpCharacterClass* newline_atom = new(zone) RegExpCharacterClass('n'); |
- TextNode* newline_matcher = new(zone) TextNode( |
- newline_atom, |
- ActionNode::PositiveSubmatchSuccess(stack_pointer_register, |
- position_register, |
- 0, // No captures inside. |
- -1, // Ignored if no captures. |
- on_success)); |
+ RegExpCharacterClass* newline_atom = |
+ new (zone) RegExpCharacterClass('n', RegExpTree::READ_FORWARD); |
erikcorry
2015/11/11 15:39:04
Yes :-)
Do we have a test of $ and ^ inside lookb
Yang
2015/11/12 08:25:34
Added a couple of tests.
|
+ TextNode* newline_matcher = new (zone) TextNode( |
+ newline_atom, false, ActionNode::PositiveSubmatchSuccess( |
+ stack_pointer_register, position_register, |
+ 0, // No captures inside. |
+ -1, // Ignored if no captures. |
+ on_success)); |
// Create an end-of-input matcher. |
RegExpNode* end_of_line = ActionNode::BeginSubmatch( |
stack_pointer_register, |
@@ -5323,10 +5331,9 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, |
RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, |
RegExpNode* on_success) { |
- return new(compiler->zone()) |
- BackReferenceNode(RegExpCapture::StartRegister(index()), |
- RegExpCapture::EndRegister(index()), |
- on_success); |
+ return new (compiler->zone()) BackReferenceNode( |
+ RegExpCapture::StartRegister(index()), |
+ RegExpCapture::EndRegister(index()), read_backward(), on_success); |
} |
@@ -5336,8 +5343,8 @@ RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, |
} |
-RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, |
- RegExpNode* on_success) { |
+RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler, |
+ RegExpNode* on_success) { |
int stack_pointer_register = compiler->AllocateRegister(); |
int position_register = compiler->AllocateRegister(); |
@@ -5347,7 +5354,6 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, |
int register_start = |
register_of_first_capture + capture_from_ * registers_per_capture; |
- RegExpNode* success; |
if (is_positive()) { |
RegExpNode* node = ActionNode::BeginSubmatch( |
stack_pointer_register, |
@@ -5374,13 +5380,9 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, |
Zone* zone = compiler->zone(); |
GuardedAlternative body_alt( |
- body()->ToNode( |
- compiler, |
- success = new(zone) NegativeSubmatchSuccess(stack_pointer_register, |
- position_register, |
- register_count, |
- register_start, |
- zone))); |
+ body()->ToNode(compiler, new (zone) NegativeSubmatchSuccess( |
+ stack_pointer_register, position_register, |
+ register_count, register_start, zone))); |
ChoiceNode* choice_node = |
new(zone) NegativeLookaheadChoiceNode(body_alt, |
GuardedAlternative(on_success), |
@@ -5402,8 +5404,10 @@ RegExpNode* RegExpCapture::ToNode(RegExpTree* body, |
int index, |
RegExpCompiler* compiler, |
RegExpNode* on_success) { |
+ DCHECK_NOT_NULL(body); |
int start_reg = RegExpCapture::StartRegister(index); |
int end_reg = RegExpCapture::EndRegister(index); |
+ if (body->read_backward()) std::swap(start_reg, end_reg); |
RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); |
RegExpNode* body_node = body->ToNode(compiler, store_end); |
return ActionNode::StorePosition(start_reg, true, body_node); |
@@ -5414,8 +5418,14 @@ RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, |
RegExpNode* on_success) { |
ZoneList<RegExpTree*>* children = nodes(); |
RegExpNode* current = on_success; |
- for (int i = children->length() - 1; i >= 0; i--) { |
- current = children->at(i)->ToNode(compiler, current); |
+ if (read_backward()) { |
+ for (int i = 0; i < children->length(); i++) { |
+ current = children->at(i)->ToNode(compiler, current); |
+ } |
+ } else { |
+ for (int i = children->length() - 1; i >= 0; i--) { |
+ current = children->at(i)->ToNode(compiler, current); |
+ } |
} |
return current; |
} |
@@ -6291,22 +6301,19 @@ RegExpEngine::CompilationResult RegExpEngine::Compile( |
if (!is_start_anchored && !is_sticky) { |
// Add a .*? at the beginning, outside the body capture, unless |
// this expression is anchored at the beginning or sticky. |
- RegExpNode* loop_node = |
- RegExpQuantifier::ToNode(0, |
- RegExpTree::kInfinity, |
- false, |
- new(zone) RegExpCharacterClass('*'), |
- &compiler, |
- captured_body, |
- data->contains_anchor); |
+ RegExpNode* loop_node = RegExpQuantifier::ToNode( |
+ 0, RegExpTree::kInfinity, false, |
+ new (zone) RegExpCharacterClass('*', RegExpTree::READ_FORWARD), |
+ &compiler, captured_body, data->contains_anchor); |
if (data->contains_anchor) { |
// Unroll loop once, to take care of the case that might start |
// at the start of input. |
ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); |
first_step_node->AddAlternative(GuardedAlternative(captured_body)); |
- first_step_node->AddAlternative(GuardedAlternative( |
- new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node))); |
+ first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( |
+ new (zone) RegExpCharacterClass('*', RegExpTree::READ_FORWARD), false, |
+ loop_node))); |
node = first_step_node; |
} else { |
node = loop_node; |