Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(308)

Unified Diff: src/regexp/jsregexp.cc

Issue 1418963009: Experimental support for RegExp lookbehind. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: fix arm64 debug code assertion Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/regexp/jsregexp.h ('k') | src/regexp/mips/regexp-macro-assembler-mips.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/regexp/jsregexp.cc
diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc
index 225ad73c4e27c499196ec0504b3eab9b3e5c355f..b940bb74b118addaa759af38bb61e61ff050b402 100644
--- a/src/regexp/jsregexp.cc
+++ b/src/regexp/jsregexp.cc
@@ -1002,6 +1002,8 @@ class RegExpCompiler {
inline void set_limiting_recursion(bool value) {
limiting_recursion_ = value;
}
+ bool read_backward() { return read_backward_; }
+ void set_read_backward(bool value) { read_backward_ = value; }
FrequencyCollator* frequency_collator() { return &frequency_collator_; }
int current_expansion_factor() { return current_expansion_factor_; }
@@ -1025,6 +1027,7 @@ class RegExpCompiler {
bool reg_exp_too_big_;
bool limiting_recursion_;
bool optimize_;
+ bool read_backward_;
int current_expansion_factor_;
FrequencyCollator frequency_collator_;
Isolate* isolate_;
@@ -1060,6 +1063,7 @@ RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
reg_exp_too_big_(false),
limiting_recursion_(false),
optimize_(FLAG_regexp_optimization),
+ read_backward_(false),
current_expansion_factor_(1),
frequency_collator_(),
isolate_(isolate),
@@ -1224,7 +1228,8 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
int value = 0;
bool absolute = false;
bool clear = false;
- int store_position = -1;
+ static const int kNoStore = kMinInt;
+ int store_position = kNoStore;
// This is a little tricky because we are scanning the actions in reverse
// historical order (newest first).
for (DeferredAction* action = actions_;
@@ -1245,7 +1250,7 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
// we can set undo_action to IGNORE if we know there is no value to
// restore.
undo_action = RESTORE;
- DCHECK_EQ(store_position, -1);
+ DCHECK_EQ(store_position, kNoStore);
DCHECK(!clear);
break;
}
@@ -1253,14 +1258,14 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
if (!absolute) {
value++;
}
- DCHECK_EQ(store_position, -1);
+ DCHECK_EQ(store_position, kNoStore);
DCHECK(!clear);
undo_action = RESTORE;
break;
case ActionNode::STORE_POSITION: {
Trace::DeferredCapture* pc =
static_cast<Trace::DeferredCapture*>(action);
- if (!clear && store_position == -1) {
+ if (!clear && store_position == kNoStore) {
store_position = pc->cp_offset();
}
@@ -1284,7 +1289,7 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
// Since we're scanning in reverse order, if we've already
// set the position we have to ignore historically earlier
// clearing operations.
- if (store_position == -1) {
+ if (store_position == kNoStore) {
clear = true;
}
undo_action = RESTORE;
@@ -1315,7 +1320,7 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
}
// Perform the chronologically last action (or accumulated increment)
// for the register.
- if (store_position != -1) {
+ if (store_position != kNoStore) {
assembler->WriteCurrentPositionToRegister(reg, store_position);
} else if (clear) {
assembler->ClearRegisters(reg, reg);
@@ -2313,6 +2318,7 @@ void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
int BackReferenceNode::EatsAtLeast(int still_to_find,
int budget,
bool not_at_start) {
+ if (read_backward()) return 0;
if (budget <= 0) return 0;
return on_success()->EatsAtLeast(still_to_find,
budget - 1,
@@ -2323,6 +2329,7 @@ int BackReferenceNode::EatsAtLeast(int still_to_find,
int TextNode::EatsAtLeast(int still_to_find,
int budget,
bool not_at_start) {
+ if (read_backward()) return 0;
int answer = Length();
if (answer >= still_to_find) return answer;
if (budget <= 0) return answer;
@@ -2526,8 +2533,8 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
} else {
char_mask = String::kMaxUtf16CodeUnit;
}
- for (int k = 0; k < elms_->length(); k++) {
- TextElement elm = elms_->at(k);
+ for (int k = 0; k < elements()->length(); k++) {
+ TextElement elm = elements()->at(k);
if (elm.text_type() == TextElement::ATOM) {
Vector<const uc16> quarks = elm.atom()->data();
for (int i = 0; i < characters && i < quarks.length(); i++) {
@@ -2678,7 +2685,6 @@ void QuickCheckDetails::Clear() {
void QuickCheckDetails::Advance(int by, bool one_byte) {
- DCHECK(by >= 0);
if (by >= characters_) {
Clear();
return;
@@ -2780,9 +2786,9 @@ RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {
if (depth < 0) return this;
DCHECK(!info()->visited);
VisitMarker marker(info());
- int element_count = elms_->length();
+ int element_count = elements()->length();
for (int i = 0; i < element_count; i++) {
- TextElement elm = elms_->at(i);
+ TextElement elm = elements()->at(i);
if (elm.text_type() == TextElement::ATOM) {
Vector<const uc16> quarks = elm.atom()->data();
for (int j = 0; j < quarks.length(); j++) {
@@ -3146,9 +3152,9 @@ void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
return;
}
if (trace->at_start() == Trace::UNKNOWN) {
- assembler->CheckNotAtStart(trace->backtrack());
+ assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack());
Trace at_start_trace = *trace;
- at_start_trace.set_at_start(true);
+ at_start_trace.set_at_start(Trace::TRUE_VALUE);
on_success()->Emit(compiler, &at_start_trace);
return;
}
@@ -3221,10 +3227,11 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
bool one_byte = compiler->one_byte();
Label* backtrack = trace->backtrack();
QuickCheckDetails* quick_check = trace->quick_check_performed();
- int element_count = elms_->length();
+ int element_count = elements()->length();
+ int backward_offset = read_backward() ? -Length() : 0;
for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
- TextElement elm = elms_->at(i);
- int cp_offset = trace->cp_offset() + elm.cp_offset();
+ TextElement elm = elements()->at(i);
+ int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
if (elm.text_type() == TextElement::ATOM) {
Vector<const uc16> quarks = elm.atom()->data();
for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
@@ -3252,13 +3259,10 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
break;
}
if (emit_function != NULL) {
- bool bound_checked = emit_function(isolate,
- compiler,
- quarks[j],
- backtrack,
- cp_offset + j,
- *checked_up_to < cp_offset + j,
- preloaded);
+ bool bounds_check = *checked_up_to < cp_offset + j || read_backward();
+ bool bound_checked =
+ emit_function(isolate, compiler, quarks[j], backtrack,
+ cp_offset + j, bounds_check, preloaded);
if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
}
}
@@ -3268,8 +3272,9 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
if (first_element_checked && i == 0) continue;
if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
RegExpCharacterClass* cc = elm.char_class();
+ bool bounds_check = *checked_up_to < cp_offset || read_backward();
EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,
- *checked_up_to < cp_offset, preloaded, zone());
+ bounds_check, preloaded, zone());
UpdateBoundsCheck(cp_offset, checked_up_to);
}
}
@@ -3278,7 +3283,7 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
int TextNode::Length() {
- TextElement elm = elms_->last();
+ TextElement elm = elements()->last();
DCHECK(elm.cp_offset() >= 0);
return elm.cp_offset() + elm.length();
}
@@ -3347,8 +3352,11 @@ void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
}
Trace successor_trace(*trace);
- successor_trace.set_at_start(false);
- successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
+ // If we advance backward, we may end up at the start.
+ successor_trace.AdvanceCurrentPositionInTrace(
+ read_backward() ? -Length() : Length(), compiler);
+ successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN
+ : Trace::FALSE_VALUE);
RecursionCheck rc(compiler);
on_success()->Emit(compiler, &successor_trace);
}
@@ -3360,7 +3368,6 @@ void Trace::InvalidateCurrentCharacter() {
void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
- DCHECK(by > 0);
// We don't have an instruction for shifting the current character register
// down or for using a shifted value for anything so lets just forget that
// we preloaded any characters into it.
@@ -3379,9 +3386,9 @@ void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) {
- int element_count = elms_->length();
+ int element_count = elements()->length();
for (int i = 0; i < element_count; i++) {
- TextElement elm = elms_->at(i);
+ TextElement elm = elements()->at(i);
if (elm.text_type() == TextElement::CHAR_CLASS) {
RegExpCharacterClass* cc = elm.char_class();
// None of the standard character classes is different in the case
@@ -3397,16 +3404,14 @@ void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) {
}
-int TextNode::GreedyLoopTextLength() {
- TextElement elm = elms_->at(elms_->length() - 1);
- return elm.cp_offset() + elm.length();
-}
+int TextNode::GreedyLoopTextLength() { return Length(); }
RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
RegExpCompiler* compiler) {
- if (elms_->length() != 1) return NULL;
- TextElement elm = elms_->at(0);
+ if (read_backward()) return NULL;
+ if (elements()->length() != 1) return NULL;
+ TextElement elm = elements()->at(0);
if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;
RegExpCharacterClass* node = elm.char_class();
ZoneList<CharacterRange>* ranges = node->ranges(zone());
@@ -3450,7 +3455,7 @@ int ChoiceNode::GreedyLoopTextLengthForAlternative(
SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
node = seq_node->on_success();
}
- return length;
+ return read_backward() ? -length : length;
}
@@ -3881,7 +3886,7 @@ void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
GreedyLoopState::GreedyLoopState(bool not_at_start) {
counter_backtrack_trace_.set_backtrack(&label_);
- if (not_at_start) counter_backtrack_trace_.set_at_start(false);
+ if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE);
}
@@ -4008,7 +4013,7 @@ Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler,
macro_assembler->PushCurrentPosition();
Label greedy_match_failed;
Trace greedy_match_trace;
- if (not_at_start()) greedy_match_trace.set_at_start(false);
+ if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE);
greedy_match_trace.set_backtrack(&greedy_match_failed);
Label loop_label;
macro_assembler->Bind(&loop_label);
@@ -4354,11 +4359,14 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
DCHECK_EQ(start_reg_ + 1, end_reg_);
if (compiler->ignore_case()) {
- assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
+ assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
trace->backtrack());
} else {
- assembler->CheckNotBackReference(start_reg_, trace->backtrack());
+ assembler->CheckNotBackReference(start_reg_, read_backward(),
+ trace->backtrack());
}
+ // We are going to advance backward, so we may end up at the start.
+ if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
on_success()->Emit(compiler, trace);
}
@@ -4719,13 +4727,15 @@ RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
ZoneList<TextElement>* elms =
new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone());
elms->Add(TextElement::Atom(this), compiler->zone());
- return new(compiler->zone()) TextNode(elms, on_success);
+ return new (compiler->zone())
+ TextNode(elms, compiler->read_backward(), on_success);
}
RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
- return new(compiler->zone()) TextNode(elements(), on_success);
+ return new (compiler->zone())
+ TextNode(elements(), compiler->read_backward(), on_success);
}
@@ -4822,7 +4832,8 @@ bool RegExpCharacterClass::is_standard(Zone* zone) {
RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
- return new(compiler->zone()) TextNode(this, on_success);
+ return new (compiler->zone())
+ TextNode(this, compiler->read_backward(), on_success);
}
@@ -5204,7 +5215,9 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
GuardedAlternative(body->ToNode(compiler, answer)));
}
answer = alternation;
- if (not_at_start) alternation->set_not_at_start();
+ if (not_at_start && !compiler->read_backward()) {
+ alternation->set_not_at_start();
+ }
}
return answer;
}
@@ -5216,9 +5229,9 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
int reg_ctr = needs_counter
? compiler->AllocateRegister()
: RegExpCompiler::kNoRegister;
- LoopChoiceNode* center = new(zone) LoopChoiceNode(body->min_match() == 0,
- zone);
- if (not_at_start) center->set_not_at_start();
+ LoopChoiceNode* center = new (zone)
+ LoopChoiceNode(body->min_match() == 0, compiler->read_backward(), zone);
+ if (not_at_start && !compiler->read_backward()) center->set_not_at_start();
RegExpNode* loop_return = needs_counter
? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
: static_cast<RegExpNode*>(center);
@@ -5294,14 +5307,13 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
ZoneList<CharacterRange>* newline_ranges =
new(zone) ZoneList<CharacterRange>(3, zone);
CharacterRange::AddClassEscape('n', newline_ranges, zone);
- RegExpCharacterClass* newline_atom = new(zone) RegExpCharacterClass('n');
- TextNode* newline_matcher = new(zone) TextNode(
- newline_atom,
- ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
- position_register,
- 0, // No captures inside.
- -1, // Ignored if no captures.
- on_success));
+ RegExpCharacterClass* newline_atom = new (zone) RegExpCharacterClass('n');
+ TextNode* newline_matcher = new (zone) TextNode(
+ newline_atom, false, ActionNode::PositiveSubmatchSuccess(
+ stack_pointer_register, position_register,
+ 0, // No captures inside.
+ -1, // Ignored if no captures.
+ on_success));
// Create an end-of-input matcher.
RegExpNode* end_of_line = ActionNode::BeginSubmatch(
stack_pointer_register,
@@ -5323,10 +5335,10 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
- return new(compiler->zone())
+ return new (compiler->zone())
BackReferenceNode(RegExpCapture::StartRegister(index()),
RegExpCapture::EndRegister(index()),
- on_success);
+ compiler->read_backward(), on_success);
}
@@ -5336,8 +5348,8 @@ RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
}
-RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
- RegExpNode* on_success) {
+RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler,
+ RegExpNode* on_success) {
int stack_pointer_register = compiler->AllocateRegister();
int position_register = compiler->AllocateRegister();
@@ -5347,19 +5359,16 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
int register_start =
register_of_first_capture + capture_from_ * registers_per_capture;
- RegExpNode* success;
+ RegExpNode* result;
+ bool was_reading_backward = compiler->read_backward();
+ compiler->set_read_backward(type() == LOOKBEHIND);
if (is_positive()) {
- RegExpNode* node = ActionNode::BeginSubmatch(
- stack_pointer_register,
- position_register,
- body()->ToNode(
- compiler,
- ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
- position_register,
- register_count,
- register_start,
- on_success)));
- return node;
+ result = ActionNode::BeginSubmatch(
+ stack_pointer_register, position_register,
+ body()->ToNode(compiler,
+ ActionNode::PositiveSubmatchSuccess(
+ stack_pointer_register, position_register,
+ register_count, register_start, on_success)));
} else {
// We use a ChoiceNode for a negative lookahead because it has most of
// the characteristics we need. It has the body of the lookahead as its
@@ -5374,21 +5383,18 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
Zone* zone = compiler->zone();
GuardedAlternative body_alt(
- body()->ToNode(
- compiler,
- success = new(zone) NegativeSubmatchSuccess(stack_pointer_register,
- position_register,
- register_count,
- register_start,
- zone)));
+ body()->ToNode(compiler, new (zone) NegativeSubmatchSuccess(
+ stack_pointer_register, position_register,
+ register_count, register_start, zone)));
ChoiceNode* choice_node =
new(zone) NegativeLookaheadChoiceNode(body_alt,
GuardedAlternative(on_success),
zone);
- return ActionNode::BeginSubmatch(stack_pointer_register,
- position_register,
- choice_node);
+ result = ActionNode::BeginSubmatch(stack_pointer_register,
+ position_register, choice_node);
}
+ compiler->set_read_backward(was_reading_backward);
+ return result;
}
@@ -5402,8 +5408,10 @@ RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
int index,
RegExpCompiler* compiler,
RegExpNode* on_success) {
+ DCHECK_NOT_NULL(body);
int start_reg = RegExpCapture::StartRegister(index);
int end_reg = RegExpCapture::EndRegister(index);
+ if (compiler->read_backward()) std::swap(start_reg, end_reg);
RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
RegExpNode* body_node = body->ToNode(compiler, store_end);
return ActionNode::StorePosition(start_reg, true, body_node);
@@ -5414,8 +5422,14 @@ RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
ZoneList<RegExpTree*>* children = nodes();
RegExpNode* current = on_success;
- for (int i = children->length() - 1; i >= 0; i--) {
- current = children->at(i)->ToNode(compiler, current);
+ if (compiler->read_backward()) {
+ for (int i = 0; i < children->length(); i++) {
+ current = children->at(i)->ToNode(compiler, current);
+ }
+ } else {
+ for (int i = children->length() - 1; i >= 0; i--) {
+ current = children->at(i)->ToNode(compiler, current);
+ }
}
return current;
}
@@ -6291,22 +6305,17 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
if (!is_start_anchored && !is_sticky) {
// Add a .*? at the beginning, outside the body capture, unless
// this expression is anchored at the beginning or sticky.
- RegExpNode* loop_node =
- RegExpQuantifier::ToNode(0,
- RegExpTree::kInfinity,
- false,
- new(zone) RegExpCharacterClass('*'),
- &compiler,
- captured_body,
- data->contains_anchor);
+ RegExpNode* loop_node = RegExpQuantifier::ToNode(
+ 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'),
+ &compiler, captured_body, data->contains_anchor);
if (data->contains_anchor) {
// Unroll loop once, to take care of the case that might start
// at the start of input.
ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);
first_step_node->AddAlternative(GuardedAlternative(captured_body));
- first_step_node->AddAlternative(GuardedAlternative(
- new(zone) TextNode(new(zone) RegExpCharacterClass('*'), loop_node)));
+ first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode(
+ new (zone) RegExpCharacterClass('*'), false, loop_node)));
node = first_step_node;
} else {
node = loop_node;
« no previous file with comments | « src/regexp/jsregexp.h ('k') | src/regexp/mips/regexp-macro-assembler-mips.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698