Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(332)

Unified Diff: src/parser.cc

Issue 1418963009: Experimental support for RegExp lookbehind. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: fix arm64 debug code assertion Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/parser.h ('k') | src/regexp/arm/regexp-macro-assembler-arm.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/parser.cc
diff --git a/src/parser.cc b/src/parser.cc
index 0e49d6deba4abc1a8e2a09d9ef7e896146eb3e11..45f0b39a17d42f9599e2d2e269d41f769a824dbb 100644
--- a/src/parser.cc
+++ b/src/parser.cc
@@ -5182,6 +5182,7 @@ RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
in_(in),
current_(kEndMarker),
next_pos_(0),
+ captures_started_(0),
capture_count_(0),
has_more_(true),
multiline_(multiline),
@@ -5285,25 +5286,26 @@ RegExpTree* RegExpParser::ParsePattern() {
// Atom Quantifier
RegExpTree* RegExpParser::ParseDisjunction() {
// Used to store current state while parsing subexpressions.
- RegExpParserState initial_state(NULL, INITIAL, 0, zone());
- RegExpParserState* stored_state = &initial_state;
+ RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0,
+ zone());
+ RegExpParserState* state = &initial_state;
// Cache the builder in a local variable for quick access.
RegExpBuilder* builder = initial_state.builder();
while (true) {
switch (current()) {
case kEndMarker:
- if (stored_state->IsSubexpression()) {
+ if (state->IsSubexpression()) {
// Inside a parenthesized group when hitting end of input.
ReportError(CStrVector("Unterminated group") CHECK_FAILED);
}
- DCHECK_EQ(INITIAL, stored_state->group_type());
+ DCHECK_EQ(INITIAL, state->group_type());
// Parsing completed successfully.
return builder->ToRegExp();
case ')': {
- if (!stored_state->IsSubexpression()) {
+ if (!state->IsSubexpression()) {
ReportError(CStrVector("Unmatched ')'") CHECK_FAILED);
}
- DCHECK_NE(INITIAL, stored_state->group_type());
+ DCHECK_NE(INITIAL, state->group_type());
Advance();
// End disjunction parsing and convert builder content to new single
@@ -5312,27 +5314,27 @@ RegExpTree* RegExpParser::ParseDisjunction() {
int end_capture_index = captures_started();
- int capture_index = stored_state->capture_index();
- SubexpressionType group_type = stored_state->group_type();
-
- // Restore previous state.
- stored_state = stored_state->previous_state();
- builder = stored_state->builder();
+ int capture_index = state->capture_index();
+ SubexpressionType group_type = state->group_type();
// Build result of subexpression.
if (group_type == CAPTURE) {
- RegExpCapture* capture = new(zone()) RegExpCapture(body, capture_index);
- captures_->at(capture_index - 1) = capture;
+ RegExpCapture* capture = GetCapture(capture_index);
+ capture->set_body(body);
body = capture;
} else if (group_type != GROUPING) {
- DCHECK(group_type == POSITIVE_LOOKAHEAD ||
- group_type == NEGATIVE_LOOKAHEAD);
- bool is_positive = (group_type == POSITIVE_LOOKAHEAD);
- body = new(zone()) RegExpLookahead(body,
- is_positive,
- end_capture_index - capture_index,
- capture_index);
+ DCHECK(group_type == POSITIVE_LOOKAROUND ||
+ group_type == NEGATIVE_LOOKAROUND);
+ bool is_positive = (group_type == POSITIVE_LOOKAROUND);
+ body = new (zone()) RegExpLookaround(
+ body, is_positive, end_capture_index - capture_index, capture_index,
+ state->lookaround_type());
}
+
+ // Restore previous state.
+ state = state->previous_state();
+ builder = state->builder();
+
builder->AddAtom(body);
// For compatability with JSC and ES3, we allow quantifiers after
// lookaheads, and break in all cases.
@@ -5379,6 +5381,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
}
case '(': {
SubexpressionType subexpr_type = CAPTURE;
+ RegExpLookaround::Type lookaround_type = state->lookaround_type();
Advance();
if (current() == '?') {
switch (Next()) {
@@ -5386,29 +5389,41 @@ RegExpTree* RegExpParser::ParseDisjunction() {
subexpr_type = GROUPING;
break;
case '=':
- subexpr_type = POSITIVE_LOOKAHEAD;
+ lookaround_type = RegExpLookaround::LOOKAHEAD;
+ subexpr_type = POSITIVE_LOOKAROUND;
break;
case '!':
- subexpr_type = NEGATIVE_LOOKAHEAD;
+ lookaround_type = RegExpLookaround::LOOKAHEAD;
+ subexpr_type = NEGATIVE_LOOKAROUND;
break;
+ case '<':
+ if (FLAG_harmony_regexp_lookbehind) {
+ Advance();
+ lookaround_type = RegExpLookaround::LOOKBEHIND;
+ if (Next() == '=') {
+ subexpr_type = POSITIVE_LOOKAROUND;
+ break;
+ } else if (Next() == '!') {
+ subexpr_type = NEGATIVE_LOOKAROUND;
+ break;
+ }
+ }
+ // Fall through.
default:
ReportError(CStrVector("Invalid group") CHECK_FAILED);
break;
}
Advance(2);
} else {
- if (captures_ == NULL) {
- captures_ = new(zone()) ZoneList<RegExpCapture*>(2, zone());
- }
- if (captures_started() >= kMaxCaptures) {
+ if (captures_started_ >= kMaxCaptures) {
ReportError(CStrVector("Too many captures") CHECK_FAILED);
}
- captures_->Add(NULL, zone());
+ captures_started_++;
}
// Store current state and begin new disjunction parsing.
- stored_state = new(zone()) RegExpParserState(stored_state, subexpr_type,
- captures_started(), zone());
- builder = stored_state->builder();
+ state = new (zone()) RegExpParserState(
+ state, subexpr_type, lookaround_type, captures_started_, zone());
+ builder = state->builder();
continue;
}
case '[': {
@@ -5451,16 +5466,15 @@ RegExpTree* RegExpParser::ParseDisjunction() {
case '7': case '8': case '9': {
int index = 0;
if (ParseBackReferenceIndex(&index)) {
- RegExpCapture* capture = NULL;
- if (captures_ != NULL && index <= captures_->length()) {
- capture = captures_->at(index - 1);
- }
- if (capture == NULL) {
+ if (state->IsInsideCaptureGroup(index)) {
+ // The backreference is inside the capture group it refers to.
+ // Nothing can possibly have been captured yet.
builder->AddEmpty();
- break;
+ } else {
+ RegExpCapture* capture = GetCapture(index);
+ RegExpTree* atom = new (zone()) RegExpBackReference(capture);
+ builder->AddAtom(atom);
}
- RegExpTree* atom = new(zone()) RegExpBackReference(capture);
- builder->AddAtom(atom);
break;
}
uc32 first_digit = Next();
@@ -5721,6 +5735,34 @@ bool RegExpParser::ParseBackReferenceIndex(int* index_out) {
}
+RegExpCapture* RegExpParser::GetCapture(int index) {
+ // The index for the capture groups are one-based. Its index in the list is
+ // zero-based.
+ int know_captures =
+ is_scanned_for_captures_ ? capture_count_ : captures_started_;
+ DCHECK(index <= know_captures);
+ if (captures_ == NULL) {
+ captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone());
+ }
+ while (captures_->length() < know_captures) {
+ captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone());
+ }
+ return captures_->at(index - 1);
+}
+
+
+bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) {
+ for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {
+ if (s->group_type() != CAPTURE) continue;
+ // Return true if we found the matching capture index.
+ if (index == s->capture_index()) return true;
+ // Abort if index is larger than what has been parsed up till this state.
+ if (index > s->capture_index()) return false;
+ }
+ return false;
+}
+
+
// QuantifierPrefix ::
// { DecimalDigits }
// { DecimalDigits , }
@@ -6052,7 +6094,7 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
ranges->Add(CharacterRange::Everything(), zone());
is_negated = !is_negated;
}
- return new(zone()) RegExpCharacterClass(ranges, is_negated);
+ return new (zone()) RegExpCharacterClass(ranges, is_negated);
}
« no previous file with comments | « src/parser.h ('k') | src/regexp/arm/regexp-macro-assembler-arm.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698