Index: src/regexp/regexp-parser.cc |
diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc |
index 718ec882e3dbc7d1d081f04319f93e9cabb49e55..68c5a6d2633cd31ee8ee17c0487ec87ea44d7b7d 100644 |
--- a/src/regexp/regexp-parser.cc |
+++ b/src/regexp/regexp-parser.cc |
@@ -40,6 +40,7 @@ RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
simple_(false), |
contains_anchor_(false), |
is_scanned_for_captures_(false), |
+ has_named_captures_(false), |
failed_(false) { |
DCHECK_IMPLIES(dotall(), FLAG_harmony_regexp_dotall); |
Advance(); |
@@ -325,7 +326,8 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
break; |
} |
} |
- if (FLAG_harmony_regexp_named_captures && unicode()) { |
+ if (FLAG_harmony_regexp_named_captures) { |
+ has_named_captures_ = true; |
is_named_capture = true; |
Advance(); |
break; |
@@ -541,7 +543,13 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
break; |
} |
case 'k': |
- if (FLAG_harmony_regexp_named_captures && unicode()) { |
+ // Either an identity escape or a named back-reference. The two |
+ // interpretations are mutually exclusive: '\k' is interpreted as |
+ // an identity escape for non-unicode patterns without named |
+ // capture groups, and as the beginning of a named back-reference |
+ // in all other cases. |
+ if (FLAG_harmony_regexp_named_captures && |
+ (unicode() || HasNamedCaptures())) { |
Advance(2); |
ParseNamedBackReference(builder, state CHECK_FAILED); |
break; |
@@ -657,6 +665,8 @@ static bool IsSpecialClassEscape(uc32 c) { |
// noncapturing parentheses and can skip character classes and backslash-escaped |
// characters. |
void RegExpParser::ScanForCaptures() { |
+ DCHECK(!is_scanned_for_captures_); |
+ const int saved_position = position(); |
// Start with captures started previous to current position |
int capture_count = captures_started(); |
// Add count of captures after this position. |
@@ -692,11 +702,19 @@ void RegExpParser::ScanForCaptures() { |
Advance(); |
if (current() != '<') break; |
- // TODO(jgruber): To be more future-proof we could test for |
- // IdentifierStart here once it becomes clear whether group names |
- // allow unicode escapes. |
- Advance(); |
- if (current() == '=' || current() == '!') break; |
+ if (FLAG_harmony_regexp_lookbehind) { |
+ // TODO(jgruber): To be more future-proof we could test for |
+ // IdentifierStart here once it becomes clear whether group names |
+ // allow unicode escapes. |
+ // https://github.com/tc39/proposal-regexp-named-groups/issues/23 |
+ Advance(); |
+ if (current() == '=' || current() == '!') break; |
+ } |
+ |
+ // Found a possible named capture. It could turn out to be a syntax |
+ // error (e.g. an unterminated or invalid name), but that distinction |
+ // does not matter for our purposes. |
+ has_named_captures_ = true; |
} |
capture_count++; |
break; |
@@ -704,6 +722,7 @@ void RegExpParser::ScanForCaptures() { |
} |
capture_count_ = capture_count; |
is_scanned_for_captures_ = true; |
+ Reset(saved_position); |
} |
@@ -729,11 +748,7 @@ bool RegExpParser::ParseBackReferenceIndex(int* index_out) { |
} |
} |
if (value > captures_started()) { |
- if (!is_scanned_for_captures_) { |
- int saved_position = position(); |
- ScanForCaptures(); |
- Reset(saved_position); |
- } |
+ if (!is_scanned_for_captures_) ScanForCaptures(); |
if (value > capture_count_) { |
Reset(start); |
return false; |
@@ -754,7 +769,6 @@ static void push_code_unit(ZoneVector<uc16>* v, uint32_t code_unit) { |
const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() { |
DCHECK(FLAG_harmony_regexp_named_captures); |
- DCHECK(unicode()); |
ZoneVector<uc16>* name = |
new (zone()->New(sizeof(ZoneVector<uc16>))) ZoneVector<uc16>(zone()); |
@@ -766,6 +780,8 @@ const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() { |
// Convert unicode escapes. |
if (c == '\\' && current() == 'u') { |
+ // TODO(jgruber): Reconsider these escapes once the spec has settled. |
+ // https://github.com/tc39/proposal-regexp-named-groups/issues/23 |
Advance(); |
if (!ParseUnicodeEscape(&c)) { |
ReportError(CStrVector("Invalid Unicode escape sequence")); |
@@ -798,7 +814,6 @@ const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() { |
bool RegExpParser::CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name, |
int index) { |
DCHECK(FLAG_harmony_regexp_named_captures); |
- DCHECK(unicode()); |
DCHECK(0 < index && index <= captures_started_); |
DCHECK_NOT_NULL(name); |
@@ -806,6 +821,7 @@ bool RegExpParser::CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name, |
named_captures_ = new (zone()) ZoneList<RegExpCapture*>(1, zone()); |
} else { |
// Check for duplicates and bail if we find any. |
+ // TODO(jgruber): O(n^2) in the number of named captures. |
for (const auto& named_capture : *named_captures_) { |
if (*named_capture->name() == *name) { |
ReportError(CStrVector("Duplicate capture group name")); |
@@ -920,6 +936,16 @@ Handle<FixedArray> RegExpParser::CreateCaptureNameMap() { |
return array; |
} |
+bool RegExpParser::HasNamedCaptures() { |
+ if (has_named_captures_ || is_scanned_for_captures_) { |
+ return has_named_captures_; |
+ } |
+ |
+ ScanForCaptures(); |
+ DCHECK(is_scanned_for_captures_); |
+ return has_named_captures_; |
+} |
+ |
bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) { |
for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { |
if (s->group_type() != CAPTURE) continue; |