Index: src/regexp/regexp-parser.cc |
diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc |
index 3e5998719436e1cfe743e694b2eaf526b46bc377..656e88cff89578f0ab488b0392956b681ace48a8 100644 |
--- a/src/regexp/regexp-parser.cc |
+++ b/src/regexp/regexp-parser.cc |
@@ -46,13 +46,13 @@ RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
Advance(); |
} |
-template <bool update_position> |
-inline uc32 RegExpParser::ReadNext() { |
+inline uc32 RegExpParser::ReadNext(bool update_position, ScanMode mode) { |
int position = next_pos_; |
uc32 c0 = in()->Get(position); |
position++; |
- // Read the whole surrogate pair in case of unicode flag, if possible. |
- if (unicode() && position < in()->length() && |
+ const bool try_combine_surrogate_pairs = |
+ (unicode() || mode == ScanMode::FORCE_COMBINE_SURROGATE_PAIRS); |
+ if (try_combine_surrogate_pairs && position < in()->length() && |
unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(c0))) { |
uc16 c1 = in()->Get(position); |
if (unibrow::Utf16::IsTrailSurrogate(c1)) { |
@@ -67,14 +67,13 @@ inline uc32 RegExpParser::ReadNext() { |
uc32 RegExpParser::Next() { |
if (has_next()) { |
- return ReadNext<false>(); |
+ return ReadNext(false, ScanMode::DEFAULT); |
} else { |
return kEndMarker; |
} |
} |
- |
-void RegExpParser::Advance() { |
+void RegExpParser::Advance(ScanMode mode) { |
if (has_next()) { |
StackLimitCheck check(isolate()); |
if (check.HasOverflowed()) { |
@@ -84,7 +83,7 @@ void RegExpParser::Advance() { |
} else if (zone()->excess_allocation()) { |
ReportError(CStrVector("Regular expression too large")); |
} else { |
- current_ = ReadNext<true>(); |
+ current_ = ReadNext(true, mode); |
} |
} else { |
current_ = kEndMarker; |
@@ -102,10 +101,9 @@ void RegExpParser::Reset(int pos) { |
Advance(); |
} |
- |
-void RegExpParser::Advance(int dist) { |
+void RegExpParser::Advance(int dist, ScanMode mode) { |
next_pos_ += dist - 1; |
- Advance(); |
+ Advance(mode); |
} |
@@ -329,7 +327,6 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
if (FLAG_harmony_regexp_named_captures) { |
has_named_captures_ = true; |
is_named_capture = true; |
- Advance(); |
break; |
} |
// Fall through. |
@@ -769,20 +766,26 @@ static void push_code_unit(ZoneVector<uc16>* v, uint32_t code_unit) { |
const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() { |
DCHECK(FLAG_harmony_regexp_named_captures); |
+ DCHECK_EQ(current(), '<'); |
ZoneVector<uc16>* name = |
new (zone()->New(sizeof(ZoneVector<uc16>))) ZoneVector<uc16>(zone()); |
+ // Capture names can always contain surrogate pairs, and we need to scan |
+ // accordingly. |
+ const ScanMode scan_mode = ScanMode::FORCE_COMBINE_SURROGATE_PAIRS; |
+ Advance(scan_mode); |
+ |
bool at_start = true; |
while (true) { |
uc32 c = current(); |
- Advance(); |
+ Advance(scan_mode); |
// Convert unicode escapes. |
if (c == '\\' && current() == 'u') { |
// TODO(jgruber): Reconsider this once the spec has settled. |
// https://github.com/tc39/proposal-regexp-named-groups/issues/23 |
- Advance(); |
+ Advance(scan_mode); |
if (!ParseUnicodeEscape(&c)) { |
ReportError(CStrVector("Invalid Unicode escape sequence")); |
return nullptr; |
@@ -853,7 +856,6 @@ bool RegExpParser::ParseNamedBackReference(RegExpBuilder* builder, |
return false; |
} |
- Advance(); |
const ZoneVector<uc16>* name = ParseCaptureGroupName(); |
if (name == nullptr) { |
return false; |