Chromium Code Reviews| Index: src/regexp/regexp-parser.cc |
| diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc |
| index 68c5a6d2633cd31ee8ee17c0487ec87ea44d7b7d..b6250b98b51319cbcfa3d721420ed575f9ca0acf 100644 |
| --- a/src/regexp/regexp-parser.cc |
| +++ b/src/regexp/regexp-parser.cc |
| @@ -47,12 +47,13 @@ RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
| } |
| template <bool update_position> |
|
Yang
2017/04/05 12:52:38
I wonder whether it makes sense to make the scan m
jgruber
2017/04/05 12:57:30
Agreed, will do that.
jgruber
2017/04/07 07:10:56
Done.
|
| -inline uc32 RegExpParser::ReadNext() { |
| +inline uc32 RegExpParser::ReadNext(ScanMode mode) { |
| int position = next_pos_; |
| uc32 c0 = in()->Get(position); |
| position++; |
| - // Read the whole surrogate pair in case of unicode flag, if possible. |
| - if (unicode() && position < in()->length() && |
| + const bool try_combine_surrogate_pairs = |
| + (unicode() || mode == ScanMode::FORCE_COMBINE_SURROGATE_PAIRS); |
| + if (try_combine_surrogate_pairs && position < in()->length() && |
| unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(c0))) { |
| uc16 c1 = in()->Get(position); |
| if (unibrow::Utf16::IsTrailSurrogate(c1)) { |
| @@ -67,14 +68,13 @@ inline uc32 RegExpParser::ReadNext() { |
| uc32 RegExpParser::Next() { |
| if (has_next()) { |
| - return ReadNext<false>(); |
| + return ReadNext<false>(ScanMode::DEFAULT); |
| } else { |
| return kEndMarker; |
| } |
| } |
| - |
| -void RegExpParser::Advance() { |
| +void RegExpParser::Advance(ScanMode mode) { |
| if (has_next()) { |
| StackLimitCheck check(isolate()); |
| if (check.HasOverflowed()) { |
| @@ -84,7 +84,7 @@ void RegExpParser::Advance() { |
| } else if (zone()->excess_allocation()) { |
| ReportError(CStrVector("Regular expression too large")); |
| } else { |
| - current_ = ReadNext<true>(); |
| + current_ = ReadNext<true>(mode); |
| } |
| } else { |
| current_ = kEndMarker; |
| @@ -102,10 +102,9 @@ void RegExpParser::Reset(int pos) { |
| Advance(); |
| } |
| - |
| -void RegExpParser::Advance(int dist) { |
| +void RegExpParser::Advance(int dist, ScanMode mode) { |
| next_pos_ += dist - 1; |
| - Advance(); |
| + Advance(mode); |
| } |
| @@ -329,7 +328,6 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
| if (FLAG_harmony_regexp_named_captures) { |
| has_named_captures_ = true; |
| is_named_capture = true; |
| - Advance(); |
|
Yang
2017/04/05 12:52:38
How come we can remove this Advance here and below
jgruber
2017/04/05 12:57:30
The advance is now done in ParseCaptureGroupName.
|
| break; |
| } |
| // Fall through. |
| @@ -769,20 +767,26 @@ static void push_code_unit(ZoneVector<uc16>* v, uint32_t code_unit) { |
| const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() { |
| DCHECK(FLAG_harmony_regexp_named_captures); |
| + DCHECK_EQ(current(), '<'); |
| ZoneVector<uc16>* name = |
| new (zone()->New(sizeof(ZoneVector<uc16>))) ZoneVector<uc16>(zone()); |
| + // Capture names can always contain surrogate pairs, and we need to scan |
| + // accordingly. |
| + const ScanMode scan_mode = ScanMode::FORCE_COMBINE_SURROGATE_PAIRS; |
| + Advance(scan_mode); |
| + |
| bool at_start = true; |
| while (true) { |
| uc32 c = current(); |
| - Advance(); |
| + Advance(scan_mode); |
| // Convert unicode escapes. |
| if (c == '\\' && current() == 'u') { |
| // TODO(jgruber): Reconsider this once the spec has settled. |
| // https://github.com/tc39/proposal-regexp-named-groups/issues/23 |
| - Advance(); |
| + Advance(scan_mode); |
| if (!ParseUnicodeEscape(&c)) { |
| ReportError(CStrVector("Invalid Unicode escape sequence")); |
| return nullptr; |
| @@ -847,7 +851,6 @@ bool RegExpParser::ParseNamedBackReference(RegExpBuilder* builder, |
| return false; |
| } |
| - Advance(); |
| const ZoneVector<uc16>* name = ParseCaptureGroupName(); |
| if (name == nullptr) { |
| return false; |