| Index: src/regexp/regexp-parser.cc
|
| diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc
|
| index 3e5998719436e1cfe743e694b2eaf526b46bc377..656e88cff89578f0ab488b0392956b681ace48a8 100644
|
| --- a/src/regexp/regexp-parser.cc
|
| +++ b/src/regexp/regexp-parser.cc
|
| @@ -46,13 +46,13 @@ RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
|
| Advance();
|
| }
|
|
|
| -template <bool update_position>
|
| -inline uc32 RegExpParser::ReadNext() {
|
| +inline uc32 RegExpParser::ReadNext(bool update_position, ScanMode mode) {
|
| int position = next_pos_;
|
| uc32 c0 = in()->Get(position);
|
| position++;
|
| - // Read the whole surrogate pair in case of unicode flag, if possible.
|
| - if (unicode() && position < in()->length() &&
|
| + const bool try_combine_surrogate_pairs =
|
| + (unicode() || mode == ScanMode::FORCE_COMBINE_SURROGATE_PAIRS);
|
| + if (try_combine_surrogate_pairs && position < in()->length() &&
|
| unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(c0))) {
|
| uc16 c1 = in()->Get(position);
|
| if (unibrow::Utf16::IsTrailSurrogate(c1)) {
|
| @@ -67,14 +67,13 @@ inline uc32 RegExpParser::ReadNext() {
|
|
|
| uc32 RegExpParser::Next() {
|
| if (has_next()) {
|
| - return ReadNext<false>();
|
| + return ReadNext(false, ScanMode::DEFAULT);
|
| } else {
|
| return kEndMarker;
|
| }
|
| }
|
|
|
| -
|
| -void RegExpParser::Advance() {
|
| +void RegExpParser::Advance(ScanMode mode) {
|
| if (has_next()) {
|
| StackLimitCheck check(isolate());
|
| if (check.HasOverflowed()) {
|
| @@ -84,7 +83,7 @@ void RegExpParser::Advance() {
|
| } else if (zone()->excess_allocation()) {
|
| ReportError(CStrVector("Regular expression too large"));
|
| } else {
|
| - current_ = ReadNext<true>();
|
| + current_ = ReadNext(true, mode);
|
| }
|
| } else {
|
| current_ = kEndMarker;
|
| @@ -102,10 +101,9 @@ void RegExpParser::Reset(int pos) {
|
| Advance();
|
| }
|
|
|
| -
|
| -void RegExpParser::Advance(int dist) {
|
| +void RegExpParser::Advance(int dist, ScanMode mode) {
|
| next_pos_ += dist - 1;
|
| - Advance();
|
| + Advance(mode);
|
| }
|
|
|
|
|
| @@ -329,7 +327,6 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
| if (FLAG_harmony_regexp_named_captures) {
|
| has_named_captures_ = true;
|
| is_named_capture = true;
|
| - Advance();
|
| break;
|
| }
|
| // Fall through.
|
| @@ -769,20 +766,26 @@ static void push_code_unit(ZoneVector<uc16>* v, uint32_t code_unit) {
|
|
|
| const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() {
|
| DCHECK(FLAG_harmony_regexp_named_captures);
|
| + DCHECK_EQ(current(), '<');
|
|
|
| ZoneVector<uc16>* name =
|
| new (zone()->New(sizeof(ZoneVector<uc16>))) ZoneVector<uc16>(zone());
|
|
|
| + // Capture names can always contain surrogate pairs, and we need to scan
|
| + // accordingly.
|
| + const ScanMode scan_mode = ScanMode::FORCE_COMBINE_SURROGATE_PAIRS;
|
| + Advance(scan_mode);
|
| +
|
| bool at_start = true;
|
| while (true) {
|
| uc32 c = current();
|
| - Advance();
|
| + Advance(scan_mode);
|
|
|
| // Convert unicode escapes.
|
| if (c == '\\' && current() == 'u') {
|
| // TODO(jgruber): Reconsider this once the spec has settled.
|
| // https://github.com/tc39/proposal-regexp-named-groups/issues/23
|
| - Advance();
|
| + Advance(scan_mode);
|
| if (!ParseUnicodeEscape(&c)) {
|
| ReportError(CStrVector("Invalid Unicode escape sequence"));
|
| return nullptr;
|
| @@ -853,7 +856,6 @@ bool RegExpParser::ParseNamedBackReference(RegExpBuilder* builder,
|
| return false;
|
| }
|
|
|
| - Advance();
|
| const ZoneVector<uc16>* name = ParseCaptureGroupName();
|
| if (name == nullptr) {
|
| return false;
|
|
|