Chromium Code Reviews | Index: src/regexp/regexp-parser.cc |
| diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc |
| index 5cdad974b3886254a50073798a24df09754182b5..cb6073ee4a0bec92e908d98118599ee02a996d31 100644 |
| --- a/src/regexp/regexp-parser.cc |
| +++ b/src/regexp/regexp-parser.cc |
| @@ -56,6 +56,16 @@ void RegExpParser::Advance() { |
| } else { |
| current_ = in()->Get(next_pos_); |
| next_pos_++; |
| + // Read the whole surrogate pair in case of unicode flag, if possible. |
| + if (unicode_ && next_pos_ < in()->length() && |
| + unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(current_))) { |
| + uc16 trail = in()->Get(next_pos_); |
| + if (unibrow::Utf16::IsTrailSurrogate(trail)) { |
| + current_ = unibrow::Utf16::CombineSurrogatePair( |
| + static_cast<uc16>(current_), trail); |
| + next_pos_++; |
| + } |
| + } |
| } |
| } else { |
| current_ = kEndMarker; |
| @@ -417,12 +427,7 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
| Advance(2); |
| uc32 value; |
| if (ParseUnicodeEscape(&value)) { |
| - if (value > unibrow::Utf16::kMaxNonSurrogateCharCode) { |
| - builder->AddCharacter(unibrow::Utf16::LeadSurrogate(value)); |
| - builder->AddCharacter(unibrow::Utf16::TrailSurrogate(value)); |
| - } else { |
| - builder->AddCharacter(static_cast<uc16>(value)); |
| - } |
| + builder->AddUnicodeCharacter(value); |
| } else if (!FLAG_harmony_unicode_regexps || !unicode_) { |
| builder->AddCharacter('u'); |
| } else { |
| @@ -457,7 +462,11 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
| // fallthrough |
| } |
| default: |
| - builder->AddCharacter(current()); |
| + if (unicode_) { |
|
rossberg
2016/01/11 12:25:41
Nit: is this if necessary? Can't you always use AddUnicodeCharacter?
Yang
2016/01/11 14:41:44
Good point. Done.
|
| + builder->AddUnicodeCharacter(current()); |
| + } else { |
| + builder->AddCharacter(current()); |
| + } |
| Advance(); |
| break; |
| } // end switch(current()) |
| @@ -1059,6 +1068,19 @@ void RegExpBuilder::AddCharacter(uc16 c) { |
| } |
| +void RegExpBuilder::AddUnicodeCharacter(uc32 c) { |
| + if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) { |
| + ZoneList<uc16> surrogate_pair(2, zone()); |
| + surrogate_pair.Add(unibrow::Utf16::LeadSurrogate(c), zone()); |
| + surrogate_pair.Add(unibrow::Utf16::TrailSurrogate(c), zone()); |
| + RegExpAtom* atom = new (zone()) RegExpAtom(surrogate_pair.ToConstVector()); |
| + AddAtom(atom); |
| + } else { |
| + AddCharacter(static_cast<uc16>(c)); |
| + } |
| +} |
| + |
| + |
| void RegExpBuilder::AddEmpty() { pending_empty_ = true; } |