| Index: src/regexp/regexp-parser.cc
|
| diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc
|
| index ad74b3d723e7b253af17094264df310663464a3c..fa8900342cfc4878411a1c06d753254024f138fe 100644
|
| --- a/src/regexp/regexp-parser.cc
|
| +++ b/src/regexp/regexp-parser.cc
|
| @@ -56,6 +56,16 @@ void RegExpParser::Advance() {
|
| } else {
|
| current_ = in()->Get(next_pos_);
|
| next_pos_++;
|
| + // In unicode mode, read a lead/trail surrogate pair as one code point.
|
| + if (unicode_ && next_pos_ < in()->length() &&
|
| + unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(current_))) {
|
| + uc16 trail = in()->Get(next_pos_);
|
| + if (unibrow::Utf16::IsTrailSurrogate(trail)) {
|
| + current_ = unibrow::Utf16::CombineSurrogatePair(
|
| + static_cast<uc16>(current_), trail);
|
| + next_pos_++;
|
| + }
|
| + }
|
| }
|
| } else {
|
| current_ = kEndMarker;
|
| @@ -417,12 +427,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
| Advance(2);
|
| uc32 value;
|
| if (ParseUnicodeEscape(&value)) {
|
| - if (value > unibrow::Utf16::kMaxNonSurrogateCharCode) {
|
| - builder->AddCharacter(unibrow::Utf16::LeadSurrogate(value));
|
| - builder->AddCharacter(unibrow::Utf16::TrailSurrogate(value));
|
| - } else {
|
| - builder->AddCharacter(static_cast<uc16>(value));
|
| - }
|
| + builder->AddUnicodeCharacter(value);
|
| } else if (!unicode_) {
|
| builder->AddCharacter('u');
|
| } else {
|
| @@ -456,7 +461,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
| // fallthrough
|
| }
|
| default:
|
| - builder->AddCharacter(current());
|
| + builder->AddUnicodeCharacter(current());
|
| Advance();
|
| break;
|
| } // end switch(current())
|
| @@ -1057,6 +1062,19 @@ void RegExpBuilder::AddCharacter(uc16 c) {
|
| }
|
|
|
|
|
| +void RegExpBuilder::AddUnicodeCharacter(uc32 c) {  // Adds code point c.
|
| + if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {  // Encode as lead + trail.
|
| + ZoneList<uc16> surrogate_pair(2, zone());
|
| + surrogate_pair.Add(unibrow::Utf16::LeadSurrogate(c), zone());
|
| + surrogate_pair.Add(unibrow::Utf16::TrailSurrogate(c), zone());
|
| + RegExpAtom* atom = new (zone()) RegExpAtom(surrogate_pair.ToConstVector());  // Both units in one atom.
|
| + AddAtom(atom);
|
| + } else {  // Otherwise pass c on as a single uc16.
|
| + AddCharacter(static_cast<uc16>(c));
|
| + }
|
| +}
|
| +
|
| +
|
| void RegExpBuilder::AddEmpty() { pending_empty_ = true; }
|
|
|
|
|
|
|