Index: src/regexp/regexp-parser.cc |
diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc |
index ad74b3d723e7b253af17094264df310663464a3c..fa8900342cfc4878411a1c06d753254024f138fe 100644 |
--- a/src/regexp/regexp-parser.cc |
+++ b/src/regexp/regexp-parser.cc |
@@ -56,6 +56,16 @@ void RegExpParser::Advance() { |
} else { |
current_ = in()->Get(next_pos_); |
next_pos_++; |
+ // In unicode mode, read a well-formed surrogate pair as one code point. |
+ if (unicode_ && next_pos_ < in()->length() && |
+ unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(current_))) { |
+ uc16 trail = in()->Get(next_pos_); |
+ if (unibrow::Utf16::IsTrailSurrogate(trail)) { |
+ current_ = unibrow::Utf16::CombineSurrogatePair( |
+ static_cast<uc16>(current_), trail); |
+ next_pos_++; |
+ } |
+ } |
} |
} else { |
current_ = kEndMarker; |
@@ -417,12 +427,7 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
Advance(2); |
uc32 value; |
if (ParseUnicodeEscape(&value)) { |
- if (value > unibrow::Utf16::kMaxNonSurrogateCharCode) { |
- builder->AddCharacter(unibrow::Utf16::LeadSurrogate(value)); |
- builder->AddCharacter(unibrow::Utf16::TrailSurrogate(value)); |
- } else { |
- builder->AddCharacter(static_cast<uc16>(value)); |
- } |
+ builder->AddUnicodeCharacter(value); |
} else if (!unicode_) { |
builder->AddCharacter('u'); |
} else { |
@@ -456,7 +461,7 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
// fallthrough |
} |
default: |
- builder->AddCharacter(current()); |
+ builder->AddUnicodeCharacter(current()); |
Advance(); |
break; |
} // end switch(current()) |
@@ -1057,6 +1062,19 @@ void RegExpBuilder::AddCharacter(uc16 c) { |
} |
+// Adds |c|; above kMaxNonSurrogateCharCode it becomes a surrogate-pair atom. |
+void RegExpBuilder::AddUnicodeCharacter(uc32 c) { |
+  if (c <= unibrow::Utf16::kMaxNonSurrogateCharCode) { |
+    AddCharacter(static_cast<uc16>(c));  // No surrogate pair needed. |
+    return; |
+  } |
+  ZoneList<uc16> code_units(2, zone()); |
+  code_units.Add(unibrow::Utf16::LeadSurrogate(c), zone()); |
+  code_units.Add(unibrow::Utf16::TrailSurrogate(c), zone()); |
+  AddAtom(new (zone()) RegExpAtom(code_units.ToConstVector())); |
+} |
+ |
+ |
void RegExpBuilder::AddEmpty() { pending_empty_ = true; } |