Index: src/regexp/regexp-parser.cc |
diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc |
index 5cdad974b3886254a50073798a24df09754182b5..cb6073ee4a0bec92e908d98118599ee02a996d31 100644 |
--- a/src/regexp/regexp-parser.cc |
+++ b/src/regexp/regexp-parser.cc |
@@ -56,6 +56,16 @@ void RegExpParser::Advance() { |
} else { |
current_ = in()->Get(next_pos_); |
next_pos_++; |
+ // Read the whole surrogate pair in case of unicode flag, if possible. |
+ if (unicode_ && next_pos_ < in()->length() && |
+ unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(current_))) { |
+ uc16 trail = in()->Get(next_pos_); |
+ if (unibrow::Utf16::IsTrailSurrogate(trail)) { |
+ current_ = unibrow::Utf16::CombineSurrogatePair( |
+ static_cast<uc16>(current_), trail); |
+ next_pos_++; |
+ } |
+ } |
} |
} else { |
current_ = kEndMarker; |
@@ -417,12 +427,7 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
Advance(2); |
uc32 value; |
if (ParseUnicodeEscape(&value)) { |
- if (value > unibrow::Utf16::kMaxNonSurrogateCharCode) { |
- builder->AddCharacter(unibrow::Utf16::LeadSurrogate(value)); |
- builder->AddCharacter(unibrow::Utf16::TrailSurrogate(value)); |
- } else { |
- builder->AddCharacter(static_cast<uc16>(value)); |
- } |
+ builder->AddUnicodeCharacter(value); |
} else if (!FLAG_harmony_unicode_regexps || !unicode_) { |
builder->AddCharacter('u'); |
} else { |
@@ -457,7 +462,11 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
// fallthrough |
} |
default: |
- builder->AddCharacter(current()); |
+ if (unicode_) { |
rossberg
2016/01/11 12:25:41
Nit: is this `if` necessary? Can't you always use AddUnicodeCharacter?
Yang
2016/01/11 14:41:44
Good point. Done.
|
+ builder->AddUnicodeCharacter(current()); |
+ } else { |
+ builder->AddCharacter(current()); |
+ } |
Advance(); |
break; |
} // end switch(current()) |
@@ -1059,6 +1068,19 @@ void RegExpBuilder::AddCharacter(uc16 c) { |
} |
+// Appends the code point |c| to the pattern being built. A value no greater |
+// than unibrow::Utf16::kMaxNonSurrogateCharCode is narrowed to one uc16 and |
+// passed to AddCharacter(); a larger value is split into its lead and trail |
+// surrogates, which are added together as a single two-unit RegExpAtom. |
+void RegExpBuilder::AddUnicodeCharacter(uc32 c) { |
+ if (c <= unibrow::Utf16::kMaxNonSurrogateCharCode) { |
+ AddCharacter(static_cast<uc16>(c)); |
+ return; |
+ } |
+ // Keep both halves in one atom rather than adding them unit by unit. |
+ ZoneList<uc16> code_units(2, zone()); |
+ code_units.Add(unibrow::Utf16::LeadSurrogate(c), zone()); |
+ code_units.Add(unibrow::Utf16::TrailSurrogate(c), zone()); |
+ AddAtom(new (zone()) RegExpAtom(code_units.ToConstVector())); |
+} |
+ |
+ |
// Sets the pending_empty_ flag; no other builder state is touched here. |
void RegExpBuilder::AddEmpty() { pending_empty_ = true; } |