Index: src/regexp/jsregexp.cc |
diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc |
index 8ab2681dcf0ef6fec4798bca9e58398770257b08..add1d2006cbba69f6172002f71899cc10a619ae4 100644 |
--- a/src/regexp/jsregexp.cc |
+++ b/src/regexp/jsregexp.cc |
@@ -3327,9 +3327,8 @@ TextNode* TextNode::CreateForCharacterRanges(Zone* zone, |
RegExpNode* on_success) { |
DCHECK_NOT_NULL(ranges); |
ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(1, zone); |
- elms->Add( |
- TextElement::CharClass(new (zone) RegExpCharacterClass(ranges, false)), |
- zone); |
+ elms->Add(TextElement::CharClass(new (zone) RegExpCharacterClass(ranges)), |
+ zone); |
return new (zone) TextNode(elms, read_backward, on_success); |
} |
@@ -3341,12 +3340,12 @@ TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead, |
ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead); |
ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail); |
ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(2, zone); |
- elms->Add(TextElement::CharClass( |
- new (zone) RegExpCharacterClass(lead_ranges, false)), |
- zone); |
- elms->Add(TextElement::CharClass( |
- new (zone) RegExpCharacterClass(trail_ranges, false)), |
- zone); |
+ elms->Add( |
+ TextElement::CharClass(new (zone) RegExpCharacterClass(lead_ranges)), |
+ zone); |
+ elms->Add( |
+ TextElement::CharClass(new (zone) RegExpCharacterClass(trail_ranges)), |
+ zone); |
return new (zone) TextNode(elms, read_backward, on_success); |
} |
@@ -4851,7 +4850,7 @@ static bool CompareRanges(ZoneList<CharacterRange>* ranges, |
bool RegExpCharacterClass::is_standard(Zone* zone) { |
// TODO(lrn): Remove need for this function, by not throwing away information |
// along the way. |
- if (is_negated_) { |
+ if (is_negated()) { |
return false; |
} |
if (set_.is_standard()) { |
@@ -5144,7 +5143,8 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
if (compiler->needs_unicode_case_equivalents()) { |
AddUnicodeCaseEquivalents(ranges, zone); |
} |
- if (compiler->unicode() && !compiler->one_byte()) { |
+ if (compiler->unicode() && !compiler->one_byte() && |
+ !contains_split_surrogate()) { |
if (is_negated()) { |
ZoneList<CharacterRange>* negated = |
new (zone) ZoneList<CharacterRange>(2, zone); |
@@ -5154,7 +5154,7 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
if (ranges->length() == 0) { |
ranges->Add(CharacterRange::Everything(), zone); |
RegExpCharacterClass* fail = |
- new (zone) RegExpCharacterClass(ranges, true); |
+ new (zone) RegExpCharacterClass(ranges, NEGATED); |
return new (zone) TextNode(fail, compiler->read_backward(), on_success); |
} |
if (standard_type() == '*') { |
@@ -5368,6 +5368,9 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions( |
i++; |
continue; |
} |
+ DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0))); |
+ bool contains_trail_surrogate = |
+ unibrow::Utf16::IsTrailSurrogate(atom->data().at(0)); |
int first_in_run = i; |
i++; |
while (i < length) { |
@@ -5375,6 +5378,9 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions( |
if (!alternative->IsAtom()) break; |
atom = alternative->AsAtom(); |
if (atom->length() != 1) break; |
+ DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0))); |
+ contains_trail_surrogate |= |
+ unibrow::Utf16::IsTrailSurrogate(atom->data().at(0)); |
i++; |
} |
if (i > first_in_run + 1) { |
@@ -5387,8 +5393,12 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions( |
DCHECK_EQ(old_atom->length(), 1); |
ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone); |
} |
+ RegExpCharacterClass::Flags flags; |
+ if (compiler->unicode() && contains_trail_surrogate) { |
+ flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE; |
+ } |
alternatives->at(write_posn++) = |
- new (zone) RegExpCharacterClass(ranges, false); |
+ new (zone) RegExpCharacterClass(ranges, flags); |
} else { |
// Just copy any trivial alternatives. |
for (int j = first_in_run; j < i; j++) { |