| Index: src/regexp/jsregexp.cc
|
| diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc
|
| index 8ab2681dcf0ef6fec4798bca9e58398770257b08..add1d2006cbba69f6172002f71899cc10a619ae4 100644
|
| --- a/src/regexp/jsregexp.cc
|
| +++ b/src/regexp/jsregexp.cc
|
| @@ -3327,9 +3327,8 @@ TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
|
| RegExpNode* on_success) {
|
| DCHECK_NOT_NULL(ranges);
|
| ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(1, zone);
|
| - elms->Add(
|
| - TextElement::CharClass(new (zone) RegExpCharacterClass(ranges, false)),
|
| - zone);
|
| + elms->Add(TextElement::CharClass(new (zone) RegExpCharacterClass(ranges)),
|
| + zone);
|
| return new (zone) TextNode(elms, read_backward, on_success);
|
| }
|
|
|
| @@ -3341,12 +3340,12 @@ TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead,
|
| ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead);
|
| ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
|
| ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(2, zone);
|
| - elms->Add(TextElement::CharClass(
|
| - new (zone) RegExpCharacterClass(lead_ranges, false)),
|
| - zone);
|
| - elms->Add(TextElement::CharClass(
|
| - new (zone) RegExpCharacterClass(trail_ranges, false)),
|
| - zone);
|
| + elms->Add(
|
| + TextElement::CharClass(new (zone) RegExpCharacterClass(lead_ranges)),
|
| + zone);
|
| + elms->Add(
|
| + TextElement::CharClass(new (zone) RegExpCharacterClass(trail_ranges)),
|
| + zone);
|
| return new (zone) TextNode(elms, read_backward, on_success);
|
| }
|
|
|
| @@ -4851,7 +4850,7 @@ static bool CompareRanges(ZoneList<CharacterRange>* ranges,
|
| bool RegExpCharacterClass::is_standard(Zone* zone) {
|
| // TODO(lrn): Remove need for this function, by not throwing away information
|
| // along the way.
|
| - if (is_negated_) {
|
| + if (is_negated()) {
|
| return false;
|
| }
|
| if (set_.is_standard()) {
|
| @@ -5144,7 +5143,8 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
|
| if (compiler->needs_unicode_case_equivalents()) {
|
| AddUnicodeCaseEquivalents(ranges, zone);
|
| }
|
| - if (compiler->unicode() && !compiler->one_byte()) {
|
| + if (compiler->unicode() && !compiler->one_byte() &&
|
| + !contains_split_surrogate()) {
|
| if (is_negated()) {
|
| ZoneList<CharacterRange>* negated =
|
| new (zone) ZoneList<CharacterRange>(2, zone);
|
| @@ -5154,7 +5154,7 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
|
| if (ranges->length() == 0) {
|
| ranges->Add(CharacterRange::Everything(), zone);
|
| RegExpCharacterClass* fail =
|
| - new (zone) RegExpCharacterClass(ranges, true);
|
| + new (zone) RegExpCharacterClass(ranges, NEGATED);
|
| return new (zone) TextNode(fail, compiler->read_backward(), on_success);
|
| }
|
| if (standard_type() == '*') {
|
| @@ -5368,6 +5368,9 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
|
| i++;
|
| continue;
|
| }
|
| + DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
|
| + bool contains_trail_surrogate =
|
| + unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
|
| int first_in_run = i;
|
| i++;
|
| while (i < length) {
|
| @@ -5375,6 +5378,9 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
|
| if (!alternative->IsAtom()) break;
|
| atom = alternative->AsAtom();
|
| if (atom->length() != 1) break;
|
| + DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
|
| + contains_trail_surrogate |=
|
| + unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
|
| i++;
|
| }
|
| if (i > first_in_run + 1) {
|
| @@ -5387,8 +5393,12 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
|
| DCHECK_EQ(old_atom->length(), 1);
|
| ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
|
| }
|
| + RegExpCharacterClass::Flags flags;
|
| + if (compiler->unicode() && contains_trail_surrogate) {
|
| + flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
|
| + }
|
| alternatives->at(write_posn++) =
|
| - new (zone) RegExpCharacterClass(ranges, false);
|
| + new (zone) RegExpCharacterClass(ranges, flags);
|
| } else {
|
| // Just copy any trivial alternatives.
|
| for (int j = first_in_run; j < i; j++) {
|
|
|