Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Unified Diff: src/regexp/jsregexp.cc

Issue 2813893002: [regexp] Consider surrogate pairs when optimizing disjunctions (Closed)
Patch Set: DCHECK(!IsLeadSurrogate) Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | src/regexp/regexp-ast.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/regexp/jsregexp.cc
diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc
index 8ab2681dcf0ef6fec4798bca9e58398770257b08..add1d2006cbba69f6172002f71899cc10a619ae4 100644
--- a/src/regexp/jsregexp.cc
+++ b/src/regexp/jsregexp.cc
@@ -3327,9 +3327,8 @@ TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
RegExpNode* on_success) {
DCHECK_NOT_NULL(ranges);
ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(1, zone);
- elms->Add(
- TextElement::CharClass(new (zone) RegExpCharacterClass(ranges, false)),
- zone);
+ elms->Add(TextElement::CharClass(new (zone) RegExpCharacterClass(ranges)),
+ zone);
return new (zone) TextNode(elms, read_backward, on_success);
}
@@ -3341,12 +3340,12 @@ TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead,
ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead);
ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(2, zone);
- elms->Add(TextElement::CharClass(
- new (zone) RegExpCharacterClass(lead_ranges, false)),
- zone);
- elms->Add(TextElement::CharClass(
- new (zone) RegExpCharacterClass(trail_ranges, false)),
- zone);
+ elms->Add(
+ TextElement::CharClass(new (zone) RegExpCharacterClass(lead_ranges)),
+ zone);
+ elms->Add(
+ TextElement::CharClass(new (zone) RegExpCharacterClass(trail_ranges)),
+ zone);
return new (zone) TextNode(elms, read_backward, on_success);
}
@@ -4851,7 +4850,7 @@ static bool CompareRanges(ZoneList<CharacterRange>* ranges,
bool RegExpCharacterClass::is_standard(Zone* zone) {
// TODO(lrn): Remove need for this function, by not throwing away information
// along the way.
- if (is_negated_) {
+ if (is_negated()) {
return false;
}
if (set_.is_standard()) {
@@ -5144,7 +5143,8 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
if (compiler->needs_unicode_case_equivalents()) {
AddUnicodeCaseEquivalents(ranges, zone);
}
- if (compiler->unicode() && !compiler->one_byte()) {
+ if (compiler->unicode() && !compiler->one_byte() &&
+ !contains_split_surrogate()) {
if (is_negated()) {
ZoneList<CharacterRange>* negated =
new (zone) ZoneList<CharacterRange>(2, zone);
@@ -5154,7 +5154,7 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
if (ranges->length() == 0) {
ranges->Add(CharacterRange::Everything(), zone);
RegExpCharacterClass* fail =
- new (zone) RegExpCharacterClass(ranges, true);
+ new (zone) RegExpCharacterClass(ranges, NEGATED);
return new (zone) TextNode(fail, compiler->read_backward(), on_success);
}
if (standard_type() == '*') {
@@ -5368,6 +5368,9 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
i++;
continue;
}
+ DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
+ bool contains_trail_surrogate =
+ unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
int first_in_run = i;
i++;
while (i < length) {
@@ -5375,6 +5378,9 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
if (!alternative->IsAtom()) break;
atom = alternative->AsAtom();
if (atom->length() != 1) break;
+ DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
+ contains_trail_surrogate |=
+ unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
i++;
}
if (i > first_in_run + 1) {
@@ -5387,8 +5393,12 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
DCHECK_EQ(old_atom->length(), 1);
ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
}
+ RegExpCharacterClass::Flags flags;
+ if (compiler->unicode() && contains_trail_surrogate) {
+ flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
+ }
alternatives->at(write_posn++) =
- new (zone) RegExpCharacterClass(ranges, false);
+ new (zone) RegExpCharacterClass(ranges, flags);
} else {
// Just copy any trivial alternatives.
for (int j = first_in_run; j < i; j++) {
« no previous file with comments | « no previous file | src/regexp/regexp-ast.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698