Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(80)

Unified Diff: src/regexp/jsregexp.cc

Issue 1676293003: [regexp] simplify unanchored advance for unicode regexps. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: shorten test Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | test/mjsunit/harmony/unicode-regexp-unanchored-advance.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: src/regexp/jsregexp.cc
diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc
index 7b510b072b0bba0d6e66726397213c52ead70a9f..c958310d15370c465e21263e0a21c2b7ea9ac118 100644
--- a/src/regexp/jsregexp.cc
+++ b/src/regexp/jsregexp.cc
@@ -5085,34 +5085,18 @@ void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
result->AddAlternative(GuardedAlternative(match));
}
-
-void AddUnanchoredAdvance(RegExpCompiler* compiler, ChoiceNode* result,
- RegExpNode* on_success) {
+RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
+ RegExpNode* on_success) {
// This implements ES2015 21.2.5.2.3, AdvanceStringIndex.
DCHECK(!compiler->read_backward());
Zone* zone = compiler->zone();
- // Advancing can either consume a BMP character or a trail surrogate.
- ZoneList<CharacterRange>* bmp_and_trail =
- new (zone) ZoneList<CharacterRange>(2, zone);
- bmp_and_trail->Add(CharacterRange::Range(0, kLeadSurrogateStart - 1), zone);
- bmp_and_trail->Add(
- CharacterRange::Range(kLeadSurrogateEnd + 1, kNonBmpStart - 1), zone);
- result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges(
- zone, bmp_and_trail, false, on_success)));
-
- // Or it could consume a lead optionally followed by a trail surrogate.
- ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
- zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
- ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List(
- zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
- ChoiceNode* optional_trail = new (zone) ChoiceNode(2, zone);
- optional_trail->AddAlternative(
- GuardedAlternative(TextNode::CreateForCharacterRanges(
- zone, trail_surrogates, false, on_success)));
- optional_trail->AddAlternative(GuardedAlternative(on_success));
- RegExpNode* optional_pair = TextNode::CreateForCharacterRanges(
- zone, lead_surrogates, false, optional_trail);
- result->AddAlternative(GuardedAlternative(optional_pair));
+ // Advance any character. If the character happens to be a lead surrogate and
+ // we advanced into the middle of a surrogate pair, it will work out, as
+ // nothing will match from there. We will have to advance again, consuming
+ // the associated trail surrogate.
+ ZoneList<CharacterRange>* range = CharacterRange::List(
+ zone, CharacterRange::Range(0, String::kMaxUtf16CodeUnit));
+ return TextNode::CreateForCharacterRanges(zone, range, false, on_success);
}
@@ -5174,17 +5158,17 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
// No matches possible.
return new (zone) EndNode(EndNode::BACKTRACK, zone);
}
- ChoiceNode* result = new (zone) ChoiceNode(2, zone);
if (standard_type() == '*') {
- AddUnanchoredAdvance(compiler, result, on_success);
+ return UnanchoredAdvance(compiler, on_success);
} else {
+ ChoiceNode* result = new (zone) ChoiceNode(2, zone);
UnicodeRangeSplitter splitter(zone, ranges);
AddBmpCharacters(compiler, result, on_success, &splitter);
AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
+ return result;
}
- return result;
} else {
return new (zone) TextNode(this, compiler->read_backward(), on_success);
}
« no previous file with comments | « no previous file | test/mjsunit/harmony/unicode-regexp-unanchored-advance.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698