Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(84)

Unified Diff: src/regexp/jsregexp.cc

Issue 1608693003: [regexp] step back if starting unicode regexp within surrogate pair. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@unicodeclass
Patch Set: fix Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | src/regexp/regexp-ast.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/regexp/jsregexp.cc
diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc
index 3559bcd111a4c19eb5755dd264d6a4a8cf39a88a..6235c25c7762f05ef399fc31838e64a10e2b2a39 100644
--- a/src/regexp/jsregexp.cc
+++ b/src/regexp/jsregexp.cc
@@ -3957,6 +3957,11 @@ void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler,
void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
int choice_count = alternatives_->length();
+ if (choice_count == 1 && alternatives_->at(0).guards() == NULL) {
+ alternatives_->at(0).node()->Emit(compiler, trace);
+ return;
+ }
+
AssertGuardsMentionRegisters(trace);
LimitResult limit_result = LimitVersions(compiler, trace);
@@ -5040,22 +5045,21 @@ void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
if (lead_surrogates == nullptr) return;
Zone* zone = compiler->zone();
// E.g. \ud801 becomes \ud801(?![\udc00-\udfff]).
- ZoneList<CharacterRange>* trail_surrogates =
- new (zone) ZoneList<CharacterRange>(1, zone);
- trail_surrogates->Add(
- CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd), zone);
-
- RegExpNode* match =
- compiler->read_backward()
- // Reading backward. Assert that reading forward, there is no trail
- // surrogate, and then backward match the lead surrogate.
- ? NegativeLookaroundAgainstReadDirectionAndMatch(
- compiler, trail_surrogates, lead_surrogates, on_success, true)
- // Reading forward. Forwrad match the lead surrogate and assert that
- // no
- // trail surrogate follows.
- : MatchAndNegativeLookaroundInReadDirection(
- compiler, lead_surrogates, trail_surrogates, on_success, false);
+ ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List(
+ zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
+
+ RegExpNode* match;
+ if (compiler->read_backward()) {
+ // Reading backward. Assert that reading forward, there is no trail
+ // surrogate, and then backward match the lead surrogate.
+ match = NegativeLookaroundAgainstReadDirectionAndMatch(
+ compiler, trail_surrogates, lead_surrogates, on_success, true);
+ } else {
+ // Reading forward. Forward match the lead surrogate and assert that
+ // no trail surrogate follows.
+ match = MatchAndNegativeLookaroundInReadDirection(
+ compiler, lead_surrogates, trail_surrogates, on_success, false);
+ }
result->AddAlternative(GuardedAlternative(match));
}
@@ -5067,25 +5071,55 @@ void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
if (trail_surrogates == nullptr) return;
Zone* zone = compiler->zone();
// E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01
- ZoneList<CharacterRange>* lead_surrogates =
- new (zone) ZoneList<CharacterRange>(1, zone);
- lead_surrogates->Add(
- CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd), zone);
-
- RegExpNode* match =
- compiler->read_backward()
- // Reading backward. Backward match the trail surrogate and assert
- // that no lead surrogate precedes it.
- ? MatchAndNegativeLookaroundInReadDirection(
- compiler, trail_surrogates, lead_surrogates, on_success, true)
- // Reading forward. Assert that reading backward, there is no lead
- // surrogate, and then forward match the trail surrogate.
- : NegativeLookaroundAgainstReadDirectionAndMatch(
- compiler, lead_surrogates, trail_surrogates, on_success, false);
+ ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
+ zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
+
+ RegExpNode* match;
+ if (compiler->read_backward()) {
+ // Reading backward. Backward match the trail surrogate and assert that no
+ // lead surrogate precedes it.
+ match = MatchAndNegativeLookaroundInReadDirection(
+ compiler, trail_surrogates, lead_surrogates, on_success, true);
+ } else {
+ // Reading forward. Assert that reading backward, there is no lead
+ // surrogate, and then forward match the trail surrogate.
+ match = NegativeLookaroundAgainstReadDirectionAndMatch(
+ compiler, lead_surrogates, trail_surrogates, on_success, false);
+ }
result->AddAlternative(GuardedAlternative(match));
}
+void AddUnanchoredAdvance(RegExpCompiler* compiler, ChoiceNode* result,
+ RegExpNode* on_success) {
+ // Implements ES2015 21.2.5.2.3, AdvanceStringIndex, as two alternatives added to |result|.
+ DCHECK(!compiler->read_backward());  // Only used when matching forward.
+ Zone* zone = compiler->zone();
+ // Alternative 1: consume one code unit below kNonBmpStart that is not a lead surrogate.
+ ZoneList<CharacterRange>* bmp_and_trail =
+ new (zone) ZoneList<CharacterRange>(2, zone);
+ bmp_and_trail->Add(CharacterRange::Range(0, kLeadSurrogateStart - 1), zone);  // Everything below the lead surrogates.
+ bmp_and_trail->Add(
+ CharacterRange::Range(kLeadSurrogateEnd + 1, kNonBmpStart - 1), zone);  // Just past the lead surrogates up to kNonBmpStart - 1.
+ result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges(
+ zone, bmp_and_trail, false, on_success)));
+
+ // Alternative 2: consume a lead surrogate, optionally followed by a trail surrogate.
+ ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
+ zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
+ ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List(
+ zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
+ ChoiceNode* optional_trail = new (zone) ChoiceNode(2, zone);  // Sub-choice reached after the lead surrogate.
+ optional_trail->AddAlternative(
+ GuardedAlternative(TextNode::CreateForCharacterRanges(
+ zone, trail_surrogates, false, on_success)));  // Lead followed by a trail: consume both.
+ optional_trail->AddAlternative(GuardedAlternative(on_success));  // No trail consumed: continue after the lead alone.
+ RegExpNode* optional_pair = TextNode::CreateForCharacterRanges(
+ zone, lead_surrogates, false, optional_trail);
+ result->AddAlternative(GuardedAlternative(optional_pair));
+}
+
+
RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
set_.Canonicalize();
@@ -5102,12 +5136,16 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
// No matches possible.
return new (zone) EndNode(EndNode::BACKTRACK, zone);
}
- UnicodeRangeSplitter splitter(zone, ranges);
- ChoiceNode* result = new (compiler->zone()) ChoiceNode(2, compiler->zone());
- AddBmpCharacters(compiler, result, on_success, &splitter);
- AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
- AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
- AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
+ ChoiceNode* result = new (zone) ChoiceNode(2, zone);
+ if (standard_type() == '*') {
+ AddUnanchoredAdvance(compiler, result, on_success);
+ } else {
+ UnicodeRangeSplitter splitter(zone, ranges);
+ AddBmpCharacters(compiler, result, on_success, &splitter);
+ AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
+ AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
+ AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
+ }
return result;
} else {
return new (zone) TextNode(this, compiler->read_backward(), on_success);
@@ -6513,6 +6551,36 @@ void DispatchTableConstructor::VisitAction(ActionNode* that) {
}
+RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpCompiler* compiler,
+ RegExpNode* on_success) {
+ // If the regexp matching starts within a surrogate pair, step back
+ // to the lead surrogate and start matching from there. Returns a two-way choice node.
+ DCHECK(!compiler->read_backward());  // Only used when matching forward.
+ Zone* zone = compiler->zone();
+ ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
+ zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
+ ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List(
+ zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
+
+ ChoiceNode* optional_step_back = new (zone) ChoiceNode(2, zone);
+
+ int stack_register = compiler->UnicodeLookaroundStackRegister();
+ int position_register = compiler->UnicodeLookaroundPositionRegister();
+ RegExpNode* step_back = TextNode::CreateForCharacterRanges(
+ zone, lead_surrogates, true, on_success);  // NOTE(review): third argument is presumably read_backward, i.e. match the lead surrogate backward - confirm.
+ RegExpLookaround::Builder builder(true, step_back, stack_register,
+ position_register);  // NOTE(review): first argument is presumably is_positive (a positive lookaround) - confirm.
+ RegExpNode* match_trail = TextNode::CreateForCharacterRanges(
+ zone, trail_surrogates, false, builder.on_match_success());  // Trail-surrogate match that feeds the lookaround's success node.
+
+ optional_step_back->AddAlternative(
+ GuardedAlternative(builder.ForMatch(match_trail)));  // First alternative: lookaround on a trail surrogate, then |step_back|.
+ optional_step_back->AddAlternative(GuardedAlternative(on_success));  // Second alternative: go straight to |on_success| without stepping back.
+
+ return optional_step_back;
+}
+
+
RegExpEngine::CompilationResult RegExpEngine::Compile(
Isolate* isolate, Zone* zone, RegExpCompileData* data,
JSRegExp::Flags flags, Handle<String> pattern,
@@ -6575,6 +6643,8 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
if (node != NULL) {
node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
}
+ } else if (compiler.unicode() && (is_global || is_sticky)) {
+ node = OptionallyStepBackToLeadSurrogate(&compiler, node);
}
if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);
« no previous file with comments | « no previous file | src/regexp/regexp-ast.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698