Index: src/regexp/jsregexp.cc |
diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc |
index 3559bcd111a4c19eb5755dd264d6a4a8cf39a88a..6235c25c7762f05ef399fc31838e64a10e2b2a39 100644 |
--- a/src/regexp/jsregexp.cc |
+++ b/src/regexp/jsregexp.cc |
@@ -3957,6 +3957,11 @@ void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, |
void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
int choice_count = alternatives_->length(); |
+ if (choice_count == 1 && alternatives_->at(0).guards() == NULL) { |
+ alternatives_->at(0).node()->Emit(compiler, trace); |
+ return; |
+ } |
+ |
AssertGuardsMentionRegisters(trace); |
LimitResult limit_result = LimitVersions(compiler, trace); |
@@ -5040,22 +5045,21 @@ void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result, |
if (lead_surrogates == nullptr) return; |
Zone* zone = compiler->zone(); |
// E.g. \ud801 becomes \ud801(?![\udc00-\udfff]). |
- ZoneList<CharacterRange>* trail_surrogates = |
- new (zone) ZoneList<CharacterRange>(1, zone); |
- trail_surrogates->Add( |
- CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd), zone); |
- |
- RegExpNode* match = |
- compiler->read_backward() |
- // Reading backward. Assert that reading forward, there is no trail |
- // surrogate, and then backward match the lead surrogate. |
- ? NegativeLookaroundAgainstReadDirectionAndMatch( |
- compiler, trail_surrogates, lead_surrogates, on_success, true) |
- // Reading forward. Forwrad match the lead surrogate and assert that |
- // no |
- // trail surrogate follows. |
- : MatchAndNegativeLookaroundInReadDirection( |
- compiler, lead_surrogates, trail_surrogates, on_success, false); |
+ ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( |
+ zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); |
+ |
+ RegExpNode* match; |
+ if (compiler->read_backward()) { |
+ // Reading backward. Assert that reading forward, there is no trail |
+ // surrogate, and then backward match the lead surrogate. |
+ match = NegativeLookaroundAgainstReadDirectionAndMatch( |
+ compiler, trail_surrogates, lead_surrogates, on_success, true); |
+ } else { |
+ // Reading forward. Forward match the lead surrogate and assert that |
+ // no trail surrogate follows. |
+ match = MatchAndNegativeLookaroundInReadDirection( |
+ compiler, lead_surrogates, trail_surrogates, on_success, false); |
+ } |
result->AddAlternative(GuardedAlternative(match)); |
} |
@@ -5067,25 +5071,55 @@ void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result, |
if (trail_surrogates == nullptr) return; |
Zone* zone = compiler->zone(); |
// E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01 |
- ZoneList<CharacterRange>* lead_surrogates = |
- new (zone) ZoneList<CharacterRange>(1, zone); |
- lead_surrogates->Add( |
- CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd), zone); |
- |
- RegExpNode* match = |
- compiler->read_backward() |
- // Reading backward. Backward match the trail surrogate and assert |
- // that no lead surrogate precedes it. |
- ? MatchAndNegativeLookaroundInReadDirection( |
- compiler, trail_surrogates, lead_surrogates, on_success, true) |
- // Reading forward. Assert that reading backward, there is no lead |
- // surrogate, and then forward match the trail surrogate. |
- : NegativeLookaroundAgainstReadDirectionAndMatch( |
- compiler, lead_surrogates, trail_surrogates, on_success, false); |
+ ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( |
+ zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); |
+ |
+ RegExpNode* match; |
+ if (compiler->read_backward()) { |
+ // Reading backward. Backward match the trail surrogate and assert that no |
+ // lead surrogate precedes it. |
+ match = MatchAndNegativeLookaroundInReadDirection( |
+ compiler, trail_surrogates, lead_surrogates, on_success, true); |
+ } else { |
+ // Reading forward. Assert that reading backward, there is no lead |
+ // surrogate, and then forward match the trail surrogate. |
+ match = NegativeLookaroundAgainstReadDirectionAndMatch( |
+ compiler, lead_surrogates, trail_surrogates, on_success, false); |
+ } |
result->AddAlternative(GuardedAlternative(match)); |
} |
+void AddUnanchoredAdvance(RegExpCompiler* compiler, ChoiceNode* result, |
+ RegExpNode* on_success) { |
+ // This implements ES2015 21.2.5.2.3, AdvanceStringIndex. |
+ DCHECK(!compiler->read_backward()); |
+ Zone* zone = compiler->zone(); |
+ // Advancing can either consume a BMP character or a trail surrogate. |
+ ZoneList<CharacterRange>* bmp_and_trail = |
+ new (zone) ZoneList<CharacterRange>(2, zone); |
+ bmp_and_trail->Add(CharacterRange::Range(0, kLeadSurrogateStart - 1), zone); |
+ bmp_and_trail->Add( |
+ CharacterRange::Range(kLeadSurrogateEnd + 1, kNonBmpStart - 1), zone); |
+ result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges( |
+ zone, bmp_and_trail, false, on_success))); |
+ |
+ // Or it could consume a lead optionally followed by a trail surrogate. |
+ ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( |
+ zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); |
+ ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( |
+ zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); |
+ ChoiceNode* optional_trail = new (zone) ChoiceNode(2, zone); |
+ optional_trail->AddAlternative( |
+ GuardedAlternative(TextNode::CreateForCharacterRanges( |
+ zone, trail_surrogates, false, on_success))); |
+ optional_trail->AddAlternative(GuardedAlternative(on_success)); |
+ RegExpNode* optional_pair = TextNode::CreateForCharacterRanges( |
+ zone, lead_surrogates, false, optional_trail); |
+ result->AddAlternative(GuardedAlternative(optional_pair)); |
+} |
+ |
+ |
RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
RegExpNode* on_success) { |
set_.Canonicalize(); |
@@ -5102,12 +5136,16 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
// No matches possible. |
return new (zone) EndNode(EndNode::BACKTRACK, zone); |
} |
- UnicodeRangeSplitter splitter(zone, ranges); |
- ChoiceNode* result = new (compiler->zone()) ChoiceNode(2, compiler->zone()); |
- AddBmpCharacters(compiler, result, on_success, &splitter); |
- AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); |
- AddLoneLeadSurrogates(compiler, result, on_success, &splitter); |
- AddLoneTrailSurrogates(compiler, result, on_success, &splitter); |
+ ChoiceNode* result = new (zone) ChoiceNode(2, zone); |
+ if (standard_type() == '*') { |
+ AddUnanchoredAdvance(compiler, result, on_success); |
+ } else { |
+ UnicodeRangeSplitter splitter(zone, ranges); |
+ AddBmpCharacters(compiler, result, on_success, &splitter); |
+ AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); |
+ AddLoneLeadSurrogates(compiler, result, on_success, &splitter); |
+ AddLoneTrailSurrogates(compiler, result, on_success, &splitter); |
+ } |
return result; |
} else { |
return new (zone) TextNode(this, compiler->read_backward(), on_success); |
@@ -6513,6 +6551,36 @@ void DispatchTableConstructor::VisitAction(ActionNode* that) { |
} |
+RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpCompiler* compiler, |
+ RegExpNode* on_success) { |
+ // If the regexp matching starts within a surrogate pair, step back |
+ // to the lead surrogate and start matching from there. |
+ DCHECK(!compiler->read_backward()); |
+ Zone* zone = compiler->zone(); |
+ ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( |
+ zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); |
+ ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( |
+ zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); |
+ |
+ ChoiceNode* optional_step_back = new (zone) ChoiceNode(2, zone); |
+ |
+ int stack_register = compiler->UnicodeLookaroundStackRegister(); |
+ int position_register = compiler->UnicodeLookaroundPositionRegister(); |
+ RegExpNode* step_back = TextNode::CreateForCharacterRanges( |
+ zone, lead_surrogates, true, on_success); |
+ RegExpLookaround::Builder builder(true, step_back, stack_register, |
+ position_register); |
+ RegExpNode* match_trail = TextNode::CreateForCharacterRanges( |
+ zone, trail_surrogates, false, builder.on_match_success()); |
+ |
+ optional_step_back->AddAlternative( |
+ GuardedAlternative(builder.ForMatch(match_trail))); |
+ optional_step_back->AddAlternative(GuardedAlternative(on_success)); |
+ |
+ return optional_step_back; |
+} |
+ |
+ |
RegExpEngine::CompilationResult RegExpEngine::Compile( |
Isolate* isolate, Zone* zone, RegExpCompileData* data, |
JSRegExp::Flags flags, Handle<String> pattern, |
@@ -6575,6 +6643,8 @@ RegExpEngine::CompilationResult RegExpEngine::Compile( |
if (node != NULL) { |
node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
} |
+ } else if (compiler.unicode() && (is_global || is_sticky)) { |
+ node = OptionallyStepBackToLeadSurrogate(&compiler, node); |
} |
if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); |