Chromium Code Reviews| Index: src/regexp/jsregexp.cc |
| diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc |
| index 3559bcd111a4c19eb5755dd264d6a4a8cf39a88a..fe379b2fc403b1899f9b496619defdb9ba9e5c0e 100644 |
| --- a/src/regexp/jsregexp.cc |
| +++ b/src/regexp/jsregexp.cc |
| @@ -5032,7 +5032,6 @@ RegExpNode* MatchAndNegativeLookaroundInReadDirection( |
| zone, match, read_backward, lookaround.ForMatch(negative_match)); |
| } |
| - |
|
erikcorry
2016/01/22 10:10:10
Inadvertent edit?
Yang
2016/01/25 07:38:41
Undone.
|
| void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result, |
| RegExpNode* on_success, |
| UnicodeRangeSplitter* splitter) { |
| @@ -5040,10 +5039,8 @@ void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result, |
| if (lead_surrogates == nullptr) return; |
| Zone* zone = compiler->zone(); |
| // E.g. \ud801 becomes \ud801(?![\udc00-\udfff]). |
| - ZoneList<CharacterRange>* trail_surrogates = |
| - new (zone) ZoneList<CharacterRange>(1, zone); |
| - trail_surrogates->Add( |
| - CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd), zone); |
| + ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( |
| + zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); |
| RegExpNode* match = |
| compiler->read_backward() |
| @@ -5067,10 +5064,8 @@ void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result, |
| if (trail_surrogates == nullptr) return; |
| Zone* zone = compiler->zone(); |
| // E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01 |
| - ZoneList<CharacterRange>* lead_surrogates = |
| - new (zone) ZoneList<CharacterRange>(1, zone); |
| - lead_surrogates->Add( |
| - CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd), zone); |
| + ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( |
| + zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); |
| RegExpNode* match = |
| compiler->read_backward() |
| @@ -5086,6 +5081,36 @@ void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result, |
| } |
| +void AddUnanchoredAdvance(RegExpCompiler* compiler, ChoiceNode* result, |
| + RegExpNode* on_success) { |
| + // This implements ES2015 21.2.5.2.3, AdvanceStringIndex. |
| + DCHECK(!compiler->read_backward()); |
| + Zone* zone = compiler->zone(); |
| + // Advancing can either consume a BMP code unit other than a lead surrogate. |
| + ZoneList<CharacterRange>* bmp_and_trail = |
| + new (zone) ZoneList<CharacterRange>(2, zone); |
| + bmp_and_trail->Add(CharacterRange::Range(0, kLeadSurrogateStart - 1), zone); |
| + bmp_and_trail->Add( |
| + CharacterRange::Range(kLeadSurrogateEnd + 1, kNonBmpStart - 1), zone); |
| + result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges( |
| + zone, bmp_and_trail, false, on_success))); |
| + |
| + // Or it could consume a lead optionally followed by a trail surrogate. |
| + ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( |
| + zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); |
| + ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( |
| + zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); |
| + ChoiceNode* optional_trail = new (zone) ChoiceNode(2, zone); |
| + optional_trail->AddAlternative( |
| + GuardedAlternative(TextNode::CreateForCharacterRanges( |
| + zone, trail_surrogates, false, on_success))); |
| + optional_trail->AddAlternative(GuardedAlternative(on_success)); |
| + RegExpNode* optional_pair = TextNode::CreateForCharacterRanges( |
| + zone, lead_surrogates, false, optional_trail); |
| + result->AddAlternative(GuardedAlternative(optional_pair)); |
| +} |
| + |
| + |
| RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
| RegExpNode* on_success) { |
| set_.Canonicalize(); |
| @@ -5102,12 +5127,16 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
| // No matches possible. |
| return new (zone) EndNode(EndNode::BACKTRACK, zone); |
| } |
| - UnicodeRangeSplitter splitter(zone, ranges); |
| - ChoiceNode* result = new (compiler->zone()) ChoiceNode(2, compiler->zone()); |
| - AddBmpCharacters(compiler, result, on_success, &splitter); |
| - AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); |
| - AddLoneLeadSurrogates(compiler, result, on_success, &splitter); |
| - AddLoneTrailSurrogates(compiler, result, on_success, &splitter); |
| + ChoiceNode* result = new (zone) ChoiceNode(2, zone); |
| + if (standard_type() == '*') { |
| + AddUnanchoredAdvance(compiler, result, on_success); |
| + } else { |
| + UnicodeRangeSplitter splitter(zone, ranges); |
| + AddBmpCharacters(compiler, result, on_success, &splitter); |
| + AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); |
| + AddLoneLeadSurrogates(compiler, result, on_success, &splitter); |
| + AddLoneTrailSurrogates(compiler, result, on_success, &splitter); |
| + } |
| return result; |
| } else { |
| return new (zone) TextNode(this, compiler->read_backward(), on_success); |
| @@ -6513,6 +6542,36 @@ void DispatchTableConstructor::VisitAction(ActionNode* that) { |
| } |
| +RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpCompiler* compiler, |
| + RegExpNode* on_success) { |
| + // If the regexp matching starts within a surrogate pair, step back |
| + // to the lead surrogate and start matching from there. |
| + DCHECK(!compiler->read_backward()); |
| + Zone* zone = compiler->zone(); |
| + ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( |
| + zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); |
| + ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( |
| + zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); |
| + |
| + ChoiceNode* optional_step_back = new (zone) ChoiceNode(2, zone); |
| + |
| + int stack_register = compiler->UnicodeLookaroundStackRegister(); |
| + int position_register = compiler->UnicodeLookaroundPositionRegister(); |
| + RegExpNode* step_back = TextNode::CreateForCharacterRanges( |
| + zone, lead_surrogates, true, on_success); |
| + RegExpLookaround::Builder builder(true, step_back, stack_register, |
| + position_register); |
| + RegExpNode* match_trail = TextNode::CreateForCharacterRanges( |
| + zone, trail_surrogates, false, builder.on_match_success()); |
| + |
| + optional_step_back->AddAlternative( |
| + GuardedAlternative(builder.ForMatch(match_trail))); |
| + optional_step_back->AddAlternative(GuardedAlternative(on_success)); |
| + |
| + return optional_step_back; |
| +} |
| + |
| + |
| RegExpEngine::CompilationResult RegExpEngine::Compile( |
| Isolate* isolate, Zone* zone, RegExpCompileData* data, |
| JSRegExp::Flags flags, Handle<String> pattern, |
| @@ -6575,6 +6634,8 @@ RegExpEngine::CompilationResult RegExpEngine::Compile( |
| if (node != NULL) { |
| node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
| } |
| + } else if (compiler.unicode()) { |
| + node = OptionallyStepBackToLeadSurrogate(&compiler, node); |
| } |
| if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); |