Index: src/regexp/jsregexp.cc |
diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc |
index 3559bcd111a4c19eb5755dd264d6a4a8cf39a88a..fe379b2fc403b1899f9b496619defdb9ba9e5c0e 100644 |
--- a/src/regexp/jsregexp.cc |
+++ b/src/regexp/jsregexp.cc |
@@ -5032,7 +5032,6 @@ RegExpNode* MatchAndNegativeLookaroundInReadDirection( |
zone, match, read_backward, lookaround.ForMatch(negative_match)); |
} |
- |
erikcorry
2016/01/22 10:10:10
Inadvertent edit?
Yang
2016/01/25 07:38:41
Undone.
|
void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result, |
RegExpNode* on_success, |
UnicodeRangeSplitter* splitter) { |
@@ -5040,10 +5039,8 @@ void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result, |
if (lead_surrogates == nullptr) return; |
Zone* zone = compiler->zone(); |
// E.g. \ud801 becomes \ud801(?![\udc00-\udfff]). |
- ZoneList<CharacterRange>* trail_surrogates = |
- new (zone) ZoneList<CharacterRange>(1, zone); |
- trail_surrogates->Add( |
- CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd), zone); |
+ ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( |
+ zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); |
RegExpNode* match = |
compiler->read_backward() |
@@ -5067,10 +5064,8 @@ void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result, |
if (trail_surrogates == nullptr) return; |
Zone* zone = compiler->zone(); |
// E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01 |
- ZoneList<CharacterRange>* lead_surrogates = |
- new (zone) ZoneList<CharacterRange>(1, zone); |
- lead_surrogates->Add( |
- CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd), zone); |
+ ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( |
+ zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); |
RegExpNode* match = |
compiler->read_backward() |
@@ -5086,6 +5081,36 @@ void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result, |
} |
+void AddUnanchoredAdvance(RegExpCompiler* compiler, ChoiceNode* result, |
+ RegExpNode* on_success) { |
+ // Implements ES2015 21.2.5.2.3, AdvanceStringIndex, as two alternatives added to result. |
+ DCHECK(!compiler->read_backward()); |
+ Zone* zone = compiler->zone(); |
+ // Alternative 1: consume one code unit below kNonBmpStart that is not a lead surrogate (a BMP character or a trail surrogate). |
+ ZoneList<CharacterRange>* bmp_and_trail = |
+ new (zone) ZoneList<CharacterRange>(2, zone); |
+ bmp_and_trail->Add(CharacterRange::Range(0, kLeadSurrogateStart - 1), zone); |
+ bmp_and_trail->Add( |
+ CharacterRange::Range(kLeadSurrogateEnd + 1, kNonBmpStart - 1), zone); |
+ result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges( |
+ zone, bmp_and_trail, false, on_success))); |
+ |
+ // Alternative 2: consume a lead surrogate, then either a trail surrogate or nothing, before continuing at on_success. |
+ ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( |
+ zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); |
+ ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( |
+ zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); |
+ ChoiceNode* optional_trail = new (zone) ChoiceNode(2, zone); |
+ optional_trail->AddAlternative( |
+ GuardedAlternative(TextNode::CreateForCharacterRanges( |
+ zone, trail_surrogates, false, on_success))); |
+ optional_trail->AddAlternative(GuardedAlternative(on_success)); |
+ RegExpNode* optional_pair = TextNode::CreateForCharacterRanges( |
+ zone, lead_surrogates, false, optional_trail); |
+ result->AddAlternative(GuardedAlternative(optional_pair)); |
+} |
+ |
+ |
RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
RegExpNode* on_success) { |
set_.Canonicalize(); |
@@ -5102,12 +5127,16 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
// No matches possible. |
return new (zone) EndNode(EndNode::BACKTRACK, zone); |
} |
- UnicodeRangeSplitter splitter(zone, ranges); |
- ChoiceNode* result = new (compiler->zone()) ChoiceNode(2, compiler->zone()); |
- AddBmpCharacters(compiler, result, on_success, &splitter); |
- AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); |
- AddLoneLeadSurrogates(compiler, result, on_success, &splitter); |
- AddLoneTrailSurrogates(compiler, result, on_success, &splitter); |
+ ChoiceNode* result = new (zone) ChoiceNode(2, zone); |
+ if (standard_type() == '*') { |
+ AddUnanchoredAdvance(compiler, result, on_success); |
+ } else { |
+ UnicodeRangeSplitter splitter(zone, ranges); |
+ AddBmpCharacters(compiler, result, on_success, &splitter); |
+ AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); |
+ AddLoneLeadSurrogates(compiler, result, on_success, &splitter); |
+ AddLoneTrailSurrogates(compiler, result, on_success, &splitter); |
+ } |
return result; |
} else { |
return new (zone) TextNode(this, compiler->read_backward(), on_success); |
@@ -6513,6 +6542,36 @@ void DispatchTableConstructor::VisitAction(ActionNode* that) { |
} |
+RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpCompiler* compiler, |
+ RegExpNode* on_success) { |
+ // If the regexp matching starts within a surrogate pair, step back to the lead |
+ // surrogate and start matching from there; the other alternative goes straight to on_success. |
+ DCHECK(!compiler->read_backward()); |
+ Zone* zone = compiler->zone(); |
+ ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( |
+ zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); |
+ ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( |
+ zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); |
+ |
+ ChoiceNode* optional_step_back = new (zone) ChoiceNode(2, zone); |
+ |
+ int stack_register = compiler->UnicodeLookaroundStackRegister(); |
+ int position_register = compiler->UnicodeLookaroundPositionRegister(); |
+ RegExpNode* step_back = TextNode::CreateForCharacterRanges( |
+ zone, lead_surrogates, true, on_success); |
+ RegExpLookaround::Builder builder(true, step_back, stack_register, |
+ position_register); |
+ RegExpNode* match_trail = TextNode::CreateForCharacterRanges( |
+ zone, trail_surrogates, false, builder.on_match_success()); |
+ |
+ optional_step_back->AddAlternative( |
+ GuardedAlternative(builder.ForMatch(match_trail))); |
+ optional_step_back->AddAlternative(GuardedAlternative(on_success)); |
+ |
+ return optional_step_back; |
+} |
+ |
+ |
RegExpEngine::CompilationResult RegExpEngine::Compile( |
Isolate* isolate, Zone* zone, RegExpCompileData* data, |
JSRegExp::Flags flags, Handle<String> pattern, |
@@ -6575,6 +6634,8 @@ RegExpEngine::CompilationResult RegExpEngine::Compile( |
if (node != NULL) { |
node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
} |
+ } else if (compiler.unicode()) { |
+ node = OptionallyStepBackToLeadSurrogate(&compiler, node); |
} |
if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); |