Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/jsregexp.h" | 5 #include "src/regexp/jsregexp.h" |
| 6 | 6 |
| 7 #include "src/ast/ast.h" | 7 #include "src/ast/ast.h" |
| 8 #include "src/base/platform/platform.h" | 8 #include "src/base/platform/platform.h" |
| 9 #include "src/compilation-cache.h" | 9 #include "src/compilation-cache.h" |
| 10 #include "src/compiler.h" | 10 #include "src/compiler.h" |
| (...skipping 5014 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5025 int stack_register = compiler->UnicodeLookaroundStackRegister(); | 5025 int stack_register = compiler->UnicodeLookaroundStackRegister(); |
| 5026 int position_register = compiler->UnicodeLookaroundPositionRegister(); | 5026 int position_register = compiler->UnicodeLookaroundPositionRegister(); |
| 5027 RegExpLookaround::Builder lookaround(false, on_success, stack_register, | 5027 RegExpLookaround::Builder lookaround(false, on_success, stack_register, |
| 5028 position_register); | 5028 position_register); |
| 5029 RegExpNode* negative_match = TextNode::CreateForCharacterRanges( | 5029 RegExpNode* negative_match = TextNode::CreateForCharacterRanges( |
| 5030 zone, lookahead, read_backward, lookaround.on_match_success()); | 5030 zone, lookahead, read_backward, lookaround.on_match_success()); |
| 5031 return TextNode::CreateForCharacterRanges( | 5031 return TextNode::CreateForCharacterRanges( |
| 5032 zone, match, read_backward, lookaround.ForMatch(negative_match)); | 5032 zone, match, read_backward, lookaround.ForMatch(negative_match)); |
| 5033 } | 5033 } |
| 5034 | 5034 |
| 5035 | |
|
erikcorry
2016/01/22 10:10:10
Inadvertent edit?
Yang
2016/01/25 07:38:41
Undone.
| |
| 5036 void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result, | 5035 void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result, |
| 5037 RegExpNode* on_success, | 5036 RegExpNode* on_success, |
| 5038 UnicodeRangeSplitter* splitter) { | 5037 UnicodeRangeSplitter* splitter) { |
| 5039 ZoneList<CharacterRange>* lead_surrogates = splitter->lead_surrogates(); | 5038 ZoneList<CharacterRange>* lead_surrogates = splitter->lead_surrogates(); |
| 5040 if (lead_surrogates == nullptr) return; | 5039 if (lead_surrogates == nullptr) return; |
| 5041 Zone* zone = compiler->zone(); | 5040 Zone* zone = compiler->zone(); |
| 5042 // E.g. \ud801 becomes \ud801(?![\udc00-\udfff]). | 5041 // E.g. \ud801 becomes \ud801(?![\udc00-\udfff]). |
| 5043 ZoneList<CharacterRange>* trail_surrogates = | 5042 ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( |
| 5044 new (zone) ZoneList<CharacterRange>(1, zone); | 5043 zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); |
| 5045 trail_surrogates->Add( | |
| 5046 CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd), zone); | |
| 5047 | 5044 |
| 5048 RegExpNode* match = | 5045 RegExpNode* match = |
| 5049 compiler->read_backward() | 5046 compiler->read_backward() |
| 5050 // Reading backward. Assert that reading forward, there is no trail | 5047 // Reading backward. Assert that reading forward, there is no trail |
| 5051 // surrogate, and then backward match the lead surrogate. | 5048 // surrogate, and then backward match the lead surrogate. |
| 5052 ? NegativeLookaroundAgainstReadDirectionAndMatch( | 5049 ? NegativeLookaroundAgainstReadDirectionAndMatch( |
| 5053 compiler, trail_surrogates, lead_surrogates, on_success, true) | 5050 compiler, trail_surrogates, lead_surrogates, on_success, true) |
| 5054 // Reading forward. Forwrad match the lead surrogate and assert that | 5051 // Reading forward. Forwrad match the lead surrogate and assert that |
| 5055 // no | 5052 // no |
| 5056 // trail surrogate follows. | 5053 // trail surrogate follows. |
| 5057 : MatchAndNegativeLookaroundInReadDirection( | 5054 : MatchAndNegativeLookaroundInReadDirection( |
| 5058 compiler, lead_surrogates, trail_surrogates, on_success, false); | 5055 compiler, lead_surrogates, trail_surrogates, on_success, false); |
| 5059 result->AddAlternative(GuardedAlternative(match)); | 5056 result->AddAlternative(GuardedAlternative(match)); |
| 5060 } | 5057 } |
| 5061 | 5058 |
| 5062 | 5059 |
| 5063 void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result, | 5060 void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result, |
| 5064 RegExpNode* on_success, | 5061 RegExpNode* on_success, |
| 5065 UnicodeRangeSplitter* splitter) { | 5062 UnicodeRangeSplitter* splitter) { |
| 5066 ZoneList<CharacterRange>* trail_surrogates = splitter->trail_surrogates(); | 5063 ZoneList<CharacterRange>* trail_surrogates = splitter->trail_surrogates(); |
| 5067 if (trail_surrogates == nullptr) return; | 5064 if (trail_surrogates == nullptr) return; |
| 5068 Zone* zone = compiler->zone(); | 5065 Zone* zone = compiler->zone(); |
| 5069 // E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01 | 5066 // E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01 |
| 5070 ZoneList<CharacterRange>* lead_surrogates = | 5067 ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( |
| 5071 new (zone) ZoneList<CharacterRange>(1, zone); | 5068 zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); |
| 5072 lead_surrogates->Add( | |
| 5073 CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd), zone); | |
| 5074 | 5069 |
| 5075 RegExpNode* match = | 5070 RegExpNode* match = |
| 5076 compiler->read_backward() | 5071 compiler->read_backward() |
| 5077 // Reading backward. Backward match the trail surrogate and assert | 5072 // Reading backward. Backward match the trail surrogate and assert |
| 5078 // that no lead surrogate precedes it. | 5073 // that no lead surrogate precedes it. |
| 5079 ? MatchAndNegativeLookaroundInReadDirection( | 5074 ? MatchAndNegativeLookaroundInReadDirection( |
| 5080 compiler, trail_surrogates, lead_surrogates, on_success, true) | 5075 compiler, trail_surrogates, lead_surrogates, on_success, true) |
| 5081 // Reading forward. Assert that reading backward, there is no lead | 5076 // Reading forward. Assert that reading backward, there is no lead |
| 5082 // surrogate, and then forward match the trail surrogate. | 5077 // surrogate, and then forward match the trail surrogate. |
| 5083 : NegativeLookaroundAgainstReadDirectionAndMatch( | 5078 : NegativeLookaroundAgainstReadDirectionAndMatch( |
| 5084 compiler, lead_surrogates, trail_surrogates, on_success, false); | 5079 compiler, lead_surrogates, trail_surrogates, on_success, false); |
| 5085 result->AddAlternative(GuardedAlternative(match)); | 5080 result->AddAlternative(GuardedAlternative(match)); |
| 5086 } | 5081 } |
| 5087 | 5082 |
| 5088 | 5083 |
| 5084 void AddUnanchoredAdvance(RegExpCompiler* compiler, ChoiceNode* result, | |
| 5085 RegExpNode* on_success) { | |
| 5086 // This implements ES2015 21.2.5.2.3, AdvanceStringIndex. | |
| 5087 DCHECK(!compiler->read_backward()); | |
| 5088 Zone* zone = compiler->zone(); | |
| 5089 // Advancing can either consume a BMP character or a trail surrogate. | |
| 5090 ZoneList<CharacterRange>* bmp_and_trail = | |
| 5091 new (zone) ZoneList<CharacterRange>(2, zone); | |
| 5092 bmp_and_trail->Add(CharacterRange::Range(0, kLeadSurrogateStart - 1), zone); | |
| 5093 bmp_and_trail->Add( | |
| 5094 CharacterRange::Range(kLeadSurrogateEnd + 1, kNonBmpStart - 1), zone); | |
| 5095 result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges( | |
| 5096 zone, bmp_and_trail, false, on_success))); | |
| 5097 | |
| 5098 // Or it could consume a lead optionally followed by a trail surrogate. | |
| 5099 ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( | |
| 5100 zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); | |
| 5101 ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( | |
| 5102 zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); | |
| 5103 ChoiceNode* optional_trail = new (zone) ChoiceNode(2, zone); | |
| 5104 optional_trail->AddAlternative( | |
| 5105 GuardedAlternative(TextNode::CreateForCharacterRanges( | |
| 5106 zone, trail_surrogates, false, on_success))); | |
| 5107 optional_trail->AddAlternative(GuardedAlternative(on_success)); | |
| 5108 RegExpNode* optional_pair = TextNode::CreateForCharacterRanges( | |
| 5109 zone, lead_surrogates, false, optional_trail); | |
| 5110 result->AddAlternative(GuardedAlternative(optional_pair)); | |
| 5111 } | |
| 5112 | |
| 5113 | |
| 5089 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 5114 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
| 5090 RegExpNode* on_success) { | 5115 RegExpNode* on_success) { |
| 5091 set_.Canonicalize(); | 5116 set_.Canonicalize(); |
| 5092 Zone* zone = compiler->zone(); | 5117 Zone* zone = compiler->zone(); |
| 5093 ZoneList<CharacterRange>* ranges = this->ranges(zone); | 5118 ZoneList<CharacterRange>* ranges = this->ranges(zone); |
| 5094 if (compiler->unicode() && !compiler->one_byte()) { | 5119 if (compiler->unicode() && !compiler->one_byte()) { |
| 5095 if (is_negated()) { | 5120 if (is_negated()) { |
| 5096 ZoneList<CharacterRange>* negated = | 5121 ZoneList<CharacterRange>* negated = |
| 5097 new (zone) ZoneList<CharacterRange>(2, zone); | 5122 new (zone) ZoneList<CharacterRange>(2, zone); |
| 5098 CharacterRange::Negate(ranges, negated, zone); | 5123 CharacterRange::Negate(ranges, negated, zone); |
| 5099 ranges = negated; | 5124 ranges = negated; |
| 5100 } | 5125 } |
| 5101 if (ranges->length() == 0) { | 5126 if (ranges->length() == 0) { |
| 5102 // No matches possible. | 5127 // No matches possible. |
| 5103 return new (zone) EndNode(EndNode::BACKTRACK, zone); | 5128 return new (zone) EndNode(EndNode::BACKTRACK, zone); |
| 5104 } | 5129 } |
| 5105 UnicodeRangeSplitter splitter(zone, ranges); | 5130 ChoiceNode* result = new (zone) ChoiceNode(2, zone); |
| 5106 ChoiceNode* result = new (compiler->zone()) ChoiceNode(2, compiler->zone()); | 5131 if (standard_type() == '*') { |
| 5107 AddBmpCharacters(compiler, result, on_success, &splitter); | 5132 AddUnanchoredAdvance(compiler, result, on_success); |
| 5108 AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); | 5133 } else { |
| 5109 AddLoneLeadSurrogates(compiler, result, on_success, &splitter); | 5134 UnicodeRangeSplitter splitter(zone, ranges); |
| 5110 AddLoneTrailSurrogates(compiler, result, on_success, &splitter); | 5135 AddBmpCharacters(compiler, result, on_success, &splitter); |
| 5136 AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); | |
| 5137 AddLoneLeadSurrogates(compiler, result, on_success, &splitter); | |
| 5138 AddLoneTrailSurrogates(compiler, result, on_success, &splitter); | |
| 5139 } | |
| 5111 return result; | 5140 return result; |
| 5112 } else { | 5141 } else { |
| 5113 return new (zone) TextNode(this, compiler->read_backward(), on_success); | 5142 return new (zone) TextNode(this, compiler->read_backward(), on_success); |
| 5114 } | 5143 } |
| 5115 } | 5144 } |
| 5116 | 5145 |
| 5117 | 5146 |
| 5118 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { | 5147 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { |
| 5119 RegExpAtom* atom1 = (*a)->AsAtom(); | 5148 RegExpAtom* atom1 = (*a)->AsAtom(); |
| 5120 RegExpAtom* atom2 = (*b)->AsAtom(); | 5149 RegExpAtom* atom2 = (*b)->AsAtom(); |
| (...skipping 1385 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6506 } | 6535 } |
| 6507 } | 6536 } |
| 6508 | 6537 |
| 6509 | 6538 |
| 6510 void DispatchTableConstructor::VisitAction(ActionNode* that) { | 6539 void DispatchTableConstructor::VisitAction(ActionNode* that) { |
| 6511 RegExpNode* target = that->on_success(); | 6540 RegExpNode* target = that->on_success(); |
| 6512 target->Accept(this); | 6541 target->Accept(this); |
| 6513 } | 6542 } |
| 6514 | 6543 |
| 6515 | 6544 |
| 6545 RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpCompiler* compiler, | |
| 6546 RegExpNode* on_success) { | |
| 6547 // If the regexp matching starts within a surrogate pair, step back | |
| 6548 // to the lead surrogate and start matching from there. | |
| 6549 DCHECK(!compiler->read_backward()); | |
| 6550 Zone* zone = compiler->zone(); | |
| 6551 ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( | |
| 6552 zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); | |
| 6553 ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( | |
| 6554 zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); | |
| 6555 | |
| 6556 ChoiceNode* optional_step_back = new (zone) ChoiceNode(2, zone); | |
| 6557 | |
| 6558 int stack_register = compiler->UnicodeLookaroundStackRegister(); | |
| 6559 int position_register = compiler->UnicodeLookaroundPositionRegister(); | |
| 6560 RegExpNode* step_back = TextNode::CreateForCharacterRanges( | |
| 6561 zone, lead_surrogates, true, on_success); | |
| 6562 RegExpLookaround::Builder builder(true, step_back, stack_register, | |
| 6563 position_register); | |
| 6564 RegExpNode* match_trail = TextNode::CreateForCharacterRanges( | |
| 6565 zone, trail_surrogates, false, builder.on_match_success()); | |
| 6566 | |
| 6567 optional_step_back->AddAlternative( | |
| 6568 GuardedAlternative(builder.ForMatch(match_trail))); | |
| 6569 optional_step_back->AddAlternative(GuardedAlternative(on_success)); | |
| 6570 | |
| 6571 return optional_step_back; | |
| 6572 } | |
| 6573 | |
| 6574 | |
| 6516 RegExpEngine::CompilationResult RegExpEngine::Compile( | 6575 RegExpEngine::CompilationResult RegExpEngine::Compile( |
| 6517 Isolate* isolate, Zone* zone, RegExpCompileData* data, | 6576 Isolate* isolate, Zone* zone, RegExpCompileData* data, |
| 6518 JSRegExp::Flags flags, Handle<String> pattern, | 6577 JSRegExp::Flags flags, Handle<String> pattern, |
| 6519 Handle<String> sample_subject, bool is_one_byte) { | 6578 Handle<String> sample_subject, bool is_one_byte) { |
| 6520 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { | 6579 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { |
| 6521 return IrregexpRegExpTooBig(isolate); | 6580 return IrregexpRegExpTooBig(isolate); |
| 6522 } | 6581 } |
| 6523 bool ignore_case = flags & JSRegExp::kIgnoreCase; | 6582 bool ignore_case = flags & JSRegExp::kIgnoreCase; |
| 6524 bool is_sticky = flags & JSRegExp::kSticky; | 6583 bool is_sticky = flags & JSRegExp::kSticky; |
| 6525 bool is_global = flags & JSRegExp::kGlobal; | 6584 bool is_global = flags & JSRegExp::kGlobal; |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6568 node = loop_node; | 6627 node = loop_node; |
| 6569 } | 6628 } |
| 6570 } | 6629 } |
| 6571 if (is_one_byte) { | 6630 if (is_one_byte) { |
| 6572 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 6631 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
| 6573 // Do it again to propagate the new nodes to places where they were not | 6632 // Do it again to propagate the new nodes to places where they were not |
| 6574 // put because they had not been calculated yet. | 6633 // put because they had not been calculated yet. |
| 6575 if (node != NULL) { | 6634 if (node != NULL) { |
| 6576 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 6635 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
| 6577 } | 6636 } |
| 6637 } else if (compiler.unicode()) { | |
| 6638 node = OptionallyStepBackToLeadSurrogate(&compiler, node); | |
| 6578 } | 6639 } |
| 6579 | 6640 |
| 6580 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); | 6641 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); |
| 6581 data->node = node; | 6642 data->node = node; |
| 6582 Analysis analysis(isolate, ignore_case, is_one_byte); | 6643 Analysis analysis(isolate, ignore_case, is_one_byte); |
| 6583 analysis.EnsureAnalyzed(node); | 6644 analysis.EnsureAnalyzed(node); |
| 6584 if (analysis.has_failed()) { | 6645 if (analysis.has_failed()) { |
| 6585 const char* error_message = analysis.error_message(); | 6646 const char* error_message = analysis.error_message(); |
| 6586 return CompilationResult(isolate, error_message); | 6647 return CompilationResult(isolate, error_message); |
| 6587 } | 6648 } |
| (...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6759 | 6820 |
| 6760 | 6821 |
| 6761 void RegExpResultsCache::Clear(FixedArray* cache) { | 6822 void RegExpResultsCache::Clear(FixedArray* cache) { |
| 6762 for (int i = 0; i < kRegExpResultsCacheSize; i++) { | 6823 for (int i = 0; i < kRegExpResultsCacheSize; i++) { |
| 6763 cache->set(i, Smi::FromInt(0)); | 6824 cache->set(i, Smi::FromInt(0)); |
| 6764 } | 6825 } |
| 6765 } | 6826 } |
| 6766 | 6827 |
| 6767 } // namespace internal | 6828 } // namespace internal |
| 6768 } // namespace v8 | 6829 } // namespace v8 |
| OLD | NEW |