OLD | NEW |
---|---|
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/jsregexp.h" | 5 #include "src/regexp/jsregexp.h" |
6 | 6 |
7 #include "src/ast/ast.h" | 7 #include "src/ast/ast.h" |
8 #include "src/base/platform/platform.h" | 8 #include "src/base/platform/platform.h" |
9 #include "src/compilation-cache.h" | 9 #include "src/compilation-cache.h" |
10 #include "src/compiler.h" | 10 #include "src/compiler.h" |
(...skipping 5014 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5025 int stack_register = compiler->UnicodeLookaroundStackRegister(); | 5025 int stack_register = compiler->UnicodeLookaroundStackRegister(); |
5026 int position_register = compiler->UnicodeLookaroundPositionRegister(); | 5026 int position_register = compiler->UnicodeLookaroundPositionRegister(); |
5027 RegExpLookaround::Builder lookaround(false, on_success, stack_register, | 5027 RegExpLookaround::Builder lookaround(false, on_success, stack_register, |
5028 position_register); | 5028 position_register); |
5029 RegExpNode* negative_match = TextNode::CreateForCharacterRanges( | 5029 RegExpNode* negative_match = TextNode::CreateForCharacterRanges( |
5030 zone, lookahead, read_backward, lookaround.on_match_success()); | 5030 zone, lookahead, read_backward, lookaround.on_match_success()); |
5031 return TextNode::CreateForCharacterRanges( | 5031 return TextNode::CreateForCharacterRanges( |
5032 zone, match, read_backward, lookaround.ForMatch(negative_match)); | 5032 zone, match, read_backward, lookaround.ForMatch(negative_match)); |
5033 } | 5033 } |
5034 | 5034 |
5035 | |
5036 void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result, | 5035 void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result, |
5037 RegExpNode* on_success, | 5036 RegExpNode* on_success, |
5038 UnicodeRangeSplitter* splitter) { | 5037 UnicodeRangeSplitter* splitter) { |
5039 ZoneList<CharacterRange>* lead_surrogates = splitter->lead_surrogates(); | 5038 ZoneList<CharacterRange>* lead_surrogates = splitter->lead_surrogates(); |
5040 if (lead_surrogates == nullptr) return; | 5039 if (lead_surrogates == nullptr) return; |
5041 Zone* zone = compiler->zone(); | 5040 Zone* zone = compiler->zone(); |
5042 // E.g. \ud801 becomes \ud801(?![\udc00-\udfff]). | 5041 // E.g. \ud801 becomes \ud801(?![\udc00-\udfff]). |
5043 ZoneList<CharacterRange>* trail_surrogates = | 5042 ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( |
5044 new (zone) ZoneList<CharacterRange>(1, zone); | 5043 zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); |
5045 trail_surrogates->Add( | |
5046 CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd), zone); | |
5047 | 5044 |
5048 RegExpNode* match = | 5045 RegExpNode* match = |
5049 compiler->read_backward() | 5046 compiler->read_backward() |
5050 // Reading backward. Assert that reading forward, there is no trail | 5047 // Reading backward. Assert that reading forward, there is no trail |
5051 // surrogate, and then backward match the lead surrogate. | 5048 // surrogate, and then backward match the lead surrogate. |
5052 ? NegativeLookaroundAgainstReadDirectionAndMatch( | 5049 ? NegativeLookaroundAgainstReadDirectionAndMatch( |
5053 compiler, trail_surrogates, lead_surrogates, on_success, true) | 5050 compiler, trail_surrogates, lead_surrogates, on_success, true) |
5054 // Reading forward. Forwrad match the lead surrogate and assert that | 5051 // Reading forward. Forwrad match the lead surrogate and assert that |
erikcorry
2016/01/22 10:10:10
Forwrad -> Forward
Also, this ternary expression
Yang
2016/01/25 07:38:41
Done.
| |
5055 // no | 5052 // no |
erikcorry
2016/01/22 10:10:10
Unfortunate line break. (Autoformatter doesn't re
Yang
2016/01/25 07:38:41
Done.
And thanks for the vim tip :)
| |
5056 // trail surrogate follows. | 5053 // trail surrogate follows. |
5057 : MatchAndNegativeLookaroundInReadDirection( | 5054 : MatchAndNegativeLookaroundInReadDirection( |
5058 compiler, lead_surrogates, trail_surrogates, on_success, false); | 5055 compiler, lead_surrogates, trail_surrogates, on_success, false); |
5059 result->AddAlternative(GuardedAlternative(match)); | 5056 result->AddAlternative(GuardedAlternative(match)); |
5060 } | 5057 } |
5061 | 5058 |
5062 | 5059 |
5063 void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result, | 5060 void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result, |
5064 RegExpNode* on_success, | 5061 RegExpNode* on_success, |
5065 UnicodeRangeSplitter* splitter) { | 5062 UnicodeRangeSplitter* splitter) { |
5066 ZoneList<CharacterRange>* trail_surrogates = splitter->trail_surrogates(); | 5063 ZoneList<CharacterRange>* trail_surrogates = splitter->trail_surrogates(); |
5067 if (trail_surrogates == nullptr) return; | 5064 if (trail_surrogates == nullptr) return; |
5068 Zone* zone = compiler->zone(); | 5065 Zone* zone = compiler->zone(); |
5069 // E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01 | 5066 // E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01 |
5070 ZoneList<CharacterRange>* lead_surrogates = | 5067 ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( |
5071 new (zone) ZoneList<CharacterRange>(1, zone); | 5068 zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); |
5072 lead_surrogates->Add( | |
5073 CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd), zone); | |
5074 | 5069 |
5075 RegExpNode* match = | 5070 RegExpNode* match = |
5076 compiler->read_backward() | 5071 compiler->read_backward() |
5077 // Reading backward. Backward match the trail surrogate and assert | 5072 // Reading backward. Backward match the trail surrogate and assert |
5078 // that no lead surrogate precedes it. | 5073 // that no lead surrogate precedes it. |
5079 ? MatchAndNegativeLookaroundInReadDirection( | 5074 ? MatchAndNegativeLookaroundInReadDirection( |
5080 compiler, trail_surrogates, lead_surrogates, on_success, true) | 5075 compiler, trail_surrogates, lead_surrogates, on_success, true) |
5081 // Reading forward. Assert that reading backward, there is no lead | 5076 // Reading forward. Assert that reading backward, there is no lead |
5082 // surrogate, and then forward match the trail surrogate. | 5077 // surrogate, and then forward match the trail surrogate. |
5083 : NegativeLookaroundAgainstReadDirectionAndMatch( | 5078 : NegativeLookaroundAgainstReadDirectionAndMatch( |
5084 compiler, lead_surrogates, trail_surrogates, on_success, false); | 5079 compiler, lead_surrogates, trail_surrogates, on_success, false); |
5085 result->AddAlternative(GuardedAlternative(match)); | 5080 result->AddAlternative(GuardedAlternative(match)); |
5086 } | 5081 } |
5087 | 5082 |
5088 | 5083 |
5084 void AddUnanchoredAdvance(RegExpCompiler* compiler, ChoiceNode* result, | |
5085 RegExpNode* on_success) { | |
5086 // This implements ES2015 21.2.5.2.3, AdvanceStringIndex. | |
5087 DCHECK(!compiler->read_backward()); | |
5088 Zone* zone = compiler->zone(); | |
5089 // Advancing can either consume a BMP character or a trail surrogate. | |
5090 ZoneList<CharacterRange>* bmp_and_trail = | |
5091 new (zone) ZoneList<CharacterRange>(2, zone); | |
5092 bmp_and_trail->Add(CharacterRange::Range(0, kLeadSurrogateStart - 1), zone); | |
5093 bmp_and_trail->Add( | |
5094 CharacterRange::Range(kLeadSurrogateEnd + 1, kNonBmpStart - 1), zone); | |
5095 result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges( | |
5096 zone, bmp_and_trail, false, on_success))); | |
5097 | |
5098 // Or it could consume a lead optionally followed by a trail surrogate. | |
5099 ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( | |
5100 zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); | |
5101 ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( | |
5102 zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); | |
5103 ChoiceNode* optional_trail = new (zone) ChoiceNode(2, zone); | |
5104 optional_trail->AddAlternative( | |
5105 GuardedAlternative(TextNode::CreateForCharacterRanges( | |
5106 zone, trail_surrogates, false, on_success))); | |
5107 optional_trail->AddAlternative(GuardedAlternative(on_success)); | |
5108 RegExpNode* optional_pair = TextNode::CreateForCharacterRanges( | |
5109 zone, lead_surrogates, false, optional_trail); | |
5110 result->AddAlternative(GuardedAlternative(optional_pair)); | |
5111 } | |
5112 | |
5113 | |
5089 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 5114 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
5090 RegExpNode* on_success) { | 5115 RegExpNode* on_success) { |
5091 set_.Canonicalize(); | 5116 set_.Canonicalize(); |
5092 Zone* zone = compiler->zone(); | 5117 Zone* zone = compiler->zone(); |
5093 ZoneList<CharacterRange>* ranges = this->ranges(zone); | 5118 ZoneList<CharacterRange>* ranges = this->ranges(zone); |
5094 if (compiler->unicode() && !compiler->one_byte()) { | 5119 if (compiler->unicode() && !compiler->one_byte()) { |
5095 if (is_negated()) { | 5120 if (is_negated()) { |
5096 ZoneList<CharacterRange>* negated = | 5121 ZoneList<CharacterRange>* negated = |
5097 new (zone) ZoneList<CharacterRange>(2, zone); | 5122 new (zone) ZoneList<CharacterRange>(2, zone); |
5098 CharacterRange::Negate(ranges, negated, zone); | 5123 CharacterRange::Negate(ranges, negated, zone); |
5099 ranges = negated; | 5124 ranges = negated; |
5100 } | 5125 } |
5101 if (ranges->length() == 0) { | 5126 if (ranges->length() == 0) { |
5102 // No matches possible. | 5127 // No matches possible. |
5103 return new (zone) EndNode(EndNode::BACKTRACK, zone); | 5128 return new (zone) EndNode(EndNode::BACKTRACK, zone); |
5104 } | 5129 } |
5105 UnicodeRangeSplitter splitter(zone, ranges); | 5130 ChoiceNode* result = new (zone) ChoiceNode(2, zone); |
5106 ChoiceNode* result = new (compiler->zone()) ChoiceNode(2, compiler->zone()); | 5131 if (standard_type() == '*') { |
5107 AddBmpCharacters(compiler, result, on_success, &splitter); | 5132 AddUnanchoredAdvance(compiler, result, on_success); |
erikcorry
2016/01/22 10:10:10
Is this better than what the other half of the 'if
Yang
2016/01/25 07:38:41
This adds
[\0-\ud7ff\uc000\uffff]|[\ud800-\udbff][
| |
5108 AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); | 5133 } else { |
5109 AddLoneLeadSurrogates(compiler, result, on_success, &splitter); | 5134 UnicodeRangeSplitter splitter(zone, ranges); |
5110 AddLoneTrailSurrogates(compiler, result, on_success, &splitter); | 5135 AddBmpCharacters(compiler, result, on_success, &splitter); |
5136 AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); | |
5137 AddLoneLeadSurrogates(compiler, result, on_success, &splitter); | |
5138 AddLoneTrailSurrogates(compiler, result, on_success, &splitter); | |
5139 } | |
5111 return result; | 5140 return result; |
erikcorry
2016/01/22 10:10:10
I think it's worth checking if the choice node has
Yang
2016/01/25 07:38:41
Done in ChoiceNode::Emit.
| |
5112 } else { | 5141 } else { |
5113 return new (zone) TextNode(this, compiler->read_backward(), on_success); | 5142 return new (zone) TextNode(this, compiler->read_backward(), on_success); |
5114 } | 5143 } |
5115 } | 5144 } |
5116 | 5145 |
5117 | 5146 |
5118 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { | 5147 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { |
5119 RegExpAtom* atom1 = (*a)->AsAtom(); | 5148 RegExpAtom* atom1 = (*a)->AsAtom(); |
5120 RegExpAtom* atom2 = (*b)->AsAtom(); | 5149 RegExpAtom* atom2 = (*b)->AsAtom(); |
5121 uc16 character1 = atom1->data().at(0); | 5150 uc16 character1 = atom1->data().at(0); |
(...skipping 1384 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
6506 } | 6535 } |
6507 } | 6536 } |
6508 | 6537 |
6509 | 6538 |
6510 void DispatchTableConstructor::VisitAction(ActionNode* that) { | 6539 void DispatchTableConstructor::VisitAction(ActionNode* that) { |
6511 RegExpNode* target = that->on_success(); | 6540 RegExpNode* target = that->on_success(); |
6512 target->Accept(this); | 6541 target->Accept(this); |
6513 } | 6542 } |
6514 | 6543 |
6515 | 6544 |
6545 RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpCompiler* compiler, | |
erikcorry
2016/01/22 10:10:10
Going backwards is some standards-mandated strange
Yang
2016/01/25 07:38:41
You are right. I have to figure out this one. In c
| |
6546 RegExpNode* on_success) { | |
6547 // If the regexp matching starts within a surrogate pair, step back | |
6548 // to the lead surrogate and start matching from there. | |
6549 DCHECK(!compiler->read_backward()); | |
6550 Zone* zone = compiler->zone(); | |
6551 ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( | |
6552 zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); | |
6553 ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( | |
6554 zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); | |
6555 | |
6556 ChoiceNode* optional_step_back = new (zone) ChoiceNode(2, zone); | |
6557 | |
6558 int stack_register = compiler->UnicodeLookaroundStackRegister(); | |
6559 int position_register = compiler->UnicodeLookaroundPositionRegister(); | |
erikcorry
2016/01/22 10:10:10
How does this actually do the backwards step? Som
Yang
2016/01/25 07:38:41
The TextNode for lead surrogates below consumes on
| |
6560 RegExpNode* step_back = TextNode::CreateForCharacterRanges( | |
6561 zone, lead_surrogates, true, on_success); | |
6562 RegExpLookaround::Builder builder(true, step_back, stack_register, | |
6563 position_register); | |
6564 RegExpNode* match_trail = TextNode::CreateForCharacterRanges( | |
6565 zone, trail_surrogates, false, builder.on_match_success()); | |
6566 | |
6567 optional_step_back->AddAlternative( | |
6568 GuardedAlternative(builder.ForMatch(match_trail))); | |
6569 optional_step_back->AddAlternative(GuardedAlternative(on_success)); | |
6570 | |
6571 return optional_step_back; | |
6572 } | |
6573 | |
6574 | |
6516 RegExpEngine::CompilationResult RegExpEngine::Compile( | 6575 RegExpEngine::CompilationResult RegExpEngine::Compile( |
6517 Isolate* isolate, Zone* zone, RegExpCompileData* data, | 6576 Isolate* isolate, Zone* zone, RegExpCompileData* data, |
6518 JSRegExp::Flags flags, Handle<String> pattern, | 6577 JSRegExp::Flags flags, Handle<String> pattern, |
6519 Handle<String> sample_subject, bool is_one_byte) { | 6578 Handle<String> sample_subject, bool is_one_byte) { |
6520 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { | 6579 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { |
6521 return IrregexpRegExpTooBig(isolate); | 6580 return IrregexpRegExpTooBig(isolate); |
6522 } | 6581 } |
6523 bool ignore_case = flags & JSRegExp::kIgnoreCase; | 6582 bool ignore_case = flags & JSRegExp::kIgnoreCase; |
6524 bool is_sticky = flags & JSRegExp::kSticky; | 6583 bool is_sticky = flags & JSRegExp::kSticky; |
6525 bool is_global = flags & JSRegExp::kGlobal; | 6584 bool is_global = flags & JSRegExp::kGlobal; |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
6568 node = loop_node; | 6627 node = loop_node; |
6569 } | 6628 } |
6570 } | 6629 } |
6571 if (is_one_byte) { | 6630 if (is_one_byte) { |
6572 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 6631 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
6573 // Do it again to propagate the new nodes to places where they were not | 6632 // Do it again to propagate the new nodes to places where they were not |
6574 // put because they had not been calculated yet. | 6633 // put because they had not been calculated yet. |
6575 if (node != NULL) { | 6634 if (node != NULL) { |
6576 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 6635 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
6577 } | 6636 } |
6637 } else if (compiler.unicode()) { | |
6638 node = OptionallyStepBackToLeadSurrogate(&compiler, node); | |
erikcorry
2016/01/22 10:10:10
Do we need this for non-sticky non-global regexps?
Yang
2016/01/25 07:38:41
Done.
| |
6578 } | 6639 } |
6579 | 6640 |
6580 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); | 6641 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); |
6581 data->node = node; | 6642 data->node = node; |
6582 Analysis analysis(isolate, ignore_case, is_one_byte); | 6643 Analysis analysis(isolate, ignore_case, is_one_byte); |
6583 analysis.EnsureAnalyzed(node); | 6644 analysis.EnsureAnalyzed(node); |
6584 if (analysis.has_failed()) { | 6645 if (analysis.has_failed()) { |
6585 const char* error_message = analysis.error_message(); | 6646 const char* error_message = analysis.error_message(); |
6586 return CompilationResult(isolate, error_message); | 6647 return CompilationResult(isolate, error_message); |
6587 } | 6648 } |
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
6759 | 6820 |
6760 | 6821 |
6761 void RegExpResultsCache::Clear(FixedArray* cache) { | 6822 void RegExpResultsCache::Clear(FixedArray* cache) { |
6762 for (int i = 0; i < kRegExpResultsCacheSize; i++) { | 6823 for (int i = 0; i < kRegExpResultsCacheSize; i++) { |
6763 cache->set(i, Smi::FromInt(0)); | 6824 cache->set(i, Smi::FromInt(0)); |
6764 } | 6825 } |
6765 } | 6826 } |
6766 | 6827 |
6767 } // namespace internal | 6828 } // namespace internal |
6768 } // namespace v8 | 6829 } // namespace v8 |
OLD | NEW |