OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/jsregexp.h" | 5 #include "src/regexp/jsregexp.h" |
6 | 6 |
7 #include "src/ast/ast.h" | 7 #include "src/ast/ast.h" |
8 #include "src/base/platform/platform.h" | 8 #include "src/base/platform/platform.h" |
9 #include "src/compilation-cache.h" | 9 #include "src/compilation-cache.h" |
10 #include "src/compiler.h" | 10 #include "src/compiler.h" |
(...skipping 3939 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3950 | 3950 |
3951 state->preload_is_current_ = | 3951 state->preload_is_current_ = |
3952 (current_trace->characters_preloaded() == state->preload_characters_); | 3952 (current_trace->characters_preloaded() == state->preload_characters_); |
3953 state->preload_has_checked_bounds_ = state->preload_is_current_; | 3953 state->preload_has_checked_bounds_ = state->preload_is_current_; |
3954 } | 3954 } |
3955 | 3955 |
3956 | 3956 |
3957 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3957 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
3958 int choice_count = alternatives_->length(); | 3958 int choice_count = alternatives_->length(); |
3959 | 3959 |
| 3960 if (choice_count == 1 && alternatives_->at(0).guards() == NULL) { |
| 3961 alternatives_->at(0).node()->Emit(compiler, trace); |
| 3962 return; |
| 3963 } |
| 3964 |
3960 AssertGuardsMentionRegisters(trace); | 3965 AssertGuardsMentionRegisters(trace); |
3961 | 3966 |
3962 LimitResult limit_result = LimitVersions(compiler, trace); | 3967 LimitResult limit_result = LimitVersions(compiler, trace); |
3963 if (limit_result == DONE) return; | 3968 if (limit_result == DONE) return; |
3964 DCHECK(limit_result == CONTINUE); | 3969 DCHECK(limit_result == CONTINUE); |
3965 | 3970 |
3966 // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for | 3971 // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for |
3967 // other choice nodes we only flush if we are out of code size budget. | 3972 // other choice nodes we only flush if we are out of code size budget. |
3968 if (trace->flush_budget() == 0 && trace->actions() != NULL) { | 3973 if (trace->flush_budget() == 0 && trace->actions() != NULL) { |
3969 trace->Flush(compiler, this); | 3974 trace->Flush(compiler, this); |
(...skipping 1063 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5033 } | 5038 } |
5034 | 5039 |
5035 | 5040 |
5036 void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result, | 5041 void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result, |
5037 RegExpNode* on_success, | 5042 RegExpNode* on_success, |
5038 UnicodeRangeSplitter* splitter) { | 5043 UnicodeRangeSplitter* splitter) { |
5039 ZoneList<CharacterRange>* lead_surrogates = splitter->lead_surrogates(); | 5044 ZoneList<CharacterRange>* lead_surrogates = splitter->lead_surrogates(); |
5040 if (lead_surrogates == nullptr) return; | 5045 if (lead_surrogates == nullptr) return; |
5041 Zone* zone = compiler->zone(); | 5046 Zone* zone = compiler->zone(); |
5042 // E.g. \ud801 becomes \ud801(?![\udc00-\udfff]). | 5047 // E.g. \ud801 becomes \ud801(?![\udc00-\udfff]). |
5043 ZoneList<CharacterRange>* trail_surrogates = | 5048 ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( |
5044 new (zone) ZoneList<CharacterRange>(1, zone); | 5049 zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); |
5045 trail_surrogates->Add( | |
5046 CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd), zone); | |
5047 | 5050 |
5048 RegExpNode* match = | 5051 RegExpNode* match; |
5049 compiler->read_backward() | 5052 if (compiler->read_backward()) { |
5050 // Reading backward. Assert that reading forward, there is no trail | 5053 // Reading backward. Assert that reading forward, there is no trail |
5051 // surrogate, and then backward match the lead surrogate. | 5054 // surrogate, and then backward match the lead surrogate. |
5052 ? NegativeLookaroundAgainstReadDirectionAndMatch( | 5055 match = NegativeLookaroundAgainstReadDirectionAndMatch( |
5053 compiler, trail_surrogates, lead_surrogates, on_success, true) | 5056 compiler, trail_surrogates, lead_surrogates, on_success, true); |
5054 // Reading forward. Forwrad match the lead surrogate and assert that | 5057 } else { |
5055 // no | 5058 // Reading forward. Forward match the lead surrogate and assert that |
5056 // trail surrogate follows. | 5059 // no trail surrogate follows. |
5057 : MatchAndNegativeLookaroundInReadDirection( | 5060 match = MatchAndNegativeLookaroundInReadDirection( |
5058 compiler, lead_surrogates, trail_surrogates, on_success, false); | 5061 compiler, lead_surrogates, trail_surrogates, on_success, false); |
| 5062 } |
5059 result->AddAlternative(GuardedAlternative(match)); | 5063 result->AddAlternative(GuardedAlternative(match)); |
5060 } | 5064 } |
5061 | 5065 |
5062 | 5066 |
5063 void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result, | 5067 void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result, |
5064 RegExpNode* on_success, | 5068 RegExpNode* on_success, |
5065 UnicodeRangeSplitter* splitter) { | 5069 UnicodeRangeSplitter* splitter) { |
5066 ZoneList<CharacterRange>* trail_surrogates = splitter->trail_surrogates(); | 5070 ZoneList<CharacterRange>* trail_surrogates = splitter->trail_surrogates(); |
5067 if (trail_surrogates == nullptr) return; | 5071 if (trail_surrogates == nullptr) return; |
5068 Zone* zone = compiler->zone(); | 5072 Zone* zone = compiler->zone(); |
5069 // E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01 | 5073 // E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01 |
5070 ZoneList<CharacterRange>* lead_surrogates = | 5074 ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( |
5071 new (zone) ZoneList<CharacterRange>(1, zone); | 5075 zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); |
5072 lead_surrogates->Add( | |
5073 CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd), zone); | |
5074 | 5076 |
5075 RegExpNode* match = | 5077 RegExpNode* match; |
5076 compiler->read_backward() | 5078 if (compiler->read_backward()) { |
5077 // Reading backward. Backward match the trail surrogate and assert | 5079 // Reading backward. Backward match the trail surrogate and assert that no |
5078 // that no lead surrogate precedes it. | 5080 // lead surrogate precedes it. |
5079 ? MatchAndNegativeLookaroundInReadDirection( | 5081 match = MatchAndNegativeLookaroundInReadDirection( |
5080 compiler, trail_surrogates, lead_surrogates, on_success, true) | 5082 compiler, trail_surrogates, lead_surrogates, on_success, true); |
5081 // Reading forward. Assert that reading backward, there is no lead | 5083 } else { |
5082 // surrogate, and then forward match the trail surrogate. | 5084 // Reading forward. Assert that reading backward, there is no lead |
5083 : NegativeLookaroundAgainstReadDirectionAndMatch( | 5085 // surrogate, and then forward match the trail surrogate. |
5084 compiler, lead_surrogates, trail_surrogates, on_success, false); | 5086 match = NegativeLookaroundAgainstReadDirectionAndMatch( |
| 5087 compiler, lead_surrogates, trail_surrogates, on_success, false); |
| 5088 } |
5085 result->AddAlternative(GuardedAlternative(match)); | 5089 result->AddAlternative(GuardedAlternative(match)); |
5086 } | 5090 } |
5087 | 5091 |
5088 | 5092 |
| 5093 void AddUnanchoredAdvance(RegExpCompiler* compiler, ChoiceNode* result, |
| 5094 RegExpNode* on_success) { |
| 5095 // This implements ES2015 21.2.5.2.3, AdvanceStringIndex. |
| 5096 DCHECK(!compiler->read_backward()); |
| 5097 Zone* zone = compiler->zone(); |
| 5098 // Advancing can either consume a BMP character or a trail surrogate. |
| 5099 ZoneList<CharacterRange>* bmp_and_trail = |
| 5100 new (zone) ZoneList<CharacterRange>(2, zone); |
| 5101 bmp_and_trail->Add(CharacterRange::Range(0, kLeadSurrogateStart - 1), zone); |
| 5102 bmp_and_trail->Add( |
| 5103 CharacterRange::Range(kLeadSurrogateEnd + 1, kNonBmpStart - 1), zone); |
| 5104 result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges( |
| 5105 zone, bmp_and_trail, false, on_success))); |
| 5106 |
| 5107 // Or it could consume a lead optionally followed by a trail surrogate. |
| 5108 ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( |
| 5109 zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); |
| 5110 ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( |
| 5111 zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); |
| 5112 ChoiceNode* optional_trail = new (zone) ChoiceNode(2, zone); |
| 5113 optional_trail->AddAlternative( |
| 5114 GuardedAlternative(TextNode::CreateForCharacterRanges( |
| 5115 zone, trail_surrogates, false, on_success))); |
| 5116 optional_trail->AddAlternative(GuardedAlternative(on_success)); |
| 5117 RegExpNode* optional_pair = TextNode::CreateForCharacterRanges( |
| 5118 zone, lead_surrogates, false, optional_trail); |
| 5119 result->AddAlternative(GuardedAlternative(optional_pair)); |
| 5120 } |
| 5121 |
| 5122 |
5089 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 5123 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
5090 RegExpNode* on_success) { | 5124 RegExpNode* on_success) { |
5091 set_.Canonicalize(); | 5125 set_.Canonicalize(); |
5092 Zone* zone = compiler->zone(); | 5126 Zone* zone = compiler->zone(); |
5093 ZoneList<CharacterRange>* ranges = this->ranges(zone); | 5127 ZoneList<CharacterRange>* ranges = this->ranges(zone); |
5094 if (compiler->unicode() && !compiler->one_byte()) { | 5128 if (compiler->unicode() && !compiler->one_byte()) { |
5095 if (is_negated()) { | 5129 if (is_negated()) { |
5096 ZoneList<CharacterRange>* negated = | 5130 ZoneList<CharacterRange>* negated = |
5097 new (zone) ZoneList<CharacterRange>(2, zone); | 5131 new (zone) ZoneList<CharacterRange>(2, zone); |
5098 CharacterRange::Negate(ranges, negated, zone); | 5132 CharacterRange::Negate(ranges, negated, zone); |
5099 ranges = negated; | 5133 ranges = negated; |
5100 } | 5134 } |
5101 if (ranges->length() == 0) { | 5135 if (ranges->length() == 0) { |
5102 // No matches possible. | 5136 // No matches possible. |
5103 return new (zone) EndNode(EndNode::BACKTRACK, zone); | 5137 return new (zone) EndNode(EndNode::BACKTRACK, zone); |
5104 } | 5138 } |
5105 UnicodeRangeSplitter splitter(zone, ranges); | 5139 ChoiceNode* result = new (zone) ChoiceNode(2, zone); |
5106 ChoiceNode* result = new (compiler->zone()) ChoiceNode(2, compiler->zone()); | 5140 if (standard_type() == '*') { |
5107 AddBmpCharacters(compiler, result, on_success, &splitter); | 5141 AddUnanchoredAdvance(compiler, result, on_success); |
5108 AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); | 5142 } else { |
5109 AddLoneLeadSurrogates(compiler, result, on_success, &splitter); | 5143 UnicodeRangeSplitter splitter(zone, ranges); |
5110 AddLoneTrailSurrogates(compiler, result, on_success, &splitter); | 5144 AddBmpCharacters(compiler, result, on_success, &splitter); |
| 5145 AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); |
| 5146 AddLoneLeadSurrogates(compiler, result, on_success, &splitter); |
| 5147 AddLoneTrailSurrogates(compiler, result, on_success, &splitter); |
| 5148 } |
5111 return result; | 5149 return result; |
5112 } else { | 5150 } else { |
5113 return new (zone) TextNode(this, compiler->read_backward(), on_success); | 5151 return new (zone) TextNode(this, compiler->read_backward(), on_success); |
5114 } | 5152 } |
5115 } | 5153 } |
5116 | 5154 |
5117 | 5155 |
5118 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { | 5156 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { |
5119 RegExpAtom* atom1 = (*a)->AsAtom(); | 5157 RegExpAtom* atom1 = (*a)->AsAtom(); |
5120 RegExpAtom* atom2 = (*b)->AsAtom(); | 5158 RegExpAtom* atom2 = (*b)->AsAtom(); |
(...skipping 1385 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6506 } | 6544 } |
6507 } | 6545 } |
6508 | 6546 |
6509 | 6547 |
6510 void DispatchTableConstructor::VisitAction(ActionNode* that) { | 6548 void DispatchTableConstructor::VisitAction(ActionNode* that) { |
6511 RegExpNode* target = that->on_success(); | 6549 RegExpNode* target = that->on_success(); |
6512 target->Accept(this); | 6550 target->Accept(this); |
6513 } | 6551 } |
6514 | 6552 |
6515 | 6553 |
| 6554 RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpCompiler* compiler, |
| 6555 RegExpNode* on_success) { |
| 6556 // If the regexp matching starts within a surrogate pair, step back |
| 6557 // to the lead surrogate and start matching from there. |
| 6558 DCHECK(!compiler->read_backward()); |
| 6559 Zone* zone = compiler->zone(); |
| 6560 ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( |
| 6561 zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); |
| 6562 ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( |
| 6563 zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); |
| 6564 |
| 6565 ChoiceNode* optional_step_back = new (zone) ChoiceNode(2, zone); |
| 6566 |
| 6567 int stack_register = compiler->UnicodeLookaroundStackRegister(); |
| 6568 int position_register = compiler->UnicodeLookaroundPositionRegister(); |
| 6569 RegExpNode* step_back = TextNode::CreateForCharacterRanges( |
| 6570 zone, lead_surrogates, true, on_success); |
| 6571 RegExpLookaround::Builder builder(true, step_back, stack_register, |
| 6572 position_register); |
| 6573 RegExpNode* match_trail = TextNode::CreateForCharacterRanges( |
| 6574 zone, trail_surrogates, false, builder.on_match_success()); |
| 6575 |
| 6576 optional_step_back->AddAlternative( |
| 6577 GuardedAlternative(builder.ForMatch(match_trail))); |
| 6578 optional_step_back->AddAlternative(GuardedAlternative(on_success)); |
| 6579 |
| 6580 return optional_step_back; |
| 6581 } |
| 6582 |
| 6583 |
6516 RegExpEngine::CompilationResult RegExpEngine::Compile( | 6584 RegExpEngine::CompilationResult RegExpEngine::Compile( |
6517 Isolate* isolate, Zone* zone, RegExpCompileData* data, | 6585 Isolate* isolate, Zone* zone, RegExpCompileData* data, |
6518 JSRegExp::Flags flags, Handle<String> pattern, | 6586 JSRegExp::Flags flags, Handle<String> pattern, |
6519 Handle<String> sample_subject, bool is_one_byte) { | 6587 Handle<String> sample_subject, bool is_one_byte) { |
6520 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { | 6588 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { |
6521 return IrregexpRegExpTooBig(isolate); | 6589 return IrregexpRegExpTooBig(isolate); |
6522 } | 6590 } |
6523 bool ignore_case = flags & JSRegExp::kIgnoreCase; | 6591 bool ignore_case = flags & JSRegExp::kIgnoreCase; |
6524 bool is_sticky = flags & JSRegExp::kSticky; | 6592 bool is_sticky = flags & JSRegExp::kSticky; |
6525 bool is_global = flags & JSRegExp::kGlobal; | 6593 bool is_global = flags & JSRegExp::kGlobal; |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6568 node = loop_node; | 6636 node = loop_node; |
6569 } | 6637 } |
6570 } | 6638 } |
6571 if (is_one_byte) { | 6639 if (is_one_byte) { |
6572 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 6640 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
6573 // Do it again to propagate the new nodes to places where they were not | 6641 // Do it again to propagate the new nodes to places where they were not |
6574 // put because they had not been calculated yet. | 6642 // put because they had not been calculated yet. |
6575 if (node != NULL) { | 6643 if (node != NULL) { |
6576 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 6644 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
6577 } | 6645 } |
| 6646 } else if (compiler.unicode() && (is_global || is_sticky)) { |
| 6647 node = OptionallyStepBackToLeadSurrogate(&compiler, node); |
6578 } | 6648 } |
6579 | 6649 |
6580 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); | 6650 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); |
6581 data->node = node; | 6651 data->node = node; |
6582 Analysis analysis(isolate, ignore_case, is_one_byte); | 6652 Analysis analysis(isolate, ignore_case, is_one_byte); |
6583 analysis.EnsureAnalyzed(node); | 6653 analysis.EnsureAnalyzed(node); |
6584 if (analysis.has_failed()) { | 6654 if (analysis.has_failed()) { |
6585 const char* error_message = analysis.error_message(); | 6655 const char* error_message = analysis.error_message(); |
6586 return CompilationResult(isolate, error_message); | 6656 return CompilationResult(isolate, error_message); |
6587 } | 6657 } |
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6759 | 6829 |
6760 | 6830 |
6761 void RegExpResultsCache::Clear(FixedArray* cache) { | 6831 void RegExpResultsCache::Clear(FixedArray* cache) { |
6762 for (int i = 0; i < kRegExpResultsCacheSize; i++) { | 6832 for (int i = 0; i < kRegExpResultsCacheSize; i++) { |
6763 cache->set(i, Smi::FromInt(0)); | 6833 cache->set(i, Smi::FromInt(0)); |
6764 } | 6834 } |
6765 } | 6835 } |
6766 | 6836 |
6767 } // namespace internal | 6837 } // namespace internal |
6768 } // namespace v8 | 6838 } // namespace v8 |
OLD | NEW |