OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/jsregexp.h" | 5 #include "src/regexp/jsregexp.h" |
6 | 6 |
7 #include "src/ast/ast.h" | 7 #include "src/ast/ast.h" |
8 #include "src/base/platform/platform.h" | 8 #include "src/base/platform/platform.h" |
9 #include "src/compilation-cache.h" | 9 #include "src/compilation-cache.h" |
10 #include "src/compiler.h" | 10 #include "src/compiler.h" |
(...skipping 5067 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5078 compiler, trail_surrogates, lead_surrogates, on_success, true); | 5078 compiler, trail_surrogates, lead_surrogates, on_success, true); |
5079 } else { | 5079 } else { |
5080 // Reading forward. Assert that reading backward, there is no lead | 5080 // Reading forward. Assert that reading backward, there is no lead |
5081 // surrogate, and then forward match the trail surrogate. | 5081 // surrogate, and then forward match the trail surrogate. |
5082 match = NegativeLookaroundAgainstReadDirectionAndMatch( | 5082 match = NegativeLookaroundAgainstReadDirectionAndMatch( |
5083 compiler, lead_surrogates, trail_surrogates, on_success, false); | 5083 compiler, lead_surrogates, trail_surrogates, on_success, false); |
5084 } | 5084 } |
5085 result->AddAlternative(GuardedAlternative(match)); | 5085 result->AddAlternative(GuardedAlternative(match)); |
5086 } | 5086 } |
5087 | 5087 |
5088 | 5088 RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler, |
5089 void AddUnanchoredAdvance(RegExpCompiler* compiler, ChoiceNode* result, | 5089 RegExpNode* on_success) { |
5090 RegExpNode* on_success) { | |
5091 // This implements ES2015 21.2.5.2.3, AdvanceStringIndex. | 5090 // This implements ES2015 21.2.5.2.3, AdvanceStringIndex. |
5092 DCHECK(!compiler->read_backward()); | 5091 DCHECK(!compiler->read_backward()); |
5093 Zone* zone = compiler->zone(); | 5092 Zone* zone = compiler->zone(); |
5094 // Advancing can either consume a BMP character or a trail surrogate. | 5093 // Advance any character. If the character happens to be a lead surrogate and |
5095 ZoneList<CharacterRange>* bmp_and_trail = | 5094 // we advanced into the middle of a surrogate pair, it will work out, as |
5096 new (zone) ZoneList<CharacterRange>(2, zone); | 5095 // nothing will match from there. We will have to advance again, consuming |
5097 bmp_and_trail->Add(CharacterRange::Range(0, kLeadSurrogateStart - 1), zone); | 5096 // the associated trail surrogate. |
5098 bmp_and_trail->Add( | 5097 ZoneList<CharacterRange>* range = CharacterRange::List( |
5099 CharacterRange::Range(kLeadSurrogateEnd + 1, kNonBmpStart - 1), zone); | 5098 zone, CharacterRange::Range(0, String::kMaxUtf16CodeUnit)); |
5100 result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges( | 5099 return TextNode::CreateForCharacterRanges(zone, range, false, on_success); |
5101 zone, bmp_and_trail, false, on_success))); | |
5102 | |
5103 // Or it could consume a lead optionally followed by a trail surrogate. | |
5104 ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( | |
5105 zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); | |
5106 ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( | |
5107 zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); | |
5108 ChoiceNode* optional_trail = new (zone) ChoiceNode(2, zone); | |
5109 optional_trail->AddAlternative( | |
5110 GuardedAlternative(TextNode::CreateForCharacterRanges( | |
5111 zone, trail_surrogates, false, on_success))); | |
5112 optional_trail->AddAlternative(GuardedAlternative(on_success)); | |
5113 RegExpNode* optional_pair = TextNode::CreateForCharacterRanges( | |
5114 zone, lead_surrogates, false, optional_trail); | |
5115 result->AddAlternative(GuardedAlternative(optional_pair)); | |
5116 } | 5100 } |
5117 | 5101 |
5118 | 5102 |
5119 void AddUnicodeCaseEquivalents(RegExpCompiler* compiler, | 5103 void AddUnicodeCaseEquivalents(RegExpCompiler* compiler, |
5120 ZoneList<CharacterRange>* ranges) { | 5104 ZoneList<CharacterRange>* ranges) { |
5121 #ifdef V8_I18N_SUPPORT | 5105 #ifdef V8_I18N_SUPPORT |
5122 // Use ICU to compute the case fold closure over the ranges. | 5106 // Use ICU to compute the case fold closure over the ranges. |
5123 DCHECK(compiler->unicode()); | 5107 DCHECK(compiler->unicode()); |
5124 DCHECK(compiler->ignore_case()); | 5108 DCHECK(compiler->ignore_case()); |
5125 USet* set = uset_openEmpty(); | 5109 USet* set = uset_openEmpty(); |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5167 if (is_negated()) { | 5151 if (is_negated()) { |
5168 ZoneList<CharacterRange>* negated = | 5152 ZoneList<CharacterRange>* negated = |
5169 new (zone) ZoneList<CharacterRange>(2, zone); | 5153 new (zone) ZoneList<CharacterRange>(2, zone); |
5170 CharacterRange::Negate(ranges, negated, zone); | 5154 CharacterRange::Negate(ranges, negated, zone); |
5171 ranges = negated; | 5155 ranges = negated; |
5172 } | 5156 } |
5173 if (ranges->length() == 0) { | 5157 if (ranges->length() == 0) { |
5174 // No matches possible. | 5158 // No matches possible. |
5175 return new (zone) EndNode(EndNode::BACKTRACK, zone); | 5159 return new (zone) EndNode(EndNode::BACKTRACK, zone); |
5176 } | 5160 } |
5177 ChoiceNode* result = new (zone) ChoiceNode(2, zone); | |
5178 if (standard_type() == '*') { | 5161 if (standard_type() == '*') { |
5179 AddUnanchoredAdvance(compiler, result, on_success); | 5162 return UnanchoredAdvance(compiler, on_success); |
5180 } else { | 5163 } else { |
| 5164 ChoiceNode* result = new (zone) ChoiceNode(2, zone); |
5181 UnicodeRangeSplitter splitter(zone, ranges); | 5165 UnicodeRangeSplitter splitter(zone, ranges); |
5182 AddBmpCharacters(compiler, result, on_success, &splitter); | 5166 AddBmpCharacters(compiler, result, on_success, &splitter); |
5183 AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); | 5167 AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); |
5184 AddLoneLeadSurrogates(compiler, result, on_success, &splitter); | 5168 AddLoneLeadSurrogates(compiler, result, on_success, &splitter); |
5185 AddLoneTrailSurrogates(compiler, result, on_success, &splitter); | 5169 AddLoneTrailSurrogates(compiler, result, on_success, &splitter); |
| 5170 return result; |
5186 } | 5171 } |
5187 return result; | |
5188 } else { | 5172 } else { |
5189 return new (zone) TextNode(this, compiler->read_backward(), on_success); | 5173 return new (zone) TextNode(this, compiler->read_backward(), on_success); |
5190 } | 5174 } |
5191 } | 5175 } |
5192 | 5176 |
5193 | 5177 |
5194 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { | 5178 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { |
5195 RegExpAtom* atom1 = (*a)->AsAtom(); | 5179 RegExpAtom* atom1 = (*a)->AsAtom(); |
5196 RegExpAtom* atom2 = (*b)->AsAtom(); | 5180 RegExpAtom* atom2 = (*b)->AsAtom(); |
5197 uc16 character1 = atom1->data().at(0); | 5181 uc16 character1 = atom1->data().at(0); |
(...skipping 1677 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6875 | 6859 |
6876 | 6860 |
6877 void RegExpResultsCache::Clear(FixedArray* cache) { | 6861 void RegExpResultsCache::Clear(FixedArray* cache) { |
6878 for (int i = 0; i < kRegExpResultsCacheSize; i++) { | 6862 for (int i = 0; i < kRegExpResultsCacheSize; i++) { |
6879 cache->set(i, Smi::FromInt(0)); | 6863 cache->set(i, Smi::FromInt(0)); |
6880 } | 6864 } |
6881 } | 6865 } |
6882 | 6866 |
6883 } // namespace internal | 6867 } // namespace internal |
6884 } // namespace v8 | 6868 } // namespace v8 |
OLD | NEW |