| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/jsregexp.h" | 5 #include "src/regexp/jsregexp.h" |
| 6 | 6 |
| 7 #include "src/ast/ast.h" | 7 #include "src/ast/ast.h" |
| 8 #include "src/base/platform/platform.h" | 8 #include "src/base/platform/platform.h" |
| 9 #include "src/compilation-cache.h" | 9 #include "src/compilation-cache.h" |
| 10 #include "src/compiler.h" | 10 #include "src/compiler.h" |
| (...skipping 5067 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5078 compiler, trail_surrogates, lead_surrogates, on_success, true); | 5078 compiler, trail_surrogates, lead_surrogates, on_success, true); |
| 5079 } else { | 5079 } else { |
| 5080 // Reading forward. Assert that reading backward, there is no lead | 5080 // Reading forward. Assert that reading backward, there is no lead |
| 5081 // surrogate, and then forward match the trail surrogate. | 5081 // surrogate, and then forward match the trail surrogate. |
| 5082 match = NegativeLookaroundAgainstReadDirectionAndMatch( | 5082 match = NegativeLookaroundAgainstReadDirectionAndMatch( |
| 5083 compiler, lead_surrogates, trail_surrogates, on_success, false); | 5083 compiler, lead_surrogates, trail_surrogates, on_success, false); |
| 5084 } | 5084 } |
| 5085 result->AddAlternative(GuardedAlternative(match)); | 5085 result->AddAlternative(GuardedAlternative(match)); |
| 5086 } | 5086 } |
| 5087 | 5087 |
| 5088 | 5088 RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler, |
| 5089 void AddUnanchoredAdvance(RegExpCompiler* compiler, ChoiceNode* result, | 5089 RegExpNode* on_success) { |
| 5090 RegExpNode* on_success) { | |
| 5091 // This implements ES2015 21.2.5.2.3, AdvanceStringIndex. | 5090 // This implements ES2015 21.2.5.2.3, AdvanceStringIndex. |
| 5092 DCHECK(!compiler->read_backward()); | 5091 DCHECK(!compiler->read_backward()); |
| 5093 Zone* zone = compiler->zone(); | 5092 Zone* zone = compiler->zone(); |
| 5094 // Advancing can either consume a BMP character or a trail surrogate. | 5093 // Advance any character. If the character happens to be a lead surrogate and |
| 5095 ZoneList<CharacterRange>* bmp_and_trail = | 5094 // we advanced into the middle of a surrogate pair, it will work out, as |
| 5096 new (zone) ZoneList<CharacterRange>(2, zone); | 5095 // nothing will match from there. We will have to advance again, consuming |
| 5097 bmp_and_trail->Add(CharacterRange::Range(0, kLeadSurrogateStart - 1), zone); | 5096 // the associated trail surrogate. |
| 5098 bmp_and_trail->Add( | 5097 ZoneList<CharacterRange>* range = CharacterRange::List( |
| 5099 CharacterRange::Range(kLeadSurrogateEnd + 1, kNonBmpStart - 1), zone); | 5098 zone, CharacterRange::Range(0, String::kMaxUtf16CodeUnit)); |
| 5100 result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges( | 5099 return TextNode::CreateForCharacterRanges(zone, range, false, on_success); |
| 5101 zone, bmp_and_trail, false, on_success))); | |
| 5102 | |
| 5103 // Or it could consume a lead optionally followed by a trail surrogate. | |
| 5104 ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( | |
| 5105 zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); | |
| 5106 ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( | |
| 5107 zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); | |
| 5108 ChoiceNode* optional_trail = new (zone) ChoiceNode(2, zone); | |
| 5109 optional_trail->AddAlternative( | |
| 5110 GuardedAlternative(TextNode::CreateForCharacterRanges( | |
| 5111 zone, trail_surrogates, false, on_success))); | |
| 5112 optional_trail->AddAlternative(GuardedAlternative(on_success)); | |
| 5113 RegExpNode* optional_pair = TextNode::CreateForCharacterRanges( | |
| 5114 zone, lead_surrogates, false, optional_trail); | |
| 5115 result->AddAlternative(GuardedAlternative(optional_pair)); | |
| 5116 } | 5100 } |
| 5117 | 5101 |
| 5118 | 5102 |
| 5119 void AddUnicodeCaseEquivalents(RegExpCompiler* compiler, | 5103 void AddUnicodeCaseEquivalents(RegExpCompiler* compiler, |
| 5120 ZoneList<CharacterRange>* ranges) { | 5104 ZoneList<CharacterRange>* ranges) { |
| 5121 #ifdef V8_I18N_SUPPORT | 5105 #ifdef V8_I18N_SUPPORT |
| 5122 // Use ICU to compute the case fold closure over the ranges. | 5106 // Use ICU to compute the case fold closure over the ranges. |
| 5123 DCHECK(compiler->unicode()); | 5107 DCHECK(compiler->unicode()); |
| 5124 DCHECK(compiler->ignore_case()); | 5108 DCHECK(compiler->ignore_case()); |
| 5125 USet* set = uset_openEmpty(); | 5109 USet* set = uset_openEmpty(); |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5167 if (is_negated()) { | 5151 if (is_negated()) { |
| 5168 ZoneList<CharacterRange>* negated = | 5152 ZoneList<CharacterRange>* negated = |
| 5169 new (zone) ZoneList<CharacterRange>(2, zone); | 5153 new (zone) ZoneList<CharacterRange>(2, zone); |
| 5170 CharacterRange::Negate(ranges, negated, zone); | 5154 CharacterRange::Negate(ranges, negated, zone); |
| 5171 ranges = negated; | 5155 ranges = negated; |
| 5172 } | 5156 } |
| 5173 if (ranges->length() == 0) { | 5157 if (ranges->length() == 0) { |
| 5174 // No matches possible. | 5158 // No matches possible. |
| 5175 return new (zone) EndNode(EndNode::BACKTRACK, zone); | 5159 return new (zone) EndNode(EndNode::BACKTRACK, zone); |
| 5176 } | 5160 } |
| 5177 ChoiceNode* result = new (zone) ChoiceNode(2, zone); | |
| 5178 if (standard_type() == '*') { | 5161 if (standard_type() == '*') { |
| 5179 AddUnanchoredAdvance(compiler, result, on_success); | 5162 return UnanchoredAdvance(compiler, on_success); |
| 5180 } else { | 5163 } else { |
| 5164 ChoiceNode* result = new (zone) ChoiceNode(2, zone); |
| 5181 UnicodeRangeSplitter splitter(zone, ranges); | 5165 UnicodeRangeSplitter splitter(zone, ranges); |
| 5182 AddBmpCharacters(compiler, result, on_success, &splitter); | 5166 AddBmpCharacters(compiler, result, on_success, &splitter); |
| 5183 AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); | 5167 AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); |
| 5184 AddLoneLeadSurrogates(compiler, result, on_success, &splitter); | 5168 AddLoneLeadSurrogates(compiler, result, on_success, &splitter); |
| 5185 AddLoneTrailSurrogates(compiler, result, on_success, &splitter); | 5169 AddLoneTrailSurrogates(compiler, result, on_success, &splitter); |
| 5170 return result; |
| 5186 } | 5171 } |
| 5187 return result; | |
| 5188 } else { | 5172 } else { |
| 5189 return new (zone) TextNode(this, compiler->read_backward(), on_success); | 5173 return new (zone) TextNode(this, compiler->read_backward(), on_success); |
| 5190 } | 5174 } |
| 5191 } | 5175 } |
| 5192 | 5176 |
| 5193 | 5177 |
| 5194 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { | 5178 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { |
| 5195 RegExpAtom* atom1 = (*a)->AsAtom(); | 5179 RegExpAtom* atom1 = (*a)->AsAtom(); |
| 5196 RegExpAtom* atom2 = (*b)->AsAtom(); | 5180 RegExpAtom* atom2 = (*b)->AsAtom(); |
| 5197 uc16 character1 = atom1->data().at(0); | 5181 uc16 character1 = atom1->data().at(0); |
| (...skipping 1677 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6875 | 6859 |
| 6876 | 6860 |
| 6877 void RegExpResultsCache::Clear(FixedArray* cache) { | 6861 void RegExpResultsCache::Clear(FixedArray* cache) { |
| 6878 for (int i = 0; i < kRegExpResultsCacheSize; i++) { | 6862 for (int i = 0; i < kRegExpResultsCacheSize; i++) { |
| 6879 cache->set(i, Smi::FromInt(0)); | 6863 cache->set(i, Smi::FromInt(0)); |
| 6880 } | 6864 } |
| 6881 } | 6865 } |
| 6882 | 6866 |
| 6883 } // namespace internal | 6867 } // namespace internal |
| 6884 } // namespace v8 | 6868 } // namespace v8 |
| OLD | NEW |