Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(81)

Side by Side Diff: src/regexp/jsregexp.cc

Issue 1676293003: [regexp] simplify unanchored advance for unicode regexps. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: shorten test Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | test/mjsunit/harmony/unicode-regexp-unanchored-advance.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/jsregexp.h" 5 #include "src/regexp/jsregexp.h"
6 6
7 #include "src/ast/ast.h" 7 #include "src/ast/ast.h"
8 #include "src/base/platform/platform.h" 8 #include "src/base/platform/platform.h"
9 #include "src/compilation-cache.h" 9 #include "src/compilation-cache.h"
10 #include "src/compiler.h" 10 #include "src/compiler.h"
(...skipping 5067 matching lines...) Expand 10 before | Expand all | Expand 10 after
5078 compiler, trail_surrogates, lead_surrogates, on_success, true); 5078 compiler, trail_surrogates, lead_surrogates, on_success, true);
5079 } else { 5079 } else {
5080 // Reading forward. Assert that reading backward, there is no lead 5080 // Reading forward. Assert that reading backward, there is no lead
5081 // surrogate, and then forward match the trail surrogate. 5081 // surrogate, and then forward match the trail surrogate.
5082 match = NegativeLookaroundAgainstReadDirectionAndMatch( 5082 match = NegativeLookaroundAgainstReadDirectionAndMatch(
5083 compiler, lead_surrogates, trail_surrogates, on_success, false); 5083 compiler, lead_surrogates, trail_surrogates, on_success, false);
5084 } 5084 }
5085 result->AddAlternative(GuardedAlternative(match)); 5085 result->AddAlternative(GuardedAlternative(match));
5086 } 5086 }
5087 5087
5088 5088 RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
5089 void AddUnanchoredAdvance(RegExpCompiler* compiler, ChoiceNode* result, 5089 RegExpNode* on_success) {
5090 RegExpNode* on_success) {
5091 // This implements ES2015 21.2.5.2.3, AdvanceStringIndex. 5090 // This implements ES2015 21.2.5.2.3, AdvanceStringIndex.
5092 DCHECK(!compiler->read_backward()); 5091 DCHECK(!compiler->read_backward());
5093 Zone* zone = compiler->zone(); 5092 Zone* zone = compiler->zone();
5094 // Advancing can either consume a BMP character or a trail surrogate. 5093 // Advance any character. If the character happens to be a lead surrogate and
5095 ZoneList<CharacterRange>* bmp_and_trail = 5094 // we advanced into the middle of a surrogate pair, it will work out, as
5096 new (zone) ZoneList<CharacterRange>(2, zone); 5095 // nothing will match from there. We will have to advance again, consuming
5097 bmp_and_trail->Add(CharacterRange::Range(0, kLeadSurrogateStart - 1), zone); 5096 // the associated trail surrogate.
5098 bmp_and_trail->Add( 5097 ZoneList<CharacterRange>* range = CharacterRange::List(
5099 CharacterRange::Range(kLeadSurrogateEnd + 1, kNonBmpStart - 1), zone); 5098 zone, CharacterRange::Range(0, String::kMaxUtf16CodeUnit));
5100 result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges( 5099 return TextNode::CreateForCharacterRanges(zone, range, false, on_success);
5101 zone, bmp_and_trail, false, on_success)));
5102
5103 // Or it could consume a lead optionally followed by a trail surrogate.
5104 ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
5105 zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
5106 ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List(
5107 zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
5108 ChoiceNode* optional_trail = new (zone) ChoiceNode(2, zone);
5109 optional_trail->AddAlternative(
5110 GuardedAlternative(TextNode::CreateForCharacterRanges(
5111 zone, trail_surrogates, false, on_success)));
5112 optional_trail->AddAlternative(GuardedAlternative(on_success));
5113 RegExpNode* optional_pair = TextNode::CreateForCharacterRanges(
5114 zone, lead_surrogates, false, optional_trail);
5115 result->AddAlternative(GuardedAlternative(optional_pair));
5116 } 5100 }
5117 5101
5118 5102
5119 void AddUnicodeCaseEquivalents(RegExpCompiler* compiler, 5103 void AddUnicodeCaseEquivalents(RegExpCompiler* compiler,
5120 ZoneList<CharacterRange>* ranges) { 5104 ZoneList<CharacterRange>* ranges) {
5121 #ifdef V8_I18N_SUPPORT 5105 #ifdef V8_I18N_SUPPORT
5122 // Use ICU to compute the case fold closure over the ranges. 5106 // Use ICU to compute the case fold closure over the ranges.
5123 DCHECK(compiler->unicode()); 5107 DCHECK(compiler->unicode());
5124 DCHECK(compiler->ignore_case()); 5108 DCHECK(compiler->ignore_case());
5125 USet* set = uset_openEmpty(); 5109 USet* set = uset_openEmpty();
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
5167 if (is_negated()) { 5151 if (is_negated()) {
5168 ZoneList<CharacterRange>* negated = 5152 ZoneList<CharacterRange>* negated =
5169 new (zone) ZoneList<CharacterRange>(2, zone); 5153 new (zone) ZoneList<CharacterRange>(2, zone);
5170 CharacterRange::Negate(ranges, negated, zone); 5154 CharacterRange::Negate(ranges, negated, zone);
5171 ranges = negated; 5155 ranges = negated;
5172 } 5156 }
5173 if (ranges->length() == 0) { 5157 if (ranges->length() == 0) {
5174 // No matches possible. 5158 // No matches possible.
5175 return new (zone) EndNode(EndNode::BACKTRACK, zone); 5159 return new (zone) EndNode(EndNode::BACKTRACK, zone);
5176 } 5160 }
5177 ChoiceNode* result = new (zone) ChoiceNode(2, zone);
5178 if (standard_type() == '*') { 5161 if (standard_type() == '*') {
5179 AddUnanchoredAdvance(compiler, result, on_success); 5162 return UnanchoredAdvance(compiler, on_success);
5180 } else { 5163 } else {
5164 ChoiceNode* result = new (zone) ChoiceNode(2, zone);
5181 UnicodeRangeSplitter splitter(zone, ranges); 5165 UnicodeRangeSplitter splitter(zone, ranges);
5182 AddBmpCharacters(compiler, result, on_success, &splitter); 5166 AddBmpCharacters(compiler, result, on_success, &splitter);
5183 AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); 5167 AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
5184 AddLoneLeadSurrogates(compiler, result, on_success, &splitter); 5168 AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
5185 AddLoneTrailSurrogates(compiler, result, on_success, &splitter); 5169 AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
5170 return result;
5186 } 5171 }
5187 return result;
5188 } else { 5172 } else {
5189 return new (zone) TextNode(this, compiler->read_backward(), on_success); 5173 return new (zone) TextNode(this, compiler->read_backward(), on_success);
5190 } 5174 }
5191 } 5175 }
5192 5176
5193 5177
5194 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { 5178 int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {
5195 RegExpAtom* atom1 = (*a)->AsAtom(); 5179 RegExpAtom* atom1 = (*a)->AsAtom();
5196 RegExpAtom* atom2 = (*b)->AsAtom(); 5180 RegExpAtom* atom2 = (*b)->AsAtom();
5197 uc16 character1 = atom1->data().at(0); 5181 uc16 character1 = atom1->data().at(0);
(...skipping 1677 matching lines...) Expand 10 before | Expand all | Expand 10 after
6875 6859
6876 6860
6877 void RegExpResultsCache::Clear(FixedArray* cache) { 6861 void RegExpResultsCache::Clear(FixedArray* cache) {
6878 for (int i = 0; i < kRegExpResultsCacheSize; i++) { 6862 for (int i = 0; i < kRegExpResultsCacheSize; i++) {
6879 cache->set(i, Smi::FromInt(0)); 6863 cache->set(i, Smi::FromInt(0));
6880 } 6864 }
6881 } 6865 }
6882 6866
6883 } // namespace internal 6867 } // namespace internal
6884 } // namespace v8 6868 } // namespace v8
OLD | NEW
« no previous file with comments | « no previous file | test/mjsunit/harmony/unicode-regexp-unanchored-advance.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698