Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(327)

Side by Side Diff: src/regexp/jsregexp.cc

Issue 1820823003: [regexp] Fix issues with character range limit. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/jsregexp.h" 5 #include "src/regexp/jsregexp.h"
6 6
7 #include "src/ast/ast.h" 7 #include "src/ast/ast.h"
8 #include "src/base/platform/platform.h" 8 #include "src/base/platform/platform.h"
9 #include "src/compilation-cache.h" 9 #include "src/compilation-cache.h"
10 #include "src/compiler.h" 10 #include "src/compiler.h"
(...skipping 1958 matching lines...) Expand 10 before | Expand all | Expand 10 after
1969 } 1969 }
1970 } 1970 }
1971 1971
1972 1972
1973 // Gets a series of segment boundaries representing a character class. If the 1973 // Gets a series of segment boundaries representing a character class. If the
1974 // character is in the range between an even and an odd boundary (counting from 1974 // character is in the range between an even and an odd boundary (counting from
1975 // start_index) then go to even_label, otherwise go to odd_label. We already 1975 // start_index) then go to even_label, otherwise go to odd_label. We already
1976 // know that the character is in the range of min_char to max_char inclusive. 1976 // know that the character is in the range of min_char to max_char inclusive.
1977 // Either label can be NULL indicating backtracking. Either label can also be 1977 // Either label can be NULL indicating backtracking. Either label can also be
1978 // equal to the fall_through label. 1978 // equal to the fall_through label.
1979 static void GenerateBranches(RegExpMacroAssembler* masm, 1979 static void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<int>* ranges,
1980 ZoneList<int>* ranges, 1980 int start_index, int end_index, uc32 min_char,
1981 int start_index, 1981 uc32 max_char, Label* fall_through,
1982 int end_index, 1982 Label* even_label, Label* odd_label) {
1983 uc16 min_char, 1983 DCHECK_LE(min_char, String::kMaxUtf16CodeUnit);
1984 uc16 max_char, 1984 DCHECK_LE(max_char, String::kMaxUtf16CodeUnit);
1985 Label* fall_through, 1985
1986 Label* even_label,
1987 Label* odd_label) {
1988 int first = ranges->at(start_index); 1986 int first = ranges->at(start_index);
1989 int last = ranges->at(end_index) - 1; 1987 int last = ranges->at(end_index) - 1;
1990 1988
1991 DCHECK_LT(min_char, first); 1989 DCHECK_LT(min_char, first);
1992 1990
1993 // Just need to test if the character is before or on-or-after 1991 // Just need to test if the character is before or on-or-after
1994 // a particular character. 1992 // a particular character.
1995 if (start_index == end_index) { 1993 if (start_index == end_index) {
1996 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label); 1994 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
1997 return; 1995 return;
(...skipping 489 matching lines...) Expand 10 before | Expand all | Expand 10 after
2487 if (compiler->one_byte()) { 2485 if (compiler->one_byte()) {
2488 char_mask = String::kMaxOneByteCharCode; 2486 char_mask = String::kMaxOneByteCharCode;
2489 } else { 2487 } else {
2490 char_mask = String::kMaxUtf16CodeUnit; 2488 char_mask = String::kMaxUtf16CodeUnit;
2491 } 2489 }
2492 if ((mask & char_mask) == char_mask) need_mask = false; 2490 if ((mask & char_mask) == char_mask) need_mask = false;
2493 mask &= char_mask; 2491 mask &= char_mask;
2494 } else { 2492 } else {
2495 // For 2-character preloads in one-byte mode or 1-character preloads in 2493 // For 2-character preloads in one-byte mode or 1-character preloads in
2496 // two-byte mode we also use a 16 bit load with zero extend. 2494 // two-byte mode we also use a 16 bit load with zero extend.
2495 static const uint32_t kTwoByteMask = 0xffff;
2496 static const uint32_t kFourByteMask = 0xffffffff;
2497 if (details->characters() == 2 && compiler->one_byte()) { 2497 if (details->characters() == 2 && compiler->one_byte()) {
2498 if ((mask & 0xffff) == 0xffff) need_mask = false; 2498 if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;
2499 } else if (details->characters() == 1 && !compiler->one_byte()) { 2499 } else if (details->characters() == 1 && !compiler->one_byte()) {
2500 if ((mask & 0xffff) == 0xffff) need_mask = false; 2500 if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;
2501 } else { 2501 } else {
2502 if (mask == 0xffffffff) need_mask = false; 2502 if (mask == kFourByteMask) need_mask = false;
2503 } 2503 }
2504 } 2504 }
2505 2505
2506 if (fall_through_on_failure) { 2506 if (fall_through_on_failure) {
2507 if (need_mask) { 2507 if (need_mask) {
2508 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); 2508 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
2509 } else { 2509 } else {
2510 assembler->CheckCharacter(value, on_possible_success); 2510 assembler->CheckCharacter(value, on_possible_success);
2511 } 2511 }
2512 } else { 2512 } else {
(...skipping 2292 matching lines...) Expand 10 before | Expand all | Expand 10 after
4805 } 4805 }
4806 for (int i = 0; i < length; i += 2) { 4806 for (int i = 0; i < length; i += 2) {
4807 if (special_class[i] != (range.to() + 1)) { 4807 if (special_class[i] != (range.to() + 1)) {
4808 return false; 4808 return false;
4809 } 4809 }
4810 range = ranges->at((i >> 1) + 1); 4810 range = ranges->at((i >> 1) + 1);
4811 if (special_class[i+1] != range.from()) { 4811 if (special_class[i+1] != range.from()) {
4812 return false; 4812 return false;
4813 } 4813 }
4814 } 4814 }
4815 if (range.to() != 0xffff) { 4815 if (range.to() != String::kMaxCodePoint) {
4816 return false; 4816 return false;
4817 } 4817 }
4818 return true; 4818 return true;
4819 } 4819 }
4820 4820
4821 4821
4822 static bool CompareRanges(ZoneList<CharacterRange>* ranges, 4822 static bool CompareRanges(ZoneList<CharacterRange>* ranges,
4823 const int* special_class, 4823 const int* special_class,
4824 int length) { 4824 int length) {
4825 length--; // Remove final marker. 4825 length--; // Remove final marker.
(...skipping 1050 matching lines...) Expand 10 before | Expand all | Expand 10 after
5876 } 5876 }
5877 5877
5878 5878
5879 void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone, 5879 void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
5880 ZoneList<CharacterRange>* ranges, 5880 ZoneList<CharacterRange>* ranges,
5881 bool is_one_byte) { 5881 bool is_one_byte) {
5882 int range_count = ranges->length(); 5882 int range_count = ranges->length();
5883 for (int i = 0; i < range_count; i++) { 5883 for (int i = 0; i < range_count; i++) {
5884 CharacterRange range = ranges->at(i); 5884 CharacterRange range = ranges->at(i);
5885 uc32 bottom = range.from(); 5885 uc32 bottom = range.from();
5886 uc32 top = range.to(); 5886 if (bottom > String::kMaxUtf16CodeUnit) return;
5887 uc32 top = Min(range.to(), String::kMaxUtf16CodeUnit);
5887 // Nothing to be done for surrogates. 5888 // Nothing to be done for surrogates.
5888 if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) return; 5889 if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) return;
5889 if (is_one_byte && !RangeContainsLatin1Equivalents(range)) { 5890 if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
5890 if (bottom > String::kMaxOneByteCharCode) return; 5891 if (bottom > String::kMaxOneByteCharCode) return;
5891 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode; 5892 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
5892 } 5893 }
5893 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 5894 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
5894 if (top == bottom) { 5895 if (top == bottom) {
5895 // If this is a singleton we just expand the one character. 5896 // If this is a singleton we just expand the one character.
5896 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); 5897 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
(...skipping 341 matching lines...) Expand 10 before | Expand all | Expand 10 after
6238 ins.set_value(Entry(current.to() + 1, 6239 ins.set_value(Entry(current.to() + 1,
6239 entry->to(), 6240 entry->to(),
6240 entry->out_set())); 6241 entry->out_set()));
6241 entry->set_to(current.to()); 6242 entry->set_to(current.to());
6242 } 6243 }
6243 DCHECK(entry->to() <= current.to()); 6244 DCHECK(entry->to() <= current.to());
6244 // The overlapping range is now completely contained by the range 6245 // The overlapping range is now completely contained by the range
6245 // we're adding so we can just update it and move the start point 6246 // we're adding so we can just update it and move the start point
6246 // of the range we're adding just past it. 6247 // of the range we're adding just past it.
6247 entry->AddValue(value, zone); 6248 entry->AddValue(value, zone);
6248 // Bail out if the last interval ended at 0xFFFF since otherwise
6249 // adding 1 will wrap around to 0.
6250 if (entry->to() == String::kMaxUtf16CodeUnit)
6251 break;
6252 DCHECK(entry->to() + 1 > current.from()); 6249 DCHECK(entry->to() + 1 > current.from());
6253 current.set_from(entry->to() + 1); 6250 current.set_from(entry->to() + 1);
6254 } else { 6251 } else {
6255 // There is no overlap so we can just add the range 6252 // There is no overlap so we can just add the range
6256 ZoneSplayTree<Config>::Locator ins; 6253 ZoneSplayTree<Config>::Locator ins;
6257 bool inserted = tree()->Insert(current.from(), &ins); 6254 bool inserted = tree()->Insert(current.from(), &ins);
6258 DCHECK(inserted); 6255 DCHECK(inserted);
6259 USE(inserted); 6256 USE(inserted);
6260 ins.set_value(Entry(current.from(), 6257 ins.set_value(Entry(current.from(),
6261 current.to(), 6258 current.to(),
(...skipping 270 matching lines...) Expand 10 before | Expand all | Expand 10 after
6532 6529
6533 6530
6534 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) { 6531 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) {
6535 ranges->Sort(CompareRangeByFrom); 6532 ranges->Sort(CompareRangeByFrom);
6536 uc16 last = 0; 6533 uc16 last = 0;
6537 for (int i = 0; i < ranges->length(); i++) { 6534 for (int i = 0; i < ranges->length(); i++) {
6538 CharacterRange range = ranges->at(i); 6535 CharacterRange range = ranges->at(i);
6539 if (last < range.from()) 6536 if (last < range.from())
6540 AddRange(CharacterRange::Range(last, range.from() - 1)); 6537 AddRange(CharacterRange::Range(last, range.from() - 1));
6541 if (range.to() >= last) { 6538 if (range.to() >= last) {
6542 if (range.to() == String::kMaxUtf16CodeUnit) { 6539 if (range.to() == String::kMaxCodePoint) {
6543 return; 6540 return;
6544 } else { 6541 } else {
6545 last = range.to() + 1; 6542 last = range.to() + 1;
6546 } 6543 }
6547 } 6544 }
6548 } 6545 }
6549 AddRange(CharacterRange::Range(last, String::kMaxUtf16CodeUnit)); 6546 AddRange(CharacterRange::Range(last, String::kMaxCodePoint));
6550 } 6547 }
6551 6548
6552 6549
6553 void DispatchTableConstructor::VisitText(TextNode* that) { 6550 void DispatchTableConstructor::VisitText(TextNode* that) {
6554 TextElement elm = that->elements()->at(0); 6551 TextElement elm = that->elements()->at(0);
6555 switch (elm.text_type()) { 6552 switch (elm.text_type()) {
6556 case TextElement::ATOM: { 6553 case TextElement::ATOM: {
6557 uc16 c = elm.atom()->data()[0]; 6554 uc16 c = elm.atom()->data()[0];
6558 AddRange(CharacterRange::Range(c, c)); 6555 AddRange(CharacterRange::Range(c, c));
6559 break; 6556 break;
(...skipping 307 matching lines...) Expand 10 before | Expand all | Expand 10 after
6867 6864
6868 6865
6869 void RegExpResultsCache::Clear(FixedArray* cache) { 6866 void RegExpResultsCache::Clear(FixedArray* cache) {
6870 for (int i = 0; i < kRegExpResultsCacheSize; i++) { 6867 for (int i = 0; i < kRegExpResultsCacheSize; i++) {
6871 cache->set(i, Smi::FromInt(0)); 6868 cache->set(i, Smi::FromInt(0));
6872 } 6869 }
6873 } 6870 }
6874 6871
6875 } // namespace internal 6872 } // namespace internal
6876 } // namespace v8 6873 } // namespace v8
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698