src/regexp/jsregexp.cc - Issue 1820823003: [regexp] Fix issues with character range limit.

Side by Side Diff: src/regexp/jsregexp.cc

Issue 1820823003: [regexp] Fix issues with character range limit. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/regexp/jsregexp.h"	5 #include "src/regexp/jsregexp.h"

6	6

7 #include "src/ast/ast.h"	7 #include "src/ast/ast.h"

8 #include "src/base/platform/platform.h"	8 #include "src/base/platform/platform.h"

9 #include "src/compilation-cache.h"	9 #include "src/compilation-cache.h"

10 #include "src/compiler.h"	10 #include "src/compiler.h"

(...skipping 1958 matching lines...) Expand 10 before | Expand all | Expand 10 after
1969 }	1969 }

1970 }	1970 }

1971	1971

1972	1972

1973 // Gets a series of segment boundaries representing a character class. If the	1973 // Gets a series of segment boundaries representing a character class. If the

1974 // character is in the range between an even and an odd boundary (counting from	1974 // character is in the range between an even and an odd boundary (counting from

1975 // start_index) then go to even_label, otherwise go to odd_label. We already	1975 // start_index) then go to even_label, otherwise go to odd_label. We already

1976 // know that the character is in the range of min_char to max_char inclusive.	1976 // know that the character is in the range of min_char to max_char inclusive.

1977 // Either label can be NULL indicating backtracking. Either label can also be	1977 // Either label can be NULL indicating backtracking. Either label can also be

1978 // equal to the fall_through label.	1978 // equal to the fall_through label.

1979 static void GenerateBranches(RegExpMacroAssembler* masm,	1979 static void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<int>* ranges,

1980 ZoneList<int>* ranges,	1980 int start_index, int end_index, uc32 min_char,

1981 int start_index,	1981 uc32 max_char, Label* fall_through,

1982 int end_index,	1982 Label* even_label, Label* odd_label) {

1983 uc16 min_char,	1983 DCHECK_LE(min_char, String::kMaxUtf16CodeUnit);

1984 uc16 max_char,	1984 DCHECK_LE(max_char, String::kMaxUtf16CodeUnit);

1985 Label* fall_through,	1985

1986 Label* even_label,

1987 Label* odd_label) {

1988 int first = ranges->at(start_index);	1986 int first = ranges->at(start_index);

1989 int last = ranges->at(end_index) - 1;	1987 int last = ranges->at(end_index) - 1;

1990	1988

1991 DCHECK_LT(min_char, first);	1989 DCHECK_LT(min_char, first);

1992	1990

1993 // Just need to test if the character is before or on-or-after	1991 // Just need to test if the character is before or on-or-after

1994 // a particular character.	1992 // a particular character.

1995 if (start_index == end_index) {	1993 if (start_index == end_index) {

1996 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);	1994 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);

1997 return;	1995 return;

(...skipping 489 matching lines...) Expand 10 before | Expand all | Expand 10 after
2487 if (compiler->one_byte()) {	2485 if (compiler->one_byte()) {

2488 char_mask = String::kMaxOneByteCharCode;	2486 char_mask = String::kMaxOneByteCharCode;

2489 } else {	2487 } else {

2490 char_mask = String::kMaxUtf16CodeUnit;	2488 char_mask = String::kMaxUtf16CodeUnit;

2491 }	2489 }

2492 if ((mask & char_mask) == char_mask) need_mask = false;	2490 if ((mask & char_mask) == char_mask) need_mask = false;

2493 mask &= char_mask;	2491 mask &= char_mask;

2494 } else {	2492 } else {

2495 // For 2-character preloads in one-byte mode or 1-character preloads in	2493 // For 2-character preloads in one-byte mode or 1-character preloads in

2496 // two-byte mode we also use a 16 bit load with zero extend.	2494 // two-byte mode we also use a 16 bit load with zero extend.

	2495 static const uint32_t kTwoByteMask = 0xffff;

	2496 static const uint32_t kFourByteMask = 0xffffffff;

2497 if (details->characters() == 2 && compiler->one_byte()) {	2497 if (details->characters() == 2 && compiler->one_byte()) {

2498 if ((mask & 0xffff) == 0xffff) need_mask = false;	2498 if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;

2499 } else if (details->characters() == 1 && !compiler->one_byte()) {	2499 } else if (details->characters() == 1 && !compiler->one_byte()) {

2500 if ((mask & 0xffff) == 0xffff) need_mask = false;	2500 if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;

2501 } else {	2501 } else {

2502 if (mask == 0xffffffff) need_mask = false;	2502 if (mask == kFourByteMask) need_mask = false;

2503 }	2503 }

2504 }	2504 }

2505	2505

2506 if (fall_through_on_failure) {	2506 if (fall_through_on_failure) {

2507 if (need_mask) {	2507 if (need_mask) {

2508 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);	2508 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);

2509 } else {	2509 } else {

2510 assembler->CheckCharacter(value, on_possible_success);	2510 assembler->CheckCharacter(value, on_possible_success);

2511 }	2511 }

2512 } else {	2512 } else {

(...skipping 2292 matching lines...) Expand 10 before | Expand all | Expand 10 after
4805 }	4805 }

4806 for (int i = 0; i < length; i += 2) {	4806 for (int i = 0; i < length; i += 2) {

4807 if (special_class[i] != (range.to() + 1)) {	4807 if (special_class[i] != (range.to() + 1)) {

4808 return false;	4808 return false;

4809 }	4809 }

4810 range = ranges->at((i >> 1) + 1);	4810 range = ranges->at((i >> 1) + 1);

4811 if (special_class[i+1] != range.from()) {	4811 if (special_class[i+1] != range.from()) {

4812 return false;	4812 return false;

4813 }	4813 }

4814 }	4814 }

4815 if (range.to() != 0xffff) {	4815 if (range.to() != String::kMaxCodePoint) {

4816 return false;	4816 return false;

4817 }	4817 }

4818 return true;	4818 return true;

4819 }	4819 }

4820	4820

4821	4821

4822 static bool CompareRanges(ZoneList<CharacterRange>* ranges,	4822 static bool CompareRanges(ZoneList<CharacterRange>* ranges,

4823 const int* special_class,	4823 const int* special_class,

4824 int length) {	4824 int length) {

4825 length--; // Remove final marker.	4825 length--; // Remove final marker.

(...skipping 1050 matching lines...) Expand 10 before | Expand all | Expand 10 after
5876 }	5876 }

5877	5877

5878	5878

5879 void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,	5879 void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,

5880 ZoneList<CharacterRange>* ranges,	5880 ZoneList<CharacterRange>* ranges,

5881 bool is_one_byte) {	5881 bool is_one_byte) {

5882 int range_count = ranges->length();	5882 int range_count = ranges->length();

5883 for (int i = 0; i < range_count; i++) {	5883 for (int i = 0; i < range_count; i++) {

5884 CharacterRange range = ranges->at(i);	5884 CharacterRange range = ranges->at(i);

5885 uc32 bottom = range.from();	5885 uc32 bottom = range.from();

5886 uc32 top = range.to();	5886 if (bottom > String::kMaxUtf16CodeUnit) return;

	5887 uc32 top = Min(range.to(), String::kMaxUtf16CodeUnit);

5887 // Nothing to be done for surrogates.	5888 // Nothing to be done for surrogates.

5888 if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) return;	5889 if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) return;

5889 if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {	5890 if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {

5890 if (bottom > String::kMaxOneByteCharCode) return;	5891 if (bottom > String::kMaxOneByteCharCode) return;

5891 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;	5892 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;

5892 }	5893 }

5893 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	5894 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

5894 if (top == bottom) {	5895 if (top == bottom) {

5895 // If this is a singleton we just expand the one character.	5896 // If this is a singleton we just expand the one character.

5896 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);	5897 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);

(...skipping 341 matching lines...) Expand 10 before | Expand all | Expand 10 after
6238 ins.set_value(Entry(current.to() + 1,	6239 ins.set_value(Entry(current.to() + 1,

6239 entry->to(),	6240 entry->to(),

6240 entry->out_set()));	6241 entry->out_set()));

6241 entry->set_to(current.to());	6242 entry->set_to(current.to());

6242 }	6243 }

6243 DCHECK(entry->to() <= current.to());	6244 DCHECK(entry->to() <= current.to());

6244 // The overlapping range is now completely contained by the range	6245 // The overlapping range is now completely contained by the range

6245 // we're adding so we can just update it and move the start point	6246 // we're adding so we can just update it and move the start point

6246 // of the range we're adding just past it.	6247 // of the range we're adding just past it.

6247 entry->AddValue(value, zone);	6248 entry->AddValue(value, zone);

6248 // Bail out if the last interval ended at 0xFFFF since otherwise

6249 // adding 1 will wrap around to 0.

6250 if (entry->to() == String::kMaxUtf16CodeUnit)

6251 break;

6252 DCHECK(entry->to() + 1 > current.from());	6249 DCHECK(entry->to() + 1 > current.from());

6253 current.set_from(entry->to() + 1);	6250 current.set_from(entry->to() + 1);

6254 } else {	6251 } else {

6255 // There is no overlap so we can just add the range	6252 // There is no overlap so we can just add the range

6256 ZoneSplayTree<Config>::Locator ins;	6253 ZoneSplayTree<Config>::Locator ins;

6257 bool inserted = tree()->Insert(current.from(), &ins);	6254 bool inserted = tree()->Insert(current.from(), &ins);

6258 DCHECK(inserted);	6255 DCHECK(inserted);

6259 USE(inserted);	6256 USE(inserted);

6260 ins.set_value(Entry(current.from(),	6257 ins.set_value(Entry(current.from(),

6261 current.to(),	6258 current.to(),

(...skipping 270 matching lines...) Expand 10 before | Expand all | Expand 10 after
6532	6529

6533	6530

6534 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) {	6531 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) {

6535 ranges->Sort(CompareRangeByFrom);	6532 ranges->Sort(CompareRangeByFrom);

6536 uc16 last = 0;	6533 uc16 last = 0;

6537 for (int i = 0; i < ranges->length(); i++) {	6534 for (int i = 0; i < ranges->length(); i++) {

6538 CharacterRange range = ranges->at(i);	6535 CharacterRange range = ranges->at(i);

6539 if (last < range.from())	6536 if (last < range.from())

6540 AddRange(CharacterRange::Range(last, range.from() - 1));	6537 AddRange(CharacterRange::Range(last, range.from() - 1));

6541 if (range.to() >= last) {	6538 if (range.to() >= last) {

6542 if (range.to() == String::kMaxUtf16CodeUnit) {	6539 if (range.to() == String::kMaxCodePoint) {

6543 return;	6540 return;

6544 } else {	6541 } else {

6545 last = range.to() + 1;	6542 last = range.to() + 1;

6546 }	6543 }

6547 }	6544 }

6548 }	6545 }

6549 AddRange(CharacterRange::Range(last, String::kMaxUtf16CodeUnit));	6546 AddRange(CharacterRange::Range(last, String::kMaxCodePoint));

6550 }	6547 }

6551	6548

6552	6549

6553 void DispatchTableConstructor::VisitText(TextNode* that) {	6550 void DispatchTableConstructor::VisitText(TextNode* that) {

6554 TextElement elm = that->elements()->at(0);	6551 TextElement elm = that->elements()->at(0);

6555 switch (elm.text_type()) {	6552 switch (elm.text_type()) {

6556 case TextElement::ATOM: {	6553 case TextElement::ATOM: {

6557 uc16 c = elm.atom()->data()[0];	6554 uc16 c = elm.atom()->data()[0];

6558 AddRange(CharacterRange::Range(c, c));	6555 AddRange(CharacterRange::Range(c, c));

6559 break;	6556 break;

(...skipping 307 matching lines...) Expand 10 before | Expand all | Expand 10 after
6867	6864

6868	6865

6869 void RegExpResultsCache::Clear(FixedArray* cache) {	6866 void RegExpResultsCache::Clear(FixedArray* cache) {

6870 for (int i = 0; i < kRegExpResultsCacheSize; i++) {	6867 for (int i = 0; i < kRegExpResultsCacheSize; i++) {

6871 cache->set(i, Smi::FromInt(0));	6868 cache->set(i, Smi::FromInt(0));

6872 }	6869 }

6873 }	6870 }

6874	6871

6875 } // namespace internal	6872 } // namespace internal

6876 } // namespace v8	6873 } // namespace v8

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »