OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/jsregexp.h" | 5 #include "src/regexp/jsregexp.h" |
6 | 6 |
7 #include "src/ast/ast.h" | 7 #include "src/ast/ast.h" |
8 #include "src/base/platform/platform.h" | 8 #include "src/base/platform/platform.h" |
9 #include "src/compilation-cache.h" | 9 #include "src/compilation-cache.h" |
10 #include "src/compiler.h" | 10 #include "src/compiler.h" |
(...skipping 1958 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1969 } | 1969 } |
1970 } | 1970 } |
1971 | 1971 |
1972 | 1972 |
1973 // Gets a series of segment boundaries representing a character class. If the | 1973 // Gets a series of segment boundaries representing a character class. If the |
1974 // character is in the range between an even and an odd boundary (counting from | 1974 // character is in the range between an even and an odd boundary (counting from |
1975 // start_index) then go to even_label, otherwise go to odd_label. We already | 1975 // start_index) then go to even_label, otherwise go to odd_label. We already |
1976 // know that the character is in the range of min_char to max_char inclusive. | 1976 // know that the character is in the range of min_char to max_char inclusive. |
1977 // Either label can be NULL indicating backtracking. Either label can also be | 1977 // Either label can be NULL indicating backtracking. Either label can also be |
1978 // equal to the fall_through label. | 1978 // equal to the fall_through label. |
1979 static void GenerateBranches(RegExpMacroAssembler* masm, | 1979 static void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<int>* ranges, |
1980 ZoneList<int>* ranges, | 1980 int start_index, int end_index, uc32 min_char, |
1981 int start_index, | 1981 uc32 max_char, Label* fall_through, |
1982 int end_index, | 1982 Label* even_label, Label* odd_label) { |
1983 uc16 min_char, | 1983 DCHECK_LE(min_char, String::kMaxUtf16CodeUnit); |
1984 uc16 max_char, | 1984 DCHECK_LE(max_char, String::kMaxUtf16CodeUnit); |
1985 Label* fall_through, | 1985 |
1986 Label* even_label, | |
1987 Label* odd_label) { | |
1988 int first = ranges->at(start_index); | 1986 int first = ranges->at(start_index); |
1989 int last = ranges->at(end_index) - 1; | 1987 int last = ranges->at(end_index) - 1; |
1990 | 1988 |
1991 DCHECK_LT(min_char, first); | 1989 DCHECK_LT(min_char, first); |
1992 | 1990 |
1993 // Just need to test if the character is before or on-or-after | 1991 // Just need to test if the character is before or on-or-after |
1994 // a particular character. | 1992 // a particular character. |
1995 if (start_index == end_index) { | 1993 if (start_index == end_index) { |
1996 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label); | 1994 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label); |
1997 return; | 1995 return; |
(...skipping 489 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2487 if (compiler->one_byte()) { | 2485 if (compiler->one_byte()) { |
2488 char_mask = String::kMaxOneByteCharCode; | 2486 char_mask = String::kMaxOneByteCharCode; |
2489 } else { | 2487 } else { |
2490 char_mask = String::kMaxUtf16CodeUnit; | 2488 char_mask = String::kMaxUtf16CodeUnit; |
2491 } | 2489 } |
2492 if ((mask & char_mask) == char_mask) need_mask = false; | 2490 if ((mask & char_mask) == char_mask) need_mask = false; |
2493 mask &= char_mask; | 2491 mask &= char_mask; |
2494 } else { | 2492 } else { |
2495 // For 2-character preloads in one-byte mode or 1-character preloads in | 2493 // For 2-character preloads in one-byte mode or 1-character preloads in |
2496 // two-byte mode we also use a 16 bit load with zero extend. | 2494 // two-byte mode we also use a 16 bit load with zero extend. |
| 2495 static const uint32_t kTwoByteMask = 0xffff; |
| 2496 static const uint32_t kFourByteMask = 0xffffffff; |
2497 if (details->characters() == 2 && compiler->one_byte()) { | 2497 if (details->characters() == 2 && compiler->one_byte()) { |
2498 if ((mask & 0xffff) == 0xffff) need_mask = false; | 2498 if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false; |
2499 } else if (details->characters() == 1 && !compiler->one_byte()) { | 2499 } else if (details->characters() == 1 && !compiler->one_byte()) { |
2500 if ((mask & 0xffff) == 0xffff) need_mask = false; | 2500 if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false; |
2501 } else { | 2501 } else { |
2502 if (mask == 0xffffffff) need_mask = false; | 2502 if (mask == kFourByteMask) need_mask = false; |
2503 } | 2503 } |
2504 } | 2504 } |
2505 | 2505 |
2506 if (fall_through_on_failure) { | 2506 if (fall_through_on_failure) { |
2507 if (need_mask) { | 2507 if (need_mask) { |
2508 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); | 2508 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); |
2509 } else { | 2509 } else { |
2510 assembler->CheckCharacter(value, on_possible_success); | 2510 assembler->CheckCharacter(value, on_possible_success); |
2511 } | 2511 } |
2512 } else { | 2512 } else { |
(...skipping 2292 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4805 } | 4805 } |
4806 for (int i = 0; i < length; i += 2) { | 4806 for (int i = 0; i < length; i += 2) { |
4807 if (special_class[i] != (range.to() + 1)) { | 4807 if (special_class[i] != (range.to() + 1)) { |
4808 return false; | 4808 return false; |
4809 } | 4809 } |
4810 range = ranges->at((i >> 1) + 1); | 4810 range = ranges->at((i >> 1) + 1); |
4811 if (special_class[i+1] != range.from()) { | 4811 if (special_class[i+1] != range.from()) { |
4812 return false; | 4812 return false; |
4813 } | 4813 } |
4814 } | 4814 } |
4815 if (range.to() != 0xffff) { | 4815 if (range.to() != String::kMaxCodePoint) { |
4816 return false; | 4816 return false; |
4817 } | 4817 } |
4818 return true; | 4818 return true; |
4819 } | 4819 } |
4820 | 4820 |
4821 | 4821 |
4822 static bool CompareRanges(ZoneList<CharacterRange>* ranges, | 4822 static bool CompareRanges(ZoneList<CharacterRange>* ranges, |
4823 const int* special_class, | 4823 const int* special_class, |
4824 int length) { | 4824 int length) { |
4825 length--; // Remove final marker. | 4825 length--; // Remove final marker. |
(...skipping 1050 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5876 } | 5876 } |
5877 | 5877 |
5878 | 5878 |
5879 void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone, | 5879 void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone, |
5880 ZoneList<CharacterRange>* ranges, | 5880 ZoneList<CharacterRange>* ranges, |
5881 bool is_one_byte) { | 5881 bool is_one_byte) { |
5882 int range_count = ranges->length(); | 5882 int range_count = ranges->length(); |
5883 for (int i = 0; i < range_count; i++) { | 5883 for (int i = 0; i < range_count; i++) { |
5884 CharacterRange range = ranges->at(i); | 5884 CharacterRange range = ranges->at(i); |
5885 uc32 bottom = range.from(); | 5885 uc32 bottom = range.from(); |
5886 uc32 top = range.to(); | 5886 if (bottom > String::kMaxUtf16CodeUnit) return; |
| 5887 uc32 top = Min(range.to(), String::kMaxUtf16CodeUnit); |
5887 // Nothing to be done for surrogates. | 5888 // Nothing to be done for surrogates. |
5888 if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) return; | 5889 if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) return; |
5889 if (is_one_byte && !RangeContainsLatin1Equivalents(range)) { | 5890 if (is_one_byte && !RangeContainsLatin1Equivalents(range)) { |
5890 if (bottom > String::kMaxOneByteCharCode) return; | 5891 if (bottom > String::kMaxOneByteCharCode) return; |
5891 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode; | 5892 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode; |
5892 } | 5893 } |
5893 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 5894 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
5894 if (top == bottom) { | 5895 if (top == bottom) { |
5895 // If this is a singleton we just expand the one character. | 5896 // If this is a singleton we just expand the one character. |
5896 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); | 5897 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); |
(...skipping 341 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6238 ins.set_value(Entry(current.to() + 1, | 6239 ins.set_value(Entry(current.to() + 1, |
6239 entry->to(), | 6240 entry->to(), |
6240 entry->out_set())); | 6241 entry->out_set())); |
6241 entry->set_to(current.to()); | 6242 entry->set_to(current.to()); |
6242 } | 6243 } |
6243 DCHECK(entry->to() <= current.to()); | 6244 DCHECK(entry->to() <= current.to()); |
6244 // The overlapping range is now completely contained by the range | 6245 // The overlapping range is now completely contained by the range |
6245 // we're adding so we can just update it and move the start point | 6246 // we're adding so we can just update it and move the start point |
6246 // of the range we're adding just past it. | 6247 // of the range we're adding just past it. |
6247 entry->AddValue(value, zone); | 6248 entry->AddValue(value, zone); |
6248 // Bail out if the last interval ended at 0xFFFF since otherwise | |
6249 // adding 1 will wrap around to 0. | |
6250 if (entry->to() == String::kMaxUtf16CodeUnit) | |
6251 break; | |
6252 DCHECK(entry->to() + 1 > current.from()); | 6249 DCHECK(entry->to() + 1 > current.from()); |
6253 current.set_from(entry->to() + 1); | 6250 current.set_from(entry->to() + 1); |
6254 } else { | 6251 } else { |
6255 // There is no overlap so we can just add the range | 6252 // There is no overlap so we can just add the range |
6256 ZoneSplayTree<Config>::Locator ins; | 6253 ZoneSplayTree<Config>::Locator ins; |
6257 bool inserted = tree()->Insert(current.from(), &ins); | 6254 bool inserted = tree()->Insert(current.from(), &ins); |
6258 DCHECK(inserted); | 6255 DCHECK(inserted); |
6259 USE(inserted); | 6256 USE(inserted); |
6260 ins.set_value(Entry(current.from(), | 6257 ins.set_value(Entry(current.from(), |
6261 current.to(), | 6258 current.to(), |
(...skipping 270 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6532 | 6529 |
6533 | 6530 |
6534 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) { | 6531 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) { |
6535 ranges->Sort(CompareRangeByFrom); | 6532 ranges->Sort(CompareRangeByFrom); |
6536 uc16 last = 0; | 6533 uc16 last = 0; |
6537 for (int i = 0; i < ranges->length(); i++) { | 6534 for (int i = 0; i < ranges->length(); i++) { |
6538 CharacterRange range = ranges->at(i); | 6535 CharacterRange range = ranges->at(i); |
6539 if (last < range.from()) | 6536 if (last < range.from()) |
6540 AddRange(CharacterRange::Range(last, range.from() - 1)); | 6537 AddRange(CharacterRange::Range(last, range.from() - 1)); |
6541 if (range.to() >= last) { | 6538 if (range.to() >= last) { |
6542 if (range.to() == String::kMaxUtf16CodeUnit) { | 6539 if (range.to() == String::kMaxCodePoint) { |
6543 return; | 6540 return; |
6544 } else { | 6541 } else { |
6545 last = range.to() + 1; | 6542 last = range.to() + 1; |
6546 } | 6543 } |
6547 } | 6544 } |
6548 } | 6545 } |
6549 AddRange(CharacterRange::Range(last, String::kMaxUtf16CodeUnit)); | 6546 AddRange(CharacterRange::Range(last, String::kMaxCodePoint)); |
6550 } | 6547 } |
6551 | 6548 |
6552 | 6549 |
6553 void DispatchTableConstructor::VisitText(TextNode* that) { | 6550 void DispatchTableConstructor::VisitText(TextNode* that) { |
6554 TextElement elm = that->elements()->at(0); | 6551 TextElement elm = that->elements()->at(0); |
6555 switch (elm.text_type()) { | 6552 switch (elm.text_type()) { |
6556 case TextElement::ATOM: { | 6553 case TextElement::ATOM: { |
6557 uc16 c = elm.atom()->data()[0]; | 6554 uc16 c = elm.atom()->data()[0]; |
6558 AddRange(CharacterRange::Range(c, c)); | 6555 AddRange(CharacterRange::Range(c, c)); |
6559 break; | 6556 break; |
(...skipping 307 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6867 | 6864 |
6868 | 6865 |
6869 void RegExpResultsCache::Clear(FixedArray* cache) { | 6866 void RegExpResultsCache::Clear(FixedArray* cache) { |
6870 for (int i = 0; i < kRegExpResultsCacheSize; i++) { | 6867 for (int i = 0; i < kRegExpResultsCacheSize; i++) { |
6871 cache->set(i, Smi::FromInt(0)); | 6868 cache->set(i, Smi::FromInt(0)); |
6872 } | 6869 } |
6873 } | 6870 } |
6874 | 6871 |
6875 } // namespace internal | 6872 } // namespace internal |
6876 } // namespace v8 | 6873 } // namespace v8 |
OLD | NEW |