Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 1663 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1674 bool ascii_subject, | 1674 bool ascii_subject, |
| 1675 unibrow::uchar* letters) { | 1675 unibrow::uchar* letters) { |
| 1676 int length = | 1676 int length = |
| 1677 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters); | 1677 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters); |
| 1678 // Unibrow returns 0 or 1 for characters where case independence is | 1678 // Unibrow returns 0 or 1 for characters where case independence is |
| 1679 // trivial. | 1679 // trivial. |
| 1680 if (length == 0) { | 1680 if (length == 0) { |
| 1681 letters[0] = character; | 1681 letters[0] = character; |
| 1682 length = 1; | 1682 length = 1; |
| 1683 } | 1683 } |
| 1684 if (!ascii_subject || character <= String::kMaxAsciiCharCode) { | 1684 if (!ascii_subject || character <= String::kMaxOneByteCharCode) { |
| 1685 return length; | 1685 return length; |
| 1686 } | 1686 } |
| 1687 // The standard requires that non-ASCII characters cannot have ASCII | 1687 // The standard requires that non-ASCII characters cannot have ASCII |
| 1688 // character codes in their equivalence class. | 1688 // character codes in their equivalence class. |
| 1689 return 0; | 1689 return 0; |
| 1690 } | 1690 } |
| 1691 | 1691 |
| 1692 | 1692 |
| 1693 static inline bool EmitSimpleCharacter(Isolate* isolate, | 1693 static inline bool EmitSimpleCharacter(Isolate* isolate, |
| 1694 RegExpCompiler* compiler, | 1694 RegExpCompiler* compiler, |
| (...skipping 30 matching lines...) Expand all Loading... | |
| 1725 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1725 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 1726 int length = GetCaseIndependentLetters(isolate, c, ascii, chars); | 1726 int length = GetCaseIndependentLetters(isolate, c, ascii, chars); |
| 1727 if (length < 1) { | 1727 if (length < 1) { |
| 1728 // This can't match. Must be an ASCII subject and a non-ASCII character. | 1728 // This can't match. Must be an ASCII subject and a non-ASCII character. |
| 1729 // We do not need to do anything since the ASCII pass already handled this. | 1729 // We do not need to do anything since the ASCII pass already handled this. |
| 1730 return false; // Bounds not checked. | 1730 return false; // Bounds not checked. |
| 1731 } | 1731 } |
| 1732 bool checked = false; | 1732 bool checked = false; |
| 1733 // We handle the length > 1 case in a later pass. | 1733 // We handle the length > 1 case in a later pass. |
| 1734 if (length == 1) { | 1734 if (length == 1) { |
| 1735 if (ascii && c > String::kMaxAsciiCharCodeU) { | 1735 if (ascii && c > String::kMaxOneByteCharCodeU) { |
| 1736 // Can't match - see above. | 1736 // Can't match - see above. |
| 1737 return false; // Bounds not checked. | 1737 return false; // Bounds not checked. |
| 1738 } | 1738 } |
| 1739 if (!preloaded) { | 1739 if (!preloaded) { |
| 1740 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 1740 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
| 1741 checked = check; | 1741 checked = check; |
| 1742 } | 1742 } |
| 1743 macro_assembler->CheckNotCharacter(c, on_failure); | 1743 macro_assembler->CheckNotCharacter(c, on_failure); |
| 1744 } | 1744 } |
| 1745 return checked; | 1745 return checked; |
| 1746 } | 1746 } |
| 1747 | 1747 |
| 1748 | 1748 |
| 1749 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, | 1749 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, |
| 1750 bool ascii, | 1750 bool ascii, |
| 1751 uc16 c1, | 1751 uc16 c1, |
| 1752 uc16 c2, | 1752 uc16 c2, |
| 1753 Label* on_failure) { | 1753 Label* on_failure) { |
| 1754 uc16 char_mask; | 1754 uc16 char_mask; |
| 1755 if (ascii) { | 1755 if (ascii) { |
| 1756 char_mask = String::kMaxAsciiCharCode; | 1756 char_mask = String::kMaxOneByteCharCode; |
| 1757 } else { | 1757 } else { |
| 1758 char_mask = String::kMaxUtf16CodeUnit; | 1758 char_mask = String::kMaxUtf16CodeUnit; |
| 1759 } | 1759 } |
| 1760 uc16 exor = c1 ^ c2; | 1760 uc16 exor = c1 ^ c2; |
| 1761 // Check whether exor has only one bit set. | 1761 // Check whether exor has only one bit set. |
| 1762 if (((exor - 1) & exor) == 0) { | 1762 if (((exor - 1) & exor) == 0) { |
| 1763 // If c1 and c2 differ only by one bit. | 1763 // If c1 and c2 differ only by one bit. |
| 1764 // Ecma262UnCanonicalize always gives the highest number last. | 1764 // Ecma262UnCanonicalize always gives the highest number last. |
| 1765 ASSERT(c2 > c1); | 1765 ASSERT(c2 > c1); |
| 1766 uc16 mask = char_mask ^ exor; | 1766 uc16 mask = char_mask ^ exor; |
| (...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2000 // encoding space can be quickly tested with a table lookup, so we don't | 2000 // encoding space can be quickly tested with a table lookup, so we don't |
| 2001 // wish to do binary chop search at a smaller granularity than that. A | 2001 // wish to do binary chop search at a smaller granularity than that. A |
| 2002 // 128-character space can take up a lot of space in the ranges array if, | 2002 // 128-character space can take up a lot of space in the ranges array if, |
| 2003 // for example, we only want to match every second character (eg. the lower | 2003 // for example, we only want to match every second character (eg. the lower |
| 2004 // case characters on some Unicode pages). | 2004 // case characters on some Unicode pages). |
| 2005 int binary_chop_index = (end_index + start_index) / 2; | 2005 int binary_chop_index = (end_index + start_index) / 2; |
| 2006 // The first test ensures that we get to the code that handles the ASCII | 2006 // The first test ensures that we get to the code that handles the ASCII |
| 2007 // range with a single not-taken branch, speeding up this important | 2007 // range with a single not-taken branch, speeding up this important |
| 2008 // character range (even non-ASCII charset-based text has spaces and | 2008 // character range (even non-ASCII charset-based text has spaces and |
| 2009 // punctuation). | 2009 // punctuation). |
| 2010 if (*border - 1 > String::kMaxAsciiCharCode && // ASCII case. | 2010 if (*border - 1 > String::kMaxOneByteCharCode && // ASCII case. |
|
Yang
2013/01/07 16:15:07
Change comment.
| |
| 2011 end_index - start_index > (*new_start_index - start_index) * 2 && | 2011 end_index - start_index > (*new_start_index - start_index) * 2 && |
| 2012 last - first > kSize * 2 && | 2012 last - first > kSize * 2 && |
| 2013 binary_chop_index > *new_start_index && | 2013 binary_chop_index > *new_start_index && |
| 2014 ranges->at(binary_chop_index) >= first + 2 * kSize) { | 2014 ranges->at(binary_chop_index) >= first + 2 * kSize) { |
| 2015 int scan_forward_for_section_border = binary_chop_index;; | 2015 int scan_forward_for_section_border = binary_chop_index;; |
| 2016 int new_border = (ranges->at(binary_chop_index) | kMask) + 1; | 2016 int new_border = (ranges->at(binary_chop_index) | kMask) + 1; |
| 2017 | 2017 |
| 2018 while (scan_forward_for_section_border < end_index) { | 2018 while (scan_forward_for_section_border < end_index) { |
| 2019 if (ranges->at(scan_forward_for_section_border) > new_border) { | 2019 if (ranges->at(scan_forward_for_section_border) > new_border) { |
| 2020 *new_start_index = scan_forward_for_section_border; | 2020 *new_start_index = scan_forward_for_section_border; |
| (...skipping 170 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2191 max_char, | 2191 max_char, |
| 2192 &dummy, | 2192 &dummy, |
| 2193 flip ? odd_label : even_label, | 2193 flip ? odd_label : even_label, |
| 2194 flip ? even_label : odd_label); | 2194 flip ? even_label : odd_label); |
| 2195 } | 2195 } |
| 2196 } | 2196 } |
| 2197 | 2197 |
| 2198 | 2198 |
| 2199 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 2199 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, |
| 2200 RegExpCharacterClass* cc, | 2200 RegExpCharacterClass* cc, |
| 2201 bool ascii, | 2201 bool ascii, |
|
Yang
2013/01/07 16:15:07
Changing the parameter name would make sense.
| |
| 2202 Label* on_failure, | 2202 Label* on_failure, |
| 2203 int cp_offset, | 2203 int cp_offset, |
| 2204 bool check_offset, | 2204 bool check_offset, |
| 2205 bool preloaded, | 2205 bool preloaded, |
| 2206 Zone* zone) { | 2206 Zone* zone) { |
| 2207 ZoneList<CharacterRange>* ranges = cc->ranges(zone); | 2207 ZoneList<CharacterRange>* ranges = cc->ranges(zone); |
| 2208 if (!CharacterRange::IsCanonical(ranges)) { | 2208 if (!CharacterRange::IsCanonical(ranges)) { |
| 2209 CharacterRange::Canonicalize(ranges); | 2209 CharacterRange::Canonicalize(ranges); |
| 2210 } | 2210 } |
| 2211 | 2211 |
| 2212 int max_char; | 2212 int max_char; |
| 2213 if (ascii) { | 2213 if (ascii) { |
| 2214 max_char = String::kMaxAsciiCharCode; | 2214 max_char = String::kMaxOneByteCharCode; |
| 2215 } else { | 2215 } else { |
| 2216 max_char = String::kMaxUtf16CodeUnit; | 2216 max_char = String::kMaxUtf16CodeUnit; |
| 2217 } | 2217 } |
| 2218 | 2218 |
| 2219 int range_count = ranges->length(); | 2219 int range_count = ranges->length(); |
| 2220 | 2220 |
| 2221 int last_valid_range = range_count - 1; | 2221 int last_valid_range = range_count - 1; |
| 2222 while (last_valid_range >= 0) { | 2222 while (last_valid_range >= 0) { |
| 2223 CharacterRange& range = ranges->at(last_valid_range); | 2223 CharacterRange& range = ranges->at(last_valid_range); |
| 2224 if (range.from() <= max_char) { | 2224 if (range.from() <= max_char) { |
| (...skipping 277 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2502 static inline uint32_t SmearBitsRight(uint32_t v) { | 2502 static inline uint32_t SmearBitsRight(uint32_t v) { |
| 2503 v |= v >> 1; | 2503 v |= v >> 1; |
| 2504 v |= v >> 2; | 2504 v |= v >> 2; |
| 2505 v |= v >> 4; | 2505 v |= v >> 4; |
| 2506 v |= v >> 8; | 2506 v |= v >> 8; |
| 2507 v |= v >> 16; | 2507 v |= v >> 16; |
| 2508 return v; | 2508 return v; |
| 2509 } | 2509 } |
| 2510 | 2510 |
| 2511 | 2511 |
| 2512 bool QuickCheckDetails::Rationalize(bool asc) { | 2512 bool QuickCheckDetails::Rationalize(bool asc) { |
|
Yang
2013/01/07 16:15:07
Ditto (parameter name).
| |
| 2513 bool found_useful_op = false; | 2513 bool found_useful_op = false; |
| 2514 uint32_t char_mask; | 2514 uint32_t char_mask; |
| 2515 if (asc) { | 2515 if (asc) { |
| 2516 char_mask = String::kMaxAsciiCharCode; | 2516 char_mask = String::kMaxOneByteCharCode; |
| 2517 } else { | 2517 } else { |
| 2518 char_mask = String::kMaxUtf16CodeUnit; | 2518 char_mask = String::kMaxUtf16CodeUnit; |
| 2519 } | 2519 } |
| 2520 mask_ = 0; | 2520 mask_ = 0; |
| 2521 value_ = 0; | 2521 value_ = 0; |
| 2522 int char_shift = 0; | 2522 int char_shift = 0; |
| 2523 for (int i = 0; i < characters_; i++) { | 2523 for (int i = 0; i < characters_; i++) { |
| 2524 Position* pos = &positions_[i]; | 2524 Position* pos = &positions_[i]; |
| 2525 if ((pos->mask & String::kMaxAsciiCharCode) != 0) { | 2525 if ((pos->mask & String::kMaxOneByteCharCode) != 0) { |
| 2526 found_useful_op = true; | 2526 found_useful_op = true; |
| 2527 } | 2527 } |
| 2528 mask_ |= (pos->mask & char_mask) << char_shift; | 2528 mask_ |= (pos->mask & char_mask) << char_shift; |
| 2529 value_ |= (pos->value & char_mask) << char_shift; | 2529 value_ |= (pos->value & char_mask) << char_shift; |
| 2530 char_shift += asc ? 8 : 16; | 2530 char_shift += asc ? 8 : 16; |
| 2531 } | 2531 } |
| 2532 return found_useful_op; | 2532 return found_useful_op; |
| 2533 } | 2533 } |
| 2534 | 2534 |
| 2535 | 2535 |
| (...skipping 21 matching lines...) Expand all Loading... | |
| 2557 details->characters()); | 2557 details->characters()); |
| 2558 } | 2558 } |
| 2559 | 2559 |
| 2560 | 2560 |
| 2561 bool need_mask = true; | 2561 bool need_mask = true; |
| 2562 | 2562 |
| 2563 if (details->characters() == 1) { | 2563 if (details->characters() == 1) { |
| 2564 // If number of characters preloaded is 1 then we used a byte or 16 bit | 2564 // If number of characters preloaded is 1 then we used a byte or 16 bit |
| 2565 // load so the value is already masked down. | 2565 // load so the value is already masked down. |
| 2566 uint32_t char_mask; | 2566 uint32_t char_mask; |
| 2567 if (compiler->ascii()) { | 2567 if (compiler->ascii()) { |
|
Yang
2013/01/07 16:15:07
Maybe onebyte() instead of ascii()?
| |
| 2568 char_mask = String::kMaxAsciiCharCode; | 2568 char_mask = String::kMaxOneByteCharCode; |
| 2569 } else { | 2569 } else { |
| 2570 char_mask = String::kMaxUtf16CodeUnit; | 2570 char_mask = String::kMaxUtf16CodeUnit; |
| 2571 } | 2571 } |
| 2572 if ((mask & char_mask) == char_mask) need_mask = false; | 2572 if ((mask & char_mask) == char_mask) need_mask = false; |
| 2573 mask &= char_mask; | 2573 mask &= char_mask; |
| 2574 } else { | 2574 } else { |
| 2575 // For 2-character preloads in ASCII mode or 1-character preloads in | 2575 // For 2-character preloads in ASCII mode or 1-character preloads in |
| 2576 // TWO_BYTE mode we also use a 16 bit load with zero extend. | 2576 // TWO_BYTE mode we also use a 16 bit load with zero extend. |
| 2577 if (details->characters() == 2 && compiler->ascii()) { | 2577 if (details->characters() == 2 && compiler->ascii()) { |
| 2578 if ((mask & 0x7f7f) == 0x7f7f) need_mask = false; | 2578 if ((mask & 0xffff) == 0xffff) need_mask = false; |
| 2579 } else if (details->characters() == 1 && !compiler->ascii()) { | 2579 } else if (details->characters() == 1 && !compiler->ascii()) { |
| 2580 if ((mask & 0xffff) == 0xffff) need_mask = false; | 2580 if ((mask & 0xffff) == 0xffff) need_mask = false; |
| 2581 } else { | 2581 } else { |
| 2582 if (mask == 0xffffffff) need_mask = false; | 2582 if (mask == 0xffffffff) need_mask = false; |
| 2583 } | 2583 } |
| 2584 } | 2584 } |
| 2585 | 2585 |
| 2586 if (fall_through_on_failure) { | 2586 if (fall_through_on_failure) { |
| 2587 if (need_mask) { | 2587 if (need_mask) { |
| 2588 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); | 2588 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); |
| (...skipping 21 matching lines...) Expand all Loading... | |
| 2610 // generating a quick check. | 2610 // generating a quick check. |
| 2611 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2611 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| 2612 RegExpCompiler* compiler, | 2612 RegExpCompiler* compiler, |
| 2613 int characters_filled_in, | 2613 int characters_filled_in, |
| 2614 bool not_at_start) { | 2614 bool not_at_start) { |
| 2615 Isolate* isolate = Isolate::Current(); | 2615 Isolate* isolate = Isolate::Current(); |
| 2616 ASSERT(characters_filled_in < details->characters()); | 2616 ASSERT(characters_filled_in < details->characters()); |
| 2617 int characters = details->characters(); | 2617 int characters = details->characters(); |
| 2618 int char_mask; | 2618 int char_mask; |
| 2619 if (compiler->ascii()) { | 2619 if (compiler->ascii()) { |
| 2620 char_mask = String::kMaxAsciiCharCode; | 2620 char_mask = String::kMaxOneByteCharCode; |
| 2621 } else { | 2621 } else { |
| 2622 char_mask = String::kMaxUtf16CodeUnit; | 2622 char_mask = String::kMaxUtf16CodeUnit; |
| 2623 } | 2623 } |
| 2624 for (int k = 0; k < elms_->length(); k++) { | 2624 for (int k = 0; k < elms_->length(); k++) { |
| 2625 TextElement elm = elms_->at(k); | 2625 TextElement elm = elms_->at(k); |
| 2626 if (elm.type == TextElement::ATOM) { | 2626 if (elm.type == TextElement::ATOM) { |
| 2627 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2627 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| 2628 for (int i = 0; i < characters && i < quarks.length(); i++) { | 2628 for (int i = 0; i < characters && i < quarks.length(); i++) { |
| 2629 QuickCheckDetails::Position* pos = | 2629 QuickCheckDetails::Position* pos = |
| 2630 details->positions(characters_filled_in); | 2630 details->positions(characters_filled_in); |
| (...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2858 VisitMarker marker(info()); | 2858 VisitMarker marker(info()); |
| 2859 int element_count = elms_->length(); | 2859 int element_count = elms_->length(); |
| 2860 for (int i = 0; i < element_count; i++) { | 2860 for (int i = 0; i < element_count; i++) { |
| 2861 TextElement elm = elms_->at(i); | 2861 TextElement elm = elms_->at(i); |
| 2862 if (elm.type == TextElement::ATOM) { | 2862 if (elm.type == TextElement::ATOM) { |
| 2863 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2863 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| 2864 for (int j = 0; j < quarks.length(); j++) { | 2864 for (int j = 0; j < quarks.length(); j++) { |
| 2865 // We don't need special handling for case independence | 2865 // We don't need special handling for case independence |
| 2866 // because of the rule that case independence cannot make | 2866 // because of the rule that case independence cannot make |
| 2867 // a non-ASCII character match an ASCII character. | 2867 // a non-ASCII character match an ASCII character. |
| 2868 if (quarks[j] > String::kMaxAsciiCharCode) { | 2868 if (quarks[j] > String::kMaxOneByteCharCode) { |
|
Yang
2013/01/07 16:15:07
Does the comment still hold true for the Latin1 ch
| |
| 2869 return set_replacement(NULL); | 2869 return set_replacement(NULL); |
| 2870 } | 2870 } |
| 2871 } | 2871 } |
| 2872 } else { | 2872 } else { |
| 2873 ASSERT(elm.type == TextElement::CHAR_CLASS); | 2873 ASSERT(elm.type == TextElement::CHAR_CLASS); |
| 2874 RegExpCharacterClass* cc = elm.data.u_char_class; | 2874 RegExpCharacterClass* cc = elm.data.u_char_class; |
| 2875 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); | 2875 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); |
| 2876 if (!CharacterRange::IsCanonical(ranges)) { | 2876 if (!CharacterRange::IsCanonical(ranges)) { |
| 2877 CharacterRange::Canonicalize(ranges); | 2877 CharacterRange::Canonicalize(ranges); |
| 2878 } | 2878 } |
| 2879 // Now they are in order so we only need to look at the first. | 2879 // Now they are in order so we only need to look at the first. |
| 2880 int range_count = ranges->length(); | 2880 int range_count = ranges->length(); |
| 2881 if (cc->is_negated()) { | 2881 if (cc->is_negated()) { |
| 2882 if (range_count != 0 && | 2882 if (range_count != 0 && |
| 2883 ranges->at(0).from() == 0 && | 2883 ranges->at(0).from() == 0 && |
| 2884 ranges->at(0).to() >= String::kMaxAsciiCharCode) { | 2884 ranges->at(0).to() >= String::kMaxOneByteCharCode) { |
| 2885 return set_replacement(NULL); | 2885 return set_replacement(NULL); |
| 2886 } | 2886 } |
| 2887 } else { | 2887 } else { |
| 2888 if (range_count == 0 || | 2888 if (range_count == 0 || |
| 2889 ranges->at(0).from() > String::kMaxAsciiCharCode) { | 2889 ranges->at(0).from() > String::kMaxOneByteCharCode) { |
| 2890 return set_replacement(NULL); | 2890 return set_replacement(NULL); |
| 2891 } | 2891 } |
| 2892 } | 2892 } |
| 2893 } | 2893 } |
| 2894 } | 2894 } |
| 2895 return FilterSuccessor(depth - 1); | 2895 return FilterSuccessor(depth - 1); |
| 2896 } | 2896 } |
| 2897 | 2897 |
| 2898 | 2898 |
| 2899 RegExpNode* LoopChoiceNode::FilterASCII(int depth) { | 2899 RegExpNode* LoopChoiceNode::FilterASCII(int depth) { |
| (...skipping 392 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3292 int cp_offset = trace->cp_offset() + elm.cp_offset; | 3292 int cp_offset = trace->cp_offset() + elm.cp_offset; |
| 3293 if (elm.type == TextElement::ATOM) { | 3293 if (elm.type == TextElement::ATOM) { |
| 3294 Vector<const uc16> quarks = elm.data.u_atom->data(); | 3294 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| 3295 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { | 3295 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { |
| 3296 if (first_element_checked && i == 0 && j == 0) continue; | 3296 if (first_element_checked && i == 0 && j == 0) continue; |
| 3297 if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue; | 3297 if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue; |
| 3298 EmitCharacterFunction* emit_function = NULL; | 3298 EmitCharacterFunction* emit_function = NULL; |
| 3299 switch (pass) { | 3299 switch (pass) { |
| 3300 case NON_ASCII_MATCH: | 3300 case NON_ASCII_MATCH: |
| 3301 ASSERT(ascii); | 3301 ASSERT(ascii); |
| 3302 if (quarks[j] > String::kMaxAsciiCharCode) { | 3302 if (quarks[j] > String::kMaxOneByteCharCode) { |
| 3303 assembler->GoTo(backtrack); | 3303 assembler->GoTo(backtrack); |
| 3304 return; | 3304 return; |
| 3305 } | 3305 } |
| 3306 break; | 3306 break; |
| 3307 case NON_LETTER_CHARACTER_MATCH: | 3307 case NON_LETTER_CHARACTER_MATCH: |
| 3308 emit_function = &EmitAtomNonLetter; | 3308 emit_function = &EmitAtomNonLetter; |
| 3309 break; | 3309 break; |
| 3310 case SIMPLE_CHARACTER_MATCH: | 3310 case SIMPLE_CHARACTER_MATCH: |
| 3311 emit_function = &EmitSimpleCharacter; | 3311 emit_function = &EmitSimpleCharacter; |
| 3312 break; | 3312 break; |
| (...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3491 ZoneList<CharacterRange>* ranges = node->ranges(zone()); | 3491 ZoneList<CharacterRange>* ranges = node->ranges(zone()); |
| 3492 if (!CharacterRange::IsCanonical(ranges)) { | 3492 if (!CharacterRange::IsCanonical(ranges)) { |
| 3493 CharacterRange::Canonicalize(ranges); | 3493 CharacterRange::Canonicalize(ranges); |
| 3494 } | 3494 } |
| 3495 if (node->is_negated()) { | 3495 if (node->is_negated()) { |
| 3496 return ranges->length() == 0 ? on_success() : NULL; | 3496 return ranges->length() == 0 ? on_success() : NULL; |
| 3497 } | 3497 } |
| 3498 if (ranges->length() != 1) return NULL; | 3498 if (ranges->length() != 1) return NULL; |
| 3499 uint32_t max_char; | 3499 uint32_t max_char; |
| 3500 if (compiler->ascii()) { | 3500 if (compiler->ascii()) { |
| 3501 max_char = String::kMaxAsciiCharCode; | 3501 max_char = String::kMaxOneByteCharCode; |
| 3502 } else { | 3502 } else { |
| 3503 max_char = String::kMaxUtf16CodeUnit; | 3503 max_char = String::kMaxUtf16CodeUnit; |
| 3504 } | 3504 } |
| 3505 return ranges->at(0).IsEverything(max_char) ? on_success() : NULL; | 3505 return ranges->at(0).IsEverything(max_char) ? on_success() : NULL; |
| 3506 } | 3506 } |
| 3507 | 3507 |
| 3508 | 3508 |
| 3509 // Finds the fixed match length of a sequence of nodes that goes from | 3509 // Finds the fixed match length of a sequence of nodes that goes from |
| 3510 // this alternative and back to this choice node. If there are variable | 3510 // this alternative and back to this choice node. If there are variable |
| 3511 // length nodes or other complications in the way then return a sentinel | 3511 // length nodes or other complications in the way then return a sentinel |
| (...skipping 179 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3691 for (int i = 0; i < kMapSize; i++) map_->at(i) = true; | 3691 for (int i = 0; i < kMapSize; i++) map_->at(i) = true; |
| 3692 } | 3692 } |
| 3693 } | 3693 } |
| 3694 | 3694 |
| 3695 | 3695 |
| 3696 BoyerMooreLookahead::BoyerMooreLookahead( | 3696 BoyerMooreLookahead::BoyerMooreLookahead( |
| 3697 int length, RegExpCompiler* compiler, Zone* zone) | 3697 int length, RegExpCompiler* compiler, Zone* zone) |
| 3698 : length_(length), | 3698 : length_(length), |
| 3699 compiler_(compiler) { | 3699 compiler_(compiler) { |
| 3700 if (compiler->ascii()) { | 3700 if (compiler->ascii()) { |
| 3701 max_char_ = String::kMaxAsciiCharCode; | 3701 max_char_ = String::kMaxOneByteCharCode; |
| 3702 } else { | 3702 } else { |
| 3703 max_char_ = String::kMaxUtf16CodeUnit; | 3703 max_char_ = String::kMaxUtf16CodeUnit; |
| 3704 } | 3704 } |
| 3705 bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone); | 3705 bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone); |
| 3706 for (int i = 0; i < length; i++) { | 3706 for (int i = 0; i < length; i++) { |
| 3707 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone); | 3707 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone); |
| 3708 } | 3708 } |
| 3709 } | 3709 } |
| 3710 | 3710 |
| 3711 | 3711 |
| (...skipping 1618 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5330 } | 5330 } |
| 5331 | 5331 |
| 5332 | 5332 |
| 5333 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, | 5333 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, |
| 5334 bool is_ascii, | 5334 bool is_ascii, |
| 5335 Zone* zone) { | 5335 Zone* zone) { |
| 5336 Isolate* isolate = Isolate::Current(); | 5336 Isolate* isolate = Isolate::Current(); |
| 5337 uc16 bottom = from(); | 5337 uc16 bottom = from(); |
| 5338 uc16 top = to(); | 5338 uc16 top = to(); |
| 5339 if (is_ascii) { | 5339 if (is_ascii) { |
| 5340 if (bottom > String::kMaxAsciiCharCode) return; | 5340 if (bottom > String::kMaxOneByteCharCode) return; |
| 5341 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; | 5341 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode; |
| 5342 } | 5342 } |
| 5343 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 5343 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 5344 if (top == bottom) { | 5344 if (top == bottom) { |
| 5345 // If this is a singleton we just expand the one character. | 5345 // If this is a singleton we just expand the one character. |
| 5346 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); | 5346 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); |
| 5347 for (int i = 0; i < length; i++) { | 5347 for (int i = 0; i < length; i++) { |
| 5348 uc32 chr = chars[i]; | 5348 uc32 chr = chars[i]; |
| 5349 if (chr != bottom) { | 5349 if (chr != bottom) { |
| 5350 ranges->Add(CharacterRange::Singleton(chars[i]), zone); | 5350 ranges->Add(CharacterRange::Singleton(chars[i]), zone); |
| 5351 } | 5351 } |
| (...skipping 526 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5878 if (offset >= bm->length()) { | 5878 if (offset >= bm->length()) { |
| 5879 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 5879 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| 5880 return; | 5880 return; |
| 5881 } | 5881 } |
| 5882 uc16 character = atom->data()[j]; | 5882 uc16 character = atom->data()[j]; |
| 5883 if (bm->compiler()->ignore_case()) { | 5883 if (bm->compiler()->ignore_case()) { |
| 5884 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 5884 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 5885 int length = GetCaseIndependentLetters( | 5885 int length = GetCaseIndependentLetters( |
| 5886 ISOLATE, | 5886 ISOLATE, |
| 5887 character, | 5887 character, |
| 5888 bm->max_char() == String::kMaxAsciiCharCode, | 5888 bm->max_char() == String::kMaxOneByteCharCode, |
| 5889 chars); | 5889 chars); |
| 5890 for (int j = 0; j < length; j++) { | 5890 for (int j = 0; j < length; j++) { |
| 5891 bm->Set(offset, chars[j]); | 5891 bm->Set(offset, chars[j]); |
| 5892 } | 5892 } |
| 5893 } else { | 5893 } else { |
| 5894 if (character <= max_char) bm->Set(offset, character); | 5894 if (character <= max_char) bm->Set(offset, character); |
| 5895 } | 5895 } |
| 5896 } | 5896 } |
| 5897 } else { | 5897 } else { |
| 5898 ASSERT(text.type == TextElement::CHAR_CLASS); | 5898 ASSERT(text.type == TextElement::CHAR_CLASS); |
| (...skipping 260 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6159 } | 6159 } |
| 6160 | 6160 |
| 6161 return compiler.Assemble(&macro_assembler, | 6161 return compiler.Assemble(&macro_assembler, |
| 6162 node, | 6162 node, |
| 6163 data->capture_count, | 6163 data->capture_count, |
| 6164 pattern); | 6164 pattern); |
| 6165 } | 6165 } |
| 6166 | 6166 |
| 6167 | 6167 |
| 6168 }} // namespace v8::internal | 6168 }} // namespace v8::internal |
| OLD | NEW |