| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 2494 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2505 char_mask = String::kMaxOneByteCharCode; | 2505 char_mask = String::kMaxOneByteCharCode; |
| 2506 } else { | 2506 } else { |
| 2507 char_mask = String::kMaxUtf16CodeUnit; | 2507 char_mask = String::kMaxUtf16CodeUnit; |
| 2508 } | 2508 } |
| 2509 if ((mask & char_mask) == char_mask) need_mask = false; | 2509 if ((mask & char_mask) == char_mask) need_mask = false; |
| 2510 mask &= char_mask; | 2510 mask &= char_mask; |
| 2511 } else { | 2511 } else { |
| 2512 // For 2-character preloads in ASCII mode or 1-character preloads in | 2512 // For 2-character preloads in ASCII mode or 1-character preloads in |
| 2513 // TWO_BYTE mode we also use a 16 bit load with zero extend. | 2513 // TWO_BYTE mode we also use a 16 bit load with zero extend. |
| 2514 if (details->characters() == 2 && compiler->ascii()) { | 2514 if (details->characters() == 2 && compiler->ascii()) { |
| 2515 #ifndef ENABLE_LATIN_1 | |
| 2516 if ((mask & 0x7f7f) == 0xffff) need_mask = false; | |
| 2517 #else | |
| 2518 if ((mask & 0xffff) == 0xffff) need_mask = false; | 2515 if ((mask & 0xffff) == 0xffff) need_mask = false; |
| 2519 #endif | |
| 2520 } else if (details->characters() == 1 && !compiler->ascii()) { | 2516 } else if (details->characters() == 1 && !compiler->ascii()) { |
| 2521 if ((mask & 0xffff) == 0xffff) need_mask = false; | 2517 if ((mask & 0xffff) == 0xffff) need_mask = false; |
| 2522 } else { | 2518 } else { |
| 2523 if (mask == 0xffffffff) need_mask = false; | 2519 if (mask == 0xffffffff) need_mask = false; |
| 2524 } | 2520 } |
| 2525 } | 2521 } |
| 2526 | 2522 |
| 2527 if (fall_through_on_failure) { | 2523 if (fall_through_on_failure) { |
| 2528 if (need_mask) { | 2524 if (need_mask) { |
| 2529 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); | 2525 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); |
| (...skipping 257 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2787 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) { | 2783 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) { |
| 2788 RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case); | 2784 RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case); |
| 2789 if (next == NULL) return set_replacement(NULL); | 2785 if (next == NULL) return set_replacement(NULL); |
| 2790 on_success_ = next; | 2786 on_success_ = next; |
| 2791 return set_replacement(this); | 2787 return set_replacement(this); |
| 2792 } | 2788 } |
| 2793 | 2789 |
| 2794 | 2790 |
| 2795 // We need to check for the following characters: 0x39c 0x3bc 0x178. | 2791 // We need to check for the following characters: 0x39c 0x3bc 0x178. |
| 2796 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { | 2792 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { |
| 2797 #ifdef ENABLE_LATIN_1 | |
| 2798 // TODO(dcarney): this could be a lot more efficient. | 2793 // TODO(dcarney): this could be a lot more efficient. |
| 2799 return range.Contains(0x39c) || | 2794 return range.Contains(0x39c) || |
| 2800 range.Contains(0x3bc) || range.Contains(0x178); | 2795 range.Contains(0x3bc) || range.Contains(0x178); |
| 2801 #else | |
| 2802 return false; | |
| 2803 #endif | |
| 2804 } | 2796 } |
| 2805 | 2797 |
| 2806 | 2798 |
| 2807 #ifdef ENABLE_LATIN_1 | |
| 2808 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) { | 2799 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) { |
| 2809 for (int i = 0; i < ranges->length(); i++) { | 2800 for (int i = 0; i < ranges->length(); i++) { |
| 2810 // TODO(dcarney): this could be a lot more efficient. | 2801 // TODO(dcarney): this could be a lot more efficient. |
| 2811 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true; | 2802 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true; |
| 2812 } | 2803 } |
| 2813 return false; | 2804 return false; |
| 2814 } | 2805 } |
| 2815 #endif | |
| 2816 | 2806 |
| 2817 | 2807 |
| 2818 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) { | 2808 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) { |
| 2819 if (info()->replacement_calculated) return replacement(); | 2809 if (info()->replacement_calculated) return replacement(); |
| 2820 if (depth < 0) return this; | 2810 if (depth < 0) return this; |
| 2821 ASSERT(!info()->visited); | 2811 ASSERT(!info()->visited); |
| 2822 VisitMarker marker(info()); | 2812 VisitMarker marker(info()); |
| 2823 int element_count = elms_->length(); | 2813 int element_count = elms_->length(); |
| 2824 for (int i = 0; i < element_count; i++) { | 2814 for (int i = 0; i < element_count; i++) { |
| 2825 TextElement elm = elms_->at(i); | 2815 TextElement elm = elms_->at(i); |
| 2826 if (elm.type == TextElement::ATOM) { | 2816 if (elm.type == TextElement::ATOM) { |
| 2827 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2817 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| 2828 for (int j = 0; j < quarks.length(); j++) { | 2818 for (int j = 0; j < quarks.length(); j++) { |
| 2829 #ifndef ENABLE_LATIN_1 | |
| 2830 if (quarks[j] > String::kMaxOneByteCharCode) { | |
| 2831 return set_replacement(NULL); | |
| 2832 } | |
| 2833 #else | |
| 2834 uint16_t c = quarks[j]; | 2819 uint16_t c = quarks[j]; |
| 2835 if (c <= String::kMaxOneByteCharCode) continue; | 2820 if (c <= String::kMaxOneByteCharCode) continue; |
| 2836 if (!ignore_case) return set_replacement(NULL); | 2821 if (!ignore_case) return set_replacement(NULL); |
| 2837 // Here, we need to check for characters whose upper and lower cases | 2822 // Here, we need to check for characters whose upper and lower cases |
| 2838 // are outside the Latin-1 range. | 2823 // are outside the Latin-1 range. |
| 2839 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); | 2824 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); |
| 2840 // Character is outside Latin-1 completely | 2825 // Character is outside Latin-1 completely |
| 2841 if (converted == 0) return set_replacement(NULL); | 2826 if (converted == 0) return set_replacement(NULL); |
| 2842 // Convert quark to Latin-1 in place. | 2827 // Convert quark to Latin-1 in place. |
| 2843 uint16_t* copy = const_cast<uint16_t*>(quarks.start()); | 2828 uint16_t* copy = const_cast<uint16_t*>(quarks.start()); |
| 2844 copy[j] = converted; | 2829 copy[j] = converted; |
| 2845 #endif | |
| 2846 } | 2830 } |
| 2847 } else { | 2831 } else { |
| 2848 ASSERT(elm.type == TextElement::CHAR_CLASS); | 2832 ASSERT(elm.type == TextElement::CHAR_CLASS); |
| 2849 RegExpCharacterClass* cc = elm.data.u_char_class; | 2833 RegExpCharacterClass* cc = elm.data.u_char_class; |
| 2850 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); | 2834 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); |
| 2851 if (!CharacterRange::IsCanonical(ranges)) { | 2835 if (!CharacterRange::IsCanonical(ranges)) { |
| 2852 CharacterRange::Canonicalize(ranges); | 2836 CharacterRange::Canonicalize(ranges); |
| 2853 } | 2837 } |
| 2854 // Now they are in order so we only need to look at the first. | 2838 // Now they are in order so we only need to look at the first. |
| 2855 int range_count = ranges->length(); | 2839 int range_count = ranges->length(); |
| 2856 if (cc->is_negated()) { | 2840 if (cc->is_negated()) { |
| 2857 if (range_count != 0 && | 2841 if (range_count != 0 && |
| 2858 ranges->at(0).from() == 0 && | 2842 ranges->at(0).from() == 0 && |
| 2859 ranges->at(0).to() >= String::kMaxOneByteCharCode) { | 2843 ranges->at(0).to() >= String::kMaxOneByteCharCode) { |
| 2860 #ifdef ENABLE_LATIN_1 | |
| 2861 // This will be handled in a later filter. | 2844 // This will be handled in a later filter. |
| 2862 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; | 2845 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
| 2863 #endif | |
| 2864 return set_replacement(NULL); | 2846 return set_replacement(NULL); |
| 2865 } | 2847 } |
| 2866 } else { | 2848 } else { |
| 2867 if (range_count == 0 || | 2849 if (range_count == 0 || |
| 2868 ranges->at(0).from() > String::kMaxOneByteCharCode) { | 2850 ranges->at(0).from() > String::kMaxOneByteCharCode) { |
| 2869 #ifdef ENABLE_LATIN_1 | |
| 2870 // This will be handled in a later filter. | 2851 // This will be handled in a later filter. |
| 2871 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; | 2852 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
| 2872 #endif | |
| 2873 return set_replacement(NULL); | 2853 return set_replacement(NULL); |
| 2874 } | 2854 } |
| 2875 } | 2855 } |
| 2876 } | 2856 } |
| 2877 } | 2857 } |
| 2878 return FilterSuccessor(depth - 1, ignore_case); | 2858 return FilterSuccessor(depth - 1, ignore_case); |
| 2879 } | 2859 } |
| 2880 | 2860 |
| 2881 | 2861 |
| 2882 RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) { | 2862 RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) { |
| (...skipping 3258 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6141 } | 6121 } |
| 6142 | 6122 |
| 6143 return compiler.Assemble(&macro_assembler, | 6123 return compiler.Assemble(&macro_assembler, |
| 6144 node, | 6124 node, |
| 6145 data->capture_count, | 6125 data->capture_count, |
| 6146 pattern); | 6126 pattern); |
| 6147 } | 6127 } |
| 6148 | 6128 |
| 6149 | 6129 |
| 6150 }} // namespace v8::internal | 6130 }} // namespace v8::internal |
| OLD | NEW |