OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 2494 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2505 char_mask = String::kMaxOneByteCharCode; | 2505 char_mask = String::kMaxOneByteCharCode; |
2506 } else { | 2506 } else { |
2507 char_mask = String::kMaxUtf16CodeUnit; | 2507 char_mask = String::kMaxUtf16CodeUnit; |
2508 } | 2508 } |
2509 if ((mask & char_mask) == char_mask) need_mask = false; | 2509 if ((mask & char_mask) == char_mask) need_mask = false; |
2510 mask &= char_mask; | 2510 mask &= char_mask; |
2511 } else { | 2511 } else { |
2512 // For 2-character preloads in ASCII mode or 1-character preloads in | 2512 // For 2-character preloads in ASCII mode or 1-character preloads in |
2513 // TWO_BYTE mode we also use a 16 bit load with zero extend. | 2513 // TWO_BYTE mode we also use a 16 bit load with zero extend. |
2514 if (details->characters() == 2 && compiler->ascii()) { | 2514 if (details->characters() == 2 && compiler->ascii()) { |
2515 #ifndef ENABLE_LATIN_1 | |
2516 if ((mask & 0x7f7f) == 0xffff) need_mask = false; | |
2517 #else | |
2518 if ((mask & 0xffff) == 0xffff) need_mask = false; | 2515 if ((mask & 0xffff) == 0xffff) need_mask = false; |
2519 #endif | |
2520 } else if (details->characters() == 1 && !compiler->ascii()) { | 2516 } else if (details->characters() == 1 && !compiler->ascii()) { |
2521 if ((mask & 0xffff) == 0xffff) need_mask = false; | 2517 if ((mask & 0xffff) == 0xffff) need_mask = false; |
2522 } else { | 2518 } else { |
2523 if (mask == 0xffffffff) need_mask = false; | 2519 if (mask == 0xffffffff) need_mask = false; |
2524 } | 2520 } |
2525 } | 2521 } |
2526 | 2522 |
2527 if (fall_through_on_failure) { | 2523 if (fall_through_on_failure) { |
2528 if (need_mask) { | 2524 if (need_mask) { |
2529 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); | 2525 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); |
(...skipping 257 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
// Applies FilterASCII to this node's successor (on_success_) with the depth
// budget reduced by one.  If the successor filters to NULL, NULL is recorded as
// this node's replacement and returned; otherwise the filtered successor is
// stored back into on_success_ and this node is recorded as its own replacement.
// This function is the same in the OLD and NEW columns.
2787 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) { | 2783 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) { |
2788 RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case); | 2784 RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case); |
// A NULL successor is propagated upward through set_replacement(NULL).
2789 if (next == NULL) return set_replacement(NULL); | 2785 if (next == NULL) return set_replacement(NULL); |
2790 on_success_ = next; | 2786 on_success_ = next; |
2791 return set_replacement(this); | 2787 return set_replacement(this); |
2792 } | 2788 } |
2793 | 2789 |
2794 | 2790 |
2795 // We need to check for the following characters: 0x39c 0x3bc 0x178. | 2791 // We need to check for the following characters: 0x39c 0x3bc 0x178. |
// Returns true when `range` contains at least one of the code points 0x39c,
// 0x3bc or 0x178.
// NOTE(review): these are presumably GREEK CAPITAL/SMALL LETTER MU and LATIN
// CAPITAL LETTER Y WITH DIAERESIS, i.e. non-Latin-1 characters with a case
// variant inside Latin-1 -- confirm against the unibrow tables.
// Diff note: the OLD column compiled the check only under ENABLE_LATIN_1 and
// returned false otherwise; the NEW column removes the guard so the check is
// always performed.
2796 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { | 2792 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { |
2797 #ifdef ENABLE_LATIN_1 | |
2798 // TODO(dcarney): this could be a lot more efficient. | 2793 // TODO(dcarney): this could be a lot more efficient. |
2799 return range.Contains(0x39c) || | 2794 return range.Contains(0x39c) || |
2800 range.Contains(0x3bc) || range.Contains(0x178); | 2795 range.Contains(0x3bc) || range.Contains(0x178); |
2801 #else | |
2802 return false; | |
2803 #endif | |
2804 } | 2796 } |
2805 | 2797 |
2806 | 2798 |
// Returns true if any range in `ranges` satisfies
// RangeContainsLatin1Equivalents; scans the list in order and stops at the
// first hit.  Returns false for an empty list.
// Diff note: the OLD column defined this helper only when ENABLE_LATIN_1 was
// set; the NEW column defines it unconditionally.
2807 #ifdef ENABLE_LATIN_1 | |
2808 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) { | 2799 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) { |
2809 for (int i = 0; i < ranges->length(); i++) { | 2800 for (int i = 0; i < ranges->length(); i++) { |
2810 // TODO(dcarney): this could be a lot more efficient. | 2801 // TODO(dcarney): this could be a lot more efficient. |
2811 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true; | 2802 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true; |
2812 } | 2803 } |
2813 return false; | 2804 return false; |
2814 } | 2805 } |
2815 #endif | |
2816 | 2806 |
2817 | 2807 |
// Filters this text node against String::kMaxOneByteCharCode.  Walks every
// element in elms_ and returns set_replacement(NULL) as soon as an element is
// found that cannot match any character at or below that limit; otherwise the
// result is whatever FilterSuccessor(depth - 1, ignore_case) returns.
//   depth       - remaining recursion budget; a negative value returns `this`
//                 without examining anything.
//   ignore_case - when true, atom characters above the one-byte limit may be
//                 rewritten to a Latin-1 equivalent instead of failing, and
//                 character classes containing such equivalents are kept.
// Diff note: the OLD column selected between a strict check and the Latin-1
// aware logic with ENABLE_LATIN_1; the NEW column keeps only the Latin-1 aware
// logic.
2818 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) { | 2808 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) { |
// Reuse a previously computed replacement if one has already been recorded.
2819 if (info()->replacement_calculated) return replacement(); | 2809 if (info()->replacement_calculated) return replacement(); |
2820 if (depth < 0) return this; | 2810 if (depth < 0) return this; |
2821 ASSERT(!info()->visited); | 2811 ASSERT(!info()->visited); |
// NOTE(review): VisitMarker presumably flags info() as visited for the
// lifetime of `marker` -- confirm against its definition.
2822 VisitMarker marker(info()); | 2812 VisitMarker marker(info()); |
2823 int element_count = elms_->length(); | 2813 int element_count = elms_->length(); |
2824 for (int i = 0; i < element_count; i++) { | 2814 for (int i = 0; i < element_count; i++) { |
2825 TextElement elm = elms_->at(i); | 2815 TextElement elm = elms_->at(i); |
2826 if (elm.type == TextElement::ATOM) { | 2816 if (elm.type == TextElement::ATOM) { |
// Literal text: inspect each 16-bit code unit of the atom.
2827 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2817 Vector<const uc16> quarks = elm.data.u_atom->data(); |
2828 for (int j = 0; j < quarks.length(); j++) { | 2818 for (int j = 0; j < quarks.length(); j++) { |
2829 #ifndef ENABLE_LATIN_1 | |
2830 if (quarks[j] > String::kMaxOneByteCharCode) { | |
2831 return set_replacement(NULL); | |
2832 } | |
2833 #else | |
2834 uint16_t c = quarks[j]; | 2819 uint16_t c = quarks[j]; |
// Code units within the one-byte limit need no further treatment.
2835 if (c <= String::kMaxOneByteCharCode) continue; | 2820 if (c <= String::kMaxOneByteCharCode) continue; |
// Case-sensitive match against a character above the limit: the node is
// replaced with NULL.
2836 if (!ignore_case) return set_replacement(NULL); | 2821 if (!ignore_case) return set_replacement(NULL); |
2837 // Here, we need to check for characters whose upper and lower cases | 2822 // Here, we need to check for characters whose upper and lower cases |
2838 // are outside the Latin-1 range. | 2823 // are outside the Latin-1 range. |
2839 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); | 2824 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); |
2840 // Character is outside Latin-1 completely | 2825 // Character is outside Latin-1 completely |
2841 if (converted == 0) return set_replacement(NULL); | 2826 if (converted == 0) return set_replacement(NULL); |
2842 // Convert quark to Latin-1 in place. | 2827 // Convert quark to Latin-1 in place. |
// NOTE(review): this writes through a const_cast into the atom's own storage,
// so the change is presumably visible to anything else reading this atom --
// confirm that no other user depends on the original character.
2843 uint16_t* copy = const_cast<uint16_t*>(quarks.start()); | 2828 uint16_t* copy = const_cast<uint16_t*>(quarks.start()); |
2844 copy[j] = converted; | 2829 copy[j] = converted; |
2845 #endif | |
2846 } | 2830 } |
2847 } else { | 2831 } else { |
2848 ASSERT(elm.type == TextElement::CHAR_CLASS); | 2832 ASSERT(elm.type == TextElement::CHAR_CLASS); |
2849 RegExpCharacterClass* cc = elm.data.u_char_class; | 2833 RegExpCharacterClass* cc = elm.data.u_char_class; |
2850 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); | 2834 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); |
// Canonicalize first so that only ranges->at(0) has to be examined below.
2851 if (!CharacterRange::IsCanonical(ranges)) { | 2835 if (!CharacterRange::IsCanonical(ranges)) { |
2852 CharacterRange::Canonicalize(ranges); | 2836 CharacterRange::Canonicalize(ranges); |
2853 } | 2837 } |
2854 // Now they are in order so we only need to look at the first. | 2838 // Now they are in order so we only need to look at the first. |
2855 int range_count = ranges->length(); | 2839 int range_count = ranges->length(); |
2856 if (cc->is_negated()) { | 2840 if (cc->is_negated()) { |
// Negated class whose first range spans 0..kMaxOneByteCharCode (or beyond):
// every character within the one-byte limit is excluded.
2857 if (range_count != 0 && | 2841 if (range_count != 0 && |
2858 ranges->at(0).from() == 0 && | 2842 ranges->at(0).from() == 0 && |
2859 ranges->at(0).to() >= String::kMaxOneByteCharCode) { | 2843 ranges->at(0).to() >= String::kMaxOneByteCharCode) { |
2860 #ifdef ENABLE_LATIN_1 | |
2861 // This will be handled in a later filter. | 2844 // This will be handled in a later filter. |
// `continue` keeps this element and moves on to the next one in elms_.
2862 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; | 2845 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
2863 #endif | |
2864 return set_replacement(NULL); | 2846 return set_replacement(NULL); |
2865 } | 2847 } |
2866 } else { | 2848 } else { |
// Non-negated class that is empty or whose lowest range starts above the
// one-byte limit: no character within the limit is included.
2867 if (range_count == 0 || | 2849 if (range_count == 0 || |
2868 ranges->at(0).from() > String::kMaxOneByteCharCode) { | 2850 ranges->at(0).from() > String::kMaxOneByteCharCode) { |
2869 #ifdef ENABLE_LATIN_1 | |
2870 // This will be handled in a later filter. | 2851 // This will be handled in a later filter. |
2871 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; | 2852 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
2872 #endif | |
2873 return set_replacement(NULL); | 2853 return set_replacement(NULL); |
2874 } | 2854 } |
2875 } | 2855 } |
2876 } | 2856 } |
2877 } | 2857 } |
// No element was rejected: continue filtering with the successor node.
2878 return FilterSuccessor(depth - 1, ignore_case); | 2858 return FilterSuccessor(depth - 1, ignore_case); |
2879 } | 2859 } |
2880 | 2860 |
2881 | 2861 |
2882 RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) { | 2862 RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) { |
(...skipping 3258 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6141 } | 6121 } |
6142 | 6122 |
6143 return compiler.Assemble(&macro_assembler, | 6123 return compiler.Assemble(&macro_assembler, |
6144 node, | 6124 node, |
6145 data->capture_count, | 6125 data->capture_count, |
6146 pattern); | 6126 pattern); |
6147 } | 6127 } |
6148 | 6128 |
6149 | 6129 |
6150 }} // namespace v8::internal | 6130 }} // namespace v8::internal |
OLD | NEW |