Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(50)

Side by Side Diff: src/jsregexp.cc

Issue 12700008: remove latin-1 flag (Closed) | Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: removed SeqOneByteStringVerify | Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/ia32/regexp-macro-assembler-ia32.cc ('k') | src/mips/regexp-macro-assembler-mips.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 2494 matching lines...) Expand 10 before | Expand all | Expand 10 after
2505 char_mask = String::kMaxOneByteCharCode; 2505 char_mask = String::kMaxOneByteCharCode;
2506 } else { 2506 } else {
2507 char_mask = String::kMaxUtf16CodeUnit; 2507 char_mask = String::kMaxUtf16CodeUnit;
2508 } 2508 }
2509 if ((mask & char_mask) == char_mask) need_mask = false; 2509 if ((mask & char_mask) == char_mask) need_mask = false;
2510 mask &= char_mask; 2510 mask &= char_mask;
2511 } else { 2511 } else {
2512 // For 2-character preloads in ASCII mode or 1-character preloads in 2512 // For 2-character preloads in ASCII mode or 1-character preloads in
2513 // TWO_BYTE mode we also use a 16 bit load with zero extend. 2513 // TWO_BYTE mode we also use a 16 bit load with zero extend.
2514 if (details->characters() == 2 && compiler->ascii()) { 2514 if (details->characters() == 2 && compiler->ascii()) {
2515 #ifndef ENABLE_LATIN_1
2516 if ((mask & 0x7f7f) == 0xffff) need_mask = false;
2517 #else
2518 if ((mask & 0xffff) == 0xffff) need_mask = false; 2515 if ((mask & 0xffff) == 0xffff) need_mask = false;
2519 #endif
2520 } else if (details->characters() == 1 && !compiler->ascii()) { 2516 } else if (details->characters() == 1 && !compiler->ascii()) {
2521 if ((mask & 0xffff) == 0xffff) need_mask = false; 2517 if ((mask & 0xffff) == 0xffff) need_mask = false;
2522 } else { 2518 } else {
2523 if (mask == 0xffffffff) need_mask = false; 2519 if (mask == 0xffffffff) need_mask = false;
2524 } 2520 }
2525 } 2521 }
2526 2522
2527 if (fall_through_on_failure) { 2523 if (fall_through_on_failure) {
2528 if (need_mask) { 2524 if (need_mask) {
2529 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); 2525 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
(...skipping 257 matching lines...) Expand 10 before | Expand all | Expand 10 after
2787 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) { 2783 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) {
2788 RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case); 2784 RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case);
2789 if (next == NULL) return set_replacement(NULL); 2785 if (next == NULL) return set_replacement(NULL);
2790 on_success_ = next; 2786 on_success_ = next;
2791 return set_replacement(this); 2787 return set_replacement(this);
2792 } 2788 }
2793 2789
2794 2790
2795 // We need to check for the following characters: 0x39c 0x3bc 0x178. 2791 // We need to check for the following characters: 0x39c 0x3bc 0x178.
2796 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { 2792 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {
2797 #ifdef ENABLE_LATIN_1
2798 // TODO(dcarney): this could be a lot more efficient. 2793 // TODO(dcarney): this could be a lot more efficient.
2799 return range.Contains(0x39c) || 2794 return range.Contains(0x39c) ||
2800 range.Contains(0x3bc) || range.Contains(0x178); 2795 range.Contains(0x3bc) || range.Contains(0x178);
2801 #else
2802 return false;
2803 #endif
2804 } 2796 }
2805 2797
2806 2798
2807 #ifdef ENABLE_LATIN_1
2808 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) { 2799 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
2809 for (int i = 0; i < ranges->length(); i++) { 2800 for (int i = 0; i < ranges->length(); i++) {
2810 // TODO(dcarney): this could be a lot more efficient. 2801 // TODO(dcarney): this could be a lot more efficient.
2811 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true; 2802 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;
2812 } 2803 }
2813 return false; 2804 return false;
2814 } 2805 }
2815 #endif
2816 2806
2817 2807
2818 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) { 2808 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {
2819 if (info()->replacement_calculated) return replacement(); 2809 if (info()->replacement_calculated) return replacement();
2820 if (depth < 0) return this; 2810 if (depth < 0) return this;
2821 ASSERT(!info()->visited); 2811 ASSERT(!info()->visited);
2822 VisitMarker marker(info()); 2812 VisitMarker marker(info());
2823 int element_count = elms_->length(); 2813 int element_count = elms_->length();
2824 for (int i = 0; i < element_count; i++) { 2814 for (int i = 0; i < element_count; i++) {
2825 TextElement elm = elms_->at(i); 2815 TextElement elm = elms_->at(i);
2826 if (elm.type == TextElement::ATOM) { 2816 if (elm.type == TextElement::ATOM) {
2827 Vector<const uc16> quarks = elm.data.u_atom->data(); 2817 Vector<const uc16> quarks = elm.data.u_atom->data();
2828 for (int j = 0; j < quarks.length(); j++) { 2818 for (int j = 0; j < quarks.length(); j++) {
2829 #ifndef ENABLE_LATIN_1
2830 if (quarks[j] > String::kMaxOneByteCharCode) {
2831 return set_replacement(NULL);
2832 }
2833 #else
2834 uint16_t c = quarks[j]; 2819 uint16_t c = quarks[j];
2835 if (c <= String::kMaxOneByteCharCode) continue; 2820 if (c <= String::kMaxOneByteCharCode) continue;
2836 if (!ignore_case) return set_replacement(NULL); 2821 if (!ignore_case) return set_replacement(NULL);
2837 // Here, we need to check for characters whose upper and lower cases 2822 // Here, we need to check for characters whose upper and lower cases
2838 // are outside the Latin-1 range. 2823 // are outside the Latin-1 range.
2839 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); 2824 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);
2840 // Character is outside Latin-1 completely 2825 // Character is outside Latin-1 completely
2841 if (converted == 0) return set_replacement(NULL); 2826 if (converted == 0) return set_replacement(NULL);
2842 // Convert quark to Latin-1 in place. 2827 // Convert quark to Latin-1 in place.
2843 uint16_t* copy = const_cast<uint16_t*>(quarks.start()); 2828 uint16_t* copy = const_cast<uint16_t*>(quarks.start());
2844 copy[j] = converted; 2829 copy[j] = converted;
2845 #endif
2846 } 2830 }
2847 } else { 2831 } else {
2848 ASSERT(elm.type == TextElement::CHAR_CLASS); 2832 ASSERT(elm.type == TextElement::CHAR_CLASS);
2849 RegExpCharacterClass* cc = elm.data.u_char_class; 2833 RegExpCharacterClass* cc = elm.data.u_char_class;
2850 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); 2834 ZoneList<CharacterRange>* ranges = cc->ranges(zone());
2851 if (!CharacterRange::IsCanonical(ranges)) { 2835 if (!CharacterRange::IsCanonical(ranges)) {
2852 CharacterRange::Canonicalize(ranges); 2836 CharacterRange::Canonicalize(ranges);
2853 } 2837 }
2854 // Now they are in order so we only need to look at the first. 2838 // Now they are in order so we only need to look at the first.
2855 int range_count = ranges->length(); 2839 int range_count = ranges->length();
2856 if (cc->is_negated()) { 2840 if (cc->is_negated()) {
2857 if (range_count != 0 && 2841 if (range_count != 0 &&
2858 ranges->at(0).from() == 0 && 2842 ranges->at(0).from() == 0 &&
2859 ranges->at(0).to() >= String::kMaxOneByteCharCode) { 2843 ranges->at(0).to() >= String::kMaxOneByteCharCode) {
2860 #ifdef ENABLE_LATIN_1
2861 // This will be handled in a later filter. 2844 // This will be handled in a later filter.
2862 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; 2845 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
2863 #endif
2864 return set_replacement(NULL); 2846 return set_replacement(NULL);
2865 } 2847 }
2866 } else { 2848 } else {
2867 if (range_count == 0 || 2849 if (range_count == 0 ||
2868 ranges->at(0).from() > String::kMaxOneByteCharCode) { 2850 ranges->at(0).from() > String::kMaxOneByteCharCode) {
2869 #ifdef ENABLE_LATIN_1
2870 // This will be handled in a later filter. 2851 // This will be handled in a later filter.
2871 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; 2852 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
2872 #endif
2873 return set_replacement(NULL); 2853 return set_replacement(NULL);
2874 } 2854 }
2875 } 2855 }
2876 } 2856 }
2877 } 2857 }
2878 return FilterSuccessor(depth - 1, ignore_case); 2858 return FilterSuccessor(depth - 1, ignore_case);
2879 } 2859 }
2880 2860
2881 2861
2882 RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) { 2862 RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) {
(...skipping 3258 matching lines...) Expand 10 before | Expand all | Expand 10 after
6141 } 6121 }
6142 6122
6143 return compiler.Assemble(&macro_assembler, 6123 return compiler.Assemble(&macro_assembler,
6144 node, 6124 node,
6145 data->capture_count, 6125 data->capture_count,
6146 pattern); 6126 pattern);
6147 } 6127 }
6148 6128
6149 6129
6150 }} // namespace v8::internal 6130 }} // namespace v8::internal
OLD | NEW
« no previous file with comments | « src/ia32/regexp-macro-assembler-ia32.cc ('k') | src/mips/regexp-macro-assembler-mips.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698