src/jsregexp.cc - Issue 12700008: remove latin-1 flag

Side by Side Diff: src/jsregexp.cc

Issue 12700008: remove latin-1 flag (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: removed SeqOneByteStringVerify Created 7 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 2494 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
2505 char_mask = String::kMaxOneByteCharCode;	2505 char_mask = String::kMaxOneByteCharCode;

2506 } else {	2506 } else {

2507 char_mask = String::kMaxUtf16CodeUnit;	2507 char_mask = String::kMaxUtf16CodeUnit;

2508 }	2508 }

2509 if ((mask & char_mask) == char_mask) need_mask = false;	2509 if ((mask & char_mask) == char_mask) need_mask = false;

2510 mask &= char_mask;	2510 mask &= char_mask;

2511 } else {	2511 } else {

2512 // For 2-character preloads in ASCII mode or 1-character preloads in	2512 // For 2-character preloads in ASCII mode or 1-character preloads in

2513 // TWO_BYTE mode we also use a 16 bit load with zero extend.	2513 // TWO_BYTE mode we also use a 16 bit load with zero extend.

2514 if (details->characters() == 2 && compiler->ascii()) {	2514 if (details->characters() == 2 && compiler->ascii()) {

2515 #ifndef ENABLE_LATIN_1

2516 if ((mask & 0x7f7f) == 0xffff) need_mask = false;

2517 #else

2518 if ((mask & 0xffff) == 0xffff) need_mask = false;	2515 if ((mask & 0xffff) == 0xffff) need_mask = false;

2519 #endif

2520 } else if (details->characters() == 1 && !compiler->ascii()) {	2516 } else if (details->characters() == 1 && !compiler->ascii()) {

2521 if ((mask & 0xffff) == 0xffff) need_mask = false;	2517 if ((mask & 0xffff) == 0xffff) need_mask = false;

2522 } else {	2518 } else {

2523 if (mask == 0xffffffff) need_mask = false;	2519 if (mask == 0xffffffff) need_mask = false;

2524 }	2520 }

2525 }	2521 }

2526	2522

2527 if (fall_through_on_failure) {	2523 if (fall_through_on_failure) {

2528 if (need_mask) {	2524 if (need_mask) {

2529 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);	2525 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);

(...skipping 257 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
2787 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) {	2783 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) {

2788 RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case);	2784 RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case);

2789 if (next == NULL) return set_replacement(NULL);	2785 if (next == NULL) return set_replacement(NULL);

2790 on_success_ = next;	2786 on_success_ = next;

2791 return set_replacement(this);	2787 return set_replacement(this);

2792 }	2788 }

2793	2789

2794	2790

2795 // We need to check for the following characters: 0x39c 0x3bc 0x178.	2791 // We need to check for the following characters: 0x39c 0x3bc 0x178.

2796 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {	2792 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {

2797 #ifdef ENABLE_LATIN_1

2798 // TODO(dcarney): this could be a lot more efficient.	2793 // TODO(dcarney): this could be a lot more efficient.

2799 return range.Contains(0x39c) ||	2794 return range.Contains(0x39c) ||

2800 range.Contains(0x3bc) || range.Contains(0x178);	2795 range.Contains(0x3bc) || range.Contains(0x178);

2801 #else

2802 return false;

2803 #endif

2804 }	2796 }

2805	2797

2806	2798

2807 #ifdef ENABLE_LATIN_1

2808 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {	2799 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {

2809 for (int i = 0; i < ranges->length(); i++) {	2800 for (int i = 0; i < ranges->length(); i++) {

2810 // TODO(dcarney): this could be a lot more efficient.	2801 // TODO(dcarney): this could be a lot more efficient.

2811 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;	2802 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;

2812 }	2803 }

2813 return false;	2804 return false;

2814 }	2805 }

2815 #endif

2816	2806

2817	2807

2818 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {	2808 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {

2819 if (info()->replacement_calculated) return replacement();	2809 if (info()->replacement_calculated) return replacement();

2820 if (depth < 0) return this;	2810 if (depth < 0) return this;

2821 ASSERT(!info()->visited);	2811 ASSERT(!info()->visited);

2822 VisitMarker marker(info());	2812 VisitMarker marker(info());

2823 int element_count = elms_->length();	2813 int element_count = elms_->length();

2824 for (int i = 0; i < element_count; i++) {	2814 for (int i = 0; i < element_count; i++) {

2825 TextElement elm = elms_->at(i);	2815 TextElement elm = elms_->at(i);

2826 if (elm.type == TextElement::ATOM) {	2816 if (elm.type == TextElement::ATOM) {

2827 Vector<const uc16> quarks = elm.data.u_atom->data();	2817 Vector<const uc16> quarks = elm.data.u_atom->data();

2828 for (int j = 0; j < quarks.length(); j++) {	2818 for (int j = 0; j < quarks.length(); j++) {

2829 #ifndef ENABLE_LATIN_1

2830 if (quarks[j] > String::kMaxOneByteCharCode) {

2831 return set_replacement(NULL);

2832 }

2833 #else

2834 uint16_t c = quarks[j];	2819 uint16_t c = quarks[j];

2835 if (c <= String::kMaxOneByteCharCode) continue;	2820 if (c <= String::kMaxOneByteCharCode) continue;

2836 if (!ignore_case) return set_replacement(NULL);	2821 if (!ignore_case) return set_replacement(NULL);

2837 // Here, we need to check for characters whose upper and lower cases	2822 // Here, we need to check for characters whose upper and lower cases

2838 // are outside the Latin-1 range.	2823 // are outside the Latin-1 range.

2839 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);	2824 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);

2840 // Character is outside Latin-1 completely	2825 // Character is outside Latin-1 completely

2841 if (converted == 0) return set_replacement(NULL);	2826 if (converted == 0) return set_replacement(NULL);

2842 // Convert quark to Latin-1 in place.	2827 // Convert quark to Latin-1 in place.

2843 uint16_t* copy = const_cast<uint16_t*>(quarks.start());	2828 uint16_t* copy = const_cast<uint16_t*>(quarks.start());

2844 copy[j] = converted;	2829 copy[j] = converted;

2845 #endif

2846 }	2830 }

2847 } else {	2831 } else {

2848 ASSERT(elm.type == TextElement::CHAR_CLASS);	2832 ASSERT(elm.type == TextElement::CHAR_CLASS);

2849 RegExpCharacterClass* cc = elm.data.u_char_class;	2833 RegExpCharacterClass* cc = elm.data.u_char_class;

2850 ZoneList<CharacterRange>* ranges = cc->ranges(zone());	2834 ZoneList<CharacterRange>* ranges = cc->ranges(zone());

2851 if (!CharacterRange::IsCanonical(ranges)) {	2835 if (!CharacterRange::IsCanonical(ranges)) {

2852 CharacterRange::Canonicalize(ranges);	2836 CharacterRange::Canonicalize(ranges);

2853 }	2837 }

2854 // Now they are in order so we only need to look at the first.	2838 // Now they are in order so we only need to look at the first.

2855 int range_count = ranges->length();	2839 int range_count = ranges->length();

2856 if (cc->is_negated()) {	2840 if (cc->is_negated()) {

2857 if (range_count != 0 &&	2841 if (range_count != 0 &&

2858 ranges->at(0).from() == 0 &&	2842 ranges->at(0).from() == 0 &&

2859 ranges->at(0).to() >= String::kMaxOneByteCharCode) {	2843 ranges->at(0).to() >= String::kMaxOneByteCharCode) {

2860 #ifdef ENABLE_LATIN_1

2861 // This will be handled in a later filter.	2844 // This will be handled in a later filter.

2862 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;	2845 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;

2863 #endif

2864 return set_replacement(NULL);	2846 return set_replacement(NULL);

2865 }	2847 }

2866 } else {	2848 } else {

2867 if (range_count == 0 ||	2849 if (range_count == 0 ||

2868 ranges->at(0).from() > String::kMaxOneByteCharCode) {	2850 ranges->at(0).from() > String::kMaxOneByteCharCode) {

2869 #ifdef ENABLE_LATIN_1

2870 // This will be handled in a later filter.	2851 // This will be handled in a later filter.

2871 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;	2852 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;

2872 #endif

2873 return set_replacement(NULL);	2853 return set_replacement(NULL);

2874 }	2854 }

2875 }	2855 }

2876 }	2856 }

2877 }	2857 }

2878 return FilterSuccessor(depth - 1, ignore_case);	2858 return FilterSuccessor(depth - 1, ignore_case);

2879 }	2859 }

2880	2860

2881	2861

2882 RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) {	2862 RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) {

(...skipping 3258 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
6141 }	6121 }

6142	6122

6143 return compiler.Assemble(&macro_assembler,	6123 return compiler.Assemble(&macro_assembler,

6144 node,	6124 node,

6145 data->capture_count,	6125 data->capture_count,

6146 pattern);	6126 pattern);

6147 }	6127 }

6148	6128

6149	6129

6150 }} // namespace v8::internal	6130 }} // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/ia32/regexp-macro-assembler-ia32.cc ('k') | src/mips/regexp-macro-assembler-mips.cc » ('j') | no next file with comments »