src/jsregexp.cc - Issue 11759008: Introduce ENABLE_LATIN_1 compile flag

Side by Side Diff: src/jsregexp.cc

Issue 11759008: Introduce ENABLE_LATIN_1 compile flag (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: a bunch of sign conversions Created 7 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 1663 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1674 bool ascii_subject,	1674 bool ascii_subject,

1675 unibrow::uchar* letters) {	1675 unibrow::uchar* letters) {

1676 int length =	1676 int length =

1677 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);	1677 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);

1678 // Unibrow returns 0 or 1 for characters where case independence is	1678 // Unibrow returns 0 or 1 for characters where case independence is

1679 // trivial.	1679 // trivial.

1680 if (length == 0) {	1680 if (length == 0) {

1681 letters[0] = character;	1681 letters[0] = character;

1682 length = 1;	1682 length = 1;

1683 }	1683 }

1684 if (!ascii_subject || character <= String::kMaxAsciiCharCode) {	1684 if (!ascii_subject || character <= String::kMaxOneByteCharCode) {

1685 return length;	1685 return length;

1686 }	1686 }

1687 // The standard requires that non-ASCII characters cannot have ASCII	1687 // The standard requires that non-ASCII characters cannot have ASCII

1688 // character codes in their equivalence class.	1688 // character codes in their equivalence class.

1689 return 0;	1689 return 0;

1690 }	1690 }

1691	1691

1692	1692

1693 static inline bool EmitSimpleCharacter(Isolate* isolate,	1693 static inline bool EmitSimpleCharacter(Isolate* isolate,

1694 RegExpCompiler* compiler,	1694 RegExpCompiler* compiler,

(...skipping 30 matching lines...) Expand all Loading...
1725 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	1725 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

1726 int length = GetCaseIndependentLetters(isolate, c, ascii, chars);	1726 int length = GetCaseIndependentLetters(isolate, c, ascii, chars);

1727 if (length < 1) {	1727 if (length < 1) {

1728 // This can't match. Must be an ASCII subject and a non-ASCII character.	1728 // This can't match. Must be an ASCII subject and a non-ASCII character.

1729 // We do not need to do anything since the ASCII pass already handled this.	1729 // We do not need to do anything since the ASCII pass already handled this.

1730 return false; // Bounds not checked.	1730 return false; // Bounds not checked.

1731 }	1731 }

1732 bool checked = false;	1732 bool checked = false;

1733 // We handle the length > 1 case in a later pass.	1733 // We handle the length > 1 case in a later pass.

1734 if (length == 1) {	1734 if (length == 1) {

1735 if (ascii && c > String::kMaxAsciiCharCodeU) {	1735 if (ascii && c > String::kMaxOneByteCharCodeU) {

1736 // Can't match - see above.	1736 // Can't match - see above.

1737 return false; // Bounds not checked.	1737 return false; // Bounds not checked.

1738 }	1738 }

1739 if (!preloaded) {	1739 if (!preloaded) {

1740 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);	1740 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);

1741 checked = check;	1741 checked = check;

1742 }	1742 }

1743 macro_assembler->CheckNotCharacter(c, on_failure);	1743 macro_assembler->CheckNotCharacter(c, on_failure);

1744 }	1744 }

1745 return checked;	1745 return checked;

1746 }	1746 }

1747	1747

1748	1748

1749 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,	1749 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,

1750 bool ascii,	1750 bool ascii,

1751 uc16 c1,	1751 uc16 c1,

1752 uc16 c2,	1752 uc16 c2,

1753 Label* on_failure) {	1753 Label* on_failure) {

1754 uc16 char_mask;	1754 uc16 char_mask;

1755 if (ascii) {	1755 if (ascii) {

1756 char_mask = String::kMaxAsciiCharCode;	1756 char_mask = String::kMaxOneByteCharCode;

1757 } else {	1757 } else {

1758 char_mask = String::kMaxUtf16CodeUnit;	1758 char_mask = String::kMaxUtf16CodeUnit;

1759 }	1759 }

1760 uc16 exor = c1 ^ c2;	1760 uc16 exor = c1 ^ c2;

1761 // Check whether exor has only one bit set.	1761 // Check whether exor has only one bit set.

1762 if (((exor - 1) & exor) == 0) {	1762 if (((exor - 1) & exor) == 0) {

1763 // If c1 and c2 differ only by one bit.	1763 // If c1 and c2 differ only by one bit.

1764 // Ecma262UnCanonicalize always gives the highest number last.	1764 // Ecma262UnCanonicalize always gives the highest number last.

1765 ASSERT(c2 > c1);	1765 ASSERT(c2 > c1);

1766 uc16 mask = char_mask ^ exor;	1766 uc16 mask = char_mask ^ exor;

(...skipping 233 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2000 // encoding space can be quickly tested with a table lookup, so we don't	2000 // encoding space can be quickly tested with a table lookup, so we don't

2001 // wish to do binary chop search at a smaller granularity than that. A	2001 // wish to do binary chop search at a smaller granularity than that. A

2002 // 128-character space can take up a lot of space in the ranges array if,	2002 // 128-character space can take up a lot of space in the ranges array if,

2003 // for example, we only want to match every second character (eg. the lower	2003 // for example, we only want to match every second character (eg. the lower

2004 // case characters on some Unicode pages).	2004 // case characters on some Unicode pages).

2005 int binary_chop_index = (end_index + start_index) / 2;	2005 int binary_chop_index = (end_index + start_index) / 2;

2006 // The first test ensures that we get to the code that handles the ASCII	2006 // The first test ensures that we get to the code that handles the ASCII

2007 // range with a single not-taken branch, speeding up this important	2007 // range with a single not-taken branch, speeding up this important

2008 // character range (even non-ASCII charset-based text has spaces and	2008 // character range (even non-ASCII charset-based text has spaces and

2009 // punctuation).	2009 // punctuation).

2010 if (*border - 1 > String::kMaxAsciiCharCode && // ASCII case.	2010 if (*border - 1 > String::kMaxOneByteCharCode && // ASCII case.
	Yang 2013/01/07 16:15:07 Change comment.
2011 end_index - start_index > (*new_start_index - start_index) * 2 &&	2011 end_index - start_index > (*new_start_index - start_index) * 2 &&

2012 last - first > kSize * 2 &&	2012 last - first > kSize * 2 &&

2013 binary_chop_index > *new_start_index &&	2013 binary_chop_index > *new_start_index &&

2014 ranges->at(binary_chop_index) >= first + 2 * kSize) {	2014 ranges->at(binary_chop_index) >= first + 2 * kSize) {

2015 int scan_forward_for_section_border = binary_chop_index;;	2015 int scan_forward_for_section_border = binary_chop_index;;

2016 int new_border = (ranges->at(binary_chop_index) | kMask) + 1;	2016 int new_border = (ranges->at(binary_chop_index) | kMask) + 1;

2017	2017

2018 while (scan_forward_for_section_border < end_index) {	2018 while (scan_forward_for_section_border < end_index) {

2019 if (ranges->at(scan_forward_for_section_border) > new_border) {	2019 if (ranges->at(scan_forward_for_section_border) > new_border) {

2020 *new_start_index = scan_forward_for_section_border;	2020 *new_start_index = scan_forward_for_section_border;

(...skipping 170 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2191 max_char,	2191 max_char,

2192 &dummy,	2192 &dummy,

2193 flip ? odd_label : even_label,	2193 flip ? odd_label : even_label,

2194 flip ? even_label : odd_label);	2194 flip ? even_label : odd_label);

2195 }	2195 }

2196 }	2196 }

2197	2197

2198	2198

2199 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,	2199 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,

2200 RegExpCharacterClass* cc,	2200 RegExpCharacterClass* cc,

2201 bool ascii,	2201 bool ascii,
	Yang 2013/01/07 16:15:07 Changing the parameter name would make sense.
2202 Label* on_failure,	2202 Label* on_failure,

2203 int cp_offset,	2203 int cp_offset,

2204 bool check_offset,	2204 bool check_offset,

2205 bool preloaded,	2205 bool preloaded,

2206 Zone* zone) {	2206 Zone* zone) {

2207 ZoneList<CharacterRange>* ranges = cc->ranges(zone);	2207 ZoneList<CharacterRange>* ranges = cc->ranges(zone);

2208 if (!CharacterRange::IsCanonical(ranges)) {	2208 if (!CharacterRange::IsCanonical(ranges)) {

2209 CharacterRange::Canonicalize(ranges);	2209 CharacterRange::Canonicalize(ranges);

2210 }	2210 }

2211	2211

2212 int max_char;	2212 int max_char;

2213 if (ascii) {	2213 if (ascii) {

2214 max_char = String::kMaxAsciiCharCode;	2214 max_char = String::kMaxOneByteCharCode;

2215 } else {	2215 } else {

2216 max_char = String::kMaxUtf16CodeUnit;	2216 max_char = String::kMaxUtf16CodeUnit;

2217 }	2217 }

2218	2218

2219 int range_count = ranges->length();	2219 int range_count = ranges->length();

2220	2220

2221 int last_valid_range = range_count - 1;	2221 int last_valid_range = range_count - 1;

2222 while (last_valid_range >= 0) {	2222 while (last_valid_range >= 0) {

2223 CharacterRange& range = ranges->at(last_valid_range);	2223 CharacterRange& range = ranges->at(last_valid_range);

2224 if (range.from() <= max_char) {	2224 if (range.from() <= max_char) {

(...skipping 277 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2502 static inline uint32_t SmearBitsRight(uint32_t v) {	2502 static inline uint32_t SmearBitsRight(uint32_t v) {

2503 v |= v >> 1;	2503 v |= v >> 1;

2504 v |= v >> 2;	2504 v |= v >> 2;

2505 v |= v >> 4;	2505 v |= v >> 4;

2506 v |= v >> 8;	2506 v |= v >> 8;

2507 v |= v >> 16;	2507 v |= v >> 16;

2508 return v;	2508 return v;

2509 }	2509 }

2510	2510

2511	2511

2512 bool QuickCheckDetails::Rationalize(bool asc) {	2512 bool QuickCheckDetails::Rationalize(bool asc) {
	Yang 2013/01/07 16:15:07 Ditto (parameter name).
2513 bool found_useful_op = false;	2513 bool found_useful_op = false;

2514 uint32_t char_mask;	2514 uint32_t char_mask;

2515 if (asc) {	2515 if (asc) {

2516 char_mask = String::kMaxAsciiCharCode;	2516 char_mask = String::kMaxOneByteCharCode;

2517 } else {	2517 } else {

2518 char_mask = String::kMaxUtf16CodeUnit;	2518 char_mask = String::kMaxUtf16CodeUnit;

2519 }	2519 }

2520 mask_ = 0;	2520 mask_ = 0;

2521 value_ = 0;	2521 value_ = 0;

2522 int char_shift = 0;	2522 int char_shift = 0;

2523 for (int i = 0; i < characters_; i++) {	2523 for (int i = 0; i < characters_; i++) {

2524 Position* pos = &positions_[i];	2524 Position* pos = &positions_[i];

2525 if ((pos->mask & String::kMaxAsciiCharCode) != 0) {	2525 if ((pos->mask & String::kMaxOneByteCharCode) != 0) {

2526 found_useful_op = true;	2526 found_useful_op = true;

2527 }	2527 }

2528 mask_ |= (pos->mask & char_mask) << char_shift;	2528 mask_ |= (pos->mask & char_mask) << char_shift;

2529 value_ |= (pos->value & char_mask) << char_shift;	2529 value_ |= (pos->value & char_mask) << char_shift;

2530 char_shift += asc ? 8 : 16;	2530 char_shift += asc ? 8 : 16;

2531 }	2531 }

2532 return found_useful_op;	2532 return found_useful_op;

2533 }	2533 }

2534	2534

2535	2535

(...skipping 21 matching lines...) Expand all Loading...
2557 details->characters());	2557 details->characters());

2558 }	2558 }

2559	2559

2560	2560

2561 bool need_mask = true;	2561 bool need_mask = true;

2562	2562

2563 if (details->characters() == 1) {	2563 if (details->characters() == 1) {

2564 // If number of characters preloaded is 1 then we used a byte or 16 bit	2564 // If number of characters preloaded is 1 then we used a byte or 16 bit

2565 // load so the value is already masked down.	2565 // load so the value is already masked down.

2566 uint32_t char_mask;	2566 uint32_t char_mask;

2567 if (compiler->ascii()) {	2567 if (compiler->ascii()) {
	Yang 2013/01/07 16:15:07 Maybe onebyte() instead of ascii()?
2568 char_mask = String::kMaxAsciiCharCode;	2568 char_mask = String::kMaxOneByteCharCode;

2569 } else {	2569 } else {

2570 char_mask = String::kMaxUtf16CodeUnit;	2570 char_mask = String::kMaxUtf16CodeUnit;

2571 }	2571 }

2572 if ((mask & char_mask) == char_mask) need_mask = false;	2572 if ((mask & char_mask) == char_mask) need_mask = false;

2573 mask &= char_mask;	2573 mask &= char_mask;

2574 } else {	2574 } else {

2575 // For 2-character preloads in ASCII mode or 1-character preloads in	2575 // For 2-character preloads in ASCII mode or 1-character preloads in

2576 // TWO_BYTE mode we also use a 16 bit load with zero extend.	2576 // TWO_BYTE mode we also use a 16 bit load with zero extend.

2577 if (details->characters() == 2 && compiler->ascii()) {	2577 if (details->characters() == 2 && compiler->ascii()) {

2578 if ((mask & 0x7f7f) == 0x7f7f) need_mask = false;	2578 if ((mask & 0xffff) == 0xffff) need_mask = false;

2579 } else if (details->characters() == 1 && !compiler->ascii()) {	2579 } else if (details->characters() == 1 && !compiler->ascii()) {

2580 if ((mask & 0xffff) == 0xffff) need_mask = false;	2580 if ((mask & 0xffff) == 0xffff) need_mask = false;

2581 } else {	2581 } else {

2582 if (mask == 0xffffffff) need_mask = false;	2582 if (mask == 0xffffffff) need_mask = false;

2583 }	2583 }

2584 }	2584 }

2585	2585

2586 if (fall_through_on_failure) {	2586 if (fall_through_on_failure) {

2587 if (need_mask) {	2587 if (need_mask) {

2588 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);	2588 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);

(...skipping 21 matching lines...) Expand all Loading...
2610 // generating a quick check.	2610 // generating a quick check.

2611 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,	2611 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,

2612 RegExpCompiler* compiler,	2612 RegExpCompiler* compiler,

2613 int characters_filled_in,	2613 int characters_filled_in,

2614 bool not_at_start) {	2614 bool not_at_start) {

2615 Isolate* isolate = Isolate::Current();	2615 Isolate* isolate = Isolate::Current();

2616 ASSERT(characters_filled_in < details->characters());	2616 ASSERT(characters_filled_in < details->characters());

2617 int characters = details->characters();	2617 int characters = details->characters();

2618 int char_mask;	2618 int char_mask;

2619 if (compiler->ascii()) {	2619 if (compiler->ascii()) {

2620 char_mask = String::kMaxAsciiCharCode;	2620 char_mask = String::kMaxOneByteCharCode;

2621 } else {	2621 } else {

2622 char_mask = String::kMaxUtf16CodeUnit;	2622 char_mask = String::kMaxUtf16CodeUnit;

2623 }	2623 }

2624 for (int k = 0; k < elms_->length(); k++) {	2624 for (int k = 0; k < elms_->length(); k++) {

2625 TextElement elm = elms_->at(k);	2625 TextElement elm = elms_->at(k);

2626 if (elm.type == TextElement::ATOM) {	2626 if (elm.type == TextElement::ATOM) {

2627 Vector<const uc16> quarks = elm.data.u_atom->data();	2627 Vector<const uc16> quarks = elm.data.u_atom->data();

2628 for (int i = 0; i < characters && i < quarks.length(); i++) {	2628 for (int i = 0; i < characters && i < quarks.length(); i++) {

2629 QuickCheckDetails::Position* pos =	2629 QuickCheckDetails::Position* pos =

2630 details->positions(characters_filled_in);	2630 details->positions(characters_filled_in);

(...skipping 227 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2858 VisitMarker marker(info());	2858 VisitMarker marker(info());

2859 int element_count = elms_->length();	2859 int element_count = elms_->length();

2860 for (int i = 0; i < element_count; i++) {	2860 for (int i = 0; i < element_count; i++) {

2861 TextElement elm = elms_->at(i);	2861 TextElement elm = elms_->at(i);

2862 if (elm.type == TextElement::ATOM) {	2862 if (elm.type == TextElement::ATOM) {

2863 Vector<const uc16> quarks = elm.data.u_atom->data();	2863 Vector<const uc16> quarks = elm.data.u_atom->data();

2864 for (int j = 0; j < quarks.length(); j++) {	2864 for (int j = 0; j < quarks.length(); j++) {

2865 // We don't need special handling for case independence	2865 // We don't need special handling for case independence

2866 // because of the rule that case independence cannot make	2866 // because of the rule that case independence cannot make

2867 // a non-ASCII character match an ASCII character.	2867 // a non-ASCII character match an ASCII character.

2868 if (quarks[j] > String::kMaxAsciiCharCode) {	2868 if (quarks[j] > String::kMaxOneByteCharCode) {
	Yang 2013/01/07 16:15:07 Does the comment still hold true for the Latin1 char set? Apparently ÿ (\u00ff) is part of Latin1, but its uppercase Ÿ is \u0178... Maybe we want to keep FilterASCII the way it is, and use it in combination with kAsciiDataHintTag.
2869 return set_replacement(NULL);	2869 return set_replacement(NULL);

2870 }	2870 }

2871 }	2871 }

2872 } else {	2872 } else {

2873 ASSERT(elm.type == TextElement::CHAR_CLASS);	2873 ASSERT(elm.type == TextElement::CHAR_CLASS);

2874 RegExpCharacterClass* cc = elm.data.u_char_class;	2874 RegExpCharacterClass* cc = elm.data.u_char_class;

2875 ZoneList<CharacterRange>* ranges = cc->ranges(zone());	2875 ZoneList<CharacterRange>* ranges = cc->ranges(zone());

2876 if (!CharacterRange::IsCanonical(ranges)) {	2876 if (!CharacterRange::IsCanonical(ranges)) {

2877 CharacterRange::Canonicalize(ranges);	2877 CharacterRange::Canonicalize(ranges);

2878 }	2878 }

2879 // Now they are in order so we only need to look at the first.	2879 // Now they are in order so we only need to look at the first.

2880 int range_count = ranges->length();	2880 int range_count = ranges->length();

2881 if (cc->is_negated()) {	2881 if (cc->is_negated()) {

2882 if (range_count != 0 &&	2882 if (range_count != 0 &&

2883 ranges->at(0).from() == 0 &&	2883 ranges->at(0).from() == 0 &&

2884 ranges->at(0).to() >= String::kMaxAsciiCharCode) {	2884 ranges->at(0).to() >= String::kMaxOneByteCharCode) {

2885 return set_replacement(NULL);	2885 return set_replacement(NULL);

2886 }	2886 }

2887 } else {	2887 } else {

2888 if (range_count == 0 ||	2888 if (range_count == 0 ||

2889 ranges->at(0).from() > String::kMaxAsciiCharCode) {	2889 ranges->at(0).from() > String::kMaxOneByteCharCode) {

2890 return set_replacement(NULL);	2890 return set_replacement(NULL);

2891 }	2891 }

2892 }	2892 }

2893 }	2893 }

2894 }	2894 }

2895 return FilterSuccessor(depth - 1);	2895 return FilterSuccessor(depth - 1);

2896 }	2896 }

2897	2897

2898	2898

2899 RegExpNode* LoopChoiceNode::FilterASCII(int depth) {	2899 RegExpNode* LoopChoiceNode::FilterASCII(int depth) {

(...skipping 392 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3292 int cp_offset = trace->cp_offset() + elm.cp_offset;	3292 int cp_offset = trace->cp_offset() + elm.cp_offset;

3293 if (elm.type == TextElement::ATOM) {	3293 if (elm.type == TextElement::ATOM) {

3294 Vector<const uc16> quarks = elm.data.u_atom->data();	3294 Vector<const uc16> quarks = elm.data.u_atom->data();

3295 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {	3295 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {

3296 if (first_element_checked && i == 0 && j == 0) continue;	3296 if (first_element_checked && i == 0 && j == 0) continue;

3297 if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue;	3297 if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue;

3298 EmitCharacterFunction* emit_function = NULL;	3298 EmitCharacterFunction* emit_function = NULL;

3299 switch (pass) {	3299 switch (pass) {

3300 case NON_ASCII_MATCH:	3300 case NON_ASCII_MATCH:

3301 ASSERT(ascii);	3301 ASSERT(ascii);

3302 if (quarks[j] > String::kMaxAsciiCharCode) {	3302 if (quarks[j] > String::kMaxOneByteCharCode) {

3303 assembler->GoTo(backtrack);	3303 assembler->GoTo(backtrack);

3304 return;	3304 return;

3305 }	3305 }

3306 break;	3306 break;

3307 case NON_LETTER_CHARACTER_MATCH:	3307 case NON_LETTER_CHARACTER_MATCH:

3308 emit_function = &EmitAtomNonLetter;	3308 emit_function = &EmitAtomNonLetter;

3309 break;	3309 break;

3310 case SIMPLE_CHARACTER_MATCH:	3310 case SIMPLE_CHARACTER_MATCH:

3311 emit_function = &EmitSimpleCharacter;	3311 emit_function = &EmitSimpleCharacter;

3312 break;	3312 break;

(...skipping 178 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3491 ZoneList<CharacterRange>* ranges = node->ranges(zone());	3491 ZoneList<CharacterRange>* ranges = node->ranges(zone());

3492 if (!CharacterRange::IsCanonical(ranges)) {	3492 if (!CharacterRange::IsCanonical(ranges)) {

3493 CharacterRange::Canonicalize(ranges);	3493 CharacterRange::Canonicalize(ranges);

3494 }	3494 }

3495 if (node->is_negated()) {	3495 if (node->is_negated()) {

3496 return ranges->length() == 0 ? on_success() : NULL;	3496 return ranges->length() == 0 ? on_success() : NULL;

3497 }	3497 }

3498 if (ranges->length() != 1) return NULL;	3498 if (ranges->length() != 1) return NULL;

3499 uint32_t max_char;	3499 uint32_t max_char;

3500 if (compiler->ascii()) {	3500 if (compiler->ascii()) {

3501 max_char = String::kMaxAsciiCharCode;	3501 max_char = String::kMaxOneByteCharCode;

3502 } else {	3502 } else {

3503 max_char = String::kMaxUtf16CodeUnit;	3503 max_char = String::kMaxUtf16CodeUnit;

3504 }	3504 }

3505 return ranges->at(0).IsEverything(max_char) ? on_success() : NULL;	3505 return ranges->at(0).IsEverything(max_char) ? on_success() : NULL;

3506 }	3506 }

3507	3507

3508	3508

3509 // Finds the fixed match length of a sequence of nodes that goes from	3509 // Finds the fixed match length of a sequence of nodes that goes from

3510 // this alternative and back to this choice node. If there are variable	3510 // this alternative and back to this choice node. If there are variable

3511 // length nodes or other complications in the way then return a sentinel	3511 // length nodes or other complications in the way then return a sentinel

(...skipping 179 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3691 for (int i = 0; i < kMapSize; i++) map_->at(i) = true;	3691 for (int i = 0; i < kMapSize; i++) map_->at(i) = true;

3692 }	3692 }

3693 }	3693 }

3694	3694

3695	3695

3696 BoyerMooreLookahead::BoyerMooreLookahead(	3696 BoyerMooreLookahead::BoyerMooreLookahead(

3697 int length, RegExpCompiler* compiler, Zone* zone)	3697 int length, RegExpCompiler* compiler, Zone* zone)

3698 : length_(length),	3698 : length_(length),

3699 compiler_(compiler) {	3699 compiler_(compiler) {

3700 if (compiler->ascii()) {	3700 if (compiler->ascii()) {

3701 max_char_ = String::kMaxAsciiCharCode;	3701 max_char_ = String::kMaxOneByteCharCode;

3702 } else {	3702 } else {

3703 max_char_ = String::kMaxUtf16CodeUnit;	3703 max_char_ = String::kMaxUtf16CodeUnit;

3704 }	3704 }

3705 bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);	3705 bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);

3706 for (int i = 0; i < length; i++) {	3706 for (int i = 0; i < length; i++) {

3707 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone);	3707 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone);

3708 }	3708 }

3709 }	3709 }

3710	3710

3711	3711

(...skipping 1618 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5330 }	5330 }

5331	5331

5332	5332

5333 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,	5333 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,

5334 bool is_ascii,	5334 bool is_ascii,

5335 Zone* zone) {	5335 Zone* zone) {

5336 Isolate* isolate = Isolate::Current();	5336 Isolate* isolate = Isolate::Current();

5337 uc16 bottom = from();	5337 uc16 bottom = from();

5338 uc16 top = to();	5338 uc16 top = to();

5339 if (is_ascii) {	5339 if (is_ascii) {

5340 if (bottom > String::kMaxAsciiCharCode) return;	5340 if (bottom > String::kMaxOneByteCharCode) return;

5341 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode;	5341 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;

5342 }	5342 }

5343 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	5343 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

5344 if (top == bottom) {	5344 if (top == bottom) {

5345 // If this is a singleton we just expand the one character.	5345 // If this is a singleton we just expand the one character.

5346 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);	5346 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);

5347 for (int i = 0; i < length; i++) {	5347 for (int i = 0; i < length; i++) {

5348 uc32 chr = chars[i];	5348 uc32 chr = chars[i];

5349 if (chr != bottom) {	5349 if (chr != bottom) {

5350 ranges->Add(CharacterRange::Singleton(chars[i]), zone);	5350 ranges->Add(CharacterRange::Singleton(chars[i]), zone);

5351 }	5351 }

(...skipping 526 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5878 if (offset >= bm->length()) {	5878 if (offset >= bm->length()) {

5879 if (initial_offset == 0) set_bm_info(not_at_start, bm);	5879 if (initial_offset == 0) set_bm_info(not_at_start, bm);

5880 return;	5880 return;

5881 }	5881 }

5882 uc16 character = atom->data()[j];	5882 uc16 character = atom->data()[j];

5883 if (bm->compiler()->ignore_case()) {	5883 if (bm->compiler()->ignore_case()) {

5884 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];	5884 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];

5885 int length = GetCaseIndependentLetters(	5885 int length = GetCaseIndependentLetters(

5886 ISOLATE,	5886 ISOLATE,

5887 character,	5887 character,

5888 bm->max_char() == String::kMaxAsciiCharCode,	5888 bm->max_char() == String::kMaxOneByteCharCode,

5889 chars);	5889 chars);

5890 for (int j = 0; j < length; j++) {	5890 for (int j = 0; j < length; j++) {

5891 bm->Set(offset, chars[j]);	5891 bm->Set(offset, chars[j]);

5892 }	5892 }

5893 } else {	5893 } else {

5894 if (character <= max_char) bm->Set(offset, character);	5894 if (character <= max_char) bm->Set(offset, character);

5895 }	5895 }

5896 }	5896 }

5897 } else {	5897 } else {

5898 ASSERT(text.type == TextElement::CHAR_CLASS);	5898 ASSERT(text.type == TextElement::CHAR_CLASS);

(...skipping 260 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6159 }	6159 }

6160	6160

6161 return compiler.Assemble(&macro_assembler,	6161 return compiler.Assemble(&macro_assembler,

6162 node,	6162 node,

6163 data->capture_count,	6163 data->capture_count,

6164 pattern);	6164 pattern);

6165 }	6165 }

6166	6166

6167	6167

6168 }} // namespace v8::internal	6168 }} // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/json-parser.h ('k') | src/log.cc » ('j') | src/objects.h » ('J')