OLD | NEW |
---|---|
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 1663 matching lines...) Loading... | |
1674 bool ascii_subject, | 1674 bool ascii_subject, |
1675 unibrow::uchar* letters) { | 1675 unibrow::uchar* letters) { |
1676 int length = | 1676 int length = |
1677 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters); | 1677 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters); |
1678 // Unibrow returns 0 or 1 for characters where case independence is | 1678 // Unibrow returns 0 or 1 for characters where case independence is |
1679 // trivial. | 1679 // trivial. |
1680 if (length == 0) { | 1680 if (length == 0) { |
1681 letters[0] = character; | 1681 letters[0] = character; |
1682 length = 1; | 1682 length = 1; |
1683 } | 1683 } |
1684 if (!ascii_subject || character <= String::kMaxAsciiCharCode) { | 1684 if (!ascii_subject || character <= String::kMaxOneByteCharCode) { |
1685 return length; | 1685 return length; |
1686 } | 1686 } |
1687 // The standard requires that non-ASCII characters cannot have ASCII | 1687 // The standard requires that non-ASCII characters cannot have ASCII |
1688 // character codes in their equivalence class. | 1688 // character codes in their equivalence class. |
1689 return 0; | 1689 return 0; |
1690 } | 1690 } |
1691 | 1691 |
1692 | 1692 |
1693 static inline bool EmitSimpleCharacter(Isolate* isolate, | 1693 static inline bool EmitSimpleCharacter(Isolate* isolate, |
1694 RegExpCompiler* compiler, | 1694 RegExpCompiler* compiler, |
(...skipping 30 matching lines...) Loading... | |
1725 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1725 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
1726 int length = GetCaseIndependentLetters(isolate, c, ascii, chars); | 1726 int length = GetCaseIndependentLetters(isolate, c, ascii, chars); |
1727 if (length < 1) { | 1727 if (length < 1) { |
1728 // This can't match. Must be an ASCII subject and a non-ASCII character. | 1728 // This can't match. Must be an ASCII subject and a non-ASCII character. |
1729 // We do not need to do anything since the ASCII pass already handled this. | 1729 // We do not need to do anything since the ASCII pass already handled this. |
1730 return false; // Bounds not checked. | 1730 return false; // Bounds not checked. |
1731 } | 1731 } |
1732 bool checked = false; | 1732 bool checked = false; |
1733 // We handle the length > 1 case in a later pass. | 1733 // We handle the length > 1 case in a later pass. |
1734 if (length == 1) { | 1734 if (length == 1) { |
1735 if (ascii && c > String::kMaxAsciiCharCodeU) { | 1735 if (ascii && c > String::kMaxOneByteCharCodeU) { |
1736 // Can't match - see above. | 1736 // Can't match - see above. |
1737 return false; // Bounds not checked. | 1737 return false; // Bounds not checked. |
1738 } | 1738 } |
1739 if (!preloaded) { | 1739 if (!preloaded) { |
1740 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 1740 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
1741 checked = check; | 1741 checked = check; |
1742 } | 1742 } |
1743 macro_assembler->CheckNotCharacter(c, on_failure); | 1743 macro_assembler->CheckNotCharacter(c, on_failure); |
1744 } | 1744 } |
1745 return checked; | 1745 return checked; |
1746 } | 1746 } |
1747 | 1747 |
1748 | 1748 |
1749 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, | 1749 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, |
1750 bool ascii, | 1750 bool ascii, |
1751 uc16 c1, | 1751 uc16 c1, |
1752 uc16 c2, | 1752 uc16 c2, |
1753 Label* on_failure) { | 1753 Label* on_failure) { |
1754 uc16 char_mask; | 1754 uc16 char_mask; |
1755 if (ascii) { | 1755 if (ascii) { |
1756 char_mask = String::kMaxAsciiCharCode; | 1756 char_mask = String::kMaxOneByteCharCode; |
1757 } else { | 1757 } else { |
1758 char_mask = String::kMaxUtf16CodeUnit; | 1758 char_mask = String::kMaxUtf16CodeUnit; |
1759 } | 1759 } |
1760 uc16 exor = c1 ^ c2; | 1760 uc16 exor = c1 ^ c2; |
1761 // Check whether exor has only one bit set. | 1761 // Check whether exor has only one bit set. |
1762 if (((exor - 1) & exor) == 0) { | 1762 if (((exor - 1) & exor) == 0) { |
1763 // If c1 and c2 differ only by one bit. | 1763 // If c1 and c2 differ only by one bit. |
1764 // Ecma262UnCanonicalize always gives the highest number last. | 1764 // Ecma262UnCanonicalize always gives the highest number last. |
1765 ASSERT(c2 > c1); | 1765 ASSERT(c2 > c1); |
1766 uc16 mask = char_mask ^ exor; | 1766 uc16 mask = char_mask ^ exor; |
(...skipping 233 matching lines...) Loading... | |
2000 // encoding space can be quickly tested with a table lookup, so we don't | 2000 // encoding space can be quickly tested with a table lookup, so we don't |
2001 // wish to do binary chop search at a smaller granularity than that. A | 2001 // wish to do binary chop search at a smaller granularity than that. A |
2002 // 128-character space can take up a lot of space in the ranges array if, | 2002 // 128-character space can take up a lot of space in the ranges array if, |
2003 // for example, we only want to match every second character (eg. the lower | 2003 // for example, we only want to match every second character (eg. the lower |
2004 // case characters on some Unicode pages). | 2004 // case characters on some Unicode pages). |
2005 int binary_chop_index = (end_index + start_index) / 2; | 2005 int binary_chop_index = (end_index + start_index) / 2; |
2006 // The first test ensures that we get to the code that handles the ASCII | 2006 // The first test ensures that we get to the code that handles the ASCII |
2007 // range with a single not-taken branch, speeding up this important | 2007 // range with a single not-taken branch, speeding up this important |
2008 // character range (even non-ASCII charset-based text has spaces and | 2008 // character range (even non-ASCII charset-based text has spaces and |
2009 // punctuation). | 2009 // punctuation). |
2010 if (*border - 1 > String::kMaxAsciiCharCode && // ASCII case. | 2010 if (*border - 1 > String::kMaxOneByteCharCode && // ASCII case. |
Yang
2013/01/07 16:15:07
Change comment.
| |
2011 end_index - start_index > (*new_start_index - start_index) * 2 && | 2011 end_index - start_index > (*new_start_index - start_index) * 2 && |
2012 last - first > kSize * 2 && | 2012 last - first > kSize * 2 && |
2013 binary_chop_index > *new_start_index && | 2013 binary_chop_index > *new_start_index && |
2014 ranges->at(binary_chop_index) >= first + 2 * kSize) { | 2014 ranges->at(binary_chop_index) >= first + 2 * kSize) { |
2015 int scan_forward_for_section_border = binary_chop_index;; | 2015 int scan_forward_for_section_border = binary_chop_index;; |
2016 int new_border = (ranges->at(binary_chop_index) | kMask) + 1; | 2016 int new_border = (ranges->at(binary_chop_index) | kMask) + 1; |
2017 | 2017 |
2018 while (scan_forward_for_section_border < end_index) { | 2018 while (scan_forward_for_section_border < end_index) { |
2019 if (ranges->at(scan_forward_for_section_border) > new_border) { | 2019 if (ranges->at(scan_forward_for_section_border) > new_border) { |
2020 *new_start_index = scan_forward_for_section_border; | 2020 *new_start_index = scan_forward_for_section_border; |
(...skipping 170 matching lines...) Loading... | |
2191 max_char, | 2191 max_char, |
2192 &dummy, | 2192 &dummy, |
2193 flip ? odd_label : even_label, | 2193 flip ? odd_label : even_label, |
2194 flip ? even_label : odd_label); | 2194 flip ? even_label : odd_label); |
2195 } | 2195 } |
2196 } | 2196 } |
2197 | 2197 |
2198 | 2198 |
2199 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 2199 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, |
2200 RegExpCharacterClass* cc, | 2200 RegExpCharacterClass* cc, |
2201 bool ascii, | 2201 bool ascii, |
Yang
2013/01/07 16:15:07
Changing the parameter name would make sense.
| |
2202 Label* on_failure, | 2202 Label* on_failure, |
2203 int cp_offset, | 2203 int cp_offset, |
2204 bool check_offset, | 2204 bool check_offset, |
2205 bool preloaded, | 2205 bool preloaded, |
2206 Zone* zone) { | 2206 Zone* zone) { |
2207 ZoneList<CharacterRange>* ranges = cc->ranges(zone); | 2207 ZoneList<CharacterRange>* ranges = cc->ranges(zone); |
2208 if (!CharacterRange::IsCanonical(ranges)) { | 2208 if (!CharacterRange::IsCanonical(ranges)) { |
2209 CharacterRange::Canonicalize(ranges); | 2209 CharacterRange::Canonicalize(ranges); |
2210 } | 2210 } |
2211 | 2211 |
2212 int max_char; | 2212 int max_char; |
2213 if (ascii) { | 2213 if (ascii) { |
2214 max_char = String::kMaxAsciiCharCode; | 2214 max_char = String::kMaxOneByteCharCode; |
2215 } else { | 2215 } else { |
2216 max_char = String::kMaxUtf16CodeUnit; | 2216 max_char = String::kMaxUtf16CodeUnit; |
2217 } | 2217 } |
2218 | 2218 |
2219 int range_count = ranges->length(); | 2219 int range_count = ranges->length(); |
2220 | 2220 |
2221 int last_valid_range = range_count - 1; | 2221 int last_valid_range = range_count - 1; |
2222 while (last_valid_range >= 0) { | 2222 while (last_valid_range >= 0) { |
2223 CharacterRange& range = ranges->at(last_valid_range); | 2223 CharacterRange& range = ranges->at(last_valid_range); |
2224 if (range.from() <= max_char) { | 2224 if (range.from() <= max_char) { |
(...skipping 277 matching lines...) Loading... | |
2502 static inline uint32_t SmearBitsRight(uint32_t v) { | 2502 static inline uint32_t SmearBitsRight(uint32_t v) { |
2503 v |= v >> 1; | 2503 v |= v >> 1; |
2504 v |= v >> 2; | 2504 v |= v >> 2; |
2505 v |= v >> 4; | 2505 v |= v >> 4; |
2506 v |= v >> 8; | 2506 v |= v >> 8; |
2507 v |= v >> 16; | 2507 v |= v >> 16; |
2508 return v; | 2508 return v; |
2509 } | 2509 } |
2510 | 2510 |
2511 | 2511 |
2512 bool QuickCheckDetails::Rationalize(bool asc) { | 2512 bool QuickCheckDetails::Rationalize(bool asc) { |
Yang
2013/01/07 16:15:07
Ditto (parameter name).
| |
2513 bool found_useful_op = false; | 2513 bool found_useful_op = false; |
2514 uint32_t char_mask; | 2514 uint32_t char_mask; |
2515 if (asc) { | 2515 if (asc) { |
2516 char_mask = String::kMaxAsciiCharCode; | 2516 char_mask = String::kMaxOneByteCharCode; |
2517 } else { | 2517 } else { |
2518 char_mask = String::kMaxUtf16CodeUnit; | 2518 char_mask = String::kMaxUtf16CodeUnit; |
2519 } | 2519 } |
2520 mask_ = 0; | 2520 mask_ = 0; |
2521 value_ = 0; | 2521 value_ = 0; |
2522 int char_shift = 0; | 2522 int char_shift = 0; |
2523 for (int i = 0; i < characters_; i++) { | 2523 for (int i = 0; i < characters_; i++) { |
2524 Position* pos = &positions_[i]; | 2524 Position* pos = &positions_[i]; |
2525 if ((pos->mask & String::kMaxAsciiCharCode) != 0) { | 2525 if ((pos->mask & String::kMaxOneByteCharCode) != 0) { |
2526 found_useful_op = true; | 2526 found_useful_op = true; |
2527 } | 2527 } |
2528 mask_ |= (pos->mask & char_mask) << char_shift; | 2528 mask_ |= (pos->mask & char_mask) << char_shift; |
2529 value_ |= (pos->value & char_mask) << char_shift; | 2529 value_ |= (pos->value & char_mask) << char_shift; |
2530 char_shift += asc ? 8 : 16; | 2530 char_shift += asc ? 8 : 16; |
2531 } | 2531 } |
2532 return found_useful_op; | 2532 return found_useful_op; |
2533 } | 2533 } |
2534 | 2534 |
2535 | 2535 |
(...skipping 21 matching lines...) Loading... | |
2557 details->characters()); | 2557 details->characters()); |
2558 } | 2558 } |
2559 | 2559 |
2560 | 2560 |
2561 bool need_mask = true; | 2561 bool need_mask = true; |
2562 | 2562 |
2563 if (details->characters() == 1) { | 2563 if (details->characters() == 1) { |
2564 // If number of characters preloaded is 1 then we used a byte or 16 bit | 2564 // If number of characters preloaded is 1 then we used a byte or 16 bit |
2565 // load so the value is already masked down. | 2565 // load so the value is already masked down. |
2566 uint32_t char_mask; | 2566 uint32_t char_mask; |
2567 if (compiler->ascii()) { | 2567 if (compiler->ascii()) { |
Yang
2013/01/07 16:15:07
Maybe onebyte() instead of ascii()?
| |
2568 char_mask = String::kMaxAsciiCharCode; | 2568 char_mask = String::kMaxOneByteCharCode; |
2569 } else { | 2569 } else { |
2570 char_mask = String::kMaxUtf16CodeUnit; | 2570 char_mask = String::kMaxUtf16CodeUnit; |
2571 } | 2571 } |
2572 if ((mask & char_mask) == char_mask) need_mask = false; | 2572 if ((mask & char_mask) == char_mask) need_mask = false; |
2573 mask &= char_mask; | 2573 mask &= char_mask; |
2574 } else { | 2574 } else { |
2575 // For 2-character preloads in ASCII mode or 1-character preloads in | 2575 // For 2-character preloads in ASCII mode or 1-character preloads in |
2576 // TWO_BYTE mode we also use a 16 bit load with zero extend. | 2576 // TWO_BYTE mode we also use a 16 bit load with zero extend. |
2577 if (details->characters() == 2 && compiler->ascii()) { | 2577 if (details->characters() == 2 && compiler->ascii()) { |
2578 if ((mask & 0x7f7f) == 0x7f7f) need_mask = false; | 2578 if ((mask & 0xffff) == 0xffff) need_mask = false; |
2579 } else if (details->characters() == 1 && !compiler->ascii()) { | 2579 } else if (details->characters() == 1 && !compiler->ascii()) { |
2580 if ((mask & 0xffff) == 0xffff) need_mask = false; | 2580 if ((mask & 0xffff) == 0xffff) need_mask = false; |
2581 } else { | 2581 } else { |
2582 if (mask == 0xffffffff) need_mask = false; | 2582 if (mask == 0xffffffff) need_mask = false; |
2583 } | 2583 } |
2584 } | 2584 } |
2585 | 2585 |
2586 if (fall_through_on_failure) { | 2586 if (fall_through_on_failure) { |
2587 if (need_mask) { | 2587 if (need_mask) { |
2588 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); | 2588 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); |
(...skipping 21 matching lines...) Loading... | |
2610 // generating a quick check. | 2610 // generating a quick check. |
2611 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2611 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, |
2612 RegExpCompiler* compiler, | 2612 RegExpCompiler* compiler, |
2613 int characters_filled_in, | 2613 int characters_filled_in, |
2614 bool not_at_start) { | 2614 bool not_at_start) { |
2615 Isolate* isolate = Isolate::Current(); | 2615 Isolate* isolate = Isolate::Current(); |
2616 ASSERT(characters_filled_in < details->characters()); | 2616 ASSERT(characters_filled_in < details->characters()); |
2617 int characters = details->characters(); | 2617 int characters = details->characters(); |
2618 int char_mask; | 2618 int char_mask; |
2619 if (compiler->ascii()) { | 2619 if (compiler->ascii()) { |
2620 char_mask = String::kMaxAsciiCharCode; | 2620 char_mask = String::kMaxOneByteCharCode; |
2621 } else { | 2621 } else { |
2622 char_mask = String::kMaxUtf16CodeUnit; | 2622 char_mask = String::kMaxUtf16CodeUnit; |
2623 } | 2623 } |
2624 for (int k = 0; k < elms_->length(); k++) { | 2624 for (int k = 0; k < elms_->length(); k++) { |
2625 TextElement elm = elms_->at(k); | 2625 TextElement elm = elms_->at(k); |
2626 if (elm.type == TextElement::ATOM) { | 2626 if (elm.type == TextElement::ATOM) { |
2627 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2627 Vector<const uc16> quarks = elm.data.u_atom->data(); |
2628 for (int i = 0; i < characters && i < quarks.length(); i++) { | 2628 for (int i = 0; i < characters && i < quarks.length(); i++) { |
2629 QuickCheckDetails::Position* pos = | 2629 QuickCheckDetails::Position* pos = |
2630 details->positions(characters_filled_in); | 2630 details->positions(characters_filled_in); |
(...skipping 227 matching lines...) Loading... | |
2858 VisitMarker marker(info()); | 2858 VisitMarker marker(info()); |
2859 int element_count = elms_->length(); | 2859 int element_count = elms_->length(); |
2860 for (int i = 0; i < element_count; i++) { | 2860 for (int i = 0; i < element_count; i++) { |
2861 TextElement elm = elms_->at(i); | 2861 TextElement elm = elms_->at(i); |
2862 if (elm.type == TextElement::ATOM) { | 2862 if (elm.type == TextElement::ATOM) { |
2863 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2863 Vector<const uc16> quarks = elm.data.u_atom->data(); |
2864 for (int j = 0; j < quarks.length(); j++) { | 2864 for (int j = 0; j < quarks.length(); j++) { |
2865 // We don't need special handling for case independence | 2865 // We don't need special handling for case independence |
2866 // because of the rule that case independence cannot make | 2866 // because of the rule that case independence cannot make |
2867 // a non-ASCII character match an ASCII character. | 2867 // a non-ASCII character match an ASCII character. |
2868 if (quarks[j] > String::kMaxAsciiCharCode) { | 2868 if (quarks[j] > String::kMaxOneByteCharCode) { |
Yang
2013/01/07 16:15:07
Does the comment still hold true for the Latin1 ch
| |
2869 return set_replacement(NULL); | 2869 return set_replacement(NULL); |
2870 } | 2870 } |
2871 } | 2871 } |
2872 } else { | 2872 } else { |
2873 ASSERT(elm.type == TextElement::CHAR_CLASS); | 2873 ASSERT(elm.type == TextElement::CHAR_CLASS); |
2874 RegExpCharacterClass* cc = elm.data.u_char_class; | 2874 RegExpCharacterClass* cc = elm.data.u_char_class; |
2875 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); | 2875 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); |
2876 if (!CharacterRange::IsCanonical(ranges)) { | 2876 if (!CharacterRange::IsCanonical(ranges)) { |
2877 CharacterRange::Canonicalize(ranges); | 2877 CharacterRange::Canonicalize(ranges); |
2878 } | 2878 } |
2879 // Now they are in order so we only need to look at the first. | 2879 // Now they are in order so we only need to look at the first. |
2880 int range_count = ranges->length(); | 2880 int range_count = ranges->length(); |
2881 if (cc->is_negated()) { | 2881 if (cc->is_negated()) { |
2882 if (range_count != 0 && | 2882 if (range_count != 0 && |
2883 ranges->at(0).from() == 0 && | 2883 ranges->at(0).from() == 0 && |
2884 ranges->at(0).to() >= String::kMaxAsciiCharCode) { | 2884 ranges->at(0).to() >= String::kMaxOneByteCharCode) { |
2885 return set_replacement(NULL); | 2885 return set_replacement(NULL); |
2886 } | 2886 } |
2887 } else { | 2887 } else { |
2888 if (range_count == 0 || | 2888 if (range_count == 0 || |
2889 ranges->at(0).from() > String::kMaxAsciiCharCode) { | 2889 ranges->at(0).from() > String::kMaxOneByteCharCode) { |
2890 return set_replacement(NULL); | 2890 return set_replacement(NULL); |
2891 } | 2891 } |
2892 } | 2892 } |
2893 } | 2893 } |
2894 } | 2894 } |
2895 return FilterSuccessor(depth - 1); | 2895 return FilterSuccessor(depth - 1); |
2896 } | 2896 } |
2897 | 2897 |
2898 | 2898 |
2899 RegExpNode* LoopChoiceNode::FilterASCII(int depth) { | 2899 RegExpNode* LoopChoiceNode::FilterASCII(int depth) { |
(...skipping 392 matching lines...) Loading... | |
3292 int cp_offset = trace->cp_offset() + elm.cp_offset; | 3292 int cp_offset = trace->cp_offset() + elm.cp_offset; |
3293 if (elm.type == TextElement::ATOM) { | 3293 if (elm.type == TextElement::ATOM) { |
3294 Vector<const uc16> quarks = elm.data.u_atom->data(); | 3294 Vector<const uc16> quarks = elm.data.u_atom->data(); |
3295 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { | 3295 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { |
3296 if (first_element_checked && i == 0 && j == 0) continue; | 3296 if (first_element_checked && i == 0 && j == 0) continue; |
3297 if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue; | 3297 if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue; |
3298 EmitCharacterFunction* emit_function = NULL; | 3298 EmitCharacterFunction* emit_function = NULL; |
3299 switch (pass) { | 3299 switch (pass) { |
3300 case NON_ASCII_MATCH: | 3300 case NON_ASCII_MATCH: |
3301 ASSERT(ascii); | 3301 ASSERT(ascii); |
3302 if (quarks[j] > String::kMaxAsciiCharCode) { | 3302 if (quarks[j] > String::kMaxOneByteCharCode) { |
3303 assembler->GoTo(backtrack); | 3303 assembler->GoTo(backtrack); |
3304 return; | 3304 return; |
3305 } | 3305 } |
3306 break; | 3306 break; |
3307 case NON_LETTER_CHARACTER_MATCH: | 3307 case NON_LETTER_CHARACTER_MATCH: |
3308 emit_function = &EmitAtomNonLetter; | 3308 emit_function = &EmitAtomNonLetter; |
3309 break; | 3309 break; |
3310 case SIMPLE_CHARACTER_MATCH: | 3310 case SIMPLE_CHARACTER_MATCH: |
3311 emit_function = &EmitSimpleCharacter; | 3311 emit_function = &EmitSimpleCharacter; |
3312 break; | 3312 break; |
(...skipping 178 matching lines...) Loading... | |
3491 ZoneList<CharacterRange>* ranges = node->ranges(zone()); | 3491 ZoneList<CharacterRange>* ranges = node->ranges(zone()); |
3492 if (!CharacterRange::IsCanonical(ranges)) { | 3492 if (!CharacterRange::IsCanonical(ranges)) { |
3493 CharacterRange::Canonicalize(ranges); | 3493 CharacterRange::Canonicalize(ranges); |
3494 } | 3494 } |
3495 if (node->is_negated()) { | 3495 if (node->is_negated()) { |
3496 return ranges->length() == 0 ? on_success() : NULL; | 3496 return ranges->length() == 0 ? on_success() : NULL; |
3497 } | 3497 } |
3498 if (ranges->length() != 1) return NULL; | 3498 if (ranges->length() != 1) return NULL; |
3499 uint32_t max_char; | 3499 uint32_t max_char; |
3500 if (compiler->ascii()) { | 3500 if (compiler->ascii()) { |
3501 max_char = String::kMaxAsciiCharCode; | 3501 max_char = String::kMaxOneByteCharCode; |
3502 } else { | 3502 } else { |
3503 max_char = String::kMaxUtf16CodeUnit; | 3503 max_char = String::kMaxUtf16CodeUnit; |
3504 } | 3504 } |
3505 return ranges->at(0).IsEverything(max_char) ? on_success() : NULL; | 3505 return ranges->at(0).IsEverything(max_char) ? on_success() : NULL; |
3506 } | 3506 } |
3507 | 3507 |
3508 | 3508 |
3509 // Finds the fixed match length of a sequence of nodes that goes from | 3509 // Finds the fixed match length of a sequence of nodes that goes from |
3510 // this alternative and back to this choice node. If there are variable | 3510 // this alternative and back to this choice node. If there are variable |
3511 // length nodes or other complications in the way then return a sentinel | 3511 // length nodes or other complications in the way then return a sentinel |
(...skipping 179 matching lines...) Loading... | |
3691 for (int i = 0; i < kMapSize; i++) map_->at(i) = true; | 3691 for (int i = 0; i < kMapSize; i++) map_->at(i) = true; |
3692 } | 3692 } |
3693 } | 3693 } |
3694 | 3694 |
3695 | 3695 |
3696 BoyerMooreLookahead::BoyerMooreLookahead( | 3696 BoyerMooreLookahead::BoyerMooreLookahead( |
3697 int length, RegExpCompiler* compiler, Zone* zone) | 3697 int length, RegExpCompiler* compiler, Zone* zone) |
3698 : length_(length), | 3698 : length_(length), |
3699 compiler_(compiler) { | 3699 compiler_(compiler) { |
3700 if (compiler->ascii()) { | 3700 if (compiler->ascii()) { |
3701 max_char_ = String::kMaxAsciiCharCode; | 3701 max_char_ = String::kMaxOneByteCharCode; |
3702 } else { | 3702 } else { |
3703 max_char_ = String::kMaxUtf16CodeUnit; | 3703 max_char_ = String::kMaxUtf16CodeUnit; |
3704 } | 3704 } |
3705 bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone); | 3705 bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone); |
3706 for (int i = 0; i < length; i++) { | 3706 for (int i = 0; i < length; i++) { |
3707 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone); | 3707 bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone); |
3708 } | 3708 } |
3709 } | 3709 } |
3710 | 3710 |
3711 | 3711 |
(...skipping 1618 matching lines...) Loading... | |
5330 } | 5330 } |
5331 | 5331 |
5332 | 5332 |
5333 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, | 5333 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, |
5334 bool is_ascii, | 5334 bool is_ascii, |
5335 Zone* zone) { | 5335 Zone* zone) { |
5336 Isolate* isolate = Isolate::Current(); | 5336 Isolate* isolate = Isolate::Current(); |
5337 uc16 bottom = from(); | 5337 uc16 bottom = from(); |
5338 uc16 top = to(); | 5338 uc16 top = to(); |
5339 if (is_ascii) { | 5339 if (is_ascii) { |
5340 if (bottom > String::kMaxAsciiCharCode) return; | 5340 if (bottom > String::kMaxOneByteCharCode) return; |
5341 if (top > String::kMaxAsciiCharCode) top = String::kMaxAsciiCharCode; | 5341 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode; |
5342 } | 5342 } |
5343 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 5343 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
5344 if (top == bottom) { | 5344 if (top == bottom) { |
5345 // If this is a singleton we just expand the one character. | 5345 // If this is a singleton we just expand the one character. |
5346 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); | 5346 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); |
5347 for (int i = 0; i < length; i++) { | 5347 for (int i = 0; i < length; i++) { |
5348 uc32 chr = chars[i]; | 5348 uc32 chr = chars[i]; |
5349 if (chr != bottom) { | 5349 if (chr != bottom) { |
5350 ranges->Add(CharacterRange::Singleton(chars[i]), zone); | 5350 ranges->Add(CharacterRange::Singleton(chars[i]), zone); |
5351 } | 5351 } |
(...skipping 526 matching lines...) Loading... | |
5878 if (offset >= bm->length()) { | 5878 if (offset >= bm->length()) { |
5879 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 5879 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
5880 return; | 5880 return; |
5881 } | 5881 } |
5882 uc16 character = atom->data()[j]; | 5882 uc16 character = atom->data()[j]; |
5883 if (bm->compiler()->ignore_case()) { | 5883 if (bm->compiler()->ignore_case()) { |
5884 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 5884 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
5885 int length = GetCaseIndependentLetters( | 5885 int length = GetCaseIndependentLetters( |
5886 ISOLATE, | 5886 ISOLATE, |
5887 character, | 5887 character, |
5888 bm->max_char() == String::kMaxAsciiCharCode, | 5888 bm->max_char() == String::kMaxOneByteCharCode, |
5889 chars); | 5889 chars); |
5890 for (int j = 0; j < length; j++) { | 5890 for (int j = 0; j < length; j++) { |
5891 bm->Set(offset, chars[j]); | 5891 bm->Set(offset, chars[j]); |
5892 } | 5892 } |
5893 } else { | 5893 } else { |
5894 if (character <= max_char) bm->Set(offset, character); | 5894 if (character <= max_char) bm->Set(offset, character); |
5895 } | 5895 } |
5896 } | 5896 } |
5897 } else { | 5897 } else { |
5898 ASSERT(text.type == TextElement::CHAR_CLASS); | 5898 ASSERT(text.type == TextElement::CHAR_CLASS); |
(...skipping 260 matching lines...) Loading... | |
6159 } | 6159 } |
6160 | 6160 |
6161 return compiler.Assemble(&macro_assembler, | 6161 return compiler.Assemble(&macro_assembler, |
6162 node, | 6162 node, |
6163 data->capture_count, | 6163 data->capture_count, |
6164 pattern); | 6164 pattern); |
6165 } | 6165 } |
6166 | 6166 |
6167 | 6167 |
6168 }} // namespace v8::internal | 6168 }} // namespace v8::internal |
OLD | NEW |