OLD | NEW |
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 1405 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1416 // This is called as we come into a loop choice node and some other tricky | 1416 // This is called as we come into a loop choice node and some other tricky |
1417 // nodes. It normalises the state of the code generator to ensure we can | 1417 // nodes. It normalises the state of the code generator to ensure we can |
1418 // generate generic code. | 1418 // generate generic code. |
1419 bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) { | 1419 bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) { |
1420 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 1420 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
1421 | 1421 |
1422 ASSERT(actions_ != NULL || | 1422 ASSERT(actions_ != NULL || |
1423 cp_offset_ != 0 || | 1423 cp_offset_ != 0 || |
1424 backtrack() != NULL || | 1424 backtrack() != NULL || |
1425 characters_preloaded_ != 0 || | 1425 characters_preloaded_ != 0 || |
1426 quick_check_performed_.characters() != 0); | 1426 quick_check_performed_.characters() != 0 || |
| 1427 bound_checked_up_to_ != 0); |
1427 | 1428 |
1428 if (actions_ == NULL && backtrack() == NULL) { | 1429 if (actions_ == NULL && backtrack() == NULL) { |
1429 // Here we just have some deferred cp advances to fix and we are back to | 1430 // Here we just have some deferred cp advances to fix and we are back to |
1430 // a normal situation. We may also have to forget some information gained | 1431 // a normal situation. We may also have to forget some information gained |
1431 // through a quick check that was already performed. | 1432 // through a quick check that was already performed. |
1432 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_); | 1433 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_); |
1433 // Create a new trivial state and generate the node with that. | 1434 // Create a new trivial state and generate the node with that. |
1434 GenerationVariant new_state; | 1435 GenerationVariant new_state; |
1435 return successor->Emit(compiler, &new_state); | 1436 return successor->Emit(compiler, &new_state); |
1436 } | 1437 } |
(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1640 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 1641 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
1641 checked = check; | 1642 checked = check; |
1642 } | 1643 } |
1643 macro_assembler->CheckNotCharacter(c, on_failure); | 1644 macro_assembler->CheckNotCharacter(c, on_failure); |
1644 } | 1645 } |
1645 return checked; | 1646 return checked; |
1646 } | 1647 } |
1647 | 1648 |
1648 | 1649 |
1649 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, | 1650 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, |
| 1651 bool ascii, |
1650 uc16 c1, | 1652 uc16 c1, |
1651 uc16 c2, | 1653 uc16 c2, |
1652 Label* on_failure) { | 1654 Label* on_failure) { |
| 1655 uc16 char_mask; |
| 1656 if (ascii) { |
| 1657 char_mask = String::kMaxAsciiCharCode; |
| 1658 } else { |
| 1659 char_mask = String::kMaxUC16CharCode; |
| 1660 } |
1653 uc16 exor = c1 ^ c2; | 1661 uc16 exor = c1 ^ c2; |
1654 // Check whether exor has only one bit set. | 1662 // Check whether exor has only one bit set. |
1655 if (((exor - 1) & exor) == 0) { | 1663 if (((exor - 1) & exor) == 0) { |
1656 // If c1 and c2 differ only by one bit. | 1664 // If c1 and c2 differ only by one bit. |
1657 // Ecma262UnCanonicalize always gives the highest number last. | 1665 // Ecma262UnCanonicalize always gives the highest number last. |
1658 ASSERT(c2 > c1); | 1666 ASSERT(c2 > c1); |
1659 uc16 mask = String::kMaxUC16CharCode ^ exor; | 1667 uc16 mask = char_mask ^ exor; |
1660 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure); | 1668 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure); |
1661 return true; | 1669 return true; |
1662 } | 1670 } |
1663 ASSERT(c2 > c1); | 1671 ASSERT(c2 > c1); |
1664 uc16 diff = c2 - c1; | 1672 uc16 diff = c2 - c1; |
1665 if (((diff - 1) & diff) == 0 && c1 >= diff) { | 1673 if (((diff - 1) & diff) == 0 && c1 >= diff) { |
1666 // If the characters differ by 2^n but don't differ by one bit then | 1674 // If the characters differ by 2^n but don't differ by one bit then |
1667 // subtract the difference from the found character, then do the or | 1675 // subtract the difference from the found character, then do the or |
1668 // trick. We avoid the theoretical case where negative numbers are | 1676 // trick. We avoid the theoretical case where negative numbers are |
1669 // involved in order to simplify code generation. | 1677 // involved in order to simplify code generation. |
1670 uc16 mask = String::kMaxUC16CharCode ^ diff; | 1678 uc16 mask = char_mask ^ diff; |
1671 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, | 1679 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, |
1672 diff, | 1680 diff, |
1673 mask, | 1681 mask, |
1674 on_failure); | 1682 on_failure); |
1675 return true; | 1683 return true; |
1676 } | 1684 } |
1677 return false; | 1685 return false; |
1678 } | 1686 } |
1679 | 1687 |
1680 | 1688 |
1681 // Only emits letters (things that have case). Only used for case independent | 1689 // Only emits letters (things that have case). Only used for case independent |
1682 // matches. | 1690 // matches. |
1683 static inline bool EmitAtomLetter( | 1691 static inline bool EmitAtomLetter( |
1684 RegExpMacroAssembler* macro_assembler, | 1692 RegExpMacroAssembler* macro_assembler, |
| 1693 bool ascii, |
1685 uc16 c, | 1694 uc16 c, |
1686 Label* on_failure, | 1695 Label* on_failure, |
1687 int cp_offset, | 1696 int cp_offset, |
1688 bool check, | 1697 bool check, |
1689 bool preloaded) { | 1698 bool preloaded) { |
1690 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1699 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
1691 int length = uncanonicalize.get(c, '\0', chars); | 1700 int length = uncanonicalize.get(c, '\0', chars); |
1692 if (length <= 1) return false; | 1701 if (length <= 1) return false; |
1693 // We may not need to check against the end of the input string | 1702 // We may not need to check against the end of the input string |
1694 // if this character lies before a character that matched. | 1703 // if this character lies before a character that matched. |
1695 if (!preloaded) { | 1704 if (!preloaded) { |
1696 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 1705 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
1697 } | 1706 } |
1698 Label ok; | 1707 Label ok; |
1699 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); | 1708 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); |
1700 switch (length) { | 1709 switch (length) { |
1701 case 2: { | 1710 case 2: { |
1702 if (ShortCutEmitCharacterPair(macro_assembler, | 1711 if (ShortCutEmitCharacterPair(macro_assembler, |
| 1712 ascii, |
1703 chars[0], | 1713 chars[0], |
1704 chars[1], | 1714 chars[1], |
1705 on_failure)) { | 1715 on_failure)) { |
1706 } else { | 1716 } else { |
1707 macro_assembler->CheckCharacter(chars[0], &ok); | 1717 macro_assembler->CheckCharacter(chars[0], &ok); |
1708 macro_assembler->CheckNotCharacter(chars[1], on_failure); | 1718 macro_assembler->CheckNotCharacter(chars[1], on_failure); |
1709 macro_assembler->Bind(&ok); | 1719 macro_assembler->Bind(&ok); |
1710 } | 1720 } |
1711 break; | 1721 break; |
1712 } | 1722 } |
(...skipping 287 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2000 if (details->characters() == 1) { | 2010 if (details->characters() == 1) { |
2001 // If number of characters preloaded is 1 then we used a byte or 16 bit | 2011 // If number of characters preloaded is 1 then we used a byte or 16 bit |
2002 // load so the value is already masked down. | 2012 // load so the value is already masked down. |
2003 uint32_t char_mask; | 2013 uint32_t char_mask; |
2004 if (compiler->ascii()) { | 2014 if (compiler->ascii()) { |
2005 char_mask = String::kMaxAsciiCharCode; | 2015 char_mask = String::kMaxAsciiCharCode; |
2006 } else { | 2016 } else { |
2007 char_mask = String::kMaxUC16CharCode; | 2017 char_mask = String::kMaxUC16CharCode; |
2008 } | 2018 } |
2009 if ((mask & char_mask) == char_mask) need_mask = false; | 2019 if ((mask & char_mask) == char_mask) need_mask = false; |
| 2020 mask &= char_mask; |
2010 } else { | 2021 } else { |
2011 // For 2-character preloads in ASCII mode we also use a 16 bit load with | 2022 // For 2-character preloads in ASCII mode we also use a 16 bit load with |
2012 // zero extend. | 2023 // zero extend. |
2013 if (details->characters() == 2 && compiler->ascii()) { | 2024 if (details->characters() == 2 && compiler->ascii()) { |
2014 if ((mask & 0xffff) == 0xffff) need_mask = false; | 2025 if ((mask & 0xffff) == 0xffff) need_mask = false; |
2015 } else { | 2026 } else { |
2016 if (mask == 0xffffffff) need_mask = false; | 2027 if (mask == 0xffffffff) need_mask = false; |
2017 } | 2028 } |
2018 } | 2029 } |
2019 | 2030 |
(...skipping 296 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2316 assembler->LoadCurrentCharacter(cp_offset + j, | 2327 assembler->LoadCurrentCharacter(cp_offset + j, |
2317 backtrack, | 2328 backtrack, |
2318 *checked_up_to < cp_offset + j); | 2329 *checked_up_to < cp_offset + j); |
2319 } | 2330 } |
2320 assembler->CheckNotCharacter(quarks[j], backtrack); | 2331 assembler->CheckNotCharacter(quarks[j], backtrack); |
2321 } | 2332 } |
2322 } else { | 2333 } else { |
2323 ASSERT_EQ(pass, CASE_CHARACTER_MATCH); | 2334 ASSERT_EQ(pass, CASE_CHARACTER_MATCH); |
2324 ASSERT(compiler->ignore_case()); | 2335 ASSERT(compiler->ignore_case()); |
2325 bound_checked = EmitAtomLetter(assembler, | 2336 bound_checked = EmitAtomLetter(assembler, |
| 2337 compiler->ascii(), |
2326 quarks[j], | 2338 quarks[j], |
2327 backtrack, | 2339 backtrack, |
2328 cp_offset + j, | 2340 cp_offset + j, |
2329 *checked_up_to < cp_offset + j, | 2341 *checked_up_to < cp_offset + j, |
2330 preloaded); | 2342 preloaded); |
2331 } | 2343 } |
2332 if (pass != NON_ASCII_MATCH && bound_checked) { | 2344 if (pass != NON_ASCII_MATCH && bound_checked) { |
2333 if (cp_offset + j > *checked_up_to) { | 2345 if (cp_offset + j > *checked_up_to) { |
2334 *checked_up_to = cp_offset + j; | 2346 *checked_up_to = cp_offset + j; |
2335 } | 2347 } |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2396 return true; | 2408 return true; |
2397 } | 2409 } |
2398 | 2410 |
2399 if (compiler->ascii()) { | 2411 if (compiler->ascii()) { |
2400 int dummy = 0; | 2412 int dummy = 0; |
2401 TextEmitPass(compiler, NON_ASCII_MATCH, false, variant, false, &dummy); | 2413 TextEmitPass(compiler, NON_ASCII_MATCH, false, variant, false, &dummy); |
2402 } | 2414 } |
2403 | 2415 |
2404 bool first_elt_done = false; | 2416 bool first_elt_done = false; |
2405 int bound_checked_to = variant->cp_offset() - 1; | 2417 int bound_checked_to = variant->cp_offset() - 1; |
2406 QuickCheckDetails* quick_check = variant->quick_check_performed(); | 2418 bound_checked_to += variant->bound_checked_up_to(); |
2407 bound_checked_to += Max(quick_check->characters(), | |
2408 variant->characters_preloaded()); | |
2409 | 2419 |
2410 // If a character is preloaded into the current character register then | 2420 // If a character is preloaded into the current character register then |
2411 // check that now. | 2421 // check that now. |
2412 if (variant->characters_preloaded() == 1) { | 2422 if (variant->characters_preloaded() == 1) { |
2413 TextEmitPass(compiler, | 2423 TextEmitPass(compiler, |
2414 CHARACTER_MATCH, | 2424 CHARACTER_MATCH, |
2415 true, | 2425 true, |
2416 variant, | 2426 variant, |
2417 false, | 2427 false, |
2418 &bound_checked_to); | 2428 &bound_checked_to); |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2465 ASSERT(by > 0); | 2475 ASSERT(by > 0); |
2466 // We don't have an instruction for shifting the current character register | 2476 // We don't have an instruction for shifting the current character register |
2467 // down or for using a shifted value for anything so lets just forget that | 2477 // down or for using a shifted value for anything so lets just forget that |
2468 // we preloaded any characters into it. | 2478 // we preloaded any characters into it. |
2469 characters_preloaded_ = 0; | 2479 characters_preloaded_ = 0; |
2470 // Adjust the offsets of the quick check performed information. This | 2480 // Adjust the offsets of the quick check performed information. This |
2471 // information is used to find out what we already determined about the | 2481 // information is used to find out what we already determined about the |
2472 // characters by means of mask and compare. | 2482 // characters by means of mask and compare. |
2473 quick_check_performed_.Advance(by, ascii); | 2483 quick_check_performed_.Advance(by, ascii); |
2474 cp_offset_ += by; | 2484 cp_offset_ += by; |
| 2485 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); |
2475 } | 2486 } |
2476 | 2487 |
2477 | 2488 |
2478 void TextNode::MakeCaseIndependent() { | 2489 void TextNode::MakeCaseIndependent() { |
2479 int element_count = elms_->length(); | 2490 int element_count = elms_->length(); |
2480 for (int i = 0; i < element_count; i++) { | 2491 for (int i = 0; i < element_count; i++) { |
2481 TextElement elm = elms_->at(i); | 2492 TextElement elm = elms_->at(i); |
2482 if (elm.type == TextElement::CHAR_CLASS) { | 2493 if (elm.type == TextElement::CHAR_CLASS) { |
2483 RegExpCharacterClass* cc = elm.data.u_char_class; | 2494 RegExpCharacterClass* cc = elm.data.u_char_class; |
2484 ZoneList<CharacterRange>* ranges = cc->ranges(); | 2495 ZoneList<CharacterRange>* ranges = cc->ranges(); |
(...skipping 287 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2772 return false; | 2783 return false; |
2773 } | 2784 } |
2774 } | 2785 } |
2775 | 2786 |
2776 Label second_choice; // For use in greedy matches. | 2787 Label second_choice; // For use in greedy matches. |
2777 macro_assembler->Bind(&second_choice); | 2788 macro_assembler->Bind(&second_choice); |
2778 | 2789 |
2779 int first_normal_choice = greedy_loop ? 1 : 0; | 2790 int first_normal_choice = greedy_loop ? 1 : 0; |
2780 | 2791 |
2781 int preload_characters = CalculatePreloadCharacters(compiler); | 2792 int preload_characters = CalculatePreloadCharacters(compiler); |
2782 bool preload_is_current = false; | 2793 bool preload_is_current = |
2783 bool preload_has_checked_bounds = false; | 2794 (current_variant->characters_preloaded() == preload_characters); |
| 2795 bool preload_has_checked_bounds = preload_is_current; |
2784 | 2796 |
2785 AlternativeGenerationList alt_gens(choice_count); | 2797 AlternativeGenerationList alt_gens(choice_count); |
2786 | 2798 |
2787 // For now we just call all choices one after the other. The idea ultimately | 2799 // For now we just call all choices one after the other. The idea ultimately |
2788 // is to use the Dispatch table to try only the relevant ones. | 2800 // is to use the Dispatch table to try only the relevant ones. |
2789 for (int i = first_normal_choice; i < choice_count; i++) { | 2801 for (int i = first_normal_choice; i < choice_count; i++) { |
2790 GuardedAlternative alternative = alternatives_->at(i); | 2802 GuardedAlternative alternative = alternatives_->at(i); |
2791 AlternativeGeneration* alt_gen(alt_gens.at(i)); | 2803 AlternativeGeneration* alt_gen(alt_gens.at(i)); |
2792 alt_gen->quick_check_details.set_characters(preload_characters); | 2804 alt_gen->quick_check_details.set_characters(preload_characters); |
2793 ZoneList<Guard*>* guards = alternative.guards(); | 2805 ZoneList<Guard*>* guards = alternative.guards(); |
2794 int guard_count = (guards == NULL) ? 0 : guards->length(); | 2806 int guard_count = (guards == NULL) ? 0 : guards->length(); |
2795 | |
2796 GenerationVariant new_variant(*current_variant); | 2807 GenerationVariant new_variant(*current_variant); |
2797 new_variant.set_characters_preloaded(preload_is_current ? | 2808 new_variant.set_characters_preloaded(preload_is_current ? |
2798 preload_characters : | 2809 preload_characters : |
2799 0); | 2810 0); |
| 2811 if (preload_has_checked_bounds) { |
| 2812 new_variant.set_bound_checked_up_to(preload_characters); |
| 2813 } |
2800 new_variant.quick_check_performed()->Clear(); | 2814 new_variant.quick_check_performed()->Clear(); |
2801 alt_gen->expects_preload = preload_is_current; | 2815 alt_gen->expects_preload = preload_is_current; |
2802 bool generate_full_check_inline = false; | 2816 bool generate_full_check_inline = false; |
2803 if (alternative.node()->EmitQuickCheck(compiler, | 2817 if (alternative.node()->EmitQuickCheck(compiler, |
2804 &new_variant, | 2818 &new_variant, |
2805 preload_has_checked_bounds, | 2819 preload_has_checked_bounds, |
2806 &alt_gen->possible_success, | 2820 &alt_gen->possible_success, |
2807 &alt_gen->quick_check_details, | 2821 &alt_gen->quick_check_details, |
2808 i < choice_count - 1)) { | 2822 i < choice_count - 1)) { |
2809 // Quick check was generated for this choice. | 2823 // Quick check was generated for this choice. |
2810 preload_is_current = true; | 2824 preload_is_current = true; |
2811 preload_has_checked_bounds = true; | 2825 preload_has_checked_bounds = true; |
2812 // On the last choice in the ChoiceNode we generated the quick | 2826 // On the last choice in the ChoiceNode we generated the quick |
2813 // check to fall through on possible success. So now we need to | 2827 // check to fall through on possible success. So now we need to |
2814 // generate the full check inline. | 2828 // generate the full check inline. |
2815 if (i == choice_count - 1) { | 2829 if (i == choice_count - 1) { |
2816 macro_assembler->Bind(&alt_gen->possible_success); | 2830 macro_assembler->Bind(&alt_gen->possible_success); |
2817 new_variant.set_quick_check_performed(&alt_gen->quick_check_details); | 2831 new_variant.set_quick_check_performed(&alt_gen->quick_check_details); |
2818 new_variant.set_characters_preloaded(preload_characters); | 2832 new_variant.set_characters_preloaded(preload_characters); |
| 2833 new_variant.set_bound_checked_up_to(preload_characters); |
2819 generate_full_check_inline = true; | 2834 generate_full_check_inline = true; |
2820 } | 2835 } |
2821 } else { | 2836 } else { |
2822 // No quick check was generated. Put the full code here. | 2837 // No quick check was generated. Put the full code here. |
| 2838 // If this is not the first choice then there could be slow checks from |
| 2839 // previous cases that go here when they fail. There's no reason to |
| 2840 // insist that they preload characters since the slow check we are about |
| 2841 // to generate probably can't use it. |
| 2842 if (i != first_normal_choice) { |
| 2843 alt_gen->expects_preload = false; |
| 2844 new_variant.set_characters_preloaded(0); |
| 2845 } |
2823 if (i < choice_count - 1) { | 2846 if (i < choice_count - 1) { |
2824 new_variant.set_backtrack(&alt_gen->after); | 2847 new_variant.set_backtrack(&alt_gen->after); |
2825 } | 2848 } |
2826 generate_full_check_inline = true; | 2849 generate_full_check_inline = true; |
2827 } | 2850 } |
2828 if (generate_full_check_inline) { | 2851 if (generate_full_check_inline) { |
2829 if (preload_is_current) { | |
2830 new_variant.set_characters_preloaded(preload_characters); | |
2831 } | |
2832 for (int j = 0; j < guard_count; j++) { | 2852 for (int j = 0; j < guard_count; j++) { |
2833 GenerateGuard(macro_assembler, guards->at(j), &new_variant); | 2853 GenerateGuard(macro_assembler, guards->at(j), &new_variant); |
2834 } | 2854 } |
2835 if (!alternative.node()->Emit(compiler, &new_variant)) { | 2855 if (!alternative.node()->Emit(compiler, &new_variant)) { |
2836 greedy_loop_label.Unuse(); | 2856 greedy_loop_label.Unuse(); |
2837 return false; | 2857 return false; |
2838 } | 2858 } |
2839 preload_is_current = false; | 2859 preload_is_current = false; |
2840 } | 2860 } |
2841 macro_assembler->Bind(&alt_gen->after); | 2861 macro_assembler->Bind(&alt_gen->after); |
(...skipping 1453 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4295 EmbeddedVector<byte, 1024> codes; | 4315 EmbeddedVector<byte, 1024> codes; |
4296 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 4316 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
4297 return compiler.Assemble(¯o_assembler, | 4317 return compiler.Assemble(¯o_assembler, |
4298 node, | 4318 node, |
4299 data->capture_count, | 4319 data->capture_count, |
4300 pattern); | 4320 pattern); |
4301 } | 4321 } |
4302 | 4322 |
4303 | 4323 |
4304 }} // namespace v8::internal | 4324 }} // namespace v8::internal |
OLD | NEW |