Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(513)

Side by Side Diff: src/arm/assembler-arm.cc

Issue 12920009: Use generated Neon version of MemCopy() on ARM, if platform supports it. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 7 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 1994-2006 Sun Microsystems Inc. 1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
2 // All Rights Reserved. 2 // All Rights Reserved.
3 // 3 //
4 // Redistribution and use in source and binary forms, with or without 4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions 5 // modification, are permitted provided that the following conditions
6 // are met: 6 // are met:
7 // 7 //
8 // - Redistributions of source code must retain the above copyright notice, 8 // - Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer. 9 // this list of conditions and the following disclaimer.
10 // 10 //
(...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after
147 147
148 if (FLAG_enable_movw_movt) { 148 if (FLAG_enable_movw_movt) {
149 supported_ |= static_cast<uint64_t>(1) << MOVW_MOVT_IMMEDIATE_LOADS; 149 supported_ |= static_cast<uint64_t>(1) << MOVW_MOVT_IMMEDIATE_LOADS;
150 } 150 }
151 151
152 if (FLAG_enable_32dregs) { 152 if (FLAG_enable_32dregs) {
153 supported_ |= static_cast<uint64_t>(1) << VFP32DREGS; 153 supported_ |= static_cast<uint64_t>(1) << VFP32DREGS;
154 } 154 }
155 155
156 #else // __arm__ 156 #else // __arm__
157 if (!IsSupported(NEON) && OS::ArmCpuHasFeature(NEON)) {
158 found_by_runtime_probing_only_ |=
159 static_cast<uint64_t>(1) << NEON;
160 }
161
157 // Probe for additional features not already known to be available. 162 // Probe for additional features not already known to be available.
158 if (!IsSupported(VFP3) && OS::ArmCpuHasFeature(VFP3)) { 163 if (!IsSupported(VFP3) && OS::ArmCpuHasFeature(VFP3)) {
159 // This implementation also sets the VFP flags if runtime 164 // This implementation also sets the VFP flags if runtime
160 // detection of VFP returns true. VFPv3 implies ARMv7 and VFP2, see ARM DDI 165 // detection of VFP returns true. VFPv3 implies ARMv7 and VFP2, see ARM DDI
161 // 0406B, page A1-6. 166 // 0406B, page A1-6.
162 found_by_runtime_probing_only_ |= 167 found_by_runtime_probing_only_ |=
163 static_cast<uint64_t>(1) << VFP3 | 168 static_cast<uint64_t>(1) << VFP3 |
164 static_cast<uint64_t>(1) << ARMv7 | 169 static_cast<uint64_t>(1) << ARMv7 |
165 static_cast<uint64_t>(1) << VFP2; 170 static_cast<uint64_t>(1) << VFP2;
166 } else if (!IsSupported(VFP2) && OS::ArmCpuHasFeature(VFP2)) { 171 } else if (!IsSupported(VFP2) && OS::ArmCpuHasFeature(VFP2)) {
(...skipping 1423 matching lines...) Expand 10 before | Expand all | Expand 10 after
1590 } 1595 }
1591 1596
1592 1597
1593 void Assembler::stm(BlockAddrMode am, 1598 void Assembler::stm(BlockAddrMode am,
1594 Register base, 1599 Register base,
1595 RegList src, 1600 RegList src,
1596 Condition cond) { 1601 Condition cond) {
1597 addrmod4(cond | B27 | am, base, src); 1602 addrmod4(cond | B27 | am, base, src);
1598 } 1603 }
1599 1604
1605 static uint32_t pld_op(Register base, int offset, bool write) {
1606 // PLD(literal)/PLDW(literal) preload data for read or write.
1607 // Instruction details available in ARM DDI 0406B, A8-239.
1608 // 1111(31-28) | 0101(27-24)| U(23) | R(22) | 01(21-20) | Rn(19-16) |
1609 // 1111(15-12) | imm12(11-0)
1610 int U = (offset >= 0) ? B23 : 0;
1611 int R = !write ? B22 : 0;
1612 if (offset < 0)
1613 offset = -offset;
1614 ASSERT(is_uint12(offset));
1615
1616 return 0xf510f000 | base.code()*B16 | U | R | offset;
1617 }
1618
1619
1620 void Assembler::pld(Register base, int offset) {
1621 emit(pld_op(base, offset, false));
1622 }
1623
1624
1625 void Assembler::pldw(Register base, int offset) {
1626 emit(pld_op(base, offset, true));
1627 }
1628
1600 1629
1601 // Exception-generating instructions and debugging support. 1630 // Exception-generating instructions and debugging support.
1602 // Stops with a non-negative code less than kNumOfWatchedStops support 1631 // Stops with a non-negative code less than kNumOfWatchedStops support
1603 // enabling/disabling and a counter feature. See simulator-arm.h . 1632 // enabling/disabling and a counter feature. See simulator-arm.h .
1604 void Assembler::stop(const char* msg, Condition cond, int32_t code) { 1633 void Assembler::stop(const char* msg, Condition cond, int32_t code) {
1605 #ifndef __arm__ 1634 #ifndef __arm__
1606 ASSERT(code >= kDefaultStopCode); 1635 ASSERT(code >= kDefaultStopCode);
1607 { 1636 {
1608 // The Simulator will handle the stop instruction and get the message 1637 // The Simulator will handle the stop instruction and get the message
1609 // address. It expects to find the address just after the svc instruction. 1638 // address. It expects to find the address just after the svc instruction.
(...skipping 1034 matching lines...) Expand 10 before | Expand all | Expand 10 after
2644 ASSERT(IsEnabled(VFP2)); 2673 ASSERT(IsEnabled(VFP2));
2645 int vd, d; 2674 int vd, d;
2646 dst.split_code(&vd, &d); 2675 dst.split_code(&vd, &d);
2647 int vm, m; 2676 int vm, m;
2648 src.split_code(&vm, &m); 2677 src.split_code(&vm, &m);
2649 emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | B16 | vd*B12 | 0x5*B9 | B8 | 0x3*B6 | 2678 emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | B16 | vd*B12 | 0x5*B9 | B8 | 0x3*B6 |
2650 m*B5 | vm); 2679 m*B5 | vm);
2651 } 2680 }
2652 2681
2653 2682
2683 static inline int bin_log(int x) {
2684 switch (x) {
2685 case 1:
2686 return 0;
2687 case 2:
2688 return 1;
2689 case 4:
2690 return 2;
2691 case 8:
2692 return 3;
2693 case 16:
2694 return 4;
2695 case 32:
2696 return 5;
2697 case 64:
2698 return 6;
2699 default:
2700 UNREACHABLE();
2701 return 0;
2702 }
2703 }
2704
2705
2706 static inline int count_to_type(int count) {
2707 switch (count) {
2708 case 1:
2709 return 7;
2710 case 2:
2711 return 10;
2712 case 3:
2713 return 6;
2714 case 4:
2715 return 2;
2716 default:
2717 UNREACHABLE();
2718 return 0;
2719 }
2720 }
2721
2722
// Selects which of the two encodings neon_vector_op() produces.
enum NeonElementType {
  MultipleElements = 0,  // Whole-register transfer (type/size/align fields).
  SingleElement = 1      // One-lane transfer (size/index_align fields).
};
2727
// Direction of a NEON memory transfer. The numeric value is multiplied into
// bit 21 of the instruction by neon_vector_op(), so it must stay 0 or 1.
enum NeonLoadStoreType {
  Store = 0,  // Bit 21 clear.
  Load = 1    // Bit 21 set.
};
2732
2733 static uint32_t neon_vector_op(int size,
2734 const Register base,
2735 const DwVfpRegister first,
2736 NeonWritebackType writeback,
2737 int align_bytes,
2738 NeonLoadStoreType load,
2739 NeonElementType single,
2740 NeonElementIndex element_index,
2741 int type) {
2742
2743 ASSERT(type >= 0);
2744 ASSERT(!base.is(pc));
2745 ASSERT(size == 8 || size == 16 || size == 32);
2746
2747 int d, Vd;
2748 first.split_code(&Vd, &d);
2749 // We don't need arbitrary Rm so far.
2750 int rm = (writeback == Writeback) ? 13 : 15;
2751 uint32_t result =
2752 (0xf << 28) | B26 | d*B22 | load*B21 | base.code()*B16 |
2753 Vd*B12 | rm;
2754
2755 int index_align = 0;
2756 if (single == SingleElement) {
2757 switch (align_bytes) {
2758 case 16:
2759 ASSERT(size == 32);
2760 ASSERT(element_index < element_2);
2761 index_align = 0x3 | ((element_index & 0x1) << 3);
2762 break;
2763 case 8:
2764 ASSERT(size == 16 || size == 32);
2765 ASSERT(element_index < element_4);
2766 index_align = 0x1 | ((element_index & 0x3) << 2);
2767 break;
2768 case 4:
2769 ASSERT(size == 8);
2770 index_align = (element_index & 0x7) << 1;
2771 break;
2772 case 1:
2773 index_align = 0;
2774 break;
2775 default:
2776 UNREACHABLE();
2777 }
2778 }
2779
2780 int size_enc = bin_log(size) - 3;
2781 if (single == SingleElement) {
2782 result |= B23 | (1<<10)*size_enc | B9 | B8 | index_align*B4;
2783 } else {
2784 int align_enc = align_bytes == 1 ? 0 : bin_log(align_bytes / 4);
2785 result |= type*B8 | size_enc*B6 | align_enc*B4;
2786 }
2787 return result;
2788 }
2789
2790 void Assembler::vld1(int size,
2791 const Register base,
2792 const DwVfpRegister first,
2793 const DwVfpRegister last,
2794 NeonWritebackType writeback,
2795 int align_bytes) {
2796 // VLD1 (multiple single elements).
2797 // Load elements from memory into one, two, three, or four registers,
2798 // without de-interleaving.
2799 // Instruction details available in ARM DDI 0406B, A8-602.
2800 // 1111(31-28) | 01000 (27-23) | D(22)| 10 (21-20) |
2801 // Rn(19-16) | Vd (15-12) | size(11-10) | 00 (9-8) | index_align(7-4) |
2802 // Rm(3-0)
2803 ASSERT(IsEnabled(NEON));
2804 ASSERT_LE(first.code(), last.code());
2805 int count = last.code() - first.code() + 1;
2806 ASSERT(count <= 4);
2807 emit(neon_vector_op(size, base, first, writeback, align_bytes,
2808 Load, MultipleElements, element_0,
2809 count_to_type(count)));
2810 }
2811
2812 void Assembler::vld1(int size,
2813 const Register base,
2814 const DwVfpRegister first,
2815 NeonElementIndex element_index,
2816 NeonWritebackType writeback,
2817 int align_bytes) {
2818 // VLD1 (single element to one lane).
2819 // Load one element from memory into one element of a register.
2820 // Instruction details available in ARM DDI 0406B, A8-604.
2821 // 1111(31-28) | 01001 (27-23) | D(22)| 10 (21-20) |
2822 // Rn(19-16) | Vd (15-12) | type(11-8) | size (7-6) | align(5-4) | Rm(3-0)
2823 ASSERT(IsEnabled(NEON));
2824 emit(neon_vector_op(size, base, first, writeback, align_bytes,
2825 Load, SingleElement, element_index, 0));
2826 }
2827
2828 void Assembler::vst1(int size,
2829 const Register base,
2830 const DwVfpRegister first,
2831 const DwVfpRegister last,
2832 NeonWritebackType writeback,
2833 int align_bytes) {
2834 // VST1 (multiple single elements).
2835 // Store elements to memory from one, two, three, or four registers,
2836 // without interleaving.
2837 // Instruction details available in ARM DDI 0406B, A8-768.
2838 // 1111(31-28) | 01000 (27-23) | D(22)| 00 (21-20) |
2839 // Rn(19-16) | Vd (15-12) | type(11-8) | size (7-6) | align(5-4) |
2840 // Rm(3-0)
2841 ASSERT(IsEnabled(NEON));
2842 ASSERT_LE(first.code(), last.code());
2843 int count = last.code() - first.code() + 1;
2844 ASSERT(count <= 4);
2845 emit(neon_vector_op(size, base, first, writeback, align_bytes,
2846 Store, MultipleElements, element_0,
2847 count_to_type(count)));
2848 }
2849
2850 void Assembler::vst1(int size,
2851 const Register base,
2852 const DwVfpRegister first,
2853 NeonElementIndex element_index,
2854 NeonWritebackType writeback,
2855 int align_bytes) {
2856 // VST1 (single element from one lane).
2857 // Store one element to memory from one element of a register.
2858 // Instruction details available in ARM DDI 0406B, A8-770.
2859 // 1111(31-28) | 01001 (27-23) | D(22)| 00 (21-20) |
2860 // Rn(19-16) | Vd (15-12) | size(11-10) | 00 (9-8) | index_align(7-4) |
2861 // Rm(3-0)
2862 ASSERT(IsEnabled(NEON));
2863 emit(neon_vector_op(size, base, first, writeback, align_bytes,
2864 Store, SingleElement, element_index, 0));
2865 }
2866
2867 void Assembler::vld4(int size,
2868 const Register base,
2869 const DwVfpRegister first,
2870 const DwVfpRegister last,
2871 NeonWritebackType writeback,
2872 int align_bytes) {
2873 // VLD4 (multiple 4-element structures).
2874 // Load multiple 4-element structures from memory into four registers,
2875 // with de-interleaving.
2876 // Instruction details available in ARM DDI 0406B, A8-620.
2877 // 1111(31-28) | 01000 (27-23) | D(22)| 10 (21-20) |
2878 // Rn(19-16) | Vd (15-12) | type(11-8) | size (7-6) | align(5-4) |
2879 // Rm(3-0)
2880 ASSERT(IsEnabled(NEON));
2881 // We don't support increment == 2, yet.
2882 int type = 0;
2883 emit(neon_vector_op(size, base, first, writeback, align_bytes,
2884 Load, MultipleElements, element_0, type));
2885 }
2886
2887 void Assembler::vld4(int size,
2888 const Register base,
2889 const DwVfpRegister first,
2890 NeonElementIndex element_index,
2891 NeonWritebackType writeback,
2892 int align_bytes) {
2893 // VLD4 (single 4-element structure to one lane).
2894 // Load one 4-element structure from memory into corresponding elements of
2895 // four registers.
2896 // Instruction details available in ARM DDI 0406B, A8-622.
2897 // 1111(31-28) | 01001 (27-23) | D(22)| 10 (21-20) |
2898 // Rn(19-16) | Vd (15-12) | size(11-10) | 11 (9-8) | index_align(7-4) |
2899 // Rm(3-0)
2900 ASSERT(IsEnabled(NEON));
2901 emit(neon_vector_op(size, base, first, writeback, align_bytes,
2902 Load, SingleElement, element_index, 0));
2903 }
2904
2905 void Assembler::vst4(int size,
2906 const Register base,
2907 const DwVfpRegister first,
2908 const DwVfpRegister last,
2909 NeonWritebackType writeback,
2910 int align_bytes) {
2911 // VST4 (multiple 4-element structures).
2912 // Store multiple 4-element structures to memory from four registers,
2913 // with interleaving.
2914 // Instruction details available in ARM DDI 0406B, A8-780.
2915 // 1111(31-28) | 01000 (27-23) | D(22)| 00 (21-20) |
2916 // Rn(19-16) | Vd (15-12) | type(11-8) | size (7-6) | align(5-4) |
2917 // Rm(3-0)
2918 ASSERT(IsEnabled(NEON));
2919 // We don't support increment == 2, yet.
2920 int type = 0;
2921 emit(neon_vector_op(size, base, first, writeback, align_bytes,
2922 Store, MultipleElements, element_0, type));
2923 }
2924
2925 void Assembler::vst4(int size,
2926 const Register base,
2927 const DwVfpRegister first,
2928 NeonElementIndex element_index,
2929 NeonWritebackType writeback,
2930 int align_bytes) {
2931 // VST4 (single 4-element structure from one lane).
2932 // Store one 4-element structure to memory from corresponding elements
2933 // of four registers.
2934 // Instruction details available in ARM DDI 0406B, A8-782.
2935 // 1111(31-28) | 01001 (27-23) | D(22)| 00 (21-20) |
2936 // Rn(19-16) | Vd (15-12) | size(11-10) | 11 (9-8) | index_align(7-4) |
2937 // Rm(3-0)
2938 ASSERT(IsEnabled(NEON));
2939 emit(neon_vector_op(size, base, first, writeback, align_bytes,
2940 Store, SingleElement, element_index, 0));
2941 }
2942
2654 // Pseudo instructions. 2943 // Pseudo instructions.
2655 void Assembler::nop(int type) { 2944 void Assembler::nop(int type) {
2656 // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes 2945 // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes
2657 // some of the CPU's pipeline and has to issue. Older ARM chips simply used 2946 // some of the CPU's pipeline and has to issue. Older ARM chips simply used
2658 // MOV Rx, Rx as NOP and it performs better even in newer CPUs. 2947 // MOV Rx, Rx as NOP and it performs better even in newer CPUs.
2659 // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode 2948 // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode
2660 // a type. 2949 // a type.
2661 ASSERT(0 <= type && type <= 14); // mov pc, pc isn't a nop. 2950 ASSERT(0 <= type && type <= 14); // mov pc, pc isn't a nop.
2662 emit(al | 13*B21 | type*B12 | type); 2951 emit(al | 13*B21 | type*B12 | type);
2663 } 2952 }
(...skipping 387 matching lines...) Expand 10 before | Expand all | Expand 10 after
3051 3340
3052 // Since a constant pool was just emitted, move the check offset forward by 3341 // Since a constant pool was just emitted, move the check offset forward by
3053 // the standard interval. 3342 // the standard interval.
3054 next_buffer_check_ = pc_offset() + kCheckPoolInterval; 3343 next_buffer_check_ = pc_offset() + kCheckPoolInterval;
3055 } 3344 }
3056 3345
3057 3346
3058 } } // namespace v8::internal 3347 } } // namespace v8::internal
3059 3348
3060 #endif // V8_TARGET_ARCH_ARM 3349 #endif // V8_TARGET_ARCH_ARM
OLDNEW
« no previous file with comments | « src/arm/assembler-arm.h ('k') | src/arm/codegen-arm.cc » ('j') | src/arm/codegen-arm.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698