Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(75)

Side by Side Diff: src/arm/assembler-arm.cc

Issue 12920009: Use generated Neon version of MemCopy() on ARM, if platform supports it. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 1994-2006 Sun Microsystems Inc. 1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
2 // All Rights Reserved. 2 // All Rights Reserved.
3 // 3 //
4 // Redistribution and use in source and binary forms, with or without 4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions 5 // modification, are permitted provided that the following conditions
6 // are met: 6 // are met:
7 // 7 //
8 // - Redistributions of source code must retain the above copyright notice, 8 // - Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer. 9 // this list of conditions and the following disclaimer.
10 // 10 //
(...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after
147 147
148 if (FLAG_enable_movw_movt) { 148 if (FLAG_enable_movw_movt) {
149 supported_ |= static_cast<uint64_t>(1) << MOVW_MOVT_IMMEDIATE_LOADS; 149 supported_ |= static_cast<uint64_t>(1) << MOVW_MOVT_IMMEDIATE_LOADS;
150 } 150 }
151 151
152 if (FLAG_enable_32dregs) { 152 if (FLAG_enable_32dregs) {
153 supported_ |= static_cast<uint64_t>(1) << VFP32DREGS; 153 supported_ |= static_cast<uint64_t>(1) << VFP32DREGS;
154 } 154 }
155 155
156 #else // __arm__ 156 #else // __arm__
157 if (!IsSupported(NEON) && OS::ArmCpuHasFeature(NEON)) {
158 found_by_runtime_probing_only_ |=
159 static_cast<uint64_t>(1) << NEON;
160 }
161
157 // Probe for additional features not already known to be available. 162 // Probe for additional features not already known to be available.
158 if (!IsSupported(VFP3) && OS::ArmCpuHasFeature(VFP3)) { 163 if (!IsSupported(VFP3) && OS::ArmCpuHasFeature(VFP3)) {
159 // This implementation also sets the VFP flags if runtime 164 // This implementation also sets the VFP flags if runtime
160 // detection of VFP returns true. VFPv3 implies ARMv7 and VFP2, see ARM DDI 165 // detection of VFP returns true. VFPv3 implies ARMv7 and VFP2, see ARM DDI
161 // 0406B, page A1-6. 166 // 0406B, page A1-6.
162 found_by_runtime_probing_only_ |= 167 found_by_runtime_probing_only_ |=
163 static_cast<uint64_t>(1) << VFP3 | 168 static_cast<uint64_t>(1) << VFP3 |
164 static_cast<uint64_t>(1) << ARMv7 | 169 static_cast<uint64_t>(1) << ARMv7 |
165 static_cast<uint64_t>(1) << VFP2; 170 static_cast<uint64_t>(1) << VFP2;
166 } else if (!IsSupported(VFP2) && OS::ArmCpuHasFeature(VFP2)) { 171 } else if (!IsSupported(VFP2) && OS::ArmCpuHasFeature(VFP2)) {
(...skipping 1423 matching lines...) Expand 10 before | Expand all | Expand 10 after
1590 } 1595 }
1591 1596
1592 1597
1593 void Assembler::stm(BlockAddrMode am, 1598 void Assembler::stm(BlockAddrMode am,
1594 Register base, 1599 Register base,
1595 RegList src, 1600 RegList src,
1596 Condition cond) { 1601 Condition cond) {
1597 addrmod4(cond | B27 | am, base, src); 1602 addrmod4(cond | B27 | am, base, src);
1598 } 1603 }
1599 1604
1605 void Assembler::pld(Register base, int offset, int write) {
hans 2013/04/02 12:34:29 Many of the newly added functions to the assembler
Nike 2013/04/03 15:04:06 Done.
Nike 2013/04/03 15:04:06 Done description, is it OK to make simulator+test
hans 2013/04/04 09:56:51 Thanks! This makes it much easier to verify the co
1606 int U = (offset >= 0) ? B23 : 0;
hans 2013/04/02 12:34:29 looks like indentation is too deep in this functio
Nike 2013/04/03 15:04:06 Done.
1607 int R = (write == 0) ? B22 : 0;
hans 2013/04/02 12:34:29 since write is just a flag, should it be a bool ra
Nike 2013/04/03 15:04:06 Done.
1608 if (offset < 0)
1609 offset = -offset;
1610 ASSERT(is_uint12(offset));
1611
1612 emit(0xf510f000 | B16 * base.code() | U | R | offset);
hans 2013/04/02 12:34:29 I think "base.code()*B16" would be more in line wi
Nike 2013/04/03 15:04:06 Done.
1613 }
1614
1600 1615
1601 // Exception-generating instructions and debugging support. 1616 // Exception-generating instructions and debugging support.
1602 // Stops with a non-negative code less than kNumOfWatchedStops support 1617 // Stops with a non-negative code less than kNumOfWatchedStops support
1603 // enabling/disabling and a counter feature. See simulator-arm.h . 1618 // enabling/disabling and a counter feature. See simulator-arm.h .
1604 void Assembler::stop(const char* msg, Condition cond, int32_t code) { 1619 void Assembler::stop(const char* msg, Condition cond, int32_t code) {
1605 #ifndef __arm__ 1620 #ifndef __arm__
1606 ASSERT(code >= kDefaultStopCode); 1621 ASSERT(code >= kDefaultStopCode);
1607 { 1622 {
1608 // The Simulator will handle the stop instruction and get the message 1623 // The Simulator will handle the stop instruction and get the message
1609 // address. It expects to find the address just after the svc instruction. 1624 // address. It expects to find the address just after the svc instruction.
(...skipping 1032 matching lines...) Expand 10 before | Expand all | Expand 10 after
2642 // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0001(19-16) | 2657 // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0001(19-16) |
2643 // Vd(15-12) | 101(11-9) | sz=1(8) | 11(7-6) | M(5) | 0(4) | Vm(3-0) 2658 // Vd(15-12) | 101(11-9) | sz=1(8) | 11(7-6) | M(5) | 0(4) | Vm(3-0)
2644 ASSERT(IsEnabled(VFP2)); 2659 ASSERT(IsEnabled(VFP2));
2645 int vd, d; 2660 int vd, d;
2646 dst.split_code(&vd, &d); 2661 dst.split_code(&vd, &d);
2647 int vm, m; 2662 int vm, m;
2648 src.split_code(&vm, &m); 2663 src.split_code(&vm, &m);
2649 emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | B16 | vd*B12 | 0x5*B9 | B8 | 0x3*B6 | 2664 emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | B16 | vd*B12 | 0x5*B9 | B8 | 0x3*B6 |
2650 m*B5 | vm); 2665 m*B5 | vm);
2651 } 2666 }
2652 2667
hans 2013/04/02 12:34:29 v8 likes to have two empty lines between each func
Nike 2013/04/03 15:04:06 Done.
2668 static inline int bin_log(int x) {
2669 switch (x) {
2670 case 1:
2671 return 0;
2672 case 2:
2673 return 1;
2674 case 4:
2675 return 2;
2676 case 8:
2677 return 3;
2678 case 16:
2679 return 4;
2680 case 32:
2681 return 5;
2682 case 64:
2683 return 6;
2684 default:
2685 UNREACHABLE();
2686 return 0;
2687 }
2688 }
2689
2690 static inline int count_to_type(int count) {
2691 switch (count) {
2692 case 1:
2693 return 7;
2694 case 2:
2695 return 10;
2696 case 3:
2697 return 6;
2698 case 4:
2699 return 2;
2700 default:
2701 UNREACHABLE();
2702 return 0;
2703 }
2704 }
2705
// Selects which form neon_vector_op encodes.
enum NeonElementType {
  MultipleElements = 0,  // Whole-register form.
  SingleElement = 1      // One-lane form.
};
2710
// Transfer direction. neon_vector_op multiplies the value into bit 21, so
// the numeric values are part of the encoding and must not change.
enum NeonLoadStoreType {
  Store = 0,
  Load = 1
};
2715
2716 static uint32_t neon_vector_op(int size,
2717 const Register base,
2718 const DwVfpRegister first,
2719 NeonWritebackType writeback,
2720 int align_bytes,
2721 NeonLoadStoreType load,
2722 NeonElementType single,
2723 NeonElementIndex element_index,
2724 int type) {
2725 ASSERT(type >= 0);
2726 ASSERT(!base.is(pc));
2727 ASSERT(size == 8 || size == 16 || size == 32);
2728
2729 int d = (first.code() >> 4) & 1;
2730 int Vd = first.code() & 0xf;
hans 2013/04/02 12:34:29 I think it's nicer to use DwVfpRegister::split to
Nike 2013/04/03 15:04:06 Done.
2731 // we don't need arbitrary Rm so far
hans 2013/04/02 12:34:29 nit: comments should ideally start with a capital
Nike 2013/04/03 15:04:06 Done.
2732 int rm = (writeback == Writeback) ? 13 : 15;
2733 int size_enc = bin_log(size) - 3;
hans 2013/04/02 12:34:29 Since size_enc isn't going to be used until furthe
Nike 2013/04/03 15:04:06 Done.
2734 uint32_t result =
2735 (0xf << 28) | B26 | B22*d | B21*load | B16*base.code() |
hans 2013/04/02 12:34:29 Instead of "0xf << 28", 0xf*B28 would be more in l
Nike 2013/04/03 15:04:06 B22 done. Using 0xf*B28 gives error: integer over
2736 B12*Vd | rm;
2737
2738 int index_align = 0;
2739 if (single == SingleElement) {
2740 switch (align_bytes) {
2741 case 16:
2742 ASSERT(size == 32);
2743 ASSERT(element_index < element_2);
2744 index_align = 0x3 | ((element_index & 0x1) << 3);
2745 break;
2746 case 8:
2747 ASSERT(size == 16 || size == 32);
2748 ASSERT(element_index < element_4);
2749 index_align = 0x1 | ((element_index & 0x3) << 2);
2750 break;
2751 case 4:
2752 ASSERT(size == 8);
2753 index_align = (element_index & 0x7) << 1;
2754 break;
2755 case 1:
2756 index_align = 0;
2757 break;
2758 default:
2759 UNREACHABLE();
2760 }
2761 }
2762
2763 if (single == SingleElement) {
2764 result |= B23 | (1<<10)*size_enc | B9 | B8 | B4*index_align;
2765 } else {
2766 int align_enc = align_bytes == 1 ? 0 : bin_log(align_bytes / 4);
2767 result |= B8*type | B6*size_enc | B4*align_enc;
2768 }
2769 return result;
2770 }
2771
2772 void Assembler::vld1(int size,
2773 const Register base,
2774 const DwVfpRegister first,
2775 const DwVfpRegister last,
2776 NeonWritebackType writeback,
2777 int align_bytes) {
2778 ASSERT(IsEnabled(NEON));
2779 ASSERT_LE(first.code(), last.code());
2780 int count = last.code() - first.code() + 1;
2781 ASSERT(count <= 4);
2782 emit(neon_vector_op(size, base, first, writeback, align_bytes,
2783 Load, MultipleElements, element_0,
2784 count_to_type(count)));
2785 }
2786
2787 void Assembler::vld1(int size,
2788 const Register base,
2789 const DwVfpRegister first,
2790 NeonElementIndex element_index,
2791 NeonWritebackType writeback,
2792 int align_bytes) {
2793 ASSERT(IsEnabled(NEON));
2794 emit(neon_vector_op(size, base, first, writeback, align_bytes,
2795 Load, SingleElement, element_index, 0));
2796 }
2797
2798 void Assembler::vst1(int size,
2799 const Register base,
2800 const DwVfpRegister first,
2801 const DwVfpRegister last,
2802 NeonWritebackType writeback,
2803 int align_bytes) {
2804 ASSERT(IsEnabled(NEON));
2805 ASSERT_LE(first.code(), last.code());
2806 int count = last.code() - first.code() + 1;
2807 ASSERT(count <= 4);
2808 emit(neon_vector_op(size, base, first, writeback, align_bytes,
2809 Store, MultipleElements, element_0,
2810 count_to_type(count)));
2811 }
2812
2813 void Assembler::vst1(int size,
2814 const Register base,
2815 const DwVfpRegister first,
2816 NeonElementIndex element_index,
2817 NeonWritebackType writeback,
2818 int align_bytes) {
2819 ASSERT(IsEnabled(NEON));
2820 emit(neon_vector_op(size, base, first, writeback, align_bytes,
2821 Store, SingleElement, element_index, 0));
2822 }
2823
2824 void Assembler::vld4(int size,
2825 const Register base,
2826 const DwVfpRegister first,
2827 const DwVfpRegister last,
2828 NeonWritebackType writeback,
2829 int align_bytes) {
2830 ASSERT(IsEnabled(NEON));
2831 // we don't support increment == 2, yet
2832 int type = 0;
2833 emit(neon_vector_op(size, base, first, writeback, align_bytes,
2834 Load, MultipleElements, element_0, type));
2835 }
2836
2837 void Assembler::vld4(int size,
2838 const Register base,
2839 const DwVfpRegister first,
2840 NeonElementIndex element_index,
2841 NeonWritebackType writeback,
2842 int align_bytes) {
2843 ASSERT(IsEnabled(NEON));
2844 emit(neon_vector_op(size, base, first, writeback, align_bytes,
2845 Load, SingleElement, element_index, 0));
2846 }
2847
2848 void Assembler::vst4(int size,
2849 const Register base,
2850 const DwVfpRegister first,
2851 const DwVfpRegister last,
2852 NeonWritebackType writeback,
2853 int align_bytes) {
2854 ASSERT(IsEnabled(NEON));
2855 // we don't support increment == 2, yet
2856 int type = 0;
2857 emit(neon_vector_op(size, base, first, writeback, align_bytes,
2858 Store, MultipleElements, element_0, type));
2859 }
2860
2861 void Assembler::vst4(int size,
2862 const Register base,
2863 const DwVfpRegister first,
2864 NeonElementIndex element_index,
2865 NeonWritebackType writeback,
2866 int align_bytes) {
2867 ASSERT(IsEnabled(NEON));
2868 emit(neon_vector_op(size, base, first, writeback, align_bytes,
2869 Store, SingleElement, element_index, 0));
2870 }
2653 2871
2654 // Pseudo instructions. 2872 // Pseudo instructions.
2655 void Assembler::nop(int type) { 2873 void Assembler::nop(int type) {
2656 // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes 2874 // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes
2657 // some of the CPU's pipeline and has to issue. Older ARM chips simply used 2875 // some of the CPU's pipeline and has to issue. Older ARM chips simply used
2658 // MOV Rx, Rx as NOP and it performs better even in newer CPUs. 2876 // MOV Rx, Rx as NOP and it performs better even in newer CPUs.
2659 // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode 2877 // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode
2660 // a type. 2878 // a type.
2661 ASSERT(0 <= type && type <= 14); // mov pc, pc isn't a nop. 2879 ASSERT(0 <= type && type <= 14); // mov pc, pc isn't a nop.
2662 emit(al | 13*B21 | type*B12 | type); 2880 emit(al | 13*B21 | type*B12 | type);
(...skipping 388 matching lines...) Expand 10 before | Expand all | Expand 10 after
3051 3269
3052 // Since a constant pool was just emitted, move the check offset forward by 3270 // Since a constant pool was just emitted, move the check offset forward by
3053 // the standard interval. 3271 // the standard interval.
3054 next_buffer_check_ = pc_offset() + kCheckPoolInterval; 3272 next_buffer_check_ = pc_offset() + kCheckPoolInterval;
3055 } 3273 }
3056 3274
3057 3275
3058 } } // namespace v8::internal 3276 } } // namespace v8::internal
3059 3277
3060 #endif // V8_TARGET_ARCH_ARM 3278 #endif // V8_TARGET_ARCH_ARM
OLDNEW
« no previous file with comments | « src/arm/assembler-arm.h ('k') | src/arm/codegen-arm.cc » ('j') | src/arm/codegen-arm.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698