src/arm/assembler-arm.cc - Issue 12920009: Use generated Neon version of MemCopy() on ARM, if platform supports it.

Side by Side Diff: src/arm/assembler-arm.cc

Issue 12920009: Use generated Neon version of MemCopy() on ARM, if platform supports it. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: Created 7 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 1994-2006 Sun Microsystems Inc.	1 // Copyright (c) 1994-2006 Sun Microsystems Inc.

2 // All Rights Reserved.	2 // All Rights Reserved.

3 //	3 //

4 // Redistribution and use in source and binary forms, with or without	4 // Redistribution and use in source and binary forms, with or without

5 // modification, are permitted provided that the following conditions	5 // modification, are permitted provided that the following conditions

6 // are met:	6 // are met:

7 //	7 //

8 // - Redistributions of source code must retain the above copyright notice,	8 // - Redistributions of source code must retain the above copyright notice,

9 // this list of conditions and the following disclaimer.	9 // this list of conditions and the following disclaimer.

10 //	10 //

(...skipping 136 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
147	147

148 if (FLAG_enable_movw_movt) {	148 if (FLAG_enable_movw_movt) {

149 supported_ \|= static_cast<uint64_t>(1) << MOVW_MOVT_IMMEDIATE_LOADS;	149 supported_ \|= static_cast<uint64_t>(1) << MOVW_MOVT_IMMEDIATE_LOADS;

150 }	150 }

151	151

152 if (FLAG_enable_32dregs) {	152 if (FLAG_enable_32dregs) {

153 supported_ \|= static_cast<uint64_t>(1) << VFP32DREGS;	153 supported_ \|= static_cast<uint64_t>(1) << VFP32DREGS;

154 }	154 }

155	155

156 #else // __arm__	156 #else // __arm__

	157 if (!IsSupported(NEON) && OS::ArmCpuHasFeature(NEON)) {

	158 found_by_runtime_probing_only_ \|=

	159 static_cast<uint64_t>(1) << NEON;

	160 }

	161

157 // Probe for additional features not already known to be available.	162 // Probe for additional features not already known to be available.

158 if (!IsSupported(VFP3) && OS::ArmCpuHasFeature(VFP3)) {	163 if (!IsSupported(VFP3) && OS::ArmCpuHasFeature(VFP3)) {

159 // This implementation also sets the VFP flags if runtime	164 // This implementation also sets the VFP flags if runtime

160 // detection of VFP returns true. VFPv3 implies ARMv7 and VFP2, see ARM DDI	165 // detection of VFP returns true. VFPv3 implies ARMv7 and VFP2, see ARM DDI

161 // 0406B, page A1-6.	166 // 0406B, page A1-6.

162 found_by_runtime_probing_only_ \|=	167 found_by_runtime_probing_only_ \|=

163 static_cast<uint64_t>(1) << VFP3 \|	168 static_cast<uint64_t>(1) << VFP3 \|

164 static_cast<uint64_t>(1) << ARMv7 \|	169 static_cast<uint64_t>(1) << ARMv7 \|

165 static_cast<uint64_t>(1) << VFP2;	170 static_cast<uint64_t>(1) << VFP2;

166 } else if (!IsSupported(VFP2) && OS::ArmCpuHasFeature(VFP2)) {	171 } else if (!IsSupported(VFP2) && OS::ArmCpuHasFeature(VFP2)) {

(...skipping 1423 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1590 }	1595 }

1591	1596

1592	1597

1593 void Assembler::stm(BlockAddrMode am,	1598 void Assembler::stm(BlockAddrMode am,

1594 Register base,	1599 Register base,

1595 RegList src,	1600 RegList src,

1596 Condition cond) {	1601 Condition cond) {

1597 addrmod4(cond \| B27 \| am, base, src);	1602 addrmod4(cond \| B27 \| am, base, src);

1598 }	1603 }

1599	1604

	1605 void Assembler::pld(Register base, int offset, int write) {
	hans 2013/04/02 12:34:29 Many of the newly added functions to the assembler Many of the newly added functions to the assembler have references to the appropriate section in the ARM ARM, and also a comment that lays out the instruction encoding that makes it much easier to verify that the code is correct. See e.g. vadd below. All new assembler functions also need corresponding changes in the simulator and disassembler + tests. Nike 2013/04/03 15:04:06 Done. Show quoted text On 2013/04/02 12:34:29, hans wrote: > Many of the newly added functions to the assembler have references to the > appropriate section in the ARM ARM, and also a comment that lays out the > instruction encoding that makes it much easier to verify that the code is > correct. See e.g. vadd below. > > All new assembler functions also need corresponding changes in the simulator and > disassembler + tests. Done. Nike 2013/04/03 15:04:06 Done description, is it OK to make simulator+test Done description, is it OK to make simulator+test changes in separate CL? hans 2013/04/04 09:56:51 Thanks! This makes it much easier to verify the co Show quoted text On 2013/04/03 15:04:06, Nike wrote: > Done description, Thanks! This makes it much easier to verify the code. Show quoted text > is it OK to make simulator+test changes in separate CL? No, the preferred way is to do it in the same patch. If V8 starts generating code which it cannot simulate or disassemble, that would be a problem. If it's not tested, that's also a problem.
	1606 int U = (offset >= 0) ? B23 : 0;
	hans 2013/04/02 12:34:29 looks like indentation is too deep in this functio looks like indentation is too deep in this function Nike 2013/04/03 15:04:06 Done. Show quoted text On 2013/04/02 12:34:29, hans wrote: > looks like indentation is too deep in this function Done.
	1607 int R = (write == 0) ? B22 : 0;
	hans 2013/04/02 12:34:29 since write is just a flag, should it be a bool ra since write is just a flag, should it be a bool rather than an int? or an enum? Nike 2013/04/03 15:04:06 Done. Show quoted text On 2013/04/02 12:34:29, hans wrote: > since write is just a flag, should it be a bool rather than an int? or an enum? Done.
	1608 if (offset < 0)

	1609 offset = -offset;

	1610 ASSERT(is_uint12(offset));

	1611

	1612 emit(0xf510f000 \| B16 * base.code() \| U \| R \| offset);
	hans 2013/04/02 12:34:29 I think "base.code()*B16" would be more in line wi I think "base.code()*B16" would be more in line with the rest of the assembler. Nike 2013/04/03 15:04:06 Done. Show quoted text On 2013/04/02 12:34:29, hans wrote: > I think "base.code()*B16" would be more in line with the rest of the assembler. Done.
	1613 }

	1614

1600	1615

1601 // Exception-generating instructions and debugging support.	1616 // Exception-generating instructions and debugging support.

1602 // Stops with a non-negative code less than kNumOfWatchedStops support	1617 // Stops with a non-negative code less than kNumOfWatchedStops support

1603 // enabling/disabling and a counter feature. See simulator-arm.h .	1618 // enabling/disabling and a counter feature. See simulator-arm.h .

1604 void Assembler::stop(const char* msg, Condition cond, int32_t code) {	1619 void Assembler::stop(const char* msg, Condition cond, int32_t code) {

1605 #ifndef __arm__	1620 #ifndef __arm__

1606 ASSERT(code >= kDefaultStopCode);	1621 ASSERT(code >= kDefaultStopCode);

1607 {	1622 {

1608 // The Simulator will handle the stop instruction and get the message	1623 // The Simulator will handle the stop instruction and get the message

1609 // address. It expects to find the address just after the svc instruction.	1624 // address. It expects to find the address just after the svc instruction.

(...skipping 1032 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2642 // cond(31-28) \| 11101(27-23)\| D(22) \| 11(21-20) \| 0001(19-16) \|	2657 // cond(31-28) \| 11101(27-23)\| D(22) \| 11(21-20) \| 0001(19-16) \|

2643 // Vd(15-12) \| 101(11-9) \| sz=1(8) \| 11(7-6) \| M(5) \| 0(4) \| Vm(3-0)	2658 // Vd(15-12) \| 101(11-9) \| sz=1(8) \| 11(7-6) \| M(5) \| 0(4) \| Vm(3-0)

2644 ASSERT(IsEnabled(VFP2));	2659 ASSERT(IsEnabled(VFP2));

2645 int vd, d;	2660 int vd, d;

2646 dst.split_code(&vd, &d);	2661 dst.split_code(&vd, &d);

2647 int vm, m;	2662 int vm, m;

2648 src.split_code(&vm, &m);	2663 src.split_code(&vm, &m);

2649 emit(cond \| 0x1D*B23 \| d*B22 \| 0x3*B20 \| B16 \| vd*B12 \| 0x5*B9 \| B8 \| 0x3*B6 \|	2664 emit(cond \| 0x1D*B23 \| d*B22 \| 0x3*B20 \| B16 \| vd*B12 \| 0x5*B9 \| B8 \| 0x3*B6 \|

2650 m*B5 \| vm);	2665 m*B5 \| vm);

2651 }	2666 }

2652	2667
	hans 2013/04/02 12:34:29 v8 likes to have two empty lines between each func v8 likes to have two empty lines between each function definition; this applies below too Nike 2013/04/03 15:04:06 Done. Show quoted text On 2013/04/02 12:34:29, hans wrote: > v8 likes to have two empty lines between each function definition; this applies > below too Done.
	2668 static inline int bin_log(int x) {

	2669 switch (x) {

	2670 case 1:

	2671 return 0;

	2672 case 2:

	2673 return 1;

	2674 case 4:

	2675 return 2;

	2676 case 8:

	2677 return 3;

	2678 case 16:

	2679 return 4;

	2680 case 32:

	2681 return 5;

	2682 case 64:

	2683 return 6;

	2684 default:

	2685 UNREACHABLE();

	2686 return 0;

	2687 }

	2688 }

	2689

	2690 static inline int count_to_type(int count) {

	2691 switch (count) {

	2692 case 1:

	2693 return 7;

	2694 case 2:

	2695 return 10;

	2696 case 3:

	2697 return 6;

	2698 case 4:

	2699 return 2;

	2700 default:

	2701 UNREACHABLE();

	2702 return 0;

	2703 }

	2704 }

	2705

	2706 enum NeonElementType {

	2707 MultipleElements = 0,

	2708 SingleElement = 1

	2709 };

	2710

	2711 enum NeonLoadStoreType {

	2712 Store = 0,

	2713 Load = 1

	2714 };

	2715

	2716 static uint32_t neon_vector_op(int size,

	2717 const Register base,

	2718 const DwVfpRegister first,

	2719 NeonWritebackType writeback,

	2720 int align_bytes,

	2721 NeonLoadStoreType load,

	2722 NeonElementType single,

	2723 NeonElementIndex element_index,

	2724 int type) {

	2725 ASSERT(type >= 0);

	2726 ASSERT(!base.is(pc));

	2727 ASSERT(size == 8 \|\| size == 16 \|\| size == 32);

	2728

	2729 int d = (first.code() >> 4) & 1;

	2730 int Vd = first.code() & 0xf;
	hans 2013/04/02 12:34:29 I think it's nicer to use DwVfpRegister::split to I think it's nicer to use DwVfpRegister::split to get vd and d; see e.g. how vadd does this. Nike 2013/04/03 15:04:06 Done. Show quoted text On 2013/04/02 12:34:29, hans wrote: > I think it's nicer to use DwVfpRegister::split to get vd and d; see e.g. how > vadd does this. Done.
	2731 // we don't need arbitrary Rm so far
	hans 2013/04/02 12:34:29 nit: comments should ideally start with a capital nit: comments should ideally start with a capital letter and end with a period Nike 2013/04/03 15:04:06 Done. Show quoted text On 2013/04/02 12:34:29, hans wrote: > nit: comments should ideally start with a capital letter and end with a period Done.
	2732 int rm = (writeback == Writeback) ? 13 : 15;

	2733 int size_enc = bin_log(size) - 3;
	hans 2013/04/02 12:34:29 Since size_enc isn't going to be used until furthe Since size_enc isn't going to be used until further down in the function, perhaps it can be moved closer to its first use? Nike 2013/04/03 15:04:06 Done. Show quoted text On 2013/04/02 12:34:29, hans wrote: > Since size_enc isn't going to be used until further down in the function, > perhaps it can be moved closer to its first use? Done.
	2734 uint32_t result =

	2735 (0xf << 28) \| B26 \| B22*d \| B21*load \| B16*base.code() \|
	hans 2013/04/02 12:34:29 Instead of "0xf << 28", 0xf*B28 would be more in l Instead of "0xf << 28", 0xf*B28 would be more in line with the rest of the assembler. Also, instead of doing "B22*d" etc, I think "d*B22" is more in line with the rest of the assembler. These comments apply where "result" is modified further down in this function too. Nike 2013/04/03 15:04:06 B22 done. Using 0xf*B28 gives error: integer over B22 done. Using 0xf*B28 gives error: integer overflow in expression [-Werror=overflow] On 2013/04/02 12:34:29, hans wrote: Show quoted text > Instead of "0xf << 28", 0xf*B28 would be more in line with the rest of the > assembler. > > Also, instead of doing "B22*d" etc, I think "d*B22" is more in line with the > rest of the assembler. > > These comments apply where "result" is modified further down in this function > too.
	2736 B12*Vd \| rm;

	2737

	2738 int index_align = 0;

	2739 if (single == SingleElement) {

	2740 switch (align_bytes) {

	2741 case 16:

	2742 ASSERT(size == 32);

	2743 ASSERT(element_index < element_2);

	2744 index_align = 0x3 \| ((element_index & 0x1) << 3);

	2745 break;

	2746 case 8:

	2747 ASSERT(size == 16 \|\| size == 32);

	2748 ASSERT(element_index < element_4);

	2749 index_align = 0x1 \| ((element_index & 0x3) << 2);

	2750 break;

	2751 case 4:

	2752 ASSERT(size == 8);

	2753 index_align = (element_index & 0x7) << 1;

	2754 break;

	2755 case 1:

	2756 index_align = 0;

	2757 break;

	2758 default:

	2759 UNREACHABLE();

	2760 }

	2761 }

	2762

	2763 if (single == SingleElement) {

	2764 result \|= B23 \| (1<<10)*size_enc \| B9 \| B8 \| B4*index_align;

	2765 } else {

	2766 int align_enc = align_bytes == 1 ? 0 : bin_log(align_bytes / 4);

	2767 result \|= B8*type \| B6*size_enc \| B4*align_enc;

	2768 }

	2769 return result;

	2770 }

	2771

	2772 void Assembler::vld1(int size,

	2773 const Register base,

	2774 const DwVfpRegister first,

	2775 const DwVfpRegister last,

	2776 NeonWritebackType writeback,

	2777 int align_bytes) {

	2778 ASSERT(IsEnabled(NEON));

	2779 ASSERT_LE(first.code(), last.code());

	2780 int count = last.code() - first.code() + 1;

	2781 ASSERT(count <= 4);

	2782 emit(neon_vector_op(size, base, first, writeback, align_bytes,

	2783 Load, MultipleElements, element_0,

	2784 count_to_type(count)));

	2785 }

	2786

	2787 void Assembler::vld1(int size,

	2788 const Register base,

	2789 const DwVfpRegister first,

	2790 NeonElementIndex element_index,

	2791 NeonWritebackType writeback,

	2792 int align_bytes) {

	2793 ASSERT(IsEnabled(NEON));

	2794 emit(neon_vector_op(size, base, first, writeback, align_bytes,

	2795 Load, SingleElement, element_index, 0));

	2796 }

	2797

	2798 void Assembler::vst1(int size,

	2799 const Register base,

	2800 const DwVfpRegister first,

	2801 const DwVfpRegister last,

	2802 NeonWritebackType writeback,

	2803 int align_bytes) {

	2804 ASSERT(IsEnabled(NEON));

	2805 ASSERT_LE(first.code(), last.code());

	2806 int count = last.code() - first.code() + 1;

	2807 ASSERT(count <= 4);

	2808 emit(neon_vector_op(size, base, first, writeback, align_bytes,

	2809 Store, MultipleElements, element_0,

	2810 count_to_type(count)));

	2811 }

	2812

	2813 void Assembler::vst1(int size,

	2814 const Register base,

	2815 const DwVfpRegister first,

	2816 NeonElementIndex element_index,

	2817 NeonWritebackType writeback,

	2818 int align_bytes) {

	2819 ASSERT(IsEnabled(NEON));

	2820 emit(neon_vector_op(size, base, first, writeback, align_bytes,

	2821 Store, SingleElement, element_index, 0));

	2822 }

	2823

	2824 void Assembler::vld4(int size,

	2825 const Register base,

	2826 const DwVfpRegister first,

	2827 const DwVfpRegister last,

	2828 NeonWritebackType writeback,

	2829 int align_bytes) {

	2830 ASSERT(IsEnabled(NEON));

	2831 // we don't support increment == 2, yet

	2832 int type = 0;

	2833 emit(neon_vector_op(size, base, first, writeback, align_bytes,

	2834 Load, MultipleElements, element_0, type));

	2835 }

	2836

	2837 void Assembler::vld4(int size,

	2838 const Register base,

	2839 const DwVfpRegister first,

	2840 NeonElementIndex element_index,

	2841 NeonWritebackType writeback,

	2842 int align_bytes) {

	2843 ASSERT(IsEnabled(NEON));

	2844 emit(neon_vector_op(size, base, first, writeback, align_bytes,

	2845 Load, SingleElement, element_index, 0));

	2846 }

	2847

	2848 void Assembler::vst4(int size,

	2849 const Register base,

	2850 const DwVfpRegister first,

	2851 const DwVfpRegister last,

	2852 NeonWritebackType writeback,

	2853 int align_bytes) {

	2854 ASSERT(IsEnabled(NEON));

	2855 // we don't support increment == 2, yet

	2856 int type = 0;

	2857 emit(neon_vector_op(size, base, first, writeback, align_bytes,

	2858 Store, MultipleElements, element_0, type));

	2859 }

	2860

	2861 void Assembler::vst4(int size,

	2862 const Register base,

	2863 const DwVfpRegister first,

	2864 NeonElementIndex element_index,

	2865 NeonWritebackType writeback,

	2866 int align_bytes) {

	2867 ASSERT(IsEnabled(NEON));

	2868 emit(neon_vector_op(size, base, first, writeback, align_bytes,

	2869 Store, SingleElement, element_index, 0));

	2870 }

2653	2871

2654 // Pseudo instructions.	2872 // Pseudo instructions.

2655 void Assembler::nop(int type) {	2873 void Assembler::nop(int type) {

2656 // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes	2874 // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes

2657 // some of the CPU's pipeline and has to issue. Older ARM chips simply used	2875 // some of the CPU's pipeline and has to issue. Older ARM chips simply used

2658 // MOV Rx, Rx as NOP and it performs better even in newer CPUs.	2876 // MOV Rx, Rx as NOP and it performs better even in newer CPUs.

2659 // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode	2877 // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode

2660 // a type.	2878 // a type.

2661 ASSERT(0 <= type && type <= 14); // mov pc, pc isn't a nop.	2879 ASSERT(0 <= type && type <= 14); // mov pc, pc isn't a nop.

2662 emit(al \| 13*B21 \| type*B12 \| type);	2880 emit(al \| 13*B21 \| type*B12 \| type);

(...skipping 388 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3051	3269

3052 // Since a constant pool was just emitted, move the check offset forward by	3270 // Since a constant pool was just emitted, move the check offset forward by

3053 // the standard interval.	3271 // the standard interval.

3054 next_buffer_check_ = pc_offset() + kCheckPoolInterval;	3272 next_buffer_check_ = pc_offset() + kCheckPoolInterval;

3055 }	3273 }

3056	3274

3057	3275

3058 } } // namespace v8::internal	3276 } } // namespace v8::internal

3059	3277

3060 #endif // V8_TARGET_ARCH_ARM	3278 #endif // V8_TARGET_ARCH_ARM

OLD	NEW

« no previous file with comments | « src/arm/assembler-arm.h ('k') | src/arm/codegen-arm.cc » ('j') | src/arm/codegen-arm.cc » ('J')