tests_lit/llvm2ice_tests/vector-arg.ll - Issue 372113005: Add support for passing and returning vectors in accordance with the x86 calling convention.

Side by Side Diff: tests_lit/llvm2ice_tests/vector-arg.ll

Issue 372113005: Add support for passing and returning vectors in accordance with the x86 calling convention. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master

Patch Set: First round of changes, including new argument lowering. Created 6 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 ; This file checks that Subzero generates code in accordance with the

	2 ; calling convention for vectors.

	3

	4 ; NOTE: CHECK / OPTM1 lines containing the following strings may be

	5 ; subject to change:

	6 ;

	7 ; * movups: The movups instruction may be changed to movaps when the

	8 ; load / store operation is 16 byte aligned.

	9 ;

	10 ; * stack offsets: These may need to be changed if stack alignment

	11 ; support is implemented.

	12

	13 ; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s

	14 ; RUN: %llvm2ice -Om1 --verbose none %s | FileCheck --check-prefix=OPTM1 %s

	15 ; RUN: %llvm2ice --verbose none %s | FileCheck --check-prefix=ERRORS %s

	16 ; RUN: %llvm2iceinsts %s | %szdiff %s | FileCheck --check-prefix=DUMP %s

	17 ; RUN: %llvm2iceinsts --pnacl %s | %szdiff %s \

	18 ; RUN: | FileCheck --check-prefix=DUMP %s

	19

	20 ; The first five functions test that vectors are moved from their

	21 ; correct argument location to xmm0.

	22

	23 define <4 x float> @test_returning_arg0(<4 x float> %arg0, <4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3, <4 x float> %arg4, <4 x float> %arg5) {

	24 entry:

	25 ret <4 x float> %arg0

	26 ; CHECK-LABEL: test_returning_arg0:

	27 ; CHECK-NOT: mov

	28 ; CHECK: ret

	29

	30 ; OPTM1-LABEL: test_returning_arg0:

	31 ; OPTM1: movups xmmword ptr [[LOC:.*]], xmm0

	32 ; OPTM1: movups xmm0, xmmword ptr [[LOC]]

	33 ; OPTM1: ret

	34 }

	35

	36 define <4 x float> @test_returning_arg1(<4 x float> %arg0, <4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3, <4 x float> %arg4, <4 x float> %arg5) {

	37 entry:

	38 ret <4 x float> %arg1

	39 ; CHECK-LABEL: test_returning_arg1:

	40 ; CHECK: movups xmm0, xmm1

	41 ; CHECK: ret

	42

	43 ; OPTM1-LABEL: test_returning_arg1:

	44 ; OPTM1: movups xmmword ptr [[LOC:.*]], xmm1

	45 ; OPTM1: movups xmm0, xmmword ptr [[LOC]]

	46 ; OPTM1: ret

	47 }

	48

	49 define <4 x float> @test_returning_arg2(<4 x float> %arg0, <4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3, <4 x float> %arg4, <4 x float> %arg5) {

	50 entry:

	51 ret <4 x float> %arg2

	52 ; CHECK-LABEL: test_returning_arg2:

	53 ; CHECK: movups xmm0, xmm2

	54 ; CHECK: ret

	55

	56 ; OPTM1-LABEL: test_returning_arg2:

	57 ; OPTM1: movups xmmword ptr [[LOC:.*]], xmm2

	58 ; OPTM1: movups xmm0, xmmword ptr [[LOC]]

	59 ; OPTM1: ret

	60 }

	61

	62 define <4 x float> @test_returning_arg3(<4 x float> %arg0, <4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3, <4 x float> %arg4, <4 x float> %arg5) {

	63 entry:

	64 ret <4 x float> %arg3

	65 ; CHECK-LABEL: test_returning_arg3:

	66 ; CHECK: movups xmm0, xmm3

	67 ; CHECK: ret

	68

	69 ; OPTM1-LABEL: test_returning_arg3:

	70 ; OPTM1: movups xmmword ptr [[LOC:.*]], xmm3

	71 ; OPTM1: movups xmm0, xmmword ptr [[LOC]]

	72 ; OPTM1: ret

	73 }

	74

	75 define <4 x float> @test_returning_arg4(<4 x float> %arg0, <4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3, <4 x float> %arg4, <4 x float> %arg5) {

	76 entry:

	77 ret <4 x float> %arg4

	78 ; CHECK-LABEL: test_returning_arg4:

	79 ; CHECK: movups xmm0, xmmword ptr [esp+4]

	80 ; CHECK: ret

	81

	82 ; OPTM1-LABEL: test_returning_arg4:

	83 ; OPTM1: movups xmm0, xmmword ptr {{.*}}

	84 ; OPTM1: ret

	85 }

	86

	87 ; The next five functions check that xmm arguments are handled

	88 ; correctly when interspersed with stack arguments in the argument

	89 ; list.

	90

	91 define <4 x float> @test_returning_interspersed_arg0(i32 %i32arg0, double %doublearg0, <4 x float> %arg0, <4 x float> %arg1, i32 %i32arg1, <4 x float> %arg2, double %doublearg1, <4 x float> %arg3, i32 %i32arg2, double %doublearg2, float %floatarg0, <4 x float> %arg4, <4 x float> %arg5, float %floatarg1) {

	92 entry:

	93 ret <4 x float> %arg0

	94 ; CHECK-LABEL: test_returning_interspersed_arg0:

	95 ; CHECK-NOT: mov

	96 ; CHECK: ret

	97

	98 ; OPTM1-LABEL: test_returning_interspersed_arg0:

	99 ; OPTM1: movups xmmword ptr [[LOC:.*]], xmm0

	100 ; OPTM1: movups xmm0, xmmword ptr [[LOC]]

	101 ; OPTM1: ret

	102 }

	103

	104 define <4 x float> @test_returning_interspersed_arg1(i32 %i32arg0, double %doublearg0, <4 x float> %arg0, <4 x float> %arg1, i32 %i32arg1, <4 x float> %arg2, double %doublearg1, <4 x float> %arg3, i32 %i32arg2, double %doublearg2, float %floatarg0, <4 x float> %arg4, <4 x float> %arg5, float %floatarg1) {

	105 entry:

	106 ret <4 x float> %arg1

	107 ; CHECK-LABEL: test_returning_interspersed_arg1:

	108 ; CHECK: movups xmm0, xmm1

	109 ; CHECK: ret

	110

	111 ; OPTM1-LABEL: test_returning_interspersed_arg1:

	112 ; OPTM1: movups xmmword ptr [[LOC:.*]], xmm1

	113 ; OPTM1: movups xmm0, xmmword ptr [[LOC]]

	114 ; OPTM1: ret

	115 }

	116

	117 define <4 x float> @test_returning_interspersed_arg2(i32 %i32arg0, double %doublearg0, <4 x float> %arg0, <4 x float> %arg1, i32 %i32arg1, <4 x float> %arg2, double %doublearg1, <4 x float> %arg3, i32 %i32arg2, double %doublearg2, float %floatarg0, <4 x float> %arg4, <4 x float> %arg5, float %floatarg1) {

	118 entry:

	119 ret <4 x float> %arg2

	120 ; CHECK-LABEL: test_returning_interspersed_arg2:

	121 ; CHECK: movups xmm0, xmm2

	122 ; CHECK: ret

	123

	124 ; OPTM1-LABEL: test_returning_interspersed_arg2:

	125 ; OPTM1: movups xmmword ptr [[LOC:.*]], xmm2

	126 ; OPTM1: movups xmm0, xmmword ptr [[LOC]]

	127 ; OPTM1: ret

	128 }

	129

	130 define <4 x float> @test_returning_interspersed_arg3(i32 %i32arg0, double %doublearg0, <4 x float> %arg0, <4 x float> %arg1, i32 %i32arg1, <4 x float> %arg2, double %doublearg1, <4 x float> %arg3, i32 %i32arg2, double %doublearg2, float %floatarg0, <4 x float> %arg4, <4 x float> %arg5, float %floatarg1) {

	131 entry:

	132 ret <4 x float> %arg3

	133 ; CHECK-LABEL: test_returning_interspersed_arg3:

	134 ; CHECK: movups xmm0, xmm3

	135 ; CHECK: ret

	136

	137 ; OPTM1-LABEL: test_returning_interspersed_arg3:

	138 ; OPTM1: movups xmmword ptr [[LOC:.*]], xmm3

	139 ; OPTM1: movups xmm0, xmmword ptr [[LOC]]

	140 ; OPTM1: ret

	141 }

	142

	143 define <4 x float> @test_returning_interspersed_arg4(i32 %i32arg0, double %doublearg0, <4 x float> %arg0, <4 x float> %arg1, i32 %i32arg1, <4 x float> %arg2, double %doublearg1, <4 x float> %arg3, i32 %i32arg2, double %doublearg2, float %floatarg0, <4 x float> %arg4, <4 x float> %arg5, float %floatarg1) {

	144 entry:

	145 ret <4 x float> %arg4

	146 ; CHECK-LABEL: test_returning_interspersed_arg4:

	147 ; CHECK: movups xmm0, xmmword ptr [esp+44]

	148 ; CHECK: ret

	149

	150 ; OPTM1-LABEL: test_returning_interspersed_arg4:

	151 ; OPTM1: movups xmm0, xmmword ptr {{.*}}

	152 ; OPTM1: ret

	153 }

	154

	155 ; Test that vectors are passed correctly as arguments to a function.

	156

	157 declare void @VectorArgs(<4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>)

	158

	159 define void @test_passing_vectors(<4 x float> %arg0, <4 x float> %arg1, <4 x float> %arg2, <4 x float> %arg3, <4 x float> %arg4, <4 x float> %arg5, <4 x float> %arg6, <4 x float> %arg7, <4 x float> %arg8, <4 x float> %arg9) {

	160 entry:

	161 call void @VectorArgs(<4 x float> %arg9, <4 x float> %arg8, <4 x float> %arg7, <4 x float> %arg6, <4 x float> %arg5, <4 x float> %arg4)

	162 ret void

	163 ; CHECK-LABEL: test_passing_vectors:

	164 ; CHECK: movups xmm0, xmmword ptr [esp+4]
	jvoung (off chromium) 2014/07/09 04:26:53: What are the first few "CHECK: movups xmm0, xmmword ptr [esp+4]", etc. related to? Setting up the xmm0-3 "actual" arguments for the call? Or is there some spill code earlier on, that puts arg9 at esp+4, arg8 at esp+20, etc.? Otherwise, shouldn't arg9 (start) further up the stack than arg0 from the caller's perspective? Similarly, are "ARG5" == %arg4, and "ARG6" == "%arg5" ?
	wala 2014/07/09 19:05:12: The order that the vector arguments are passed is wrong. It should be reversed. This test was also wrong. Good catch, thank you.
	(Quoted in wala's reply) On 2014/07/09 04:26:53, jvoung wrote: > What are the first few "CHECK: movups xmm0, xmmword ptr [esp+4]", etc. related > to? Setting up the xmm0-3 "actual" arguments for the call? > > Or is there some spill code earlier on, that puts arg9 at esp+4, arg8 at esp+20, > etc.? Otherwise, shouldn't arg9 (start) further up the stack than arg0 from the > caller's perspective? > > Similarly, are "ARG5" == %arg4, and "ARG6" == "%arg5" ?
	165 ; CHECK: movups xmm1, xmmword ptr [esp+20]

	166 ; CHECK: movups xmm2, xmmword ptr [esp+36]

	167 ; CHECK: movups xmm3, xmmword ptr [esp+52]

	168 ; CHECK: movups [[ARG5:.*]], xmmword ptr [esp+68]

	169 ; CHECK: movups [[ARG6:.*]], xmmword ptr [esp+84]

	170 ; CHECK: sub esp, 16
	jvoung (off chromium) 2014/07/09 04:26:53: At some point we might want to consolidate the stack adjustments for function calls. It's especially bad for NaCl x86-64 and ARM, since each time you add or subtract rsp/esp, you have to restore the invariant that the stack pointer is within bounds (zero-extend + add r15 or bic). E.g., naclasp and naclssp: https://developer.chrome.com/native-client/reference/sandbox_internals/x86-64... Maybe at some point the caller's prologue would just reserve enough space for any of the calls, once.
	wala 2014/07/09 19:05:13: Added a note in the test and in lowerCall().
	(Quoted in wala's reply) On 2014/07/09 04:26:53, jvoung wrote: > At some point we might want to consolidate the stack adjustments for function > calls. > > It's especially bad for NaCl x86-64 and ARM, since each time you add or subtract > rsp/esp, you have to restore the invariant that the stack pointer is within > bounds (zero-extend + add r15 or bic). E.g., naclasp and naclssp: > https://developer.chrome.com/native-client/reference/sandbox_internals/x86-64... > > Maybe at some point the caller's prologue would just reserve enough space for > any of the calls, once.
	171 ; CHECK-NEXT: movups xmmword ptr [esp], [[ARG5]]

	172 ; CHECK: sub esp, 16

	173 ; CHECK-NEXT: movups xmmword ptr [esp], [[ARG6]]

	174 ; CHECK: call VectorArgs

	175 ; CHECK-NEXT: add esp, 32

	176 ; CHECK: ret

	177

	178 ; OPTM1-LABEL: test_passing_vectors:

	179 ; OPTM1: movups xmm0, xmmword ptr {{.*}}

	180 ; OPTM1: movups xmm1, xmmword ptr {{.*}}

	181 ; OPTM1: movups xmm2, xmmword ptr {{.*}}

	182 ; OPTM1: movups xmm3, xmmword ptr {{.*}}

	183 ; OPTM1: sub esp, 16

	184 ; OPTM1: movups [[ARG5:.*]], xmmword ptr {{.*}}

	185 ; OPTM1-NEXT: movups xmmword ptr [esp], [[ARG5]]

	186 ; OPTM1: sub esp, 16

	187 ; OPTM1: movups [[ARG6:.*]], xmmword ptr {{.*}}

	188 ; OPTM1: movups xmmword ptr [esp], [[ARG6]]

	189 ; OPTM1: call VectorArgs

	190 ; OPTM1: add esp, 32

	191 ; OPTM1: ret

	192 }

	193

	194 ; Test that a vector returned from a function is recognized to be in

	195 ; xmm0.

	196

	197 declare <4 x float> @VectorReturn(<4 x float> %arg0)

	198

	199 define void @test_receiving_vectors(<4 x float> %arg0) {

	200 entry:

	201 %result = call <4 x float> @VectorReturn(<4 x float> %arg0)

	202 %result2 = call <4 x float> @VectorReturn(<4 x float> %result)

	203 ret void

	204 ; CHECK-LABEL: test_receiving_vectors:

	205 ; CHECK: call VectorReturn

	206 ; CHECK-NOT: movups xmm0

	207 ; CHECK: call VectorReturn

	208 ; CHECK: ret

	209

	210 ; OPTM1-LABEL: test_receiving_vectors:

	211 ; OPTM1: call VectorReturn

	212 ; OPTM1: movups [[LOC:.*]], xmm0

	213 ; OPTM1: movups xmm0, [[LOC]]

	214 ; OPTM1: call VectorReturn

	215 ; OPTM1: ret

	216 }

	217

	218 ; ERRORS-NOT: ICE translation error

	219 ; DUMP-NOT: SZ

OLD	NEW

« src/IceTargetLoweringX8632.cpp ('K') | « src/IceTargetLoweringX8632.cpp ('k') | no next file » | no next file with comments »