tests_lit/llvm2ice_tests/large_stack_offs.ll - Issue 1241763002: ARM: Add a postRA pass to legalize stack offsets. Greedy approach (reserve IP).

Side by Side Diff: tests_lit/llvm2ice_tests/large_stack_offs.ll

Issue 1241763002: ARM: Add a postRA pass to legalize stack offsets. Greedy approach (reserve IP). (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: review Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 ; This tries to create variables with very large stack offsets.

	2 ; This requires a lot of variables/register pressure. To simplify this,

	3 ; we rely on the poor register allocation of Om1, plus a test-only flag

	4 ; (--test-stack-extra K) that adds K bytes of unused stack to the frame.

	5 ; We only need to test ARM and other architectures which have limited space

	6 ; for specifying an offset within an instruction.

	7

	8 ; RUN: %if --need=target_ARM32 --need=allow_dump \

	9 ; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \

	10 ; RUN: -i %s --args -Om1 --skip-unimplemented --test-stack-extra 4096 \

	11 ; RUN: \| %if --need=target_ARM32 --need=allow_dump \

	12 ; RUN: --command FileCheck --check-prefix ARM32 %s

	13

	14 declare i64 @dummy(i32 %t1, i32 %t2, i32 %t3, i64 %t4, i64 %t5)

	15

	16 ; Test a function that requires lots of stack (due to test flag), and uses

	17 ; SP as the base register (originally).

	18 define internal i64 @lotsOfStack(i32 %a, i32 %b, i32 %c, i32 %d) {

	19 entry:

	20 %t1 = xor i32 %a, %b

	21 %t2 = or i32 %c, %d

	22 %cmp = icmp eq i32 %t1, %t2

	23 br i1 %cmp, label %br_1, label %br_2

	24

	25 br_1:

	26 %x1 = zext i32 %t1 to i64

	27 %y1 = ashr i64 %x1, 17

	28 ; Use some stack during the call, so that references to %t1 and %t2's

	29 ; stack slots require stack adjustment.

	30 %r1 = call i64 @dummy(i32 123, i32 321, i32 %t2, i64 %x1, i64 %y1)

	31 %z1 = sub i64 %r1, %y1

	32 br label %end

	33

	34 br_2:

	35 %x2 = zext i32 %t2 to i64

	36 %y2 = and i64 %x2, 123

	37 %r2 = call i64 @dummy(i32 123, i32 321, i32 %t2, i64 %x2, i64 %y2)

	38 %z2 = and i64 %r2, %y2

	39 br label %end

	40

	41 end:

	42 %x3 = phi i64 [ %x1, %br_1 ], [ %x2, %br_2 ]

	43 %z3 = phi i64 [ %z1, %br_1 ], [ %z2, %br_2 ]

	44 %r3 = and i64 %x3, %z3

	45 ret i64 %r3

	46 }

	47 ; ARM32-LABEL: lotsOfStack

	48 ; ARM32-NOT: mov fp, sp

	49 ; ARM32: movw ip, #4{{.*}}

	50 ; ARM32-NEXT: sub sp, sp, ip

	51 ; ARM32: movw ip, #4232

	52 ; ARM32-NEXT: add ip, sp, ip

	53 ; ARM32-NOT: movw ip

	54 ; %t2 is the result of the "or", and %t2 will be passed via r2 to the call.

	55 ; Use that to check the stack offset of %t2. The first offset and the

	56 ; later offset right before the call should be 16 bytes apart,

	57 ; because of the sub sp, sp, #16.

	58 ; ARM32: orr [[REG:r.]], {{.}},

	59 ; I.e., the slot for t2 is (sp0 + 4232 - 20) == sp0 + 4212.

	60 ; ARM32: str [[REG]], [ip, #-20]

	61 ; ARM32: b {{[a-f0-9]+}}

	62 ; Now skip ahead to where the call in br_1 begins, to check how %t2 is used.

	63 ; ARM32: movw ip, #4216

	64 ; ARM32-NEXT: add ip, sp, ip

	65 ; ARM32: sub sp, sp, #16

	66 ; Now sp1 = sp0 - 16, but ip is still in terms of sp0.

	67 ; So, sp0 + 4212 == ip - 4.

	68 ; ARM32: ldr r2, [ip, #-4]

	69 ; ARM32: bl {{.*}} dummy

	70 ; ARM32: add sp, sp

	71 ; The call clobbers ip, so we need to re-create the base register.

	72 ; ARM32: movw ip, #4{{.*}}

	73 ; ARM32: b {{[a-f0-9]+}}

	74 ; ARM32: bl {{.*}} dummy

	75

	76 ; Similar, but test a function that uses FP as the base register (originally).

	77 define internal i64 @usesFrameReg(i32 %a, i32 %b, i32 %c, i32 %d) {

	78 entry:

	79 %p = alloca i8, i32 %d, align 4

	80 %t1 = xor i32 %a, %b

	81 %t2 = or i32 %c, %d

	82 %cmp = icmp eq i32 %t1, %t2

	83 br i1 %cmp, label %br_1, label %br_2

	84

	85 br_1:

	86 %x1 = zext i32 %t1 to i64

	87 %y1 = ashr i64 %x1, 17

	88 %p32 = ptrtoint i8* %p to i32

	89 %r1 = call i64 @dummy(i32 %p32, i32 321, i32 %t2, i64 %x1, i64 %y1)

	90 %z1 = sub i64 %r1, %y1

	91 br label %end

	92

	93 br_2:

	94 %x2 = zext i32 %t2 to i64

	95 %y2 = and i64 %x2, 123

	96 %r2 = call i64 @dummy(i32 123, i32 321, i32 %d, i64 %x2, i64 %y2)

	97 %z2 = and i64 %r2, %y2

	98 br label %end

	99

	100 end:

	101 %x3 = phi i64 [ %x1, %br_1 ], [ %x2, %br_2 ]

	102 %z3 = phi i64 [ %z1, %br_1 ], [ %z2, %br_2 ]

	103 %r3 = and i64 %x3, %z3

	104 ret i64 %r3

	105 }

	106 ; ARM32-LABEL: usesFrameReg

	107 ; ARM32: mov fp, sp

	108 ; ARM32: movw ip, #4{{.*}}

	109 ; ARM32-NEXT: sub sp, sp, ip

	110 ; ARM32: movw ip, #4100

	111 ; ARM32-NEXT: sub ip, fp, ip

	112 ; ARM32-NOT: movw ip

	113 ; %t2 is the result of the "or", and %t2 will be passed via r2 to the call.

	114 ; Use that to check the stack offset of %t2. It should be the same offset

	115 ; even after sub sp, sp, #16, because the base register was originally

	116 ; the FP and not the SP.

	117 ; ARM32: orr [[REG:r.]], {{.}},

	118 ; I.e., the slot for t2 is (fp0 - 4100 - 24) == fp0 - 4124.

	119 ; ARM32: str [[REG]], [ip, #-24]

	120 ; ARM32: b {{[a-f0-9]+}}

	121 ; Now skip ahead to where the call in br_1 begins, to check how %t2 is used.

	122 ; ARM32: movw ip, #4120

	123 ; ARM32-NEXT: sub ip, fp, ip

	124 ; ARM32: sub sp, sp, #16

	125 ; Now sp1 = sp0 - 16, but ip is still in terms of fp0.

	126 ; So, fp0 - 4124 == ip - 4.

	127 ; ARM32: ldr r2, [ip, #-4]

	128 ; ARM32: bl {{.*}} dummy

	129 ; ARM32: add sp, sp

	130 ; The call clobbers ip, so we need to re-create the base register.

	131 ; ARM32: movw ip, #4{{.*}}

	132 ; ARM32: b {{[a-f0-9]+}}

	133 ; ARM32: bl {{.*}} dummy

OLD	NEW

« no previous file with comments | « src/IceTargetLoweringX86BaseImpl.h ('k') | no next file » | no next file with comments »