tests_lit/llvm2ice_tests/large_stack_offs.ll - Issue 1241763002: ARM: Add a postRA pass to legalize stack offsets. Greedy approach (reserve IP).

Unified Diff: tests_lit/llvm2ice_tests/large_stack_offs.ll

Issue 1241763002: ARM: Add a postRA pass to legalize stack offsets. Greedy approach (reserve IP). (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: review Created 5 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tests_lit/llvm2ice_tests/large_stack_offs.ll

diff --git a/tests_lit/llvm2ice_tests/large_stack_offs.ll b/tests_lit/llvm2ice_tests/large_stack_offs.ll

new file mode 100644

index 0000000000000000000000000000000000000000..6fba4dfe2b29a8e29cdadc07ac886539c081588c

--- /dev/null

+++ b/tests_lit/llvm2ice_tests/large_stack_offs.ll

@@ -0,0 +1,133 @@

+; This tries to create variables with very large stack offsets.

+; This requires a lot of variables/register pressure. To simplify this

+; we assume poor register allocation from Om1, and a flag that forces

+; the frame to add K amount of unused stack for testing.

+; We only need to test ARM and other architectures which have limited space

+; for specifying an offset within an instruction.

+; RUN: %if --need=target_ARM32 --need=allow_dump \

+; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \

+; RUN: -i %s --args -Om1 --skip-unimplemented --test-stack-extra 4096 \

+; RUN: | %if --need=target_ARM32 --need=allow_dump \

+; RUN: --command FileCheck --check-prefix ARM32 %s

+declare i64 @dummy(i32 %t1, i32 %t2, i32 %t3, i64 %t4, i64 %t5)

+; Test a function that requires lots of stack (due to test flag), and uses

+; SP as the base register (originally).

+define internal i64 @lotsOfStack(i32 %a, i32 %b, i32 %c, i32 %d) {

+entry:

+ %t1 = xor i32 %a, %b

+ %t2 = or i32 %c, %d

+ %cmp = icmp eq i32 %t1, %t2

+ br i1 %cmp, label %br_1, label %br_2

+br_1:

+ %x1 = zext i32 %t1 to i64

+ %y1 = ashr i64 %x1, 17

+ ; Use some stack during the call, so that references to %t1 and %t2's

+ ; stack slots require stack adjustment.

+ %r1 = call i64 @dummy(i32 123, i32 321, i32 %t2, i64 %x1, i64 %y1)

+ %z1 = sub i64 %r1, %y1

+ br label %end

+br_2:

+ %x2 = zext i32 %t2 to i64

+ %y2 = and i64 %x2, 123

+ %r2 = call i64 @dummy(i32 123, i32 321, i32 %t2, i64 %x2, i64 %y2)

+ %z2 = and i64 %r2, %y2

+ br label %end

+end:

+ %x3 = phi i64 [ %x1, %br_1 ], [ %x2, %br_2 ]

+ %z3 = phi i64 [ %z1, %br_1 ], [ %z2, %br_2 ]

+ %r3 = and i64 %x3, %z3

+ ret i64 %r3

+; ARM32-LABEL: lotsOfStack

+; ARM32-NOT: mov fp, sp

+; ARM32: movw ip, #4{{.*}}

+; ARM32-NEXT: sub sp, sp, ip

+; ARM32: movw ip, #4232

+; ARM32-NEXT: add ip, sp, ip

+; ARM32-NOT: movw ip

+; %t2 is the result of the "or", and %t2 will be passed via r1 to the call.

+; Use that to check the stack offset of %t2. The first offset and the

+; later offset right before the call should be 16 bytes apart,

+; because of the sub sp, sp, #16.

+; ARM32: orr [[REG:r.*]], {{.*}},

+; I.e., the slot for t2 is (sp0 + 4232 - 20) == sp0 + 4212.

+; ARM32: str [[REG]], [ip, #-20]

+; ARM32: b {{[a-f0-9]+}}

+; Now skip ahead to where the call in br_1 begins, to check how %t2 is used.

+; ARM32: movw ip, #4216

+; ARM32-NEXT: add ip, sp, ip

+; ARM32: sub sp, sp, #16

+; Now sp1 = sp0 - 16, but ip is still in terms of sp0.

+; So, sp0 + 4212 == ip - 4.

+; ARM32: ldr r2, [ip, #-4]

+; ARM32: bl {{.*}} dummy

+; ARM32: add sp, sp

+; The call clobbers ip, so we need to re-create the base register.

+; ARM32: movw ip, #4{{.*}}

+; ARM32: b {{[a-f0-9]+}}

+; ARM32: bl {{.*}} dummy

+; Similar, but test a function that uses FP as the base register (originally).

+define internal i64 @usesFrameReg(i32 %a, i32 %b, i32 %c, i32 %d) {

+entry:

+ %p = alloca i8, i32 %d, align 4

+ %t1 = xor i32 %a, %b

+ %t2 = or i32 %c, %d

+ %cmp = icmp eq i32 %t1, %t2

+ br i1 %cmp, label %br_1, label %br_2

+br_1:

+ %x1 = zext i32 %t1 to i64

+ %y1 = ashr i64 %x1, 17

+ %p32 = ptrtoint i8* %p to i32

+ %r1 = call i64 @dummy(i32 %p32, i32 321, i32 %t2, i64 %x1, i64 %y1)

+ %z1 = sub i64 %r1, %y1

+ br label %end

+br_2:

+ %x2 = zext i32 %t2 to i64

+ %y2 = and i64 %x2, 123

+ %r2 = call i64 @dummy(i32 123, i32 321, i32 %d, i64 %x2, i64 %y2)

+ %z2 = and i64 %r2, %y2

+ br label %end

+end:

+ %x3 = phi i64 [ %x1, %br_1 ], [ %x2, %br_2 ]

+ %z3 = phi i64 [ %z1, %br_1 ], [ %z2, %br_2 ]

+ %r3 = and i64 %x3, %z3

+ ret i64 %r3

+; ARM32-LABEL: usesFrameReg

+; ARM32: mov fp, sp

+; ARM32: movw ip, #4{{.*}}

+; ARM32-NEXT: sub sp, sp, ip

+; ARM32: movw ip, #4100

+; ARM32-NEXT: sub ip, fp, ip

+; ARM32-NOT: movw ip

+; %t2 is the result of the "or", and %t2 will be passed via r1 to the call.

+; Use that to check the stack offset of %t2. It should be the same offset

+; even after sub sp, sp, #16, because the base register was originally

+; the FP and not the SP.

+; ARM32: orr [[REG:r.*]], {{.*}},

+; I.e., the slot for t2 is (fp0 - 4100 -24) == fp0 - 4124

+; ARM32: str [[REG]], [ip, #-24]

+; ARM32: b {{[a-f0-9]+}}

+; Now skip ahead to where the call in br_1 begins, to check how %t2 is used.

+; ARM32: movw ip, #4120

+; ARM32-NEXT: sub ip, fp, ip

+; ARM32: sub sp, sp, #16

+; Now sp1 = sp0 - 16, but ip is still in terms of fp0.

+; So, fp0 - 4124 == ip - 4.

+; ARM32: ldr r2, [ip, #-4]

+; ARM32: bl {{.*}} dummy

+; ARM32: add sp, sp

+; The call clobbers ip, so we need to re-create the base register.

+; ARM32: movw ip, #4{{.*}}

+; ARM32: b {{[a-f0-9]+}}

+; ARM32: bl {{.*}} dummy

« no previous file with comments | « src/IceTargetLoweringX86BaseImpl.h ('k') | no next file » | no next file with comments »