OLD | NEW |
(Empty) | |
| 1 ; This tries to create variables with very large stack offsets. |
| 2 ; This requires a lot of variables/register pressure. To simplify this |
| 3 ; we assume poor register allocation from Om1, and a flag that forces |
| 4 ; the frame to add K amount of unused stack for testing. |
| 5 ; We only need to test ARM and other architectures which have limited space |
| 6 ; for specifying an offset within an instruction. |
| 7 |
| 8 ; RUN: %if --need=target_ARM32 --need=allow_dump \ |
| 9 ; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \ |
| 10 ; RUN: -i %s --args -Om1 --skip-unimplemented --test-stack-extra 4096 \ |
| 11 ; RUN: | %if --need=target_ARM32 --need=allow_dump \ |
| 12 ; RUN: --command FileCheck --check-prefix ARM32 %s |
| 13 |
; External function called by both test functions below. It takes three i32
; arguments followed by two i64 arguments and returns an i64.
| 14 declare i64 @dummy(i32 %t1, i32 %t2, i32 %t3, i64 %t4, i64 %t5) |
| 15 |
| 16 ; Test a function that requires lots of stack (due to test flag), and uses |
| 17 ; SP as the base register (originally). |
; Shape of the function: compute %t1 and %t2 in the entry block, branch on
; their equality, call @dummy on either path with %t2 as the third argument,
; then merge the per-path values with phis and return their bitwise AND.
| 18 define internal i64 @lotsOfStack(i32 %a, i32 %b, i32 %c, i32 %d) { |
| 19 entry: |
; %t1 = a ^ b and %t2 = c | d; both values are used again in later blocks.
| 20 %t1 = xor i32 %a, %b |
| 21 %t2 = or i32 %c, %d |
| 22 %cmp = icmp eq i32 %t1, %t2 |
| 23 br i1 %cmp, label %br_1, label %br_2 |
| 24 |
| 25 br_1: |
; Widen %t1 to 64 bits and derive a second i64 so the call has two i64
; arguments in addition to the three i32 arguments.
| 26 %x1 = zext i32 %t1 to i64 |
| 27 %y1 = ashr i64 %x1, 17 |
| 28 ; Use some stack during the call, so that references to %t1 and %t2's |
| 29 ; stack slots require stack adjustment. |
| 30 %r1 = call i64 @dummy(i32 123, i32 321, i32 %t2, i64 %x1, i64 %y1) |
; %y1 is used after the call as well as before it.
| 31 %z1 = sub i64 %r1, %y1 |
| 32 br label %end |
| 33 |
| 34 br_2: |
; Same pattern as br_1, but the i64 values are derived from %t2 and the
; second one is produced with a mask instead of a shift.
| 35 %x2 = zext i32 %t2 to i64 |
| 36 %y2 = and i64 %x2, 123 |
| 37 %r2 = call i64 @dummy(i32 123, i32 321, i32 %t2, i64 %x2, i64 %y2) |
| 38 %z2 = and i64 %r2, %y2 |
| 39 br label %end |
| 40 |
| 41 end: |
; Merge the values produced on the two paths and combine them for the return.
| 42 %x3 = phi i64 [ %x1, %br_1 ], [ %x2, %br_2 ] |
| 43 %z3 = phi i64 [ %z1, %br_1 ], [ %z2, %br_2 ] |
| 44 %r3 = and i64 %x3, %z3 |
| 45 ret i64 %r3 |
| 46 } |
| 47 ; ARM32-LABEL: lotsOfStack |
| 48 ; ARM32-NOT: mov fp, sp |
| 49 ; ARM32: movw ip, #4{{.*}} |
| 50 ; ARM32-NEXT: sub sp, sp, ip |
| 51 ; ARM32: movw ip, #4232 |
| 52 ; ARM32-NEXT: add ip, sp, ip |
| 53 ; ARM32-NOT: movw ip |
| 54 ; %t2 is the result of the "or", and %t2 will be passed via r2 to the call. |
| 55 ; Use that to check the stack offset of %t2. The first offset and the |
| 56 ; later offset right before the call should be 16 bytes apart, |
| 57 ; because of the sub sp, sp, #16. |
| 58 ; ARM32: orr [[REG:r.*]], {{.*}}, |
| 59 ; I.e., the slot for t2 is (sp0 + 4232 - 20) == sp0 + 4212. |
| 60 ; ARM32: str [[REG]], [ip, #-20] |
| 61 ; ARM32: b {{[a-f0-9]+}} |
| 62 ; Now skip ahead to where the call in br_1 begins, to check how %t2 is used. |
| 63 ; ARM32: movw ip, #4216 |
| 64 ; ARM32-NEXT: add ip, sp, ip |
| 65 ; ARM32: sub sp, sp, #16 |
| 66 ; Now sp1 = sp0 - 16, but ip is still in terms of sp0. |
| 67 ; So, sp0 + 4212 == ip - 4. |
| 68 ; ARM32: ldr r2, [ip, #-4] |
| 69 ; ARM32: bl {{.*}} dummy |
| 70 ; ARM32: add sp, sp |
| 71 ; The call clobbers ip, so we need to re-create the base register. |
| 72 ; ARM32: movw ip, #4{{.*}} |
| 73 ; ARM32: b {{[a-f0-9]+}} |
| 74 ; ARM32: bl {{.*}} dummy |
| 75 |
| 76 ; Similar, but test a function that uses FP as the base register (originally). |
; Same overall shape as @lotsOfStack, with an added alloca whose size is the
; runtime value %d.
; NOTE(review): presumably the variable-sized alloca is what makes the code
; generator keep a frame pointer for this function -- confirm.
| 77 define internal i64 @usesFrameReg(i32 %a, i32 %b, i32 %c, i32 %d) { |
| 78 entry: |
; Allocate %d bytes (4-byte aligned); the size is not a compile-time constant.
| 79 %p = alloca i8, i32 %d, align 4 |
| 80 %t1 = xor i32 %a, %b |
| 81 %t2 = or i32 %c, %d |
| 82 %cmp = icmp eq i32 %t1, %t2 |
| 83 br i1 %cmp, label %br_1, label %br_2 |
| 84 |
| 85 br_1: |
| 86 %x1 = zext i32 %t1 to i64 |
| 87 %y1 = ashr i64 %x1, 17 |
; The alloca's address is converted to i32 and passed as the first call
; argument, so the allocation has a use.
| 88 %p32 = ptrtoint i8* %p to i32 |
| 89 %r1 = call i64 @dummy(i32 %p32, i32 321, i32 %t2, i64 %x1, i64 %y1) |
| 90 %z1 = sub i64 %r1, %y1 |
| 91 br label %end |
| 92 |
| 93 br_2: |
| 94 %x2 = zext i32 %t2 to i64 |
| 95 %y2 = and i64 %x2, 123 |
; NOTE(review): the third argument here is %d, whereas the corresponding call
; in @lotsOfStack passes %t2 -- confirm this difference is intentional.
| 96 %r2 = call i64 @dummy(i32 123, i32 321, i32 %d, i64 %x2, i64 %y2) |
| 97 %z2 = and i64 %r2, %y2 |
| 98 br label %end |
| 99 |
| 100 end: |
; Merge the values produced on the two paths and combine them for the return.
| 101 %x3 = phi i64 [ %x1, %br_1 ], [ %x2, %br_2 ] |
| 102 %z3 = phi i64 [ %z1, %br_1 ], [ %z2, %br_2 ] |
| 103 %r3 = and i64 %x3, %z3 |
| 104 ret i64 %r3 |
| 105 } |
| 106 ; ARM32-LABEL: usesFrameReg |
| 107 ; ARM32: mov fp, sp |
| 108 ; ARM32: movw ip, #4{{.*}} |
| 109 ; ARM32-NEXT: sub sp, sp, ip |
| 110 ; ARM32: movw ip, #4100 |
| 111 ; ARM32-NEXT: sub ip, fp, ip |
| 112 ; ARM32-NOT: movw ip |
| 113 ; %t2 is the result of the "or", and %t2 will be passed via r2 to the call. |
| 114 ; Use that to check the stack offset of %t2. It should be the same offset |
| 115 ; even after sub sp, sp, #16, because the base register was originally |
| 116 ; the FP and not the SP. |
| 117 ; ARM32: orr [[REG:r.*]], {{.*}}, |
| 118 ; I.e., the slot for t2 is (fp0 - 4100 -24) == fp0 - 4124 |
| 119 ; ARM32: str [[REG]], [ip, #-24] |
| 120 ; ARM32: b {{[a-f0-9]+}} |
| 121 ; Now skip ahead to where the call in br_1 begins, to check how %t2 is used. |
| 122 ; ARM32: movw ip, #4120 |
| 123 ; ARM32-NEXT: sub ip, fp, ip |
| 124 ; ARM32: sub sp, sp, #16 |
| 125 ; Now sp1 = sp0 - 16, but ip is still in terms of fp0. |
| 126 ; So, fp0 - 4124 == ip - 4. |
| 127 ; ARM32: ldr r2, [ip, #-4] |
| 128 ; ARM32: bl {{.*}} dummy |
| 129 ; ARM32: add sp, sp |
| 130 ; The call clobbers ip, so we need to re-create the base register. |
| 131 ; ARM32: movw ip, #4{{.*}} |
| 132 ; ARM32: b {{[a-f0-9]+}} |
| 133 ; ARM32: bl {{.*}} dummy |
OLD | NEW |