OLD | NEW |
1 ; This checks to ensure that Subzero aligns spill slots. | 1 ; This checks to ensure that Subzero aligns spill slots. |
2 | 2 |
3 ; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 | FileCheck %s | 3 ; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 \ |
4 ; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 | FileCheck %s | 4 ; RUN: -allow-externally-defined-symbols | FileCheck %s |
| 5 ; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 \ |
| 6 ; RUN: -allow-externally-defined-symbols | FileCheck %s |
5 | 7 |
6 ; The location of the stack slot for a variable is inferred from the | 8 ; The location of the stack slot for a variable is inferred from the |
7 ; return sequence. | 9 ; return sequence. |
8 | 10 |
9 ; In this file, "global" refers to a variable with a live range across | 11 ; In this file, "global" refers to a variable with a live range across |
10 ; multiple basic blocks (not an LLVM global variable) and "local" | 12 ; multiple basic blocks (not an LLVM global variable) and "local" |
11 ; refers to a variable that is live in only a single basic block. | 13 ; refers to a variable that is live in only a single basic block. |
12 | 14 |
; align_global_vector: %vec.global is created in `entry` and returned from
; `block`, so it is live across two basic blocks ("global" in this file's
; terminology) and across the call to @ForceXmmSpills.
; The NEW column differs from the OLD column only by the added `internal`
; linkage on the define line.
; Expected return sequence: reload the result into xmm0 from [esp], then
; (CHECK-NEXT, i.e. on the very next lines) `add esp,0x1c` and `ret`.
; NOTE(review): @ForceXmmSpills is only declared in this file; presumably the
; call is what pushes the vector into a stack slot -- confirm against the
; helper's definition.
13 define <4 x i32> @align_global_vector(i32 %arg) { | 15 define internal <4 x i32> @align_global_vector(i32 %arg) { |
14 entry: | 16 entry: |
15 %vec.global = insertelement <4 x i32> undef, i32 %arg, i32 0 | 17 %vec.global = insertelement <4 x i32> undef, i32 %arg, i32 0 |
16 br label %block | 18 br label %block |
17 block: | 19 block: |
18 call void @ForceXmmSpills() | 20 call void @ForceXmmSpills() |
19 ret <4 x i32> %vec.global | 21 ret <4 x i32> %vec.global |
20 ; CHECK-LABEL: align_global_vector | 22 ; CHECK-LABEL: align_global_vector |
21 ; CHECK: movups xmm0,XMMWORD PTR [esp] | 23 ; CHECK: movups xmm0,XMMWORD PTR [esp] |
22 ; CHECK-NEXT: add esp,0x1c | 24 ; CHECK-NEXT: add esp,0x1c |
23 ; CHECK-NEXT: ret | 25 ; CHECK-NEXT: ret |
24 } | 26 } |
25 | 27 |
; align_local_vector: %vec.local is both created and returned inside `block`,
; so it is live in a single basic block only ("local" in this file's
; terminology), but it is still live across the call to @ForceXmmSpills.
; The NEW column differs from the OLD column only by the added `internal`
; linkage on the define line.
; Expected return sequence is the same as for the "global" variant: reload
; xmm0 from [esp], immediately followed by `add esp,0x1c` and `ret`.
; NOTE(review): the spilling effect of @ForceXmmSpills is inferred from its
; name; only its declaration is visible here.
26 define <4 x i32> @align_local_vector(i32 %arg) { | 28 define internal <4 x i32> @align_local_vector(i32 %arg) { |
27 entry: | 29 entry: |
28 br label %block | 30 br label %block |
29 block: | 31 block: |
30 %vec.local = insertelement <4 x i32> undef, i32 %arg, i32 0 | 32 %vec.local = insertelement <4 x i32> undef, i32 %arg, i32 0 |
31 call void @ForceXmmSpills() | 33 call void @ForceXmmSpills() |
32 ret <4 x i32> %vec.local | 34 ret <4 x i32> %vec.local |
33 ; CHECK-LABEL: align_local_vector | 35 ; CHECK-LABEL: align_local_vector |
34 ; CHECK: movups xmm0,XMMWORD PTR [esp] | 36 ; CHECK: movups xmm0,XMMWORD PTR [esp] |
35 ; CHECK-NEXT: add esp,0x1c | 37 ; CHECK-NEXT: add esp,0x1c |
36 ; CHECK-NEXT: ret | 38 ; CHECK-NEXT: ret |
37 } | 39 } |
38 | 40 |
; External helper called by @align_global_vector and @align_local_vector.
; Only the declaration appears in this file (unchanged between OLD and NEW).
; NOTE(review): presumably relies on the -allow-externally-defined-symbols
; flag added to the RUN lines now that it is the only non-internal symbol
; kind left -- confirm.
39 declare void @ForceXmmSpills() | 41 declare void @ForceXmmSpills() |
40 | 42 |
; align_global_vector_ebp_based: same idea as @align_global_vector, but the
; function also contains an alloca (placed in `eblock`, after an initial
; branch, per the "Disable alloca optimization" remark below) whose pointer
; is passed to @ForceXmmSpillsAndUseAlloca. %vec.global is created in
; `eblock` and returned from `block`, so it spans two basic blocks.
; The NEW column differs from the OLD column only by the added `internal`
; linkage on the define line.
; Expected return sequence: the result is reloaded relative to ebp
; ([ebp-0x18]), immediately followed by `mov esp,ebp` and `pop ebp`.
; The final `ret` uses plain CHECK rather than CHECK-NEXT, so other
; instructions are permitted between `pop ebp` and `ret`.
41 define <4 x i32> @align_global_vector_ebp_based(i32 %arg) { | 43 define internal <4 x i32> @align_global_vector_ebp_based(i32 %arg) { |
42 entry: | 44 entry: |
43 br label %eblock ; Disable alloca optimization | 45 br label %eblock ; Disable alloca optimization |
44 eblock: | 46 eblock: |
45 %alloc = alloca i8, i32 1, align 1 | 47 %alloc = alloca i8, i32 1, align 1 |
46 %vec.global = insertelement <4 x i32> undef, i32 %arg, i32 0 | 48 %vec.global = insertelement <4 x i32> undef, i32 %arg, i32 0 |
47 br label %block | 49 br label %block |
48 block: | 50 block: |
49 call void @ForceXmmSpillsAndUseAlloca(i8* %alloc) | 51 call void @ForceXmmSpillsAndUseAlloca(i8* %alloc) |
50 ret <4 x i32> %vec.global | 52 ret <4 x i32> %vec.global |
51 ; CHECK-LABEL: align_global_vector_ebp_based | 53 ; CHECK-LABEL: align_global_vector_ebp_based |
52 ; CHECK: movups xmm0,XMMWORD PTR [ebp-0x18] | 54 ; CHECK: movups xmm0,XMMWORD PTR [ebp-0x18] |
53 ; CHECK-NEXT: mov esp,ebp | 55 ; CHECK-NEXT: mov esp,ebp |
54 ; CHECK-NEXT: pop ebp | 56 ; CHECK-NEXT: pop ebp |
55 ; CHECK: ret | 57 ; CHECK: ret |
56 } | 58 } |
57 | 59 |
; align_local_vector_ebp_based: ebp-based counterpart of @align_local_vector.
; The alloca, the vector construction, the call and the return all sit in
; `eblock`, so %vec.local is live in one basic block only while still being
; live across the call to @ForceXmmSpillsAndUseAlloca.
; The NEW column differs from the OLD column only by the added `internal`
; linkage on the define line.
; Expected return sequence: reload xmm0 from [ebp-0x18], immediately followed
; by `mov esp,ebp` and `pop ebp`; the `ret` is matched with plain CHECK, so it
; need not be the very next instruction.
58 define <4 x i32> @align_local_vector_ebp_based(i32 %arg) { | 60 define internal <4 x i32> @align_local_vector_ebp_based(i32 %arg) { |
59 entry: | 61 entry: |
60 br label %eblock ; Disable alloca optimization | 62 br label %eblock ; Disable alloca optimization |
61 eblock: | 63 eblock: |
62 %alloc = alloca i8, i32 1, align 1 | 64 %alloc = alloca i8, i32 1, align 1 |
63 %vec.local = insertelement <4 x i32> undef, i32 %arg, i32 0 | 65 %vec.local = insertelement <4 x i32> undef, i32 %arg, i32 0 |
64 call void @ForceXmmSpillsAndUseAlloca(i8* %alloc) | 66 call void @ForceXmmSpillsAndUseAlloca(i8* %alloc) |
65 ret <4 x i32> %vec.local | 67 ret <4 x i32> %vec.local |
66 ; CHECK-LABEL: align_local_vector_ebp_based | 68 ; CHECK-LABEL: align_local_vector_ebp_based |
67 ; CHECK: movups xmm0,XMMWORD PTR [ebp-0x18] | 69 ; CHECK: movups xmm0,XMMWORD PTR [ebp-0x18] |
68 ; CHECK-NEXT: mov esp,ebp | 70 ; CHECK-NEXT: mov esp,ebp |
69 ; CHECK-NEXT: pop ebp | 71 ; CHECK-NEXT: pop ebp |
70 ; CHECK: ret | 72 ; CHECK: ret |
71 } | 73 } |
72 | 74 |
; align_local_vector_and_global_float: mixes two stack-slot kinds in one
; function. %float.global is produced in `entry` and consumed in both `entry`
; and `block` (live across basic blocks); %vec.local is produced and returned
; inside `block` only, and is built purely from undef operands.
; The NEW column differs from the OLD column only by the added `internal`
; linkage on the define line.
; Expected code: `cvtsi2ss xmm0,eax` immediately followed by a movss store of
; the float to [esp+0xc] or [esp+0x1c]; later the vector is reloaded from
; [esp] or [esp+0x10], immediately followed by `add esp,0x2c` and `ret`.
; The regex alternations accept either of the two slot layouts.
73 define <4 x i32> @align_local_vector_and_global_float(i32 %arg) { | 75 define internal <4 x i32> @align_local_vector_and_global_float(i32 %arg) { |
74 entry: | 76 entry: |
75 %float.global = sitofp i32 %arg to float | 77 %float.global = sitofp i32 %arg to float |
76 call void @ForceXmmSpillsAndUseFloat(float %float.global) | 78 call void @ForceXmmSpillsAndUseFloat(float %float.global) |
77 br label %block | 79 br label %block |
78 block: | 80 block: |
79 %vec.local = insertelement <4 x i32> undef, i32 undef, i32 0 | 81 %vec.local = insertelement <4 x i32> undef, i32 undef, i32 0 |
80 call void @ForceXmmSpillsAndUseFloat(float %float.global) | 82 call void @ForceXmmSpillsAndUseFloat(float %float.global) |
81 ret <4 x i32> %vec.local | 83 ret <4 x i32> %vec.local |
82 ; CHECK-LABEL: align_local_vector_and_global_float | 84 ; CHECK-LABEL: align_local_vector_and_global_float |
83 ; CHECK: cvtsi2ss xmm0,eax | 85 ; CHECK: cvtsi2ss xmm0,eax |
84 ; CHECK-NEXT: movss DWORD PTR [esp+{{0xc|0x1c}}],xmm0 | 86 ; CHECK-NEXT: movss DWORD PTR [esp+{{0xc|0x1c}}],xmm0 |
85 ; CHECK: movups xmm0,XMMWORD PTR [{{esp|esp\+0x10}}] | 87 ; CHECK: movups xmm0,XMMWORD PTR [{{esp|esp\+0x10}}] |
86 ; CHECK-NEXT: add esp,0x2c | 88 ; CHECK-NEXT: add esp,0x2c |
87 ; CHECK-NEXT: ret | 89 ; CHECK-NEXT: ret |
88 } | 90 } |
89 | 91 |
; External helpers, declared only (unchanged between OLD and NEW):
;   @ForceXmmSpillsAndUseAlloca(i8*) - receives the alloca pointer in the two
;     *_ebp_based tests.
;   @ForceXmmSpillsAndUseFloat(float) - receives %float.global in
;     @align_local_vector_and_global_float.
; NOTE(review): their spilling behaviour is inferred from the names; the
; definitions are not part of this file.
90 declare void @ForceXmmSpillsAndUseAlloca(i8*) | 92 declare void @ForceXmmSpillsAndUseAlloca(i8*) |
91 declare void @ForceXmmSpillsAndUseFloat(float) | 93 declare void @ForceXmmSpillsAndUseFloat(float) |
OLD | NEW |