| OLD | NEW |
| 1 ; This checks to ensure that Subzero aligns spill slots. | 1 ; This checks to ensure that Subzero aligns spill slots. |
| 2 | 2 |
| 3 ; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 | FileCheck %s | 3 ; RUN: %p2i --filetype=obj --disassemble -i %s --args -Om1 \ |
| 4 ; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 | FileCheck %s | 4 ; RUN: -allow-externally-defined-symbols | FileCheck %s |
| 5 ; RUN: %p2i --filetype=obj --disassemble -i %s --args -O2 \ |
| 6 ; RUN: -allow-externally-defined-symbols | FileCheck %s |
| 5 | 7 |
| 6 ; The location of the stack slot for a variable is inferred from the | 8 ; The location of the stack slot for a variable is inferred from the |
| 7 ; return sequence. | 9 ; return sequence. |
| 8 | 10 |
| 9 ; In this file, "global" refers to a variable with a live range across | 11 ; In this file, "global" refers to a variable with a live range across |
| 10 ; multiple basic blocks (not an LLVM global variable) and "local" | 12 ; multiple basic blocks (not an LLVM global variable) and "local" |
| 11 ; refers to a variable that is live in only a single basic block. | 13 ; refers to a variable that is live in only a single basic block. |
| 12 | 14 |
| 13 define <4 x i32> @align_global_vector(i32 %arg) { | 15 define internal <4 x i32> @align_global_vector(i32 %arg) { |
| 14 entry: | 16 entry: |
| 15 %vec.global = insertelement <4 x i32> undef, i32 %arg, i32 0 | 17 %vec.global = insertelement <4 x i32> undef, i32 %arg, i32 0 |
| 16 br label %block | 18 br label %block |
| 17 block: | 19 block: |
| 18 call void @ForceXmmSpills() | 20 call void @ForceXmmSpills() |
| 19 ret <4 x i32> %vec.global | 21 ret <4 x i32> %vec.global |
| 20 ; CHECK-LABEL: align_global_vector | 22 ; CHECK-LABEL: align_global_vector |
| 21 ; CHECK: movups xmm0,XMMWORD PTR [esp] | 23 ; CHECK: movups xmm0,XMMWORD PTR [esp] |
| 22 ; CHECK-NEXT: add esp,0x1c | 24 ; CHECK-NEXT: add esp,0x1c |
| 23 ; CHECK-NEXT: ret | 25 ; CHECK-NEXT: ret |
| 24 } | 26 } |
| 25 | 27 |
| 26 define <4 x i32> @align_local_vector(i32 %arg) { | 28 define internal <4 x i32> @align_local_vector(i32 %arg) { |
| 27 entry: | 29 entry: |
| 28 br label %block | 30 br label %block |
| 29 block: | 31 block: |
| 30 %vec.local = insertelement <4 x i32> undef, i32 %arg, i32 0 | 32 %vec.local = insertelement <4 x i32> undef, i32 %arg, i32 0 |
| 31 call void @ForceXmmSpills() | 33 call void @ForceXmmSpills() |
| 32 ret <4 x i32> %vec.local | 34 ret <4 x i32> %vec.local |
| 33 ; CHECK-LABEL: align_local_vector | 35 ; CHECK-LABEL: align_local_vector |
| 34 ; CHECK: movups xmm0,XMMWORD PTR [esp] | 36 ; CHECK: movups xmm0,XMMWORD PTR [esp] |
| 35 ; CHECK-NEXT: add esp,0x1c | 37 ; CHECK-NEXT: add esp,0x1c |
| 36 ; CHECK-NEXT: ret | 38 ; CHECK-NEXT: ret |
| 37 } | 39 } |
| 38 | 40 |
| 39 declare void @ForceXmmSpills() | 41 declare void @ForceXmmSpills() |
| 40 | 42 |
| 41 define <4 x i32> @align_global_vector_ebp_based(i32 %arg) { | 43 define internal <4 x i32> @align_global_vector_ebp_based(i32 %arg) { |
| 42 entry: | 44 entry: |
| 43 br label %eblock ; Disable alloca optimization | 45 br label %eblock ; Disable alloca optimization |
| 44 eblock: | 46 eblock: |
| 45 %alloc = alloca i8, i32 1, align 1 | 47 %alloc = alloca i8, i32 1, align 1 |
| 46 %vec.global = insertelement <4 x i32> undef, i32 %arg, i32 0 | 48 %vec.global = insertelement <4 x i32> undef, i32 %arg, i32 0 |
| 47 br label %block | 49 br label %block |
| 48 block: | 50 block: |
| 49 call void @ForceXmmSpillsAndUseAlloca(i8* %alloc) | 51 call void @ForceXmmSpillsAndUseAlloca(i8* %alloc) |
| 50 ret <4 x i32> %vec.global | 52 ret <4 x i32> %vec.global |
| 51 ; CHECK-LABEL: align_global_vector_ebp_based | 53 ; CHECK-LABEL: align_global_vector_ebp_based |
| 52 ; CHECK: movups xmm0,XMMWORD PTR [ebp-0x18] | 54 ; CHECK: movups xmm0,XMMWORD PTR [ebp-0x18] |
| 53 ; CHECK-NEXT: mov esp,ebp | 55 ; CHECK-NEXT: mov esp,ebp |
| 54 ; CHECK-NEXT: pop ebp | 56 ; CHECK-NEXT: pop ebp |
| 55 ; CHECK: ret | 57 ; CHECK: ret |
| 56 } | 58 } |
| 57 | 59 |
| 58 define <4 x i32> @align_local_vector_ebp_based(i32 %arg) { | 60 define internal <4 x i32> @align_local_vector_ebp_based(i32 %arg) { |
| 59 entry: | 61 entry: |
| 60 br label %eblock ; Disable alloca optimization | 62 br label %eblock ; Disable alloca optimization |
| 61 eblock: | 63 eblock: |
| 62 %alloc = alloca i8, i32 1, align 1 | 64 %alloc = alloca i8, i32 1, align 1 |
| 63 %vec.local = insertelement <4 x i32> undef, i32 %arg, i32 0 | 65 %vec.local = insertelement <4 x i32> undef, i32 %arg, i32 0 |
| 64 call void @ForceXmmSpillsAndUseAlloca(i8* %alloc) | 66 call void @ForceXmmSpillsAndUseAlloca(i8* %alloc) |
| 65 ret <4 x i32> %vec.local | 67 ret <4 x i32> %vec.local |
| 66 ; CHECK-LABEL: align_local_vector_ebp_based | 68 ; CHECK-LABEL: align_local_vector_ebp_based |
| 67 ; CHECK: movups xmm0,XMMWORD PTR [ebp-0x18] | 69 ; CHECK: movups xmm0,XMMWORD PTR [ebp-0x18] |
| 68 ; CHECK-NEXT: mov esp,ebp | 70 ; CHECK-NEXT: mov esp,ebp |
| 69 ; CHECK-NEXT: pop ebp | 71 ; CHECK-NEXT: pop ebp |
| 70 ; CHECK: ret | 72 ; CHECK: ret |
| 71 } | 73 } |
| 72 | 74 |
| 73 define <4 x i32> @align_local_vector_and_global_float(i32 %arg) { | 75 define internal <4 x i32> @align_local_vector_and_global_float(i32 %arg) { |
| 74 entry: | 76 entry: |
| 75 %float.global = sitofp i32 %arg to float | 77 %float.global = sitofp i32 %arg to float |
| 76 call void @ForceXmmSpillsAndUseFloat(float %float.global) | 78 call void @ForceXmmSpillsAndUseFloat(float %float.global) |
| 77 br label %block | 79 br label %block |
| 78 block: | 80 block: |
| 79 %vec.local = insertelement <4 x i32> undef, i32 undef, i32 0 | 81 %vec.local = insertelement <4 x i32> undef, i32 undef, i32 0 |
| 80 call void @ForceXmmSpillsAndUseFloat(float %float.global) | 82 call void @ForceXmmSpillsAndUseFloat(float %float.global) |
| 81 ret <4 x i32> %vec.local | 83 ret <4 x i32> %vec.local |
| 82 ; CHECK-LABEL: align_local_vector_and_global_float | 84 ; CHECK-LABEL: align_local_vector_and_global_float |
| 83 ; CHECK: cvtsi2ss xmm0,eax | 85 ; CHECK: cvtsi2ss xmm0,eax |
| 84 ; CHECK-NEXT: movss DWORD PTR [esp+{{0xc|0x1c}}],xmm0 | 86 ; CHECK-NEXT: movss DWORD PTR [esp+{{0xc|0x1c}}],xmm0 |
| 85 ; CHECK: movups xmm0,XMMWORD PTR [{{esp|esp\+0x10}}] | 87 ; CHECK: movups xmm0,XMMWORD PTR [{{esp|esp\+0x10}}] |
| 86 ; CHECK-NEXT: add esp,0x2c | 88 ; CHECK-NEXT: add esp,0x2c |
| 87 ; CHECK-NEXT: ret | 89 ; CHECK-NEXT: ret |
| 88 } | 90 } |
| 89 | 91 |
| 90 declare void @ForceXmmSpillsAndUseAlloca(i8*) | 92 declare void @ForceXmmSpillsAndUseAlloca(i8*) |
| 91 declare void @ForceXmmSpillsAndUseFloat(float) | 93 declare void @ForceXmmSpillsAndUseFloat(float) |
| OLD | NEW |