OLD | NEW |
(Empty) | |
| 1 ; RUN: opt -globalize-constant-vectors %s -S | FileCheck %s |
| 2 ; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=C4xi1 %s |
| 3 ; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=C8xi1 %s |
| 4 ; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=C16xi1 %s |
| 5 ; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=C16xi8 %s |
| 6 ; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=C8xi16 %s |
| 7 ; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=C4xi32 %s |
| 8 ; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=C4xfloat
%s |
| 9 ; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=Cbranch %
s |
| 10 ; RUN: opt -globalize-constant-vectors %s -S | FileCheck -check-prefix=Cduplicat
e %s |
| 11 |
| 12 ; Run the test once per function so that each check can look at its |
| 13 ; globals as well as its function. |
| 14 |
| 15 ; The pass needs the datalayout to pick the alignment of the globals it adds. |
| 16 target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64
:64:64-p:32:32:32-v128:32:32" |
| 17 |
| 18 ; Vector constants that are already globals shouldn't get globalized. |
| 19 ; CHECK: @global_should_stay_untouched = internal constant <4 x i32> <i32 1337,
i32 0, i32 0, i32 0> |
| 20 @global_should_stay_untouched = internal constant <4 x i32> <i32 1337, i32 0, i3
2 0, i32 0> |
| 21 |
| 22 ; 4xi1 constant vectors should get globalized, as either 'and' operand. |
| 23 define void @test4xi1(<4 x i1> %in) { |
| 24 %ft0 = and <4 x i1> %in, <i1 false, i1 true, i1 false, i1 true> |
| 25 %ft1 = and <4 x i1> <i1 true, i1 false, i1 true, i1 false>, %in |
| 26 ret void |
| 27 } |
| 28 ; C4xi1: @[[C1:[_a-z0-9]+]] = internal constant <4 x i1> <i1 false, i1 true, i1
false, i1 true>, align 4 |
| 29 ; C4xi1: @[[C2:[_a-z0-9]+]] = internal constant <4 x i1> <i1 true, i1 false, i1
true, i1 false>, align 4 |
| 30 ; C4xi1: define void @test4xi1(<4 x i1> %in) { |
| 31 ; C4xi1-NEXT: %[[M1:[_a-z0-9]+]] = load <4 x i1>* @[[C1]], align 4 |
| 32 ; C4xi1-NEXT: %[[M2:[_a-z0-9]+]] = load <4 x i1>* @[[C2]], align 4 |
| 33 ; C4xi1-NEXT: %ft0 = and <4 x i1> %in, %[[M1]] |
| 34 ; C4xi1-NEXT: %ft1 = and <4 x i1> %[[M2]], %in |
| 35 ; C4xi1-NEXT: ret void |
| 36 |
| 37 ; 8xi1 constant vectors should get globalized, as either 'and' operand. |
| 38 define void @test8xi1(<8 x i1> %in) { |
| 39 %ft0 = and <8 x i1> %in, <i1 false, i1 true, i1 false, i1 true, i1 false, i1 t
rue, i1 false, i1 true> |
| 40 %ft1 = and <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false,
i1 true, i1 false>, %in |
| 41 ret void |
| 42 } |
| 43 ; C8xi1: @[[C1:[_a-z0-9]+]] = internal constant <8 x i1> <i1 false, i1 true, i1
false, i1 true, i1 false, i1 true, i1 false, i1 true>, align 8 |
| 44 ; C8xi1: @[[C2:[_a-z0-9]+]] = internal constant <8 x i1> <i1 true, i1 false, i1
true, i1 false, i1 true, i1 false, i1 true, i1 false>, align 8 |
| 45 ; C8xi1: define void @test8xi1(<8 x i1> %in) { |
| 46 ; C8xi1-NEXT: %[[M1:[_a-z0-9]+]] = load <8 x i1>* @[[C1]], align 8 |
| 47 ; C8xi1-NEXT: %[[M2:[_a-z0-9]+]] = load <8 x i1>* @[[C2]], align 8 |
| 48 ; C8xi1-NEXT: %ft0 = and <8 x i1> %in, %[[M1]] |
| 49 ; C8xi1-NEXT: %ft1 = and <8 x i1> %[[M2]], %in |
| 50 ; C8xi1-NEXT: ret void |
| 51 |
| 52 ; 16xi1 constant vectors should get globalized, as either 'and' operand. |
| 53 define void @test16xi1(<16 x i1> %in) { |
| 54 %ft0 = and <16 x i1> %in, <i1 false, i1 true, i1 false, i1 true, i1 false, i1
true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true
, i1 false, i1 true> |
| 55 %ft1 = and <16 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false,
i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1
true, i1 false>, %in |
| 56 ret void |
| 57 } |
| 58 ; C16xi1: @[[C1:[_a-z0-9]+]] = internal constant <16 x i1> <i1 false, i1 true, i
1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 fa
lse, i1 true, i1 false, i1 true, i1 false, i1 true>, align 16 |
| 59 ; C16xi1: @[[C2:[_a-z0-9]+]] = internal constant <16 x i1> <i1 true, i1 false, i
1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 tr
ue, i1 false, i1 true, i1 false, i1 true, i1 false>, align 16 |
| 60 ; C16xi1: define void @test16xi1(<16 x i1> %in) { |
| 61 ; C16xi1-NEXT: %[[M1:[_a-z0-9]+]] = load <16 x i1>* @[[C1]], align 16 |
| 62 ; C16xi1-NEXT: %[[M2:[_a-z0-9]+]] = load <16 x i1>* @[[C2]], align 16 |
| 63 ; C16xi1-NEXT: %ft0 = and <16 x i1> %in, %[[M1]] |
| 64 ; C16xi1-NEXT: %ft1 = and <16 x i1> %[[M2]], %in |
| 65 ; C16xi1-NEXT: ret void |
| 66 |
| 67 ; 16xi8 constant vectors should get globalized, as either 'add' operand. |
| 68 define void @test16xi8(<16 x i8> %in) { |
| 69 %nonsquares = add <16 x i8> %in, <i8 2, i8 3, i8 5, i8 6, i8 7, i8 8, i8 10, i
8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20> |
| 70 %sort = add <16 x i8> <i8 0, i8 1, i8 3, i8 5, i8 9, i8 11, i8 14, i8 17, i8 2
5, i8 27, i8 30, i8 33, i8 38, i8 41, i8 45, i8 49>, %in |
| 71 ret void |
| 72 } |
| 73 ; C16xi8: @[[C1:[_a-z0-9]+]] = internal constant <16 x i8> <i8 2, i8 3, i8 5, i8
6, i8 7, i8 8, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i
8 20>, align 4 |
| 74 ; C16xi8: @[[C2:[_a-z0-9]+]] = internal constant <16 x i8> <i8 0, i8 1, i8 3, i8
5, i8 9, i8 11, i8 14, i8 17, i8 25, i8 27, i8 30, i8 33, i8 38, i8 41, i8 45,
i8 49>, align 4 |
| 75 ; C16xi8: define void @test16xi8(<16 x i8> %in) { |
| 76 ; C16xi8-NEXT: %[[M1:[_a-z0-9]+]] = load <16 x i8>* @[[C1]], align 4 |
| 77 ; C16xi8-NEXT: %[[M2:[_a-z0-9]+]] = load <16 x i8>* @[[C2]], align 4 |
| 78 ; C16xi8-NEXT: %nonsquares = add <16 x i8> %in, %[[M1]] |
| 79 ; C16xi8-NEXT: %sort = add <16 x i8> %[[M2]], %in |
| 80 ; C16xi8-NEXT: ret void |
| 81 |
| 82 ; 8xi16 constant vectors should get globalized, as either 'add' operand. |
| 83 define void @test8xi16(<8 x i16> %in) { |
| 84 %fib = add <8 x i16> %in, <i16 0, i16 1, i16 1, i16 2, i16 3, i16 5, i16 8, i1
6 13> |
| 85 %answer = add <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 4
2, i16 42>, %in |
| 86 ret void |
| 87 } |
| 88 ; C8xi16: @[[C1:[_a-z0-9]+]] = internal constant <8 x i16> <i16 0, i16 1, i16 1,
i16 2, i16 3, i16 5, i16 8, i16 13>, align 4 |
| 89 ; C8xi16: @[[C2:[_a-z0-9]+]] = internal constant <8 x i16> <i16 42, i16 42, i16
42, i16 42, i16 42, i16 42, i16 42, i16 42>, align 4 |
| 90 ; C8xi16: define void @test8xi16(<8 x i16> %in) { |
| 91 ; C8xi16-NEXT: %[[M1:[_a-z0-9]+]] = load <8 x i16>* @[[C1]], align 4 |
| 92 ; C8xi16-NEXT: %[[M2:[_a-z0-9]+]] = load <8 x i16>* @[[C2]], align 4 |
| 93 ; C8xi16-NEXT: %fib = add <8 x i16> %in, %[[M1]] |
| 94 ; C8xi16-NEXT: %answer = add <8 x i16> %[[M2]], %in |
| 95 ; C8xi16-NEXT: ret void |
| 96 |
| 97 ; 4xi32 constant vectors should get globalized, as either 'add' operand. |
| 98 define void @test4xi32(<4 x i32> %in) { |
| 99 %tetrahedral = add <4 x i32> %in, <i32 1, i32 4, i32 10, i32 20> |
| 100 %serauqs = add <4 x i32> <i32 1, i32 4, i32 9, i32 61>, %in |
| 101 ret void |
| 102 } |
| 103 ; C4xi32: @[[C1:[_a-z0-9]+]] = internal constant <4 x i32> <i32 1, i32 4, i32 10
, i32 20>, align 4 |
| 104 ; C4xi32: @[[C2:[_a-z0-9]+]] = internal constant <4 x i32> <i32 1, i32 4, i32 9,
i32 61>, align 4 |
| 105 ; C4xi32: define void @test4xi32(<4 x i32> %in) { |
| 106 ; C4xi32-NEXT: %[[M1:[_a-z0-9]+]] = load <4 x i32>* @[[C1]], align 4 |
| 107 ; C4xi32-NEXT: %[[M2:[_a-z0-9]+]] = load <4 x i32>* @[[C2]], align 4 |
| 108 ; C4xi32-NEXT: %tetrahedral = add <4 x i32> %in, %[[M1]] |
| 109 ; C4xi32-NEXT: %serauqs = add <4 x i32> %[[M2]], %in |
| 110 ; C4xi32-NEXT: ret void |
| 111 |
| 112 ; 4xfloat constant vectors should get globalized, as either 'fadd' operand. |
| 113 define void @test4xfloat(<4 x float> %in) { |
| 114 %polyhex = fadd <4 x float> %in, <float 1., float 1., float 3., float 7.> |
| 115 %poset = fadd <4 x float> <float 1., float 1., float 3., float 19.>, %in |
| 116 ret void |
| 117 } |
| 118 ; C4xfloat: @[[C1:[_a-z0-9]+]] = internal constant <4 x float> <float 1.000000e+
00, float 1.000000e+00, float 3.000000e+00, float 7.000000e+00>, align 4 |
| 119 ; C4xfloat: @[[C2:[_a-z0-9]+]] = internal constant <4 x float> <float 1.000000e+
00, float 1.000000e+00, float 3.000000e+00, float 1.900000e+01>, align 4 |
| 120 ; C4xfloat: define void @test4xfloat(<4 x float> %in) { |
| 121 ; C4xfloat-NEXT: %[[M1:[_a-z0-9]+]] = load <4 x float>* @[[C1]], align 4 |
| 122 ; C4xfloat-NEXT: %[[M2:[_a-z0-9]+]] = load <4 x float>* @[[C2]], align 4 |
| 123 ; C4xfloat-NEXT: %polyhex = fadd <4 x float> %in, %[[M1]] |
| 124 ; C4xfloat-NEXT: %poset = fadd <4 x float> %[[M2]], %in |
| 125 ; C4xfloat-NEXT: ret void |
| 126 |
| 127 ; Loads of globalized constants must dominate their uses in %lhs and %rhs. |
| 128 define void @testbranch(i1 %cond, <4 x i32> %in) { |
| 129 br i1 %cond, label %lhs, label %rhs |
| 130 lhs: |
| 131 %from_lhs = add <4 x i32> %in, <i32 1, i32 1, i32 2, i32 2> |
| 132 br label %done |
| 133 rhs: |
| 134 %from_rhs = add <4 x i32> <i32 2, i32 2, i32 1, i32 1>, %in |
| 135 br label %done |
| 136 done: |
| 137 %merged = phi <4 x i32> [ %from_lhs, %lhs ], [ %from_rhs, %rhs ] |
| 138 ret void |
| 139 } |
| 140 ; Cbranch: @[[C1:[_a-z0-9]+]] = internal constant <4 x i32> <i32 1, i32 1, i32 2
, i32 2>, align 4 |
| 141 ; Cbranch: @[[C2:[_a-z0-9]+]] = internal constant <4 x i32> <i32 2, i32 2, i32 1
, i32 1>, align 4 |
| 142 ; Cbranch: define void @testbranch(i1 %cond, <4 x i32> %in) { |
| 143 ; Cbranch-NEXT: %[[M1:[_a-z0-9]+]] = load <4 x i32>* @[[C1]], align 4 |
| 144 ; Cbranch-NEXT: %[[M2:[_a-z0-9]+]] = load <4 x i32>* @[[C2]], align 4 |
| 145 ; Cbranch-NEXT: br i1 %cond, label %lhs, label %rhs |
| 146 ; Cbranch: lhs: |
| 147 ; Cbranch-NEXT: %from_lhs = add <4 x i32> %in, %[[M1]] |
| 148 ; Cbranch-NEXT: br label %done |
| 149 ; Cbranch: rhs: |
| 150 ; Cbranch-NEXT: %from_rhs = add <4 x i32> %[[M2]], %in |
| 151 ; Cbranch-NEXT: br label %done |
| 152 ; Cbranch: done: |
| 153 ; Cbranch-NEXT: %merged = phi <4 x i32> [ %from_lhs, %lhs ], [ %from_rhs, %rhs ] |
| 154 ; Cbranch-NEXT: ret void |
| 155 |
| 156 ; The same constant used in two functions should get a separate global and |
| 157 ; load per function; the all-zero operand is expected to stay inline. |
| 158 define void @testduplicate1() { |
| 159 %foo = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <i32 0, i32 0, i32 0, i32 0
> |
| 160 ret void |
| 161 } |
| 162 define void @testduplicate2() { |
| 163 %foo = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <i32 0, i32 0, i32 0, i32 0
> |
| 164 ret void |
| 165 } |
| 166 ; Cduplicate: @[[C1:[_a-z0-9]+]] = internal constant <4 x i32> <i32 1, i32 1, i3
2 1, i32 1>, align 4 |
| 167 ; Cduplicate: @[[C2:[_a-z0-9]+]] = internal constant <4 x i32> <i32 1, i32 1, i3
2 1, i32 1>, align 4 |
| 168 ; Cduplicate: define void @testduplicate1() { |
| 169 ; Cduplicate-NEXT: %[[M1:[_a-z0-9]+]] = load <4 x i32>* @[[C1]], align 4 |
| 170 ; Cduplicate-NEXT: %foo = add <4 x i32> %[[M1]], zeroinitializer |
| 171 ; Cduplicate-NEXT: ret void |
| 172 ; Cduplicate: define void @testduplicate2() { |
| 173 ; Cduplicate-NEXT: %[[M1:[_a-z0-9]+]] = load <4 x i32>* @[[C2]], align 4 |
| 174 ; Cduplicate-NEXT: %foo = add <4 x i32> %[[M1]], zeroinitializer |
| 175 ; Cduplicate-NEXT: ret void |
OLD | NEW |