OLD | NEW |
1 ; This tests the optimization where producers and consumers of i1 (bool) | 1 ; This tests the optimization where producers and consumers of i1 (bool) |
2 ; variables are combined to implicitly use flags instead of explicitly using | 2 ; variables are combined to implicitly use flags instead of explicitly using |
3 ; stack or register variables. | 3 ; stack or register variables. |
4 | 4 |
5 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s | 5 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s |
6 | 6 |
| 7 ; RUN: %if --need=allow_dump --need=target_ARM32 --command %p2i --filetype=asm \ |
| 8 ; RUN: --target arm32 -i %s --args -O2 --skip-unimplemented \ |
| 9 ; RUN: | %if --need=allow_dump --need=target_ARM32 --command FileCheck %s \ |
| 10 ; RUN: --check-prefix=ARM32 |
| 11 |
7 declare void @use_value(i32) | 12 declare void @use_value(i32) |
8 | 13 |
9 ; Basic cmp/branch folding. | 14 ; Basic cmp/branch folding: %cmp1 is used only by the br in the same block. |
10 define i32 @fold_cmp_br(i32 %arg1, i32 %arg2) { | 15 define i32 @fold_cmp_br(i32 %arg1, i32 %arg2) { |
11 entry: | 16 entry: |
12 %cmp1 = icmp slt i32 %arg1, %arg2 | 17 %cmp1 = icmp slt i32 %arg1, %arg2 |
13 br i1 %cmp1, label %branch1, label %branch2 | 18 br i1 %cmp1, label %branch1, label %branch2 |
14 branch1: | 19 branch1: |
15 ret i32 1 | 20 ret i32 1 |
16 branch2: | 21 branch2: |
17 ret i32 2 | 22 ret i32 2 |
18 } | 23 } |
19 | 24 |
20 ; CHECK-LABEL: fold_cmp_br | 25 ; CHECK-LABEL: fold_cmp_br |
21 ; CHECK: cmp | 26 ; CHECK: cmp |
22 ; CHECK: jge | 27 ; CHECK: jge |
| 28 ; ARM32-LABEL: fold_cmp_br |
| 29 ; ARM32: cmp |
| 30 ; ARM32: beq |
23 | 31 |
24 | 32 |
25 ; Cmp/branch folding with intervening instructions. | 33 ; Cmp/branch folding with a call between the icmp and the br that uses it. |
26 define i32 @fold_cmp_br_intervening_insts(i32 %arg1, i32 %arg2) { | 34 define i32 @fold_cmp_br_intervening_insts(i32 %arg1, i32 %arg2) { |
27 entry: | 35 entry: |
28 %cmp1 = icmp slt i32 %arg1, %arg2 | 36 %cmp1 = icmp slt i32 %arg1, %arg2 |
29 call void @use_value(i32 %arg1) | 37 call void @use_value(i32 %arg1) |
30 br i1 %cmp1, label %branch1, label %branch2 | 38 br i1 %cmp1, label %branch1, label %branch2 |
31 branch1: | 39 branch1: |
32 ret i32 1 | 40 ret i32 1 |
33 branch2: | 41 branch2: |
34 ret i32 2 | 42 ret i32 2 |
35 } | 43 } |
36 | 44 |
37 ; CHECK-LABEL: fold_cmp_br_intervening_insts | 45 ; CHECK-LABEL: fold_cmp_br_intervening_insts |
38 ; CHECK-NOT: cmp | 46 ; CHECK-NOT: cmp |
39 ; CHECK: call | 47 ; CHECK: call |
40 ; CHECK: cmp | 48 ; CHECK: cmp |
41 ; CHECK: jge | 49 ; CHECK: jge |
| 50 ; ARM32-LABEL: fold_cmp_br_intervening_insts |
| 51 ; ARM32: push {{[{].*[}]}} |
| 52 ; ARM32: movlt [[TMP:r[0-9]+]], #1 |
| 53 ; ARM32: mov [[P:r[4-7]]], [[TMP]] |
| 54 ; ARM32: bl |
| 55 ; ARM32: cmp [[P]], #0 |
| 56 ; ARM32: beq |
42 | 57 |
43 | 58 |
44 ; Cmp/branch non-folding because of live-out. | 59 ; Cmp/branch non-folding: %cmp1 is defined in entry but used in block next. |
45 define i32 @no_fold_cmp_br_liveout(i32 %arg1, i32 %arg2) { | 60 define i32 @no_fold_cmp_br_liveout(i32 %arg1, i32 %arg2) { |
46 entry: | 61 entry: |
47 %cmp1 = icmp slt i32 %arg1, %arg2 | 62 %cmp1 = icmp slt i32 %arg1, %arg2 |
48 br label %next | 63 br label %next |
49 next: | 64 next: |
50 br i1 %cmp1, label %branch1, label %branch2 | 65 br i1 %cmp1, label %branch1, label %branch2 |
51 branch1: | 66 branch1: |
52 ret i32 1 | 67 ret i32 1 |
53 branch2: | 68 branch2: |
54 ret i32 2 | 69 ret i32 2 |
55 } | 70 } |
56 | 71 |
57 ; CHECK-LABEL: no_fold_cmp_br_liveout | 72 ; CHECK-LABEL: no_fold_cmp_br_liveout |
58 ; CHECK: cmp | 73 ; CHECK: cmp |
59 ; CHECK: set | 74 ; CHECK: set |
60 ; CHECK: cmp | 75 ; CHECK: cmp |
61 ; CHECK: je | 76 ; CHECK: je |
| 77 ; ARM32-LABEL: no_fold_cmp_br_liveout |
| 78 ; ARM32: cmp |
| 79 ; ARM32: movlt [[REG:r[0-9]+]] |
| 80 ; ARM32: cmp [[REG]], #0 |
| 81 ; ARM32: beq |
62 | 82 |
63 | 83 |
64 ; Cmp/branch non-folding because of extra non-whitelisted uses. | 84 ; Cmp/branch non-folding: %cmp1 also feeds a zext, a non-whitelisted use. |
65 define i32 @no_fold_cmp_br_non_whitelist(i32 %arg1, i32 %arg2) { | 85 define i32 @no_fold_cmp_br_non_whitelist(i32 %arg1, i32 %arg2) { |
66 entry: | 86 entry: |
67 %cmp1 = icmp slt i32 %arg1, %arg2 | 87 %cmp1 = icmp slt i32 %arg1, %arg2 |
68 %result = zext i1 %cmp1 to i32 | 88 %result = zext i1 %cmp1 to i32 |
69 br i1 %cmp1, label %branch1, label %branch2 | 89 br i1 %cmp1, label %branch1, label %branch2 |
70 branch1: | 90 branch1: |
71 ret i32 %result | 91 ret i32 %result |
72 branch2: | 92 branch2: |
73 ret i32 2 | 93 ret i32 2 |
74 } | 94 } |
75 | 95 |
76 ; CHECK-LABEL: no_fold_cmp_br_non_whitelist | 96 ; CHECK-LABEL: no_fold_cmp_br_non_whitelist |
77 ; CHECK: cmp | 97 ; CHECK: cmp |
78 ; CHECK: set | 98 ; CHECK: set |
79 ; CHECK: movzx | 99 ; CHECK: movzx |
80 ; CHECK: cmp | 100 ; CHECK: cmp |
81 ; CHECK: je | 101 ; CHECK: je |
| 102 ; ARM32-LABEL: no_fold_cmp_br_non_whitelist |
| 103 ; ARM32: mov [[R:r[0-9]+]], #0 |
| 104 ; ARM32: cmp r0, r1 |
| 105 ; ARM32: movlt [[R]], #1 |
| 106 ; ARM32: mov [[R2:r[0-9]+]], [[R]] |
| 107 ; ARM32: and [[R3:r[0-9]+]], [[R2]], #1 |
| 108 ; ARM32: cmp [[R]] |
| 109 ; ARM32: beq |
82 | 110 |
83 | 111 |
84 ; Basic cmp/select folding. | 112 ; Basic cmp/select folding: %cmp1 is used only by the select that follows. |
85 define i32 @fold_cmp_select(i32 %arg1, i32 %arg2) { | 113 define i32 @fold_cmp_select(i32 %arg1, i32 %arg2) { |
86 entry: | 114 entry: |
87 %cmp1 = icmp slt i32 %arg1, %arg2 | 115 %cmp1 = icmp slt i32 %arg1, %arg2 |
88 %result = select i1 %cmp1, i32 %arg1, i32 %arg2 | 116 %result = select i1 %cmp1, i32 %arg1, i32 %arg2 |
89 ret i32 %result | 117 ret i32 %result |
90 } | 118 } |
91 | 119 |
92 ; CHECK-LABEL: fold_cmp_select | 120 ; CHECK-LABEL: fold_cmp_select |
93 ; CHECK: cmp | 121 ; CHECK: cmp |
94 ; CHECK: cmovl | 122 ; CHECK: cmovl |
| 123 ; ARM32-LABEL: fold_cmp_select |
| 124 ; ARM32: mov [[R:r[0-9]+]], #0 |
| 125 ; ARM32: cmp r0, r1 |
| 126 ; ARM32: movlt [[R]], #1 |
| 127 ; ARM32: cmp [[R]], #0 |
95 | 128 |
96 | 129 |
97 ; 64-bit cmp/select folding. | 130 ; 64-bit cmp/select folding (plain operands first, then undef operands). |
98 define i64 @fold_cmp_select_64(i64 %arg1, i64 %arg2) { | 131 define i64 @fold_cmp_select_64(i64 %arg1, i64 %arg2) { |
99 entry: | 132 entry: |
100 %arg1_trunc = trunc i64 %arg1 to i32 | 133 %arg1_trunc = trunc i64 %arg1 to i32 |
101 %arg2_trunc = trunc i64 %arg2 to i32 | 134 %arg2_trunc = trunc i64 %arg2 to i32 |
102 %cmp1 = icmp slt i32 %arg1_trunc, %arg2_trunc | 135 %cmp1 = icmp slt i32 %arg1_trunc, %arg2_trunc |
103 %result = select i1 %cmp1, i64 %arg1, i64 %arg2 | 136 %result = select i1 %cmp1, i64 %arg1, i64 %arg2 |
104 ret i64 %result | 137 ret i64 %result |
105 } | 138 } |
106 | 139 |
107 ; CHECK-LABEL: fold_cmp_select_64 | 140 ; CHECK-LABEL: fold_cmp_select_64 |
108 ; CHECK: cmp | 141 ; CHECK: cmp |
109 ; CHECK: cmovl | 142 ; CHECK: cmovl |
110 ; CHECK: cmovl | 143 ; CHECK: cmovl |
| 144 ; ARM32-LABEL: fold_cmp_select_64 |
| 145 ; ARM32: mov [[R:r[0-9]+]], #0 |
| 146 ; ARM32: cmp r0, r2 |
| 147 ; ARM32: movlt [[R]], #1 |
| 148 ; ARM32: cmp [[R]], #0 |
| 149 ; ARM32: movne |
| 150 ; ARM32: movne |
| 151 ; ARM32-DAG: mov r0 |
| 152 ; ARM32-DAG: mov r1 |
| 153 ; ARM32: bx lr |
| 154 |
111 | 155 |
112 define i64 @fold_cmp_select_64_undef(i64 %arg1) { | 156 define i64 @fold_cmp_select_64_undef(i64 %arg1) { |
113 entry: | 157 entry: |
114 %arg1_trunc = trunc i64 %arg1 to i32 | 158 %arg1_trunc = trunc i64 %arg1 to i32 |
115 %cmp1 = icmp slt i32 undef, %arg1_trunc | 159 %cmp1 = icmp slt i32 undef, %arg1_trunc |
116 %result = select i1 %cmp1, i64 %arg1, i64 undef | 160 %result = select i1 %cmp1, i64 %arg1, i64 undef |
117 ret i64 %result | 161 ret i64 %result |
118 } | 162 } |
119 ; CHECK-LABEL: fold_cmp_select_64_undef | 163 ; CHECK-LABEL: fold_cmp_select_64_undef |
120 ; CHECK: cmp | 164 ; CHECK: cmp |
121 ; CHECK: cmovl | 165 ; CHECK: cmovl |
122 ; CHECK: cmovl | 166 ; CHECK: cmovl |
| 167 ; ARM32-LABEL: fold_cmp_select_64_undef |
| 168 ; ARM32: cmp {{r[0-9]+}}, r0 |
| 169 ; ARM32: movlt [[R:r[0-9]+]], #1 |
| 170 ; ARM32: cmp [[R]] |
| 171 ; ARM32: movne |
| 172 ; ARM32: movne |
| 173 ; ARM32-DAG: mov r0 |
| 174 ; ARM32-DAG: mov r1 |
| 175 ; ARM32: bx lr |
| 176 |
123 | 177 |
124 ; Cmp/select folding with intervening instructions. | 178 ; Cmp/select folding with a call between the icmp and the select using it. |
125 define i32 @fold_cmp_select_intervening_insts(i32 %arg1, i32 %arg2) { | 179 define i32 @fold_cmp_select_intervening_insts(i32 %arg1, i32 %arg2) { |
126 entry: | 180 entry: |
127 %cmp1 = icmp slt i32 %arg1, %arg2 | 181 %cmp1 = icmp slt i32 %arg1, %arg2 |
128 call void @use_value(i32 %arg1) | 182 call void @use_value(i32 %arg1) |
129 %result = select i1 %cmp1, i32 %arg1, i32 %arg2 | 183 %result = select i1 %cmp1, i32 %arg1, i32 %arg2 |
130 ret i32 %result | 184 ret i32 %result |
131 } | 185 } |
132 | 186 |
133 ; CHECK-LABEL: fold_cmp_select_intervening_insts | 187 ; CHECK-LABEL: fold_cmp_select_intervening_insts |
134 ; CHECK-NOT: cmp | 188 ; CHECK-NOT: cmp |
135 ; CHECK: call | 189 ; CHECK: call |
136 ; CHECK: cmp | 190 ; CHECK: cmp |
137 ; CHECK: cmovl | 191 ; CHECK: cmovl |
| 192 ; ARM32-LABEL: fold_cmp_select_intervening_insts |
| 193 ; ARM32: mov [[RES0:r[4-7]]], r0 |
| 194 ; ARM32: mov [[RES1:r[4-7]]], r1 |
| 195 ; ARM32: mov [[R:r[0-9]+]], #0 |
| 196 ; ARM32: cmp r{{[0-9]+}}, r{{[0-9]+}} |
| 197 ; ARM32: movlt [[R]], #1 |
| 198 ; ARM32: mov [[R2:r[4-7]]], [[R]] |
| 199 ; ARM32: bl use_value |
| 200 ; ARM32: cmp [[R2]], #0 |
| 201 ; ARM32: movne [[RES1]], [[RES0]] |
| 202 ; ARM32: mov r0, [[RES1]] |
138 | 203 |
139 | 204 |
140 ; Cmp/multi-select folding. | 205 ; Cmp/multi-select folding: three selects in one block share %cmp1. |
141 define i32 @fold_cmp_select_multi(i32 %arg1, i32 %arg2) { | 206 define i32 @fold_cmp_select_multi(i32 %arg1, i32 %arg2) { |
142 entry: | 207 entry: |
143 %cmp1 = icmp slt i32 %arg1, %arg2 | 208 %cmp1 = icmp slt i32 %arg1, %arg2 |
144 %a = select i1 %cmp1, i32 %arg1, i32 %arg2 | 209 %a = select i1 %cmp1, i32 %arg1, i32 %arg2 |
145 %b = select i1 %cmp1, i32 %arg2, i32 %arg1 | 210 %b = select i1 %cmp1, i32 %arg2, i32 %arg1 |
146 %c = select i1 %cmp1, i32 123, i32 %arg1 | 211 %c = select i1 %cmp1, i32 123, i32 %arg1 |
147 %partial = add i32 %a, %b | 212 %partial = add i32 %a, %b |
148 %result = add i32 %partial, %c | 213 %result = add i32 %partial, %c |
149 ret i32 %result | 214 ret i32 %result |
150 } | 215 } |
151 | 216 |
152 ; CHECK-LABEL: fold_cmp_select_multi | 217 ; CHECK-LABEL: fold_cmp_select_multi |
153 ; CHECK: cmp | 218 ; CHECK: cmp |
154 ; CHECK: cmovl | 219 ; CHECK: cmovl |
155 ; CHECK: cmp | 220 ; CHECK: cmp |
156 ; CHECK: cmovl | 221 ; CHECK: cmovl |
157 ; CHECK: cmp | 222 ; CHECK: cmp |
158 ; CHECK: cmovge | 223 ; CHECK: cmovge |
159 ; CHECK: add | 224 ; CHECK: add |
160 ; CHECK: add | 225 ; CHECK: add |
| 226 ; ARM32-LABEL: fold_cmp_select_multi |
| 227 ; ARM32-DAG: mov [[T0:r[0-9]+]], #0 |
| 228 ; ARM32-DAG: cmp r0, r1 |
| 229 ; ARM32: movlt [[T0]], #1 |
| 230 ; ARM32-DAG: mov [[T1:r[0-9]+]], r1 |
| 231 ; ARM32-DAG: cmp [[T0]], #0 |
| 232 ; ARM32: [[T1]], r0 |
| 233 ; ARM32-DAG: mov [[T2:r[0-9]+]], r0 |
| 234 ; ARM32-DAG: cmp [[T0]], #0 |
| 235 ; ARM32: [[T2]], r1 |
| 236 ; ARM32: cmp [[T0]], #0 |
| 237 ; ARM32: movne |
| 238 ; ARM32: add |
| 239 ; ARM32: add |
| 240 ; ARM32: bx lr |
161 | 241 |
162 | 242 |
163 ; Cmp/multi-select non-folding because of live-out. | 243 ; Cmp/multi-select non-folding: %cmp1 is also used by a select in block next. |
164 define i32 @no_fold_cmp_select_multi_liveout(i32 %arg1, i32 %arg2) { | 244 define i32 @no_fold_cmp_select_multi_liveout(i32 %arg1, i32 %arg2) { |
165 entry: | 245 entry: |
166 %cmp1 = icmp slt i32 %arg1, %arg2 | 246 %cmp1 = icmp slt i32 %arg1, %arg2 |
167 %a = select i1 %cmp1, i32 %arg1, i32 %arg2 | 247 %a = select i1 %cmp1, i32 %arg1, i32 %arg2 |
168 %b = select i1 %cmp1, i32 %arg2, i32 %arg1 | 248 %b = select i1 %cmp1, i32 %arg2, i32 %arg1 |
169 br label %next | 249 br label %next |
170 next: | 250 next: |
171 %c = select i1 %cmp1, i32 123, i32 %arg1 | 251 %c = select i1 %cmp1, i32 123, i32 %arg1 |
172 %partial = add i32 %a, %b | 252 %partial = add i32 %a, %b |
173 %result = add i32 %partial, %c | 253 %result = add i32 %partial, %c |
174 ret i32 %result | 254 ret i32 %result |
175 } | 255 } |
176 | 256 |
177 ; CHECK-LABEL: no_fold_cmp_select_multi_liveout | 257 ; CHECK-LABEL: no_fold_cmp_select_multi_liveout |
178 ; CHECK: set | 258 ; CHECK: set |
179 ; CHECK: cmp | 259 ; CHECK: cmp |
180 ; CHECK: cmovne | 260 ; CHECK: cmovne |
181 ; CHECK: cmp | 261 ; CHECK: cmp |
182 ; CHECK: cmovne | 262 ; CHECK: cmovne |
183 ; CHECK: cmp | 263 ; CHECK: cmp |
184 ; CHECK: cmove | 264 ; CHECK: cmove |
185 ; CHECK: add | 265 ; CHECK: add |
186 ; CHECK: add | 266 ; CHECK: add |
187 | 267 ; ARM32-LABEL: no_fold_cmp_select_multi_liveout |
| 268 ; The sequence checked below is the one used for fold_cmp_select_multi. |
| 269 ; ARM32-DAG: mov [[T0:r[0-9]+]], #0 |
| 270 ; ARM32-DAG: cmp r0, r1 |
| 271 ; ARM32: movlt [[T0]], #1 |
| 272 ; ARM32-DAG: mov [[T1:r[0-9]+]], r1 |
| 273 ; ARM32-DAG: cmp [[T0]], #0 |
| 274 ; ARM32: [[T1]], r0 |
| 275 ; ARM32-DAG: mov [[T2:r[0-9]+]], r0 |
| 276 ; ARM32-DAG: cmp [[T0]], #0 |
| 277 ; ARM32: [[T2]], r1 |
| 278 ; ARM32: cmp [[T0]], #0 |
| 279 ; ARM32: movne |
| 280 ; ARM32: add |
| 281 ; ARM32: add |
| 282 ; ARM32: bx lr |
188 | 283 |
189 ; Cmp/multi-select non-folding because of extra non-whitelisted uses. | 284 ; Cmp/multi-select non-folding: %cmp1 also feeds a zext (non-whitelisted use). |
190 define i32 @no_fold_cmp_select_multi_non_whitelist(i32 %arg1, i32 %arg2) { | 285 define i32 @no_fold_cmp_select_multi_non_whitelist(i32 %arg1, i32 %arg2) { |
191 entry: | 286 entry: |
192 %cmp1 = icmp slt i32 %arg1, %arg2 | 287 %cmp1 = icmp slt i32 %arg1, %arg2 |
193 %a = select i1 %cmp1, i32 %arg1, i32 %arg2 | 288 %a = select i1 %cmp1, i32 %arg1, i32 %arg2 |
194 %b = select i1 %cmp1, i32 %arg2, i32 %arg1 | 289 %b = select i1 %cmp1, i32 %arg2, i32 %arg1 |
195 %c = select i1 %cmp1, i32 123, i32 %arg1 | 290 %c = select i1 %cmp1, i32 123, i32 %arg1 |
196 %ext = zext i1 %cmp1 to i32 | 291 %ext = zext i1 %cmp1 to i32 |
197 %partial1 = add i32 %a, %b | 292 %partial1 = add i32 %a, %b |
198 %partial2 = add i32 %partial1, %c | 293 %partial2 = add i32 %partial1, %c |
199 %result = add i32 %partial2, %ext | 294 %result = add i32 %partial2, %ext |
200 ret i32 %result | 295 ret i32 %result |
201 } | 296 } |
202 | 297 |
203 ; CHECK-LABEL: no_fold_cmp_select_multi_non_whitelist | 298 ; CHECK-LABEL: no_fold_cmp_select_multi_non_whitelist |
204 ; CHECK: set | 299 ; CHECK: set |
205 ; CHECK: cmp | 300 ; CHECK: cmp |
206 ; CHECK: cmovne | 301 ; CHECK: cmovne |
207 ; CHECK: cmp | 302 ; CHECK: cmp |
208 ; CHECK: cmovne | 303 ; CHECK: cmovne |
209 ; CHECK: cmp | 304 ; CHECK: cmp |
210 ; CHECK: cmove | 305 ; CHECK: cmove |
211 ; CHECK: movzx | 306 ; CHECK: movzx |
212 ; CHECK: add | 307 ; CHECK: add |
213 ; CHECK: add | 308 ; CHECK: add |
214 ; CHECK: add | 309 ; CHECK: add |
| 310 ; ARM32-LABEL: no_fold_cmp_select_multi_non_whitelist |
| 311 ; ARM32-DAG: mov [[T0:r[0-9]+]], #0 |
| 312 ; ARM32-DAG: cmp r0, r1 |
| 313 ; ARM32: movlt [[T0]], #1 |
| 314 ; ARM32-DAG: mov [[T1:r[0-9]+]], r1 |
| 315 ; ARM32-DAG: cmp [[T0]], #0 |
| 316 ; ARM32: [[T1]], r0 |
| 317 ; ARM32-DAG: mov [[T2:r[0-9]+]], r0 |
| 318 ; ARM32-DAG: cmp [[T0]], #0 |
| 319 ; ARM32: [[T2]], r1 |
| 320 ; ARM32: cmp [[T0]], #0 |
| 321 ; ARM32: movne |
| 322 ; ARM32: and {{.*}}, [[T0]], #1 |
| 323 ; ARM32: add |
| 324 ; ARM32: add |
| 325 ; ARM32: add |
| 326 ; ARM32: bx lr |
OLD | NEW |