OLD | NEW |
1 ; This tests the optimization where producers and consumers of i1 (bool) | 1 ; This tests the optimization where producers and consumers of i1 (bool) |
2 ; variables are combined to implicitly use flags instead of explicitly using | 2 ; variables are combined to implicitly use flags instead of explicitly using |
3 ; stack or register variables. | 3 ; stack or register variables. |
4 | 4 |
5 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s | 5 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 \ |
| 6 ; RUN: -allow-externally-defined-symbols | FileCheck %s |
6 | 7 |
7 ; RUN: %if --need=allow_dump --need=target_ARM32 --command %p2i --filetype=asm \ | 8 ; RUN: %if --need=allow_dump --need=target_ARM32 --command %p2i --filetype=asm \ |
8 ; RUN: --target arm32 -i %s --args -O2 --skip-unimplemented \ | 9 ; RUN: --target arm32 -i %s --args -O2 --skip-unimplemented \ |
| 10 ; RUN: -allow-externally-defined-symbols \ |
9 ; RUN: | %if --need=allow_dump --need=target_ARM32 --command FileCheck %s \ | 11 ; RUN: | %if --need=allow_dump --need=target_ARM32 --command FileCheck %s \ |
10 ; RUN: --check-prefix=ARM32 | 12 ; RUN: --check-prefix=ARM32 |
11 | 13 |
; Declaration only (no body is given here). The *_intervening_insts tests call
; it between an icmp and the instruction that consumes the icmp result.
12 declare void @use_value(i32) | 14 declare void @use_value(i32) |
13 | 15 |
14 ; Basic cmp/branch folding. | 16 ; Basic cmp/branch folding. |
; Returns 1 when %arg1 < %arg2 (signed compare), otherwise 2.
15 define i32 @fold_cmp_br(i32 %arg1, i32 %arg2) { | 17 define internal i32 @fold_cmp_br(i32 %arg1, i32 %arg2) { |
16 entry: | 18 entry: |
; %cmp1 is used only by the br on the very next line, in the same block.
17 %cmp1 = icmp slt i32 %arg1, %arg2 | 19 %cmp1 = icmp slt i32 %arg1, %arg2 |
18 br i1 %cmp1, label %branch1, label %branch2 | 20 br i1 %cmp1, label %branch1, label %branch2 |
19 branch1: | 21 branch1: |
20 ret i32 1 | 22 ret i32 1 |
21 branch2: | 23 branch2: |
22 ret i32 2 | 24 ret i32 2 |
23 } | 25 } |
24 | 26 |
25 ; CHECK-LABEL: fold_cmp_br | 27 ; CHECK-LABEL: fold_cmp_br |
26 ; CHECK: cmp | 28 ; CHECK: cmp |
27 ; CHECK: jge | 29 ; CHECK: jge |
28 ; ARM32-LABEL: fold_cmp_br | 30 ; ARM32-LABEL: fold_cmp_br |
29 ; ARM32: cmp | 31 ; ARM32: cmp |
30 ; ARM32: beq | 32 ; ARM32: beq |
31 | 33 |
32 | 34 |
33 ; Cmp/branch folding with intervening instructions. | 35 ; Cmp/branch folding with intervening instructions. |
; Same compare-and-branch shape as fold_cmp_br, except that a call to
; @use_value is placed between the icmp and the br that consumes it.
34 define i32 @fold_cmp_br_intervening_insts(i32 %arg1, i32 %arg2) { | 36 define internal i32 @fold_cmp_br_intervening_insts(i32 %arg1, i32 %arg2) { |
35 entry: | 37 entry: |
36 %cmp1 = icmp slt i32 %arg1, %arg2 | 38 %cmp1 = icmp slt i32 %arg1, %arg2 |
; The call separates the producer of %cmp1 from its only consumer.
37 call void @use_value(i32 %arg1) | 39 call void @use_value(i32 %arg1) |
38 br i1 %cmp1, label %branch1, label %branch2 | 40 br i1 %cmp1, label %branch1, label %branch2 |
39 branch1: | 41 branch1: |
40 ret i32 1 | 42 ret i32 1 |
41 branch2: | 43 branch2: |
42 ret i32 2 | 44 ret i32 2 |
43 } | 45 } |
44 | 46 |
45 ; CHECK-LABEL: fold_cmp_br_intervening_insts | 47 ; CHECK-LABEL: fold_cmp_br_intervening_insts |
46 ; CHECK-NOT: cmp | 48 ; CHECK-NOT: cmp |
47 ; CHECK: call | 49 ; CHECK: call |
48 ; CHECK: cmp | 50 ; CHECK: cmp |
49 ; CHECK: jge | 51 ; CHECK: jge |
50 ; ARM32-LABEL: fold_cmp_br_intervening_insts | 52 ; ARM32-LABEL: fold_cmp_br_intervening_insts |
51 ; ARM32: push {{[{].*[}]}} | 53 ; ARM32: push {{[{].*[}]}} |
52 ; ARM32: movlt [[TMP:r[0-9]+]], #1 | 54 ; ARM32: movlt [[TMP:r[0-9]+]], #1 |
53 ; ARM32: mov [[P:r[4-7]]], [[TMP]] | 55 ; ARM32: mov [[P:r[4-7]]], [[TMP]] |
54 ; ARM32: bl | 56 ; ARM32: bl |
55 ; ARM32: cmp [[P]], #0 | 57 ; ARM32: cmp [[P]], #0 |
56 ; ARM32: beq | 58 ; ARM32: beq |
57 | 59 |
58 | 60 |
59 ; Cmp/branch non-folding because of live-out. | 61 ; Cmp/branch non-folding because of live-out. |
; %cmp1 is produced in the entry block but consumed by the br in the next
; block, so the value is live across a basic-block boundary.
60 define i32 @no_fold_cmp_br_liveout(i32 %arg1, i32 %arg2) { | 62 define internal i32 @no_fold_cmp_br_liveout(i32 %arg1, i32 %arg2) { |
61 entry: | 63 entry: |
62 %cmp1 = icmp slt i32 %arg1, %arg2 | 64 %cmp1 = icmp slt i32 %arg1, %arg2 |
; Unconditional branch whose only effect is to end the block defining %cmp1.
63 br label %next | 65 br label %next |
64 next: | 66 next: |
65 br i1 %cmp1, label %branch1, label %branch2 | 67 br i1 %cmp1, label %branch1, label %branch2 |
66 branch1: | 68 branch1: |
67 ret i32 1 | 69 ret i32 1 |
68 branch2: | 70 branch2: |
69 ret i32 2 | 71 ret i32 2 |
70 } | 72 } |
71 | 73 |
72 ; CHECK-LABEL: no_fold_cmp_br_liveout | 74 ; CHECK-LABEL: no_fold_cmp_br_liveout |
73 ; CHECK: cmp | 75 ; CHECK: cmp |
74 ; CHECK: set | 76 ; CHECK: set |
75 ; CHECK: cmp | 77 ; CHECK: cmp |
76 ; CHECK: je | 78 ; CHECK: je |
77 ; ARM32-LABEL: no_fold_cmp_br_liveout | 79 ; ARM32-LABEL: no_fold_cmp_br_liveout |
78 ; ARM32: cmp | 80 ; ARM32: cmp |
79 ; ARM32: movlt [[REG:r[0-9]+]] | 81 ; ARM32: movlt [[REG:r[0-9]+]] |
80 ; ARM32: cmp [[REG]], #0 | 82 ; ARM32: cmp [[REG]], #0 |
81 ; ARM32: beq | 83 ; ARM32: beq |
82 | 84 |
83 | 85 |
84 ; Cmp/branch non-folding because of extra non-whitelisted uses. | 86 ; Cmp/branch non-folding because of extra non-whitelisted uses. |
; %cmp1 has two consumers in the same block: the zext and the br. The
; zero-extended value is what branch1 returns; branch2 returns 2.
85 define i32 @no_fold_cmp_br_non_whitelist(i32 %arg1, i32 %arg2) { | 87 define internal i32 @no_fold_cmp_br_non_whitelist(i32 %arg1, i32 %arg2) { |
86 entry: | 88 entry: |
87 %cmp1 = icmp slt i32 %arg1, %arg2 | 89 %cmp1 = icmp slt i32 %arg1, %arg2 |
; Extra use of %cmp1 besides the branch (the "non-whitelisted" use).
88 %result = zext i1 %cmp1 to i32 | 90 %result = zext i1 %cmp1 to i32 |
89 br i1 %cmp1, label %branch1, label %branch2 | 91 br i1 %cmp1, label %branch1, label %branch2 |
90 branch1: | 92 branch1: |
91 ret i32 %result | 93 ret i32 %result |
92 branch2: | 94 branch2: |
93 ret i32 2 | 95 ret i32 2 |
94 } | 96 } |
95 | 97 |
96 ; CHECK-LABEL: no_fold_cmp_br_non_whitelist | 98 ; CHECK-LABEL: no_fold_cmp_br_non_whitelist |
97 ; CHECK: cmp | 99 ; CHECK: cmp |
98 ; CHECK: set | 100 ; CHECK: set |
99 ; CHECK: movzx | 101 ; CHECK: movzx |
100 ; CHECK: cmp | 102 ; CHECK: cmp |
101 ; CHECK: je | 103 ; CHECK: je |
102 ; ARM32-LABEL: no_fold_cmp_br_non_whitelist | 104 ; ARM32-LABEL: no_fold_cmp_br_non_whitelist |
103 ; ARM32: mov [[R:r[0-9]+]], #0 | 105 ; ARM32: mov [[R:r[0-9]+]], #0 |
104 ; ARM32: cmp r0, r1 | 106 ; ARM32: cmp r0, r1 |
105 ; ARM32: movlt [[R]], #1 | 107 ; ARM32: movlt [[R]], #1 |
106 ; ARM32: mov [[R2:r[0-9]+]], [[R]] | 108 ; ARM32: mov [[R2:r[0-9]+]], [[R]] |
107 ; ARM32: and [[R3:r[0-9]+]], [[R2]], #1 | 109 ; ARM32: and [[R3:r[0-9]+]], [[R2]], #1 |
108 ; ARM32: cmp [[R]] | 110 ; ARM32: cmp [[R]] |
109 ; ARM32: beq | 111 ; ARM32: beq |
110 | 112 |
111 | 113 |
112 ; Basic cmp/select folding. | 114 ; Basic cmp/select folding. |
; Returns %arg1 when %arg1 < %arg2 (signed compare), otherwise %arg2.
113 define i32 @fold_cmp_select(i32 %arg1, i32 %arg2) { | 115 define internal i32 @fold_cmp_select(i32 %arg1, i32 %arg2) { |
114 entry: | 116 entry: |
; %cmp1 is used only by the select on the next line.
115 %cmp1 = icmp slt i32 %arg1, %arg2 | 117 %cmp1 = icmp slt i32 %arg1, %arg2 |
116 %result = select i1 %cmp1, i32 %arg1, i32 %arg2 | 118 %result = select i1 %cmp1, i32 %arg1, i32 %arg2 |
117 ret i32 %result | 119 ret i32 %result |
118 } | 120 } |
119 | 121 |
120 ; CHECK-LABEL: fold_cmp_select | 122 ; CHECK-LABEL: fold_cmp_select |
121 ; CHECK: cmp | 123 ; CHECK: cmp |
122 ; CHECK: cmovl | 124 ; CHECK: cmovl |
123 ; ARM32-LABEL: fold_cmp_select | 125 ; ARM32-LABEL: fold_cmp_select |
124 ; ARM32: mov [[R:r[0-9]+]], #0 | 126 ; ARM32: mov [[R:r[0-9]+]], #0 |
125 ; ARM32: cmp r0, r1 | 127 ; ARM32: cmp r0, r1 |
126 ; ARM32: movlt [[R]], #1 | 128 ; ARM32: movlt [[R]], #1 |
127 ; ARM32: cmp [[R]], #0 | 129 ; ARM32: cmp [[R]], #0 |
128 | 130 |
129 | 131 |
130 ; 64-bit cmp/select folding. | 132 ; 64-bit cmp/select folding. |
; The condition is a 32-bit compare of the truncated arguments, while the
; select chooses between the full 64-bit argument values.
131 define i64 @fold_cmp_select_64(i64 %arg1, i64 %arg2) { | 133 define internal i64 @fold_cmp_select_64(i64 %arg1, i64 %arg2) { |
132 entry: | 134 entry: |
; Truncate both i64 arguments to i32 so the icmp itself is 32-bit.
133 %arg1_trunc = trunc i64 %arg1 to i32 | 135 %arg1_trunc = trunc i64 %arg1 to i32 |
134 %arg2_trunc = trunc i64 %arg2 to i32 | 136 %arg2_trunc = trunc i64 %arg2 to i32 |
135 %cmp1 = icmp slt i32 %arg1_trunc, %arg2_trunc | 137 %cmp1 = icmp slt i32 %arg1_trunc, %arg2_trunc |
; 64-bit select driven by the 32-bit compare result.
136 %result = select i1 %cmp1, i64 %arg1, i64 %arg2 | 138 %result = select i1 %cmp1, i64 %arg1, i64 %arg2 |
137 ret i64 %result | 139 ret i64 %result |
138 } | 140 } |
139 | 141 |
140 ; CHECK-LABEL: fold_cmp_select_64 | 142 ; CHECK-LABEL: fold_cmp_select_64 |
141 ; CHECK: cmp | 143 ; CHECK: cmp |
142 ; CHECK: cmovl | 144 ; CHECK: cmovl |
143 ; CHECK: cmovl | 145 ; CHECK: cmovl |
144 ; ARM32-LABEL: fold_cmp_select_64 | 146 ; ARM32-LABEL: fold_cmp_select_64 |
145 ; ARM32: mov [[R:r[0-9]+]], #0 | 147 ; ARM32: mov [[R:r[0-9]+]], #0 |
146 ; ARM32: cmp r0, r2 | 148 ; ARM32: cmp r0, r2 |
147 ; ARM32: movlt [[R]], #1 | 149 ; ARM32: movlt [[R]], #1 |
148 ; ARM32: cmp [[R]], #0 | 150 ; ARM32: cmp [[R]], #0 |
149 ; ARM32: movne | 151 ; ARM32: movne |
150 ; ARM32: movne | 152 ; ARM32: movne |
151 ; ARM32-DAG: mov r0 | 153 ; ARM32-DAG: mov r0 |
152 ; ARM32-DAG: mov r1 | 154 ; ARM32-DAG: mov r1 |
153 ; ARM32: bx lr | 155 ; ARM32: bx lr |
154 | 156 |
155 | 157 |
; 64-bit cmp/select folding with undef operands: undef is the first operand of
; the icmp and also the false-value of the i64 select.
156 define i64 @fold_cmp_select_64_undef(i64 %arg1) { | 158 define internal i64 @fold_cmp_select_64_undef(i64 %arg1) { |
157 entry: | 159 entry: |
158 %arg1_trunc = trunc i64 %arg1 to i32 | 160 %arg1_trunc = trunc i64 %arg1 to i32 |
; 32-bit compare of undef against the truncated argument.
159 %cmp1 = icmp slt i32 undef, %arg1_trunc | 161 %cmp1 = icmp slt i32 undef, %arg1_trunc |
160 %result = select i1 %cmp1, i64 %arg1, i64 undef | 162 %result = select i1 %cmp1, i64 %arg1, i64 undef |
161 ret i64 %result | 163 ret i64 %result |
162 } | 164 } |
163 ; CHECK-LABEL: fold_cmp_select_64_undef | 165 ; CHECK-LABEL: fold_cmp_select_64_undef |
164 ; CHECK: cmp | 166 ; CHECK: cmp |
165 ; CHECK: cmovl | 167 ; CHECK: cmovl |
166 ; CHECK: cmovl | 168 ; CHECK: cmovl |
167 ; ARM32-LABEL: fold_cmp_select_64_undef | 169 ; ARM32-LABEL: fold_cmp_select_64_undef |
168 ; ARM32: cmp {{r[0-9]+}}, r0 | 170 ; ARM32: cmp {{r[0-9]+}}, r0 |
169 ; ARM32: movlt [[R:r[0-9]+]], #1 | 171 ; ARM32: movlt [[R:r[0-9]+]], #1 |
170 ; ARM32: cmp [[R]] | 172 ; ARM32: cmp [[R]] |
171 ; ARM32: movne | 173 ; ARM32: movne |
172 ; ARM32: movne | 174 ; ARM32: movne |
173 ; ARM32-DAG: mov r0 | 175 ; ARM32-DAG: mov r0 |
174 ; ARM32-DAG: mov r1 | 176 ; ARM32-DAG: mov r1 |
175 ; ARM32: bx lr | 177 ; ARM32: bx lr |
176 | 178 |
177 | 179 |
178 ; Cmp/select folding with intervening instructions. | 180 ; Cmp/select folding with intervening instructions. |
; Same compare-and-select shape as fold_cmp_select, except that a call to
; @use_value is placed between the icmp and the select that consumes it.
179 define i32 @fold_cmp_select_intervening_insts(i32 %arg1, i32 %arg2) { | 181 define internal i32 @fold_cmp_select_intervening_insts(i32 %arg1, i32 %arg2) { |
180 entry: | 182 entry: |
181 %cmp1 = icmp slt i32 %arg1, %arg2 | 183 %cmp1 = icmp slt i32 %arg1, %arg2 |
; The call separates the producer of %cmp1 from its only consumer; %arg1 and
; %arg2 are both still needed by the select after the call returns.
182 call void @use_value(i32 %arg1) | 184 call void @use_value(i32 %arg1) |
183 %result = select i1 %cmp1, i32 %arg1, i32 %arg2 | 185 %result = select i1 %cmp1, i32 %arg1, i32 %arg2 |
184 ret i32 %result | 186 ret i32 %result |
185 } | 187 } |
186 | 188 |
187 ; CHECK-LABEL: fold_cmp_select_intervening_insts | 189 ; CHECK-LABEL: fold_cmp_select_intervening_insts |
188 ; CHECK-NOT: cmp | 190 ; CHECK-NOT: cmp |
189 ; CHECK: call | 191 ; CHECK: call |
190 ; CHECK: cmp | 192 ; CHECK: cmp |
191 ; CHECK: cmovl | 193 ; CHECK: cmovl |
192 ; ARM32-LABEL: fold_cmp_select_intervening_insts | 194 ; ARM32-LABEL: fold_cmp_select_intervening_insts |
193 ; ARM32: mov [[RES0:r[4-7]]], r0 | 195 ; ARM32: mov [[RES0:r[4-7]]], r0 |
194 ; ARM32: mov [[RES1:r[4-7]]], r1 | 196 ; ARM32: mov [[RES1:r[4-7]]], r1 |
195 ; ARM32: mov [[R:r[0-9]+]], #0 | 197 ; ARM32: mov [[R:r[0-9]+]], #0 |
196 ; ARM32: cmp r{{[0-9]+}}, r{{[0-9]+}} | 198 ; ARM32: cmp r{{[0-9]+}}, r{{[0-9]+}} |
197 ; ARM32: movlt [[R]], #1 | 199 ; ARM32: movlt [[R]], #1 |
198 ; ARM32: mov [[R2:r[4-7]]], [[R]] | 200 ; ARM32: mov [[R2:r[4-7]]], [[R]] |
199 ; ARM32: bl use_value | 201 ; ARM32: bl use_value |
200 ; ARM32: cmp [[R2]], #0 | 202 ; ARM32: cmp [[R2]], #0 |
201 ; ARM32: movne [[RES1]], [[RES0]] | 203 ; ARM32: movne [[RES1]], [[RES0]] |
202 ; ARM32: mov r0, [[RES1]] | 204 ; ARM32: mov r0, [[RES1]] |
203 | 205 |
204 | 206 |
205 ; Cmp/multi-select folding. | 207 ; Cmp/multi-select folding. |
; One icmp result drives three selects in the same block; the function returns
; the sum of the three selected values.
206 define i32 @fold_cmp_select_multi(i32 %arg1, i32 %arg2) { | 208 define internal i32 @fold_cmp_select_multi(i32 %arg1, i32 %arg2) { |
207 entry: | 209 entry: |
208 %cmp1 = icmp slt i32 %arg1, %arg2 | 210 %cmp1 = icmp slt i32 %arg1, %arg2 |
; All three selects share %cmp1 as their condition; %cmp1 has no other use.
209 %a = select i1 %cmp1, i32 %arg1, i32 %arg2 | 211 %a = select i1 %cmp1, i32 %arg1, i32 %arg2 |
210 %b = select i1 %cmp1, i32 %arg2, i32 %arg1 | 212 %b = select i1 %cmp1, i32 %arg2, i32 %arg1 |
211 %c = select i1 %cmp1, i32 123, i32 %arg1 | 213 %c = select i1 %cmp1, i32 123, i32 %arg1 |
; %result = %a + %b + %c
212 %partial = add i32 %a, %b | 214 %partial = add i32 %a, %b |
213 %result = add i32 %partial, %c | 215 %result = add i32 %partial, %c |
214 ret i32 %result | 216 ret i32 %result |
215 } | 217 } |
216 | 218 |
(...skipping 17 matching lines...) |
234 ; ARM32-DAG: cmp [[T0]], #0 | 236 ; ARM32-DAG: cmp [[T0]], #0 |
235 ; ARM32: [[T2]], r1 | 237 ; ARM32: [[T2]], r1 |
236 ; ARM32: cmp [[T0]], #0 | 238 ; ARM32: cmp [[T0]], #0 |
237 ; ARM32: movne | 239 ; ARM32: movne |
238 ; ARM32: add | 240 ; ARM32: add |
239 ; ARM32: add | 241 ; ARM32: add |
240 ; ARM32: bx lr | 242 ; ARM32: bx lr |
241 | 243 |
242 | 244 |
243 ; Cmp/multi-select non-folding because of live-out. | 245 ; Cmp/multi-select non-folding because of live-out. |
244 define i32 @no_fold_cmp_select_multi_liveout(i32 %arg1, i32 %arg2) { | 246 define internal i32 @no_fold_cmp_select_multi_liveout(i32 %arg1, i32 %arg2) { |
245 entry: | 247 entry: |
246 %cmp1 = icmp slt i32 %arg1, %arg2 | 248 %cmp1 = icmp slt i32 %arg1, %arg2 |
247 %a = select i1 %cmp1, i32 %arg1, i32 %arg2 | 249 %a = select i1 %cmp1, i32 %arg1, i32 %arg2 |
248 %b = select i1 %cmp1, i32 %arg2, i32 %arg1 | 250 %b = select i1 %cmp1, i32 %arg2, i32 %arg1 |
249 br label %next | 251 br label %next |
250 next: | 252 next: |
251 %c = select i1 %cmp1, i32 123, i32 %arg1 | 253 %c = select i1 %cmp1, i32 123, i32 %arg1 |
252 %partial = add i32 %a, %b | 254 %partial = add i32 %a, %b |
253 %result = add i32 %partial, %c | 255 %result = add i32 %partial, %c |
254 ret i32 %result | 256 ret i32 %result |
(...skipping 20 matching lines...) |
275 ; ARM32-DAG: mov [[T2:r[0-9]+]], r0 | 277 ; ARM32-DAG: mov [[T2:r[0-9]+]], r0 |
276 ; ARM32-DAG: cmp [[T0]], #0 | 278 ; ARM32-DAG: cmp [[T0]], #0 |
277 ; ARM32: [[T2]], r1 | 279 ; ARM32: [[T2]], r1 |
278 ; ARM32: cmp [[T0]], #0 | 280 ; ARM32: cmp [[T0]], #0 |
279 ; ARM32: movne | 281 ; ARM32: movne |
280 ; ARM32: add | 282 ; ARM32: add |
281 ; ARM32: add | 283 ; ARM32: add |
282 ; ARM32: bx lr | 284 ; ARM32: bx lr |
283 | 285 |
284 ; Cmp/multi-select non-folding because of extra non-whitelisted uses. | 286 ; Cmp/multi-select non-folding because of extra non-whitelisted uses. |
285 define i32 @no_fold_cmp_select_multi_non_whitelist(i32 %arg1, i32 %arg2) { | 287 define internal i32 @no_fold_cmp_select_multi_non_whitelist(i32 %arg1, |
| 288 i32 %arg2) { |
286 entry: | 289 entry: |
287 %cmp1 = icmp slt i32 %arg1, %arg2 | 290 %cmp1 = icmp slt i32 %arg1, %arg2 |
288 %a = select i1 %cmp1, i32 %arg1, i32 %arg2 | 291 %a = select i1 %cmp1, i32 %arg1, i32 %arg2 |
289 %b = select i1 %cmp1, i32 %arg2, i32 %arg1 | 292 %b = select i1 %cmp1, i32 %arg2, i32 %arg1 |
290 %c = select i1 %cmp1, i32 123, i32 %arg1 | 293 %c = select i1 %cmp1, i32 123, i32 %arg1 |
291 %ext = zext i1 %cmp1 to i32 | 294 %ext = zext i1 %cmp1 to i32 |
292 %partial1 = add i32 %a, %b | 295 %partial1 = add i32 %a, %b |
293 %partial2 = add i32 %partial1, %c | 296 %partial2 = add i32 %partial1, %c |
294 %result = add i32 %partial2, %ext | 297 %result = add i32 %partial2, %ext |
295 ret i32 %result | 298 ret i32 %result |
(...skipping 21 matching lines...) |
317 ; ARM32-DAG: mov [[T2:r[0-9]+]], r0 | 320 ; ARM32-DAG: mov [[T2:r[0-9]+]], r0 |
318 ; ARM32-DAG: cmp [[T0]], #0 | 321 ; ARM32-DAG: cmp [[T0]], #0 |
319 ; ARM32: [[T2]], r1 | 322 ; ARM32: [[T2]], r1 |
320 ; ARM32: cmp [[T0]], #0 | 323 ; ARM32: cmp [[T0]], #0 |
321 ; ARM32: movne | 324 ; ARM32: movne |
322 ; ARM32: and {{.*}}, [[T0]], #1 | 325 ; ARM32: and {{.*}}, [[T0]], #1 |
323 ; ARM32: add | 326 ; ARM32: add |
324 ; ARM32: add | 327 ; ARM32: add |
325 ; ARM32: add | 328 ; ARM32: add |
326 ; ARM32: bx lr | 329 ; ARM32: bx lr |
OLD | NEW |