| OLD | NEW |
| 1 ; This tests the optimization where producers and consumers of i1 (bool) | 1 ; This tests the optimization where producers and consumers of i1 (bool) |
| 2 ; variables are combined to implicitly use flags instead of explicitly using | 2 ; variables are combined to implicitly use flags instead of explicitly using |
| 3 ; stack or register variables. | 3 ; stack or register variables. |
| 4 | 4 |
| 5 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 \ | 5 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 \ |
| 6 ; RUN: -allow-externally-defined-symbols | FileCheck %s | 6 ; RUN: -allow-externally-defined-symbols | FileCheck %s |
| 7 | 7 |
| 8 ; RUN: %if --need=allow_dump --need=target_ARM32 --command %p2i --filetype=asm \ | 8 ; RUN: %if --need=allow_dump --need=target_ARM32 --command %p2i --filetype=asm \ |
| 9 ; RUN: --target arm32 -i %s --args -O2 --skip-unimplemented \ | 9 ; RUN: --target arm32 -i %s --args -O2 --skip-unimplemented \ |
| 10 ; RUN: -allow-externally-defined-symbols \ | 10 ; RUN: -allow-externally-defined-symbols \ |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 78 } | 78 } |
| 79 | 79 |
| 80 ; CHECK-LABEL: no_fold_cmp_br_liveout | 80 ; CHECK-LABEL: no_fold_cmp_br_liveout |
| 81 ; CHECK: cmp | 81 ; CHECK: cmp |
| 82 ; CHECK: set | 82 ; CHECK: set |
| 83 ; CHECK: cmp | 83 ; CHECK: cmp |
| 84 ; CHECK: je | 84 ; CHECK: je |
| 85 ; ARM32-LABEL: no_fold_cmp_br_liveout | 85 ; ARM32-LABEL: no_fold_cmp_br_liveout |
| 86 ; ARM32: cmp | 86 ; ARM32: cmp |
| 87 ; ARM32: movlt [[REG:r[0-9]+]] | 87 ; ARM32: movlt [[REG:r[0-9]+]] |
| 88 ; ARM32: cmp [[REG]], #0 | 88 ; ARM32: tst [[REG]], #1 |
| 89 ; ARM32: beq | 89 ; ARM32: beq |
| 90 | 90 |
| 91 | 91 |
| 92 ; Cmp/branch non-folding because of extra non-whitelisted uses. | 92 ; Cmp/branch non-folding because of extra non-whitelisted uses. |
| 93 define internal i32 @no_fold_cmp_br_non_whitelist(i32 %arg1, i32 %arg2) { | 93 define internal i32 @no_fold_cmp_br_non_whitelist(i32 %arg1, i32 %arg2) { |
| 94 entry: | 94 entry: |
| 95 %cmp1 = icmp slt i32 %arg1, %arg2 | 95 %cmp1 = icmp slt i32 %arg1, %arg2 |
| 96 %result = zext i1 %cmp1 to i32 | 96 %result = zext i1 %cmp1 to i32 |
| 97 br i1 %cmp1, label %branch1, label %branch2 | 97 br i1 %cmp1, label %branch1, label %branch2 |
| 98 branch1: | 98 branch1: |
| 99 ret i32 %result | 99 ret i32 %result |
| 100 branch2: | 100 branch2: |
| 101 ret i32 2 | 101 ret i32 2 |
| 102 } | 102 } |
| 103 | 103 |
| 104 ; CHECK-LABEL: no_fold_cmp_br_non_whitelist | 104 ; CHECK-LABEL: no_fold_cmp_br_non_whitelist |
| 105 ; CHECK: cmp | 105 ; CHECK: cmp |
| 106 ; CHECK: set | 106 ; CHECK: set |
| 107 ; CHECK: movzx | 107 ; CHECK: movzx |
| 108 ; CHECK: cmp | 108 ; CHECK: cmp |
| 109 ; CHECK: je | 109 ; CHECK: je |
| 110 ; ARM32-LABEL: no_fold_cmp_br_non_whitelist | 110 ; ARM32-LABEL: no_fold_cmp_br_non_whitelist |
| 111 ; ARM32: mov [[R:r[0-9]+]], #0 |
| 111 ; ARM32: cmp r0, r1 | 112 ; ARM32: cmp r0, r1 |
| 112 ; ARM32: movge [[R:r[0-9]+]], #0 | |
| 113 ; ARM32: movlt [[R]], #1 | 113 ; ARM32: movlt [[R]], #1 |
| 114 ; ARM32: cmp r0, r1 | 114 ; ARM32: tst [[R]], #1 |
| 115 ; ARM32: bge | 115 ; ARM32: beq |
| 116 ; ARM32: bx lr | 116 ; ARM32: bx lr |
| 117 ; ARM32: mov r0, #2 | 117 ; ARM32: mov r0, #2 |
| 118 ; ARM32: bx lr | 118 ; ARM32: bx lr |
| 119 | 119 |
| 120 | 120 |
| 121 ; Basic cmp/select folding. | 121 ; Basic cmp/select folding. |
| 122 define internal i32 @fold_cmp_select(i32 %arg1, i32 %arg2) { | 122 define internal i32 @fold_cmp_select(i32 %arg1, i32 %arg2) { |
| 123 entry: | 123 entry: |
| 124 %cmp1 = icmp slt i32 %arg1, %arg2 | 124 %cmp1 = icmp slt i32 %arg1, %arg2 |
| 125 %result = select i1 %cmp1, i32 %arg1, i32 %arg2 | 125 %result = select i1 %cmp1, i32 %arg1, i32 %arg2 |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 161 %arg1_trunc = trunc i64 %arg1 to i32 | 161 %arg1_trunc = trunc i64 %arg1 to i32 |
| 162 %cmp1 = icmp slt i32 undef, %arg1_trunc | 162 %cmp1 = icmp slt i32 undef, %arg1_trunc |
| 163 %result = select i1 %cmp1, i64 %arg1, i64 undef | 163 %result = select i1 %cmp1, i64 %arg1, i64 undef |
| 164 ret i64 %result | 164 ret i64 %result |
| 165 } | 165 } |
| 166 ; CHECK-LABEL: fold_cmp_select_64_undef | 166 ; CHECK-LABEL: fold_cmp_select_64_undef |
| 167 ; CHECK: cmp | 167 ; CHECK: cmp |
| 168 ; CHECK: cmovl | 168 ; CHECK: cmovl |
| 169 ; CHECK: cmovl | 169 ; CHECK: cmovl |
| 170 ; ARM32-LABEL: fold_cmp_select_64_undef | 170 ; ARM32-LABEL: fold_cmp_select_64_undef |
| 171 ; ARM32: mov |
| 172 ; ARM32: mov |
| 171 ; ARM32: cmp {{r[0-9]+}}, r0 | 173 ; ARM32: cmp {{r[0-9]+}}, r0 |
| 172 ; ARM32: movge | |
| 173 ; ARM32: movlt | 174 ; ARM32: movlt |
| 174 ; ARM32: movge | |
| 175 ; ARM32: movlt | 175 ; ARM32: movlt |
| 176 ; ARM32: bx lr | 176 ; ARM32: bx lr |
| 177 | 177 |
| 178 | 178 |
| 179 ; Cmp/select folding with intervening instructions. | 179 ; Cmp/select folding with intervening instructions. |
| 180 define internal i32 @fold_cmp_select_intervening_insts(i32 %arg1, i32 %arg2) { | 180 define internal i32 @fold_cmp_select_intervening_insts(i32 %arg1, i32 %arg2) { |
| 181 entry: | 181 entry: |
| 182 %cmp1 = icmp slt i32 %arg1, %arg2 | 182 %cmp1 = icmp slt i32 %arg1, %arg2 |
| 183 call void @use_value(i32 %arg1) | 183 call void @use_value(i32 %arg1) |
| 184 %result = select i1 %cmp1, i32 %arg1, i32 %arg2 | 184 %result = select i1 %cmp1, i32 %arg1, i32 %arg2 |
| (...skipping 26 matching lines...) Expand all Loading... |
| 211 ; CHECK-LABEL: fold_cmp_select_multi | 211 ; CHECK-LABEL: fold_cmp_select_multi |
| 212 ; CHECK: cmp | 212 ; CHECK: cmp |
| 213 ; CHECK: cmovl | 213 ; CHECK: cmovl |
| 214 ; CHECK: cmp | 214 ; CHECK: cmp |
| 215 ; CHECK: cmovl | 215 ; CHECK: cmovl |
| 216 ; CHECK: cmp | 216 ; CHECK: cmp |
| 217 ; CHECK: cmovge | 217 ; CHECK: cmovge |
| 218 ; CHECK: add | 218 ; CHECK: add |
| 219 ; CHECK: add | 219 ; CHECK: add |
| 220 ; ARM32-LABEL: fold_cmp_select_multi | 220 ; ARM32-LABEL: fold_cmp_select_multi |
| 221 ; ARM32: cmp r0, r1 | 221 ; ARM32: mov |
| 222 ; ARM32: movlt {{r[0-9]+}}, r0 | 222 ; ARM32: cmp |
| 223 ; ARM32: cmp r0, r1 | 223 ; ARM32: movlt {{.*}}, #1 |
| 224 ; ARM32: movlt {{r[0-9]+}}, r1 | 224 ; ARM32: mov |
| 225 ; ARM32: cmp r0, r1 | 225 ; ARM32: tst {{.*}}, #1 |
| 226 ; ARM32: movlt {{r[0-9]+}}, #123 | 226 ; ARM32: movne |
| 227 ; ARM32: add | 227 ; ARM32: mov |
| 228 ; ARM32: add | 228 ; ARM32: tst {{.*}}, #1 |
| 229 ; ARM32: movne |
| 230 ; ARM32: tst {{.*}}, #1 |
| 231 ; ARM32: movne {{.*}}, #123 |
| 229 ; ARM32: bx lr | 232 ; ARM32: bx lr |
| 230 | 233 |
| 231 | 234 |
| 232 ; Cmp/multi-select non-folding because of live-out. | 235 ; Cmp/multi-select non-folding because of live-out. |
| 233 define internal i32 @no_fold_cmp_select_multi_liveout(i32 %arg1, i32 %arg2) { | 236 define internal i32 @no_fold_cmp_select_multi_liveout(i32 %arg1, i32 %arg2) { |
| 234 entry: | 237 entry: |
| 235 %cmp1 = icmp slt i32 %arg1, %arg2 | 238 %cmp1 = icmp slt i32 %arg1, %arg2 |
| 236 %a = select i1 %cmp1, i32 %arg1, i32 %arg2 | 239 %a = select i1 %cmp1, i32 %arg1, i32 %arg2 |
| 237 %b = select i1 %cmp1, i32 %arg2, i32 %arg1 | 240 %b = select i1 %cmp1, i32 %arg2, i32 %arg1 |
| 238 br label %next | 241 br label %next |
| 239 next: | 242 next: |
| 240 %c = select i1 %cmp1, i32 123, i32 %arg1 | 243 %c = select i1 %cmp1, i32 123, i32 %arg1 |
| 241 %partial = add i32 %a, %b | 244 %partial = add i32 %a, %b |
| 242 %result = add i32 %partial, %c | 245 %result = add i32 %partial, %c |
| 243 ret i32 %result | 246 ret i32 %result |
| 244 } | 247 } |
| 245 | 248 |
| 246 ; CHECK-LABEL: no_fold_cmp_select_multi_liveout | 249 ; CHECK-LABEL: no_fold_cmp_select_multi_liveout |
| 247 ; CHECK: set | 250 ; CHECK: set |
| 248 ; CHECK: cmp | 251 ; CHECK: cmp |
| 249 ; CHECK: cmovne | 252 ; CHECK: cmovne |
| 250 ; CHECK: cmp | 253 ; CHECK: cmp |
| 251 ; CHECK: cmovne | 254 ; CHECK: cmovne |
| 252 ; CHECK: cmp | 255 ; CHECK: cmp |
| 253 ; CHECK: cmove | 256 ; CHECK: cmove |
| 254 ; CHECK: add | 257 ; CHECK: add |
| 255 ; CHECK: add | 258 ; CHECK: add |
| 256 ; ARM32-LABEL: no_fold_cmp_select_multi_liveout | 259 ; ARM32-LABEL: no_fold_cmp_select_multi_liveout |
| 257 ; ARM32-LABEL: fold_cmp_select_multi | 260 ; ARM32: mov |
| 258 ; ARM32: cmp r0, r1 | 261 ; ARM32: cmp r0, r1 |
| 259 ; ARM32: movge [[T0:r[0-9]+]], #0 | 262 ; ARM32: movlt |
| 260 ; ARM32: movlt [[T0]], #1 | 263 ; ARM32: mov |
| 261 ; ARM32: uxtb [[T1:r[0-9]+]], [[T1]] | 264 ; ARM32: tst |
| 262 ; ARM32-NEXT: cmp [[T1]], #0 | 265 ; ARM32: movne |
| 263 ; ARM32: movne [[T2:r[0-9]+]], r0 | 266 ; ARM32: mov |
| 264 ; ARM32: uxtb [[T3:r[0-9]+]], [[T3]] | 267 ; ARM32: tst |
| 265 ; ARM32-NEXT: cmp [[T3]], #0 | 268 ; ARM32: movne |
| 266 ; ARM32: movne [[T4:r[0-9]+]], r1 | 269 ; ARM32: tst |
| 267 ; ARM32-LABEL: .Lno_fold_cmp_select_multi_liveout$next: | 270 ; ARM32: movne |
| 268 ; ARM32: uxtb [[T5:r[0-9]+]], [[T5]] | |
| 269 ; ARM32: cmp [[T5]], #0 | |
| 270 ; ARM32: movne [[T6:r[0-9]+]], #123 | |
| 271 ; ARM32: add | |
| 272 ; ARM32: add | |
| 273 ; ARM32: bx lr | 271 ; ARM32: bx lr |
| 274 | 272 |
| 275 ; Cmp/multi-select non-folding because of extra non-whitelisted uses. | 273 ; Cmp/multi-select non-folding because of extra non-whitelisted uses. |
| 276 define internal i32 @no_fold_cmp_select_multi_non_whitelist(i32 %arg1, | 274 define internal i32 @no_fold_cmp_select_multi_non_whitelist(i32 %arg1, |
| 277 i32 %arg2) { | 275 i32 %arg2) { |
| 278 entry: | 276 entry: |
| 279 %cmp1 = icmp slt i32 %arg1, %arg2 | 277 %cmp1 = icmp slt i32 %arg1, %arg2 |
| 280 %a = select i1 %cmp1, i32 %arg1, i32 %arg2 | 278 %a = select i1 %cmp1, i32 %arg1, i32 %arg2 |
| 281 %b = select i1 %cmp1, i32 %arg2, i32 %arg1 | 279 %b = select i1 %cmp1, i32 %arg2, i32 %arg1 |
| 282 %c = select i1 %cmp1, i32 123, i32 %arg1 | 280 %c = select i1 %cmp1, i32 123, i32 %arg1 |
| (...skipping 10 matching lines...) Expand all Loading... |
| 293 ; CHECK: cmovne | 291 ; CHECK: cmovne |
| 294 ; CHECK: cmp | 292 ; CHECK: cmp |
| 295 ; CHECK: cmovne | 293 ; CHECK: cmovne |
| 296 ; CHECK: cmp | 294 ; CHECK: cmp |
| 297 ; CHECK: cmove | 295 ; CHECK: cmove |
| 298 ; CHECK: movzx | 296 ; CHECK: movzx |
| 299 ; CHECK: add | 297 ; CHECK: add |
| 300 ; CHECK: add | 298 ; CHECK: add |
| 301 ; CHECK: add | 299 ; CHECK: add |
| 302 ; ARM32-LABEL: no_fold_cmp_select_multi_non_whitelist | 300 ; ARM32-LABEL: no_fold_cmp_select_multi_non_whitelist |
| 301 ; ARM32: mov |
| 303 ; ARM32: cmp r0, r1 | 302 ; ARM32: cmp r0, r1 |
| 304 ; ARM32: movge [[R0:r[0-9]+]] | 303 ; ARM32: movlt |
| 305 ; ARM32: movlt [[R0]] | 304 ; ARM32: mov |
| 306 ; ARM32: cmp r0, r1 | 305 ; ARM32: tst |
| 307 ; ARM32: movge [[R1:r[0-9]+]] | 306 ; ARM32: movne |
| 308 ; ARM32: movlt [[R1]] | 307 ; ARM32: mov |
| 309 ; ARM32: cmp r0, r1 | 308 ; ARM32: tst |
| 310 ; ARM32: movge [[R2:r[0-9]+]] | 309 ; ARM32: movne |
| 311 ; ARM32: movlt [[R2]] | 310 ; ARM32: tst |
| 312 ; ARM32: cmp r0, r1 | 311 ; ARM32: movne |
| 313 ; ARM32: movge [[R3:r[0-9]+]] | |
| 314 ; ARM32: movlt [[R3]] | |
| 315 ; ARM32: add | |
| 316 ; ARM32: add | |
| 317 ; ARM32: add | |
| 318 ; ARM32: bx lr | 312 ; ARM32: bx lr |
| 313 |
| 314 define internal i32 @br_i1_folding2_and(i32 %arg1, i32 %arg2) { |
| 315 %t0 = trunc i32 %arg1 to i1 |
| 316 %t1 = trunc i32 %arg2 to i1 |
| 317 |
| 318 %t2 = and i1 %t0, %t1 |
| 319 br i1 %t2, label %target_true, label %target_false |
| 320 |
| 321 target_true: |
| 322 ret i32 1 |
| 323 |
| 324 target_false: |
| 325 ret i32 0 |
| 326 } |
| 327 ; ARM32-LABEL: br_i1_folding2_and |
| 328 ; ARM32: tst r0, #1 |
| 329 ; ARM32: beq {{.*}}target_false |
| 330 ; ARM32: tst r1, #1 |
| 331 ; ARM32: beq {{.*}}target_false |
| 332 |
| 333 define internal i32 @br_i1_folding2_or(i32 %arg1, i32 %arg2) { |
| 334 %t0 = trunc i32 %arg1 to i1 |
| 335 %t1 = trunc i32 %arg2 to i1 |
| 336 |
| 337 %t2 = or i1 %t0, %t1 |
| 338 br i1 %t2, label %target_true, label %target_false |
| 339 |
| 340 target_true: |
| 341 ret i32 1 |
| 342 |
| 343 target_false: |
| 344 ret i32 0 |
| 345 } |
| 346 ; ARM32-LABEL: br_i1_folding2_or |
| 347 ; ARM32: tst r0, #1 |
| 348 ; ARM32: bne {{.*}}target_true |
| 349 ; ARM32: tst r1, #1 |
| 350 ; ARM32: beq {{.*}}target_false |
| 351 |
| 352 define internal i32 @br_i1_folding3_and_or(i32 %arg1, i32 %arg2, i32 %arg3) { |
| 353 %t0 = trunc i32 %arg1 to i1 |
| 354 %t1 = trunc i32 %arg2 to i1 |
| 355 %t2 = trunc i32 %arg3 to i1 |
| 356 |
| 357 %t3 = and i1 %t0, %t1 |
| 358 %t4 = or i1 %t3, %t2 |
| 359 |
| 360 br i1 %t4, label %target_true, label %target_false |
| 361 |
| 362 target_true: |
| 363 ret i32 1 |
| 364 |
| 365 target_false: |
| 366 ret i32 0 |
| 367 } |
| 368 ; ARM32-LABEL: br_i1_folding3_and_or |
| 369 ; ARM32: tst r0, #1 |
| 370 ; ARM32: beq |
| 371 ; ARM32: tst r1, #1 |
| 372 ; ARM32: bne {{.*}}target_true |
| 373 ; ARM32: tst r2, #1 |
| 374 ; ARM32: beq {{.*}}target_false |
| 375 |
| 376 define internal i32 @br_i1_folding3_or_and(i32 %arg1, i32 %arg2, i32 %arg3) { |
| 377 %t0 = trunc i32 %arg1 to i1 |
| 378 %t1 = trunc i32 %arg2 to i1 |
| 379 %t2 = trunc i32 %arg3 to i1 |
| 380 |
| 381 %t3 = or i1 %t0, %t1 |
| 382 %t4 = and i1 %t3, %t2 |
| 383 |
| 384 br i1 %t4, label %target_true, label %target_false |
| 385 |
| 386 target_true: |
| 387 ret i32 1 |
| 388 |
| 389 target_false: |
| 390 ret i32 0 |
| 391 } |
| 392 ; ARM32-LABEL: br_i1_folding3_or_and |
| 393 ; ARM32: tst r0, #1 |
| 394 ; ARM32: bne |
| 395 ; ARM32: tst r1, #1 |
| 396 ; ARM32: beq {{.*}}target_false |
| 397 ; ARM32: tst r2, #1 |
| 398 ; ARM32: beq {{.*}}target_false |
| 399 |
| 400 define internal i32 @br_i1_folding4(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, |
| 401 i32 %arg5) { |
| 402 %t0 = trunc i32 %arg1 to i1 |
| 403 %t1 = trunc i32 %arg2 to i1 |
| 404 %t2 = trunc i32 %arg3 to i1 |
| 405 %t3 = trunc i32 %arg4 to i1 |
| 406 %t4 = trunc i32 %arg5 to i1 |
| 407 |
| 408 %t5 = or i1 %t0, %t1 |
| 409 %t6 = and i1 %t5, %t2 |
| 410 %t7 = and i1 %t3, %t4 |
| 411 %t8 = or i1 %t6, %t7 |
| 412 br i1 %t8, label %target_true, label %target_false |
| 413 |
| 414 target_true: |
| 415 ret i32 1 |
| 416 |
| 417 target_false: |
| 418 ret i32 0 |
| 419 } |
| 420 ; ARM32-LABEL: br_i1_folding4 |
| 421 ; ARM32: tst r0, #1 |
| 422 ; ARM32: bne |
| 423 ; ARM32: tst r1, #1 |
| 424 ; ARM32: beq |
| 425 ; ARM32: tst r2, #1 |
| 426 ; ARM32: bne {{.*}}target_true |
| 427 ; ARM32: tst r3, #1 |
| 428 ; ARM32: beq {{.*}}target_false |
| 429 ; ARM32: tst r4, #1 |
| 430 ; ARM32: beq {{.*}}target_false |
| OLD | NEW |