OLD | NEW |
1 ; This tests some of the subtleties of Phi lowering. In particular, | 1 ; This tests some of the subtleties of Phi lowering. In particular, |
2 ; it tests that it does the right thing when it tries to enable | 2 ; it tests that it does the right thing when it tries to enable |
3 ; compare/branch fusing. | 3 ; compare/branch fusing. |
4 | 4 |
5 ; TODO(kschimpf) Find out why lc2i must be used. | 5 ; RUN: %p2i -i %s --args -O2 --verbose none --phi-edge-split=0 \ |
6 ; REQUIRES: allow_llvm_ir_as_input | |
7 ; RUN: %lc2i -i %s --args -O2 --verbose none --phi-edge-split=0 \ | |
8 ; RUN: | llvm-mc -triple=i686-none-nacl -filetype=obj \ | 6 ; RUN: | llvm-mc -triple=i686-none-nacl -filetype=obj \ |
9 ; RUN: | llvm-objdump -d -symbolize -x86-asm-syntax=intel - | FileCheck %s | 7 ; RUN: | llvm-objdump -d -symbolize -x86-asm-syntax=intel - | FileCheck %s |
10 | 8 |
11 define internal i32 @testPhi1(i32 %arg) { | 9 define internal i32 @testPhi1(i32 %arg) { |
12 entry: | 10 entry: |
13 %cmp1 = icmp sgt i32 %arg, 0 | 11 %cmp1 = icmp sgt i32 %arg, 0 |
14 br i1 %cmp1, label %next, label %target | 12 br i1 %cmp1, label %next, label %target |
15 next: | 13 next: |
16 br label %target | 14 br label %target |
17 target: | 15 target: |
(...skipping 33 matching lines...)
51 ; CHECK: mov {{.*}}, [[PHI]] | 49 ; CHECK: mov {{.*}}, [[PHI]] |
52 | 50 |
53 ; Test that address mode inference doesn't extend past | 51 ; Test that address mode inference doesn't extend past |
54 ; multi-definition, non-SSA Phi temporaries. | 52 ; multi-definition, non-SSA Phi temporaries. |
55 define internal i32 @testPhi3(i32 %arg) { | 53 define internal i32 @testPhi3(i32 %arg) { |
56 entry: | 54 entry: |
57 br label %body | 55 br label %body |
58 body: | 56 body: |
59 %merge = phi i32 [ %arg, %entry ], [ %elt, %body ] | 57 %merge = phi i32 [ %arg, %entry ], [ %elt, %body ] |
60 %interior = add i32 %merge, 1000 | 58 %interior = add i32 %merge, 1000 |
61 %__4 = inttoptr i32 %interior to i32* | 59 ; Trick to make a basic block local copy of interior for |
| 60 ; addressing mode optimization. |
| 61 %interior__4 = add i32 %interior, 0 |
| 62 %__4 = inttoptr i32 %interior__4 to i32* |
62 %elt = load i32* %__4, align 1 | 63 %elt = load i32* %__4, align 1 |
63 %cmp = icmp eq i32 %elt, 0 | 64 %cmp = icmp eq i32 %elt, 0 |
64 br i1 %cmp, label %exit, label %body | 65 br i1 %cmp, label %exit, label %body |
65 exit: | 66 exit: |
66 %__6 = inttoptr i32 %interior to i32* | 67 ; Same trick (making a basic block local copy). |
| 68 %interior__6 = add i32 %interior, 0 |
| 69 %__6 = inttoptr i32 %interior__6 to i32* |
67 store i32 %arg, i32* %__6, align 1 | 70 store i32 %arg, i32* %__6, align 1 |
68 ret i32 %arg | 71 ret i32 %arg |
69 } | 72 } |
70 ; I can't figure out how to reliably test this for correctness, so I | 73 ; I can't figure out how to reliably test this for correctness, so I |
71 ; will just include patterns for the entire current O2 sequence. This | 74 ; will just include patterns for the entire current O2 sequence. This |
72 ; may need to be changed when meaningful optimizations are added. | 75 ; may need to be changed when meaningful optimizations are added. |
73 ; The key is to avoid the "bad" pattern like this: | 76 ; The key is to avoid the "bad" pattern like this: |
74 ; | 77 ; |
75 ; testPhi3: | 78 ; testPhi3: |
76 ; .LtestPhi3$entry: | 79 ; .LtestPhi3$entry: |
77 ; mov eax, dword ptr [esp+4] | 80 ; mov eax, dword ptr [esp+4] |
78 ; mov ecx, eax | 81 ; mov ecx, eax |
79 ; .LtestPhi3$body: | 82 ; .LtestPhi3$body: |
80 ; mov ecx, dword ptr [ecx+1000] | 83 ; mov ecx, dword ptr [ecx+1000] |
81 ; cmp ecx, 0 | 84 ; cmp ecx, 0 |
82 ; jne .LtestPhi3$body | 85 ; jne .LtestPhi3$body |
83 ; .LtestPhi3$exit: | 86 ; .LtestPhi3$exit: |
84 ; mov dword ptr [ecx+1000], eax | 87 ; mov dword ptr [ecx+1000], eax |
85 ; ret | 88 ; ret |
86 ; | 89 ; |
87 ; This is bad because the final store address is supposed to be the | 90 ; This is bad because the final store address is supposed to be the |
88 ; same as the load address in the loop, but it has clearly been | 91 ; same as the load address in the loop, but it has clearly been |
89 ; over-optimized into a null pointer dereference. | 92 ; over-optimized into a null pointer dereference. |
90 | 93 |
91 ; CHECK-LABEL: testPhi3 | 94 ; CHECK-LABEL: testPhi3 |
92 ; CHECK: push [[EBX:.*]] | 95 ; CHECK: push [[EBX:.*]] |
93 ; CHECK: mov {{.*}}, dword ptr [esp | 96 ; CHECK: mov {{.*}}, dword ptr [esp |
94 ; CHECK: mov | 97 ; CHECK: mov |
95 ; CHECK: mov {{.*}}[[ADDR:.*1000]] | 98 ; CHECK: mov {{.*}}, dword ptr [[ADDR:.*1000]] |
96 ; CHECK: cmp {{.*}}, 0 | 99 ; CHECK: cmp {{.*}}, 0 |
97 ; CHECK: jne | 100 ; CHECK: jne |
98 ; CHECK: mov {{.*}}[[ADDR]] | 101 ; CHECK: mov dword ptr [[ADDR]] |
99 ; CHECK: pop [[EBX]] | 102 ; CHECK: pop [[EBX]] |
OLD | NEW |