| OLD | NEW |
| 1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. | 1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. |
| 2 ; This should apply to both atomic and non-atomic loads/stores | 2 ; This should apply to both atomic and non-atomic loads/stores |
| 3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only | 3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only |
| 4 ; applies to atomic loads/stores). | 4 ; applies to atomic loads/stores). |
| 5 ; | 5 ; |
| 6 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s | 6 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s |
| 7 | 7 |
| 8 declare void @llvm.nacl.atomic.fence.all() | 8 declare void @llvm.nacl.atomic.fence.all() |
| 9 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) | 9 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) |
| 10 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) | 10 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) |
| (...skipping 22 matching lines...) Expand all Loading... |
| 33 %p_c = bitcast [4 x i8]* @g32_c to i32* | 33 %p_c = bitcast [4 x i8]* @g32_c to i32* |
| 34 %l_c = load i32, i32* %p_c, align 1 | 34 %l_c = load i32, i32* %p_c, align 1 |
| 35 %l_c2 = sub i32 1, %l_c | 35 %l_c2 = sub i32 1, %l_c |
| 36 call void @llvm.nacl.atomic.fence.all() | 36 call void @llvm.nacl.atomic.fence.all() |
| 37 store i32 %l_c2, i32* %p_c, align 1 | 37 store i32 %l_c2, i32* %p_c, align 1 |
| 38 | 38 |
| 39 ret i32 %l_c2 | 39 ret i32 %l_c2 |
| 40 } | 40 } |
| 41 ; CHECK-LABEL: test_fused_load_sub_a | 41 ; CHECK-LABEL: test_fused_load_sub_a |
| 42 ; alloca store | 42 ; alloca store |
| 43 ; CHECK: mov {{.*}},esp | |
| 44 ; CHECK: mov DWORD PTR {{.*}},0x3e7 | 43 ; CHECK: mov DWORD PTR {{.*}},0x3e7 |
| 45 ; atomic store (w/ its own mfence) | 44 ; atomic store (w/ its own mfence) |
| 46 ; The load + sub are optimized into one instruction everywhere. | 45 ; The load + sub are optimized into one instruction everywhere. |
| 47 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a | 46 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a |
| 48 ; CHECK: mov DWORD PTR | 47 ; CHECK: mov DWORD PTR |
| 49 ; CHECK: mfence | 48 ; CHECK: mfence |
| 50 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b | 49 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b |
| 51 ; CHECK: mov DWORD PTR | 50 ; CHECK: mov DWORD PTR |
| 52 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c | 51 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c |
| 53 ; CHECK: mfence | 52 ; CHECK: mfence |
| (...skipping 19 matching lines...) Expand all Loading... |
| 73 %p_c = bitcast [4 x i8]* @g32_c to i32* | 72 %p_c = bitcast [4 x i8]* @g32_c to i32* |
| 74 call void @llvm.nacl.atomic.fence.all() | 73 call void @llvm.nacl.atomic.fence.all() |
| 75 %l_c = load i32, i32* %p_c, align 1 | 74 %l_c = load i32, i32* %p_c, align 1 |
| 76 %l_c2 = sub i32 1, %l_c | 75 %l_c2 = sub i32 1, %l_c |
| 77 store i32 %l_c2, i32* %p_c, align 1 | 76 store i32 %l_c2, i32* %p_c, align 1 |
| 78 | 77 |
| 79 ret i32 %l_c2 | 78 ret i32 %l_c2 |
| 80 } | 79 } |
| 81 ; CHECK-LABEL: test_fused_load_sub_b | 80 ; CHECK-LABEL: test_fused_load_sub_b |
| 82 ; alloca store | 81 ; alloca store |
| 83 ; CHECK: mov {{.*}},esp | |
| 84 ; CHECK: mov DWORD PTR {{.*}},0x3e7 | 82 ; CHECK: mov DWORD PTR {{.*}},0x3e7 |
| 85 ; atomic store (w/ its own mfence) | 83 ; atomic store (w/ its own mfence) |
| 86 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a | 84 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a |
| 87 ; CHECK: mov DWORD PTR | 85 ; CHECK: mov DWORD PTR |
| 88 ; CHECK: mfence | 86 ; CHECK: mfence |
| 89 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b | 87 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b |
| 90 ; CHECK: mov DWORD PTR | 88 ; CHECK: mov DWORD PTR |
| 91 ; CHECK: mfence | 89 ; CHECK: mfence |
| 92 ; Load + sub can still be optimized into one instruction | 90 ; Load + sub can still be optimized into one instruction |
| 93 ; because it is not separated by a fence. | 91 ; because it is not separated by a fence. |
| (...skipping 20 matching lines...) Expand all Loading... |
| 114 | 112 |
| 115 %p_c = bitcast [4 x i8]* @g32_c to i32* | 113 %p_c = bitcast [4 x i8]* @g32_c to i32* |
| 116 %l_c = load i32, i32* %p_c, align 1 | 114 %l_c = load i32, i32* %p_c, align 1 |
| 117 %l_c2 = sub i32 1, %l_c | 115 %l_c2 = sub i32 1, %l_c |
| 118 store i32 %l_c2, i32* %p_c, align 1 | 116 store i32 %l_c2, i32* %p_c, align 1 |
| 119 | 117 |
| 120 ret i32 %l_c2 | 118 ret i32 %l_c2 |
| 121 } | 119 } |
| 122 ; CHECK-LABEL: test_fused_load_sub_c | 120 ; CHECK-LABEL: test_fused_load_sub_c |
| 123 ; alloca store | 121 ; alloca store |
| 124 ; CHECK: mov {{.*}},esp | |
| 125 ; CHECK: mov DWORD PTR {{.*}},0x3e7 | 122 ; CHECK: mov DWORD PTR {{.*}},0x3e7 |
| 126 ; atomic store (w/ its own mfence) | 123 ; atomic store (w/ its own mfence) |
| 127 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a | 124 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a |
| 128 ; CHECK: mov DWORD PTR | 125 ; CHECK: mov DWORD PTR |
| 129 ; CHECK: mfence | 126 ; CHECK: mfence |
| 130 ; This load + sub is no longer fused into one instruction, | 127 ; This load + sub is no longer fused into one instruction, |
| 131 ; though perhaps fusing them should be legal as long as | 128 ; though perhaps fusing them should be legal as long as |
| 132 ; the load stays on the same side of the fence. | 129 ; the load stays on the same side of the fence. |
| 133 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_b | 130 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_b |
| 134 ; CHECK: mfence | 131 ; CHECK: mfence |
| (...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 196 %z = load i32, i32* %ptr, align 1 | 193 %z = load i32, i32* %ptr, align 1 |
| 197 ret i32 %z | 194 ret i32 %z |
| 198 } | 195 } |
| 199 ; CHECK-LABEL: could_have_hoisted_loads | 196 ; CHECK-LABEL: could_have_hoisted_loads |
| 200 ; CHECK: jne {{.*}} | 197 ; CHECK: jne {{.*}} |
| 201 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d | 198 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d |
| 202 ; CHECK: ret | 199 ; CHECK: ret |
| 203 ; CHECK: mfence | 200 ; CHECK: mfence |
| 204 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d | 201 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d |
| 205 ; CHECK: ret | 202 ; CHECK: ret |
| OLD | NEW |