| OLD | NEW |
| 1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. | 1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. |
| 2 ; This should apply to both atomic and non-atomic loads/stores | 2 ; This should apply to both atomic and non-atomic loads/stores |
| 3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only | 3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only |
| 4 ; applies to atomic load/stores). | 4 ; applies to atomic load/stores). |
| 5 ; | 5 ; |
| 6 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s | 6 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s |
| 7 | 7 |
| 8 declare void @llvm.nacl.atomic.fence.all() | 8 declare void @llvm.nacl.atomic.fence.all() |
| 9 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) | 9 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) |
| 10 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) | 10 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) |
| (...skipping 25 matching lines...) Expand all Loading... |
| 36 call void @llvm.nacl.atomic.fence.all() | 36 call void @llvm.nacl.atomic.fence.all() |
| 37 store i32 %l_c2, i32* %p_c, align 1 | 37 store i32 %l_c2, i32* %p_c, align 1 |
| 38 | 38 |
| 39 ret i32 %l_c2 | 39 ret i32 %l_c2 |
| 40 } | 40 } |
| 41 ; CHECK-LABEL: test_fused_load_sub_a | 41 ; CHECK-LABEL: test_fused_load_sub_a |
| 42 ; alloca store | 42 ; alloca store |
| 43 ; CHECK: mov DWORD PTR {{.*}},0x3e7 | 43 ; CHECK: mov DWORD PTR {{.*}},0x3e7 |
| 44 ; atomic store (w/ its own mfence) | 44 ; atomic store (w/ its own mfence) |
| 45 ; The load + sub are optimized into one everywhere. | 45 ; The load + sub are optimized into one everywhere. |
| 46 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a | 46 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_a)|(.bss)}} |
| 47 ; CHECK: mov DWORD PTR | 47 ; CHECK: mov {{(DWORD PTR)?}} |
| 48 ; CHECK: mfence | 48 ; CHECK: mfence |
| 49 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b | 49 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_b)|(.bss)}} |
| 50 ; CHECK: mov DWORD PTR | 50 ; CHECK: mov {{(DWORD PTR)?}} |
| 51 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c | 51 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_c)|(.bss)}} |
| 52 ; CHECK: mfence | 52 ; CHECK: mfence |
| 53 ; CHECK: mov DWORD PTR | 53 ; CHECK: mov {{(DWORD PTR)?}} |
| 54 | 54 |
| 55 ; Test with the fence moved up a bit. | 55 ; Test with the fence moved up a bit. |
| 56 define internal i32 @test_fused_load_sub_b() { | 56 define internal i32 @test_fused_load_sub_b() { |
| 57 entry: | 57 entry: |
| 58 %p_alloca = alloca i8, i32 4, align 4 | 58 %p_alloca = alloca i8, i32 4, align 4 |
| 59 %p_alloca_bc = bitcast i8* %p_alloca to i32* | 59 %p_alloca_bc = bitcast i8* %p_alloca to i32* |
| 60 store i32 999, i32* %p_alloca_bc, align 1 | 60 store i32 999, i32* %p_alloca_bc, align 1 |
| 61 | 61 |
| 62 %p_a = bitcast [4 x i8]* @g32_a to i32* | 62 %p_a = bitcast [4 x i8]* @g32_a to i32* |
| 63 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) | 63 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) |
| (...skipping 10 matching lines...) Expand all Loading... |
| 74 %l_c = load i32, i32* %p_c, align 1 | 74 %l_c = load i32, i32* %p_c, align 1 |
| 75 %l_c2 = sub i32 1, %l_c | 75 %l_c2 = sub i32 1, %l_c |
| 76 store i32 %l_c2, i32* %p_c, align 1 | 76 store i32 %l_c2, i32* %p_c, align 1 |
| 77 | 77 |
| 78 ret i32 %l_c2 | 78 ret i32 %l_c2 |
| 79 } | 79 } |
| 80 ; CHECK-LABEL: test_fused_load_sub_b | 80 ; CHECK-LABEL: test_fused_load_sub_b |
| 81 ; alloca store | 81 ; alloca store |
| 82 ; CHECK: mov DWORD PTR {{.*}},0x3e7 | 82 ; CHECK: mov DWORD PTR {{.*}},0x3e7 |
| 83 ; atomic store (w/ its own mfence) | 83 ; atomic store (w/ its own mfence) |
| 84 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a | 84 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_a)|(.bss)}} |
| 85 ; CHECK: mov DWORD PTR | 85 ; CHECK: mov {{(DWORD PTR)?}} |
| 86 ; CHECK: mfence | 86 ; CHECK: mfence |
| 87 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b | 87 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_b)|(.bss)}} |
| 88 ; CHECK: mov DWORD PTR | 88 ; CHECK: mov {{(DWORD PTR)?}} |
| 89 ; CHECK: mfence | 89 ; CHECK: mfence |
| 90 ; Load + sub can still be optimized into one instruction | 90 ; Load + sub can still be optimized into one instruction |
| 91 ; because it is not separated by a fence. | 91 ; because it is not separated by a fence. |
| 92 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c | 92 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_c)|(.bss)}} |
| 93 ; CHECK: mov DWORD PTR | 93 ; CHECK: mov {{(DWORD PTR)?}} |
| 94 | 94 |
| 95 ; Test with the fence splitting a load/sub. | 95 ; Test with the fence splitting a load/sub. |
| 96 define internal i32 @test_fused_load_sub_c() { | 96 define internal i32 @test_fused_load_sub_c() { |
| 97 entry: | 97 entry: |
| 98 %p_alloca = alloca i8, i32 4, align 4 | 98 %p_alloca = alloca i8, i32 4, align 4 |
| 99 %p_alloca_bc = bitcast i8* %p_alloca to i32* | 99 %p_alloca_bc = bitcast i8* %p_alloca to i32* |
| 100 store i32 999, i32* %p_alloca_bc, align 1 | 100 store i32 999, i32* %p_alloca_bc, align 1 |
| 101 | 101 |
| 102 %p_a = bitcast [4 x i8]* @g32_a to i32* | 102 %p_a = bitcast [4 x i8]* @g32_a to i32* |
| 103 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) | 103 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) |
| (...skipping 10 matching lines...) Expand all Loading... |
| 114 %l_c = load i32, i32* %p_c, align 1 | 114 %l_c = load i32, i32* %p_c, align 1 |
| 115 %l_c2 = sub i32 1, %l_c | 115 %l_c2 = sub i32 1, %l_c |
| 116 store i32 %l_c2, i32* %p_c, align 1 | 116 store i32 %l_c2, i32* %p_c, align 1 |
| 117 | 117 |
| 118 ret i32 %l_c2 | 118 ret i32 %l_c2 |
| 119 } | 119 } |
| 120 ; CHECK-LABEL: test_fused_load_sub_c | 120 ; CHECK-LABEL: test_fused_load_sub_c |
| 121 ; alloca store | 121 ; alloca store |
| 122 ; CHECK: mov DWORD PTR {{.*}},0x3e7 | 122 ; CHECK: mov DWORD PTR {{.*}},0x3e7 |
| 123 ; atomic store (w/ its own mfence) | 123 ; atomic store (w/ its own mfence) |
| 124 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a | 124 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_a)|(.bss)}} |
| 125 ; CHECK: mov DWORD PTR | 125 ; CHECK: mov {{(DWORD PTR)?}} |
| 126 ; CHECK: mfence | 126 ; CHECK: mfence |
| 127 ; This load + sub are no longer optimized into one, | 127 ; This load + sub are no longer optimized into one, |
| 128 ; though perhaps it should be legal as long as | 128 ; though perhaps it should be legal as long as |
| 129 ; the load stays on the same side of the fence. | 129 ; the load stays on the same side of the fence. |
| 130 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_b | 130 ; CHECK: mov {{.*}},{{(DWORD PTR )?}}{{.*}}{{(g32_b)|(.bss)}} |
| 131 ; CHECK: mfence | 131 ; CHECK: mfence |
| 132 ; CHECK: mov {{.*}},0x1 | 132 ; CHECK: mov {{.*}},0x1 |
| 133 ; CHECK: sub | 133 ; CHECK: sub |
| 134 ; CHECK: mov DWORD PTR | 134 ; CHECK: mov {{(DWORD PTR)?}} |
| 135 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c | 135 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_c)|(.bss)}} |
| 136 ; CHECK: mov DWORD PTR | 136 ; CHECK: mov {{(DWORD PTR)?}} |
| 137 | 137 |
| 138 | 138 |
| 139 ; Test where a bunch of i8 loads could have been fused into one | 139 ; Test where a bunch of i8 loads could have been fused into one |
| 140 ; i32 load, but a fence blocks that. | 140 ; i32 load, but a fence blocks that. |
| 141 define internal i32 @could_have_fused_loads() { | 141 define internal i32 @could_have_fused_loads() { |
| 142 entry: | 142 entry: |
| 143 %ptr1 = bitcast [4 x i8]* @g32_d to i8* | 143 %ptr1 = bitcast [4 x i8]* @g32_d to i8* |
| 144 %b1 = load i8, i8* %ptr1, align 1 | 144 %b1 = load i8, i8* %ptr1, align 1 |
| 145 | 145 |
| 146 %int_ptr2 = ptrtoint [4 x i8]* @g32_d to i32 | 146 %int_ptr2 = ptrtoint [4 x i8]* @g32_d to i32 |
| (...skipping 17 matching lines...) Expand all Loading... |
| 164 %b12 = or i32 %b1.ext, %b2.shift | 164 %b12 = or i32 %b1.ext, %b2.shift |
| 165 %b3.ext = zext i8 %b3 to i32 | 165 %b3.ext = zext i8 %b3 to i32 |
| 166 %b3.shift = shl i32 %b3.ext, 16 | 166 %b3.shift = shl i32 %b3.ext, 16 |
| 167 %b123 = or i32 %b12, %b3.shift | 167 %b123 = or i32 %b12, %b3.shift |
| 168 %b4.ext = zext i8 %b4 to i32 | 168 %b4.ext = zext i8 %b4 to i32 |
| 169 %b4.shift = shl i32 %b4.ext, 24 | 169 %b4.shift = shl i32 %b4.ext, 24 |
| 170 %b1234 = or i32 %b123, %b4.shift | 170 %b1234 = or i32 %b123, %b4.shift |
| 171 ret i32 %b1234 | 171 ret i32 %b1234 |
| 172 } | 172 } |
| 173 ; CHECK-LABEL: could_have_fused_loads | 173 ; CHECK-LABEL: could_have_fused_loads |
| 174 ; CHECK: mov {{.*}},BYTE PTR | 174 ; CHECK: mov {{.*}},{{(BYTE PTR)?}} |
| 175 ; CHECK: mov {{.*}},BYTE PTR | 175 ; CHECK: mov {{.*}},BYTE PTR |
| 176 ; CHECK: mov {{.*}},BYTE PTR | 176 ; CHECK: mov {{.*}},BYTE PTR |
| 177 ; CHECK: mfence | 177 ; CHECK: mfence |
| 178 ; CHECK: mov {{.*}},BYTE PTR | 178 ; CHECK: mov {{.*}},BYTE PTR |
| 179 | 179 |
| 180 | 180 |
| 181 ; Test where an identical load from two branches could have been hoisted | 181 ; Test where an identical load from two branches could have been hoisted |
| 182 ; up, and then the code merged, but a fence prevents it. | 182 ; up, and then the code merged, but a fence prevents it. |
| 183 define internal i32 @could_have_hoisted_loads(i32 %x) { | 183 define internal i32 @could_have_hoisted_loads(i32 %x) { |
| 184 entry: | 184 entry: |
| 185 %ptr = bitcast [4 x i8]* @g32_d to i32* | 185 %ptr = bitcast [4 x i8]* @g32_d to i32* |
| 186 %cmp = icmp eq i32 %x, 1 | 186 %cmp = icmp eq i32 %x, 1 |
| 187 br i1 %cmp, label %branch1, label %branch2 | 187 br i1 %cmp, label %branch1, label %branch2 |
| 188 branch1: | 188 branch1: |
| 189 %y = load i32, i32* %ptr, align 1 | 189 %y = load i32, i32* %ptr, align 1 |
| 190 ret i32 %y | 190 ret i32 %y |
| 191 branch2: | 191 branch2: |
| 192 call void @llvm.nacl.atomic.fence.all() | 192 call void @llvm.nacl.atomic.fence.all() |
| 193 %z = load i32, i32* %ptr, align 1 | 193 %z = load i32, i32* %ptr, align 1 |
| 194 ret i32 %z | 194 ret i32 %z |
| 195 } | 195 } |
| 196 ; CHECK-LABEL: could_have_hoisted_loads | 196 ; CHECK-LABEL: could_have_hoisted_loads |
| 197 ; CHECK: jne {{.*}} | 197 ; CHECK: jne {{.*}} |
| 198 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d | 198 ; CHECK: mov {{.*}},{{(DWORD PTR )?}}{{.*}}{{(g32_d)|(.bss)}} |
| 199 ; CHECK: ret | 199 ; CHECK: ret |
| 200 ; CHECK: mfence | 200 ; CHECK: mfence |
| 201 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d | 201 ; CHECK: mov {{.*}},{{(DWORD PTR )?}}{{.*}}{{(g32_d)|(.bss)}} |
| 202 ; CHECK: ret | 202 ; CHECK: ret |
| OLD | NEW |