| OLD | NEW |
| 1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. | 1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. |
| 2 ; This should apply to both atomic and non-atomic loads/stores | 2 ; This should apply to both atomic and non-atomic loads/stores |
| 3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only | 3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only |
| 4 ; applies to atomic load/stores). | 4 ; applies to atomic load/stores). |
| 5 ; | 5 ; |
| 6 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s | 6 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s |
| 7 | 7 |
| 8 declare void @llvm.nacl.atomic.fence.all() | 8 declare void @llvm.nacl.atomic.fence.all() |
| 9 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) | 9 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) |
| 10 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) | 10 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) |
| 11 | 11 |
| 12 @g32_a = internal global [4 x i8] zeroinitializer, align 4 | 12 @g32_a = internal global [4 x i8] zeroinitializer, align 4 |
| 13 @g32_b = internal global [4 x i8] zeroinitializer, align 4 | 13 @g32_b = internal global [4 x i8] zeroinitializer, align 4 |
| 14 @g32_c = internal global [4 x i8] zeroinitializer, align 4 | 14 @g32_c = internal global [4 x i8] zeroinitializer, align 4 |
| 15 @g32_d = internal global [4 x i8] zeroinitializer, align 4 | 15 @g32_d = internal global [4 x i8] zeroinitializer, align 4 |
| 16 | 16 |
| 17 define i32 @test_fused_load_sub_a() { | 17 define internal i32 @test_fused_load_sub_a() { |
| 18 entry: | 18 entry: |
| 19 %p_alloca = alloca i8, i32 4, align 4 | 19 %p_alloca = alloca i8, i32 4, align 4 |
| 20 %p_alloca_bc = bitcast i8* %p_alloca to i32* | 20 %p_alloca_bc = bitcast i8* %p_alloca to i32* |
| 21 store i32 999, i32* %p_alloca_bc, align 1 | 21 store i32 999, i32* %p_alloca_bc, align 1 |
| 22 | 22 |
| 23 %p_a = bitcast [4 x i8]* @g32_a to i32* | 23 %p_a = bitcast [4 x i8]* @g32_a to i32* |
| 24 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) | 24 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) |
| 25 %l_a2 = sub i32 1, %l_a | 25 %l_a2 = sub i32 1, %l_a |
| 26 call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6) | 26 call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6) |
| 27 | 27 |
| (...skipping 19 matching lines...) | (...skipping 19 matching lines...) |
| 47 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a | 47 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a |
| 48 ; CHECK: mov DWORD PTR | 48 ; CHECK: mov DWORD PTR |
| 49 ; CHECK: mfence | 49 ; CHECK: mfence |
| 50 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b | 50 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b |
| 51 ; CHECK: mov DWORD PTR | 51 ; CHECK: mov DWORD PTR |
| 52 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c | 52 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c |
| 53 ; CHECK: mfence | 53 ; CHECK: mfence |
| 54 ; CHECK: mov DWORD PTR | 54 ; CHECK: mov DWORD PTR |
| 55 | 55 |
| 56 ; Test with the fence moved up a bit. | 56 ; Test with the fence moved up a bit. |
| 57 define i32 @test_fused_load_sub_b() { | 57 define internal i32 @test_fused_load_sub_b() { |
| 58 entry: | 58 entry: |
| 59 %p_alloca = alloca i8, i32 4, align 4 | 59 %p_alloca = alloca i8, i32 4, align 4 |
| 60 %p_alloca_bc = bitcast i8* %p_alloca to i32* | 60 %p_alloca_bc = bitcast i8* %p_alloca to i32* |
| 61 store i32 999, i32* %p_alloca_bc, align 1 | 61 store i32 999, i32* %p_alloca_bc, align 1 |
| 62 | 62 |
| 63 %p_a = bitcast [4 x i8]* @g32_a to i32* | 63 %p_a = bitcast [4 x i8]* @g32_a to i32* |
| 64 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) | 64 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) |
| 65 %l_a2 = sub i32 1, %l_a | 65 %l_a2 = sub i32 1, %l_a |
| 66 call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6) | 66 call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6) |
| 67 | 67 |
| (...skipping 20 matching lines...) | (...skipping 20 matching lines...) |
| 88 ; CHECK: mfence | 88 ; CHECK: mfence |
| 89 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b | 89 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b |
| 90 ; CHECK: mov DWORD PTR | 90 ; CHECK: mov DWORD PTR |
| 91 ; CHECK: mfence | 91 ; CHECK: mfence |
| 92 ; Load + sub can still be optimized into one instruction | 92 ; Load + sub can still be optimized into one instruction |
| 93 ; because it is not separated by a fence. | 93 ; because it is not separated by a fence. |
| 94 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c | 94 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c |
| 95 ; CHECK: mov DWORD PTR | 95 ; CHECK: mov DWORD PTR |
| 96 | 96 |
| 97 ; Test with the fence splitting a load/sub. | 97 ; Test with the fence splitting a load/sub. |
| 98 define i32 @test_fused_load_sub_c() { | 98 define internal i32 @test_fused_load_sub_c() { |
| 99 entry: | 99 entry: |
| 100 %p_alloca = alloca i8, i32 4, align 4 | 100 %p_alloca = alloca i8, i32 4, align 4 |
| 101 %p_alloca_bc = bitcast i8* %p_alloca to i32* | 101 %p_alloca_bc = bitcast i8* %p_alloca to i32* |
| 102 store i32 999, i32* %p_alloca_bc, align 1 | 102 store i32 999, i32* %p_alloca_bc, align 1 |
| 103 | 103 |
| 104 %p_a = bitcast [4 x i8]* @g32_a to i32* | 104 %p_a = bitcast [4 x i8]* @g32_a to i32* |
| 105 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) | 105 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) |
| 106 %l_a2 = sub i32 1, %l_a | 106 %l_a2 = sub i32 1, %l_a |
| 107 call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6) | 107 call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6) |
| 108 | 108 |
| (...skipping 25 matching lines...) | (...skipping 25 matching lines...) |
| 134 ; CHECK: mfence | 134 ; CHECK: mfence |
| 135 ; CHECK: mov {{.*}},0x1 | 135 ; CHECK: mov {{.*}},0x1 |
| 136 ; CHECK: sub | 136 ; CHECK: sub |
| 137 ; CHECK: mov DWORD PTR | 137 ; CHECK: mov DWORD PTR |
| 138 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c | 138 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c |
| 139 ; CHECK: mov DWORD PTR | 139 ; CHECK: mov DWORD PTR |
| 140 | 140 |
| 141 | 141 |
| 142 ; Test where a bunch of i8 loads could have been fused into one | 142 ; Test where a bunch of i8 loads could have been fused into one |
| 143 ; i32 load, but a fence blocks that. | 143 ; i32 load, but a fence blocks that. |
| 144 define i32 @could_have_fused_loads() { | 144 define internal i32 @could_have_fused_loads() { |
| 145 entry: | 145 entry: |
| 146 %ptr1 = bitcast [4 x i8]* @g32_d to i8* | 146 %ptr1 = bitcast [4 x i8]* @g32_d to i8* |
| 147 %b1 = load i8, i8* %ptr1, align 1 | 147 %b1 = load i8, i8* %ptr1, align 1 |
| 148 | 148 |
| 149 %int_ptr2 = ptrtoint [4 x i8]* @g32_d to i32 | 149 %int_ptr2 = ptrtoint [4 x i8]* @g32_d to i32 |
| 150 %int_ptr_bump2 = add i32 %int_ptr2, 1 | 150 %int_ptr_bump2 = add i32 %int_ptr2, 1 |
| 151 %ptr2 = inttoptr i32 %int_ptr_bump2 to i8* | 151 %ptr2 = inttoptr i32 %int_ptr_bump2 to i8* |
| 152 %b2 = load i8, i8* %ptr2, align 1 | 152 %b2 = load i8, i8* %ptr2, align 1 |
| 153 | 153 |
| 154 %int_ptr_bump3 = add i32 %int_ptr2, 2 | 154 %int_ptr_bump3 = add i32 %int_ptr2, 2 |
| (...skipping 21 matching lines...) | (...skipping 21 matching lines...) |
| 176 ; CHECK-LABEL: could_have_fused_loads | 176 ; CHECK-LABEL: could_have_fused_loads |
| 177 ; CHECK: mov {{.*}},BYTE PTR | 177 ; CHECK: mov {{.*}},BYTE PTR |
| 178 ; CHECK: mov {{.*}},BYTE PTR | 178 ; CHECK: mov {{.*}},BYTE PTR |
| 179 ; CHECK: mov {{.*}},BYTE PTR | 179 ; CHECK: mov {{.*}},BYTE PTR |
| 180 ; CHECK: mfence | 180 ; CHECK: mfence |
| 181 ; CHECK: mov {{.*}},BYTE PTR | 181 ; CHECK: mov {{.*}},BYTE PTR |
| 182 | 182 |
| 183 | 183 |
| 184 ; Test where an identical load from two branches could have been hoisted | 184 ; Test where an identical load from two branches could have been hoisted |
| 185 ; up, and then the code merged, but a fence prevents it. | 185 ; up, and then the code merged, but a fence prevents it. |
| 186 define i32 @could_have_hoisted_loads(i32 %x) { | 186 define internal i32 @could_have_hoisted_loads(i32 %x) { |
| 187 entry: | 187 entry: |
| 188 %ptr = bitcast [4 x i8]* @g32_d to i32* | 188 %ptr = bitcast [4 x i8]* @g32_d to i32* |
| 189 %cmp = icmp eq i32 %x, 1 | 189 %cmp = icmp eq i32 %x, 1 |
| 190 br i1 %cmp, label %branch1, label %branch2 | 190 br i1 %cmp, label %branch1, label %branch2 |
| 191 branch1: | 191 branch1: |
| 192 %y = load i32, i32* %ptr, align 1 | 192 %y = load i32, i32* %ptr, align 1 |
| 193 ret i32 %y | 193 ret i32 %y |
| 194 branch2: | 194 branch2: |
| 195 call void @llvm.nacl.atomic.fence.all() | 195 call void @llvm.nacl.atomic.fence.all() |
| 196 %z = load i32, i32* %ptr, align 1 | 196 %z = load i32, i32* %ptr, align 1 |
| 197 ret i32 %z | 197 ret i32 %z |
| 198 } | 198 } |
| 199 ; CHECK-LABEL: could_have_hoisted_loads | 199 ; CHECK-LABEL: could_have_hoisted_loads |
| 200 ; CHECK: jne {{.*}} | 200 ; CHECK: jne {{.*}} |
| 201 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d | 201 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d |
| 202 ; CHECK: ret | 202 ; CHECK: ret |
| 203 ; CHECK: mfence | 203 ; CHECK: mfence |
| 204 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d | 204 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d |
| 205 ; CHECK: ret | 205 ; CHECK: ret |
| OLD | NEW |