| OLD | NEW |
| 1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. | 1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. |
| 2 ; This should apply to both atomic and non-atomic loads/stores | 2 ; This should apply to both atomic and non-atomic loads/stores |
| 3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only | 3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only |
| 4 ; applies to atomic load/stores). | 4 ; applies to atomic load/stores). |
| 5 ; | 5 ; |
| 6 ; RUN: %p2i -i %s --assemble --disassemble --args -O2 --verbose none \ | 6 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s |
| 7 ; RUN: | FileCheck %s | |
| 8 | 7 |
| 9 declare void @llvm.nacl.atomic.fence.all() | 8 declare void @llvm.nacl.atomic.fence.all() |
| 10 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) | 9 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) |
| 11 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) | 10 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) |
| 12 | 11 |
| 13 @g32_a = internal global [4 x i8] zeroinitializer, align 4 | 12 @g32_a = internal global [4 x i8] zeroinitializer, align 4 |
| 14 @g32_b = internal global [4 x i8] zeroinitializer, align 4 | 13 @g32_b = internal global [4 x i8] zeroinitializer, align 4 |
| 15 @g32_c = internal global [4 x i8] zeroinitializer, align 4 | 14 @g32_c = internal global [4 x i8] zeroinitializer, align 4 |
| 16 @g32_d = internal global [4 x i8] zeroinitializer, align 4 | 15 @g32_d = internal global [4 x i8] zeroinitializer, align 4 |
| 17 | 16 |
| (...skipping 20 matching lines...) |
| 38 store i32 %l_c2, i32* %p_c, align 1 | 37 store i32 %l_c2, i32* %p_c, align 1 |
| 39 | 38 |
| 40 ret i32 %l_c2 | 39 ret i32 %l_c2 |
| 41 } | 40 } |
| 42 ; CHECK-LABEL: test_fused_load_add_a | 41 ; CHECK-LABEL: test_fused_load_add_a |
| 43 ; alloca store | 42 ; alloca store |
| 44 ; CHECK: mov {{.*}},esp | 43 ; CHECK: mov {{.*}},esp |
| 45 ; CHECK: mov DWORD PTR {{.*}},0x3e7 | 44 ; CHECK: mov DWORD PTR {{.*}},0x3e7 |
| 46 ; atomic store (w/ its own mfence) | 45 ; atomic store (w/ its own mfence) |
| 47 ; The load + add are optimized into one everywhere. | 46 ; The load + add are optimized into one everywhere. |
| 48 ; CHECK: add {{.*}},DWORD PTR {{.*}}.bss | 47 ; CHECK: add {{.*}},DWORD PTR {{.*}}g32_a |
| 49 ; CHECK: mov DWORD PTR | 48 ; CHECK: mov DWORD PTR |
| 50 ; CHECK: mfence | 49 ; CHECK: mfence |
| 51 ; CHECK: add {{.*}},DWORD PTR {{.*}}.bss | 50 ; CHECK: add {{.*}},DWORD PTR {{.*}}g32_b |
| 52 ; CHECK: mov DWORD PTR | 51 ; CHECK: mov DWORD PTR |
| 53 ; CHECK: add {{.*}},DWORD PTR {{.*}}.bss | 52 ; CHECK: add {{.*}},DWORD PTR {{.*}}g32_c |
| 54 ; CHECK: mfence | 53 ; CHECK: mfence |
| 55 ; CHECK: mov DWORD PTR | 54 ; CHECK: mov DWORD PTR |
| 56 | 55 |
| 57 ; Test with the fence moved up a bit. | 56 ; Test with the fence moved up a bit. |
| 58 define i32 @test_fused_load_add_b() { | 57 define i32 @test_fused_load_add_b() { |
| 59 entry: | 58 entry: |
| 60 %p_alloca = alloca i8, i32 4, align 4 | 59 %p_alloca = alloca i8, i32 4, align 4 |
| 61 %p_alloca_bc = bitcast i8* %p_alloca to i32* | 60 %p_alloca_bc = bitcast i8* %p_alloca to i32* |
| 62 store i32 999, i32* %p_alloca_bc, align 1 | 61 store i32 999, i32* %p_alloca_bc, align 1 |
| 63 | 62 |
| (...skipping 13 matching lines...) |
| 77 %l_c2 = add i32 %l_c, 1 | 76 %l_c2 = add i32 %l_c, 1 |
| 78 store i32 %l_c2, i32* %p_c, align 1 | 77 store i32 %l_c2, i32* %p_c, align 1 |
| 79 | 78 |
| 80 ret i32 %l_c2 | 79 ret i32 %l_c2 |
| 81 } | 80 } |
| 82 ; CHECK-LABEL: test_fused_load_add_b | 81 ; CHECK-LABEL: test_fused_load_add_b |
| 83 ; alloca store | 82 ; alloca store |
| 84 ; CHECK: mov {{.*}},esp | 83 ; CHECK: mov {{.*}},esp |
| 85 ; CHECK: mov DWORD PTR {{.*}},0x3e7 | 84 ; CHECK: mov DWORD PTR {{.*}},0x3e7 |
| 86 ; atomic store (w/ its own mfence) | 85 ; atomic store (w/ its own mfence) |
| 87 ; CHECK: add {{.*}},DWORD PTR {{.*}}.bss | 86 ; CHECK: add {{.*}},DWORD PTR {{.*}}g32_a |
| 88 ; CHECK: mov DWORD PTR | 87 ; CHECK: mov DWORD PTR |
| 89 ; CHECK: mfence | 88 ; CHECK: mfence |
| 90 ; CHECK: add {{.*}},DWORD PTR {{.*}}.bss | 89 ; CHECK: add {{.*}},DWORD PTR {{.*}}g32_b |
| 91 ; CHECK: mov DWORD PTR | 90 ; CHECK: mov DWORD PTR |
| 92 ; CHECK: mfence | 91 ; CHECK: mfence |
| 93 ; Load + add can still be optimized into one instruction | 92 ; Load + add can still be optimized into one instruction |
| 94 ; because they are not separated by a fence. | 93 ; because they are not separated by a fence. |
| 95 ; CHECK: add {{.*}},DWORD PTR {{.*}}.bss | 94 ; CHECK: add {{.*}},DWORD PTR {{.*}}g32_c |
| 96 ; CHECK: mov DWORD PTR | 95 ; CHECK: mov DWORD PTR |
| 97 | 96 |
| 98 ; Test with the fence splitting a load/add. | 97 ; Test with the fence splitting a load/add. |
| 99 define i32 @test_fused_load_add_c() { | 98 define i32 @test_fused_load_add_c() { |
| 100 entry: | 99 entry: |
| 101 %p_alloca = alloca i8, i32 4, align 4 | 100 %p_alloca = alloca i8, i32 4, align 4 |
| 102 %p_alloca_bc = bitcast i8* %p_alloca to i32* | 101 %p_alloca_bc = bitcast i8* %p_alloca to i32* |
| 103 store i32 999, i32* %p_alloca_bc, align 1 | 102 store i32 999, i32* %p_alloca_bc, align 1 |
| 104 | 103 |
| 105 %p_a = bitcast [4 x i8]* @g32_a to i32* | 104 %p_a = bitcast [4 x i8]* @g32_a to i32* |
| (...skipping 12 matching lines...) |
| 118 %l_c2 = add i32 %l_c, 1 | 117 %l_c2 = add i32 %l_c, 1 |
| 119 store i32 %l_c2, i32* %p_c, align 1 | 118 store i32 %l_c2, i32* %p_c, align 1 |
| 120 | 119 |
| 121 ret i32 %l_c2 | 120 ret i32 %l_c2 |
| 122 } | 121 } |
| 123 ; CHECK-LABEL: test_fused_load_add_c | 122 ; CHECK-LABEL: test_fused_load_add_c |
| 124 ; alloca store | 123 ; alloca store |
| 125 ; CHECK: mov {{.*}},esp | 124 ; CHECK: mov {{.*}},esp |
| 126 ; CHECK: mov DWORD PTR {{.*}},0x3e7 | 125 ; CHECK: mov DWORD PTR {{.*}},0x3e7 |
| 127 ; atomic store (w/ its own mfence) | 126 ; atomic store (w/ its own mfence) |
| 128 ; CHECK: add {{.*}},DWORD PTR {{.*}}.bss | 127 ; CHECK: add {{.*}},DWORD PTR {{.*}}g32_a |
| 129 ; CHECK: mov DWORD PTR | 128 ; CHECK: mov DWORD PTR |
| 130 ; CHECK: mfence | 129 ; CHECK: mfence |
| 131 ; This load + add is no longer optimized into one, | 130 ; This load + add is no longer optimized into one, |
| 132 ; though perhaps it should be legal as long as | 131 ; though perhaps it should be legal as long as |
| 133 ; the load stays on the same side of the fence. | 132 ; the load stays on the same side of the fence. |
| 134 ; CHECK: mov {{.*}},DWORD PTR {{.*}}.bss | 133 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_b |
| 135 ; CHECK: mfence | 134 ; CHECK: mfence |
| 136 ; CHECK: add {{.*}},0x1 | 135 ; CHECK: add {{.*}},0x1 |
| 137 ; CHECK: mov DWORD PTR | 136 ; CHECK: mov DWORD PTR |
| 138 ; CHECK: add {{.*}},DWORD PTR {{.*}}.bss | 137 ; CHECK: add {{.*}},DWORD PTR {{.*}}g32_c |
| 139 ; CHECK: mov DWORD PTR | 138 ; CHECK: mov DWORD PTR |
| 140 | 139 |
| 141 | 140 |
| 142 ; Test where a bunch of i8 loads could have been fused into one | 141 ; Test where a bunch of i8 loads could have been fused into one |
| 143 ; i32 load, but a fence blocks that. | 142 ; i32 load, but a fence blocks that. |
| 144 define i32 @could_have_fused_loads() { | 143 define i32 @could_have_fused_loads() { |
| 145 entry: | 144 entry: |
| 146 %ptr1 = bitcast [4 x i8]* @g32_d to i8* | 145 %ptr1 = bitcast [4 x i8]* @g32_d to i8* |
| 147 %b1 = load i8* %ptr1, align 1 | 146 %b1 = load i8* %ptr1, align 1 |
| 148 | 147 |
| (...skipping 42 matching lines...) |
| 191 branch1: | 190 branch1: |
| 192 %y = load i32* %ptr, align 1 | 191 %y = load i32* %ptr, align 1 |
| 193 ret i32 %y | 192 ret i32 %y |
| 194 branch2: | 193 branch2: |
| 195 call void @llvm.nacl.atomic.fence.all() | 194 call void @llvm.nacl.atomic.fence.all() |
| 196 %z = load i32* %ptr, align 1 | 195 %z = load i32* %ptr, align 1 |
| 197 ret i32 %z | 196 ret i32 %z |
| 198 } | 197 } |
| 199 ; CHECK-LABEL: could_have_hoisted_loads | 198 ; CHECK-LABEL: could_have_hoisted_loads |
| 200 ; CHECK: jne {{.*}} | 199 ; CHECK: jne {{.*}} |
| 201 ; CHECK: mov {{.*}},DWORD PTR {{.*}}.bss | 200 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d |
| 202 ; CHECK: ret | 201 ; CHECK: ret |
| 203 ; CHECK: mfence | 202 ; CHECK: mfence |
| 204 ; CHECK: mov {{.*}},DWORD PTR {{.*}}.bss | 203 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d |
| 205 ; CHECK: ret | 204 ; CHECK: ret |
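For readers skimming the diff: the invariant these tests pin down is that `@llvm.nacl.atomic.fence.all()` must act as a barrier for *all* memory accesses, atomic and non-atomic alike, so the load+add folding that -O2 normally performs may happen on either side of the fence but never across it. Below is a minimal standalone sketch of that shape, not part of the reviewed file; the function and global names (`@fence_blocks_fusion`, `@g`) are hypothetical, and it uses the same era-appropriate typed-pointer IR syntax as the test above.

```llvm
declare void @llvm.nacl.atomic.fence.all()

@g = internal global [4 x i8] zeroinitializer, align 4

define i32 @fence_blocks_fusion() {
entry:
  %p = bitcast [4 x i8]* @g to i32*
  %a = load i32* %p, align 1   ; may be folded into the add below
  %b = add i32 %a, 1
  call void @llvm.nacl.atomic.fence.all()
  %c = load i32* %p, align 1   ; must stay on this side of the fence
  %d = add i32 %c, %b
  ret i32 %d
}
```

Checked the same way as the tests above, a backend honoring the fence should be free to fuse the first load into an `add {{.*}},DWORD PTR` but must emit the second load as its own `mov` after the `mfence` rather than folding it across the barrier.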