OLD | NEW |
1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. | 1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. |
2 ; This should apply to both atomic and non-atomic loads/stores | 2 ; This should apply to both atomic and non-atomic loads/stores |
3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only | 3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only |
4 ; applies to atomic loads/stores). | 4 ; applies to atomic loads/stores). |
5 ; | 5 ; |
6 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s | 6 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s |
7 | 7 |
8 declare void @llvm.nacl.atomic.fence.all() | 8 declare void @llvm.nacl.atomic.fence.all() |
9 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) | 9 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) |
10 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) | 10 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) |
11 | 11 |
12 @g32_a = internal global [4 x i8] zeroinitializer, align 4 | 12 @g32_a = internal global [4 x i8] zeroinitializer, align 4 |
13 @g32_b = internal global [4 x i8] zeroinitializer, align 4 | 13 @g32_b = internal global [4 x i8] zeroinitializer, align 4 |
14 @g32_c = internal global [4 x i8] zeroinitializer, align 4 | 14 @g32_c = internal global [4 x i8] zeroinitializer, align 4 |
15 @g32_d = internal global [4 x i8] zeroinitializer, align 4 | 15 @g32_d = internal global [4 x i8] zeroinitializer, align 4 |
16 | 16 |
17 define i32 @test_fused_load_sub_a() { | 17 define internal i32 @test_fused_load_sub_a() { |
18 entry: | 18 entry: |
19 %p_alloca = alloca i8, i32 4, align 4 | 19 %p_alloca = alloca i8, i32 4, align 4 |
20 %p_alloca_bc = bitcast i8* %p_alloca to i32* | 20 %p_alloca_bc = bitcast i8* %p_alloca to i32* |
21 store i32 999, i32* %p_alloca_bc, align 1 | 21 store i32 999, i32* %p_alloca_bc, align 1 |
22 | 22 |
23 %p_a = bitcast [4 x i8]* @g32_a to i32* | 23 %p_a = bitcast [4 x i8]* @g32_a to i32* |
24 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) | 24 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) |
25 %l_a2 = sub i32 1, %l_a | 25 %l_a2 = sub i32 1, %l_a |
26 call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6) | 26 call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6) |
27 | 27 |
(...skipping 19 matching lines...)
47 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a | 47 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a |
48 ; CHECK: mov DWORD PTR | 48 ; CHECK: mov DWORD PTR |
49 ; CHECK: mfence | 49 ; CHECK: mfence |
50 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b | 50 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b |
51 ; CHECK: mov DWORD PTR | 51 ; CHECK: mov DWORD PTR |
52 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c | 52 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c |
53 ; CHECK: mfence | 53 ; CHECK: mfence |
54 ; CHECK: mov DWORD PTR | 54 ; CHECK: mov DWORD PTR |
55 | 55 |
56 ; Test with the fence moved up a bit. | 56 ; Test with the fence moved up a bit. |
57 define i32 @test_fused_load_sub_b() { | 57 define internal i32 @test_fused_load_sub_b() { |
58 entry: | 58 entry: |
59 %p_alloca = alloca i8, i32 4, align 4 | 59 %p_alloca = alloca i8, i32 4, align 4 |
60 %p_alloca_bc = bitcast i8* %p_alloca to i32* | 60 %p_alloca_bc = bitcast i8* %p_alloca to i32* |
61 store i32 999, i32* %p_alloca_bc, align 1 | 61 store i32 999, i32* %p_alloca_bc, align 1 |
62 | 62 |
63 %p_a = bitcast [4 x i8]* @g32_a to i32* | 63 %p_a = bitcast [4 x i8]* @g32_a to i32* |
64 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) | 64 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) |
65 %l_a2 = sub i32 1, %l_a | 65 %l_a2 = sub i32 1, %l_a |
66 call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6) | 66 call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6) |
67 | 67 |
(...skipping 20 matching lines...)
88 ; CHECK: mfence | 88 ; CHECK: mfence |
89 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b | 89 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b |
90 ; CHECK: mov DWORD PTR | 90 ; CHECK: mov DWORD PTR |
91 ; CHECK: mfence | 91 ; CHECK: mfence |
92 ; Load + sub can still be optimized into one instruction | 92 ; Load + sub can still be optimized into one instruction |
93 ; because it is not separated by a fence. | 93 ; because it is not separated by a fence. |
94 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c | 94 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c |
95 ; CHECK: mov DWORD PTR | 95 ; CHECK: mov DWORD PTR |
96 | 96 |
97 ; Test with the fence splitting a load/sub. | 97 ; Test with the fence splitting a load/sub. |
98 define i32 @test_fused_load_sub_c() { | 98 define internal i32 @test_fused_load_sub_c() { |
99 entry: | 99 entry: |
100 %p_alloca = alloca i8, i32 4, align 4 | 100 %p_alloca = alloca i8, i32 4, align 4 |
101 %p_alloca_bc = bitcast i8* %p_alloca to i32* | 101 %p_alloca_bc = bitcast i8* %p_alloca to i32* |
102 store i32 999, i32* %p_alloca_bc, align 1 | 102 store i32 999, i32* %p_alloca_bc, align 1 |
103 | 103 |
104 %p_a = bitcast [4 x i8]* @g32_a to i32* | 104 %p_a = bitcast [4 x i8]* @g32_a to i32* |
105 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) | 105 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) |
106 %l_a2 = sub i32 1, %l_a | 106 %l_a2 = sub i32 1, %l_a |
107 call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6) | 107 call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6) |
108 | 108 |
(...skipping 25 matching lines...)
134 ; CHECK: mfence | 134 ; CHECK: mfence |
135 ; CHECK: mov {{.*}},0x1 | 135 ; CHECK: mov {{.*}},0x1 |
136 ; CHECK: sub | 136 ; CHECK: sub |
137 ; CHECK: mov DWORD PTR | 137 ; CHECK: mov DWORD PTR |
138 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c | 138 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c |
139 ; CHECK: mov DWORD PTR | 139 ; CHECK: mov DWORD PTR |
140 | 140 |
141 | 141 |
142 ; Test where a bunch of i8 loads could have been fused into one | 142 ; Test where a bunch of i8 loads could have been fused into one |
143 ; i32 load, but a fence blocks that. | 143 ; i32 load, but a fence blocks that. |
144 define i32 @could_have_fused_loads() { | 144 define internal i32 @could_have_fused_loads() { |
145 entry: | 145 entry: |
146 %ptr1 = bitcast [4 x i8]* @g32_d to i8* | 146 %ptr1 = bitcast [4 x i8]* @g32_d to i8* |
147 %b1 = load i8, i8* %ptr1, align 1 | 147 %b1 = load i8, i8* %ptr1, align 1 |
148 | 148 |
149 %int_ptr2 = ptrtoint [4 x i8]* @g32_d to i32 | 149 %int_ptr2 = ptrtoint [4 x i8]* @g32_d to i32 |
150 %int_ptr_bump2 = add i32 %int_ptr2, 1 | 150 %int_ptr_bump2 = add i32 %int_ptr2, 1 |
151 %ptr2 = inttoptr i32 %int_ptr_bump2 to i8* | 151 %ptr2 = inttoptr i32 %int_ptr_bump2 to i8* |
152 %b2 = load i8, i8* %ptr2, align 1 | 152 %b2 = load i8, i8* %ptr2, align 1 |
153 | 153 |
154 %int_ptr_bump3 = add i32 %int_ptr2, 2 | 154 %int_ptr_bump3 = add i32 %int_ptr2, 2 |
(...skipping 21 matching lines...)
176 ; CHECK-LABEL: could_have_fused_loads | 176 ; CHECK-LABEL: could_have_fused_loads |
177 ; CHECK: mov {{.*}},BYTE PTR | 177 ; CHECK: mov {{.*}},BYTE PTR |
178 ; CHECK: mov {{.*}},BYTE PTR | 178 ; CHECK: mov {{.*}},BYTE PTR |
179 ; CHECK: mov {{.*}},BYTE PTR | 179 ; CHECK: mov {{.*}},BYTE PTR |
180 ; CHECK: mfence | 180 ; CHECK: mfence |
181 ; CHECK: mov {{.*}},BYTE PTR | 181 ; CHECK: mov {{.*}},BYTE PTR |
182 | 182 |
183 | 183 |
184 ; Test where an identical load from two branches could have been hoisted | 184 ; Test where an identical load from two branches could have been hoisted |
185 ; up, and then the code merged, but a fence prevents it. | 185 ; up, and then the code merged, but a fence prevents it. |
186 define i32 @could_have_hoisted_loads(i32 %x) { | 186 define internal i32 @could_have_hoisted_loads(i32 %x) { |
187 entry: | 187 entry: |
188 %ptr = bitcast [4 x i8]* @g32_d to i32* | 188 %ptr = bitcast [4 x i8]* @g32_d to i32* |
189 %cmp = icmp eq i32 %x, 1 | 189 %cmp = icmp eq i32 %x, 1 |
190 br i1 %cmp, label %branch1, label %branch2 | 190 br i1 %cmp, label %branch1, label %branch2 |
191 branch1: | 191 branch1: |
192 %y = load i32, i32* %ptr, align 1 | 192 %y = load i32, i32* %ptr, align 1 |
193 ret i32 %y | 193 ret i32 %y |
194 branch2: | 194 branch2: |
195 call void @llvm.nacl.atomic.fence.all() | 195 call void @llvm.nacl.atomic.fence.all() |
196 %z = load i32, i32* %ptr, align 1 | 196 %z = load i32, i32* %ptr, align 1 |
197 ret i32 %z | 197 ret i32 %z |
198 } | 198 } |
199 ; CHECK-LABEL: could_have_hoisted_loads | 199 ; CHECK-LABEL: could_have_hoisted_loads |
200 ; CHECK: jne {{.*}} | 200 ; CHECK: jne {{.*}} |
201 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d | 201 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d |
202 ; CHECK: ret | 202 ; CHECK: ret |
203 ; CHECK: mfence | 203 ; CHECK: mfence |
204 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d | 204 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d |
205 ; CHECK: ret | 205 ; CHECK: ret |
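
For reference, the header comment distinguishes the two fence intrinsics: the plain @llvm.nacl.atomic.fence only orders atomic loads/stores, while @llvm.nacl.atomic.fence.all also keeps ordinary loads/stores from moving across it. A minimal IR sketch of that contrast, not part of this test (the @fence_variants_sketch function and the @g32_e global are made up for illustration):

declare void @llvm.nacl.atomic.fence(i32)
declare void @llvm.nacl.atomic.fence.all()

@g32_e = internal global [4 x i8] zeroinitializer, align 4

define internal void @fence_variants_sketch() {
entry:
  %p_e = bitcast [4 x i8]* @g32_e to i32*
  ; Non-atomic store: the plain fence (memory order 6 = seq_cst) only
  ; constrains atomic accesses, so this store may still be moved across it.
  store i32 1, i32* %p_e, align 1
  call void @llvm.nacl.atomic.fence(i32 6)
  ; fence.all additionally keeps non-atomic loads/stores, like the store
  ; below, from being reordered across it.
  call void @llvm.nacl.atomic.fence.all()
  store i32 2, i32* %p_e, align 1
  ret void
}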