OLD | NEW |
1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. | 1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. |
2 ; This should apply to both atomic and non-atomic loads/stores | 2 ; This should apply to both atomic and non-atomic loads/stores |
3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only | 3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only |
4 ; applies to atomic loads/stores). | 4 ; applies to atomic loads/stores). |
5 ; | 5 ; |
6 ; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s | 6 ; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s |
7 | 7 |
8 declare void @llvm.nacl.atomic.fence.all() | 8 declare void @llvm.nacl.atomic.fence.all() |
9 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) | 9 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) |
10 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) | 10 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) |
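For context, calls to these intrinsics take the following shape. This is a sketch built from the declarations above; the i32 6 memory-order argument (seq_cst in the PNaCl ABI) and the pointer %p are assumptions, since the actual call sites fall in the elided rows below.

    %v = call i32 @llvm.nacl.atomic.load.i32(i32* %p, i32 6)      ; atomic load (seq_cst assumed)
    call void @llvm.nacl.atomic.store.i32(i32 %v, i32* %p, i32 6) ; atomic store (seq_cst assumed)
    call void @llvm.nacl.atomic.fence.all()                       ; orders atomic and non-atomic accesses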
(...skipping 25 matching lines...) |
36 call void @llvm.nacl.atomic.fence.all() | 36 call void @llvm.nacl.atomic.fence.all() |
37 store i32 %l_c2, i32* %p_c, align 1 | 37 store i32 %l_c2, i32* %p_c, align 1 |
38 | 38 |
39 ret i32 %l_c2 | 39 ret i32 %l_c2 |
40 } | 40 } |
41 ; CHECK-LABEL: test_fused_load_add_a | 41 ; CHECK-LABEL: test_fused_load_add_a |
42 ; alloca store | 42 ; alloca store |
43 ; CHECK: mov {{.*}}, esp | 43 ; CHECK: mov {{.*}}, esp |
44 ; CHECK: mov dword ptr {{.*}}, 999 | 44 ; CHECK: mov dword ptr {{.*}}, 999 |
45 ; atomic store (w/ its own mfence) | 45 ; atomic store (w/ its own mfence) |
46 ; CHECK: mov {{.*}}, g32_a | 46 ; CHECK: lea {{.*}}, g32_a |
47 ; The load + add are optimized into one everywhere. | 47 ; The load + add are optimized into one everywhere. |
48 ; CHECK: add {{.*}}, dword ptr | 48 ; CHECK: add {{.*}}, dword ptr |
49 ; CHECK: mov dword ptr | 49 ; CHECK: mov dword ptr |
50 ; CHECK: mfence | 50 ; CHECK: mfence |
51 ; CHECK: mov {{.*}}, g32_b | 51 ; CHECK: lea {{.*}}, g32_b |
52 ; CHECK: add {{.*}}, dword ptr | 52 ; CHECK: add {{.*}}, dword ptr |
53 ; CHECK: mov dword ptr | 53 ; CHECK: mov dword ptr |
54 ; CHECK: mov {{.*}}, g32_c | 54 ; CHECK: lea {{.*}}, g32_c |
55 ; CHECK: add {{.*}}, dword ptr | 55 ; CHECK: add {{.*}}, dword ptr |
56 ; CHECK: mfence | 56 ; CHECK: mfence |
57 ; CHECK: mov dword ptr | 57 ; CHECK: mov dword ptr |
58 | 58 |
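The fused load + add that the CHECK lines above look for is the x86 lowering of a plain load/add/store triple. At the IR level the pattern is roughly the following sketch (hypothetical names; the real function bodies sit in the elided rows):

    %l = load i32* %p      ; non-atomic load, foldable into the add
    %l2 = add i32 %l, 1    ; becomes `add reg, dword ptr [...]` when fused
    store i32 %l2, i32* %p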
59 ; Test with the fence moved up a bit. | 59 ; Test with the fence moved up a bit. |
60 define i32 @test_fused_load_add_b() { | 60 define i32 @test_fused_load_add_b() { |
61 entry: | 61 entry: |
62 %p_alloca = alloca i8, i32 4, align 4 | 62 %p_alloca = alloca i8, i32 4, align 4 |
63 %p_alloca_bc = bitcast i8* %p_alloca to i32* | 63 %p_alloca_bc = bitcast i8* %p_alloca to i32* |
64 store i32 999, i32* %p_alloca_bc, align 1 | 64 store i32 999, i32* %p_alloca_bc, align 1 |
(...skipping 14 matching lines...) |
79 %l_c2 = add i32 %l_c, 1 | 79 %l_c2 = add i32 %l_c, 1 |
80 store i32 %l_c2, i32* %p_c, align 1 | 80 store i32 %l_c2, i32* %p_c, align 1 |
81 | 81 |
82 ret i32 %l_c2 | 82 ret i32 %l_c2 |
83 } | 83 } |
84 ; CHECK-LABEL: test_fused_load_add_b | 84 ; CHECK-LABEL: test_fused_load_add_b |
85 ; alloca store | 85 ; alloca store |
86 ; CHECK: mov {{.*}}, esp | 86 ; CHECK: mov {{.*}}, esp |
87 ; CHECK: mov dword ptr {{.*}}, 999 | 87 ; CHECK: mov dword ptr {{.*}}, 999 |
88 ; atomic store (w/ its own mfence) | 88 ; atomic store (w/ its own mfence) |
89 ; CHECK: mov {{.*}}, g32_a | 89 ; CHECK: lea {{.*}}, g32_a |
90 ; CHECK: add {{.*}}, dword ptr | 90 ; CHECK: add {{.*}}, dword ptr |
91 ; CHECK: mov dword ptr | 91 ; CHECK: mov dword ptr |
92 ; CHECK: mfence | 92 ; CHECK: mfence |
93 ; CHECK: mov {{.*}}, g32_b | 93 ; CHECK: lea {{.*}}, g32_b |
94 ; CHECK: add {{.*}}, dword ptr | 94 ; CHECK: add {{.*}}, dword ptr |
95 ; CHECK: mov dword ptr | 95 ; CHECK: mov dword ptr |
96 ; CHECK: mov {{.*}}, g32_c | 96 ; CHECK: lea {{.*}}, g32_c |
97 ; CHECK: mfence | 97 ; CHECK: mfence |
98 ; Load + add can still be optimized into one instruction | 98 ; Load + add can still be optimized into one instruction |
99 ; because it is not separated by a fence. | 99 ; because it is not separated by a fence. |
100 ; CHECK: add {{.*}}, dword ptr | 100 ; CHECK: add {{.*}}, dword ptr |
101 ; CHECK: mov dword ptr | 101 ; CHECK: mov dword ptr |
102 | 102 |
103 ; Test with the fence splitting a load/add. | 103 ; Test with the fence splitting a load/add. |
104 define i32 @test_fused_load_add_c() { | 104 define i32 @test_fused_load_add_c() { |
105 entry: | 105 entry: |
106 %p_alloca = alloca i8, i32 4, align 4 | 106 %p_alloca = alloca i8, i32 4, align 4 |
(...skipping 16 matching lines...) |
123 %l_c2 = add i32 %l_c, 1 | 123 %l_c2 = add i32 %l_c, 1 |
124 store i32 %l_c2, i32* %p_c, align 1 | 124 store i32 %l_c2, i32* %p_c, align 1 |
125 | 125 |
126 ret i32 %l_c2 | 126 ret i32 %l_c2 |
127 } | 127 } |
128 ; CHECK-LABEL: test_fused_load_add_c | 128 ; CHECK-LABEL: test_fused_load_add_c |
129 ; alloca store | 129 ; alloca store |
130 ; CHECK: mov {{.*}}, esp | 130 ; CHECK: mov {{.*}}, esp |
131 ; CHECK: mov dword ptr {{.*}}, 999 | 131 ; CHECK: mov dword ptr {{.*}}, 999 |
132 ; atomic store (w/ its own mfence) | 132 ; atomic store (w/ its own mfence) |
133 ; CHECK: mov {{.*}}, g32_a | 133 ; CHECK: lea {{.*}}, g32_a |
134 ; CHECK: add {{.*}}, dword ptr | 134 ; CHECK: add {{.*}}, dword ptr |
135 ; CHECK: mov dword ptr | 135 ; CHECK: mov dword ptr |
136 ; CHECK: mfence | 136 ; CHECK: mfence |
137 ; CHECK: mov {{.*}}, g32_b | 137 ; CHECK: lea {{.*}}, g32_b |
138 ; This load + add pair is no longer optimized into one, | 138 ; This load + add pair is no longer optimized into one, |
139 ; though perhaps it should be legal as long as | 139 ; though perhaps it should be legal as long as |
140 ; the load stays on the same side of the fence. | 140 ; the load stays on the same side of the fence. |
141 ; CHECK: mov {{.*}}, dword ptr | 141 ; CHECK: mov {{.*}}, dword ptr |
142 ; CHECK: mfence | 142 ; CHECK: mfence |
143 ; CHECK: add {{.*}}, 1 | 143 ; CHECK: add {{.*}}, 1 |
144 ; CHECK: mov dword ptr | 144 ; CHECK: mov dword ptr |
145 ; CHECK: mov {{.*}}, g32_c | 145 ; CHECK: lea {{.*}}, g32_c |
146 ; CHECK: add {{.*}}, dword ptr | 146 ; CHECK: add {{.*}}, dword ptr |
147 ; CHECK: mov dword ptr | 147 ; CHECK: mov dword ptr |
148 | 148 |
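What distinguishes this variant is that the fence sits between the load and the add, roughly as in this sketch (hypothetical names):

    %l = load i32* %p
    call void @llvm.nacl.atomic.fence.all() ; folding the load into the add would
    %l2 = add i32 %l, 1                     ; move the load across this fence
    store i32 %l2, i32* %p

hence the separate `mov {{.*}}, dword ptr` / `add {{.*}}, 1` pair in the CHECKs above.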
149 | 149 |
150 ; Test where a bunch of i8 loads could have been fused into one | 150 ; Test where a bunch of i8 loads could have been fused into one |
151 ; i32 load, but a fence blocks that. | 151 ; i32 load, but a fence blocks that. |
152 define i32 @could_have_fused_loads() { | 152 define i32 @could_have_fused_loads() { |
153 entry: | 153 entry: |
154 %ptr1 = bitcast [4 x i8]* @g32_d to i8* | 154 %ptr1 = bitcast [4 x i8]* @g32_d to i8* |
155 %b1 = load i8* %ptr1 | 155 %b1 = load i8* %ptr1 |
(...skipping 19 matching lines...) |
175 %b12 = or i32 %b1.ext, %b2.shift | 175 %b12 = or i32 %b1.ext, %b2.shift |
176 %b3.ext = zext i8 %b3 to i32 | 176 %b3.ext = zext i8 %b3 to i32 |
177 %b3.shift = shl i32 %b3.ext, 16 | 177 %b3.shift = shl i32 %b3.ext, 16 |
178 %b123 = or i32 %b12, %b3.shift | 178 %b123 = or i32 %b12, %b3.shift |
179 %b4.ext = zext i8 %b4 to i32 | 179 %b4.ext = zext i8 %b4 to i32 |
180 %b4.shift = shl i32 %b4.ext, 24 | 180 %b4.shift = shl i32 %b4.ext, 24 |
181 %b1234 = or i32 %b123, %b4.shift | 181 %b1234 = or i32 %b123, %b4.shift |
182 ret i32 %b1234 | 182 ret i32 %b1234 |
183 } | 183 } |
184 ; CHECK-LABEL: could_have_fused_loads | 184 ; CHECK-LABEL: could_have_fused_loads |
185 ; CHECK: mov {{.*}}, g32_d | 185 ; CHECK: lea {{.*}}, g32_d |
186 ; CHECK: mov {{.*}}, byte ptr | 186 ; CHECK: mov {{.*}}, byte ptr |
187 ; CHECK: mov {{.*}}, byte ptr | 187 ; CHECK: mov {{.*}}, byte ptr |
188 ; CHECK: mov {{.*}}, byte ptr | 188 ; CHECK: mov {{.*}}, byte ptr |
189 ; CHECK: mfence | 189 ; CHECK: mfence |
190 ; CHECK: mov {{.*}}, byte ptr | 190 ; CHECK: mov {{.*}}, byte ptr |
191 | 191 |
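The single wide load that the fence rules out would have looked roughly like this sketch (reusing @g32_d from the test; %p32 and %word are hypothetical):

    %p32 = bitcast [4 x i8]* @g32_d to i32*
    %word = load i32* %p32   ; one i32 load in place of the four i8 loads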
192 | 192 |
193 ; Test where an identical load from two branches could have been hoisted | 193 ; Test where an identical load from two branches could have been hoisted |
194 ; up, and then the code merged, but a fence prevents it. | 194 ; up, and then the code merged, but a fence prevents it. |
195 define i32 @could_have_hoisted_loads(i32 %x) { | 195 define i32 @could_have_hoisted_loads(i32 %x) { |
196 entry: | 196 entry: |
197 %ptr = bitcast [4 x i8]* @g32_d to i32* | 197 %ptr = bitcast [4 x i8]* @g32_d to i32* |
198 %cmp = icmp eq i32 %x, 1 | 198 %cmp = icmp eq i32 %x, 1 |
199 br i1 %cmp, label %branch1, label %branch2 | 199 br i1 %cmp, label %branch1, label %branch2 |
200 branch1: | 200 branch1: |
201 %y = load i32* %ptr | 201 %y = load i32* %ptr |
202 ret i32 %y | 202 ret i32 %y |
203 branch2: | 203 branch2: |
204 call void @llvm.nacl.atomic.fence.all() | 204 call void @llvm.nacl.atomic.fence.all() |
205 %z = load i32* %ptr | 205 %z = load i32* %ptr |
206 ret i32 %z | 206 ret i32 %z |
207 } | 207 } |
208 ; CHECK-LABEL: could_have_hoisted_loads | 208 ; CHECK-LABEL: could_have_hoisted_loads |
209 ; CHECK: mov {{.*}}, g32_d | 209 ; CHECK: lea {{.*}}, g32_d |
210 ; CHECK: je {{.*}} | 210 ; CHECK: je {{.*}} |
211 ; CHECK: jmp {{.*}} | 211 ; CHECK: jmp {{.*}} |
212 ; CHECK: mov {{.*}}, dword ptr | 212 ; CHECK: mov {{.*}}, dword ptr |
213 ; CHECK: ret | 213 ; CHECK: ret |
214 ; CHECK: mfence | 214 ; CHECK: mfence |
215 ; CHECK: mov {{.*}}, dword ptr | 215 ; CHECK: mov {{.*}}, dword ptr |
216 ; CHECK: ret | 216 ; CHECK: ret |
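For comparison, the hoisted-and-merged form that the fence forbids would be roughly this sketch (reusing the names from the test):

    entry:
      %ptr = bitcast [4 x i8]* @g32_d to i32*
      %y = load i32* %ptr                        ; single load hoisted above the branch;
      %cmp = icmp eq i32 %x, 1                   ; on the branch2 path this load would
      br i1 %cmp, label %branch1, label %branch2 ; have floated above the fence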