OLD | NEW |
1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. | 1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. |
2 ; This should apply to both atomic and non-atomic loads/stores | 2 ; This should apply to both atomic and non-atomic loads/stores |
3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only | 3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only |
4 ; applies to atomic load/stores). | 4 ; applies to atomic load/stores). |
5 ; | 5 ; |
6 ; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s | |
7 ; RUN: %llvm2ice -O2 --verbose none %s \ | 6 ; RUN: %llvm2ice -O2 --verbose none %s \ |
8 ; RUN: | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj | 7 ; RUN: | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj \ |
| 8 ; RUN: | llvm-objdump -d -symbolize -x86-asm-syntax=intel - | FileCheck %s |
| 9 |
| 10 ; TODO(jvoung): llvm-objdump doesn't symbolize global symbols well, so we |
| 11 ; have [0] == g32_a, [4] == g32_b, [8] == g32_c. |
| 12 ; g32_d is also [0] because it's in the .data section instead of .bss. |
9 | 13 |
10 declare void @llvm.nacl.atomic.fence.all() | 14 declare void @llvm.nacl.atomic.fence.all() |
11 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) | 15 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) |
12 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) | 16 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) |
13 | 17 |
14 @g32_a = internal global [4 x i8] zeroinitializer, align 4 | 18 @g32_a = internal global [4 x i8] zeroinitializer, align 4 |
15 @g32_b = internal global [4 x i8] zeroinitializer, align 4 | 19 @g32_b = internal global [4 x i8] zeroinitializer, align 4 |
16 @g32_c = internal global [4 x i8] zeroinitializer, align 4 | 20 @g32_c = internal global [4 x i8] zeroinitializer, align 4 |
17 @g32_d = internal global [4 x i8] c"\02\00\00\00", align 4 | 21 @g32_d = internal global [4 x i8] c"\02\00\00\00", align 4 |
18 | 22 |
(...skipping 19 matching lines...) |
38 call void @llvm.nacl.atomic.fence.all() | 42 call void @llvm.nacl.atomic.fence.all() |
39 store i32 %l_c2, i32* %p_c, align 1 | 43 store i32 %l_c2, i32* %p_c, align 1 |
40 | 44 |
41 ret i32 %l_c2 | 45 ret i32 %l_c2 |
42 } | 46 } |
43 ; CHECK-LABEL: test_fused_load_add_a | 47 ; CHECK-LABEL: test_fused_load_add_a |
44 ; alloca store | 48 ; alloca store |
45 ; CHECK: mov {{.*}}, esp | 49 ; CHECK: mov {{.*}}, esp |
46 ; CHECK: mov dword ptr {{.*}}, 999 | 50 ; CHECK: mov dword ptr {{.*}}, 999 |
47 ; atomic store (w/ its own mfence) | 51 ; atomic store (w/ its own mfence) |
48 ; CHECK: lea {{.*}}, g32_a | 52 ; CHECK: dword ptr [0] |
49 ; The load + add are optimized into one everywhere. | 53 ; The load + add are optimized into one everywhere. |
50 ; CHECK: add {{.*}}, dword ptr | 54 ; CHECK: add {{.*}}, dword ptr |
51 ; CHECK: mov dword ptr | 55 ; CHECK: mov dword ptr |
52 ; CHECK: mfence | 56 ; CHECK: mfence |
53 ; CHECK: lea {{.*}}, g32_b | 57 ; CHECK: dword ptr [4] |
54 ; CHECK: add {{.*}}, dword ptr | 58 ; CHECK: add {{.*}}, dword ptr |
55 ; CHECK: mov dword ptr | 59 ; CHECK: mov dword ptr |
56 ; CHECK: lea {{.*}}, g32_c | 60 ; CHECK: dword ptr [8] |
57 ; CHECK: add {{.*}}, dword ptr | 61 ; CHECK: add {{.*}}, dword ptr |
58 ; CHECK: mfence | 62 ; CHECK: mfence |
59 ; CHECK: mov dword ptr | 63 ; CHECK: mov dword ptr |
60 | 64 |
61 ; Test with the fence moved up a bit. | 65 ; Test with the fence moved up a bit. |
62 define i32 @test_fused_load_add_b() { | 66 define i32 @test_fused_load_add_b() { |
63 entry: | 67 entry: |
64 %p_alloca = alloca i8, i32 4, align 4 | 68 %p_alloca = alloca i8, i32 4, align 4 |
65 %p_alloca_bc = bitcast i8* %p_alloca to i32* | 69 %p_alloca_bc = bitcast i8* %p_alloca to i32* |
66 store i32 999, i32* %p_alloca_bc, align 1 | 70 store i32 999, i32* %p_alloca_bc, align 1 |
(...skipping 14 matching lines...) |
81 %l_c2 = add i32 %l_c, 1 | 85 %l_c2 = add i32 %l_c, 1 |
82 store i32 %l_c2, i32* %p_c, align 1 | 86 store i32 %l_c2, i32* %p_c, align 1 |
83 | 87 |
84 ret i32 %l_c2 | 88 ret i32 %l_c2 |
85 } | 89 } |
86 ; CHECK-LABEL: test_fused_load_add_b | 90 ; CHECK-LABEL: test_fused_load_add_b |
87 ; alloca store | 91 ; alloca store |
88 ; CHECK: mov {{.*}}, esp | 92 ; CHECK: mov {{.*}}, esp |
89 ; CHECK: mov dword ptr {{.*}}, 999 | 93 ; CHECK: mov dword ptr {{.*}}, 999 |
90 ; atomic store (w/ its own mfence) | 94 ; atomic store (w/ its own mfence) |
91 ; CHECK: lea {{.*}}, g32_a | 95 ; CHECK: dword ptr [0] |
92 ; CHECK: add {{.*}}, dword ptr | 96 ; CHECK: add {{.*}}, dword ptr |
93 ; CHECK: mov dword ptr | 97 ; CHECK: mov dword ptr |
94 ; CHECK: mfence | 98 ; CHECK: mfence |
95 ; CHECK: lea {{.*}}, g32_b | 99 ; CHECK: dword ptr [4] |
96 ; CHECK: add {{.*}}, dword ptr | 100 ; CHECK: add {{.*}}, dword ptr |
97 ; CHECK: mov dword ptr | 101 ; CHECK: mov dword ptr |
98 ; CHECK: lea {{.*}}, g32_c | 102 ; CHECK: dword ptr [8] |
99 ; CHECK: mfence | 103 ; CHECK: mfence |
100 ; Load + add can still be optimized into one instruction | 104 ; Load + add can still be optimized into one instruction |
101 ; because it is not separated by a fence. | 105 ; because it is not separated by a fence. |
102 ; CHECK: add {{.*}}, dword ptr | 106 ; CHECK: add {{.*}}, dword ptr |
103 ; CHECK: mov dword ptr | 107 ; CHECK: mov dword ptr |
104 | 108 |
105 ; Test with the fence splitting a load/add. | 109 ; Test with the fence splitting a load/add. |
106 define i32 @test_fused_load_add_c() { | 110 define i32 @test_fused_load_add_c() { |
107 entry: | 111 entry: |
108 %p_alloca = alloca i8, i32 4, align 4 | 112 %p_alloca = alloca i8, i32 4, align 4 |
(...skipping 16 matching lines...) |
125 %l_c2 = add i32 %l_c, 1 | 129 %l_c2 = add i32 %l_c, 1 |
126 store i32 %l_c2, i32* %p_c, align 1 | 130 store i32 %l_c2, i32* %p_c, align 1 |
127 | 131 |
128 ret i32 %l_c2 | 132 ret i32 %l_c2 |
129 } | 133 } |
130 ; CHECK-LABEL: test_fused_load_add_c | 134 ; CHECK-LABEL: test_fused_load_add_c |
131 ; alloca store | 135 ; alloca store |
132 ; CHECK: mov {{.*}}, esp | 136 ; CHECK: mov {{.*}}, esp |
133 ; CHECK: mov dword ptr {{.*}}, 999 | 137 ; CHECK: mov dword ptr {{.*}}, 999 |
134 ; atomic store (w/ its own mfence) | 138 ; atomic store (w/ its own mfence) |
135 ; CHECK: lea {{.*}}, g32_a | 139 ; CHECK: dword ptr [0] |
136 ; CHECK: add {{.*}}, dword ptr | 140 ; CHECK: add {{.*}}, dword ptr |
137 ; CHECK: mov dword ptr | 141 ; CHECK: mov dword ptr |
138 ; CHECK: mfence | 142 ; CHECK: mfence |
139 ; CHECK: lea {{.*}}, g32_b | 143 ; CHECK: dword ptr [4] |
140 ; This load + add are no longer optimized into one, | 144 ; This load + add are no longer optimized into one, |
141 ; though perhaps it should be legal as long as | 145 ; though perhaps it should be legal as long as |
142 ; the load stays on the same side of the fence. | 146 ; the load stays on the same side of the fence. |
143 ; CHECK: mov {{.*}}, dword ptr | 147 ; CHECK: mov {{.*}}, dword ptr |
144 ; CHECK: mfence | 148 ; CHECK: mfence |
145 ; CHECK: add {{.*}}, 1 | 149 ; CHECK: add {{.*}}, 1 |
146 ; CHECK: mov dword ptr | 150 ; CHECK: mov dword ptr |
147 ; CHECK: lea {{.*}}, g32_c | 151 ; CHECK: dword ptr [8] |
148 ; CHECK: add {{.*}}, dword ptr | 152 ; CHECK: add {{.*}}, dword ptr |
149 ; CHECK: mov dword ptr | 153 ; CHECK: mov dword ptr |
150 | 154 |
151 | 155 |
152 ; Test where a bunch of i8 loads could have been fused into one | 156 ; Test where a bunch of i8 loads could have been fused into one |
153 ; i32 load, but a fence blocks that. | 157 ; i32 load, but a fence blocks that. |
154 define i32 @could_have_fused_loads() { | 158 define i32 @could_have_fused_loads() { |
155 entry: | 159 entry: |
156 %ptr1 = bitcast [4 x i8]* @g32_d to i8* | 160 %ptr1 = bitcast [4 x i8]* @g32_d to i8* |
157 %b1 = load i8* %ptr1 | 161 %b1 = load i8* %ptr1 |
(...skipping 19 matching lines...) |
177 %b12 = or i32 %b1.ext, %b2.shift | 181 %b12 = or i32 %b1.ext, %b2.shift |
178 %b3.ext = zext i8 %b3 to i32 | 182 %b3.ext = zext i8 %b3 to i32 |
179 %b3.shift = shl i32 %b3.ext, 16 | 183 %b3.shift = shl i32 %b3.ext, 16 |
180 %b123 = or i32 %b12, %b3.shift | 184 %b123 = or i32 %b12, %b3.shift |
181 %b4.ext = zext i8 %b4 to i32 | 185 %b4.ext = zext i8 %b4 to i32 |
182 %b4.shift = shl i32 %b4.ext, 24 | 186 %b4.shift = shl i32 %b4.ext, 24 |
183 %b1234 = or i32 %b123, %b4.shift | 187 %b1234 = or i32 %b123, %b4.shift |
184 ret i32 %b1234 | 188 ret i32 %b1234 |
185 } | 189 } |
186 ; CHECK-LABEL: could_have_fused_loads | 190 ; CHECK-LABEL: could_have_fused_loads |
187 ; CHECK: lea {{.*}}, g32_d | 191 ; CHECK: dword ptr [0] |
188 ; CHECK: mov {{.*}}, byte ptr | 192 ; CHECK: mov {{.*}}, byte ptr |
189 ; CHECK: mov {{.*}}, byte ptr | 193 ; CHECK: mov {{.*}}, byte ptr |
190 ; CHECK: mov {{.*}}, byte ptr | 194 ; CHECK: mov {{.*}}, byte ptr |
191 ; CHECK: mfence | 195 ; CHECK: mfence |
192 ; CHECK: mov {{.*}}, byte ptr | 196 ; CHECK: mov {{.*}}, byte ptr |
193 | 197 |
194 | 198 |
195 ; Test where an identical load from two branches could have been hoisted | 199 ; Test where an identical load from two branches could have been hoisted |
196 ; up, and then the code merged, but a fence prevents it. | 200 ; up, and then the code merged, but a fence prevents it. |
197 define i32 @could_have_hoisted_loads(i32 %x) { | 201 define i32 @could_have_hoisted_loads(i32 %x) { |
198 entry: | 202 entry: |
199 %ptr = bitcast [4 x i8]* @g32_d to i32* | 203 %ptr = bitcast [4 x i8]* @g32_d to i32* |
200 %cmp = icmp eq i32 %x, 1 | 204 %cmp = icmp eq i32 %x, 1 |
201 br i1 %cmp, label %branch1, label %branch2 | 205 br i1 %cmp, label %branch1, label %branch2 |
202 branch1: | 206 branch1: |
203 %y = load i32* %ptr | 207 %y = load i32* %ptr |
204 ret i32 %y | 208 ret i32 %y |
205 branch2: | 209 branch2: |
206 call void @llvm.nacl.atomic.fence.all() | 210 call void @llvm.nacl.atomic.fence.all() |
207 %z = load i32* %ptr | 211 %z = load i32* %ptr |
208 ret i32 %z | 212 ret i32 %z |
209 } | 213 } |
210 ; CHECK-LABEL: could_have_hoisted_loads | 214 ; CHECK-LABEL: could_have_hoisted_loads |
211 ; CHECK: lea {{.*}}, g32_d | 215 ; CHECK: dword ptr [0] |
212 ; CHECK: je {{.*}} | 216 ; CHECK: je {{.*}} |
213 ; CHECK: jmp {{.*}} | 217 ; CHECK: jmp {{.*}} |
214 ; CHECK: mov {{.*}}, dword ptr | 218 ; CHECK: mov {{.*}}, dword ptr |
215 ; CHECK: ret | 219 ; CHECK: ret |
216 ; CHECK: mfence | 220 ; CHECK: mfence |
217 ; CHECK: mov {{.*}}, dword ptr | 221 ; CHECK: mov {{.*}}, dword ptr |
218 ; CHECK: ret | 222 ; CHECK: ret |