OLD | NEW |
1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. | 1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. |
2 ; This should apply to both atomic and non-atomic loads/stores | 2 ; This should apply to both atomic and non-atomic loads/stores |
3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only | 3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only |
4 ; applies to atomic load/stores). | 4 ; applies to atomic load/stores). |
5 ; | 5 ; |
6 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s | 6 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s |
7 | 7 |
8 declare void @llvm.nacl.atomic.fence.all() | 8 declare void @llvm.nacl.atomic.fence.all() |
9 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) | 9 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) |
10 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) | 10 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) |
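A minimal IR sketch, not part of the test file under review, contrasting the two fence intrinsics described in the header comment; the non-"all" fence's signature and the memory-order value 6 (seq_cst) are assumed from the PNaCl ABI rather than taken from this test:

; Sketch only: @llvm.nacl.atomic.fence orders just the atomic accesses
; around it, while @llvm.nacl.atomic.fence.all also pins ordinary
; loads/stores (which is what the tests below exercise).
declare void @llvm.nacl.atomic.fence(i32)
declare void @llvm.nacl.atomic.fence.all()

define internal void @fence_contrast_sketch(i32* %p) {
entry:
  store i32 1, i32* %p, align 1             ; ordinary store
  call void @llvm.nacl.atomic.fence(i32 6)  ; i32 6 assumed = seq_cst
  call void @llvm.nacl.atomic.fence.all()   ; also orders the store above
  ret void
}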
(...skipping 25 matching lines...) |
36 call void @llvm.nacl.atomic.fence.all() | 36 call void @llvm.nacl.atomic.fence.all() |
37 store i32 %l_c2, i32* %p_c, align 1 | 37 store i32 %l_c2, i32* %p_c, align 1 |
38 | 38 |
39 ret i32 %l_c2 | 39 ret i32 %l_c2 |
40 } | 40 } |
41 ; CHECK-LABEL: test_fused_load_sub_a | 41 ; CHECK-LABEL: test_fused_load_sub_a |
42 ; alloca store | 42 ; alloca store |
43 ; CHECK: mov DWORD PTR {{.*}},0x3e7 | 43 ; CHECK: mov DWORD PTR {{.*}},0x3e7 |
44 ; atomic store (w/ its own mfence) | 44 ; atomic store (w/ its own mfence) |
45 ; The load + sub are optimized into one everywhere. | 45 ; The load + sub are optimized into one everywhere. |
46 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a | 46 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_a)|(.bss)}} |
47 ; CHECK: mov DWORD PTR | 47 ; CHECK: mov {{(DWORD PTR)?}} |
48 ; CHECK: mfence | 48 ; CHECK: mfence |
49 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b | 49 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_b)|(.bss)}} |
50 ; CHECK: mov DWORD PTR | 50 ; CHECK: mov {{(DWORD PTR)?}} |
51 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c | 51 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_c)|(.bss)}} |
52 ; CHECK: mfence | 52 ; CHECK: mfence |
53 ; CHECK: mov DWORD PTR | 53 ; CHECK: mov {{(DWORD PTR)?}} |
54 | 54 |
55 ; Test with the fence moved up a bit. | 55 ; Test with the fence moved up a bit. |
56 define internal i32 @test_fused_load_sub_b() { | 56 define internal i32 @test_fused_load_sub_b() { |
57 entry: | 57 entry: |
58 %p_alloca = alloca i8, i32 4, align 4 | 58 %p_alloca = alloca i8, i32 4, align 4 |
59 %p_alloca_bc = bitcast i8* %p_alloca to i32* | 59 %p_alloca_bc = bitcast i8* %p_alloca to i32* |
60 store i32 999, i32* %p_alloca_bc, align 1 | 60 store i32 999, i32* %p_alloca_bc, align 1 |
61 | 61 |
62 %p_a = bitcast [4 x i8]* @g32_a to i32* | 62 %p_a = bitcast [4 x i8]* @g32_a to i32* |
63 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) | 63 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) |
(...skipping 10 matching lines...) |
74 %l_c = load i32, i32* %p_c, align 1 | 74 %l_c = load i32, i32* %p_c, align 1 |
75 %l_c2 = sub i32 1, %l_c | 75 %l_c2 = sub i32 1, %l_c |
76 store i32 %l_c2, i32* %p_c, align 1 | 76 store i32 %l_c2, i32* %p_c, align 1 |
77 | 77 |
78 ret i32 %l_c2 | 78 ret i32 %l_c2 |
79 } | 79 } |
80 ; CHECK-LABEL: test_fused_load_sub_b | 80 ; CHECK-LABEL: test_fused_load_sub_b |
81 ; alloca store | 81 ; alloca store |
82 ; CHECK: mov DWORD PTR {{.*}},0x3e7 | 82 ; CHECK: mov DWORD PTR {{.*}},0x3e7 |
83 ; atomic store (w/ its own mfence) | 83 ; atomic store (w/ its own mfence) |
84 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a | 84 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_a)|(.bss)}} |
85 ; CHECK: mov DWORD PTR | 85 ; CHECK: mov {{(DWORD PTR)?}} |
86 ; CHECK: mfence | 86 ; CHECK: mfence |
87 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b | 87 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_b)|(.bss)}} |
88 ; CHECK: mov DWORD PTR | 88 ; CHECK: mov {{(DWORD PTR)?}} |
89 ; CHECK: mfence | 89 ; CHECK: mfence |
90 ; Load + sub can still be optimized into one instruction | 90 ; Load + sub can still be optimized into one instruction |
91 ; because it is not separated by a fence. | 91 ; because it is not separated by a fence. |
92 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c | 92 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_c)|(.bss)}} |
93 ; CHECK: mov DWORD PTR | 93 ; CHECK: mov {{(DWORD PTR)?}} |
94 | 94 |
95 ; Test with the fence splitting a load/sub. | 95 ; Test with the fence splitting a load/sub. |
96 define internal i32 @test_fused_load_sub_c() { | 96 define internal i32 @test_fused_load_sub_c() { |
97 entry: | 97 entry: |
98 %p_alloca = alloca i8, i32 4, align 4 | 98 %p_alloca = alloca i8, i32 4, align 4 |
99 %p_alloca_bc = bitcast i8* %p_alloca to i32* | 99 %p_alloca_bc = bitcast i8* %p_alloca to i32* |
100 store i32 999, i32* %p_alloca_bc, align 1 | 100 store i32 999, i32* %p_alloca_bc, align 1 |
101 | 101 |
102 %p_a = bitcast [4 x i8]* @g32_a to i32* | 102 %p_a = bitcast [4 x i8]* @g32_a to i32* |
103 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) | 103 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) |
(...skipping 10 matching lines...) |
114 %l_c = load i32, i32* %p_c, align 1 | 114 %l_c = load i32, i32* %p_c, align 1 |
115 %l_c2 = sub i32 1, %l_c | 115 %l_c2 = sub i32 1, %l_c |
116 store i32 %l_c2, i32* %p_c, align 1 | 116 store i32 %l_c2, i32* %p_c, align 1 |
117 | 117 |
118 ret i32 %l_c2 | 118 ret i32 %l_c2 |
119 } | 119 } |
120 ; CHECK-LABEL: test_fused_load_sub_c | 120 ; CHECK-LABEL: test_fused_load_sub_c |
121 ; alloca store | 121 ; alloca store |
122 ; CHECK: mov DWORD PTR {{.*}},0x3e7 | 122 ; CHECK: mov DWORD PTR {{.*}},0x3e7 |
123 ; atomic store (w/ its own mfence) | 123 ; atomic store (w/ its own mfence) |
124 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a | 124 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_a)|(.bss)}} |
125 ; CHECK: mov DWORD PTR | 125 ; CHECK: mov {{(DWORD PTR)?}} |
126 ; CHECK: mfence | 126 ; CHECK: mfence |
127 ; This load + sub are no longer optimized into one, | 127 ; This load + sub are no longer optimized into one, |
128 ; though perhaps it should be legal as long as | 128 ; though perhaps it should be legal as long as |
129 ; the load stays on the same side of the fence. | 129 ; the load stays on the same side of the fence. |
130 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_b | 130 ; CHECK: mov {{.*}},{{(DWORD PTR )?}}{{.*}}{{(g32_b)|(.bss)}} |
131 ; CHECK: mfence | 131 ; CHECK: mfence |
132 ; CHECK: mov {{.*}},0x1 | 132 ; CHECK: mov {{.*}},0x1 |
133 ; CHECK: sub | 133 ; CHECK: sub |
134 ; CHECK: mov DWORD PTR | 134 ; CHECK: mov {{(DWORD PTR)?}} |
135 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c | 135 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_c)|(.bss)}} |
136 ; CHECK: mov DWORD PTR | 136 ; CHECK: mov {{(DWORD PTR)?}} |
137 | 137 |
138 | 138 |
139 ; Test where a bunch of i8 loads could have been fused into one | 139 ; Test where a bunch of i8 loads could have been fused into one |
140 ; i32 load, but a fence blocks that. | 140 ; i32 load, but a fence blocks that. |
141 define internal i32 @could_have_fused_loads() { | 141 define internal i32 @could_have_fused_loads() { |
142 entry: | 142 entry: |
143 %ptr1 = bitcast [4 x i8]* @g32_d to i8* | 143 %ptr1 = bitcast [4 x i8]* @g32_d to i8* |
144 %b1 = load i8, i8* %ptr1, align 1 | 144 %b1 = load i8, i8* %ptr1, align 1 |
145 | 145 |
146 %int_ptr2 = ptrtoint [4 x i8]* @g32_d to i32 | 146 %int_ptr2 = ptrtoint [4 x i8]* @g32_d to i32 |
(...skipping 17 matching lines...) |
164 %b12 = or i32 %b1.ext, %b2.shift | 164 %b12 = or i32 %b1.ext, %b2.shift |
165 %b3.ext = zext i8 %b3 to i32 | 165 %b3.ext = zext i8 %b3 to i32 |
166 %b3.shift = shl i32 %b3.ext, 16 | 166 %b3.shift = shl i32 %b3.ext, 16 |
167 %b123 = or i32 %b12, %b3.shift | 167 %b123 = or i32 %b12, %b3.shift |
168 %b4.ext = zext i8 %b4 to i32 | 168 %b4.ext = zext i8 %b4 to i32 |
169 %b4.shift = shl i32 %b4.ext, 24 | 169 %b4.shift = shl i32 %b4.ext, 24 |
170 %b1234 = or i32 %b123, %b4.shift | 170 %b1234 = or i32 %b123, %b4.shift |
171 ret i32 %b1234 | 171 ret i32 %b1234 |
172 } | 172 } |
173 ; CHECK-LABEL: could_have_fused_loads | 173 ; CHECK-LABEL: could_have_fused_loads |
174 ; CHECK: mov {{.*}},BYTE PTR | 174 ; CHECK: mov {{.*}},{{(BYTE PTR)?}} |
175 ; CHECK: mov {{.*}},BYTE PTR | 175 ; CHECK: mov {{.*}},BYTE PTR |
176 ; CHECK: mov {{.*}},BYTE PTR | 176 ; CHECK: mov {{.*}},BYTE PTR |
177 ; CHECK: mfence | 177 ; CHECK: mfence |
178 ; CHECK: mov {{.*}},BYTE PTR | 178 ; CHECK: mov {{.*}},BYTE PTR |
179 | 179 |
180 | 180 |
181 ; Test where an identical load from two branches could have been hoisted | 181 ; Test where an identical load from two branches could have been hoisted |
182 ; up, and then the code merged, but a fence prevents it. | 182 ; up, and then the code merged, but a fence prevents it. |
183 define internal i32 @could_have_hoisted_loads(i32 %x) { | 183 define internal i32 @could_have_hoisted_loads(i32 %x) { |
184 entry: | 184 entry: |
185 %ptr = bitcast [4 x i8]* @g32_d to i32* | 185 %ptr = bitcast [4 x i8]* @g32_d to i32* |
186 %cmp = icmp eq i32 %x, 1 | 186 %cmp = icmp eq i32 %x, 1 |
187 br i1 %cmp, label %branch1, label %branch2 | 187 br i1 %cmp, label %branch1, label %branch2 |
188 branch1: | 188 branch1: |
189 %y = load i32, i32* %ptr, align 1 | 189 %y = load i32, i32* %ptr, align 1 |
190 ret i32 %y | 190 ret i32 %y |
191 branch2: | 191 branch2: |
192 call void @llvm.nacl.atomic.fence.all() | 192 call void @llvm.nacl.atomic.fence.all() |
193 %z = load i32, i32* %ptr, align 1 | 193 %z = load i32, i32* %ptr, align 1 |
194 ret i32 %z | 194 ret i32 %z |
195 } | 195 } |
196 ; CHECK-LABEL: could_have_hoisted_loads | 196 ; CHECK-LABEL: could_have_hoisted_loads |
197 ; CHECK: jne {{.*}} | 197 ; CHECK: jne {{.*}} |
198 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d | 198 ; CHECK: mov {{.*}},{{(DWORD PTR )?}}{{.*}}{{(g32_d)|(.bss)}} |
199 ; CHECK: ret | 199 ; CHECK: ret |
200 ; CHECK: mfence | 200 ; CHECK: mfence |
201 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d | 201 ; CHECK: mov {{.*}},{{(DWORD PTR )?}}{{.*}}{{(g32_d)|(.bss)}} |
202 ; CHECK: ret | 202 ; CHECK: ret |