Chromium Code Reviews

Unified Diff: tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll

Issue 671443003: Fix nacl-atomic-fence-all test to have alignment specified in load. (Closed)
Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Created 6 years, 2 months ago
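
In brief, the patch replaces the %lc2i RUN line (and its TODO) with %p2i, zero-initializes @g32_d so that all four globals land in .bss at predictable offsets (0, 4, 8, 12), and gives every plain load an explicit "align 1", which in turn lets the CHECK patterns name exact addressing forms such as dword ptr [8]. A minimal sketch of the load change, shown on a made-up global (@g_example and @load_with_explicit_align are hypothetical names, not lines from the patch):

  @g_example = internal global [4 x i8] zeroinitializer, align 4

  define i32 @load_with_explicit_align() {
  entry:
    %p = bitcast [4 x i8]* @g_example to i32*
    ; The old test wrote "load i32* %p" and left the alignment implicit;
    ; the fixed test spells it out:
    %v = load i32* %p, align 1
    ret i32 %v
  }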
; Test that loads/stores don't move across a nacl.atomic.fence.all.
; This should apply to both atomic and non-atomic loads/stores
; (unlike the non-"all" variety of nacl.atomic.fence, which only
; applies to atomic load/stores).
;
-; TODO(kschimpf) Find out why lc2i is needed.
-; RUN: %lc2i -i %s --args -O2 --verbose none \
+; RUN: %p2i -i %s --args -O2 --verbose none \
; RUN: | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj \
; RUN: | llvm-objdump -d -r -symbolize -x86-asm-syntax=intel - | FileCheck %s

; TODO(jvoung): llvm-objdump doesn't symbolize global symbols well, so we
-; have 0 == g32_a, 4 == g32_b, 8 == g32_c.
-; g32_d is also 0 because it's in the .data section instead of .bss.
+; have 0 == g32_a, 4 == g32_b, 8 == g32_c, 12 == g32_d

declare void @llvm.nacl.atomic.fence.all()
declare i32 @llvm.nacl.atomic.load.i32(i32*, i32)
declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32)

@g32_a = internal global [4 x i8] zeroinitializer, align 4
@g32_b = internal global [4 x i8] zeroinitializer, align 4
@g32_c = internal global [4 x i8] zeroinitializer, align 4
-@g32_d = internal global [4 x i8] c"\02\00\00\00", align 4
+@g32_d = internal global [4 x i8] zeroinitializer, align 4

define i32 @test_fused_load_add_a() {
entry:
  %p_alloca = alloca i8, i32 4, align 4
  %p_alloca_bc = bitcast i8* %p_alloca to i32*
  store i32 999, i32* %p_alloca_bc, align 1

  %p_a = bitcast [4 x i8]* @g32_a to i32*
  %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6)
  %l_a2 = add i32 %l_a, 1
  call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6)

  %p_b = bitcast [4 x i8]* @g32_b to i32*
-  %l_b = load i32* %p_b
+  %l_b = load i32* %p_b, align 1
  %l_b2 = add i32 %l_b, 1
  store i32 %l_b2, i32* %p_b, align 1

  %p_c = bitcast [4 x i8]* @g32_c to i32*
-  %l_c = load i32* %p_c
+  %l_c = load i32* %p_c, align 1
  %l_c2 = add i32 %l_c, 1
  call void @llvm.nacl.atomic.fence.all()
  store i32 %l_c2, i32* %p_c, align 1

  ret i32 %l_c2
}
; CHECK-LABEL: test_fused_load_add_a
; alloca store
; CHECK: mov {{.*}}, esp
; CHECK: mov dword ptr {{.*}}, 999
; atomic store (w/ its own mfence)
-; CHECK: mov {{.*}}, 0
+; The load + add are optimized into one everywhere.
+; CHECK: add {{.*}}, dword ptr [0]
; CHECK-NEXT: R_386_32
-; The load + add are optimized into one everywhere.
-; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
; CHECK: mfence
-; CHECK: mov {{.*}}, 4
+; CHECK: add {{.*}}, dword ptr [4]
; CHECK-NEXT: R_386_32
-; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
-; CHECK: mov {{.*}}, 8
+; CHECK: add {{.*}}, dword ptr [8]
; CHECK-NEXT: R_386_32
-; CHECK: add {{.*}}, dword ptr
; CHECK: mfence
; CHECK: mov dword ptr

; Test with the fence moved up a bit.
define i32 @test_fused_load_add_b() {
entry:
  %p_alloca = alloca i8, i32 4, align 4
  %p_alloca_bc = bitcast i8* %p_alloca to i32*
  store i32 999, i32* %p_alloca_bc, align 1

  %p_a = bitcast [4 x i8]* @g32_a to i32*
  %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6)
  %l_a2 = add i32 %l_a, 1
  call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6)

  %p_b = bitcast [4 x i8]* @g32_b to i32*
-  %l_b = load i32* %p_b
+  %l_b = load i32* %p_b, align 1
  %l_b2 = add i32 %l_b, 1
  store i32 %l_b2, i32* %p_b, align 1

  %p_c = bitcast [4 x i8]* @g32_c to i32*
  call void @llvm.nacl.atomic.fence.all()
-  %l_c = load i32* %p_c
+  %l_c = load i32* %p_c, align 1
  %l_c2 = add i32 %l_c, 1
  store i32 %l_c2, i32* %p_c, align 1

  ret i32 %l_c2
}
; CHECK-LABEL: test_fused_load_add_b
; alloca store
; CHECK: mov {{.*}}, esp
; CHECK: mov dword ptr {{.*}}, 999
; atomic store (w/ its own mfence)
-; CHECK: mov {{.*}}, 0
+; CHECK: add {{.*}}, dword ptr [0]
; CHECK-NEXT: R_386_32
-; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
; CHECK: mfence
-; CHECK: mov {{.*}}, 4
+; CHECK: add {{.*}}, dword ptr [4]
; CHECK-NEXT: R_386_32
-; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
-; CHECK: mov {{.*}}, 8
-; CHECK-NEXT: R_386_32
; CHECK: mfence
; Load + add can still be optimized into one instruction
; because it is not separated by a fence.
-; CHECK: add {{.*}}, dword ptr
+; CHECK: add {{.*}}, dword ptr [8]
+; CHECK-NEXT: R_386_32
; CHECK: mov dword ptr

; Test with the fence splitting a load/add.
define i32 @test_fused_load_add_c() {
entry:
  %p_alloca = alloca i8, i32 4, align 4
  %p_alloca_bc = bitcast i8* %p_alloca to i32*
  store i32 999, i32* %p_alloca_bc, align 1

  %p_a = bitcast [4 x i8]* @g32_a to i32*
  %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6)
  %l_a2 = add i32 %l_a, 1
  call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6)

  %p_b = bitcast [4 x i8]* @g32_b to i32*
-  %l_b = load i32* %p_b
+  %l_b = load i32* %p_b, align 1
  call void @llvm.nacl.atomic.fence.all()
  %l_b2 = add i32 %l_b, 1
  store i32 %l_b2, i32* %p_b, align 1

  %p_c = bitcast [4 x i8]* @g32_c to i32*
-  %l_c = load i32* %p_c
+  %l_c = load i32* %p_c, align 1
  %l_c2 = add i32 %l_c, 1
  store i32 %l_c2, i32* %p_c, align 1

  ret i32 %l_c2
}
; CHECK-LABEL: test_fused_load_add_c
; alloca store
; CHECK: mov {{.*}}, esp
; CHECK: mov dword ptr {{.*}}, 999
; atomic store (w/ its own mfence)
-; CHECK: mov {{.*}}, 0
+; CHECK: add {{.*}}, dword ptr [0]
; CHECK-NEXT: R_386_32
-; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
; CHECK: mfence
-; CHECK: mov {{.*}}, 4
-; CHECK-NEXT: R_386_32
; This load + add are no longer optimized into one,
; though perhaps it should be legal as long as
; the load stays on the same side of the fence.
-; CHECK: mov {{.*}}, dword ptr
+; CHECK: mov {{.*}}, dword ptr [4]
+; CHECK-NEXT: R_386_32
; CHECK: mfence
; CHECK: add {{.*}}, 1
; CHECK: mov dword ptr
-; CHECK: mov {{.*}}, 8
+; CHECK: add {{.*}}, dword ptr [8]
; CHECK-NEXT: R_386_32
-; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr


; Test where a bunch of i8 loads could have been fused into one
; i32 load, but a fence blocks that.
define i32 @could_have_fused_loads() {
entry:
  %ptr1 = bitcast [4 x i8]* @g32_d to i8*
-  %b1 = load i8* %ptr1
+  %b1 = load i8* %ptr1, align 1

  %int_ptr2 = ptrtoint [4 x i8]* @g32_d to i32
  %int_ptr_bump2 = add i32 %int_ptr2, 1
  %ptr2 = inttoptr i32 %int_ptr_bump2 to i8*
-  %b2 = load i8* %ptr2
+  %b2 = load i8* %ptr2, align 1

  %int_ptr_bump3 = add i32 %int_ptr2, 2
  %ptr3 = inttoptr i32 %int_ptr_bump3 to i8*
-  %b3 = load i8* %ptr3
+  %b3 = load i8* %ptr3, align 1

  call void @llvm.nacl.atomic.fence.all()

  %int_ptr_bump4 = add i32 %int_ptr2, 3
  %ptr4 = inttoptr i32 %int_ptr_bump4 to i8*
-  %b4 = load i8* %ptr4
+  %b4 = load i8* %ptr4, align 1

  %b1.ext = zext i8 %b1 to i32
  %b2.ext = zext i8 %b2 to i32
  %b2.shift = shl i32 %b2.ext, 8
  %b12 = or i32 %b1.ext, %b2.shift
  %b3.ext = zext i8 %b3 to i32
  %b3.shift = shl i32 %b3.ext, 16
  %b123 = or i32 %b12, %b3.shift
  %b4.ext = zext i8 %b4 to i32
  %b4.shift = shl i32 %b4.ext, 24
  %b1234 = or i32 %b123, %b4.shift
  ret i32 %b1234
}
; CHECK-LABEL: could_have_fused_loads
-; CHECK: mov {{.*}}, 0
+; CHECK: mov {{.*}}, byte ptr [12]
; CHECK-NEXT: R_386_32
; CHECK: mov {{.*}}, byte ptr
; CHECK: mov {{.*}}, byte ptr
-; CHECK: mov {{.*}}, byte ptr
; CHECK: mfence
; CHECK: mov {{.*}}, byte ptr


; Test where an identical load from two branches could have been hoisted
; up, and then the code merged, but a fence prevents it.
define i32 @could_have_hoisted_loads(i32 %x) {
entry:
  %ptr = bitcast [4 x i8]* @g32_d to i32*
  %cmp = icmp eq i32 %x, 1
  br i1 %cmp, label %branch1, label %branch2
branch1:
-  %y = load i32* %ptr
+  %y = load i32* %ptr, align 1
  ret i32 %y
branch2:
  call void @llvm.nacl.atomic.fence.all()
-  %z = load i32* %ptr
+  %z = load i32* %ptr, align 1
  ret i32 %z
}
; CHECK-LABEL: could_have_hoisted_loads
-; CHECK: mov {{.*}}, 0
+; CHECK: jne {{.*}}
+; CHECK: mov {{.*}}, dword ptr [12]
; CHECK-NEXT: R_386_32
-; CHECK: jne {{.*}}
-; CHECK: mov {{.*}}, dword ptr
; CHECK: ret
; CHECK: mfence
-; CHECK: mov {{.*}}, dword ptr
+; CHECK: mov {{.*}}, dword ptr [12]
+; CHECK-NEXT: R_386_32
; CHECK: ret
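
A note on reading the new CHECK patterns: add {{.*}}, dword ptr [N] asserts that the load was folded into the add's memory operand, while the split mov / mfence / add {{.*}}, 1 sequence shows the fence blocking that folding. Likewise, could_have_fused_loads computes b1 | (b2 << 8) | (b3 << 16) | (b4 << 24), which on little-endian x86 is exactly the value of a single i32 load of @g32_d, so without the fence the four byte loads could legally collapse into one. A hypothetical fused equivalent for contrast (this function is illustrative only and appears nowhere in the test or the patch):

  define i32 @fused_equivalent() {
  entry:
    ; One i32 load stands in for the four i8 loads; only the fence
    ; in could_have_fused_loads forbids this transformation.
    %p = bitcast [4 x i8]* @g32_d to i32*
    %b1234 = load i32* %p, align 1
    ret i32 %b1234
  }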