Chromium Code Reviews

Side by Side Diff: tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll

Issue 342763004: Add atomic load/store, fetch_add, fence, and is-lock-free lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: change comment (created 6 years, 5 months ago)
; Test that loads/stores don't move across a nacl.atomic.fence.all.
; This should apply to both atomic and non-atomic loads/stores
; (unlike the non-"all" variety of nacl.atomic.fence, which only
; applies to atomic loads/stores).
;
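; For contrast, the non-"all" fence is a separate intrinsic (assumed here to be
; the PNaCl ABI's @llvm.nacl.atomic.fence(i32 %memory_order)); it only orders
; the atomic accesses around it, so a hypothetical use would look like:
;   call void @llvm.nacl.atomic.fence(i32 6)
; and would not be expected to pin the ordinary loads/stores in this file.
;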
; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s

declare void @llvm.nacl.atomic.fence.all()
declare i32 @llvm.nacl.atomic.load.i32(i32*, i32)
declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32)

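; The trailing i32 argument of the atomic load/store intrinsics is the
; memory-order constant; the value 6 used throughout this test is assumed to
; correspond to sequentially-consistent ordering in PNaCl's encoding.
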
@g32_a = internal global [4 x i8] zeroinitializer, align 4
@g32_b = internal global [4 x i8] zeroinitializer, align 4
@g32_c = internal global [4 x i8] zeroinitializer, align 4
@g32_d = internal global [4 x i8] c"\02\00\00\00", align 4

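; Read as a little-endian i32 (the x86 target here), @g32_d's initializer
; c"\02\00\00\00" is simply the value 2.
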
define i32 @test_fused_load_add_a() {
entry:
  %p_alloca = alloca i8, i32 4, align 4
  %p_alloca_bc = bitcast i8* %p_alloca to i32*
  store i32 999, i32* %p_alloca_bc, align 1

  %p_a = bitcast [4 x i8]* @g32_a to i32*
  %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6)
  %l_a2 = add i32 %l_a, 1
  call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6)

  %p_b = bitcast [4 x i8]* @g32_b to i32*
  %l_b = load i32* %p_b
  %l_b2 = add i32 %l_b, 1
  store i32 %l_b2, i32* %p_b, align 1

  %p_c = bitcast [4 x i8]* @g32_c to i32*
  %l_c = load i32* %p_c
  %l_c2 = add i32 %l_c, 1
  call void @llvm.nacl.atomic.fence.all()
  store i32 %l_c2, i32* %p_c, align 1

  ret i32 %l_c2
}
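; A rough reading of the checks below, assuming Subzero's x86-32 lowering:
; the seq_cst atomic store is emitted as a plain mov to memory followed by an
; explicit mfence, and the seq_cst atomic load needs no extra fencing, so the
; two mfences expected here come from the atomic store and from fence.all.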
; CHECK-LABEL: test_fused_load_add_a
; alloca store
; CHECK: mov {{.*}}, esp
; CHECK: mov dword ptr {{.*}}, 999
; atomic store (w/ its own mfence)
; CHECK: mov {{.*}}, g32_a
; The load + add pair is optimized into a single instruction everywhere.
; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
; CHECK: mfence
; CHECK: mov {{.*}}, g32_b
; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
; CHECK: mov {{.*}}, g32_c
; CHECK: add {{.*}}, dword ptr
; CHECK: mfence
; CHECK: mov dword ptr

; Test with the fence moved up, before the load of g32_c.
define i32 @test_fused_load_add_b() {
entry:
  %p_alloca = alloca i8, i32 4, align 4
  %p_alloca_bc = bitcast i8* %p_alloca to i32*
  store i32 999, i32* %p_alloca_bc, align 1

  %p_a = bitcast [4 x i8]* @g32_a to i32*
  %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6)
  %l_a2 = add i32 %l_a, 1
  call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6)

  %p_b = bitcast [4 x i8]* @g32_b to i32*
  %l_b = load i32* %p_b
  %l_b2 = add i32 %l_b, 1
  store i32 %l_b2, i32* %p_b, align 1

  %p_c = bitcast [4 x i8]* @g32_c to i32*
  call void @llvm.nacl.atomic.fence.all()
  %l_c = load i32* %p_c
  %l_c2 = add i32 %l_c, 1
  store i32 %l_c2, i32* %p_c, align 1

  ret i32 %l_c2
}
; CHECK-LABEL: test_fused_load_add_b
; alloca store
; CHECK: mov {{.*}}, esp
; CHECK: mov dword ptr {{.*}}, 999
; atomic store (w/ its own mfence)
; CHECK: mov {{.*}}, g32_a
; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
; CHECK: mfence
; CHECK: mov {{.*}}, g32_b
; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
; CHECK: mov {{.*}}, g32_c
; CHECK: mfence
; The load + add pair can still be optimized into one instruction
; because it is not separated by the fence.
; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr

; Test with the fence splitting a load/add pair.
define i32 @test_fused_load_add_c() {
entry:
  %p_alloca = alloca i8, i32 4, align 4
  %p_alloca_bc = bitcast i8* %p_alloca to i32*
  store i32 999, i32* %p_alloca_bc, align 1

  %p_a = bitcast [4 x i8]* @g32_a to i32*
  %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6)
  %l_a2 = add i32 %l_a, 1
  call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6)

  %p_b = bitcast [4 x i8]* @g32_b to i32*
  %l_b = load i32* %p_b
  call void @llvm.nacl.atomic.fence.all()
  %l_b2 = add i32 %l_b, 1
  store i32 %l_b2, i32* %p_b, align 1

  %p_c = bitcast [4 x i8]* @g32_c to i32*
  %l_c = load i32* %p_c
  %l_c2 = add i32 %l_c, 1
  store i32 %l_c2, i32* %p_c, align 1

  ret i32 %l_c2
}
; CHECK-LABEL: test_fused_load_add_c
; alloca store
; CHECK: mov {{.*}}, esp
; CHECK: mov dword ptr {{.*}}, 999
; atomic store (w/ its own mfence)
; CHECK: mov {{.*}}, g32_a
; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
; CHECK: mfence
; CHECK: mov {{.*}}, g32_b
; This load + add pair is no longer optimized into a single instruction,
; though perhaps that should be legal as long as
; the load stays on the same side of the fence.
; CHECK: mov {{.*}}, dword ptr
; CHECK: mfence
; CHECK: add {{.*}}, 1
; CHECK: mov dword ptr
; CHECK: mov {{.*}}, g32_c
; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr


; Test where a bunch of i8 loads could have been fused into one
; i32 load, but a fence blocks that.
define i32 @could_have_fused_loads() {
entry:
  %ptr1 = bitcast [4 x i8]* @g32_d to i8*
  %b1 = load i8* %ptr1

  %int_ptr2 = ptrtoint [4 x i8]* @g32_d to i32
  %int_ptr_bump2 = add i32 %int_ptr2, 1
  %ptr2 = inttoptr i32 %int_ptr_bump2 to i8*
  %b2 = load i8* %ptr2

  %int_ptr_bump3 = add i32 %int_ptr2, 2
  %ptr3 = inttoptr i32 %int_ptr_bump3 to i8*
  %b3 = load i8* %ptr3

  call void @llvm.nacl.atomic.fence.all()

  %int_ptr_bump4 = add i32 %int_ptr2, 3
  %ptr4 = inttoptr i32 %int_ptr_bump4 to i8*
  %b4 = load i8* %ptr4

  %b1.ext = zext i8 %b1 to i32
  %b2.ext = zext i8 %b2 to i32
  %b2.shift = shl i32 %b2.ext, 8
  %b12 = or i32 %b1.ext, %b2.shift
  %b3.ext = zext i8 %b3 to i32
  %b3.shift = shl i32 %b3.ext, 16
  %b123 = or i32 %b12, %b3.shift
  %b4.ext = zext i8 %b4 to i32
  %b4.shift = shl i32 %b4.ext, 24
  %b1234 = or i32 %b123, %b4.shift
  ret i32 %b1234
}
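; The zext/shl/or sequence above reassembles the four bytes little-endian
; style, i.e. %b1234 = b1 | (b2 << 8) | (b3 << 16) | (b4 << 24); with @g32_d's
; initializer of \02\00\00\00 the function would return 2 at run time.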
; CHECK-LABEL: could_have_fused_loads
; CHECK: mov {{.*}}, g32_d
; CHECK: mov {{.*}}, byte ptr
; CHECK: mov {{.*}}, byte ptr
; CHECK: mov {{.*}}, byte ptr
; CHECK: mfence
; CHECK: mov {{.*}}, byte ptr


; Test where an identical load from two branches could have been hoisted
; up, and then the code merged, but a fence prevents it.
define i32 @could_have_hoisted_loads(i32 %x) {
entry:
  %ptr = bitcast [4 x i8]* @g32_d to i32*
  %cmp = icmp eq i32 %x, 1
  br i1 %cmp, label %branch1, label %branch2
branch1:
  %y = load i32* %ptr
  ret i32 %y
branch2:
  call void @llvm.nacl.atomic.fence.all()
  %z = load i32* %ptr
  ret i32 %z
}
; CHECK-LABEL: could_have_hoisted_loads
; CHECK: mov {{.*}}, g32_d
; CHECK: je {{.*}}
; CHECK: jmp {{.*}}
; CHECK: mov {{.*}}, dword ptr
; CHECK: ret
; CHECK: mfence
; CHECK: mov {{.*}}, dword ptr
; CHECK: ret
