Chromium Code Reviews

Unified Diff: tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll

Issue 649463002: Handle "Mov" which is mov, movss, movsd, and used for nacl.read.tp. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: bounds check (created 6 years, 2 months ago)
; Test that loads/stores don't move across a nacl.atomic.fence.all.
; This should apply to both atomic and non-atomic loads/stores
; (unlike the non-"all" variety of nacl.atomic.fence, which only
; applies to atomic load/stores).
;
; TODO(kschimpf) Find out why lc2i is needed.
; RUN: %lc2i -i %s --args -O2 --verbose none \
; RUN:   | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj \
-; RUN:   | llvm-objdump -d -symbolize -x86-asm-syntax=intel - | FileCheck %s
+; RUN:   | llvm-objdump -d -r -symbolize -x86-asm-syntax=intel - | FileCheck %s

; TODO(jvoung): llvm-objdump doesn't symbolize global symbols well, so we
-; have [0] == g32_a, [4] == g32_b, [8] == g32_c.
-; g32_d is also [0] because it's in the .data section instead of .bss.
+; have 0 == g32_a, 4 == g32_b, 8 == g32_c.
+; g32_d is also 0 because it's in the .data section instead of .bss.

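[Note: the rewritten CHECK patterns lean on llvm-objdump's -r flag, which interleaves relocation records with the disassembly. Each global's absolute address is printed as its section offset (0, 4, 8) followed on the next line by an R_386_32 relocation entry, which is what the new CHECK-NEXT lines match. A hypothetical slice of that output, with illustrative registers and symbol names, not taken from an actual build:

    mov eax, 0
          00000001:  R_386_32  .bss
    mov dword ptr [eax], ecx
    mfence
]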
declare void @llvm.nacl.atomic.fence.all()
declare i32 @llvm.nacl.atomic.load.i32(i32*, i32)
declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32)

@g32_a = internal global [4 x i8] zeroinitializer, align 4
@g32_b = internal global [4 x i8] zeroinitializer, align 4
@g32_c = internal global [4 x i8] zeroinitializer, align 4
@g32_d = internal global [4 x i8] c"\02\00\00\00", align 4

(...skipping 19 matching lines...)
  call void @llvm.nacl.atomic.fence.all()
  store i32 %l_c2, i32* %p_c, align 1

  ret i32 %l_c2
}
; CHECK-LABEL: test_fused_load_add_a
; alloca store
; CHECK: mov {{.*}}, esp
; CHECK: mov dword ptr {{.*}}, 999
; atomic store (w/ its own mfence)
-; CHECK: dword ptr [0]
+; CHECK: mov {{.*}}, 0
+; CHECK-NEXT: R_386_32
; The load + add are optimized into one everywhere.
; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
; CHECK: mfence
-; CHECK: dword ptr [4]
+; CHECK: mov {{.*}}, 4
+; CHECK-NEXT: R_386_32
; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
-; CHECK: dword ptr [8]
+; CHECK: mov {{.*}}, 8
+; CHECK-NEXT: R_386_32
; CHECK: add {{.*}}, dword ptr
; CHECK: mfence
; CHECK: mov dword ptr

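[Note: the "atomic store (w/ its own mfence)" comments reflect the standard x86 lowering of a sequentially consistent 32-bit store: an ordinary mov followed by mfence. A sketch of the instruction shape these check groups trace, with illustrative registers:

    mov eax, 0                 ; materialize &g32_a; an R_386_32 reloc patches the 0
    mov dword ptr [eax], ecx   ; the store itself
    mfence                     ; orders it against every later load/store
]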
; Test with the fence moved up a bit.
define i32 @test_fused_load_add_b() {
entry:
  %p_alloca = alloca i8, i32 4, align 4
  %p_alloca_bc = bitcast i8* %p_alloca to i32*
  store i32 999, i32* %p_alloca_bc, align 1
(...skipping 14 matching lines...)
  %l_c2 = add i32 %l_c, 1
  store i32 %l_c2, i32* %p_c, align 1

  ret i32 %l_c2
}
; CHECK-LABEL: test_fused_load_add_b
; alloca store
; CHECK: mov {{.*}}, esp
; CHECK: mov dword ptr {{.*}}, 999
; atomic store (w/ its own mfence)
-; CHECK: dword ptr [0]
+; CHECK: mov {{.*}}, 0
+; CHECK-NEXT: R_386_32
; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
; CHECK: mfence
-; CHECK: dword ptr [4]
+; CHECK: mov {{.*}}, 4
+; CHECK-NEXT: R_386_32
; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
-; CHECK: dword ptr [8]
+; CHECK: mov {{.*}}, 8
+; CHECK-NEXT: R_386_32
; CHECK: mfence
; Load + add can still be optimized into one instruction
; because it is not separated by a fence.
; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr

; Test with the fence splitting a load/add.
define i32 @test_fused_load_add_c() {
entry:
  %p_alloca = alloca i8, i32 4, align 4
(...skipping 16 matching lines...)
  %l_c2 = add i32 %l_c, 1
  store i32 %l_c2, i32* %p_c, align 1

  ret i32 %l_c2
}
; CHECK-LABEL: test_fused_load_add_c
; alloca store
; CHECK: mov {{.*}}, esp
; CHECK: mov dword ptr {{.*}}, 999
; atomic store (w/ its own mfence)
-; CHECK: dword ptr [0]
+; CHECK: mov {{.*}}, 0
+; CHECK-NEXT: R_386_32
; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr
; CHECK: mfence
-; CHECK: dword ptr [4]
+; CHECK: mov {{.*}}, 4
+; CHECK-NEXT: R_386_32
; This load + add is no longer optimized into one,
; though perhaps it should be legal as long as
; the load stays on the same side of the fence.
; CHECK: mov {{.*}}, dword ptr
; CHECK: mfence
; CHECK: add {{.*}}, 1
; CHECK: mov dword ptr
-; CHECK: dword ptr [8]
+; CHECK: mov {{.*}}, 8
+; CHECK-NEXT: R_386_32
; CHECK: add {{.*}}, dword ptr
; CHECK: mov dword ptr

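[Note: the fused-versus-split distinction in these tests is x86 memory-operand folding: with no fence in between, the load folds into the add's memory operand; with a fence, the two halves must stay on opposite sides of it. Schematically, with hypothetical operands:

    add eax, dword ptr [...]   ; fused: one instruction does load + add

versus

    mov eax, dword ptr [...]   ; load stays before the fence
    mfence
    add eax, 1                 ; add happens after the fence
]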
; Test where a bunch of i8 loads could have been fused into one
; i32 load, but a fence blocks that.
define i32 @could_have_fused_loads() {
entry:
  %ptr1 = bitcast [4 x i8]* @g32_d to i8*
  %b1 = load i8* %ptr1
(...skipping 19 matching lines...)
  %b12 = or i32 %b1.ext, %b2.shift
  %b3.ext = zext i8 %b3 to i32
  %b3.shift = shl i32 %b3.ext, 16
  %b123 = or i32 %b12, %b3.shift
  %b4.ext = zext i8 %b4 to i32
  %b4.shift = shl i32 %b4.ext, 24
  %b1234 = or i32 %b123, %b4.shift
  ret i32 %b1234
}
; CHECK-LABEL: could_have_fused_loads
-; CHECK: dword ptr [0]
+; CHECK: mov {{.*}}, 0
+; CHECK-NEXT: R_386_32
; CHECK: mov {{.*}}, byte ptr
; CHECK: mov {{.*}}, byte ptr
; CHECK: mov {{.*}}, byte ptr
; CHECK: mfence
; CHECK: mov {{.*}}, byte ptr

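[Note: absent the fence, the four byte loads combined with the zext/shl/or chain compute exactly a little-endian i32 load, so a hypothetical fused form of the function body, in the same typed-pointer IR syntax the test uses, would be:

    %ptr32 = bitcast [4 x i8]* @g32_d to i32*
    %b1234 = load i32* %ptr32
    ret i32 %b1234
]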

; Test where an identical load from two branches could have been hoisted
; up, and then the code merged, but a fence prevents it.
define i32 @could_have_hoisted_loads(i32 %x) {
entry:
  %ptr = bitcast [4 x i8]* @g32_d to i32*
  %cmp = icmp eq i32 %x, 1
  br i1 %cmp, label %branch1, label %branch2
branch1:
  %y = load i32* %ptr
  ret i32 %y
branch2:
  call void @llvm.nacl.atomic.fence.all()
  %z = load i32* %ptr
  ret i32 %z
}
; CHECK-LABEL: could_have_hoisted_loads
-; CHECK: dword ptr [0]
+; CHECK: mov {{.*}}, 0
+; CHECK-NEXT: R_386_32
; CHECK: jne {{.*}}
; CHECK: mov {{.*}}, dword ptr
; CHECK: ret
; CHECK: mfence
; CHECK: mov {{.*}}, dword ptr
; CHECK: ret
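
[Note: for contrast, without the fence a hoisting/merging pass could place a single load in the entry block and collapse the two now-identical branches. A hypothetical merged form in the test's IR syntax:

    entry:
      %ptr = bitcast [4 x i8]* @g32_d to i32*
      %v = load i32* %ptr
      ret i32 %v
]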
