Chromium Code Reviews

Side by Side Diff: tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll

Issue 509233002: Convert lit tests to check disassembled assembly. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: reorder some CALLTARGETS-LABEL Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff |
OLD | NEW
1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. 1 ; Test that loads/stores don't move across a nacl.atomic.fence.all.
2 ; This should apply to both atomic and non-atomic loads/stores 2 ; This should apply to both atomic and non-atomic loads/stores
3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only 3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only
4 ; applies to atomic loads/stores). 4 ; applies to atomic loads/stores).
5 ; 5 ;
6 ; RUN: %llvm2ice -O2 --verbose none %s | FileCheck %s
7 ; RUN: %llvm2ice -O2 --verbose none %s \ 6 ; RUN: %llvm2ice -O2 --verbose none %s \
8 ; RUN: | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj 7 ; RUN: | llvm-mc -triple=i686-none-nacl -x86-asm-syntax=intel -filetype=obj \
8 ; RUN: | llvm-objdump -d -symbolize -x86-asm-syntax=intel - | FileCheck %s
9
10 ; TODO(jvoung): llvm-objdump doesn't symbolize global symbols well, so we
11 ; have [0] == g32_a, [4] == g32_b, [8] == g32_c.
12 ; g32_d is also [0] because it's in the .data section instead of .bss.
9 13
10 declare void @llvm.nacl.atomic.fence.all() 14 declare void @llvm.nacl.atomic.fence.all()
11 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) 15 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32)
12 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) 16 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32)
13 17
14 @g32_a = internal global [4 x i8] zeroinitializer, align 4 18 @g32_a = internal global [4 x i8] zeroinitializer, align 4
15 @g32_b = internal global [4 x i8] zeroinitializer, align 4 19 @g32_b = internal global [4 x i8] zeroinitializer, align 4
16 @g32_c = internal global [4 x i8] zeroinitializer, align 4 20 @g32_c = internal global [4 x i8] zeroinitializer, align 4
17 @g32_d = internal global [4 x i8] c"\02\00\00\00", align 4 21 @g32_d = internal global [4 x i8] c"\02\00\00\00", align 4
18 22
(...skipping 19 matching lines...)
38 call void @llvm.nacl.atomic.fence.all() 42 call void @llvm.nacl.atomic.fence.all()
39 store i32 %l_c2, i32* %p_c, align 1 43 store i32 %l_c2, i32* %p_c, align 1
40 44
41 ret i32 %l_c2 45 ret i32 %l_c2
42 } 46 }
43 ; CHECK-LABEL: test_fused_load_add_a 47 ; CHECK-LABEL: test_fused_load_add_a
44 ; alloca store 48 ; alloca store
45 ; CHECK: mov {{.*}}, esp 49 ; CHECK: mov {{.*}}, esp
46 ; CHECK: mov dword ptr {{.*}}, 999 50 ; CHECK: mov dword ptr {{.*}}, 999
47 ; atomic store (w/ its own mfence) 51 ; atomic store (w/ its own mfence)
48 ; CHECK: lea {{.*}}, g32_a 52 ; CHECK: dword ptr [0]
49 ; The load + add are optimized into one everywhere. 53 ; The load + add are optimized into one everywhere.
50 ; CHECK: add {{.*}}, dword ptr 54 ; CHECK: add {{.*}}, dword ptr
51 ; CHECK: mov dword ptr 55 ; CHECK: mov dword ptr
52 ; CHECK: mfence 56 ; CHECK: mfence
53 ; CHECK: lea {{.*}}, g32_b 57 ; CHECK: dword ptr [4]
54 ; CHECK: add {{.*}}, dword ptr 58 ; CHECK: add {{.*}}, dword ptr
55 ; CHECK: mov dword ptr 59 ; CHECK: mov dword ptr
56 ; CHECK: lea {{.*}}, g32_c 60 ; CHECK: dword ptr [8]
57 ; CHECK: add {{.*}}, dword ptr 61 ; CHECK: add {{.*}}, dword ptr
58 ; CHECK: mfence 62 ; CHECK: mfence
59 ; CHECK: mov dword ptr 63 ; CHECK: mov dword ptr
60 64
61 ; Test with the fence moved up a bit. 65 ; Test with the fence moved up a bit.
62 define i32 @test_fused_load_add_b() { 66 define i32 @test_fused_load_add_b() {
63 entry: 67 entry:
64 %p_alloca = alloca i8, i32 4, align 4 68 %p_alloca = alloca i8, i32 4, align 4
65 %p_alloca_bc = bitcast i8* %p_alloca to i32* 69 %p_alloca_bc = bitcast i8* %p_alloca to i32*
66 store i32 999, i32* %p_alloca_bc, align 1 70 store i32 999, i32* %p_alloca_bc, align 1
(...skipping 14 matching lines...)
81 %l_c2 = add i32 %l_c, 1 85 %l_c2 = add i32 %l_c, 1
82 store i32 %l_c2, i32* %p_c, align 1 86 store i32 %l_c2, i32* %p_c, align 1
83 87
84 ret i32 %l_c2 88 ret i32 %l_c2
85 } 89 }
86 ; CHECK-LABEL: test_fused_load_add_b 90 ; CHECK-LABEL: test_fused_load_add_b
87 ; alloca store 91 ; alloca store
88 ; CHECK: mov {{.*}}, esp 92 ; CHECK: mov {{.*}}, esp
89 ; CHECK: mov dword ptr {{.*}}, 999 93 ; CHECK: mov dword ptr {{.*}}, 999
90 ; atomic store (w/ its own mfence) 94 ; atomic store (w/ its own mfence)
91 ; CHECK: lea {{.*}}, g32_a 95 ; CHECK: dword ptr [0]
92 ; CHECK: add {{.*}}, dword ptr 96 ; CHECK: add {{.*}}, dword ptr
93 ; CHECK: mov dword ptr 97 ; CHECK: mov dword ptr
94 ; CHECK: mfence 98 ; CHECK: mfence
95 ; CHECK: lea {{.*}}, g32_b 99 ; CHECK: dword ptr [4]
96 ; CHECK: add {{.*}}, dword ptr 100 ; CHECK: add {{.*}}, dword ptr
97 ; CHECK: mov dword ptr 101 ; CHECK: mov dword ptr
98 ; CHECK: lea {{.*}}, g32_c 102 ; CHECK: dword ptr [8]
99 ; CHECK: mfence 103 ; CHECK: mfence
100 ; Load + add can still be optimized into one instruction 104 ; Load + add can still be optimized into one instruction
101 ; because it is not separated by a fence. 105 ; because it is not separated by a fence.
102 ; CHECK: add {{.*}}, dword ptr 106 ; CHECK: add {{.*}}, dword ptr
103 ; CHECK: mov dword ptr 107 ; CHECK: mov dword ptr
104 108
105 ; Test with the fence splitting a load/add. 109 ; Test with the fence splitting a load/add.
106 define i32 @test_fused_load_add_c() { 110 define i32 @test_fused_load_add_c() {
107 entry: 111 entry:
108 %p_alloca = alloca i8, i32 4, align 4 112 %p_alloca = alloca i8, i32 4, align 4
(...skipping 16 matching lines...)
125 %l_c2 = add i32 %l_c, 1 129 %l_c2 = add i32 %l_c, 1
126 store i32 %l_c2, i32* %p_c, align 1 130 store i32 %l_c2, i32* %p_c, align 1
127 131
128 ret i32 %l_c2 132 ret i32 %l_c2
129 } 133 }
130 ; CHECK-LABEL: test_fused_load_add_c 134 ; CHECK-LABEL: test_fused_load_add_c
131 ; alloca store 135 ; alloca store
132 ; CHECK: mov {{.*}}, esp 136 ; CHECK: mov {{.*}}, esp
133 ; CHECK: mov dword ptr {{.*}}, 999 137 ; CHECK: mov dword ptr {{.*}}, 999
134 ; atomic store (w/ its own mfence) 138 ; atomic store (w/ its own mfence)
135 ; CHECK: lea {{.*}}, g32_a 139 ; CHECK: dword ptr [0]
136 ; CHECK: add {{.*}}, dword ptr 140 ; CHECK: add {{.*}}, dword ptr
137 ; CHECK: mov dword ptr 141 ; CHECK: mov dword ptr
138 ; CHECK: mfence 142 ; CHECK: mfence
139 ; CHECK: lea {{.*}}, g32_b 143 ; CHECK: dword ptr [4]
140 ; This load + add pair is no longer optimized into one, 144 ; This load + add pair is no longer optimized into one,
141 ; though perhaps it should be legal as long as 145 ; though perhaps it should be legal as long as
142 ; the load stays on the same side of the fence. 146 ; the load stays on the same side of the fence.
143 ; CHECK: mov {{.*}}, dword ptr 147 ; CHECK: mov {{.*}}, dword ptr
144 ; CHECK: mfence 148 ; CHECK: mfence
145 ; CHECK: add {{.*}}, 1 149 ; CHECK: add {{.*}}, 1
146 ; CHECK: mov dword ptr 150 ; CHECK: mov dword ptr
147 ; CHECK: lea {{.*}}, g32_c 151 ; CHECK: dword ptr [8]
148 ; CHECK: add {{.*}}, dword ptr 152 ; CHECK: add {{.*}}, dword ptr
149 ; CHECK: mov dword ptr 153 ; CHECK: mov dword ptr
150 154
151 155
152 ; Test where a bunch of i8 loads could have been fused into one 156 ; Test where a bunch of i8 loads could have been fused into one
153 ; i32 load, but a fence blocks that. 157 ; i32 load, but a fence blocks that.
154 define i32 @could_have_fused_loads() { 158 define i32 @could_have_fused_loads() {
155 entry: 159 entry:
156 %ptr1 = bitcast [4 x i8]* @g32_d to i8* 160 %ptr1 = bitcast [4 x i8]* @g32_d to i8*
157 %b1 = load i8* %ptr1 161 %b1 = load i8* %ptr1
(...skipping 19 matching lines...)
177 %b12 = or i32 %b1.ext, %b2.shift 181 %b12 = or i32 %b1.ext, %b2.shift
178 %b3.ext = zext i8 %b3 to i32 182 %b3.ext = zext i8 %b3 to i32
179 %b3.shift = shl i32 %b3.ext, 16 183 %b3.shift = shl i32 %b3.ext, 16
180 %b123 = or i32 %b12, %b3.shift 184 %b123 = or i32 %b12, %b3.shift
181 %b4.ext = zext i8 %b4 to i32 185 %b4.ext = zext i8 %b4 to i32
182 %b4.shift = shl i32 %b4.ext, 24 186 %b4.shift = shl i32 %b4.ext, 24
183 %b1234 = or i32 %b123, %b4.shift 187 %b1234 = or i32 %b123, %b4.shift
184 ret i32 %b1234 188 ret i32 %b1234
185 } 189 }
186 ; CHECK-LABEL: could_have_fused_loads 190 ; CHECK-LABEL: could_have_fused_loads
187 ; CHECK: lea {{.*}}, g32_d 191 ; CHECK: dword ptr [0]
188 ; CHECK: mov {{.*}}, byte ptr 192 ; CHECK: mov {{.*}}, byte ptr
189 ; CHECK: mov {{.*}}, byte ptr 193 ; CHECK: mov {{.*}}, byte ptr
190 ; CHECK: mov {{.*}}, byte ptr 194 ; CHECK: mov {{.*}}, byte ptr
191 ; CHECK: mfence 195 ; CHECK: mfence
192 ; CHECK: mov {{.*}}, byte ptr 196 ; CHECK: mov {{.*}}, byte ptr
193 197
194 198
195 ; Test where an identical load from two branches could have been hoisted 199 ; Test where an identical load from two branches could have been hoisted
196 ; up, and then the code merged, but a fence prevents it. 200 ; up, and then the code merged, but a fence prevents it.
197 define i32 @could_have_hoisted_loads(i32 %x) { 201 define i32 @could_have_hoisted_loads(i32 %x) {
198 entry: 202 entry:
199 %ptr = bitcast [4 x i8]* @g32_d to i32* 203 %ptr = bitcast [4 x i8]* @g32_d to i32*
200 %cmp = icmp eq i32 %x, 1 204 %cmp = icmp eq i32 %x, 1
201 br i1 %cmp, label %branch1, label %branch2 205 br i1 %cmp, label %branch1, label %branch2
202 branch1: 206 branch1:
203 %y = load i32* %ptr 207 %y = load i32* %ptr
204 ret i32 %y 208 ret i32 %y
205 branch2: 209 branch2:
206 call void @llvm.nacl.atomic.fence.all() 210 call void @llvm.nacl.atomic.fence.all()
207 %z = load i32* %ptr 211 %z = load i32* %ptr
208 ret i32 %z 212 ret i32 %z
209 } 213 }
210 ; CHECK-LABEL: could_have_hoisted_loads 214 ; CHECK-LABEL: could_have_hoisted_loads
211 ; CHECK: lea {{.*}}, g32_d 215 ; CHECK: dword ptr [0]
212 ; CHECK: je {{.*}} 216 ; CHECK: je {{.*}}
213 ; CHECK: jmp {{.*}} 217 ; CHECK: jmp {{.*}}
214 ; CHECK: mov {{.*}}, dword ptr 218 ; CHECK: mov {{.*}}, dword ptr
215 ; CHECK: ret 219 ; CHECK: ret
216 ; CHECK: mfence 220 ; CHECK: mfence
217 ; CHECK: mov {{.*}}, dword ptr 221 ; CHECK: mov {{.*}}, dword ptr
218 ; CHECK: ret 222 ; CHECK: ret
OLD | NEW
« no previous file with comments | « tests_lit/llvm2ice_tests/nacl-atomic-cmpxchg-optimization.ll ('k') | tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll » ('j') | no next file with comments »

Powered by Google App Engine