Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(29)

Side by Side Diff: tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll

Issue 1531623007: Add option to force filetype=asm for testing (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Missed one --sandbox in the wrong place. Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. 1 ; Test that loads/stores don't move across a nacl.atomic.fence.all.
2 ; This should apply to both atomic and non-atomic loads/stores 2 ; This should apply to both atomic and non-atomic loads/stores
3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only 3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only
4 ; applies to atomic loads/stores). 4 ; applies to atomic loads/stores).
5 ; 5 ;
6 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s 6 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s
7 7
8 declare void @llvm.nacl.atomic.fence.all() 8 declare void @llvm.nacl.atomic.fence.all()
9 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) 9 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32)
10 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) 10 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32)
(...skipping 25 matching lines...) Expand all
36 call void @llvm.nacl.atomic.fence.all() 36 call void @llvm.nacl.atomic.fence.all()
37 store i32 %l_c2, i32* %p_c, align 1 37 store i32 %l_c2, i32* %p_c, align 1
38 38
39 ret i32 %l_c2 39 ret i32 %l_c2
40 } 40 }
41 ; CHECK-LABEL: test_fused_load_sub_a 41 ; CHECK-LABEL: test_fused_load_sub_a
42 ; alloca store 42 ; alloca store
43 ; CHECK: mov DWORD PTR {{.*}},0x3e7 43 ; CHECK: mov DWORD PTR {{.*}},0x3e7
44 ; atomic store (w/ its own mfence) 44 ; atomic store (w/ its own mfence)
45 ; The load + sub are optimized into one everywhere. 45 ; The load + sub are optimized into one everywhere.
46 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a 46 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_a)|(.bss)}}
47 ; CHECK: mov DWORD PTR 47 ; CHECK: mov {{(DWORD PTR)?}}
48 ; CHECK: mfence 48 ; CHECK: mfence
49 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b 49 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_b)|(.bss)}}
50 ; CHECK: mov DWORD PTR 50 ; CHECK: mov {{(DWORD PTR)?}}
51 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c 51 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_c)|(.bss)}}
52 ; CHECK: mfence 52 ; CHECK: mfence
53 ; CHECK: mov DWORD PTR 53 ; CHECK: mov {{(DWORD PTR)?}}
54 54
55 ; Test with the fence moved up a bit. 55 ; Test with the fence moved up a bit.
56 define internal i32 @test_fused_load_sub_b() { 56 define internal i32 @test_fused_load_sub_b() {
57 entry: 57 entry:
58 %p_alloca = alloca i8, i32 4, align 4 58 %p_alloca = alloca i8, i32 4, align 4
59 %p_alloca_bc = bitcast i8* %p_alloca to i32* 59 %p_alloca_bc = bitcast i8* %p_alloca to i32*
60 store i32 999, i32* %p_alloca_bc, align 1 60 store i32 999, i32* %p_alloca_bc, align 1
61 61
62 %p_a = bitcast [4 x i8]* @g32_a to i32* 62 %p_a = bitcast [4 x i8]* @g32_a to i32*
63 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) 63 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6)
(...skipping 10 matching lines...) Expand all
74 %l_c = load i32, i32* %p_c, align 1 74 %l_c = load i32, i32* %p_c, align 1
75 %l_c2 = sub i32 1, %l_c 75 %l_c2 = sub i32 1, %l_c
76 store i32 %l_c2, i32* %p_c, align 1 76 store i32 %l_c2, i32* %p_c, align 1
77 77
78 ret i32 %l_c2 78 ret i32 %l_c2
79 } 79 }
80 ; CHECK-LABEL: test_fused_load_sub_b 80 ; CHECK-LABEL: test_fused_load_sub_b
81 ; alloca store 81 ; alloca store
82 ; CHECK: mov DWORD PTR {{.*}},0x3e7 82 ; CHECK: mov DWORD PTR {{.*}},0x3e7
83 ; atomic store (w/ its own mfence) 83 ; atomic store (w/ its own mfence)
84 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a 84 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_a)|(.bss)}}
85 ; CHECK: mov DWORD PTR 85 ; CHECK: mov {{(DWORD PTR)?}}
86 ; CHECK: mfence 86 ; CHECK: mfence
87 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b 87 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_b)|(.bss)}}
88 ; CHECK: mov DWORD PTR 88 ; CHECK: mov {{(DWORD PTR)?}}
89 ; CHECK: mfence 89 ; CHECK: mfence
90 ; Load + sub can still be optimized into one instruction 90 ; Load + sub can still be optimized into one instruction
91 ; because it is not separated by a fence. 91 ; because it is not separated by a fence.
92 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c 92 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_c)|(.bss)}}
93 ; CHECK: mov DWORD PTR 93 ; CHECK: mov {{(DWORD PTR)?}}
94 94
95 ; Test with the fence splitting a load/sub. 95 ; Test with the fence splitting a load/sub.
96 define internal i32 @test_fused_load_sub_c() { 96 define internal i32 @test_fused_load_sub_c() {
97 entry: 97 entry:
98 %p_alloca = alloca i8, i32 4, align 4 98 %p_alloca = alloca i8, i32 4, align 4
99 %p_alloca_bc = bitcast i8* %p_alloca to i32* 99 %p_alloca_bc = bitcast i8* %p_alloca to i32*
100 store i32 999, i32* %p_alloca_bc, align 1 100 store i32 999, i32* %p_alloca_bc, align 1
101 101
102 %p_a = bitcast [4 x i8]* @g32_a to i32* 102 %p_a = bitcast [4 x i8]* @g32_a to i32*
103 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6) 103 %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6)
(...skipping 10 matching lines...) Expand all
114 %l_c = load i32, i32* %p_c, align 1 114 %l_c = load i32, i32* %p_c, align 1
115 %l_c2 = sub i32 1, %l_c 115 %l_c2 = sub i32 1, %l_c
116 store i32 %l_c2, i32* %p_c, align 1 116 store i32 %l_c2, i32* %p_c, align 1
117 117
118 ret i32 %l_c2 118 ret i32 %l_c2
119 } 119 }
120 ; CHECK-LABEL: test_fused_load_sub_c 120 ; CHECK-LABEL: test_fused_load_sub_c
121 ; alloca store 121 ; alloca store
122 ; CHECK: mov DWORD PTR {{.*}},0x3e7 122 ; CHECK: mov DWORD PTR {{.*}},0x3e7
123 ; atomic store (w/ its own mfence) 123 ; atomic store (w/ its own mfence)
124 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a 124 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_a)|(.bss)}}
125 ; CHECK: mov DWORD PTR 125 ; CHECK: mov {{(DWORD PTR)?}}
126 ; CHECK: mfence 126 ; CHECK: mfence
127 ; This load + sub pair is no longer optimized into one, 127 ; This load + sub pair is no longer optimized into one,
128 ; though perhaps fusing them should be legal as long as 128 ; though perhaps fusing them should be legal as long as
129 ; the load stays on the same side of the fence. 129 ; the load stays on the same side of the fence.
130 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_b 130 ; CHECK: mov {{.*}},{{(DWORD PTR )?}}{{.*}}{{(g32_b)|(.bss)}}
131 ; CHECK: mfence 131 ; CHECK: mfence
132 ; CHECK: mov {{.*}},0x1 132 ; CHECK: mov {{.*}},0x1
133 ; CHECK: sub 133 ; CHECK: sub
134 ; CHECK: mov DWORD PTR 134 ; CHECK: mov {{(DWORD PTR)?}}
135 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c 135 ; CHECK: sub {{.*}},DWORD PTR {{.*}}{{(g32_c)|(.bss)}}
136 ; CHECK: mov DWORD PTR 136 ; CHECK: mov {{(DWORD PTR)?}}
137 137
138 138
139 ; Test where a bunch of i8 loads could have been fused into one 139 ; Test where a bunch of i8 loads could have been fused into one
140 ; i32 load, but a fence blocks that. 140 ; i32 load, but a fence blocks that.
141 define internal i32 @could_have_fused_loads() { 141 define internal i32 @could_have_fused_loads() {
142 entry: 142 entry:
143 %ptr1 = bitcast [4 x i8]* @g32_d to i8* 143 %ptr1 = bitcast [4 x i8]* @g32_d to i8*
144 %b1 = load i8, i8* %ptr1, align 1 144 %b1 = load i8, i8* %ptr1, align 1
145 145
146 %int_ptr2 = ptrtoint [4 x i8]* @g32_d to i32 146 %int_ptr2 = ptrtoint [4 x i8]* @g32_d to i32
(...skipping 17 matching lines...) Expand all
164 %b12 = or i32 %b1.ext, %b2.shift 164 %b12 = or i32 %b1.ext, %b2.shift
165 %b3.ext = zext i8 %b3 to i32 165 %b3.ext = zext i8 %b3 to i32
166 %b3.shift = shl i32 %b3.ext, 16 166 %b3.shift = shl i32 %b3.ext, 16
167 %b123 = or i32 %b12, %b3.shift 167 %b123 = or i32 %b12, %b3.shift
168 %b4.ext = zext i8 %b4 to i32 168 %b4.ext = zext i8 %b4 to i32
169 %b4.shift = shl i32 %b4.ext, 24 169 %b4.shift = shl i32 %b4.ext, 24
170 %b1234 = or i32 %b123, %b4.shift 170 %b1234 = or i32 %b123, %b4.shift
171 ret i32 %b1234 171 ret i32 %b1234
172 } 172 }
173 ; CHECK-LABEL: could_have_fused_loads 173 ; CHECK-LABEL: could_have_fused_loads
174 ; CHECK: mov {{.*}},BYTE PTR 174 ; CHECK: mov {{.*}},{{(BYTE PTR)?}}
175 ; CHECK: mov {{.*}},BYTE PTR 175 ; CHECK: mov {{.*}},BYTE PTR
176 ; CHECK: mov {{.*}},BYTE PTR 176 ; CHECK: mov {{.*}},BYTE PTR
177 ; CHECK: mfence 177 ; CHECK: mfence
178 ; CHECK: mov {{.*}},BYTE PTR 178 ; CHECK: mov {{.*}},BYTE PTR
179 179
180 180
181 ; Test where an identical load from two branches could have been hoisted 181 ; Test where an identical load from two branches could have been hoisted
182 ; up, and then the code merged, but a fence prevents it. 182 ; up, and then the code merged, but a fence prevents it.
183 define internal i32 @could_have_hoisted_loads(i32 %x) { 183 define internal i32 @could_have_hoisted_loads(i32 %x) {
184 entry: 184 entry:
185 %ptr = bitcast [4 x i8]* @g32_d to i32* 185 %ptr = bitcast [4 x i8]* @g32_d to i32*
186 %cmp = icmp eq i32 %x, 1 186 %cmp = icmp eq i32 %x, 1
187 br i1 %cmp, label %branch1, label %branch2 187 br i1 %cmp, label %branch1, label %branch2
188 branch1: 188 branch1:
189 %y = load i32, i32* %ptr, align 1 189 %y = load i32, i32* %ptr, align 1
190 ret i32 %y 190 ret i32 %y
191 branch2: 191 branch2:
192 call void @llvm.nacl.atomic.fence.all() 192 call void @llvm.nacl.atomic.fence.all()
193 %z = load i32, i32* %ptr, align 1 193 %z = load i32, i32* %ptr, align 1
194 ret i32 %z 194 ret i32 %z
195 } 195 }
196 ; CHECK-LABEL: could_have_hoisted_loads 196 ; CHECK-LABEL: could_have_hoisted_loads
197 ; CHECK: jne {{.*}} 197 ; CHECK: jne {{.*}}
198 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d 198 ; CHECK: mov {{.*}},{{(DWORD PTR )?}}{{.*}}{{(g32_d)|(.bss)}}
199 ; CHECK: ret 199 ; CHECK: ret
200 ; CHECK: mfence 200 ; CHECK: mfence
201 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d 201 ; CHECK: mov {{.*}},{{(DWORD PTR )?}}{{.*}}{{(g32_d)|(.bss)}}
202 ; CHECK: ret 202 ; CHECK: ret
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698