Chromium Code Reviews

Unified diff: tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll

Issue 1169493002: Subzero: Improve/refactor folding loads into the next instruction. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Code review changes (created 5 years, 6 months ago)
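The change under review refactors how Subzero folds a load into the next instruction that consumes it: at -O2, a load whose only use is the following arithmetic instruction can be emitted as an x86 memory operand of that instruction, but the fold must not cross an atomic fence. The tests below are rewritten from add to sub, plausibly (this is an inference, not stated in the patch description) because sub is non-commutative, so the CHECK patterns pin down exactly which operand the load folds into. As an illustrative sketch only (the registers and the global name g32 are hypothetical), the fused form the updated checks expect is

  %v = load i32, i32* %p, align 1
  %r = sub i32 1, %v

compiling to roughly

  mov eax,0x1
  sub eax,DWORD PTR [g32]   ; load folded into the sub as a memory operand

while a fence between the load and the sub forces the load to stay separate, since folding would in effect move the load to the other side of the mfence:

  mov ecx,DWORD PTR [g32]
  mfence
  mov eax,0x1
  sub eax,ecx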
--- a/tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll
+++ b/tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll
 ; Test that loads/stores don't move across a nacl.atomic.fence.all.
 ; This should apply to both atomic and non-atomic loads/stores
 ; (unlike the non-"all" variety of nacl.atomic.fence, which only
 ; applies to atomic load/stores).
 ;
 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s

 declare void @llvm.nacl.atomic.fence.all()
 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32)
 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32)

 @g32_a = internal global [4 x i8] zeroinitializer, align 4
 @g32_b = internal global [4 x i8] zeroinitializer, align 4
 @g32_c = internal global [4 x i8] zeroinitializer, align 4
 @g32_d = internal global [4 x i8] zeroinitializer, align 4

-define i32 @test_fused_load_add_a() {
+define i32 @test_fused_load_sub_a() {
 entry:
   %p_alloca = alloca i8, i32 4, align 4
   %p_alloca_bc = bitcast i8* %p_alloca to i32*
   store i32 999, i32* %p_alloca_bc, align 1

   %p_a = bitcast [4 x i8]* @g32_a to i32*
   %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6)
-  %l_a2 = add i32 %l_a, 1
+  %l_a2 = sub i32 1, %l_a
   call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6)

   %p_b = bitcast [4 x i8]* @g32_b to i32*
   %l_b = load i32, i32* %p_b, align 1
-  %l_b2 = add i32 %l_b, 1
+  %l_b2 = sub i32 1, %l_b
   store i32 %l_b2, i32* %p_b, align 1

   %p_c = bitcast [4 x i8]* @g32_c to i32*
   %l_c = load i32, i32* %p_c, align 1
-  %l_c2 = add i32 %l_c, 1
+  %l_c2 = sub i32 1, %l_c
   call void @llvm.nacl.atomic.fence.all()
   store i32 %l_c2, i32* %p_c, align 1

   ret i32 %l_c2
 }
-; CHECK-LABEL: test_fused_load_add_a
+; CHECK-LABEL: test_fused_load_sub_a
 ; alloca store
 ; CHECK: mov {{.*}},esp
 ; CHECK: mov DWORD PTR {{.*}},0x3e7
 ; atomic store (w/ its own mfence)
-; The load + add are optimized into one everywhere.
-; CHECK: add {{.*}},DWORD PTR {{.*}}g32_a
+; The load + sub are optimized into one everywhere.
+; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a
 ; CHECK: mov DWORD PTR
 ; CHECK: mfence
-; CHECK: add {{.*}},DWORD PTR {{.*}}g32_b
+; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b
 ; CHECK: mov DWORD PTR
-; CHECK: add {{.*}},DWORD PTR {{.*}}g32_c
+; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c
 ; CHECK: mfence
 ; CHECK: mov DWORD PTR

 ; Test with the fence moved up a bit.
-define i32 @test_fused_load_add_b() {
+define i32 @test_fused_load_sub_b() {
 entry:
   %p_alloca = alloca i8, i32 4, align 4
   %p_alloca_bc = bitcast i8* %p_alloca to i32*
   store i32 999, i32* %p_alloca_bc, align 1

   %p_a = bitcast [4 x i8]* @g32_a to i32*
   %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6)
-  %l_a2 = add i32 %l_a, 1
+  %l_a2 = sub i32 1, %l_a
   call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6)

   %p_b = bitcast [4 x i8]* @g32_b to i32*
   %l_b = load i32, i32* %p_b, align 1
-  %l_b2 = add i32 %l_b, 1
+  %l_b2 = sub i32 1, %l_b
   store i32 %l_b2, i32* %p_b, align 1

   %p_c = bitcast [4 x i8]* @g32_c to i32*
   call void @llvm.nacl.atomic.fence.all()
   %l_c = load i32, i32* %p_c, align 1
-  %l_c2 = add i32 %l_c, 1
+  %l_c2 = sub i32 1, %l_c
   store i32 %l_c2, i32* %p_c, align 1

   ret i32 %l_c2
 }
-; CHECK-LABEL: test_fused_load_add_b
+; CHECK-LABEL: test_fused_load_sub_b
 ; alloca store
 ; CHECK: mov {{.*}},esp
 ; CHECK: mov DWORD PTR {{.*}},0x3e7
 ; atomic store (w/ its own mfence)
-; CHECK: add {{.*}},DWORD PTR {{.*}}g32_a
+; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a
 ; CHECK: mov DWORD PTR
 ; CHECK: mfence
-; CHECK: add {{.*}},DWORD PTR {{.*}}g32_b
+; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b
 ; CHECK: mov DWORD PTR
 ; CHECK: mfence
-; Load + add can still be optimized into one instruction
+; Load + sub can still be optimized into one instruction
 ; because it is not separated by a fence.
-; CHECK: add {{.*}},DWORD PTR {{.*}}g32_c
+; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c
 ; CHECK: mov DWORD PTR

-; Test with the fence splitting a load/add.
-define i32 @test_fused_load_add_c() {
+; Test with the fence splitting a load/sub.
+define i32 @test_fused_load_sub_c() {
 entry:
   %p_alloca = alloca i8, i32 4, align 4
   %p_alloca_bc = bitcast i8* %p_alloca to i32*
   store i32 999, i32* %p_alloca_bc, align 1

   %p_a = bitcast [4 x i8]* @g32_a to i32*
   %l_a = call i32 @llvm.nacl.atomic.load.i32(i32* %p_a, i32 6)
-  %l_a2 = add i32 %l_a, 1
+  %l_a2 = sub i32 1, %l_a
   call void @llvm.nacl.atomic.store.i32(i32 %l_a2, i32* %p_a, i32 6)

   %p_b = bitcast [4 x i8]* @g32_b to i32*
   %l_b = load i32, i32* %p_b, align 1
   call void @llvm.nacl.atomic.fence.all()
-  %l_b2 = add i32 %l_b, 1
+  %l_b2 = sub i32 1, %l_b
   store i32 %l_b2, i32* %p_b, align 1

   %p_c = bitcast [4 x i8]* @g32_c to i32*
   %l_c = load i32, i32* %p_c, align 1
-  %l_c2 = add i32 %l_c, 1
+  %l_c2 = sub i32 1, %l_c
   store i32 %l_c2, i32* %p_c, align 1

   ret i32 %l_c2
 }
-; CHECK-LABEL: test_fused_load_add_c
+; CHECK-LABEL: test_fused_load_sub_c
 ; alloca store
 ; CHECK: mov {{.*}},esp
 ; CHECK: mov DWORD PTR {{.*}},0x3e7
 ; atomic store (w/ its own mfence)
-; CHECK: add {{.*}},DWORD PTR {{.*}}g32_a
+; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a
 ; CHECK: mov DWORD PTR
 ; CHECK: mfence
-; This load + add are no longer optimized into one,
+; This load + sub are no longer optimized into one,
 ; though perhaps it should be legal as long as
 ; the load stays on the same side of the fence.
 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_b
 ; CHECK: mfence
-; CHECK: add {{.*}},0x1
+; CHECK: mov {{.*}},0x1
+; CHECK: sub
 ; CHECK: mov DWORD PTR
-; CHECK: add {{.*}},DWORD PTR {{.*}}g32_c
+; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c
 ; CHECK: mov DWORD PTR


 ; Test where a bunch of i8 loads could have been fused into one
 ; i32 load, but a fence blocks that.
 define i32 @could_have_fused_loads() {
 entry:
   %ptr1 = bitcast [4 x i8]* @g32_d to i8*
   %b1 = load i8, i8* %ptr1, align 1

(...skipping 47 matching lines...)

   %z = load i32, i32* %ptr, align 1
   ret i32 %z
 }
 ; CHECK-LABEL: could_have_hoisted_loads
 ; CHECK: jne {{.*}}
 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d
 ; CHECK: ret
 ; CHECK: mfence
 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d
 ; CHECK: ret
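To reproduce these checks locally, this lit test can be run with LLVM's lit driver from a Subzero checkout (the runner path below is an assumption; the exact invocation depends on the build setup):

  python llvm/utils/lit/lit.py -v tests_lit/llvm2ice_tests/nacl-atomic-fence-all.ll

The RUN line translates the file with Subzero at -O2 (%p2i is the test suite's pnacl-to-Subzero driver substitution), disassembles the resulting object, and pipes the disassembly through FileCheck, which matches the CHECK patterns against it in order and reports the first pattern that fails to match.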