OLD | NEW |
1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. | 1 ; Test that loads/stores don't move across a nacl.atomic.fence.all. |
2 ; This should apply to both atomic and non-atomic loads/stores | 2 ; This should apply to both atomic and non-atomic loads/stores |
3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only | 3 ; (unlike the non-"all" variety of nacl.atomic.fence, which only |
4 ; applies to atomic loads/stores). | 4 ; applies to atomic loads/stores). |
5 ; | 5 ; |
6 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s | 6 ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s |
7 | 7 |
8 declare void @llvm.nacl.atomic.fence.all() | 8 declare void @llvm.nacl.atomic.fence.all() |
9 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) | 9 declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) |
10 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) | 10 declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) |
(...skipping 22 matching lines...) Expand all Loading... |
33 %p_c = bitcast [4 x i8]* @g32_c to i32* | 33 %p_c = bitcast [4 x i8]* @g32_c to i32* |
34 %l_c = load i32, i32* %p_c, align 1 | 34 %l_c = load i32, i32* %p_c, align 1 |
35 %l_c2 = sub i32 1, %l_c | 35 %l_c2 = sub i32 1, %l_c |
36 call void @llvm.nacl.atomic.fence.all() | 36 call void @llvm.nacl.atomic.fence.all() |
37 store i32 %l_c2, i32* %p_c, align 1 | 37 store i32 %l_c2, i32* %p_c, align 1 |
38 | 38 |
39 ret i32 %l_c2 | 39 ret i32 %l_c2 |
40 } | 40 } |
41 ; CHECK-LABEL: test_fused_load_sub_a | 41 ; CHECK-LABEL: test_fused_load_sub_a |
42 ; alloca store | 42 ; alloca store |
43 ; CHECK: mov {{.*}},esp | |
44 ; CHECK: mov DWORD PTR {{.*}},0x3e7 | 43 ; CHECK: mov DWORD PTR {{.*}},0x3e7 |
45 ; atomic store (w/ its own mfence) | 44 ; atomic store (w/ its own mfence) |
46 ; Each load + sub pair is fused into one instruction everywhere. | 45 ; Each load + sub pair is fused into one instruction everywhere. |
47 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a | 46 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a |
48 ; CHECK: mov DWORD PTR | 47 ; CHECK: mov DWORD PTR |
49 ; CHECK: mfence | 48 ; CHECK: mfence |
50 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b | 49 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b |
51 ; CHECK: mov DWORD PTR | 50 ; CHECK: mov DWORD PTR |
52 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c | 51 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_c |
53 ; CHECK: mfence | 52 ; CHECK: mfence |
(...skipping 19 matching lines...) Expand all Loading... |
73 %p_c = bitcast [4 x i8]* @g32_c to i32* | 72 %p_c = bitcast [4 x i8]* @g32_c to i32* |
74 call void @llvm.nacl.atomic.fence.all() | 73 call void @llvm.nacl.atomic.fence.all() |
75 %l_c = load i32, i32* %p_c, align 1 | 74 %l_c = load i32, i32* %p_c, align 1 |
76 %l_c2 = sub i32 1, %l_c | 75 %l_c2 = sub i32 1, %l_c |
77 store i32 %l_c2, i32* %p_c, align 1 | 76 store i32 %l_c2, i32* %p_c, align 1 |
78 | 77 |
79 ret i32 %l_c2 | 78 ret i32 %l_c2 |
80 } | 79 } |
81 ; CHECK-LABEL: test_fused_load_sub_b | 80 ; CHECK-LABEL: test_fused_load_sub_b |
82 ; alloca store | 81 ; alloca store |
83 ; CHECK: mov {{.*}},esp | |
84 ; CHECK: mov DWORD PTR {{.*}},0x3e7 | 82 ; CHECK: mov DWORD PTR {{.*}},0x3e7 |
85 ; atomic store (w/ its own mfence) | 83 ; atomic store (w/ its own mfence) |
86 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a | 84 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a |
87 ; CHECK: mov DWORD PTR | 85 ; CHECK: mov DWORD PTR |
88 ; CHECK: mfence | 86 ; CHECK: mfence |
89 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b | 87 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_b |
90 ; CHECK: mov DWORD PTR | 88 ; CHECK: mov DWORD PTR |
91 ; CHECK: mfence | 89 ; CHECK: mfence |
92 ; Load + sub can still be optimized into one instruction | 90 ; Load + sub can still be optimized into one instruction |
93 ; because the two are not separated by a fence. | 91 ; because the two are not separated by a fence. |
(...skipping 20 matching lines...) Expand all Loading... |
114 | 112 |
115 %p_c = bitcast [4 x i8]* @g32_c to i32* | 113 %p_c = bitcast [4 x i8]* @g32_c to i32* |
116 %l_c = load i32, i32* %p_c, align 1 | 114 %l_c = load i32, i32* %p_c, align 1 |
117 %l_c2 = sub i32 1, %l_c | 115 %l_c2 = sub i32 1, %l_c |
118 store i32 %l_c2, i32* %p_c, align 1 | 116 store i32 %l_c2, i32* %p_c, align 1 |
119 | 117 |
120 ret i32 %l_c2 | 118 ret i32 %l_c2 |
121 } | 119 } |
122 ; CHECK-LABEL: test_fused_load_sub_c | 120 ; CHECK-LABEL: test_fused_load_sub_c |
123 ; alloca store | 121 ; alloca store |
124 ; CHECK: mov {{.*}},esp | |
125 ; CHECK: mov DWORD PTR {{.*}},0x3e7 | 122 ; CHECK: mov DWORD PTR {{.*}},0x3e7 |
126 ; atomic store (w/ its own mfence) | 123 ; atomic store (w/ its own mfence) |
127 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a | 124 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a |
128 ; CHECK: mov DWORD PTR | 125 ; CHECK: mov DWORD PTR |
129 ; CHECK: mfence | 126 ; CHECK: mfence |
130 ; This load + sub pair is no longer fused into one instruction, | 127 ; This load + sub pair is no longer fused into one instruction, |
131 ; though perhaps fusing them should be legal as long as | 128 ; though perhaps fusing them should be legal as long as |
132 ; the load stays on the same side of the fence. | 129 ; the load stays on the same side of the fence. |
133 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_b | 130 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_b |
134 ; CHECK: mfence | 131 ; CHECK: mfence |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
196 %z = load i32, i32* %ptr, align 1 | 193 %z = load i32, i32* %ptr, align 1 |
197 ret i32 %z | 194 ret i32 %z |
198 } | 195 } |
199 ; CHECK-LABEL: could_have_hoisted_loads | 196 ; CHECK-LABEL: could_have_hoisted_loads |
200 ; CHECK: jne {{.*}} | 197 ; CHECK: jne {{.*}} |
201 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d | 198 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d |
202 ; CHECK: ret | 199 ; CHECK: ret |
203 ; CHECK: mfence | 200 ; CHECK: mfence |
204 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d | 201 ; CHECK: mov {{.*}},DWORD PTR {{.*}}g32_d |
205 ; CHECK: ret | 202 ; CHECK: ret |
OLD | NEW |