OLD | NEW |
1 ; This tests the NaCl intrinsics not related to atomic operations. | 1 ; This tests the NaCl intrinsics not related to atomic operations. |
2 | 2 |
3 ; RUN: %p2i -i %s --args -O2 --verbose none -sandbox \ | 3 ; RUN: %p2i -i %s --assemble --disassemble --args -O2 --verbose none -sandbox \ |
4 ; RUN: | llvm-mc -triple=i686-none-nacl -filetype=obj \ | 4 ; RUN: | FileCheck %s |
5 ; RUN: | llvm-objdump -d --symbolize -x86-asm-syntax=intel - | FileCheck %s | 5 ; RUN: %p2i -i %s --assemble --disassemble --args -Om1 --verbose none -sandbox \ |
6 ; RUN: %p2i -i %s --args -Om1 --verbose none -sandbox \ | 6 ; RUN: | FileCheck %s |
7 ; RUN: | llvm-mc -triple=i686-none-nacl -filetype=obj \ | |
8 ; RUN: | llvm-objdump -d --symbolize -x86-asm-syntax=intel - | FileCheck %s | |
9 | 7 |
10 ; Do another run w/ O2 and a different check-prefix (otherwise O2 and Om1 | 8 ; Do another run w/ O2 and a different check-prefix (otherwise O2 and Om1 |
11 ; share the same "CHECK" prefix). This separate run helps check that | 9 ; share the same "CHECK" prefix). This separate run helps check that |
12 ; some code is optimized out. | 10 ; some code is optimized out. |
13 ; RUN: %p2i -i %s --args -O2 --verbose none -sandbox \ | 11 ; RUN: %p2i -i %s --assemble --disassemble --args -O2 --verbose none -sandbox \ |
14 ; RUN: | llvm-mc -triple=i686-none-nacl -filetype=obj \ | |
15 ; RUN: | llvm-objdump -d --symbolize -x86-asm-syntax=intel - \ | |
16 ; RUN: | FileCheck --check-prefix=CHECKO2REM %s | 12 ; RUN: | FileCheck --check-prefix=CHECKO2REM %s |
17 | 13 |
18 ; Do O2 runs without -sandbox to make sure llvm.nacl.read.tp gets | 14 ; Do O2 runs without -sandbox to make sure llvm.nacl.read.tp gets |
19 ; lowered to __nacl_read_tp instead of gs:[0x0]. | 15 ; lowered to __nacl_read_tp instead of gs:[0x0]. |
20 ; We also know that because it's O2, it'll have the O2REM optimizations. | 16 ; We also know that because it's O2, it'll have the O2REM optimizations. |
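 | ; For reference, the two lowerings differ roughly like this (illustrative |
 | ; sketch only; not text that this test matches verbatim): |
 | ;   sandboxed:   mov eax,DWORD PTR gs:[0]   ; inline TLS read |
 | ;   unsandboxed: call __nacl_read_tp        ; pointer returned in eax |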
21 ; RUN: %p2i -i %s --args -O2 --verbose none \ | 17 ; RUN: %p2i -i %s --assemble --disassemble --args -O2 --verbose none \ |
22 ; RUN: | llvm-mc -triple=i686-none-nacl -filetype=obj \ | |
23 ; RUN: | llvm-objdump -d --symbolize -x86-asm-syntax=intel - \ | |
24 ; RUN: | FileCheck --check-prefix=CHECKO2UNSANDBOXEDREM %s | 18 ; RUN: | FileCheck --check-prefix=CHECKO2UNSANDBOXEDREM %s |
25 | 19 |
26 declare i8* @llvm.nacl.read.tp() | 20 declare i8* @llvm.nacl.read.tp() |
27 declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) | 21 declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) |
28 declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) | 22 declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) |
29 declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) | 23 declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) |
30 declare void @llvm.nacl.longjmp(i8*, i32) | 24 declare void @llvm.nacl.longjmp(i8*, i32) |
31 declare i32 @llvm.nacl.setjmp(i8*) | 25 declare i32 @llvm.nacl.setjmp(i8*) |
32 declare float @llvm.sqrt.f32(float) | 26 declare float @llvm.sqrt.f32(float) |
33 declare double @llvm.sqrt.f64(double) | 27 declare double @llvm.sqrt.f64(double) |
(...skipping 10 matching lines...) |
44 declare i8* @llvm.stacksave() | 38 declare i8* @llvm.stacksave() |
45 declare void @llvm.stackrestore(i8*) | 39 declare void @llvm.stackrestore(i8*) |
46 | 40 |
47 define i32 @test_nacl_read_tp() { | 41 define i32 @test_nacl_read_tp() { |
48 entry: | 42 entry: |
49 %ptr = call i8* @llvm.nacl.read.tp() | 43 %ptr = call i8* @llvm.nacl.read.tp() |
50 %__1 = ptrtoint i8* %ptr to i32 | 44 %__1 = ptrtoint i8* %ptr to i32 |
51 ret i32 %__1 | 45 ret i32 %__1 |
52 } | 46 } |
53 ; CHECK-LABEL: test_nacl_read_tp | 47 ; CHECK-LABEL: test_nacl_read_tp |
54 ; CHECK: mov e{{.*}}, dword ptr gs:[0] | 48 ; CHECK: mov e{{.*}},DWORD PTR gs:[0] |
55 ; CHECKO2REM-LABEL: test_nacl_read_tp | 49 ; CHECKO2REM-LABEL: test_nacl_read_tp |
56 ; CHECKO2REM: mov e{{.*}}, dword ptr gs:[0] | 50 ; CHECKO2REM: mov e{{.*}},DWORD PTR gs:[0] |
57 ; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp | 51 ; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp |
58 ; CHECKO2UNSANDBOXEDREM: call __nacl_read_tp | 52 ; CHECKO2UNSANDBOXEDREM: call |
 | ; CHECKO2UNSANDBOXEDREM-NEXT: R_{{.*}} __nacl_read_tp |
59 | 53 |
60 define i32 @test_nacl_read_tp_more_addressing() { | 54 define i32 @test_nacl_read_tp_more_addressing() { |
61 entry: | 55 entry: |
62 %ptr = call i8* @llvm.nacl.read.tp() | 56 %ptr = call i8* @llvm.nacl.read.tp() |
63 %__1 = ptrtoint i8* %ptr to i32 | 57 %__1 = ptrtoint i8* %ptr to i32 |
64 %x = add i32 %__1, %__1 | 58 %x = add i32 %__1, %__1 |
65 %__3 = inttoptr i32 %x to i32* | 59 %__3 = inttoptr i32 %x to i32* |
66 %v = load i32* %__3, align 1 | 60 %v = load i32* %__3, align 1 |
67 %v_add = add i32 %v, 1 | 61 %v_add = add i32 %v, 1 |
68 | 62 |
69 %ptr2 = call i8* @llvm.nacl.read.tp() | 63 %ptr2 = call i8* @llvm.nacl.read.tp() |
70 %__6 = ptrtoint i8* %ptr2 to i32 | 64 %__6 = ptrtoint i8* %ptr2 to i32 |
71 %y = add i32 %__6, 4 | 65 %y = add i32 %__6, 4 |
72 %__8 = inttoptr i32 %y to i32* | 66 %__8 = inttoptr i32 %y to i32* |
73 %v_add2 = add i32 %v, 4 | 67 %v_add2 = add i32 %v, 4 |
74 store i32 %v_add2, i32* %__8, align 1 | 68 store i32 %v_add2, i32* %__8, align 1 |
75 ret i32 %v | 69 ret i32 %v |
76 } | 70 } |
77 ; CHECK-LABEL: test_nacl_read_tp_more_addressing | 71 ; CHECK-LABEL: test_nacl_read_tp_more_addressing |
78 ; CHECK: mov e{{.*}}, dword ptr gs:[0] | 72 ; CHECK: mov e{{.*}},DWORD PTR gs:[0] |
79 ; CHECK: mov e{{.*}}, dword ptr gs:[0] | 73 ; CHECK: mov e{{.*}},DWORD PTR gs:[0] |
80 ; CHECKO2REM-LABEL: test_nacl_read_tp_more_addressing | 74 ; CHECKO2REM-LABEL: test_nacl_read_tp_more_addressing |
81 ; CHECKO2REM: mov e{{.*}}, dword ptr gs:[0] | 75 ; CHECKO2REM: mov e{{.*}},DWORD PTR gs:[0] |
82 ; CHECKO2REM: mov e{{.*}}, dword ptr gs:[0] | 76 ; CHECKO2REM: mov e{{.*}},DWORD PTR gs:[0] |
83 ; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp_more_addressing | 77 ; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp_more_addressing |
84 ; CHECKO2UNSANDBOXEDREM: call __nacl_read_tp | 78 ; CHECKO2UNSANDBOXEDREM: call |
 | ; CHECKO2UNSANDBOXEDREM-NEXT: R_{{.*}} __nacl_read_tp |
85 ; CHECKO2UNSANDBOXEDREM: call __nacl_read_tp | 79 ; CHECKO2UNSANDBOXEDREM: call |
 | ; CHECKO2UNSANDBOXEDREM-NEXT: R_{{.*}} __nacl_read_tp |
86 | 80 |
87 define i32 @test_nacl_read_tp_dead(i32 %a) { | 81 define i32 @test_nacl_read_tp_dead(i32 %a) { |
88 entry: | 82 entry: |
89 %ptr = call i8* @llvm.nacl.read.tp() | 83 %ptr = call i8* @llvm.nacl.read.tp() |
90 ; Not actually using the result of nacl read tp call. | 84 ; Not actually using the result of the nacl.read.tp call. |
91 ; In O2 mode this should be DCE'ed. | 85 ; In O2 mode this should be DCE'ed. |
92 ret i32 %a | 86 ret i32 %a |
93 } | 87 } |
94 ; Consider nacl.read.tp side-effect free, so it can be eliminated. | 88 ; Consider nacl.read.tp side-effect free, so it can be eliminated. |
95 ; CHECKO2REM-LABEL: test_nacl_read_tp_dead | 89 ; CHECKO2REM-LABEL: test_nacl_read_tp_dead |
96 ; CHECKO2REM-NOT: mov e{{.*}}, dword ptr gs:[0] | 90 ; CHECKO2REM-NOT: mov e{{.*}},DWORD PTR gs:[0] |
97 ; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp_dead | 91 ; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp_dead |
98 ; CHECKO2UNSANDBOXEDREM-NOT: call __nacl_read_tp | 92 ; CHECKO2UNSANDBOXEDREM-NOT: __nacl_read_tp |
99 | 93 |
100 define void @test_memcpy(i32 %iptr_dst, i32 %iptr_src, i32 %len) { | 94 define void @test_memcpy(i32 %iptr_dst, i32 %iptr_src, i32 %len) { |
101 entry: | 95 entry: |
102 %dst = inttoptr i32 %iptr_dst to i8* | 96 %dst = inttoptr i32 %iptr_dst to i8* |
103 %src = inttoptr i32 %iptr_src to i8* | 97 %src = inttoptr i32 %iptr_src to i8* |
104 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, | 98 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, |
105 i32 %len, i32 1, i1 false) | 99 i32 %len, i32 1, i1 false) |
106 ret void | 100 ret void |
107 } | 101 } |
108 ; CHECK-LABEL: test_memcpy | 102 ; CHECK-LABEL: test_memcpy |
109 ; CHECK: call memcpy | 103 ; CHECK: call |
| 104 ; CHECK-NEXT: R_{{.*}} memcpy |
110 ; CHECKO2REM-LABEL: test_memcpy | 105 ; CHECKO2REM-LABEL: test_memcpy |
111 ; CHECKO2UNSANDBOXEDREM-LABEL: test_memcpy | 106 ; CHECKO2UNSANDBOXEDREM-LABEL: test_memcpy |
112 | 107 |
113 ; TODO(jvoung) -- if we want to be clever, we can do this and the memmove, | 108 ; TODO(jvoung) -- if we want to be clever, we can do this and the memmove, |
114 ; memset without a function call. | 109 ; memset without a function call. |
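 | ; For the constant 8-byte case below, the inlined form could be as simple |
 | ; as two load/store pairs (hypothetical lowering; registers arbitrary): |
 | ;   mov ecx,DWORD PTR [esi] |
 | ;   mov DWORD PTR [edi],ecx |
 | ;   mov ecx,DWORD PTR [esi+4] |
 | ;   mov DWORD PTR [edi+4],ecx |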
115 define void @test_memcpy_const_len_align(i32 %iptr_dst, i32 %iptr_src) { | 110 define void @test_memcpy_const_len_align(i32 %iptr_dst, i32 %iptr_src) { |
116 entry: | 111 entry: |
117 %dst = inttoptr i32 %iptr_dst to i8* | 112 %dst = inttoptr i32 %iptr_dst to i8* |
118 %src = inttoptr i32 %iptr_src to i8* | 113 %src = inttoptr i32 %iptr_src to i8* |
119 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, | 114 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, |
120 i32 8, i32 1, i1 false) | 115 i32 8, i32 1, i1 false) |
121 ret void | 116 ret void |
122 } | 117 } |
123 ; CHECK-LABEL: test_memcpy_const_len_align | 118 ; CHECK-LABEL: test_memcpy_const_len_align |
124 ; CHECK: call memcpy | 119 ; CHECK: call |
| 120 ; CHECK-NEXT: R_{{.*}} memcpy |
125 | 121 |
126 define void @test_memmove(i32 %iptr_dst, i32 %iptr_src, i32 %len) { | 122 define void @test_memmove(i32 %iptr_dst, i32 %iptr_src, i32 %len) { |
127 entry: | 123 entry: |
128 %dst = inttoptr i32 %iptr_dst to i8* | 124 %dst = inttoptr i32 %iptr_dst to i8* |
129 %src = inttoptr i32 %iptr_src to i8* | 125 %src = inttoptr i32 %iptr_src to i8* |
130 call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, | 126 call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, |
131 i32 %len, i32 1, i1 false) | 127 i32 %len, i32 1, i1 false) |
132 ret void | 128 ret void |
133 } | 129 } |
134 ; CHECK-LABEL: test_memmove | 130 ; CHECK-LABEL: test_memmove |
135 ; CHECK: call memmove | 131 ; CHECK: call |
| 132 ; CHECK-NEXT: R_{{.*}} memmove |
136 | 133 |
137 define void @test_memmove_const_len_align(i32 %iptr_dst, i32 %iptr_src) { | 134 define void @test_memmove_const_len_align(i32 %iptr_dst, i32 %iptr_src) { |
138 entry: | 135 entry: |
139 %dst = inttoptr i32 %iptr_dst to i8* | 136 %dst = inttoptr i32 %iptr_dst to i8* |
140 %src = inttoptr i32 %iptr_src to i8* | 137 %src = inttoptr i32 %iptr_src to i8* |
141 call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, | 138 call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, |
142 i32 8, i32 1, i1 false) | 139 i32 8, i32 1, i1 false) |
143 ret void | 140 ret void |
144 } | 141 } |
145 ; CHECK-LABEL: test_memmove_const_len_align | 142 ; CHECK-LABEL: test_memmove_const_len_align |
146 ; CHECK: call memmove | 143 ; CHECK: call |
| 144 ; CHECK-NEXT: R_{{.*}} memmove |
147 | 145 |
148 define void @test_memset(i32 %iptr_dst, i32 %wide_val, i32 %len) { | 146 define void @test_memset(i32 %iptr_dst, i32 %wide_val, i32 %len) { |
149 entry: | 147 entry: |
150 %val = trunc i32 %wide_val to i8 | 148 %val = trunc i32 %wide_val to i8 |
151 %dst = inttoptr i32 %iptr_dst to i8* | 149 %dst = inttoptr i32 %iptr_dst to i8* |
152 call void @llvm.memset.p0i8.i32(i8* %dst, i8 %val, | 150 call void @llvm.memset.p0i8.i32(i8* %dst, i8 %val, |
153 i32 %len, i32 1, i1 false) | 151 i32 %len, i32 1, i1 false) |
154 ret void | 152 ret void |
155 } | 153 } |
156 ; CHECK-LABEL: test_memset | 154 ; CHECK-LABEL: test_memset |
157 ; CHECK: movzx | 155 ; CHECK: movzx |
158 ; CHECK: call memset | 156 ; CHECK: call |
| 157 ; CHECK-NEXT: R_{{.*}} memset |
159 | 158 |
160 define void @test_memset_const_len_align(i32 %iptr_dst, i32 %wide_val) { | 159 define void @test_memset_const_len_align(i32 %iptr_dst, i32 %wide_val) { |
161 entry: | 160 entry: |
162 %val = trunc i32 %wide_val to i8 | 161 %val = trunc i32 %wide_val to i8 |
163 %dst = inttoptr i32 %iptr_dst to i8* | 162 %dst = inttoptr i32 %iptr_dst to i8* |
164 call void @llvm.memset.p0i8.i32(i8* %dst, i8 %val, | 163 call void @llvm.memset.p0i8.i32(i8* %dst, i8 %val, |
165 i32 8, i32 1, i1 false) | 164 i32 8, i32 1, i1 false) |
166 ret void | 165 ret void |
167 } | 166 } |
168 ; CHECK-LABEL: test_memset_const_len_align | 167 ; CHECK-LABEL: test_memset_const_len_align |
169 ; CHECK: movzx | 168 ; CHECK: movzx |
170 ; CHECK: call memset | 169 ; CHECK: call |
| 170 ; CHECK-NEXT: R_{{.*}} memset |
171 | 171 |
172 define void @test_memset_const_val(i32 %iptr_dst, i32 %len) { | 172 define void @test_memset_const_val(i32 %iptr_dst, i32 %len) { |
173 entry: | 173 entry: |
174 %dst = inttoptr i32 %iptr_dst to i8* | 174 %dst = inttoptr i32 %iptr_dst to i8* |
175 call void @llvm.memset.p0i8.i32(i8* %dst, i8 0, i32 %len, i32 1, i1 false) | 175 call void @llvm.memset.p0i8.i32(i8* %dst, i8 0, i32 %len, i32 1, i1 false) |
176 ret void | 176 ret void |
177 } | 177 } |
178 ; CHECK-LABEL: test_memset_const_val | 178 ; CHECK-LABEL: test_memset_const_val |
179 ; Make sure the argument is legalized (can't movzx reg, 0). | 179 ; Make sure the argument is legalized (can't movzx reg, 0). |
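 | ; (movzx only accepts a register or memory source, never an immediate, so |
 | ; the constant 0 has to be materialized into a register first.) |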
180 ; CHECK: movzx {{.*}}, {{[^0]}} | 180 ; CHECK: movzx {{.*}},{{[^0]}} |
181 ; CHECK: call memset | 181 ; CHECK: call |
| 182 ; CHECK-NEXT: R_{{.*}} memset |
182 | 183 |
183 | 184 |
184 define i32 @test_setjmplongjmp(i32 %iptr_env) { | 185 define i32 @test_setjmplongjmp(i32 %iptr_env) { |
185 entry: | 186 entry: |
186 %env = inttoptr i32 %iptr_env to i8* | 187 %env = inttoptr i32 %iptr_env to i8* |
187 %i = call i32 @llvm.nacl.setjmp(i8* %env) | 188 %i = call i32 @llvm.nacl.setjmp(i8* %env) |
188 %r1 = icmp eq i32 %i, 0 | 189 %r1 = icmp eq i32 %i, 0 |
189 br i1 %r1, label %Zero, label %NonZero | 190 br i1 %r1, label %Zero, label %NonZero |
190 Zero: | 191 Zero: |
191 ; Redundant inttoptr, to make --pnacl cast-eliding/re-insertion happy. | 192 ; Redundant inttoptr, to make --pnacl cast-eliding/re-insertion happy. |
192 %env2 = inttoptr i32 %iptr_env to i8* | 193 %env2 = inttoptr i32 %iptr_env to i8* |
193 call void @llvm.nacl.longjmp(i8* %env2, i32 1) | 194 call void @llvm.nacl.longjmp(i8* %env2, i32 1) |
194 ret i32 0 | 195 ret i32 0 |
195 NonZero: | 196 NonZero: |
196 ret i32 1 | 197 ret i32 1 |
197 } | 198 } |
198 ; CHECK-LABEL: test_setjmplongjmp | 199 ; CHECK-LABEL: test_setjmplongjmp |
199 ; CHECK: call setjmp | 200 ; CHECK: call |
200 ; CHECK: call longjmp | 201 ; CHECK-NEXT: R_{{.*}} setjmp |
| 202 ; CHECK: call |
| 203 ; CHECK-NEXT: R_{{.*}} longjmp |
201 ; CHECKO2REM-LABEL: test_setjmplongjmp | 204 ; CHECKO2REM-LABEL: test_setjmplongjmp |
202 ; CHECKO2REM: call setjmp | 205 ; CHECKO2REM: call |
 | ; CHECKO2REM-NEXT: R_{{.*}} setjmp |
203 ; CHECKO2REM: call longjmp | 206 ; CHECKO2REM: call |
 | ; CHECKO2REM-NEXT: R_{{.*}} longjmp |
204 | 207 |
205 define i32 @test_setjmp_unused(i32 %iptr_env, i32 %i_other) { | 208 define i32 @test_setjmp_unused(i32 %iptr_env, i32 %i_other) { |
206 entry: | 209 entry: |
207 %env = inttoptr i32 %iptr_env to i8* | 210 %env = inttoptr i32 %iptr_env to i8* |
208 %i = call i32 @llvm.nacl.setjmp(i8* %env) | 211 %i = call i32 @llvm.nacl.setjmp(i8* %env) |
209 ret i32 %i_other | 212 ret i32 %i_other |
210 } | 213 } |
211 ; Don't consider setjmp side-effect free, so it's not eliminated if | 214 ; Don't consider setjmp side-effect free, so it's not eliminated if |
212 ; result unused. | 215 ; result unused. |
213 ; CHECKO2REM-LABEL: test_setjmp_unused | 216 ; CHECKO2REM-LABEL: test_setjmp_unused |
214 ; CHECKO2REM: call setjmp | 217 ; CHECKO2REM: call |
 | ; CHECKO2REM-NEXT: R_{{.*}} setjmp |
215 | 218 |
216 define float @test_sqrt_float(float %x, i32 %iptr) { | 219 define float @test_sqrt_float(float %x, i32 %iptr) { |
217 entry: | 220 entry: |
218 %r = call float @llvm.sqrt.f32(float %x) | 221 %r = call float @llvm.sqrt.f32(float %x) |
219 %r2 = call float @llvm.sqrt.f32(float %r) | 222 %r2 = call float @llvm.sqrt.f32(float %r) |
220 %r3 = call float @llvm.sqrt.f32(float -0.0) | 223 %r3 = call float @llvm.sqrt.f32(float -0.0) |
221 %r4 = fadd float %r2, %r3 | 224 %r4 = fadd float %r2, %r3 |
222 ret float %r4 | 225 ret float %r4 |
223 } | 226 } |
224 ; CHECK-LABEL: test_sqrt_float | 227 ; CHECK-LABEL: test_sqrt_float |
225 ; CHECK: sqrtss xmm{{.*}} | 228 ; CHECK: sqrtss xmm{{.*}} |
226 ; CHECK: sqrtss xmm{{.*}} | 229 ; CHECK: sqrtss xmm{{.*}} |
227 ; CHECK: sqrtss xmm{{.*}}, dword ptr | 230 ; CHECK: sqrtss xmm{{.*}},DWORD PTR |
228 | 231 |
229 define float @test_sqrt_float_mergeable_load(float %x, i32 %iptr) { | 232 define float @test_sqrt_float_mergeable_load(float %x, i32 %iptr) { |
230 entry: | 233 entry: |
231 %__2 = inttoptr i32 %iptr to float* | 234 %__2 = inttoptr i32 %iptr to float* |
232 %y = load float* %__2, align 4 | 235 %y = load float* %__2, align 4 |
233 %r5 = call float @llvm.sqrt.f32(float %y) | 236 %r5 = call float @llvm.sqrt.f32(float %y) |
234 %r6 = fadd float %x, %r5 | 237 %r6 = fadd float %x, %r5 |
235 ret float %r6 | 238 ret float %r6 |
236 } | 239 } |
237 ; CHECK-LABEL: test_sqrt_float_mergeable_load | 240 ; CHECK-LABEL: test_sqrt_float_mergeable_load |
238 ; We could fold the load and the sqrt into one operation, but the | 241 ; We could fold the load and the sqrt into one operation, but the |
239 ; current folding only handles load + arithmetic op. The sqrt inst | 242 ; current folding only handles load + arithmetic op. The sqrt inst |
240 ; is considered an intrinsic call and not an arithmetic op. | 243 ; is considered an intrinsic call and not an arithmetic op. |
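 | ; If it were folded, the separate load would disappear into something like |
 | ; "sqrtss xmm{{.*}},DWORD PTR [e{{.*}}]" (illustrative only). |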
241 ; CHECK: sqrtss xmm{{.*}} | 244 ; CHECK: sqrtss xmm{{.*}} |
242 | 245 |
243 define double @test_sqrt_double(double %x, i32 %iptr) { | 246 define double @test_sqrt_double(double %x, i32 %iptr) { |
244 entry: | 247 entry: |
245 %r = call double @llvm.sqrt.f64(double %x) | 248 %r = call double @llvm.sqrt.f64(double %x) |
246 %r2 = call double @llvm.sqrt.f64(double %r) | 249 %r2 = call double @llvm.sqrt.f64(double %r) |
247 %r3 = call double @llvm.sqrt.f64(double -0.0) | 250 %r3 = call double @llvm.sqrt.f64(double -0.0) |
248 %r4 = fadd double %r2, %r3 | 251 %r4 = fadd double %r2, %r3 |
249 ret double %r4 | 252 ret double %r4 |
250 } | 253 } |
251 ; CHECK-LABEL: test_sqrt_double | 254 ; CHECK-LABEL: test_sqrt_double |
252 ; CHECK: sqrtsd xmm{{.*}} | 255 ; CHECK: sqrtsd xmm{{.*}} |
253 ; CHECK: sqrtsd xmm{{.*}} | 256 ; CHECK: sqrtsd xmm{{.*}} |
254 ; CHECK: sqrtsd xmm{{.*}}, qword ptr | 257 ; CHECK: sqrtsd xmm{{.*}},QWORD PTR |
255 | 258 |
256 define double @test_sqrt_double_mergeable_load(double %x, i32 %iptr) { | 259 define double @test_sqrt_double_mergeable_load(double %x, i32 %iptr) { |
257 entry: | 260 entry: |
258 %__2 = inttoptr i32 %iptr to double* | 261 %__2 = inttoptr i32 %iptr to double* |
259 %y = load double* %__2, align 8 | 262 %y = load double* %__2, align 8 |
260 %r5 = call double @llvm.sqrt.f64(double %y) | 263 %r5 = call double @llvm.sqrt.f64(double %y) |
261 %r6 = fadd double %x, %r5 | 264 %r6 = fadd double %x, %r5 |
262 ret double %r6 | 265 ret double %r6 |
263 } | 266 } |
264 ; CHECK-LABEL: test_sqrt_double_mergeable_load | 267 ; CHECK-LABEL: test_sqrt_double_mergeable_load |
(...skipping 54 matching lines...) |
319 define i32 @test_ctlz_32(i32 %x) { | 322 define i32 @test_ctlz_32(i32 %x) { |
320 entry: | 323 entry: |
321 %r = call i32 @llvm.ctlz.i32(i32 %x, i1 false) | 324 %r = call i32 @llvm.ctlz.i32(i32 %x, i1 false) |
322 ret i32 %r | 325 ret i32 %r |
323 } | 326 } |
324 ; CHECK-LABEL: test_ctlz_32 | 327 ; CHECK-LABEL: test_ctlz_32 |
325 ; TODO(jvoung): If we detect that LZCNT is supported, then use that | 328 ; TODO(jvoung): If we detect that LZCNT is supported, then use that |
326 ; and avoid the need to do the cmovne and xor stuff to guarantee that | 329 ; and avoid the need to do the cmovne and xor stuff to guarantee that |
327 ; the result is well-defined w/ input == 0. | 330 ; the result is well-defined w/ input == 0. |
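 | ; With LZCNT the sequence would collapse to a single "lzcnt" instruction, |
 | ; since lzcnt of 0 is defined to return the operand width (32 here). |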
328 ; CHECK: bsr [[REG_TMP:e.*]], {{.*}} | 331 ; CHECK: bsr [[REG_TMP:e.*]],{{.*}} |
329 ; CHECK: mov [[REG_RES:e.*]], 63 | 332 ; CHECK: mov [[REG_RES:e.*]],63 |
330 ; CHECK: cmovne [[REG_RES]], [[REG_TMP]] | 333 ; CHECK: cmovne [[REG_RES]],[[REG_TMP]] |
331 ; CHECK: xor [[REG_RES]], 31 | 334 ; CHECK: xor [[REG_RES]],31 |
332 | 335 |
333 define i32 @test_ctlz_32_const() { | 336 define i32 @test_ctlz_32_const() { |
334 entry: | 337 entry: |
335 %r = call i32 @llvm.ctlz.i32(i32 123456, i1 false) | 338 %r = call i32 @llvm.ctlz.i32(i32 123456, i1 false) |
336 ret i32 %r | 339 ret i32 %r |
337 } | 340 } |
338 ; Could potentially constant fold this, but the front-end should have done that. | 341 ; Could potentially constant fold this, but the front-end should have done that. |
339 ; The dest operand must be a register and the source operand must be a register | 342 ; The dest operand must be a register and the source operand must be a register |
340 ; or memory. | 343 ; or memory. |
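 | ; (Folded, this would reduce to "mov e{{.*}},15": the highest set bit of |
 | ; 123456 is bit 16, so ctlz returns 31 - 16 = 15.) |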
341 ; CHECK-LABEL: test_ctlz_32_const | 344 ; CHECK-LABEL: test_ctlz_32_const |
342 ; CHECK: bsr e{{.*}}, {{.*}}e{{.*}} | 345 ; CHECK: bsr e{{.*}},{{.*}}e{{.*}} |
343 | 346 |
344 define i32 @test_ctlz_32_ignored(i32 %x) { | 347 define i32 @test_ctlz_32_ignored(i32 %x) { |
345 entry: | 348 entry: |
346 %ignored = call i32 @llvm.ctlz.i32(i32 %x, i1 false) | 349 %ignored = call i32 @llvm.ctlz.i32(i32 %x, i1 false) |
347 ret i32 1 | 350 ret i32 1 |
348 } | 351 } |
349 ; CHECKO2REM-LABEL: test_ctlz_32_ignored | 352 ; CHECKO2REM-LABEL: test_ctlz_32_ignored |
350 ; CHECKO2REM-NOT: bsr | 353 ; CHECKO2REM-NOT: bsr |
351 | 354 |
352 define i64 @test_ctlz_64(i64 %x) { | 355 define i64 @test_ctlz_64(i64 %x) { |
353 entry: | 356 entry: |
354 %r = call i64 @llvm.ctlz.i64(i64 %x, i1 false) | 357 %r = call i64 @llvm.ctlz.i64(i64 %x, i1 false) |
355 ret i64 %r | 358 ret i64 %r |
356 } | 359 } |
357 ; CHECKO2REM-LABEL: test_ctlz_64 | 360 ; CHECKO2REM-LABEL: test_ctlz_64 |
358 ; CHECK-LABEL: test_ctlz_64 | 361 ; CHECK-LABEL: test_ctlz_64 |
359 ; CHECK: bsr [[REG_TMP1:e.*]], {{.*}} | 362 ; CHECK: bsr [[REG_TMP1:e.*]],{{.*}} |
360 ; CHECK: mov [[REG_RES1:e.*]], 63 | 363 ; CHECK: mov [[REG_RES1:e.*]],63 |
361 ; CHECK: cmovne [[REG_RES1]], [[REG_TMP1]] | 364 ; CHECK: cmovne [[REG_RES1]],[[REG_TMP1]] |
362 ; CHECK: xor [[REG_RES1]], 31 | 365 ; CHECK: xor [[REG_RES1]],31 |
363 ; CHECK: add [[REG_RES1]], 32 | 366 ; CHECK: add [[REG_RES1]],32 |
364 ; CHECK: bsr [[REG_RES2:e.*]], {{.*}} | 367 ; CHECK: bsr [[REG_RES2:e.*]],{{.*}} |
365 ; CHECK: xor [[REG_RES2]], 31 | 368 ; CHECK: xor [[REG_RES2]],31 |
366 ; CHECK: test [[REG_UPPER:.*]], [[REG_UPPER]] | 369 ; CHECK: test [[REG_UPPER:.*]],[[REG_UPPER]] |
367 ; CHECK: cmove [[REG_RES2]], [[REG_RES1]] | 370 ; CHECK: cmove [[REG_RES2]],[[REG_RES1]] |
368 ; CHECK: mov {{.*}}, 0 | 371 ; CHECK: mov {{.*}},0 |
369 | 372 |
370 define i32 @test_ctlz_64_const(i64 %x) { | 373 define i32 @test_ctlz_64_const(i64 %x) { |
371 entry: | 374 entry: |
372 %r = call i64 @llvm.ctlz.i64(i64 123456789012, i1 false) | 375 %r = call i64 @llvm.ctlz.i64(i64 123456789012, i1 false) |
373 %r2 = trunc i64 %r to i32 | 376 %r2 = trunc i64 %r to i32 |
374 ret i32 %r2 | 377 ret i32 %r2 |
375 } | 378 } |
376 ; CHECK-LABEL: test_ctlz_64_const | 379 ; CHECK-LABEL: test_ctlz_64_const |
377 ; CHECK: bsr e{{.*}}, {{.*}}e{{.*}} | 380 ; CHECK: bsr e{{.*}},{{.*}}e{{.*}} |
378 ; CHECK: bsr e{{.*}}, {{.*}}e{{.*}} | 381 ; CHECK: bsr e{{.*}},{{.*}}e{{.*}} |
379 | 382 |
380 | 383 |
381 define i32 @test_ctlz_64_ignored(i64 %x) { | 384 define i32 @test_ctlz_64_ignored(i64 %x) { |
382 entry: | 385 entry: |
383 %ignored = call i64 @llvm.ctlz.i64(i64 1234567890, i1 false) | 386 %ignored = call i64 @llvm.ctlz.i64(i64 1234567890, i1 false) |
384 ret i32 2 | 387 ret i32 2 |
385 } | 388 } |
386 ; CHECKO2REM-LABEL: test_ctlz_64_ignored | 389 ; CHECKO2REM-LABEL: test_ctlz_64_ignored |
387 ; CHECKO2REM-NOT: bsr | 390 ; CHECKO2REM-NOT: bsr |
388 | 391 |
389 define i32 @test_cttz_32(i32 %x) { | 392 define i32 @test_cttz_32(i32 %x) { |
390 entry: | 393 entry: |
391 %r = call i32 @llvm.cttz.i32(i32 %x, i1 false) | 394 %r = call i32 @llvm.cttz.i32(i32 %x, i1 false) |
392 ret i32 %r | 395 ret i32 %r |
393 } | 396 } |
394 ; CHECK-LABEL: test_cttz_32 | 397 ; CHECK-LABEL: test_cttz_32 |
395 ; CHECK: bsf [[REG_IF_NOTZERO:e.*]], {{.*}} | 398 ; CHECK: bsf [[REG_IF_NOTZERO:e.*]],{{.*}} |
396 ; CHECK: mov [[REG_IF_ZERO:e.*]], 32 | 399 ; CHECK: mov [[REG_IF_ZERO:e.*]],32 |
397 ; CHECK: cmovne [[REG_IF_ZERO]], [[REG_IF_NOTZERO]] | 400 ; CHECK: cmovne [[REG_IF_ZERO]],[[REG_IF_NOTZERO]] |
398 | 401 |
399 define i64 @test_cttz_64(i64 %x) { | 402 define i64 @test_cttz_64(i64 %x) { |
400 entry: | 403 entry: |
401 %r = call i64 @llvm.cttz.i64(i64 %x, i1 false) | 404 %r = call i64 @llvm.cttz.i64(i64 %x, i1 false) |
402 ret i64 %r | 405 ret i64 %r |
403 } | 406 } |
404 ; CHECK-LABEL: test_cttz_64 | 407 ; CHECK-LABEL: test_cttz_64 |
405 ; CHECK: bsf [[REG_IF_NOTZERO:e.*]], {{.*}} | 408 ; CHECK: bsf [[REG_IF_NOTZERO:e.*]],{{.*}} |
406 ; CHECK: mov [[REG_RES1:e.*]], 32 | 409 ; CHECK: mov [[REG_RES1:e.*]],32 |
407 ; CHECK: cmovne [[REG_RES1]], [[REG_IF_NOTZERO]] | 410 ; CHECK: cmovne [[REG_RES1]],[[REG_IF_NOTZERO]] |
408 ; CHECK: add [[REG_RES1]], 32 | 411 ; CHECK: add [[REG_RES1]],32 |
409 ; CHECK: bsf [[REG_RES2:e.*]], [[REG_LOWER:.*]] | 412 ; CHECK: bsf [[REG_RES2:e.*]],[[REG_LOWER:.*]] |
410 ; CHECK: test [[REG_LOWER]], [[REG_LOWER]] | 413 ; CHECK: test [[REG_LOWER]],[[REG_LOWER]] |
411 ; CHECK: cmove [[REG_RES2]], [[REG_RES1]] | 414 ; CHECK: cmove [[REG_RES2]],[[REG_RES1]] |
412 ; CHECK: mov {{.*}}, 0 | 415 ; CHECK: mov {{.*}},0 |
413 | 416 |
414 define i32 @test_popcount_32(i32 %x) { | 417 define i32 @test_popcount_32(i32 %x) { |
415 entry: | 418 entry: |
416 %r = call i32 @llvm.ctpop.i32(i32 %x) | 419 %r = call i32 @llvm.ctpop.i32(i32 %x) |
417 ret i32 %r | 420 ret i32 %r |
418 } | 421 } |
419 ; CHECK-LABEL: test_popcount_32 | 422 ; CHECK-LABEL: test_popcount_32 |
420 ; CHECK: call __popcountsi2 | 423 ; CHECK: call |
| 424 ; CHECK-NEXT: R_{{.*}} __popcountsi2 |
421 | 425 |
422 define i64 @test_popcount_64(i64 %x) { | 426 define i64 @test_popcount_64(i64 %x) { |
423 entry: | 427 entry: |
424 %r = call i64 @llvm.ctpop.i64(i64 %x) | 428 %r = call i64 @llvm.ctpop.i64(i64 %x) |
425 ret i64 %r | 429 ret i64 %r |
426 } | 430 } |
427 ; CHECK-LABEL: test_popcount_64 | 431 ; CHECK-LABEL: test_popcount_64 |
428 ; CHECK: call __popcountdi2 | 432 ; CHECK: call |
| 433 ; CHECK-NEXT: R_{{.*}} __popcountdi2 |
429 ; __popcountdi2 only returns a 32-bit result, so clear the upper bits of | 434 ; __popcountdi2 only returns a 32-bit result, so clear the upper bits of |
430 ; the return value just in case. | 435 ; the return value just in case. |
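 | ; (A 64-bit popcount is at most 64, so the upper word is always zero; the |
 | ; mov below just makes that explicit.) |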
431 ; CHECK: mov {{.*}}, 0 | 436 ; CHECK: mov {{.*}},0 |
432 | 437 |
433 | 438 |
434 define i32 @test_popcount_64_ret_i32(i64 %x) { | 439 define i32 @test_popcount_64_ret_i32(i64 %x) { |
435 entry: | 440 entry: |
436 %r_i64 = call i64 @llvm.ctpop.i64(i64 %x) | 441 %r_i64 = call i64 @llvm.ctpop.i64(i64 %x) |
437 %r = trunc i64 %r_i64 to i32 | 442 %r = trunc i64 %r_i64 to i32 |
438 ret i32 %r | 443 ret i32 %r |
439 } | 444 } |
440 ; If there is a trunc, then the mov {{.*}}, 0 is dead and gets optimized out. | 445 ; If there is a trunc, then the mov {{.*}},0 is dead and gets optimized out. |
441 ; CHECKO2REM-LABEL: test_popcount_64_ret_i32 | 446 ; CHECKO2REM-LABEL: test_popcount_64_ret_i32 |
442 ; CHECKO2REM: call __popcountdi2 | 447 ; CHECKO2REM: call |
 | ; CHECKO2REM-NEXT: R_{{.*}} __popcountdi2 |
443 ; CHECKO2REM-NOT: mov {{.*}}, 0 | 448 ; CHECKO2REM-NOT: mov {{.*}},0 |
444 | 449 |
445 define void @test_stacksave_noalloca() { | 450 define void @test_stacksave_noalloca() { |
446 entry: | 451 entry: |
447 %sp = call i8* @llvm.stacksave() | 452 %sp = call i8* @llvm.stacksave() |
448 call void @llvm.stackrestore(i8* %sp) | 453 call void @llvm.stackrestore(i8* %sp) |
449 ret void | 454 ret void |
450 } | 455 } |
451 ; CHECK-LABEL: test_stacksave_noalloca | 456 ; CHECK-LABEL: test_stacksave_noalloca |
452 ; CHECK: mov {{.*}}, esp | 457 ; CHECK: mov {{.*}},esp |
453 ; CHECK: mov esp, {{.*}} | 458 ; CHECK: mov esp,{{.*}} |
454 | 459 |
455 declare i32 @foo(i32 %x) | 460 declare i32 @foo(i32 %x) |
456 | 461 |
457 define void @test_stacksave_multiple(i32 %x) { | 462 define void @test_stacksave_multiple(i32 %x) { |
458 entry: | 463 entry: |
459 %x_4 = mul i32 %x, 4 | 464 %x_4 = mul i32 %x, 4 |
460 %sp1 = call i8* @llvm.stacksave() | 465 %sp1 = call i8* @llvm.stacksave() |
461 %tmp1 = alloca i8, i32 %x_4, align 4 | 466 %tmp1 = alloca i8, i32 %x_4, align 4 |
462 | 467 |
463 %sp2 = call i8* @llvm.stacksave() | 468 %sp2 = call i8* @llvm.stacksave() |
(...skipping 11 matching lines...) |
475 store i32 %x, i32* %__10, align 1 | 480 store i32 %x, i32* %__10, align 1 |
476 | 481 |
477 %__11 = bitcast i8* %tmp3 to i32* | 482 %__11 = bitcast i8* %tmp3 to i32* |
478 store i32 %x, i32* %__11, align 1 | 483 store i32 %x, i32* %__11, align 1 |
479 | 484 |
480 call void @llvm.stackrestore(i8* %sp1) | 485 call void @llvm.stackrestore(i8* %sp1) |
481 ret void | 486 ret void |
482 } | 487 } |
483 ; CHECK-LABEL: test_stacksave_multiple | 488 ; CHECK-LABEL: test_stacksave_multiple |
484 ; At least 3 copies of esp, but probably more from having to do the allocas. | 489 ; At least 3 copies of esp, but probably more from having to do the allocas. |
485 ; CHECK: mov {{.*}}, esp | 490 ; CHECK: mov {{.*}},esp |
486 ; CHECK: mov {{.*}}, esp | 491 ; CHECK: mov {{.*}},esp |
487 ; CHECK: mov {{.*}}, esp | 492 ; CHECK: mov {{.*}},esp |
488 ; CHECK: mov esp, {{.*}} | 493 ; CHECK: mov esp,{{.*}} |