OLD | NEW |
1 ; This tests the NaCl intrinsics not related to atomic operations. | 1 ; This tests the NaCl intrinsics not related to atomic operations. |
2 | 2 |
3 ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ | 3 ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ |
4 ; RUN: --target x8632 -i %s --args -O2 -sandbox \ | 4 ; RUN: --target x8632 -i %s --args -O2 -sandbox \ |
| 5 ; RUN: -allow-externally-defined-symbols \ |
5 ; RUN: | %if --need=target_X8632 --command FileCheck %s | 6 ; RUN: | %if --need=target_X8632 --command FileCheck %s |
6 ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ | 7 ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ |
7 ; RUN: --target x8632 -i %s --args -Om1 -sandbox \ | 8 ; RUN: --target x8632 -i %s --args -Om1 -sandbox \ |
| 9 ; RUN: -allow-externally-defined-symbols \ |
8 ; RUN: | %if --need=target_X8632 --command FileCheck %s | 10 ; RUN: | %if --need=target_X8632 --command FileCheck %s |
9 | 11 |
10 ; Do another run w/ O2 and a different check-prefix (otherwise O2 and Om1 | 12 ; Do another run w/ O2 and a different check-prefix (otherwise O2 and Om1 |
11 ; share the same "CHECK" prefix). This separate run helps check that | 13 ; share the same "CHECK" prefix). This separate run helps check that |
12 ; some code is optimized out. | 14 ; some code is optimized out. |
13 ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ | 15 ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ |
14 ; RUN: --target x8632 -i %s --args -O2 -sandbox \ | 16 ; RUN: --target x8632 -i %s --args -O2 -sandbox \ |
| 17 ; RUN: -allow-externally-defined-symbols \ |
15 ; RUN: | %if --need=target_X8632 \ | 18 ; RUN: | %if --need=target_X8632 \ |
16 ; RUN: --command FileCheck --check-prefix=CHECKO2REM %s | 19 ; RUN: --command FileCheck --check-prefix=CHECKO2REM %s |
17 | 20 |
18 ; Do O2 runs without -sandbox to make sure llvm.nacl.read.tp gets | 21 ; Do O2 runs without -sandbox to make sure llvm.nacl.read.tp gets |
19 ; lowered to __nacl_read_tp instead of gs:0x0. | 22 ; lowered to __nacl_read_tp instead of gs:0x0. |
20 ; We also know that because it's O2, it'll have the O2REM optimizations. | 23 ; We also know that because it's O2, it'll have the O2REM optimizations. |
21 ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ | 24 ; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \ |
22 ; RUN: --target x8632 -i %s --args -O2 \ | 25 ; RUN: --target x8632 -i %s --args -O2 \ |
| 26 ; RUN: -allow-externally-defined-symbols \ |
23 ; RUN: | %if --need=target_X8632 \ | 27 ; RUN: | %if --need=target_X8632 \ |
24 ; RUN: --command FileCheck --check-prefix=CHECKO2UNSANDBOXEDREM %s | 28 ; RUN: --command FileCheck --check-prefix=CHECKO2UNSANDBOXEDREM %s |
25 | 29 |
26 ; RUN: %if --need=target_ARM32 --need=allow_dump \ | 30 ; RUN: %if --need=target_ARM32 --need=allow_dump \ |
27 ; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \ | 31 ; RUN: --command %p2i --filetype=asm --assemble --disassemble --target arm32 \ |
28 ; RUN: -i %s --args -O2 --skip-unimplemented \ | 32 ; RUN: -i %s --args -O2 --skip-unimplemented \ |
| 33 ; RUN: -allow-externally-defined-symbols \ |
29 ; RUN: | %if --need=target_ARM32 --need=allow_dump \ | 34 ; RUN: | %if --need=target_ARM32 --need=allow_dump \ |
30 ; RUN: --command FileCheck --check-prefix ARM32 %s | 35 ; RUN: --command FileCheck --check-prefix ARM32 %s |
31 | 36 |
32 declare i8* @llvm.nacl.read.tp() | 37 declare i8* @llvm.nacl.read.tp() |
33 declare void @llvm.nacl.longjmp(i8*, i32) | 38 declare void @llvm.nacl.longjmp(i8*, i32) |
34 declare i32 @llvm.nacl.setjmp(i8*) | 39 declare i32 @llvm.nacl.setjmp(i8*) |
35 declare float @llvm.sqrt.f32(float) | 40 declare float @llvm.sqrt.f32(float) |
36 declare double @llvm.sqrt.f64(double) | 41 declare double @llvm.sqrt.f64(double) |
37 declare float @llvm.fabs.f32(float) | 42 declare float @llvm.fabs.f32(float) |
38 declare double @llvm.fabs.f64(double) | 43 declare double @llvm.fabs.f64(double) |
39 declare <4 x float> @llvm.fabs.v4f32(<4 x float>) | 44 declare <4 x float> @llvm.fabs.v4f32(<4 x float>) |
40 declare void @llvm.trap() | 45 declare void @llvm.trap() |
41 declare i16 @llvm.bswap.i16(i16) | 46 declare i16 @llvm.bswap.i16(i16) |
42 declare i32 @llvm.bswap.i32(i32) | 47 declare i32 @llvm.bswap.i32(i32) |
43 declare i64 @llvm.bswap.i64(i64) | 48 declare i64 @llvm.bswap.i64(i64) |
44 declare i32 @llvm.ctlz.i32(i32, i1) | 49 declare i32 @llvm.ctlz.i32(i32, i1) |
45 declare i64 @llvm.ctlz.i64(i64, i1) | 50 declare i64 @llvm.ctlz.i64(i64, i1) |
46 declare i32 @llvm.cttz.i32(i32, i1) | 51 declare i32 @llvm.cttz.i32(i32, i1) |
47 declare i64 @llvm.cttz.i64(i64, i1) | 52 declare i64 @llvm.cttz.i64(i64, i1) |
48 declare i32 @llvm.ctpop.i32(i32) | 53 declare i32 @llvm.ctpop.i32(i32) |
49 declare i64 @llvm.ctpop.i64(i64) | 54 declare i64 @llvm.ctpop.i64(i64) |
50 declare i8* @llvm.stacksave() | 55 declare i8* @llvm.stacksave() |
51 declare void @llvm.stackrestore(i8*) | 56 declare void @llvm.stackrestore(i8*) |
52 | 57 |
53 define i32 @test_nacl_read_tp() { | 58 define internal i32 @test_nacl_read_tp() { |
54 entry: | 59 entry: |
55 %ptr = call i8* @llvm.nacl.read.tp() | 60 %ptr = call i8* @llvm.nacl.read.tp() |
56 %__1 = ptrtoint i8* %ptr to i32 | 61 %__1 = ptrtoint i8* %ptr to i32 |
57 ret i32 %__1 | 62 ret i32 %__1 |
58 } | 63 } |
59 ; CHECK-LABEL: test_nacl_read_tp | 64 ; CHECK-LABEL: test_nacl_read_tp |
60 ; CHECK: mov e{{.*}},DWORD PTR gs:0x0 | 65 ; CHECK: mov e{{.*}},DWORD PTR gs:0x0 |
61 ; CHECKO2REM-LABEL: test_nacl_read_tp | 66 ; CHECKO2REM-LABEL: test_nacl_read_tp |
62 ; CHECKO2REM: mov e{{.*}},DWORD PTR gs:0x0 | 67 ; CHECKO2REM: mov e{{.*}},DWORD PTR gs:0x0 |
63 ; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp | 68 ; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp |
64 ; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp | 69 ; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp |
65 | 70 |
66 define i32 @test_nacl_read_tp_more_addressing() { | 71 define internal i32 @test_nacl_read_tp_more_addressing() { |
67 entry: | 72 entry: |
68 %ptr = call i8* @llvm.nacl.read.tp() | 73 %ptr = call i8* @llvm.nacl.read.tp() |
69 %__1 = ptrtoint i8* %ptr to i32 | 74 %__1 = ptrtoint i8* %ptr to i32 |
70 %x = add i32 %__1, %__1 | 75 %x = add i32 %__1, %__1 |
71 %__3 = inttoptr i32 %x to i32* | 76 %__3 = inttoptr i32 %x to i32* |
72 %v = load i32, i32* %__3, align 1 | 77 %v = load i32, i32* %__3, align 1 |
73 %v_add = add i32 %v, 1 | 78 %v_add = add i32 %v, 1 |
74 | 79 |
75 %ptr2 = call i8* @llvm.nacl.read.tp() | 80 %ptr2 = call i8* @llvm.nacl.read.tp() |
76 %__6 = ptrtoint i8* %ptr2 to i32 | 81 %__6 = ptrtoint i8* %ptr2 to i32 |
77 %y = add i32 %__6, 4 | 82 %y = add i32 %__6, 4 |
78 %__8 = inttoptr i32 %y to i32* | 83 %__8 = inttoptr i32 %y to i32* |
79 %v_add2 = add i32 %v, 4 | 84 %v_add2 = add i32 %v, 4 |
80 store i32 %v_add2, i32* %__8, align 1 | 85 store i32 %v_add2, i32* %__8, align 1 |
81 ret i32 %v | 86 ret i32 %v |
82 } | 87 } |
83 ; CHECK-LABEL: test_nacl_read_tp_more_addressing | 88 ; CHECK-LABEL: test_nacl_read_tp_more_addressing |
84 ; CHECK: mov e{{.*}},DWORD PTR gs:0x0 | 89 ; CHECK: mov e{{.*}},DWORD PTR gs:0x0 |
85 ; CHECK: mov e{{.*}},DWORD PTR gs:0x0 | 90 ; CHECK: mov e{{.*}},DWORD PTR gs:0x0 |
86 ; CHECKO2REM-LABEL: test_nacl_read_tp_more_addressing | 91 ; CHECKO2REM-LABEL: test_nacl_read_tp_more_addressing |
87 ; CHECKO2REM: mov e{{.*}},DWORD PTR gs:0x0 | 92 ; CHECKO2REM: mov e{{.*}},DWORD PTR gs:0x0 |
88 ; CHECKO2REM: mov e{{.*}},DWORD PTR gs:0x0 | 93 ; CHECKO2REM: mov e{{.*}},DWORD PTR gs:0x0 |
89 ; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp_more_addressing | 94 ; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp_more_addressing |
90 ; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp | 95 ; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp |
91 ; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp | 96 ; CHECKO2UNSANDBOXEDREM: call {{.*}} R_{{.*}} __nacl_read_tp |
92 | 97 |
93 define i32 @test_nacl_read_tp_dead(i32 %a) { | 98 define internal i32 @test_nacl_read_tp_dead(i32 %a) { |
94 entry: | 99 entry: |
95 %ptr = call i8* @llvm.nacl.read.tp() | 100 %ptr = call i8* @llvm.nacl.read.tp() |
96 ; Not actually using the result of nacl read tp call. | 101 ; Not actually using the result of nacl read tp call. |
97 ; In O2 mode this should be DCE'ed. | 102 ; In O2 mode this should be DCE'ed. |
98 ret i32 %a | 103 ret i32 %a |
99 } | 104 } |
100 ; Consider nacl.read.tp side-effect free, so it can be eliminated. | 105 ; Consider nacl.read.tp side-effect free, so it can be eliminated. |
101 ; CHECKO2REM-LABEL: test_nacl_read_tp_dead | 106 ; CHECKO2REM-LABEL: test_nacl_read_tp_dead |
102 ; CHECKO2REM-NOT: mov e{{.*}}, DWORD PTR gs:0x0 | 107 ; CHECKO2REM-NOT: mov e{{.*}},DWORD PTR gs:0x0 |
103 ; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp_dead | 108 ; CHECKO2UNSANDBOXEDREM-LABEL: test_nacl_read_tp_dead |
104 ; CHECKO2UNSANDBOXEDREM-NOT: call {{.*}} R_{{.*}} __nacl_read_tp | 109 ; CHECKO2UNSANDBOXEDREM-NOT: call {{.*}} R_{{.*}} __nacl_read_tp |
105 | 110 |
106 define i32 @test_setjmplongjmp(i32 %iptr_env) { | 111 define internal i32 @test_setjmplongjmp(i32 %iptr_env) { |
107 entry: | 112 entry: |
108 %env = inttoptr i32 %iptr_env to i8* | 113 %env = inttoptr i32 %iptr_env to i8* |
109 %i = call i32 @llvm.nacl.setjmp(i8* %env) | 114 %i = call i32 @llvm.nacl.setjmp(i8* %env) |
110 %r1 = icmp eq i32 %i, 0 | 115 %r1 = icmp eq i32 %i, 0 |
111 br i1 %r1, label %Zero, label %NonZero | 116 br i1 %r1, label %Zero, label %NonZero |
112 Zero: | 117 Zero: |
113 ; Redundant inttoptr, to make --pnacl cast-eliding/re-insertion happy. | 118 ; Redundant inttoptr, to make --pnacl cast-eliding/re-insertion happy. |
114 %env2 = inttoptr i32 %iptr_env to i8* | 119 %env2 = inttoptr i32 %iptr_env to i8* |
115 call void @llvm.nacl.longjmp(i8* %env2, i32 1) | 120 call void @llvm.nacl.longjmp(i8* %env2, i32 1) |
116 ret i32 0 | 121 ret i32 0 |
117 NonZero: | 122 NonZero: |
118 ret i32 1 | 123 ret i32 1 |
119 } | 124 } |
120 ; CHECK-LABEL: test_setjmplongjmp | 125 ; CHECK-LABEL: test_setjmplongjmp |
121 ; CHECK: call {{.*}} R_{{.*}} setjmp | 126 ; CHECK: call {{.*}} R_{{.*}} setjmp |
122 ; CHECK: call {{.*}} R_{{.*}} longjmp | 127 ; CHECK: call {{.*}} R_{{.*}} longjmp |
123 ; CHECKO2REM-LABEL: test_setjmplongjmp | 128 ; CHECKO2REM-LABEL: test_setjmplongjmp |
124 ; CHECKO2REM: call {{.*}} R_{{.*}} setjmp | 129 ; CHECKO2REM: call {{.*}} R_{{.*}} setjmp |
125 ; CHECKO2REM: call {{.*}} R_{{.*}} longjmp | 130 ; CHECKO2REM: call {{.*}} R_{{.*}} longjmp |
126 ; ARM32-LABEL: test_setjmplongjmp | 131 ; ARM32-LABEL: test_setjmplongjmp |
127 ; ARM32: bl {{.*}} setjmp | 132 ; ARM32: bl {{.*}} setjmp |
128 ; ARM32: bl {{.*}} longjmp | 133 ; ARM32: bl {{.*}} longjmp |
129 | 134 |
130 define i32 @test_setjmp_unused(i32 %iptr_env, i32 %i_other) { | 135 define internal i32 @test_setjmp_unused(i32 %iptr_env, i32 %i_other) { |
131 entry: | 136 entry: |
132 %env = inttoptr i32 %iptr_env to i8* | 137 %env = inttoptr i32 %iptr_env to i8* |
133 %i = call i32 @llvm.nacl.setjmp(i8* %env) | 138 %i = call i32 @llvm.nacl.setjmp(i8* %env) |
134 ret i32 %i_other | 139 ret i32 %i_other |
135 } | 140 } |
136 ; Don't consider setjmp side-effect free, so it's not eliminated if | 141 ; Don't consider setjmp side-effect free, so it's not eliminated if |
137 ; result unused. | 142 ; result unused. |
138 ; CHECKO2REM-LABEL: test_setjmp_unused | 143 ; CHECKO2REM-LABEL: test_setjmp_unused |
139 ; CHECKO2REM: call {{.*}} R_{{.*}} setjmp | 144 ; CHECKO2REM: call {{.*}} R_{{.*}} setjmp |
140 | 145 |
141 define float @test_sqrt_float(float %x, i32 %iptr) { | 146 define internal float @test_sqrt_float(float %x, i32 %iptr) { |
142 entry: | 147 entry: |
143 %r = call float @llvm.sqrt.f32(float %x) | 148 %r = call float @llvm.sqrt.f32(float %x) |
144 %r2 = call float @llvm.sqrt.f32(float %r) | 149 %r2 = call float @llvm.sqrt.f32(float %r) |
145 %r3 = call float @llvm.sqrt.f32(float -0.0) | 150 %r3 = call float @llvm.sqrt.f32(float -0.0) |
146 %r4 = fadd float %r2, %r3 | 151 %r4 = fadd float %r2, %r3 |
147 ret float %r4 | 152 ret float %r4 |
148 } | 153 } |
149 ; CHECK-LABEL: test_sqrt_float | 154 ; CHECK-LABEL: test_sqrt_float |
150 ; CHECK: sqrtss xmm{{.*}} | 155 ; CHECK: sqrtss xmm{{.*}} |
151 ; CHECK: sqrtss xmm{{.*}} | 156 ; CHECK: sqrtss xmm{{.*}} |
152 ; CHECK: sqrtss xmm{{.*}},DWORD PTR | 157 ; CHECK: sqrtss xmm{{.*}},DWORD PTR |
153 ; ARM32-LABEL: test_sqrt_float | 158 ; ARM32-LABEL: test_sqrt_float |
154 ; ARM32: vsqrt.f32 | 159 ; ARM32: vsqrt.f32 |
155 ; ARM32: vsqrt.f32 | 160 ; ARM32: vsqrt.f32 |
156 ; ARM32: vsqrt.f32 | 161 ; ARM32: vsqrt.f32 |
157 ; ARM32: vadd.f32 | 162 ; ARM32: vadd.f32 |
158 | 163 |
159 define float @test_sqrt_float_mergeable_load(float %x, i32 %iptr) { | 164 define internal float @test_sqrt_float_mergeable_load(float %x, i32 %iptr) { |
160 entry: | 165 entry: |
161 %__2 = inttoptr i32 %iptr to float* | 166 %__2 = inttoptr i32 %iptr to float* |
162 %y = load float, float* %__2, align 4 | 167 %y = load float, float* %__2, align 4 |
163 %r5 = call float @llvm.sqrt.f32(float %y) | 168 %r5 = call float @llvm.sqrt.f32(float %y) |
164 %r6 = fadd float %x, %r5 | 169 %r6 = fadd float %x, %r5 |
165 ret float %r6 | 170 ret float %r6 |
166 } | 171 } |
167 ; CHECK-LABEL: test_sqrt_float_mergeable_load | 172 ; CHECK-LABEL: test_sqrt_float_mergeable_load |
168 ; We could fold the load and the sqrt into one operation, but the | 173 ; We could fold the load and the sqrt into one operation, but the |
169 ; current folding only handles load + arithmetic op. The sqrt inst | 174 ; current folding only handles load + arithmetic op. The sqrt inst |
170 ; is considered an intrinsic call and not an arithmetic op. | 175 ; is considered an intrinsic call and not an arithmetic op. |
171 ; CHECK: sqrtss xmm{{.*}} | 176 ; CHECK: sqrtss xmm{{.*}} |
172 ; ARM32-LABEL: test_sqrt_float_mergeable_load | 177 ; ARM32-LABEL: test_sqrt_float_mergeable_load |
173 ; ARM32: vldr s{{.*}} | 178 ; ARM32: vldr s{{.*}} |
174 ; ARM32: vsqrt.f32 | 179 ; ARM32: vsqrt.f32 |
175 | 180 |
176 define double @test_sqrt_double(double %x, i32 %iptr) { | 181 define internal double @test_sqrt_double(double %x, i32 %iptr) { |
177 entry: | 182 entry: |
178 %r = call double @llvm.sqrt.f64(double %x) | 183 %r = call double @llvm.sqrt.f64(double %x) |
179 %r2 = call double @llvm.sqrt.f64(double %r) | 184 %r2 = call double @llvm.sqrt.f64(double %r) |
180 %r3 = call double @llvm.sqrt.f64(double -0.0) | 185 %r3 = call double @llvm.sqrt.f64(double -0.0) |
181 %r4 = fadd double %r2, %r3 | 186 %r4 = fadd double %r2, %r3 |
182 ret double %r4 | 187 ret double %r4 |
183 } | 188 } |
184 ; CHECK-LABEL: test_sqrt_double | 189 ; CHECK-LABEL: test_sqrt_double |
185 ; CHECK: sqrtsd xmm{{.*}} | 190 ; CHECK: sqrtsd xmm{{.*}} |
186 ; CHECK: sqrtsd xmm{{.*}} | 191 ; CHECK: sqrtsd xmm{{.*}} |
187 ; CHECK: sqrtsd xmm{{.*}},QWORD PTR | 192 ; CHECK: sqrtsd xmm{{.*}},QWORD PTR |
188 ; ARM32-LABEL: test_sqrt_double | 193 ; ARM32-LABEL: test_sqrt_double |
189 ; ARM32: vsqrt.f64 | 194 ; ARM32: vsqrt.f64 |
190 ; ARM32: vsqrt.f64 | 195 ; ARM32: vsqrt.f64 |
191 ; ARM32: vsqrt.f64 | 196 ; ARM32: vsqrt.f64 |
192 ; ARM32: vadd.f64 | 197 ; ARM32: vadd.f64 |
193 | 198 |
194 define double @test_sqrt_double_mergeable_load(double %x, i32 %iptr) { | 199 define internal double @test_sqrt_double_mergeable_load(double %x, i32 %iptr) { |
195 entry: | 200 entry: |
196 %__2 = inttoptr i32 %iptr to double* | 201 %__2 = inttoptr i32 %iptr to double* |
197 %y = load double, double* %__2, align 8 | 202 %y = load double, double* %__2, align 8 |
198 %r5 = call double @llvm.sqrt.f64(double %y) | 203 %r5 = call double @llvm.sqrt.f64(double %y) |
199 %r6 = fadd double %x, %r5 | 204 %r6 = fadd double %x, %r5 |
200 ret double %r6 | 205 ret double %r6 |
201 } | 206 } |
202 ; CHECK-LABEL: test_sqrt_double_mergeable_load | 207 ; CHECK-LABEL: test_sqrt_double_mergeable_load |
203 ; CHECK: sqrtsd xmm{{.*}} | 208 ; CHECK: sqrtsd xmm{{.*}} |
204 ; ARM32-LABEL: test_sqrt_double_mergeable_load | 209 ; ARM32-LABEL: test_sqrt_double_mergeable_load |
205 ; ARM32: vldr d{{.*}} | 210 ; ARM32: vldr d{{.*}} |
206 ; ARM32: vsqrt.f64 | 211 ; ARM32: vsqrt.f64 |
207 | 212 |
208 define float @test_sqrt_ignored(float %x, double %y) { | 213 define internal float @test_sqrt_ignored(float %x, double %y) { |
209 entry: | 214 entry: |
210 %ignored1 = call float @llvm.sqrt.f32(float %x) | 215 %ignored1 = call float @llvm.sqrt.f32(float %x) |
211 %ignored2 = call double @llvm.sqrt.f64(double %y) | 216 %ignored2 = call double @llvm.sqrt.f64(double %y) |
212 ret float 0.0 | 217 ret float 0.0 |
213 } | 218 } |
214 ; CHECKO2REM-LABEL: test_sqrt_ignored | 219 ; CHECKO2REM-LABEL: test_sqrt_ignored |
215 ; CHECKO2REM-NOT: sqrtss | 220 ; CHECKO2REM-NOT: sqrtss |
216 ; CHECKO2REM-NOT: sqrtsd | 221 ; CHECKO2REM-NOT: sqrtsd |
217 | 222 |
218 define float @test_fabs_float(float %x) { | 223 define internal float @test_fabs_float(float %x) { |
219 entry: | 224 entry: |
220 %r = call float @llvm.fabs.f32(float %x) | 225 %r = call float @llvm.fabs.f32(float %x) |
221 %r2 = call float @llvm.fabs.f32(float %r) | 226 %r2 = call float @llvm.fabs.f32(float %r) |
222 %r3 = call float @llvm.fabs.f32(float -0.0) | 227 %r3 = call float @llvm.fabs.f32(float -0.0) |
223 %r4 = fadd float %r2, %r3 | 228 %r4 = fadd float %r2, %r3 |
224 ret float %r4 | 229 ret float %r4 |
225 } | 230 } |
226 ;;; Specially check that the pand instruction doesn't try to operate on a 32-bit | 231 ;;; Specially check that the pand instruction doesn't try to operate on a 32-bit |
227 ;;; (f32) memory operand, and instead uses two xmm registers. | 232 ;;; (f32) memory operand, and instead uses two xmm registers. |
228 ; CHECK-LABEL: test_fabs_float | 233 ; CHECK-LABEL: test_fabs_float |
229 ; CHECK: pcmpeqd | 234 ; CHECK: pcmpeqd |
230 ; CHECK: psrld | 235 ; CHECK: psrld |
231 ; CHECK: pand {{.*}}xmm{{.*}}xmm | 236 ; CHECK: pand {{.*}}xmm{{.*}}xmm |
232 ; CHECK: pcmpeqd | 237 ; CHECK: pcmpeqd |
233 ; CHECK: psrld | 238 ; CHECK: psrld |
234 ; CHECK: pand {{.*}}xmm{{.*}}xmm | 239 ; CHECK: pand {{.*}}xmm{{.*}}xmm |
235 ; CHECK: pcmpeqd | 240 ; CHECK: pcmpeqd |
236 ; CHECK: psrld | 241 ; CHECK: psrld |
237 ; CHECK: pand {{.*}}xmm{{.*}}xmm | 242 ; CHECK: pand {{.*}}xmm{{.*}}xmm |
238 | 243 |
239 define double @test_fabs_double(double %x) { | 244 define internal double @test_fabs_double(double %x) { |
240 entry: | 245 entry: |
241 %r = call double @llvm.fabs.f64(double %x) | 246 %r = call double @llvm.fabs.f64(double %x) |
242 %r2 = call double @llvm.fabs.f64(double %r) | 247 %r2 = call double @llvm.fabs.f64(double %r) |
243 %r3 = call double @llvm.fabs.f64(double -0.0) | 248 %r3 = call double @llvm.fabs.f64(double -0.0) |
244 %r4 = fadd double %r2, %r3 | 249 %r4 = fadd double %r2, %r3 |
245 ret double %r4 | 250 ret double %r4 |
246 } | 251 } |
247 ;;; Specially check that the pand instruction doesn't try to operate on a 64-bit | 252 ;;; Specially check that the pand instruction doesn't try to operate on a 64-bit |
248 ;;; (f64) memory operand, and instead uses two xmm registers. | 253 ;;; (f64) memory operand, and instead uses two xmm registers. |
249 ; CHECK-LABEL: test_fabs_double | 254 ; CHECK-LABEL: test_fabs_double |
250 ; CHECK: pcmpeqd | 255 ; CHECK: pcmpeqd |
251 ; CHECK: psrlq | 256 ; CHECK: psrlq |
252 ; CHECK: pand {{.*}}xmm{{.*}}xmm | 257 ; CHECK: pand {{.*}}xmm{{.*}}xmm |
253 ; CHECK: pcmpeqd | 258 ; CHECK: pcmpeqd |
254 ; CHECK: psrlq | 259 ; CHECK: psrlq |
255 ; CHECK: pand {{.*}}xmm{{.*}}xmm | 260 ; CHECK: pand {{.*}}xmm{{.*}}xmm |
256 ; CHECK: pcmpeqd | 261 ; CHECK: pcmpeqd |
257 ; CHECK: psrlq | 262 ; CHECK: psrlq |
258 ; CHECK: pand {{.*}}xmm{{.*}}xmm | 263 ; CHECK: pand {{.*}}xmm{{.*}}xmm |
259 | 264 |
260 define <4 x float> @test_fabs_v4f32(<4 x float> %x) { | 265 define internal <4 x float> @test_fabs_v4f32(<4 x float> %x) { |
261 entry: | 266 entry: |
262 %r = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x) | 267 %r = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x) |
263 %r2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %r) | 268 %r2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %r) |
264 %r3 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) | 269 %r3 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef) |
265 %r4 = fadd <4 x float> %r2, %r3 | 270 %r4 = fadd <4 x float> %r2, %r3 |
266 ret <4 x float> %r4 | 271 ret <4 x float> %r4 |
267 } | 272 } |
268 ; CHECK-LABEL: test_fabs_v4f32 | 273 ; CHECK-LABEL: test_fabs_v4f32 |
269 ; CHECK: pcmpeqd | 274 ; CHECK: pcmpeqd |
270 ; CHECK: psrld | 275 ; CHECK: psrld |
271 ; CHECK: pand | 276 ; CHECK: pand |
272 ; CHECK: pcmpeqd | 277 ; CHECK: pcmpeqd |
273 ; CHECK: psrld | 278 ; CHECK: psrld |
274 ; CHECK: pand | 279 ; CHECK: pand |
275 ; CHECK: pcmpeqd | 280 ; CHECK: pcmpeqd |
276 ; CHECK: psrld | 281 ; CHECK: psrld |
277 ; CHECK: pand | 282 ; CHECK: pand |
278 | 283 |
279 define i32 @test_trap(i32 %br) { | 284 define internal i32 @test_trap(i32 %br) { |
280 entry: | 285 entry: |
281 %r1 = icmp eq i32 %br, 0 | 286 %r1 = icmp eq i32 %br, 0 |
282 br i1 %r1, label %Zero, label %NonZero | 287 br i1 %r1, label %Zero, label %NonZero |
283 Zero: | 288 Zero: |
284 call void @llvm.trap() | 289 call void @llvm.trap() |
285 unreachable | 290 unreachable |
286 NonZero: | 291 NonZero: |
287 ret i32 1 | 292 ret i32 1 |
288 } | 293 } |
289 ; CHECK-LABEL: test_trap | 294 ; CHECK-LABEL: test_trap |
290 ; CHECK: ud2 | 295 ; CHECK: ud2 |
291 ; ARM32-LABEL: test_trap | 296 ; ARM32-LABEL: test_trap |
292 ; ARM32: .word 0xe7fedef0 | 297 ; ARM32: .word 0xe7fedef0 |
293 | 298 |
294 define i32 @test_bswap_16(i32 %x) { | 299 define internal i32 @test_bswap_16(i32 %x) { |
295 entry: | 300 entry: |
296 %x_trunc = trunc i32 %x to i16 | 301 %x_trunc = trunc i32 %x to i16 |
297 %r = call i16 @llvm.bswap.i16(i16 %x_trunc) | 302 %r = call i16 @llvm.bswap.i16(i16 %x_trunc) |
298 %r_zext = zext i16 %r to i32 | 303 %r_zext = zext i16 %r to i32 |
299 ret i32 %r_zext | 304 ret i32 %r_zext |
300 } | 305 } |
301 ; CHECK-LABEL: test_bswap_16 | 306 ; CHECK-LABEL: test_bswap_16 |
302 ; Make sure this is the right operand size so that the most significant bit | 307 ; Make sure this is the right operand size so that the most significant bit |
303 ; to least significant bit rotation happens at the right boundary. | 308 ; to least significant bit rotation happens at the right boundary. |
304 ; CHECK: rol {{[abcd]x|si|di|bp|word ptr}},0x8 | 309 ; CHECK: rol {{[abcd]x|si|di|bp|word ptr}},0x8 |
305 ; ARM32-LABEL: test_bswap_16 | 310 ; ARM32-LABEL: test_bswap_16 |
306 ; ARM32: rev | 311 ; ARM32: rev |
307 ; ARM32: lsr {{.*}} #16 | 312 ; ARM32: lsr {{.*}} #16 |
308 | 313 |
309 define i32 @test_bswap_32(i32 %x) { | 314 define internal i32 @test_bswap_32(i32 %x) { |
310 entry: | 315 entry: |
311 %r = call i32 @llvm.bswap.i32(i32 %x) | 316 %r = call i32 @llvm.bswap.i32(i32 %x) |
312 ret i32 %r | 317 ret i32 %r |
313 } | 318 } |
314 ; CHECK-LABEL: test_bswap_32 | 319 ; CHECK-LABEL: test_bswap_32 |
315 ; CHECK: bswap e{{.*}} | 320 ; CHECK: bswap e{{.*}} |
316 ; ARM32-LABEL: test_bswap_32 | 321 ; ARM32-LABEL: test_bswap_32 |
317 ; ARM32: rev | 322 ; ARM32: rev |
318 | 323 |
319 define i64 @test_bswap_64(i64 %x) { | 324 define internal i64 @test_bswap_64(i64 %x) { |
320 entry: | 325 entry: |
321 %r = call i64 @llvm.bswap.i64(i64 %x) | 326 %r = call i64 @llvm.bswap.i64(i64 %x) |
322 ret i64 %r | 327 ret i64 %r |
323 } | 328 } |
324 ; CHECK-LABEL: test_bswap_64 | 329 ; CHECK-LABEL: test_bswap_64 |
325 ; CHECK: bswap e{{.*}} | 330 ; CHECK: bswap e{{.*}} |
326 ; CHECK: bswap e{{.*}} | 331 ; CHECK: bswap e{{.*}} |
327 ; ARM32-LABEL: test_bswap_64 | 332 ; ARM32-LABEL: test_bswap_64 |
328 ; ARM32: rev | 333 ; ARM32: rev |
329 ; ARM32: rev | 334 ; ARM32: rev |
330 | 335 |
331 define i64 @test_bswap_64_undef() { | 336 define internal i64 @test_bswap_64_undef() { |
332 entry: | 337 entry: |
333 %r = call i64 @llvm.bswap.i64(i64 undef) | 338 %r = call i64 @llvm.bswap.i64(i64 undef) |
334 ret i64 %r | 339 ret i64 %r |
335 } | 340 } |
336 ; CHECK-LABEL: test_bswap_64_undef | 341 ; CHECK-LABEL: test_bswap_64_undef |
337 ; CHECK: bswap e{{.*}} | 342 ; CHECK: bswap e{{.*}} |
338 ; CHECK: bswap e{{.*}} | 343 ; CHECK: bswap e{{.*}} |
339 ; ARM32-LABEL: test_bswap_64 | 344 ; ARM32-LABEL: test_bswap_64_undef |
340 ; ARM32: rev | 345 ; ARM32: rev |
341 ; ARM32: rev | 346 ; ARM32: rev |
342 | 347 |
343 define i32 @test_ctlz_32(i32 %x) { | 348 define internal i32 @test_ctlz_32(i32 %x) { |
344 entry: | 349 entry: |
345 %r = call i32 @llvm.ctlz.i32(i32 %x, i1 false) | 350 %r = call i32 @llvm.ctlz.i32(i32 %x, i1 false) |
346 ret i32 %r | 351 ret i32 %r |
347 } | 352 } |
348 ; CHECK-LABEL: test_ctlz_32 | 353 ; CHECK-LABEL: test_ctlz_32 |
349 ; TODO(jvoung): If we detect that LZCNT is supported, then use that | 354 ; TODO(jvoung): If we detect that LZCNT is supported, then use that |
350 ; and avoid the need to do the cmovne and xor stuff to guarantee that | 355 ; and avoid the need to do the cmovne and xor stuff to guarantee that |
351 ; the result is well-defined w/ input == 0. | 356 ; the result is well-defined w/ input == 0. |
352 ; CHECK: bsr [[REG_TMP:e.*]],{{.*}} | 357 ; CHECK: bsr [[REG_TMP:e.*]],{{.*}} |
353 ; CHECK: mov [[REG_RES:e.*]],0x3f | 358 ; CHECK: mov [[REG_RES:e.*]],0x3f |
354 ; CHECK: cmovne [[REG_RES]],[[REG_TMP]] | 359 ; CHECK: cmovne [[REG_RES]],[[REG_TMP]] |
355 ; CHECK: xor [[REG_RES]],0x1f | 360 ; CHECK: xor [[REG_RES]],0x1f |
356 ; ARM32-LABEL: test_ctlz_32 | 361 ; ARM32-LABEL: test_ctlz_32 |
357 ; ARM32: clz | 362 ; ARM32: clz |
358 | 363 |
359 define i32 @test_ctlz_32_const() { | 364 define internal i32 @test_ctlz_32_const() { |
360 entry: | 365 entry: |
361 %r = call i32 @llvm.ctlz.i32(i32 123456, i1 false) | 366 %r = call i32 @llvm.ctlz.i32(i32 123456, i1 false) |
362 ret i32 %r | 367 ret i32 %r |
363 } | 368 } |
364 ; Could potentially constant fold this, but the front-end should have done that. | 369 ; Could potentially constant fold this, but the front-end should have done that. |
365 ; The dest operand must be a register and the source operand must be a register | 370 ; The dest operand must be a register and the source operand must be a register |
366 ; or memory. | 371 ; or memory. |
367 ; CHECK-LABEL: test_ctlz_32_const | 372 ; CHECK-LABEL: test_ctlz_32_const |
368 ; CHECK: bsr e{{.*}},{{.*}}e{{.*}} | 373 ; CHECK: bsr e{{.*}},{{.*}}e{{.*}} |
369 ; ARM32-LABEL: test_ctlz_32_const | 374 ; ARM32-LABEL: test_ctlz_32_const |
370 ; ARM32: clz | 375 ; ARM32: clz |
371 | 376 |
372 define i32 @test_ctlz_32_ignored(i32 %x) { | 377 define internal i32 @test_ctlz_32_ignored(i32 %x) { |
373 entry: | 378 entry: |
374 %ignored = call i32 @llvm.ctlz.i32(i32 %x, i1 false) | 379 %ignored = call i32 @llvm.ctlz.i32(i32 %x, i1 false) |
375 ret i32 1 | 380 ret i32 1 |
376 } | 381 } |
377 ; CHECKO2REM-LABEL: test_ctlz_32_ignored | 382 ; CHECKO2REM-LABEL: test_ctlz_32_ignored |
378 ; CHECKO2REM-NOT: bsr | 383 ; CHECKO2REM-NOT: bsr |
379 | 384 |
380 define i64 @test_ctlz_64(i64 %x) { | 385 define internal i64 @test_ctlz_64(i64 %x) { |
381 entry: | 386 entry: |
382 %r = call i64 @llvm.ctlz.i64(i64 %x, i1 false) | 387 %r = call i64 @llvm.ctlz.i64(i64 %x, i1 false) |
383 ret i64 %r | 388 ret i64 %r |
384 } | 389 } |
385 ; CHECKO2REM-LABEL: test_ctlz_64 | 390 ; CHECKO2REM-LABEL: test_ctlz_64 |
386 ; CHECK-LABEL: test_ctlz_64 | 391 ; CHECK-LABEL: test_ctlz_64 |
387 ; CHECK: bsr [[REG_TMP1:e.*]],{{.*}} | 392 ; CHECK: bsr [[REG_TMP1:e.*]],{{.*}} |
388 ; CHECK: mov [[REG_RES1:e.*]],0x3f | 393 ; CHECK: mov [[REG_RES1:e.*]],0x3f |
389 ; CHECK: cmovne [[REG_RES1]],[[REG_TMP1]] | 394 ; CHECK: cmovne [[REG_RES1]],[[REG_TMP1]] |
390 ; CHECK: xor [[REG_RES1]],0x1f | 395 ; CHECK: xor [[REG_RES1]],0x1f |
391 ; CHECK: add [[REG_RES1]],0x20 | 396 ; CHECK: add [[REG_RES1]],0x20 |
392 ; CHECK: bsr [[REG_RES2:e.*]],{{.*}} | 397 ; CHECK: bsr [[REG_RES2:e.*]],{{.*}} |
393 ; CHECK: xor [[REG_RES2]],0x1f | 398 ; CHECK: xor [[REG_RES2]],0x1f |
394 ; CHECK: test [[REG_UPPER:.*]],[[REG_UPPER]] | 399 ; CHECK: test [[REG_UPPER:.*]],[[REG_UPPER]] |
395 ; CHECK: cmove [[REG_RES2]],[[REG_RES1]] | 400 ; CHECK: cmove [[REG_RES2]],[[REG_RES1]] |
396 ; CHECK: mov {{.*}},0x0 | 401 ; CHECK: mov {{.*}},0x0 |
397 ; ARM32-LABEL: test_ctlz_64 | 402 ; ARM32-LABEL: test_ctlz_64 |
398 ; ARM32: clz | 403 ; ARM32: clz |
399 ; ARM32: cmp {{.*}}, #0 | 404 ; ARM32: cmp {{.*}}, #0 |
400 ; ARM32: add {{.*}}, #32 | 405 ; ARM32: add {{.*}}, #32 |
401 ; ARM32: clzne | 406 ; ARM32: clzne |
402 ; ARM32: mov {{.*}}, #0 | 407 ; ARM32: mov {{.*}}, #0 |
403 | 408 |
404 define i32 @test_ctlz_64_const(i64 %x) { | 409 define internal i32 @test_ctlz_64_const(i64 %x) { |
405 entry: | 410 entry: |
406 %r = call i64 @llvm.ctlz.i64(i64 123456789012, i1 false) | 411 %r = call i64 @llvm.ctlz.i64(i64 123456789012, i1 false) |
407 %r2 = trunc i64 %r to i32 | 412 %r2 = trunc i64 %r to i32 |
408 ret i32 %r2 | 413 ret i32 %r2 |
409 } | 414 } |
410 ; CHECK-LABEL: test_ctlz_64_const | 415 ; CHECK-LABEL: test_ctlz_64_const |
411 ; CHECK: bsr e{{.*}},{{.*}}e{{.*}} | 416 ; CHECK: bsr e{{.*}},{{.*}}e{{.*}} |
412 ; CHECK: bsr e{{.*}},{{.*}}e{{.*}} | 417 ; CHECK: bsr e{{.*}},{{.*}}e{{.*}} |
413 ; ARM32-LABEL: test_ctlz_64 | 418 ; ARM32-LABEL: test_ctlz_64_const |
414 ; ARM32: clz | 419 ; ARM32: clz |
415 ; ARM32: clzne | 420 ; ARM32: clzne |
416 | 421 |
417 define i32 @test_ctlz_64_ignored(i64 %x) { | 422 define internal i32 @test_ctlz_64_ignored(i64 %x) { |
418 entry: | 423 entry: |
419 %ignored = call i64 @llvm.ctlz.i64(i64 1234567890, i1 false) | 424 %ignored = call i64 @llvm.ctlz.i64(i64 1234567890, i1 false) |
420 ret i32 2 | 425 ret i32 2 |
421 } | 426 } |
422 ; CHECKO2REM-LABEL: test_ctlz_64_ignored | 427 ; CHECKO2REM-LABEL: test_ctlz_64_ignored |
423 ; CHECKO2REM-NOT: bsr | 428 ; CHECKO2REM-NOT: bsr |
424 | 429 |
425 define i32 @test_cttz_32(i32 %x) { | 430 define internal i32 @test_cttz_32(i32 %x) { |
426 entry: | 431 entry: |
427 %r = call i32 @llvm.cttz.i32(i32 %x, i1 false) | 432 %r = call i32 @llvm.cttz.i32(i32 %x, i1 false) |
428 ret i32 %r | 433 ret i32 %r |
429 } | 434 } |
430 ; CHECK-LABEL: test_cttz_32 | 435 ; CHECK-LABEL: test_cttz_32 |
431 ; CHECK: bsf [[REG_IF_NOTZERO:e.*]],{{.*}} | 436 ; CHECK: bsf [[REG_IF_NOTZERO:e.*]],{{.*}} |
432 ; CHECK: mov [[REG_IF_ZERO:e.*]],0x20 | 437 ; CHECK: mov [[REG_IF_ZERO:e.*]],0x20 |
433 ; CHECK: cmovne [[REG_IF_ZERO]],[[REG_IF_NOTZERO]] | 438 ; CHECK: cmovne [[REG_IF_ZERO]],[[REG_IF_NOTZERO]] |
434 ; ARM32-LABEL: test_cttz_32 | 439 ; ARM32-LABEL: test_cttz_32 |
435 ; ARM32: rbit | 440 ; ARM32: rbit |
436 ; ARM32: clz | 441 ; ARM32: clz |
437 | 442 |
438 define i64 @test_cttz_64(i64 %x) { | 443 define internal i64 @test_cttz_64(i64 %x) { |
439 entry: | 444 entry: |
440 %r = call i64 @llvm.cttz.i64(i64 %x, i1 false) | 445 %r = call i64 @llvm.cttz.i64(i64 %x, i1 false) |
441 ret i64 %r | 446 ret i64 %r |
442 } | 447 } |
443 ; CHECK-LABEL: test_cttz_64 | 448 ; CHECK-LABEL: test_cttz_64 |
444 ; CHECK: bsf [[REG_IF_NOTZERO:e.*]],{{.*}} | 449 ; CHECK: bsf [[REG_IF_NOTZERO:e.*]],{{.*}} |
445 ; CHECK: mov [[REG_RES1:e.*]],0x20 | 450 ; CHECK: mov [[REG_RES1:e.*]],0x20 |
446 ; CHECK: cmovne [[REG_RES1]],[[REG_IF_NOTZERO]] | 451 ; CHECK: cmovne [[REG_RES1]],[[REG_IF_NOTZERO]] |
447 ; CHECK: add [[REG_RES1]],0x20 | 452 ; CHECK: add [[REG_RES1]],0x20 |
448 ; CHECK: bsf [[REG_RES2:e.*]],[[REG_LOWER:.*]] | 453 ; CHECK: bsf [[REG_RES2:e.*]],[[REG_LOWER:.*]] |
449 ; CHECK: test [[REG_LOWER]],[[REG_LOWER]] | 454 ; CHECK: test [[REG_LOWER]],[[REG_LOWER]] |
450 ; CHECK: cmove [[REG_RES2]],[[REG_RES1]] | 455 ; CHECK: cmove [[REG_RES2]],[[REG_RES1]] |
451 ; CHECK: mov {{.*}},0x0 | 456 ; CHECK: mov {{.*}},0x0 |
452 ; ARM32-LABEL: test_cttz_64 | 457 ; ARM32-LABEL: test_cttz_64 |
453 ; ARM32: rbit | 458 ; ARM32: rbit |
454 ; ARM32: rbit | 459 ; ARM32: rbit |
455 ; ARM32: clz | 460 ; ARM32: clz |
456 ; ARM32: cmp {{.*}}, #0 | 461 ; ARM32: cmp {{.*}}, #0 |
457 ; ARM32: add {{.*}}, #32 | 462 ; ARM32: add {{.*}}, #32 |
458 ; ARM32: clzne | 463 ; ARM32: clzne |
459 ; ARM32: mov {{.*}}, #0 | 464 ; ARM32: mov {{.*}}, #0 |
460 | 465 |
461 define i32 @test_popcount_32(i32 %x) { | 466 define internal i32 @test_popcount_32(i32 %x) { |
462 entry: | 467 entry: |
463 %r = call i32 @llvm.ctpop.i32(i32 %x) | 468 %r = call i32 @llvm.ctpop.i32(i32 %x) |
464 ret i32 %r | 469 ret i32 %r |
465 } | 470 } |
466 ; CHECK-LABEL: test_popcount_32 | 471 ; CHECK-LABEL: test_popcount_32 |
467 ; CHECK: call {{.*}} R_{{.*}} __popcountsi2 | 472 ; CHECK: call {{.*}} R_{{.*}} __popcountsi2 |
468 ; ARM32-LABEL: test_popcount_32 | 473 ; ARM32-LABEL: test_popcount_32 |
469 ; ARM32: bl {{.*}} __popcountsi2 | 474 ; ARM32: bl {{.*}} __popcountsi2 |
470 | 475 |
471 define i64 @test_popcount_64(i64 %x) { | 476 define internal i64 @test_popcount_64(i64 %x) { |
472 entry: | 477 entry: |
473 %r = call i64 @llvm.ctpop.i64(i64 %x) | 478 %r = call i64 @llvm.ctpop.i64(i64 %x) |
474 ret i64 %r | 479 ret i64 %r |
475 } | 480 } |
476 ; CHECK-LABEL: test_popcount_64 | 481 ; CHECK-LABEL: test_popcount_64 |
477 ; CHECK: call {{.*}} R_{{.*}} __popcountdi2 | 482 ; CHECK: call {{.*}} R_{{.*}} __popcountdi2 |
478 ; __popcountdi2 only returns a 32-bit result, so clear the upper bits of | 483 ; __popcountdi2 only returns a 32-bit result, so clear the upper bits of |
479 ; the return value just in case. | 484 ; the return value just in case. |
480 ; CHECK: mov {{.*}},0x0 | 485 ; CHECK: mov {{.*}},0x0 |
481 ; ARM32-LABEL: test_popcount_64 | 486 ; ARM32-LABEL: test_popcount_64 |
482 ; ARM32: bl {{.*}} __popcountdi2 | 487 ; ARM32: bl {{.*}} __popcountdi2 |
483 ; ARM32: mov {{.*}}, #0 | 488 ; ARM32: mov {{.*}}, #0 |
484 | 489 |
485 define i32 @test_popcount_64_ret_i32(i64 %x) { | 490 define internal i32 @test_popcount_64_ret_i32(i64 %x) { |
486 entry: | 491 entry: |
487 %r_i64 = call i64 @llvm.ctpop.i64(i64 %x) | 492 %r_i64 = call i64 @llvm.ctpop.i64(i64 %x) |
488 %r = trunc i64 %r_i64 to i32 | 493 %r = trunc i64 %r_i64 to i32 |
489 ret i32 %r | 494 ret i32 %r |
490 } | 495 } |
491 ; If there is a trunc, then the mov {{.*}}, 0 is dead and gets optimized out. | 496 ; If there is a trunc, then the mov {{.*}}, 0 is dead and gets optimized out. |
492 ; CHECKO2REM-LABEL: test_popcount_64_ret_i32 | 497 ; CHECKO2REM-LABEL: test_popcount_64_ret_i32 |
493 ; CHECKO2REM: call {{.*}} R_{{.*}} __popcountdi2 | 498 ; CHECKO2REM: call {{.*}} R_{{.*}} __popcountdi2 |
494 ; CHECKO2REM-NOT: mov {{.*}}, 0 | 499 ; CHECKO2REM-NOT: mov {{.*}}, 0 |
495 | 500 |
496 define void @test_stacksave_noalloca() { | 501 define internal void @test_stacksave_noalloca() { |
497 entry: | 502 entry: |
498 %sp = call i8* @llvm.stacksave() | 503 %sp = call i8* @llvm.stacksave() |
499 call void @llvm.stackrestore(i8* %sp) | 504 call void @llvm.stackrestore(i8* %sp) |
500 ret void | 505 ret void |
501 } | 506 } |
502 ; CHECK-LABEL: test_stacksave_noalloca | 507 ; CHECK-LABEL: test_stacksave_noalloca |
503 ; CHECK: mov {{.*}},esp | 508 ; CHECK: mov {{.*}},esp |
504 ; CHECK: mov esp,{{.*}} | 509 ; CHECK: mov esp,{{.*}} |
505 ; ARM32-LABEL: test_stacksave_noalloca | 510 ; ARM32-LABEL: test_stacksave_noalloca |
506 ; ARM32: mov {{.*}}, sp | 511 ; ARM32: mov {{.*}}, sp |
507 ; ARM32: mov sp, {{.*}} | 512 ; ARM32: mov sp, {{.*}} |
508 | 513 |
509 declare i32 @foo(i32 %x) | 514 declare i32 @foo(i32 %x) |
510 | 515 |
511 define void @test_stacksave_multiple(i32 %x) { | 516 define internal void @test_stacksave_multiple(i32 %x) { |
512 entry: | 517 entry: |
513 %x_4 = mul i32 %x, 4 | 518 %x_4 = mul i32 %x, 4 |
514 %sp1 = call i8* @llvm.stacksave() | 519 %sp1 = call i8* @llvm.stacksave() |
515 %tmp1 = alloca i8, i32 %x_4, align 4 | 520 %tmp1 = alloca i8, i32 %x_4, align 4 |
516 | 521 |
517 %sp2 = call i8* @llvm.stacksave() | 522 %sp2 = call i8* @llvm.stacksave() |
518 %tmp2 = alloca i8, i32 %x_4, align 4 | 523 %tmp2 = alloca i8, i32 %x_4, align 4 |
519 | 524 |
520 %y = call i32 @foo(i32 %x) | 525 %y = call i32 @foo(i32 %x) |
521 | 526 |
(...skipping 16 matching lines...) Expand all Loading... |
538 ; At least 3 copies of esp, but probably more from having to do the allocas. | 543 ; At least 3 copies of esp, but probably more from having to do the allocas. |
539 ; CHECK: mov {{.*}},esp | 544 ; CHECK: mov {{.*}},esp |
540 ; CHECK: mov {{.*}},esp | 545 ; CHECK: mov {{.*}},esp |
541 ; CHECK: mov {{.*}},esp | 546 ; CHECK: mov {{.*}},esp |
542 ; CHECK: mov esp,{{.*}} | 547 ; CHECK: mov esp,{{.*}} |
543 ; ARM32-LABEL: test_stacksave_multiple | 548 ; ARM32-LABEL: test_stacksave_multiple |
544 ; ARM32: mov {{.*}}, sp | 549 ; ARM32: mov {{.*}}, sp |
545 ; ARM32: mov {{.*}}, sp | 550 ; ARM32: mov {{.*}}, sp |
546 ; ARM32: mov {{.*}}, sp | 551 ; ARM32: mov {{.*}}, sp |
547 ; ARM32: mov sp, {{.*}} | 552 ; ARM32: mov sp, {{.*}} |
OLD | NEW |