/*===---- arm_neon.h - ARM Neon intrinsics ---------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23 | |
24 #ifndef __ARM_NEON_H | |
25 #define __ARM_NEON_H | |
26 | |
27 #ifndef __ARM_NEON__ | |
28 #error "NEON support not enabled" | |
29 #endif | |
30 | |
31 #include <stdint.h> | |
32 | |
33 typedef float float32_t; | |
34 typedef int8_t poly8_t; | |
35 typedef int16_t poly16_t; | |
36 typedef uint16_t float16_t; | |
37 typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t; | |
38 typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t; | |
39 typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t; | |
40 typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t; | |
41 typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t; | |
42 typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t; | |
43 typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t; | |
44 typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t; | |
45 typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t; | |
46 typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t; | |
47 typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t; | |
48 typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t; | |
49 typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t; | |
50 typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t; | |
51 typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t; | |
52 typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t; | |
53 typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t; | |
54 typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t; | |
55 typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t; | |
56 typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t; | |
57 typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t; | |
58 typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t; | |
59 typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t; | |
60 typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t; | |
61 | |
62 typedef struct int8x8x2_t { | |
63 int8x8_t val[2]; | |
64 } int8x8x2_t; | |
65 | |
66 typedef struct int8x16x2_t { | |
67 int8x16_t val[2]; | |
68 } int8x16x2_t; | |
69 | |
70 typedef struct int16x4x2_t { | |
71 int16x4_t val[2]; | |
72 } int16x4x2_t; | |
73 | |
74 typedef struct int16x8x2_t { | |
75 int16x8_t val[2]; | |
76 } int16x8x2_t; | |
77 | |
78 typedef struct int32x2x2_t { | |
79 int32x2_t val[2]; | |
80 } int32x2x2_t; | |
81 | |
82 typedef struct int32x4x2_t { | |
83 int32x4_t val[2]; | |
84 } int32x4x2_t; | |
85 | |
86 typedef struct int64x1x2_t { | |
87 int64x1_t val[2]; | |
88 } int64x1x2_t; | |
89 | |
90 typedef struct int64x2x2_t { | |
91 int64x2_t val[2]; | |
92 } int64x2x2_t; | |
93 | |
94 typedef struct uint8x8x2_t { | |
95 uint8x8_t val[2]; | |
96 } uint8x8x2_t; | |
97 | |
98 typedef struct uint8x16x2_t { | |
99 uint8x16_t val[2]; | |
100 } uint8x16x2_t; | |
101 | |
102 typedef struct uint16x4x2_t { | |
103 uint16x4_t val[2]; | |
104 } uint16x4x2_t; | |
105 | |
106 typedef struct uint16x8x2_t { | |
107 uint16x8_t val[2]; | |
108 } uint16x8x2_t; | |
109 | |
110 typedef struct uint32x2x2_t { | |
111 uint32x2_t val[2]; | |
112 } uint32x2x2_t; | |
113 | |
114 typedef struct uint32x4x2_t { | |
115 uint32x4_t val[2]; | |
116 } uint32x4x2_t; | |
117 | |
118 typedef struct uint64x1x2_t { | |
119 uint64x1_t val[2]; | |
120 } uint64x1x2_t; | |
121 | |
122 typedef struct uint64x2x2_t { | |
123 uint64x2_t val[2]; | |
124 } uint64x2x2_t; | |
125 | |
126 typedef struct float16x4x2_t { | |
127 float16x4_t val[2]; | |
128 } float16x4x2_t; | |
129 | |
130 typedef struct float16x8x2_t { | |
131 float16x8_t val[2]; | |
132 } float16x8x2_t; | |
133 | |
134 typedef struct float32x2x2_t { | |
135 float32x2_t val[2]; | |
136 } float32x2x2_t; | |
137 | |
138 typedef struct float32x4x2_t { | |
139 float32x4_t val[2]; | |
140 } float32x4x2_t; | |
141 | |
142 typedef struct poly8x8x2_t { | |
143 poly8x8_t val[2]; | |
144 } poly8x8x2_t; | |
145 | |
146 typedef struct poly8x16x2_t { | |
147 poly8x16_t val[2]; | |
148 } poly8x16x2_t; | |
149 | |
150 typedef struct poly16x4x2_t { | |
151 poly16x4_t val[2]; | |
152 } poly16x4x2_t; | |
153 | |
154 typedef struct poly16x8x2_t { | |
155 poly16x8_t val[2]; | |
156 } poly16x8x2_t; | |
157 | |
158 typedef struct int8x8x3_t { | |
159 int8x8_t val[3]; | |
160 } int8x8x3_t; | |
161 | |
162 typedef struct int8x16x3_t { | |
163 int8x16_t val[3]; | |
164 } int8x16x3_t; | |
165 | |
166 typedef struct int16x4x3_t { | |
167 int16x4_t val[3]; | |
168 } int16x4x3_t; | |
169 | |
170 typedef struct int16x8x3_t { | |
171 int16x8_t val[3]; | |
172 } int16x8x3_t; | |
173 | |
174 typedef struct int32x2x3_t { | |
175 int32x2_t val[3]; | |
176 } int32x2x3_t; | |
177 | |
178 typedef struct int32x4x3_t { | |
179 int32x4_t val[3]; | |
180 } int32x4x3_t; | |
181 | |
182 typedef struct int64x1x3_t { | |
183 int64x1_t val[3]; | |
184 } int64x1x3_t; | |
185 | |
186 typedef struct int64x2x3_t { | |
187 int64x2_t val[3]; | |
188 } int64x2x3_t; | |
189 | |
190 typedef struct uint8x8x3_t { | |
191 uint8x8_t val[3]; | |
192 } uint8x8x3_t; | |
193 | |
194 typedef struct uint8x16x3_t { | |
195 uint8x16_t val[3]; | |
196 } uint8x16x3_t; | |
197 | |
198 typedef struct uint16x4x3_t { | |
199 uint16x4_t val[3]; | |
200 } uint16x4x3_t; | |
201 | |
202 typedef struct uint16x8x3_t { | |
203 uint16x8_t val[3]; | |
204 } uint16x8x3_t; | |
205 | |
206 typedef struct uint32x2x3_t { | |
207 uint32x2_t val[3]; | |
208 } uint32x2x3_t; | |
209 | |
210 typedef struct uint32x4x3_t { | |
211 uint32x4_t val[3]; | |
212 } uint32x4x3_t; | |
213 | |
214 typedef struct uint64x1x3_t { | |
215 uint64x1_t val[3]; | |
216 } uint64x1x3_t; | |
217 | |
218 typedef struct uint64x2x3_t { | |
219 uint64x2_t val[3]; | |
220 } uint64x2x3_t; | |
221 | |
222 typedef struct float16x4x3_t { | |
223 float16x4_t val[3]; | |
224 } float16x4x3_t; | |
225 | |
226 typedef struct float16x8x3_t { | |
227 float16x8_t val[3]; | |
228 } float16x8x3_t; | |
229 | |
230 typedef struct float32x2x3_t { | |
231 float32x2_t val[3]; | |
232 } float32x2x3_t; | |
233 | |
234 typedef struct float32x4x3_t { | |
235 float32x4_t val[3]; | |
236 } float32x4x3_t; | |
237 | |
238 typedef struct poly8x8x3_t { | |
239 poly8x8_t val[3]; | |
240 } poly8x8x3_t; | |
241 | |
242 typedef struct poly8x16x3_t { | |
243 poly8x16_t val[3]; | |
244 } poly8x16x3_t; | |
245 | |
246 typedef struct poly16x4x3_t { | |
247 poly16x4_t val[3]; | |
248 } poly16x4x3_t; | |
249 | |
250 typedef struct poly16x8x3_t { | |
251 poly16x8_t val[3]; | |
252 } poly16x8x3_t; | |
253 | |
254 typedef struct int8x8x4_t { | |
255 int8x8_t val[4]; | |
256 } int8x8x4_t; | |
257 | |
258 typedef struct int8x16x4_t { | |
259 int8x16_t val[4]; | |
260 } int8x16x4_t; | |
261 | |
262 typedef struct int16x4x4_t { | |
263 int16x4_t val[4]; | |
264 } int16x4x4_t; | |
265 | |
266 typedef struct int16x8x4_t { | |
267 int16x8_t val[4]; | |
268 } int16x8x4_t; | |
269 | |
270 typedef struct int32x2x4_t { | |
271 int32x2_t val[4]; | |
272 } int32x2x4_t; | |
273 | |
274 typedef struct int32x4x4_t { | |
275 int32x4_t val[4]; | |
276 } int32x4x4_t; | |
277 | |
278 typedef struct int64x1x4_t { | |
279 int64x1_t val[4]; | |
280 } int64x1x4_t; | |
281 | |
282 typedef struct int64x2x4_t { | |
283 int64x2_t val[4]; | |
284 } int64x2x4_t; | |
285 | |
286 typedef struct uint8x8x4_t { | |
287 uint8x8_t val[4]; | |
288 } uint8x8x4_t; | |
289 | |
290 typedef struct uint8x16x4_t { | |
291 uint8x16_t val[4]; | |
292 } uint8x16x4_t; | |
293 | |
294 typedef struct uint16x4x4_t { | |
295 uint16x4_t val[4]; | |
296 } uint16x4x4_t; | |
297 | |
298 typedef struct uint16x8x4_t { | |
299 uint16x8_t val[4]; | |
300 } uint16x8x4_t; | |
301 | |
302 typedef struct uint32x2x4_t { | |
303 uint32x2_t val[4]; | |
304 } uint32x2x4_t; | |
305 | |
306 typedef struct uint32x4x4_t { | |
307 uint32x4_t val[4]; | |
308 } uint32x4x4_t; | |
309 | |
310 typedef struct uint64x1x4_t { | |
311 uint64x1_t val[4]; | |
312 } uint64x1x4_t; | |
313 | |
314 typedef struct uint64x2x4_t { | |
315 uint64x2_t val[4]; | |
316 } uint64x2x4_t; | |
317 | |
318 typedef struct float16x4x4_t { | |
319 float16x4_t val[4]; | |
320 } float16x4x4_t; | |
321 | |
322 typedef struct float16x8x4_t { | |
323 float16x8_t val[4]; | |
324 } float16x8x4_t; | |
325 | |
326 typedef struct float32x2x4_t { | |
327 float32x2_t val[4]; | |
328 } float32x2x4_t; | |
329 | |
330 typedef struct float32x4x4_t { | |
331 float32x4_t val[4]; | |
332 } float32x4x4_t; | |
333 | |
334 typedef struct poly8x8x4_t { | |
335 poly8x8_t val[4]; | |
336 } poly8x8x4_t; | |
337 | |
338 typedef struct poly8x16x4_t { | |
339 poly8x16_t val[4]; | |
340 } poly8x16x4_t; | |
341 | |
342 typedef struct poly16x4x4_t { | |
343 poly16x4_t val[4]; | |
344 } poly16x4x4_t; | |
345 | |
346 typedef struct poly16x8x4_t { | |
347 poly16x8_t val[4]; | |
348 } poly16x8x4_t; | |
349 | |
350 #define __ai static __attribute__((__always_inline__)) | |
351 | |
352 __ai int16x8_t vmovl_s8(int8x8_t __a) { \ | |
353 return (int16x8_t)__builtin_neon_vmovl_v(__a, 17); } | |
354 __ai int32x4_t vmovl_s16(int16x4_t __a) { \ | |
355 return (int32x4_t)__builtin_neon_vmovl_v((int8x8_t)__a, 18); } | |
356 __ai int64x2_t vmovl_s32(int32x2_t __a) { \ | |
357 return (int64x2_t)__builtin_neon_vmovl_v((int8x8_t)__a, 19); } | |
358 __ai uint16x8_t vmovl_u8(uint8x8_t __a) { \ | |
359 return (uint16x8_t)__builtin_neon_vmovl_v((int8x8_t)__a, 25); } | |
360 __ai uint32x4_t vmovl_u16(uint16x4_t __a) { \ | |
361 return (uint32x4_t)__builtin_neon_vmovl_v((int8x8_t)__a, 26); } | |
362 __ai uint64x2_t vmovl_u32(uint32x2_t __a) { \ | |
363 return (uint64x2_t)__builtin_neon_vmovl_v((int8x8_t)__a, 27); } | |
364 | |
365 __ai int16x8_t vmull_s8(int8x8_t __a, int8x8_t __b) { \ | |
366 return (int16x8_t)__builtin_neon_vmull_v(__a, __b, 17); } | |
367 __ai int32x4_t vmull_s16(int16x4_t __a, int16x4_t __b) { \ | |
368 return (int32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 18); } | |
369 __ai int64x2_t vmull_s32(int32x2_t __a, int32x2_t __b) { \ | |
370 return (int64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 19); } | |
371 __ai uint16x8_t vmull_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
372 return (uint16x8_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 25); } | |
373 __ai uint32x4_t vmull_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
374 return (uint32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 26); } | |
375 __ai uint64x2_t vmull_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
376 return (uint64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 27); } | |
377 __ai poly16x8_t vmull_p8(poly8x8_t __a, poly8x8_t __b) { \ | |
378 return (poly16x8_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 22); } | |
379 | |
380 __ai int8x8_t vabd_s8(int8x8_t __a, int8x8_t __b) { \ | |
381 return (int8x8_t)__builtin_neon_vabd_v(__a, __b, 0); } | |
382 __ai int16x4_t vabd_s16(int16x4_t __a, int16x4_t __b) { \ | |
383 return (int16x4_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 1); } | |
384 __ai int32x2_t vabd_s32(int32x2_t __a, int32x2_t __b) { \ | |
385 return (int32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 2); } | |
386 __ai uint8x8_t vabd_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
387 return (uint8x8_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 8); } | |
388 __ai uint16x4_t vabd_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
389 return (uint16x4_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 9); } | |
390 __ai uint32x2_t vabd_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
391 return (uint32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 10); } | |
392 __ai float32x2_t vabd_f32(float32x2_t __a, float32x2_t __b) { \ | |
393 return (float32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 4); } | |
394 __ai int8x16_t vabdq_s8(int8x16_t __a, int8x16_t __b) { \ | |
395 return (int8x16_t)__builtin_neon_vabdq_v(__a, __b, 16); } | |
396 __ai int16x8_t vabdq_s16(int16x8_t __a, int16x8_t __b) { \ | |
397 return (int16x8_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 17);
} | |
398 __ai int32x4_t vabdq_s32(int32x4_t __a, int32x4_t __b) { \ | |
399 return (int32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 18);
} | |
400 __ai uint8x16_t vabdq_u8(uint8x16_t __a, uint8x16_t __b) { \ | |
401 return (uint8x16_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 24);
} | |
402 __ai uint16x8_t vabdq_u16(uint16x8_t __a, uint16x8_t __b) { \ | |
403 return (uint16x8_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 25);
} | |
404 __ai uint32x4_t vabdq_u32(uint32x4_t __a, uint32x4_t __b) { \ | |
405 return (uint32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 26);
} | |
406 __ai float32x4_t vabdq_f32(float32x4_t __a, float32x4_t __b) { \ | |
407 return (float32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 20)
; } | |
408 | |
409 __ai int8x8_t vaba_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { \ | |
410 return __a + vabd_s8(__b, __c); } | |
411 __ai int16x4_t vaba_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) { \ | |
412 return __a + vabd_s16(__b, __c); } | |
413 __ai int32x2_t vaba_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) { \ | |
414 return __a + vabd_s32(__b, __c); } | |
415 __ai uint8x8_t vaba_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { \ | |
416 return __a + vabd_u8(__b, __c); } | |
417 __ai uint16x4_t vaba_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { \ | |
418 return __a + vabd_u16(__b, __c); } | |
419 __ai uint32x2_t vaba_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { \ | |
420 return __a + vabd_u32(__b, __c); } | |
421 __ai int8x16_t vabaq_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) { \ | |
422 return __a + vabdq_s8(__b, __c); } | |
423 __ai int16x8_t vabaq_s16(int16x8_t __a, int16x8_t __b, int16x8_t __c) { \ | |
424 return __a + vabdq_s16(__b, __c); } | |
425 __ai int32x4_t vabaq_s32(int32x4_t __a, int32x4_t __b, int32x4_t __c) { \ | |
426 return __a + vabdq_s32(__b, __c); } | |
427 __ai uint8x16_t vabaq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { \ | |
428 return __a + vabdq_u8(__b, __c); } | |
429 __ai uint16x8_t vabaq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { \ | |
430 return __a + vabdq_u16(__b, __c); } | |
431 __ai uint32x4_t vabaq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { \ | |
432 return __a + vabdq_u32(__b, __c); } | |
433 | |
434 __ai int16x8_t vabal_s8(int16x8_t __a, int8x8_t __b, int8x8_t __c) { \ | |
435 return __a + (int16x8_t)vmovl_u8((uint8x8_t)vabd_s8(__b, __c)); } | |
436 __ai int32x4_t vabal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { \ | |
437 return __a + (int32x4_t)vmovl_u16((uint16x4_t)vabd_s16(__b, __c)); } | |
438 __ai int64x2_t vabal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { \ | |
439 return __a + (int64x2_t)vmovl_u32((uint32x2_t)vabd_s32(__b, __c)); } | |
440 __ai uint16x8_t vabal_u8(uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) { \ | |
441 return __a + vmovl_u8(vabd_u8(__b, __c)); } | |
442 __ai uint32x4_t vabal_u16(uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) { \ | |
443 return __a + vmovl_u16(vabd_u16(__b, __c)); } | |
444 __ai uint64x2_t vabal_u32(uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) { \ | |
445 return __a + vmovl_u32(vabd_u32(__b, __c)); } | |
446 | |
447 __ai int16x8_t vabdl_s8(int8x8_t __a, int8x8_t __b) { \ | |
448 return (int16x8_t)vmovl_u8((uint8x8_t)vabd_s8(__a, __b)); } | |
449 __ai int32x4_t vabdl_s16(int16x4_t __a, int16x4_t __b) { \ | |
450 return (int32x4_t)vmovl_u16((uint16x4_t)vabd_s16(__a, __b)); } | |
451 __ai int64x2_t vabdl_s32(int32x2_t __a, int32x2_t __b) { \ | |
452 return (int64x2_t)vmovl_u32((uint32x2_t)vabd_s32(__a, __b)); } | |
453 __ai uint16x8_t vabdl_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
454 return vmovl_u8(vabd_u8(__a, __b)); } | |
455 __ai uint32x4_t vabdl_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
456 return vmovl_u16(vabd_u16(__a, __b)); } | |
457 __ai uint64x2_t vabdl_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
458 return vmovl_u32(vabd_u32(__a, __b)); } | |
459 | |
460 __ai int8x8_t vabs_s8(int8x8_t __a) { \ | |
461 return (int8x8_t)__builtin_neon_vabs_v(__a, 0); } | |
462 __ai int16x4_t vabs_s16(int16x4_t __a) { \ | |
463 return (int16x4_t)__builtin_neon_vabs_v((int8x8_t)__a, 1); } | |
464 __ai int32x2_t vabs_s32(int32x2_t __a) { \ | |
465 return (int32x2_t)__builtin_neon_vabs_v((int8x8_t)__a, 2); } | |
466 __ai float32x2_t vabs_f32(float32x2_t __a) { \ | |
467 return (float32x2_t)__builtin_neon_vabs_v((int8x8_t)__a, 4); } | |
468 __ai int8x16_t vabsq_s8(int8x16_t __a) { \ | |
469 return (int8x16_t)__builtin_neon_vabsq_v(__a, 16); } | |
470 __ai int16x8_t vabsq_s16(int16x8_t __a) { \ | |
471 return (int16x8_t)__builtin_neon_vabsq_v((int8x16_t)__a, 17); } | |
472 __ai int32x4_t vabsq_s32(int32x4_t __a) { \ | |
473 return (int32x4_t)__builtin_neon_vabsq_v((int8x16_t)__a, 18); } | |
474 __ai float32x4_t vabsq_f32(float32x4_t __a) { \ | |
475 return (float32x4_t)__builtin_neon_vabsq_v((int8x16_t)__a, 20); } | |
476 | |
477 __ai int8x8_t vadd_s8(int8x8_t __a, int8x8_t __b) { \ | |
478 return __a + __b; } | |
479 __ai int16x4_t vadd_s16(int16x4_t __a, int16x4_t __b) { \ | |
480 return __a + __b; } | |
481 __ai int32x2_t vadd_s32(int32x2_t __a, int32x2_t __b) { \ | |
482 return __a + __b; } | |
483 __ai int64x1_t vadd_s64(int64x1_t __a, int64x1_t __b) { \ | |
484 return __a + __b; } | |
485 __ai float32x2_t vadd_f32(float32x2_t __a, float32x2_t __b) { \ | |
486 return __a + __b; } | |
487 __ai uint8x8_t vadd_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
488 return __a + __b; } | |
489 __ai uint16x4_t vadd_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
490 return __a + __b; } | |
491 __ai uint32x2_t vadd_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
492 return __a + __b; } | |
493 __ai uint64x1_t vadd_u64(uint64x1_t __a, uint64x1_t __b) { \ | |
494 return __a + __b; } | |
495 __ai int8x16_t vaddq_s8(int8x16_t __a, int8x16_t __b) { \ | |
496 return __a + __b; } | |
497 __ai int16x8_t vaddq_s16(int16x8_t __a, int16x8_t __b) { \ | |
498 return __a + __b; } | |
499 __ai int32x4_t vaddq_s32(int32x4_t __a, int32x4_t __b) { \ | |
500 return __a + __b; } | |
501 __ai int64x2_t vaddq_s64(int64x2_t __a, int64x2_t __b) { \ | |
502 return __a + __b; } | |
503 __ai float32x4_t vaddq_f32(float32x4_t __a, float32x4_t __b) { \ | |
504 return __a + __b; } | |
505 __ai uint8x16_t vaddq_u8(uint8x16_t __a, uint8x16_t __b) { \ | |
506 return __a + __b; } | |
507 __ai uint16x8_t vaddq_u16(uint16x8_t __a, uint16x8_t __b) { \ | |
508 return __a + __b; } | |
509 __ai uint32x4_t vaddq_u32(uint32x4_t __a, uint32x4_t __b) { \ | |
510 return __a + __b; } | |
511 __ai uint64x2_t vaddq_u64(uint64x2_t __a, uint64x2_t __b) { \ | |
512 return __a + __b; } | |
513 | |
514 __ai int8x8_t vaddhn_s16(int16x8_t __a, int16x8_t __b) { \ | |
515 return (int8x8_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 0); } | |
516 __ai int16x4_t vaddhn_s32(int32x4_t __a, int32x4_t __b) { \ | |
517 return (int16x4_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 1);
} | |
518 __ai int32x2_t vaddhn_s64(int64x2_t __a, int64x2_t __b) { \ | |
519 return (int32x2_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 2);
} | |
520 __ai uint8x8_t vaddhn_u16(uint16x8_t __a, uint16x8_t __b) { \ | |
521 return (uint8x8_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 8);
} | |
522 __ai uint16x4_t vaddhn_u32(uint32x4_t __a, uint32x4_t __b) { \ | |
523 return (uint16x4_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 9);
} | |
524 __ai uint32x2_t vaddhn_u64(uint64x2_t __a, uint64x2_t __b) { \ | |
525 return (uint32x2_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 10)
; } | |
526 | |
527 __ai int16x8_t vaddl_s8(int8x8_t __a, int8x8_t __b) { \ | |
528 return vmovl_s8(__a) + vmovl_s8(__b); } | |
529 __ai int32x4_t vaddl_s16(int16x4_t __a, int16x4_t __b) { \ | |
530 return vmovl_s16(__a) + vmovl_s16(__b); } | |
531 __ai int64x2_t vaddl_s32(int32x2_t __a, int32x2_t __b) { \ | |
532 return vmovl_s32(__a) + vmovl_s32(__b); } | |
533 __ai uint16x8_t vaddl_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
534 return vmovl_u8(__a) + vmovl_u8(__b); } | |
535 __ai uint32x4_t vaddl_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
536 return vmovl_u16(__a) + vmovl_u16(__b); } | |
537 __ai uint64x2_t vaddl_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
538 return vmovl_u32(__a) + vmovl_u32(__b); } | |
539 | |
540 __ai int16x8_t vaddw_s8(int16x8_t __a, int8x8_t __b) { \ | |
541 return __a + vmovl_s8(__b); } | |
542 __ai int32x4_t vaddw_s16(int32x4_t __a, int16x4_t __b) { \ | |
543 return __a + vmovl_s16(__b); } | |
544 __ai int64x2_t vaddw_s32(int64x2_t __a, int32x2_t __b) { \ | |
545 return __a + vmovl_s32(__b); } | |
546 __ai uint16x8_t vaddw_u8(uint16x8_t __a, uint8x8_t __b) { \ | |
547 return __a + vmovl_u8(__b); } | |
548 __ai uint32x4_t vaddw_u16(uint32x4_t __a, uint16x4_t __b) { \ | |
549 return __a + vmovl_u16(__b); } | |
550 __ai uint64x2_t vaddw_u32(uint64x2_t __a, uint32x2_t __b) { \ | |
551 return __a + vmovl_u32(__b); } | |
552 | |
553 __ai int8x8_t vand_s8(int8x8_t __a, int8x8_t __b) { \ | |
554 return __a & __b; } | |
555 __ai int16x4_t vand_s16(int16x4_t __a, int16x4_t __b) { \ | |
556 return __a & __b; } | |
557 __ai int32x2_t vand_s32(int32x2_t __a, int32x2_t __b) { \ | |
558 return __a & __b; } | |
559 __ai int64x1_t vand_s64(int64x1_t __a, int64x1_t __b) { \ | |
560 return __a & __b; } | |
561 __ai uint8x8_t vand_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
562 return __a & __b; } | |
563 __ai uint16x4_t vand_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
564 return __a & __b; } | |
565 __ai uint32x2_t vand_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
566 return __a & __b; } | |
567 __ai uint64x1_t vand_u64(uint64x1_t __a, uint64x1_t __b) { \ | |
568 return __a & __b; } | |
569 __ai int8x16_t vandq_s8(int8x16_t __a, int8x16_t __b) { \ | |
570 return __a & __b; } | |
571 __ai int16x8_t vandq_s16(int16x8_t __a, int16x8_t __b) { \ | |
572 return __a & __b; } | |
573 __ai int32x4_t vandq_s32(int32x4_t __a, int32x4_t __b) { \ | |
574 return __a & __b; } | |
575 __ai int64x2_t vandq_s64(int64x2_t __a, int64x2_t __b) { \ | |
576 return __a & __b; } | |
577 __ai uint8x16_t vandq_u8(uint8x16_t __a, uint8x16_t __b) { \ | |
578 return __a & __b; } | |
579 __ai uint16x8_t vandq_u16(uint16x8_t __a, uint16x8_t __b) { \ | |
580 return __a & __b; } | |
581 __ai uint32x4_t vandq_u32(uint32x4_t __a, uint32x4_t __b) { \ | |
582 return __a & __b; } | |
583 __ai uint64x2_t vandq_u64(uint64x2_t __a, uint64x2_t __b) { \ | |
584 return __a & __b; } | |
585 | |
586 __ai int8x8_t vbic_s8(int8x8_t __a, int8x8_t __b) { \ | |
587 return __a & ~__b; } | |
588 __ai int16x4_t vbic_s16(int16x4_t __a, int16x4_t __b) { \ | |
589 return __a & ~__b; } | |
590 __ai int32x2_t vbic_s32(int32x2_t __a, int32x2_t __b) { \ | |
591 return __a & ~__b; } | |
592 __ai int64x1_t vbic_s64(int64x1_t __a, int64x1_t __b) { \ | |
593 return __a & ~__b; } | |
594 __ai uint8x8_t vbic_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
595 return __a & ~__b; } | |
596 __ai uint16x4_t vbic_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
597 return __a & ~__b; } | |
598 __ai uint32x2_t vbic_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
599 return __a & ~__b; } | |
600 __ai uint64x1_t vbic_u64(uint64x1_t __a, uint64x1_t __b) { \ | |
601 return __a & ~__b; } | |
602 __ai int8x16_t vbicq_s8(int8x16_t __a, int8x16_t __b) { \ | |
603 return __a & ~__b; } | |
604 __ai int16x8_t vbicq_s16(int16x8_t __a, int16x8_t __b) { \ | |
605 return __a & ~__b; } | |
606 __ai int32x4_t vbicq_s32(int32x4_t __a, int32x4_t __b) { \ | |
607 return __a & ~__b; } | |
608 __ai int64x2_t vbicq_s64(int64x2_t __a, int64x2_t __b) { \ | |
609 return __a & ~__b; } | |
610 __ai uint8x16_t vbicq_u8(uint8x16_t __a, uint8x16_t __b) { \ | |
611 return __a & ~__b; } | |
612 __ai uint16x8_t vbicq_u16(uint16x8_t __a, uint16x8_t __b) { \ | |
613 return __a & ~__b; } | |
614 __ai uint32x4_t vbicq_u32(uint32x4_t __a, uint32x4_t __b) { \ | |
615 return __a & ~__b; } | |
616 __ai uint64x2_t vbicq_u64(uint64x2_t __a, uint64x2_t __b) { \ | |
617 return __a & ~__b; } | |
618 | |
619 __ai int8x8_t vbsl_s8(uint8x8_t __a, int8x8_t __b, int8x8_t __c) { \ | |
620 return (int8x8_t)((__a & (uint8x8_t)__b) | (~__a & (uint8x8_t)__c)); } | |
621 __ai int16x4_t vbsl_s16(uint16x4_t __a, int16x4_t __b, int16x4_t __c) { \ | |
622 return (int16x4_t)((__a & (uint16x4_t)__b) | (~__a & (uint16x4_t)__c)); } | |
623 __ai int32x2_t vbsl_s32(uint32x2_t __a, int32x2_t __b, int32x2_t __c) { \ | |
624 return (int32x2_t)((__a & (uint32x2_t)__b) | (~__a & (uint32x2_t)__c)); } | |
625 __ai int64x1_t vbsl_s64(uint64x1_t __a, int64x1_t __b, int64x1_t __c) { \ | |
626 return (int64x1_t)((__a & (uint64x1_t)__b) | (~__a & (uint64x1_t)__c)); } | |
627 __ai uint8x8_t vbsl_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { \ | |
628 return (uint8x8_t)((__a & (uint8x8_t)__b) | (~__a & (uint8x8_t)__c)); } | |
629 __ai uint16x4_t vbsl_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { \ | |
630 return (uint16x4_t)((__a & (uint16x4_t)__b) | (~__a & (uint16x4_t)__c)); } | |
631 __ai uint32x2_t vbsl_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { \ | |
632 return (uint32x2_t)((__a & (uint32x2_t)__b) | (~__a & (uint32x2_t)__c)); } | |
633 __ai uint64x1_t vbsl_u64(uint64x1_t __a, uint64x1_t __b, uint64x1_t __c) { \ | |
634 return (uint64x1_t)((__a & (uint64x1_t)__b) | (~__a & (uint64x1_t)__c)); } | |
635 __ai float32x2_t vbsl_f32(uint32x2_t __a, float32x2_t __b, float32x2_t __c) { \ | |
636 return (float32x2_t)((__a & (uint32x2_t)__b) | (~__a & (uint32x2_t)__c)); } | |
637 __ai poly8x8_t vbsl_p8(uint8x8_t __a, poly8x8_t __b, poly8x8_t __c) { \ | |
638 return (poly8x8_t)((__a & (uint8x8_t)__b) | (~__a & (uint8x8_t)__c)); } | |
639 __ai poly16x4_t vbsl_p16(uint16x4_t __a, poly16x4_t __b, poly16x4_t __c) { \ | |
640 return (poly16x4_t)((__a & (uint16x4_t)__b) | (~__a & (uint16x4_t)__c)); } | |
641 __ai int8x16_t vbslq_s8(uint8x16_t __a, int8x16_t __b, int8x16_t __c) { \ | |
642 return (int8x16_t)((__a & (uint8x16_t)__b) | (~__a & (uint8x16_t)__c)); } | |
643 __ai int16x8_t vbslq_s16(uint16x8_t __a, int16x8_t __b, int16x8_t __c) { \ | |
644 return (int16x8_t)((__a & (uint16x8_t)__b) | (~__a & (uint16x8_t)__c)); } | |
645 __ai int32x4_t vbslq_s32(uint32x4_t __a, int32x4_t __b, int32x4_t __c) { \ | |
646 return (int32x4_t)((__a & (uint32x4_t)__b) | (~__a & (uint32x4_t)__c)); } | |
647 __ai int64x2_t vbslq_s64(uint64x2_t __a, int64x2_t __b, int64x2_t __c) { \ | |
648 return (int64x2_t)((__a & (uint64x2_t)__b) | (~__a & (uint64x2_t)__c)); } | |
649 __ai uint8x16_t vbslq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { \ | |
650 return (uint8x16_t)((__a & (uint8x16_t)__b) | (~__a & (uint8x16_t)__c)); } | |
651 __ai uint16x8_t vbslq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { \ | |
652 return (uint16x8_t)((__a & (uint16x8_t)__b) | (~__a & (uint16x8_t)__c)); } | |
653 __ai uint32x4_t vbslq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { \ | |
654 return (uint32x4_t)((__a & (uint32x4_t)__b) | (~__a & (uint32x4_t)__c)); } | |
655 __ai uint64x2_t vbslq_u64(uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) { \ | |
656 return (uint64x2_t)((__a & (uint64x2_t)__b) | (~__a & (uint64x2_t)__c)); } | |
657 __ai float32x4_t vbslq_f32(uint32x4_t __a, float32x4_t __b, float32x4_t __c) { \ | |
658 return (float32x4_t)((__a & (uint32x4_t)__b) | (~__a & (uint32x4_t)__c)); } | |
659 __ai poly8x16_t vbslq_p8(uint8x16_t __a, poly8x16_t __b, poly8x16_t __c) { \ | |
660 return (poly8x16_t)((__a & (uint8x16_t)__b) | (~__a & (uint8x16_t)__c)); } | |
661 __ai poly16x8_t vbslq_p16(uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) { \ | |
662 return (poly16x8_t)((__a & (uint16x8_t)__b) | (~__a & (uint16x8_t)__c)); } | |
663 | |
664 __ai uint32x2_t vcage_f32(float32x2_t __a, float32x2_t __b) { \ | |
665 return (uint32x2_t)__builtin_neon_vcage_v((int8x8_t)__a, (int8x8_t)__b, 10); } | |
666 __ai uint32x4_t vcageq_f32(float32x4_t __a, float32x4_t __b) { \ | |
667 return (uint32x4_t)__builtin_neon_vcageq_v((int8x16_t)__a, (int8x16_t)__b, 26)
; } | |
668 | |
669 __ai uint32x2_t vcagt_f32(float32x2_t __a, float32x2_t __b) { \ | |
670 return (uint32x2_t)__builtin_neon_vcagt_v((int8x8_t)__a, (int8x8_t)__b, 10); } | |
671 __ai uint32x4_t vcagtq_f32(float32x4_t __a, float32x4_t __b) { \ | |
672 return (uint32x4_t)__builtin_neon_vcagtq_v((int8x16_t)__a, (int8x16_t)__b, 26)
; } | |
673 | |
674 __ai uint32x2_t vcale_f32(float32x2_t __a, float32x2_t __b) { \ | |
675 return (uint32x2_t)__builtin_neon_vcale_v((int8x8_t)__a, (int8x8_t)__b, 10); } | |
676 __ai uint32x4_t vcaleq_f32(float32x4_t __a, float32x4_t __b) { \ | |
677 return (uint32x4_t)__builtin_neon_vcaleq_v((int8x16_t)__a, (int8x16_t)__b, 26)
; } | |
678 | |
679 __ai uint32x2_t vcalt_f32(float32x2_t __a, float32x2_t __b) { \ | |
680 return (uint32x2_t)__builtin_neon_vcalt_v((int8x8_t)__a, (int8x8_t)__b, 10); } | |
681 __ai uint32x4_t vcaltq_f32(float32x4_t __a, float32x4_t __b) { \ | |
682 return (uint32x4_t)__builtin_neon_vcaltq_v((int8x16_t)__a, (int8x16_t)__b, 26)
; } | |
683 | |
684 __ai uint8x8_t vceq_s8(int8x8_t __a, int8x8_t __b) { \ | |
685 return (uint8x8_t)(__a == __b); } | |
686 __ai uint16x4_t vceq_s16(int16x4_t __a, int16x4_t __b) { \ | |
687 return (uint16x4_t)(__a == __b); } | |
688 __ai uint32x2_t vceq_s32(int32x2_t __a, int32x2_t __b) { \ | |
689 return (uint32x2_t)(__a == __b); } | |
690 __ai uint32x2_t vceq_f32(float32x2_t __a, float32x2_t __b) { \ | |
691 return (uint32x2_t)(__a == __b); } | |
692 __ai uint8x8_t vceq_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
693 return (uint8x8_t)(__a == __b); } | |
694 __ai uint16x4_t vceq_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
695 return (uint16x4_t)(__a == __b); } | |
696 __ai uint32x2_t vceq_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
697 return (uint32x2_t)(__a == __b); } | |
698 __ai uint8x8_t vceq_p8(poly8x8_t __a, poly8x8_t __b) { \ | |
699 return (uint8x8_t)(__a == __b); } | |
700 __ai uint8x16_t vceqq_s8(int8x16_t __a, int8x16_t __b) { \ | |
701 return (uint8x16_t)(__a == __b); } | |
702 __ai uint16x8_t vceqq_s16(int16x8_t __a, int16x8_t __b) { \ | |
703 return (uint16x8_t)(__a == __b); } | |
704 __ai uint32x4_t vceqq_s32(int32x4_t __a, int32x4_t __b) { \ | |
705 return (uint32x4_t)(__a == __b); } | |
706 __ai uint32x4_t vceqq_f32(float32x4_t __a, float32x4_t __b) { \ | |
707 return (uint32x4_t)(__a == __b); } | |
708 __ai uint8x16_t vceqq_u8(uint8x16_t __a, uint8x16_t __b) { \ | |
709 return (uint8x16_t)(__a == __b); } | |
710 __ai uint16x8_t vceqq_u16(uint16x8_t __a, uint16x8_t __b) { \ | |
711 return (uint16x8_t)(__a == __b); } | |
712 __ai uint32x4_t vceqq_u32(uint32x4_t __a, uint32x4_t __b) { \ | |
713 return (uint32x4_t)(__a == __b); } | |
714 __ai uint8x16_t vceqq_p8(poly8x16_t __a, poly8x16_t __b) { \ | |
715 return (uint8x16_t)(__a == __b); } | |
716 | |
/* Lane-wise greater-or-equal (VCGE): all-ones lane mask where __a >= __b,
 * all-zeros otherwise.  The element type of the operands selects the
 * signed, unsigned or floating-point hardware comparison; operand order
 * is significant for this family. */
__ai uint8x8_t vcge_s8(int8x8_t __a, int8x8_t __b) { \
  return (uint8x8_t)(__a >= __b); }
__ai uint16x4_t vcge_s16(int16x4_t __a, int16x4_t __b) { \
  return (uint16x4_t)(__a >= __b); }
__ai uint32x2_t vcge_s32(int32x2_t __a, int32x2_t __b) { \
  return (uint32x2_t)(__a >= __b); }
__ai uint32x2_t vcge_f32(float32x2_t __a, float32x2_t __b) { \
  return (uint32x2_t)(__a >= __b); }
__ai uint8x8_t vcge_u8(uint8x8_t __a, uint8x8_t __b) { \
  return (uint8x8_t)(__a >= __b); }
__ai uint16x4_t vcge_u16(uint16x4_t __a, uint16x4_t __b) { \
  return (uint16x4_t)(__a >= __b); }
__ai uint32x2_t vcge_u32(uint32x2_t __a, uint32x2_t __b) { \
  return (uint32x2_t)(__a >= __b); }
__ai uint8x16_t vcgeq_s8(int8x16_t __a, int8x16_t __b) { \
  return (uint8x16_t)(__a >= __b); }
__ai uint16x8_t vcgeq_s16(int16x8_t __a, int16x8_t __b) { \
  return (uint16x8_t)(__a >= __b); }
__ai uint32x4_t vcgeq_s32(int32x4_t __a, int32x4_t __b) { \
  return (uint32x4_t)(__a >= __b); }
__ai uint32x4_t vcgeq_f32(float32x4_t __a, float32x4_t __b) { \
  return (uint32x4_t)(__a >= __b); }
__ai uint8x16_t vcgeq_u8(uint8x16_t __a, uint8x16_t __b) { \
  return (uint8x16_t)(__a >= __b); }
__ai uint16x8_t vcgeq_u16(uint16x8_t __a, uint16x8_t __b) { \
  return (uint16x8_t)(__a >= __b); }
__ai uint32x4_t vcgeq_u32(uint32x4_t __a, uint32x4_t __b) { \
  return (uint32x4_t)(__a >= __b); }
745 | |
/* Lane-wise greater-than (VCGT): all-ones lane mask where __a > __b,
 * all-zeros otherwise.  Same type-driven dispatch as the VCGE family
 * above; operand order is significant. */
__ai uint8x8_t vcgt_s8(int8x8_t __a, int8x8_t __b) { \
  return (uint8x8_t)(__a > __b); }
__ai uint16x4_t vcgt_s16(int16x4_t __a, int16x4_t __b) { \
  return (uint16x4_t)(__a > __b); }
__ai uint32x2_t vcgt_s32(int32x2_t __a, int32x2_t __b) { \
  return (uint32x2_t)(__a > __b); }
__ai uint32x2_t vcgt_f32(float32x2_t __a, float32x2_t __b) { \
  return (uint32x2_t)(__a > __b); }
__ai uint8x8_t vcgt_u8(uint8x8_t __a, uint8x8_t __b) { \
  return (uint8x8_t)(__a > __b); }
__ai uint16x4_t vcgt_u16(uint16x4_t __a, uint16x4_t __b) { \
  return (uint16x4_t)(__a > __b); }
__ai uint32x2_t vcgt_u32(uint32x2_t __a, uint32x2_t __b) { \
  return (uint32x2_t)(__a > __b); }
__ai uint8x16_t vcgtq_s8(int8x16_t __a, int8x16_t __b) { \
  return (uint8x16_t)(__a > __b); }
__ai uint16x8_t vcgtq_s16(int16x8_t __a, int16x8_t __b) { \
  return (uint16x8_t)(__a > __b); }
__ai uint32x4_t vcgtq_s32(int32x4_t __a, int32x4_t __b) { \
  return (uint32x4_t)(__a > __b); }
__ai uint32x4_t vcgtq_f32(float32x4_t __a, float32x4_t __b) { \
  return (uint32x4_t)(__a > __b); }
__ai uint8x16_t vcgtq_u8(uint8x16_t __a, uint8x16_t __b) { \
  return (uint8x16_t)(__a > __b); }
__ai uint16x8_t vcgtq_u16(uint16x8_t __a, uint16x8_t __b) { \
  return (uint16x8_t)(__a > __b); }
__ai uint32x4_t vcgtq_u32(uint32x4_t __a, uint32x4_t __b) { \
  return (uint32x4_t)(__a > __b); }
774 | |
/* Lane-wise less-or-equal (VCLE): all-ones lane mask where __a <= __b,
 * all-zeros otherwise.  On hardware this is typically emitted as VCGE with
 * swapped operands; the C-level vector operator keeps that transparent. */
__ai uint8x8_t vcle_s8(int8x8_t __a, int8x8_t __b) { \
  return (uint8x8_t)(__a <= __b); }
__ai uint16x4_t vcle_s16(int16x4_t __a, int16x4_t __b) { \
  return (uint16x4_t)(__a <= __b); }
__ai uint32x2_t vcle_s32(int32x2_t __a, int32x2_t __b) { \
  return (uint32x2_t)(__a <= __b); }
__ai uint32x2_t vcle_f32(float32x2_t __a, float32x2_t __b) { \
  return (uint32x2_t)(__a <= __b); }
__ai uint8x8_t vcle_u8(uint8x8_t __a, uint8x8_t __b) { \
  return (uint8x8_t)(__a <= __b); }
__ai uint16x4_t vcle_u16(uint16x4_t __a, uint16x4_t __b) { \
  return (uint16x4_t)(__a <= __b); }
__ai uint32x2_t vcle_u32(uint32x2_t __a, uint32x2_t __b) { \
  return (uint32x2_t)(__a <= __b); }
__ai uint8x16_t vcleq_s8(int8x16_t __a, int8x16_t __b) { \
  return (uint8x16_t)(__a <= __b); }
__ai uint16x8_t vcleq_s16(int16x8_t __a, int16x8_t __b) { \
  return (uint16x8_t)(__a <= __b); }
__ai uint32x4_t vcleq_s32(int32x4_t __a, int32x4_t __b) { \
  return (uint32x4_t)(__a <= __b); }
__ai uint32x4_t vcleq_f32(float32x4_t __a, float32x4_t __b) { \
  return (uint32x4_t)(__a <= __b); }
__ai uint8x16_t vcleq_u8(uint8x16_t __a, uint8x16_t __b) { \
  return (uint8x16_t)(__a <= __b); }
__ai uint16x8_t vcleq_u16(uint16x8_t __a, uint16x8_t __b) { \
  return (uint16x8_t)(__a <= __b); }
__ai uint32x4_t vcleq_u32(uint32x4_t __a, uint32x4_t __b) { \
  return (uint32x4_t)(__a <= __b); }
803 | |
/* VCLS: per ARM's definition, counts leading sign bits per lane (the number
 * of consecutive bits below the sign bit that equal it).  Defined for
 * signed element types only.  The trailing literal is the generator's NEON
 * type code (0/1/2 = 64-bit s8/s16/s32; +16 for the 128-bit q forms). */
__ai int8x8_t vcls_s8(int8x8_t __a) { \
  return (int8x8_t)__builtin_neon_vcls_v(__a, 0); }
__ai int16x4_t vcls_s16(int16x4_t __a) { \
  return (int16x4_t)__builtin_neon_vcls_v((int8x8_t)__a, 1); }
__ai int32x2_t vcls_s32(int32x2_t __a) { \
  return (int32x2_t)__builtin_neon_vcls_v((int8x8_t)__a, 2); }
__ai int8x16_t vclsq_s8(int8x16_t __a) { \
  return (int8x16_t)__builtin_neon_vclsq_v(__a, 16); }
__ai int16x8_t vclsq_s16(int16x8_t __a) { \
  return (int16x8_t)__builtin_neon_vclsq_v((int8x16_t)__a, 17); }
__ai int32x4_t vclsq_s32(int32x4_t __a) { \
  return (int32x4_t)__builtin_neon_vclsq_v((int8x16_t)__a, 18); }
816 | |
/* Lane-wise less-than (VCLT): all-ones lane mask where __a < __b,
 * all-zeros otherwise.  Operand order is significant. */
__ai uint8x8_t vclt_s8(int8x8_t __a, int8x8_t __b) { \
  return (uint8x8_t)(__a < __b); }
__ai uint16x4_t vclt_s16(int16x4_t __a, int16x4_t __b) { \
  return (uint16x4_t)(__a < __b); }
__ai uint32x2_t vclt_s32(int32x2_t __a, int32x2_t __b) { \
  return (uint32x2_t)(__a < __b); }
__ai uint32x2_t vclt_f32(float32x2_t __a, float32x2_t __b) { \
  return (uint32x2_t)(__a < __b); }
__ai uint8x8_t vclt_u8(uint8x8_t __a, uint8x8_t __b) { \
  return (uint8x8_t)(__a < __b); }
__ai uint16x4_t vclt_u16(uint16x4_t __a, uint16x4_t __b) { \
  return (uint16x4_t)(__a < __b); }
__ai uint32x2_t vclt_u32(uint32x2_t __a, uint32x2_t __b) { \
  return (uint32x2_t)(__a < __b); }
__ai uint8x16_t vcltq_s8(int8x16_t __a, int8x16_t __b) { \
  return (uint8x16_t)(__a < __b); }
__ai uint16x8_t vcltq_s16(int16x8_t __a, int16x8_t __b) { \
  return (uint16x8_t)(__a < __b); }
__ai uint32x4_t vcltq_s32(int32x4_t __a, int32x4_t __b) { \
  return (uint32x4_t)(__a < __b); }
__ai uint32x4_t vcltq_f32(float32x4_t __a, float32x4_t __b) { \
  return (uint32x4_t)(__a < __b); }
__ai uint8x16_t vcltq_u8(uint8x16_t __a, uint8x16_t __b) { \
  return (uint8x16_t)(__a < __b); }
__ai uint16x8_t vcltq_u16(uint16x8_t __a, uint16x8_t __b) { \
  return (uint16x8_t)(__a < __b); }
__ai uint32x4_t vcltq_u32(uint32x4_t __a, uint32x4_t __b) { \
  return (uint32x4_t)(__a < __b); }
845 | |
/* VCLZ: count leading zero bits per lane.  Defined for both signed and
 * unsigned integer element types; the type-code constants follow the
 * generator's scheme (+8 for unsigned, +16 for the 128-bit q forms). */
__ai int8x8_t vclz_s8(int8x8_t __a) { \
  return (int8x8_t)__builtin_neon_vclz_v(__a, 0); }
__ai int16x4_t vclz_s16(int16x4_t __a) { \
  return (int16x4_t)__builtin_neon_vclz_v((int8x8_t)__a, 1); }
__ai int32x2_t vclz_s32(int32x2_t __a) { \
  return (int32x2_t)__builtin_neon_vclz_v((int8x8_t)__a, 2); }
__ai uint8x8_t vclz_u8(uint8x8_t __a) { \
  return (uint8x8_t)__builtin_neon_vclz_v((int8x8_t)__a, 8); }
__ai uint16x4_t vclz_u16(uint16x4_t __a) { \
  return (uint16x4_t)__builtin_neon_vclz_v((int8x8_t)__a, 9); }
__ai uint32x2_t vclz_u32(uint32x2_t __a) { \
  return (uint32x2_t)__builtin_neon_vclz_v((int8x8_t)__a, 10); }
__ai int8x16_t vclzq_s8(int8x16_t __a) { \
  return (int8x16_t)__builtin_neon_vclzq_v(__a, 16); }
__ai int16x8_t vclzq_s16(int16x8_t __a) { \
  return (int16x8_t)__builtin_neon_vclzq_v((int8x16_t)__a, 17); }
__ai int32x4_t vclzq_s32(int32x4_t __a) { \
  return (int32x4_t)__builtin_neon_vclzq_v((int8x16_t)__a, 18); }
__ai uint8x16_t vclzq_u8(uint8x16_t __a) { \
  return (uint8x16_t)__builtin_neon_vclzq_v((int8x16_t)__a, 24); }
__ai uint16x8_t vclzq_u16(uint16x8_t __a) { \
  return (uint16x8_t)__builtin_neon_vclzq_v((int8x16_t)__a, 25); }
__ai uint32x4_t vclzq_u32(uint32x4_t __a) { \
  return (uint32x4_t)__builtin_neon_vclzq_v((int8x16_t)__a, 26); }
870 | |
/* VCNT: population count (number of set bits) per 8-bit lane.  Only 8-bit
 * element types exist for this instruction, hence only u8/s8/p8 variants. */
__ai uint8x8_t vcnt_u8(uint8x8_t __a) { \
  return (uint8x8_t)__builtin_neon_vcnt_v((int8x8_t)__a, 8); }
__ai int8x8_t vcnt_s8(int8x8_t __a) { \
  return (int8x8_t)__builtin_neon_vcnt_v(__a, 0); }
__ai poly8x8_t vcnt_p8(poly8x8_t __a) { \
  return (poly8x8_t)__builtin_neon_vcnt_v((int8x8_t)__a, 5); }
__ai uint8x16_t vcntq_u8(uint8x16_t __a) { \
  return (uint8x16_t)__builtin_neon_vcntq_v((int8x16_t)__a, 24); }
__ai int8x16_t vcntq_s8(int8x16_t __a) { \
  return (int8x16_t)__builtin_neon_vcntq_v(__a, 16); }
__ai poly8x16_t vcntq_p8(poly8x16_t __a) { \
  return (poly8x16_t)__builtin_neon_vcntq_v((int8x16_t)__a, 21); }
883 | |
/* vcombine: concatenate two 64-bit vectors into one 128-bit vector, with
 * __a as the low half and __b as the high half.  Each input is reinterpreted
 * as a one-element int64 vector so a single two-index shuffle (0, 1) can
 * join the two 64-bit halves regardless of element type. */
__ai int8x16_t vcombine_s8(int8x8_t __a, int8x8_t __b) { \
  return (int8x16_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
__ai int16x8_t vcombine_s16(int16x4_t __a, int16x4_t __b) { \
  return (int16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
__ai int32x4_t vcombine_s32(int32x2_t __a, int32x2_t __b) { \
  return (int32x4_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
__ai int64x2_t vcombine_s64(int64x1_t __a, int64x1_t __b) { \
  return (int64x2_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
__ai float16x8_t vcombine_f16(float16x4_t __a, float16x4_t __b) { \
  return (float16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
__ai float32x4_t vcombine_f32(float32x2_t __a, float32x2_t __b) { \
  return (float32x4_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
__ai uint8x16_t vcombine_u8(uint8x8_t __a, uint8x8_t __b) { \
  return (uint8x16_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
__ai uint16x8_t vcombine_u16(uint16x4_t __a, uint16x4_t __b) { \
  return (uint16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
__ai uint32x4_t vcombine_u32(uint32x2_t __a, uint32x2_t __b) { \
  return (uint32x4_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
__ai uint64x2_t vcombine_u64(uint64x1_t __a, uint64x1_t __b) { \
  return (uint64x2_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
__ai poly8x16_t vcombine_p8(poly8x8_t __a, poly8x8_t __b) { \
  return (poly8x16_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
__ai poly16x8_t vcombine_p16(poly16x4_t __a, poly16x4_t __b) { \
  return (poly16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
908 | |
/* vcreate: reinterpret a 64-bit scalar as a 64-bit NEON vector of the
 * requested element type.  A pure bit-cast -- no value conversion occurs
 * (lane order therefore follows the target's endianness). */
__ai int8x8_t vcreate_s8(uint64_t __a) { \
  return (int8x8_t)__a; }
__ai int16x4_t vcreate_s16(uint64_t __a) { \
  return (int16x4_t)__a; }
__ai int32x2_t vcreate_s32(uint64_t __a) { \
  return (int32x2_t)__a; }
__ai float16x4_t vcreate_f16(uint64_t __a) { \
  return (float16x4_t)__a; }
__ai float32x2_t vcreate_f32(uint64_t __a) { \
  return (float32x2_t)__a; }
__ai uint8x8_t vcreate_u8(uint64_t __a) { \
  return (uint8x8_t)__a; }
__ai uint16x4_t vcreate_u16(uint64_t __a) { \
  return (uint16x4_t)__a; }
__ai uint32x2_t vcreate_u32(uint64_t __a) { \
  return (uint32x2_t)__a; }
__ai uint64x1_t vcreate_u64(uint64_t __a) { \
  return (uint64x1_t)__a; }
__ai poly8x8_t vcreate_p8(uint64_t __a) { \
  return (poly8x8_t)__a; }
__ai poly16x4_t vcreate_p16(uint64_t __a) { \
  return (poly16x4_t)__a; }
__ai int64x1_t vcreate_s64(uint64_t __a) { \
  return (int64x1_t)__a; }
933 | |
/* VCVT conversions.
 *  - vcvt_f16_f32 / vcvt_f32_f16: narrow/widen between f32 and f16 halves.
 *  - vcvt[q]_f32_{s32,u32}: integer -> float per lane.
 *  - vcvt[q]_{s32,u32}_f32: float -> integer per lane (truncating per VCVT).
 *  - The _n_ forms are macros because __b is an immediate; per ARM's VCVT
 *    fixed-point encoding it is the number of fraction bits and must be a
 *    compile-time constant in 1..32. */
__ai float16x4_t vcvt_f16_f32(float32x4_t __a) { \
  return (float16x4_t)__builtin_neon_vcvt_f16_v((int8x16_t)__a, 7); }

__ai float32x2_t vcvt_f32_s32(int32x2_t __a) { \
  return (float32x2_t)__builtin_neon_vcvt_f32_v((int8x8_t)__a, 2); }
__ai float32x2_t vcvt_f32_u32(uint32x2_t __a) { \
  return (float32x2_t)__builtin_neon_vcvt_f32_v((int8x8_t)__a, 10); }
__ai float32x4_t vcvtq_f32_s32(int32x4_t __a) { \
  return (float32x4_t)__builtin_neon_vcvtq_f32_v((int8x16_t)__a, 18); }
__ai float32x4_t vcvtq_f32_u32(uint32x4_t __a) { \
  return (float32x4_t)__builtin_neon_vcvtq_f32_v((int8x16_t)__a, 26); }

__ai float32x4_t vcvt_f32_f16(float16x4_t __a) { \
  return (float32x4_t)__builtin_neon_vcvt_f32_f16((int8x8_t)__a, 7); }

#define vcvt_n_f32_s32(a, __b) __extension__ ({ \
  int32x2_t __a = (a); \
  (float32x2_t)__builtin_neon_vcvt_n_f32_v((int8x8_t)__a, __b, 2); })
#define vcvt_n_f32_u32(a, __b) __extension__ ({ \
  uint32x2_t __a = (a); \
  (float32x2_t)__builtin_neon_vcvt_n_f32_v((int8x8_t)__a, __b, 10); })
#define vcvtq_n_f32_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (float32x4_t)__builtin_neon_vcvtq_n_f32_v((int8x16_t)__a, __b, 18); })
#define vcvtq_n_f32_u32(a, __b) __extension__ ({ \
  uint32x4_t __a = (a); \
  (float32x4_t)__builtin_neon_vcvtq_n_f32_v((int8x16_t)__a, __b, 26); })

#define vcvt_n_s32_f32(a, __b) __extension__ ({ \
  float32x2_t __a = (a); \
  (int32x2_t)__builtin_neon_vcvt_n_s32_v((int8x8_t)__a, __b, 2); })
#define vcvtq_n_s32_f32(a, __b) __extension__ ({ \
  float32x4_t __a = (a); \
  (int32x4_t)__builtin_neon_vcvtq_n_s32_v((int8x16_t)__a, __b, 18); })

#define vcvt_n_u32_f32(a, __b) __extension__ ({ \
  float32x2_t __a = (a); \
  (uint32x2_t)__builtin_neon_vcvt_n_u32_v((int8x8_t)__a, __b, 10); })
#define vcvtq_n_u32_f32(a, __b) __extension__ ({ \
  float32x4_t __a = (a); \
  (uint32x4_t)__builtin_neon_vcvtq_n_u32_v((int8x16_t)__a, __b, 26); })

__ai int32x2_t vcvt_s32_f32(float32x2_t __a) { \
  return (int32x2_t)__builtin_neon_vcvt_s32_v((int8x8_t)__a, 2); }
__ai int32x4_t vcvtq_s32_f32(float32x4_t __a) { \
  return (int32x4_t)__builtin_neon_vcvtq_s32_v((int8x16_t)__a, 18); }

__ai uint32x2_t vcvt_u32_f32(float32x2_t __a) { \
  return (uint32x2_t)__builtin_neon_vcvt_u32_v((int8x8_t)__a, 10); }
__ai uint32x4_t vcvtq_u32_f32(float32x4_t __a) { \
  return (uint32x4_t)__builtin_neon_vcvtq_u32_v((int8x16_t)__a, 26); }
985 | |
/* vdup_lane / vdupq_lane: broadcast lane __b of (a) to every lane of the
 * result (ARM VDUP by scalar).  These are macros because __b must be a
 * compile-time constant lane index, and __builtin_shufflevector requires
 * one literal index per destination lane -- the index count must equal the
 * lane count of the result type.
 *
 * BUGFIX: the q-form 8-bit variants (vdupq_lane_u8/s8/p8) listed 17
 * indices, one too many for a 16-lane result, which would produce a
 * 17-element shuffle result that cannot convert to a uint8x16_t/int8x16_t/
 * poly8x16_t.  Trimmed to exactly 16 indices. */
#define vdup_lane_u8(a, __b) __extension__ ({ \
  uint8x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); })
#define vdup_lane_u16(a, __b) __extension__ ({ \
  uint16x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b, __b, __b); })
#define vdup_lane_u32(a, __b) __extension__ ({ \
  uint32x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b); })
#define vdup_lane_s8(a, __b) __extension__ ({ \
  int8x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); })
#define vdup_lane_s16(a, __b) __extension__ ({ \
  int16x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b, __b, __b); })
#define vdup_lane_s32(a, __b) __extension__ ({ \
  int32x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b); })
#define vdup_lane_p8(a, __b) __extension__ ({ \
  poly8x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); })
#define vdup_lane_p16(a, __b) __extension__ ({ \
  poly16x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b, __b, __b); })
#define vdup_lane_f32(a, __b) __extension__ ({ \
  float32x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b); })
#define vdupq_lane_u8(a, __b) __extension__ ({ \
  uint8x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); })
#define vdupq_lane_u16(a, __b) __extension__ ({ \
  uint16x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); })
#define vdupq_lane_u32(a, __b) __extension__ ({ \
  uint32x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b, __b, __b); })
#define vdupq_lane_s8(a, __b) __extension__ ({ \
  int8x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); })
#define vdupq_lane_s16(a, __b) __extension__ ({ \
  int16x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); })
#define vdupq_lane_s32(a, __b) __extension__ ({ \
  int32x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b, __b, __b); })
#define vdupq_lane_p8(a, __b) __extension__ ({ \
  poly8x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); })
#define vdupq_lane_p16(a, __b) __extension__ ({ \
  poly16x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); })
#define vdupq_lane_f32(a, __b) __extension__ ({ \
  float32x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b, __b, __b); })
#define vdup_lane_s64(a, __b) __extension__ ({ \
  int64x1_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b); })
#define vdup_lane_u64(a, __b) __extension__ ({ \
  uint64x1_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b); })
#define vdupq_lane_s64(a, __b) __extension__ ({ \
  int64x1_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b); })
#define vdupq_lane_u64(a, __b) __extension__ ({ \
  uint64x1_t __a = (a); \
  __builtin_shufflevector(__a, __a, __b, __b); })
1052 | |
/* vdup_n / vdupq_n: broadcast a scalar to every lane (ARM VDUP by
 * general-purpose register), expressed as a vector initializer list with
 * the scalar repeated once per lane.  These are real inline functions, not
 * macros, because the scalar need not be a compile-time constant. */
__ai uint8x8_t vdup_n_u8(uint8_t __a) { \
  return (uint8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
__ai uint16x4_t vdup_n_u16(uint16_t __a) { \
  return (uint16x4_t){ __a, __a, __a, __a }; }
__ai uint32x2_t vdup_n_u32(uint32_t __a) { \
  return (uint32x2_t){ __a, __a }; }
__ai int8x8_t vdup_n_s8(int8_t __a) { \
  return (int8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
__ai int16x4_t vdup_n_s16(int16_t __a) { \
  return (int16x4_t){ __a, __a, __a, __a }; }
__ai int32x2_t vdup_n_s32(int32_t __a) { \
  return (int32x2_t){ __a, __a }; }
__ai poly8x8_t vdup_n_p8(poly8_t __a) { \
  return (poly8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
__ai poly16x4_t vdup_n_p16(poly16_t __a) { \
  return (poly16x4_t){ __a, __a, __a, __a }; }
__ai float32x2_t vdup_n_f32(float32_t __a) { \
  return (float32x2_t){ __a, __a }; }
__ai uint8x16_t vdupq_n_u8(uint8_t __a) { \
  return (uint8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; }
__ai uint16x8_t vdupq_n_u16(uint16_t __a) { \
  return (uint16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
__ai uint32x4_t vdupq_n_u32(uint32_t __a) { \
  return (uint32x4_t){ __a, __a, __a, __a }; }
__ai int8x16_t vdupq_n_s8(int8_t __a) { \
  return (int8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; }
__ai int16x8_t vdupq_n_s16(int16_t __a) { \
  return (int16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
__ai int32x4_t vdupq_n_s32(int32_t __a) { \
  return (int32x4_t){ __a, __a, __a, __a }; }
__ai poly8x16_t vdupq_n_p8(poly8_t __a) { \
  return (poly8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a }; }
__ai poly16x8_t vdupq_n_p16(poly16_t __a) { \
  return (poly16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
__ai float32x4_t vdupq_n_f32(float32_t __a) { \
  return (float32x4_t){ __a, __a, __a, __a }; }
__ai int64x1_t vdup_n_s64(int64_t __a) { \
  return (int64x1_t){ __a }; }
__ai uint64x1_t vdup_n_u64(uint64_t __a) { \
  return (uint64x1_t){ __a }; }
__ai int64x2_t vdupq_n_s64(int64_t __a) { \
  return (int64x2_t){ __a, __a }; }
__ai uint64x2_t vdupq_n_u64(uint64_t __a) { \
  return (uint64x2_t){ __a, __a }; }
1097 | |
1098 __ai int8x8_t veor_s8(int8x8_t __a, int8x8_t __b) { \ | |
1099 return __a ^ __b; } | |
1100 __ai int16x4_t veor_s16(int16x4_t __a, int16x4_t __b) { \ | |
1101 return __a ^ __b; } | |
1102 __ai int32x2_t veor_s32(int32x2_t __a, int32x2_t __b) { \ | |
1103 return __a ^ __b; } | |
1104 __ai int64x1_t veor_s64(int64x1_t __a, int64x1_t __b) { \ | |
1105 return __a ^ __b; } | |
1106 __ai uint8x8_t veor_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
1107 return __a ^ __b; } | |
1108 __ai uint16x4_t veor_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
1109 return __a ^ __b; } | |
1110 __ai uint32x2_t veor_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
1111 return __a ^ __b; } | |
1112 __ai uint64x1_t veor_u64(uint64x1_t __a, uint64x1_t __b) { \ | |
1113 return __a ^ __b; } | |
1114 __ai int8x16_t veorq_s8(int8x16_t __a, int8x16_t __b) { \ | |
1115 return __a ^ __b; } | |
1116 __ai int16x8_t veorq_s16(int16x8_t __a, int16x8_t __b) { \ | |
1117 return __a ^ __b; } | |
1118 __ai int32x4_t veorq_s32(int32x4_t __a, int32x4_t __b) { \ | |
1119 return __a ^ __b; } | |
1120 __ai int64x2_t veorq_s64(int64x2_t __a, int64x2_t __b) { \ | |
1121 return __a ^ __b; } | |
1122 __ai uint8x16_t veorq_u8(uint8x16_t __a, uint8x16_t __b) { \ | |
1123 return __a ^ __b; } | |
1124 __ai uint16x8_t veorq_u16(uint16x8_t __a, uint16x8_t __b) { \ | |
1125 return __a ^ __b; } | |
1126 __ai uint32x4_t veorq_u32(uint32x4_t __a, uint32x4_t __b) { \ | |
1127 return __a ^ __b; } | |
1128 __ai uint64x2_t veorq_u64(uint64x2_t __a, uint64x2_t __b) { \ | |
1129 return __a ^ __b; } | |
1130 | |
/* VEXT: extract a vector from the concatenation of two vectors, starting
 * at lane __c of __a (result lanes are __a[__c..] followed by leading lanes
 * of __b).  Macros because __c is encoded as an immediate and must be a
 * compile-time constant in 0..lanes-1. */
#define vext_s8(a, b, __c) __extension__ ({ \
  int8x8_t __a = (a); int8x8_t __b = (b); \
  (int8x8_t)__builtin_neon_vext_v(__a, __b, __c, 0); })
#define vext_u8(a, b, __c) __extension__ ({ \
  uint8x8_t __a = (a); uint8x8_t __b = (b); \
  (uint8x8_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 8); })
#define vext_p8(a, b, __c) __extension__ ({ \
  poly8x8_t __a = (a); poly8x8_t __b = (b); \
  (poly8x8_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); })
#define vext_s16(a, b, __c) __extension__ ({ \
  int16x4_t __a = (a); int16x4_t __b = (b); \
  (int16x4_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
#define vext_u16(a, b, __c) __extension__ ({ \
  uint16x4_t __a = (a); uint16x4_t __b = (b); \
  (uint16x4_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 9); })
#define vext_p16(a, b, __c) __extension__ ({ \
  poly16x4_t __a = (a); poly16x4_t __b = (b); \
  (poly16x4_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 6); })
#define vext_s32(a, b, __c) __extension__ ({ \
  int32x2_t __a = (a); int32x2_t __b = (b); \
  (int32x2_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
#define vext_u32(a, b, __c) __extension__ ({ \
  uint32x2_t __a = (a); uint32x2_t __b = (b); \
  (uint32x2_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 10); })
#define vext_s64(a, b, __c) __extension__ ({ \
  int64x1_t __a = (a); int64x1_t __b = (b); \
  (int64x1_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
#define vext_u64(a, b, __c) __extension__ ({ \
  uint64x1_t __a = (a); uint64x1_t __b = (b); \
  (uint64x1_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 11); })
#define vext_f32(a, b, __c) __extension__ ({ \
  float32x2_t __a = (a); float32x2_t __b = (b); \
  (float32x2_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); })
#define vextq_s8(a, b, __c) __extension__ ({ \
  int8x16_t __a = (a); int8x16_t __b = (b); \
  (int8x16_t)__builtin_neon_vextq_v(__a, __b, __c, 16); })
#define vextq_u8(a, b, __c) __extension__ ({ \
  uint8x16_t __a = (a); uint8x16_t __b = (b); \
  (uint8x16_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 24); })
#define vextq_p8(a, b, __c) __extension__ ({ \
  poly8x16_t __a = (a); poly8x16_t __b = (b); \
  (poly8x16_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 21); })
#define vextq_s16(a, b, __c) __extension__ ({ \
  int16x8_t __a = (a); int16x8_t __b = (b); \
  (int16x8_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 17); })
#define vextq_u16(a, b, __c) __extension__ ({ \
  uint16x8_t __a = (a); uint16x8_t __b = (b); \
  (uint16x8_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 25); })
#define vextq_p16(a, b, __c) __extension__ ({ \
  poly16x8_t __a = (a); poly16x8_t __b = (b); \
  (poly16x8_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 22); })
#define vextq_s32(a, b, __c) __extension__ ({ \
  int32x4_t __a = (a); int32x4_t __b = (b); \
  (int32x4_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 18); })
#define vextq_u32(a, b, __c) __extension__ ({ \
  uint32x4_t __a = (a); uint32x4_t __b = (b); \
  (uint32x4_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 26); })
#define vextq_s64(a, b, __c) __extension__ ({ \
  int64x2_t __a = (a); int64x2_t __b = (b); \
  (int64x2_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 19); })
#define vextq_u64(a, b, __c) __extension__ ({ \
  uint64x2_t __a = (a); uint64x2_t __b = (b); \
  (uint64x2_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 27); })
#define vextq_f32(a, b, __c) __extension__ ({ \
  float32x4_t __a = (a); float32x4_t __b = (b); \
  (float32x4_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 20); })
1197 | |
/* vget_high: return the high 64-bit half of a 128-bit vector.  The input is
 * reinterpreted as a two-element int64 vector and a one-index shuffle
 * selects element 1 (the upper half), independent of element type. */
__ai int8x8_t vget_high_s8(int8x16_t __a) { \
  return (int8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
__ai int16x4_t vget_high_s16(int16x8_t __a) { \
  return (int16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
__ai int32x2_t vget_high_s32(int32x4_t __a) { \
  return (int32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
__ai int64x1_t vget_high_s64(int64x2_t __a) { \
  return (int64x1_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
__ai float16x4_t vget_high_f16(float16x8_t __a) { \
  return (float16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
__ai float32x2_t vget_high_f32(float32x4_t __a) { \
  return (float32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
__ai uint8x8_t vget_high_u8(uint8x16_t __a) { \
  return (uint8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
__ai uint16x4_t vget_high_u16(uint16x8_t __a) { \
  return (uint16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
__ai uint32x2_t vget_high_u32(uint32x4_t __a) { \
  return (uint32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
__ai uint64x1_t vget_high_u64(uint64x2_t __a) { \
  return (uint64x1_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
__ai poly8x8_t vget_high_p8(poly8x16_t __a) { \
  return (poly8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
__ai poly16x4_t vget_high_p16(poly16x8_t __a) { \
  return (poly16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
1222 | |
/* vget_lane / vgetq_lane: extract a single scalar lane.  Macros because the
 * lane index __b must be a compile-time constant; each dispatches to the
 * lane-extraction builtin for its element width, with unsigned/poly types
 * bit-cast through the signed vector type of the same layout. */
#define vget_lane_u8(a, __b) __extension__ ({ \
  uint8x8_t __a = (a); \
  (uint8_t)__builtin_neon_vget_lane_i8((int8x8_t)__a, __b); })
#define vget_lane_u16(a, __b) __extension__ ({ \
  uint16x4_t __a = (a); \
  (uint16_t)__builtin_neon_vget_lane_i16((int16x4_t)__a, __b); })
#define vget_lane_u32(a, __b) __extension__ ({ \
  uint32x2_t __a = (a); \
  (uint32_t)__builtin_neon_vget_lane_i32((int32x2_t)__a, __b); })
#define vget_lane_s8(a, __b) __extension__ ({ \
  int8x8_t __a = (a); \
  (int8_t)__builtin_neon_vget_lane_i8(__a, __b); })
#define vget_lane_s16(a, __b) __extension__ ({ \
  int16x4_t __a = (a); \
  (int16_t)__builtin_neon_vget_lane_i16(__a, __b); })
#define vget_lane_s32(a, __b) __extension__ ({ \
  int32x2_t __a = (a); \
  (int32_t)__builtin_neon_vget_lane_i32(__a, __b); })
#define vget_lane_p8(a, __b) __extension__ ({ \
  poly8x8_t __a = (a); \
  (poly8_t)__builtin_neon_vget_lane_i8((int8x8_t)__a, __b); })
#define vget_lane_p16(a, __b) __extension__ ({ \
  poly16x4_t __a = (a); \
  (poly16_t)__builtin_neon_vget_lane_i16((int16x4_t)__a, __b); })
#define vget_lane_f32(a, __b) __extension__ ({ \
  float32x2_t __a = (a); \
  (float32_t)__builtin_neon_vget_lane_f32(__a, __b); })
#define vgetq_lane_u8(a, __b) __extension__ ({ \
  uint8x16_t __a = (a); \
  (uint8_t)__builtin_neon_vgetq_lane_i8((int8x16_t)__a, __b); })
#define vgetq_lane_u16(a, __b) __extension__ ({ \
  uint16x8_t __a = (a); \
  (uint16_t)__builtin_neon_vgetq_lane_i16((int16x8_t)__a, __b); })
#define vgetq_lane_u32(a, __b) __extension__ ({ \
  uint32x4_t __a = (a); \
  (uint32_t)__builtin_neon_vgetq_lane_i32((int32x4_t)__a, __b); })
#define vgetq_lane_s8(a, __b) __extension__ ({ \
  int8x16_t __a = (a); \
  (int8_t)__builtin_neon_vgetq_lane_i8(__a, __b); })
#define vgetq_lane_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  (int16_t)__builtin_neon_vgetq_lane_i16(__a, __b); })
#define vgetq_lane_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (int32_t)__builtin_neon_vgetq_lane_i32(__a, __b); })
#define vgetq_lane_p8(a, __b) __extension__ ({ \
  poly8x16_t __a = (a); \
  (poly8_t)__builtin_neon_vgetq_lane_i8((int8x16_t)__a, __b); })
#define vgetq_lane_p16(a, __b) __extension__ ({ \
  poly16x8_t __a = (a); \
  (poly16_t)__builtin_neon_vgetq_lane_i16((int16x8_t)__a, __b); })
#define vgetq_lane_f32(a, __b) __extension__ ({ \
  float32x4_t __a = (a); \
  (float32_t)__builtin_neon_vgetq_lane_f32(__a, __b); })
#define vget_lane_s64(a, __b) __extension__ ({ \
  int64x1_t __a = (a); \
  (int64_t)__builtin_neon_vget_lane_i64(__a, __b); })
#define vget_lane_u64(a, __b) __extension__ ({ \
  uint64x1_t __a = (a); \
  (uint64_t)__builtin_neon_vget_lane_i64((int64x1_t)__a, __b); })
#define vgetq_lane_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (int64_t)__builtin_neon_vgetq_lane_i64(__a, __b); })
#define vgetq_lane_u64(a, __b) __extension__ ({ \
  uint64x2_t __a = (a); \
  (uint64_t)__builtin_neon_vgetq_lane_i64((int64x2_t)__a, __b); })
1289 | |
/*
 * vget_low_*: return the low (least-significant) 64-bit half of a 128-bit
 * vector.  Each variant bit-casts the input to int64x2_t, selects element 0
 * with __builtin_shufflevector, and casts the resulting int64x1_t back to
 * the half-width vector type.  No lane data is modified — this is a pure
 * reinterpreting extraction.
 */
__ai int8x8_t vget_low_s8(int8x16_t __a) { \
  return (int8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
__ai int16x4_t vget_low_s16(int16x8_t __a) { \
  return (int16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
__ai int32x2_t vget_low_s32(int32x4_t __a) { \
  return (int32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
__ai int64x1_t vget_low_s64(int64x2_t __a) { \
  return (int64x1_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
__ai float16x4_t vget_low_f16(float16x8_t __a) { \
  return (float16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
__ai float32x2_t vget_low_f32(float32x4_t __a) { \
  return (float32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
__ai uint8x8_t vget_low_u8(uint8x16_t __a) { \
  return (uint8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
__ai uint16x4_t vget_low_u16(uint16x8_t __a) { \
  return (uint16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
__ai uint32x2_t vget_low_u32(uint32x4_t __a) { \
  return (uint32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
__ai uint64x1_t vget_low_u64(uint64x2_t __a) { \
  return (uint64x1_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
__ai poly8x8_t vget_low_p8(poly8x16_t __a) { \
  return (poly8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
__ai poly16x4_t vget_low_p16(poly16x8_t __a) { \
  return (poly16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
1314 | |
/*
 * vhadd_* / vhaddq_*: NEON halving add (VHADD) per the ARM NEON intrinsic
 * of the same name.  The generic _v builtins take int8x8_t/int8x16_t
 * arguments, so other element types are bit-cast in and the result cast
 * back out.  The trailing integer is the element-type code used throughout
 * this header: s8=0 s16=1 s32=2, unsigned adds 8 (u8=8 u16=9 u32=10),
 * and the 128-bit q forms add 16.
 */
__ai int8x8_t vhadd_s8(int8x8_t __a, int8x8_t __b) { \
  return (int8x8_t)__builtin_neon_vhadd_v(__a, __b, 0); }
__ai int16x4_t vhadd_s16(int16x4_t __a, int16x4_t __b) { \
  return (int16x4_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 1); }
__ai int32x2_t vhadd_s32(int32x2_t __a, int32x2_t __b) { \
  return (int32x2_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 2); }
__ai uint8x8_t vhadd_u8(uint8x8_t __a, uint8x8_t __b) { \
  return (uint8x8_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 8); }
__ai uint16x4_t vhadd_u16(uint16x4_t __a, uint16x4_t __b) { \
  return (uint16x4_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 9); }
__ai uint32x2_t vhadd_u32(uint32x2_t __a, uint32x2_t __b) { \
  return (uint32x2_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 10); }
__ai int8x16_t vhaddq_s8(int8x16_t __a, int8x16_t __b) { \
  return (int8x16_t)__builtin_neon_vhaddq_v(__a, __b, 16); }
__ai int16x8_t vhaddq_s16(int16x8_t __a, int16x8_t __b) { \
  return (int16x8_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 17); }
__ai int32x4_t vhaddq_s32(int32x4_t __a, int32x4_t __b) { \
  return (int32x4_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 18); }
__ai uint8x16_t vhaddq_u8(uint8x16_t __a, uint8x16_t __b) { \
  return (uint8x16_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 24); }
__ai uint16x8_t vhaddq_u16(uint16x8_t __a, uint16x8_t __b) { \
  return (uint16x8_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 25); }
__ai uint32x4_t vhaddq_u32(uint32x4_t __a, uint32x4_t __b) { \
  return (uint32x4_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 26); }
1339 | |
/*
 * vhsub_* / vhsubq_*: NEON halving subtract (VHSUB) per the ARM NEON
 * intrinsic of the same name.  Same casting and element-type-code scheme
 * as the vhadd group above (s8=0 s16=1 s32=2; +8 unsigned; +16 for q).
 */
__ai int8x8_t vhsub_s8(int8x8_t __a, int8x8_t __b) { \
  return (int8x8_t)__builtin_neon_vhsub_v(__a, __b, 0); }
__ai int16x4_t vhsub_s16(int16x4_t __a, int16x4_t __b) { \
  return (int16x4_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 1); }
__ai int32x2_t vhsub_s32(int32x2_t __a, int32x2_t __b) { \
  return (int32x2_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 2); }
__ai uint8x8_t vhsub_u8(uint8x8_t __a, uint8x8_t __b) { \
  return (uint8x8_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 8); }
__ai uint16x4_t vhsub_u16(uint16x4_t __a, uint16x4_t __b) { \
  return (uint16x4_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 9); }
__ai uint32x2_t vhsub_u32(uint32x2_t __a, uint32x2_t __b) { \
  return (uint32x2_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 10); }
__ai int8x16_t vhsubq_s8(int8x16_t __a, int8x16_t __b) { \
  return (int8x16_t)__builtin_neon_vhsubq_v(__a, __b, 16); }
__ai int16x8_t vhsubq_s16(int16x8_t __a, int16x8_t __b) { \
  return (int16x8_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 17); }
__ai int32x4_t vhsubq_s32(int32x4_t __a, int32x4_t __b) { \
  return (int32x4_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 18); }
__ai uint8x16_t vhsubq_u8(uint8x16_t __a, uint8x16_t __b) { \
  return (uint8x16_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 24); }
__ai uint16x8_t vhsubq_u16(uint16x8_t __a, uint16x8_t __b) { \
  return (uint16x8_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 25); }
__ai uint32x4_t vhsubq_u32(uint32x4_t __a, uint32x4_t __b) { \
  return (uint32x4_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 26); }
1364 | |
/*
 * vld1_* / vld1q_*: load one full vector from memory (VLD1).  The builtin
 * returns a generic vector that is cast to the requested type; the integer
 * argument is the element-type code (s8=0 s16=1 s32=2 s64=3 f32=4 p8=5
 * p16=6 f16=7; +8 unsigned; +16 for the 128-bit q forms).
 */
#define vld1q_u8(__a) __extension__ ({ \
  (uint8x16_t)__builtin_neon_vld1q_v(__a, 24); })
#define vld1q_u16(__a) __extension__ ({ \
  (uint16x8_t)__builtin_neon_vld1q_v(__a, 25); })
#define vld1q_u32(__a) __extension__ ({ \
  (uint32x4_t)__builtin_neon_vld1q_v(__a, 26); })
#define vld1q_u64(__a) __extension__ ({ \
  (uint64x2_t)__builtin_neon_vld1q_v(__a, 27); })
#define vld1q_s8(__a) __extension__ ({ \
  (int8x16_t)__builtin_neon_vld1q_v(__a, 16); })
#define vld1q_s16(__a) __extension__ ({ \
  (int16x8_t)__builtin_neon_vld1q_v(__a, 17); })
#define vld1q_s32(__a) __extension__ ({ \
  (int32x4_t)__builtin_neon_vld1q_v(__a, 18); })
#define vld1q_s64(__a) __extension__ ({ \
  (int64x2_t)__builtin_neon_vld1q_v(__a, 19); })
#define vld1q_f16(__a) __extension__ ({ \
  (float16x8_t)__builtin_neon_vld1q_v(__a, 23); })
#define vld1q_f32(__a) __extension__ ({ \
  (float32x4_t)__builtin_neon_vld1q_v(__a, 20); })
#define vld1q_p8(__a) __extension__ ({ \
  (poly8x16_t)__builtin_neon_vld1q_v(__a, 21); })
#define vld1q_p16(__a) __extension__ ({ \
  (poly16x8_t)__builtin_neon_vld1q_v(__a, 22); })
#define vld1_u8(__a) __extension__ ({ \
  (uint8x8_t)__builtin_neon_vld1_v(__a, 8); })
#define vld1_u16(__a) __extension__ ({ \
  (uint16x4_t)__builtin_neon_vld1_v(__a, 9); })
#define vld1_u32(__a) __extension__ ({ \
  (uint32x2_t)__builtin_neon_vld1_v(__a, 10); })
#define vld1_u64(__a) __extension__ ({ \
  (uint64x1_t)__builtin_neon_vld1_v(__a, 11); })
#define vld1_s8(__a) __extension__ ({ \
  (int8x8_t)__builtin_neon_vld1_v(__a, 0); })
#define vld1_s16(__a) __extension__ ({ \
  (int16x4_t)__builtin_neon_vld1_v(__a, 1); })
#define vld1_s32(__a) __extension__ ({ \
  (int32x2_t)__builtin_neon_vld1_v(__a, 2); })
#define vld1_s64(__a) __extension__ ({ \
  (int64x1_t)__builtin_neon_vld1_v(__a, 3); })
#define vld1_f16(__a) __extension__ ({ \
  (float16x4_t)__builtin_neon_vld1_v(__a, 7); })
#define vld1_f32(__a) __extension__ ({ \
  (float32x2_t)__builtin_neon_vld1_v(__a, 4); })
#define vld1_p8(__a) __extension__ ({ \
  (poly8x8_t)__builtin_neon_vld1_v(__a, 5); })
#define vld1_p16(__a) __extension__ ({ \
  (poly16x4_t)__builtin_neon_vld1_v(__a, 6); })
1413 | |
/*
 * vld1_dup_* / vld1q_dup_*: load one element from memory and replicate it
 * to all lanes of the result vector (VLD1 dup form).  Same element-type
 * codes as the vld1 group above.
 */
#define vld1q_dup_u8(__a) __extension__ ({ \
  (uint8x16_t)__builtin_neon_vld1q_dup_v(__a, 24); })
#define vld1q_dup_u16(__a) __extension__ ({ \
  (uint16x8_t)__builtin_neon_vld1q_dup_v(__a, 25); })
#define vld1q_dup_u32(__a) __extension__ ({ \
  (uint32x4_t)__builtin_neon_vld1q_dup_v(__a, 26); })
#define vld1q_dup_u64(__a) __extension__ ({ \
  (uint64x2_t)__builtin_neon_vld1q_dup_v(__a, 27); })
#define vld1q_dup_s8(__a) __extension__ ({ \
  (int8x16_t)__builtin_neon_vld1q_dup_v(__a, 16); })
#define vld1q_dup_s16(__a) __extension__ ({ \
  (int16x8_t)__builtin_neon_vld1q_dup_v(__a, 17); })
#define vld1q_dup_s32(__a) __extension__ ({ \
  (int32x4_t)__builtin_neon_vld1q_dup_v(__a, 18); })
#define vld1q_dup_s64(__a) __extension__ ({ \
  (int64x2_t)__builtin_neon_vld1q_dup_v(__a, 19); })
#define vld1q_dup_f16(__a) __extension__ ({ \
  (float16x8_t)__builtin_neon_vld1q_dup_v(__a, 23); })
#define vld1q_dup_f32(__a) __extension__ ({ \
  (float32x4_t)__builtin_neon_vld1q_dup_v(__a, 20); })
#define vld1q_dup_p8(__a) __extension__ ({ \
  (poly8x16_t)__builtin_neon_vld1q_dup_v(__a, 21); })
#define vld1q_dup_p16(__a) __extension__ ({ \
  (poly16x8_t)__builtin_neon_vld1q_dup_v(__a, 22); })
#define vld1_dup_u8(__a) __extension__ ({ \
  (uint8x8_t)__builtin_neon_vld1_dup_v(__a, 8); })
#define vld1_dup_u16(__a) __extension__ ({ \
  (uint16x4_t)__builtin_neon_vld1_dup_v(__a, 9); })
#define vld1_dup_u32(__a) __extension__ ({ \
  (uint32x2_t)__builtin_neon_vld1_dup_v(__a, 10); })
#define vld1_dup_u64(__a) __extension__ ({ \
  (uint64x1_t)__builtin_neon_vld1_dup_v(__a, 11); })
#define vld1_dup_s8(__a) __extension__ ({ \
  (int8x8_t)__builtin_neon_vld1_dup_v(__a, 0); })
#define vld1_dup_s16(__a) __extension__ ({ \
  (int16x4_t)__builtin_neon_vld1_dup_v(__a, 1); })
#define vld1_dup_s32(__a) __extension__ ({ \
  (int32x2_t)__builtin_neon_vld1_dup_v(__a, 2); })
#define vld1_dup_s64(__a) __extension__ ({ \
  (int64x1_t)__builtin_neon_vld1_dup_v(__a, 3); })
#define vld1_dup_f16(__a) __extension__ ({ \
  (float16x4_t)__builtin_neon_vld1_dup_v(__a, 7); })
#define vld1_dup_f32(__a) __extension__ ({ \
  (float32x2_t)__builtin_neon_vld1_dup_v(__a, 4); })
#define vld1_dup_p8(__a) __extension__ ({ \
  (poly8x8_t)__builtin_neon_vld1_dup_v(__a, 5); })
#define vld1_dup_p16(__a) __extension__ ({ \
  (poly16x4_t)__builtin_neon_vld1_dup_v(__a, 6); })
1462 | |
/*
 * vld1_lane_* / vld1q_lane_*: load one element from memory into lane __c
 * of existing vector b, leaving the other lanes unchanged.  b is captured
 * into __b (single evaluation) and bit-cast to the generic int8 vector the
 * builtin expects; the s8 variants need no cast.  Macros so the lane index
 * stays a compile-time constant.  Same element-type codes as vld1.
 */
#define vld1q_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x16_t __b = (b); \
  (uint8x16_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 24); })
#define vld1q_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x8_t __b = (b); \
  (uint16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 25); })
#define vld1q_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x4_t __b = (b); \
  (uint32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 26); })
#define vld1q_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x2_t __b = (b); \
  (uint64x2_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 27); })
#define vld1q_lane_s8(__a, b, __c) __extension__ ({ \
  int8x16_t __b = (b); \
  (int8x16_t)__builtin_neon_vld1q_lane_v(__a, __b, __c, 16); })
#define vld1q_lane_s16(__a, b, __c) __extension__ ({ \
  int16x8_t __b = (b); \
  (int16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 17); })
#define vld1q_lane_s32(__a, b, __c) __extension__ ({ \
  int32x4_t __b = (b); \
  (int32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 18); })
#define vld1q_lane_s64(__a, b, __c) __extension__ ({ \
  int64x2_t __b = (b); \
  (int64x2_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 19); })
#define vld1q_lane_f16(__a, b, __c) __extension__ ({ \
  float16x8_t __b = (b); \
  (float16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 23); })
#define vld1q_lane_f32(__a, b, __c) __extension__ ({ \
  float32x4_t __b = (b); \
  (float32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 20); })
#define vld1q_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x16_t __b = (b); \
  (poly8x16_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 21); })
#define vld1q_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x8_t __b = (b); \
  (poly16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 22); })
#define vld1_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x8_t __b = (b); \
  (uint8x8_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 8); })
#define vld1_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x4_t __b = (b); \
  (uint16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 9); })
#define vld1_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x2_t __b = (b); \
  (uint32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 10); })
#define vld1_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x1_t __b = (b); \
  (uint64x1_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 11); })
#define vld1_lane_s8(__a, b, __c) __extension__ ({ \
  int8x8_t __b = (b); \
  (int8x8_t)__builtin_neon_vld1_lane_v(__a, __b, __c, 0); })
#define vld1_lane_s16(__a, b, __c) __extension__ ({ \
  int16x4_t __b = (b); \
  (int16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 1); })
#define vld1_lane_s32(__a, b, __c) __extension__ ({ \
  int32x2_t __b = (b); \
  (int32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 2); })
#define vld1_lane_s64(__a, b, __c) __extension__ ({ \
  int64x1_t __b = (b); \
  (int64x1_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 3); })
#define vld1_lane_f16(__a, b, __c) __extension__ ({ \
  float16x4_t __b = (b); \
  (float16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 7); })
#define vld1_lane_f32(__a, b, __c) __extension__ ({ \
  float32x2_t __b = (b); \
  (float32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 4); })
#define vld1_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x8_t __b = (b); \
  (poly8x8_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 5); })
#define vld1_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x4_t __b = (b); \
  (poly16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 6); })
1535 | |
/*
 * vld2_* / vld2q_*: 2-element interleaved (structure) load.  The builtin
 * fills a 2-vector aggregate through the out-pointer &r; the statement
 * expression then yields r.  Same element-type codes as vld1.  Note the
 * q forms deliberately have no 64-bit-element variants.
 */
#define vld2q_u8(__a) __extension__ ({ \
  uint8x16x2_t r; __builtin_neon_vld2q_v(&r, __a, 24); r; })
#define vld2q_u16(__a) __extension__ ({ \
  uint16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 25); r; })
#define vld2q_u32(__a) __extension__ ({ \
  uint32x4x2_t r; __builtin_neon_vld2q_v(&r, __a, 26); r; })
#define vld2q_s8(__a) __extension__ ({ \
  int8x16x2_t r; __builtin_neon_vld2q_v(&r, __a, 16); r; })
#define vld2q_s16(__a) __extension__ ({ \
  int16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 17); r; })
#define vld2q_s32(__a) __extension__ ({ \
  int32x4x2_t r; __builtin_neon_vld2q_v(&r, __a, 18); r; })
#define vld2q_f16(__a) __extension__ ({ \
  float16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 23); r; })
#define vld2q_f32(__a) __extension__ ({ \
  float32x4x2_t r; __builtin_neon_vld2q_v(&r, __a, 20); r; })
#define vld2q_p8(__a) __extension__ ({ \
  poly8x16x2_t r; __builtin_neon_vld2q_v(&r, __a, 21); r; })
#define vld2q_p16(__a) __extension__ ({ \
  poly16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 22); r; })
#define vld2_u8(__a) __extension__ ({ \
  uint8x8x2_t r; __builtin_neon_vld2_v(&r, __a, 8); r; })
#define vld2_u16(__a) __extension__ ({ \
  uint16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 9); r; })
#define vld2_u32(__a) __extension__ ({ \
  uint32x2x2_t r; __builtin_neon_vld2_v(&r, __a, 10); r; })
#define vld2_u64(__a) __extension__ ({ \
  uint64x1x2_t r; __builtin_neon_vld2_v(&r, __a, 11); r; })
#define vld2_s8(__a) __extension__ ({ \
  int8x8x2_t r; __builtin_neon_vld2_v(&r, __a, 0); r; })
#define vld2_s16(__a) __extension__ ({ \
  int16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 1); r; })
#define vld2_s32(__a) __extension__ ({ \
  int32x2x2_t r; __builtin_neon_vld2_v(&r, __a, 2); r; })
#define vld2_s64(__a) __extension__ ({ \
  int64x1x2_t r; __builtin_neon_vld2_v(&r, __a, 3); r; })
#define vld2_f16(__a) __extension__ ({ \
  float16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 7); r; })
#define vld2_f32(__a) __extension__ ({ \
  float32x2x2_t r; __builtin_neon_vld2_v(&r, __a, 4); r; })
#define vld2_p8(__a) __extension__ ({ \
  poly8x8x2_t r; __builtin_neon_vld2_v(&r, __a, 5); r; })
#define vld2_p16(__a) __extension__ ({ \
  poly16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 6); r; })
1580 | |
/*
 * vld2_dup_*: load one 2-element structure and replicate it across all
 * lanes of both result vectors.  64-bit-only (no q forms).  Same
 * out-pointer aggregate pattern and element-type codes as vld2.
 */
#define vld2_dup_u8(__a) __extension__ ({ \
  uint8x8x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 8); r; })
#define vld2_dup_u16(__a) __extension__ ({ \
  uint16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 9); r; })
#define vld2_dup_u32(__a) __extension__ ({ \
  uint32x2x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 10); r; })
#define vld2_dup_u64(__a) __extension__ ({ \
  uint64x1x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 11); r; })
#define vld2_dup_s8(__a) __extension__ ({ \
  int8x8x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 0); r; })
#define vld2_dup_s16(__a) __extension__ ({ \
  int16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 1); r; })
#define vld2_dup_s32(__a) __extension__ ({ \
  int32x2x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 2); r; })
#define vld2_dup_s64(__a) __extension__ ({ \
  int64x1x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 3); r; })
#define vld2_dup_f16(__a) __extension__ ({ \
  float16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 7); r; })
#define vld2_dup_f32(__a) __extension__ ({ \
  float32x2x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 4); r; })
#define vld2_dup_p8(__a) __extension__ ({ \
  poly8x8x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 5); r; })
#define vld2_dup_p16(__a) __extension__ ({ \
  poly16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 6); r; })
1605 | |
/*
 * vld2_lane_* / vld2q_lane_*: load one 2-element structure into lane __c
 * of the two vectors in b, leaving other lanes unchanged.  b is captured
 * once into __b; each member vector is bit-cast to the generic int8 vector
 * type for the builtin (s8 needs no cast).  Same element-type codes as
 * vld1.  The q forms exist only for 16/32-bit elements.
 */
#define vld2q_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x8x2_t __b = (b); \
  uint16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 25); r; })
#define vld2q_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x4x2_t __b = (b); \
  uint32x4x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 26); r; })
#define vld2q_lane_s16(__a, b, __c) __extension__ ({ \
  int16x8x2_t __b = (b); \
  int16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 17); r; })
#define vld2q_lane_s32(__a, b, __c) __extension__ ({ \
  int32x4x2_t __b = (b); \
  int32x4x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 18); r; })
#define vld2q_lane_f16(__a, b, __c) __extension__ ({ \
  float16x8x2_t __b = (b); \
  float16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 23); r; })
#define vld2q_lane_f32(__a, b, __c) __extension__ ({ \
  float32x4x2_t __b = (b); \
  float32x4x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 20); r; })
#define vld2q_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x8x2_t __b = (b); \
  poly16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 22); r; })
#define vld2_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x8x2_t __b = (b); \
  uint8x8x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 8); r; })
#define vld2_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x4x2_t __b = (b); \
  uint16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 9); r; })
#define vld2_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x2x2_t __b = (b); \
  uint32x2x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 10); r; })
#define vld2_lane_s8(__a, b, __c) __extension__ ({ \
  int8x8x2_t __b = (b); \
  int8x8x2_t r; __builtin_neon_vld2_lane_v(&r, __a, __b.val[0], __b.val[1], __c, 0); r; })
#define vld2_lane_s16(__a, b, __c) __extension__ ({ \
  int16x4x2_t __b = (b); \
  int16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 1); r; })
#define vld2_lane_s32(__a, b, __c) __extension__ ({ \
  int32x2x2_t __b = (b); \
  int32x2x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 2); r; })
#define vld2_lane_f16(__a, b, __c) __extension__ ({ \
  float16x4x2_t __b = (b); \
  float16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 7); r; })
#define vld2_lane_f32(__a, b, __c) __extension__ ({ \
  float32x2x2_t __b = (b); \
  float32x2x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 4); r; })
#define vld2_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x8x2_t __b = (b); \
  poly8x8x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 5); r; })
#define vld2_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x4x2_t __b = (b); \
  poly16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 6); r; })
1657 | |
/*
 * vld3_* / vld3q_*: 3-element interleaved (structure) load into a
 * 3-vector aggregate via the out-pointer &r.  Same element-type codes and
 * structure as the vld2 group.
 */
#define vld3q_u8(__a) __extension__ ({ \
  uint8x16x3_t r; __builtin_neon_vld3q_v(&r, __a, 24); r; })
#define vld3q_u16(__a) __extension__ ({ \
  uint16x8x3_t r; __builtin_neon_vld3q_v(&r, __a, 25); r; })
#define vld3q_u32(__a) __extension__ ({ \
  uint32x4x3_t r; __builtin_neon_vld3q_v(&r, __a, 26); r; })
#define vld3q_s8(__a) __extension__ ({ \
  int8x16x3_t r; __builtin_neon_vld3q_v(&r, __a, 16); r; })
#define vld3q_s16(__a) __extension__ ({ \
  int16x8x3_t r; __builtin_neon_vld3q_v(&r, __a, 17); r; })
#define vld3q_s32(__a) __extension__ ({ \
  int32x4x3_t r; __builtin_neon_vld3q_v(&r, __a, 18); r; })
#define vld3q_f16(__a) __extension__ ({ \
  float16x8x3_t r; __builtin_neon_vld3q_v(&r, __a, 23); r; })
#define vld3q_f32(__a) __extension__ ({ \
  float32x4x3_t r; __builtin_neon_vld3q_v(&r, __a, 20); r; })
#define vld3q_p8(__a) __extension__ ({ \
  poly8x16x3_t r; __builtin_neon_vld3q_v(&r, __a, 21); r; })
#define vld3q_p16(__a) __extension__ ({ \
  poly16x8x3_t r; __builtin_neon_vld3q_v(&r, __a, 22); r; })
#define vld3_u8(__a) __extension__ ({ \
  uint8x8x3_t r; __builtin_neon_vld3_v(&r, __a, 8); r; })
#define vld3_u16(__a) __extension__ ({ \
  uint16x4x3_t r; __builtin_neon_vld3_v(&r, __a, 9); r; })
#define vld3_u32(__a) __extension__ ({ \
  uint32x2x3_t r; __builtin_neon_vld3_v(&r, __a, 10); r; })
#define vld3_u64(__a) __extension__ ({ \
  uint64x1x3_t r; __builtin_neon_vld3_v(&r, __a, 11); r; })
#define vld3_s8(__a) __extension__ ({ \
  int8x8x3_t r; __builtin_neon_vld3_v(&r, __a, 0); r; })
#define vld3_s16(__a) __extension__ ({ \
  int16x4x3_t r; __builtin_neon_vld3_v(&r, __a, 1); r; })
#define vld3_s32(__a) __extension__ ({ \
  int32x2x3_t r; __builtin_neon_vld3_v(&r, __a, 2); r; })
#define vld3_s64(__a) __extension__ ({ \
  int64x1x3_t r; __builtin_neon_vld3_v(&r, __a, 3); r; })
#define vld3_f16(__a) __extension__ ({ \
  float16x4x3_t r; __builtin_neon_vld3_v(&r, __a, 7); r; })
#define vld3_f32(__a) __extension__ ({ \
  float32x2x3_t r; __builtin_neon_vld3_v(&r, __a, 4); r; })
#define vld3_p8(__a) __extension__ ({ \
  poly8x8x3_t r; __builtin_neon_vld3_v(&r, __a, 5); r; })
#define vld3_p16(__a) __extension__ ({ \
  poly16x4x3_t r; __builtin_neon_vld3_v(&r, __a, 6); r; })
1702 | |
/*
 * vld3_dup_*: load one 3-element structure and replicate it across all
 * lanes of the three result vectors.  64-bit-only; same codes as vld3.
 */
#define vld3_dup_u8(__a) __extension__ ({ \
  uint8x8x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 8); r; })
#define vld3_dup_u16(__a) __extension__ ({ \
  uint16x4x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 9); r; })
#define vld3_dup_u32(__a) __extension__ ({ \
  uint32x2x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 10); r; })
#define vld3_dup_u64(__a) __extension__ ({ \
  uint64x1x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 11); r; })
#define vld3_dup_s8(__a) __extension__ ({ \
  int8x8x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 0); r; })
#define vld3_dup_s16(__a) __extension__ ({ \
  int16x4x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 1); r; })
#define vld3_dup_s32(__a) __extension__ ({ \
  int32x2x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 2); r; })
#define vld3_dup_s64(__a) __extension__ ({ \
  int64x1x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 3); r; })
#define vld3_dup_f16(__a) __extension__ ({ \
  float16x4x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 7); r; })
#define vld3_dup_f32(__a) __extension__ ({ \
  float32x2x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 4); r; })
#define vld3_dup_p8(__a) __extension__ ({ \
  poly8x8x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 5); r; })
#define vld3_dup_p16(__a) __extension__ ({ \
  poly16x4x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 6); r; })
1727 | |
/*
 * vld3_lane_* / vld3q_lane_*: load one 3-element structure into lane __c
 * of the three vectors in b, leaving other lanes unchanged.  Same capture,
 * cast, and type-code conventions as vld2_lane; q forms only for 16/32-bit
 * elements.
 */
#define vld3q_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x8x3_t __b = (b); \
  uint16x8x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 25); r; })
#define vld3q_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x4x3_t __b = (b); \
  uint32x4x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 26); r; })
#define vld3q_lane_s16(__a, b, __c) __extension__ ({ \
  int16x8x3_t __b = (b); \
  int16x8x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 17); r; })
#define vld3q_lane_s32(__a, b, __c) __extension__ ({ \
  int32x4x3_t __b = (b); \
  int32x4x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 18); r; })
#define vld3q_lane_f16(__a, b, __c) __extension__ ({ \
  float16x8x3_t __b = (b); \
  float16x8x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 23); r; })
#define vld3q_lane_f32(__a, b, __c) __extension__ ({ \
  float32x4x3_t __b = (b); \
  float32x4x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 20); r; })
#define vld3q_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x8x3_t __b = (b); \
  poly16x8x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 22); r; })
#define vld3_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x8x3_t __b = (b); \
  uint8x8x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 8); r; })
#define vld3_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x4x3_t __b = (b); \
  uint16x4x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 9); r; })
#define vld3_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x2x3_t __b = (b); \
  uint32x2x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 10); r; })
#define vld3_lane_s8(__a, b, __c) __extension__ ({ \
  int8x8x3_t __b = (b); \
  int8x8x3_t r; __builtin_neon_vld3_lane_v(&r, __a, __b.val[0], __b.val[1], __b.val[2], __c, 0); r; })
#define vld3_lane_s16(__a, b, __c) __extension__ ({ \
  int16x4x3_t __b = (b); \
  int16x4x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 1); r; })
#define vld3_lane_s32(__a, b, __c) __extension__ ({ \
  int32x2x3_t __b = (b); \
  int32x2x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 2); r; })
#define vld3_lane_f16(__a, b, __c) __extension__ ({ \
  float16x4x3_t __b = (b); \
  float16x4x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 7); r; })
#define vld3_lane_f32(__a, b, __c) __extension__ ({ \
  float32x2x3_t __b = (b); \
  float32x2x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 4); r; })
#define vld3_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x8x3_t __b = (b); \
  poly8x8x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 5); r; })
#define vld3_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x4x3_t __b = (b); \
  poly16x4x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 6); r; })
1779 | |
/*
 * vld4_* / vld4q_*: 4-element interleaved (structure) load into a
 * 4-vector aggregate via the out-pointer &r.  Same element-type codes and
 * structure as the vld2/vld3 groups.  (The poly variants of the 64-bit
 * forms continue past this point in the file.)
 */
#define vld4q_u8(__a) __extension__ ({ \
  uint8x16x4_t r; __builtin_neon_vld4q_v(&r, __a, 24); r; })
#define vld4q_u16(__a) __extension__ ({ \
  uint16x8x4_t r; __builtin_neon_vld4q_v(&r, __a, 25); r; })
#define vld4q_u32(__a) __extension__ ({ \
  uint32x4x4_t r; __builtin_neon_vld4q_v(&r, __a, 26); r; })
#define vld4q_s8(__a) __extension__ ({ \
  int8x16x4_t r; __builtin_neon_vld4q_v(&r, __a, 16); r; })
#define vld4q_s16(__a) __extension__ ({ \
  int16x8x4_t r; __builtin_neon_vld4q_v(&r, __a, 17); r; })
#define vld4q_s32(__a) __extension__ ({ \
  int32x4x4_t r; __builtin_neon_vld4q_v(&r, __a, 18); r; })
#define vld4q_f16(__a) __extension__ ({ \
  float16x8x4_t r; __builtin_neon_vld4q_v(&r, __a, 23); r; })
#define vld4q_f32(__a) __extension__ ({ \
  float32x4x4_t r; __builtin_neon_vld4q_v(&r, __a, 20); r; })
#define vld4q_p8(__a) __extension__ ({ \
  poly8x16x4_t r; __builtin_neon_vld4q_v(&r, __a, 21); r; })
#define vld4q_p16(__a) __extension__ ({ \
  poly16x8x4_t r; __builtin_neon_vld4q_v(&r, __a, 22); r; })
#define vld4_u8(__a) __extension__ ({ \
  uint8x8x4_t r; __builtin_neon_vld4_v(&r, __a, 8); r; })
#define vld4_u16(__a) __extension__ ({ \
  uint16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 9); r; })
#define vld4_u32(__a) __extension__ ({ \
  uint32x2x4_t r; __builtin_neon_vld4_v(&r, __a, 10); r; })
#define vld4_u64(__a) __extension__ ({ \
  uint64x1x4_t r; __builtin_neon_vld4_v(&r, __a, 11); r; })
#define vld4_s8(__a) __extension__ ({ \
  int8x8x4_t r; __builtin_neon_vld4_v(&r, __a, 0); r; })
#define vld4_s16(__a) __extension__ ({ \
  int16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 1); r; })
#define vld4_s32(__a) __extension__ ({ \
  int32x2x4_t r; __builtin_neon_vld4_v(&r, __a, 2); r; })
#define vld4_s64(__a) __extension__ ({ \
  int64x1x4_t r; __builtin_neon_vld4_v(&r, __a, 3); r; })
#define vld4_f16(__a) __extension__ ({ \
  float16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 7); r; })
#define vld4_f32(__a) __extension__ ({ \
  float32x2x4_t r; __builtin_neon_vld4_v(&r, __a, 4); r; })
1820 #define vld4_p8(__a) __extension__ ({ \ | |
1821 poly8x8x4_t r; __builtin_neon_vld4_v(&r, __a, 5); r; }) | |
1822 #define vld4_p16(__a) __extension__ ({ \ | |
1823 poly16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 6); r; }) | |
1824 | |
/* vld4_dup_<t>(__a): load four elements from memory at __a via the
 * vld4_dup builtin (per the NEON VLD4 "single structure to all lanes"
 * form), producing a D-vector 4-aggregate.  Type codes match the rest of
 * the header (0=s8 1=s16 2=s32 3=s64 4=f32 5=p8 6=p16 7=f16 8=u8 9=u16
 * 10=u32 11=u64). */
#define vld4_dup_u8(__a) __extension__ ({ \
  uint8x8x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 8); r; })
#define vld4_dup_u16(__a) __extension__ ({ \
  uint16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 9); r; })
#define vld4_dup_u32(__a) __extension__ ({ \
  uint32x2x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 10); r; })
#define vld4_dup_u64(__a) __extension__ ({ \
  uint64x1x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 11); r; })
#define vld4_dup_s8(__a) __extension__ ({ \
  int8x8x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 0); r; })
#define vld4_dup_s16(__a) __extension__ ({ \
  int16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 1); r; })
#define vld4_dup_s32(__a) __extension__ ({ \
  int32x2x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 2); r; })
#define vld4_dup_s64(__a) __extension__ ({ \
  int64x1x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 3); r; })
#define vld4_dup_f16(__a) __extension__ ({ \
  float16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 7); r; })
#define vld4_dup_f32(__a) __extension__ ({ \
  float32x2x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 4); r; })
#define vld4_dup_p8(__a) __extension__ ({ \
  poly8x8x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 5); r; })
#define vld4_dup_p16(__a) __extension__ ({ \
  poly16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 6); r; })
1849 | |
/* vld4(q)_lane_<t>(__a, b, __c): load one element from memory at __a into
 * lane __c of each of the four vectors of the aggregate `b`; the source
 * vectors are forwarded to the builtin (bit-cast to int8x16_t for Q forms,
 * int8x8_t for D forms) so the other lanes can be carried over.  The result
 * is written through &r and yielded by the statement expression.  Trailing
 * type codes follow the header-wide convention (D: 0=s8 1=s16 2=s32 4=f32
 * 5=p8 6=p16 7=f16 8=u8 9=u16 10=u32; Q: 17=s16 18=s32 20=f32 22=p16
 * 23=f16 25=u16 26=u32). */
#define vld4q_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x8x4_t __b = (b); \
  uint16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 25); r; })
#define vld4q_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x4x4_t __b = (b); \
  uint32x4x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 26); r; })
#define vld4q_lane_s16(__a, b, __c) __extension__ ({ \
  int16x8x4_t __b = (b); \
  int16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 17); r; })
#define vld4q_lane_s32(__a, b, __c) __extension__ ({ \
  int32x4x4_t __b = (b); \
  int32x4x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 18); r; })
#define vld4q_lane_f16(__a, b, __c) __extension__ ({ \
  float16x8x4_t __b = (b); \
  float16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 23); r; })
#define vld4q_lane_f32(__a, b, __c) __extension__ ({ \
  float32x4x4_t __b = (b); \
  float32x4x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 20); r; })
#define vld4q_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x8x4_t __b = (b); \
  poly16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 22); r; })
#define vld4_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x8x4_t __b = (b); \
  uint8x8x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 8); r; })
#define vld4_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x4x4_t __b = (b); \
  uint16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 9); r; })
#define vld4_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x2x4_t __b = (b); \
  uint32x2x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 10); r; })
#define vld4_lane_s8(__a, b, __c) __extension__ ({ \
  int8x8x4_t __b = (b); \
  int8x8x4_t r; __builtin_neon_vld4_lane_v(&r, __a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); r; })
#define vld4_lane_s16(__a, b, __c) __extension__ ({ \
  int16x4x4_t __b = (b); \
  int16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 1); r; })
#define vld4_lane_s32(__a, b, __c) __extension__ ({ \
  int32x2x4_t __b = (b); \
  int32x2x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 2); r; })
#define vld4_lane_f16(__a, b, __c) __extension__ ({ \
  float16x4x4_t __b = (b); \
  float16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 7); r; })
#define vld4_lane_f32(__a, b, __c) __extension__ ({ \
  float32x2x4_t __b = (b); \
  float32x2x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 4); r; })
#define vld4_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x8x4_t __b = (b); \
  poly8x8x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 5); r; })
#define vld4_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x4x4_t __b = (b); \
  poly16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 6); r; })
1901 | |
/* vmax / vmaxq: per-lane maximum via the generic vmax builtins.  Operands
 * are bit-cast to int8x8_t (D) or int8x16_t (Q) because the builtin takes
 * a single generic vector type; the trailing type code (same convention as
 * the vld4 macros above) tells the builtin the real element type, and the
 * result is cast back. */
__ai int8x8_t vmax_s8(int8x8_t __a, int8x8_t __b) { \
  return (int8x8_t)__builtin_neon_vmax_v(__a, __b, 0); }
__ai int16x4_t vmax_s16(int16x4_t __a, int16x4_t __b) { \
  return (int16x4_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 1); }
__ai int32x2_t vmax_s32(int32x2_t __a, int32x2_t __b) { \
  return (int32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 2); }
__ai uint8x8_t vmax_u8(uint8x8_t __a, uint8x8_t __b) { \
  return (uint8x8_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 8); }
__ai uint16x4_t vmax_u16(uint16x4_t __a, uint16x4_t __b) { \
  return (uint16x4_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 9); }
__ai uint32x2_t vmax_u32(uint32x2_t __a, uint32x2_t __b) { \
  return (uint32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 10); }
__ai float32x2_t vmax_f32(float32x2_t __a, float32x2_t __b) { \
  return (float32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 4); }
__ai int8x16_t vmaxq_s8(int8x16_t __a, int8x16_t __b) { \
  return (int8x16_t)__builtin_neon_vmaxq_v(__a, __b, 16); }
__ai int16x8_t vmaxq_s16(int16x8_t __a, int16x8_t __b) { \
  return (int16x8_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 17); }
__ai int32x4_t vmaxq_s32(int32x4_t __a, int32x4_t __b) { \
  return (int32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 18); }
__ai uint8x16_t vmaxq_u8(uint8x16_t __a, uint8x16_t __b) { \
  return (uint8x16_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 24); }
__ai uint16x8_t vmaxq_u16(uint16x8_t __a, uint16x8_t __b) { \
  return (uint16x8_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 25); }
__ai uint32x4_t vmaxq_u32(uint32x4_t __a, uint32x4_t __b) { \
  return (uint32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 26); }
__ai float32x4_t vmaxq_f32(float32x4_t __a, float32x4_t __b) { \
  return (float32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 20); }
1930 | |
/* vmin / vminq: per-lane minimum.  Mirrors the vmax family above: generic
 * builtin, int8x8_t/int8x16_t bit-casts, trailing type code selecting the
 * real element type. */
__ai int8x8_t vmin_s8(int8x8_t __a, int8x8_t __b) { \
  return (int8x8_t)__builtin_neon_vmin_v(__a, __b, 0); }
__ai int16x4_t vmin_s16(int16x4_t __a, int16x4_t __b) { \
  return (int16x4_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 1); }
__ai int32x2_t vmin_s32(int32x2_t __a, int32x2_t __b) { \
  return (int32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 2); }
__ai uint8x8_t vmin_u8(uint8x8_t __a, uint8x8_t __b) { \
  return (uint8x8_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 8); }
__ai uint16x4_t vmin_u16(uint16x4_t __a, uint16x4_t __b) { \
  return (uint16x4_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 9); }
__ai uint32x2_t vmin_u32(uint32x2_t __a, uint32x2_t __b) { \
  return (uint32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 10); }
__ai float32x2_t vmin_f32(float32x2_t __a, float32x2_t __b) { \
  return (float32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 4); }
__ai int8x16_t vminq_s8(int8x16_t __a, int8x16_t __b) { \
  return (int8x16_t)__builtin_neon_vminq_v(__a, __b, 16); }
__ai int16x8_t vminq_s16(int16x8_t __a, int16x8_t __b) { \
  return (int16x8_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 17); }
__ai int32x4_t vminq_s32(int32x4_t __a, int32x4_t __b) { \
  return (int32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 18); }
__ai uint8x16_t vminq_u8(uint8x16_t __a, uint8x16_t __b) { \
  return (uint8x16_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 24); }
__ai uint16x8_t vminq_u16(uint16x8_t __a, uint16x8_t __b) { \
  return (uint16x8_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 25); }
__ai uint32x4_t vminq_u32(uint32x4_t __a, uint32x4_t __b) { \
  return (uint32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 26); }
__ai float32x4_t vminq_f32(float32x4_t __a, float32x4_t __b) { \
  return (float32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 20); }
1959 | |
1960 __ai int8x8_t vmla_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { \ | |
1961 return __a + (__b * __c); } | |
1962 __ai int16x4_t vmla_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) { \ | |
1963 return __a + (__b * __c); } | |
1964 __ai int32x2_t vmla_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) { \ | |
1965 return __a + (__b * __c); } | |
1966 __ai float32x2_t vmla_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) { \ | |
1967 return __a + (__b * __c); } | |
1968 __ai uint8x8_t vmla_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { \ | |
1969 return __a + (__b * __c); } | |
1970 __ai uint16x4_t vmla_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { \ | |
1971 return __a + (__b * __c); } | |
1972 __ai uint32x2_t vmla_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { \ | |
1973 return __a + (__b * __c); } | |
1974 __ai int8x16_t vmlaq_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) { \ | |
1975 return __a + (__b * __c); } | |
1976 __ai int16x8_t vmlaq_s16(int16x8_t __a, int16x8_t __b, int16x8_t __c) { \ | |
1977 return __a + (__b * __c); } | |
1978 __ai int32x4_t vmlaq_s32(int32x4_t __a, int32x4_t __b, int32x4_t __c) { \ | |
1979 return __a + (__b * __c); } | |
1980 __ai float32x4_t vmlaq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) {
\ | |
1981 return __a + (__b * __c); } | |
1982 __ai uint8x16_t vmlaq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { \ | |
1983 return __a + (__b * __c); } | |
1984 __ai uint16x8_t vmlaq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { \ | |
1985 return __a + (__b * __c); } | |
1986 __ai uint32x4_t vmlaq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { \ | |
1987 return __a + (__b * __c); } | |
1988 | |
1989 __ai int16x8_t vmlal_s8(int16x8_t __a, int8x8_t __b, int8x8_t __c) { \ | |
1990 return __a + vmull_s8(__b, __c); } | |
1991 __ai int32x4_t vmlal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { \ | |
1992 return __a + vmull_s16(__b, __c); } | |
1993 __ai int64x2_t vmlal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { \ | |
1994 return __a + vmull_s32(__b, __c); } | |
1995 __ai uint16x8_t vmlal_u8(uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) { \ | |
1996 return __a + vmull_u8(__b, __c); } | |
1997 __ai uint32x4_t vmlal_u16(uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) { \ | |
1998 return __a + vmull_u16(__b, __c); } | |
1999 __ai uint64x2_t vmlal_u32(uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) { \ | |
2000 return __a + vmull_u32(__b, __c); } | |
2001 | |
/* vmlal_lane: widening multiply-accumulate against a single lane of `c`.
 * __builtin_shufflevector broadcasts lane __d across the vector, which is
 * then fed to the vmull_* widening multiply and added to the accumulator.
 * Macros (not functions) so __d stays a constant expression as
 * __builtin_shufflevector requires. */
#define vmlal_lane_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \
  __a + vmull_s16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmlal_lane_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \
  __a + vmull_s32(__b, __builtin_shufflevector(__c, __c, __d, __d)); })
#define vmlal_lane_u16(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \
  __a + vmull_u16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmlal_lane_u32(a, b, c, __d) __extension__ ({ \
  uint64x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \
  __a + vmull_u32(__b, __builtin_shufflevector(__c, __c, __d, __d)); })
2014 | |
2015 __ai int32x4_t vmlal_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { \ | |
2016 return __a + vmull_s16(__b, (int16x4_t){ __c, __c, __c, __c }); } | |
2017 __ai int64x2_t vmlal_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { \ | |
2018 return __a + vmull_s32(__b, (int32x2_t){ __c, __c }); } | |
2019 __ai uint32x4_t vmlal_n_u16(uint32x4_t __a, uint16x4_t __b, uint16_t __c) { \ | |
2020 return __a + vmull_u16(__b, (uint16x4_t){ __c, __c, __c, __c }); } | |
2021 __ai uint64x2_t vmlal_n_u32(uint64x2_t __a, uint32x2_t __b, uint32_t __c) { \ | |
2022 return __a + vmull_u32(__b, (uint32x2_t){ __c, __c }); } | |
2023 | |
/* vmla_lane / vmlaq_lane: multiply-accumulate by a single lane of `c`.
 * Lane __d is broadcast across the (D-width) vector with
 * __builtin_shufflevector — note the q forms broadcast a D-vector lane
 * into a Q-width vector — then combined with plain vector + and *.
 * Macros so __d stays a constant expression. */
#define vmla_lane_s16(a, b, c, __d) __extension__ ({ \
  int16x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmla_lane_s32(a, b, c, __d) __extension__ ({ \
  int32x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); })
#define vmla_lane_u16(a, b, c, __d) __extension__ ({ \
  uint16x4_t __a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmla_lane_u32(a, b, c, __d) __extension__ ({ \
  uint32x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); })
#define vmla_lane_f32(a, b, c, __d) __extension__ ({ \
  float32x2_t __a = (a); float32x2_t __b = (b); float32x2_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); })
#define vmlaq_lane_s16(a, b, c, __d) __extension__ ({ \
  int16x8_t __a = (a); int16x8_t __b = (b); int16x4_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); })
#define vmlaq_lane_s32(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); int32x4_t __b = (b); int32x2_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmlaq_lane_u16(a, b, c, __d) __extension__ ({ \
  uint16x8_t __a = (a); uint16x8_t __b = (b); uint16x4_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); })
#define vmlaq_lane_u32(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); uint32x4_t __b = (b); uint32x2_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmlaq_lane_f32(a, b, c, __d) __extension__ ({ \
  float32x4_t __a = (a); float32x4_t __b = (b); float32x2_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
2054 | |
2055 __ai int16x4_t vmla_n_s16(int16x4_t __a, int16x4_t __b, int16_t __c) { \ | |
2056 return __a + (__b * (int16x4_t){ __c, __c, __c, __c }); } | |
2057 __ai int32x2_t vmla_n_s32(int32x2_t __a, int32x2_t __b, int32_t __c) { \ | |
2058 return __a + (__b * (int32x2_t){ __c, __c }); } | |
2059 __ai uint16x4_t vmla_n_u16(uint16x4_t __a, uint16x4_t __b, uint16_t __c) { \ | |
2060 return __a + (__b * (uint16x4_t){ __c, __c, __c, __c }); } | |
2061 __ai uint32x2_t vmla_n_u32(uint32x2_t __a, uint32x2_t __b, uint32_t __c) { \ | |
2062 return __a + (__b * (uint32x2_t){ __c, __c }); } | |
2063 __ai float32x2_t vmla_n_f32(float32x2_t __a, float32x2_t __b, float32_t __c) { \ | |
2064 return __a + (__b * (float32x2_t){ __c, __c }); } | |
2065 __ai int16x8_t vmlaq_n_s16(int16x8_t __a, int16x8_t __b, int16_t __c) { \ | |
2066 return __a + (__b * (int16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } | |
2067 __ai int32x4_t vmlaq_n_s32(int32x4_t __a, int32x4_t __b, int32_t __c) { \ | |
2068 return __a + (__b * (int32x4_t){ __c, __c, __c, __c }); } | |
2069 __ai uint16x8_t vmlaq_n_u16(uint16x8_t __a, uint16x8_t __b, uint16_t __c) { \ | |
2070 return __a + (__b * (uint16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } | |
2071 __ai uint32x4_t vmlaq_n_u32(uint32x4_t __a, uint32x4_t __b, uint32_t __c) { \ | |
2072 return __a + (__b * (uint32x4_t){ __c, __c, __c, __c }); } | |
2073 __ai float32x4_t vmlaq_n_f32(float32x4_t __a, float32x4_t __b, float32_t __c) {
\ | |
2074 return __a + (__b * (float32x4_t){ __c, __c, __c, __c }); } | |
2075 | |
2076 __ai int8x8_t vmls_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { \ | |
2077 return __a - (__b * __c); } | |
2078 __ai int16x4_t vmls_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) { \ | |
2079 return __a - (__b * __c); } | |
2080 __ai int32x2_t vmls_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) { \ | |
2081 return __a - (__b * __c); } | |
2082 __ai float32x2_t vmls_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) { \ | |
2083 return __a - (__b * __c); } | |
2084 __ai uint8x8_t vmls_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { \ | |
2085 return __a - (__b * __c); } | |
2086 __ai uint16x4_t vmls_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { \ | |
2087 return __a - (__b * __c); } | |
2088 __ai uint32x2_t vmls_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) { \ | |
2089 return __a - (__b * __c); } | |
2090 __ai int8x16_t vmlsq_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) { \ | |
2091 return __a - (__b * __c); } | |
2092 __ai int16x8_t vmlsq_s16(int16x8_t __a, int16x8_t __b, int16x8_t __c) { \ | |
2093 return __a - (__b * __c); } | |
2094 __ai int32x4_t vmlsq_s32(int32x4_t __a, int32x4_t __b, int32x4_t __c) { \ | |
2095 return __a - (__b * __c); } | |
2096 __ai float32x4_t vmlsq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) {
\ | |
2097 return __a - (__b * __c); } | |
2098 __ai uint8x16_t vmlsq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) { \ | |
2099 return __a - (__b * __c); } | |
2100 __ai uint16x8_t vmlsq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) { \ | |
2101 return __a - (__b * __c); } | |
2102 __ai uint32x4_t vmlsq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) { \ | |
2103 return __a - (__b * __c); } | |
2104 | |
2105 __ai int16x8_t vmlsl_s8(int16x8_t __a, int8x8_t __b, int8x8_t __c) { \ | |
2106 return __a - vmull_s8(__b, __c); } | |
2107 __ai int32x4_t vmlsl_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { \ | |
2108 return __a - vmull_s16(__b, __c); } | |
2109 __ai int64x2_t vmlsl_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { \ | |
2110 return __a - vmull_s32(__b, __c); } | |
2111 __ai uint16x8_t vmlsl_u8(uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) { \ | |
2112 return __a - vmull_u8(__b, __c); } | |
2113 __ai uint32x4_t vmlsl_u16(uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) { \ | |
2114 return __a - vmull_u16(__b, __c); } | |
2115 __ai uint64x2_t vmlsl_u32(uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) { \ | |
2116 return __a - vmull_u32(__b, __c); } | |
2117 | |
/* vmlsl_lane: widening multiply-subtract against a single lane of `c`;
 * lane __d is broadcast via __builtin_shufflevector, widened through
 * vmull_*, and subtracted from the accumulator.  Macros so __d stays a
 * constant expression. */
#define vmlsl_lane_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \
  __a - vmull_s16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmlsl_lane_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \
  __a - vmull_s32(__b, __builtin_shufflevector(__c, __c, __d, __d)); })
#define vmlsl_lane_u16(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \
  __a - vmull_u16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmlsl_lane_u32(a, b, c, __d) __extension__ ({ \
  uint64x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \
  __a - vmull_u32(__b, __builtin_shufflevector(__c, __c, __d, __d)); })
2130 | |
2131 __ai int32x4_t vmlsl_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { \ | |
2132 return __a - vmull_s16(__b, (int16x4_t){ __c, __c, __c, __c }); } | |
2133 __ai int64x2_t vmlsl_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { \ | |
2134 return __a - vmull_s32(__b, (int32x2_t){ __c, __c }); } | |
2135 __ai uint32x4_t vmlsl_n_u16(uint32x4_t __a, uint16x4_t __b, uint16_t __c) { \ | |
2136 return __a - vmull_u16(__b, (uint16x4_t){ __c, __c, __c, __c }); } | |
2137 __ai uint64x2_t vmlsl_n_u32(uint64x2_t __a, uint32x2_t __b, uint32_t __c) { \ | |
2138 return __a - vmull_u32(__b, (uint32x2_t){ __c, __c }); } | |
2139 | |
/* vmls_lane / vmlsq_lane: multiply-subtract by a single lane of `c`.
 * Same shape as the vmla_lane family: lane __d is broadcast with
 * __builtin_shufflevector (q forms widen a D-vector lane into a Q-width
 * vector) and combined with plain vector - and *.  Macros so __d stays a
 * constant expression. */
#define vmls_lane_s16(a, b, c, __d) __extension__ ({ \
  int16x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmls_lane_s32(a, b, c, __d) __extension__ ({ \
  int32x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); })
#define vmls_lane_u16(a, b, c, __d) __extension__ ({ \
  uint16x4_t __a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmls_lane_u32(a, b, c, __d) __extension__ ({ \
  uint32x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); })
#define vmls_lane_f32(a, b, c, __d) __extension__ ({ \
  float32x2_t __a = (a); float32x2_t __b = (b); float32x2_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); })
#define vmlsq_lane_s16(a, b, c, __d) __extension__ ({ \
  int16x8_t __a = (a); int16x8_t __b = (b); int16x4_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); })
#define vmlsq_lane_s32(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); int32x4_t __b = (b); int32x2_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmlsq_lane_u16(a, b, c, __d) __extension__ ({ \
  uint16x8_t __a = (a); uint16x8_t __b = (b); uint16x4_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); })
#define vmlsq_lane_u32(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); uint32x4_t __b = (b); uint32x2_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmlsq_lane_f32(a, b, c, __d) __extension__ ({ \
  float32x4_t __a = (a); float32x4_t __b = (b); float32x2_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
2170 | |
2171 __ai int16x4_t vmls_n_s16(int16x4_t __a, int16x4_t __b, int16_t __c) { \ | |
2172 return __a - (__b * (int16x4_t){ __c, __c, __c, __c }); } | |
2173 __ai int32x2_t vmls_n_s32(int32x2_t __a, int32x2_t __b, int32_t __c) { \ | |
2174 return __a - (__b * (int32x2_t){ __c, __c }); } | |
2175 __ai uint16x4_t vmls_n_u16(uint16x4_t __a, uint16x4_t __b, uint16_t __c) { \ | |
2176 return __a - (__b * (uint16x4_t){ __c, __c, __c, __c }); } | |
2177 __ai uint32x2_t vmls_n_u32(uint32x2_t __a, uint32x2_t __b, uint32_t __c) { \ | |
2178 return __a - (__b * (uint32x2_t){ __c, __c }); } | |
2179 __ai float32x2_t vmls_n_f32(float32x2_t __a, float32x2_t __b, float32_t __c) { \ | |
2180 return __a - (__b * (float32x2_t){ __c, __c }); } | |
2181 __ai int16x8_t vmlsq_n_s16(int16x8_t __a, int16x8_t __b, int16_t __c) { \ | |
2182 return __a - (__b * (int16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } | |
2183 __ai int32x4_t vmlsq_n_s32(int32x4_t __a, int32x4_t __b, int32_t __c) { \ | |
2184 return __a - (__b * (int32x4_t){ __c, __c, __c, __c }); } | |
2185 __ai uint16x8_t vmlsq_n_u16(uint16x8_t __a, uint16x8_t __b, uint16_t __c) { \ | |
2186 return __a - (__b * (uint16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); } | |
2187 __ai uint32x4_t vmlsq_n_u32(uint32x4_t __a, uint32x4_t __b, uint32_t __c) { \ | |
2188 return __a - (__b * (uint32x4_t){ __c, __c, __c, __c }); } | |
2189 __ai float32x4_t vmlsq_n_f32(float32x4_t __a, float32x4_t __b, float32_t __c) {
\ | |
2190 return __a - (__b * (float32x4_t){ __c, __c, __c, __c }); } | |
2191 | |
/* vmovn: narrowing move — each element of a 128-bit vector is narrowed to
 * half width, producing a 64-bit vector (e.g. int16x8_t -> int8x8_t, as
 * the signatures show).  The argument is bit-cast to int8x16_t for the
 * generic builtin; the trailing code names the narrowed *result* element
 * type (0=s8 1=s16 2=s32 8=u8 9=u16 10=u32). */
__ai int8x8_t vmovn_s16(int16x8_t __a) { \
  return (int8x8_t)__builtin_neon_vmovn_v((int8x16_t)__a, 0); }
__ai int16x4_t vmovn_s32(int32x4_t __a) { \
  return (int16x4_t)__builtin_neon_vmovn_v((int8x16_t)__a, 1); }
__ai int32x2_t vmovn_s64(int64x2_t __a) { \
  return (int32x2_t)__builtin_neon_vmovn_v((int8x16_t)__a, 2); }
__ai uint8x8_t vmovn_u16(uint16x8_t __a) { \
  return (uint8x8_t)__builtin_neon_vmovn_v((int8x16_t)__a, 8); }
__ai uint16x4_t vmovn_u32(uint32x4_t __a) { \
  return (uint16x4_t)__builtin_neon_vmovn_v((int8x16_t)__a, 9); }
__ai uint32x2_t vmovn_u64(uint64x2_t __a) { \
  return (uint32x2_t)__builtin_neon_vmovn_v((int8x16_t)__a, 10); }
2204 | |
2205 __ai uint8x8_t vmov_n_u8(uint8_t __a) { \ | |
2206 return (uint8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } | |
2207 __ai uint16x4_t vmov_n_u16(uint16_t __a) { \ | |
2208 return (uint16x4_t){ __a, __a, __a, __a }; } | |
2209 __ai uint32x2_t vmov_n_u32(uint32_t __a) { \ | |
2210 return (uint32x2_t){ __a, __a }; } | |
2211 __ai int8x8_t vmov_n_s8(int8_t __a) { \ | |
2212 return (int8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } | |
2213 __ai int16x4_t vmov_n_s16(int16_t __a) { \ | |
2214 return (int16x4_t){ __a, __a, __a, __a }; } | |
2215 __ai int32x2_t vmov_n_s32(int32_t __a) { \ | |
2216 return (int32x2_t){ __a, __a }; } | |
2217 __ai poly8x8_t vmov_n_p8(poly8_t __a) { \ | |
2218 return (poly8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } | |
2219 __ai poly16x4_t vmov_n_p16(poly16_t __a) { \ | |
2220 return (poly16x4_t){ __a, __a, __a, __a }; } | |
2221 __ai float32x2_t vmov_n_f32(float32_t __a) { \ | |
2222 return (float32x2_t){ __a, __a }; } | |
2223 __ai uint8x16_t vmovq_n_u8(uint8_t __a) { \ | |
2224 return (uint8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __
a, __a, __a, __a, __a }; } | |
2225 __ai uint16x8_t vmovq_n_u16(uint16_t __a) { \ | |
2226 return (uint16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } | |
2227 __ai uint32x4_t vmovq_n_u32(uint32_t __a) { \ | |
2228 return (uint32x4_t){ __a, __a, __a, __a }; } | |
2229 __ai int8x16_t vmovq_n_s8(int8_t __a) { \ | |
2230 return (int8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a
, __a, __a, __a, __a }; } | |
2231 __ai int16x8_t vmovq_n_s16(int16_t __a) { \ | |
2232 return (int16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } | |
2233 __ai int32x4_t vmovq_n_s32(int32_t __a) { \ | |
2234 return (int32x4_t){ __a, __a, __a, __a }; } | |
2235 __ai poly8x16_t vmovq_n_p8(poly8_t __a) { \ | |
2236 return (poly8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __
a, __a, __a, __a, __a }; } | |
2237 __ai poly16x8_t vmovq_n_p16(poly16_t __a) { \ | |
2238 return (poly16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } | |
2239 __ai float32x4_t vmovq_n_f32(float32_t __a) { \ | |
2240 return (float32x4_t){ __a, __a, __a, __a }; } | |
2241 __ai int64x1_t vmov_n_s64(int64_t __a) { \ | |
2242 return (int64x1_t){ __a }; } | |
2243 __ai uint64x1_t vmov_n_u64(uint64_t __a) { \ | |
2244 return (uint64x1_t){ __a }; } | |
2245 __ai int64x2_t vmovq_n_s64(int64_t __a) { \ | |
2246 return (int64x2_t){ __a, __a }; } | |
2247 __ai uint64x2_t vmovq_n_u64(uint64_t __a) { \ | |
2248 return (uint64x2_t){ __a, __a }; } | |
2249 | |
2250 __ai int8x8_t vmul_s8(int8x8_t __a, int8x8_t __b) { \ | |
2251 return __a * __b; } | |
2252 __ai int16x4_t vmul_s16(int16x4_t __a, int16x4_t __b) { \ | |
2253 return __a * __b; } | |
2254 __ai int32x2_t vmul_s32(int32x2_t __a, int32x2_t __b) { \ | |
2255 return __a * __b; } | |
2256 __ai float32x2_t vmul_f32(float32x2_t __a, float32x2_t __b) { \ | |
2257 return __a * __b; } | |
2258 __ai uint8x8_t vmul_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
2259 return __a * __b; } | |
2260 __ai uint16x4_t vmul_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
2261 return __a * __b; } | |
2262 __ai uint32x2_t vmul_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
2263 return __a * __b; } | |
2264 __ai int8x16_t vmulq_s8(int8x16_t __a, int8x16_t __b) { \ | |
2265 return __a * __b; } | |
2266 __ai int16x8_t vmulq_s16(int16x8_t __a, int16x8_t __b) { \ | |
2267 return __a * __b; } | |
2268 __ai int32x4_t vmulq_s32(int32x4_t __a, int32x4_t __b) { \ | |
2269 return __a * __b; } | |
2270 __ai float32x4_t vmulq_f32(float32x4_t __a, float32x4_t __b) { \ | |
2271 return __a * __b; } | |
2272 __ai uint8x16_t vmulq_u8(uint8x16_t __a, uint8x16_t __b) { \ | |
2273 return __a * __b; } | |
2274 __ai uint16x8_t vmulq_u16(uint16x8_t __a, uint16x8_t __b) { \ | |
2275 return __a * __b; } | |
2276 __ai uint32x4_t vmulq_u32(uint32x4_t __a, uint32x4_t __b) { \ | |
2277 return __a * __b; } | |
2278 | |
/* vmull_lane: widening multiply by a single lane of the second operand.
 * The lane is splatted with __builtin_shufflevector, then vmull is used.
 * Written as macros because the lane index must be a compile-time constant. */
#define vmull_lane_s16(a, b, __c) __extension__ ({ \
  int16x4_t __a = (a); int16x4_t __b = (b); \
  vmull_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })
#define vmull_lane_s32(a, b, __c) __extension__ ({ \
  int32x2_t __a = (a); int32x2_t __b = (b); \
  vmull_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); })
#define vmull_lane_u16(a, b, __c) __extension__ ({ \
  uint16x4_t __a = (a); uint16x4_t __b = (b); \
  vmull_u16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })
#define vmull_lane_u32(a, b, __c) __extension__ ({ \
  uint32x2_t __a = (a); uint32x2_t __b = (b); \
  vmull_u32(__a, __builtin_shufflevector(__b, __b, __c, __c)); })
2291 | |
2292 __ai int32x4_t vmull_n_s16(int16x4_t __a, int16_t __b) { \ | |
2293 return (int32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(int16x4_t){
__b, __b, __b, __b }, 18); } | |
2294 __ai int64x2_t vmull_n_s32(int32x2_t __a, int32_t __b) { \ | |
2295 return (int64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(int32x2_t){
__b, __b }, 19); } | |
2296 __ai uint32x4_t vmull_n_u16(uint16x4_t __a, uint16_t __b) { \ | |
2297 return (uint32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(uint16x4_t
){ __b, __b, __b, __b }, 26); } | |
2298 __ai uint64x2_t vmull_n_u32(uint32x2_t __a, uint32_t __b) { \ | |
2299 return (uint64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(uint32x2_t
){ __b, __b }, 27); } | |
2300 | |
2301 __ai poly8x8_t vmul_p8(poly8x8_t __a, poly8x8_t __b) { \ | |
2302 return (poly8x8_t)__builtin_neon_vmul_v((int8x8_t)__a, (int8x8_t)__b, 5); } | |
2303 __ai poly8x16_t vmulq_p8(poly8x16_t __a, poly8x16_t __b) { \ | |
2304 return (poly8x16_t)__builtin_neon_vmulq_v((int8x16_t)__a, (int8x16_t)__b, 21);
} | |
2305 | |
/* vmul_lane / vmulq_lane: multiply by one lane of a d-register vector.
 * The lane is splatted to the width of the first operand, then multiplied
 * with the vector `*` operator. Macros so the lane index stays constant. */
#define vmul_lane_s16(a, b, __c) __extension__ ({ \
  int16x4_t __a = (a); int16x4_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); })
#define vmul_lane_s32(a, b, __c) __extension__ ({ \
  int32x2_t __a = (a); int32x2_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c); })
#define vmul_lane_f32(a, b, __c) __extension__ ({ \
  float32x2_t __a = (a); float32x2_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c); })
#define vmul_lane_u16(a, b, __c) __extension__ ({ \
  uint16x4_t __a = (a); uint16x4_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); })
#define vmul_lane_u32(a, b, __c) __extension__ ({ \
  uint32x2_t __a = (a); uint32x2_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c); })
#define vmulq_lane_s16(a, b, __c) __extension__ ({ \
  int16x8_t __a = (a); int16x4_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c); })
#define vmulq_lane_s32(a, b, __c) __extension__ ({ \
  int32x4_t __a = (a); int32x2_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); })
#define vmulq_lane_f32(a, b, __c) __extension__ ({ \
  float32x4_t __a = (a); float32x2_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); })
#define vmulq_lane_u16(a, b, __c) __extension__ ({ \
  uint16x8_t __a = (a); uint16x4_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c); })
#define vmulq_lane_u32(a, b, __c) __extension__ ({ \
  uint32x4_t __a = (a); uint32x2_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); })
2336 | |
2337 __ai int16x4_t vmul_n_s16(int16x4_t __a, int16_t __b) { \ | |
2338 return __a * (int16x4_t){ __b, __b, __b, __b }; } | |
2339 __ai int32x2_t vmul_n_s32(int32x2_t __a, int32_t __b) { \ | |
2340 return __a * (int32x2_t){ __b, __b }; } | |
2341 __ai float32x2_t vmul_n_f32(float32x2_t __a, float32_t __b) { \ | |
2342 return __a * (float32x2_t){ __b, __b }; } | |
2343 __ai uint16x4_t vmul_n_u16(uint16x4_t __a, uint16_t __b) { \ | |
2344 return __a * (uint16x4_t){ __b, __b, __b, __b }; } | |
2345 __ai uint32x2_t vmul_n_u32(uint32x2_t __a, uint32_t __b) { \ | |
2346 return __a * (uint32x2_t){ __b, __b }; } | |
2347 __ai int16x8_t vmulq_n_s16(int16x8_t __a, int16_t __b) { \ | |
2348 return __a * (int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }; } | |
2349 __ai int32x4_t vmulq_n_s32(int32x4_t __a, int32_t __b) { \ | |
2350 return __a * (int32x4_t){ __b, __b, __b, __b }; } | |
2351 __ai float32x4_t vmulq_n_f32(float32x4_t __a, float32_t __b) { \ | |
2352 return __a * (float32x4_t){ __b, __b, __b, __b }; } | |
2353 __ai uint16x8_t vmulq_n_u16(uint16x8_t __a, uint16_t __b) { \ | |
2354 return __a * (uint16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }; } | |
2355 __ai uint32x4_t vmulq_n_u32(uint32x4_t __a, uint32_t __b) { \ | |
2356 return __a * (uint32x4_t){ __b, __b, __b, __b }; } | |
2357 | |
2358 __ai int8x8_t vmvn_s8(int8x8_t __a) { \ | |
2359 return ~__a; } | |
2360 __ai int16x4_t vmvn_s16(int16x4_t __a) { \ | |
2361 return ~__a; } | |
2362 __ai int32x2_t vmvn_s32(int32x2_t __a) { \ | |
2363 return ~__a; } | |
2364 __ai uint8x8_t vmvn_u8(uint8x8_t __a) { \ | |
2365 return ~__a; } | |
2366 __ai uint16x4_t vmvn_u16(uint16x4_t __a) { \ | |
2367 return ~__a; } | |
2368 __ai uint32x2_t vmvn_u32(uint32x2_t __a) { \ | |
2369 return ~__a; } | |
2370 __ai poly8x8_t vmvn_p8(poly8x8_t __a) { \ | |
2371 return ~__a; } | |
2372 __ai int8x16_t vmvnq_s8(int8x16_t __a) { \ | |
2373 return ~__a; } | |
2374 __ai int16x8_t vmvnq_s16(int16x8_t __a) { \ | |
2375 return ~__a; } | |
2376 __ai int32x4_t vmvnq_s32(int32x4_t __a) { \ | |
2377 return ~__a; } | |
2378 __ai uint8x16_t vmvnq_u8(uint8x16_t __a) { \ | |
2379 return ~__a; } | |
2380 __ai uint16x8_t vmvnq_u16(uint16x8_t __a) { \ | |
2381 return ~__a; } | |
2382 __ai uint32x4_t vmvnq_u32(uint32x4_t __a) { \ | |
2383 return ~__a; } | |
2384 __ai poly8x16_t vmvnq_p8(poly8x16_t __a) { \ | |
2385 return ~__a; } | |
2386 | |
2387 __ai int8x8_t vneg_s8(int8x8_t __a) { \ | |
2388 return -__a; } | |
2389 __ai int16x4_t vneg_s16(int16x4_t __a) { \ | |
2390 return -__a; } | |
2391 __ai int32x2_t vneg_s32(int32x2_t __a) { \ | |
2392 return -__a; } | |
2393 __ai float32x2_t vneg_f32(float32x2_t __a) { \ | |
2394 return -__a; } | |
2395 __ai int8x16_t vnegq_s8(int8x16_t __a) { \ | |
2396 return -__a; } | |
2397 __ai int16x8_t vnegq_s16(int16x8_t __a) { \ | |
2398 return -__a; } | |
2399 __ai int32x4_t vnegq_s32(int32x4_t __a) { \ | |
2400 return -__a; } | |
2401 __ai float32x4_t vnegq_f32(float32x4_t __a) { \ | |
2402 return -__a; } | |
2403 | |
2404 __ai int8x8_t vorn_s8(int8x8_t __a, int8x8_t __b) { \ | |
2405 return __a | ~__b; } | |
2406 __ai int16x4_t vorn_s16(int16x4_t __a, int16x4_t __b) { \ | |
2407 return __a | ~__b; } | |
2408 __ai int32x2_t vorn_s32(int32x2_t __a, int32x2_t __b) { \ | |
2409 return __a | ~__b; } | |
2410 __ai int64x1_t vorn_s64(int64x1_t __a, int64x1_t __b) { \ | |
2411 return __a | ~__b; } | |
2412 __ai uint8x8_t vorn_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
2413 return __a | ~__b; } | |
2414 __ai uint16x4_t vorn_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
2415 return __a | ~__b; } | |
2416 __ai uint32x2_t vorn_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
2417 return __a | ~__b; } | |
2418 __ai uint64x1_t vorn_u64(uint64x1_t __a, uint64x1_t __b) { \ | |
2419 return __a | ~__b; } | |
2420 __ai int8x16_t vornq_s8(int8x16_t __a, int8x16_t __b) { \ | |
2421 return __a | ~__b; } | |
2422 __ai int16x8_t vornq_s16(int16x8_t __a, int16x8_t __b) { \ | |
2423 return __a | ~__b; } | |
2424 __ai int32x4_t vornq_s32(int32x4_t __a, int32x4_t __b) { \ | |
2425 return __a | ~__b; } | |
2426 __ai int64x2_t vornq_s64(int64x2_t __a, int64x2_t __b) { \ | |
2427 return __a | ~__b; } | |
2428 __ai uint8x16_t vornq_u8(uint8x16_t __a, uint8x16_t __b) { \ | |
2429 return __a | ~__b; } | |
2430 __ai uint16x8_t vornq_u16(uint16x8_t __a, uint16x8_t __b) { \ | |
2431 return __a | ~__b; } | |
2432 __ai uint32x4_t vornq_u32(uint32x4_t __a, uint32x4_t __b) { \ | |
2433 return __a | ~__b; } | |
2434 __ai uint64x2_t vornq_u64(uint64x2_t __a, uint64x2_t __b) { \ | |
2435 return __a | ~__b; } | |
2436 | |
2437 __ai int8x8_t vorr_s8(int8x8_t __a, int8x8_t __b) { \ | |
2438 return __a | __b; } | |
2439 __ai int16x4_t vorr_s16(int16x4_t __a, int16x4_t __b) { \ | |
2440 return __a | __b; } | |
2441 __ai int32x2_t vorr_s32(int32x2_t __a, int32x2_t __b) { \ | |
2442 return __a | __b; } | |
2443 __ai int64x1_t vorr_s64(int64x1_t __a, int64x1_t __b) { \ | |
2444 return __a | __b; } | |
2445 __ai uint8x8_t vorr_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
2446 return __a | __b; } | |
2447 __ai uint16x4_t vorr_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
2448 return __a | __b; } | |
2449 __ai uint32x2_t vorr_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
2450 return __a | __b; } | |
2451 __ai uint64x1_t vorr_u64(uint64x1_t __a, uint64x1_t __b) { \ | |
2452 return __a | __b; } | |
2453 __ai int8x16_t vorrq_s8(int8x16_t __a, int8x16_t __b) { \ | |
2454 return __a | __b; } | |
2455 __ai int16x8_t vorrq_s16(int16x8_t __a, int16x8_t __b) { \ | |
2456 return __a | __b; } | |
2457 __ai int32x4_t vorrq_s32(int32x4_t __a, int32x4_t __b) { \ | |
2458 return __a | __b; } | |
2459 __ai int64x2_t vorrq_s64(int64x2_t __a, int64x2_t __b) { \ | |
2460 return __a | __b; } | |
2461 __ai uint8x16_t vorrq_u8(uint8x16_t __a, uint8x16_t __b) { \ | |
2462 return __a | __b; } | |
2463 __ai uint16x8_t vorrq_u16(uint16x8_t __a, uint16x8_t __b) { \ | |
2464 return __a | __b; } | |
2465 __ai uint32x4_t vorrq_u32(uint32x4_t __a, uint32x4_t __b) { \ | |
2466 return __a | __b; } | |
2467 __ai uint64x2_t vorrq_u64(uint64x2_t __a, uint64x2_t __b) { \ | |
2468 return __a | __b; } | |
2469 | |
2470 __ai int16x4_t vpadal_s8(int16x4_t __a, int8x8_t __b) { \ | |
2471 return (int16x4_t)__builtin_neon_vpadal_v((int8x8_t)__a, __b, 1); } | |
2472 __ai int32x2_t vpadal_s16(int32x2_t __a, int16x4_t __b) { \ | |
2473 return (int32x2_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 2); } | |
2474 __ai int64x1_t vpadal_s32(int64x1_t __a, int32x2_t __b) { \ | |
2475 return (int64x1_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 3); } | |
2476 __ai uint16x4_t vpadal_u8(uint16x4_t __a, uint8x8_t __b) { \ | |
2477 return (uint16x4_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 9); } | |
2478 __ai uint32x2_t vpadal_u16(uint32x2_t __a, uint16x4_t __b) { \ | |
2479 return (uint32x2_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 10);
} | |
2480 __ai uint64x1_t vpadal_u32(uint64x1_t __a, uint32x2_t __b) { \ | |
2481 return (uint64x1_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 11);
} | |
2482 __ai int16x8_t vpadalq_s8(int16x8_t __a, int8x16_t __b) { \ | |
2483 return (int16x8_t)__builtin_neon_vpadalq_v((int8x16_t)__a, __b, 17); } | |
2484 __ai int32x4_t vpadalq_s16(int32x4_t __a, int16x8_t __b) { \ | |
2485 return (int32x4_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 18)
; } | |
2486 __ai int64x2_t vpadalq_s32(int64x2_t __a, int32x4_t __b) { \ | |
2487 return (int64x2_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 19)
; } | |
2488 __ai uint16x8_t vpadalq_u8(uint16x8_t __a, uint8x16_t __b) { \ | |
2489 return (uint16x8_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 25
); } | |
2490 __ai uint32x4_t vpadalq_u16(uint32x4_t __a, uint16x8_t __b) { \ | |
2491 return (uint32x4_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 26
); } | |
2492 __ai uint64x2_t vpadalq_u32(uint64x2_t __a, uint32x4_t __b) { \ | |
2493 return (uint64x2_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 27
); } | |
2494 | |
2495 __ai int8x8_t vpadd_s8(int8x8_t __a, int8x8_t __b) { \ | |
2496 return (int8x8_t)__builtin_neon_vpadd_v(__a, __b, 0); } | |
2497 __ai int16x4_t vpadd_s16(int16x4_t __a, int16x4_t __b) { \ | |
2498 return (int16x4_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } | |
2499 __ai int32x2_t vpadd_s32(int32x2_t __a, int32x2_t __b) { \ | |
2500 return (int32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } | |
2501 __ai uint8x8_t vpadd_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
2502 return (uint8x8_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 8); } | |
2503 __ai uint16x4_t vpadd_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
2504 return (uint16x4_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 9); } | |
2505 __ai uint32x2_t vpadd_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
2506 return (uint32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 10); } | |
2507 __ai float32x2_t vpadd_f32(float32x2_t __a, float32x2_t __b) { \ | |
2508 return (float32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 4); } | |
2509 | |
2510 __ai int16x4_t vpaddl_s8(int8x8_t __a) { \ | |
2511 return (int16x4_t)__builtin_neon_vpaddl_v(__a, 1); } | |
2512 __ai int32x2_t vpaddl_s16(int16x4_t __a) { \ | |
2513 return (int32x2_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 2); } | |
2514 __ai int64x1_t vpaddl_s32(int32x2_t __a) { \ | |
2515 return (int64x1_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 3); } | |
2516 __ai uint16x4_t vpaddl_u8(uint8x8_t __a) { \ | |
2517 return (uint16x4_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 9); } | |
2518 __ai uint32x2_t vpaddl_u16(uint16x4_t __a) { \ | |
2519 return (uint32x2_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 10); } | |
2520 __ai uint64x1_t vpaddl_u32(uint32x2_t __a) { \ | |
2521 return (uint64x1_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 11); } | |
2522 __ai int16x8_t vpaddlq_s8(int8x16_t __a) { \ | |
2523 return (int16x8_t)__builtin_neon_vpaddlq_v(__a, 17); } | |
2524 __ai int32x4_t vpaddlq_s16(int16x8_t __a) { \ | |
2525 return (int32x4_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 18); } | |
2526 __ai int64x2_t vpaddlq_s32(int32x4_t __a) { \ | |
2527 return (int64x2_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 19); } | |
2528 __ai uint16x8_t vpaddlq_u8(uint8x16_t __a) { \ | |
2529 return (uint16x8_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 25); } | |
2530 __ai uint32x4_t vpaddlq_u16(uint16x8_t __a) { \ | |
2531 return (uint32x4_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 26); } | |
2532 __ai uint64x2_t vpaddlq_u32(uint32x4_t __a) { \ | |
2533 return (uint64x2_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 27); } | |
2534 | |
2535 __ai int8x8_t vpmax_s8(int8x8_t __a, int8x8_t __b) { \ | |
2536 return (int8x8_t)__builtin_neon_vpmax_v(__a, __b, 0); } | |
2537 __ai int16x4_t vpmax_s16(int16x4_t __a, int16x4_t __b) { \ | |
2538 return (int16x4_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 1); } | |
2539 __ai int32x2_t vpmax_s32(int32x2_t __a, int32x2_t __b) { \ | |
2540 return (int32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 2); } | |
2541 __ai uint8x8_t vpmax_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
2542 return (uint8x8_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 8); } | |
2543 __ai uint16x4_t vpmax_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
2544 return (uint16x4_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 9); } | |
2545 __ai uint32x2_t vpmax_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
2546 return (uint32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 10); } | |
2547 __ai float32x2_t vpmax_f32(float32x2_t __a, float32x2_t __b) { \ | |
2548 return (float32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 4); } | |
2549 | |
2550 __ai int8x8_t vpmin_s8(int8x8_t __a, int8x8_t __b) { \ | |
2551 return (int8x8_t)__builtin_neon_vpmin_v(__a, __b, 0); } | |
2552 __ai int16x4_t vpmin_s16(int16x4_t __a, int16x4_t __b) { \ | |
2553 return (int16x4_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 1); } | |
2554 __ai int32x2_t vpmin_s32(int32x2_t __a, int32x2_t __b) { \ | |
2555 return (int32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 2); } | |
2556 __ai uint8x8_t vpmin_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
2557 return (uint8x8_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 8); } | |
2558 __ai uint16x4_t vpmin_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
2559 return (uint16x4_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 9); } | |
2560 __ai uint32x2_t vpmin_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
2561 return (uint32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 10); } | |
2562 __ai float32x2_t vpmin_f32(float32x2_t __a, float32x2_t __b) { \ | |
2563 return (float32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 4); } | |
2564 | |
2565 __ai int8x8_t vqabs_s8(int8x8_t __a) { \ | |
2566 return (int8x8_t)__builtin_neon_vqabs_v(__a, 0); } | |
2567 __ai int16x4_t vqabs_s16(int16x4_t __a) { \ | |
2568 return (int16x4_t)__builtin_neon_vqabs_v((int8x8_t)__a, 1); } | |
2569 __ai int32x2_t vqabs_s32(int32x2_t __a) { \ | |
2570 return (int32x2_t)__builtin_neon_vqabs_v((int8x8_t)__a, 2); } | |
2571 __ai int8x16_t vqabsq_s8(int8x16_t __a) { \ | |
2572 return (int8x16_t)__builtin_neon_vqabsq_v(__a, 16); } | |
2573 __ai int16x8_t vqabsq_s16(int16x8_t __a) { \ | |
2574 return (int16x8_t)__builtin_neon_vqabsq_v((int8x16_t)__a, 17); } | |
2575 __ai int32x4_t vqabsq_s32(int32x4_t __a) { \ | |
2576 return (int32x4_t)__builtin_neon_vqabsq_v((int8x16_t)__a, 18); } | |
2577 | |
2578 __ai int8x8_t vqadd_s8(int8x8_t __a, int8x8_t __b) { \ | |
2579 return (int8x8_t)__builtin_neon_vqadd_v(__a, __b, 0); } | |
2580 __ai int16x4_t vqadd_s16(int16x4_t __a, int16x4_t __b) { \ | |
2581 return (int16x4_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } | |
2582 __ai int32x2_t vqadd_s32(int32x2_t __a, int32x2_t __b) { \ | |
2583 return (int32x2_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } | |
2584 __ai int64x1_t vqadd_s64(int64x1_t __a, int64x1_t __b) { \ | |
2585 return (int64x1_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 3); } | |
2586 __ai uint8x8_t vqadd_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
2587 return (uint8x8_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 8); } | |
2588 __ai uint16x4_t vqadd_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
2589 return (uint16x4_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 9); } | |
2590 __ai uint32x2_t vqadd_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
2591 return (uint32x2_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 10); } | |
2592 __ai uint64x1_t vqadd_u64(uint64x1_t __a, uint64x1_t __b) { \ | |
2593 return (uint64x1_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 11); } | |
2594 __ai int8x16_t vqaddq_s8(int8x16_t __a, int8x16_t __b) { \ | |
2595 return (int8x16_t)__builtin_neon_vqaddq_v(__a, __b, 16); } | |
2596 __ai int16x8_t vqaddq_s16(int16x8_t __a, int16x8_t __b) { \ | |
2597 return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 17);
} | |
2598 __ai int32x4_t vqaddq_s32(int32x4_t __a, int32x4_t __b) { \ | |
2599 return (int32x4_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 18);
} | |
2600 __ai int64x2_t vqaddq_s64(int64x2_t __a, int64x2_t __b) { \ | |
2601 return (int64x2_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 19);
} | |
2602 __ai uint8x16_t vqaddq_u8(uint8x16_t __a, uint8x16_t __b) { \ | |
2603 return (uint8x16_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 24)
; } | |
2604 __ai uint16x8_t vqaddq_u16(uint16x8_t __a, uint16x8_t __b) { \ | |
2605 return (uint16x8_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 25)
; } | |
2606 __ai uint32x4_t vqaddq_u32(uint32x4_t __a, uint32x4_t __b) { \ | |
2607 return (uint32x4_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 26)
; } | |
2608 __ai uint64x2_t vqaddq_u64(uint64x2_t __a, uint64x2_t __b) { \ | |
2609 return (uint64x2_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 27)
; } | |
2610 | |
2611 __ai int32x4_t vqdmlal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { \ | |
2612 return (int32x4_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int
8x8_t)__c, 18); } | |
2613 __ai int64x2_t vqdmlal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { \ | |
2614 return (int64x2_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int
8x8_t)__c, 19); } | |
2615 | |
2616 #define vqdmlal_lane_s16(a, b, c, __d) __extension__ ({ \ | |
2617 int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ | |
2618 vqdmlal_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d));
}) | |
2619 #define vqdmlal_lane_s32(a, b, c, __d) __extension__ ({ \ | |
2620 int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ | |
2621 vqdmlal_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); }) | |
2622 | |
2623 __ai int32x4_t vqdmlal_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { \ | |
2624 return (int32x4_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int
8x8_t)(int16x4_t){ __c, __c, __c, __c }, 18); } | |
2625 __ai int64x2_t vqdmlal_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { \ | |
2626 return (int64x2_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int
8x8_t)(int32x2_t){ __c, __c }, 19); } | |
2627 | |
2628 __ai int32x4_t vqdmlsl_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { \ | |
2629 return (int32x4_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int
8x8_t)__c, 18); } | |
2630 __ai int64x2_t vqdmlsl_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { \ | |
2631 return (int64x2_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int
8x8_t)__c, 19); } | |
2632 | |
2633 #define vqdmlsl_lane_s16(a, b, c, __d) __extension__ ({ \ | |
2634 int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ | |
2635 vqdmlsl_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d));
}) | |
2636 #define vqdmlsl_lane_s32(a, b, c, __d) __extension__ ({ \ | |
2637 int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ | |
2638 vqdmlsl_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); }) | |
2639 | |
2640 __ai int32x4_t vqdmlsl_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { \ | |
2641 return (int32x4_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int
8x8_t)(int16x4_t){ __c, __c, __c, __c }, 18); } | |
2642 __ai int64x2_t vqdmlsl_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { \ | |
2643 return (int64x2_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int
8x8_t)(int32x2_t){ __c, __c }, 19); } | |
2644 | |
2645 __ai int16x4_t vqdmulh_s16(int16x4_t __a, int16x4_t __b) { \ | |
2646 return (int16x4_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)__b, 1); } | |
2647 __ai int32x2_t vqdmulh_s32(int32x2_t __a, int32x2_t __b) { \ | |
2648 return (int32x2_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)__b, 2); } | |
2649 __ai int16x8_t vqdmulhq_s16(int16x8_t __a, int16x8_t __b) { \ | |
2650 return (int16x8_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 17
); } | |
2651 __ai int32x4_t vqdmulhq_s32(int32x4_t __a, int32x4_t __b) { \ | |
2652 return (int32x4_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 18
); } | |
2653 | |
2654 #define vqdmulh_lane_s16(a, b, __c) __extension__ ({ \ | |
2655 int16x4_t __a = (a); int16x4_t __b = (b); \ | |
2656 vqdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) | |
2657 #define vqdmulh_lane_s32(a, b, __c) __extension__ ({ \ | |
2658 int32x2_t __a = (a); int32x2_t __b = (b); \ | |
2659 vqdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) | |
2660 #define vqdmulhq_lane_s16(a, b, __c) __extension__ ({ \ | |
2661 int16x8_t __a = (a); int16x4_t __b = (b); \ | |
2662 vqdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, _
_c, __c, __c)); }) | |
2663 #define vqdmulhq_lane_s32(a, b, __c) __extension__ ({ \ | |
2664 int32x4_t __a = (a); int32x2_t __b = (b); \ | |
2665 vqdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) | |
2666 | |
2667 __ai int16x4_t vqdmulh_n_s16(int16x4_t __a, int16_t __b) { \ | |
2668 return (int16x4_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)(int16x4_t
){ __b, __b, __b, __b }, 1); } | |
2669 __ai int32x2_t vqdmulh_n_s32(int32x2_t __a, int32_t __b) { \ | |
2670 return (int32x2_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)(int32x2_t
){ __b, __b }, 2); } | |
2671 __ai int16x8_t vqdmulhq_n_s16(int16x8_t __a, int16_t __b) { \ | |
2672 return (int16x8_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)(int16x
8_t){ __b, __b, __b, __b, __b, __b, __b, __b }, 17); } | |
2673 __ai int32x4_t vqdmulhq_n_s32(int32x4_t __a, int32_t __b) { \ | |
2674 return (int32x4_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)(int32x
4_t){ __b, __b, __b, __b }, 18); } | |
2675 | |
2676 __ai int32x4_t vqdmull_s16(int16x4_t __a, int16x4_t __b) { \ | |
2677 return (int32x4_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)__b, 18);
} | |
2678 __ai int64x2_t vqdmull_s32(int32x2_t __a, int32x2_t __b) { \ | |
2679 return (int64x2_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)__b, 19);
} | |
2680 | |
2681 #define vqdmull_lane_s16(a, b, __c) __extension__ ({ \ | |
2682 int16x4_t __a = (a); int16x4_t __b = (b); \ | |
2683 vqdmull_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) | |
2684 #define vqdmull_lane_s32(a, b, __c) __extension__ ({ \ | |
2685 int32x2_t __a = (a); int32x2_t __b = (b); \ | |
2686 vqdmull_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) | |
2687 | |
2688 __ai int32x4_t vqdmull_n_s16(int16x4_t __a, int16_t __b) { \ | |
2689 return (int32x4_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)(int16x4_t
){ __b, __b, __b, __b }, 18); } | |
2690 __ai int64x2_t vqdmull_n_s32(int32x2_t __a, int32_t __b) { \ | |
2691 return (int64x2_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)(int32x2_t
){ __b, __b }, 19); } | |
2692 | |
2693 __ai int8x8_t vqmovn_s16(int16x8_t __a) { \ | |
2694 return (int8x8_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 0); } | |
2695 __ai int16x4_t vqmovn_s32(int32x4_t __a) { \ | |
2696 return (int16x4_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 1); } | |
2697 __ai int32x2_t vqmovn_s64(int64x2_t __a) { \ | |
2698 return (int32x2_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 2); } | |
2699 __ai uint8x8_t vqmovn_u16(uint16x8_t __a) { \ | |
2700 return (uint8x8_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 8); } | |
2701 __ai uint16x4_t vqmovn_u32(uint32x4_t __a) { \ | |
2702 return (uint16x4_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 9); } | |
2703 __ai uint32x2_t vqmovn_u64(uint64x2_t __a) { \ | |
2704 return (uint32x2_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 10); } | |
2705 | |
2706 __ai uint8x8_t vqmovun_s16(int16x8_t __a) { \ | |
2707 return (uint8x8_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 8); } | |
2708 __ai uint16x4_t vqmovun_s32(int32x4_t __a) { \ | |
2709 return (uint16x4_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 9); } | |
2710 __ai uint32x2_t vqmovun_s64(int64x2_t __a) { \ | |
2711 return (uint32x2_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 10); } | |
2712 | |
2713 __ai int8x8_t vqneg_s8(int8x8_t __a) { \ | |
2714 return (int8x8_t)__builtin_neon_vqneg_v(__a, 0); } | |
2715 __ai int16x4_t vqneg_s16(int16x4_t __a) { \ | |
2716 return (int16x4_t)__builtin_neon_vqneg_v((int8x8_t)__a, 1); } | |
2717 __ai int32x2_t vqneg_s32(int32x2_t __a) { \ | |
2718 return (int32x2_t)__builtin_neon_vqneg_v((int8x8_t)__a, 2); } | |
2719 __ai int8x16_t vqnegq_s8(int8x16_t __a) { \ | |
2720 return (int8x16_t)__builtin_neon_vqnegq_v(__a, 16); } | |
2721 __ai int16x8_t vqnegq_s16(int16x8_t __a) { \ | |
2722 return (int16x8_t)__builtin_neon_vqnegq_v((int8x16_t)__a, 17); } | |
2723 __ai int32x4_t vqnegq_s32(int32x4_t __a) { \ | |
2724 return (int32x4_t)__builtin_neon_vqnegq_v((int8x16_t)__a, 18); } | |
2725 | |
2726 __ai int16x4_t vqrdmulh_s16(int16x4_t __a, int16x4_t __b) { \ | |
2727 return (int16x4_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)__b, 1);
} | |
2728 __ai int32x2_t vqrdmulh_s32(int32x2_t __a, int32x2_t __b) { \ | |
2729 return (int32x2_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)__b, 2);
} | |
2730 __ai int16x8_t vqrdmulhq_s16(int16x8_t __a, int16x8_t __b) { \ | |
2731 return (int16x8_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 1
7); } | |
2732 __ai int32x4_t vqrdmulhq_s32(int32x4_t __a, int32x4_t __b) { \ | |
2733 return (int32x4_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 1
8); } | |
2734 | |
2735 #define vqrdmulh_lane_s16(a, b, __c) __extension__ ({ \ | |
2736 int16x4_t __a = (a); int16x4_t __b = (b); \ | |
2737 vqrdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) | |
2738 #define vqrdmulh_lane_s32(a, b, __c) __extension__ ({ \ | |
2739 int32x2_t __a = (a); int32x2_t __b = (b); \ | |
2740 vqrdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) | |
2741 #define vqrdmulhq_lane_s16(a, b, __c) __extension__ ({ \ | |
2742 int16x8_t __a = (a); int16x4_t __b = (b); \ | |
2743 vqrdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c,
__c, __c, __c)); }) | |
2744 #define vqrdmulhq_lane_s32(a, b, __c) __extension__ ({ \ | |
2745 int32x4_t __a = (a); int32x2_t __b = (b); \ | |
2746 vqrdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) | |
2747 | |
2748 __ai int16x4_t vqrdmulh_n_s16(int16x4_t __a, int16_t __b) { \ | |
2749 return (int16x4_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)(int16x4_
t){ __b, __b, __b, __b }, 1); } | |
2750 __ai int32x2_t vqrdmulh_n_s32(int32x2_t __a, int32_t __b) { \ | |
2751 return (int32x2_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)(int32x2_
t){ __b, __b }, 2); } | |
2752 __ai int16x8_t vqrdmulhq_n_s16(int16x8_t __a, int16_t __b) { \ | |
2753 return (int16x8_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)(int16
x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }, 17); } | |
2754 __ai int32x4_t vqrdmulhq_n_s32(int32x4_t __a, int32_t __b) { \ | |
2755 return (int32x4_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)(int32
x4_t){ __b, __b, __b, __b }, 18); } | |
2756 | |
/* VQRSHL wrappers (saturating rounding shift left).  Note that even the
 * unsigned variants take a *signed* shift-count vector __b, while the data
 * operand __a carries the unsignedness; the last argument is the internal
 * type code (s8=0..s64=3, +8 unsigned, +16 for 128-bit "q" forms). */
__ai int8x8_t vqrshl_s8(int8x8_t __a, int8x8_t __b) { \
  return (int8x8_t)__builtin_neon_vqrshl_v(__a, __b, 0); }
__ai int16x4_t vqrshl_s16(int16x4_t __a, int16x4_t __b) { \
  return (int16x4_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 1); }
__ai int32x2_t vqrshl_s32(int32x2_t __a, int32x2_t __b) { \
  return (int32x2_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 2); }
__ai int64x1_t vqrshl_s64(int64x1_t __a, int64x1_t __b) { \
  return (int64x1_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 3); }
__ai uint8x8_t vqrshl_u8(uint8x8_t __a, int8x8_t __b) { \
  return (uint8x8_t)__builtin_neon_vqrshl_v((int8x8_t)__a, __b, 8); }
__ai uint16x4_t vqrshl_u16(uint16x4_t __a, int16x4_t __b) { \
  return (uint16x4_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 9); }
__ai uint32x2_t vqrshl_u32(uint32x2_t __a, int32x2_t __b) { \
  return (uint32x2_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 10); }
__ai uint64x1_t vqrshl_u64(uint64x1_t __a, int64x1_t __b) { \
  return (uint64x1_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 11); }
__ai int8x16_t vqrshlq_s8(int8x16_t __a, int8x16_t __b) { \
  return (int8x16_t)__builtin_neon_vqrshlq_v(__a, __b, 16); }
__ai int16x8_t vqrshlq_s16(int16x8_t __a, int16x8_t __b) { \
  return (int16x8_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 17); }
__ai int32x4_t vqrshlq_s32(int32x4_t __a, int32x4_t __b) { \
  return (int32x4_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 18); }
__ai int64x2_t vqrshlq_s64(int64x2_t __a, int64x2_t __b) { \
  return (int64x2_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 19); }
__ai uint8x16_t vqrshlq_u8(uint8x16_t __a, int8x16_t __b) { \
  return (uint8x16_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, __b, 24); }
__ai uint16x8_t vqrshlq_u16(uint16x8_t __a, int16x8_t __b) { \
  return (uint16x8_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 25); }
__ai uint32x4_t vqrshlq_u32(uint32x4_t __a, int32x4_t __b) { \
  return (uint32x4_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 26); }
__ai uint64x2_t vqrshlq_u64(uint64x2_t __a, int64x2_t __b) { \
  return (uint64x2_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 27); }
2789 | |
/* VQRSHRN (saturating rounding shift right, narrow) by immediate.  Implemented
 * as macros, presumably so the shift count __b reaches the builtin as a
 * compile-time constant; `a` is copied into a typed local __a so it is
 * evaluated exactly once and type-checked.  Type code names the *narrow*
 * result element (s8=0, s16=1, s32=2; +8 for unsigned). */
#define vqrshrn_n_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  (int8x8_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 0); })
#define vqrshrn_n_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (int16x4_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 1); })
#define vqrshrn_n_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (int32x2_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 2); })
#define vqrshrn_n_u16(a, __b) __extension__ ({ \
  uint16x8_t __a = (a); \
  (uint8x8_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 8); })
#define vqrshrn_n_u32(a, __b) __extension__ ({ \
  uint32x4_t __a = (a); \
  (uint16x4_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 9); })
#define vqrshrn_n_u64(a, __b) __extension__ ({ \
  uint64x2_t __a = (a); \
  (uint32x2_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 10); })
2808 | |
/* VQRSHRUN: signed input, *unsigned* narrowed result (note unsigned type
 * codes 8/9/10 with signed __a).  Same macro/statement-expression pattern as
 * vqrshrn_n above. */
#define vqrshrun_n_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  (uint8x8_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 8); })
#define vqrshrun_n_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (uint16x4_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 9); })
#define vqrshrun_n_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (uint32x2_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 10); })
2818 | |
/* VQSHL wrappers (saturating shift left by a per-lane vector count).  Same
 * shape as the vqrshl family above, minus the rounding: unsigned data with a
 * signed shift vector, type code = element type (+8 unsigned, +16 "q"). */
__ai int8x8_t vqshl_s8(int8x8_t __a, int8x8_t __b) { \
  return (int8x8_t)__builtin_neon_vqshl_v(__a, __b, 0); }
__ai int16x4_t vqshl_s16(int16x4_t __a, int16x4_t __b) { \
  return (int16x4_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 1); }
__ai int32x2_t vqshl_s32(int32x2_t __a, int32x2_t __b) { \
  return (int32x2_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 2); }
__ai int64x1_t vqshl_s64(int64x1_t __a, int64x1_t __b) { \
  return (int64x1_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 3); }
__ai uint8x8_t vqshl_u8(uint8x8_t __a, int8x8_t __b) { \
  return (uint8x8_t)__builtin_neon_vqshl_v((int8x8_t)__a, __b, 8); }
__ai uint16x4_t vqshl_u16(uint16x4_t __a, int16x4_t __b) { \
  return (uint16x4_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 9); }
__ai uint32x2_t vqshl_u32(uint32x2_t __a, int32x2_t __b) { \
  return (uint32x2_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 10); }
__ai uint64x1_t vqshl_u64(uint64x1_t __a, int64x1_t __b) { \
  return (uint64x1_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 11); }
__ai int8x16_t vqshlq_s8(int8x16_t __a, int8x16_t __b) { \
  return (int8x16_t)__builtin_neon_vqshlq_v(__a, __b, 16); }
__ai int16x8_t vqshlq_s16(int16x8_t __a, int16x8_t __b) { \
  return (int16x8_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 17); }
__ai int32x4_t vqshlq_s32(int32x4_t __a, int32x4_t __b) { \
  return (int32x4_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 18); }
__ai int64x2_t vqshlq_s64(int64x2_t __a, int64x2_t __b) { \
  return (int64x2_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 19); }
__ai uint8x16_t vqshlq_u8(uint8x16_t __a, int8x16_t __b) { \
  return (uint8x16_t)__builtin_neon_vqshlq_v((int8x16_t)__a, __b, 24); }
__ai uint16x8_t vqshlq_u16(uint16x8_t __a, int16x8_t __b) { \
  return (uint16x8_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 25); }
__ai uint32x4_t vqshlq_u32(uint32x4_t __a, int32x4_t __b) { \
  return (uint32x4_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 26); }
__ai uint64x2_t vqshlq_u64(uint64x2_t __a, int64x2_t __b) { \
  return (uint64x2_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 27); }
2851 | |
/* VQSHLU by immediate: signed input shifted left with *unsigned* saturating
 * result (hence unsigned type codes 8..11 / 24..27 with signed __a).  Macro
 * form keeps the shift count __b a constant expression; `a` is captured once
 * in a typed local. */
#define vqshlu_n_s8(a, __b) __extension__ ({ \
  int8x8_t __a = (a); \
  (uint8x8_t)__builtin_neon_vqshlu_n_v(__a, __b, 8); })
#define vqshlu_n_s16(a, __b) __extension__ ({ \
  int16x4_t __a = (a); \
  (uint16x4_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 9); })
#define vqshlu_n_s32(a, __b) __extension__ ({ \
  int32x2_t __a = (a); \
  (uint32x2_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 10); })
#define vqshlu_n_s64(a, __b) __extension__ ({ \
  int64x1_t __a = (a); \
  (uint64x1_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 11); })
#define vqshluq_n_s8(a, __b) __extension__ ({ \
  int8x16_t __a = (a); \
  (uint8x16_t)__builtin_neon_vqshluq_n_v(__a, __b, 24); })
#define vqshluq_n_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  (uint16x8_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 25); })
#define vqshluq_n_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (uint32x4_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 26); })
#define vqshluq_n_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (uint64x2_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 27); })
2876 | |
/* VQSHL by immediate (saturating shift left by constant).  Macro form so __b
 * stays a constant expression for the builtin; `a` is evaluated once into a
 * typed local.  Type codes as elsewhere: s8=0..s64=3, +8 unsigned, +16 "q". */
#define vqshl_n_s8(a, __b) __extension__ ({ \
  int8x8_t __a = (a); \
  (int8x8_t)__builtin_neon_vqshl_n_v(__a, __b, 0); })
#define vqshl_n_s16(a, __b) __extension__ ({ \
  int16x4_t __a = (a); \
  (int16x4_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 1); })
#define vqshl_n_s32(a, __b) __extension__ ({ \
  int32x2_t __a = (a); \
  (int32x2_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 2); })
#define vqshl_n_s64(a, __b) __extension__ ({ \
  int64x1_t __a = (a); \
  (int64x1_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 3); })
#define vqshl_n_u8(a, __b) __extension__ ({ \
  uint8x8_t __a = (a); \
  (uint8x8_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 8); })
#define vqshl_n_u16(a, __b) __extension__ ({ \
  uint16x4_t __a = (a); \
  (uint16x4_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 9); })
#define vqshl_n_u32(a, __b) __extension__ ({ \
  uint32x2_t __a = (a); \
  (uint32x2_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 10); })
#define vqshl_n_u64(a, __b) __extension__ ({ \
  uint64x1_t __a = (a); \
  (uint64x1_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 11); })
#define vqshlq_n_s8(a, __b) __extension__ ({ \
  int8x16_t __a = (a); \
  (int8x16_t)__builtin_neon_vqshlq_n_v(__a, __b, 16); })
#define vqshlq_n_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  (int16x8_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 17); })
#define vqshlq_n_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (int32x4_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 18); })
#define vqshlq_n_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (int64x2_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 19); })
#define vqshlq_n_u8(a, __b) __extension__ ({ \
  uint8x16_t __a = (a); \
  (uint8x16_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 24); })
#define vqshlq_n_u16(a, __b) __extension__ ({ \
  uint16x8_t __a = (a); \
  (uint16x8_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 25); })
#define vqshlq_n_u32(a, __b) __extension__ ({ \
  uint32x4_t __a = (a); \
  (uint32x4_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 26); })
#define vqshlq_n_u64(a, __b) __extension__ ({ \
  uint64x2_t __a = (a); \
  (uint64x2_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 27); })
2925 | |
/* VQSHRN (saturating shift right, narrow) by immediate — non-rounding sibling
 * of vqrshrn_n above; type code names the narrow result element. */
#define vqshrn_n_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  (int8x8_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 0); })
#define vqshrn_n_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (int16x4_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 1); })
#define vqshrn_n_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (int32x2_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 2); })
#define vqshrn_n_u16(a, __b) __extension__ ({ \
  uint16x8_t __a = (a); \
  (uint8x8_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 8); })
#define vqshrn_n_u32(a, __b) __extension__ ({ \
  uint32x4_t __a = (a); \
  (uint16x4_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 9); })
#define vqshrn_n_u64(a, __b) __extension__ ({ \
  uint64x2_t __a = (a); \
  (uint32x2_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 10); })
2944 | |
/* VQSHRUN: signed input, unsigned narrowed result (unsigned type codes with
 * signed __a) — non-rounding sibling of vqrshrun_n above. */
#define vqshrun_n_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  (uint8x8_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 8); })
#define vqshrun_n_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (uint16x4_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 9); })
#define vqshrun_n_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (uint32x2_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 10); })
2954 | |
/* VQSUB wrappers (saturating subtract).  Unlike the shift families, both
 * operands share the same (possibly unsigned) element type.  Type codes:
 * s8=0..s64=3, +8 unsigned, +16 for 128-bit "q" forms. */
__ai int8x8_t vqsub_s8(int8x8_t __a, int8x8_t __b) { \
  return (int8x8_t)__builtin_neon_vqsub_v(__a, __b, 0); }
__ai int16x4_t vqsub_s16(int16x4_t __a, int16x4_t __b) { \
  return (int16x4_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 1); }
__ai int32x2_t vqsub_s32(int32x2_t __a, int32x2_t __b) { \
  return (int32x2_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 2); }
__ai int64x1_t vqsub_s64(int64x1_t __a, int64x1_t __b) { \
  return (int64x1_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 3); }
__ai uint8x8_t vqsub_u8(uint8x8_t __a, uint8x8_t __b) { \
  return (uint8x8_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 8); }
__ai uint16x4_t vqsub_u16(uint16x4_t __a, uint16x4_t __b) { \
  return (uint16x4_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 9); }
__ai uint32x2_t vqsub_u32(uint32x2_t __a, uint32x2_t __b) { \
  return (uint32x2_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 10); }
__ai uint64x1_t vqsub_u64(uint64x1_t __a, uint64x1_t __b) { \
  return (uint64x1_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 11); }
__ai int8x16_t vqsubq_s8(int8x16_t __a, int8x16_t __b) { \
  return (int8x16_t)__builtin_neon_vqsubq_v(__a, __b, 16); }
__ai int16x8_t vqsubq_s16(int16x8_t __a, int16x8_t __b) { \
  return (int16x8_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 17); }
__ai int32x4_t vqsubq_s32(int32x4_t __a, int32x4_t __b) { \
  return (int32x4_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 18); }
__ai int64x2_t vqsubq_s64(int64x2_t __a, int64x2_t __b) { \
  return (int64x2_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 19); }
__ai uint8x16_t vqsubq_u8(uint8x16_t __a, uint8x16_t __b) { \
  return (uint8x16_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 24); }
__ai uint16x8_t vqsubq_u16(uint16x8_t __a, uint16x8_t __b) { \
  return (uint16x8_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 25); }
__ai uint32x4_t vqsubq_u32(uint32x4_t __a, uint32x4_t __b) { \
  return (uint32x4_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 26); }
__ai uint64x2_t vqsubq_u64(uint64x2_t __a, uint64x2_t __b) { \
  return (uint64x2_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 27); }
2987 | |
/* VRADDHN wrappers: 128-bit inputs, 64-bit narrowed result.  The type code
 * names the *result* (d-register) element type, which is why 128-bit inputs
 * carry codes 0..10 rather than the +16 "q" range. */
__ai int8x8_t vraddhn_s16(int16x8_t __a, int16x8_t __b) { \
  return (int8x8_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 0); }
__ai int16x4_t vraddhn_s32(int32x4_t __a, int32x4_t __b) { \
  return (int16x4_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 1); }
__ai int32x2_t vraddhn_s64(int64x2_t __a, int64x2_t __b) { \
  return (int32x2_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 2); }
__ai uint8x8_t vraddhn_u16(uint16x8_t __a, uint16x8_t __b) { \
  return (uint8x8_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 8); }
__ai uint16x4_t vraddhn_u32(uint32x4_t __a, uint32x4_t __b) { \
  return (uint16x4_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 9); }
__ai uint32x2_t vraddhn_u64(uint64x2_t __a, uint64x2_t __b) { \
  return (uint32x2_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 10); }
3000 | |
/* VRECPE wrappers (reciprocal estimate).  Type codes observed here:
 * 4 = float32x2, 10 = uint32x2, 20 = float32x4, 26 = uint32x4. */
__ai float32x2_t vrecpe_f32(float32x2_t __a) { \
  return (float32x2_t)__builtin_neon_vrecpe_v((int8x8_t)__a, 4); }
__ai uint32x2_t vrecpe_u32(uint32x2_t __a) { \
  return (uint32x2_t)__builtin_neon_vrecpe_v((int8x8_t)__a, 10); }
__ai float32x4_t vrecpeq_f32(float32x4_t __a) { \
  return (float32x4_t)__builtin_neon_vrecpeq_v((int8x16_t)__a, 20); }
__ai uint32x4_t vrecpeq_u32(uint32x4_t __a) { \
  return (uint32x4_t)__builtin_neon_vrecpeq_v((int8x16_t)__a, 26); }
3009 | |
/* VRECPS wrappers (Newton-Raphson reciprocal step); float32 only, codes
 * 4 (64-bit) and 20 (128-bit) as in the vrecpe family above. */
__ai float32x2_t vrecps_f32(float32x2_t __a, float32x2_t __b) { \
  return (float32x2_t)__builtin_neon_vrecps_v((int8x8_t)__a, (int8x8_t)__b, 4); }
__ai float32x4_t vrecpsq_f32(float32x4_t __a, float32x4_t __b) { \
  return (float32x4_t)__builtin_neon_vrecpsq_v((int8x16_t)__a, (int8x16_t)__b, 20); }
3014 | |
/* vreinterpret_s8_*: reinterpret any other 64-bit vector type as int8x8_t.
 * Implemented as a plain vector cast — the bit pattern is reused, not
 * value-converted.  (Same pattern for every vreinterpret family below.) */
__ai int8x8_t vreinterpret_s8_s16(int16x4_t __a) { \
  return (int8x8_t)__a; }
__ai int8x8_t vreinterpret_s8_s32(int32x2_t __a) { \
  return (int8x8_t)__a; }
__ai int8x8_t vreinterpret_s8_s64(int64x1_t __a) { \
  return (int8x8_t)__a; }
__ai int8x8_t vreinterpret_s8_u8(uint8x8_t __a) { \
  return (int8x8_t)__a; }
__ai int8x8_t vreinterpret_s8_u16(uint16x4_t __a) { \
  return (int8x8_t)__a; }
__ai int8x8_t vreinterpret_s8_u32(uint32x2_t __a) { \
  return (int8x8_t)__a; }
__ai int8x8_t vreinterpret_s8_u64(uint64x1_t __a) { \
  return (int8x8_t)__a; }
__ai int8x8_t vreinterpret_s8_f16(float16x4_t __a) { \
  return (int8x8_t)__a; }
__ai int8x8_t vreinterpret_s8_f32(float32x2_t __a) { \
  return (int8x8_t)__a; }
__ai int8x8_t vreinterpret_s8_p8(poly8x8_t __a) { \
  return (int8x8_t)__a; }
__ai int8x8_t vreinterpret_s8_p16(poly16x4_t __a) { \
  return (int8x8_t)__a; }
/* vreinterpret_s16_*: bit-pattern casts from every other 64-bit vector type
 * to int16x4_t. */
__ai int16x4_t vreinterpret_s16_s8(int8x8_t __a) { \
  return (int16x4_t)__a; }
__ai int16x4_t vreinterpret_s16_s32(int32x2_t __a) { \
  return (int16x4_t)__a; }
__ai int16x4_t vreinterpret_s16_s64(int64x1_t __a) { \
  return (int16x4_t)__a; }
__ai int16x4_t vreinterpret_s16_u8(uint8x8_t __a) { \
  return (int16x4_t)__a; }
__ai int16x4_t vreinterpret_s16_u16(uint16x4_t __a) { \
  return (int16x4_t)__a; }
__ai int16x4_t vreinterpret_s16_u32(uint32x2_t __a) { \
  return (int16x4_t)__a; }
__ai int16x4_t vreinterpret_s16_u64(uint64x1_t __a) { \
  return (int16x4_t)__a; }
__ai int16x4_t vreinterpret_s16_f16(float16x4_t __a) { \
  return (int16x4_t)__a; }
__ai int16x4_t vreinterpret_s16_f32(float32x2_t __a) { \
  return (int16x4_t)__a; }
__ai int16x4_t vreinterpret_s16_p8(poly8x8_t __a) { \
  return (int16x4_t)__a; }
__ai int16x4_t vreinterpret_s16_p16(poly16x4_t __a) { \
  return (int16x4_t)__a; }
/* vreinterpret_s32_*: bit-pattern casts from every other 64-bit vector type
 * to int32x2_t. */
__ai int32x2_t vreinterpret_s32_s8(int8x8_t __a) { \
  return (int32x2_t)__a; }
__ai int32x2_t vreinterpret_s32_s16(int16x4_t __a) { \
  return (int32x2_t)__a; }
__ai int32x2_t vreinterpret_s32_s64(int64x1_t __a) { \
  return (int32x2_t)__a; }
__ai int32x2_t vreinterpret_s32_u8(uint8x8_t __a) { \
  return (int32x2_t)__a; }
__ai int32x2_t vreinterpret_s32_u16(uint16x4_t __a) { \
  return (int32x2_t)__a; }
__ai int32x2_t vreinterpret_s32_u32(uint32x2_t __a) { \
  return (int32x2_t)__a; }
__ai int32x2_t vreinterpret_s32_u64(uint64x1_t __a) { \
  return (int32x2_t)__a; }
__ai int32x2_t vreinterpret_s32_f16(float16x4_t __a) { \
  return (int32x2_t)__a; }
__ai int32x2_t vreinterpret_s32_f32(float32x2_t __a) { \
  return (int32x2_t)__a; }
__ai int32x2_t vreinterpret_s32_p8(poly8x8_t __a) { \
  return (int32x2_t)__a; }
__ai int32x2_t vreinterpret_s32_p16(poly16x4_t __a) { \
  return (int32x2_t)__a; }
/* vreinterpret_s64_*: bit-pattern casts from every other 64-bit vector type
 * to int64x1_t. */
__ai int64x1_t vreinterpret_s64_s8(int8x8_t __a) { \
  return (int64x1_t)__a; }
__ai int64x1_t vreinterpret_s64_s16(int16x4_t __a) { \
  return (int64x1_t)__a; }
__ai int64x1_t vreinterpret_s64_s32(int32x2_t __a) { \
  return (int64x1_t)__a; }
__ai int64x1_t vreinterpret_s64_u8(uint8x8_t __a) { \
  return (int64x1_t)__a; }
__ai int64x1_t vreinterpret_s64_u16(uint16x4_t __a) { \
  return (int64x1_t)__a; }
__ai int64x1_t vreinterpret_s64_u32(uint32x2_t __a) { \
  return (int64x1_t)__a; }
__ai int64x1_t vreinterpret_s64_u64(uint64x1_t __a) { \
  return (int64x1_t)__a; }
__ai int64x1_t vreinterpret_s64_f16(float16x4_t __a) { \
  return (int64x1_t)__a; }
__ai int64x1_t vreinterpret_s64_f32(float32x2_t __a) { \
  return (int64x1_t)__a; }
__ai int64x1_t vreinterpret_s64_p8(poly8x8_t __a) { \
  return (int64x1_t)__a; }
__ai int64x1_t vreinterpret_s64_p16(poly16x4_t __a) { \
  return (int64x1_t)__a; }
/* vreinterpret_u8_*: bit-pattern casts from every other 64-bit vector type
 * to uint8x8_t. */
__ai uint8x8_t vreinterpret_u8_s8(int8x8_t __a) { \
  return (uint8x8_t)__a; }
__ai uint8x8_t vreinterpret_u8_s16(int16x4_t __a) { \
  return (uint8x8_t)__a; }
__ai uint8x8_t vreinterpret_u8_s32(int32x2_t __a) { \
  return (uint8x8_t)__a; }
__ai uint8x8_t vreinterpret_u8_s64(int64x1_t __a) { \
  return (uint8x8_t)__a; }
__ai uint8x8_t vreinterpret_u8_u16(uint16x4_t __a) { \
  return (uint8x8_t)__a; }
__ai uint8x8_t vreinterpret_u8_u32(uint32x2_t __a) { \
  return (uint8x8_t)__a; }
__ai uint8x8_t vreinterpret_u8_u64(uint64x1_t __a) { \
  return (uint8x8_t)__a; }
__ai uint8x8_t vreinterpret_u8_f16(float16x4_t __a) { \
  return (uint8x8_t)__a; }
__ai uint8x8_t vreinterpret_u8_f32(float32x2_t __a) { \
  return (uint8x8_t)__a; }
__ai uint8x8_t vreinterpret_u8_p8(poly8x8_t __a) { \
  return (uint8x8_t)__a; }
__ai uint8x8_t vreinterpret_u8_p16(poly16x4_t __a) { \
  return (uint8x8_t)__a; }
/* vreinterpret_u16_*: bit-pattern casts from every other 64-bit vector type
 * to uint16x4_t. */
__ai uint16x4_t vreinterpret_u16_s8(int8x8_t __a) { \
  return (uint16x4_t)__a; }
__ai uint16x4_t vreinterpret_u16_s16(int16x4_t __a) { \
  return (uint16x4_t)__a; }
__ai uint16x4_t vreinterpret_u16_s32(int32x2_t __a) { \
  return (uint16x4_t)__a; }
__ai uint16x4_t vreinterpret_u16_s64(int64x1_t __a) { \
  return (uint16x4_t)__a; }
__ai uint16x4_t vreinterpret_u16_u8(uint8x8_t __a) { \
  return (uint16x4_t)__a; }
__ai uint16x4_t vreinterpret_u16_u32(uint32x2_t __a) { \
  return (uint16x4_t)__a; }
__ai uint16x4_t vreinterpret_u16_u64(uint64x1_t __a) { \
  return (uint16x4_t)__a; }
__ai uint16x4_t vreinterpret_u16_f16(float16x4_t __a) { \
  return (uint16x4_t)__a; }
__ai uint16x4_t vreinterpret_u16_f32(float32x2_t __a) { \
  return (uint16x4_t)__a; }
__ai uint16x4_t vreinterpret_u16_p8(poly8x8_t __a) { \
  return (uint16x4_t)__a; }
__ai uint16x4_t vreinterpret_u16_p16(poly16x4_t __a) { \
  return (uint16x4_t)__a; }
/* vreinterpret_u32_*: bit-pattern casts from every other 64-bit vector type
 * to uint32x2_t. */
__ai uint32x2_t vreinterpret_u32_s8(int8x8_t __a) { \
  return (uint32x2_t)__a; }
__ai uint32x2_t vreinterpret_u32_s16(int16x4_t __a) { \
  return (uint32x2_t)__a; }
__ai uint32x2_t vreinterpret_u32_s32(int32x2_t __a) { \
  return (uint32x2_t)__a; }
__ai uint32x2_t vreinterpret_u32_s64(int64x1_t __a) { \
  return (uint32x2_t)__a; }
__ai uint32x2_t vreinterpret_u32_u8(uint8x8_t __a) { \
  return (uint32x2_t)__a; }
__ai uint32x2_t vreinterpret_u32_u16(uint16x4_t __a) { \
  return (uint32x2_t)__a; }
__ai uint32x2_t vreinterpret_u32_u64(uint64x1_t __a) { \
  return (uint32x2_t)__a; }
__ai uint32x2_t vreinterpret_u32_f16(float16x4_t __a) { \
  return (uint32x2_t)__a; }
__ai uint32x2_t vreinterpret_u32_f32(float32x2_t __a) { \
  return (uint32x2_t)__a; }
__ai uint32x2_t vreinterpret_u32_p8(poly8x8_t __a) { \
  return (uint32x2_t)__a; }
__ai uint32x2_t vreinterpret_u32_p16(poly16x4_t __a) { \
  return (uint32x2_t)__a; }
/* vreinterpret_u64_*: bit-pattern casts from every other 64-bit vector type
 * to uint64x1_t. */
__ai uint64x1_t vreinterpret_u64_s8(int8x8_t __a) { \
  return (uint64x1_t)__a; }
__ai uint64x1_t vreinterpret_u64_s16(int16x4_t __a) { \
  return (uint64x1_t)__a; }
__ai uint64x1_t vreinterpret_u64_s32(int32x2_t __a) { \
  return (uint64x1_t)__a; }
__ai uint64x1_t vreinterpret_u64_s64(int64x1_t __a) { \
  return (uint64x1_t)__a; }
__ai uint64x1_t vreinterpret_u64_u8(uint8x8_t __a) { \
  return (uint64x1_t)__a; }
__ai uint64x1_t vreinterpret_u64_u16(uint16x4_t __a) { \
  return (uint64x1_t)__a; }
__ai uint64x1_t vreinterpret_u64_u32(uint32x2_t __a) { \
  return (uint64x1_t)__a; }
__ai uint64x1_t vreinterpret_u64_f16(float16x4_t __a) { \
  return (uint64x1_t)__a; }
__ai uint64x1_t vreinterpret_u64_f32(float32x2_t __a) { \
  return (uint64x1_t)__a; }
__ai uint64x1_t vreinterpret_u64_p8(poly8x8_t __a) { \
  return (uint64x1_t)__a; }
__ai uint64x1_t vreinterpret_u64_p16(poly16x4_t __a) { \
  return (uint64x1_t)__a; }
/* vreinterpret_f16_*: bit-pattern casts from every other 64-bit vector type
 * to float16x4_t (whose element is a uint16_t storage type per the typedefs
 * at the top of this header). */
__ai float16x4_t vreinterpret_f16_s8(int8x8_t __a) { \
  return (float16x4_t)__a; }
__ai float16x4_t vreinterpret_f16_s16(int16x4_t __a) { \
  return (float16x4_t)__a; }
__ai float16x4_t vreinterpret_f16_s32(int32x2_t __a) { \
  return (float16x4_t)__a; }
__ai float16x4_t vreinterpret_f16_s64(int64x1_t __a) { \
  return (float16x4_t)__a; }
__ai float16x4_t vreinterpret_f16_u8(uint8x8_t __a) { \
  return (float16x4_t)__a; }
__ai float16x4_t vreinterpret_f16_u16(uint16x4_t __a) { \
  return (float16x4_t)__a; }
__ai float16x4_t vreinterpret_f16_u32(uint32x2_t __a) { \
  return (float16x4_t)__a; }
__ai float16x4_t vreinterpret_f16_u64(uint64x1_t __a) { \
  return (float16x4_t)__a; }
__ai float16x4_t vreinterpret_f16_f32(float32x2_t __a) { \
  return (float16x4_t)__a; }
__ai float16x4_t vreinterpret_f16_p8(poly8x8_t __a) { \
  return (float16x4_t)__a; }
__ai float16x4_t vreinterpret_f16_p16(poly16x4_t __a) { \
  return (float16x4_t)__a; }
/* vreinterpret_f32_*: bit-pattern casts from every other 64-bit vector type
 * to float32x2_t — note this reinterprets bits, it does NOT numerically
 * convert integers to floats. */
__ai float32x2_t vreinterpret_f32_s8(int8x8_t __a) { \
  return (float32x2_t)__a; }
__ai float32x2_t vreinterpret_f32_s16(int16x4_t __a) { \
  return (float32x2_t)__a; }
__ai float32x2_t vreinterpret_f32_s32(int32x2_t __a) { \
  return (float32x2_t)__a; }
__ai float32x2_t vreinterpret_f32_s64(int64x1_t __a) { \
  return (float32x2_t)__a; }
__ai float32x2_t vreinterpret_f32_u8(uint8x8_t __a) { \
  return (float32x2_t)__a; }
__ai float32x2_t vreinterpret_f32_u16(uint16x4_t __a) { \
  return (float32x2_t)__a; }
__ai float32x2_t vreinterpret_f32_u32(uint32x2_t __a) { \
  return (float32x2_t)__a; }
__ai float32x2_t vreinterpret_f32_u64(uint64x1_t __a) { \
  return (float32x2_t)__a; }
__ai float32x2_t vreinterpret_f32_f16(float16x4_t __a) { \
  return (float32x2_t)__a; }
__ai float32x2_t vreinterpret_f32_p8(poly8x8_t __a) { \
  return (float32x2_t)__a; }
__ai float32x2_t vreinterpret_f32_p16(poly16x4_t __a) { \
  return (float32x2_t)__a; }
/* vreinterpret_p8_*: bit-pattern casts from every other 64-bit vector type
 * to poly8x8_t. */
__ai poly8x8_t vreinterpret_p8_s8(int8x8_t __a) { \
  return (poly8x8_t)__a; }
__ai poly8x8_t vreinterpret_p8_s16(int16x4_t __a) { \
  return (poly8x8_t)__a; }
__ai poly8x8_t vreinterpret_p8_s32(int32x2_t __a) { \
  return (poly8x8_t)__a; }
__ai poly8x8_t vreinterpret_p8_s64(int64x1_t __a) { \
  return (poly8x8_t)__a; }
__ai poly8x8_t vreinterpret_p8_u8(uint8x8_t __a) { \
  return (poly8x8_t)__a; }
__ai poly8x8_t vreinterpret_p8_u16(uint16x4_t __a) { \
  return (poly8x8_t)__a; }
__ai poly8x8_t vreinterpret_p8_u32(uint32x2_t __a) { \
  return (poly8x8_t)__a; }
__ai poly8x8_t vreinterpret_p8_u64(uint64x1_t __a) { \
  return (poly8x8_t)__a; }
__ai poly8x8_t vreinterpret_p8_f16(float16x4_t __a) { \
  return (poly8x8_t)__a; }
__ai poly8x8_t vreinterpret_p8_f32(float32x2_t __a) { \
  return (poly8x8_t)__a; }
__ai poly8x8_t vreinterpret_p8_p16(poly16x4_t __a) { \
  return (poly8x8_t)__a; }
/* vreinterpret_p16_*: bit-pattern casts from every other 64-bit vector type
 * to poly16x4_t. */
__ai poly16x4_t vreinterpret_p16_s8(int8x8_t __a) { \
  return (poly16x4_t)__a; }
__ai poly16x4_t vreinterpret_p16_s16(int16x4_t __a) { \
  return (poly16x4_t)__a; }
__ai poly16x4_t vreinterpret_p16_s32(int32x2_t __a) { \
  return (poly16x4_t)__a; }
__ai poly16x4_t vreinterpret_p16_s64(int64x1_t __a) { \
  return (poly16x4_t)__a; }
__ai poly16x4_t vreinterpret_p16_u8(uint8x8_t __a) { \
  return (poly16x4_t)__a; }
__ai poly16x4_t vreinterpret_p16_u16(uint16x4_t __a) { \
  return (poly16x4_t)__a; }
__ai poly16x4_t vreinterpret_p16_u32(uint32x2_t __a) { \
  return (poly16x4_t)__a; }
__ai poly16x4_t vreinterpret_p16_u64(uint64x1_t __a) { \
  return (poly16x4_t)__a; }
__ai poly16x4_t vreinterpret_p16_f16(float16x4_t __a) { \
  return (poly16x4_t)__a; }
__ai poly16x4_t vreinterpret_p16_f32(float32x2_t __a) { \
  return (poly16x4_t)__a; }
__ai poly16x4_t vreinterpret_p16_p8(poly8x8_t __a) { \
  return (poly16x4_t)__a; }
/* vreinterpretq_s8_*: 128-bit ("q") bit-pattern casts to int8x16_t. */
__ai int8x16_t vreinterpretq_s8_s16(int16x8_t __a) { \
  return (int8x16_t)__a; }
__ai int8x16_t vreinterpretq_s8_s32(int32x4_t __a) { \
  return (int8x16_t)__a; }
__ai int8x16_t vreinterpretq_s8_s64(int64x2_t __a) { \
  return (int8x16_t)__a; }
__ai int8x16_t vreinterpretq_s8_u8(uint8x16_t __a) { \
  return (int8x16_t)__a; }
__ai int8x16_t vreinterpretq_s8_u16(uint16x8_t __a) { \
  return (int8x16_t)__a; }
__ai int8x16_t vreinterpretq_s8_u32(uint32x4_t __a) { \
  return (int8x16_t)__a; }
__ai int8x16_t vreinterpretq_s8_u64(uint64x2_t __a) { \
  return (int8x16_t)__a; }
__ai int8x16_t vreinterpretq_s8_f16(float16x8_t __a) { \
  return (int8x16_t)__a; }
__ai int8x16_t vreinterpretq_s8_f32(float32x4_t __a) { \
  return (int8x16_t)__a; }
__ai int8x16_t vreinterpretq_s8_p8(poly8x16_t __a) { \
  return (int8x16_t)__a; }
__ai int8x16_t vreinterpretq_s8_p16(poly16x8_t __a) { \
  return (int8x16_t)__a; }
/* vreinterpretq_s16_*: 128-bit bit-pattern casts to int16x8_t. */
__ai int16x8_t vreinterpretq_s16_s8(int8x16_t __a) { \
  return (int16x8_t)__a; }
__ai int16x8_t vreinterpretq_s16_s32(int32x4_t __a) { \
  return (int16x8_t)__a; }
__ai int16x8_t vreinterpretq_s16_s64(int64x2_t __a) { \
  return (int16x8_t)__a; }
__ai int16x8_t vreinterpretq_s16_u8(uint8x16_t __a) { \
  return (int16x8_t)__a; }
__ai int16x8_t vreinterpretq_s16_u16(uint16x8_t __a) { \
  return (int16x8_t)__a; }
__ai int16x8_t vreinterpretq_s16_u32(uint32x4_t __a) { \
  return (int16x8_t)__a; }
__ai int16x8_t vreinterpretq_s16_u64(uint64x2_t __a) { \
  return (int16x8_t)__a; }
__ai int16x8_t vreinterpretq_s16_f16(float16x8_t __a) { \
  return (int16x8_t)__a; }
__ai int16x8_t vreinterpretq_s16_f32(float32x4_t __a) { \
  return (int16x8_t)__a; }
__ai int16x8_t vreinterpretq_s16_p8(poly8x16_t __a) { \
  return (int16x8_t)__a; }
__ai int16x8_t vreinterpretq_s16_p16(poly16x8_t __a) { \
  return (int16x8_t)__a; }
/* vreinterpretq_s32_*: 128-bit bit-pattern casts to int32x4_t. */
__ai int32x4_t vreinterpretq_s32_s8(int8x16_t __a) { \
  return (int32x4_t)__a; }
__ai int32x4_t vreinterpretq_s32_s16(int16x8_t __a) { \
  return (int32x4_t)__a; }
__ai int32x4_t vreinterpretq_s32_s64(int64x2_t __a) { \
  return (int32x4_t)__a; }
__ai int32x4_t vreinterpretq_s32_u8(uint8x16_t __a) { \
  return (int32x4_t)__a; }
__ai int32x4_t vreinterpretq_s32_u16(uint16x8_t __a) { \
  return (int32x4_t)__a; }
__ai int32x4_t vreinterpretq_s32_u32(uint32x4_t __a) { \
  return (int32x4_t)__a; }
__ai int32x4_t vreinterpretq_s32_u64(uint64x2_t __a) { \
  return (int32x4_t)__a; }
__ai int32x4_t vreinterpretq_s32_f16(float16x8_t __a) { \
  return (int32x4_t)__a; }
__ai int32x4_t vreinterpretq_s32_f32(float32x4_t __a) { \
  return (int32x4_t)__a; }
__ai int32x4_t vreinterpretq_s32_p8(poly8x16_t __a) { \
  return (int32x4_t)__a; }
__ai int32x4_t vreinterpretq_s32_p16(poly16x8_t __a) { \
  return (int32x4_t)__a; }
/* vreinterpretq_s64_*: 128-bit bit-pattern casts to int64x2_t. */
__ai int64x2_t vreinterpretq_s64_s8(int8x16_t __a) { \
  return (int64x2_t)__a; }
__ai int64x2_t vreinterpretq_s64_s16(int16x8_t __a) { \
  return (int64x2_t)__a; }
__ai int64x2_t vreinterpretq_s64_s32(int32x4_t __a) { \
  return (int64x2_t)__a; }
__ai int64x2_t vreinterpretq_s64_u8(uint8x16_t __a) { \
  return (int64x2_t)__a; }
__ai int64x2_t vreinterpretq_s64_u16(uint16x8_t __a) { \
  return (int64x2_t)__a; }
__ai int64x2_t vreinterpretq_s64_u32(uint32x4_t __a) { \
  return (int64x2_t)__a; }
__ai int64x2_t vreinterpretq_s64_u64(uint64x2_t __a) { \
  return (int64x2_t)__a; }
__ai int64x2_t vreinterpretq_s64_f16(float16x8_t __a) { \
  return (int64x2_t)__a; }
__ai int64x2_t vreinterpretq_s64_f32(float32x4_t __a) { \
  return (int64x2_t)__a; }
__ai int64x2_t vreinterpretq_s64_p8(poly8x16_t __a) { \
  return (int64x2_t)__a; }
__ai int64x2_t vreinterpretq_s64_p16(poly16x8_t __a) { \
  return (int64x2_t)__a; }
3367 __ai uint8x16_t vreinterpretq_u8_s8(int8x16_t __a) { \ | |
3368 return (uint8x16_t)__a; } | |
3369 __ai uint8x16_t vreinterpretq_u8_s16(int16x8_t __a) { \ | |
3370 return (uint8x16_t)__a; } | |
3371 __ai uint8x16_t vreinterpretq_u8_s32(int32x4_t __a) { \ | |
3372 return (uint8x16_t)__a; } | |
3373 __ai uint8x16_t vreinterpretq_u8_s64(int64x2_t __a) { \ | |
3374 return (uint8x16_t)__a; } | |
3375 __ai uint8x16_t vreinterpretq_u8_u16(uint16x8_t __a) { \ | |
3376 return (uint8x16_t)__a; } | |
3377 __ai uint8x16_t vreinterpretq_u8_u32(uint32x4_t __a) { \ | |
3378 return (uint8x16_t)__a; } | |
3379 __ai uint8x16_t vreinterpretq_u8_u64(uint64x2_t __a) { \ | |
3380 return (uint8x16_t)__a; } | |
3381 __ai uint8x16_t vreinterpretq_u8_f16(float16x8_t __a) { \ | |
3382 return (uint8x16_t)__a; } | |
3383 __ai uint8x16_t vreinterpretq_u8_f32(float32x4_t __a) { \ | |
3384 return (uint8x16_t)__a; } | |
3385 __ai uint8x16_t vreinterpretq_u8_p8(poly8x16_t __a) { \ | |
3386 return (uint8x16_t)__a; } | |
3387 __ai uint8x16_t vreinterpretq_u8_p16(poly16x8_t __a) { \ | |
3388 return (uint8x16_t)__a; } | |
3389 __ai uint16x8_t vreinterpretq_u16_s8(int8x16_t __a) { \ | |
3390 return (uint16x8_t)__a; } | |
3391 __ai uint16x8_t vreinterpretq_u16_s16(int16x8_t __a) { \ | |
3392 return (uint16x8_t)__a; } | |
3393 __ai uint16x8_t vreinterpretq_u16_s32(int32x4_t __a) { \ | |
3394 return (uint16x8_t)__a; } | |
3395 __ai uint16x8_t vreinterpretq_u16_s64(int64x2_t __a) { \ | |
3396 return (uint16x8_t)__a; } | |
3397 __ai uint16x8_t vreinterpretq_u16_u8(uint8x16_t __a) { \ | |
3398 return (uint16x8_t)__a; } | |
3399 __ai uint16x8_t vreinterpretq_u16_u32(uint32x4_t __a) { \ | |
3400 return (uint16x8_t)__a; } | |
3401 __ai uint16x8_t vreinterpretq_u16_u64(uint64x2_t __a) { \ | |
3402 return (uint16x8_t)__a; } | |
3403 __ai uint16x8_t vreinterpretq_u16_f16(float16x8_t __a) { \ | |
3404 return (uint16x8_t)__a; } | |
3405 __ai uint16x8_t vreinterpretq_u16_f32(float32x4_t __a) { \ | |
3406 return (uint16x8_t)__a; } | |
3407 __ai uint16x8_t vreinterpretq_u16_p8(poly8x16_t __a) { \ | |
3408 return (uint16x8_t)__a; } | |
3409 __ai uint16x8_t vreinterpretq_u16_p16(poly16x8_t __a) { \ | |
3410 return (uint16x8_t)__a; } | |
3411 __ai uint32x4_t vreinterpretq_u32_s8(int8x16_t __a) { \ | |
3412 return (uint32x4_t)__a; } | |
3413 __ai uint32x4_t vreinterpretq_u32_s16(int16x8_t __a) { \ | |
3414 return (uint32x4_t)__a; } | |
3415 __ai uint32x4_t vreinterpretq_u32_s32(int32x4_t __a) { \ | |
3416 return (uint32x4_t)__a; } | |
3417 __ai uint32x4_t vreinterpretq_u32_s64(int64x2_t __a) { \ | |
3418 return (uint32x4_t)__a; } | |
3419 __ai uint32x4_t vreinterpretq_u32_u8(uint8x16_t __a) { \ | |
3420 return (uint32x4_t)__a; } | |
3421 __ai uint32x4_t vreinterpretq_u32_u16(uint16x8_t __a) { \ | |
3422 return (uint32x4_t)__a; } | |
3423 __ai uint32x4_t vreinterpretq_u32_u64(uint64x2_t __a) { \ | |
3424 return (uint32x4_t)__a; } | |
3425 __ai uint32x4_t vreinterpretq_u32_f16(float16x8_t __a) { \ | |
3426 return (uint32x4_t)__a; } | |
3427 __ai uint32x4_t vreinterpretq_u32_f32(float32x4_t __a) { \ | |
3428 return (uint32x4_t)__a; } | |
3429 __ai uint32x4_t vreinterpretq_u32_p8(poly8x16_t __a) { \ | |
3430 return (uint32x4_t)__a; } | |
3431 __ai uint32x4_t vreinterpretq_u32_p16(poly16x8_t __a) { \ | |
3432 return (uint32x4_t)__a; } | |
3433 __ai uint64x2_t vreinterpretq_u64_s8(int8x16_t __a) { \ | |
3434 return (uint64x2_t)__a; } | |
3435 __ai uint64x2_t vreinterpretq_u64_s16(int16x8_t __a) { \ | |
3436 return (uint64x2_t)__a; } | |
3437 __ai uint64x2_t vreinterpretq_u64_s32(int32x4_t __a) { \ | |
3438 return (uint64x2_t)__a; } | |
3439 __ai uint64x2_t vreinterpretq_u64_s64(int64x2_t __a) { \ | |
3440 return (uint64x2_t)__a; } | |
3441 __ai uint64x2_t vreinterpretq_u64_u8(uint8x16_t __a) { \ | |
3442 return (uint64x2_t)__a; } | |
3443 __ai uint64x2_t vreinterpretq_u64_u16(uint16x8_t __a) { \ | |
3444 return (uint64x2_t)__a; } | |
3445 __ai uint64x2_t vreinterpretq_u64_u32(uint32x4_t __a) { \ | |
3446 return (uint64x2_t)__a; } | |
3447 __ai uint64x2_t vreinterpretq_u64_f16(float16x8_t __a) { \ | |
3448 return (uint64x2_t)__a; } | |
3449 __ai uint64x2_t vreinterpretq_u64_f32(float32x4_t __a) { \ | |
3450 return (uint64x2_t)__a; } | |
3451 __ai uint64x2_t vreinterpretq_u64_p8(poly8x16_t __a) { \ | |
3452 return (uint64x2_t)__a; } | |
3453 __ai uint64x2_t vreinterpretq_u64_p16(poly16x8_t __a) { \ | |
3454 return (uint64x2_t)__a; } | |
3455 __ai float16x8_t vreinterpretq_f16_s8(int8x16_t __a) { \ | |
3456 return (float16x8_t)__a; } | |
3457 __ai float16x8_t vreinterpretq_f16_s16(int16x8_t __a) { \ | |
3458 return (float16x8_t)__a; } | |
3459 __ai float16x8_t vreinterpretq_f16_s32(int32x4_t __a) { \ | |
3460 return (float16x8_t)__a; } | |
3461 __ai float16x8_t vreinterpretq_f16_s64(int64x2_t __a) { \ | |
3462 return (float16x8_t)__a; } | |
3463 __ai float16x8_t vreinterpretq_f16_u8(uint8x16_t __a) { \ | |
3464 return (float16x8_t)__a; } | |
3465 __ai float16x8_t vreinterpretq_f16_u16(uint16x8_t __a) { \ | |
3466 return (float16x8_t)__a; } | |
3467 __ai float16x8_t vreinterpretq_f16_u32(uint32x4_t __a) { \ | |
3468 return (float16x8_t)__a; } | |
3469 __ai float16x8_t vreinterpretq_f16_u64(uint64x2_t __a) { \ | |
3470 return (float16x8_t)__a; } | |
3471 __ai float16x8_t vreinterpretq_f16_f32(float32x4_t __a) { \ | |
3472 return (float16x8_t)__a; } | |
3473 __ai float16x8_t vreinterpretq_f16_p8(poly8x16_t __a) { \ | |
3474 return (float16x8_t)__a; } | |
3475 __ai float16x8_t vreinterpretq_f16_p16(poly16x8_t __a) { \ | |
3476 return (float16x8_t)__a; } | |
3477 __ai float32x4_t vreinterpretq_f32_s8(int8x16_t __a) { \ | |
3478 return (float32x4_t)__a; } | |
3479 __ai float32x4_t vreinterpretq_f32_s16(int16x8_t __a) { \ | |
3480 return (float32x4_t)__a; } | |
3481 __ai float32x4_t vreinterpretq_f32_s32(int32x4_t __a) { \ | |
3482 return (float32x4_t)__a; } | |
3483 __ai float32x4_t vreinterpretq_f32_s64(int64x2_t __a) { \ | |
3484 return (float32x4_t)__a; } | |
3485 __ai float32x4_t vreinterpretq_f32_u8(uint8x16_t __a) { \ | |
3486 return (float32x4_t)__a; } | |
3487 __ai float32x4_t vreinterpretq_f32_u16(uint16x8_t __a) { \ | |
3488 return (float32x4_t)__a; } | |
3489 __ai float32x4_t vreinterpretq_f32_u32(uint32x4_t __a) { \ | |
3490 return (float32x4_t)__a; } | |
3491 __ai float32x4_t vreinterpretq_f32_u64(uint64x2_t __a) { \ | |
3492 return (float32x4_t)__a; } | |
3493 __ai float32x4_t vreinterpretq_f32_f16(float16x8_t __a) { \ | |
3494 return (float32x4_t)__a; } | |
3495 __ai float32x4_t vreinterpretq_f32_p8(poly8x16_t __a) { \ | |
3496 return (float32x4_t)__a; } | |
3497 __ai float32x4_t vreinterpretq_f32_p16(poly16x8_t __a) { \ | |
3498 return (float32x4_t)__a; } | |
3499 __ai poly8x16_t vreinterpretq_p8_s8(int8x16_t __a) { \ | |
3500 return (poly8x16_t)__a; } | |
3501 __ai poly8x16_t vreinterpretq_p8_s16(int16x8_t __a) { \ | |
3502 return (poly8x16_t)__a; } | |
3503 __ai poly8x16_t vreinterpretq_p8_s32(int32x4_t __a) { \ | |
3504 return (poly8x16_t)__a; } | |
3505 __ai poly8x16_t vreinterpretq_p8_s64(int64x2_t __a) { \ | |
3506 return (poly8x16_t)__a; } | |
3507 __ai poly8x16_t vreinterpretq_p8_u8(uint8x16_t __a) { \ | |
3508 return (poly8x16_t)__a; } | |
3509 __ai poly8x16_t vreinterpretq_p8_u16(uint16x8_t __a) { \ | |
3510 return (poly8x16_t)__a; } | |
3511 __ai poly8x16_t vreinterpretq_p8_u32(uint32x4_t __a) { \ | |
3512 return (poly8x16_t)__a; } | |
3513 __ai poly8x16_t vreinterpretq_p8_u64(uint64x2_t __a) { \ | |
3514 return (poly8x16_t)__a; } | |
3515 __ai poly8x16_t vreinterpretq_p8_f16(float16x8_t __a) { \ | |
3516 return (poly8x16_t)__a; } | |
3517 __ai poly8x16_t vreinterpretq_p8_f32(float32x4_t __a) { \ | |
3518 return (poly8x16_t)__a; } | |
3519 __ai poly8x16_t vreinterpretq_p8_p16(poly16x8_t __a) { \ | |
3520 return (poly8x16_t)__a; } | |
3521 __ai poly16x8_t vreinterpretq_p16_s8(int8x16_t __a) { \ | |
3522 return (poly16x8_t)__a; } | |
3523 __ai poly16x8_t vreinterpretq_p16_s16(int16x8_t __a) { \ | |
3524 return (poly16x8_t)__a; } | |
3525 __ai poly16x8_t vreinterpretq_p16_s32(int32x4_t __a) { \ | |
3526 return (poly16x8_t)__a; } | |
3527 __ai poly16x8_t vreinterpretq_p16_s64(int64x2_t __a) { \ | |
3528 return (poly16x8_t)__a; } | |
3529 __ai poly16x8_t vreinterpretq_p16_u8(uint8x16_t __a) { \ | |
3530 return (poly16x8_t)__a; } | |
3531 __ai poly16x8_t vreinterpretq_p16_u16(uint16x8_t __a) { \ | |
3532 return (poly16x8_t)__a; } | |
3533 __ai poly16x8_t vreinterpretq_p16_u32(uint32x4_t __a) { \ | |
3534 return (poly16x8_t)__a; } | |
3535 __ai poly16x8_t vreinterpretq_p16_u64(uint64x2_t __a) { \ | |
3536 return (poly16x8_t)__a; } | |
3537 __ai poly16x8_t vreinterpretq_p16_f16(float16x8_t __a) { \ | |
3538 return (poly16x8_t)__a; } | |
3539 __ai poly16x8_t vreinterpretq_p16_f32(float32x4_t __a) { \ | |
3540 return (poly16x8_t)__a; } | |
3541 __ai poly16x8_t vreinterpretq_p16_p8(poly8x16_t __a) { \ | |
3542 return (poly16x8_t)__a; } | |
3543 | |
/* vrev16*: reverse the order of the bytes within each 16-bit halfword
   (shuffle pattern 1,0, 3,2, ... swaps adjacent byte pairs). */
__ai int8x8_t vrev16_s8(int8x8_t __a) { \
  return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); }
__ai uint8x8_t vrev16_u8(uint8x8_t __a) { \
  return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); }
__ai poly8x8_t vrev16_p8(poly8x8_t __a) { \
  return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); }
__ai int8x16_t vrev16q_s8(int8x16_t __a) { \
  return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); }
__ai uint8x16_t vrev16q_u8(uint8x16_t __a) { \
  return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); }
__ai poly8x16_t vrev16q_p8(poly8x16_t __a) { \
  return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); }
3556 | |
/* vrev32*: reverse the order of the elements within each 32-bit word
   (byte elements reverse in groups of 4, halfword elements in pairs). */
__ai int8x8_t vrev32_s8(int8x8_t __a) { \
  return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); }
__ai int16x4_t vrev32_s16(int16x4_t __a) { \
  return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
__ai uint8x8_t vrev32_u8(uint8x8_t __a) { \
  return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); }
__ai uint16x4_t vrev32_u16(uint16x4_t __a) { \
  return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
__ai poly8x8_t vrev32_p8(poly8x8_t __a) { \
  return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); }
__ai poly16x4_t vrev32_p16(poly16x4_t __a) { \
  return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
__ai int8x16_t vrev32q_s8(int8x16_t __a) { \
  return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); }
__ai int16x8_t vrev32q_s16(int16x8_t __a) { \
  return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); }
__ai uint8x16_t vrev32q_u8(uint8x16_t __a) { \
  return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); }
__ai uint16x8_t vrev32q_u16(uint16x8_t __a) { \
  return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); }
__ai poly8x16_t vrev32q_p8(poly8x16_t __a) { \
  return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); }
__ai poly16x8_t vrev32q_p16(poly16x8_t __a) { \
  return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); }
3581 | |
/* vrev64*: reverse the order of the elements within each 64-bit doubleword
   (bytes in groups of 8, halfwords in groups of 4, words/floats in pairs). */
__ai int8x8_t vrev64_s8(int8x8_t __a) { \
  return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); }
__ai int16x4_t vrev64_s16(int16x4_t __a) { \
  return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); }
__ai int32x2_t vrev64_s32(int32x2_t __a) { \
  return __builtin_shufflevector(__a, __a, 1, 0); }
__ai uint8x8_t vrev64_u8(uint8x8_t __a) { \
  return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); }
__ai uint16x4_t vrev64_u16(uint16x4_t __a) { \
  return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); }
__ai uint32x2_t vrev64_u32(uint32x2_t __a) { \
  return __builtin_shufflevector(__a, __a, 1, 0); }
__ai poly8x8_t vrev64_p8(poly8x8_t __a) { \
  return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); }
__ai poly16x4_t vrev64_p16(poly16x4_t __a) { \
  return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); }
__ai float32x2_t vrev64_f32(float32x2_t __a) { \
  return __builtin_shufflevector(__a, __a, 1, 0); }
__ai int8x16_t vrev64q_s8(int8x16_t __a) { \
  return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); }
__ai int16x8_t vrev64q_s16(int16x8_t __a) { \
  return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); }
__ai int32x4_t vrev64q_s32(int32x4_t __a) { \
  return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
__ai uint8x16_t vrev64q_u8(uint8x16_t __a) { \
  return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); }
__ai uint16x8_t vrev64q_u16(uint16x8_t __a) { \
  return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); }
__ai uint32x4_t vrev64q_u32(uint32x4_t __a) { \
  return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
__ai poly8x16_t vrev64q_p8(poly8x16_t __a) { \
  return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); }
__ai poly16x8_t vrev64q_p16(poly16x8_t __a) { \
  return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); }
__ai float32x4_t vrev64q_f32(float32x4_t __a) { \
  return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
3618 | |
/* vrhadd*: thin wrappers over the __builtin_neon_vrhadd(q)_v builtin (VRHADD).
   Operands are bit-cast to int8x8_t/int8x16_t for the generic builtin; the
   final integer constant is the type discriminator the builtin uses to pick
   the real element type (each signed/unsigned width gets a distinct code). */
__ai int8x8_t vrhadd_s8(int8x8_t __a, int8x8_t __b) { \
  return (int8x8_t)__builtin_neon_vrhadd_v(__a, __b, 0); }
__ai int16x4_t vrhadd_s16(int16x4_t __a, int16x4_t __b) { \
  return (int16x4_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 1); }
__ai int32x2_t vrhadd_s32(int32x2_t __a, int32x2_t __b) { \
  return (int32x2_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 2); }
__ai uint8x8_t vrhadd_u8(uint8x8_t __a, uint8x8_t __b) { \
  return (uint8x8_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 8); }
__ai uint16x4_t vrhadd_u16(uint16x4_t __a, uint16x4_t __b) { \
  return (uint16x4_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 9); }
__ai uint32x2_t vrhadd_u32(uint32x2_t __a, uint32x2_t __b) { \
  return (uint32x2_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 10); }
__ai int8x16_t vrhaddq_s8(int8x16_t __a, int8x16_t __b) { \
  return (int8x16_t)__builtin_neon_vrhaddq_v(__a, __b, 16); }
__ai int16x8_t vrhaddq_s16(int16x8_t __a, int16x8_t __b) { \
  return (int16x8_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 17); }
__ai int32x4_t vrhaddq_s32(int32x4_t __a, int32x4_t __b) { \
  return (int32x4_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 18); }
__ai uint8x16_t vrhaddq_u8(uint8x16_t __a, uint8x16_t __b) { \
  return (uint8x16_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 24); }
__ai uint16x8_t vrhaddq_u16(uint16x8_t __a, uint16x8_t __b) { \
  return (uint16x8_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 25); }
__ai uint32x4_t vrhaddq_u32(uint32x4_t __a, uint32x4_t __b) { \
  return (uint32x4_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 26); }
3643 | |
/* vrshl*: wrappers over __builtin_neon_vrshl(q)_v (VRSHL).  Note the shift
   vector __b is always a *signed* type, even for the unsigned-data variants;
   the trailing constant is the element-type discriminator for the builtin. */
__ai int8x8_t vrshl_s8(int8x8_t __a, int8x8_t __b) { \
  return (int8x8_t)__builtin_neon_vrshl_v(__a, __b, 0); }
__ai int16x4_t vrshl_s16(int16x4_t __a, int16x4_t __b) { \
  return (int16x4_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 1); }
__ai int32x2_t vrshl_s32(int32x2_t __a, int32x2_t __b) { \
  return (int32x2_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 2); }
__ai int64x1_t vrshl_s64(int64x1_t __a, int64x1_t __b) { \
  return (int64x1_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 3); }
__ai uint8x8_t vrshl_u8(uint8x8_t __a, int8x8_t __b) { \
  return (uint8x8_t)__builtin_neon_vrshl_v((int8x8_t)__a, __b, 8); }
__ai uint16x4_t vrshl_u16(uint16x4_t __a, int16x4_t __b) { \
  return (uint16x4_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 9); }
__ai uint32x2_t vrshl_u32(uint32x2_t __a, int32x2_t __b) { \
  return (uint32x2_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 10); }
__ai uint64x1_t vrshl_u64(uint64x1_t __a, int64x1_t __b) { \
  return (uint64x1_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 11); }
__ai int8x16_t vrshlq_s8(int8x16_t __a, int8x16_t __b) { \
  return (int8x16_t)__builtin_neon_vrshlq_v(__a, __b, 16); }
__ai int16x8_t vrshlq_s16(int16x8_t __a, int16x8_t __b) { \
  return (int16x8_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 17); }
__ai int32x4_t vrshlq_s32(int32x4_t __a, int32x4_t __b) { \
  return (int32x4_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 18); }
__ai int64x2_t vrshlq_s64(int64x2_t __a, int64x2_t __b) { \
  return (int64x2_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 19); }
__ai uint8x16_t vrshlq_u8(uint8x16_t __a, int8x16_t __b) { \
  return (uint8x16_t)__builtin_neon_vrshlq_v((int8x16_t)__a, __b, 24); }
__ai uint16x8_t vrshlq_u16(uint16x8_t __a, int16x8_t __b) { \
  return (uint16x8_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 25); }
__ai uint32x4_t vrshlq_u32(uint32x4_t __a, int32x4_t __b) { \
  return (uint32x4_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 26); }
__ai uint64x2_t vrshlq_u64(uint64x2_t __a, int64x2_t __b) { \
  return (uint64x2_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 27); }
3676 | |
/* vrshrn_n_*: statement-expression macros (the shift count __b must be a
   compile-time immediate, hence macros rather than __ai functions).  The
   input is a Q-register type narrowed to a D-register result; the argument
   is rebound to a typed local so (a) is evaluated exactly once and
   type-checked before the bit-cast to the generic int8x16_t. */
#define vrshrn_n_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  (int8x8_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 0); })
#define vrshrn_n_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (int16x4_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 1); })
#define vrshrn_n_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (int32x2_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 2); })
#define vrshrn_n_u16(a, __b) __extension__ ({ \
  uint16x8_t __a = (a); \
  (uint8x8_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 8); })
#define vrshrn_n_u32(a, __b) __extension__ ({ \
  uint32x4_t __a = (a); \
  (uint16x4_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 9); })
#define vrshrn_n_u64(a, __b) __extension__ ({ \
  uint64x2_t __a = (a); \
  (uint32x2_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 10); })
3695 | |
/* vrshr_n_* / vrshrq_n_*: immediate-count shift macros; the count __b must
   be a constant expression seen by the builtin, so these are macros.  The
   typed local __a evaluates (a) once and enforces the operand type before
   bit-casting to the generic vector type the builtin expects. */
#define vrshr_n_s8(a, __b) __extension__ ({ \
  int8x8_t __a = (a); \
  (int8x8_t)__builtin_neon_vrshr_n_v(__a, __b, 0); })
#define vrshr_n_s16(a, __b) __extension__ ({ \
  int16x4_t __a = (a); \
  (int16x4_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 1); })
#define vrshr_n_s32(a, __b) __extension__ ({ \
  int32x2_t __a = (a); \
  (int32x2_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 2); })
#define vrshr_n_s64(a, __b) __extension__ ({ \
  int64x1_t __a = (a); \
  (int64x1_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 3); })
#define vrshr_n_u8(a, __b) __extension__ ({ \
  uint8x8_t __a = (a); \
  (uint8x8_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 8); })
#define vrshr_n_u16(a, __b) __extension__ ({ \
  uint16x4_t __a = (a); \
  (uint16x4_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 9); })
#define vrshr_n_u32(a, __b) __extension__ ({ \
  uint32x2_t __a = (a); \
  (uint32x2_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 10); })
#define vrshr_n_u64(a, __b) __extension__ ({ \
  uint64x1_t __a = (a); \
  (uint64x1_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 11); })
#define vrshrq_n_s8(a, __b) __extension__ ({ \
  int8x16_t __a = (a); \
  (int8x16_t)__builtin_neon_vrshrq_n_v(__a, __b, 16); })
#define vrshrq_n_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  (int16x8_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 17); })
#define vrshrq_n_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (int32x4_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 18); })
#define vrshrq_n_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (int64x2_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 19); })
#define vrshrq_n_u8(a, __b) __extension__ ({ \
  uint8x16_t __a = (a); \
  (uint8x16_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 24); })
#define vrshrq_n_u16(a, __b) __extension__ ({ \
  uint16x8_t __a = (a); \
  (uint16x8_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 25); })
#define vrshrq_n_u32(a, __b) __extension__ ({ \
  uint32x4_t __a = (a); \
  (uint32x4_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 26); })
#define vrshrq_n_u64(a, __b) __extension__ ({ \
  uint64x2_t __a = (a); \
  (uint64x2_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 27); })
3744 | |
/* vrsqrte*: wrappers over __builtin_neon_vrsqrte(q)_v; type codes 4/20 select
   the float element type, 10/26 the u32 element type. */
__ai float32x2_t vrsqrte_f32(float32x2_t __a) { \
  return (float32x2_t)__builtin_neon_vrsqrte_v((int8x8_t)__a, 4); }
__ai uint32x2_t vrsqrte_u32(uint32x2_t __a) { \
  return (uint32x2_t)__builtin_neon_vrsqrte_v((int8x8_t)__a, 10); }
__ai float32x4_t vrsqrteq_f32(float32x4_t __a) { \
  return (float32x4_t)__builtin_neon_vrsqrteq_v((int8x16_t)__a, 20); }
__ai uint32x4_t vrsqrteq_u32(uint32x4_t __a) { \
  return (uint32x4_t)__builtin_neon_vrsqrteq_v((int8x16_t)__a, 26); }
3753 | |
/* vrsqrts*: two-operand float-only wrappers over __builtin_neon_vrsqrts(q)_v. */
__ai float32x2_t vrsqrts_f32(float32x2_t __a, float32x2_t __b) { \
  return (float32x2_t)__builtin_neon_vrsqrts_v((int8x8_t)__a, (int8x8_t)__b, 4); }
__ai float32x4_t vrsqrtsq_f32(float32x4_t __a, float32x4_t __b) { \
  return (float32x4_t)__builtin_neon_vrsqrtsq_v((int8x16_t)__a, (int8x16_t)__b, 20); }
3758 | |
/* vrsra_n_* / vrsraq_n_*: shift-and-accumulate macros; __c is the immediate
   shift count, so these must be macros.  Both vector operands are rebound to
   typed locals for single evaluation and type checking, then bit-cast to the
   generic type for the builtin. */
#define vrsra_n_s8(a, b, __c) __extension__ ({ \
  int8x8_t __a = (a); int8x8_t __b = (b); \
  (int8x8_t)__builtin_neon_vrsra_n_v(__a, __b, __c, 0); })
#define vrsra_n_s16(a, b, __c) __extension__ ({ \
  int16x4_t __a = (a); int16x4_t __b = (b); \
  (int16x4_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
#define vrsra_n_s32(a, b, __c) __extension__ ({ \
  int32x2_t __a = (a); int32x2_t __b = (b); \
  (int32x2_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
#define vrsra_n_s64(a, b, __c) __extension__ ({ \
  int64x1_t __a = (a); int64x1_t __b = (b); \
  (int64x1_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
#define vrsra_n_u8(a, b, __c) __extension__ ({ \
  uint8x8_t __a = (a); uint8x8_t __b = (b); \
  (uint8x8_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 8); })
#define vrsra_n_u16(a, b, __c) __extension__ ({ \
  uint16x4_t __a = (a); uint16x4_t __b = (b); \
  (uint16x4_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 9); })
#define vrsra_n_u32(a, b, __c) __extension__ ({ \
  uint32x2_t __a = (a); uint32x2_t __b = (b); \
  (uint32x2_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 10); })
#define vrsra_n_u64(a, b, __c) __extension__ ({ \
  uint64x1_t __a = (a); uint64x1_t __b = (b); \
  (uint64x1_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 11); })
#define vrsraq_n_s8(a, b, __c) __extension__ ({ \
  int8x16_t __a = (a); int8x16_t __b = (b); \
  (int8x16_t)__builtin_neon_vrsraq_n_v(__a, __b, __c, 16); })
#define vrsraq_n_s16(a, b, __c) __extension__ ({ \
  int16x8_t __a = (a); int16x8_t __b = (b); \
  (int16x8_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 17); })
#define vrsraq_n_s32(a, b, __c) __extension__ ({ \
  int32x4_t __a = (a); int32x4_t __b = (b); \
  (int32x4_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 18); })
#define vrsraq_n_s64(a, b, __c) __extension__ ({ \
  int64x2_t __a = (a); int64x2_t __b = (b); \
  (int64x2_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 19); })
#define vrsraq_n_u8(a, b, __c) __extension__ ({ \
  uint8x16_t __a = (a); uint8x16_t __b = (b); \
  (uint8x16_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 24); })
#define vrsraq_n_u16(a, b, __c) __extension__ ({ \
  uint16x8_t __a = (a); uint16x8_t __b = (b); \
  (uint16x8_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 25); })
#define vrsraq_n_u32(a, b, __c) __extension__ ({ \
  uint32x4_t __a = (a); uint32x4_t __b = (b); \
  (uint32x4_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 26); })
#define vrsraq_n_u64(a, b, __c) __extension__ ({ \
  uint64x2_t __a = (a); uint64x2_t __b = (b); \
  (uint64x2_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 27); })
3807 | |
/* vrsubhn_*: narrowing wrappers over __builtin_neon_vrsubhn_v; both
   Q-register operands are bit-cast to int8x16_t and the result narrows to a
   D-register type.  The trailing constant is the element-type discriminator. */
__ai int8x8_t vrsubhn_s16(int16x8_t __a, int16x8_t __b) { \
  return (int8x8_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 0); }
__ai int16x4_t vrsubhn_s32(int32x4_t __a, int32x4_t __b) { \
  return (int16x4_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 1); }
__ai int32x2_t vrsubhn_s64(int64x2_t __a, int64x2_t __b) { \
  return (int32x2_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 2); }
__ai uint8x8_t vrsubhn_u16(uint16x8_t __a, uint16x8_t __b) { \
  return (uint8x8_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 8); }
__ai uint16x4_t vrsubhn_u32(uint32x4_t __a, uint32x4_t __b) { \
  return (uint16x4_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 9); }
__ai uint32x2_t vrsubhn_u64(uint64x2_t __a, uint64x2_t __b) { \
  return (uint32x2_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 10); }
3820 | |
/* vset_lane_* / vsetq_lane_*: insert scalar (a) into lane __c of vector (b).
   The lane index must be a compile-time constant, hence macros.  Each macro
   rebinds its arguments to typed locals so they are evaluated once and
   type-checked, then dispatches to the per-width __builtin_neon_vset(q)_lane_*
   builtin, bit-casting unsigned/poly vectors to the signed type it expects. */
#define vset_lane_u8(a, b, __c) __extension__ ({ \
  uint8_t __a = (a); uint8x8_t __b = (b); \
  (uint8x8_t)__builtin_neon_vset_lane_i8(__a, (int8x8_t)__b, __c); })
#define vset_lane_u16(a, b, __c) __extension__ ({ \
  uint16_t __a = (a); uint16x4_t __b = (b); \
  (uint16x4_t)__builtin_neon_vset_lane_i16(__a, (int16x4_t)__b, __c); })
#define vset_lane_u32(a, b, __c) __extension__ ({ \
  uint32_t __a = (a); uint32x2_t __b = (b); \
  (uint32x2_t)__builtin_neon_vset_lane_i32(__a, (int32x2_t)__b, __c); })
#define vset_lane_s8(a, b, __c) __extension__ ({ \
  int8_t __a = (a); int8x8_t __b = (b); \
  (int8x8_t)__builtin_neon_vset_lane_i8(__a, __b, __c); })
#define vset_lane_s16(a, b, __c) __extension__ ({ \
  int16_t __a = (a); int16x4_t __b = (b); \
  (int16x4_t)__builtin_neon_vset_lane_i16(__a, __b, __c); })
#define vset_lane_s32(a, b, __c) __extension__ ({ \
  int32_t __a = (a); int32x2_t __b = (b); \
  (int32x2_t)__builtin_neon_vset_lane_i32(__a, __b, __c); })
#define vset_lane_p8(a, b, __c) __extension__ ({ \
  poly8_t __a = (a); poly8x8_t __b = (b); \
  (poly8x8_t)__builtin_neon_vset_lane_i8(__a, (int8x8_t)__b, __c); })
#define vset_lane_p16(a, b, __c) __extension__ ({ \
  poly16_t __a = (a); poly16x4_t __b = (b); \
  (poly16x4_t)__builtin_neon_vset_lane_i16(__a, (int16x4_t)__b, __c); })
#define vset_lane_f32(a, b, __c) __extension__ ({ \
  float32_t __a = (a); float32x2_t __b = (b); \
  (float32x2_t)__builtin_neon_vset_lane_f32(__a, __b, __c); })
#define vsetq_lane_u8(a, b, __c) __extension__ ({ \
  uint8_t __a = (a); uint8x16_t __b = (b); \
  (uint8x16_t)__builtin_neon_vsetq_lane_i8(__a, (int8x16_t)__b, __c); })
#define vsetq_lane_u16(a, b, __c) __extension__ ({ \
  uint16_t __a = (a); uint16x8_t __b = (b); \
  (uint16x8_t)__builtin_neon_vsetq_lane_i16(__a, (int16x8_t)__b, __c); })
#define vsetq_lane_u32(a, b, __c) __extension__ ({ \
  uint32_t __a = (a); uint32x4_t __b = (b); \
  (uint32x4_t)__builtin_neon_vsetq_lane_i32(__a, (int32x4_t)__b, __c); })
#define vsetq_lane_s8(a, b, __c) __extension__ ({ \
  int8_t __a = (a); int8x16_t __b = (b); \
  (int8x16_t)__builtin_neon_vsetq_lane_i8(__a, __b, __c); })
#define vsetq_lane_s16(a, b, __c) __extension__ ({ \
  int16_t __a = (a); int16x8_t __b = (b); \
  (int16x8_t)__builtin_neon_vsetq_lane_i16(__a, __b, __c); })
#define vsetq_lane_s32(a, b, __c) __extension__ ({ \
  int32_t __a = (a); int32x4_t __b = (b); \
  (int32x4_t)__builtin_neon_vsetq_lane_i32(__a, __b, __c); })
#define vsetq_lane_p8(a, b, __c) __extension__ ({ \
  poly8_t __a = (a); poly8x16_t __b = (b); \
  (poly8x16_t)__builtin_neon_vsetq_lane_i8(__a, (int8x16_t)__b, __c); })
#define vsetq_lane_p16(a, b, __c) __extension__ ({ \
  poly16_t __a = (a); poly16x8_t __b = (b); \
  (poly16x8_t)__builtin_neon_vsetq_lane_i16(__a, (int16x8_t)__b, __c); })
#define vsetq_lane_f32(a, b, __c) __extension__ ({ \
  float32_t __a = (a); float32x4_t __b = (b); \
  (float32x4_t)__builtin_neon_vsetq_lane_f32(__a, __b, __c); })
#define vset_lane_s64(a, b, __c) __extension__ ({ \
  int64_t __a = (a); int64x1_t __b = (b); \
  (int64x1_t)__builtin_neon_vset_lane_i64(__a, __b, __c); })
#define vset_lane_u64(a, b, __c) __extension__ ({ \
  uint64_t __a = (a); uint64x1_t __b = (b); \
  (uint64x1_t)__builtin_neon_vset_lane_i64(__a, (int64x1_t)__b, __c); })
#define vsetq_lane_s64(a, b, __c) __extension__ ({ \
  int64_t __a = (a); int64x2_t __b = (b); \
  (int64x2_t)__builtin_neon_vsetq_lane_i64(__a, __b, __c); })
#define vsetq_lane_u64(a, b, __c) __extension__ ({ \
  uint64_t __a = (a); uint64x2_t __b = (b); \
  (uint64x2_t)__builtin_neon_vsetq_lane_i64(__a, (int64x2_t)__b, __c); })
3887 | |
/* vshl*: wrappers over __builtin_neon_vshl(q)_v (VSHL).  Mirrors the vrshl*
   family above: the shift vector __b is always a signed type, and the final
   constant is the element-type discriminator understood by the builtin. */
__ai int8x8_t vshl_s8(int8x8_t __a, int8x8_t __b) { \
  return (int8x8_t)__builtin_neon_vshl_v(__a, __b, 0); }
__ai int16x4_t vshl_s16(int16x4_t __a, int16x4_t __b) { \
  return (int16x4_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 1); }
__ai int32x2_t vshl_s32(int32x2_t __a, int32x2_t __b) { \
  return (int32x2_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 2); }
__ai int64x1_t vshl_s64(int64x1_t __a, int64x1_t __b) { \
  return (int64x1_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 3); }
__ai uint8x8_t vshl_u8(uint8x8_t __a, int8x8_t __b) { \
  return (uint8x8_t)__builtin_neon_vshl_v((int8x8_t)__a, __b, 8); }
__ai uint16x4_t vshl_u16(uint16x4_t __a, int16x4_t __b) { \
  return (uint16x4_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 9); }
__ai uint32x2_t vshl_u32(uint32x2_t __a, int32x2_t __b) { \
  return (uint32x2_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 10); }
__ai uint64x1_t vshl_u64(uint64x1_t __a, int64x1_t __b) { \
  return (uint64x1_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 11); }
__ai int8x16_t vshlq_s8(int8x16_t __a, int8x16_t __b) { \
  return (int8x16_t)__builtin_neon_vshlq_v(__a, __b, 16); }
__ai int16x8_t vshlq_s16(int16x8_t __a, int16x8_t __b) { \
  return (int16x8_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 17); }
__ai int32x4_t vshlq_s32(int32x4_t __a, int32x4_t __b) { \
  return (int32x4_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 18); }
__ai int64x2_t vshlq_s64(int64x2_t __a, int64x2_t __b) { \
  return (int64x2_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 19); }
__ai uint8x16_t vshlq_u8(uint8x16_t __a, int8x16_t __b) { \
  return (uint8x16_t)__builtin_neon_vshlq_v((int8x16_t)__a, __b, 24); }
__ai uint16x8_t vshlq_u16(uint16x8_t __a, int16x8_t __b) { \
  return (uint16x8_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 25); }
__ai uint32x4_t vshlq_u32(uint32x4_t __a, int32x4_t __b) { \
  return (uint32x4_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 26); }
__ai uint64x2_t vshlq_u64(uint64x2_t __a, int64x2_t __b) { \
  return (uint64x2_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 27); }
3920 | |
/* vshll_n: shift each element of a 64-bit vector left by immediate __b,
 * widening to double-width elements in a 128-bit result (hence the type
 * code passed is the *result* element type, e.g. 17 = s16 Q-form for the
 * s8 input).  Macro form keeps __b a compile-time immediate. */
#define vshll_n_s8(a, __b) __extension__ ({ \
  int8x8_t __a = (a); \
  (int16x8_t)__builtin_neon_vshll_n_v(__a, __b, 17); })
#define vshll_n_s16(a, __b) __extension__ ({ \
  int16x4_t __a = (a); \
  (int32x4_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 18); })
#define vshll_n_s32(a, __b) __extension__ ({ \
  int32x2_t __a = (a); \
  (int64x2_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 19); })
#define vshll_n_u8(a, __b) __extension__ ({ \
  uint8x8_t __a = (a); \
  (uint16x8_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 25); })
#define vshll_n_u16(a, __b) __extension__ ({ \
  uint16x4_t __a = (a); \
  (uint32x4_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 26); })
#define vshll_n_u32(a, __b) __extension__ ({ \
  uint32x2_t __a = (a); \
  (uint64x2_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 27); })
3939 | |
/* vshl_n / vshlq_n: shift each element left by the immediate __b, same
 * element width in and out.  Input is bit-cast to the generic int8 vector
 * for the builtin; the trailing constant re-encodes the true element type
 * (s8=0 s16=1 s32=2 s64=3, +8 unsigned, +16 for Q forms). */
#define vshl_n_s8(a, __b) __extension__ ({ \
  int8x8_t __a = (a); \
  (int8x8_t)__builtin_neon_vshl_n_v(__a, __b, 0); })
#define vshl_n_s16(a, __b) __extension__ ({ \
  int16x4_t __a = (a); \
  (int16x4_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 1); })
#define vshl_n_s32(a, __b) __extension__ ({ \
  int32x2_t __a = (a); \
  (int32x2_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 2); })
#define vshl_n_s64(a, __b) __extension__ ({ \
  int64x1_t __a = (a); \
  (int64x1_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 3); })
#define vshl_n_u8(a, __b) __extension__ ({ \
  uint8x8_t __a = (a); \
  (uint8x8_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 8); })
#define vshl_n_u16(a, __b) __extension__ ({ \
  uint16x4_t __a = (a); \
  (uint16x4_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 9); })
#define vshl_n_u32(a, __b) __extension__ ({ \
  uint32x2_t __a = (a); \
  (uint32x2_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 10); })
#define vshl_n_u64(a, __b) __extension__ ({ \
  uint64x1_t __a = (a); \
  (uint64x1_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 11); })
/* 128-bit (Q-register) forms. */
#define vshlq_n_s8(a, __b) __extension__ ({ \
  int8x16_t __a = (a); \
  (int8x16_t)__builtin_neon_vshlq_n_v(__a, __b, 16); })
#define vshlq_n_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  (int16x8_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 17); })
#define vshlq_n_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (int32x4_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 18); })
#define vshlq_n_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (int64x2_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 19); })
#define vshlq_n_u8(a, __b) __extension__ ({ \
  uint8x16_t __a = (a); \
  (uint8x16_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 24); })
#define vshlq_n_u16(a, __b) __extension__ ({ \
  uint16x8_t __a = (a); \
  (uint16x8_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 25); })
#define vshlq_n_u32(a, __b) __extension__ ({ \
  uint32x4_t __a = (a); \
  (uint32x4_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 26); })
#define vshlq_n_u64(a, __b) __extension__ ({ \
  uint64x2_t __a = (a); \
  (uint64x2_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 27); })
3988 | |
/* vshrn_n: shift each element of a 128-bit vector right by immediate __b
 * and narrow to half-width elements in a 64-bit result.  The type code
 * passed is the narrowed *result* element type (e.g. 0 = s8 for s16 input). */
#define vshrn_n_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  (int8x8_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 0); })
#define vshrn_n_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (int16x4_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 1); })
#define vshrn_n_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (int32x2_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 2); })
#define vshrn_n_u16(a, __b) __extension__ ({ \
  uint16x8_t __a = (a); \
  (uint8x8_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 8); })
#define vshrn_n_u32(a, __b) __extension__ ({ \
  uint32x4_t __a = (a); \
  (uint16x4_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 9); })
#define vshrn_n_u64(a, __b) __extension__ ({ \
  uint64x2_t __a = (a); \
  (uint32x2_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 10); })
4007 | |
/* vshr_n / vshrq_n: shift each element right by the immediate __b, same
 * element width in and out.  Type code: s8=0 s16=1 s32=2 s64=3, +8 for
 * unsigned (arithmetic vs. logical shift is selected by that code inside
 * the builtin — not visible here), +16 for Q forms. */
#define vshr_n_s8(a, __b) __extension__ ({ \
  int8x8_t __a = (a); \
  (int8x8_t)__builtin_neon_vshr_n_v(__a, __b, 0); })
#define vshr_n_s16(a, __b) __extension__ ({ \
  int16x4_t __a = (a); \
  (int16x4_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 1); })
#define vshr_n_s32(a, __b) __extension__ ({ \
  int32x2_t __a = (a); \
  (int32x2_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 2); })
#define vshr_n_s64(a, __b) __extension__ ({ \
  int64x1_t __a = (a); \
  (int64x1_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 3); })
#define vshr_n_u8(a, __b) __extension__ ({ \
  uint8x8_t __a = (a); \
  (uint8x8_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 8); })
#define vshr_n_u16(a, __b) __extension__ ({ \
  uint16x4_t __a = (a); \
  (uint16x4_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 9); })
#define vshr_n_u32(a, __b) __extension__ ({ \
  uint32x2_t __a = (a); \
  (uint32x2_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 10); })
#define vshr_n_u64(a, __b) __extension__ ({ \
  uint64x1_t __a = (a); \
  (uint64x1_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 11); })
/* 128-bit (Q-register) forms. */
#define vshrq_n_s8(a, __b) __extension__ ({ \
  int8x16_t __a = (a); \
  (int8x16_t)__builtin_neon_vshrq_n_v(__a, __b, 16); })
#define vshrq_n_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  (int16x8_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 17); })
#define vshrq_n_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  (int32x4_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 18); })
#define vshrq_n_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  (int64x2_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 19); })
#define vshrq_n_u8(a, __b) __extension__ ({ \
  uint8x16_t __a = (a); \
  (uint8x16_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 24); })
#define vshrq_n_u16(a, __b) __extension__ ({ \
  uint16x8_t __a = (a); \
  (uint16x8_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 25); })
#define vshrq_n_u32(a, __b) __extension__ ({ \
  uint32x4_t __a = (a); \
  (uint32x4_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 26); })
#define vshrq_n_u64(a, __b) __extension__ ({ \
  uint64x2_t __a = (a); \
  (uint64x2_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 27); })
4056 | |
/* vsli_n / vsliq_n: shift-left-and-insert — shift elements of __b left by
 * immediate __c and insert into __a (maps to the VSLI instruction; the
 * bit-merge itself happens inside the builtin).  Poly types use codes
 * p8=5 p16=6 (and 21/22 for the Q forms). */
#define vsli_n_s8(a, b, __c) __extension__ ({ \
  int8x8_t __a = (a); int8x8_t __b = (b); \
  (int8x8_t)__builtin_neon_vsli_n_v(__a, __b, __c, 0); })
#define vsli_n_s16(a, b, __c) __extension__ ({ \
  int16x4_t __a = (a); int16x4_t __b = (b); \
  (int16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
#define vsli_n_s32(a, b, __c) __extension__ ({ \
  int32x2_t __a = (a); int32x2_t __b = (b); \
  (int32x2_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
#define vsli_n_s64(a, b, __c) __extension__ ({ \
  int64x1_t __a = (a); int64x1_t __b = (b); \
  (int64x1_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
#define vsli_n_u8(a, b, __c) __extension__ ({ \
  uint8x8_t __a = (a); uint8x8_t __b = (b); \
  (uint8x8_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 8); })
#define vsli_n_u16(a, b, __c) __extension__ ({ \
  uint16x4_t __a = (a); uint16x4_t __b = (b); \
  (uint16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 9); })
#define vsli_n_u32(a, b, __c) __extension__ ({ \
  uint32x2_t __a = (a); uint32x2_t __b = (b); \
  (uint32x2_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 10); })
#define vsli_n_u64(a, b, __c) __extension__ ({ \
  uint64x1_t __a = (a); uint64x1_t __b = (b); \
  (uint64x1_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 11); })
#define vsli_n_p8(a, b, __c) __extension__ ({ \
  poly8x8_t __a = (a); poly8x8_t __b = (b); \
  (poly8x8_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); })
#define vsli_n_p16(a, b, __c) __extension__ ({ \
  poly16x4_t __a = (a); poly16x4_t __b = (b); \
  (poly16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 6); })
/* 128-bit (Q-register) forms. */
#define vsliq_n_s8(a, b, __c) __extension__ ({ \
  int8x16_t __a = (a); int8x16_t __b = (b); \
  (int8x16_t)__builtin_neon_vsliq_n_v(__a, __b, __c, 16); })
#define vsliq_n_s16(a, b, __c) __extension__ ({ \
  int16x8_t __a = (a); int16x8_t __b = (b); \
  (int16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 17); })
#define vsliq_n_s32(a, b, __c) __extension__ ({ \
  int32x4_t __a = (a); int32x4_t __b = (b); \
  (int32x4_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 18); })
#define vsliq_n_s64(a, b, __c) __extension__ ({ \
  int64x2_t __a = (a); int64x2_t __b = (b); \
  (int64x2_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 19); })
#define vsliq_n_u8(a, b, __c) __extension__ ({ \
  uint8x16_t __a = (a); uint8x16_t __b = (b); \
  (uint8x16_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 24); })
#define vsliq_n_u16(a, b, __c) __extension__ ({ \
  uint16x8_t __a = (a); uint16x8_t __b = (b); \
  (uint16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 25); })
#define vsliq_n_u32(a, b, __c) __extension__ ({ \
  uint32x4_t __a = (a); uint32x4_t __b = (b); \
  (uint32x4_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 26); })
#define vsliq_n_u64(a, b, __c) __extension__ ({ \
  uint64x2_t __a = (a); uint64x2_t __b = (b); \
  (uint64x2_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 27); })
#define vsliq_n_p8(a, b, __c) __extension__ ({ \
  poly8x16_t __a = (a); poly8x16_t __b = (b); \
  (poly8x16_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 21); })
#define vsliq_n_p16(a, b, __c) __extension__ ({ \
  poly16x8_t __a = (a); poly16x8_t __b = (b); \
  (poly16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 22); })
4117 | |
/* vsra_n / vsraq_n: shift-right-and-accumulate — shift elements of __b
 * right by immediate __c and add them to __a (maps to VSRA/USRA; the
 * addition is performed inside the builtin).  Type codes as elsewhere:
 * s8=0 s16=1 s32=2 s64=3, +8 unsigned, +16 for Q forms. */
#define vsra_n_s8(a, b, __c) __extension__ ({ \
  int8x8_t __a = (a); int8x8_t __b = (b); \
  (int8x8_t)__builtin_neon_vsra_n_v(__a, __b, __c, 0); })
#define vsra_n_s16(a, b, __c) __extension__ ({ \
  int16x4_t __a = (a); int16x4_t __b = (b); \
  (int16x4_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
#define vsra_n_s32(a, b, __c) __extension__ ({ \
  int32x2_t __a = (a); int32x2_t __b = (b); \
  (int32x2_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
#define vsra_n_s64(a, b, __c) __extension__ ({ \
  int64x1_t __a = (a); int64x1_t __b = (b); \
  (int64x1_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
#define vsra_n_u8(a, b, __c) __extension__ ({ \
  uint8x8_t __a = (a); uint8x8_t __b = (b); \
  (uint8x8_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 8); })
#define vsra_n_u16(a, b, __c) __extension__ ({ \
  uint16x4_t __a = (a); uint16x4_t __b = (b); \
  (uint16x4_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 9); })
#define vsra_n_u32(a, b, __c) __extension__ ({ \
  uint32x2_t __a = (a); uint32x2_t __b = (b); \
  (uint32x2_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 10); })
#define vsra_n_u64(a, b, __c) __extension__ ({ \
  uint64x1_t __a = (a); uint64x1_t __b = (b); \
  (uint64x1_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 11); })
/* 128-bit (Q-register) forms. */
#define vsraq_n_s8(a, b, __c) __extension__ ({ \
  int8x16_t __a = (a); int8x16_t __b = (b); \
  (int8x16_t)__builtin_neon_vsraq_n_v(__a, __b, __c, 16); })
#define vsraq_n_s16(a, b, __c) __extension__ ({ \
  int16x8_t __a = (a); int16x8_t __b = (b); \
  (int16x8_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 17); })
#define vsraq_n_s32(a, b, __c) __extension__ ({ \
  int32x4_t __a = (a); int32x4_t __b = (b); \
  (int32x4_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 18); })
#define vsraq_n_s64(a, b, __c) __extension__ ({ \
  int64x2_t __a = (a); int64x2_t __b = (b); \
  (int64x2_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 19); })
#define vsraq_n_u8(a, b, __c) __extension__ ({ \
  uint8x16_t __a = (a); uint8x16_t __b = (b); \
  (uint8x16_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 24); })
#define vsraq_n_u16(a, b, __c) __extension__ ({ \
  uint16x8_t __a = (a); uint16x8_t __b = (b); \
  (uint16x8_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 25); })
#define vsraq_n_u32(a, b, __c) __extension__ ({ \
  uint32x4_t __a = (a); uint32x4_t __b = (b); \
  (uint32x4_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 26); })
#define vsraq_n_u64(a, b, __c) __extension__ ({ \
  uint64x2_t __a = (a); uint64x2_t __b = (b); \
  (uint64x2_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 27); })
4166 | |
/* vsri_n / vsriq_n: shift-right-and-insert — shift elements of __b right
 * by immediate __c and insert into __a (maps to VSRI; the bit-merge happens
 * inside the builtin).  Mirrors the vsli_n family above, including the
 * poly codes p8=5 p16=6 (21/22 in Q forms). */
#define vsri_n_s8(a, b, __c) __extension__ ({ \
  int8x8_t __a = (a); int8x8_t __b = (b); \
  (int8x8_t)__builtin_neon_vsri_n_v(__a, __b, __c, 0); })
#define vsri_n_s16(a, b, __c) __extension__ ({ \
  int16x4_t __a = (a); int16x4_t __b = (b); \
  (int16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
#define vsri_n_s32(a, b, __c) __extension__ ({ \
  int32x2_t __a = (a); int32x2_t __b = (b); \
  (int32x2_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
#define vsri_n_s64(a, b, __c) __extension__ ({ \
  int64x1_t __a = (a); int64x1_t __b = (b); \
  (int64x1_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
#define vsri_n_u8(a, b, __c) __extension__ ({ \
  uint8x8_t __a = (a); uint8x8_t __b = (b); \
  (uint8x8_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 8); })
#define vsri_n_u16(a, b, __c) __extension__ ({ \
  uint16x4_t __a = (a); uint16x4_t __b = (b); \
  (uint16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 9); })
#define vsri_n_u32(a, b, __c) __extension__ ({ \
  uint32x2_t __a = (a); uint32x2_t __b = (b); \
  (uint32x2_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 10); })
#define vsri_n_u64(a, b, __c) __extension__ ({ \
  uint64x1_t __a = (a); uint64x1_t __b = (b); \
  (uint64x1_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 11); })
#define vsri_n_p8(a, b, __c) __extension__ ({ \
  poly8x8_t __a = (a); poly8x8_t __b = (b); \
  (poly8x8_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); })
#define vsri_n_p16(a, b, __c) __extension__ ({ \
  poly16x4_t __a = (a); poly16x4_t __b = (b); \
  (poly16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 6); })
/* 128-bit (Q-register) forms. */
#define vsriq_n_s8(a, b, __c) __extension__ ({ \
  int8x16_t __a = (a); int8x16_t __b = (b); \
  (int8x16_t)__builtin_neon_vsriq_n_v(__a, __b, __c, 16); })
#define vsriq_n_s16(a, b, __c) __extension__ ({ \
  int16x8_t __a = (a); int16x8_t __b = (b); \
  (int16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 17); })
#define vsriq_n_s32(a, b, __c) __extension__ ({ \
  int32x4_t __a = (a); int32x4_t __b = (b); \
  (int32x4_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 18); })
#define vsriq_n_s64(a, b, __c) __extension__ ({ \
  int64x2_t __a = (a); int64x2_t __b = (b); \
  (int64x2_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 19); })
#define vsriq_n_u8(a, b, __c) __extension__ ({ \
  uint8x16_t __a = (a); uint8x16_t __b = (b); \
  (uint8x16_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 24); })
#define vsriq_n_u16(a, b, __c) __extension__ ({ \
  uint16x8_t __a = (a); uint16x8_t __b = (b); \
  (uint16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 25); })
#define vsriq_n_u32(a, b, __c) __extension__ ({ \
  uint32x4_t __a = (a); uint32x4_t __b = (b); \
  (uint32x4_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 26); })
#define vsriq_n_u64(a, b, __c) __extension__ ({ \
  uint64x2_t __a = (a); uint64x2_t __b = (b); \
  (uint64x2_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 27); })
#define vsriq_n_p8(a, b, __c) __extension__ ({ \
  poly8x16_t __a = (a); poly8x16_t __b = (b); \
  (poly8x16_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 21); })
#define vsriq_n_p16(a, b, __c) __extension__ ({ \
  poly16x8_t __a = (a); poly16x8_t __b = (b); \
  (poly16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 22); })
4227 | |
/* vst1 / vst1q: store one whole vector to memory at pointer __a (no value
 * is produced).  The float/poly codes visible here: f32=4 p8=5 p16=6 f16=7,
 * +16 for the Q forms (20/21/22/23). */
#define vst1q_u8(__a, b) __extension__ ({ \
  uint8x16_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 24); })
#define vst1q_u16(__a, b) __extension__ ({ \
  uint16x8_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 25); })
#define vst1q_u32(__a, b) __extension__ ({ \
  uint32x4_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 26); })
#define vst1q_u64(__a, b) __extension__ ({ \
  uint64x2_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 27); })
#define vst1q_s8(__a, b) __extension__ ({ \
  int8x16_t __b = (b); \
  __builtin_neon_vst1q_v(__a, __b, 16); })
#define vst1q_s16(__a, b) __extension__ ({ \
  int16x8_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 17); })
#define vst1q_s32(__a, b) __extension__ ({ \
  int32x4_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 18); })
#define vst1q_s64(__a, b) __extension__ ({ \
  int64x2_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 19); })
#define vst1q_f16(__a, b) __extension__ ({ \
  float16x8_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 23); })
#define vst1q_f32(__a, b) __extension__ ({ \
  float32x4_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 20); })
#define vst1q_p8(__a, b) __extension__ ({ \
  poly8x16_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 21); })
#define vst1q_p16(__a, b) __extension__ ({ \
  poly16x8_t __b = (b); \
  __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 22); })
/* 64-bit (D-register) forms. */
#define vst1_u8(__a, b) __extension__ ({ \
  uint8x8_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 8); })
#define vst1_u16(__a, b) __extension__ ({ \
  uint16x4_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 9); })
#define vst1_u32(__a, b) __extension__ ({ \
  uint32x2_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 10); })
#define vst1_u64(__a, b) __extension__ ({ \
  uint64x1_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 11); })
#define vst1_s8(__a, b) __extension__ ({ \
  int8x8_t __b = (b); \
  __builtin_neon_vst1_v(__a, __b, 0); })
#define vst1_s16(__a, b) __extension__ ({ \
  int16x4_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 1); })
#define vst1_s32(__a, b) __extension__ ({ \
  int32x2_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 2); })
#define vst1_s64(__a, b) __extension__ ({ \
  int64x1_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 3); })
#define vst1_f16(__a, b) __extension__ ({ \
  float16x4_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 7); })
#define vst1_f32(__a, b) __extension__ ({ \
  float32x2_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 4); })
#define vst1_p8(__a, b) __extension__ ({ \
  poly8x8_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 5); })
#define vst1_p16(__a, b) __extension__ ({ \
  poly16x4_t __b = (b); \
  __builtin_neon_vst1_v(__a, (int8x8_t)__b, 6); })
4300 | |
/* vst1_lane / vst1q_lane: store the single element in lane __c of vector
 * (b) to memory at pointer __a.  __c must be a compile-time constant,
 * hence the macro wrappers; type codes as in the vst1 family above. */
#define vst1q_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x16_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 24); })
#define vst1q_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x8_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 25); })
#define vst1q_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x4_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 26); })
#define vst1q_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x2_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 27); })
#define vst1q_lane_s8(__a, b, __c) __extension__ ({ \
  int8x16_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, __b, __c, 16); })
#define vst1q_lane_s16(__a, b, __c) __extension__ ({ \
  int16x8_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 17); })
#define vst1q_lane_s32(__a, b, __c) __extension__ ({ \
  int32x4_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 18); })
#define vst1q_lane_s64(__a, b, __c) __extension__ ({ \
  int64x2_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 19); })
#define vst1q_lane_f16(__a, b, __c) __extension__ ({ \
  float16x8_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 23); })
#define vst1q_lane_f32(__a, b, __c) __extension__ ({ \
  float32x4_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 20); })
#define vst1q_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x16_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 21); })
#define vst1q_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x8_t __b = (b); \
  __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 22); })
/* 64-bit (D-register) forms. */
#define vst1_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x8_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 8); })
#define vst1_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x4_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 9); })
#define vst1_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x2_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 10); })
#define vst1_lane_u64(__a, b, __c) __extension__ ({ \
  uint64x1_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 11); })
#define vst1_lane_s8(__a, b, __c) __extension__ ({ \
  int8x8_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, __b, __c, 0); })
#define vst1_lane_s16(__a, b, __c) __extension__ ({ \
  int16x4_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 1); })
#define vst1_lane_s32(__a, b, __c) __extension__ ({ \
  int32x2_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 2); })
#define vst1_lane_s64(__a, b, __c) __extension__ ({ \
  int64x1_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 3); })
#define vst1_lane_f16(__a, b, __c) __extension__ ({ \
  float16x4_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 7); })
#define vst1_lane_f32(__a, b, __c) __extension__ ({ \
  float32x2_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 4); })
#define vst1_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x8_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 5); })
#define vst1_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x4_t __b = (b); \
  __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 6); })
4373 | |
/* vst2 / vst2q: interleaved store of a 2-vector structure (the NxMx2_t
 * aggregate's .val[0]/.val[1]) to memory at __a (maps to VST2).  Each
 * member is bit-cast to the generic int8 vector for the builtin.
 * NOTE: no 64-bit-element Q forms exist (VST2 does not support them),
 * which is why vst2q_u64/vst2q_s64 are absent here. */
#define vst2q_u8(__a, b) __extension__ ({ \
  uint8x16x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 24); })
#define vst2q_u16(__a, b) __extension__ ({ \
  uint16x8x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 25); })
#define vst2q_u32(__a, b) __extension__ ({ \
  uint32x4x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 26); })
#define vst2q_s8(__a, b) __extension__ ({ \
  int8x16x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, __b.val[0], __b.val[1], 16); })
#define vst2q_s16(__a, b) __extension__ ({ \
  int16x8x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 17); })
#define vst2q_s32(__a, b) __extension__ ({ \
  int32x4x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 18); })
#define vst2q_f16(__a, b) __extension__ ({ \
  float16x8x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 23); })
#define vst2q_f32(__a, b) __extension__ ({ \
  float32x4x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 20); })
#define vst2q_p8(__a, b) __extension__ ({ \
  poly8x16x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 21); })
#define vst2q_p16(__a, b) __extension__ ({ \
  poly16x8x2_t __b = (b); \
  __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 22); })
/* 64-bit (D-register) forms — these do include the 64-bit-element types. */
#define vst2_u8(__a, b) __extension__ ({ \
  uint8x8x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 8); })
#define vst2_u16(__a, b) __extension__ ({ \
  uint16x4x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 9); })
#define vst2_u32(__a, b) __extension__ ({ \
  uint32x2x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 10); })
#define vst2_u64(__a, b) __extension__ ({ \
  uint64x1x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 11); })
#define vst2_s8(__a, b) __extension__ ({ \
  int8x8x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, __b.val[0], __b.val[1], 0); })
#define vst2_s16(__a, b) __extension__ ({ \
  int16x4x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 1); })
#define vst2_s32(__a, b) __extension__ ({ \
  int32x2x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 2); })
#define vst2_s64(__a, b) __extension__ ({ \
  int64x1x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 3); })
#define vst2_f16(__a, b) __extension__ ({ \
  float16x4x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 7); })
#define vst2_f32(__a, b) __extension__ ({ \
  float32x2x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 4); })
#define vst2_p8(__a, b) __extension__ ({ \
  poly8x8x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 5); })
#define vst2_p16(__a, b) __extension__ ({ \
  poly16x4x2_t __b = (b); \
  __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 6); })
4440 | |
/* vst2_lane / vst2q_lane: store lane __c from both members of the
 * 2-vector structure (b), interleaved, to memory at __a (maps to the
 * lane form of VST2).  NOTE: no 8-bit-element Q forms appear here
 * (consistent with VST2 lane addressing limits). */
#define vst2q_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x8x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 25); })
#define vst2q_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x4x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 26); })
#define vst2q_lane_s16(__a, b, __c) __extension__ ({ \
  int16x8x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 17); })
#define vst2q_lane_s32(__a, b, __c) __extension__ ({ \
  int32x4x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 18); })
#define vst2q_lane_f16(__a, b, __c) __extension__ ({ \
  float16x8x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 23); })
#define vst2q_lane_f32(__a, b, __c) __extension__ ({ \
  float32x4x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 20); })
#define vst2q_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x8x2_t __b = (b); \
  __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 22); })
/* 64-bit (D-register) forms. */
#define vst2_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x8x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 8); })
#define vst2_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x4x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 9); })
#define vst2_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x2x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 10); })
#define vst2_lane_s8(__a, b, __c) __extension__ ({ \
  int8x8x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, __b.val[0], __b.val[1], __c, 0); })
#define vst2_lane_s16(__a, b, __c) __extension__ ({ \
  int16x4x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 1); })
#define vst2_lane_s32(__a, b, __c) __extension__ ({ \
  int32x2x2_t __b = (b); \
  __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 2); })
4480 #define vst2_lane_f16(__a, b, __c) __extension__ ({ \ | |
4481 float16x4x2_t __b = (b); \ | |
4482 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __
c, 7); }) | |
4483 #define vst2_lane_f32(__a, b, __c) __extension__ ({ \ | |
4484 float32x2x2_t __b = (b); \ | |
4485 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __
c, 4); }) | |
4486 #define vst2_lane_p8(__a, b, __c) __extension__ ({ \ | |
4487 poly8x8x2_t __b = (b); \ | |
4488 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __
c, 5); }) | |
4489 #define vst2_lane_p16(__a, b, __c) __extension__ ({ \ | |
4490 poly16x4x2_t __b = (b); \ | |
4491 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __
c, 6); }) | |
4492 | |
/* vst3{q}_<type>: store three vectors through __a with 3-way lane
 * interleaving (the ARM "vst3" family).  Q forms (128-bit) first, then
 * D forms (64-bit).  Same statement-expression/type-code scheme as vst2. */
#define vst3q_u8(__a, b) __extension__ ({ \
  uint8x16x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 24); })
#define vst3q_u16(__a, b) __extension__ ({ \
  uint16x8x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 25); })
#define vst3q_u32(__a, b) __extension__ ({ \
  uint32x4x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 26); })
#define vst3q_s8(__a, b) __extension__ ({ \
  int8x16x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, __b.val[0], __b.val[1], __b.val[2], 16); })
#define vst3q_s16(__a, b) __extension__ ({ \
  int16x8x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 17); })
#define vst3q_s32(__a, b) __extension__ ({ \
  int32x4x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 18); })
#define vst3q_f16(__a, b) __extension__ ({ \
  float16x8x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 23); })
#define vst3q_f32(__a, b) __extension__ ({ \
  float32x4x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 20); })
#define vst3q_p8(__a, b) __extension__ ({ \
  poly8x16x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 21); })
#define vst3q_p16(__a, b) __extension__ ({ \
  poly16x8x3_t __b = (b); \
  __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 22); })
#define vst3_u8(__a, b) __extension__ ({ \
  uint8x8x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 8); })
#define vst3_u16(__a, b) __extension__ ({ \
  uint16x4x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 9); })
#define vst3_u32(__a, b) __extension__ ({ \
  uint32x2x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 10); })
#define vst3_u64(__a, b) __extension__ ({ \
  uint64x1x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 11); })
#define vst3_s8(__a, b) __extension__ ({ \
  int8x8x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, __b.val[0], __b.val[1], __b.val[2], 0); })
#define vst3_s16(__a, b) __extension__ ({ \
  int16x4x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 1); })
#define vst3_s32(__a, b) __extension__ ({ \
  int32x2x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 2); })
#define vst3_s64(__a, b) __extension__ ({ \
  int64x1x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 3); })
#define vst3_f16(__a, b) __extension__ ({ \
  float16x4x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 7); })
#define vst3_f32(__a, b) __extension__ ({ \
  float32x2x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 4); })
#define vst3_p8(__a, b) __extension__ ({ \
  poly8x8x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 5); })
#define vst3_p16(__a, b) __extension__ ({ \
  poly16x4x3_t __b = (b); \
  __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 6); })
4559 | |
/* vst3{q}_lane_<type>: store lane __c of each of the three vectors in (b)
 * through __a (the ARM "vst3 lane" family).  Same scheme as vst2_lane. */
#define vst3q_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x8x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 25); })
#define vst3q_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x4x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 26); })
#define vst3q_lane_s16(__a, b, __c) __extension__ ({ \
  int16x8x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 17); })
#define vst3q_lane_s32(__a, b, __c) __extension__ ({ \
  int32x4x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 18); })
#define vst3q_lane_f16(__a, b, __c) __extension__ ({ \
  float16x8x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 23); })
#define vst3q_lane_f32(__a, b, __c) __extension__ ({ \
  float32x4x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 20); })
#define vst3q_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x8x3_t __b = (b); \
  __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 22); })
#define vst3_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x8x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 8); })
#define vst3_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x4x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 9); })
#define vst3_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x2x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 10); })
#define vst3_lane_s8(__a, b, __c) __extension__ ({ \
  int8x8x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 0); })
#define vst3_lane_s16(__a, b, __c) __extension__ ({ \
  int16x4x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 1); })
#define vst3_lane_s32(__a, b, __c) __extension__ ({ \
  int32x2x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 2); })
#define vst3_lane_f16(__a, b, __c) __extension__ ({ \
  float16x4x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 7); })
#define vst3_lane_f32(__a, b, __c) __extension__ ({ \
  float32x2x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 4); })
#define vst3_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x8x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 5); })
#define vst3_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x4x3_t __b = (b); \
  __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 6); })
4611 | |
/* vst4{q}_<type>: store four vectors through __a with 4-way lane
 * interleaving (the ARM "vst4" family).  Q forms (128-bit) first, then
 * D forms (64-bit).  Same statement-expression/type-code scheme as vst2. */
#define vst4q_u8(__a, b) __extension__ ({ \
  uint8x16x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 24); })
#define vst4q_u16(__a, b) __extension__ ({ \
  uint16x8x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 25); })
#define vst4q_u32(__a, b) __extension__ ({ \
  uint32x4x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 26); })
#define vst4q_s8(__a, b) __extension__ ({ \
  int8x16x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], 16); })
#define vst4q_s16(__a, b) __extension__ ({ \
  int16x8x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 17); })
#define vst4q_s32(__a, b) __extension__ ({ \
  int32x4x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 18); })
#define vst4q_f16(__a, b) __extension__ ({ \
  float16x8x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 23); })
#define vst4q_f32(__a, b) __extension__ ({ \
  float32x4x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 20); })
#define vst4q_p8(__a, b) __extension__ ({ \
  poly8x16x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 21); })
#define vst4q_p16(__a, b) __extension__ ({ \
  poly16x8x4_t __b = (b); \
  __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 22); })
#define vst4_u8(__a, b) __extension__ ({ \
  uint8x8x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 8); })
#define vst4_u16(__a, b) __extension__ ({ \
  uint16x4x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 9); })
#define vst4_u32(__a, b) __extension__ ({ \
  uint32x2x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 10); })
#define vst4_u64(__a, b) __extension__ ({ \
  uint64x1x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 11); })
#define vst4_s8(__a, b) __extension__ ({ \
  int8x8x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], 0); })
#define vst4_s16(__a, b) __extension__ ({ \
  int16x4x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 1); })
#define vst4_s32(__a, b) __extension__ ({ \
  int32x2x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 2); })
#define vst4_s64(__a, b) __extension__ ({ \
  int64x1x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 3); })
#define vst4_f16(__a, b) __extension__ ({ \
  float16x4x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 7); })
#define vst4_f32(__a, b) __extension__ ({ \
  float32x2x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 4); })
#define vst4_p8(__a, b) __extension__ ({ \
  poly8x8x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 5); })
#define vst4_p16(__a, b) __extension__ ({ \
  poly16x4x4_t __b = (b); \
  __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 6); })
4678 | |
/* vst4{q}_lane_<type>: store lane __c of each of the four vectors in (b)
 * through __a (the ARM "vst4 lane" family).  Same scheme as vst2_lane. */
#define vst4q_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x8x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 25); })
#define vst4q_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x4x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 26); })
#define vst4q_lane_s16(__a, b, __c) __extension__ ({ \
  int16x8x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 17); })
#define vst4q_lane_s32(__a, b, __c) __extension__ ({ \
  int32x4x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 18); })
#define vst4q_lane_f16(__a, b, __c) __extension__ ({ \
  float16x8x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 23); })
#define vst4q_lane_f32(__a, b, __c) __extension__ ({ \
  float32x4x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 20); })
#define vst4q_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x8x4_t __b = (b); \
  __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 22); })
#define vst4_lane_u8(__a, b, __c) __extension__ ({ \
  uint8x8x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 8); })
#define vst4_lane_u16(__a, b, __c) __extension__ ({ \
  uint16x4x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 9); })
#define vst4_lane_u32(__a, b, __c) __extension__ ({ \
  uint32x2x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 10); })
#define vst4_lane_s8(__a, b, __c) __extension__ ({ \
  int8x8x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); })
#define vst4_lane_s16(__a, b, __c) __extension__ ({ \
  int16x4x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 1); })
#define vst4_lane_s32(__a, b, __c) __extension__ ({ \
  int32x2x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 2); })
#define vst4_lane_f16(__a, b, __c) __extension__ ({ \
  float16x4x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 7); })
#define vst4_lane_f32(__a, b, __c) __extension__ ({ \
  float32x2x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 4); })
#define vst4_lane_p8(__a, b, __c) __extension__ ({ \
  poly8x8x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 5); })
#define vst4_lane_p16(__a, b, __c) __extension__ ({ \
  poly16x4x4_t __b = (b); \
  __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 6); })
4730 | |
4731 __ai int8x8_t vsub_s8(int8x8_t __a, int8x8_t __b) { \ | |
4732 return __a - __b; } | |
4733 __ai int16x4_t vsub_s16(int16x4_t __a, int16x4_t __b) { \ | |
4734 return __a - __b; } | |
4735 __ai int32x2_t vsub_s32(int32x2_t __a, int32x2_t __b) { \ | |
4736 return __a - __b; } | |
4737 __ai int64x1_t vsub_s64(int64x1_t __a, int64x1_t __b) { \ | |
4738 return __a - __b; } | |
4739 __ai float32x2_t vsub_f32(float32x2_t __a, float32x2_t __b) { \ | |
4740 return __a - __b; } | |
4741 __ai uint8x8_t vsub_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
4742 return __a - __b; } | |
4743 __ai uint16x4_t vsub_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
4744 return __a - __b; } | |
4745 __ai uint32x2_t vsub_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
4746 return __a - __b; } | |
4747 __ai uint64x1_t vsub_u64(uint64x1_t __a, uint64x1_t __b) { \ | |
4748 return __a - __b; } | |
4749 __ai int8x16_t vsubq_s8(int8x16_t __a, int8x16_t __b) { \ | |
4750 return __a - __b; } | |
4751 __ai int16x8_t vsubq_s16(int16x8_t __a, int16x8_t __b) { \ | |
4752 return __a - __b; } | |
4753 __ai int32x4_t vsubq_s32(int32x4_t __a, int32x4_t __b) { \ | |
4754 return __a - __b; } | |
4755 __ai int64x2_t vsubq_s64(int64x2_t __a, int64x2_t __b) { \ | |
4756 return __a - __b; } | |
4757 __ai float32x4_t vsubq_f32(float32x4_t __a, float32x4_t __b) { \ | |
4758 return __a - __b; } | |
4759 __ai uint8x16_t vsubq_u8(uint8x16_t __a, uint8x16_t __b) { \ | |
4760 return __a - __b; } | |
4761 __ai uint16x8_t vsubq_u16(uint16x8_t __a, uint16x8_t __b) { \ | |
4762 return __a - __b; } | |
4763 __ai uint32x4_t vsubq_u32(uint32x4_t __a, uint32x4_t __b) { \ | |
4764 return __a - __b; } | |
4765 __ai uint64x2_t vsubq_u64(uint64x2_t __a, uint64x2_t __b) { \ | |
4766 return __a - __b; } | |
4767 | |
/* vsubhn_<t>: subtract halving-narrow (ARM VSUBHN) — subtracts Q-register
 * operands and keeps the high half of each lane, yielding a D vector of
 * half-width elements.  Operands are reinterpreted as byte vectors for the
 * polymorphic builtin; the final literal selects the element type. */
__ai int8x8_t vsubhn_s16(int16x8_t __a, int16x8_t __b) { \
  return (int8x8_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 0); }
__ai int16x4_t vsubhn_s32(int32x4_t __a, int32x4_t __b) { \
  return (int16x4_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 1); }
__ai int32x2_t vsubhn_s64(int64x2_t __a, int64x2_t __b) { \
  return (int32x2_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 2); }
__ai uint8x8_t vsubhn_u16(uint16x8_t __a, uint16x8_t __b) { \
  return (uint8x8_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 8); }
__ai uint16x4_t vsubhn_u32(uint32x4_t __a, uint32x4_t __b) { \
  return (uint16x4_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 9); }
__ai uint32x2_t vsubhn_u64(uint64x2_t __a, uint64x2_t __b) { \
  return (uint32x2_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 10); }
4780 | |
4781 __ai int16x8_t vsubl_s8(int8x8_t __a, int8x8_t __b) { \ | |
4782 return vmovl_s8(__a) - vmovl_s8(__b); } | |
4783 __ai int32x4_t vsubl_s16(int16x4_t __a, int16x4_t __b) { \ | |
4784 return vmovl_s16(__a) - vmovl_s16(__b); } | |
4785 __ai int64x2_t vsubl_s32(int32x2_t __a, int32x2_t __b) { \ | |
4786 return vmovl_s32(__a) - vmovl_s32(__b); } | |
4787 __ai uint16x8_t vsubl_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
4788 return vmovl_u8(__a) - vmovl_u8(__b); } | |
4789 __ai uint32x4_t vsubl_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
4790 return vmovl_u16(__a) - vmovl_u16(__b); } | |
4791 __ai uint64x2_t vsubl_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
4792 return vmovl_u32(__a) - vmovl_u32(__b); } | |
4793 | |
4794 __ai int16x8_t vsubw_s8(int16x8_t __a, int8x8_t __b) { \ | |
4795 return __a - vmovl_s8(__b); } | |
4796 __ai int32x4_t vsubw_s16(int32x4_t __a, int16x4_t __b) { \ | |
4797 return __a - vmovl_s16(__b); } | |
4798 __ai int64x2_t vsubw_s32(int64x2_t __a, int32x2_t __b) { \ | |
4799 return __a - vmovl_s32(__b); } | |
4800 __ai uint16x8_t vsubw_u8(uint16x8_t __a, uint8x8_t __b) { \ | |
4801 return __a - vmovl_u8(__b); } | |
4802 __ai uint32x4_t vsubw_u16(uint32x4_t __a, uint16x4_t __b) { \ | |
4803 return __a - vmovl_u16(__b); } | |
4804 __ai uint64x2_t vsubw_u32(uint64x2_t __a, uint32x2_t __b) { \ | |
4805 return __a - vmovl_u32(__b); } | |
4806 | |
/* vtbl1: table lookup (ARM VTBL) with a one-vector table __a and index
 * vector __b.  Casts reinterpret to byte vectors for the polymorphic
 * builtin; the final literal encodes the result element type. */
__ai uint8x8_t vtbl1_u8(uint8x8_t __a, uint8x8_t __b) { \
  return (uint8x8_t)__builtin_neon_vtbl1_v((int8x8_t)__a, (int8x8_t)__b, 8); }
__ai int8x8_t vtbl1_s8(int8x8_t __a, int8x8_t __b) { \
  return (int8x8_t)__builtin_neon_vtbl1_v(__a, __b, 0); }
__ai poly8x8_t vtbl1_p8(poly8x8_t __a, uint8x8_t __b) { \
  return (poly8x8_t)__builtin_neon_vtbl1_v((int8x8_t)__a, (int8x8_t)__b, 5); }
4813 | |
/* vtbl2: table lookup with a two-vector table; table registers are passed
 * to the builtin individually as byte vectors. */
__ai uint8x8_t vtbl2_u8(uint8x8x2_t __a, uint8x8_t __b) { \
  return (uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__b, 8); }
__ai int8x8_t vtbl2_s8(int8x8x2_t __a, int8x8_t __b) { \
  return (int8x8_t)__builtin_neon_vtbl2_v(__a.val[0], __a.val[1], __b, 0); }
__ai poly8x8_t vtbl2_p8(poly8x8x2_t __a, uint8x8_t __b) { \
  return (poly8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__b, 5); }
4820 | |
/* vtbl3: table lookup with a three-vector table. */
__ai uint8x8_t vtbl3_u8(uint8x8x3_t __a, uint8x8_t __b) { \
  return (uint8x8_t)__builtin_neon_vtbl3_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__b, 8); }
__ai int8x8_t vtbl3_s8(int8x8x3_t __a, int8x8_t __b) { \
  return (int8x8_t)__builtin_neon_vtbl3_v(__a.val[0], __a.val[1], __a.val[2], __b, 0); }
__ai poly8x8_t vtbl3_p8(poly8x8x3_t __a, uint8x8_t __b) { \
  return (poly8x8_t)__builtin_neon_vtbl3_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__b, 5); }
4827 | |
/* vtbl4: table lookup with a four-vector table. */
__ai uint8x8_t vtbl4_u8(uint8x8x4_t __a, uint8x8_t __b) { \
  return (uint8x8_t)__builtin_neon_vtbl4_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__a.val[3], (int8x8_t)__b, 8); }
__ai int8x8_t vtbl4_s8(int8x8x4_t __a, int8x8_t __b) { \
  return (int8x8_t)__builtin_neon_vtbl4_v(__a.val[0], __a.val[1], __a.val[2], __a.val[3], __b, 0); }
__ai poly8x8_t vtbl4_p8(poly8x8x4_t __a, uint8x8_t __b) { \
  return (poly8x8_t)__builtin_neon_vtbl4_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__a.val[3], (int8x8_t)__b, 5); }
4834 | |
/* vtbx1: table extension (ARM VTBX) — like vtbl1, but out-of-range indices
 * leave the corresponding lane of the destination operand __a unchanged
 * rather than writing zero.  __b is the table, __c the index vector. */
__ai uint8x8_t vtbx1_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { \
  return (uint8x8_t)__builtin_neon_vtbx1_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 8); }
__ai int8x8_t vtbx1_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { \
  return (int8x8_t)__builtin_neon_vtbx1_v(__a, __b, __c, 0); }
__ai poly8x8_t vtbx1_p8(poly8x8_t __a, poly8x8_t __b, uint8x8_t __c) { \
  return (poly8x8_t)__builtin_neon_vtbx1_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 5); }
4841 | |
/* vtbx2: table extension with a two-vector table. */
__ai uint8x8_t vtbx2_u8(uint8x8_t __a, uint8x8x2_t __b, uint8x8_t __c) { \
  return (uint8x8_t)__builtin_neon_vtbx2_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__c, 8); }
__ai int8x8_t vtbx2_s8(int8x8_t __a, int8x8x2_t __b, int8x8_t __c) { \
  return (int8x8_t)__builtin_neon_vtbx2_v(__a, __b.val[0], __b.val[1], __c, 0); }
__ai poly8x8_t vtbx2_p8(poly8x8_t __a, poly8x8x2_t __b, uint8x8_t __c) { \
  return (poly8x8_t)__builtin_neon_vtbx2_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__c, 5); }
4848 | |
/* vtbx3: table extension with a three-vector table. */
__ai uint8x8_t vtbx3_u8(uint8x8_t __a, uint8x8x3_t __b, uint8x8_t __c) { \
  return (uint8x8_t)__builtin_neon_vtbx3_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__c, 8); }
__ai int8x8_t vtbx3_s8(int8x8_t __a, int8x8x3_t __b, int8x8_t __c) { \
  return (int8x8_t)__builtin_neon_vtbx3_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 0); }
__ai poly8x8_t vtbx3_p8(poly8x8_t __a, poly8x8x3_t __b, uint8x8_t __c) { \
  return (poly8x8_t)__builtin_neon_vtbx3_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__c, 5); }
4855 | |
/* vtbx4: table extension with a four-vector table. */
__ai uint8x8_t vtbx4_u8(uint8x8_t __a, uint8x8x4_t __b, uint8x8_t __c) { \
  return (uint8x8_t)__builtin_neon_vtbx4_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], (int8x8_t)__c, 8); }
__ai int8x8_t vtbx4_s8(int8x8_t __a, int8x8x4_t __b, int8x8_t __c) { \
  return (int8x8_t)__builtin_neon_vtbx4_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); }
__ai poly8x8_t vtbx4_p8(poly8x8_t __a, poly8x8x4_t __b, uint8x8_t __c) { \
  return (poly8x8_t)__builtin_neon_vtbx4_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], (int8x8_t)__c, 5); }
4862 | |
/* vtrn{q}: vector transpose (ARM VTRN).  The builtin produces both result
 * vectors at once, writing them through the address of a local x2 struct r,
 * which is then returned by value.  Type codes follow the usual scheme
 * (Q forms = D codes + 16). */
__ai int8x8x2_t vtrn_s8(int8x8_t __a, int8x8_t __b) { \
  int8x8x2_t r; __builtin_neon_vtrn_v(&r, __a, __b, 0); return r; }
__ai int16x4x2_t vtrn_s16(int16x4_t __a, int16x4_t __b) { \
  int16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; }
__ai int32x2x2_t vtrn_s32(int32x2_t __a, int32x2_t __b) { \
  int32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; }
__ai uint8x8x2_t vtrn_u8(uint8x8_t __a, uint8x8_t __b) { \
  uint8x8x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 8); return r; }
__ai uint16x4x2_t vtrn_u16(uint16x4_t __a, uint16x4_t __b) { \
  uint16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 9); return r; }
__ai uint32x2x2_t vtrn_u32(uint32x2_t __a, uint32x2_t __b) { \
  uint32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 10); return r; }
__ai float32x2x2_t vtrn_f32(float32x2_t __a, float32x2_t __b) { \
  float32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); return r; }
__ai poly8x8x2_t vtrn_p8(poly8x8_t __a, poly8x8_t __b) { \
  poly8x8x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; }
__ai poly16x4x2_t vtrn_p16(poly16x4_t __a, poly16x4_t __b) { \
  poly16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 6); return r; }
__ai int8x16x2_t vtrnq_s8(int8x16_t __a, int8x16_t __b) { \
  int8x16x2_t r; __builtin_neon_vtrnq_v(&r, __a, __b, 16); return r; }
__ai int16x8x2_t vtrnq_s16(int16x8_t __a, int16x8_t __b) { \
  int16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 17); return r; }
__ai int32x4x2_t vtrnq_s32(int32x4_t __a, int32x4_t __b) { \
  int32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 18); return r; }
__ai uint8x16x2_t vtrnq_u8(uint8x16_t __a, uint8x16_t __b) { \
  uint8x16x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 24); return r; }
__ai uint16x8x2_t vtrnq_u16(uint16x8_t __a, uint16x8_t __b) { \
  uint16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 25); return r; }
__ai uint32x4x2_t vtrnq_u32(uint32x4_t __a, uint32x4_t __b) { \
  uint32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 26); return r; }
__ai float32x4x2_t vtrnq_f32(float32x4_t __a, float32x4_t __b) { \
  float32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 20); return r; }
__ai poly8x16x2_t vtrnq_p8(poly8x16_t __a, poly8x16_t __b) { \
  poly8x16x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 21); return r; }
__ai poly16x8x2_t vtrnq_p16(poly16x8_t __a, poly16x8_t __b) { \
  poly16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 22); return r; }
4899 | |
/* vtst family: NEON "test bits" intrinsics on 64-bit (d-register) vectors.
 * Each wrapper forwards to the clang builtin __builtin_neon_vtst_v, casting
 * both operands to the builtin's generic int8x8_t form and casting the
 * result back to the unsigned vector of matching lane width.  The trailing
 * integer is clang's internal type code: 8 for 8-bit lanes, 9 for 16-bit,
 * 10 for 32-bit (as used consistently across these wrappers).
 * NOTE(review): the builtin itself is opaque here; per the intrinsic name
 * this should yield, per lane, all-ones when (__a & __b) != 0 and zero
 * otherwise — confirm against the ARM VTST instruction reference. */
__ai uint8x8_t vtst_s8(int8x8_t __a, int8x8_t __b) {
  return (uint8x8_t)__builtin_neon_vtst_v(__a, __b, 8); }
__ai uint16x4_t vtst_s16(int16x4_t __a, int16x4_t __b) {
  return (uint16x4_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 9); }
__ai uint32x2_t vtst_s32(int32x2_t __a, int32x2_t __b) {
  return (uint32x2_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 10); }
__ai uint8x8_t vtst_u8(uint8x8_t __a, uint8x8_t __b) {
  return (uint8x8_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 8); }
__ai uint16x4_t vtst_u16(uint16x4_t __a, uint16x4_t __b) {
  return (uint16x4_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 9); }
__ai uint32x2_t vtst_u32(uint32x2_t __a, uint32x2_t __b) {
  return (uint32x2_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 10); }
__ai uint8x8_t vtst_p8(poly8x8_t __a, poly8x8_t __b) {
  return (uint8x8_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 8); }
/* vtstq family: NEON "test bits" intrinsics on 128-bit (q-register)
 * vectors.  Each wrapper forwards to __builtin_neon_vtstq_v, casting both
 * operands to the generic int8x16_t form and the result back to the
 * unsigned vector of matching lane width.  The trailing integer is clang's
 * internal type code: 24 for 8-bit lanes, 25 for 16-bit, 26 for 32-bit
 * (as used consistently across these wrappers).
 * NOTE(review): per the intrinsic name this should yield, per lane,
 * all-ones when (__a & __b) != 0 and zero otherwise — confirm against the
 * ARM VTST instruction reference. */
__ai uint8x16_t vtstq_s8(int8x16_t __a, int8x16_t __b) {
  return (uint8x16_t)__builtin_neon_vtstq_v(__a, __b, 24); }
__ai uint16x8_t vtstq_s16(int16x8_t __a, int16x8_t __b) {
  return (uint16x8_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 25); }
__ai uint32x4_t vtstq_s32(int32x4_t __a, int32x4_t __b) {
  return (uint32x4_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 26); }
__ai uint8x16_t vtstq_u8(uint8x16_t __a, uint8x16_t __b) {
  return (uint8x16_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 24); }
__ai uint16x8_t vtstq_u16(uint16x8_t __a, uint16x8_t __b) {
  return (uint16x8_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 25); }
__ai uint32x4_t vtstq_u32(uint32x4_t __a, uint32x4_t __b) {
  return (uint32x4_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 26); }
__ai uint8x16_t vtstq_p8(poly8x16_t __a, poly8x16_t __b) {
  return (uint8x16_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 24); }
4928 | |
4929 __ai int8x8x2_t vuzp_s8(int8x8_t __a, int8x8_t __b) { \ | |
4930 int8x8x2_t r; __builtin_neon_vuzp_v(&r, __a, __b, 0); return r; } | |
4931 __ai int16x4x2_t vuzp_s16(int16x4_t __a, int16x4_t __b) { \ | |
4932 int16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); ret
urn r; } | |
4933 __ai int32x2x2_t vuzp_s32(int32x2_t __a, int32x2_t __b) { \ | |
4934 int32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); ret
urn r; } | |
4935 __ai uint8x8x2_t vuzp_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
4936 uint8x8x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 8); ret
urn r; } | |
4937 __ai uint16x4x2_t vuzp_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
4938 uint16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 9); re
turn r; } | |
4939 __ai uint32x2x2_t vuzp_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
4940 uint32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 10); r
eturn r; } | |
4941 __ai float32x2x2_t vuzp_f32(float32x2_t __a, float32x2_t __b) { \ | |
4942 float32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); r
eturn r; } | |
4943 __ai poly8x8x2_t vuzp_p8(poly8x8_t __a, poly8x8_t __b) { \ | |
4944 poly8x8x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); ret
urn r; } | |
4945 __ai poly16x4x2_t vuzp_p16(poly16x4_t __a, poly16x4_t __b) { \ | |
4946 poly16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 6); re
turn r; } | |
4947 __ai int8x16x2_t vuzpq_s8(int8x16_t __a, int8x16_t __b) { \ | |
4948 int8x16x2_t r; __builtin_neon_vuzpq_v(&r, __a, __b, 16); return r; } | |
4949 __ai int16x8x2_t vuzpq_s16(int16x8_t __a, int16x8_t __b) { \ | |
4950 int16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 17);
return r; } | |
4951 __ai int32x4x2_t vuzpq_s32(int32x4_t __a, int32x4_t __b) { \ | |
4952 int32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 18);
return r; } | |
4953 __ai uint8x16x2_t vuzpq_u8(uint8x16_t __a, uint8x16_t __b) { \ | |
4954 uint8x16x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 24)
; return r; } | |
4955 __ai uint16x8x2_t vuzpq_u16(uint16x8_t __a, uint16x8_t __b) { \ | |
4956 uint16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 25)
; return r; } | |
4957 __ai uint32x4x2_t vuzpq_u32(uint32x4_t __a, uint32x4_t __b) { \ | |
4958 uint32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 26)
; return r; } | |
4959 __ai float32x4x2_t vuzpq_f32(float32x4_t __a, float32x4_t __b) { \ | |
4960 float32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 20
); return r; } | |
4961 __ai poly8x16x2_t vuzpq_p8(poly8x16_t __a, poly8x16_t __b) { \ | |
4962 poly8x16x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 21)
; return r; } | |
4963 __ai poly16x8x2_t vuzpq_p16(poly16x8_t __a, poly16x8_t __b) { \ | |
4964 poly16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 22)
; return r; } | |
4965 | |
4966 __ai int8x8x2_t vzip_s8(int8x8_t __a, int8x8_t __b) { \ | |
4967 int8x8x2_t r; __builtin_neon_vzip_v(&r, __a, __b, 0); return r; } | |
4968 __ai int16x4x2_t vzip_s16(int16x4_t __a, int16x4_t __b) { \ | |
4969 int16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); ret
urn r; } | |
4970 __ai int32x2x2_t vzip_s32(int32x2_t __a, int32x2_t __b) { \ | |
4971 int32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); ret
urn r; } | |
4972 __ai uint8x8x2_t vzip_u8(uint8x8_t __a, uint8x8_t __b) { \ | |
4973 uint8x8x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 8); ret
urn r; } | |
4974 __ai uint16x4x2_t vzip_u16(uint16x4_t __a, uint16x4_t __b) { \ | |
4975 uint16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 9); re
turn r; } | |
4976 __ai uint32x2x2_t vzip_u32(uint32x2_t __a, uint32x2_t __b) { \ | |
4977 uint32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 10); r
eturn r; } | |
4978 __ai float32x2x2_t vzip_f32(float32x2_t __a, float32x2_t __b) { \ | |
4979 float32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); r
eturn r; } | |
4980 __ai poly8x8x2_t vzip_p8(poly8x8_t __a, poly8x8_t __b) { \ | |
4981 poly8x8x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); ret
urn r; } | |
4982 __ai poly16x4x2_t vzip_p16(poly16x4_t __a, poly16x4_t __b) { \ | |
4983 poly16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 6); re
turn r; } | |
4984 __ai int8x16x2_t vzipq_s8(int8x16_t __a, int8x16_t __b) { \ | |
4985 int8x16x2_t r; __builtin_neon_vzipq_v(&r, __a, __b, 16); return r; } | |
4986 __ai int16x8x2_t vzipq_s16(int16x8_t __a, int16x8_t __b) { \ | |
4987 int16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 17);
return r; } | |
4988 __ai int32x4x2_t vzipq_s32(int32x4_t __a, int32x4_t __b) { \ | |
4989 int32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 18);
return r; } | |
4990 __ai uint8x16x2_t vzipq_u8(uint8x16_t __a, uint8x16_t __b) { \ | |
4991 uint8x16x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 24)
; return r; } | |
4992 __ai uint16x8x2_t vzipq_u16(uint16x8_t __a, uint16x8_t __b) { \ | |
4993 uint16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 25)
; return r; } | |
4994 __ai uint32x4x2_t vzipq_u32(uint32x4_t __a, uint32x4_t __b) { \ | |
4995 uint32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 26)
; return r; } | |
4996 __ai float32x4x2_t vzipq_f32(float32x4_t __a, float32x4_t __b) { \ | |
4997 float32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 20
); return r; } | |
4998 __ai poly8x16x2_t vzipq_p8(poly8x16_t __a, poly8x16_t __b) { \ | |
4999 poly8x16x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 21)
; return r; } | |
5000 __ai poly16x8x2_t vzipq_p16(poly16x8_t __a, poly16x8_t __b) { \ | |
5001 poly16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 22)
; return r; } | |
5002 | |
5003 #undef __ai | |
5004 | |
5005 #endif /* __ARM_NEON_H */ | |
OLD | NEW |