Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(436)

Side by Side Diff: Linux_ia32/lib/clang/3.2/include/arm_neon.h

Issue 11348245: Add 32-bit version of llvm-allocated-type. Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/llvm-allocated-type/
Patch Set: Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
(Empty)
1 /*===---- arm_neon.h - ARM Neon intrinsics ---------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24 #ifndef __ARM_NEON_H
25 #define __ARM_NEON_H
26
27 #ifndef __ARM_NEON__
28 #error "NEON support not enabled"
29 #endif
30
31 #include <stdint.h>
32
33 typedef float float32_t;
34 typedef int8_t poly8_t;
35 typedef int16_t poly16_t;
36 typedef uint16_t float16_t;
37 typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t;
38 typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t;
39 typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t;
40 typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t;
41 typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t;
42 typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t;
43 typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t;
44 typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t;
45 typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t;
46 typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t;
47 typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t;
48 typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t;
49 typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t;
50 typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t;
51 typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t;
52 typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t;
53 typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
54 typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
55 typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
56 typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
57 typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t;
58 typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t;
59 typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t;
60 typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t;
61
62 typedef struct int8x8x2_t {
63 int8x8_t val[2];
64 } int8x8x2_t;
65
66 typedef struct int8x16x2_t {
67 int8x16_t val[2];
68 } int8x16x2_t;
69
70 typedef struct int16x4x2_t {
71 int16x4_t val[2];
72 } int16x4x2_t;
73
74 typedef struct int16x8x2_t {
75 int16x8_t val[2];
76 } int16x8x2_t;
77
78 typedef struct int32x2x2_t {
79 int32x2_t val[2];
80 } int32x2x2_t;
81
82 typedef struct int32x4x2_t {
83 int32x4_t val[2];
84 } int32x4x2_t;
85
86 typedef struct int64x1x2_t {
87 int64x1_t val[2];
88 } int64x1x2_t;
89
90 typedef struct int64x2x2_t {
91 int64x2_t val[2];
92 } int64x2x2_t;
93
94 typedef struct uint8x8x2_t {
95 uint8x8_t val[2];
96 } uint8x8x2_t;
97
98 typedef struct uint8x16x2_t {
99 uint8x16_t val[2];
100 } uint8x16x2_t;
101
102 typedef struct uint16x4x2_t {
103 uint16x4_t val[2];
104 } uint16x4x2_t;
105
106 typedef struct uint16x8x2_t {
107 uint16x8_t val[2];
108 } uint16x8x2_t;
109
110 typedef struct uint32x2x2_t {
111 uint32x2_t val[2];
112 } uint32x2x2_t;
113
114 typedef struct uint32x4x2_t {
115 uint32x4_t val[2];
116 } uint32x4x2_t;
117
118 typedef struct uint64x1x2_t {
119 uint64x1_t val[2];
120 } uint64x1x2_t;
121
122 typedef struct uint64x2x2_t {
123 uint64x2_t val[2];
124 } uint64x2x2_t;
125
126 typedef struct float16x4x2_t {
127 float16x4_t val[2];
128 } float16x4x2_t;
129
130 typedef struct float16x8x2_t {
131 float16x8_t val[2];
132 } float16x8x2_t;
133
134 typedef struct float32x2x2_t {
135 float32x2_t val[2];
136 } float32x2x2_t;
137
138 typedef struct float32x4x2_t {
139 float32x4_t val[2];
140 } float32x4x2_t;
141
142 typedef struct poly8x8x2_t {
143 poly8x8_t val[2];
144 } poly8x8x2_t;
145
146 typedef struct poly8x16x2_t {
147 poly8x16_t val[2];
148 } poly8x16x2_t;
149
150 typedef struct poly16x4x2_t {
151 poly16x4_t val[2];
152 } poly16x4x2_t;
153
154 typedef struct poly16x8x2_t {
155 poly16x8_t val[2];
156 } poly16x8x2_t;
157
158 typedef struct int8x8x3_t {
159 int8x8_t val[3];
160 } int8x8x3_t;
161
162 typedef struct int8x16x3_t {
163 int8x16_t val[3];
164 } int8x16x3_t;
165
166 typedef struct int16x4x3_t {
167 int16x4_t val[3];
168 } int16x4x3_t;
169
170 typedef struct int16x8x3_t {
171 int16x8_t val[3];
172 } int16x8x3_t;
173
174 typedef struct int32x2x3_t {
175 int32x2_t val[3];
176 } int32x2x3_t;
177
178 typedef struct int32x4x3_t {
179 int32x4_t val[3];
180 } int32x4x3_t;
181
182 typedef struct int64x1x3_t {
183 int64x1_t val[3];
184 } int64x1x3_t;
185
186 typedef struct int64x2x3_t {
187 int64x2_t val[3];
188 } int64x2x3_t;
189
190 typedef struct uint8x8x3_t {
191 uint8x8_t val[3];
192 } uint8x8x3_t;
193
194 typedef struct uint8x16x3_t {
195 uint8x16_t val[3];
196 } uint8x16x3_t;
197
198 typedef struct uint16x4x3_t {
199 uint16x4_t val[3];
200 } uint16x4x3_t;
201
202 typedef struct uint16x8x3_t {
203 uint16x8_t val[3];
204 } uint16x8x3_t;
205
206 typedef struct uint32x2x3_t {
207 uint32x2_t val[3];
208 } uint32x2x3_t;
209
210 typedef struct uint32x4x3_t {
211 uint32x4_t val[3];
212 } uint32x4x3_t;
213
214 typedef struct uint64x1x3_t {
215 uint64x1_t val[3];
216 } uint64x1x3_t;
217
218 typedef struct uint64x2x3_t {
219 uint64x2_t val[3];
220 } uint64x2x3_t;
221
222 typedef struct float16x4x3_t {
223 float16x4_t val[3];
224 } float16x4x3_t;
225
226 typedef struct float16x8x3_t {
227 float16x8_t val[3];
228 } float16x8x3_t;
229
230 typedef struct float32x2x3_t {
231 float32x2_t val[3];
232 } float32x2x3_t;
233
234 typedef struct float32x4x3_t {
235 float32x4_t val[3];
236 } float32x4x3_t;
237
238 typedef struct poly8x8x3_t {
239 poly8x8_t val[3];
240 } poly8x8x3_t;
241
242 typedef struct poly8x16x3_t {
243 poly8x16_t val[3];
244 } poly8x16x3_t;
245
246 typedef struct poly16x4x3_t {
247 poly16x4_t val[3];
248 } poly16x4x3_t;
249
250 typedef struct poly16x8x3_t {
251 poly16x8_t val[3];
252 } poly16x8x3_t;
253
254 typedef struct int8x8x4_t {
255 int8x8_t val[4];
256 } int8x8x4_t;
257
258 typedef struct int8x16x4_t {
259 int8x16_t val[4];
260 } int8x16x4_t;
261
262 typedef struct int16x4x4_t {
263 int16x4_t val[4];
264 } int16x4x4_t;
265
266 typedef struct int16x8x4_t {
267 int16x8_t val[4];
268 } int16x8x4_t;
269
270 typedef struct int32x2x4_t {
271 int32x2_t val[4];
272 } int32x2x4_t;
273
274 typedef struct int32x4x4_t {
275 int32x4_t val[4];
276 } int32x4x4_t;
277
278 typedef struct int64x1x4_t {
279 int64x1_t val[4];
280 } int64x1x4_t;
281
282 typedef struct int64x2x4_t {
283 int64x2_t val[4];
284 } int64x2x4_t;
285
286 typedef struct uint8x8x4_t {
287 uint8x8_t val[4];
288 } uint8x8x4_t;
289
290 typedef struct uint8x16x4_t {
291 uint8x16_t val[4];
292 } uint8x16x4_t;
293
294 typedef struct uint16x4x4_t {
295 uint16x4_t val[4];
296 } uint16x4x4_t;
297
298 typedef struct uint16x8x4_t {
299 uint16x8_t val[4];
300 } uint16x8x4_t;
301
302 typedef struct uint32x2x4_t {
303 uint32x2_t val[4];
304 } uint32x2x4_t;
305
306 typedef struct uint32x4x4_t {
307 uint32x4_t val[4];
308 } uint32x4x4_t;
309
310 typedef struct uint64x1x4_t {
311 uint64x1_t val[4];
312 } uint64x1x4_t;
313
314 typedef struct uint64x2x4_t {
315 uint64x2_t val[4];
316 } uint64x2x4_t;
317
318 typedef struct float16x4x4_t {
319 float16x4_t val[4];
320 } float16x4x4_t;
321
322 typedef struct float16x8x4_t {
323 float16x8_t val[4];
324 } float16x8x4_t;
325
326 typedef struct float32x2x4_t {
327 float32x2_t val[4];
328 } float32x2x4_t;
329
330 typedef struct float32x4x4_t {
331 float32x4_t val[4];
332 } float32x4x4_t;
333
334 typedef struct poly8x8x4_t {
335 poly8x8_t val[4];
336 } poly8x8x4_t;
337
338 typedef struct poly8x16x4_t {
339 poly8x16_t val[4];
340 } poly8x16x4_t;
341
342 typedef struct poly16x4x4_t {
343 poly16x4_t val[4];
344 } poly16x4x4_t;
345
346 typedef struct poly16x8x4_t {
347 poly16x8_t val[4];
348 } poly16x8x4_t;
349
350 #define __ai static __attribute__((__always_inline__, __nodebug__))
351
352 __ai int16x8_t vmovl_s8(int8x8_t __a) {
353 return (int16x8_t)__builtin_neon_vmovl_v(__a, 33); }
354 __ai int32x4_t vmovl_s16(int16x4_t __a) {
355 return (int32x4_t)__builtin_neon_vmovl_v((int8x8_t)__a, 34); }
356 __ai int64x2_t vmovl_s32(int32x2_t __a) {
357 return (int64x2_t)__builtin_neon_vmovl_v((int8x8_t)__a, 35); }
358 __ai uint16x8_t vmovl_u8(uint8x8_t __a) {
359 return (uint16x8_t)__builtin_neon_vmovl_v((int8x8_t)__a, 49); }
360 __ai uint32x4_t vmovl_u16(uint16x4_t __a) {
361 return (uint32x4_t)__builtin_neon_vmovl_v((int8x8_t)__a, 50); }
362 __ai uint64x2_t vmovl_u32(uint32x2_t __a) {
363 return (uint64x2_t)__builtin_neon_vmovl_v((int8x8_t)__a, 51); }
364
365 __ai int16x8_t vmull_s8(int8x8_t __a, int8x8_t __b) {
366 return (int16x8_t)__builtin_neon_vmull_v(__a, __b, 33); }
367 __ai int32x4_t vmull_s16(int16x4_t __a, int16x4_t __b) {
368 return (int32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 34); }
369 __ai int64x2_t vmull_s32(int32x2_t __a, int32x2_t __b) {
370 return (int64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 35); }
371 __ai uint16x8_t vmull_u8(uint8x8_t __a, uint8x8_t __b) {
372 return (uint16x8_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 49); }
373 __ai uint32x4_t vmull_u16(uint16x4_t __a, uint16x4_t __b) {
374 return (uint32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 50); }
375 __ai uint64x2_t vmull_u32(uint32x2_t __a, uint32x2_t __b) {
376 return (uint64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 51); }
377 __ai poly16x8_t vmull_p8(poly8x8_t __a, poly8x8_t __b) {
378 return (poly16x8_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 37); }
379
380 __ai int8x8_t vabd_s8(int8x8_t __a, int8x8_t __b) {
381 return (int8x8_t)__builtin_neon_vabd_v(__a, __b, 0); }
382 __ai int16x4_t vabd_s16(int16x4_t __a, int16x4_t __b) {
383 return (int16x4_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 1); }
384 __ai int32x2_t vabd_s32(int32x2_t __a, int32x2_t __b) {
385 return (int32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 2); }
386 __ai uint8x8_t vabd_u8(uint8x8_t __a, uint8x8_t __b) {
387 return (uint8x8_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 16); }
388 __ai uint16x4_t vabd_u16(uint16x4_t __a, uint16x4_t __b) {
389 return (uint16x4_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 17); }
390 __ai uint32x2_t vabd_u32(uint32x2_t __a, uint32x2_t __b) {
391 return (uint32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 18); }
392 __ai float32x2_t vabd_f32(float32x2_t __a, float32x2_t __b) {
393 return (float32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 7); }
394 __ai int8x16_t vabdq_s8(int8x16_t __a, int8x16_t __b) {
395 return (int8x16_t)__builtin_neon_vabdq_v(__a, __b, 32); }
396 __ai int16x8_t vabdq_s16(int16x8_t __a, int16x8_t __b) {
397 return (int16x8_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
398 __ai int32x4_t vabdq_s32(int32x4_t __a, int32x4_t __b) {
399 return (int32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
400 __ai uint8x16_t vabdq_u8(uint8x16_t __a, uint8x16_t __b) {
401 return (uint8x16_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
402 __ai uint16x8_t vabdq_u16(uint16x8_t __a, uint16x8_t __b) {
403 return (uint16x8_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
404 __ai uint32x4_t vabdq_u32(uint32x4_t __a, uint32x4_t __b) {
405 return (uint32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
406 __ai float32x4_t vabdq_f32(float32x4_t __a, float32x4_t __b) {
407 return (float32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 39); }
408
409 __ai int8x8_t vaba_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) {
410 return __a + vabd_s8(__b, __c); }
411 __ai int16x4_t vaba_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) {
412 return __a + vabd_s16(__b, __c); }
413 __ai int32x2_t vaba_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) {
414 return __a + vabd_s32(__b, __c); }
415 __ai uint8x8_t vaba_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) {
416 return __a + vabd_u8(__b, __c); }
417 __ai uint16x4_t vaba_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) {
418 return __a + vabd_u16(__b, __c); }
419 __ai uint32x2_t vaba_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) {
420 return __a + vabd_u32(__b, __c); }
421 __ai int8x16_t vabaq_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) {
422 return __a + vabdq_s8(__b, __c); }
423 __ai int16x8_t vabaq_s16(int16x8_t __a, int16x8_t __b, int16x8_t __c) {
424 return __a + vabdq_s16(__b, __c); }
425 __ai int32x4_t vabaq_s32(int32x4_t __a, int32x4_t __b, int32x4_t __c) {
426 return __a + vabdq_s32(__b, __c); }
427 __ai uint8x16_t vabaq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) {
428 return __a + vabdq_u8(__b, __c); }
429 __ai uint16x8_t vabaq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) {
430 return __a + vabdq_u16(__b, __c); }
431 __ai uint32x4_t vabaq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) {
432 return __a + vabdq_u32(__b, __c); }
433
434 __ai int16x8_t vabal_s8(int16x8_t __a, int8x8_t __b, int8x8_t __c) {
435 return __a + (int16x8_t)vmovl_u8((uint8x8_t)vabd_s8(__b, __c)); }
436 __ai int32x4_t vabal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) {
437 return __a + (int32x4_t)vmovl_u16((uint16x4_t)vabd_s16(__b, __c)); }
438 __ai int64x2_t vabal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) {
439 return __a + (int64x2_t)vmovl_u32((uint32x2_t)vabd_s32(__b, __c)); }
440 __ai uint16x8_t vabal_u8(uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) {
441 return __a + vmovl_u8(vabd_u8(__b, __c)); }
442 __ai uint32x4_t vabal_u16(uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) {
443 return __a + vmovl_u16(vabd_u16(__b, __c)); }
444 __ai uint64x2_t vabal_u32(uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) {
445 return __a + vmovl_u32(vabd_u32(__b, __c)); }
446
447 __ai int16x8_t vabdl_s8(int8x8_t __a, int8x8_t __b) {
448 return (int16x8_t)vmovl_u8((uint8x8_t)vabd_s8(__a, __b)); }
449 __ai int32x4_t vabdl_s16(int16x4_t __a, int16x4_t __b) {
450 return (int32x4_t)vmovl_u16((uint16x4_t)vabd_s16(__a, __b)); }
451 __ai int64x2_t vabdl_s32(int32x2_t __a, int32x2_t __b) {
452 return (int64x2_t)vmovl_u32((uint32x2_t)vabd_s32(__a, __b)); }
453 __ai uint16x8_t vabdl_u8(uint8x8_t __a, uint8x8_t __b) {
454 return vmovl_u8(vabd_u8(__a, __b)); }
455 __ai uint32x4_t vabdl_u16(uint16x4_t __a, uint16x4_t __b) {
456 return vmovl_u16(vabd_u16(__a, __b)); }
457 __ai uint64x2_t vabdl_u32(uint32x2_t __a, uint32x2_t __b) {
458 return vmovl_u32(vabd_u32(__a, __b)); }
459
460 __ai int8x8_t vabs_s8(int8x8_t __a) {
461 return (int8x8_t)__builtin_neon_vabs_v(__a, 0); }
462 __ai int16x4_t vabs_s16(int16x4_t __a) {
463 return (int16x4_t)__builtin_neon_vabs_v((int8x8_t)__a, 1); }
464 __ai int32x2_t vabs_s32(int32x2_t __a) {
465 return (int32x2_t)__builtin_neon_vabs_v((int8x8_t)__a, 2); }
466 __ai float32x2_t vabs_f32(float32x2_t __a) {
467 return (float32x2_t)__builtin_neon_vabs_v((int8x8_t)__a, 7); }
468 __ai int8x16_t vabsq_s8(int8x16_t __a) {
469 return (int8x16_t)__builtin_neon_vabsq_v(__a, 32); }
470 __ai int16x8_t vabsq_s16(int16x8_t __a) {
471 return (int16x8_t)__builtin_neon_vabsq_v((int8x16_t)__a, 33); }
472 __ai int32x4_t vabsq_s32(int32x4_t __a) {
473 return (int32x4_t)__builtin_neon_vabsq_v((int8x16_t)__a, 34); }
474 __ai float32x4_t vabsq_f32(float32x4_t __a) {
475 return (float32x4_t)__builtin_neon_vabsq_v((int8x16_t)__a, 39); }
476
477 __ai int8x8_t vadd_s8(int8x8_t __a, int8x8_t __b) {
478 return __a + __b; }
479 __ai int16x4_t vadd_s16(int16x4_t __a, int16x4_t __b) {
480 return __a + __b; }
481 __ai int32x2_t vadd_s32(int32x2_t __a, int32x2_t __b) {
482 return __a + __b; }
483 __ai int64x1_t vadd_s64(int64x1_t __a, int64x1_t __b) {
484 return __a + __b; }
485 __ai float32x2_t vadd_f32(float32x2_t __a, float32x2_t __b) {
486 return __a + __b; }
487 __ai uint8x8_t vadd_u8(uint8x8_t __a, uint8x8_t __b) {
488 return __a + __b; }
489 __ai uint16x4_t vadd_u16(uint16x4_t __a, uint16x4_t __b) {
490 return __a + __b; }
491 __ai uint32x2_t vadd_u32(uint32x2_t __a, uint32x2_t __b) {
492 return __a + __b; }
493 __ai uint64x1_t vadd_u64(uint64x1_t __a, uint64x1_t __b) {
494 return __a + __b; }
495 __ai int8x16_t vaddq_s8(int8x16_t __a, int8x16_t __b) {
496 return __a + __b; }
497 __ai int16x8_t vaddq_s16(int16x8_t __a, int16x8_t __b) {
498 return __a + __b; }
499 __ai int32x4_t vaddq_s32(int32x4_t __a, int32x4_t __b) {
500 return __a + __b; }
501 __ai int64x2_t vaddq_s64(int64x2_t __a, int64x2_t __b) {
502 return __a + __b; }
503 __ai float32x4_t vaddq_f32(float32x4_t __a, float32x4_t __b) {
504 return __a + __b; }
505 __ai uint8x16_t vaddq_u8(uint8x16_t __a, uint8x16_t __b) {
506 return __a + __b; }
507 __ai uint16x8_t vaddq_u16(uint16x8_t __a, uint16x8_t __b) {
508 return __a + __b; }
509 __ai uint32x4_t vaddq_u32(uint32x4_t __a, uint32x4_t __b) {
510 return __a + __b; }
511 __ai uint64x2_t vaddq_u64(uint64x2_t __a, uint64x2_t __b) {
512 return __a + __b; }
513
514 __ai int8x8_t vaddhn_s16(int16x8_t __a, int16x8_t __b) {
515 return (int8x8_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 0); }
516 __ai int16x4_t vaddhn_s32(int32x4_t __a, int32x4_t __b) {
517 return (int16x4_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 1); }
518 __ai int32x2_t vaddhn_s64(int64x2_t __a, int64x2_t __b) {
519 return (int32x2_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 2); }
520 __ai uint8x8_t vaddhn_u16(uint16x8_t __a, uint16x8_t __b) {
521 return (uint8x8_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 16); }
522 __ai uint16x4_t vaddhn_u32(uint32x4_t __a, uint32x4_t __b) {
523 return (uint16x4_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 17); }
524 __ai uint32x2_t vaddhn_u64(uint64x2_t __a, uint64x2_t __b) {
525 return (uint32x2_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 18); }
526
527 __ai int16x8_t vaddl_s8(int8x8_t __a, int8x8_t __b) {
528 return vmovl_s8(__a) + vmovl_s8(__b); }
529 __ai int32x4_t vaddl_s16(int16x4_t __a, int16x4_t __b) {
530 return vmovl_s16(__a) + vmovl_s16(__b); }
531 __ai int64x2_t vaddl_s32(int32x2_t __a, int32x2_t __b) {
532 return vmovl_s32(__a) + vmovl_s32(__b); }
533 __ai uint16x8_t vaddl_u8(uint8x8_t __a, uint8x8_t __b) {
534 return vmovl_u8(__a) + vmovl_u8(__b); }
535 __ai uint32x4_t vaddl_u16(uint16x4_t __a, uint16x4_t __b) {
536 return vmovl_u16(__a) + vmovl_u16(__b); }
537 __ai uint64x2_t vaddl_u32(uint32x2_t __a, uint32x2_t __b) {
538 return vmovl_u32(__a) + vmovl_u32(__b); }
539
540 __ai int16x8_t vaddw_s8(int16x8_t __a, int8x8_t __b) {
541 return __a + vmovl_s8(__b); }
542 __ai int32x4_t vaddw_s16(int32x4_t __a, int16x4_t __b) {
543 return __a + vmovl_s16(__b); }
544 __ai int64x2_t vaddw_s32(int64x2_t __a, int32x2_t __b) {
545 return __a + vmovl_s32(__b); }
546 __ai uint16x8_t vaddw_u8(uint16x8_t __a, uint8x8_t __b) {
547 return __a + vmovl_u8(__b); }
548 __ai uint32x4_t vaddw_u16(uint32x4_t __a, uint16x4_t __b) {
549 return __a + vmovl_u16(__b); }
550 __ai uint64x2_t vaddw_u32(uint64x2_t __a, uint32x2_t __b) {
551 return __a + vmovl_u32(__b); }
552
553 __ai int8x8_t vand_s8(int8x8_t __a, int8x8_t __b) {
554 return __a & __b; }
555 __ai int16x4_t vand_s16(int16x4_t __a, int16x4_t __b) {
556 return __a & __b; }
557 __ai int32x2_t vand_s32(int32x2_t __a, int32x2_t __b) {
558 return __a & __b; }
559 __ai int64x1_t vand_s64(int64x1_t __a, int64x1_t __b) {
560 return __a & __b; }
561 __ai uint8x8_t vand_u8(uint8x8_t __a, uint8x8_t __b) {
562 return __a & __b; }
563 __ai uint16x4_t vand_u16(uint16x4_t __a, uint16x4_t __b) {
564 return __a & __b; }
565 __ai uint32x2_t vand_u32(uint32x2_t __a, uint32x2_t __b) {
566 return __a & __b; }
567 __ai uint64x1_t vand_u64(uint64x1_t __a, uint64x1_t __b) {
568 return __a & __b; }
569 __ai int8x16_t vandq_s8(int8x16_t __a, int8x16_t __b) {
570 return __a & __b; }
571 __ai int16x8_t vandq_s16(int16x8_t __a, int16x8_t __b) {
572 return __a & __b; }
573 __ai int32x4_t vandq_s32(int32x4_t __a, int32x4_t __b) {
574 return __a & __b; }
575 __ai int64x2_t vandq_s64(int64x2_t __a, int64x2_t __b) {
576 return __a & __b; }
577 __ai uint8x16_t vandq_u8(uint8x16_t __a, uint8x16_t __b) {
578 return __a & __b; }
579 __ai uint16x8_t vandq_u16(uint16x8_t __a, uint16x8_t __b) {
580 return __a & __b; }
581 __ai uint32x4_t vandq_u32(uint32x4_t __a, uint32x4_t __b) {
582 return __a & __b; }
583 __ai uint64x2_t vandq_u64(uint64x2_t __a, uint64x2_t __b) {
584 return __a & __b; }
585
586 __ai int8x8_t vbic_s8(int8x8_t __a, int8x8_t __b) {
587 return __a & ~__b; }
588 __ai int16x4_t vbic_s16(int16x4_t __a, int16x4_t __b) {
589 return __a & ~__b; }
590 __ai int32x2_t vbic_s32(int32x2_t __a, int32x2_t __b) {
591 return __a & ~__b; }
592 __ai int64x1_t vbic_s64(int64x1_t __a, int64x1_t __b) {
593 return __a & ~__b; }
594 __ai uint8x8_t vbic_u8(uint8x8_t __a, uint8x8_t __b) {
595 return __a & ~__b; }
596 __ai uint16x4_t vbic_u16(uint16x4_t __a, uint16x4_t __b) {
597 return __a & ~__b; }
598 __ai uint32x2_t vbic_u32(uint32x2_t __a, uint32x2_t __b) {
599 return __a & ~__b; }
600 __ai uint64x1_t vbic_u64(uint64x1_t __a, uint64x1_t __b) {
601 return __a & ~__b; }
602 __ai int8x16_t vbicq_s8(int8x16_t __a, int8x16_t __b) {
603 return __a & ~__b; }
604 __ai int16x8_t vbicq_s16(int16x8_t __a, int16x8_t __b) {
605 return __a & ~__b; }
606 __ai int32x4_t vbicq_s32(int32x4_t __a, int32x4_t __b) {
607 return __a & ~__b; }
608 __ai int64x2_t vbicq_s64(int64x2_t __a, int64x2_t __b) {
609 return __a & ~__b; }
610 __ai uint8x16_t vbicq_u8(uint8x16_t __a, uint8x16_t __b) {
611 return __a & ~__b; }
612 __ai uint16x8_t vbicq_u16(uint16x8_t __a, uint16x8_t __b) {
613 return __a & ~__b; }
614 __ai uint32x4_t vbicq_u32(uint32x4_t __a, uint32x4_t __b) {
615 return __a & ~__b; }
616 __ai uint64x2_t vbicq_u64(uint64x2_t __a, uint64x2_t __b) {
617 return __a & ~__b; }
618
619 __ai int8x8_t vbsl_s8(uint8x8_t __a, int8x8_t __b, int8x8_t __c) {
620 return (int8x8_t)__builtin_neon_vbsl_v((int8x8_t)__a, __b, __c, 0); }
621 __ai int16x4_t vbsl_s16(uint16x4_t __a, int16x4_t __b, int16x4_t __c) {
622 return (int16x4_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 1); }
623 __ai int32x2_t vbsl_s32(uint32x2_t __a, int32x2_t __b, int32x2_t __c) {
624 return (int32x2_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 2); }
625 __ai int64x1_t vbsl_s64(uint64x1_t __a, int64x1_t __b, int64x1_t __c) {
626 return (int64x1_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 3); }
627 __ai uint8x8_t vbsl_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) {
628 return (uint8x8_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 16); }
629 __ai uint16x4_t vbsl_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) {
630 return (uint16x4_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 17); }
631 __ai uint32x2_t vbsl_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) {
632 return (uint32x2_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 18); }
633 __ai uint64x1_t vbsl_u64(uint64x1_t __a, uint64x1_t __b, uint64x1_t __c) {
634 return (uint64x1_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 19); }
635 __ai float32x2_t vbsl_f32(uint32x2_t __a, float32x2_t __b, float32x2_t __c) {
636 return (float32x2_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 7); }
637 __ai poly8x8_t vbsl_p8(uint8x8_t __a, poly8x8_t __b, poly8x8_t __c) {
638 return (poly8x8_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 4); }
639 __ai poly16x4_t vbsl_p16(uint16x4_t __a, poly16x4_t __b, poly16x4_t __c) {
640 return (poly16x4_t)__builtin_neon_vbsl_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 5); }
641 __ai int8x16_t vbslq_s8(uint8x16_t __a, int8x16_t __b, int8x16_t __c) {
642 return (int8x16_t)__builtin_neon_vbslq_v((int8x16_t)__a, __b, __c, 32); }
643 __ai int16x8_t vbslq_s16(uint16x8_t __a, int16x8_t __b, int16x8_t __c) {
644 return (int16x8_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 33); }
645 __ai int32x4_t vbslq_s32(uint32x4_t __a, int32x4_t __b, int32x4_t __c) {
646 return (int32x4_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 34); }
647 __ai int64x2_t vbslq_s64(uint64x2_t __a, int64x2_t __b, int64x2_t __c) {
648 return (int64x2_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 35); }
649 __ai uint8x16_t vbslq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) {
650 return (uint8x16_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 48); }
651 __ai uint16x8_t vbslq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) {
652 return (uint16x8_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 49); }
653 __ai uint32x4_t vbslq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) {
654 return (uint32x4_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 50); }
655 __ai uint64x2_t vbslq_u64(uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) {
656 return (uint64x2_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 51); }
657 __ai float32x4_t vbslq_f32(uint32x4_t __a, float32x4_t __b, float32x4_t __c) {
658 return (float32x4_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 39); }
659 __ai poly8x16_t vbslq_p8(uint8x16_t __a, poly8x16_t __b, poly8x16_t __c) {
660 return (poly8x16_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 36); }
661 __ai poly16x8_t vbslq_p16(uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) {
662 return (poly16x8_t)__builtin_neon_vbslq_v((int8x16_t)__a, (int8x16_t)__b, (int8x16_t)__c, 37); }
663
664 __ai uint32x2_t vcage_f32(float32x2_t __a, float32x2_t __b) {
665 return (uint32x2_t)__builtin_neon_vcage_v((int8x8_t)__a, (int8x8_t)__b, 18); }
666 __ai uint32x4_t vcageq_f32(float32x4_t __a, float32x4_t __b) {
667 return (uint32x4_t)__builtin_neon_vcageq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
668
669 __ai uint32x2_t vcagt_f32(float32x2_t __a, float32x2_t __b) {
670 return (uint32x2_t)__builtin_neon_vcagt_v((int8x8_t)__a, (int8x8_t)__b, 18); }
671 __ai uint32x4_t vcagtq_f32(float32x4_t __a, float32x4_t __b) {
672 return (uint32x4_t)__builtin_neon_vcagtq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
673
674 __ai uint32x2_t vcale_f32(float32x2_t __a, float32x2_t __b) {
675 return (uint32x2_t)__builtin_neon_vcale_v((int8x8_t)__a, (int8x8_t)__b, 18); }
676 __ai uint32x4_t vcaleq_f32(float32x4_t __a, float32x4_t __b) {
677 return (uint32x4_t)__builtin_neon_vcaleq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
678
679 __ai uint32x2_t vcalt_f32(float32x2_t __a, float32x2_t __b) {
680 return (uint32x2_t)__builtin_neon_vcalt_v((int8x8_t)__a, (int8x8_t)__b, 18); }
681 __ai uint32x4_t vcaltq_f32(float32x4_t __a, float32x4_t __b) {
682 return (uint32x4_t)__builtin_neon_vcaltq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
683
684 __ai uint8x8_t vceq_s8(int8x8_t __a, int8x8_t __b) {
685 return (uint8x8_t)(__a == __b); }
686 __ai uint16x4_t vceq_s16(int16x4_t __a, int16x4_t __b) {
687 return (uint16x4_t)(__a == __b); }
688 __ai uint32x2_t vceq_s32(int32x2_t __a, int32x2_t __b) {
689 return (uint32x2_t)(__a == __b); }
690 __ai uint32x2_t vceq_f32(float32x2_t __a, float32x2_t __b) {
691 return (uint32x2_t)(__a == __b); }
692 __ai uint8x8_t vceq_u8(uint8x8_t __a, uint8x8_t __b) {
693 return (uint8x8_t)(__a == __b); }
694 __ai uint16x4_t vceq_u16(uint16x4_t __a, uint16x4_t __b) {
695 return (uint16x4_t)(__a == __b); }
696 __ai uint32x2_t vceq_u32(uint32x2_t __a, uint32x2_t __b) {
697 return (uint32x2_t)(__a == __b); }
698 __ai uint8x8_t vceq_p8(poly8x8_t __a, poly8x8_t __b) {
699 return (uint8x8_t)(__a == __b); }
700 __ai uint8x16_t vceqq_s8(int8x16_t __a, int8x16_t __b) {
701 return (uint8x16_t)(__a == __b); }
702 __ai uint16x8_t vceqq_s16(int16x8_t __a, int16x8_t __b) {
703 return (uint16x8_t)(__a == __b); }
704 __ai uint32x4_t vceqq_s32(int32x4_t __a, int32x4_t __b) {
705 return (uint32x4_t)(__a == __b); }
706 __ai uint32x4_t vceqq_f32(float32x4_t __a, float32x4_t __b) {
707 return (uint32x4_t)(__a == __b); }
708 __ai uint8x16_t vceqq_u8(uint8x16_t __a, uint8x16_t __b) {
709 return (uint8x16_t)(__a == __b); }
710 __ai uint16x8_t vceqq_u16(uint16x8_t __a, uint16x8_t __b) {
711 return (uint16x8_t)(__a == __b); }
712 __ai uint32x4_t vceqq_u32(uint32x4_t __a, uint32x4_t __b) {
713 return (uint32x4_t)(__a == __b); }
714 __ai uint8x16_t vceqq_p8(poly8x16_t __a, poly8x16_t __b) {
715 return (uint8x16_t)(__a == __b); }
716
717 __ai uint8x8_t vcge_s8(int8x8_t __a, int8x8_t __b) {
718 return (uint8x8_t)(__a >= __b); }
719 __ai uint16x4_t vcge_s16(int16x4_t __a, int16x4_t __b) {
720 return (uint16x4_t)(__a >= __b); }
721 __ai uint32x2_t vcge_s32(int32x2_t __a, int32x2_t __b) {
722 return (uint32x2_t)(__a >= __b); }
723 __ai uint32x2_t vcge_f32(float32x2_t __a, float32x2_t __b) {
724 return (uint32x2_t)(__a >= __b); }
725 __ai uint8x8_t vcge_u8(uint8x8_t __a, uint8x8_t __b) {
726 return (uint8x8_t)(__a >= __b); }
727 __ai uint16x4_t vcge_u16(uint16x4_t __a, uint16x4_t __b) {
728 return (uint16x4_t)(__a >= __b); }
729 __ai uint32x2_t vcge_u32(uint32x2_t __a, uint32x2_t __b) {
730 return (uint32x2_t)(__a >= __b); }
731 __ai uint8x16_t vcgeq_s8(int8x16_t __a, int8x16_t __b) {
732 return (uint8x16_t)(__a >= __b); }
733 __ai uint16x8_t vcgeq_s16(int16x8_t __a, int16x8_t __b) {
734 return (uint16x8_t)(__a >= __b); }
735 __ai uint32x4_t vcgeq_s32(int32x4_t __a, int32x4_t __b) {
736 return (uint32x4_t)(__a >= __b); }
737 __ai uint32x4_t vcgeq_f32(float32x4_t __a, float32x4_t __b) {
738 return (uint32x4_t)(__a >= __b); }
739 __ai uint8x16_t vcgeq_u8(uint8x16_t __a, uint8x16_t __b) {
740 return (uint8x16_t)(__a >= __b); }
741 __ai uint16x8_t vcgeq_u16(uint16x8_t __a, uint16x8_t __b) {
742 return (uint16x8_t)(__a >= __b); }
743 __ai uint32x4_t vcgeq_u32(uint32x4_t __a, uint32x4_t __b) {
744 return (uint32x4_t)(__a >= __b); }
745
746 __ai uint8x8_t vcgt_s8(int8x8_t __a, int8x8_t __b) {
747 return (uint8x8_t)(__a > __b); }
748 __ai uint16x4_t vcgt_s16(int16x4_t __a, int16x4_t __b) {
749 return (uint16x4_t)(__a > __b); }
750 __ai uint32x2_t vcgt_s32(int32x2_t __a, int32x2_t __b) {
751 return (uint32x2_t)(__a > __b); }
752 __ai uint32x2_t vcgt_f32(float32x2_t __a, float32x2_t __b) {
753 return (uint32x2_t)(__a > __b); }
754 __ai uint8x8_t vcgt_u8(uint8x8_t __a, uint8x8_t __b) {
755 return (uint8x8_t)(__a > __b); }
756 __ai uint16x4_t vcgt_u16(uint16x4_t __a, uint16x4_t __b) {
757 return (uint16x4_t)(__a > __b); }
758 __ai uint32x2_t vcgt_u32(uint32x2_t __a, uint32x2_t __b) {
759 return (uint32x2_t)(__a > __b); }
760 __ai uint8x16_t vcgtq_s8(int8x16_t __a, int8x16_t __b) {
761 return (uint8x16_t)(__a > __b); }
762 __ai uint16x8_t vcgtq_s16(int16x8_t __a, int16x8_t __b) {
763 return (uint16x8_t)(__a > __b); }
764 __ai uint32x4_t vcgtq_s32(int32x4_t __a, int32x4_t __b) {
765 return (uint32x4_t)(__a > __b); }
766 __ai uint32x4_t vcgtq_f32(float32x4_t __a, float32x4_t __b) {
767 return (uint32x4_t)(__a > __b); }
768 __ai uint8x16_t vcgtq_u8(uint8x16_t __a, uint8x16_t __b) {
769 return (uint8x16_t)(__a > __b); }
770 __ai uint16x8_t vcgtq_u16(uint16x8_t __a, uint16x8_t __b) {
771 return (uint16x8_t)(__a > __b); }
772 __ai uint32x4_t vcgtq_u32(uint32x4_t __a, uint32x4_t __b) {
773 return (uint32x4_t)(__a > __b); }
774
775 __ai uint8x8_t vcle_s8(int8x8_t __a, int8x8_t __b) {
776 return (uint8x8_t)(__a <= __b); }
777 __ai uint16x4_t vcle_s16(int16x4_t __a, int16x4_t __b) {
778 return (uint16x4_t)(__a <= __b); }
779 __ai uint32x2_t vcle_s32(int32x2_t __a, int32x2_t __b) {
780 return (uint32x2_t)(__a <= __b); }
781 __ai uint32x2_t vcle_f32(float32x2_t __a, float32x2_t __b) {
782 return (uint32x2_t)(__a <= __b); }
783 __ai uint8x8_t vcle_u8(uint8x8_t __a, uint8x8_t __b) {
784 return (uint8x8_t)(__a <= __b); }
785 __ai uint16x4_t vcle_u16(uint16x4_t __a, uint16x4_t __b) {
786 return (uint16x4_t)(__a <= __b); }
787 __ai uint32x2_t vcle_u32(uint32x2_t __a, uint32x2_t __b) {
788 return (uint32x2_t)(__a <= __b); }
789 __ai uint8x16_t vcleq_s8(int8x16_t __a, int8x16_t __b) {
790 return (uint8x16_t)(__a <= __b); }
791 __ai uint16x8_t vcleq_s16(int16x8_t __a, int16x8_t __b) {
792 return (uint16x8_t)(__a <= __b); }
793 __ai uint32x4_t vcleq_s32(int32x4_t __a, int32x4_t __b) {
794 return (uint32x4_t)(__a <= __b); }
795 __ai uint32x4_t vcleq_f32(float32x4_t __a, float32x4_t __b) {
796 return (uint32x4_t)(__a <= __b); }
797 __ai uint8x16_t vcleq_u8(uint8x16_t __a, uint8x16_t __b) {
798 return (uint8x16_t)(__a <= __b); }
799 __ai uint16x8_t vcleq_u16(uint16x8_t __a, uint16x8_t __b) {
800 return (uint16x8_t)(__a <= __b); }
801 __ai uint32x4_t vcleq_u32(uint32x4_t __a, uint32x4_t __b) {
802 return (uint32x4_t)(__a <= __b); }
803
804 __ai int8x8_t vcls_s8(int8x8_t __a) {
805 return (int8x8_t)__builtin_neon_vcls_v(__a, 0); }
806 __ai int16x4_t vcls_s16(int16x4_t __a) {
807 return (int16x4_t)__builtin_neon_vcls_v((int8x8_t)__a, 1); }
808 __ai int32x2_t vcls_s32(int32x2_t __a) {
809 return (int32x2_t)__builtin_neon_vcls_v((int8x8_t)__a, 2); }
810 __ai int8x16_t vclsq_s8(int8x16_t __a) {
811 return (int8x16_t)__builtin_neon_vclsq_v(__a, 32); }
812 __ai int16x8_t vclsq_s16(int16x8_t __a) {
813 return (int16x8_t)__builtin_neon_vclsq_v((int8x16_t)__a, 33); }
814 __ai int32x4_t vclsq_s32(int32x4_t __a) {
815 return (int32x4_t)__builtin_neon_vclsq_v((int8x16_t)__a, 34); }
816
817 __ai uint8x8_t vclt_s8(int8x8_t __a, int8x8_t __b) {
818 return (uint8x8_t)(__a < __b); }
819 __ai uint16x4_t vclt_s16(int16x4_t __a, int16x4_t __b) {
820 return (uint16x4_t)(__a < __b); }
821 __ai uint32x2_t vclt_s32(int32x2_t __a, int32x2_t __b) {
822 return (uint32x2_t)(__a < __b); }
823 __ai uint32x2_t vclt_f32(float32x2_t __a, float32x2_t __b) {
824 return (uint32x2_t)(__a < __b); }
825 __ai uint8x8_t vclt_u8(uint8x8_t __a, uint8x8_t __b) {
826 return (uint8x8_t)(__a < __b); }
827 __ai uint16x4_t vclt_u16(uint16x4_t __a, uint16x4_t __b) {
828 return (uint16x4_t)(__a < __b); }
829 __ai uint32x2_t vclt_u32(uint32x2_t __a, uint32x2_t __b) {
830 return (uint32x2_t)(__a < __b); }
831 __ai uint8x16_t vcltq_s8(int8x16_t __a, int8x16_t __b) {
832 return (uint8x16_t)(__a < __b); }
833 __ai uint16x8_t vcltq_s16(int16x8_t __a, int16x8_t __b) {
834 return (uint16x8_t)(__a < __b); }
835 __ai uint32x4_t vcltq_s32(int32x4_t __a, int32x4_t __b) {
836 return (uint32x4_t)(__a < __b); }
837 __ai uint32x4_t vcltq_f32(float32x4_t __a, float32x4_t __b) {
838 return (uint32x4_t)(__a < __b); }
839 __ai uint8x16_t vcltq_u8(uint8x16_t __a, uint8x16_t __b) {
840 return (uint8x16_t)(__a < __b); }
841 __ai uint16x8_t vcltq_u16(uint16x8_t __a, uint16x8_t __b) {
842 return (uint16x8_t)(__a < __b); }
843 __ai uint32x4_t vcltq_u32(uint32x4_t __a, uint32x4_t __b) {
844 return (uint32x4_t)(__a < __b); }
845
846 __ai int8x8_t vclz_s8(int8x8_t __a) {
847 return (int8x8_t)__builtin_neon_vclz_v(__a, 0); }
848 __ai int16x4_t vclz_s16(int16x4_t __a) {
849 return (int16x4_t)__builtin_neon_vclz_v((int8x8_t)__a, 1); }
850 __ai int32x2_t vclz_s32(int32x2_t __a) {
851 return (int32x2_t)__builtin_neon_vclz_v((int8x8_t)__a, 2); }
852 __ai uint8x8_t vclz_u8(uint8x8_t __a) {
853 return (uint8x8_t)__builtin_neon_vclz_v((int8x8_t)__a, 16); }
854 __ai uint16x4_t vclz_u16(uint16x4_t __a) {
855 return (uint16x4_t)__builtin_neon_vclz_v((int8x8_t)__a, 17); }
856 __ai uint32x2_t vclz_u32(uint32x2_t __a) {
857 return (uint32x2_t)__builtin_neon_vclz_v((int8x8_t)__a, 18); }
858 __ai int8x16_t vclzq_s8(int8x16_t __a) {
859 return (int8x16_t)__builtin_neon_vclzq_v(__a, 32); }
860 __ai int16x8_t vclzq_s16(int16x8_t __a) {
861 return (int16x8_t)__builtin_neon_vclzq_v((int8x16_t)__a, 33); }
862 __ai int32x4_t vclzq_s32(int32x4_t __a) {
863 return (int32x4_t)__builtin_neon_vclzq_v((int8x16_t)__a, 34); }
864 __ai uint8x16_t vclzq_u8(uint8x16_t __a) {
865 return (uint8x16_t)__builtin_neon_vclzq_v((int8x16_t)__a, 48); }
866 __ai uint16x8_t vclzq_u16(uint16x8_t __a) {
867 return (uint16x8_t)__builtin_neon_vclzq_v((int8x16_t)__a, 49); }
868 __ai uint32x4_t vclzq_u32(uint32x4_t __a) {
869 return (uint32x4_t)__builtin_neon_vclzq_v((int8x16_t)__a, 50); }
870
871 __ai uint8x8_t vcnt_u8(uint8x8_t __a) {
872 return (uint8x8_t)__builtin_neon_vcnt_v((int8x8_t)__a, 16); }
873 __ai int8x8_t vcnt_s8(int8x8_t __a) {
874 return (int8x8_t)__builtin_neon_vcnt_v(__a, 0); }
875 __ai poly8x8_t vcnt_p8(poly8x8_t __a) {
876 return (poly8x8_t)__builtin_neon_vcnt_v((int8x8_t)__a, 4); }
877 __ai uint8x16_t vcntq_u8(uint8x16_t __a) {
878 return (uint8x16_t)__builtin_neon_vcntq_v((int8x16_t)__a, 48); }
879 __ai int8x16_t vcntq_s8(int8x16_t __a) {
880 return (int8x16_t)__builtin_neon_vcntq_v(__a, 32); }
881 __ai poly8x16_t vcntq_p8(poly8x16_t __a) {
882 return (poly8x16_t)__builtin_neon_vcntq_v((int8x16_t)__a, 36); }
883
884 __ai int8x16_t vcombine_s8(int8x8_t __a, int8x8_t __b) {
885 return (int8x16_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1 ); }
886 __ai int16x8_t vcombine_s16(int16x4_t __a, int16x4_t __b) {
887 return (int16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1 ); }
888 __ai int32x4_t vcombine_s32(int32x2_t __a, int32x2_t __b) {
889 return (int32x4_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1 ); }
890 __ai int64x2_t vcombine_s64(int64x1_t __a, int64x1_t __b) {
891 return (int64x2_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1 ); }
892 __ai float16x8_t vcombine_f16(float16x4_t __a, float16x4_t __b) {
893 return (float16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
894 __ai float32x4_t vcombine_f32(float32x2_t __a, float32x2_t __b) {
895 return (float32x4_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
896 __ai uint8x16_t vcombine_u8(uint8x8_t __a, uint8x8_t __b) {
897 return (uint8x16_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
898 __ai uint16x8_t vcombine_u16(uint16x4_t __a, uint16x4_t __b) {
899 return (uint16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
900 __ai uint32x4_t vcombine_u32(uint32x2_t __a, uint32x2_t __b) {
901 return (uint32x4_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
902 __ai uint64x2_t vcombine_u64(uint64x1_t __a, uint64x1_t __b) {
903 return (uint64x2_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
904 __ai poly8x16_t vcombine_p8(poly8x8_t __a, poly8x8_t __b) {
905 return (poly8x16_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
906 __ai poly16x8_t vcombine_p16(poly16x4_t __a, poly16x4_t __b) {
907 return (poly16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
908
909 __ai int8x8_t vcreate_s8(uint64_t __a) {
910 return (int8x8_t)__a; }
911 __ai int16x4_t vcreate_s16(uint64_t __a) {
912 return (int16x4_t)__a; }
913 __ai int32x2_t vcreate_s32(uint64_t __a) {
914 return (int32x2_t)__a; }
915 __ai float16x4_t vcreate_f16(uint64_t __a) {
916 return (float16x4_t)__a; }
917 __ai float32x2_t vcreate_f32(uint64_t __a) {
918 return (float32x2_t)__a; }
919 __ai uint8x8_t vcreate_u8(uint64_t __a) {
920 return (uint8x8_t)__a; }
921 __ai uint16x4_t vcreate_u16(uint64_t __a) {
922 return (uint16x4_t)__a; }
923 __ai uint32x2_t vcreate_u32(uint64_t __a) {
924 return (uint32x2_t)__a; }
925 __ai uint64x1_t vcreate_u64(uint64_t __a) {
926 return (uint64x1_t)__a; }
927 __ai poly8x8_t vcreate_p8(uint64_t __a) {
928 return (poly8x8_t)__a; }
929 __ai poly16x4_t vcreate_p16(uint64_t __a) {
930 return (poly16x4_t)__a; }
931 __ai int64x1_t vcreate_s64(uint64_t __a) {
932 return (int64x1_t)__a; }
933
934 __ai float16x4_t vcvt_f16_f32(float32x4_t __a) {
935 return (float16x4_t)__builtin_neon_vcvt_f16_v((int8x16_t)__a, 6); }
936
937 __ai float32x2_t vcvt_f32_s32(int32x2_t __a) {
938 return (float32x2_t)__builtin_neon_vcvt_f32_v((int8x8_t)__a, 2); }
939 __ai float32x2_t vcvt_f32_u32(uint32x2_t __a) {
940 return (float32x2_t)__builtin_neon_vcvt_f32_v((int8x8_t)__a, 18); }
941 __ai float32x4_t vcvtq_f32_s32(int32x4_t __a) {
942 return (float32x4_t)__builtin_neon_vcvtq_f32_v((int8x16_t)__a, 34); }
943 __ai float32x4_t vcvtq_f32_u32(uint32x4_t __a) {
944 return (float32x4_t)__builtin_neon_vcvtq_f32_v((int8x16_t)__a, 50); }
945
946 __ai float32x4_t vcvt_f32_f16(float16x4_t __a) {
947 return (float32x4_t)__builtin_neon_vcvt_f32_f16((int8x8_t)__a, 6); }
948
/* vcvt_n_f32_* / vcvtq_n_f32_*: statement-expression macros. `a` is
 * evaluated once into a typed local; `__b` is substituted textually into the
 * builtin call, so it is never copied into a variable. */
#define vcvt_n_f32_s32(a, __b) __extension__ ({ \
  int32x2_t __a = (a); \
  float32x2_t __ret = \
      (float32x2_t)__builtin_neon_vcvt_n_f32_v((int8x8_t)__a, __b, 2); \
  __ret; })
#define vcvt_n_f32_u32(a, __b) __extension__ ({ \
  uint32x2_t __a = (a); \
  float32x2_t __ret = \
      (float32x2_t)__builtin_neon_vcvt_n_f32_v((int8x8_t)__a, __b, 18); \
  __ret; })
#define vcvtq_n_f32_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  float32x4_t __ret = \
      (float32x4_t)__builtin_neon_vcvtq_n_f32_v((int8x16_t)__a, __b, 34); \
  __ret; })
#define vcvtq_n_f32_u32(a, __b) __extension__ ({ \
  uint32x4_t __a = (a); \
  float32x4_t __ret = \
      (float32x4_t)__builtin_neon_vcvtq_n_f32_v((int8x16_t)__a, __b, 50); \
  __ret; })
961
/* vcvt_n_s32_f32 / vcvtq_n_s32_f32: statement-expression macros. `a` is
 * evaluated once into a typed local; `__b` is substituted textually into the
 * builtin call. */
#define vcvt_n_s32_f32(a, __b) __extension__ ({ \
  float32x2_t __a = (a); \
  int32x2_t __ret = \
      (int32x2_t)__builtin_neon_vcvt_n_s32_v((int8x8_t)__a, __b, 2); \
  __ret; })
#define vcvtq_n_s32_f32(a, __b) __extension__ ({ \
  float32x4_t __a = (a); \
  int32x4_t __ret = \
      (int32x4_t)__builtin_neon_vcvtq_n_s32_v((int8x16_t)__a, __b, 34); \
  __ret; })
968
/* vcvt_n_u32_f32 / vcvtq_n_u32_f32: statement-expression macros. `a` is
 * evaluated once into a typed local; `__b` is substituted textually into the
 * builtin call. */
#define vcvt_n_u32_f32(a, __b) __extension__ ({ \
  float32x2_t __a = (a); \
  uint32x2_t __ret = \
      (uint32x2_t)__builtin_neon_vcvt_n_u32_v((int8x8_t)__a, __b, 18); \
  __ret; })
#define vcvtq_n_u32_f32(a, __b) __extension__ ({ \
  float32x4_t __a = (a); \
  uint32x4_t __ret = \
      (uint32x4_t)__builtin_neon_vcvtq_n_u32_v((int8x16_t)__a, __b, 50); \
  __ret; })
975
976 __ai int32x2_t vcvt_s32_f32(float32x2_t __a) {
977 return (int32x2_t)__builtin_neon_vcvt_s32_v((int8x8_t)__a, 2); }
978 __ai int32x4_t vcvtq_s32_f32(float32x4_t __a) {
979 return (int32x4_t)__builtin_neon_vcvtq_s32_v((int8x16_t)__a, 34); }
980
981 __ai uint32x2_t vcvt_u32_f32(float32x2_t __a) {
982 return (uint32x2_t)__builtin_neon_vcvt_u32_v((int8x8_t)__a, 18); }
983 __ai uint32x4_t vcvtq_u32_f32(float32x4_t __a) {
984 return (uint32x4_t)__builtin_neon_vcvtq_u32_v((int8x16_t)__a, 50); }
985
/* vdup_lane_* / vdupq_lane_*: statement-expression macros. `a` is evaluated
 * once into a typed local, which is then shuffled with itself using the lane
 * argument `__b` as every index; the number of indices sets the number of
 * result elements. `__b` is substituted textually into the shuffle, so it is
 * never copied into a variable. The shuffle is left as the final expression
 * so the macro's value keeps exactly the type the builtin produces. */
#define vdup_lane_u8(a, __b) __extension__ ({ \
  uint8x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b, __b, __b, __b, __b, __b, __b); \
})
#define vdup_lane_u16(a, __b) __extension__ ({ \
  uint16x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b, __b, __b); \
})
#define vdup_lane_u32(a, __b) __extension__ ({ \
  uint32x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b); \
})
#define vdup_lane_s8(a, __b) __extension__ ({ \
  int8x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b, __b, __b, __b, __b, __b, __b); \
})
#define vdup_lane_s16(a, __b) __extension__ ({ \
  int16x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b, __b, __b); \
})
#define vdup_lane_s32(a, __b) __extension__ ({ \
  int32x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b); \
})
#define vdup_lane_p8(a, __b) __extension__ ({ \
  poly8x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b, __b, __b, __b, __b, __b, __b); \
})
#define vdup_lane_p16(a, __b) __extension__ ({ \
  poly16x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b, __b, __b); \
})
#define vdup_lane_f32(a, __b) __extension__ ({ \
  float32x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b); \
})
#define vdupq_lane_u8(a, __b) __extension__ ({ \
  uint8x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b, __b, __b, __b, __b, __b, __b, \
      __b, __b, __b, __b, __b, __b, __b, __b); \
})
#define vdupq_lane_u16(a, __b) __extension__ ({ \
  uint16x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b, __b, __b, __b, __b, __b, __b); \
})
#define vdupq_lane_u32(a, __b) __extension__ ({ \
  uint32x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b, __b, __b); \
})
#define vdupq_lane_s8(a, __b) __extension__ ({ \
  int8x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b, __b, __b, __b, __b, __b, __b, \
      __b, __b, __b, __b, __b, __b, __b, __b); \
})
#define vdupq_lane_s16(a, __b) __extension__ ({ \
  int16x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b, __b, __b, __b, __b, __b, __b); \
})
#define vdupq_lane_s32(a, __b) __extension__ ({ \
  int32x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b, __b, __b); \
})
#define vdupq_lane_p8(a, __b) __extension__ ({ \
  poly8x8_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b, __b, __b, __b, __b, __b, __b, \
      __b, __b, __b, __b, __b, __b, __b, __b); \
})
#define vdupq_lane_p16(a, __b) __extension__ ({ \
  poly16x4_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b, __b, __b, __b, __b, __b, __b); \
})
#define vdupq_lane_f32(a, __b) __extension__ ({ \
  float32x2_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b, __b, __b); \
})
#define vdup_lane_s64(a, __b) __extension__ ({ \
  int64x1_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b); \
})
#define vdup_lane_u64(a, __b) __extension__ ({ \
  uint64x1_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b); \
})
#define vdupq_lane_s64(a, __b) __extension__ ({ \
  int64x1_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b); \
})
#define vdupq_lane_u64(a, __b) __extension__ ({ \
  uint64x1_t __a = (a); \
  __builtin_shufflevector(__a, __a, \
      __b, __b); \
})
1052
1053 __ai uint8x8_t vdup_n_u8(uint8_t __a) {
1054 return (uint8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
1055 __ai uint16x4_t vdup_n_u16(uint16_t __a) {
1056 return (uint16x4_t){ __a, __a, __a, __a }; }
1057 __ai uint32x2_t vdup_n_u32(uint32_t __a) {
1058 return (uint32x2_t){ __a, __a }; }
1059 __ai int8x8_t vdup_n_s8(int8_t __a) {
1060 return (int8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
1061 __ai int16x4_t vdup_n_s16(int16_t __a) {
1062 return (int16x4_t){ __a, __a, __a, __a }; }
1063 __ai int32x2_t vdup_n_s32(int32_t __a) {
1064 return (int32x2_t){ __a, __a }; }
1065 __ai poly8x8_t vdup_n_p8(poly8_t __a) {
1066 return (poly8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
1067 __ai poly16x4_t vdup_n_p16(poly16_t __a) {
1068 return (poly16x4_t){ __a, __a, __a, __a }; }
1069 __ai float32x2_t vdup_n_f32(float32_t __a) {
1070 return (float32x2_t){ __a, __a }; }
1071 __ai uint8x16_t vdupq_n_u8(uint8_t __a) {
1072 return (uint8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __ a, __a, __a, __a, __a }; }
1073 __ai uint16x8_t vdupq_n_u16(uint16_t __a) {
1074 return (uint16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
1075 __ai uint32x4_t vdupq_n_u32(uint32_t __a) {
1076 return (uint32x4_t){ __a, __a, __a, __a }; }
1077 __ai int8x16_t vdupq_n_s8(int8_t __a) {
1078 return (int8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a , __a, __a, __a, __a }; }
1079 __ai int16x8_t vdupq_n_s16(int16_t __a) {
1080 return (int16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
1081 __ai int32x4_t vdupq_n_s32(int32_t __a) {
1082 return (int32x4_t){ __a, __a, __a, __a }; }
1083 __ai poly8x16_t vdupq_n_p8(poly8_t __a) {
1084 return (poly8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __ a, __a, __a, __a, __a }; }
1085 __ai poly16x8_t vdupq_n_p16(poly16_t __a) {
1086 return (poly16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
1087 __ai float32x4_t vdupq_n_f32(float32_t __a) {
1088 return (float32x4_t){ __a, __a, __a, __a }; }
1089 __ai int64x1_t vdup_n_s64(int64_t __a) {
1090 return (int64x1_t){ __a }; }
1091 __ai uint64x1_t vdup_n_u64(uint64_t __a) {
1092 return (uint64x1_t){ __a }; }
1093 __ai int64x2_t vdupq_n_s64(int64_t __a) {
1094 return (int64x2_t){ __a, __a }; }
1095 __ai uint64x2_t vdupq_n_u64(uint64_t __a) {
1096 return (uint64x2_t){ __a, __a }; }
1097
1098 __ai int8x8_t veor_s8(int8x8_t __a, int8x8_t __b) {
1099 return __a ^ __b; }
1100 __ai int16x4_t veor_s16(int16x4_t __a, int16x4_t __b) {
1101 return __a ^ __b; }
1102 __ai int32x2_t veor_s32(int32x2_t __a, int32x2_t __b) {
1103 return __a ^ __b; }
1104 __ai int64x1_t veor_s64(int64x1_t __a, int64x1_t __b) {
1105 return __a ^ __b; }
1106 __ai uint8x8_t veor_u8(uint8x8_t __a, uint8x8_t __b) {
1107 return __a ^ __b; }
1108 __ai uint16x4_t veor_u16(uint16x4_t __a, uint16x4_t __b) {
1109 return __a ^ __b; }
1110 __ai uint32x2_t veor_u32(uint32x2_t __a, uint32x2_t __b) {
1111 return __a ^ __b; }
1112 __ai uint64x1_t veor_u64(uint64x1_t __a, uint64x1_t __b) {
1113 return __a ^ __b; }
1114 __ai int8x16_t veorq_s8(int8x16_t __a, int8x16_t __b) {
1115 return __a ^ __b; }
1116 __ai int16x8_t veorq_s16(int16x8_t __a, int16x8_t __b) {
1117 return __a ^ __b; }
1118 __ai int32x4_t veorq_s32(int32x4_t __a, int32x4_t __b) {
1119 return __a ^ __b; }
1120 __ai int64x2_t veorq_s64(int64x2_t __a, int64x2_t __b) {
1121 return __a ^ __b; }
1122 __ai uint8x16_t veorq_u8(uint8x16_t __a, uint8x16_t __b) {
1123 return __a ^ __b; }
1124 __ai uint16x8_t veorq_u16(uint16x8_t __a, uint16x8_t __b) {
1125 return __a ^ __b; }
1126 __ai uint32x4_t veorq_u32(uint32x4_t __a, uint32x4_t __b) {
1127 return __a ^ __b; }
1128 __ai uint64x2_t veorq_u64(uint64x2_t __a, uint64x2_t __b) {
1129 return __a ^ __b; }
1130
/* vext_* / vextq_*: statement-expression macros. `a` and `b` are each
 * evaluated once, in that order, into typed locals; both are handed (viewed
 * as int8x8_t / int8x16_t) to __builtin_neon_vext_v / __builtin_neon_vextq_v
 * together with `__c`, which is substituted textually. The trailing literal
 * is forwarded unchanged and the result is cast back to the operand type. */
#define vext_s8(a, b, __c) __extension__ ({ \
  int8x8_t __a = (a); \
  int8x8_t __b = (b); \
  int8x8_t __ret = (int8x8_t)__builtin_neon_vext_v(__a, __b, __c, 0); \
  __ret; })
#define vext_u8(a, b, __c) __extension__ ({ \
  uint8x8_t __a = (a); \
  uint8x8_t __b = (b); \
  uint8x8_t __ret = (uint8x8_t)__builtin_neon_vext_v( \
      (int8x8_t)__a, (int8x8_t)__b, __c, 16); \
  __ret; })
#define vext_p8(a, b, __c) __extension__ ({ \
  poly8x8_t __a = (a); \
  poly8x8_t __b = (b); \
  poly8x8_t __ret = (poly8x8_t)__builtin_neon_vext_v( \
      (int8x8_t)__a, (int8x8_t)__b, __c, 4); \
  __ret; })
#define vext_s16(a, b, __c) __extension__ ({ \
  int16x4_t __a = (a); \
  int16x4_t __b = (b); \
  int16x4_t __ret = (int16x4_t)__builtin_neon_vext_v( \
      (int8x8_t)__a, (int8x8_t)__b, __c, 1); \
  __ret; })
#define vext_u16(a, b, __c) __extension__ ({ \
  uint16x4_t __a = (a); \
  uint16x4_t __b = (b); \
  uint16x4_t __ret = (uint16x4_t)__builtin_neon_vext_v( \
      (int8x8_t)__a, (int8x8_t)__b, __c, 17); \
  __ret; })
#define vext_p16(a, b, __c) __extension__ ({ \
  poly16x4_t __a = (a); \
  poly16x4_t __b = (b); \
  poly16x4_t __ret = (poly16x4_t)__builtin_neon_vext_v( \
      (int8x8_t)__a, (int8x8_t)__b, __c, 5); \
  __ret; })
#define vext_s32(a, b, __c) __extension__ ({ \
  int32x2_t __a = (a); \
  int32x2_t __b = (b); \
  int32x2_t __ret = (int32x2_t)__builtin_neon_vext_v( \
      (int8x8_t)__a, (int8x8_t)__b, __c, 2); \
  __ret; })
#define vext_u32(a, b, __c) __extension__ ({ \
  uint32x2_t __a = (a); \
  uint32x2_t __b = (b); \
  uint32x2_t __ret = (uint32x2_t)__builtin_neon_vext_v( \
      (int8x8_t)__a, (int8x8_t)__b, __c, 18); \
  __ret; })
#define vext_s64(a, b, __c) __extension__ ({ \
  int64x1_t __a = (a); \
  int64x1_t __b = (b); \
  int64x1_t __ret = (int64x1_t)__builtin_neon_vext_v( \
      (int8x8_t)__a, (int8x8_t)__b, __c, 3); \
  __ret; })
#define vext_u64(a, b, __c) __extension__ ({ \
  uint64x1_t __a = (a); \
  uint64x1_t __b = (b); \
  uint64x1_t __ret = (uint64x1_t)__builtin_neon_vext_v( \
      (int8x8_t)__a, (int8x8_t)__b, __c, 19); \
  __ret; })
#define vext_f32(a, b, __c) __extension__ ({ \
  float32x2_t __a = (a); \
  float32x2_t __b = (b); \
  float32x2_t __ret = (float32x2_t)__builtin_neon_vext_v( \
      (int8x8_t)__a, (int8x8_t)__b, __c, 7); \
  __ret; })
#define vextq_s8(a, b, __c) __extension__ ({ \
  int8x16_t __a = (a); \
  int8x16_t __b = (b); \
  int8x16_t __ret = (int8x16_t)__builtin_neon_vextq_v(__a, __b, __c, 32); \
  __ret; })
#define vextq_u8(a, b, __c) __extension__ ({ \
  uint8x16_t __a = (a); \
  uint8x16_t __b = (b); \
  uint8x16_t __ret = (uint8x16_t)__builtin_neon_vextq_v( \
      (int8x16_t)__a, (int8x16_t)__b, __c, 48); \
  __ret; })
#define vextq_p8(a, b, __c) __extension__ ({ \
  poly8x16_t __a = (a); \
  poly8x16_t __b = (b); \
  poly8x16_t __ret = (poly8x16_t)__builtin_neon_vextq_v( \
      (int8x16_t)__a, (int8x16_t)__b, __c, 36); \
  __ret; })
#define vextq_s16(a, b, __c) __extension__ ({ \
  int16x8_t __a = (a); \
  int16x8_t __b = (b); \
  int16x8_t __ret = (int16x8_t)__builtin_neon_vextq_v( \
      (int8x16_t)__a, (int8x16_t)__b, __c, 33); \
  __ret; })
#define vextq_u16(a, b, __c) __extension__ ({ \
  uint16x8_t __a = (a); \
  uint16x8_t __b = (b); \
  uint16x8_t __ret = (uint16x8_t)__builtin_neon_vextq_v( \
      (int8x16_t)__a, (int8x16_t)__b, __c, 49); \
  __ret; })
#define vextq_p16(a, b, __c) __extension__ ({ \
  poly16x8_t __a = (a); \
  poly16x8_t __b = (b); \
  poly16x8_t __ret = (poly16x8_t)__builtin_neon_vextq_v( \
      (int8x16_t)__a, (int8x16_t)__b, __c, 37); \
  __ret; })
#define vextq_s32(a, b, __c) __extension__ ({ \
  int32x4_t __a = (a); \
  int32x4_t __b = (b); \
  int32x4_t __ret = (int32x4_t)__builtin_neon_vextq_v( \
      (int8x16_t)__a, (int8x16_t)__b, __c, 34); \
  __ret; })
#define vextq_u32(a, b, __c) __extension__ ({ \
  uint32x4_t __a = (a); \
  uint32x4_t __b = (b); \
  uint32x4_t __ret = (uint32x4_t)__builtin_neon_vextq_v( \
      (int8x16_t)__a, (int8x16_t)__b, __c, 50); \
  __ret; })
#define vextq_s64(a, b, __c) __extension__ ({ \
  int64x2_t __a = (a); \
  int64x2_t __b = (b); \
  int64x2_t __ret = (int64x2_t)__builtin_neon_vextq_v( \
      (int8x16_t)__a, (int8x16_t)__b, __c, 35); \
  __ret; })
#define vextq_u64(a, b, __c) __extension__ ({ \
  uint64x2_t __a = (a); \
  uint64x2_t __b = (b); \
  uint64x2_t __ret = (uint64x2_t)__builtin_neon_vextq_v( \
      (int8x16_t)__a, (int8x16_t)__b, __c, 51); \
  __ret; })
#define vextq_f32(a, b, __c) __extension__ ({ \
  float32x4_t __a = (a); \
  float32x4_t __b = (b); \
  float32x4_t __ret = (float32x4_t)__builtin_neon_vextq_v( \
      (int8x16_t)__a, (int8x16_t)__b, __c, 39); \
  __ret; })
1197
1198 __ai float32x2_t vfma_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) {
1199 return (float32x2_t)__builtin_neon_vfma_v((int8x8_t)__a, (int8x8_t)__b, (int8x 8_t)__c, 7); }
1200 __ai float32x4_t vfmaq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) {
1201 return (float32x4_t)__builtin_neon_vfmaq_v((int8x16_t)__a, (int8x16_t)__b, (in t8x16_t)__c, 39); }
1202
1203 __ai int8x8_t vget_high_s8(int8x16_t __a) {
1204 return (int8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
1205 __ai int16x4_t vget_high_s16(int16x8_t __a) {
1206 return (int16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
1207 __ai int32x2_t vget_high_s32(int32x4_t __a) {
1208 return (int32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
1209 __ai int64x1_t vget_high_s64(int64x2_t __a) {
1210 return (int64x1_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
1211 __ai float16x4_t vget_high_f16(float16x8_t __a) {
1212 return (float16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1) ; }
1213 __ai float32x2_t vget_high_f32(float32x4_t __a) {
1214 return (float32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1) ; }
1215 __ai uint8x8_t vget_high_u8(uint8x16_t __a) {
1216 return (uint8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
1217 __ai uint16x4_t vget_high_u16(uint16x8_t __a) {
1218 return (uint16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
1219 __ai uint32x2_t vget_high_u32(uint32x4_t __a) {
1220 return (uint32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
1221 __ai uint64x1_t vget_high_u64(uint64x2_t __a) {
1222 return (uint64x1_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
1223 __ai poly8x8_t vget_high_p8(poly8x16_t __a) {
1224 return (poly8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
1225 __ai poly16x4_t vget_high_p16(poly16x8_t __a) {
1226 return (poly16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
1227
/* vget_lane_* / vgetq_lane_*: statement-expression macros. `a` is evaluated
 * once into a typed local, which is passed (cast to the signed vector type of
 * the same lane width where needed) to the width-specific
 * __builtin_neon_vget{,q}_lane_* builtin. `__b` is substituted textually into
 * the builtin call. The result is cast to the scalar type for that lane. */
#define vget_lane_u8(a, __b) __extension__ ({ \
  uint8x8_t __a = (a); \
  uint8_t __ret = (uint8_t)__builtin_neon_vget_lane_i8((int8x8_t)__a, __b); \
  __ret; })
#define vget_lane_u16(a, __b) __extension__ ({ \
  uint16x4_t __a = (a); \
  uint16_t __ret = \
      (uint16_t)__builtin_neon_vget_lane_i16((int16x4_t)__a, __b); \
  __ret; })
#define vget_lane_u32(a, __b) __extension__ ({ \
  uint32x2_t __a = (a); \
  uint32_t __ret = \
      (uint32_t)__builtin_neon_vget_lane_i32((int32x2_t)__a, __b); \
  __ret; })
#define vget_lane_s8(a, __b) __extension__ ({ \
  int8x8_t __a = (a); \
  int8_t __ret = (int8_t)__builtin_neon_vget_lane_i8(__a, __b); \
  __ret; })
#define vget_lane_s16(a, __b) __extension__ ({ \
  int16x4_t __a = (a); \
  int16_t __ret = (int16_t)__builtin_neon_vget_lane_i16(__a, __b); \
  __ret; })
#define vget_lane_s32(a, __b) __extension__ ({ \
  int32x2_t __a = (a); \
  int32_t __ret = (int32_t)__builtin_neon_vget_lane_i32(__a, __b); \
  __ret; })
#define vget_lane_p8(a, __b) __extension__ ({ \
  poly8x8_t __a = (a); \
  poly8_t __ret = (poly8_t)__builtin_neon_vget_lane_i8((int8x8_t)__a, __b); \
  __ret; })
#define vget_lane_p16(a, __b) __extension__ ({ \
  poly16x4_t __a = (a); \
  poly16_t __ret = \
      (poly16_t)__builtin_neon_vget_lane_i16((int16x4_t)__a, __b); \
  __ret; })
#define vget_lane_f32(a, __b) __extension__ ({ \
  float32x2_t __a = (a); \
  float32_t __ret = (float32_t)__builtin_neon_vget_lane_f32(__a, __b); \
  __ret; })
#define vgetq_lane_u8(a, __b) __extension__ ({ \
  uint8x16_t __a = (a); \
  uint8_t __ret = (uint8_t)__builtin_neon_vgetq_lane_i8((int8x16_t)__a, __b); \
  __ret; })
#define vgetq_lane_u16(a, __b) __extension__ ({ \
  uint16x8_t __a = (a); \
  uint16_t __ret = \
      (uint16_t)__builtin_neon_vgetq_lane_i16((int16x8_t)__a, __b); \
  __ret; })
#define vgetq_lane_u32(a, __b) __extension__ ({ \
  uint32x4_t __a = (a); \
  uint32_t __ret = \
      (uint32_t)__builtin_neon_vgetq_lane_i32((int32x4_t)__a, __b); \
  __ret; })
#define vgetq_lane_s8(a, __b) __extension__ ({ \
  int8x16_t __a = (a); \
  int8_t __ret = (int8_t)__builtin_neon_vgetq_lane_i8(__a, __b); \
  __ret; })
#define vgetq_lane_s16(a, __b) __extension__ ({ \
  int16x8_t __a = (a); \
  int16_t __ret = (int16_t)__builtin_neon_vgetq_lane_i16(__a, __b); \
  __ret; })
#define vgetq_lane_s32(a, __b) __extension__ ({ \
  int32x4_t __a = (a); \
  int32_t __ret = (int32_t)__builtin_neon_vgetq_lane_i32(__a, __b); \
  __ret; })
#define vgetq_lane_p8(a, __b) __extension__ ({ \
  poly8x16_t __a = (a); \
  poly8_t __ret = (poly8_t)__builtin_neon_vgetq_lane_i8((int8x16_t)__a, __b); \
  __ret; })
#define vgetq_lane_p16(a, __b) __extension__ ({ \
  poly16x8_t __a = (a); \
  poly16_t __ret = \
      (poly16_t)__builtin_neon_vgetq_lane_i16((int16x8_t)__a, __b); \
  __ret; })
#define vgetq_lane_f32(a, __b) __extension__ ({ \
  float32x4_t __a = (a); \
  float32_t __ret = (float32_t)__builtin_neon_vgetq_lane_f32(__a, __b); \
  __ret; })
#define vget_lane_s64(a, __b) __extension__ ({ \
  int64x1_t __a = (a); \
  int64_t __ret = (int64_t)__builtin_neon_vget_lane_i64(__a, __b); \
  __ret; })
#define vget_lane_u64(a, __b) __extension__ ({ \
  uint64x1_t __a = (a); \
  uint64_t __ret = \
      (uint64_t)__builtin_neon_vget_lane_i64((int64x1_t)__a, __b); \
  __ret; })
#define vgetq_lane_s64(a, __b) __extension__ ({ \
  int64x2_t __a = (a); \
  int64_t __ret = (int64_t)__builtin_neon_vgetq_lane_i64(__a, __b); \
  __ret; })
#define vgetq_lane_u64(a, __b) __extension__ ({ \
  uint64x2_t __a = (a); \
  uint64_t __ret = \
      (uint64_t)__builtin_neon_vgetq_lane_i64((int64x2_t)__a, __b); \
  __ret; })
1294
1295 __ai int8x8_t vget_low_s8(int8x16_t __a) {
1296 return (int8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
1297 __ai int16x4_t vget_low_s16(int16x8_t __a) {
1298 return (int16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
1299 __ai int32x2_t vget_low_s32(int32x4_t __a) {
1300 return (int32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
1301 __ai int64x1_t vget_low_s64(int64x2_t __a) {
1302 return (int64x1_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
1303 __ai float16x4_t vget_low_f16(float16x8_t __a) {
1304 return (float16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0) ; }
1305 __ai float32x2_t vget_low_f32(float32x4_t __a) {
1306 return (float32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0) ; }
1307 __ai uint8x8_t vget_low_u8(uint8x16_t __a) {
1308 return (uint8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
1309 __ai uint16x4_t vget_low_u16(uint16x8_t __a) {
1310 return (uint16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
1311 __ai uint32x2_t vget_low_u32(uint32x4_t __a) {
1312 return (uint32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
1313 __ai uint64x1_t vget_low_u64(uint64x2_t __a) {
1314 return (uint64x1_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
1315 __ai poly8x8_t vget_low_p8(poly8x16_t __a) {
1316 return (poly8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
1317 __ai poly16x4_t vget_low_p16(poly16x8_t __a) {
1318 return (poly16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
1319
1320 __ai int8x8_t vhadd_s8(int8x8_t __a, int8x8_t __b) {
1321 return (int8x8_t)__builtin_neon_vhadd_v(__a, __b, 0); }
1322 __ai int16x4_t vhadd_s16(int16x4_t __a, int16x4_t __b) {
1323 return (int16x4_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 1); }
1324 __ai int32x2_t vhadd_s32(int32x2_t __a, int32x2_t __b) {
1325 return (int32x2_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 2); }
1326 __ai uint8x8_t vhadd_u8(uint8x8_t __a, uint8x8_t __b) {
1327 return (uint8x8_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 16); }
1328 __ai uint16x4_t vhadd_u16(uint16x4_t __a, uint16x4_t __b) {
1329 return (uint16x4_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 17); }
1330 __ai uint32x2_t vhadd_u32(uint32x2_t __a, uint32x2_t __b) {
1331 return (uint32x2_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 18); }
1332 __ai int8x16_t vhaddq_s8(int8x16_t __a, int8x16_t __b) {
1333 return (int8x16_t)__builtin_neon_vhaddq_v(__a, __b, 32); }
1334 __ai int16x8_t vhaddq_s16(int16x8_t __a, int16x8_t __b) {
1335 return (int16x8_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
1336 __ai int32x4_t vhaddq_s32(int32x4_t __a, int32x4_t __b) {
1337 return (int32x4_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
1338 __ai uint8x16_t vhaddq_u8(uint8x16_t __a, uint8x16_t __b) {
1339 return (uint8x16_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 48) ; }
1340 __ai uint16x8_t vhaddq_u16(uint16x8_t __a, uint16x8_t __b) {
1341 return (uint16x8_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 49) ; }
1342 __ai uint32x4_t vhaddq_u32(uint32x4_t __a, uint32x4_t __b) {
1343 return (uint32x4_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 50) ; }
1344
/* vhsub_<t> / vhsubq_<t> wrappers.
 *
 * Each function forwards its two same-typed vector operands to
 * __builtin_neon_vhsub_v (plain forms) or __builtin_neon_vhsubq_v (q forms).
 * Operands are cast to int8x8_t / int8x16_t for the call (the s8 variants
 * already have that type and pass their arguments through unchanged), and
 * the builtin's result is cast back to the caller's vector type.
 *
 * The trailing integer literal differs per element type. As used below:
 * 0 = s8, 1 = s16, 2 = s32, 16 = u8, 17 = u16, 18 = u32; the q forms use the
 * same codes plus 32.
 *
 * NOTE(review): the operation is implemented by the builtin and is not
 * visible here; the name suggests NEON "halving subtract" -- confirm against
 * the ARM intrinsics reference.
 */
1345 __ai int8x8_t vhsub_s8(int8x8_t __a, int8x8_t __b) {
1346 return (int8x8_t)__builtin_neon_vhsub_v(__a, __b, 0); }
1347 __ai int16x4_t vhsub_s16(int16x4_t __a, int16x4_t __b) {
1348 return (int16x4_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 1); }
1349 __ai int32x2_t vhsub_s32(int32x2_t __a, int32x2_t __b) {
1350 return (int32x2_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 2); }
1351 __ai uint8x8_t vhsub_u8(uint8x8_t __a, uint8x8_t __b) {
1352 return (uint8x8_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 16); }
1353 __ai uint16x4_t vhsub_u16(uint16x4_t __a, uint16x4_t __b) {
1354 return (uint16x4_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 17); }
1355 __ai uint32x2_t vhsub_u32(uint32x2_t __a, uint32x2_t __b) {
1356 return (uint32x2_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 18); }
1357 __ai int8x16_t vhsubq_s8(int8x16_t __a, int8x16_t __b) {
1358 return (int8x16_t)__builtin_neon_vhsubq_v(__a, __b, 32); }
1359 __ai int16x8_t vhsubq_s16(int16x8_t __a, int16x8_t __b) {
1360 return (int16x8_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
1361 __ai int32x4_t vhsubq_s32(int32x4_t __a, int32x4_t __b) {
1362 return (int32x4_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
1363 __ai uint8x16_t vhsubq_u8(uint8x16_t __a, uint8x16_t __b) {
1364 return (uint8x16_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 48) ; }
1365 __ai uint16x8_t vhsubq_u16(uint16x8_t __a, uint16x8_t __b) {
1366 return (uint16x8_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 49) ; }
1367 __ai uint32x4_t vhsubq_u32(uint32x4_t __a, uint32x4_t __b) {
1368 return (uint32x4_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 50) ; }
1369
/* vld1q_<t>(__a) / vld1_<t>(__a) macros.
 *
 * Each macro expands to a GNU statement expression whose value is the result
 * of __builtin_neon_vld1q_v (q forms) or __builtin_neon_vld1_v (plain forms)
 * called with the argument __a and an integer type code, cast to the vector
 * type named by the macro. __a is substituted textually exactly once.
 *
 * Type codes as used below: 0 = s8, 1 = s16, 2 = s32, 3 = s64, 4 = p8,
 * 5 = p16, 6 = f16, 7 = f32, 16..19 = u8/u16/u32/u64; the q forms use the
 * same codes plus 32.
 *
 * NOTE(review): __a is presumably a pointer to the element data to load; the
 * builtin's contract is not visible in this listing -- confirm.
 */
1370 #define vld1q_u8(__a) __extension__ ({ \
1371 (uint8x16_t)__builtin_neon_vld1q_v(__a, 48); })
1372 #define vld1q_u16(__a) __extension__ ({ \
1373 (uint16x8_t)__builtin_neon_vld1q_v(__a, 49); })
1374 #define vld1q_u32(__a) __extension__ ({ \
1375 (uint32x4_t)__builtin_neon_vld1q_v(__a, 50); })
1376 #define vld1q_u64(__a) __extension__ ({ \
1377 (uint64x2_t)__builtin_neon_vld1q_v(__a, 51); })
1378 #define vld1q_s8(__a) __extension__ ({ \
1379 (int8x16_t)__builtin_neon_vld1q_v(__a, 32); })
1380 #define vld1q_s16(__a) __extension__ ({ \
1381 (int16x8_t)__builtin_neon_vld1q_v(__a, 33); })
1382 #define vld1q_s32(__a) __extension__ ({ \
1383 (int32x4_t)__builtin_neon_vld1q_v(__a, 34); })
1384 #define vld1q_s64(__a) __extension__ ({ \
1385 (int64x2_t)__builtin_neon_vld1q_v(__a, 35); })
1386 #define vld1q_f16(__a) __extension__ ({ \
1387 (float16x8_t)__builtin_neon_vld1q_v(__a, 38); })
1388 #define vld1q_f32(__a) __extension__ ({ \
1389 (float32x4_t)__builtin_neon_vld1q_v(__a, 39); })
1390 #define vld1q_p8(__a) __extension__ ({ \
1391 (poly8x16_t)__builtin_neon_vld1q_v(__a, 36); })
1392 #define vld1q_p16(__a) __extension__ ({ \
1393 (poly16x8_t)__builtin_neon_vld1q_v(__a, 37); })
1394 #define vld1_u8(__a) __extension__ ({ \
1395 (uint8x8_t)__builtin_neon_vld1_v(__a, 16); })
1396 #define vld1_u16(__a) __extension__ ({ \
1397 (uint16x4_t)__builtin_neon_vld1_v(__a, 17); })
1398 #define vld1_u32(__a) __extension__ ({ \
1399 (uint32x2_t)__builtin_neon_vld1_v(__a, 18); })
1400 #define vld1_u64(__a) __extension__ ({ \
1401 (uint64x1_t)__builtin_neon_vld1_v(__a, 19); })
1402 #define vld1_s8(__a) __extension__ ({ \
1403 (int8x8_t)__builtin_neon_vld1_v(__a, 0); })
1404 #define vld1_s16(__a) __extension__ ({ \
1405 (int16x4_t)__builtin_neon_vld1_v(__a, 1); })
1406 #define vld1_s32(__a) __extension__ ({ \
1407 (int32x2_t)__builtin_neon_vld1_v(__a, 2); })
1408 #define vld1_s64(__a) __extension__ ({ \
1409 (int64x1_t)__builtin_neon_vld1_v(__a, 3); })
1410 #define vld1_f16(__a) __extension__ ({ \
1411 (float16x4_t)__builtin_neon_vld1_v(__a, 6); })
1412 #define vld1_f32(__a) __extension__ ({ \
1413 (float32x2_t)__builtin_neon_vld1_v(__a, 7); })
1414 #define vld1_p8(__a) __extension__ ({ \
1415 (poly8x8_t)__builtin_neon_vld1_v(__a, 4); })
1416 #define vld1_p16(__a) __extension__ ({ \
1417 (poly16x4_t)__builtin_neon_vld1_v(__a, 5); })
1418
/* vld1q_dup_<t>(__a) / vld1_dup_<t>(__a) macros.
 *
 * Each macro expands to a GNU statement expression that calls
 * __builtin_neon_vld1q_dup_v (q forms) or __builtin_neon_vld1_dup_v (plain
 * forms) with the argument __a and an integer type code, and casts the
 * result to the vector type named by the macro. __a is substituted exactly
 * once.
 *
 * Type codes as used below: 0 = s8, 1 = s16, 2 = s32, 3 = s64, 4 = p8,
 * 5 = p16, 6 = f16, 7 = f32, 16..19 = u8/u16/u32/u64; the q forms use the
 * same codes plus 32.
 *
 * NOTE(review): "dup" presumably means one element is loaded and replicated
 * to every lane; that behaviour is inside the builtin -- confirm against the
 * ARM intrinsics reference.
 */
1419 #define vld1q_dup_u8(__a) __extension__ ({ \
1420 (uint8x16_t)__builtin_neon_vld1q_dup_v(__a, 48); })
1421 #define vld1q_dup_u16(__a) __extension__ ({ \
1422 (uint16x8_t)__builtin_neon_vld1q_dup_v(__a, 49); })
1423 #define vld1q_dup_u32(__a) __extension__ ({ \
1424 (uint32x4_t)__builtin_neon_vld1q_dup_v(__a, 50); })
1425 #define vld1q_dup_u64(__a) __extension__ ({ \
1426 (uint64x2_t)__builtin_neon_vld1q_dup_v(__a, 51); })
1427 #define vld1q_dup_s8(__a) __extension__ ({ \
1428 (int8x16_t)__builtin_neon_vld1q_dup_v(__a, 32); })
1429 #define vld1q_dup_s16(__a) __extension__ ({ \
1430 (int16x8_t)__builtin_neon_vld1q_dup_v(__a, 33); })
1431 #define vld1q_dup_s32(__a) __extension__ ({ \
1432 (int32x4_t)__builtin_neon_vld1q_dup_v(__a, 34); })
1433 #define vld1q_dup_s64(__a) __extension__ ({ \
1434 (int64x2_t)__builtin_neon_vld1q_dup_v(__a, 35); })
1435 #define vld1q_dup_f16(__a) __extension__ ({ \
1436 (float16x8_t)__builtin_neon_vld1q_dup_v(__a, 38); })
1437 #define vld1q_dup_f32(__a) __extension__ ({ \
1438 (float32x4_t)__builtin_neon_vld1q_dup_v(__a, 39); })
1439 #define vld1q_dup_p8(__a) __extension__ ({ \
1440 (poly8x16_t)__builtin_neon_vld1q_dup_v(__a, 36); })
1441 #define vld1q_dup_p16(__a) __extension__ ({ \
1442 (poly16x8_t)__builtin_neon_vld1q_dup_v(__a, 37); })
1443 #define vld1_dup_u8(__a) __extension__ ({ \
1444 (uint8x8_t)__builtin_neon_vld1_dup_v(__a, 16); })
1445 #define vld1_dup_u16(__a) __extension__ ({ \
1446 (uint16x4_t)__builtin_neon_vld1_dup_v(__a, 17); })
1447 #define vld1_dup_u32(__a) __extension__ ({ \
1448 (uint32x2_t)__builtin_neon_vld1_dup_v(__a, 18); })
1449 #define vld1_dup_u64(__a) __extension__ ({ \
1450 (uint64x1_t)__builtin_neon_vld1_dup_v(__a, 19); })
1451 #define vld1_dup_s8(__a) __extension__ ({ \
1452 (int8x8_t)__builtin_neon_vld1_dup_v(__a, 0); })
1453 #define vld1_dup_s16(__a) __extension__ ({ \
1454 (int16x4_t)__builtin_neon_vld1_dup_v(__a, 1); })
1455 #define vld1_dup_s32(__a) __extension__ ({ \
1456 (int32x2_t)__builtin_neon_vld1_dup_v(__a, 2); })
1457 #define vld1_dup_s64(__a) __extension__ ({ \
1458 (int64x1_t)__builtin_neon_vld1_dup_v(__a, 3); })
1459 #define vld1_dup_f16(__a) __extension__ ({ \
1460 (float16x4_t)__builtin_neon_vld1_dup_v(__a, 6); })
1461 #define vld1_dup_f32(__a) __extension__ ({ \
1462 (float32x2_t)__builtin_neon_vld1_dup_v(__a, 7); })
1463 #define vld1_dup_p8(__a) __extension__ ({ \
1464 (poly8x8_t)__builtin_neon_vld1_dup_v(__a, 4); })
1465 #define vld1_dup_p16(__a) __extension__ ({ \
1466 (poly16x4_t)__builtin_neon_vld1_dup_v(__a, 5); })
1467
/* vld1q_lane_<t>(__a, b, __c) / vld1_lane_<t>(__a, b, __c) macros.
 *
 * Each macro expands to a GNU statement expression that:
 *   1. copies the vector argument b into a typed local __b, so b is
 *      evaluated once and must be convertible to the named vector type;
 *   2. calls __builtin_neon_vld1q_lane_v / __builtin_neon_vld1_lane_v with
 *      __a, __b reinterpreted as int8x16_t / int8x8_t (the s8 variants pass
 *      __b unchanged), __c, and an integer type code;
 *   3. yields the builtin's result cast to the named vector type.
 * __a and __c are substituted textually, without a temporary.
 *
 * Type codes as used below: 0 = s8, 1 = s16, 2 = s32, 3 = s64, 4 = p8,
 * 5 = p16, 6 = f16, 7 = f32, 16..19 = u8/u16/u32/u64; the q forms use the
 * same codes plus 32.
 *
 * NOTE(review): __a is presumably the source pointer and __c the lane index
 * (likely required to be a compile-time constant, which would explain the
 * macro form) -- confirm against the ARM intrinsics reference.
 */
1468 #define vld1q_lane_u8(__a, b, __c) __extension__ ({ \
1469 uint8x16_t __b = (b); \
1470 (uint8x16_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 48); })
1471 #define vld1q_lane_u16(__a, b, __c) __extension__ ({ \
1472 uint16x8_t __b = (b); \
1473 (uint16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 49); })
1474 #define vld1q_lane_u32(__a, b, __c) __extension__ ({ \
1475 uint32x4_t __b = (b); \
1476 (uint32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 50); })
1477 #define vld1q_lane_u64(__a, b, __c) __extension__ ({ \
1478 uint64x2_t __b = (b); \
1479 (uint64x2_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 51); })
1480 #define vld1q_lane_s8(__a, b, __c) __extension__ ({ \
1481 int8x16_t __b = (b); \
1482 (int8x16_t)__builtin_neon_vld1q_lane_v(__a, __b, __c, 32); })
1483 #define vld1q_lane_s16(__a, b, __c) __extension__ ({ \
1484 int16x8_t __b = (b); \
1485 (int16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 33); })
1486 #define vld1q_lane_s32(__a, b, __c) __extension__ ({ \
1487 int32x4_t __b = (b); \
1488 (int32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 34); })
1489 #define vld1q_lane_s64(__a, b, __c) __extension__ ({ \
1490 int64x2_t __b = (b); \
1491 (int64x2_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 35); })
1492 #define vld1q_lane_f16(__a, b, __c) __extension__ ({ \
1493 float16x8_t __b = (b); \
1494 (float16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 38); })
1495 #define vld1q_lane_f32(__a, b, __c) __extension__ ({ \
1496 float32x4_t __b = (b); \
1497 (float32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 39); })
1498 #define vld1q_lane_p8(__a, b, __c) __extension__ ({ \
1499 poly8x16_t __b = (b); \
1500 (poly8x16_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 36); })
1501 #define vld1q_lane_p16(__a, b, __c) __extension__ ({ \
1502 poly16x8_t __b = (b); \
1503 (poly16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 37); })
1504 #define vld1_lane_u8(__a, b, __c) __extension__ ({ \
1505 uint8x8_t __b = (b); \
1506 (uint8x8_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 16); })
1507 #define vld1_lane_u16(__a, b, __c) __extension__ ({ \
1508 uint16x4_t __b = (b); \
1509 (uint16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 17); })
1510 #define vld1_lane_u32(__a, b, __c) __extension__ ({ \
1511 uint32x2_t __b = (b); \
1512 (uint32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 18); })
1513 #define vld1_lane_u64(__a, b, __c) __extension__ ({ \
1514 uint64x1_t __b = (b); \
1515 (uint64x1_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 19); })
1516 #define vld1_lane_s8(__a, b, __c) __extension__ ({ \
1517 int8x8_t __b = (b); \
1518 (int8x8_t)__builtin_neon_vld1_lane_v(__a, __b, __c, 0); })
1519 #define vld1_lane_s16(__a, b, __c) __extension__ ({ \
1520 int16x4_t __b = (b); \
1521 (int16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 1); })
1522 #define vld1_lane_s32(__a, b, __c) __extension__ ({ \
1523 int32x2_t __b = (b); \
1524 (int32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 2); })
1525 #define vld1_lane_s64(__a, b, __c) __extension__ ({ \
1526 int64x1_t __b = (b); \
1527 (int64x1_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 3); })
1528 #define vld1_lane_f16(__a, b, __c) __extension__ ({ \
1529 float16x4_t __b = (b); \
1530 (float16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 6); })
1531 #define vld1_lane_f32(__a, b, __c) __extension__ ({ \
1532 float32x2_t __b = (b); \
1533 (float32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 7); })
1534 #define vld1_lane_p8(__a, b, __c) __extension__ ({ \
1535 poly8x8_t __b = (b); \
1536 (poly8x8_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 4); })
1537 #define vld1_lane_p16(__a, b, __c) __extension__ ({ \
1538 poly16x4_t __b = (b); \
1539 (poly16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 5); })
1540
/* vld2q_<t>(__a) / vld2_<t>(__a) macros.
 *
 * Each macro expands to a GNU statement expression that declares a result
 * aggregate of the named <t>x2_t type, passes its address together with __a
 * and an integer type code to __builtin_neon_vld2q_v (q forms) or
 * __builtin_neon_vld2_v (plain forms), and yields the aggregate as the value
 * of the expression. __a is substituted textually exactly once.
 *
 * The result temporary is named __r (reserved namespace) rather than a plain
 * identifier: it is in scope where __a is expanded, so a user-namespace name
 * would capture any caller expression that mentions the same identifier.
 *
 * Type codes as used below: 0 = s8, 1 = s16, 2 = s32, 3 = s64, 4 = p8,
 * 5 = p16, 6 = f16, 7 = f32, 16..19 = u8/u16/u32/u64; the q forms use the
 * same codes plus 32.
 *
 * NOTE(review): __a is presumably the source pointer and the builtin is
 * expected to fill the aggregate through the address it is given -- confirm
 * against the ARM intrinsics reference.
 */
1541 #define vld2q_u8(__a) __extension__ ({ \
1542 uint8x16x2_t __r; __builtin_neon_vld2q_v(&__r, __a, 48); __r; })
1543 #define vld2q_u16(__a) __extension__ ({ \
1544 uint16x8x2_t __r; __builtin_neon_vld2q_v(&__r, __a, 49); __r; })
1545 #define vld2q_u32(__a) __extension__ ({ \
1546 uint32x4x2_t __r; __builtin_neon_vld2q_v(&__r, __a, 50); __r; })
1547 #define vld2q_s8(__a) __extension__ ({ \
1548 int8x16x2_t __r; __builtin_neon_vld2q_v(&__r, __a, 32); __r; })
1549 #define vld2q_s16(__a) __extension__ ({ \
1550 int16x8x2_t __r; __builtin_neon_vld2q_v(&__r, __a, 33); __r; })
1551 #define vld2q_s32(__a) __extension__ ({ \
1552 int32x4x2_t __r; __builtin_neon_vld2q_v(&__r, __a, 34); __r; })
1553 #define vld2q_f16(__a) __extension__ ({ \
1554 float16x8x2_t __r; __builtin_neon_vld2q_v(&__r, __a, 38); __r; })
1555 #define vld2q_f32(__a) __extension__ ({ \
1556 float32x4x2_t __r; __builtin_neon_vld2q_v(&__r, __a, 39); __r; })
1557 #define vld2q_p8(__a) __extension__ ({ \
1558 poly8x16x2_t __r; __builtin_neon_vld2q_v(&__r, __a, 36); __r; })
1559 #define vld2q_p16(__a) __extension__ ({ \
1560 poly16x8x2_t __r; __builtin_neon_vld2q_v(&__r, __a, 37); __r; })
1561 #define vld2_u8(__a) __extension__ ({ \
1562 uint8x8x2_t __r; __builtin_neon_vld2_v(&__r, __a, 16); __r; })
1563 #define vld2_u16(__a) __extension__ ({ \
1564 uint16x4x2_t __r; __builtin_neon_vld2_v(&__r, __a, 17); __r; })
1565 #define vld2_u32(__a) __extension__ ({ \
1566 uint32x2x2_t __r; __builtin_neon_vld2_v(&__r, __a, 18); __r; })
1567 #define vld2_u64(__a) __extension__ ({ \
1568 uint64x1x2_t __r; __builtin_neon_vld2_v(&__r, __a, 19); __r; })
1569 #define vld2_s8(__a) __extension__ ({ \
1570 int8x8x2_t __r; __builtin_neon_vld2_v(&__r, __a, 0); __r; })
1571 #define vld2_s16(__a) __extension__ ({ \
1572 int16x4x2_t __r; __builtin_neon_vld2_v(&__r, __a, 1); __r; })
1573 #define vld2_s32(__a) __extension__ ({ \
1574 int32x2x2_t __r; __builtin_neon_vld2_v(&__r, __a, 2); __r; })
1575 #define vld2_s64(__a) __extension__ ({ \
1576 int64x1x2_t __r; __builtin_neon_vld2_v(&__r, __a, 3); __r; })
1577 #define vld2_f16(__a) __extension__ ({ \
1578 float16x4x2_t __r; __builtin_neon_vld2_v(&__r, __a, 6); __r; })
1579 #define vld2_f32(__a) __extension__ ({ \
1580 float32x2x2_t __r; __builtin_neon_vld2_v(&__r, __a, 7); __r; })
1581 #define vld2_p8(__a) __extension__ ({ \
1582 poly8x8x2_t __r; __builtin_neon_vld2_v(&__r, __a, 4); __r; })
1583 #define vld2_p16(__a) __extension__ ({ \
1584 poly16x4x2_t __r; __builtin_neon_vld2_v(&__r, __a, 5); __r; })
1585
/* vld2_dup_<t>(__a) macros.
 *
 * Each macro expands to a GNU statement expression that declares a result
 * aggregate of the named <t>x2_t type, passes its address together with __a
 * and an integer type code to __builtin_neon_vld2_dup_v, and yields the
 * aggregate as the value of the expression. __a is substituted exactly once.
 *
 * The result temporary is named __r (reserved namespace) rather than a plain
 * identifier: it is in scope where __a is expanded, so a user-namespace name
 * would capture any caller expression that mentions the same identifier.
 *
 * Type codes as used below: 0 = s8, 1 = s16, 2 = s32, 3 = s64, 4 = p8,
 * 5 = p16, 6 = f16, 7 = f32, 16..19 = u8/u16/u32/u64.
 *
 * NOTE(review): "dup" presumably means each loaded element is replicated
 * across the lanes of its vector; that behaviour is inside the builtin --
 * confirm against the ARM intrinsics reference.
 */
1586 #define vld2_dup_u8(__a) __extension__ ({ \
1587 uint8x8x2_t __r; __builtin_neon_vld2_dup_v(&__r, __a, 16); __r; })
1588 #define vld2_dup_u16(__a) __extension__ ({ \
1589 uint16x4x2_t __r; __builtin_neon_vld2_dup_v(&__r, __a, 17); __r; })
1590 #define vld2_dup_u32(__a) __extension__ ({ \
1591 uint32x2x2_t __r; __builtin_neon_vld2_dup_v(&__r, __a, 18); __r; })
1592 #define vld2_dup_u64(__a) __extension__ ({ \
1593 uint64x1x2_t __r; __builtin_neon_vld2_dup_v(&__r, __a, 19); __r; })
1594 #define vld2_dup_s8(__a) __extension__ ({ \
1595 int8x8x2_t __r; __builtin_neon_vld2_dup_v(&__r, __a, 0); __r; })
1596 #define vld2_dup_s16(__a) __extension__ ({ \
1597 int16x4x2_t __r; __builtin_neon_vld2_dup_v(&__r, __a, 1); __r; })
1598 #define vld2_dup_s32(__a) __extension__ ({ \
1599 int32x2x2_t __r; __builtin_neon_vld2_dup_v(&__r, __a, 2); __r; })
1600 #define vld2_dup_s64(__a) __extension__ ({ \
1601 int64x1x2_t __r; __builtin_neon_vld2_dup_v(&__r, __a, 3); __r; })
1602 #define vld2_dup_f16(__a) __extension__ ({ \
1603 float16x4x2_t __r; __builtin_neon_vld2_dup_v(&__r, __a, 6); __r; })
1604 #define vld2_dup_f32(__a) __extension__ ({ \
1605 float32x2x2_t __r; __builtin_neon_vld2_dup_v(&__r, __a, 7); __r; })
1606 #define vld2_dup_p8(__a) __extension__ ({ \
1607 poly8x8x2_t __r; __builtin_neon_vld2_dup_v(&__r, __a, 4); __r; })
1608 #define vld2_dup_p16(__a) __extension__ ({ \
1609 poly16x4x2_t __r; __builtin_neon_vld2_dup_v(&__r, __a, 5); __r; })
1610
/* vld2q_lane_<t>(__a, b, __c) / vld2_lane_<t>(__a, b, __c) macros.
 *
 * Each macro expands to a GNU statement expression that:
 *   1. copies the aggregate argument b into a typed local __b, so b is
 *      evaluated once and must be convertible to the named <t>x2_t type;
 *   2. declares a result aggregate __r of the same type;
 *   3. calls __builtin_neon_vld2q_lane_v / __builtin_neon_vld2_lane_v with
 *      &__r, __a, __b.val[0] and __b.val[1] reinterpreted as int8x16_t /
 *      int8x8_t (the s8 variant passes them unchanged), __c, and an integer
 *      type code;
 *   4. yields __r as the value of the expression.
 * __a and __c are substituted textually, without a temporary.
 *
 * The result temporary is named __r (reserved namespace) rather than a plain
 * identifier: it is in scope where __a and __c are expanded, so a
 * user-namespace name would capture any caller expression that mentions the
 * same identifier.
 *
 * Type codes as used below: 0 = s8, 1 = s16, 2 = s32, 4 = p8, 5 = p16,
 * 6 = f16, 7 = f32, 16..18 = u8/u16/u32; the q forms use the same codes
 * plus 32.
 *
 * NOTE(review): __a is presumably the source pointer and __c the lane index
 * (likely required to be a compile-time constant) -- confirm against the ARM
 * intrinsics reference.
 */
1611 #define vld2q_lane_u16(__a, b, __c) __extension__ ({ \
1612 uint16x8x2_t __b = (b); \
1613 uint16x8x2_t __r; __builtin_neon_vld2q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 49); __r; })
1614 #define vld2q_lane_u32(__a, b, __c) __extension__ ({ \
1615 uint32x4x2_t __b = (b); \
1616 uint32x4x2_t __r; __builtin_neon_vld2q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 50); __r; })
1617 #define vld2q_lane_s16(__a, b, __c) __extension__ ({ \
1618 int16x8x2_t __b = (b); \
1619 int16x8x2_t __r; __builtin_neon_vld2q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 33); __r; })
1620 #define vld2q_lane_s32(__a, b, __c) __extension__ ({ \
1621 int32x4x2_t __b = (b); \
1622 int32x4x2_t __r; __builtin_neon_vld2q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 34); __r; })
1623 #define vld2q_lane_f16(__a, b, __c) __extension__ ({ \
1624 float16x8x2_t __b = (b); \
1625 float16x8x2_t __r; __builtin_neon_vld2q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 38); __r; })
1626 #define vld2q_lane_f32(__a, b, __c) __extension__ ({ \
1627 float32x4x2_t __b = (b); \
1628 float32x4x2_t __r; __builtin_neon_vld2q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 39); __r; })
1629 #define vld2q_lane_p16(__a, b, __c) __extension__ ({ \
1630 poly16x8x2_t __b = (b); \
1631 poly16x8x2_t __r; __builtin_neon_vld2q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 37); __r; })
1632 #define vld2_lane_u8(__a, b, __c) __extension__ ({ \
1633 uint8x8x2_t __b = (b); \
1634 uint8x8x2_t __r; __builtin_neon_vld2_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 16); __r; })
1635 #define vld2_lane_u16(__a, b, __c) __extension__ ({ \
1636 uint16x4x2_t __b = (b); \
1637 uint16x4x2_t __r; __builtin_neon_vld2_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 17); __r; })
1638 #define vld2_lane_u32(__a, b, __c) __extension__ ({ \
1639 uint32x2x2_t __b = (b); \
1640 uint32x2x2_t __r; __builtin_neon_vld2_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 18); __r; })
1641 #define vld2_lane_s8(__a, b, __c) __extension__ ({ \
1642 int8x8x2_t __b = (b); \
1643 int8x8x2_t __r; __builtin_neon_vld2_lane_v(&__r, __a, __b.val[0], __b.val[1], __c, 0); __r; })
1644 #define vld2_lane_s16(__a, b, __c) __extension__ ({ \
1645 int16x4x2_t __b = (b); \
1646 int16x4x2_t __r; __builtin_neon_vld2_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 1); __r; })
1647 #define vld2_lane_s32(__a, b, __c) __extension__ ({ \
1648 int32x2x2_t __b = (b); \
1649 int32x2x2_t __r; __builtin_neon_vld2_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 2); __r; })
1650 #define vld2_lane_f16(__a, b, __c) __extension__ ({ \
1651 float16x4x2_t __b = (b); \
1652 float16x4x2_t __r; __builtin_neon_vld2_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 6); __r; })
1653 #define vld2_lane_f32(__a, b, __c) __extension__ ({ \
1654 float32x2x2_t __b = (b); \
1655 float32x2x2_t __r; __builtin_neon_vld2_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 7); __r; })
1656 #define vld2_lane_p8(__a, b, __c) __extension__ ({ \
1657 poly8x8x2_t __b = (b); \
1658 poly8x8x2_t __r; __builtin_neon_vld2_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 4); __r; })
1659 #define vld2_lane_p16(__a, b, __c) __extension__ ({ \
1660 poly16x4x2_t __b = (b); \
1661 poly16x4x2_t __r; __builtin_neon_vld2_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 5); __r; })
1662
/* vld3q_<t>(__a) / vld3_<t>(__a) macros.
 *
 * Each macro expands to a GNU statement expression that declares a result
 * aggregate of the named <t>x3_t type, passes its address together with __a
 * and an integer type code to __builtin_neon_vld3q_v (q forms) or
 * __builtin_neon_vld3_v (plain forms), and yields the aggregate as the value
 * of the expression. __a is substituted textually exactly once.
 *
 * The result temporary is named __r (reserved namespace) rather than a plain
 * identifier: it is in scope where __a is expanded, so a user-namespace name
 * would capture any caller expression that mentions the same identifier.
 *
 * Type codes as used below: 0 = s8, 1 = s16, 2 = s32, 3 = s64, 4 = p8,
 * 5 = p16, 6 = f16, 7 = f32, 16..19 = u8/u16/u32/u64; the q forms use the
 * same codes plus 32.
 *
 * NOTE(review): __a is presumably the source pointer and the builtin is
 * expected to fill the aggregate through the address it is given -- confirm
 * against the ARM intrinsics reference.
 */
1663 #define vld3q_u8(__a) __extension__ ({ \
1664 uint8x16x3_t __r; __builtin_neon_vld3q_v(&__r, __a, 48); __r; })
1665 #define vld3q_u16(__a) __extension__ ({ \
1666 uint16x8x3_t __r; __builtin_neon_vld3q_v(&__r, __a, 49); __r; })
1667 #define vld3q_u32(__a) __extension__ ({ \
1668 uint32x4x3_t __r; __builtin_neon_vld3q_v(&__r, __a, 50); __r; })
1669 #define vld3q_s8(__a) __extension__ ({ \
1670 int8x16x3_t __r; __builtin_neon_vld3q_v(&__r, __a, 32); __r; })
1671 #define vld3q_s16(__a) __extension__ ({ \
1672 int16x8x3_t __r; __builtin_neon_vld3q_v(&__r, __a, 33); __r; })
1673 #define vld3q_s32(__a) __extension__ ({ \
1674 int32x4x3_t __r; __builtin_neon_vld3q_v(&__r, __a, 34); __r; })
1675 #define vld3q_f16(__a) __extension__ ({ \
1676 float16x8x3_t __r; __builtin_neon_vld3q_v(&__r, __a, 38); __r; })
1677 #define vld3q_f32(__a) __extension__ ({ \
1678 float32x4x3_t __r; __builtin_neon_vld3q_v(&__r, __a, 39); __r; })
1679 #define vld3q_p8(__a) __extension__ ({ \
1680 poly8x16x3_t __r; __builtin_neon_vld3q_v(&__r, __a, 36); __r; })
1681 #define vld3q_p16(__a) __extension__ ({ \
1682 poly16x8x3_t __r; __builtin_neon_vld3q_v(&__r, __a, 37); __r; })
1683 #define vld3_u8(__a) __extension__ ({ \
1684 uint8x8x3_t __r; __builtin_neon_vld3_v(&__r, __a, 16); __r; })
1685 #define vld3_u16(__a) __extension__ ({ \
1686 uint16x4x3_t __r; __builtin_neon_vld3_v(&__r, __a, 17); __r; })
1687 #define vld3_u32(__a) __extension__ ({ \
1688 uint32x2x3_t __r; __builtin_neon_vld3_v(&__r, __a, 18); __r; })
1689 #define vld3_u64(__a) __extension__ ({ \
1690 uint64x1x3_t __r; __builtin_neon_vld3_v(&__r, __a, 19); __r; })
1691 #define vld3_s8(__a) __extension__ ({ \
1692 int8x8x3_t __r; __builtin_neon_vld3_v(&__r, __a, 0); __r; })
1693 #define vld3_s16(__a) __extension__ ({ \
1694 int16x4x3_t __r; __builtin_neon_vld3_v(&__r, __a, 1); __r; })
1695 #define vld3_s32(__a) __extension__ ({ \
1696 int32x2x3_t __r; __builtin_neon_vld3_v(&__r, __a, 2); __r; })
1697 #define vld3_s64(__a) __extension__ ({ \
1698 int64x1x3_t __r; __builtin_neon_vld3_v(&__r, __a, 3); __r; })
1699 #define vld3_f16(__a) __extension__ ({ \
1700 float16x4x3_t __r; __builtin_neon_vld3_v(&__r, __a, 6); __r; })
1701 #define vld3_f32(__a) __extension__ ({ \
1702 float32x2x3_t __r; __builtin_neon_vld3_v(&__r, __a, 7); __r; })
1703 #define vld3_p8(__a) __extension__ ({ \
1704 poly8x8x3_t __r; __builtin_neon_vld3_v(&__r, __a, 4); __r; })
1705 #define vld3_p16(__a) __extension__ ({ \
1706 poly16x4x3_t __r; __builtin_neon_vld3_v(&__r, __a, 5); __r; })
1707
/* vld3_dup_<t>(__a) macros.
 *
 * Each macro expands to a GNU statement expression that declares a result
 * aggregate of the named <t>x3_t type, passes its address together with __a
 * and an integer type code to __builtin_neon_vld3_dup_v, and yields the
 * aggregate as the value of the expression. __a is substituted exactly once.
 *
 * The result temporary is named __r (reserved namespace) rather than a plain
 * identifier: it is in scope where __a is expanded, so a user-namespace name
 * would capture any caller expression that mentions the same identifier.
 *
 * Type codes as used below: 0 = s8, 1 = s16, 2 = s32, 3 = s64, 4 = p8,
 * 5 = p16, 6 = f16, 7 = f32, 16..19 = u8/u16/u32/u64.
 *
 * NOTE(review): "dup" presumably means each loaded element is replicated
 * across the lanes of its vector; that behaviour is inside the builtin --
 * confirm against the ARM intrinsics reference.
 */
1708 #define vld3_dup_u8(__a) __extension__ ({ \
1709 uint8x8x3_t __r; __builtin_neon_vld3_dup_v(&__r, __a, 16); __r; })
1710 #define vld3_dup_u16(__a) __extension__ ({ \
1711 uint16x4x3_t __r; __builtin_neon_vld3_dup_v(&__r, __a, 17); __r; })
1712 #define vld3_dup_u32(__a) __extension__ ({ \
1713 uint32x2x3_t __r; __builtin_neon_vld3_dup_v(&__r, __a, 18); __r; })
1714 #define vld3_dup_u64(__a) __extension__ ({ \
1715 uint64x1x3_t __r; __builtin_neon_vld3_dup_v(&__r, __a, 19); __r; })
1716 #define vld3_dup_s8(__a) __extension__ ({ \
1717 int8x8x3_t __r; __builtin_neon_vld3_dup_v(&__r, __a, 0); __r; })
1718 #define vld3_dup_s16(__a) __extension__ ({ \
1719 int16x4x3_t __r; __builtin_neon_vld3_dup_v(&__r, __a, 1); __r; })
1720 #define vld3_dup_s32(__a) __extension__ ({ \
1721 int32x2x3_t __r; __builtin_neon_vld3_dup_v(&__r, __a, 2); __r; })
1722 #define vld3_dup_s64(__a) __extension__ ({ \
1723 int64x1x3_t __r; __builtin_neon_vld3_dup_v(&__r, __a, 3); __r; })
1724 #define vld3_dup_f16(__a) __extension__ ({ \
1725 float16x4x3_t __r; __builtin_neon_vld3_dup_v(&__r, __a, 6); __r; })
1726 #define vld3_dup_f32(__a) __extension__ ({ \
1727 float32x2x3_t __r; __builtin_neon_vld3_dup_v(&__r, __a, 7); __r; })
1728 #define vld3_dup_p8(__a) __extension__ ({ \
1729 poly8x8x3_t __r; __builtin_neon_vld3_dup_v(&__r, __a, 4); __r; })
1730 #define vld3_dup_p16(__a) __extension__ ({ \
1731 poly16x4x3_t __r; __builtin_neon_vld3_dup_v(&__r, __a, 5); __r; })
1732
/* vld3q_lane_<t>(__a, b, __c) / vld3_lane_<t>(__a, b, __c) macros.
 *
 * Each macro expands to a GNU statement expression that:
 *   1. copies the aggregate argument b into a typed local __b, so b is
 *      evaluated once and must be convertible to the named <t>x3_t type;
 *   2. declares a result aggregate __r of the same type;
 *   3. calls __builtin_neon_vld3q_lane_v / __builtin_neon_vld3_lane_v with
 *      &__r, __a, __b.val[0..2] reinterpreted as int8x16_t / int8x8_t (the
 *      s8 variant passes them unchanged), __c, and an integer type code;
 *   4. yields __r as the value of the expression.
 * __a and __c are substituted textually, without a temporary.
 *
 * The result temporary is named __r (reserved namespace) rather than a plain
 * identifier: it is in scope where __a and __c are expanded, so a
 * user-namespace name would capture any caller expression that mentions the
 * same identifier.
 *
 * Type codes as used below: 0 = s8, 1 = s16, 2 = s32, 4 = p8, 5 = p16,
 * 6 = f16, 7 = f32, 16..18 = u8/u16/u32; the q forms use the same codes
 * plus 32.
 *
 * NOTE(review): __a is presumably the source pointer and __c the lane index
 * (likely required to be a compile-time constant) -- confirm against the ARM
 * intrinsics reference.
 */
1733 #define vld3q_lane_u16(__a, b, __c) __extension__ ({ \
1734 uint16x8x3_t __b = (b); \
1735 uint16x8x3_t __r; __builtin_neon_vld3q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 49); __r; })
1736 #define vld3q_lane_u32(__a, b, __c) __extension__ ({ \
1737 uint32x4x3_t __b = (b); \
1738 uint32x4x3_t __r; __builtin_neon_vld3q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 50); __r; })
1739 #define vld3q_lane_s16(__a, b, __c) __extension__ ({ \
1740 int16x8x3_t __b = (b); \
1741 int16x8x3_t __r; __builtin_neon_vld3q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 33); __r; })
1742 #define vld3q_lane_s32(__a, b, __c) __extension__ ({ \
1743 int32x4x3_t __b = (b); \
1744 int32x4x3_t __r; __builtin_neon_vld3q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 34); __r; })
1745 #define vld3q_lane_f16(__a, b, __c) __extension__ ({ \
1746 float16x8x3_t __b = (b); \
1747 float16x8x3_t __r; __builtin_neon_vld3q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 38); __r; })
1748 #define vld3q_lane_f32(__a, b, __c) __extension__ ({ \
1749 float32x4x3_t __b = (b); \
1750 float32x4x3_t __r; __builtin_neon_vld3q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 39); __r; })
1751 #define vld3q_lane_p16(__a, b, __c) __extension__ ({ \
1752 poly16x8x3_t __b = (b); \
1753 poly16x8x3_t __r; __builtin_neon_vld3q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 37); __r; })
1754 #define vld3_lane_u8(__a, b, __c) __extension__ ({ \
1755 uint8x8x3_t __b = (b); \
1756 uint8x8x3_t __r; __builtin_neon_vld3_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 16); __r; })
1757 #define vld3_lane_u16(__a, b, __c) __extension__ ({ \
1758 uint16x4x3_t __b = (b); \
1759 uint16x4x3_t __r; __builtin_neon_vld3_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 17); __r; })
1760 #define vld3_lane_u32(__a, b, __c) __extension__ ({ \
1761 uint32x2x3_t __b = (b); \
1762 uint32x2x3_t __r; __builtin_neon_vld3_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 18); __r; })
1763 #define vld3_lane_s8(__a, b, __c) __extension__ ({ \
1764 int8x8x3_t __b = (b); \
1765 int8x8x3_t __r; __builtin_neon_vld3_lane_v(&__r, __a, __b.val[0], __b.val[1], __b.val[2], __c, 0); __r; })
1766 #define vld3_lane_s16(__a, b, __c) __extension__ ({ \
1767 int16x4x3_t __b = (b); \
1768 int16x4x3_t __r; __builtin_neon_vld3_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 1); __r; })
1769 #define vld3_lane_s32(__a, b, __c) __extension__ ({ \
1770 int32x2x3_t __b = (b); \
1771 int32x2x3_t __r; __builtin_neon_vld3_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 2); __r; })
1772 #define vld3_lane_f16(__a, b, __c) __extension__ ({ \
1773 float16x4x3_t __b = (b); \
1774 float16x4x3_t __r; __builtin_neon_vld3_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 6); __r; })
1775 #define vld3_lane_f32(__a, b, __c) __extension__ ({ \
1776 float32x2x3_t __b = (b); \
1777 float32x2x3_t __r; __builtin_neon_vld3_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 7); __r; })
1778 #define vld3_lane_p8(__a, b, __c) __extension__ ({ \
1779 poly8x8x3_t __b = (b); \
1780 poly8x8x3_t __r; __builtin_neon_vld3_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 4); __r; })
1781 #define vld3_lane_p16(__a, b, __c) __extension__ ({ \
1782 poly16x4x3_t __b = (b); \
1783 poly16x4x3_t __r; __builtin_neon_vld3_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 5); __r; })
1784
/* vld4q_<t>(__a) / vld4_<t>(__a) macros.
 *
 * Each macro expands to a GNU statement expression that declares a result
 * aggregate of the named <t>x4_t type, passes its address together with __a
 * and an integer type code to __builtin_neon_vld4q_v (q forms) or
 * __builtin_neon_vld4_v (plain forms), and yields the aggregate as the value
 * of the expression. __a is substituted textually exactly once.
 *
 * The result temporary is named __r (reserved namespace) rather than a plain
 * identifier: it is in scope where __a is expanded, so a user-namespace name
 * would capture any caller expression that mentions the same identifier.
 *
 * Type codes as used below: 0 = s8, 1 = s16, 2 = s32, 3 = s64, 4 = p8,
 * 5 = p16, 6 = f16, 7 = f32, 16..19 = u8/u16/u32/u64; the q forms use the
 * same codes plus 32.
 *
 * NOTE(review): __a is presumably the source pointer and the builtin is
 * expected to fill the aggregate through the address it is given -- confirm
 * against the ARM intrinsics reference.
 */
1785 #define vld4q_u8(__a) __extension__ ({ \
1786 uint8x16x4_t __r; __builtin_neon_vld4q_v(&__r, __a, 48); __r; })
1787 #define vld4q_u16(__a) __extension__ ({ \
1788 uint16x8x4_t __r; __builtin_neon_vld4q_v(&__r, __a, 49); __r; })
1789 #define vld4q_u32(__a) __extension__ ({ \
1790 uint32x4x4_t __r; __builtin_neon_vld4q_v(&__r, __a, 50); __r; })
1791 #define vld4q_s8(__a) __extension__ ({ \
1792 int8x16x4_t __r; __builtin_neon_vld4q_v(&__r, __a, 32); __r; })
1793 #define vld4q_s16(__a) __extension__ ({ \
1794 int16x8x4_t __r; __builtin_neon_vld4q_v(&__r, __a, 33); __r; })
1795 #define vld4q_s32(__a) __extension__ ({ \
1796 int32x4x4_t __r; __builtin_neon_vld4q_v(&__r, __a, 34); __r; })
1797 #define vld4q_f16(__a) __extension__ ({ \
1798 float16x8x4_t __r; __builtin_neon_vld4q_v(&__r, __a, 38); __r; })
1799 #define vld4q_f32(__a) __extension__ ({ \
1800 float32x4x4_t __r; __builtin_neon_vld4q_v(&__r, __a, 39); __r; })
1801 #define vld4q_p8(__a) __extension__ ({ \
1802 poly8x16x4_t __r; __builtin_neon_vld4q_v(&__r, __a, 36); __r; })
1803 #define vld4q_p16(__a) __extension__ ({ \
1804 poly16x8x4_t __r; __builtin_neon_vld4q_v(&__r, __a, 37); __r; })
1805 #define vld4_u8(__a) __extension__ ({ \
1806 uint8x8x4_t __r; __builtin_neon_vld4_v(&__r, __a, 16); __r; })
1807 #define vld4_u16(__a) __extension__ ({ \
1808 uint16x4x4_t __r; __builtin_neon_vld4_v(&__r, __a, 17); __r; })
1809 #define vld4_u32(__a) __extension__ ({ \
1810 uint32x2x4_t __r; __builtin_neon_vld4_v(&__r, __a, 18); __r; })
1811 #define vld4_u64(__a) __extension__ ({ \
1812 uint64x1x4_t __r; __builtin_neon_vld4_v(&__r, __a, 19); __r; })
1813 #define vld4_s8(__a) __extension__ ({ \
1814 int8x8x4_t __r; __builtin_neon_vld4_v(&__r, __a, 0); __r; })
1815 #define vld4_s16(__a) __extension__ ({ \
1816 int16x4x4_t __r; __builtin_neon_vld4_v(&__r, __a, 1); __r; })
1817 #define vld4_s32(__a) __extension__ ({ \
1818 int32x2x4_t __r; __builtin_neon_vld4_v(&__r, __a, 2); __r; })
1819 #define vld4_s64(__a) __extension__ ({ \
1820 int64x1x4_t __r; __builtin_neon_vld4_v(&__r, __a, 3); __r; })
1821 #define vld4_f16(__a) __extension__ ({ \
1822 float16x4x4_t __r; __builtin_neon_vld4_v(&__r, __a, 6); __r; })
1823 #define vld4_f32(__a) __extension__ ({ \
1824 float32x2x4_t __r; __builtin_neon_vld4_v(&__r, __a, 7); __r; })
1825 #define vld4_p8(__a) __extension__ ({ \
1826 poly8x8x4_t __r; __builtin_neon_vld4_v(&__r, __a, 4); __r; })
1827 #define vld4_p16(__a) __extension__ ({ \
1828 poly16x4x4_t __r; __builtin_neon_vld4_v(&__r, __a, 5); __r; })
1829
/* vld4_dup_<t>(__a) macros.
 *
 * Each macro expands to a GNU statement expression that declares a result
 * aggregate of the named <t>x4_t type, passes its address together with __a
 * and an integer type code to __builtin_neon_vld4_dup_v, and yields the
 * aggregate as the value of the expression. __a is substituted exactly once.
 *
 * The result temporary is named __r (reserved namespace) rather than a plain
 * identifier: it is in scope where __a is expanded, so a user-namespace name
 * would capture any caller expression that mentions the same identifier.
 *
 * Type codes as used below: 0 = s8, 1 = s16, 2 = s32, 3 = s64, 4 = p8,
 * 5 = p16, 6 = f16, 7 = f32, 16..19 = u8/u16/u32/u64.
 *
 * NOTE(review): "dup" presumably means each loaded element is replicated
 * across the lanes of its vector; that behaviour is inside the builtin --
 * confirm against the ARM intrinsics reference.
 */
1830 #define vld4_dup_u8(__a) __extension__ ({ \
1831 uint8x8x4_t __r; __builtin_neon_vld4_dup_v(&__r, __a, 16); __r; })
1832 #define vld4_dup_u16(__a) __extension__ ({ \
1833 uint16x4x4_t __r; __builtin_neon_vld4_dup_v(&__r, __a, 17); __r; })
1834 #define vld4_dup_u32(__a) __extension__ ({ \
1835 uint32x2x4_t __r; __builtin_neon_vld4_dup_v(&__r, __a, 18); __r; })
1836 #define vld4_dup_u64(__a) __extension__ ({ \
1837 uint64x1x4_t __r; __builtin_neon_vld4_dup_v(&__r, __a, 19); __r; })
1838 #define vld4_dup_s8(__a) __extension__ ({ \
1839 int8x8x4_t __r; __builtin_neon_vld4_dup_v(&__r, __a, 0); __r; })
1840 #define vld4_dup_s16(__a) __extension__ ({ \
1841 int16x4x4_t __r; __builtin_neon_vld4_dup_v(&__r, __a, 1); __r; })
1842 #define vld4_dup_s32(__a) __extension__ ({ \
1843 int32x2x4_t __r; __builtin_neon_vld4_dup_v(&__r, __a, 2); __r; })
1844 #define vld4_dup_s64(__a) __extension__ ({ \
1845 int64x1x4_t __r; __builtin_neon_vld4_dup_v(&__r, __a, 3); __r; })
1846 #define vld4_dup_f16(__a) __extension__ ({ \
1847 float16x4x4_t __r; __builtin_neon_vld4_dup_v(&__r, __a, 6); __r; })
1848 #define vld4_dup_f32(__a) __extension__ ({ \
1849 float32x2x4_t __r; __builtin_neon_vld4_dup_v(&__r, __a, 7); __r; })
1850 #define vld4_dup_p8(__a) __extension__ ({ \
1851 poly8x8x4_t __r; __builtin_neon_vld4_dup_v(&__r, __a, 4); __r; })
1852 #define vld4_dup_p16(__a) __extension__ ({ \
1853 poly16x4x4_t __r; __builtin_neon_vld4_dup_v(&__r, __a, 5); __r; })
1854
/* vld4q_lane_<t>(__a, b, __c) / vld4_lane_<t>(__a, b, __c) macros.
 *
 * Each macro expands to a GNU statement expression that:
 *   1. copies the aggregate argument b into a typed local __b, so b is
 *      evaluated once and must be convertible to the named <t>x4_t type;
 *   2. declares a result aggregate __r of the same type;
 *   3. calls __builtin_neon_vld4q_lane_v / __builtin_neon_vld4_lane_v with
 *      &__r, __a, __b.val[0..3] reinterpreted as int8x16_t / int8x8_t (the
 *      s8 variant passes them unchanged), __c, and an integer type code;
 *   4. yields __r as the value of the expression.
 * __a and __c are substituted textually, without a temporary.
 *
 * The result temporary is named __r (reserved namespace) rather than a plain
 * identifier: it is in scope where __a and __c are expanded, so a
 * user-namespace name would capture any caller expression that mentions the
 * same identifier.
 *
 * Type codes as used below: 0 = s8, 1 = s16, 2 = s32, 4 = p8, 5 = p16,
 * 6 = f16, 7 = f32, 16..18 = u8/u16/u32; the q forms use the same codes
 * plus 32.
 *
 * NOTE(review): __a is presumably the source pointer and __c the lane index
 * (likely required to be a compile-time constant) -- confirm against the ARM
 * intrinsics reference.
 */
1855 #define vld4q_lane_u16(__a, b, __c) __extension__ ({ \
1856 uint16x8x4_t __b = (b); \
1857 uint16x8x4_t __r; __builtin_neon_vld4q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 49); __r; })
1858 #define vld4q_lane_u32(__a, b, __c) __extension__ ({ \
1859 uint32x4x4_t __b = (b); \
1860 uint32x4x4_t __r; __builtin_neon_vld4q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 50); __r; })
1861 #define vld4q_lane_s16(__a, b, __c) __extension__ ({ \
1862 int16x8x4_t __b = (b); \
1863 int16x8x4_t __r; __builtin_neon_vld4q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 33); __r; })
1864 #define vld4q_lane_s32(__a, b, __c) __extension__ ({ \
1865 int32x4x4_t __b = (b); \
1866 int32x4x4_t __r; __builtin_neon_vld4q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 34); __r; })
1867 #define vld4q_lane_f16(__a, b, __c) __extension__ ({ \
1868 float16x8x4_t __b = (b); \
1869 float16x8x4_t __r; __builtin_neon_vld4q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 38); __r; })
1870 #define vld4q_lane_f32(__a, b, __c) __extension__ ({ \
1871 float32x4x4_t __b = (b); \
1872 float32x4x4_t __r; __builtin_neon_vld4q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 39); __r; })
1873 #define vld4q_lane_p16(__a, b, __c) __extension__ ({ \
1874 poly16x8x4_t __b = (b); \
1875 poly16x8x4_t __r; __builtin_neon_vld4q_lane_v(&__r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 37); __r; })
1876 #define vld4_lane_u8(__a, b, __c) __extension__ ({ \
1877 uint8x8x4_t __b = (b); \
1878 uint8x8x4_t __r; __builtin_neon_vld4_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 16); __r; })
1879 #define vld4_lane_u16(__a, b, __c) __extension__ ({ \
1880 uint16x4x4_t __b = (b); \
1881 uint16x4x4_t __r; __builtin_neon_vld4_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 17); __r; })
1882 #define vld4_lane_u32(__a, b, __c) __extension__ ({ \
1883 uint32x2x4_t __b = (b); \
1884 uint32x2x4_t __r; __builtin_neon_vld4_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 18); __r; })
1885 #define vld4_lane_s8(__a, b, __c) __extension__ ({ \
1886 int8x8x4_t __b = (b); \
1887 int8x8x4_t __r; __builtin_neon_vld4_lane_v(&__r, __a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); __r; })
1888 #define vld4_lane_s16(__a, b, __c) __extension__ ({ \
1889 int16x4x4_t __b = (b); \
1890 int16x4x4_t __r; __builtin_neon_vld4_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 1); __r; })
1891 #define vld4_lane_s32(__a, b, __c) __extension__ ({ \
1892 int32x2x4_t __b = (b); \
1893 int32x2x4_t __r; __builtin_neon_vld4_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 2); __r; })
1894 #define vld4_lane_f16(__a, b, __c) __extension__ ({ \
1895 float16x4x4_t __b = (b); \
1896 float16x4x4_t __r; __builtin_neon_vld4_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 6); __r; })
1897 #define vld4_lane_f32(__a, b, __c) __extension__ ({ \
1898 float32x2x4_t __b = (b); \
1899 float32x2x4_t __r; __builtin_neon_vld4_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 7); __r; })
1900 #define vld4_lane_p8(__a, b, __c) __extension__ ({ \
1901 poly8x8x4_t __b = (b); \
1902 poly8x8x4_t __r; __builtin_neon_vld4_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 4); __r; })
1903 #define vld4_lane_p16(__a, b, __c) __extension__ ({ \
1904 poly16x4x4_t __b = (b); \
1905 poly16x4x4_t __r; __builtin_neon_vld4_lane_v(&__r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 5); __r; })
1906
1907 __ai int8x8_t vmax_s8(int8x8_t __a, int8x8_t __b) {
1908 return (int8x8_t)__builtin_neon_vmax_v(__a, __b, 0); }
1909 __ai int16x4_t vmax_s16(int16x4_t __a, int16x4_t __b) {
1910 return (int16x4_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 1); }
1911 __ai int32x2_t vmax_s32(int32x2_t __a, int32x2_t __b) {
1912 return (int32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 2); }
1913 __ai uint8x8_t vmax_u8(uint8x8_t __a, uint8x8_t __b) {
1914 return (uint8x8_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 16); }
1915 __ai uint16x4_t vmax_u16(uint16x4_t __a, uint16x4_t __b) {
1916 return (uint16x4_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 17); }
1917 __ai uint32x2_t vmax_u32(uint32x2_t __a, uint32x2_t __b) {
1918 return (uint32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 18); }
1919 __ai float32x2_t vmax_f32(float32x2_t __a, float32x2_t __b) {
1920 return (float32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 7); }
1921 __ai int8x16_t vmaxq_s8(int8x16_t __a, int8x16_t __b) {
1922 return (int8x16_t)__builtin_neon_vmaxq_v(__a, __b, 32); }
1923 __ai int16x8_t vmaxq_s16(int16x8_t __a, int16x8_t __b) {
1924 return (int16x8_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
1925 __ai int32x4_t vmaxq_s32(int32x4_t __a, int32x4_t __b) {
1926 return (int32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
1927 __ai uint8x16_t vmaxq_u8(uint8x16_t __a, uint8x16_t __b) {
1928 return (uint8x16_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
1929 __ai uint16x8_t vmaxq_u16(uint16x8_t __a, uint16x8_t __b) {
1930 return (uint16x8_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
1931 __ai uint32x4_t vmaxq_u32(uint32x4_t __a, uint32x4_t __b) {
1932 return (uint32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
1933 __ai float32x4_t vmaxq_f32(float32x4_t __a, float32x4_t __b) {
1934 return (float32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 39) ; }
1935
1936 __ai int8x8_t vmin_s8(int8x8_t __a, int8x8_t __b) {
1937 return (int8x8_t)__builtin_neon_vmin_v(__a, __b, 0); }
1938 __ai int16x4_t vmin_s16(int16x4_t __a, int16x4_t __b) {
1939 return (int16x4_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 1); }
1940 __ai int32x2_t vmin_s32(int32x2_t __a, int32x2_t __b) {
1941 return (int32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 2); }
1942 __ai uint8x8_t vmin_u8(uint8x8_t __a, uint8x8_t __b) {
1943 return (uint8x8_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 16); }
1944 __ai uint16x4_t vmin_u16(uint16x4_t __a, uint16x4_t __b) {
1945 return (uint16x4_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 17); }
1946 __ai uint32x2_t vmin_u32(uint32x2_t __a, uint32x2_t __b) {
1947 return (uint32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 18); }
1948 __ai float32x2_t vmin_f32(float32x2_t __a, float32x2_t __b) {
1949 return (float32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 7); }
1950 __ai int8x16_t vminq_s8(int8x16_t __a, int8x16_t __b) {
1951 return (int8x16_t)__builtin_neon_vminq_v(__a, __b, 32); }
1952 __ai int16x8_t vminq_s16(int16x8_t __a, int16x8_t __b) {
1953 return (int16x8_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
1954 __ai int32x4_t vminq_s32(int32x4_t __a, int32x4_t __b) {
1955 return (int32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
1956 __ai uint8x16_t vminq_u8(uint8x16_t __a, uint8x16_t __b) {
1957 return (uint8x16_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
1958 __ai uint16x8_t vminq_u16(uint16x8_t __a, uint16x8_t __b) {
1959 return (uint16x8_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
1960 __ai uint32x4_t vminq_u32(uint32x4_t __a, uint32x4_t __b) {
1961 return (uint32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
1962 __ai float32x4_t vminq_f32(float32x4_t __a, float32x4_t __b) {
1963 return (float32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 39) ; }
1964
1965 __ai int8x8_t vmla_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) {
1966 return __a + (__b * __c); }
1967 __ai int16x4_t vmla_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) {
1968 return __a + (__b * __c); }
1969 __ai int32x2_t vmla_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) {
1970 return __a + (__b * __c); }
1971 __ai float32x2_t vmla_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) {
1972 return __a + (__b * __c); }
1973 __ai uint8x8_t vmla_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) {
1974 return __a + (__b * __c); }
1975 __ai uint16x4_t vmla_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) {
1976 return __a + (__b * __c); }
1977 __ai uint32x2_t vmla_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) {
1978 return __a + (__b * __c); }
1979 __ai int8x16_t vmlaq_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) {
1980 return __a + (__b * __c); }
1981 __ai int16x8_t vmlaq_s16(int16x8_t __a, int16x8_t __b, int16x8_t __c) {
1982 return __a + (__b * __c); }
1983 __ai int32x4_t vmlaq_s32(int32x4_t __a, int32x4_t __b, int32x4_t __c) {
1984 return __a + (__b * __c); }
1985 __ai float32x4_t vmlaq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) {
1986 return __a + (__b * __c); }
1987 __ai uint8x16_t vmlaq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) {
1988 return __a + (__b * __c); }
1989 __ai uint16x8_t vmlaq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) {
1990 return __a + (__b * __c); }
1991 __ai uint32x4_t vmlaq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) {
1992 return __a + (__b * __c); }
1993
1994 __ai int16x8_t vmlal_s8(int16x8_t __a, int8x8_t __b, int8x8_t __c) {
1995 return __a + vmull_s8(__b, __c); }
1996 __ai int32x4_t vmlal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) {
1997 return __a + vmull_s16(__b, __c); }
1998 __ai int64x2_t vmlal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) {
1999 return __a + vmull_s32(__b, __c); }
2000 __ai uint16x8_t vmlal_u8(uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) {
2001 return __a + vmull_u8(__b, __c); }
2002 __ai uint32x4_t vmlal_u16(uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) {
2003 return __a + vmull_u16(__b, __c); }
2004 __ai uint64x2_t vmlal_u32(uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) {
2005 return __a + vmull_u32(__b, __c); }
2006
/* vmlal_lane_*: GNU statement-expression macros. The arguments a, b and c are
 * copied into typed locals (each evaluated once). __builtin_shufflevector
 * builds a vector whose every element is selected by index __d from __c, and
 * the macro evaluates to __a + vmull_<type>(__b, <that vector>).
 * __d is pasted directly into the shuffle index list. */
#define vmlal_lane_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \
  __a + vmull_s16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmlal_lane_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \
  __a + vmull_s32(__b, __builtin_shufflevector(__c, __c, __d, __d)); })
#define vmlal_lane_u16(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \
  __a + vmull_u16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmlal_lane_u32(a, b, c, __d) __extension__ ({ \
  uint64x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \
  __a + vmull_u32(__b, __builtin_shufflevector(__c, __c, __d, __d)); })
2019
2020 __ai int32x4_t vmlal_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) {
2021 return __a + vmull_s16(__b, (int16x4_t){ __c, __c, __c, __c }); }
2022 __ai int64x2_t vmlal_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) {
2023 return __a + vmull_s32(__b, (int32x2_t){ __c, __c }); }
2024 __ai uint32x4_t vmlal_n_u16(uint32x4_t __a, uint16x4_t __b, uint16_t __c) {
2025 return __a + vmull_u16(__b, (uint16x4_t){ __c, __c, __c, __c }); }
2026 __ai uint64x2_t vmlal_n_u32(uint64x2_t __a, uint32x2_t __b, uint32_t __c) {
2027 return __a + vmull_u32(__b, (uint32x2_t){ __c, __c }); }
2028
/* vmla_lane_* / vmlaq_lane_*: GNU statement-expression macros. The arguments
 * a, b and c are copied into typed locals (each evaluated once).
 * __builtin_shufflevector builds a vector whose every element is selected by
 * index __d from __c -- 2 or 4 indices for the plain forms, 4 or 8 for the q
 * forms, matching the element count of __b -- and the macro evaluates to
 * __a + (__b * <that vector>). In the q forms __c is the shorter vector type.
 * __d is pasted directly into the shuffle index list. */
#define vmla_lane_s16(a, b, c, __d) __extension__ ({ \
  int16x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmla_lane_s32(a, b, c, __d) __extension__ ({ \
  int32x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); })
#define vmla_lane_u16(a, b, c, __d) __extension__ ({ \
  uint16x4_t __a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmla_lane_u32(a, b, c, __d) __extension__ ({ \
  uint32x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); })
#define vmla_lane_f32(a, b, c, __d) __extension__ ({ \
  float32x2_t __a = (a); float32x2_t __b = (b); float32x2_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d)); })
#define vmlaq_lane_s16(a, b, c, __d) __extension__ ({ \
  int16x8_t __a = (a); int16x8_t __b = (b); int16x4_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); })
#define vmlaq_lane_s32(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); int32x4_t __b = (b); int32x2_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmlaq_lane_u16(a, b, c, __d) __extension__ ({ \
  uint16x8_t __a = (a); uint16x8_t __b = (b); uint16x4_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); })
#define vmlaq_lane_u32(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); uint32x4_t __b = (b); uint32x2_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmlaq_lane_f32(a, b, c, __d) __extension__ ({ \
  float32x4_t __a = (a); float32x4_t __b = (b); float32x2_t __c = (c); \
  __a + (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
2059
2060 __ai int16x4_t vmla_n_s16(int16x4_t __a, int16x4_t __b, int16_t __c) {
2061 return __a + (__b * (int16x4_t){ __c, __c, __c, __c }); }
2062 __ai int32x2_t vmla_n_s32(int32x2_t __a, int32x2_t __b, int32_t __c) {
2063 return __a + (__b * (int32x2_t){ __c, __c }); }
2064 __ai uint16x4_t vmla_n_u16(uint16x4_t __a, uint16x4_t __b, uint16_t __c) {
2065 return __a + (__b * (uint16x4_t){ __c, __c, __c, __c }); }
2066 __ai uint32x2_t vmla_n_u32(uint32x2_t __a, uint32x2_t __b, uint32_t __c) {
2067 return __a + (__b * (uint32x2_t){ __c, __c }); }
2068 __ai float32x2_t vmla_n_f32(float32x2_t __a, float32x2_t __b, float32_t __c) {
2069 return __a + (__b * (float32x2_t){ __c, __c }); }
2070 __ai int16x8_t vmlaq_n_s16(int16x8_t __a, int16x8_t __b, int16_t __c) {
2071 return __a + (__b * (int16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); }
2072 __ai int32x4_t vmlaq_n_s32(int32x4_t __a, int32x4_t __b, int32_t __c) {
2073 return __a + (__b * (int32x4_t){ __c, __c, __c, __c }); }
2074 __ai uint16x8_t vmlaq_n_u16(uint16x8_t __a, uint16x8_t __b, uint16_t __c) {
2075 return __a + (__b * (uint16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); }
2076 __ai uint32x4_t vmlaq_n_u32(uint32x4_t __a, uint32x4_t __b, uint32_t __c) {
2077 return __a + (__b * (uint32x4_t){ __c, __c, __c, __c }); }
2078 __ai float32x4_t vmlaq_n_f32(float32x4_t __a, float32x4_t __b, float32_t __c) {
2079 return __a + (__b * (float32x4_t){ __c, __c, __c, __c }); }
2080
2081 __ai int8x8_t vmls_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) {
2082 return __a - (__b * __c); }
2083 __ai int16x4_t vmls_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) {
2084 return __a - (__b * __c); }
2085 __ai int32x2_t vmls_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) {
2086 return __a - (__b * __c); }
2087 __ai float32x2_t vmls_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) {
2088 return __a - (__b * __c); }
2089 __ai uint8x8_t vmls_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) {
2090 return __a - (__b * __c); }
2091 __ai uint16x4_t vmls_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) {
2092 return __a - (__b * __c); }
2093 __ai uint32x2_t vmls_u32(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) {
2094 return __a - (__b * __c); }
2095 __ai int8x16_t vmlsq_s8(int8x16_t __a, int8x16_t __b, int8x16_t __c) {
2096 return __a - (__b * __c); }
2097 __ai int16x8_t vmlsq_s16(int16x8_t __a, int16x8_t __b, int16x8_t __c) {
2098 return __a - (__b * __c); }
2099 __ai int32x4_t vmlsq_s32(int32x4_t __a, int32x4_t __b, int32x4_t __c) {
2100 return __a - (__b * __c); }
2101 __ai float32x4_t vmlsq_f32(float32x4_t __a, float32x4_t __b, float32x4_t __c) {
2102 return __a - (__b * __c); }
2103 __ai uint8x16_t vmlsq_u8(uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) {
2104 return __a - (__b * __c); }
2105 __ai uint16x8_t vmlsq_u16(uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) {
2106 return __a - (__b * __c); }
2107 __ai uint32x4_t vmlsq_u32(uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) {
2108 return __a - (__b * __c); }
2109
2110 __ai int16x8_t vmlsl_s8(int16x8_t __a, int8x8_t __b, int8x8_t __c) {
2111 return __a - vmull_s8(__b, __c); }
2112 __ai int32x4_t vmlsl_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) {
2113 return __a - vmull_s16(__b, __c); }
2114 __ai int64x2_t vmlsl_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) {
2115 return __a - vmull_s32(__b, __c); }
2116 __ai uint16x8_t vmlsl_u8(uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) {
2117 return __a - vmull_u8(__b, __c); }
2118 __ai uint32x4_t vmlsl_u16(uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) {
2119 return __a - vmull_u16(__b, __c); }
2120 __ai uint64x2_t vmlsl_u32(uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) {
2121 return __a - vmull_u32(__b, __c); }
2122
/* vmlsl_lane_*: GNU statement-expression macros. The arguments a, b and c are
 * copied into typed locals (each evaluated once). __builtin_shufflevector
 * builds a vector whose every element is selected by index __d from __c, and
 * the macro evaluates to __a - vmull_<type>(__b, <that vector>).
 * __d is pasted directly into the shuffle index list. */
#define vmlsl_lane_s16(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \
  __a - vmull_s16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmlsl_lane_s32(a, b, c, __d) __extension__ ({ \
  int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \
  __a - vmull_s32(__b, __builtin_shufflevector(__c, __c, __d, __d)); })
#define vmlsl_lane_u16(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \
  __a - vmull_u16(__b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmlsl_lane_u32(a, b, c, __d) __extension__ ({ \
  uint64x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \
  __a - vmull_u32(__b, __builtin_shufflevector(__c, __c, __d, __d)); })
2135
2136 __ai int32x4_t vmlsl_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) {
2137 return __a - vmull_s16(__b, (int16x4_t){ __c, __c, __c, __c }); }
2138 __ai int64x2_t vmlsl_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) {
2139 return __a - vmull_s32(__b, (int32x2_t){ __c, __c }); }
2140 __ai uint32x4_t vmlsl_n_u16(uint32x4_t __a, uint16x4_t __b, uint16_t __c) {
2141 return __a - vmull_u16(__b, (uint16x4_t){ __c, __c, __c, __c }); }
2142 __ai uint64x2_t vmlsl_n_u32(uint64x2_t __a, uint32x2_t __b, uint32_t __c) {
2143 return __a - vmull_u32(__b, (uint32x2_t){ __c, __c }); }
2144
/* vmls_lane_* / vmlsq_lane_*: GNU statement-expression macros. The arguments
 * a, b and c are copied into typed locals (each evaluated once).
 * __builtin_shufflevector builds a vector whose every element is selected by
 * index __d from __c -- 2 or 4 indices for the plain forms, 4 or 8 for the q
 * forms, matching the element count of __b -- and the macro evaluates to
 * __a - (__b * <that vector>). In the q forms __c is the shorter vector type.
 * __d is pasted directly into the shuffle index list. */
#define vmls_lane_s16(a, b, c, __d) __extension__ ({ \
  int16x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmls_lane_s32(a, b, c, __d) __extension__ ({ \
  int32x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); })
#define vmls_lane_u16(a, b, c, __d) __extension__ ({ \
  uint16x4_t __a = (a); uint16x4_t __b = (b); uint16x4_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmls_lane_u32(a, b, c, __d) __extension__ ({ \
  uint32x2_t __a = (a); uint32x2_t __b = (b); uint32x2_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); })
#define vmls_lane_f32(a, b, c, __d) __extension__ ({ \
  float32x2_t __a = (a); float32x2_t __b = (b); float32x2_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d)); })
#define vmlsq_lane_s16(a, b, c, __d) __extension__ ({ \
  int16x8_t __a = (a); int16x8_t __b = (b); int16x4_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); })
#define vmlsq_lane_s32(a, b, c, __d) __extension__ ({ \
  int32x4_t __a = (a); int32x4_t __b = (b); int32x2_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmlsq_lane_u16(a, b, c, __d) __extension__ ({ \
  uint16x8_t __a = (a); uint16x8_t __b = (b); uint16x4_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d, __d, __d, __d, __d)); })
#define vmlsq_lane_u32(a, b, c, __d) __extension__ ({ \
  uint32x4_t __a = (a); uint32x4_t __b = (b); uint32x2_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
#define vmlsq_lane_f32(a, b, c, __d) __extension__ ({ \
  float32x4_t __a = (a); float32x4_t __b = (b); float32x2_t __c = (c); \
  __a - (__b * __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
2175
2176 __ai int16x4_t vmls_n_s16(int16x4_t __a, int16x4_t __b, int16_t __c) {
2177 return __a - (__b * (int16x4_t){ __c, __c, __c, __c }); }
2178 __ai int32x2_t vmls_n_s32(int32x2_t __a, int32x2_t __b, int32_t __c) {
2179 return __a - (__b * (int32x2_t){ __c, __c }); }
2180 __ai uint16x4_t vmls_n_u16(uint16x4_t __a, uint16x4_t __b, uint16_t __c) {
2181 return __a - (__b * (uint16x4_t){ __c, __c, __c, __c }); }
2182 __ai uint32x2_t vmls_n_u32(uint32x2_t __a, uint32x2_t __b, uint32_t __c) {
2183 return __a - (__b * (uint32x2_t){ __c, __c }); }
2184 __ai float32x2_t vmls_n_f32(float32x2_t __a, float32x2_t __b, float32_t __c) {
2185 return __a - (__b * (float32x2_t){ __c, __c }); }
2186 __ai int16x8_t vmlsq_n_s16(int16x8_t __a, int16x8_t __b, int16_t __c) {
2187 return __a - (__b * (int16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); }
2188 __ai int32x4_t vmlsq_n_s32(int32x4_t __a, int32x4_t __b, int32_t __c) {
2189 return __a - (__b * (int32x4_t){ __c, __c, __c, __c }); }
2190 __ai uint16x8_t vmlsq_n_u16(uint16x8_t __a, uint16x8_t __b, uint16_t __c) {
2191 return __a - (__b * (uint16x8_t){ __c, __c, __c, __c, __c, __c, __c, __c }); }
2192 __ai uint32x4_t vmlsq_n_u32(uint32x4_t __a, uint32x4_t __b, uint32_t __c) {
2193 return __a - (__b * (uint32x4_t){ __c, __c, __c, __c }); }
2194 __ai float32x4_t vmlsq_n_f32(float32x4_t __a, float32x4_t __b, float32_t __c) {
2195 return __a - (__b * (float32x4_t){ __c, __c, __c, __c }); }
2196
2197 __ai int8x8_t vmovn_s16(int16x8_t __a) {
2198 return (int8x8_t)__builtin_neon_vmovn_v((int8x16_t)__a, 0); }
2199 __ai int16x4_t vmovn_s32(int32x4_t __a) {
2200 return (int16x4_t)__builtin_neon_vmovn_v((int8x16_t)__a, 1); }
2201 __ai int32x2_t vmovn_s64(int64x2_t __a) {
2202 return (int32x2_t)__builtin_neon_vmovn_v((int8x16_t)__a, 2); }
2203 __ai uint8x8_t vmovn_u16(uint16x8_t __a) {
2204 return (uint8x8_t)__builtin_neon_vmovn_v((int8x16_t)__a, 16); }
2205 __ai uint16x4_t vmovn_u32(uint32x4_t __a) {
2206 return (uint16x4_t)__builtin_neon_vmovn_v((int8x16_t)__a, 17); }
2207 __ai uint32x2_t vmovn_u64(uint64x2_t __a) {
2208 return (uint32x2_t)__builtin_neon_vmovn_v((int8x16_t)__a, 18); }
2209
2210 __ai uint8x8_t vmov_n_u8(uint8_t __a) {
2211 return (uint8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
2212 __ai uint16x4_t vmov_n_u16(uint16_t __a) {
2213 return (uint16x4_t){ __a, __a, __a, __a }; }
2214 __ai uint32x2_t vmov_n_u32(uint32_t __a) {
2215 return (uint32x2_t){ __a, __a }; }
2216 __ai int8x8_t vmov_n_s8(int8_t __a) {
2217 return (int8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
2218 __ai int16x4_t vmov_n_s16(int16_t __a) {
2219 return (int16x4_t){ __a, __a, __a, __a }; }
2220 __ai int32x2_t vmov_n_s32(int32_t __a) {
2221 return (int32x2_t){ __a, __a }; }
2222 __ai poly8x8_t vmov_n_p8(poly8_t __a) {
2223 return (poly8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
2224 __ai poly16x4_t vmov_n_p16(poly16_t __a) {
2225 return (poly16x4_t){ __a, __a, __a, __a }; }
2226 __ai float32x2_t vmov_n_f32(float32_t __a) {
2227 return (float32x2_t){ __a, __a }; }
2228 __ai uint8x16_t vmovq_n_u8(uint8_t __a) {
2229 return (uint8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __ a, __a, __a, __a, __a }; }
2230 __ai uint16x8_t vmovq_n_u16(uint16_t __a) {
2231 return (uint16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
2232 __ai uint32x4_t vmovq_n_u32(uint32_t __a) {
2233 return (uint32x4_t){ __a, __a, __a, __a }; }
2234 __ai int8x16_t vmovq_n_s8(int8_t __a) {
2235 return (int8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a , __a, __a, __a, __a }; }
2236 __ai int16x8_t vmovq_n_s16(int16_t __a) {
2237 return (int16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
2238 __ai int32x4_t vmovq_n_s32(int32_t __a) {
2239 return (int32x4_t){ __a, __a, __a, __a }; }
2240 __ai poly8x16_t vmovq_n_p8(poly8_t __a) {
2241 return (poly8x16_t){ __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __ a, __a, __a, __a, __a }; }
2242 __ai poly16x8_t vmovq_n_p16(poly16_t __a) {
2243 return (poly16x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
2244 __ai float32x4_t vmovq_n_f32(float32_t __a) {
2245 return (float32x4_t){ __a, __a, __a, __a }; }
2246 __ai int64x1_t vmov_n_s64(int64_t __a) {
2247 return (int64x1_t){ __a }; }
2248 __ai uint64x1_t vmov_n_u64(uint64_t __a) {
2249 return (uint64x1_t){ __a }; }
2250 __ai int64x2_t vmovq_n_s64(int64_t __a) {
2251 return (int64x2_t){ __a, __a }; }
2252 __ai uint64x2_t vmovq_n_u64(uint64_t __a) {
2253 return (uint64x2_t){ __a, __a }; }
2254
2255 __ai int8x8_t vmul_s8(int8x8_t __a, int8x8_t __b) {
2256 return __a * __b; }
2257 __ai int16x4_t vmul_s16(int16x4_t __a, int16x4_t __b) {
2258 return __a * __b; }
2259 __ai int32x2_t vmul_s32(int32x2_t __a, int32x2_t __b) {
2260 return __a * __b; }
2261 __ai float32x2_t vmul_f32(float32x2_t __a, float32x2_t __b) {
2262 return __a * __b; }
2263 __ai uint8x8_t vmul_u8(uint8x8_t __a, uint8x8_t __b) {
2264 return __a * __b; }
2265 __ai uint16x4_t vmul_u16(uint16x4_t __a, uint16x4_t __b) {
2266 return __a * __b; }
2267 __ai uint32x2_t vmul_u32(uint32x2_t __a, uint32x2_t __b) {
2268 return __a * __b; }
2269 __ai int8x16_t vmulq_s8(int8x16_t __a, int8x16_t __b) {
2270 return __a * __b; }
2271 __ai int16x8_t vmulq_s16(int16x8_t __a, int16x8_t __b) {
2272 return __a * __b; }
2273 __ai int32x4_t vmulq_s32(int32x4_t __a, int32x4_t __b) {
2274 return __a * __b; }
2275 __ai float32x4_t vmulq_f32(float32x4_t __a, float32x4_t __b) {
2276 return __a * __b; }
2277 __ai uint8x16_t vmulq_u8(uint8x16_t __a, uint8x16_t __b) {
2278 return __a * __b; }
2279 __ai uint16x8_t vmulq_u16(uint16x8_t __a, uint16x8_t __b) {
2280 return __a * __b; }
2281 __ai uint32x4_t vmulq_u32(uint32x4_t __a, uint32x4_t __b) {
2282 return __a * __b; }
2283
/* vmull_lane_*: GNU statement-expression macros. The arguments a and b are
 * copied into typed locals (each evaluated once). __builtin_shufflevector
 * builds a vector whose every element is selected by index __c from __b, and
 * the macro evaluates to vmull_<type>(__a, <that vector>).
 * __c is pasted directly into the shuffle index list. */
#define vmull_lane_s16(a, b, __c) __extension__ ({ \
  int16x4_t __a = (a); int16x4_t __b = (b); \
  vmull_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })
#define vmull_lane_s32(a, b, __c) __extension__ ({ \
  int32x2_t __a = (a); int32x2_t __b = (b); \
  vmull_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); })
#define vmull_lane_u16(a, b, __c) __extension__ ({ \
  uint16x4_t __a = (a); uint16x4_t __b = (b); \
  vmull_u16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })
#define vmull_lane_u32(a, b, __c) __extension__ ({ \
  uint32x2_t __a = (a); uint32x2_t __b = (b); \
  vmull_u32(__a, __builtin_shufflevector(__b, __b, __c, __c)); })
2296
2297 __ai int32x4_t vmull_n_s16(int16x4_t __a, int16_t __b) {
2298 return (int32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 34); }
2299 __ai int64x2_t vmull_n_s32(int32x2_t __a, int32_t __b) {
2300 return (int64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 35); }
2301 __ai uint32x4_t vmull_n_u16(uint16x4_t __a, uint16_t __b) {
2302 return (uint32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(uint16x4_t ){ __b, __b, __b, __b }, 50); }
2303 __ai uint64x2_t vmull_n_u32(uint32x2_t __a, uint32_t __b) {
2304 return (uint64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(uint32x2_t ){ __b, __b }, 51); }
2305
2306 __ai poly8x8_t vmul_p8(poly8x8_t __a, poly8x8_t __b) {
2307 return (poly8x8_t)__builtin_neon_vmul_v((int8x8_t)__a, (int8x8_t)__b, 4); }
2308 __ai poly8x16_t vmulq_p8(poly8x16_t __a, poly8x16_t __b) {
2309 return (poly8x16_t)__builtin_neon_vmulq_v((int8x16_t)__a, (int8x16_t)__b, 36); }
2310
/* vmul_lane_* / vmulq_lane_*: GNU statement-expression macros. The arguments
 * a and b are copied into typed locals (each evaluated once).
 * __builtin_shufflevector builds a vector whose every element is selected by
 * index __c from __b -- as many indices as __a has elements -- and the macro
 * evaluates to __a * <that vector>. In the q forms __b is the shorter vector
 * type. __c is pasted directly into the shuffle index list. */
#define vmul_lane_s16(a, b, __c) __extension__ ({ \
  int16x4_t __a = (a); int16x4_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); })
#define vmul_lane_s32(a, b, __c) __extension__ ({ \
  int32x2_t __a = (a); int32x2_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c); })
#define vmul_lane_f32(a, b, __c) __extension__ ({ \
  float32x2_t __a = (a); float32x2_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c); })
#define vmul_lane_u16(a, b, __c) __extension__ ({ \
  uint16x4_t __a = (a); uint16x4_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); })
#define vmul_lane_u32(a, b, __c) __extension__ ({ \
  uint32x2_t __a = (a); uint32x2_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c); })
#define vmulq_lane_s16(a, b, __c) __extension__ ({ \
  int16x8_t __a = (a); int16x4_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c); })
#define vmulq_lane_s32(a, b, __c) __extension__ ({ \
  int32x4_t __a = (a); int32x2_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); })
#define vmulq_lane_f32(a, b, __c) __extension__ ({ \
  float32x4_t __a = (a); float32x2_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); })
#define vmulq_lane_u16(a, b, __c) __extension__ ({ \
  uint16x8_t __a = (a); uint16x4_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c); })
#define vmulq_lane_u32(a, b, __c) __extension__ ({ \
  uint32x4_t __a = (a); uint32x2_t __b = (b); \
  __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); })
2341
2342 __ai int16x4_t vmul_n_s16(int16x4_t __a, int16_t __b) {
2343 return __a * (int16x4_t){ __b, __b, __b, __b }; }
2344 __ai int32x2_t vmul_n_s32(int32x2_t __a, int32_t __b) {
2345 return __a * (int32x2_t){ __b, __b }; }
2346 __ai float32x2_t vmul_n_f32(float32x2_t __a, float32_t __b) {
2347 return __a * (float32x2_t){ __b, __b }; }
2348 __ai uint16x4_t vmul_n_u16(uint16x4_t __a, uint16_t __b) {
2349 return __a * (uint16x4_t){ __b, __b, __b, __b }; }
2350 __ai uint32x2_t vmul_n_u32(uint32x2_t __a, uint32_t __b) {
2351 return __a * (uint32x2_t){ __b, __b }; }
2352 __ai int16x8_t vmulq_n_s16(int16x8_t __a, int16_t __b) {
2353 return __a * (int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }; }
2354 __ai int32x4_t vmulq_n_s32(int32x4_t __a, int32_t __b) {
2355 return __a * (int32x4_t){ __b, __b, __b, __b }; }
2356 __ai float32x4_t vmulq_n_f32(float32x4_t __a, float32_t __b) {
2357 return __a * (float32x4_t){ __b, __b, __b, __b }; }
2358 __ai uint16x8_t vmulq_n_u16(uint16x8_t __a, uint16_t __b) {
2359 return __a * (uint16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }; }
2360 __ai uint32x4_t vmulq_n_u32(uint32x4_t __a, uint32_t __b) {
2361 return __a * (uint32x4_t){ __b, __b, __b, __b }; }
2362
2363 __ai int8x8_t vmvn_s8(int8x8_t __a) {
2364 return ~__a; }
2365 __ai int16x4_t vmvn_s16(int16x4_t __a) {
2366 return ~__a; }
2367 __ai int32x2_t vmvn_s32(int32x2_t __a) {
2368 return ~__a; }
2369 __ai uint8x8_t vmvn_u8(uint8x8_t __a) {
2370 return ~__a; }
2371 __ai uint16x4_t vmvn_u16(uint16x4_t __a) {
2372 return ~__a; }
2373 __ai uint32x2_t vmvn_u32(uint32x2_t __a) {
2374 return ~__a; }
2375 __ai poly8x8_t vmvn_p8(poly8x8_t __a) {
2376 return ~__a; }
2377 __ai int8x16_t vmvnq_s8(int8x16_t __a) {
2378 return ~__a; }
2379 __ai int16x8_t vmvnq_s16(int16x8_t __a) {
2380 return ~__a; }
2381 __ai int32x4_t vmvnq_s32(int32x4_t __a) {
2382 return ~__a; }
2383 __ai uint8x16_t vmvnq_u8(uint8x16_t __a) {
2384 return ~__a; }
2385 __ai uint16x8_t vmvnq_u16(uint16x8_t __a) {
2386 return ~__a; }
2387 __ai uint32x4_t vmvnq_u32(uint32x4_t __a) {
2388 return ~__a; }
2389 __ai poly8x16_t vmvnq_p8(poly8x16_t __a) {
2390 return ~__a; }
2391
2392 __ai int8x8_t vneg_s8(int8x8_t __a) {
2393 return -__a; }
2394 __ai int16x4_t vneg_s16(int16x4_t __a) {
2395 return -__a; }
2396 __ai int32x2_t vneg_s32(int32x2_t __a) {
2397 return -__a; }
2398 __ai float32x2_t vneg_f32(float32x2_t __a) {
2399 return -__a; }
2400 __ai int8x16_t vnegq_s8(int8x16_t __a) {
2401 return -__a; }
2402 __ai int16x8_t vnegq_s16(int16x8_t __a) {
2403 return -__a; }
2404 __ai int32x4_t vnegq_s32(int32x4_t __a) {
2405 return -__a; }
2406 __ai float32x4_t vnegq_f32(float32x4_t __a) {
2407 return -__a; }
2408
2409 __ai int8x8_t vorn_s8(int8x8_t __a, int8x8_t __b) {
2410 return __a | ~__b; }
2411 __ai int16x4_t vorn_s16(int16x4_t __a, int16x4_t __b) {
2412 return __a | ~__b; }
2413 __ai int32x2_t vorn_s32(int32x2_t __a, int32x2_t __b) {
2414 return __a | ~__b; }
2415 __ai int64x1_t vorn_s64(int64x1_t __a, int64x1_t __b) {
2416 return __a | ~__b; }
2417 __ai uint8x8_t vorn_u8(uint8x8_t __a, uint8x8_t __b) {
2418 return __a | ~__b; }
2419 __ai uint16x4_t vorn_u16(uint16x4_t __a, uint16x4_t __b) {
2420 return __a | ~__b; }
2421 __ai uint32x2_t vorn_u32(uint32x2_t __a, uint32x2_t __b) {
2422 return __a | ~__b; }
2423 __ai uint64x1_t vorn_u64(uint64x1_t __a, uint64x1_t __b) {
2424 return __a | ~__b; }
2425 __ai int8x16_t vornq_s8(int8x16_t __a, int8x16_t __b) {
2426 return __a | ~__b; }
2427 __ai int16x8_t vornq_s16(int16x8_t __a, int16x8_t __b) {
2428 return __a | ~__b; }
2429 __ai int32x4_t vornq_s32(int32x4_t __a, int32x4_t __b) {
2430 return __a | ~__b; }
2431 __ai int64x2_t vornq_s64(int64x2_t __a, int64x2_t __b) {
2432 return __a | ~__b; }
2433 __ai uint8x16_t vornq_u8(uint8x16_t __a, uint8x16_t __b) {
2434 return __a | ~__b; }
2435 __ai uint16x8_t vornq_u16(uint16x8_t __a, uint16x8_t __b) {
2436 return __a | ~__b; }
2437 __ai uint32x4_t vornq_u32(uint32x4_t __a, uint32x4_t __b) {
2438 return __a | ~__b; }
2439 __ai uint64x2_t vornq_u64(uint64x2_t __a, uint64x2_t __b) {
2440 return __a | ~__b; }
2441
/* vorr_* / vorrq_*: each returns __a | __b using the plain vector OR operator
 * (no builtin call). One wrapper per signed/unsigned 8/16/32/64-bit vector
 * type; the *q names take the int8x16_t ... uint64x2_t types. */
2442 __ai int8x8_t vorr_s8(int8x8_t __a, int8x8_t __b) {
2443 return __a | __b; }
2444 __ai int16x4_t vorr_s16(int16x4_t __a, int16x4_t __b) {
2445 return __a | __b; }
2446 __ai int32x2_t vorr_s32(int32x2_t __a, int32x2_t __b) {
2447 return __a | __b; }
2448 __ai int64x1_t vorr_s64(int64x1_t __a, int64x1_t __b) {
2449 return __a | __b; }
2450 __ai uint8x8_t vorr_u8(uint8x8_t __a, uint8x8_t __b) {
2451 return __a | __b; }
2452 __ai uint16x4_t vorr_u16(uint16x4_t __a, uint16x4_t __b) {
2453 return __a | __b; }
2454 __ai uint32x2_t vorr_u32(uint32x2_t __a, uint32x2_t __b) {
2455 return __a | __b; }
2456 __ai uint64x1_t vorr_u64(uint64x1_t __a, uint64x1_t __b) {
2457 return __a | __b; }
2458 __ai int8x16_t vorrq_s8(int8x16_t __a, int8x16_t __b) {
2459 return __a | __b; }
2460 __ai int16x8_t vorrq_s16(int16x8_t __a, int16x8_t __b) {
2461 return __a | __b; }
2462 __ai int32x4_t vorrq_s32(int32x4_t __a, int32x4_t __b) {
2463 return __a | __b; }
2464 __ai int64x2_t vorrq_s64(int64x2_t __a, int64x2_t __b) {
2465 return __a | __b; }
2466 __ai uint8x16_t vorrq_u8(uint8x16_t __a, uint8x16_t __b) {
2467 return __a | __b; }
2468 __ai uint16x8_t vorrq_u16(uint16x8_t __a, uint16x8_t __b) {
2469 return __a | __b; }
2470 __ai uint32x4_t vorrq_u32(uint32x4_t __a, uint32x4_t __b) {
2471 return __a | __b; }
2472 __ai uint64x2_t vorrq_u64(uint64x2_t __a, uint64x2_t __b) {
2473 return __a | __b; }
2474
/* vpadal_* / vpadalq_*: each wrapper forwards to __builtin_neon_vpadal_v (or
 * __builtin_neon_vpadalq_v for the *q names). Operands that are not already
 * int8x8_t / int8x16_t are cast to that type, and the builtin's result is cast
 * back to the declared return type, which matches the type of __a.
 * NOTE(review): the trailing integer constant appears to encode the result
 * vector type (here 1/2/3 signed, 17/18/19 unsigned, +32 for *q) -- confirm
 * against clang's NEON type flags. */
2475 __ai int16x4_t vpadal_s8(int16x4_t __a, int8x8_t __b) {
2476 return (int16x4_t)__builtin_neon_vpadal_v((int8x8_t)__a, __b, 1); }
2477 __ai int32x2_t vpadal_s16(int32x2_t __a, int16x4_t __b) {
2478 return (int32x2_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2479 __ai int64x1_t vpadal_s32(int64x1_t __a, int32x2_t __b) {
2480 return (int64x1_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 3); }
2481 __ai uint16x4_t vpadal_u8(uint16x4_t __a, uint8x8_t __b) {
2482 return (uint16x4_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2483 __ai uint32x2_t vpadal_u16(uint32x2_t __a, uint16x4_t __b) {
2484 return (uint32x2_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2485 __ai uint64x1_t vpadal_u32(uint64x1_t __a, uint32x2_t __b) {
2486 return (uint64x1_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 19); }
2487 __ai int16x8_t vpadalq_s8(int16x8_t __a, int8x16_t __b) {
2488 return (int16x8_t)__builtin_neon_vpadalq_v((int8x16_t)__a, __b, 33); }
2489 __ai int32x4_t vpadalq_s16(int32x4_t __a, int16x8_t __b) {
2490 return (int32x4_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2491 __ai int64x2_t vpadalq_s32(int64x2_t __a, int32x4_t __b) {
2492 return (int64x2_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
2493 __ai uint16x8_t vpadalq_u8(uint16x8_t __a, uint8x16_t __b) {
2494 return (uint16x8_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
2495 __ai uint32x4_t vpadalq_u16(uint32x4_t __a, uint16x8_t __b) {
2496 return (uint32x4_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
2497 __ai uint64x2_t vpadalq_u32(uint64x2_t __a, uint32x4_t __b) {
2498 return (uint64x2_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
2499
/* vpadd_*: each wrapper forwards both operands to __builtin_neon_vpadd_v,
 * casting them to int8x8_t where needed and casting the result back to the
 * declared vector type.
 * NOTE(review): the trailing integer constant appears to encode the vector
 * type (0/1/2 signed, 16/17/18 unsigned, 7 for float32) -- confirm against
 * clang's NEON type flags. */
2500 __ai int8x8_t vpadd_s8(int8x8_t __a, int8x8_t __b) {
2501 return (int8x8_t)__builtin_neon_vpadd_v(__a, __b, 0); }
2502 __ai int16x4_t vpadd_s16(int16x4_t __a, int16x4_t __b) {
2503 return (int16x4_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2504 __ai int32x2_t vpadd_s32(int32x2_t __a, int32x2_t __b) {
2505 return (int32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2506 __ai uint8x8_t vpadd_u8(uint8x8_t __a, uint8x8_t __b) {
2507 return (uint8x8_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 16); }
2508 __ai uint16x4_t vpadd_u16(uint16x4_t __a, uint16x4_t __b) {
2509 return (uint16x4_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2510 __ai uint32x2_t vpadd_u32(uint32x2_t __a, uint32x2_t __b) {
2511 return (uint32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2512 __ai float32x2_t vpadd_f32(float32x2_t __a, float32x2_t __b) {
2513 return (float32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 7); }
2514
/* vpaddl_* / vpaddlq_*: single-operand wrappers around
 * __builtin_neon_vpaddl_v (or __builtin_neon_vpaddlq_v for the *q names). The
 * argument is cast to int8x8_t / int8x16_t where needed; the declared return
 * type uses elements twice as wide as the argument's (e.g. int8x8_t in,
 * int16x4_t out).
 * NOTE(review): the trailing integer constant appears to encode the result
 * vector type -- confirm against clang's NEON type flags. */
2515 __ai int16x4_t vpaddl_s8(int8x8_t __a) {
2516 return (int16x4_t)__builtin_neon_vpaddl_v(__a, 1); }
2517 __ai int32x2_t vpaddl_s16(int16x4_t __a) {
2518 return (int32x2_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 2); }
2519 __ai int64x1_t vpaddl_s32(int32x2_t __a) {
2520 return (int64x1_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 3); }
2521 __ai uint16x4_t vpaddl_u8(uint8x8_t __a) {
2522 return (uint16x4_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 17); }
2523 __ai uint32x2_t vpaddl_u16(uint16x4_t __a) {
2524 return (uint32x2_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 18); }
2525 __ai uint64x1_t vpaddl_u32(uint32x2_t __a) {
2526 return (uint64x1_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 19); }
2527 __ai int16x8_t vpaddlq_s8(int8x16_t __a) {
2528 return (int16x8_t)__builtin_neon_vpaddlq_v(__a, 33); }
2529 __ai int32x4_t vpaddlq_s16(int16x8_t __a) {
2530 return (int32x4_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 34); }
2531 __ai int64x2_t vpaddlq_s32(int32x4_t __a) {
2532 return (int64x2_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 35); }
2533 __ai uint16x8_t vpaddlq_u8(uint8x16_t __a) {
2534 return (uint16x8_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 49); }
2535 __ai uint32x4_t vpaddlq_u16(uint16x8_t __a) {
2536 return (uint32x4_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 50); }
2537 __ai uint64x2_t vpaddlq_u32(uint32x4_t __a) {
2538 return (uint64x2_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 51); }
2539
/* vpmax_*: each wrapper forwards both operands to __builtin_neon_vpmax_v,
 * casting them to int8x8_t where needed and casting the result back to the
 * declared vector type.
 * NOTE(review): the trailing integer constant appears to encode the vector
 * type (0/1/2 signed, 16/17/18 unsigned, 7 for float32) -- confirm against
 * clang's NEON type flags. */
2540 __ai int8x8_t vpmax_s8(int8x8_t __a, int8x8_t __b) {
2541 return (int8x8_t)__builtin_neon_vpmax_v(__a, __b, 0); }
2542 __ai int16x4_t vpmax_s16(int16x4_t __a, int16x4_t __b) {
2543 return (int16x4_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2544 __ai int32x2_t vpmax_s32(int32x2_t __a, int32x2_t __b) {
2545 return (int32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2546 __ai uint8x8_t vpmax_u8(uint8x8_t __a, uint8x8_t __b) {
2547 return (uint8x8_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 16); }
2548 __ai uint16x4_t vpmax_u16(uint16x4_t __a, uint16x4_t __b) {
2549 return (uint16x4_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2550 __ai uint32x2_t vpmax_u32(uint32x2_t __a, uint32x2_t __b) {
2551 return (uint32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2552 __ai float32x2_t vpmax_f32(float32x2_t __a, float32x2_t __b) {
2553 return (float32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 7); }
2554
/* vpmin_*: each wrapper forwards both operands to __builtin_neon_vpmin_v,
 * casting them to int8x8_t where needed and casting the result back to the
 * declared vector type.
 * NOTE(review): the trailing integer constant appears to encode the vector
 * type (0/1/2 signed, 16/17/18 unsigned, 7 for float32) -- confirm against
 * clang's NEON type flags. */
2555 __ai int8x8_t vpmin_s8(int8x8_t __a, int8x8_t __b) {
2556 return (int8x8_t)__builtin_neon_vpmin_v(__a, __b, 0); }
2557 __ai int16x4_t vpmin_s16(int16x4_t __a, int16x4_t __b) {
2558 return (int16x4_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2559 __ai int32x2_t vpmin_s32(int32x2_t __a, int32x2_t __b) {
2560 return (int32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2561 __ai uint8x8_t vpmin_u8(uint8x8_t __a, uint8x8_t __b) {
2562 return (uint8x8_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 16); }
2563 __ai uint16x4_t vpmin_u16(uint16x4_t __a, uint16x4_t __b) {
2564 return (uint16x4_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2565 __ai uint32x2_t vpmin_u32(uint32x2_t __a, uint32x2_t __b) {
2566 return (uint32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2567 __ai float32x2_t vpmin_f32(float32x2_t __a, float32x2_t __b) {
2568 return (float32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 7); }
2569
/* vqabs_* / vqabsq_*: single-operand wrappers around __builtin_neon_vqabs_v
 * (or __builtin_neon_vqabsq_v for the *q names), defined for signed 8/16/32-bit
 * element types only. The argument is cast to int8x8_t / int8x16_t where
 * needed and the result is cast back to the argument's own type.
 * NOTE(review): the trailing integer constant appears to encode the vector
 * type (0/1/2, +32 for *q) -- confirm against clang's NEON type flags. */
2570 __ai int8x8_t vqabs_s8(int8x8_t __a) {
2571 return (int8x8_t)__builtin_neon_vqabs_v(__a, 0); }
2572 __ai int16x4_t vqabs_s16(int16x4_t __a) {
2573 return (int16x4_t)__builtin_neon_vqabs_v((int8x8_t)__a, 1); }
2574 __ai int32x2_t vqabs_s32(int32x2_t __a) {
2575 return (int32x2_t)__builtin_neon_vqabs_v((int8x8_t)__a, 2); }
2576 __ai int8x16_t vqabsq_s8(int8x16_t __a) {
2577 return (int8x16_t)__builtin_neon_vqabsq_v(__a, 32); }
2578 __ai int16x8_t vqabsq_s16(int16x8_t __a) {
2579 return (int16x8_t)__builtin_neon_vqabsq_v((int8x16_t)__a, 33); }
2580 __ai int32x4_t vqabsq_s32(int32x4_t __a) {
2581 return (int32x4_t)__builtin_neon_vqabsq_v((int8x16_t)__a, 34); }
2582
/* vqadd_* / vqaddq_*: each wrapper forwards both operands to
 * __builtin_neon_vqadd_v (or __builtin_neon_vqaddq_v for the *q names),
 * casting them to int8x8_t / int8x16_t where needed and casting the result
 * back to the operands' own vector type.
 * NOTE(review): the trailing integer constant appears to encode the vector
 * type (0..3 signed, 16..19 unsigned, +32 for *q) -- confirm against clang's
 * NEON type flags. */
2583 __ai int8x8_t vqadd_s8(int8x8_t __a, int8x8_t __b) {
2584 return (int8x8_t)__builtin_neon_vqadd_v(__a, __b, 0); }
2585 __ai int16x4_t vqadd_s16(int16x4_t __a, int16x4_t __b) {
2586 return (int16x4_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2587 __ai int32x2_t vqadd_s32(int32x2_t __a, int32x2_t __b) {
2588 return (int32x2_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2589 __ai int64x1_t vqadd_s64(int64x1_t __a, int64x1_t __b) {
2590 return (int64x1_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 3); }
2591 __ai uint8x8_t vqadd_u8(uint8x8_t __a, uint8x8_t __b) {
2592 return (uint8x8_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 16); }
2593 __ai uint16x4_t vqadd_u16(uint16x4_t __a, uint16x4_t __b) {
2594 return (uint16x4_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2595 __ai uint32x2_t vqadd_u32(uint32x2_t __a, uint32x2_t __b) {
2596 return (uint32x2_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2597 __ai uint64x1_t vqadd_u64(uint64x1_t __a, uint64x1_t __b) {
2598 return (uint64x1_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 19); }
2599 __ai int8x16_t vqaddq_s8(int8x16_t __a, int8x16_t __b) {
2600 return (int8x16_t)__builtin_neon_vqaddq_v(__a, __b, 32); }
2601 __ai int16x8_t vqaddq_s16(int16x8_t __a, int16x8_t __b) {
2602 return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2603 __ai int32x4_t vqaddq_s32(int32x4_t __a, int32x4_t __b) {
2604 return (int32x4_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2605 __ai int64x2_t vqaddq_s64(int64x2_t __a, int64x2_t __b) {
2606 return (int64x2_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
2607 __ai uint8x16_t vqaddq_u8(uint8x16_t __a, uint8x16_t __b) {
2608 return (uint8x16_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
2609 __ai uint16x8_t vqaddq_u16(uint16x8_t __a, uint16x8_t __b) {
2610 return (uint16x8_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
2611 __ai uint32x4_t vqaddq_u32(uint32x4_t __a, uint32x4_t __b) {
2612 return (uint32x4_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
2613 __ai uint64x2_t vqaddq_u64(uint64x2_t __a, uint64x2_t __b) {
2614 return (uint64x2_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
2615
/* vqdmlal_s16 / vqdmlal_s32: forward the three operands to
 * __builtin_neon_vqdmlal_v. __a (already the wide result type) is cast to
 * int8x16_t, __b and __c to int8x8_t, and the result is cast back to __a's
 * type.
 * NOTE(review): the trailing constants 34/35 appear to encode the result
 * vector type -- confirm against clang's NEON type flags. */
2616 __ai int32x4_t vqdmlal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) {
2617 return (int32x4_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 34); }
2618 __ai int64x2_t vqdmlal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) {
2619 return (int64x2_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 35); }
2620
/* vqdmlal_lane_s16 / vqdmlal_lane_s32: statement-expression macros. a, b and c
 * are each evaluated once into typed locals; __d is pasted as every index of
 * __builtin_shufflevector(__c, __c, ...), producing a vector whose elements
 * are all element __d of __c, which is then passed as the third operand of
 * vqdmlal_s16 / vqdmlal_s32.
 * NOTE(review): presumably a macro rather than a function because the shuffle
 * indices must be compile-time constants -- confirm. */
2621 #define vqdmlal_lane_s16(a, b, c, __d) __extension__ ({ \
2622 int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \
2623 vqdmlal_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
2624 #define vqdmlal_lane_s32(a, b, c, __d) __extension__ ({ \
2625 int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \
2626 vqdmlal_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); })
2627
/* vqdmlal_n_s16 / vqdmlal_n_s32: like vqdmlal_s16 / vqdmlal_s32, but the third
 * operand is a scalar. A compound literal with every element set to __c is
 * built, cast to int8x8_t, and passed to __builtin_neon_vqdmlal_v together
 * with __a (cast to int8x16_t) and __b (cast to int8x8_t).
 * NOTE(review): the trailing constants 34/35 appear to encode the result
 * vector type -- confirm against clang's NEON type flags. */
2628 __ai int32x4_t vqdmlal_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) {
2629 return (int32x4_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int16x4_t){ __c, __c, __c, __c }, 34); }
2630 __ai int64x2_t vqdmlal_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) {
2631 return (int64x2_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int32x2_t){ __c, __c }, 35); }
2632
/* vqdmlsl_s16 / vqdmlsl_s32: forward the three operands to
 * __builtin_neon_vqdmlsl_v. __a (already the wide result type) is cast to
 * int8x16_t, __b and __c to int8x8_t, and the result is cast back to __a's
 * type.
 * NOTE(review): the trailing constants 34/35 appear to encode the result
 * vector type -- confirm against clang's NEON type flags. */
2633 __ai int32x4_t vqdmlsl_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) {
2634 return (int32x4_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 34); }
2635 __ai int64x2_t vqdmlsl_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) {
2636 return (int64x2_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 35); }
2637
/* vqdmlsl_lane_s16 / vqdmlsl_lane_s32: statement-expression macros. a, b and c
 * are each evaluated once into typed locals; __d is pasted as every index of
 * __builtin_shufflevector(__c, __c, ...), producing a vector whose elements
 * are all element __d of __c, which is then passed as the third operand of
 * vqdmlsl_s16 / vqdmlsl_s32.
 * NOTE(review): presumably a macro rather than a function because the shuffle
 * indices must be compile-time constants -- confirm. */
2638 #define vqdmlsl_lane_s16(a, b, c, __d) __extension__ ({ \
2639 int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \
2640 vqdmlsl_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
2641 #define vqdmlsl_lane_s32(a, b, c, __d) __extension__ ({ \
2642 int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \
2643 vqdmlsl_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); })
2644
/* vqdmlsl_n_s16 / vqdmlsl_n_s32: like vqdmlsl_s16 / vqdmlsl_s32, but the third
 * operand is a scalar. A compound literal with every element set to __c is
 * built, cast to int8x8_t, and passed to __builtin_neon_vqdmlsl_v together
 * with __a (cast to int8x16_t) and __b (cast to int8x8_t).
 * NOTE(review): the trailing constants 34/35 appear to encode the result
 * vector type -- confirm against clang's NEON type flags. */
2645 __ai int32x4_t vqdmlsl_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) {
2646 return (int32x4_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int16x4_t){ __c, __c, __c, __c }, 34); }
2647 __ai int64x2_t vqdmlsl_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) {
2648 return (int64x2_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int32x2_t){ __c, __c }, 35); }
2649
/* vqdmulh_* / vqdmulhq_*: forward both operands to __builtin_neon_vqdmulh_v
 * (or __builtin_neon_vqdmulhq_v for the *q names), defined for signed 16- and
 * 32-bit element types. Operands are cast to int8x8_t / int8x16_t and the
 * result is cast back to the operands' own type.
 * NOTE(review): the trailing constants (1/2, 33/34) appear to encode the
 * vector type -- confirm against clang's NEON type flags. */
2650 __ai int16x4_t vqdmulh_s16(int16x4_t __a, int16x4_t __b) {
2651 return (int16x4_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2652 __ai int32x2_t vqdmulh_s32(int32x2_t __a, int32x2_t __b) {
2653 return (int32x2_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2654 __ai int16x8_t vqdmulhq_s16(int16x8_t __a, int16x8_t __b) {
2655 return (int16x8_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2656 __ai int32x4_t vqdmulhq_s32(int32x4_t __a, int32x4_t __b) {
2657 return (int32x4_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2658
/* vqdmulh_lane_* / vqdmulhq_lane_*: statement-expression macros. a and b are
 * each evaluated once into typed locals; __c is pasted as every index of
 * __builtin_shufflevector(__b, __b, ...), producing a vector whose elements
 * are all element __c of __b. The number of indices (2, 4 or 8) sets the width
 * of that vector so it matches the second operand of the vqdmulh / vqdmulhq
 * function being called; in the *q forms __b is still the narrow int16x4_t /
 * int32x2_t type.
 * NOTE(review): presumably macros rather than functions because the shuffle
 * indices must be compile-time constants -- confirm. */
2659 #define vqdmulh_lane_s16(a, b, __c) __extension__ ({ \
2660 int16x4_t __a = (a); int16x4_t __b = (b); \
2661 vqdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })
2662 #define vqdmulh_lane_s32(a, b, __c) __extension__ ({ \
2663 int32x2_t __a = (a); int32x2_t __b = (b); \
2664 vqdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); })
2665 #define vqdmulhq_lane_s16(a, b, __c) __extension__ ({ \
2666 int16x8_t __a = (a); int16x4_t __b = (b); \
2667 vqdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c)); })
2668 #define vqdmulhq_lane_s32(a, b, __c) __extension__ ({ \
2669 int32x4_t __a = (a); int32x2_t __b = (b); \
2670 vqdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })
2671
/* vqdmulh_n_* / vqdmulhq_n_*: like vqdmulh_* / vqdmulhq_*, but the second
 * operand is a scalar. A compound literal of __a's vector type with every
 * element set to __b is built, cast to int8x8_t / int8x16_t, and passed to
 * __builtin_neon_vqdmulh_v / __builtin_neon_vqdmulhq_v.
 * NOTE(review): the trailing constants (1/2, 33/34) appear to encode the
 * vector type -- confirm against clang's NEON type flags. */
2672 __ai int16x4_t vqdmulh_n_s16(int16x4_t __a, int16_t __b) {
2673 return (int16x4_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 1); }
2674 __ai int32x2_t vqdmulh_n_s32(int32x2_t __a, int32_t __b) {
2675 return (int32x2_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 2); }
2676 __ai int16x8_t vqdmulhq_n_s16(int16x8_t __a, int16_t __b) {
2677 return (int16x8_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)(int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }, 33); }
2678 __ai int32x4_t vqdmulhq_n_s32(int32x4_t __a, int32_t __b) {
2679 return (int32x4_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)(int32x4_t){ __b, __b, __b, __b }, 34); }
2680
/* vqdmull_s16 / vqdmull_s32: forward both operands (cast to int8x8_t) to
 * __builtin_neon_vqdmull_v. The declared return type uses elements twice as
 * wide as the operands' (int16x4_t -> int32x4_t, int32x2_t -> int64x2_t).
 * NOTE(review): the trailing constants 34/35 appear to encode the result
 * vector type -- confirm against clang's NEON type flags. */
2681 __ai int32x4_t vqdmull_s16(int16x4_t __a, int16x4_t __b) {
2682 return (int32x4_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)__b, 34); }
2683 __ai int64x2_t vqdmull_s32(int32x2_t __a, int32x2_t __b) {
2684 return (int64x2_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)__b, 35); }
2685
/* vqdmull_lane_s16 / vqdmull_lane_s32: statement-expression macros. a and b
 * are each evaluated once into typed locals; __c is pasted as every index of
 * __builtin_shufflevector(__b, __b, ...), producing a vector whose elements
 * are all element __c of __b, which is then passed as the second operand of
 * vqdmull_s16 / vqdmull_s32.
 * NOTE(review): presumably a macro rather than a function because the shuffle
 * indices must be compile-time constants -- confirm. */
2686 #define vqdmull_lane_s16(a, b, __c) __extension__ ({ \
2687 int16x4_t __a = (a); int16x4_t __b = (b); \
2688 vqdmull_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })
2689 #define vqdmull_lane_s32(a, b, __c) __extension__ ({ \
2690 int32x2_t __a = (a); int32x2_t __b = (b); \
2691 vqdmull_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); })
2692
/* vqdmull_n_s16 / vqdmull_n_s32: like vqdmull_s16 / vqdmull_s32, but the
 * second operand is a scalar. A compound literal of __a's vector type with
 * every element set to __b is built, cast to int8x8_t, and passed to
 * __builtin_neon_vqdmull_v.
 * NOTE(review): the trailing constants 34/35 appear to encode the result
 * vector type -- confirm against clang's NEON type flags. */
2693 __ai int32x4_t vqdmull_n_s16(int16x4_t __a, int16_t __b) {
2694 return (int32x4_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 34); }
2695 __ai int64x2_t vqdmull_n_s32(int32x2_t __a, int32_t __b) {
2696 return (int64x2_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 35); }
2697
/* vqmovn_*: single-operand wrappers around __builtin_neon_vqmovn_v. The
 * argument is cast to int8x16_t; the declared return type has the same element
 * count but elements half as wide (e.g. int16x8_t in, int8x8_t out), with
 * signedness unchanged.
 * NOTE(review): the trailing integer constant appears to encode the result
 * vector type (0/1/2 signed, 16/17/18 unsigned) -- confirm against clang's
 * NEON type flags. */
2698 __ai int8x8_t vqmovn_s16(int16x8_t __a) {
2699 return (int8x8_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 0); }
2700 __ai int16x4_t vqmovn_s32(int32x4_t __a) {
2701 return (int16x4_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 1); }
2702 __ai int32x2_t vqmovn_s64(int64x2_t __a) {
2703 return (int32x2_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 2); }
2704 __ai uint8x8_t vqmovn_u16(uint16x8_t __a) {
2705 return (uint8x8_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 16); }
2706 __ai uint16x4_t vqmovn_u32(uint32x4_t __a) {
2707 return (uint16x4_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 17); }
2708 __ai uint32x2_t vqmovn_u64(uint64x2_t __a) {
2709 return (uint32x2_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 18); }
2710
/* vqmovun_*: single-operand wrappers around __builtin_neon_vqmovun_v. The
 * signed argument is cast to int8x16_t; the declared return type is unsigned
 * with elements half as wide (e.g. int16x8_t in, uint8x8_t out).
 * NOTE(review): the trailing constants 16/17/18 appear to encode the unsigned
 * result vector type -- confirm against clang's NEON type flags. */
2711 __ai uint8x8_t vqmovun_s16(int16x8_t __a) {
2712 return (uint8x8_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 16); }
2713 __ai uint16x4_t vqmovun_s32(int32x4_t __a) {
2714 return (uint16x4_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 17); }
2715 __ai uint32x2_t vqmovun_s64(int64x2_t __a) {
2716 return (uint32x2_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 18); }
2717
/* vqneg_* / vqnegq_*: single-operand wrappers around __builtin_neon_vqneg_v
 * (or __builtin_neon_vqnegq_v for the *q names), defined for signed 8/16/32-bit
 * element types only. The argument is cast to int8x8_t / int8x16_t where
 * needed and the result is cast back to the argument's own type.
 * NOTE(review): the trailing integer constant appears to encode the vector
 * type (0/1/2, +32 for *q) -- confirm against clang's NEON type flags. */
2718 __ai int8x8_t vqneg_s8(int8x8_t __a) {
2719 return (int8x8_t)__builtin_neon_vqneg_v(__a, 0); }
2720 __ai int16x4_t vqneg_s16(int16x4_t __a) {
2721 return (int16x4_t)__builtin_neon_vqneg_v((int8x8_t)__a, 1); }
2722 __ai int32x2_t vqneg_s32(int32x2_t __a) {
2723 return (int32x2_t)__builtin_neon_vqneg_v((int8x8_t)__a, 2); }
2724 __ai int8x16_t vqnegq_s8(int8x16_t __a) {
2725 return (int8x16_t)__builtin_neon_vqnegq_v(__a, 32); }
2726 __ai int16x8_t vqnegq_s16(int16x8_t __a) {
2727 return (int16x8_t)__builtin_neon_vqnegq_v((int8x16_t)__a, 33); }
2728 __ai int32x4_t vqnegq_s32(int32x4_t __a) {
2729 return (int32x4_t)__builtin_neon_vqnegq_v((int8x16_t)__a, 34); }
2730
/* vqrdmulh_* / vqrdmulhq_*: forward both operands to __builtin_neon_vqrdmulh_v
 * (or __builtin_neon_vqrdmulhq_v for the *q names), defined for signed 16- and
 * 32-bit element types. Operands are cast to int8x8_t / int8x16_t and the
 * result is cast back to the operands' own type.
 * NOTE(review): the trailing constants (1/2, 33/34) appear to encode the
 * vector type -- confirm against clang's NEON type flags. */
2731 __ai int16x4_t vqrdmulh_s16(int16x4_t __a, int16x4_t __b) {
2732 return (int16x4_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2733 __ai int32x2_t vqrdmulh_s32(int32x2_t __a, int32x2_t __b) {
2734 return (int32x2_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2735 __ai int16x8_t vqrdmulhq_s16(int16x8_t __a, int16x8_t __b) {
2736 return (int16x8_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2737 __ai int32x4_t vqrdmulhq_s32(int32x4_t __a, int32x4_t __b) {
2738 return (int32x4_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2739
/* vqrdmulh_lane_* / vqrdmulhq_lane_*: statement-expression macros. a and b are
 * each evaluated once into typed locals; __c is pasted as every index of
 * __builtin_shufflevector(__b, __b, ...), producing a vector whose elements
 * are all element __c of __b. The number of indices (2, 4 or 8) sets the width
 * of that vector so it matches the second operand of the vqrdmulh / vqrdmulhq
 * function being called; in the *q forms __b is still the narrow int16x4_t /
 * int32x2_t type.
 * NOTE(review): presumably macros rather than functions because the shuffle
 * indices must be compile-time constants -- confirm. */
2740 #define vqrdmulh_lane_s16(a, b, __c) __extension__ ({ \
2741 int16x4_t __a = (a); int16x4_t __b = (b); \
2742 vqrdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })
2743 #define vqrdmulh_lane_s32(a, b, __c) __extension__ ({ \
2744 int32x2_t __a = (a); int32x2_t __b = (b); \
2745 vqrdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); })
2746 #define vqrdmulhq_lane_s16(a, b, __c) __extension__ ({ \
2747 int16x8_t __a = (a); int16x4_t __b = (b); \
2748 vqrdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c)); })
2749 #define vqrdmulhq_lane_s32(a, b, __c) __extension__ ({ \
2750 int32x4_t __a = (a); int32x2_t __b = (b); \
2751 vqrdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })
2752
/* vqrdmulh_n_* / vqrdmulhq_n_*: like vqrdmulh_* / vqrdmulhq_*, but the second
 * operand is a scalar. A compound literal of __a's vector type with every
 * element set to __b is built, cast to int8x8_t / int8x16_t, and passed to
 * __builtin_neon_vqrdmulh_v / __builtin_neon_vqrdmulhq_v.
 * NOTE(review): the trailing constants (1/2, 33/34) appear to encode the
 * vector type -- confirm against clang's NEON type flags. */
2753 __ai int16x4_t vqrdmulh_n_s16(int16x4_t __a, int16_t __b) {
2754 return (int16x4_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 1); }
2755 __ai int32x2_t vqrdmulh_n_s32(int32x2_t __a, int32_t __b) {
2756 return (int32x2_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 2); }
2757 __ai int16x8_t vqrdmulhq_n_s16(int16x8_t __a, int16_t __b) {
2758 return (int16x8_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)(int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }, 33); }
2759 __ai int32x4_t vqrdmulhq_n_s32(int32x4_t __a, int32_t __b) {
2760 return (int32x4_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)(int32x4_t){ __b, __b, __b, __b }, 34); }
2761
/* vqrshl_* / vqrshlq_*: forward both operands to __builtin_neon_vqrshl_v (or
 * __builtin_neon_vqrshlq_v for the *q names), casting them to int8x8_t /
 * int8x16_t where needed and casting the result back to __a's type. Note that
 * __b is a signed vector even in the unsigned (_u*) variants.
 * NOTE(review): the trailing integer constant appears to encode the vector
 * type of __a / the result (0..3 signed, 16..19 unsigned, +32 for *q) --
 * confirm against clang's NEON type flags. */
2762 __ai int8x8_t vqrshl_s8(int8x8_t __a, int8x8_t __b) {
2763 return (int8x8_t)__builtin_neon_vqrshl_v(__a, __b, 0); }
2764 __ai int16x4_t vqrshl_s16(int16x4_t __a, int16x4_t __b) {
2765 return (int16x4_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2766 __ai int32x2_t vqrshl_s32(int32x2_t __a, int32x2_t __b) {
2767 return (int32x2_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2768 __ai int64x1_t vqrshl_s64(int64x1_t __a, int64x1_t __b) {
2769 return (int64x1_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 3); }
2770 __ai uint8x8_t vqrshl_u8(uint8x8_t __a, int8x8_t __b) {
2771 return (uint8x8_t)__builtin_neon_vqrshl_v((int8x8_t)__a, __b, 16); }
2772 __ai uint16x4_t vqrshl_u16(uint16x4_t __a, int16x4_t __b) {
2773 return (uint16x4_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2774 __ai uint32x2_t vqrshl_u32(uint32x2_t __a, int32x2_t __b) {
2775 return (uint32x2_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2776 __ai uint64x1_t vqrshl_u64(uint64x1_t __a, int64x1_t __b) {
2777 return (uint64x1_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 19); }
2778 __ai int8x16_t vqrshlq_s8(int8x16_t __a, int8x16_t __b) {
2779 return (int8x16_t)__builtin_neon_vqrshlq_v(__a, __b, 32); }
2780 __ai int16x8_t vqrshlq_s16(int16x8_t __a, int16x8_t __b) {
2781 return (int16x8_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2782 __ai int32x4_t vqrshlq_s32(int32x4_t __a, int32x4_t __b) {
2783 return (int32x4_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2784 __ai int64x2_t vqrshlq_s64(int64x2_t __a, int64x2_t __b) {
2785 return (int64x2_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
2786 __ai uint8x16_t vqrshlq_u8(uint8x16_t __a, int8x16_t __b) {
2787 return (uint8x16_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, __b, 48); }
2788 __ai uint16x8_t vqrshlq_u16(uint16x8_t __a, int16x8_t __b) {
2789 return (uint16x8_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
2790 __ai uint32x4_t vqrshlq_u32(uint32x4_t __a, int32x4_t __b) {
2791 return (uint32x4_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
2792 __ai uint64x2_t vqrshlq_u64(uint64x2_t __a, int64x2_t __b) {
2793 return (uint64x2_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
2794
/* vqrshrn_n_*: statement-expression macros. a is evaluated once into a typed
 * local, cast to int8x16_t, and passed to __builtin_neon_vqrshrn_n_v together
 * with __b (pasted textually) and a per-variant integer constant. The result
 * is cast to a vector type with elements half as wide as a's (e.g. int16x8_t
 * in, int8x8_t out).
 * NOTE(review): the trailing constant appears to encode the result vector
 * type; __b is presumably required to be a compile-time constant, which would
 * explain the macro form -- confirm both. */
2795 #define vqrshrn_n_s16(a, __b) __extension__ ({ \
2796 int16x8_t __a = (a); \
2797 (int8x8_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 0); })
2798 #define vqrshrn_n_s32(a, __b) __extension__ ({ \
2799 int32x4_t __a = (a); \
2800 (int16x4_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 1); })
2801 #define vqrshrn_n_s64(a, __b) __extension__ ({ \
2802 int64x2_t __a = (a); \
2803 (int32x2_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 2); })
2804 #define vqrshrn_n_u16(a, __b) __extension__ ({ \
2805 uint16x8_t __a = (a); \
2806 (uint8x8_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 16); })
2807 #define vqrshrn_n_u32(a, __b) __extension__ ({ \
2808 uint32x4_t __a = (a); \
2809 (uint16x4_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 17); })
2810 #define vqrshrn_n_u64(a, __b) __extension__ ({ \
2811 uint64x2_t __a = (a); \
2812 (uint32x2_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 18); })
2813
/* vqrshrun_n_*: statement-expression macros. The signed vector a is evaluated
 * once into a typed local, cast to int8x16_t, and passed to
 * __builtin_neon_vqrshrun_n_v together with __b (pasted textually) and a
 * per-variant integer constant. The result is cast to an unsigned vector type
 * with elements half as wide as a's (e.g. int16x8_t in, uint8x8_t out).
 * NOTE(review): the trailing constant appears to encode the result vector
 * type; __b is presumably required to be a compile-time constant -- confirm
 * both. */
2814 #define vqrshrun_n_s16(a, __b) __extension__ ({ \
2815 int16x8_t __a = (a); \
2816 (uint8x8_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 16); })
2817 #define vqrshrun_n_s32(a, __b) __extension__ ({ \
2818 int32x4_t __a = (a); \
2819 (uint16x4_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 17); })
2820 #define vqrshrun_n_s64(a, __b) __extension__ ({ \
2821 int64x2_t __a = (a); \
2822 (uint32x2_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 18); })
2823
/* vqshl_* / vqshlq_*: forward both operands to __builtin_neon_vqshl_v (or
 * __builtin_neon_vqshlq_v for the *q names), casting them to int8x8_t /
 * int8x16_t where needed and casting the result back to __a's type. Note that
 * __b is a signed vector even in the unsigned (_u*) variants.
 * NOTE(review): the trailing integer constant appears to encode the vector
 * type of __a / the result (0..3 signed, 16..19 unsigned, +32 for *q) --
 * confirm against clang's NEON type flags. */
2824 __ai int8x8_t vqshl_s8(int8x8_t __a, int8x8_t __b) {
2825 return (int8x8_t)__builtin_neon_vqshl_v(__a, __b, 0); }
2826 __ai int16x4_t vqshl_s16(int16x4_t __a, int16x4_t __b) {
2827 return (int16x4_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2828 __ai int32x2_t vqshl_s32(int32x2_t __a, int32x2_t __b) {
2829 return (int32x2_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2830 __ai int64x1_t vqshl_s64(int64x1_t __a, int64x1_t __b) {
2831 return (int64x1_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 3); }
2832 __ai uint8x8_t vqshl_u8(uint8x8_t __a, int8x8_t __b) {
2833 return (uint8x8_t)__builtin_neon_vqshl_v((int8x8_t)__a, __b, 16); }
2834 __ai uint16x4_t vqshl_u16(uint16x4_t __a, int16x4_t __b) {
2835 return (uint16x4_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2836 __ai uint32x2_t vqshl_u32(uint32x2_t __a, int32x2_t __b) {
2837 return (uint32x2_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2838 __ai uint64x1_t vqshl_u64(uint64x1_t __a, int64x1_t __b) {
2839 return (uint64x1_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 19); }
2840 __ai int8x16_t vqshlq_s8(int8x16_t __a, int8x16_t __b) {
2841 return (int8x16_t)__builtin_neon_vqshlq_v(__a, __b, 32); }
2842 __ai int16x8_t vqshlq_s16(int16x8_t __a, int16x8_t __b) {
2843 return (int16x8_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2844 __ai int32x4_t vqshlq_s32(int32x4_t __a, int32x4_t __b) {
2845 return (int32x4_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2846 __ai int64x2_t vqshlq_s64(int64x2_t __a, int64x2_t __b) {
2847 return (int64x2_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
2848 __ai uint8x16_t vqshlq_u8(uint8x16_t __a, int8x16_t __b) {
2849 return (uint8x16_t)__builtin_neon_vqshlq_v((int8x16_t)__a, __b, 48); }
2850 __ai uint16x8_t vqshlq_u16(uint16x8_t __a, int16x8_t __b) {
2851 return (uint16x8_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
2852 __ai uint32x4_t vqshlq_u32(uint32x4_t __a, int32x4_t __b) {
2853 return (uint32x4_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
2854 __ai uint64x2_t vqshlq_u64(uint64x2_t __a, int64x2_t __b) {
2855 return (uint64x2_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
2856
/* vqshlu_n_* / vqshluq_n_*: statement-expression macros. The signed vector a
 * is evaluated once into a typed local, cast to int8x8_t / int8x16_t where
 * needed, and passed to __builtin_neon_vqshlu_n_v / __builtin_neon_vqshluq_n_v
 * together with __b (pasted textually) and a per-variant integer constant. The
 * result is cast to the unsigned vector type of the same element width.
 * NOTE(review): the trailing constant appears to encode the unsigned result
 * type (16..19, +32 for *q); __b is presumably required to be a compile-time
 * constant -- confirm both. */
2857 #define vqshlu_n_s8(a, __b) __extension__ ({ \
2858 int8x8_t __a = (a); \
2859 (uint8x8_t)__builtin_neon_vqshlu_n_v(__a, __b, 16); })
2860 #define vqshlu_n_s16(a, __b) __extension__ ({ \
2861 int16x4_t __a = (a); \
2862 (uint16x4_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 17); })
2863 #define vqshlu_n_s32(a, __b) __extension__ ({ \
2864 int32x2_t __a = (a); \
2865 (uint32x2_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 18); })
2866 #define vqshlu_n_s64(a, __b) __extension__ ({ \
2867 int64x1_t __a = (a); \
2868 (uint64x1_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 19); })
2869 #define vqshluq_n_s8(a, __b) __extension__ ({ \
2870 int8x16_t __a = (a); \
2871 (uint8x16_t)__builtin_neon_vqshluq_n_v(__a, __b, 48); })
2872 #define vqshluq_n_s16(a, __b) __extension__ ({ \
2873 int16x8_t __a = (a); \
2874 (uint16x8_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 49); })
2875 #define vqshluq_n_s32(a, __b) __extension__ ({ \
2876 int32x4_t __a = (a); \
2877 (uint32x4_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 50); })
2878 #define vqshluq_n_s64(a, __b) __extension__ ({ \
2879 int64x2_t __a = (a); \
2880 (uint64x2_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 51); })
2881
/* vqshl_n_* / vqshlq_n_*: statement-expression macros. a is evaluated once
 * into a typed local, cast to int8x8_t / int8x16_t where needed, and passed to
 * __builtin_neon_vqshl_n_v / __builtin_neon_vqshlq_n_v together with __b
 * (pasted textually) and a per-variant integer constant. The result is cast
 * back to a's own vector type.
 * NOTE(review): the trailing constant appears to encode the vector type
 * (0..3 signed, 16..19 unsigned, +32 for *q); __b is presumably required to be
 * a compile-time constant -- confirm both. */
2882 #define vqshl_n_s8(a, __b) __extension__ ({ \
2883 int8x8_t __a = (a); \
2884 (int8x8_t)__builtin_neon_vqshl_n_v(__a, __b, 0); })
2885 #define vqshl_n_s16(a, __b) __extension__ ({ \
2886 int16x4_t __a = (a); \
2887 (int16x4_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 1); })
2888 #define vqshl_n_s32(a, __b) __extension__ ({ \
2889 int32x2_t __a = (a); \
2890 (int32x2_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 2); })
2891 #define vqshl_n_s64(a, __b) __extension__ ({ \
2892 int64x1_t __a = (a); \
2893 (int64x1_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 3); })
2894 #define vqshl_n_u8(a, __b) __extension__ ({ \
2895 uint8x8_t __a = (a); \
2896 (uint8x8_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 16); })
2897 #define vqshl_n_u16(a, __b) __extension__ ({ \
2898 uint16x4_t __a = (a); \
2899 (uint16x4_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 17); })
2900 #define vqshl_n_u32(a, __b) __extension__ ({ \
2901 uint32x2_t __a = (a); \
2902 (uint32x2_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 18); })
2903 #define vqshl_n_u64(a, __b) __extension__ ({ \
2904 uint64x1_t __a = (a); \
2905 (uint64x1_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 19); })
2906 #define vqshlq_n_s8(a, __b) __extension__ ({ \
2907 int8x16_t __a = (a); \
2908 (int8x16_t)__builtin_neon_vqshlq_n_v(__a, __b, 32); })
2909 #define vqshlq_n_s16(a, __b) __extension__ ({ \
2910 int16x8_t __a = (a); \
2911 (int16x8_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 33); })
2912 #define vqshlq_n_s32(a, __b) __extension__ ({ \
2913 int32x4_t __a = (a); \
2914 (int32x4_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 34); })
2915 #define vqshlq_n_s64(a, __b) __extension__ ({ \
2916 int64x2_t __a = (a); \
2917 (int64x2_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 35); })
2918 #define vqshlq_n_u8(a, __b) __extension__ ({ \
2919 uint8x16_t __a = (a); \
2920 (uint8x16_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 48); })
2921 #define vqshlq_n_u16(a, __b) __extension__ ({ \
2922 uint16x8_t __a = (a); \
2923 (uint16x8_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 49); })
2924 #define vqshlq_n_u32(a, __b) __extension__ ({ \
2925 uint32x4_t __a = (a); \
2926 (uint32x4_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 50); })
2927 #define vqshlq_n_u64(a, __b) __extension__ ({ \
2928 uint64x2_t __a = (a); \
2929 (uint64x2_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 51); })
2930
/* vqshrn_n_*: statement-expression macros. a is evaluated once into a typed
 * local, cast to int8x16_t, and passed to __builtin_neon_vqshrn_n_v together
 * with __b (pasted textually) and a per-variant integer constant. The result
 * is cast to a vector type with elements half as wide as a's (e.g. int16x8_t
 * in, int8x8_t out).
 * NOTE(review): the trailing constant appears to encode the result vector
 * type; __b is presumably required to be a compile-time constant -- confirm
 * both. */
2931 #define vqshrn_n_s16(a, __b) __extension__ ({ \
2932 int16x8_t __a = (a); \
2933 (int8x8_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 0); })
2934 #define vqshrn_n_s32(a, __b) __extension__ ({ \
2935 int32x4_t __a = (a); \
2936 (int16x4_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 1); })
2937 #define vqshrn_n_s64(a, __b) __extension__ ({ \
2938 int64x2_t __a = (a); \
2939 (int32x2_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 2); })
2940 #define vqshrn_n_u16(a, __b) __extension__ ({ \
2941 uint16x8_t __a = (a); \
2942 (uint8x8_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 16); })
2943 #define vqshrn_n_u32(a, __b) __extension__ ({ \
2944 uint32x4_t __a = (a); \
2945 (uint16x4_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 17); })
2946 #define vqshrn_n_u64(a, __b) __extension__ ({ \
2947 uint64x2_t __a = (a); \
2948 (uint32x2_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 18); })
2949
/* vqshrun_n_*: statement-expression macros. The signed vector a is evaluated
 * once into a typed local, cast to int8x16_t, and passed to
 * __builtin_neon_vqshrun_n_v together with __b (pasted textually) and a
 * per-variant integer constant. The result is cast to an unsigned vector type
 * with elements half as wide as a's (e.g. int16x8_t in, uint8x8_t out).
 * NOTE(review): the trailing constant appears to encode the result vector
 * type; __b is presumably required to be a compile-time constant -- confirm
 * both. */
2950 #define vqshrun_n_s16(a, __b) __extension__ ({ \
2951 int16x8_t __a = (a); \
2952 (uint8x8_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 16); })
2953 #define vqshrun_n_s32(a, __b) __extension__ ({ \
2954 int32x4_t __a = (a); \
2955 (uint16x4_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 17); })
2956 #define vqshrun_n_s64(a, __b) __extension__ ({ \
2957 int64x2_t __a = (a); \
2958 (uint32x2_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 18); })
2959
/* vqsub_* / vqsubq_*: each wrapper forwards both operands to
 * __builtin_neon_vqsub_v (or __builtin_neon_vqsubq_v for the *q names),
 * casting them to int8x8_t / int8x16_t where needed and casting the result
 * back to the operands' own vector type.
 * NOTE(review): the trailing integer constant appears to encode the vector
 * type (0..3 signed, 16..19 unsigned, +32 for *q) -- confirm against clang's
 * NEON type flags. */
2960 __ai int8x8_t vqsub_s8(int8x8_t __a, int8x8_t __b) {
2961 return (int8x8_t)__builtin_neon_vqsub_v(__a, __b, 0); }
2962 __ai int16x4_t vqsub_s16(int16x4_t __a, int16x4_t __b) {
2963 return (int16x4_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2964 __ai int32x2_t vqsub_s32(int32x2_t __a, int32x2_t __b) {
2965 return (int32x2_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2966 __ai int64x1_t vqsub_s64(int64x1_t __a, int64x1_t __b) {
2967 return (int64x1_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 3); }
2968 __ai uint8x8_t vqsub_u8(uint8x8_t __a, uint8x8_t __b) {
2969 return (uint8x8_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 16); }
2970 __ai uint16x4_t vqsub_u16(uint16x4_t __a, uint16x4_t __b) {
2971 return (uint16x4_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2972 __ai uint32x2_t vqsub_u32(uint32x2_t __a, uint32x2_t __b) {
2973 return (uint32x2_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2974 __ai uint64x1_t vqsub_u64(uint64x1_t __a, uint64x1_t __b) {
2975 return (uint64x1_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 19); }
2976 __ai int8x16_t vqsubq_s8(int8x16_t __a, int8x16_t __b) {
2977 return (int8x16_t)__builtin_neon_vqsubq_v(__a, __b, 32); }
2978 __ai int16x8_t vqsubq_s16(int16x8_t __a, int16x8_t __b) {
2979 return (int16x8_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2980 __ai int32x4_t vqsubq_s32(int32x4_t __a, int32x4_t __b) {
2981 return (int32x4_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2982 __ai int64x2_t vqsubq_s64(int64x2_t __a, int64x2_t __b) {
2983 return (int64x2_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
2984 __ai uint8x16_t vqsubq_u8(uint8x16_t __a, uint8x16_t __b) {
2985 return (uint8x16_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
2986 __ai uint16x8_t vqsubq_u16(uint16x8_t __a, uint16x8_t __b) {
2987 return (uint16x8_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
2988 __ai uint32x4_t vqsubq_u32(uint32x4_t __a, uint32x4_t __b) {
2989 return (uint32x4_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
2990 __ai uint64x2_t vqsubq_u64(uint64x2_t __a, uint64x2_t __b) {
2991 return (uint64x2_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
2992
2993 __ai int8x8_t vraddhn_s16(int16x8_t __a, int16x8_t __b) {
2994 return (int8x8_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 0); }
2995 __ai int16x4_t vraddhn_s32(int32x4_t __a, int32x4_t __b) {
2996 return (int16x4_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 1); }
2997 __ai int32x2_t vraddhn_s64(int64x2_t __a, int64x2_t __b) {
2998 return (int32x2_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 2); }
2999 __ai uint8x8_t vraddhn_u16(uint16x8_t __a, uint16x8_t __b) {
3000 return (uint8x8_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 16) ; }
3001 __ai uint16x4_t vraddhn_u32(uint32x4_t __a, uint32x4_t __b) {
3002 return (uint16x4_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 17 ); }
3003 __ai uint32x2_t vraddhn_u64(uint64x2_t __a, uint64x2_t __b) {
3004 return (uint32x2_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 18 ); }
3005
3006 __ai float32x2_t vrecpe_f32(float32x2_t __a) {
3007 return (float32x2_t)__builtin_neon_vrecpe_v((int8x8_t)__a, 7); }
3008 __ai uint32x2_t vrecpe_u32(uint32x2_t __a) {
3009 return (uint32x2_t)__builtin_neon_vrecpe_v((int8x8_t)__a, 18); }
3010 __ai float32x4_t vrecpeq_f32(float32x4_t __a) {
3011 return (float32x4_t)__builtin_neon_vrecpeq_v((int8x16_t)__a, 39); }
3012 __ai uint32x4_t vrecpeq_u32(uint32x4_t __a) {
3013 return (uint32x4_t)__builtin_neon_vrecpeq_v((int8x16_t)__a, 50); }
3014
3015 __ai float32x2_t vrecps_f32(float32x2_t __a, float32x2_t __b) {
3016 return (float32x2_t)__builtin_neon_vrecps_v((int8x8_t)__a, (int8x8_t)__b, 7); }
3017 __ai float32x4_t vrecpsq_f32(float32x4_t __a, float32x4_t __b) {
3018 return (float32x4_t)__builtin_neon_vrecpsq_v((int8x16_t)__a, (int8x16_t)__b, 3 9); }
3019
3020 __ai int8x8_t vreinterpret_s8_s16(int16x4_t __a) {
3021 return (int8x8_t)__a; }
3022 __ai int8x8_t vreinterpret_s8_s32(int32x2_t __a) {
3023 return (int8x8_t)__a; }
3024 __ai int8x8_t vreinterpret_s8_s64(int64x1_t __a) {
3025 return (int8x8_t)__a; }
3026 __ai int8x8_t vreinterpret_s8_u8(uint8x8_t __a) {
3027 return (int8x8_t)__a; }
3028 __ai int8x8_t vreinterpret_s8_u16(uint16x4_t __a) {
3029 return (int8x8_t)__a; }
3030 __ai int8x8_t vreinterpret_s8_u32(uint32x2_t __a) {
3031 return (int8x8_t)__a; }
3032 __ai int8x8_t vreinterpret_s8_u64(uint64x1_t __a) {
3033 return (int8x8_t)__a; }
3034 __ai int8x8_t vreinterpret_s8_f16(float16x4_t __a) {
3035 return (int8x8_t)__a; }
3036 __ai int8x8_t vreinterpret_s8_f32(float32x2_t __a) {
3037 return (int8x8_t)__a; }
3038 __ai int8x8_t vreinterpret_s8_p8(poly8x8_t __a) {
3039 return (int8x8_t)__a; }
3040 __ai int8x8_t vreinterpret_s8_p16(poly16x4_t __a) {
3041 return (int8x8_t)__a; }
3042 __ai int16x4_t vreinterpret_s16_s8(int8x8_t __a) {
3043 return (int16x4_t)__a; }
3044 __ai int16x4_t vreinterpret_s16_s32(int32x2_t __a) {
3045 return (int16x4_t)__a; }
3046 __ai int16x4_t vreinterpret_s16_s64(int64x1_t __a) {
3047 return (int16x4_t)__a; }
3048 __ai int16x4_t vreinterpret_s16_u8(uint8x8_t __a) {
3049 return (int16x4_t)__a; }
3050 __ai int16x4_t vreinterpret_s16_u16(uint16x4_t __a) {
3051 return (int16x4_t)__a; }
3052 __ai int16x4_t vreinterpret_s16_u32(uint32x2_t __a) {
3053 return (int16x4_t)__a; }
3054 __ai int16x4_t vreinterpret_s16_u64(uint64x1_t __a) {
3055 return (int16x4_t)__a; }
3056 __ai int16x4_t vreinterpret_s16_f16(float16x4_t __a) {
3057 return (int16x4_t)__a; }
3058 __ai int16x4_t vreinterpret_s16_f32(float32x2_t __a) {
3059 return (int16x4_t)__a; }
3060 __ai int16x4_t vreinterpret_s16_p8(poly8x8_t __a) {
3061 return (int16x4_t)__a; }
3062 __ai int16x4_t vreinterpret_s16_p16(poly16x4_t __a) {
3063 return (int16x4_t)__a; }
3064 __ai int32x2_t vreinterpret_s32_s8(int8x8_t __a) {
3065 return (int32x2_t)__a; }
3066 __ai int32x2_t vreinterpret_s32_s16(int16x4_t __a) {
3067 return (int32x2_t)__a; }
3068 __ai int32x2_t vreinterpret_s32_s64(int64x1_t __a) {
3069 return (int32x2_t)__a; }
3070 __ai int32x2_t vreinterpret_s32_u8(uint8x8_t __a) {
3071 return (int32x2_t)__a; }
3072 __ai int32x2_t vreinterpret_s32_u16(uint16x4_t __a) {
3073 return (int32x2_t)__a; }
3074 __ai int32x2_t vreinterpret_s32_u32(uint32x2_t __a) {
3075 return (int32x2_t)__a; }
3076 __ai int32x2_t vreinterpret_s32_u64(uint64x1_t __a) {
3077 return (int32x2_t)__a; }
3078 __ai int32x2_t vreinterpret_s32_f16(float16x4_t __a) {
3079 return (int32x2_t)__a; }
3080 __ai int32x2_t vreinterpret_s32_f32(float32x2_t __a) {
3081 return (int32x2_t)__a; }
3082 __ai int32x2_t vreinterpret_s32_p8(poly8x8_t __a) {
3083 return (int32x2_t)__a; }
3084 __ai int32x2_t vreinterpret_s32_p16(poly16x4_t __a) {
3085 return (int32x2_t)__a; }
3086 __ai int64x1_t vreinterpret_s64_s8(int8x8_t __a) {
3087 return (int64x1_t)__a; }
3088 __ai int64x1_t vreinterpret_s64_s16(int16x4_t __a) {
3089 return (int64x1_t)__a; }
3090 __ai int64x1_t vreinterpret_s64_s32(int32x2_t __a) {
3091 return (int64x1_t)__a; }
3092 __ai int64x1_t vreinterpret_s64_u8(uint8x8_t __a) {
3093 return (int64x1_t)__a; }
3094 __ai int64x1_t vreinterpret_s64_u16(uint16x4_t __a) {
3095 return (int64x1_t)__a; }
3096 __ai int64x1_t vreinterpret_s64_u32(uint32x2_t __a) {
3097 return (int64x1_t)__a; }
3098 __ai int64x1_t vreinterpret_s64_u64(uint64x1_t __a) {
3099 return (int64x1_t)__a; }
3100 __ai int64x1_t vreinterpret_s64_f16(float16x4_t __a) {
3101 return (int64x1_t)__a; }
3102 __ai int64x1_t vreinterpret_s64_f32(float32x2_t __a) {
3103 return (int64x1_t)__a; }
3104 __ai int64x1_t vreinterpret_s64_p8(poly8x8_t __a) {
3105 return (int64x1_t)__a; }
3106 __ai int64x1_t vreinterpret_s64_p16(poly16x4_t __a) {
3107 return (int64x1_t)__a; }
3108 __ai uint8x8_t vreinterpret_u8_s8(int8x8_t __a) {
3109 return (uint8x8_t)__a; }
3110 __ai uint8x8_t vreinterpret_u8_s16(int16x4_t __a) {
3111 return (uint8x8_t)__a; }
3112 __ai uint8x8_t vreinterpret_u8_s32(int32x2_t __a) {
3113 return (uint8x8_t)__a; }
3114 __ai uint8x8_t vreinterpret_u8_s64(int64x1_t __a) {
3115 return (uint8x8_t)__a; }
3116 __ai uint8x8_t vreinterpret_u8_u16(uint16x4_t __a) {
3117 return (uint8x8_t)__a; }
3118 __ai uint8x8_t vreinterpret_u8_u32(uint32x2_t __a) {
3119 return (uint8x8_t)__a; }
3120 __ai uint8x8_t vreinterpret_u8_u64(uint64x1_t __a) {
3121 return (uint8x8_t)__a; }
3122 __ai uint8x8_t vreinterpret_u8_f16(float16x4_t __a) {
3123 return (uint8x8_t)__a; }
3124 __ai uint8x8_t vreinterpret_u8_f32(float32x2_t __a) {
3125 return (uint8x8_t)__a; }
3126 __ai uint8x8_t vreinterpret_u8_p8(poly8x8_t __a) {
3127 return (uint8x8_t)__a; }
3128 __ai uint8x8_t vreinterpret_u8_p16(poly16x4_t __a) {
3129 return (uint8x8_t)__a; }
3130 __ai uint16x4_t vreinterpret_u16_s8(int8x8_t __a) {
3131 return (uint16x4_t)__a; }
3132 __ai uint16x4_t vreinterpret_u16_s16(int16x4_t __a) {
3133 return (uint16x4_t)__a; }
3134 __ai uint16x4_t vreinterpret_u16_s32(int32x2_t __a) {
3135 return (uint16x4_t)__a; }
3136 __ai uint16x4_t vreinterpret_u16_s64(int64x1_t __a) {
3137 return (uint16x4_t)__a; }
3138 __ai uint16x4_t vreinterpret_u16_u8(uint8x8_t __a) {
3139 return (uint16x4_t)__a; }
3140 __ai uint16x4_t vreinterpret_u16_u32(uint32x2_t __a) {
3141 return (uint16x4_t)__a; }
3142 __ai uint16x4_t vreinterpret_u16_u64(uint64x1_t __a) {
3143 return (uint16x4_t)__a; }
3144 __ai uint16x4_t vreinterpret_u16_f16(float16x4_t __a) {
3145 return (uint16x4_t)__a; }
3146 __ai uint16x4_t vreinterpret_u16_f32(float32x2_t __a) {
3147 return (uint16x4_t)__a; }
3148 __ai uint16x4_t vreinterpret_u16_p8(poly8x8_t __a) {
3149 return (uint16x4_t)__a; }
3150 __ai uint16x4_t vreinterpret_u16_p16(poly16x4_t __a) {
3151 return (uint16x4_t)__a; }
3152 __ai uint32x2_t vreinterpret_u32_s8(int8x8_t __a) {
3153 return (uint32x2_t)__a; }
3154 __ai uint32x2_t vreinterpret_u32_s16(int16x4_t __a) {
3155 return (uint32x2_t)__a; }
3156 __ai uint32x2_t vreinterpret_u32_s32(int32x2_t __a) {
3157 return (uint32x2_t)__a; }
3158 __ai uint32x2_t vreinterpret_u32_s64(int64x1_t __a) {
3159 return (uint32x2_t)__a; }
3160 __ai uint32x2_t vreinterpret_u32_u8(uint8x8_t __a) {
3161 return (uint32x2_t)__a; }
3162 __ai uint32x2_t vreinterpret_u32_u16(uint16x4_t __a) {
3163 return (uint32x2_t)__a; }
3164 __ai uint32x2_t vreinterpret_u32_u64(uint64x1_t __a) {
3165 return (uint32x2_t)__a; }
3166 __ai uint32x2_t vreinterpret_u32_f16(float16x4_t __a) {
3167 return (uint32x2_t)__a; }
3168 __ai uint32x2_t vreinterpret_u32_f32(float32x2_t __a) {
3169 return (uint32x2_t)__a; }
3170 __ai uint32x2_t vreinterpret_u32_p8(poly8x8_t __a) {
3171 return (uint32x2_t)__a; }
3172 __ai uint32x2_t vreinterpret_u32_p16(poly16x4_t __a) {
3173 return (uint32x2_t)__a; }
3174 __ai uint64x1_t vreinterpret_u64_s8(int8x8_t __a) {
3175 return (uint64x1_t)__a; }
3176 __ai uint64x1_t vreinterpret_u64_s16(int16x4_t __a) {
3177 return (uint64x1_t)__a; }
3178 __ai uint64x1_t vreinterpret_u64_s32(int32x2_t __a) {
3179 return (uint64x1_t)__a; }
3180 __ai uint64x1_t vreinterpret_u64_s64(int64x1_t __a) {
3181 return (uint64x1_t)__a; }
3182 __ai uint64x1_t vreinterpret_u64_u8(uint8x8_t __a) {
3183 return (uint64x1_t)__a; }
3184 __ai uint64x1_t vreinterpret_u64_u16(uint16x4_t __a) {
3185 return (uint64x1_t)__a; }
3186 __ai uint64x1_t vreinterpret_u64_u32(uint32x2_t __a) {
3187 return (uint64x1_t)__a; }
3188 __ai uint64x1_t vreinterpret_u64_f16(float16x4_t __a) {
3189 return (uint64x1_t)__a; }
3190 __ai uint64x1_t vreinterpret_u64_f32(float32x2_t __a) {
3191 return (uint64x1_t)__a; }
3192 __ai uint64x1_t vreinterpret_u64_p8(poly8x8_t __a) {
3193 return (uint64x1_t)__a; }
3194 __ai uint64x1_t vreinterpret_u64_p16(poly16x4_t __a) {
3195 return (uint64x1_t)__a; }
3196 __ai float16x4_t vreinterpret_f16_s8(int8x8_t __a) {
3197 return (float16x4_t)__a; }
3198 __ai float16x4_t vreinterpret_f16_s16(int16x4_t __a) {
3199 return (float16x4_t)__a; }
3200 __ai float16x4_t vreinterpret_f16_s32(int32x2_t __a) {
3201 return (float16x4_t)__a; }
3202 __ai float16x4_t vreinterpret_f16_s64(int64x1_t __a) {
3203 return (float16x4_t)__a; }
3204 __ai float16x4_t vreinterpret_f16_u8(uint8x8_t __a) {
3205 return (float16x4_t)__a; }
3206 __ai float16x4_t vreinterpret_f16_u16(uint16x4_t __a) {
3207 return (float16x4_t)__a; }
3208 __ai float16x4_t vreinterpret_f16_u32(uint32x2_t __a) {
3209 return (float16x4_t)__a; }
3210 __ai float16x4_t vreinterpret_f16_u64(uint64x1_t __a) {
3211 return (float16x4_t)__a; }
3212 __ai float16x4_t vreinterpret_f16_f32(float32x2_t __a) {
3213 return (float16x4_t)__a; }
3214 __ai float16x4_t vreinterpret_f16_p8(poly8x8_t __a) {
3215 return (float16x4_t)__a; }
3216 __ai float16x4_t vreinterpret_f16_p16(poly16x4_t __a) {
3217 return (float16x4_t)__a; }
3218 __ai float32x2_t vreinterpret_f32_s8(int8x8_t __a) {
3219 return (float32x2_t)__a; }
3220 __ai float32x2_t vreinterpret_f32_s16(int16x4_t __a) {
3221 return (float32x2_t)__a; }
3222 __ai float32x2_t vreinterpret_f32_s32(int32x2_t __a) {
3223 return (float32x2_t)__a; }
3224 __ai float32x2_t vreinterpret_f32_s64(int64x1_t __a) {
3225 return (float32x2_t)__a; }
3226 __ai float32x2_t vreinterpret_f32_u8(uint8x8_t __a) {
3227 return (float32x2_t)__a; }
3228 __ai float32x2_t vreinterpret_f32_u16(uint16x4_t __a) {
3229 return (float32x2_t)__a; }
3230 __ai float32x2_t vreinterpret_f32_u32(uint32x2_t __a) {
3231 return (float32x2_t)__a; }
3232 __ai float32x2_t vreinterpret_f32_u64(uint64x1_t __a) {
3233 return (float32x2_t)__a; }
3234 __ai float32x2_t vreinterpret_f32_f16(float16x4_t __a) {
3235 return (float32x2_t)__a; }
3236 __ai float32x2_t vreinterpret_f32_p8(poly8x8_t __a) {
3237 return (float32x2_t)__a; }
3238 __ai float32x2_t vreinterpret_f32_p16(poly16x4_t __a) {
3239 return (float32x2_t)__a; }
3240 __ai poly8x8_t vreinterpret_p8_s8(int8x8_t __a) {
3241 return (poly8x8_t)__a; }
3242 __ai poly8x8_t vreinterpret_p8_s16(int16x4_t __a) {
3243 return (poly8x8_t)__a; }
3244 __ai poly8x8_t vreinterpret_p8_s32(int32x2_t __a) {
3245 return (poly8x8_t)__a; }
3246 __ai poly8x8_t vreinterpret_p8_s64(int64x1_t __a) {
3247 return (poly8x8_t)__a; }
3248 __ai poly8x8_t vreinterpret_p8_u8(uint8x8_t __a) {
3249 return (poly8x8_t)__a; }
3250 __ai poly8x8_t vreinterpret_p8_u16(uint16x4_t __a) {
3251 return (poly8x8_t)__a; }
3252 __ai poly8x8_t vreinterpret_p8_u32(uint32x2_t __a) {
3253 return (poly8x8_t)__a; }
3254 __ai poly8x8_t vreinterpret_p8_u64(uint64x1_t __a) {
3255 return (poly8x8_t)__a; }
3256 __ai poly8x8_t vreinterpret_p8_f16(float16x4_t __a) {
3257 return (poly8x8_t)__a; }
3258 __ai poly8x8_t vreinterpret_p8_f32(float32x2_t __a) {
3259 return (poly8x8_t)__a; }
3260 __ai poly8x8_t vreinterpret_p8_p16(poly16x4_t __a) {
3261 return (poly8x8_t)__a; }
3262 __ai poly16x4_t vreinterpret_p16_s8(int8x8_t __a) {
3263 return (poly16x4_t)__a; }
3264 __ai poly16x4_t vreinterpret_p16_s16(int16x4_t __a) {
3265 return (poly16x4_t)__a; }
3266 __ai poly16x4_t vreinterpret_p16_s32(int32x2_t __a) {
3267 return (poly16x4_t)__a; }
3268 __ai poly16x4_t vreinterpret_p16_s64(int64x1_t __a) {
3269 return (poly16x4_t)__a; }
3270 __ai poly16x4_t vreinterpret_p16_u8(uint8x8_t __a) {
3271 return (poly16x4_t)__a; }
3272 __ai poly16x4_t vreinterpret_p16_u16(uint16x4_t __a) {
3273 return (poly16x4_t)__a; }
3274 __ai poly16x4_t vreinterpret_p16_u32(uint32x2_t __a) {
3275 return (poly16x4_t)__a; }
3276 __ai poly16x4_t vreinterpret_p16_u64(uint64x1_t __a) {
3277 return (poly16x4_t)__a; }
3278 __ai poly16x4_t vreinterpret_p16_f16(float16x4_t __a) {
3279 return (poly16x4_t)__a; }
3280 __ai poly16x4_t vreinterpret_p16_f32(float32x2_t __a) {
3281 return (poly16x4_t)__a; }
3282 __ai poly16x4_t vreinterpret_p16_p8(poly8x8_t __a) {
3283 return (poly16x4_t)__a; }
3284 __ai int8x16_t vreinterpretq_s8_s16(int16x8_t __a) {
3285 return (int8x16_t)__a; }
3286 __ai int8x16_t vreinterpretq_s8_s32(int32x4_t __a) {
3287 return (int8x16_t)__a; }
3288 __ai int8x16_t vreinterpretq_s8_s64(int64x2_t __a) {
3289 return (int8x16_t)__a; }
3290 __ai int8x16_t vreinterpretq_s8_u8(uint8x16_t __a) {
3291 return (int8x16_t)__a; }
3292 __ai int8x16_t vreinterpretq_s8_u16(uint16x8_t __a) {
3293 return (int8x16_t)__a; }
3294 __ai int8x16_t vreinterpretq_s8_u32(uint32x4_t __a) {
3295 return (int8x16_t)__a; }
3296 __ai int8x16_t vreinterpretq_s8_u64(uint64x2_t __a) {
3297 return (int8x16_t)__a; }
3298 __ai int8x16_t vreinterpretq_s8_f16(float16x8_t __a) {
3299 return (int8x16_t)__a; }
3300 __ai int8x16_t vreinterpretq_s8_f32(float32x4_t __a) {
3301 return (int8x16_t)__a; }
3302 __ai int8x16_t vreinterpretq_s8_p8(poly8x16_t __a) {
3303 return (int8x16_t)__a; }
3304 __ai int8x16_t vreinterpretq_s8_p16(poly16x8_t __a) {
3305 return (int8x16_t)__a; }
3306 __ai int16x8_t vreinterpretq_s16_s8(int8x16_t __a) {
3307 return (int16x8_t)__a; }
3308 __ai int16x8_t vreinterpretq_s16_s32(int32x4_t __a) {
3309 return (int16x8_t)__a; }
3310 __ai int16x8_t vreinterpretq_s16_s64(int64x2_t __a) {
3311 return (int16x8_t)__a; }
3312 __ai int16x8_t vreinterpretq_s16_u8(uint8x16_t __a) {
3313 return (int16x8_t)__a; }
3314 __ai int16x8_t vreinterpretq_s16_u16(uint16x8_t __a) {
3315 return (int16x8_t)__a; }
3316 __ai int16x8_t vreinterpretq_s16_u32(uint32x4_t __a) {
3317 return (int16x8_t)__a; }
3318 __ai int16x8_t vreinterpretq_s16_u64(uint64x2_t __a) {
3319 return (int16x8_t)__a; }
3320 __ai int16x8_t vreinterpretq_s16_f16(float16x8_t __a) {
3321 return (int16x8_t)__a; }
3322 __ai int16x8_t vreinterpretq_s16_f32(float32x4_t __a) {
3323 return (int16x8_t)__a; }
3324 __ai int16x8_t vreinterpretq_s16_p8(poly8x16_t __a) {
3325 return (int16x8_t)__a; }
3326 __ai int16x8_t vreinterpretq_s16_p16(poly16x8_t __a) {
3327 return (int16x8_t)__a; }
3328 __ai int32x4_t vreinterpretq_s32_s8(int8x16_t __a) {
3329 return (int32x4_t)__a; }
3330 __ai int32x4_t vreinterpretq_s32_s16(int16x8_t __a) {
3331 return (int32x4_t)__a; }
3332 __ai int32x4_t vreinterpretq_s32_s64(int64x2_t __a) {
3333 return (int32x4_t)__a; }
3334 __ai int32x4_t vreinterpretq_s32_u8(uint8x16_t __a) {
3335 return (int32x4_t)__a; }
3336 __ai int32x4_t vreinterpretq_s32_u16(uint16x8_t __a) {
3337 return (int32x4_t)__a; }
3338 __ai int32x4_t vreinterpretq_s32_u32(uint32x4_t __a) {
3339 return (int32x4_t)__a; }
3340 __ai int32x4_t vreinterpretq_s32_u64(uint64x2_t __a) {
3341 return (int32x4_t)__a; }
3342 __ai int32x4_t vreinterpretq_s32_f16(float16x8_t __a) {
3343 return (int32x4_t)__a; }
3344 __ai int32x4_t vreinterpretq_s32_f32(float32x4_t __a) {
3345 return (int32x4_t)__a; }
3346 __ai int32x4_t vreinterpretq_s32_p8(poly8x16_t __a) {
3347 return (int32x4_t)__a; }
3348 __ai int32x4_t vreinterpretq_s32_p16(poly16x8_t __a) {
3349 return (int32x4_t)__a; }
3350 __ai int64x2_t vreinterpretq_s64_s8(int8x16_t __a) {
3351 return (int64x2_t)__a; }
3352 __ai int64x2_t vreinterpretq_s64_s16(int16x8_t __a) {
3353 return (int64x2_t)__a; }
3354 __ai int64x2_t vreinterpretq_s64_s32(int32x4_t __a) {
3355 return (int64x2_t)__a; }
3356 __ai int64x2_t vreinterpretq_s64_u8(uint8x16_t __a) {
3357 return (int64x2_t)__a; }
3358 __ai int64x2_t vreinterpretq_s64_u16(uint16x8_t __a) {
3359 return (int64x2_t)__a; }
3360 __ai int64x2_t vreinterpretq_s64_u32(uint32x4_t __a) {
3361 return (int64x2_t)__a; }
3362 __ai int64x2_t vreinterpretq_s64_u64(uint64x2_t __a) {
3363 return (int64x2_t)__a; }
3364 __ai int64x2_t vreinterpretq_s64_f16(float16x8_t __a) {
3365 return (int64x2_t)__a; }
3366 __ai int64x2_t vreinterpretq_s64_f32(float32x4_t __a) {
3367 return (int64x2_t)__a; }
3368 __ai int64x2_t vreinterpretq_s64_p8(poly8x16_t __a) {
3369 return (int64x2_t)__a; }
3370 __ai int64x2_t vreinterpretq_s64_p16(poly16x8_t __a) {
3371 return (int64x2_t)__a; }
3372 __ai uint8x16_t vreinterpretq_u8_s8(int8x16_t __a) {
3373 return (uint8x16_t)__a; }
3374 __ai uint8x16_t vreinterpretq_u8_s16(int16x8_t __a) {
3375 return (uint8x16_t)__a; }
3376 __ai uint8x16_t vreinterpretq_u8_s32(int32x4_t __a) {
3377 return (uint8x16_t)__a; }
3378 __ai uint8x16_t vreinterpretq_u8_s64(int64x2_t __a) {
3379 return (uint8x16_t)__a; }
3380 __ai uint8x16_t vreinterpretq_u8_u16(uint16x8_t __a) {
3381 return (uint8x16_t)__a; }
3382 __ai uint8x16_t vreinterpretq_u8_u32(uint32x4_t __a) {
3383 return (uint8x16_t)__a; }
3384 __ai uint8x16_t vreinterpretq_u8_u64(uint64x2_t __a) {
3385 return (uint8x16_t)__a; }
3386 __ai uint8x16_t vreinterpretq_u8_f16(float16x8_t __a) {
3387 return (uint8x16_t)__a; }
3388 __ai uint8x16_t vreinterpretq_u8_f32(float32x4_t __a) {
3389 return (uint8x16_t)__a; }
3390 __ai uint8x16_t vreinterpretq_u8_p8(poly8x16_t __a) {
3391 return (uint8x16_t)__a; }
3392 __ai uint8x16_t vreinterpretq_u8_p16(poly16x8_t __a) {
3393 return (uint8x16_t)__a; }
3394 __ai uint16x8_t vreinterpretq_u16_s8(int8x16_t __a) {
3395 return (uint16x8_t)__a; }
3396 __ai uint16x8_t vreinterpretq_u16_s16(int16x8_t __a) {
3397 return (uint16x8_t)__a; }
3398 __ai uint16x8_t vreinterpretq_u16_s32(int32x4_t __a) {
3399 return (uint16x8_t)__a; }
3400 __ai uint16x8_t vreinterpretq_u16_s64(int64x2_t __a) {
3401 return (uint16x8_t)__a; }
3402 __ai uint16x8_t vreinterpretq_u16_u8(uint8x16_t __a) {
3403 return (uint16x8_t)__a; }
3404 __ai uint16x8_t vreinterpretq_u16_u32(uint32x4_t __a) {
3405 return (uint16x8_t)__a; }
3406 __ai uint16x8_t vreinterpretq_u16_u64(uint64x2_t __a) {
3407 return (uint16x8_t)__a; }
3408 __ai uint16x8_t vreinterpretq_u16_f16(float16x8_t __a) {
3409 return (uint16x8_t)__a; }
3410 __ai uint16x8_t vreinterpretq_u16_f32(float32x4_t __a) {
3411 return (uint16x8_t)__a; }
3412 __ai uint16x8_t vreinterpretq_u16_p8(poly8x16_t __a) {
3413 return (uint16x8_t)__a; }
3414 __ai uint16x8_t vreinterpretq_u16_p16(poly16x8_t __a) {
3415 return (uint16x8_t)__a; }
3416 __ai uint32x4_t vreinterpretq_u32_s8(int8x16_t __a) {
3417 return (uint32x4_t)__a; }
3418 __ai uint32x4_t vreinterpretq_u32_s16(int16x8_t __a) {
3419 return (uint32x4_t)__a; }
3420 __ai uint32x4_t vreinterpretq_u32_s32(int32x4_t __a) {
3421 return (uint32x4_t)__a; }
3422 __ai uint32x4_t vreinterpretq_u32_s64(int64x2_t __a) {
3423 return (uint32x4_t)__a; }
3424 __ai uint32x4_t vreinterpretq_u32_u8(uint8x16_t __a) {
3425 return (uint32x4_t)__a; }
3426 __ai uint32x4_t vreinterpretq_u32_u16(uint16x8_t __a) {
3427 return (uint32x4_t)__a; }
3428 __ai uint32x4_t vreinterpretq_u32_u64(uint64x2_t __a) {
3429 return (uint32x4_t)__a; }
3430 __ai uint32x4_t vreinterpretq_u32_f16(float16x8_t __a) {
3431 return (uint32x4_t)__a; }
3432 __ai uint32x4_t vreinterpretq_u32_f32(float32x4_t __a) {
3433 return (uint32x4_t)__a; }
3434 __ai uint32x4_t vreinterpretq_u32_p8(poly8x16_t __a) {
3435 return (uint32x4_t)__a; }
3436 __ai uint32x4_t vreinterpretq_u32_p16(poly16x8_t __a) {
3437 return (uint32x4_t)__a; }
3438 __ai uint64x2_t vreinterpretq_u64_s8(int8x16_t __a) {
3439 return (uint64x2_t)__a; }
3440 __ai uint64x2_t vreinterpretq_u64_s16(int16x8_t __a) {
3441 return (uint64x2_t)__a; }
3442 __ai uint64x2_t vreinterpretq_u64_s32(int32x4_t __a) {
3443 return (uint64x2_t)__a; }
3444 __ai uint64x2_t vreinterpretq_u64_s64(int64x2_t __a) {
3445 return (uint64x2_t)__a; }
3446 __ai uint64x2_t vreinterpretq_u64_u8(uint8x16_t __a) {
3447 return (uint64x2_t)__a; }
3448 __ai uint64x2_t vreinterpretq_u64_u16(uint16x8_t __a) {
3449 return (uint64x2_t)__a; }
3450 __ai uint64x2_t vreinterpretq_u64_u32(uint32x4_t __a) {
3451 return (uint64x2_t)__a; }
3452 __ai uint64x2_t vreinterpretq_u64_f16(float16x8_t __a) {
3453 return (uint64x2_t)__a; }
3454 __ai uint64x2_t vreinterpretq_u64_f32(float32x4_t __a) {
3455 return (uint64x2_t)__a; }
3456 __ai uint64x2_t vreinterpretq_u64_p8(poly8x16_t __a) {
3457 return (uint64x2_t)__a; }
3458 __ai uint64x2_t vreinterpretq_u64_p16(poly16x8_t __a) {
3459 return (uint64x2_t)__a; }
3460 __ai float16x8_t vreinterpretq_f16_s8(int8x16_t __a) {
3461 return (float16x8_t)__a; }
3462 __ai float16x8_t vreinterpretq_f16_s16(int16x8_t __a) {
3463 return (float16x8_t)__a; }
3464 __ai float16x8_t vreinterpretq_f16_s32(int32x4_t __a) {
3465 return (float16x8_t)__a; }
3466 __ai float16x8_t vreinterpretq_f16_s64(int64x2_t __a) {
3467 return (float16x8_t)__a; }
3468 __ai float16x8_t vreinterpretq_f16_u8(uint8x16_t __a) {
3469 return (float16x8_t)__a; }
3470 __ai float16x8_t vreinterpretq_f16_u16(uint16x8_t __a) {
3471 return (float16x8_t)__a; }
3472 __ai float16x8_t vreinterpretq_f16_u32(uint32x4_t __a) {
3473 return (float16x8_t)__a; }
3474 __ai float16x8_t vreinterpretq_f16_u64(uint64x2_t __a) {
3475 return (float16x8_t)__a; }
3476 __ai float16x8_t vreinterpretq_f16_f32(float32x4_t __a) {
3477 return (float16x8_t)__a; }
3478 __ai float16x8_t vreinterpretq_f16_p8(poly8x16_t __a) {
3479 return (float16x8_t)__a; }
3480 __ai float16x8_t vreinterpretq_f16_p16(poly16x8_t __a) {
3481 return (float16x8_t)__a; }
3482 __ai float32x4_t vreinterpretq_f32_s8(int8x16_t __a) {
3483 return (float32x4_t)__a; }
3484 __ai float32x4_t vreinterpretq_f32_s16(int16x8_t __a) {
3485 return (float32x4_t)__a; }
3486 __ai float32x4_t vreinterpretq_f32_s32(int32x4_t __a) {
3487 return (float32x4_t)__a; }
3488 __ai float32x4_t vreinterpretq_f32_s64(int64x2_t __a) {
3489 return (float32x4_t)__a; }
3490 __ai float32x4_t vreinterpretq_f32_u8(uint8x16_t __a) {
3491 return (float32x4_t)__a; }
3492 __ai float32x4_t vreinterpretq_f32_u16(uint16x8_t __a) {
3493 return (float32x4_t)__a; }
3494 __ai float32x4_t vreinterpretq_f32_u32(uint32x4_t __a) {
3495 return (float32x4_t)__a; }
3496 __ai float32x4_t vreinterpretq_f32_u64(uint64x2_t __a) {
3497 return (float32x4_t)__a; }
3498 __ai float32x4_t vreinterpretq_f32_f16(float16x8_t __a) {
3499 return (float32x4_t)__a; }
3500 __ai float32x4_t vreinterpretq_f32_p8(poly8x16_t __a) {
3501 return (float32x4_t)__a; }
3502 __ai float32x4_t vreinterpretq_f32_p16(poly16x8_t __a) {
3503 return (float32x4_t)__a; }
3504 __ai poly8x16_t vreinterpretq_p8_s8(int8x16_t __a) {
3505 return (poly8x16_t)__a; }
3506 __ai poly8x16_t vreinterpretq_p8_s16(int16x8_t __a) {
3507 return (poly8x16_t)__a; }
3508 __ai poly8x16_t vreinterpretq_p8_s32(int32x4_t __a) {
3509 return (poly8x16_t)__a; }
3510 __ai poly8x16_t vreinterpretq_p8_s64(int64x2_t __a) {
3511 return (poly8x16_t)__a; }
3512 __ai poly8x16_t vreinterpretq_p8_u8(uint8x16_t __a) {
3513 return (poly8x16_t)__a; }
3514 __ai poly8x16_t vreinterpretq_p8_u16(uint16x8_t __a) {
3515 return (poly8x16_t)__a; }
3516 __ai poly8x16_t vreinterpretq_p8_u32(uint32x4_t __a) {
3517 return (poly8x16_t)__a; }
3518 __ai poly8x16_t vreinterpretq_p8_u64(uint64x2_t __a) {
3519 return (poly8x16_t)__a; }
3520 __ai poly8x16_t vreinterpretq_p8_f16(float16x8_t __a) {
3521 return (poly8x16_t)__a; }
3522 __ai poly8x16_t vreinterpretq_p8_f32(float32x4_t __a) {
3523 return (poly8x16_t)__a; }
3524 __ai poly8x16_t vreinterpretq_p8_p16(poly16x8_t __a) {
3525 return (poly8x16_t)__a; }
3526 __ai poly16x8_t vreinterpretq_p16_s8(int8x16_t __a) {
3527 return (poly16x8_t)__a; }
3528 __ai poly16x8_t vreinterpretq_p16_s16(int16x8_t __a) {
3529 return (poly16x8_t)__a; }
3530 __ai poly16x8_t vreinterpretq_p16_s32(int32x4_t __a) {
3531 return (poly16x8_t)__a; }
3532 __ai poly16x8_t vreinterpretq_p16_s64(int64x2_t __a) {
3533 return (poly16x8_t)__a; }
3534 __ai poly16x8_t vreinterpretq_p16_u8(uint8x16_t __a) {
3535 return (poly16x8_t)__a; }
3536 __ai poly16x8_t vreinterpretq_p16_u16(uint16x8_t __a) {
3537 return (poly16x8_t)__a; }
3538 __ai poly16x8_t vreinterpretq_p16_u32(uint32x4_t __a) {
3539 return (poly16x8_t)__a; }
3540 __ai poly16x8_t vreinterpretq_p16_u64(uint64x2_t __a) {
3541 return (poly16x8_t)__a; }
3542 __ai poly16x8_t vreinterpretq_p16_f16(float16x8_t __a) {
3543 return (poly16x8_t)__a; }
3544 __ai poly16x8_t vreinterpretq_p16_f32(float32x4_t __a) {
3545 return (poly16x8_t)__a; }
3546 __ai poly16x8_t vreinterpretq_p16_p8(poly8x16_t __a) {
3547 return (poly16x8_t)__a; }
3548
3549 __ai int8x8_t vrev16_s8(int8x8_t __a) {
3550 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); }
3551 __ai uint8x8_t vrev16_u8(uint8x8_t __a) {
3552 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); }
3553 __ai poly8x8_t vrev16_p8(poly8x8_t __a) {
3554 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); }
3555 __ai int8x16_t vrev16q_s8(int8x16_t __a) {
3556 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); }
3557 __ai uint8x16_t vrev16q_u8(uint8x16_t __a) {
3558 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); }
3559 __ai poly8x16_t vrev16q_p8(poly8x16_t __a) {
3560 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); }
3561
3562 __ai int8x8_t vrev32_s8(int8x8_t __a) {
3563 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); }
3564 __ai int16x4_t vrev32_s16(int16x4_t __a) {
3565 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
3566 __ai uint8x8_t vrev32_u8(uint8x8_t __a) {
3567 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); }
3568 __ai uint16x4_t vrev32_u16(uint16x4_t __a) {
3569 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
3570 __ai poly8x8_t vrev32_p8(poly8x8_t __a) {
3571 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); }
3572 __ai poly16x4_t vrev32_p16(poly16x4_t __a) {
3573 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
3574 __ai int8x16_t vrev32q_s8(int8x16_t __a) {
3575 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); }
3576 __ai int16x8_t vrev32q_s16(int16x8_t __a) {
3577 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); }
3578 __ai uint8x16_t vrev32q_u8(uint8x16_t __a) {
3579 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); }
3580 __ai uint16x8_t vrev32q_u16(uint16x8_t __a) {
3581 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); }
3582 __ai poly8x16_t vrev32q_p8(poly8x16_t __a) {
3583 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); }
3584 __ai poly16x8_t vrev32q_p16(poly16x8_t __a) {
3585 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2, 5, 4, 7, 6); }
3586
3587 __ai int8x8_t vrev64_s8(int8x8_t __a) {
3588 return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); }
3589 __ai int16x4_t vrev64_s16(int16x4_t __a) {
3590 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); }
3591 __ai int32x2_t vrev64_s32(int32x2_t __a) {
3592 return __builtin_shufflevector(__a, __a, 1, 0); }
3593 __ai uint8x8_t vrev64_u8(uint8x8_t __a) {
3594 return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); }
3595 __ai uint16x4_t vrev64_u16(uint16x4_t __a) {
3596 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); }
3597 __ai uint32x2_t vrev64_u32(uint32x2_t __a) {
3598 return __builtin_shufflevector(__a, __a, 1, 0); }
3599 __ai poly8x8_t vrev64_p8(poly8x8_t __a) {
3600 return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0); }
3601 __ai poly16x4_t vrev64_p16(poly16x4_t __a) {
3602 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); }
3603 __ai float32x2_t vrev64_f32(float32x2_t __a) {
3604 return __builtin_shufflevector(__a, __a, 1, 0); }
3605 __ai int8x16_t vrev64q_s8(int8x16_t __a) {
3606 return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 1 2, 11, 10, 9, 8); }
3607 __ai int16x8_t vrev64q_s16(int16x8_t __a) {
3608 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); }
3609 __ai int32x4_t vrev64q_s32(int32x4_t __a) {
3610 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
3611 __ai uint8x16_t vrev64q_u8(uint8x16_t __a) {
3612 return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 1 2, 11, 10, 9, 8); }
3613 __ai uint16x8_t vrev64q_u16(uint16x8_t __a) {
3614 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); }
3615 __ai uint32x4_t vrev64q_u32(uint32x4_t __a) {
3616 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
3617 __ai poly8x16_t vrev64q_p8(poly8x16_t __a) {
3618 return __builtin_shufflevector(__a, __a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 1 2, 11, 10, 9, 8); }
3619 __ai poly16x8_t vrev64q_p16(poly16x8_t __a) {
3620 return __builtin_shufflevector(__a, __a, 3, 2, 1, 0, 7, 6, 5, 4); }
3621 __ai float32x4_t vrev64q_f32(float32x4_t __a) {
3622 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
3623
/* vrhadd_{s8,s16,s32,u8,u16,u32}: two-operand wrappers over
 * __builtin_neon_vrhadd_v for 64-bit vectors.  Operands whose type is not
 * already int8x8_t are cast to it, and the builtin's result is cast back to
 * the caller's vector type.  The trailing integer differs per element type
 * (0/1/2 for s8/s16/s32, 16/17/18 for u8/u16/u32); presumably it is the type
 * code the builtin uses to select element width and signedness -- confirm
 * against clang's NEON builtin tables.
 * NOTE(review): by NEON naming this should be the rounding halving add;
 * verify against the ARM intrinsics reference. */
3624 __ai int8x8_t vrhadd_s8(int8x8_t __a, int8x8_t __b) {
3625 return (int8x8_t)__builtin_neon_vrhadd_v(__a, __b, 0); }
3626 __ai int16x4_t vrhadd_s16(int16x4_t __a, int16x4_t __b) {
3627 return (int16x4_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 1); }
3628 __ai int32x2_t vrhadd_s32(int32x2_t __a, int32x2_t __b) {
3629 return (int32x2_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 2); }
3630 __ai uint8x8_t vrhadd_u8(uint8x8_t __a, uint8x8_t __b) {
3631 return (uint8x8_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 16); }
3632 __ai uint16x4_t vrhadd_u16(uint16x4_t __a, uint16x4_t __b) {
3633 return (uint16x4_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 17); }
3634 __ai uint32x2_t vrhadd_u32(uint32x2_t __a, uint32x2_t __b) {
3635 return (uint32x2_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 18); }
/* vrhaddq_{s8,s16,s32,u8,u16,u32}: 128-bit counterparts of vrhadd_*.  Each
 * passes both operands to __builtin_neon_vrhaddq_v as int8x16_t (casting where
 * needed) and casts the result back to the caller's vector type.  The trailing
 * integer is 32/33/34 for s8/s16/s32 and 48/49/50 for u8/u16/u32, i.e. the
 * 64-bit variants' values plus 32; presumably a type code -- confirm against
 * clang's NEON builtin tables. */
3636 __ai int8x16_t vrhaddq_s8(int8x16_t __a, int8x16_t __b) {
3637 return (int8x16_t)__builtin_neon_vrhaddq_v(__a, __b, 32); }
3638 __ai int16x8_t vrhaddq_s16(int16x8_t __a, int16x8_t __b) {
3639 return (int16x8_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 33) ; }
3640 __ai int32x4_t vrhaddq_s32(int32x4_t __a, int32x4_t __b) {
3641 return (int32x4_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 34) ; }
3642 __ai uint8x16_t vrhaddq_u8(uint8x16_t __a, uint8x16_t __b) {
3643 return (uint8x16_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 48 ); }
3644 __ai uint16x8_t vrhaddq_u16(uint16x8_t __a, uint16x8_t __b) {
3645 return (uint16x8_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 49 ); }
3646 __ai uint32x4_t vrhaddq_u32(uint32x4_t __a, uint32x4_t __b) {
3647 return (uint32x4_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 50 ); }
3648
/* vrshl_{s8,s16,s32,s64,u8,u16,u32,u64}: wrappers over __builtin_neon_vrshl_v
 * for 64-bit vectors.  __a carries the caller's element type; __b is a signed
 * vector of the same lane width even in the unsigned variants.  Both are
 * handed to the builtin as int8x8_t and the result is cast back to the type
 * of __a.  Trailing integer: 0..3 for s8..s64, 16..19 for u8..u64 (presumably
 * a type code -- confirm against clang's NEON builtin tables).
 * NOTE(review): by NEON naming this should be a rounding shift left by a
 * per-lane signed count; verify against the ARM intrinsics reference. */
3649 __ai int8x8_t vrshl_s8(int8x8_t __a, int8x8_t __b) {
3650 return (int8x8_t)__builtin_neon_vrshl_v(__a, __b, 0); }
3651 __ai int16x4_t vrshl_s16(int16x4_t __a, int16x4_t __b) {
3652 return (int16x4_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 1); }
3653 __ai int32x2_t vrshl_s32(int32x2_t __a, int32x2_t __b) {
3654 return (int32x2_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 2); }
3655 __ai int64x1_t vrshl_s64(int64x1_t __a, int64x1_t __b) {
3656 return (int64x1_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 3); }
3657 __ai uint8x8_t vrshl_u8(uint8x8_t __a, int8x8_t __b) {
3658 return (uint8x8_t)__builtin_neon_vrshl_v((int8x8_t)__a, __b, 16); }
3659 __ai uint16x4_t vrshl_u16(uint16x4_t __a, int16x4_t __b) {
3660 return (uint16x4_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 17); }
3661 __ai uint32x2_t vrshl_u32(uint32x2_t __a, int32x2_t __b) {
3662 return (uint32x2_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 18); }
3663 __ai uint64x1_t vrshl_u64(uint64x1_t __a, int64x1_t __b) {
3664 return (uint64x1_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 19); }
/* vrshlq_{s8,s16,s32,s64,u8,u16,u32,u64}: 128-bit counterparts of vrshl_*,
 * calling __builtin_neon_vrshlq_v.  __b is a signed vector of the same lane
 * width as __a even in the unsigned variants; both reach the builtin as
 * int8x16_t and the result is cast back to the type of __a.  Trailing integer:
 * 32..35 for s8..s64, 48..51 for u8..u64 (presumably a type code -- confirm
 * against clang's NEON builtin tables). */
3665 __ai int8x16_t vrshlq_s8(int8x16_t __a, int8x16_t __b) {
3666 return (int8x16_t)__builtin_neon_vrshlq_v(__a, __b, 32); }
3667 __ai int16x8_t vrshlq_s16(int16x8_t __a, int16x8_t __b) {
3668 return (int16x8_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
3669 __ai int32x4_t vrshlq_s32(int32x4_t __a, int32x4_t __b) {
3670 return (int32x4_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
3671 __ai int64x2_t vrshlq_s64(int64x2_t __a, int64x2_t __b) {
3672 return (int64x2_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
3673 __ai uint8x16_t vrshlq_u8(uint8x16_t __a, int8x16_t __b) {
3674 return (uint8x16_t)__builtin_neon_vrshlq_v((int8x16_t)__a, __b, 48); }
3675 __ai uint16x8_t vrshlq_u16(uint16x8_t __a, int16x8_t __b) {
3676 return (uint16x8_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 49) ; }
3677 __ai uint32x4_t vrshlq_u32(uint32x4_t __a, int32x4_t __b) {
3678 return (uint32x4_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 50) ; }
3679 __ai uint64x2_t vrshlq_u64(uint64x2_t __a, int64x2_t __b) {
3680 return (uint64x2_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 51) ; }
3681
/* vrshrn_n_{s16,s32,s64,u16,u32,u64}(a, __b): statement-expression macros over
 * __builtin_neon_vrshrn_n_v.  `a` is evaluated once into a typed 128-bit local
 * and passed as int8x16_t; `__b` is forwarded textually to the builtin rather
 * than copied to a local (presumably because it must stay a compile-time
 * constant -- confirm).  The result is cast to a 64-bit vector whose element
 * type is half the width of the input's (e.g. int16x8_t in, int8x8_t out).
 * Trailing integer: 0/1/2 for the signed forms, 16/17/18 for the unsigned
 * ones, matching the result element type.
 * NOTE(review): by NEON naming this should be a rounding narrowing shift
 * right by immediate; verify against the ARM intrinsics reference. */
3682 #define vrshrn_n_s16(a, __b) __extension__ ({ \
3683 int16x8_t __a = (a); \
3684 (int8x8_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 0); })
3685 #define vrshrn_n_s32(a, __b) __extension__ ({ \
3686 int32x4_t __a = (a); \
3687 (int16x4_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 1); })
3688 #define vrshrn_n_s64(a, __b) __extension__ ({ \
3689 int64x2_t __a = (a); \
3690 (int32x2_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 2); })
3691 #define vrshrn_n_u16(a, __b) __extension__ ({ \
3692 uint16x8_t __a = (a); \
3693 (uint8x8_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 16); })
3694 #define vrshrn_n_u32(a, __b) __extension__ ({ \
3695 uint32x4_t __a = (a); \
3696 (uint16x4_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 17); })
3697 #define vrshrn_n_u64(a, __b) __extension__ ({ \
3698 uint64x2_t __a = (a); \
3699 (uint32x2_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 18); })
3700
/* vrshr_n_{s8..s64,u8..u64}(a, __b): statement-expression macros over
 * __builtin_neon_vrshr_n_v for 64-bit vectors.  `a` is evaluated once into a
 * typed local and passed as int8x8_t; `__b` is forwarded textually to the
 * builtin (presumably it must be a compile-time constant -- confirm).  The
 * result is cast back to the input vector type.  Trailing integer: 0..3 for
 * s8..s64, 16..19 for u8..u64.
 * NOTE(review): by NEON naming this should be a rounding shift right by
 * immediate; verify against the ARM intrinsics reference. */
3701 #define vrshr_n_s8(a, __b) __extension__ ({ \
3702 int8x8_t __a = (a); \
3703 (int8x8_t)__builtin_neon_vrshr_n_v(__a, __b, 0); })
3704 #define vrshr_n_s16(a, __b) __extension__ ({ \
3705 int16x4_t __a = (a); \
3706 (int16x4_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 1); })
3707 #define vrshr_n_s32(a, __b) __extension__ ({ \
3708 int32x2_t __a = (a); \
3709 (int32x2_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 2); })
3710 #define vrshr_n_s64(a, __b) __extension__ ({ \
3711 int64x1_t __a = (a); \
3712 (int64x1_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 3); })
3713 #define vrshr_n_u8(a, __b) __extension__ ({ \
3714 uint8x8_t __a = (a); \
3715 (uint8x8_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 16); })
3716 #define vrshr_n_u16(a, __b) __extension__ ({ \
3717 uint16x4_t __a = (a); \
3718 (uint16x4_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 17); })
3719 #define vrshr_n_u32(a, __b) __extension__ ({ \
3720 uint32x2_t __a = (a); \
3721 (uint32x2_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 18); })
3722 #define vrshr_n_u64(a, __b) __extension__ ({ \
3723 uint64x1_t __a = (a); \
3724 (uint64x1_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 19); })
/* vrshrq_n_{s8..s64,u8..u64}(a, __b): 128-bit counterparts of vrshr_n_*,
 * calling __builtin_neon_vrshrq_n_v.  `a` is evaluated once into a typed
 * local and passed as int8x16_t; `__b` is forwarded textually; the result is
 * cast back to the input vector type.  Trailing integer: 32..35 for s8..s64,
 * 48..51 for u8..u64. */
3725 #define vrshrq_n_s8(a, __b) __extension__ ({ \
3726 int8x16_t __a = (a); \
3727 (int8x16_t)__builtin_neon_vrshrq_n_v(__a, __b, 32); })
3728 #define vrshrq_n_s16(a, __b) __extension__ ({ \
3729 int16x8_t __a = (a); \
3730 (int16x8_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 33); })
3731 #define vrshrq_n_s32(a, __b) __extension__ ({ \
3732 int32x4_t __a = (a); \
3733 (int32x4_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 34); })
3734 #define vrshrq_n_s64(a, __b) __extension__ ({ \
3735 int64x2_t __a = (a); \
3736 (int64x2_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 35); })
3737 #define vrshrq_n_u8(a, __b) __extension__ ({ \
3738 uint8x16_t __a = (a); \
3739 (uint8x16_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 48); })
3740 #define vrshrq_n_u16(a, __b) __extension__ ({ \
3741 uint16x8_t __a = (a); \
3742 (uint16x8_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 49); })
3743 #define vrshrq_n_u32(a, __b) __extension__ ({ \
3744 uint32x4_t __a = (a); \
3745 (uint32x4_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 50); })
3746 #define vrshrq_n_u64(a, __b) __extension__ ({ \
3747 uint64x2_t __a = (a); \
3748 (uint64x2_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 51); })
3749
/* vrsqrte_{f32,u32} / vrsqrteq_{f32,u32}: one-operand wrappers over
 * __builtin_neon_vrsqrte_v (64-bit vectors, operand cast to int8x8_t) and
 * __builtin_neon_vrsqrteq_v (128-bit vectors, operand cast to int8x16_t).
 * The result is cast back to the operand's type.  Trailing integer: 7 for
 * float32x2_t, 18 for uint32x2_t, 39 and 50 for the 128-bit forms.
 * NOTE(review): by NEON naming this should be the reciprocal square-root
 * estimate; verify against the ARM intrinsics reference. */
3750 __ai float32x2_t vrsqrte_f32(float32x2_t __a) {
3751 return (float32x2_t)__builtin_neon_vrsqrte_v((int8x8_t)__a, 7); }
3752 __ai uint32x2_t vrsqrte_u32(uint32x2_t __a) {
3753 return (uint32x2_t)__builtin_neon_vrsqrte_v((int8x8_t)__a, 18); }
3754 __ai float32x4_t vrsqrteq_f32(float32x4_t __a) {
3755 return (float32x4_t)__builtin_neon_vrsqrteq_v((int8x16_t)__a, 39); }
3756 __ai uint32x4_t vrsqrteq_u32(uint32x4_t __a) {
3757 return (uint32x4_t)__builtin_neon_vrsqrteq_v((int8x16_t)__a, 50); }
3758
/* vrsqrts_f32 / vrsqrtsq_f32: two-operand float wrappers over
 * __builtin_neon_vrsqrts_v (operands cast to int8x8_t, trailing integer 7) and
 * __builtin_neon_vrsqrtsq_v (operands cast to int8x16_t, trailing integer 39).
 * The result is cast back to the float vector type.
 * NOTE(review): by NEON naming this should be the reciprocal square-root
 * step; verify against the ARM intrinsics reference. */
3759 __ai float32x2_t vrsqrts_f32(float32x2_t __a, float32x2_t __b) {
3760 return (float32x2_t)__builtin_neon_vrsqrts_v((int8x8_t)__a, (int8x8_t)__b, 7); }
3761 __ai float32x4_t vrsqrtsq_f32(float32x4_t __a, float32x4_t __b) {
3762 return (float32x4_t)__builtin_neon_vrsqrtsq_v((int8x16_t)__a, (int8x16_t)__b, 39); }
3763
/* vrsra_n_{s8..s64,u8..u64}(a, b, __c): statement-expression macros over
 * __builtin_neon_vrsra_n_v for 64-bit vectors.  `a` and `b` are each evaluated
 * once into typed locals and passed as int8x8_t; `__c` is forwarded textually
 * to the builtin (presumably it must be a compile-time constant -- confirm).
 * The result is cast back to the operands' vector type.  Trailing integer:
 * 0..3 for s8..s64, 16..19 for u8..u64.
 * NOTE(review): by NEON naming this should be a rounding shift right and
 * accumulate; verify against the ARM intrinsics reference. */
3764 #define vrsra_n_s8(a, b, __c) __extension__ ({ \
3765 int8x8_t __a = (a); int8x8_t __b = (b); \
3766 (int8x8_t)__builtin_neon_vrsra_n_v(__a, __b, __c, 0); })
3767 #define vrsra_n_s16(a, b, __c) __extension__ ({ \
3768 int16x4_t __a = (a); int16x4_t __b = (b); \
3769 (int16x4_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
3770 #define vrsra_n_s32(a, b, __c) __extension__ ({ \
3771 int32x2_t __a = (a); int32x2_t __b = (b); \
3772 (int32x2_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
3773 #define vrsra_n_s64(a, b, __c) __extension__ ({ \
3774 int64x1_t __a = (a); int64x1_t __b = (b); \
3775 (int64x1_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
3776 #define vrsra_n_u8(a, b, __c) __extension__ ({ \
3777 uint8x8_t __a = (a); uint8x8_t __b = (b); \
3778 (uint8x8_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); })
3779 #define vrsra_n_u16(a, b, __c) __extension__ ({ \
3780 uint16x4_t __a = (a); uint16x4_t __b = (b); \
3781 (uint16x4_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); } )
3782 #define vrsra_n_u32(a, b, __c) __extension__ ({ \
3783 uint32x2_t __a = (a); uint32x2_t __b = (b); \
3784 (uint32x2_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); } )
3785 #define vrsra_n_u64(a, b, __c) __extension__ ({ \
3786 uint64x1_t __a = (a); uint64x1_t __b = (b); \
3787 (uint64x1_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); } )
/* vrsraq_n_{s8..s64,u8..u64}(a, b, __c): 128-bit counterparts of vrsra_n_*,
 * calling __builtin_neon_vrsraq_n_v.  `a` and `b` are each evaluated once into
 * typed locals and passed as int8x16_t; `__c` is forwarded textually; the
 * result is cast back to the operands' vector type.  Trailing integer: 32..35
 * for s8..s64, 48..51 for u8..u64. */
3788 #define vrsraq_n_s8(a, b, __c) __extension__ ({ \
3789 int8x16_t __a = (a); int8x16_t __b = (b); \
3790 (int8x16_t)__builtin_neon_vrsraq_n_v(__a, __b, __c, 32); })
3791 #define vrsraq_n_s16(a, b, __c) __extension__ ({ \
3792 int16x8_t __a = (a); int16x8_t __b = (b); \
3793 (int16x8_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); })
3794 #define vrsraq_n_s32(a, b, __c) __extension__ ({ \
3795 int32x4_t __a = (a); int32x4_t __b = (b); \
3796 (int32x4_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); })
3797 #define vrsraq_n_s64(a, b, __c) __extension__ ({ \
3798 int64x2_t __a = (a); int64x2_t __b = (b); \
3799 (int64x2_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); })
3800 #define vrsraq_n_u8(a, b, __c) __extension__ ({ \
3801 uint8x16_t __a = (a); uint8x16_t __b = (b); \
3802 (uint8x16_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48) ; })
3803 #define vrsraq_n_u16(a, b, __c) __extension__ ({ \
3804 uint16x8_t __a = (a); uint16x8_t __b = (b); \
3805 (uint16x8_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49) ; })
3806 #define vrsraq_n_u32(a, b, __c) __extension__ ({ \
3807 uint32x4_t __a = (a); uint32x4_t __b = (b); \
3808 (uint32x4_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50) ; })
3809 #define vrsraq_n_u64(a, b, __c) __extension__ ({ \
3810 uint64x2_t __a = (a); uint64x2_t __b = (b); \
3811 (uint64x2_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51) ; })
3812
/* vrsubhn_{s16,s32,s64,u16,u32,u64}: two-operand wrappers over
 * __builtin_neon_vrsubhn_v.  Operands are 128-bit vectors passed as
 * int8x16_t; the result is cast to a 64-bit vector whose element type is half
 * the width of the operands' (e.g. int16x8_t in, int8x8_t out).  Trailing
 * integer: 0/1/2 for the signed forms, 16/17/18 for the unsigned ones,
 * matching the result element type.
 * NOTE(review): by NEON naming this should be the rounding subtract returning
 * the high half; verify against the ARM intrinsics reference. */
3813 __ai int8x8_t vrsubhn_s16(int16x8_t __a, int16x8_t __b) {
3814 return (int8x8_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 0); }
3815 __ai int16x4_t vrsubhn_s32(int32x4_t __a, int32x4_t __b) {
3816 return (int16x4_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 1); }
3817 __ai int32x2_t vrsubhn_s64(int64x2_t __a, int64x2_t __b) {
3818 return (int32x2_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 2); }
3819 __ai uint8x8_t vrsubhn_u16(uint16x8_t __a, uint16x8_t __b) {
3820 return (uint8x8_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 16) ; }
3821 __ai uint16x4_t vrsubhn_u32(uint32x4_t __a, uint32x4_t __b) {
3822 return (uint16x4_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 17 ); }
3823 __ai uint32x2_t vrsubhn_u64(uint64x2_t __a, uint64x2_t __b) {
3824 return (uint32x2_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 18 ); }
3825
/* vset_lane_{u8,u16,u32,s8,s16,s32,p8,p16,f32}(a, b, __c): statement-expression
 * macros for 64-bit vectors.  The scalar `a` and the vector `b` are each
 * evaluated once into typed locals; `__c` is forwarded textually to the
 * builtin (presumably the lane index, required to be a compile-time constant
 * -- confirm).  Integer and polynomial variants call
 * __builtin_neon_vset_lane_i8/_i16/_i32 according to lane width, casting
 * unsigned and polynomial vectors to the signed vector type of that width;
 * the float variant calls __builtin_neon_vset_lane_f32.  The result is cast
 * back to the type of `b`. */
3826 #define vset_lane_u8(a, b, __c) __extension__ ({ \
3827 uint8_t __a = (a); uint8x8_t __b = (b); \
3828 (uint8x8_t)__builtin_neon_vset_lane_i8(__a, (int8x8_t)__b, __c); })
3829 #define vset_lane_u16(a, b, __c) __extension__ ({ \
3830 uint16_t __a = (a); uint16x4_t __b = (b); \
3831 (uint16x4_t)__builtin_neon_vset_lane_i16(__a, (int16x4_t)__b, __c); })
3832 #define vset_lane_u32(a, b, __c) __extension__ ({ \
3833 uint32_t __a = (a); uint32x2_t __b = (b); \
3834 (uint32x2_t)__builtin_neon_vset_lane_i32(__a, (int32x2_t)__b, __c); })
3835 #define vset_lane_s8(a, b, __c) __extension__ ({ \
3836 int8_t __a = (a); int8x8_t __b = (b); \
3837 (int8x8_t)__builtin_neon_vset_lane_i8(__a, __b, __c); })
3838 #define vset_lane_s16(a, b, __c) __extension__ ({ \
3839 int16_t __a = (a); int16x4_t __b = (b); \
3840 (int16x4_t)__builtin_neon_vset_lane_i16(__a, __b, __c); })
3841 #define vset_lane_s32(a, b, __c) __extension__ ({ \
3842 int32_t __a = (a); int32x2_t __b = (b); \
3843 (int32x2_t)__builtin_neon_vset_lane_i32(__a, __b, __c); })
3844 #define vset_lane_p8(a, b, __c) __extension__ ({ \
3845 poly8_t __a = (a); poly8x8_t __b = (b); \
3846 (poly8x8_t)__builtin_neon_vset_lane_i8(__a, (int8x8_t)__b, __c); })
3847 #define vset_lane_p16(a, b, __c) __extension__ ({ \
3848 poly16_t __a = (a); poly16x4_t __b = (b); \
3849 (poly16x4_t)__builtin_neon_vset_lane_i16(__a, (int16x4_t)__b, __c); })
3850 #define vset_lane_f32(a, b, __c) __extension__ ({ \
3851 float32_t __a = (a); float32x2_t __b = (b); \
3852 (float32x2_t)__builtin_neon_vset_lane_f32(__a, __b, __c); })
/* vsetq_lane_{u8,u16,u32,s8,s16,s32,p8,p16,f32}(a, b, __c): 128-bit
 * counterparts of vset_lane_*.  The scalar `a` and the vector `b` are each
 * evaluated once into typed locals; `__c` is forwarded textually to the
 * builtin (presumably the lane index -- confirm).  Integer and polynomial
 * variants call __builtin_neon_vsetq_lane_i8/_i16/_i32 according to lane
 * width, casting unsigned and polynomial vectors to the signed vector type of
 * that width; the float variant calls __builtin_neon_vsetq_lane_f32.  The
 * result is cast back to the type of `b`. */
3853 #define vsetq_lane_u8(a, b, __c) __extension__ ({ \
3854 uint8_t __a = (a); uint8x16_t __b = (b); \
3855 (uint8x16_t)__builtin_neon_vsetq_lane_i8(__a, (int8x16_t)__b, __c); })
3856 #define vsetq_lane_u16(a, b, __c) __extension__ ({ \
3857 uint16_t __a = (a); uint16x8_t __b = (b); \
3858 (uint16x8_t)__builtin_neon_vsetq_lane_i16(__a, (int16x8_t)__b, __c); })
3859 #define vsetq_lane_u32(a, b, __c) __extension__ ({ \
3860 uint32_t __a = (a); uint32x4_t __b = (b); \
3861 (uint32x4_t)__builtin_neon_vsetq_lane_i32(__a, (int32x4_t)__b, __c); })
3862 #define vsetq_lane_s8(a, b, __c) __extension__ ({ \
3863 int8_t __a = (a); int8x16_t __b = (b); \
3864 (int8x16_t)__builtin_neon_vsetq_lane_i8(__a, __b, __c); })
3865 #define vsetq_lane_s16(a, b, __c) __extension__ ({ \
3866 int16_t __a = (a); int16x8_t __b = (b); \
3867 (int16x8_t)__builtin_neon_vsetq_lane_i16(__a, __b, __c); })
3868 #define vsetq_lane_s32(a, b, __c) __extension__ ({ \
3869 int32_t __a = (a); int32x4_t __b = (b); \
3870 (int32x4_t)__builtin_neon_vsetq_lane_i32(__a, __b, __c); })
3871 #define vsetq_lane_p8(a, b, __c) __extension__ ({ \
3872 poly8_t __a = (a); poly8x16_t __b = (b); \
3873 (poly8x16_t)__builtin_neon_vsetq_lane_i8(__a, (int8x16_t)__b, __c); })
3874 #define vsetq_lane_p16(a, b, __c) __extension__ ({ \
3875 poly16_t __a = (a); poly16x8_t __b = (b); \
3876 (poly16x8_t)__builtin_neon_vsetq_lane_i16(__a, (int16x8_t)__b, __c); })
3877 #define vsetq_lane_f32(a, b, __c) __extension__ ({ \
3878 float32_t __a = (a); float32x4_t __b = (b); \
3879 (float32x4_t)__builtin_neon_vsetq_lane_f32(__a, __b, __c); })
/* 64-bit-lane variants: vset_lane_{s64,u64} call __builtin_neon_vset_lane_i64
 * and vsetq_lane_{s64,u64} call __builtin_neon_vsetq_lane_i64.  As in the
 * narrower variants, `a` and `b` are each evaluated once into typed locals,
 * unsigned vectors are cast to the signed 64-bit vector type for the call,
 * `__c` is forwarded textually, and the result is cast back to the type of
 * `b`. */
3880 #define vset_lane_s64(a, b, __c) __extension__ ({ \
3881 int64_t __a = (a); int64x1_t __b = (b); \
3882 (int64x1_t)__builtin_neon_vset_lane_i64(__a, __b, __c); })
3883 #define vset_lane_u64(a, b, __c) __extension__ ({ \
3884 uint64_t __a = (a); uint64x1_t __b = (b); \
3885 (uint64x1_t)__builtin_neon_vset_lane_i64(__a, (int64x1_t)__b, __c); })
3886 #define vsetq_lane_s64(a, b, __c) __extension__ ({ \
3887 int64_t __a = (a); int64x2_t __b = (b); \
3888 (int64x2_t)__builtin_neon_vsetq_lane_i64(__a, __b, __c); })
3889 #define vsetq_lane_u64(a, b, __c) __extension__ ({ \
3890 uint64_t __a = (a); uint64x2_t __b = (b); \
3891 (uint64x2_t)__builtin_neon_vsetq_lane_i64(__a, (int64x2_t)__b, __c); })
3892
/* vshl_{s8,s16,s32,s64,u8,u16,u32,u64}: wrappers over __builtin_neon_vshl_v
 * for 64-bit vectors.  __a carries the caller's element type; __b is a signed
 * vector of the same lane width even in the unsigned variants.  Both are
 * handed to the builtin as int8x8_t and the result is cast back to the type
 * of __a.  Trailing integer: 0..3 for s8..s64, 16..19 for u8..u64 (presumably
 * a type code -- confirm against clang's NEON builtin tables).
 * NOTE(review): by NEON naming this should be a shift left by a per-lane
 * signed count; verify against the ARM intrinsics reference. */
3893 __ai int8x8_t vshl_s8(int8x8_t __a, int8x8_t __b) {
3894 return (int8x8_t)__builtin_neon_vshl_v(__a, __b, 0); }
3895 __ai int16x4_t vshl_s16(int16x4_t __a, int16x4_t __b) {
3896 return (int16x4_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 1); }
3897 __ai int32x2_t vshl_s32(int32x2_t __a, int32x2_t __b) {
3898 return (int32x2_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 2); }
3899 __ai int64x1_t vshl_s64(int64x1_t __a, int64x1_t __b) {
3900 return (int64x1_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 3); }
3901 __ai uint8x8_t vshl_u8(uint8x8_t __a, int8x8_t __b) {
3902 return (uint8x8_t)__builtin_neon_vshl_v((int8x8_t)__a, __b, 16); }
3903 __ai uint16x4_t vshl_u16(uint16x4_t __a, int16x4_t __b) {
3904 return (uint16x4_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 17); }
3905 __ai uint32x2_t vshl_u32(uint32x2_t __a, int32x2_t __b) {
3906 return (uint32x2_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 18); }
3907 __ai uint64x1_t vshl_u64(uint64x1_t __a, int64x1_t __b) {
3908 return (uint64x1_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 19); }
/* vshlq_{s8,s16,s32,s64,u8,u16,u32,u64}: 128-bit counterparts of vshl_*,
 * calling __builtin_neon_vshlq_v.  __b is a signed vector of the same lane
 * width as __a even in the unsigned variants; both reach the builtin as
 * int8x16_t and the result is cast back to the type of __a.  Trailing integer:
 * 32..35 for s8..s64, 48..51 for u8..u64 (presumably a type code -- confirm
 * against clang's NEON builtin tables). */
3909 __ai int8x16_t vshlq_s8(int8x16_t __a, int8x16_t __b) {
3910 return (int8x16_t)__builtin_neon_vshlq_v(__a, __b, 32); }
3911 __ai int16x8_t vshlq_s16(int16x8_t __a, int16x8_t __b) {
3912 return (int16x8_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
3913 __ai int32x4_t vshlq_s32(int32x4_t __a, int32x4_t __b) {
3914 return (int32x4_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
3915 __ai int64x2_t vshlq_s64(int64x2_t __a, int64x2_t __b) {
3916 return (int64x2_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
3917 __ai uint8x16_t vshlq_u8(uint8x16_t __a, int8x16_t __b) {
3918 return (uint8x16_t)__builtin_neon_vshlq_v((int8x16_t)__a, __b, 48); }
3919 __ai uint16x8_t vshlq_u16(uint16x8_t __a, int16x8_t __b) {
3920 return (uint16x8_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
3921 __ai uint32x4_t vshlq_u32(uint32x4_t __a, int32x4_t __b) {
3922 return (uint32x4_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
3923 __ai uint64x2_t vshlq_u64(uint64x2_t __a, int64x2_t __b) {
3924 return (uint64x2_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
3925
/* vshll_n_{s8,s16,s32,u8,u16,u32}(a, __b): statement-expression macros over
 * __builtin_neon_vshll_n_v.  `a` is a 64-bit vector, evaluated once into a
 * typed local and passed as int8x8_t; `__b` is forwarded textually to the
 * builtin (presumably it must be a compile-time constant -- confirm).  The
 * result is cast to a 128-bit vector whose element type is twice the width of
 * the input's (e.g. int8x8_t in, int16x8_t out).  Trailing integer: 33/34/35
 * for the signed forms, 49/50/51 for the unsigned ones, matching the result
 * element type.
 * NOTE(review): by NEON naming this should be a widening shift left by
 * immediate; verify against the ARM intrinsics reference. */
3926 #define vshll_n_s8(a, __b) __extension__ ({ \
3927 int8x8_t __a = (a); \
3928 (int16x8_t)__builtin_neon_vshll_n_v(__a, __b, 33); })
3929 #define vshll_n_s16(a, __b) __extension__ ({ \
3930 int16x4_t __a = (a); \
3931 (int32x4_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 34); })
3932 #define vshll_n_s32(a, __b) __extension__ ({ \
3933 int32x2_t __a = (a); \
3934 (int64x2_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 35); })
3935 #define vshll_n_u8(a, __b) __extension__ ({ \
3936 uint8x8_t __a = (a); \
3937 (uint16x8_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 49); })
3938 #define vshll_n_u16(a, __b) __extension__ ({ \
3939 uint16x4_t __a = (a); \
3940 (uint32x4_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 50); })
3941 #define vshll_n_u32(a, __b) __extension__ ({ \
3942 uint32x2_t __a = (a); \
3943 (uint64x2_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 51); })
3944
/* vshl_n_{s8..s64,u8..u64}(a, __b): statement-expression macros over
 * __builtin_neon_vshl_n_v for 64-bit vectors.  `a` is evaluated once into a
 * typed local and passed as int8x8_t; `__b` is forwarded textually to the
 * builtin (presumably it must be a compile-time constant -- confirm).  The
 * result is cast back to the input vector type.  Trailing integer: 0..3 for
 * s8..s64, 16..19 for u8..u64.
 * NOTE(review): by NEON naming this should be a shift left by immediate;
 * verify against the ARM intrinsics reference. */
3945 #define vshl_n_s8(a, __b) __extension__ ({ \
3946 int8x8_t __a = (a); \
3947 (int8x8_t)__builtin_neon_vshl_n_v(__a, __b, 0); })
3948 #define vshl_n_s16(a, __b) __extension__ ({ \
3949 int16x4_t __a = (a); \
3950 (int16x4_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 1); })
3951 #define vshl_n_s32(a, __b) __extension__ ({ \
3952 int32x2_t __a = (a); \
3953 (int32x2_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 2); })
3954 #define vshl_n_s64(a, __b) __extension__ ({ \
3955 int64x1_t __a = (a); \
3956 (int64x1_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 3); })
3957 #define vshl_n_u8(a, __b) __extension__ ({ \
3958 uint8x8_t __a = (a); \
3959 (uint8x8_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 16); })
3960 #define vshl_n_u16(a, __b) __extension__ ({ \
3961 uint16x4_t __a = (a); \
3962 (uint16x4_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 17); })
3963 #define vshl_n_u32(a, __b) __extension__ ({ \
3964 uint32x2_t __a = (a); \
3965 (uint32x2_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 18); })
3966 #define vshl_n_u64(a, __b) __extension__ ({ \
3967 uint64x1_t __a = (a); \
3968 (uint64x1_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 19); })
/* vshlq_n_{s8..s64,u8..u64}(a, __b): 128-bit counterparts of vshl_n_*, calling
 * __builtin_neon_vshlq_n_v.  `a` is evaluated once into a typed local and
 * passed as int8x16_t; `__b` is forwarded textually; the result is cast back
 * to the input vector type.  Trailing integer: 32..35 for s8..s64, 48..51 for
 * u8..u64. */
3969 #define vshlq_n_s8(a, __b) __extension__ ({ \
3970 int8x16_t __a = (a); \
3971 (int8x16_t)__builtin_neon_vshlq_n_v(__a, __b, 32); })
3972 #define vshlq_n_s16(a, __b) __extension__ ({ \
3973 int16x8_t __a = (a); \
3974 (int16x8_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 33); })
3975 #define vshlq_n_s32(a, __b) __extension__ ({ \
3976 int32x4_t __a = (a); \
3977 (int32x4_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 34); })
3978 #define vshlq_n_s64(a, __b) __extension__ ({ \
3979 int64x2_t __a = (a); \
3980 (int64x2_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 35); })
3981 #define vshlq_n_u8(a, __b) __extension__ ({ \
3982 uint8x16_t __a = (a); \
3983 (uint8x16_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 48); })
3984 #define vshlq_n_u16(a, __b) __extension__ ({ \
3985 uint16x8_t __a = (a); \
3986 (uint16x8_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 49); })
3987 #define vshlq_n_u32(a, __b) __extension__ ({ \
3988 uint32x4_t __a = (a); \
3989 (uint32x4_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 50); })
3990 #define vshlq_n_u64(a, __b) __extension__ ({ \
3991 uint64x2_t __a = (a); \
3992 (uint64x2_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 51); })
3993
/* vshrn_n_{s16,s32,s64,u16,u32,u64}(a, __b): statement-expression macros over
 * __builtin_neon_vshrn_n_v.  `a` is evaluated once into a typed 128-bit local
 * and passed as int8x16_t; `__b` is forwarded textually to the builtin
 * (presumably it must be a compile-time constant -- confirm).  The result is
 * cast to a 64-bit vector whose element type is half the width of the input's
 * (e.g. int16x8_t in, int8x8_t out).  Trailing integer: 0/1/2 for the signed
 * forms, 16/17/18 for the unsigned ones, matching the result element type.
 * NOTE(review): by NEON naming this should be a narrowing shift right by
 * immediate; verify against the ARM intrinsics reference. */
3994 #define vshrn_n_s16(a, __b) __extension__ ({ \
3995 int16x8_t __a = (a); \
3996 (int8x8_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 0); })
3997 #define vshrn_n_s32(a, __b) __extension__ ({ \
3998 int32x4_t __a = (a); \
3999 (int16x4_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 1); })
4000 #define vshrn_n_s64(a, __b) __extension__ ({ \
4001 int64x2_t __a = (a); \
4002 (int32x2_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 2); })
4003 #define vshrn_n_u16(a, __b) __extension__ ({ \
4004 uint16x8_t __a = (a); \
4005 (uint8x8_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 16); })
4006 #define vshrn_n_u32(a, __b) __extension__ ({ \
4007 uint32x4_t __a = (a); \
4008 (uint16x4_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 17); })
4009 #define vshrn_n_u64(a, __b) __extension__ ({ \
4010 uint64x2_t __a = (a); \
4011 (uint32x2_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 18); })
4012
/* vshr_n_{s8..s64,u8..u64}(a, __b): statement-expression macros over
 * __builtin_neon_vshr_n_v for 64-bit vectors.  `a` is evaluated once into a
 * typed local and passed as int8x8_t; `__b` is forwarded textually to the
 * builtin (presumably it must be a compile-time constant -- confirm).  The
 * result is cast back to the input vector type.  Trailing integer: 0..3 for
 * s8..s64, 16..19 for u8..u64.
 * NOTE(review): by NEON naming this should be a shift right by immediate;
 * verify against the ARM intrinsics reference. */
4013 #define vshr_n_s8(a, __b) __extension__ ({ \
4014 int8x8_t __a = (a); \
4015 (int8x8_t)__builtin_neon_vshr_n_v(__a, __b, 0); })
4016 #define vshr_n_s16(a, __b) __extension__ ({ \
4017 int16x4_t __a = (a); \
4018 (int16x4_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 1); })
4019 #define vshr_n_s32(a, __b) __extension__ ({ \
4020 int32x2_t __a = (a); \
4021 (int32x2_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 2); })
4022 #define vshr_n_s64(a, __b) __extension__ ({ \
4023 int64x1_t __a = (a); \
4024 (int64x1_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 3); })
4025 #define vshr_n_u8(a, __b) __extension__ ({ \
4026 uint8x8_t __a = (a); \
4027 (uint8x8_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 16); })
4028 #define vshr_n_u16(a, __b) __extension__ ({ \
4029 uint16x4_t __a = (a); \
4030 (uint16x4_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 17); })
4031 #define vshr_n_u32(a, __b) __extension__ ({ \
4032 uint32x2_t __a = (a); \
4033 (uint32x2_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 18); })
4034 #define vshr_n_u64(a, __b) __extension__ ({ \
4035 uint64x1_t __a = (a); \
4036 (uint64x1_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 19); })
/* vshrq_n_{s8..s64,u8..u64}(a, __b): 128-bit counterparts of vshr_n_*, calling
 * __builtin_neon_vshrq_n_v.  `a` is evaluated once into a typed local and
 * passed as int8x16_t; `__b` is forwarded textually; the result is cast back
 * to the input vector type.  Trailing integer: 32..35 for s8..s64, 48..51 for
 * u8..u64. */
4037 #define vshrq_n_s8(a, __b) __extension__ ({ \
4038 int8x16_t __a = (a); \
4039 (int8x16_t)__builtin_neon_vshrq_n_v(__a, __b, 32); })
4040 #define vshrq_n_s16(a, __b) __extension__ ({ \
4041 int16x8_t __a = (a); \
4042 (int16x8_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 33); })
4043 #define vshrq_n_s32(a, __b) __extension__ ({ \
4044 int32x4_t __a = (a); \
4045 (int32x4_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 34); })
4046 #define vshrq_n_s64(a, __b) __extension__ ({ \
4047 int64x2_t __a = (a); \
4048 (int64x2_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 35); })
4049 #define vshrq_n_u8(a, __b) __extension__ ({ \
4050 uint8x16_t __a = (a); \
4051 (uint8x16_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 48); })
4052 #define vshrq_n_u16(a, __b) __extension__ ({ \
4053 uint16x8_t __a = (a); \
4054 (uint16x8_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 49); })
4055 #define vshrq_n_u32(a, __b) __extension__ ({ \
4056 uint32x4_t __a = (a); \
4057 (uint32x4_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 50); })
4058 #define vshrq_n_u64(a, __b) __extension__ ({ \
4059 uint64x2_t __a = (a); \
4060 (uint64x2_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 51); })
4061
/* vsli_n_{s8..s64,u8..u64,p8,p16}(a, b, __c): statement-expression macros over
 * __builtin_neon_vsli_n_v for 64-bit vectors.  `a` and `b` are each evaluated
 * once into typed locals and passed as int8x8_t; `__c` is forwarded textually
 * to the builtin (presumably it must be a compile-time constant -- confirm).
 * The result is cast back to the operands' vector type.  Trailing integer:
 * 0..3 for s8..s64, 16..19 for u8..u64, 4 for p8 and 5 for p16.
 * NOTE(review): by NEON naming this should be shift left and insert; verify
 * against the ARM intrinsics reference. */
4062 #define vsli_n_s8(a, b, __c) __extension__ ({ \
4063 int8x8_t __a = (a); int8x8_t __b = (b); \
4064 (int8x8_t)__builtin_neon_vsli_n_v(__a, __b, __c, 0); })
4065 #define vsli_n_s16(a, b, __c) __extension__ ({ \
4066 int16x4_t __a = (a); int16x4_t __b = (b); \
4067 (int16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
4068 #define vsli_n_s32(a, b, __c) __extension__ ({ \
4069 int32x2_t __a = (a); int32x2_t __b = (b); \
4070 (int32x2_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
4071 #define vsli_n_s64(a, b, __c) __extension__ ({ \
4072 int64x1_t __a = (a); int64x1_t __b = (b); \
4073 (int64x1_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
4074 #define vsli_n_u8(a, b, __c) __extension__ ({ \
4075 uint8x8_t __a = (a); uint8x8_t __b = (b); \
4076 (uint8x8_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); })
4077 #define vsli_n_u16(a, b, __c) __extension__ ({ \
4078 uint16x4_t __a = (a); uint16x4_t __b = (b); \
4079 (uint16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); })
4080 #define vsli_n_u32(a, b, __c) __extension__ ({ \
4081 uint32x2_t __a = (a); uint32x2_t __b = (b); \
4082 (uint32x2_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); })
4083 #define vsli_n_u64(a, b, __c) __extension__ ({ \
4084 uint64x1_t __a = (a); uint64x1_t __b = (b); \
4085 (uint64x1_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); })
4086 #define vsli_n_p8(a, b, __c) __extension__ ({ \
4087 poly8x8_t __a = (a); poly8x8_t __b = (b); \
4088 (poly8x8_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); })
4089 #define vsli_n_p16(a, b, __c) __extension__ ({ \
4090 poly16x4_t __a = (a); poly16x4_t __b = (b); \
4091 (poly16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); })
/* vsliq_n_{s8..s64,u8..u64,p8,p16}(a, b, __c): 128-bit counterparts of
 * vsli_n_*, calling __builtin_neon_vsliq_n_v.  `a` and `b` are each evaluated
 * once into typed locals and passed as int8x16_t; `__c` is forwarded
 * textually; the result is cast back to the operands' vector type.  Trailing
 * integer: 32..35 for s8..s64, 48..51 for u8..u64, 36 for p8 and 37 for
 * p16. */
4092 #define vsliq_n_s8(a, b, __c) __extension__ ({ \
4093 int8x16_t __a = (a); int8x16_t __b = (b); \
4094 (int8x16_t)__builtin_neon_vsliq_n_v(__a, __b, __c, 32); })
4095 #define vsliq_n_s16(a, b, __c) __extension__ ({ \
4096 int16x8_t __a = (a); int16x8_t __b = (b); \
4097 (int16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); })
4098 #define vsliq_n_s32(a, b, __c) __extension__ ({ \
4099 int32x4_t __a = (a); int32x4_t __b = (b); \
4100 (int32x4_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); })
4101 #define vsliq_n_s64(a, b, __c) __extension__ ({ \
4102 int64x2_t __a = (a); int64x2_t __b = (b); \
4103 (int64x2_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); })
4104 #define vsliq_n_u8(a, b, __c) __extension__ ({ \
4105 uint8x16_t __a = (a); uint8x16_t __b = (b); \
4106 (uint8x16_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); })
4107 #define vsliq_n_u16(a, b, __c) __extension__ ({ \
4108 uint16x8_t __a = (a); uint16x8_t __b = (b); \
4109 (uint16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); })
4110 #define vsliq_n_u32(a, b, __c) __extension__ ({ \
4111 uint32x4_t __a = (a); uint32x4_t __b = (b); \
4112 (uint32x4_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); })
4113 #define vsliq_n_u64(a, b, __c) __extension__ ({ \
4114 uint64x2_t __a = (a); uint64x2_t __b = (b); \
4115 (uint64x2_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); })
4116 #define vsliq_n_p8(a, b, __c) __extension__ ({ \
4117 poly8x16_t __a = (a); poly8x16_t __b = (b); \
4118 (poly8x16_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 36); })
4119 #define vsliq_n_p16(a, b, __c) __extension__ ({ \
4120 poly16x8_t __a = (a); poly16x8_t __b = (b); \
4121 (poly16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 37); })
4122
/* vsra_n_{s8..s64,u8..u64}(a, b, __c): statement-expression macros over
 * __builtin_neon_vsra_n_v for 64-bit vectors.  `a` and `b` are each evaluated
 * once into typed locals and passed as int8x8_t; `__c` is forwarded textually
 * to the builtin (presumably it must be a compile-time constant -- confirm).
 * The result is cast back to the operands' vector type.  Trailing integer:
 * 0..3 for s8..s64, 16..19 for u8..u64.
 * NOTE(review): by NEON naming this should be shift right and accumulate;
 * verify against the ARM intrinsics reference. */
4123 #define vsra_n_s8(a, b, __c) __extension__ ({ \
4124 int8x8_t __a = (a); int8x8_t __b = (b); \
4125 (int8x8_t)__builtin_neon_vsra_n_v(__a, __b, __c, 0); })
4126 #define vsra_n_s16(a, b, __c) __extension__ ({ \
4127 int16x4_t __a = (a); int16x4_t __b = (b); \
4128 (int16x4_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
4129 #define vsra_n_s32(a, b, __c) __extension__ ({ \
4130 int32x2_t __a = (a); int32x2_t __b = (b); \
4131 (int32x2_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
4132 #define vsra_n_s64(a, b, __c) __extension__ ({ \
4133 int64x1_t __a = (a); int64x1_t __b = (b); \
4134 (int64x1_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
4135 #define vsra_n_u8(a, b, __c) __extension__ ({ \
4136 uint8x8_t __a = (a); uint8x8_t __b = (b); \
4137 (uint8x8_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); })
4138 #define vsra_n_u16(a, b, __c) __extension__ ({ \
4139 uint16x4_t __a = (a); uint16x4_t __b = (b); \
4140 (uint16x4_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); })
4141 #define vsra_n_u32(a, b, __c) __extension__ ({ \
4142 uint32x2_t __a = (a); uint32x2_t __b = (b); \
4143 (uint32x2_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); })
4144 #define vsra_n_u64(a, b, __c) __extension__ ({ \
4145 uint64x1_t __a = (a); uint64x1_t __b = (b); \
4146 (uint64x1_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); })
/* vsraq_n_{s8..s64,u8..u64}(a, b, __c): 128-bit counterparts of vsra_n_*,
 * calling __builtin_neon_vsraq_n_v.  `a` and `b` are each evaluated once into
 * typed locals and passed as int8x16_t; `__c` is forwarded textually; the
 * result is cast back to the operands' vector type.  Trailing integer: 32..35
 * for s8..s64, 48..51 for u8..u64. */
4147 #define vsraq_n_s8(a, b, __c) __extension__ ({ \
4148 int8x16_t __a = (a); int8x16_t __b = (b); \
4149 (int8x16_t)__builtin_neon_vsraq_n_v(__a, __b, __c, 32); })
4150 #define vsraq_n_s16(a, b, __c) __extension__ ({ \
4151 int16x8_t __a = (a); int16x8_t __b = (b); \
4152 (int16x8_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); })
4153 #define vsraq_n_s32(a, b, __c) __extension__ ({ \
4154 int32x4_t __a = (a); int32x4_t __b = (b); \
4155 (int32x4_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); })
4156 #define vsraq_n_s64(a, b, __c) __extension__ ({ \
4157 int64x2_t __a = (a); int64x2_t __b = (b); \
4158 (int64x2_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); })
4159 #define vsraq_n_u8(a, b, __c) __extension__ ({ \
4160 uint8x16_t __a = (a); uint8x16_t __b = (b); \
4161 (uint8x16_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); })
4162 #define vsraq_n_u16(a, b, __c) __extension__ ({ \
4163 uint16x8_t __a = (a); uint16x8_t __b = (b); \
4164 (uint16x8_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); })
4165 #define vsraq_n_u32(a, b, __c) __extension__ ({ \
4166 uint32x4_t __a = (a); uint32x4_t __b = (b); \
4167 (uint32x4_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); })
4168 #define vsraq_n_u64(a, b, __c) __extension__ ({ \
4169 uint64x2_t __a = (a); uint64x2_t __b = (b); \
4170 (uint64x2_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); })
4171
/* vsri_n_{s8..s64,u8..u64,p8,p16}(a, b, __c): statement-expression macros over
 * __builtin_neon_vsri_n_v for 64-bit vectors.  `a` and `b` are each evaluated
 * once into typed locals and passed as int8x8_t; `__c` is forwarded textually
 * to the builtin (presumably it must be a compile-time constant -- confirm).
 * The result is cast back to the operands' vector type.  Trailing integer:
 * 0..3 for s8..s64, 16..19 for u8..u64, 4 for p8 and 5 for p16.
 * NOTE(review): by NEON naming this should be shift right and insert; verify
 * against the ARM intrinsics reference. */
4172 #define vsri_n_s8(a, b, __c) __extension__ ({ \
4173 int8x8_t __a = (a); int8x8_t __b = (b); \
4174 (int8x8_t)__builtin_neon_vsri_n_v(__a, __b, __c, 0); })
4175 #define vsri_n_s16(a, b, __c) __extension__ ({ \
4176 int16x4_t __a = (a); int16x4_t __b = (b); \
4177 (int16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
4178 #define vsri_n_s32(a, b, __c) __extension__ ({ \
4179 int32x2_t __a = (a); int32x2_t __b = (b); \
4180 (int32x2_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
4181 #define vsri_n_s64(a, b, __c) __extension__ ({ \
4182 int64x1_t __a = (a); int64x1_t __b = (b); \
4183 (int64x1_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
4184 #define vsri_n_u8(a, b, __c) __extension__ ({ \
4185 uint8x8_t __a = (a); uint8x8_t __b = (b); \
4186 (uint8x8_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); })
4187 #define vsri_n_u16(a, b, __c) __extension__ ({ \
4188 uint16x4_t __a = (a); uint16x4_t __b = (b); \
4189 (uint16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); })
4190 #define vsri_n_u32(a, b, __c) __extension__ ({ \
4191 uint32x2_t __a = (a); uint32x2_t __b = (b); \
4192 (uint32x2_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); })
4193 #define vsri_n_u64(a, b, __c) __extension__ ({ \
4194 uint64x1_t __a = (a); uint64x1_t __b = (b); \
4195 (uint64x1_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); })
4196 #define vsri_n_p8(a, b, __c) __extension__ ({ \
4197 poly8x8_t __a = (a); poly8x8_t __b = (b); \
4198 (poly8x8_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); })
4199 #define vsri_n_p16(a, b, __c) __extension__ ({ \
4200 poly16x4_t __a = (a); poly16x4_t __b = (b); \
4201 (poly16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); })
/* vsriq_n_<type>: q-register counterparts of vsri_n_*, built on
 * __builtin_neon_vsriq_n_v. Operands are copied into typed locals and
 * reinterpreted as int8x16_t (no cast for s8); __c is passed through as
 * written. Type codes here are 32..35 for s8..s64, 36/37 for p8/p16 and
 * 48..51 for u8..u64. The result is cast back to the operand type. */
4202 #define vsriq_n_s8(a, b, __c) __extension__ ({ \
4203 int8x16_t __a = (a); int8x16_t __b = (b); \
4204 (int8x16_t)__builtin_neon_vsriq_n_v(__a, __b, __c, 32); })
4205 #define vsriq_n_s16(a, b, __c) __extension__ ({ \
4206 int16x8_t __a = (a); int16x8_t __b = (b); \
4207 (int16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); })
4208 #define vsriq_n_s32(a, b, __c) __extension__ ({ \
4209 int32x4_t __a = (a); int32x4_t __b = (b); \
4210 (int32x4_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); })
4211 #define vsriq_n_s64(a, b, __c) __extension__ ({ \
4212 int64x2_t __a = (a); int64x2_t __b = (b); \
4213 (int64x2_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); })
4214 #define vsriq_n_u8(a, b, __c) __extension__ ({ \
4215 uint8x16_t __a = (a); uint8x16_t __b = (b); \
4216 (uint8x16_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); })
4217 #define vsriq_n_u16(a, b, __c) __extension__ ({ \
4218 uint16x8_t __a = (a); uint16x8_t __b = (b); \
4219 (uint16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); })
4220 #define vsriq_n_u32(a, b, __c) __extension__ ({ \
4221 uint32x4_t __a = (a); uint32x4_t __b = (b); \
4222 (uint32x4_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); })
4223 #define vsriq_n_u64(a, b, __c) __extension__ ({ \
4224 uint64x2_t __a = (a); uint64x2_t __b = (b); \
4225 (uint64x2_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); })
4226 #define vsriq_n_p8(a, b, __c) __extension__ ({ \
4227 poly8x16_t __a = (a); poly8x16_t __b = (b); \
4228 (poly8x16_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 36); })
4229 #define vsriq_n_p16(a, b, __c) __extension__ ({ \
4230 poly16x8_t __a = (a); poly16x8_t __b = (b); \
4231 (poly16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 37); })
4232
/* vst1q_<type>(__a, b): wrappers over __builtin_neon_vst1q_v. The first macro
 * argument (__a) is handed to the builtin exactly as written -- presumably
 * the destination pointer (confirm against the NEON intrinsics reference).
 * b is evaluated once into a typed local, reinterpreted as int8x16_t (no
 * cast for s8), and followed by a per-type code: 32..35 signed, 36/37 poly,
 * 38/39 f16/f32, 48..51 unsigned. The macros yield no value. */
4233 #define vst1q_u8(__a, b) __extension__ ({ \
4234 uint8x16_t __b = (b); \
4235 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 48); })
4236 #define vst1q_u16(__a, b) __extension__ ({ \
4237 uint16x8_t __b = (b); \
4238 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 49); })
4239 #define vst1q_u32(__a, b) __extension__ ({ \
4240 uint32x4_t __b = (b); \
4241 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 50); })
4242 #define vst1q_u64(__a, b) __extension__ ({ \
4243 uint64x2_t __b = (b); \
4244 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 51); })
4245 #define vst1q_s8(__a, b) __extension__ ({ \
4246 int8x16_t __b = (b); \
4247 __builtin_neon_vst1q_v(__a, __b, 32); })
4248 #define vst1q_s16(__a, b) __extension__ ({ \
4249 int16x8_t __b = (b); \
4250 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 33); })
4251 #define vst1q_s32(__a, b) __extension__ ({ \
4252 int32x4_t __b = (b); \
4253 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 34); })
4254 #define vst1q_s64(__a, b) __extension__ ({ \
4255 int64x2_t __b = (b); \
4256 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 35); })
4257 #define vst1q_f16(__a, b) __extension__ ({ \
4258 float16x8_t __b = (b); \
4259 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 38); })
4260 #define vst1q_f32(__a, b) __extension__ ({ \
4261 float32x4_t __b = (b); \
4262 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 39); })
4263 #define vst1q_p8(__a, b) __extension__ ({ \
4264 poly8x16_t __b = (b); \
4265 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 36); })
4266 #define vst1q_p16(__a, b) __extension__ ({ \
4267 poly16x8_t __b = (b); \
4268 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 37); })
/* vst1_<type>(__a, b): non-q wrappers over __builtin_neon_vst1_v. __a is
 * forwarded as written (presumably the destination pointer -- confirm); b is
 * copied into a typed local and reinterpreted as int8x8_t (no cast for s8).
 * Trailing type codes: 0..3 signed, 4/5 poly, 6/7 f16/f32, 16..19 unsigned. */
4269 #define vst1_u8(__a, b) __extension__ ({ \
4270 uint8x8_t __b = (b); \
4271 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 16); })
4272 #define vst1_u16(__a, b) __extension__ ({ \
4273 uint16x4_t __b = (b); \
4274 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 17); })
4275 #define vst1_u32(__a, b) __extension__ ({ \
4276 uint32x2_t __b = (b); \
4277 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 18); })
4278 #define vst1_u64(__a, b) __extension__ ({ \
4279 uint64x1_t __b = (b); \
4280 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 19); })
4281 #define vst1_s8(__a, b) __extension__ ({ \
4282 int8x8_t __b = (b); \
4283 __builtin_neon_vst1_v(__a, __b, 0); })
4284 #define vst1_s16(__a, b) __extension__ ({ \
4285 int16x4_t __b = (b); \
4286 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 1); })
4287 #define vst1_s32(__a, b) __extension__ ({ \
4288 int32x2_t __b = (b); \
4289 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 2); })
4290 #define vst1_s64(__a, b) __extension__ ({ \
4291 int64x1_t __b = (b); \
4292 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 3); })
4293 #define vst1_f16(__a, b) __extension__ ({ \
4294 float16x4_t __b = (b); \
4295 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 6); })
4296 #define vst1_f32(__a, b) __extension__ ({ \
4297 float32x2_t __b = (b); \
4298 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 7); })
4299 #define vst1_p8(__a, b) __extension__ ({ \
4300 poly8x8_t __b = (b); \
4301 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 4); })
4302 #define vst1_p16(__a, b) __extension__ ({ \
4303 poly16x4_t __b = (b); \
4304 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 5); })
4305
/* vst1q_lane_<type>(__a, b, __c): wrappers over __builtin_neon_vst1q_lane_v.
 * __a and __c are forwarded textually (presumably destination pointer and
 * constant lane index -- confirm); b is copied into a typed local and
 * reinterpreted as int8x16_t (no cast for s8). The final argument is the
 * per-type code (32..39 signed/poly/float, 48..51 unsigned). */
4306 #define vst1q_lane_u8(__a, b, __c) __extension__ ({ \
4307 uint8x16_t __b = (b); \
4308 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 48); })
4309 #define vst1q_lane_u16(__a, b, __c) __extension__ ({ \
4310 uint16x8_t __b = (b); \
4311 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 49); })
4312 #define vst1q_lane_u32(__a, b, __c) __extension__ ({ \
4313 uint32x4_t __b = (b); \
4314 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 50); })
4315 #define vst1q_lane_u64(__a, b, __c) __extension__ ({ \
4316 uint64x2_t __b = (b); \
4317 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 51); })
4318 #define vst1q_lane_s8(__a, b, __c) __extension__ ({ \
4319 int8x16_t __b = (b); \
4320 __builtin_neon_vst1q_lane_v(__a, __b, __c, 32); })
4321 #define vst1q_lane_s16(__a, b, __c) __extension__ ({ \
4322 int16x8_t __b = (b); \
4323 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 33); })
4324 #define vst1q_lane_s32(__a, b, __c) __extension__ ({ \
4325 int32x4_t __b = (b); \
4326 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 34); })
4327 #define vst1q_lane_s64(__a, b, __c) __extension__ ({ \
4328 int64x2_t __b = (b); \
4329 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 35); })
4330 #define vst1q_lane_f16(__a, b, __c) __extension__ ({ \
4331 float16x8_t __b = (b); \
4332 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 38); })
4333 #define vst1q_lane_f32(__a, b, __c) __extension__ ({ \
4334 float32x4_t __b = (b); \
4335 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 39); })
4336 #define vst1q_lane_p8(__a, b, __c) __extension__ ({ \
4337 poly8x16_t __b = (b); \
4338 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 36); })
4339 #define vst1q_lane_p16(__a, b, __c) __extension__ ({ \
4340 poly16x8_t __b = (b); \
4341 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 37); })
/* vst1_lane_<type>(__a, b, __c): non-q wrappers over
 * __builtin_neon_vst1_lane_v. __a and __c reach the builtin as written
 * (presumably pointer and constant lane index -- confirm); b is copied into
 * a typed local and reinterpreted as int8x8_t (no cast for s8). Type codes:
 * 0..3 signed, 4/5 poly, 6/7 f16/f32, 16..19 unsigned. */
4342 #define vst1_lane_u8(__a, b, __c) __extension__ ({ \
4343 uint8x8_t __b = (b); \
4344 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 16); })
4345 #define vst1_lane_u16(__a, b, __c) __extension__ ({ \
4346 uint16x4_t __b = (b); \
4347 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 17); })
4348 #define vst1_lane_u32(__a, b, __c) __extension__ ({ \
4349 uint32x2_t __b = (b); \
4350 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 18); })
4351 #define vst1_lane_u64(__a, b, __c) __extension__ ({ \
4352 uint64x1_t __b = (b); \
4353 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 19); })
4354 #define vst1_lane_s8(__a, b, __c) __extension__ ({ \
4355 int8x8_t __b = (b); \
4356 __builtin_neon_vst1_lane_v(__a, __b, __c, 0); })
4357 #define vst1_lane_s16(__a, b, __c) __extension__ ({ \
4358 int16x4_t __b = (b); \
4359 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 1); })
4360 #define vst1_lane_s32(__a, b, __c) __extension__ ({ \
4361 int32x2_t __b = (b); \
4362 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 2); })
4363 #define vst1_lane_s64(__a, b, __c) __extension__ ({ \
4364 int64x1_t __b = (b); \
4365 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 3); })
4366 #define vst1_lane_f16(__a, b, __c) __extension__ ({ \
4367 float16x4_t __b = (b); \
4368 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 6); })
4369 #define vst1_lane_f32(__a, b, __c) __extension__ ({ \
4370 float32x2_t __b = (b); \
4371 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 7); })
4372 #define vst1_lane_p8(__a, b, __c) __extension__ ({ \
4373 poly8x8_t __b = (b); \
4374 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 4); })
4375 #define vst1_lane_p16(__a, b, __c) __extension__ ({ \
4376 poly16x4_t __b = (b); \
4377 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 5); })
4378
/* vst2q_<type>(__a, b): wrappers over __builtin_neon_vst2q_v. b is a
 * two-vector struct (<type>x2_t) copied once into a local; its val[0] and
 * val[1] members are reinterpreted as int8x16_t (no cast for s8) and passed
 * after __a, which is forwarded as written (presumably the destination
 * pointer -- confirm). Type codes: 32..34 signed, 36/37 poly, 38/39 float,
 * 48..50 unsigned. No 64-bit-element variants are defined in this group. */
4379 #define vst2q_u8(__a, b) __extension__ ({ \
4380 uint8x16x2_t __b = (b); \
4381 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 48); })
4382 #define vst2q_u16(__a, b) __extension__ ({ \
4383 uint16x8x2_t __b = (b); \
4384 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 49); })
4385 #define vst2q_u32(__a, b) __extension__ ({ \
4386 uint32x4x2_t __b = (b); \
4387 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 50); })
4388 #define vst2q_s8(__a, b) __extension__ ({ \
4389 int8x16x2_t __b = (b); \
4390 __builtin_neon_vst2q_v(__a, __b.val[0], __b.val[1], 32); })
4391 #define vst2q_s16(__a, b) __extension__ ({ \
4392 int16x8x2_t __b = (b); \
4393 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 33); })
4394 #define vst2q_s32(__a, b) __extension__ ({ \
4395 int32x4x2_t __b = (b); \
4396 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 34); })
4397 #define vst2q_f16(__a, b) __extension__ ({ \
4398 float16x8x2_t __b = (b); \
4399 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 38); })
4400 #define vst2q_f32(__a, b) __extension__ ({ \
4401 float32x4x2_t __b = (b); \
4402 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 39); })
4403 #define vst2q_p8(__a, b) __extension__ ({ \
4404 poly8x16x2_t __b = (b); \
4405 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 36); })
4406 #define vst2q_p16(__a, b) __extension__ ({ \
4407 poly16x8x2_t __b = (b); \
4408 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 37); })
/* vst2_<type>(__a, b): non-q wrappers over __builtin_neon_vst2_v. b is a
 * two-vector struct copied once into a local; val[0] and val[1] are
 * reinterpreted as int8x8_t (no cast for s8). __a is forwarded as written
 * (presumably the destination pointer -- confirm). Type codes: 0..3 signed,
 * 4/5 poly, 6/7 f16/f32, 16..19 unsigned. */
4409 #define vst2_u8(__a, b) __extension__ ({ \
4410 uint8x8x2_t __b = (b); \
4411 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 16); })
4412 #define vst2_u16(__a, b) __extension__ ({ \
4413 uint16x4x2_t __b = (b); \
4414 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 17); })
4415 #define vst2_u32(__a, b) __extension__ ({ \
4416 uint32x2x2_t __b = (b); \
4417 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 18); })
4418 #define vst2_u64(__a, b) __extension__ ({ \
4419 uint64x1x2_t __b = (b); \
4420 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 19); })
4421 #define vst2_s8(__a, b) __extension__ ({ \
4422 int8x8x2_t __b = (b); \
4423 __builtin_neon_vst2_v(__a, __b.val[0], __b.val[1], 0); })
4424 #define vst2_s16(__a, b) __extension__ ({ \
4425 int16x4x2_t __b = (b); \
4426 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 1); })
4427 #define vst2_s32(__a, b) __extension__ ({ \
4428 int32x2x2_t __b = (b); \
4429 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 2); })
4430 #define vst2_s64(__a, b) __extension__ ({ \
4431 int64x1x2_t __b = (b); \
4432 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 3); })
4433 #define vst2_f16(__a, b) __extension__ ({ \
4434 float16x4x2_t __b = (b); \
4435 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 6); })
4436 #define vst2_f32(__a, b) __extension__ ({ \
4437 float32x2x2_t __b = (b); \
4438 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 7); })
4439 #define vst2_p8(__a, b) __extension__ ({ \
4440 poly8x8x2_t __b = (b); \
4441 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 4); })
4442 #define vst2_p16(__a, b) __extension__ ({ \
4443 poly16x4x2_t __b = (b); \
4444 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 5); })
4445
/* vst2q_lane_<type>(__a, b, __c): wrappers over __builtin_neon_vst2q_lane_v.
 * b (a two-vector struct) is copied once into a local and both members are
 * reinterpreted as int8x16_t; __a and __c are forwarded as written
 * (presumably pointer and constant lane index -- confirm). Only 16- and
 * 32-bit element types are defined in this group (u16, u32, s16, s32, f16,
 * f32, p16); there are no 8-bit or 64-bit variants here. */
4446 #define vst2q_lane_u16(__a, b, __c) __extension__ ({ \
4447 uint16x8x2_t __b = (b); \
4448 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 49); })
4449 #define vst2q_lane_u32(__a, b, __c) __extension__ ({ \
4450 uint32x4x2_t __b = (b); \
4451 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 50); })
4452 #define vst2q_lane_s16(__a, b, __c) __extension__ ({ \
4453 int16x8x2_t __b = (b); \
4454 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 33); })
4455 #define vst2q_lane_s32(__a, b, __c) __extension__ ({ \
4456 int32x4x2_t __b = (b); \
4457 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 34); })
4458 #define vst2q_lane_f16(__a, b, __c) __extension__ ({ \
4459 float16x8x2_t __b = (b); \
4460 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 38); })
4461 #define vst2q_lane_f32(__a, b, __c) __extension__ ({ \
4462 float32x4x2_t __b = (b); \
4463 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 39); })
4464 #define vst2q_lane_p16(__a, b, __c) __extension__ ({ \
4465 poly16x8x2_t __b = (b); \
4466 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 37); })
/* vst2_lane_<type>(__a, b, __c): non-q wrappers over
 * __builtin_neon_vst2_lane_v. b (a two-vector struct) is copied once into a
 * local; val[0] and val[1] are reinterpreted as int8x8_t (no cast for s8).
 * __a and __c are forwarded as written (presumably pointer and constant lane
 * index -- confirm). Type codes: 0..2 signed, 4/5 poly, 6/7 f16/f32,
 * 16..18 unsigned. No 64-bit-element variants are defined in this group. */
4467 #define vst2_lane_u8(__a, b, __c) __extension__ ({ \
4468 uint8x8x2_t __b = (b); \
4469 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 16); })
4470 #define vst2_lane_u16(__a, b, __c) __extension__ ({ \
4471 uint16x4x2_t __b = (b); \
4472 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 17); })
4473 #define vst2_lane_u32(__a, b, __c) __extension__ ({ \
4474 uint32x2x2_t __b = (b); \
4475 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 18); })
4476 #define vst2_lane_s8(__a, b, __c) __extension__ ({ \
4477 int8x8x2_t __b = (b); \
4478 __builtin_neon_vst2_lane_v(__a, __b.val[0], __b.val[1], __c, 0); })
4479 #define vst2_lane_s16(__a, b, __c) __extension__ ({ \
4480 int16x4x2_t __b = (b); \
4481 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 1); })
4482 #define vst2_lane_s32(__a, b, __c) __extension__ ({ \
4483 int32x2x2_t __b = (b); \
4484 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 2); })
4485 #define vst2_lane_f16(__a, b, __c) __extension__ ({ \
4486 float16x4x2_t __b = (b); \
4487 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 6); })
4488 #define vst2_lane_f32(__a, b, __c) __extension__ ({ \
4489 float32x2x2_t __b = (b); \
4490 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 7); })
4491 #define vst2_lane_p8(__a, b, __c) __extension__ ({ \
4492 poly8x8x2_t __b = (b); \
4493 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 4); })
4494 #define vst2_lane_p16(__a, b, __c) __extension__ ({ \
4495 poly16x4x2_t __b = (b); \
4496 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 5); })
4497
/* vst3q_<type>(__a, b): wrappers over __builtin_neon_vst3q_v. b is a
 * three-vector struct (<type>x3_t) copied once into a local; val[0..2] are
 * reinterpreted as int8x16_t (no cast for s8) and passed after __a, which is
 * forwarded as written (presumably the destination pointer -- confirm).
 * Type codes: 32..34 signed, 36/37 poly, 38/39 float, 48..50 unsigned.
 * No 64-bit-element variants are defined in this group. */
4498 #define vst3q_u8(__a, b) __extension__ ({ \
4499 uint8x16x3_t __b = (b); \
4500 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 48); })
4501 #define vst3q_u16(__a, b) __extension__ ({ \
4502 uint16x8x3_t __b = (b); \
4503 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 49); })
4504 #define vst3q_u32(__a, b) __extension__ ({ \
4505 uint32x4x3_t __b = (b); \
4506 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 50); })
4507 #define vst3q_s8(__a, b) __extension__ ({ \
4508 int8x16x3_t __b = (b); \
4509 __builtin_neon_vst3q_v(__a, __b.val[0], __b.val[1], __b.val[2], 32); })
4510 #define vst3q_s16(__a, b) __extension__ ({ \
4511 int16x8x3_t __b = (b); \
4512 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 33); })
4513 #define vst3q_s32(__a, b) __extension__ ({ \
4514 int32x4x3_t __b = (b); \
4515 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 34); })
4516 #define vst3q_f16(__a, b) __extension__ ({ \
4517 float16x8x3_t __b = (b); \
4518 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 38); })
4519 #define vst3q_f32(__a, b) __extension__ ({ \
4520 float32x4x3_t __b = (b); \
4521 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 39); })
4522 #define vst3q_p8(__a, b) __extension__ ({ \
4523 poly8x16x3_t __b = (b); \
4524 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 36); })
4525 #define vst3q_p16(__a, b) __extension__ ({ \
4526 poly16x8x3_t __b = (b); \
4527 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 37); })
/* vst3_<type>(__a, b): non-q wrappers over __builtin_neon_vst3_v. b is a
 * three-vector struct copied once into a local; val[0..2] are reinterpreted
 * as int8x8_t (no cast for s8). __a is forwarded as written (presumably the
 * destination pointer -- confirm). Type codes: 0..3 signed, 4/5 poly,
 * 6/7 f16/f32, 16..19 unsigned. */
4528 #define vst3_u8(__a, b) __extension__ ({ \
4529 uint8x8x3_t __b = (b); \
4530 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 16); })
4531 #define vst3_u16(__a, b) __extension__ ({ \
4532 uint16x4x3_t __b = (b); \
4533 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 17); })
4534 #define vst3_u32(__a, b) __extension__ ({ \
4535 uint32x2x3_t __b = (b); \
4536 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 18); })
4537 #define vst3_u64(__a, b) __extension__ ({ \
4538 uint64x1x3_t __b = (b); \
4539 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 19); })
4540 #define vst3_s8(__a, b) __extension__ ({ \
4541 int8x8x3_t __b = (b); \
4542 __builtin_neon_vst3_v(__a, __b.val[0], __b.val[1], __b.val[2], 0); })
4543 #define vst3_s16(__a, b) __extension__ ({ \
4544 int16x4x3_t __b = (b); \
4545 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 1); })
4546 #define vst3_s32(__a, b) __extension__ ({ \
4547 int32x2x3_t __b = (b); \
4548 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 2); })
4549 #define vst3_s64(__a, b) __extension__ ({ \
4550 int64x1x3_t __b = (b); \
4551 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 3); })
4552 #define vst3_f16(__a, b) __extension__ ({ \
4553 float16x4x3_t __b = (b); \
4554 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 6); })
4555 #define vst3_f32(__a, b) __extension__ ({ \
4556 float32x2x3_t __b = (b); \
4557 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 7); })
4558 #define vst3_p8(__a, b) __extension__ ({ \
4559 poly8x8x3_t __b = (b); \
4560 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 4); })
4561 #define vst3_p16(__a, b) __extension__ ({ \
4562 poly16x4x3_t __b = (b); \
4563 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 5); })
4564
/* vst3q_lane_<type>(__a, b, __c): wrappers over __builtin_neon_vst3q_lane_v.
 * b (a three-vector struct) is copied once into a local and val[0..2] are
 * reinterpreted as int8x16_t; __a and __c are forwarded as written
 * (presumably pointer and constant lane index -- confirm). Only u16, u32,
 * s16, s32, f16, f32 and p16 variants are defined in this group. */
4565 #define vst3q_lane_u16(__a, b, __c) __extension__ ({ \
4566 uint16x8x3_t __b = (b); \
4567 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 49); })
4568 #define vst3q_lane_u32(__a, b, __c) __extension__ ({ \
4569 uint32x4x3_t __b = (b); \
4570 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 50); })
4571 #define vst3q_lane_s16(__a, b, __c) __extension__ ({ \
4572 int16x8x3_t __b = (b); \
4573 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 33); })
4574 #define vst3q_lane_s32(__a, b, __c) __extension__ ({ \
4575 int32x4x3_t __b = (b); \
4576 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 34); })
4577 #define vst3q_lane_f16(__a, b, __c) __extension__ ({ \
4578 float16x8x3_t __b = (b); \
4579 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 38); })
4580 #define vst3q_lane_f32(__a, b, __c) __extension__ ({ \
4581 float32x4x3_t __b = (b); \
4582 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 39); })
4583 #define vst3q_lane_p16(__a, b, __c) __extension__ ({ \
4584 poly16x8x3_t __b = (b); \
4585 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 37); })
/* vst3_lane_<type>(__a, b, __c): non-q wrappers over
 * __builtin_neon_vst3_lane_v. b (a three-vector struct) is copied once into
 * a local; val[0..2] are reinterpreted as int8x8_t (no cast for s8). __a and
 * __c are forwarded as written (presumably pointer and constant lane index
 * -- confirm). Type codes: 0..2 signed, 4/5 poly, 6/7 f16/f32,
 * 16..18 unsigned. No 64-bit-element variants are defined in this group. */
4586 #define vst3_lane_u8(__a, b, __c) __extension__ ({ \
4587 uint8x8x3_t __b = (b); \
4588 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 16); })
4589 #define vst3_lane_u16(__a, b, __c) __extension__ ({ \
4590 uint16x4x3_t __b = (b); \
4591 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 17); })
4592 #define vst3_lane_u32(__a, b, __c) __extension__ ({ \
4593 uint32x2x3_t __b = (b); \
4594 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 18); })
4595 #define vst3_lane_s8(__a, b, __c) __extension__ ({ \
4596 int8x8x3_t __b = (b); \
4597 __builtin_neon_vst3_lane_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 0); })
4598 #define vst3_lane_s16(__a, b, __c) __extension__ ({ \
4599 int16x4x3_t __b = (b); \
4600 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 1); })
4601 #define vst3_lane_s32(__a, b, __c) __extension__ ({ \
4602 int32x2x3_t __b = (b); \
4603 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 2); })
4604 #define vst3_lane_f16(__a, b, __c) __extension__ ({ \
4605 float16x4x3_t __b = (b); \
4606 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 6); })
4607 #define vst3_lane_f32(__a, b, __c) __extension__ ({ \
4608 float32x2x3_t __b = (b); \
4609 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 7); })
4610 #define vst3_lane_p8(__a, b, __c) __extension__ ({ \
4611 poly8x8x3_t __b = (b); \
4612 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 4); })
4613 #define vst3_lane_p16(__a, b, __c) __extension__ ({ \
4614 poly16x4x3_t __b = (b); \
4615 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 5); })
4616
/* vst4q_<type>(__a, b): wrappers over __builtin_neon_vst4q_v. b is a
 * four-vector struct (<type>x4_t) copied once into a local; val[0..3] are
 * reinterpreted as int8x16_t (no cast for s8) and passed after __a, which is
 * forwarded as written (presumably the destination pointer -- confirm).
 * Type codes: 32..34 signed, 36/37 poly, 38/39 float, 48..50 unsigned.
 * No 64-bit-element variants are defined in this group. */
4617 #define vst4q_u8(__a, b) __extension__ ({ \
4618 uint8x16x4_t __b = (b); \
4619 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 48); })
4620 #define vst4q_u16(__a, b) __extension__ ({ \
4621 uint16x8x4_t __b = (b); \
4622 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 49); })
4623 #define vst4q_u32(__a, b) __extension__ ({ \
4624 uint32x4x4_t __b = (b); \
4625 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 50); })
4626 #define vst4q_s8(__a, b) __extension__ ({ \
4627 int8x16x4_t __b = (b); \
4628 __builtin_neon_vst4q_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], 32); })
4629 #define vst4q_s16(__a, b) __extension__ ({ \
4630 int16x8x4_t __b = (b); \
4631 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 33); })
4632 #define vst4q_s32(__a, b) __extension__ ({ \
4633 int32x4x4_t __b = (b); \
4634 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 34); })
4635 #define vst4q_f16(__a, b) __extension__ ({ \
4636 float16x8x4_t __b = (b); \
4637 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 38); })
4638 #define vst4q_f32(__a, b) __extension__ ({ \
4639 float32x4x4_t __b = (b); \
4640 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 39); })
4641 #define vst4q_p8(__a, b) __extension__ ({ \
4642 poly8x16x4_t __b = (b); \
4643 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 36); })
4644 #define vst4q_p16(__a, b) __extension__ ({ \
4645 poly16x8x4_t __b = (b); \
4646 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 37); })
/* vst4_<type>(__a, b): non-q wrappers over __builtin_neon_vst4_v. b is a
 * four-vector struct copied once into a local; val[0..3] are reinterpreted
 * as int8x8_t (no cast for s8). __a is forwarded as written (presumably the
 * destination pointer -- confirm). Type codes: 0..3 signed, 4/5 poly,
 * 6/7 f16/f32, 16..19 unsigned. */
4647 #define vst4_u8(__a, b) __extension__ ({ \
4648 uint8x8x4_t __b = (b); \
4649 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 16); })
4650 #define vst4_u16(__a, b) __extension__ ({ \
4651 uint16x4x4_t __b = (b); \
4652 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 17); })
4653 #define vst4_u32(__a, b) __extension__ ({ \
4654 uint32x2x4_t __b = (b); \
4655 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 18); })
4656 #define vst4_u64(__a, b) __extension__ ({ \
4657 uint64x1x4_t __b = (b); \
4658 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 19); })
4659 #define vst4_s8(__a, b) __extension__ ({ \
4660 int8x8x4_t __b = (b); \
4661 __builtin_neon_vst4_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], 0); })
4662 #define vst4_s16(__a, b) __extension__ ({ \
4663 int16x4x4_t __b = (b); \
4664 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 1); })
4665 #define vst4_s32(__a, b) __extension__ ({ \
4666 int32x2x4_t __b = (b); \
4667 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 2); })
4668 #define vst4_s64(__a, b) __extension__ ({ \
4669 int64x1x4_t __b = (b); \
4670 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 3); })
4671 #define vst4_f16(__a, b) __extension__ ({ \
4672 float16x4x4_t __b = (b); \
4673 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 6); })
4674 #define vst4_f32(__a, b) __extension__ ({ \
4675 float32x2x4_t __b = (b); \
4676 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 7); })
4677 #define vst4_p8(__a, b) __extension__ ({ \
4678 poly8x8x4_t __b = (b); \
4679 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 4); })
4680 #define vst4_p16(__a, b) __extension__ ({ \
4681 poly16x4x4_t __b = (b); \
4682 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 5); })
4683
/* vst4q_lane_<type>(__a, b, __c): wrappers over __builtin_neon_vst4q_lane_v.
 * b (a four-vector struct) is copied once into a local and val[0..3] are
 * reinterpreted as int8x16_t; __a and __c are forwarded as written
 * (presumably pointer and constant lane index -- confirm). Only u16, u32,
 * s16, s32, f16, f32 and p16 variants are defined in this group. */
4684 #define vst4q_lane_u16(__a, b, __c) __extension__ ({ \
4685 uint16x8x4_t __b = (b); \
4686 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 49); })
4687 #define vst4q_lane_u32(__a, b, __c) __extension__ ({ \
4688 uint32x4x4_t __b = (b); \
4689 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 50); })
4690 #define vst4q_lane_s16(__a, b, __c) __extension__ ({ \
4691 int16x8x4_t __b = (b); \
4692 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 33); })
4693 #define vst4q_lane_s32(__a, b, __c) __extension__ ({ \
4694 int32x4x4_t __b = (b); \
4695 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 34); })
4696 #define vst4q_lane_f16(__a, b, __c) __extension__ ({ \
4697 float16x8x4_t __b = (b); \
4698 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 38); })
4699 #define vst4q_lane_f32(__a, b, __c) __extension__ ({ \
4700 float32x4x4_t __b = (b); \
4701 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 39); })
4702 #define vst4q_lane_p16(__a, b, __c) __extension__ ({ \
4703 poly16x8x4_t __b = (b); \
4704 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 37); })
/* vst4_lane_<type>(__a, b, __c): non-q wrappers over
 * __builtin_neon_vst4_lane_v. b (a four-vector struct) is copied once into
 * a local; val[0..3] are reinterpreted as int8x8_t (no cast for s8). __a and
 * __c are forwarded as written (presumably pointer and constant lane index
 * -- confirm). Type codes: 0..2 signed, 4/5 poly, 6/7 f16/f32,
 * 16..18 unsigned. No 64-bit-element variants are defined in this group. */
4705 #define vst4_lane_u8(__a, b, __c) __extension__ ({ \
4706 uint8x8x4_t __b = (b); \
4707 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 16); })
4708 #define vst4_lane_u16(__a, b, __c) __extension__ ({ \
4709 uint16x4x4_t __b = (b); \
4710 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 17); })
4711 #define vst4_lane_u32(__a, b, __c) __extension__ ({ \
4712 uint32x2x4_t __b = (b); \
4713 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 18); })
4714 #define vst4_lane_s8(__a, b, __c) __extension__ ({ \
4715 int8x8x4_t __b = (b); \
4716 __builtin_neon_vst4_lane_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); })
4717 #define vst4_lane_s16(__a, b, __c) __extension__ ({ \
4718 int16x4x4_t __b = (b); \
4719 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 1); })
4720 #define vst4_lane_s32(__a, b, __c) __extension__ ({ \
4721 int32x2x4_t __b = (b); \
4722 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 2); })
4723 #define vst4_lane_f16(__a, b, __c) __extension__ ({ \
4724 float16x4x4_t __b = (b); \
4725 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 6); })
4726 #define vst4_lane_f32(__a, b, __c) __extension__ ({ \
4727 float32x2x4_t __b = (b); \
4728 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 7); })
4729 #define vst4_lane_p8(__a, b, __c) __extension__ ({ \
4730 poly8x8x4_t __b = (b); \
4731 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 4); })
4732 #define vst4_lane_p16(__a, b, __c) __extension__ ({ \
4733 poly16x4x4_t __b = (b); \
4734 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 5); })
4735
4736 __ai int8x8_t vsub_s8(int8x8_t __a, int8x8_t __b) {
4737 return __a - __b; }
4738 __ai int16x4_t vsub_s16(int16x4_t __a, int16x4_t __b) {
4739 return __a - __b; }
4740 __ai int32x2_t vsub_s32(int32x2_t __a, int32x2_t __b) {
4741 return __a - __b; }
4742 __ai int64x1_t vsub_s64(int64x1_t __a, int64x1_t __b) {
4743 return __a - __b; }
4744 __ai float32x2_t vsub_f32(float32x2_t __a, float32x2_t __b) {
4745 return __a - __b; }
4746 __ai uint8x8_t vsub_u8(uint8x8_t __a, uint8x8_t __b) {
4747 return __a - __b; }
4748 __ai uint16x4_t vsub_u16(uint16x4_t __a, uint16x4_t __b) {
4749 return __a - __b; }
4750 __ai uint32x2_t vsub_u32(uint32x2_t __a, uint32x2_t __b) {
4751 return __a - __b; }
4752 __ai uint64x1_t vsub_u64(uint64x1_t __a, uint64x1_t __b) {
4753 return __a - __b; }
4754 __ai int8x16_t vsubq_s8(int8x16_t __a, int8x16_t __b) {
4755 return __a - __b; }
4756 __ai int16x8_t vsubq_s16(int16x8_t __a, int16x8_t __b) {
4757 return __a - __b; }
4758 __ai int32x4_t vsubq_s32(int32x4_t __a, int32x4_t __b) {
4759 return __a - __b; }
4760 __ai int64x2_t vsubq_s64(int64x2_t __a, int64x2_t __b) {
4761 return __a - __b; }
4762 __ai float32x4_t vsubq_f32(float32x4_t __a, float32x4_t __b) {
4763 return __a - __b; }
4764 __ai uint8x16_t vsubq_u8(uint8x16_t __a, uint8x16_t __b) {
4765 return __a - __b; }
4766 __ai uint16x8_t vsubq_u16(uint16x8_t __a, uint16x8_t __b) {
4767 return __a - __b; }
4768 __ai uint32x4_t vsubq_u32(uint32x4_t __a, uint32x4_t __b) {
4769 return __a - __b; }
4770 __ai uint64x2_t vsubq_u64(uint64x2_t __a, uint64x2_t __b) {
4771 return __a - __b; }
4772
4773 __ai int8x8_t vsubhn_s16(int16x8_t __a, int16x8_t __b) {
4774 return (int8x8_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 0); }
4775 __ai int16x4_t vsubhn_s32(int32x4_t __a, int32x4_t __b) {
4776 return (int16x4_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 1); }
4777 __ai int32x2_t vsubhn_s64(int64x2_t __a, int64x2_t __b) {
4778 return (int32x2_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 2); }
4779 __ai uint8x8_t vsubhn_u16(uint16x8_t __a, uint16x8_t __b) {
4780 return (uint8x8_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 16); }
4781 __ai uint16x4_t vsubhn_u32(uint32x4_t __a, uint32x4_t __b) {
4782 return (uint16x4_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 17) ; }
4783 __ai uint32x2_t vsubhn_u64(uint64x2_t __a, uint64x2_t __b) {
4784 return (uint32x2_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 18) ; }
4785
4786 __ai int16x8_t vsubl_s8(int8x8_t __a, int8x8_t __b) {
4787 return vmovl_s8(__a) - vmovl_s8(__b); }
4788 __ai int32x4_t vsubl_s16(int16x4_t __a, int16x4_t __b) {
4789 return vmovl_s16(__a) - vmovl_s16(__b); }
4790 __ai int64x2_t vsubl_s32(int32x2_t __a, int32x2_t __b) {
4791 return vmovl_s32(__a) - vmovl_s32(__b); }
4792 __ai uint16x8_t vsubl_u8(uint8x8_t __a, uint8x8_t __b) {
4793 return vmovl_u8(__a) - vmovl_u8(__b); }
4794 __ai uint32x4_t vsubl_u16(uint16x4_t __a, uint16x4_t __b) {
4795 return vmovl_u16(__a) - vmovl_u16(__b); }
4796 __ai uint64x2_t vsubl_u32(uint32x2_t __a, uint32x2_t __b) {
4797 return vmovl_u32(__a) - vmovl_u32(__b); }
4798
4799 __ai int16x8_t vsubw_s8(int16x8_t __a, int8x8_t __b) {
4800 return __a - vmovl_s8(__b); }
4801 __ai int32x4_t vsubw_s16(int32x4_t __a, int16x4_t __b) {
4802 return __a - vmovl_s16(__b); }
4803 __ai int64x2_t vsubw_s32(int64x2_t __a, int32x2_t __b) {
4804 return __a - vmovl_s32(__b); }
4805 __ai uint16x8_t vsubw_u8(uint16x8_t __a, uint8x8_t __b) {
4806 return __a - vmovl_u8(__b); }
4807 __ai uint32x4_t vsubw_u16(uint32x4_t __a, uint16x4_t __b) {
4808 return __a - vmovl_u16(__b); }
4809 __ai uint64x2_t vsubw_u32(uint64x2_t __a, uint32x2_t __b) {
4810 return __a - vmovl_u32(__b); }
4811
4812 __ai uint8x8_t vtbl1_u8(uint8x8_t __a, uint8x8_t __b) {
4813 return (uint8x8_t)__builtin_neon_vtbl1_v((int8x8_t)__a, (int8x8_t)__b, 16); }
4814 __ai int8x8_t vtbl1_s8(int8x8_t __a, int8x8_t __b) {
4815 return (int8x8_t)__builtin_neon_vtbl1_v(__a, __b, 0); }
4816 __ai poly8x8_t vtbl1_p8(poly8x8_t __a, uint8x8_t __b) {
4817 return (poly8x8_t)__builtin_neon_vtbl1_v((int8x8_t)__a, (int8x8_t)__b, 4); }
4818
4819 __ai uint8x8_t vtbl2_u8(uint8x8x2_t __a, uint8x8_t __b) {
4820 return (uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__a.val[0], (int8x8_t)__a.v al[1], (int8x8_t)__b, 16); }
4821 __ai int8x8_t vtbl2_s8(int8x8x2_t __a, int8x8_t __b) {
4822 return (int8x8_t)__builtin_neon_vtbl2_v(__a.val[0], __a.val[1], __b, 0); }
4823 __ai poly8x8_t vtbl2_p8(poly8x8x2_t __a, uint8x8_t __b) {
4824 return (poly8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__a.val[0], (int8x8_t)__a.v al[1], (int8x8_t)__b, 4); }
4825
4826 __ai uint8x8_t vtbl3_u8(uint8x8x3_t __a, uint8x8_t __b) {
4827 return (uint8x8_t)__builtin_neon_vtbl3_v((int8x8_t)__a.val[0], (int8x8_t)__a.v al[1], (int8x8_t)__a.val[2], (int8x8_t)__b, 16); }
4828 __ai int8x8_t vtbl3_s8(int8x8x3_t __a, int8x8_t __b) {
4829 return (int8x8_t)__builtin_neon_vtbl3_v(__a.val[0], __a.val[1], __a.val[2], __ b, 0); }
4830 __ai poly8x8_t vtbl3_p8(poly8x8x3_t __a, uint8x8_t __b) {
4831 return (poly8x8_t)__builtin_neon_vtbl3_v((int8x8_t)__a.val[0], (int8x8_t)__a.v al[1], (int8x8_t)__a.val[2], (int8x8_t)__b, 4); }
4832
4833 __ai uint8x8_t vtbl4_u8(uint8x8x4_t __a, uint8x8_t __b) {
4834 return (uint8x8_t)__builtin_neon_vtbl4_v((int8x8_t)__a.val[0], (int8x8_t)__a.v al[1], (int8x8_t)__a.val[2], (int8x8_t)__a.val[3], (int8x8_t)__b, 16); }
4835 __ai int8x8_t vtbl4_s8(int8x8x4_t __a, int8x8_t __b) {
4836 return (int8x8_t)__builtin_neon_vtbl4_v(__a.val[0], __a.val[1], __a.val[2], __ a.val[3], __b, 0); }
4837 __ai poly8x8_t vtbl4_p8(poly8x8x4_t __a, uint8x8_t __b) {
4838 return (poly8x8_t)__builtin_neon_vtbl4_v((int8x8_t)__a.val[0], (int8x8_t)__a.v al[1], (int8x8_t)__a.val[2], (int8x8_t)__a.val[3], (int8x8_t)__b, 4); }
4839
4840 __ai uint8x8_t vtbx1_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) {
4841 return (uint8x8_t)__builtin_neon_vtbx1_v((int8x8_t)__a, (int8x8_t)__b, (int8x8 _t)__c, 16); }
4842 __ai int8x8_t vtbx1_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) {
4843 return (int8x8_t)__builtin_neon_vtbx1_v(__a, __b, __c, 0); }
4844 __ai poly8x8_t vtbx1_p8(poly8x8_t __a, poly8x8_t __b, uint8x8_t __c) {
4845 return (poly8x8_t)__builtin_neon_vtbx1_v((int8x8_t)__a, (int8x8_t)__b, (int8x8 _t)__c, 4); }
4846
4847 __ai uint8x8_t vtbx2_u8(uint8x8_t __a, uint8x8x2_t __b, uint8x8_t __c) {
4848 return (uint8x8_t)__builtin_neon_vtbx2_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__c, 16); }
4849 __ai int8x8_t vtbx2_s8(int8x8_t __a, int8x8x2_t __b, int8x8_t __c) {
4850 return (int8x8_t)__builtin_neon_vtbx2_v(__a, __b.val[0], __b.val[1], __c, 0); }
4851 __ai poly8x8_t vtbx2_p8(poly8x8_t __a, poly8x8x2_t __b, uint8x8_t __c) {
4852 return (poly8x8_t)__builtin_neon_vtbx2_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__c, 4); }
4853
4854 __ai uint8x8_t vtbx3_u8(uint8x8_t __a, uint8x8x3_t __b, uint8x8_t __c) {
4855 return (uint8x8_t)__builtin_neon_vtbx3_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__c, 16); }
4856 __ai int8x8_t vtbx3_s8(int8x8_t __a, int8x8x3_t __b, int8x8_t __c) {
4857 return (int8x8_t)__builtin_neon_vtbx3_v(__a, __b.val[0], __b.val[1], __b.val[2 ], __c, 0); }
4858 __ai poly8x8_t vtbx3_p8(poly8x8_t __a, poly8x8x3_t __b, uint8x8_t __c) {
4859 return (poly8x8_t)__builtin_neon_vtbx3_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__c, 4); }
4860
4861 __ai uint8x8_t vtbx4_u8(uint8x8_t __a, uint8x8x4_t __b, uint8x8_t __c) {
4862 return (uint8x8_t)__builtin_neon_vtbx4_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], (int8x8_t)__c, 16); }
4863 __ai int8x8_t vtbx4_s8(int8x8_t __a, int8x8x4_t __b, int8x8_t __c) {
4864 return (int8x8_t)__builtin_neon_vtbx4_v(__a, __b.val[0], __b.val[1], __b.val[2 ], __b.val[3], __c, 0); }
4865 __ai poly8x8_t vtbx4_p8(poly8x8_t __a, poly8x8x4_t __b, uint8x8_t __c) {
4866 return (poly8x8_t)__builtin_neon_vtbx4_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], (int8x8_t)__c, 4); }
4867
4868 __ai int8x8x2_t vtrn_s8(int8x8_t __a, int8x8_t __b) {
4869 int8x8x2_t r; __builtin_neon_vtrn_v(&r, __a, __b, 0); return r; }
4870 __ai int16x4x2_t vtrn_s16(int16x4_t __a, int16x4_t __b) {
4871 int16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); ret urn r; }
4872 __ai int32x2x2_t vtrn_s32(int32x2_t __a, int32x2_t __b) {
4873 int32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); ret urn r; }
4874 __ai uint8x8x2_t vtrn_u8(uint8x8_t __a, uint8x8_t __b) {
4875 uint8x8x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 16); re turn r; }
4876 __ai uint16x4x2_t vtrn_u16(uint16x4_t __a, uint16x4_t __b) {
4877 uint16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 17); r eturn r; }
4878 __ai uint32x2x2_t vtrn_u32(uint32x2_t __a, uint32x2_t __b) {
4879 uint32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 18); r eturn r; }
4880 __ai float32x2x2_t vtrn_f32(float32x2_t __a, float32x2_t __b) {
4881 float32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 7); r eturn r; }
4882 __ai poly8x8x2_t vtrn_p8(poly8x8_t __a, poly8x8_t __b) {
4883 poly8x8x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); ret urn r; }
4884 __ai poly16x4x2_t vtrn_p16(poly16x4_t __a, poly16x4_t __b) {
4885 poly16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); re turn r; }
4886 __ai int8x16x2_t vtrnq_s8(int8x16_t __a, int8x16_t __b) {
4887 int8x16x2_t r; __builtin_neon_vtrnq_v(&r, __a, __b, 32); return r; }
4888 __ai int16x8x2_t vtrnq_s16(int16x8_t __a, int16x8_t __b) {
4889 int16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 33); return r; }
4890 __ai int32x4x2_t vtrnq_s32(int32x4_t __a, int32x4_t __b) {
4891 int32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 34); return r; }
4892 __ai uint8x16x2_t vtrnq_u8(uint8x16_t __a, uint8x16_t __b) {
4893 uint8x16x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 48) ; return r; }
4894 __ai uint16x8x2_t vtrnq_u16(uint16x8_t __a, uint16x8_t __b) {
4895 uint16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 49) ; return r; }
4896 __ai uint32x4x2_t vtrnq_u32(uint32x4_t __a, uint32x4_t __b) {
4897 uint32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 50) ; return r; }
4898 __ai float32x4x2_t vtrnq_f32(float32x4_t __a, float32x4_t __b) {
4899 float32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 39 ); return r; }
4900 __ai poly8x16x2_t vtrnq_p8(poly8x16_t __a, poly8x16_t __b) {
4901 poly8x16x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 36) ; return r; }
4902 __ai poly16x8x2_t vtrnq_p16(poly16x8_t __a, poly16x8_t __b) {
4903 poly16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 37) ; return r; }
4904
4905 __ai uint8x8_t vtst_s8(int8x8_t __a, int8x8_t __b) {
4906 return (uint8x8_t)__builtin_neon_vtst_v(__a, __b, 16); }
4907 __ai uint16x4_t vtst_s16(int16x4_t __a, int16x4_t __b) {
4908 return (uint16x4_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 17); }
4909 __ai uint32x2_t vtst_s32(int32x2_t __a, int32x2_t __b) {
4910 return (uint32x2_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 18); }
4911 __ai uint8x8_t vtst_u8(uint8x8_t __a, uint8x8_t __b) {
4912 return (uint8x8_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 16); }
4913 __ai uint16x4_t vtst_u16(uint16x4_t __a, uint16x4_t __b) {
4914 return (uint16x4_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 17); }
4915 __ai uint32x2_t vtst_u32(uint32x2_t __a, uint32x2_t __b) {
4916 return (uint32x2_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 18); }
4917 __ai uint8x8_t vtst_p8(poly8x8_t __a, poly8x8_t __b) {
4918 return (uint8x8_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 16); }
4919 __ai uint8x16_t vtstq_s8(int8x16_t __a, int8x16_t __b) {
4920 return (uint8x16_t)__builtin_neon_vtstq_v(__a, __b, 48); }
4921 __ai uint16x8_t vtstq_s16(int16x8_t __a, int16x8_t __b) {
4922 return (uint16x8_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
4923 __ai uint32x4_t vtstq_s32(int32x4_t __a, int32x4_t __b) {
4924 return (uint32x4_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
4925 __ai uint8x16_t vtstq_u8(uint8x16_t __a, uint8x16_t __b) {
4926 return (uint8x16_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
4927 __ai uint16x8_t vtstq_u16(uint16x8_t __a, uint16x8_t __b) {
4928 return (uint16x8_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
4929 __ai uint32x4_t vtstq_u32(uint32x4_t __a, uint32x4_t __b) {
4930 return (uint32x4_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
4931 __ai uint8x16_t vtstq_p8(poly8x16_t __a, poly8x16_t __b) {
4932 return (uint8x16_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
4933
4934 __ai int8x8x2_t vuzp_s8(int8x8_t __a, int8x8_t __b) {
4935 int8x8x2_t r; __builtin_neon_vuzp_v(&r, __a, __b, 0); return r; }
4936 __ai int16x4x2_t vuzp_s16(int16x4_t __a, int16x4_t __b) {
4937 int16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); ret urn r; }
4938 __ai int32x2x2_t vuzp_s32(int32x2_t __a, int32x2_t __b) {
4939 int32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); ret urn r; }
4940 __ai uint8x8x2_t vuzp_u8(uint8x8_t __a, uint8x8_t __b) {
4941 uint8x8x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 16); re turn r; }
4942 __ai uint16x4x2_t vuzp_u16(uint16x4_t __a, uint16x4_t __b) {
4943 uint16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 17); r eturn r; }
4944 __ai uint32x2x2_t vuzp_u32(uint32x2_t __a, uint32x2_t __b) {
4945 uint32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 18); r eturn r; }
4946 __ai float32x2x2_t vuzp_f32(float32x2_t __a, float32x2_t __b) {
4947 float32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 7); r eturn r; }
4948 __ai poly8x8x2_t vuzp_p8(poly8x8_t __a, poly8x8_t __b) {
4949 poly8x8x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); ret urn r; }
4950 __ai poly16x4x2_t vuzp_p16(poly16x4_t __a, poly16x4_t __b) {
4951 poly16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); re turn r; }
4952 __ai int8x16x2_t vuzpq_s8(int8x16_t __a, int8x16_t __b) {
4953 int8x16x2_t r; __builtin_neon_vuzpq_v(&r, __a, __b, 32); return r; }
4954 __ai int16x8x2_t vuzpq_s16(int16x8_t __a, int16x8_t __b) {
4955 int16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 33); return r; }
4956 __ai int32x4x2_t vuzpq_s32(int32x4_t __a, int32x4_t __b) {
4957 int32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 34); return r; }
4958 __ai uint8x16x2_t vuzpq_u8(uint8x16_t __a, uint8x16_t __b) {
4959 uint8x16x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 48) ; return r; }
4960 __ai uint16x8x2_t vuzpq_u16(uint16x8_t __a, uint16x8_t __b) {
4961 uint16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 49) ; return r; }
4962 __ai uint32x4x2_t vuzpq_u32(uint32x4_t __a, uint32x4_t __b) {
4963 uint32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 50) ; return r; }
4964 __ai float32x4x2_t vuzpq_f32(float32x4_t __a, float32x4_t __b) {
4965 float32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 39 ); return r; }
4966 __ai poly8x16x2_t vuzpq_p8(poly8x16_t __a, poly8x16_t __b) {
4967 poly8x16x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 36) ; return r; }
4968 __ai poly16x8x2_t vuzpq_p16(poly16x8_t __a, poly16x8_t __b) {
4969 poly16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 37) ; return r; }
4970
4971 __ai int8x8x2_t vzip_s8(int8x8_t __a, int8x8_t __b) {
4972 int8x8x2_t r; __builtin_neon_vzip_v(&r, __a, __b, 0); return r; }
4973 __ai int16x4x2_t vzip_s16(int16x4_t __a, int16x4_t __b) {
4974 int16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); ret urn r; }
4975 __ai int32x2x2_t vzip_s32(int32x2_t __a, int32x2_t __b) {
4976 int32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); ret urn r; }
4977 __ai uint8x8x2_t vzip_u8(uint8x8_t __a, uint8x8_t __b) {
4978 uint8x8x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 16); re turn r; }
4979 __ai uint16x4x2_t vzip_u16(uint16x4_t __a, uint16x4_t __b) {
4980 uint16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 17); r eturn r; }
4981 __ai uint32x2x2_t vzip_u32(uint32x2_t __a, uint32x2_t __b) {
4982 uint32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 18); r eturn r; }
4983 __ai float32x2x2_t vzip_f32(float32x2_t __a, float32x2_t __b) {
4984 float32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 7); r eturn r; }
4985 __ai poly8x8x2_t vzip_p8(poly8x8_t __a, poly8x8_t __b) {
4986 poly8x8x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); ret urn r; }
4987 __ai poly16x4x2_t vzip_p16(poly16x4_t __a, poly16x4_t __b) {
4988 poly16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); re turn r; }
4989 __ai int8x16x2_t vzipq_s8(int8x16_t __a, int8x16_t __b) {
4990 int8x16x2_t r; __builtin_neon_vzipq_v(&r, __a, __b, 32); return r; }
4991 __ai int16x8x2_t vzipq_s16(int16x8_t __a, int16x8_t __b) {
4992 int16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 33); return r; }
4993 __ai int32x4x2_t vzipq_s32(int32x4_t __a, int32x4_t __b) {
4994 int32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 34); return r; }
4995 __ai uint8x16x2_t vzipq_u8(uint8x16_t __a, uint8x16_t __b) {
4996 uint8x16x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 48) ; return r; }
4997 __ai uint16x8x2_t vzipq_u16(uint16x8_t __a, uint16x8_t __b) {
4998 uint16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 49) ; return r; }
4999 __ai uint32x4x2_t vzipq_u32(uint32x4_t __a, uint32x4_t __b) {
5000 uint32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 50) ; return r; }
5001 __ai float32x4x2_t vzipq_f32(float32x4_t __a, float32x4_t __b) {
5002 float32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 39 ); return r; }
5003 __ai poly8x16x2_t vzipq_p8(poly8x16_t __a, poly8x16_t __b) {
5004 poly8x16x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 36) ; return r; }
5005 __ai poly16x8x2_t vzipq_p16(poly16x8_t __a, poly16x8_t __b) {
5006 poly16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 37) ; return r; }
5007
5008 #undef __ai
5009
5010 #endif /* __ARM_NEON_H */
OLDNEW
« no previous file with comments | « Linux_ia32/lib/clang/3.2/include/ammintrin.h ('k') | Linux_ia32/lib/clang/3.2/include/avx2intrin.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698