Chromium Code Reviews

Side by Side Diff: third_party/asan/asan_clang_Linux/lib/clang/3.1/include/arm_neon.h

Issue 8575016: Update ASan Linux binaries to r1085 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/
Patch Set: Created 9 years, 1 month ago
1 /*===---- arm_neon.h - ARM Neon intrinsics ---------------------------------=== 1 /*===---- arm_neon.h - ARM Neon intrinsics ---------------------------------===
2 * 2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy 3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal 4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights 5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is 7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions: 8 * furnished to do so, subject to the following conditions:
9 * 9 *
10 * The above copyright notice and this permission notice shall be included in 10 * The above copyright notice and this permission notice shall be included in
(...skipping 332 matching lines...)
343 poly16x4_t val[4]; 343 poly16x4_t val[4];
344 } poly16x4x4_t; 344 } poly16x4x4_t;
345 345
346 typedef struct poly16x8x4_t { 346 typedef struct poly16x8x4_t {
347 poly16x8_t val[4]; 347 poly16x8_t val[4];
348 } poly16x8x4_t; 348 } poly16x8x4_t;
349 349
350 #define __ai static __attribute__((__always_inline__, __nodebug__)) 350 #define __ai static __attribute__((__always_inline__, __nodebug__))
351 351
352 __ai int16x8_t vmovl_s8(int8x8_t __a) { \ 352 __ai int16x8_t vmovl_s8(int8x8_t __a) { \
353 return (int16x8_t)__builtin_neon_vmovl_v(__a, 17); } 353 return (int16x8_t)__builtin_neon_vmovl_v(__a, 33); }
354 __ai int32x4_t vmovl_s16(int16x4_t __a) { \ 354 __ai int32x4_t vmovl_s16(int16x4_t __a) { \
355 return (int32x4_t)__builtin_neon_vmovl_v((int8x8_t)__a, 18); } 355 return (int32x4_t)__builtin_neon_vmovl_v((int8x8_t)__a, 34); }
356 __ai int64x2_t vmovl_s32(int32x2_t __a) { \ 356 __ai int64x2_t vmovl_s32(int32x2_t __a) { \
357 return (int64x2_t)__builtin_neon_vmovl_v((int8x8_t)__a, 19); } 357 return (int64x2_t)__builtin_neon_vmovl_v((int8x8_t)__a, 35); }
358 __ai uint16x8_t vmovl_u8(uint8x8_t __a) { \ 358 __ai uint16x8_t vmovl_u8(uint8x8_t __a) { \
359 return (uint16x8_t)__builtin_neon_vmovl_v((int8x8_t)__a, 25); } 359 return (uint16x8_t)__builtin_neon_vmovl_v((int8x8_t)__a, 49); }
360 __ai uint32x4_t vmovl_u16(uint16x4_t __a) { \ 360 __ai uint32x4_t vmovl_u16(uint16x4_t __a) { \
361 return (uint32x4_t)__builtin_neon_vmovl_v((int8x8_t)__a, 26); } 361 return (uint32x4_t)__builtin_neon_vmovl_v((int8x8_t)__a, 50); }
362 __ai uint64x2_t vmovl_u32(uint32x2_t __a) { \ 362 __ai uint64x2_t vmovl_u32(uint32x2_t __a) { \
363 return (uint64x2_t)__builtin_neon_vmovl_v((int8x8_t)__a, 27); } 363 return (uint64x2_t)__builtin_neon_vmovl_v((int8x8_t)__a, 51); }
364 364
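
Note: the only change in this block (and in most of the file) is the last argument to each __builtin_neon_* call; the intrinsic prototypes are untouched. Judging purely from the pairs visible in this diff (17->33, 25->49, 8->16, 4->7, 5->4, ...), that constant looks like an internal type code whose "unsigned" and "quad" bits each moved up one bit position and whose element-type numbering was reshuffled, so user code should be unaffected. An illustrative caller (hypothetical helper name, not part of this patch) compiles identically against either revision:

#include <arm_neon.h>
/* widen_u8: zero-extend each 8-bit lane to 16 bits. */
uint16x8_t widen_u8(uint8x8_t v) {
  return vmovl_u8(v);
}
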
365 __ai int16x8_t vmull_s8(int8x8_t __a, int8x8_t __b) { \ 365 __ai int16x8_t vmull_s8(int8x8_t __a, int8x8_t __b) { \
366 return (int16x8_t)__builtin_neon_vmull_v(__a, __b, 17); } 366 return (int16x8_t)__builtin_neon_vmull_v(__a, __b, 33); }
367 __ai int32x4_t vmull_s16(int16x4_t __a, int16x4_t __b) { \ 367 __ai int32x4_t vmull_s16(int16x4_t __a, int16x4_t __b) { \
368 return (int32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 18); } 368 return (int32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 34); }
369 __ai int64x2_t vmull_s32(int32x2_t __a, int32x2_t __b) { \ 369 __ai int64x2_t vmull_s32(int32x2_t __a, int32x2_t __b) { \
370 return (int64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 19); } 370 return (int64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 35); }
371 __ai uint16x8_t vmull_u8(uint8x8_t __a, uint8x8_t __b) { \ 371 __ai uint16x8_t vmull_u8(uint8x8_t __a, uint8x8_t __b) { \
372 return (uint16x8_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 25); } 372 return (uint16x8_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 49); }
373 __ai uint32x4_t vmull_u16(uint16x4_t __a, uint16x4_t __b) { \ 373 __ai uint32x4_t vmull_u16(uint16x4_t __a, uint16x4_t __b) { \
374 return (uint32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 26); } 374 return (uint32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 50); }
375 __ai uint64x2_t vmull_u32(uint32x2_t __a, uint32x2_t __b) { \ 375 __ai uint64x2_t vmull_u32(uint32x2_t __a, uint32x2_t __b) { \
376 return (uint64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 27); } 376 return (uint64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 51); }
377 __ai poly16x8_t vmull_p8(poly8x8_t __a, poly8x8_t __b) { \ 377 __ai poly16x8_t vmull_p8(poly8x8_t __a, poly8x8_t __b) { \
378 return (poly16x8_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 22); } 378 return (poly16x8_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)__b, 37); }
379 379
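
Note: the vmull_* intrinsics multiply two 64-bit vectors into double-width lanes, so the products cannot overflow. Illustrative sketch only (hypothetical helper name):

#include <arm_neon.h>
/* mul_widen_u8: per-lane 8x8 -> 16-bit products. */
uint16x8_t mul_widen_u8(uint8x8_t a, uint8x8_t b) {
  return vmull_u8(a, b);
}
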
380 __ai int8x8_t vabd_s8(int8x8_t __a, int8x8_t __b) { \ 380 __ai int8x8_t vabd_s8(int8x8_t __a, int8x8_t __b) { \
381 return (int8x8_t)__builtin_neon_vabd_v(__a, __b, 0); } 381 return (int8x8_t)__builtin_neon_vabd_v(__a, __b, 0); }
382 __ai int16x4_t vabd_s16(int16x4_t __a, int16x4_t __b) { \ 382 __ai int16x4_t vabd_s16(int16x4_t __a, int16x4_t __b) { \
383 return (int16x4_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 1); } 383 return (int16x4_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 1); }
384 __ai int32x2_t vabd_s32(int32x2_t __a, int32x2_t __b) { \ 384 __ai int32x2_t vabd_s32(int32x2_t __a, int32x2_t __b) { \
385 return (int32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 2); } 385 return (int32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 2); }
386 __ai uint8x8_t vabd_u8(uint8x8_t __a, uint8x8_t __b) { \ 386 __ai uint8x8_t vabd_u8(uint8x8_t __a, uint8x8_t __b) { \
387 return (uint8x8_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 8); } 387 return (uint8x8_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 16); }
388 __ai uint16x4_t vabd_u16(uint16x4_t __a, uint16x4_t __b) { \ 388 __ai uint16x4_t vabd_u16(uint16x4_t __a, uint16x4_t __b) { \
389 return (uint16x4_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 9); } 389 return (uint16x4_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 17); }
390 __ai uint32x2_t vabd_u32(uint32x2_t __a, uint32x2_t __b) { \ 390 __ai uint32x2_t vabd_u32(uint32x2_t __a, uint32x2_t __b) { \
391 return (uint32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 10); } 391 return (uint32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 18); }
392 __ai float32x2_t vabd_f32(float32x2_t __a, float32x2_t __b) { \ 392 __ai float32x2_t vabd_f32(float32x2_t __a, float32x2_t __b) { \
393 return (float32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 4); } 393 return (float32x2_t)__builtin_neon_vabd_v((int8x8_t)__a, (int8x8_t)__b, 7); }
394 __ai int8x16_t vabdq_s8(int8x16_t __a, int8x16_t __b) { \ 394 __ai int8x16_t vabdq_s8(int8x16_t __a, int8x16_t __b) { \
395 return (int8x16_t)__builtin_neon_vabdq_v(__a, __b, 16); } 395 return (int8x16_t)__builtin_neon_vabdq_v(__a, __b, 32); }
396 __ai int16x8_t vabdq_s16(int16x8_t __a, int16x8_t __b) { \ 396 __ai int16x8_t vabdq_s16(int16x8_t __a, int16x8_t __b) { \
397 return (int16x8_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 17); } 397 return (int16x8_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
398 __ai int32x4_t vabdq_s32(int32x4_t __a, int32x4_t __b) { \ 398 __ai int32x4_t vabdq_s32(int32x4_t __a, int32x4_t __b) { \
399 return (int32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 18); } 399 return (int32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
400 __ai uint8x16_t vabdq_u8(uint8x16_t __a, uint8x16_t __b) { \ 400 __ai uint8x16_t vabdq_u8(uint8x16_t __a, uint8x16_t __b) { \
401 return (uint8x16_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 24); } 401 return (uint8x16_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
402 __ai uint16x8_t vabdq_u16(uint16x8_t __a, uint16x8_t __b) { \ 402 __ai uint16x8_t vabdq_u16(uint16x8_t __a, uint16x8_t __b) { \
403 return (uint16x8_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 25); } 403 return (uint16x8_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
404 __ai uint32x4_t vabdq_u32(uint32x4_t __a, uint32x4_t __b) { \ 404 __ai uint32x4_t vabdq_u32(uint32x4_t __a, uint32x4_t __b) { \
405 return (uint32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 405 return (uint32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
406 __ai float32x4_t vabdq_f32(float32x4_t __a, float32x4_t __b) { \ 406 __ai float32x4_t vabdq_f32(float32x4_t __a, float32x4_t __b) { \
407 return (float32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 20); } 407 return (float32x4_t)__builtin_neon_vabdq_v((int8x16_t)__a, (int8x16_t)__b, 39); }
408 408
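
Note: vabd_* is the per-lane absolute difference, and the vaba_* wrappers just below accumulate it as __a + vabd(__b, __c), the building block of a sum-of-absolute-differences loop. Illustrative sketch (hypothetical helper name):

#include <arm_neon.h>
/* sad_step: acc[i] += |x[i] - y[i]| for eight bytes at a time. */
uint8x8_t sad_step(uint8x8_t acc, uint8x8_t x, uint8x8_t y) {
  return vaba_u8(acc, x, y);
}
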
409 __ai int8x8_t vaba_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { \ 409 __ai int8x8_t vaba_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { \
410 return __a + vabd_s8(__b, __c); } 410 return __a + vabd_s8(__b, __c); }
411 __ai int16x4_t vaba_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) { \ 411 __ai int16x4_t vaba_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) { \
412 return __a + vabd_s16(__b, __c); } 412 return __a + vabd_s16(__b, __c); }
413 __ai int32x2_t vaba_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) { \ 413 __ai int32x2_t vaba_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) { \
414 return __a + vabd_s32(__b, __c); } 414 return __a + vabd_s32(__b, __c); }
415 __ai uint8x8_t vaba_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { \ 415 __ai uint8x8_t vaba_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { \
416 return __a + vabd_u8(__b, __c); } 416 return __a + vabd_u8(__b, __c); }
417 __ai uint16x4_t vaba_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { \ 417 __ai uint16x4_t vaba_u16(uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) { \
(...skipping 39 matching lines...)
457 __ai uint64x2_t vabdl_u32(uint32x2_t __a, uint32x2_t __b) { \ 457 __ai uint64x2_t vabdl_u32(uint32x2_t __a, uint32x2_t __b) { \
458 return vmovl_u32(vabd_u32(__a, __b)); } 458 return vmovl_u32(vabd_u32(__a, __b)); }
459 459
460 __ai int8x8_t vabs_s8(int8x8_t __a) { \ 460 __ai int8x8_t vabs_s8(int8x8_t __a) { \
461 return (int8x8_t)__builtin_neon_vabs_v(__a, 0); } 461 return (int8x8_t)__builtin_neon_vabs_v(__a, 0); }
462 __ai int16x4_t vabs_s16(int16x4_t __a) { \ 462 __ai int16x4_t vabs_s16(int16x4_t __a) { \
463 return (int16x4_t)__builtin_neon_vabs_v((int8x8_t)__a, 1); } 463 return (int16x4_t)__builtin_neon_vabs_v((int8x8_t)__a, 1); }
464 __ai int32x2_t vabs_s32(int32x2_t __a) { \ 464 __ai int32x2_t vabs_s32(int32x2_t __a) { \
465 return (int32x2_t)__builtin_neon_vabs_v((int8x8_t)__a, 2); } 465 return (int32x2_t)__builtin_neon_vabs_v((int8x8_t)__a, 2); }
466 __ai float32x2_t vabs_f32(float32x2_t __a) { \ 466 __ai float32x2_t vabs_f32(float32x2_t __a) { \
467 return (float32x2_t)__builtin_neon_vabs_v((int8x8_t)__a, 4); } 467 return (float32x2_t)__builtin_neon_vabs_v((int8x8_t)__a, 7); }
468 __ai int8x16_t vabsq_s8(int8x16_t __a) { \ 468 __ai int8x16_t vabsq_s8(int8x16_t __a) { \
469 return (int8x16_t)__builtin_neon_vabsq_v(__a, 16); } 469 return (int8x16_t)__builtin_neon_vabsq_v(__a, 32); }
470 __ai int16x8_t vabsq_s16(int16x8_t __a) { \ 470 __ai int16x8_t vabsq_s16(int16x8_t __a) { \
471 return (int16x8_t)__builtin_neon_vabsq_v((int8x16_t)__a, 17); } 471 return (int16x8_t)__builtin_neon_vabsq_v((int8x16_t)__a, 33); }
472 __ai int32x4_t vabsq_s32(int32x4_t __a) { \ 472 __ai int32x4_t vabsq_s32(int32x4_t __a) { \
473 return (int32x4_t)__builtin_neon_vabsq_v((int8x16_t)__a, 18); } 473 return (int32x4_t)__builtin_neon_vabsq_v((int8x16_t)__a, 34); }
474 __ai float32x4_t vabsq_f32(float32x4_t __a) { \ 474 __ai float32x4_t vabsq_f32(float32x4_t __a) { \
475 return (float32x4_t)__builtin_neon_vabsq_v((int8x16_t)__a, 20); } 475 return (float32x4_t)__builtin_neon_vabsq_v((int8x16_t)__a, 39); }
476 476
477 __ai int8x8_t vadd_s8(int8x8_t __a, int8x8_t __b) { \ 477 __ai int8x8_t vadd_s8(int8x8_t __a, int8x8_t __b) { \
478 return __a + __b; } 478 return __a + __b; }
479 __ai int16x4_t vadd_s16(int16x4_t __a, int16x4_t __b) { \ 479 __ai int16x4_t vadd_s16(int16x4_t __a, int16x4_t __b) { \
480 return __a + __b; } 480 return __a + __b; }
481 __ai int32x2_t vadd_s32(int32x2_t __a, int32x2_t __b) { \ 481 __ai int32x2_t vadd_s32(int32x2_t __a, int32x2_t __b) { \
482 return __a + __b; } 482 return __a + __b; }
483 __ai int64x1_t vadd_s64(int64x1_t __a, int64x1_t __b) { \ 483 __ai int64x1_t vadd_s64(int64x1_t __a, int64x1_t __b) { \
484 return __a + __b; } 484 return __a + __b; }
485 __ai float32x2_t vadd_f32(float32x2_t __a, float32x2_t __b) { \ 485 __ai float32x2_t vadd_f32(float32x2_t __a, float32x2_t __b) { \
(...skipping 25 matching lines...)
511 __ai uint64x2_t vaddq_u64(uint64x2_t __a, uint64x2_t __b) { \ 511 __ai uint64x2_t vaddq_u64(uint64x2_t __a, uint64x2_t __b) { \
512 return __a + __b; } 512 return __a + __b; }
513 513
514 __ai int8x8_t vaddhn_s16(int16x8_t __a, int16x8_t __b) { \ 514 __ai int8x8_t vaddhn_s16(int16x8_t __a, int16x8_t __b) { \
515 return (int8x8_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 0); } 515 return (int8x8_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 0); }
516 __ai int16x4_t vaddhn_s32(int32x4_t __a, int32x4_t __b) { \ 516 __ai int16x4_t vaddhn_s32(int32x4_t __a, int32x4_t __b) { \
517 return (int16x4_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 1); } 517 return (int16x4_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 1); }
518 __ai int32x2_t vaddhn_s64(int64x2_t __a, int64x2_t __b) { \ 518 __ai int32x2_t vaddhn_s64(int64x2_t __a, int64x2_t __b) { \
519 return (int32x2_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 2); } 519 return (int32x2_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 2); }
520 __ai uint8x8_t vaddhn_u16(uint16x8_t __a, uint16x8_t __b) { \ 520 __ai uint8x8_t vaddhn_u16(uint16x8_t __a, uint16x8_t __b) { \
521 return (uint8x8_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 8); } 521 return (uint8x8_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 16); }
522 __ai uint16x4_t vaddhn_u32(uint32x4_t __a, uint32x4_t __b) { \ 522 __ai uint16x4_t vaddhn_u32(uint32x4_t __a, uint32x4_t __b) { \
523 return (uint16x4_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 9); } 523 return (uint16x4_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 17); }
524 __ai uint32x2_t vaddhn_u64(uint64x2_t __a, uint64x2_t __b) { \ 524 __ai uint32x2_t vaddhn_u64(uint64x2_t __a, uint64x2_t __b) { \
525 return (uint32x2_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 10); } 525 return (uint32x2_t)__builtin_neon_vaddhn_v((int8x16_t)__a, (int8x16_t)__b, 18); }
526 526
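
Note: vaddhn_* adds two wide vectors and keeps only the high half of each sum, i.e. a truncating (a + b) >> 8 in the 16-bit case. Illustrative sketch (hypothetical helper name):

#include <arm_neon.h>
/* add_high_halves: narrow each 16-bit sum to its top 8 bits. */
uint8x8_t add_high_halves(uint16x8_t a, uint16x8_t b) {
  return vaddhn_u16(a, b);
}
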
527 __ai int16x8_t vaddl_s8(int8x8_t __a, int8x8_t __b) { \ 527 __ai int16x8_t vaddl_s8(int8x8_t __a, int8x8_t __b) { \
528 return vmovl_s8(__a) + vmovl_s8(__b); } 528 return vmovl_s8(__a) + vmovl_s8(__b); }
529 __ai int32x4_t vaddl_s16(int16x4_t __a, int16x4_t __b) { \ 529 __ai int32x4_t vaddl_s16(int16x4_t __a, int16x4_t __b) { \
530 return vmovl_s16(__a) + vmovl_s16(__b); } 530 return vmovl_s16(__a) + vmovl_s16(__b); }
531 __ai int64x2_t vaddl_s32(int32x2_t __a, int32x2_t __b) { \ 531 __ai int64x2_t vaddl_s32(int32x2_t __a, int32x2_t __b) { \
532 return vmovl_s32(__a) + vmovl_s32(__b); } 532 return vmovl_s32(__a) + vmovl_s32(__b); }
533 __ai uint16x8_t vaddl_u8(uint8x8_t __a, uint8x8_t __b) { \ 533 __ai uint16x8_t vaddl_u8(uint8x8_t __a, uint8x8_t __b) { \
534 return vmovl_u8(__a) + vmovl_u8(__b); } 534 return vmovl_u8(__a) + vmovl_u8(__b); }
535 __ai uint32x4_t vaddl_u16(uint16x4_t __a, uint16x4_t __b) { \ 535 __ai uint32x4_t vaddl_u16(uint16x4_t __a, uint16x4_t __b) { \
(...skipping 119 matching lines...)
655 __ai uint64x2_t vbslq_u64(uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) { \ 655 __ai uint64x2_t vbslq_u64(uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) { \
656 return (uint64x2_t)((__a & (uint64x2_t)__b) | (~__a & (uint64x2_t)__c)); } 656 return (uint64x2_t)((__a & (uint64x2_t)__b) | (~__a & (uint64x2_t)__c)); }
657 __ai float32x4_t vbslq_f32(uint32x4_t __a, float32x4_t __b, float32x4_t __c) { \ 657 __ai float32x4_t vbslq_f32(uint32x4_t __a, float32x4_t __b, float32x4_t __c) { \
658 return (float32x4_t)((__a & (uint32x4_t)__b) | (~__a & (uint32x4_t)__c)); } 658 return (float32x4_t)((__a & (uint32x4_t)__b) | (~__a & (uint32x4_t)__c)); }
659 __ai poly8x16_t vbslq_p8(uint8x16_t __a, poly8x16_t __b, poly8x16_t __c) { \ 659 __ai poly8x16_t vbslq_p8(uint8x16_t __a, poly8x16_t __b, poly8x16_t __c) { \
660 return (poly8x16_t)((__a & (uint8x16_t)__b) | (~__a & (uint8x16_t)__c)); } 660 return (poly8x16_t)((__a & (uint8x16_t)__b) | (~__a & (uint8x16_t)__c)); }
661 __ai poly16x8_t vbslq_p16(uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) { \ 661 __ai poly16x8_t vbslq_p16(uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) { \
662 return (poly16x8_t)((__a & (uint16x8_t)__b) | (~__a & (uint16x8_t)__c)); } 662 return (poly16x8_t)((__a & (uint16x8_t)__b) | (~__a & (uint16x8_t)__c)); }
663 663
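
Note: as the bodies above show, vbsl* is a bitwise select, (mask & b) | (~mask & c), so a lane whose mask bits are all ones takes the lane from the second operand. Illustrative sketch (hypothetical helper name):

#include <arm_neon.h>
/* blend_f32: pick on_true where mask lanes are all ones, on_false elsewhere. */
float32x4_t blend_f32(uint32x4_t mask, float32x4_t on_true, float32x4_t on_false) {
  return vbslq_f32(mask, on_true, on_false);
}
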
664 __ai uint32x2_t vcage_f32(float32x2_t __a, float32x2_t __b) { \ 664 __ai uint32x2_t vcage_f32(float32x2_t __a, float32x2_t __b) { \
665 return (uint32x2_t)__builtin_neon_vcage_v((int8x8_t)__a, (int8x8_t)__b, 10); } 665 return (uint32x2_t)__builtin_neon_vcage_v((int8x8_t)__a, (int8x8_t)__b, 18); }
666 __ai uint32x4_t vcageq_f32(float32x4_t __a, float32x4_t __b) { \ 666 __ai uint32x4_t vcageq_f32(float32x4_t __a, float32x4_t __b) { \
667 return (uint32x4_t)__builtin_neon_vcageq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 667 return (uint32x4_t)__builtin_neon_vcageq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
668 668
669 __ai uint32x2_t vcagt_f32(float32x2_t __a, float32x2_t __b) { \ 669 __ai uint32x2_t vcagt_f32(float32x2_t __a, float32x2_t __b) { \
670 return (uint32x2_t)__builtin_neon_vcagt_v((int8x8_t)__a, (int8x8_t)__b, 10); } 670 return (uint32x2_t)__builtin_neon_vcagt_v((int8x8_t)__a, (int8x8_t)__b, 18); }
671 __ai uint32x4_t vcagtq_f32(float32x4_t __a, float32x4_t __b) { \ 671 __ai uint32x4_t vcagtq_f32(float32x4_t __a, float32x4_t __b) { \
672 return (uint32x4_t)__builtin_neon_vcagtq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 672 return (uint32x4_t)__builtin_neon_vcagtq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
673 673
674 __ai uint32x2_t vcale_f32(float32x2_t __a, float32x2_t __b) { \ 674 __ai uint32x2_t vcale_f32(float32x2_t __a, float32x2_t __b) { \
675 return (uint32x2_t)__builtin_neon_vcale_v((int8x8_t)__a, (int8x8_t)__b, 10); } 675 return (uint32x2_t)__builtin_neon_vcale_v((int8x8_t)__a, (int8x8_t)__b, 18); }
676 __ai uint32x4_t vcaleq_f32(float32x4_t __a, float32x4_t __b) { \ 676 __ai uint32x4_t vcaleq_f32(float32x4_t __a, float32x4_t __b) { \
677 return (uint32x4_t)__builtin_neon_vcaleq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 677 return (uint32x4_t)__builtin_neon_vcaleq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
678 678
679 __ai uint32x2_t vcalt_f32(float32x2_t __a, float32x2_t __b) { \ 679 __ai uint32x2_t vcalt_f32(float32x2_t __a, float32x2_t __b) { \
680 return (uint32x2_t)__builtin_neon_vcalt_v((int8x8_t)__a, (int8x8_t)__b, 10); } 680 return (uint32x2_t)__builtin_neon_vcalt_v((int8x8_t)__a, (int8x8_t)__b, 18); }
681 __ai uint32x4_t vcaltq_f32(float32x4_t __a, float32x4_t __b) { \ 681 __ai uint32x4_t vcaltq_f32(float32x4_t __a, float32x4_t __b) { \
682 return (uint32x4_t)__builtin_neon_vcaltq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 682 return (uint32x4_t)__builtin_neon_vcaltq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
683 683
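
Note: the vcage/vcagt/vcale/vcalt family compares absolute values and yields all-ones or all-zero lanes, which combines naturally with the vbsl select above. Illustrative sketch (hypothetical helper name):

#include <arm_neon.h>
/* abs_ge: lane i is 0xFFFFFFFF when |a[i]| >= |b[i]|, else 0. */
uint32x2_t abs_ge(float32x2_t a, float32x2_t b) {
  return vcage_f32(a, b);
}
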
684 __ai uint8x8_t vceq_s8(int8x8_t __a, int8x8_t __b) { \ 684 __ai uint8x8_t vceq_s8(int8x8_t __a, int8x8_t __b) { \
685 return (uint8x8_t)(__a == __b); } 685 return (uint8x8_t)(__a == __b); }
686 __ai uint16x4_t vceq_s16(int16x4_t __a, int16x4_t __b) { \ 686 __ai uint16x4_t vceq_s16(int16x4_t __a, int16x4_t __b) { \
687 return (uint16x4_t)(__a == __b); } 687 return (uint16x4_t)(__a == __b); }
688 __ai uint32x2_t vceq_s32(int32x2_t __a, int32x2_t __b) { \ 688 __ai uint32x2_t vceq_s32(int32x2_t __a, int32x2_t __b) { \
689 return (uint32x2_t)(__a == __b); } 689 return (uint32x2_t)(__a == __b); }
690 __ai uint32x2_t vceq_f32(float32x2_t __a, float32x2_t __b) { \ 690 __ai uint32x2_t vceq_f32(float32x2_t __a, float32x2_t __b) { \
691 return (uint32x2_t)(__a == __b); } 691 return (uint32x2_t)(__a == __b); }
692 __ai uint8x8_t vceq_u8(uint8x8_t __a, uint8x8_t __b) { \ 692 __ai uint8x8_t vceq_u8(uint8x8_t __a, uint8x8_t __b) { \
(...skipping 108 matching lines...)
801 __ai uint32x4_t vcleq_u32(uint32x4_t __a, uint32x4_t __b) { \ 801 __ai uint32x4_t vcleq_u32(uint32x4_t __a, uint32x4_t __b) { \
802 return (uint32x4_t)(__a <= __b); } 802 return (uint32x4_t)(__a <= __b); }
803 803
804 __ai int8x8_t vcls_s8(int8x8_t __a) { \ 804 __ai int8x8_t vcls_s8(int8x8_t __a) { \
805 return (int8x8_t)__builtin_neon_vcls_v(__a, 0); } 805 return (int8x8_t)__builtin_neon_vcls_v(__a, 0); }
806 __ai int16x4_t vcls_s16(int16x4_t __a) { \ 806 __ai int16x4_t vcls_s16(int16x4_t __a) { \
807 return (int16x4_t)__builtin_neon_vcls_v((int8x8_t)__a, 1); } 807 return (int16x4_t)__builtin_neon_vcls_v((int8x8_t)__a, 1); }
808 __ai int32x2_t vcls_s32(int32x2_t __a) { \ 808 __ai int32x2_t vcls_s32(int32x2_t __a) { \
809 return (int32x2_t)__builtin_neon_vcls_v((int8x8_t)__a, 2); } 809 return (int32x2_t)__builtin_neon_vcls_v((int8x8_t)__a, 2); }
810 __ai int8x16_t vclsq_s8(int8x16_t __a) { \ 810 __ai int8x16_t vclsq_s8(int8x16_t __a) { \
811 return (int8x16_t)__builtin_neon_vclsq_v(__a, 16); } 811 return (int8x16_t)__builtin_neon_vclsq_v(__a, 32); }
812 __ai int16x8_t vclsq_s16(int16x8_t __a) { \ 812 __ai int16x8_t vclsq_s16(int16x8_t __a) { \
813 return (int16x8_t)__builtin_neon_vclsq_v((int8x16_t)__a, 17); } 813 return (int16x8_t)__builtin_neon_vclsq_v((int8x16_t)__a, 33); }
814 __ai int32x4_t vclsq_s32(int32x4_t __a) { \ 814 __ai int32x4_t vclsq_s32(int32x4_t __a) { \
815 return (int32x4_t)__builtin_neon_vclsq_v((int8x16_t)__a, 18); } 815 return (int32x4_t)__builtin_neon_vclsq_v((int8x16_t)__a, 34); }
816 816
817 __ai uint8x8_t vclt_s8(int8x8_t __a, int8x8_t __b) { \ 817 __ai uint8x8_t vclt_s8(int8x8_t __a, int8x8_t __b) { \
818 return (uint8x8_t)(__a < __b); } 818 return (uint8x8_t)(__a < __b); }
819 __ai uint16x4_t vclt_s16(int16x4_t __a, int16x4_t __b) { \ 819 __ai uint16x4_t vclt_s16(int16x4_t __a, int16x4_t __b) { \
820 return (uint16x4_t)(__a < __b); } 820 return (uint16x4_t)(__a < __b); }
821 __ai uint32x2_t vclt_s32(int32x2_t __a, int32x2_t __b) { \ 821 __ai uint32x2_t vclt_s32(int32x2_t __a, int32x2_t __b) { \
822 return (uint32x2_t)(__a < __b); } 822 return (uint32x2_t)(__a < __b); }
823 __ai uint32x2_t vclt_f32(float32x2_t __a, float32x2_t __b) { \ 823 __ai uint32x2_t vclt_f32(float32x2_t __a, float32x2_t __b) { \
824 return (uint32x2_t)(__a < __b); } 824 return (uint32x2_t)(__a < __b); }
825 __ai uint8x8_t vclt_u8(uint8x8_t __a, uint8x8_t __b) { \ 825 __ai uint8x8_t vclt_u8(uint8x8_t __a, uint8x8_t __b) { \
(...skipping 17 matching lines...)
843 __ai uint32x4_t vcltq_u32(uint32x4_t __a, uint32x4_t __b) { \ 843 __ai uint32x4_t vcltq_u32(uint32x4_t __a, uint32x4_t __b) { \
844 return (uint32x4_t)(__a < __b); } 844 return (uint32x4_t)(__a < __b); }
845 845
846 __ai int8x8_t vclz_s8(int8x8_t __a) { \ 846 __ai int8x8_t vclz_s8(int8x8_t __a) { \
847 return (int8x8_t)__builtin_neon_vclz_v(__a, 0); } 847 return (int8x8_t)__builtin_neon_vclz_v(__a, 0); }
848 __ai int16x4_t vclz_s16(int16x4_t __a) { \ 848 __ai int16x4_t vclz_s16(int16x4_t __a) { \
849 return (int16x4_t)__builtin_neon_vclz_v((int8x8_t)__a, 1); } 849 return (int16x4_t)__builtin_neon_vclz_v((int8x8_t)__a, 1); }
850 __ai int32x2_t vclz_s32(int32x2_t __a) { \ 850 __ai int32x2_t vclz_s32(int32x2_t __a) { \
851 return (int32x2_t)__builtin_neon_vclz_v((int8x8_t)__a, 2); } 851 return (int32x2_t)__builtin_neon_vclz_v((int8x8_t)__a, 2); }
852 __ai uint8x8_t vclz_u8(uint8x8_t __a) { \ 852 __ai uint8x8_t vclz_u8(uint8x8_t __a) { \
853 return (uint8x8_t)__builtin_neon_vclz_v((int8x8_t)__a, 8); } 853 return (uint8x8_t)__builtin_neon_vclz_v((int8x8_t)__a, 16); }
854 __ai uint16x4_t vclz_u16(uint16x4_t __a) { \ 854 __ai uint16x4_t vclz_u16(uint16x4_t __a) { \
855 return (uint16x4_t)__builtin_neon_vclz_v((int8x8_t)__a, 9); } 855 return (uint16x4_t)__builtin_neon_vclz_v((int8x8_t)__a, 17); }
856 __ai uint32x2_t vclz_u32(uint32x2_t __a) { \ 856 __ai uint32x2_t vclz_u32(uint32x2_t __a) { \
857 return (uint32x2_t)__builtin_neon_vclz_v((int8x8_t)__a, 10); } 857 return (uint32x2_t)__builtin_neon_vclz_v((int8x8_t)__a, 18); }
858 __ai int8x16_t vclzq_s8(int8x16_t __a) { \ 858 __ai int8x16_t vclzq_s8(int8x16_t __a) { \
859 return (int8x16_t)__builtin_neon_vclzq_v(__a, 16); } 859 return (int8x16_t)__builtin_neon_vclzq_v(__a, 32); }
860 __ai int16x8_t vclzq_s16(int16x8_t __a) { \ 860 __ai int16x8_t vclzq_s16(int16x8_t __a) { \
861 return (int16x8_t)__builtin_neon_vclzq_v((int8x16_t)__a, 17); } 861 return (int16x8_t)__builtin_neon_vclzq_v((int8x16_t)__a, 33); }
862 __ai int32x4_t vclzq_s32(int32x4_t __a) { \ 862 __ai int32x4_t vclzq_s32(int32x4_t __a) { \
863 return (int32x4_t)__builtin_neon_vclzq_v((int8x16_t)__a, 18); } 863 return (int32x4_t)__builtin_neon_vclzq_v((int8x16_t)__a, 34); }
864 __ai uint8x16_t vclzq_u8(uint8x16_t __a) { \ 864 __ai uint8x16_t vclzq_u8(uint8x16_t __a) { \
865 return (uint8x16_t)__builtin_neon_vclzq_v((int8x16_t)__a, 24); } 865 return (uint8x16_t)__builtin_neon_vclzq_v((int8x16_t)__a, 48); }
866 __ai uint16x8_t vclzq_u16(uint16x8_t __a) { \ 866 __ai uint16x8_t vclzq_u16(uint16x8_t __a) { \
867 return (uint16x8_t)__builtin_neon_vclzq_v((int8x16_t)__a, 25); } 867 return (uint16x8_t)__builtin_neon_vclzq_v((int8x16_t)__a, 49); }
868 __ai uint32x4_t vclzq_u32(uint32x4_t __a) { \ 868 __ai uint32x4_t vclzq_u32(uint32x4_t __a) { \
869 return (uint32x4_t)__builtin_neon_vclzq_v((int8x16_t)__a, 26); } 869 return (uint32x4_t)__builtin_neon_vclzq_v((int8x16_t)__a, 50); }
870 870
871 __ai uint8x8_t vcnt_u8(uint8x8_t __a) { \ 871 __ai uint8x8_t vcnt_u8(uint8x8_t __a) { \
872 return (uint8x8_t)__builtin_neon_vcnt_v((int8x8_t)__a, 8); } 872 return (uint8x8_t)__builtin_neon_vcnt_v((int8x8_t)__a, 16); }
873 __ai int8x8_t vcnt_s8(int8x8_t __a) { \ 873 __ai int8x8_t vcnt_s8(int8x8_t __a) { \
874 return (int8x8_t)__builtin_neon_vcnt_v(__a, 0); } 874 return (int8x8_t)__builtin_neon_vcnt_v(__a, 0); }
875 __ai poly8x8_t vcnt_p8(poly8x8_t __a) { \ 875 __ai poly8x8_t vcnt_p8(poly8x8_t __a) { \
876 return (poly8x8_t)__builtin_neon_vcnt_v((int8x8_t)__a, 5); } 876 return (poly8x8_t)__builtin_neon_vcnt_v((int8x8_t)__a, 4); }
877 __ai uint8x16_t vcntq_u8(uint8x16_t __a) { \ 877 __ai uint8x16_t vcntq_u8(uint8x16_t __a) { \
878 return (uint8x16_t)__builtin_neon_vcntq_v((int8x16_t)__a, 24); } 878 return (uint8x16_t)__builtin_neon_vcntq_v((int8x16_t)__a, 48); }
879 __ai int8x16_t vcntq_s8(int8x16_t __a) { \ 879 __ai int8x16_t vcntq_s8(int8x16_t __a) { \
880 return (int8x16_t)__builtin_neon_vcntq_v(__a, 16); } 880 return (int8x16_t)__builtin_neon_vcntq_v(__a, 32); }
881 __ai poly8x16_t vcntq_p8(poly8x16_t __a) { \ 881 __ai poly8x16_t vcntq_p8(poly8x16_t __a) { \
882 return (poly8x16_t)__builtin_neon_vcntq_v((int8x16_t)__a, 21); } 882 return (poly8x16_t)__builtin_neon_vcntq_v((int8x16_t)__a, 36); }
883 883
884 __ai int8x16_t vcombine_s8(int8x8_t __a, int8x8_t __b) { \ 884 __ai int8x16_t vcombine_s8(int8x8_t __a, int8x8_t __b) { \
885 return (int8x16_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } 885 return (int8x16_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
886 __ai int16x8_t vcombine_s16(int16x4_t __a, int16x4_t __b) { \ 886 __ai int16x8_t vcombine_s16(int16x4_t __a, int16x4_t __b) { \
887 return (int16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } 887 return (int16x8_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
888 __ai int32x4_t vcombine_s32(int32x2_t __a, int32x2_t __b) { \ 888 __ai int32x4_t vcombine_s32(int32x2_t __a, int32x2_t __b) { \
889 return (int32x4_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } 889 return (int32x4_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
890 __ai int64x2_t vcombine_s64(int64x1_t __a, int64x1_t __b) { \ 890 __ai int64x2_t vcombine_s64(int64x1_t __a, int64x1_t __b) { \
891 return (int64x2_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); } 891 return (int64x2_t)__builtin_shufflevector((int64x1_t)__a, (int64x1_t)__b, 0, 1); }
892 __ai float16x8_t vcombine_f16(float16x4_t __a, float16x4_t __b) { \ 892 __ai float16x8_t vcombine_f16(float16x4_t __a, float16x4_t __b) { \
(...skipping 32 matching lines...)
925 __ai uint64x1_t vcreate_u64(uint64_t __a) { \ 925 __ai uint64x1_t vcreate_u64(uint64_t __a) { \
926 return (uint64x1_t)__a; } 926 return (uint64x1_t)__a; }
927 __ai poly8x8_t vcreate_p8(uint64_t __a) { \ 927 __ai poly8x8_t vcreate_p8(uint64_t __a) { \
928 return (poly8x8_t)__a; } 928 return (poly8x8_t)__a; }
929 __ai poly16x4_t vcreate_p16(uint64_t __a) { \ 929 __ai poly16x4_t vcreate_p16(uint64_t __a) { \
930 return (poly16x4_t)__a; } 930 return (poly16x4_t)__a; }
931 __ai int64x1_t vcreate_s64(uint64_t __a) { \ 931 __ai int64x1_t vcreate_s64(uint64_t __a) { \
932 return (int64x1_t)__a; } 932 return (int64x1_t)__a; }
933 933
934 __ai float16x4_t vcvt_f16_f32(float32x4_t __a) { \ 934 __ai float16x4_t vcvt_f16_f32(float32x4_t __a) { \
935 return (float16x4_t)__builtin_neon_vcvt_f16_v((int8x16_t)__a, 7); } 935 return (float16x4_t)__builtin_neon_vcvt_f16_v((int8x16_t)__a, 6); }
936 936
937 __ai float32x2_t vcvt_f32_s32(int32x2_t __a) { \ 937 __ai float32x2_t vcvt_f32_s32(int32x2_t __a) { \
938 return (float32x2_t)__builtin_neon_vcvt_f32_v((int8x8_t)__a, 2); } 938 return (float32x2_t)__builtin_neon_vcvt_f32_v((int8x8_t)__a, 2); }
939 __ai float32x2_t vcvt_f32_u32(uint32x2_t __a) { \ 939 __ai float32x2_t vcvt_f32_u32(uint32x2_t __a) { \
940 return (float32x2_t)__builtin_neon_vcvt_f32_v((int8x8_t)__a, 10); } 940 return (float32x2_t)__builtin_neon_vcvt_f32_v((int8x8_t)__a, 18); }
941 __ai float32x4_t vcvtq_f32_s32(int32x4_t __a) { \ 941 __ai float32x4_t vcvtq_f32_s32(int32x4_t __a) { \
942 return (float32x4_t)__builtin_neon_vcvtq_f32_v((int8x16_t)__a, 18); } 942 return (float32x4_t)__builtin_neon_vcvtq_f32_v((int8x16_t)__a, 34); }
943 __ai float32x4_t vcvtq_f32_u32(uint32x4_t __a) { \ 943 __ai float32x4_t vcvtq_f32_u32(uint32x4_t __a) { \
944 return (float32x4_t)__builtin_neon_vcvtq_f32_v((int8x16_t)__a, 26); } 944 return (float32x4_t)__builtin_neon_vcvtq_f32_v((int8x16_t)__a, 50); }
945 945
946 __ai float32x4_t vcvt_f32_f16(float16x4_t __a) { \ 946 __ai float32x4_t vcvt_f32_f16(float16x4_t __a) { \
947 return (float32x4_t)__builtin_neon_vcvt_f32_f16((int8x8_t)__a, 7); } 947 return (float32x4_t)__builtin_neon_vcvt_f32_f16((int8x8_t)__a, 6); }
948 948
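
Note: vcvt_f16_f32 and vcvt_f32_f16 move four lanes between half and single precision; float16x4_t is storage-only in this header, so no arithmetic is defined on it. Illustrative sketch, assuming a target with the NEON half-precision extension (hypothetical helper name):

#include <arm_neon.h>
/* halves_to_floats: expand four f16 values to f32. */
float32x4_t halves_to_floats(float16x4_t h) {
  return vcvt_f32_f16(h);
}
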
949 #define vcvt_n_f32_s32(a, __b) __extension__ ({ \ 949 #define vcvt_n_f32_s32(a, __b) __extension__ ({ \
950 int32x2_t __a = (a); \ 950 int32x2_t __a = (a); \
951 (float32x2_t)__builtin_neon_vcvt_n_f32_v((int8x8_t)__a, __b, 2); }) 951 (float32x2_t)__builtin_neon_vcvt_n_f32_v((int8x8_t)__a, __b, 2); })
952 #define vcvt_n_f32_u32(a, __b) __extension__ ({ \ 952 #define vcvt_n_f32_u32(a, __b) __extension__ ({ \
953 uint32x2_t __a = (a); \ 953 uint32x2_t __a = (a); \
954 (float32x2_t)__builtin_neon_vcvt_n_f32_v((int8x8_t)__a, __b, 10); }) 954 (float32x2_t)__builtin_neon_vcvt_n_f32_v((int8x8_t)__a, __b, 18); })
955 #define vcvtq_n_f32_s32(a, __b) __extension__ ({ \ 955 #define vcvtq_n_f32_s32(a, __b) __extension__ ({ \
956 int32x4_t __a = (a); \ 956 int32x4_t __a = (a); \
957 (float32x4_t)__builtin_neon_vcvtq_n_f32_v((int8x16_t)__a, __b, 18); }) 957 (float32x4_t)__builtin_neon_vcvtq_n_f32_v((int8x16_t)__a, __b, 34); })
958 #define vcvtq_n_f32_u32(a, __b) __extension__ ({ \ 958 #define vcvtq_n_f32_u32(a, __b) __extension__ ({ \
959 uint32x4_t __a = (a); \ 959 uint32x4_t __a = (a); \
960 (float32x4_t)__builtin_neon_vcvtq_n_f32_v((int8x16_t)__a, __b, 26); }) 960 (float32x4_t)__builtin_neon_vcvtq_n_f32_v((int8x16_t)__a, __b, 50); })
961 961
962 #define vcvt_n_s32_f32(a, __b) __extension__ ({ \ 962 #define vcvt_n_s32_f32(a, __b) __extension__ ({ \
963 float32x2_t __a = (a); \ 963 float32x2_t __a = (a); \
964 (int32x2_t)__builtin_neon_vcvt_n_s32_v((int8x8_t)__a, __b, 2); }) 964 (int32x2_t)__builtin_neon_vcvt_n_s32_v((int8x8_t)__a, __b, 2); })
965 #define vcvtq_n_s32_f32(a, __b) __extension__ ({ \ 965 #define vcvtq_n_s32_f32(a, __b) __extension__ ({ \
966 float32x4_t __a = (a); \ 966 float32x4_t __a = (a); \
967 (int32x4_t)__builtin_neon_vcvtq_n_s32_v((int8x16_t)__a, __b, 18); }) 967 (int32x4_t)__builtin_neon_vcvtq_n_s32_v((int8x16_t)__a, __b, 34); })
968 968
969 #define vcvt_n_u32_f32(a, __b) __extension__ ({ \ 969 #define vcvt_n_u32_f32(a, __b) __extension__ ({ \
970 float32x2_t __a = (a); \ 970 float32x2_t __a = (a); \
971 (uint32x2_t)__builtin_neon_vcvt_n_u32_v((int8x8_t)__a, __b, 10); }) 971 (uint32x2_t)__builtin_neon_vcvt_n_u32_v((int8x8_t)__a, __b, 18); })
972 #define vcvtq_n_u32_f32(a, __b) __extension__ ({ \ 972 #define vcvtq_n_u32_f32(a, __b) __extension__ ({ \
973 float32x4_t __a = (a); \ 973 float32x4_t __a = (a); \
974 (uint32x4_t)__builtin_neon_vcvtq_n_u32_v((int8x16_t)__a, __b, 26); }) 974 (uint32x4_t)__builtin_neon_vcvtq_n_u32_v((int8x16_t)__a, __b, 50); })
975 975
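
Note: the _n_ conversions are macros rather than __ai functions because the fraction-bit count __b must be a compile-time constant (1..32); they convert between fixed-point and float in one step. Illustrative sketch for a Q16.16 value (hypothetical helper name):

#include <arm_neon.h>
/* q16_to_float: divide each lane by 2^16 while converting to float. */
float32x2_t q16_to_float(int32x2_t q) {
  return vcvt_n_f32_s32(q, 16);
}
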
976 __ai int32x2_t vcvt_s32_f32(float32x2_t __a) { \ 976 __ai int32x2_t vcvt_s32_f32(float32x2_t __a) { \
977 return (int32x2_t)__builtin_neon_vcvt_s32_v((int8x8_t)__a, 2); } 977 return (int32x2_t)__builtin_neon_vcvt_s32_v((int8x8_t)__a, 2); }
978 __ai int32x4_t vcvtq_s32_f32(float32x4_t __a) { \ 978 __ai int32x4_t vcvtq_s32_f32(float32x4_t __a) { \
979 return (int32x4_t)__builtin_neon_vcvtq_s32_v((int8x16_t)__a, 18); } 979 return (int32x4_t)__builtin_neon_vcvtq_s32_v((int8x16_t)__a, 34); }
980 980
981 __ai uint32x2_t vcvt_u32_f32(float32x2_t __a) { \ 981 __ai uint32x2_t vcvt_u32_f32(float32x2_t __a) { \
982 return (uint32x2_t)__builtin_neon_vcvt_u32_v((int8x8_t)__a, 10); } 982 return (uint32x2_t)__builtin_neon_vcvt_u32_v((int8x8_t)__a, 18); }
983 __ai uint32x4_t vcvtq_u32_f32(float32x4_t __a) { \ 983 __ai uint32x4_t vcvtq_u32_f32(float32x4_t __a) { \
984 return (uint32x4_t)__builtin_neon_vcvtq_u32_v((int8x16_t)__a, 26); } 984 return (uint32x4_t)__builtin_neon_vcvtq_u32_v((int8x16_t)__a, 50); }
985 985
986 #define vdup_lane_u8(a, __b) __extension__ ({ \ 986 #define vdup_lane_u8(a, __b) __extension__ ({ \
987 uint8x8_t __a = (a); \ 987 uint8x8_t __a = (a); \
988 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); }) 988 __builtin_shufflevector(__a, __a, __b, __b, __b, __b, __b, __b, __b, __b); })
989 #define vdup_lane_u16(a, __b) __extension__ ({ \ 989 #define vdup_lane_u16(a, __b) __extension__ ({ \
990 uint16x4_t __a = (a); \ 990 uint16x4_t __a = (a); \
991 __builtin_shufflevector(__a, __a, __b, __b, __b, __b); }) 991 __builtin_shufflevector(__a, __a, __b, __b, __b, __b); })
992 #define vdup_lane_u32(a, __b) __extension__ ({ \ 992 #define vdup_lane_u32(a, __b) __extension__ ({ \
993 uint32x2_t __a = (a); \ 993 uint32x2_t __a = (a); \
994 __builtin_shufflevector(__a, __a, __b, __b); }) 994 __builtin_shufflevector(__a, __a, __b, __b); })
(...skipping 131 matching lines...)
1126 __ai uint32x4_t veorq_u32(uint32x4_t __a, uint32x4_t __b) { \ 1126 __ai uint32x4_t veorq_u32(uint32x4_t __a, uint32x4_t __b) { \
1127 return __a ^ __b; } 1127 return __a ^ __b; }
1128 __ai uint64x2_t veorq_u64(uint64x2_t __a, uint64x2_t __b) { \ 1128 __ai uint64x2_t veorq_u64(uint64x2_t __a, uint64x2_t __b) { \
1129 return __a ^ __b; } 1129 return __a ^ __b; }
1130 1130
1131 #define vext_s8(a, b, __c) __extension__ ({ \ 1131 #define vext_s8(a, b, __c) __extension__ ({ \
1132 int8x8_t __a = (a); int8x8_t __b = (b); \ 1132 int8x8_t __a = (a); int8x8_t __b = (b); \
1133 (int8x8_t)__builtin_neon_vext_v(__a, __b, __c, 0); }) 1133 (int8x8_t)__builtin_neon_vext_v(__a, __b, __c, 0); })
1134 #define vext_u8(a, b, __c) __extension__ ({ \ 1134 #define vext_u8(a, b, __c) __extension__ ({ \
1135 uint8x8_t __a = (a); uint8x8_t __b = (b); \ 1135 uint8x8_t __a = (a); uint8x8_t __b = (b); \
1136 (uint8x8_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 8); }) 1136 (uint8x8_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); })
1137 #define vext_p8(a, b, __c) __extension__ ({ \ 1137 #define vext_p8(a, b, __c) __extension__ ({ \
1138 poly8x8_t __a = (a); poly8x8_t __b = (b); \ 1138 poly8x8_t __a = (a); poly8x8_t __b = (b); \
1139 (poly8x8_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); }) 1139 (poly8x8_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); })
1140 #define vext_s16(a, b, __c) __extension__ ({ \ 1140 #define vext_s16(a, b, __c) __extension__ ({ \
1141 int16x4_t __a = (a); int16x4_t __b = (b); \ 1141 int16x4_t __a = (a); int16x4_t __b = (b); \
1142 (int16x4_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) 1142 (int16x4_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
1143 #define vext_u16(a, b, __c) __extension__ ({ \ 1143 #define vext_u16(a, b, __c) __extension__ ({ \
1144 uint16x4_t __a = (a); uint16x4_t __b = (b); \ 1144 uint16x4_t __a = (a); uint16x4_t __b = (b); \
1145 (uint16x4_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 9); }) 1145 (uint16x4_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); })
1146 #define vext_p16(a, b, __c) __extension__ ({ \ 1146 #define vext_p16(a, b, __c) __extension__ ({ \
1147 poly16x4_t __a = (a); poly16x4_t __b = (b); \ 1147 poly16x4_t __a = (a); poly16x4_t __b = (b); \
1148 (poly16x4_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 6); }) 1148 (poly16x4_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); })
1149 #define vext_s32(a, b, __c) __extension__ ({ \ 1149 #define vext_s32(a, b, __c) __extension__ ({ \
1150 int32x2_t __a = (a); int32x2_t __b = (b); \ 1150 int32x2_t __a = (a); int32x2_t __b = (b); \
1151 (int32x2_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) 1151 (int32x2_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
1152 #define vext_u32(a, b, __c) __extension__ ({ \ 1152 #define vext_u32(a, b, __c) __extension__ ({ \
1153 uint32x2_t __a = (a); uint32x2_t __b = (b); \ 1153 uint32x2_t __a = (a); uint32x2_t __b = (b); \
1154 (uint32x2_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 10); }) 1154 (uint32x2_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); })
1155 #define vext_s64(a, b, __c) __extension__ ({ \ 1155 #define vext_s64(a, b, __c) __extension__ ({ \
1156 int64x1_t __a = (a); int64x1_t __b = (b); \ 1156 int64x1_t __a = (a); int64x1_t __b = (b); \
1157 (int64x1_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) 1157 (int64x1_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
1158 #define vext_u64(a, b, __c) __extension__ ({ \ 1158 #define vext_u64(a, b, __c) __extension__ ({ \
1159 uint64x1_t __a = (a); uint64x1_t __b = (b); \ 1159 uint64x1_t __a = (a); uint64x1_t __b = (b); \
1160 (uint64x1_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 11); }) 1160 (uint64x1_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); })
1161 #define vext_f32(a, b, __c) __extension__ ({ \ 1161 #define vext_f32(a, b, __c) __extension__ ({ \
1162 float32x2_t __a = (a); float32x2_t __b = (b); \ 1162 float32x2_t __a = (a); float32x2_t __b = (b); \
1163 (float32x2_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); }) 1163 (float32x2_t)__builtin_neon_vext_v((int8x8_t)__a, (int8x8_t)__b, __c, 7); })
1164 #define vextq_s8(a, b, __c) __extension__ ({ \ 1164 #define vextq_s8(a, b, __c) __extension__ ({ \
1165 int8x16_t __a = (a); int8x16_t __b = (b); \ 1165 int8x16_t __a = (a); int8x16_t __b = (b); \
1166 (int8x16_t)__builtin_neon_vextq_v(__a, __b, __c, 16); }) 1166 (int8x16_t)__builtin_neon_vextq_v(__a, __b, __c, 32); })
1167 #define vextq_u8(a, b, __c) __extension__ ({ \ 1167 #define vextq_u8(a, b, __c) __extension__ ({ \
1168 uint8x16_t __a = (a); uint8x16_t __b = (b); \ 1168 uint8x16_t __a = (a); uint8x16_t __b = (b); \
1169 (uint8x16_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 24); }) 1169 (uint8x16_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); })
1170 #define vextq_p8(a, b, __c) __extension__ ({ \ 1170 #define vextq_p8(a, b, __c) __extension__ ({ \
1171 poly8x16_t __a = (a); poly8x16_t __b = (b); \ 1171 poly8x16_t __a = (a); poly8x16_t __b = (b); \
1172 (poly8x16_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 21); }) 1172 (poly8x16_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 36); })
1173 #define vextq_s16(a, b, __c) __extension__ ({ \ 1173 #define vextq_s16(a, b, __c) __extension__ ({ \
1174 int16x8_t __a = (a); int16x8_t __b = (b); \ 1174 int16x8_t __a = (a); int16x8_t __b = (b); \
1175 (int16x8_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 17); }) 1175 (int16x8_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); })
1176 #define vextq_u16(a, b, __c) __extension__ ({ \ 1176 #define vextq_u16(a, b, __c) __extension__ ({ \
1177 uint16x8_t __a = (a); uint16x8_t __b = (b); \ 1177 uint16x8_t __a = (a); uint16x8_t __b = (b); \
1178 (uint16x8_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 25); }) 1178 (uint16x8_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); })
1179 #define vextq_p16(a, b, __c) __extension__ ({ \ 1179 #define vextq_p16(a, b, __c) __extension__ ({ \
1180 poly16x8_t __a = (a); poly16x8_t __b = (b); \ 1180 poly16x8_t __a = (a); poly16x8_t __b = (b); \
1181 (poly16x8_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 22); }) 1181 (poly16x8_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 37); })
1182 #define vextq_s32(a, b, __c) __extension__ ({ \ 1182 #define vextq_s32(a, b, __c) __extension__ ({ \
1183 int32x4_t __a = (a); int32x4_t __b = (b); \ 1183 int32x4_t __a = (a); int32x4_t __b = (b); \
1184 (int32x4_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 18); }) 1184 (int32x4_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); })
1185 #define vextq_u32(a, b, __c) __extension__ ({ \ 1185 #define vextq_u32(a, b, __c) __extension__ ({ \
1186 uint32x4_t __a = (a); uint32x4_t __b = (b); \ 1186 uint32x4_t __a = (a); uint32x4_t __b = (b); \
1187 (uint32x4_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 26); }) 1187 (uint32x4_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); })
1188 #define vextq_s64(a, b, __c) __extension__ ({ \ 1188 #define vextq_s64(a, b, __c) __extension__ ({ \
1189 int64x2_t __a = (a); int64x2_t __b = (b); \ 1189 int64x2_t __a = (a); int64x2_t __b = (b); \
1190 (int64x2_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 19); }) 1190 (int64x2_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); })
1191 #define vextq_u64(a, b, __c) __extension__ ({ \ 1191 #define vextq_u64(a, b, __c) __extension__ ({ \
1192 uint64x2_t __a = (a); uint64x2_t __b = (b); \ 1192 uint64x2_t __a = (a); uint64x2_t __b = (b); \
1193 (uint64x2_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 27); }) 1193 (uint64x2_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); })
1194 #define vextq_f32(a, b, __c) __extension__ ({ \ 1194 #define vextq_f32(a, b, __c) __extension__ ({ \
1195 float32x4_t __a = (a); float32x4_t __b = (b); \ 1195 float32x4_t __a = (a); float32x4_t __b = (b); \
1196 (float32x4_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 20); }) 1196 (float32x4_t)__builtin_neon_vextq_v((int8x16_t)__a, (int8x16_t)__b, __c, 39); })
1197 1197
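
Note: vext concatenates its two operands and extracts a window starting at lane __c, which is why __c must be a constant and the whole family is defined as macros. Illustrative sketch (hypothetical helper name):

#include <arm_neon.h>
/* shift_in: lanes 3..7 of lo followed by lanes 0..2 of hi. */
uint8x8_t shift_in(uint8x8_t lo, uint8x8_t hi) {
  return vext_u8(lo, hi, 3);
}
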
1198 __ai int8x8_t vget_high_s8(int8x16_t __a) { \ 1198 __ai int8x8_t vget_high_s8(int8x16_t __a) { \
1199 return (int8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); } 1199 return (int8x8_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
1200 __ai int16x4_t vget_high_s16(int16x8_t __a) { \ 1200 __ai int16x4_t vget_high_s16(int16x8_t __a) { \
1201 return (int16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); } 1201 return (int16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
1202 __ai int32x2_t vget_high_s32(int32x4_t __a) { \ 1202 __ai int32x2_t vget_high_s32(int32x4_t __a) { \
1203 return (int32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); } 1203 return (int32x2_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
1204 __ai int64x1_t vget_high_s64(int64x2_t __a) { \ 1204 __ai int64x1_t vget_high_s64(int64x2_t __a) { \
1205 return (int64x1_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); } 1205 return (int64x1_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 1); }
1206 __ai float16x4_t vget_high_f16(float16x8_t __a) { \ 1206 __ai float16x4_t vget_high_f16(float16x8_t __a) { \
(...skipping 105 matching lines...)
1312 __ai poly16x4_t vget_low_p16(poly16x8_t __a) { \ 1312 __ai poly16x4_t vget_low_p16(poly16x8_t __a) { \
1313 return (poly16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); } 1313 return (poly16x4_t)__builtin_shufflevector((int64x2_t)__a, (int64x2_t)__a, 0); }
1314 1314
1315 __ai int8x8_t vhadd_s8(int8x8_t __a, int8x8_t __b) { \ 1315 __ai int8x8_t vhadd_s8(int8x8_t __a, int8x8_t __b) { \
1316 return (int8x8_t)__builtin_neon_vhadd_v(__a, __b, 0); } 1316 return (int8x8_t)__builtin_neon_vhadd_v(__a, __b, 0); }
1317 __ai int16x4_t vhadd_s16(int16x4_t __a, int16x4_t __b) { \ 1317 __ai int16x4_t vhadd_s16(int16x4_t __a, int16x4_t __b) { \
1318 return (int16x4_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } 1318 return (int16x4_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 1); }
1319 __ai int32x2_t vhadd_s32(int32x2_t __a, int32x2_t __b) { \ 1319 __ai int32x2_t vhadd_s32(int32x2_t __a, int32x2_t __b) { \
1320 return (int32x2_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } 1320 return (int32x2_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 2); }
1321 __ai uint8x8_t vhadd_u8(uint8x8_t __a, uint8x8_t __b) { \ 1321 __ai uint8x8_t vhadd_u8(uint8x8_t __a, uint8x8_t __b) { \
1322 return (uint8x8_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 8); } 1322 return (uint8x8_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 16); }
1323 __ai uint16x4_t vhadd_u16(uint16x4_t __a, uint16x4_t __b) { \ 1323 __ai uint16x4_t vhadd_u16(uint16x4_t __a, uint16x4_t __b) { \
1324 return (uint16x4_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 9); } 1324 return (uint16x4_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 17); }
1325 __ai uint32x2_t vhadd_u32(uint32x2_t __a, uint32x2_t __b) { \ 1325 __ai uint32x2_t vhadd_u32(uint32x2_t __a, uint32x2_t __b) { \
1326 return (uint32x2_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 10); } 1326 return (uint32x2_t)__builtin_neon_vhadd_v((int8x8_t)__a, (int8x8_t)__b, 18); }
1327 __ai int8x16_t vhaddq_s8(int8x16_t __a, int8x16_t __b) { \ 1327 __ai int8x16_t vhaddq_s8(int8x16_t __a, int8x16_t __b) { \
1328 return (int8x16_t)__builtin_neon_vhaddq_v(__a, __b, 16); } 1328 return (int8x16_t)__builtin_neon_vhaddq_v(__a, __b, 32); }
1329 __ai int16x8_t vhaddq_s16(int16x8_t __a, int16x8_t __b) { \ 1329 __ai int16x8_t vhaddq_s16(int16x8_t __a, int16x8_t __b) { \
1330 return (int16x8_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 17); } 1330 return (int16x8_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
1331 __ai int32x4_t vhaddq_s32(int32x4_t __a, int32x4_t __b) { \ 1331 __ai int32x4_t vhaddq_s32(int32x4_t __a, int32x4_t __b) { \
1332 return (int32x4_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 18); } 1332 return (int32x4_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
1333 __ai uint8x16_t vhaddq_u8(uint8x16_t __a, uint8x16_t __b) { \ 1333 __ai uint8x16_t vhaddq_u8(uint8x16_t __a, uint8x16_t __b) { \
1334 return (uint8x16_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 24); } 1334 return (uint8x16_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
1335 __ai uint16x8_t vhaddq_u16(uint16x8_t __a, uint16x8_t __b) { \ 1335 __ai uint16x8_t vhaddq_u16(uint16x8_t __a, uint16x8_t __b) { \
1336 return (uint16x8_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 25); } 1336 return (uint16x8_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
1337 __ai uint32x4_t vhaddq_u32(uint32x4_t __a, uint32x4_t __b) { \ 1337 __ai uint32x4_t vhaddq_u32(uint32x4_t __a, uint32x4_t __b) { \
1338 return (uint32x4_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 1338 return (uint32x4_t)__builtin_neon_vhaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
1339 1339
1340 __ai int8x8_t vhsub_s8(int8x8_t __a, int8x8_t __b) { \ 1340 __ai int8x8_t vhsub_s8(int8x8_t __a, int8x8_t __b) { \
1341 return (int8x8_t)__builtin_neon_vhsub_v(__a, __b, 0); } 1341 return (int8x8_t)__builtin_neon_vhsub_v(__a, __b, 0); }
1342 __ai int16x4_t vhsub_s16(int16x4_t __a, int16x4_t __b) { \ 1342 __ai int16x4_t vhsub_s16(int16x4_t __a, int16x4_t __b) { \
1343 return (int16x4_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 1); } 1343 return (int16x4_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 1); }
1344 __ai int32x2_t vhsub_s32(int32x2_t __a, int32x2_t __b) { \ 1344 __ai int32x2_t vhsub_s32(int32x2_t __a, int32x2_t __b) { \
1345 return (int32x2_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 2); } 1345 return (int32x2_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 2); }
1346 __ai uint8x8_t vhsub_u8(uint8x8_t __a, uint8x8_t __b) { \ 1346 __ai uint8x8_t vhsub_u8(uint8x8_t __a, uint8x8_t __b) { \
1347 return (uint8x8_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 8); } 1347 return (uint8x8_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 16); }
1348 __ai uint16x4_t vhsub_u16(uint16x4_t __a, uint16x4_t __b) { \ 1348 __ai uint16x4_t vhsub_u16(uint16x4_t __a, uint16x4_t __b) { \
1349 return (uint16x4_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 9); } 1349 return (uint16x4_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 17); }
1350 __ai uint32x2_t vhsub_u32(uint32x2_t __a, uint32x2_t __b) { \ 1350 __ai uint32x2_t vhsub_u32(uint32x2_t __a, uint32x2_t __b) { \
1351 return (uint32x2_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 10); } 1351 return (uint32x2_t)__builtin_neon_vhsub_v((int8x8_t)__a, (int8x8_t)__b, 18); }
1352 __ai int8x16_t vhsubq_s8(int8x16_t __a, int8x16_t __b) { \ 1352 __ai int8x16_t vhsubq_s8(int8x16_t __a, int8x16_t __b) { \
1353 return (int8x16_t)__builtin_neon_vhsubq_v(__a, __b, 16); } 1353 return (int8x16_t)__builtin_neon_vhsubq_v(__a, __b, 32); }
1354 __ai int16x8_t vhsubq_s16(int16x8_t __a, int16x8_t __b) { \ 1354 __ai int16x8_t vhsubq_s16(int16x8_t __a, int16x8_t __b) { \
1355 return (int16x8_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 17); } 1355 return (int16x8_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
1356 __ai int32x4_t vhsubq_s32(int32x4_t __a, int32x4_t __b) { \ 1356 __ai int32x4_t vhsubq_s32(int32x4_t __a, int32x4_t __b) { \
1357 return (int32x4_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 18); } 1357 return (int32x4_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
1358 __ai uint8x16_t vhsubq_u8(uint8x16_t __a, uint8x16_t __b) { \ 1358 __ai uint8x16_t vhsubq_u8(uint8x16_t __a, uint8x16_t __b) { \
1359 return (uint8x16_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 24); } 1359 return (uint8x16_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
1360 __ai uint16x8_t vhsubq_u16(uint16x8_t __a, uint16x8_t __b) { \ 1360 __ai uint16x8_t vhsubq_u16(uint16x8_t __a, uint16x8_t __b) { \
1361 return (uint16x8_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 25); } 1361 return (uint16x8_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
1362 __ai uint32x4_t vhsubq_u32(uint32x4_t __a, uint32x4_t __b) { \ 1362 __ai uint32x4_t vhsubq_u32(uint32x4_t __a, uint32x4_t __b) { \
1363 return (uint32x4_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 1363 return (uint32x4_t)__builtin_neon_vhsubq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
1364 1364
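
Note: the halving forms compute (a + b) >> 1 (or (a - b) >> 1 for vhsub) in a wider intermediate, so an overflow-free floor average of two byte vectors is a single vhadd. Illustrative sketch (hypothetical helper name):

#include <arm_neon.h>
/* avg_floor: truncating average of two unsigned byte vectors. */
uint8x8_t avg_floor(uint8x8_t a, uint8x8_t b) {
  return vhadd_u8(a, b);
}
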
1365 #define vld1q_u8(__a) __extension__ ({ \ 1365 #define vld1q_u8(__a) __extension__ ({ \
1366 (uint8x16_t)__builtin_neon_vld1q_v(__a, 24); }) 1366 (uint8x16_t)__builtin_neon_vld1q_v(__a, 48); })
1367 #define vld1q_u16(__a) __extension__ ({ \ 1367 #define vld1q_u16(__a) __extension__ ({ \
1368 (uint16x8_t)__builtin_neon_vld1q_v(__a, 25); }) 1368 (uint16x8_t)__builtin_neon_vld1q_v(__a, 49); })
1369 #define vld1q_u32(__a) __extension__ ({ \ 1369 #define vld1q_u32(__a) __extension__ ({ \
1370 (uint32x4_t)__builtin_neon_vld1q_v(__a, 26); }) 1370 (uint32x4_t)__builtin_neon_vld1q_v(__a, 50); })
1371 #define vld1q_u64(__a) __extension__ ({ \ 1371 #define vld1q_u64(__a) __extension__ ({ \
1372 (uint64x2_t)__builtin_neon_vld1q_v(__a, 27); }) 1372 (uint64x2_t)__builtin_neon_vld1q_v(__a, 51); })
1373 #define vld1q_s8(__a) __extension__ ({ \ 1373 #define vld1q_s8(__a) __extension__ ({ \
1374 (int8x16_t)__builtin_neon_vld1q_v(__a, 16); }) 1374 (int8x16_t)__builtin_neon_vld1q_v(__a, 32); })
1375 #define vld1q_s16(__a) __extension__ ({ \ 1375 #define vld1q_s16(__a) __extension__ ({ \
1376 (int16x8_t)__builtin_neon_vld1q_v(__a, 17); }) 1376 (int16x8_t)__builtin_neon_vld1q_v(__a, 33); })
1377 #define vld1q_s32(__a) __extension__ ({ \ 1377 #define vld1q_s32(__a) __extension__ ({ \
1378 (int32x4_t)__builtin_neon_vld1q_v(__a, 18); }) 1378 (int32x4_t)__builtin_neon_vld1q_v(__a, 34); })
1379 #define vld1q_s64(__a) __extension__ ({ \ 1379 #define vld1q_s64(__a) __extension__ ({ \
1380 (int64x2_t)__builtin_neon_vld1q_v(__a, 19); }) 1380 (int64x2_t)__builtin_neon_vld1q_v(__a, 35); })
1381 #define vld1q_f16(__a) __extension__ ({ \ 1381 #define vld1q_f16(__a) __extension__ ({ \
1382 (float16x8_t)__builtin_neon_vld1q_v(__a, 23); }) 1382 (float16x8_t)__builtin_neon_vld1q_v(__a, 38); })
1383 #define vld1q_f32(__a) __extension__ ({ \ 1383 #define vld1q_f32(__a) __extension__ ({ \
1384 (float32x4_t)__builtin_neon_vld1q_v(__a, 20); }) 1384 (float32x4_t)__builtin_neon_vld1q_v(__a, 39); })
1385 #define vld1q_p8(__a) __extension__ ({ \ 1385 #define vld1q_p8(__a) __extension__ ({ \
1386 (poly8x16_t)__builtin_neon_vld1q_v(__a, 21); }) 1386 (poly8x16_t)__builtin_neon_vld1q_v(__a, 36); })
1387 #define vld1q_p16(__a) __extension__ ({ \ 1387 #define vld1q_p16(__a) __extension__ ({ \
1388 (poly16x8_t)__builtin_neon_vld1q_v(__a, 22); }) 1388 (poly16x8_t)__builtin_neon_vld1q_v(__a, 37); })
1389 #define vld1_u8(__a) __extension__ ({ \ 1389 #define vld1_u8(__a) __extension__ ({ \
1390 (uint8x8_t)__builtin_neon_vld1_v(__a, 8); }) 1390 (uint8x8_t)__builtin_neon_vld1_v(__a, 16); })
1391 #define vld1_u16(__a) __extension__ ({ \ 1391 #define vld1_u16(__a) __extension__ ({ \
1392 (uint16x4_t)__builtin_neon_vld1_v(__a, 9); }) 1392 (uint16x4_t)__builtin_neon_vld1_v(__a, 17); })
1393 #define vld1_u32(__a) __extension__ ({ \ 1393 #define vld1_u32(__a) __extension__ ({ \
1394 (uint32x2_t)__builtin_neon_vld1_v(__a, 10); }) 1394 (uint32x2_t)__builtin_neon_vld1_v(__a, 18); })
1395 #define vld1_u64(__a) __extension__ ({ \ 1395 #define vld1_u64(__a) __extension__ ({ \
1396 (uint64x1_t)__builtin_neon_vld1_v(__a, 11); }) 1396 (uint64x1_t)__builtin_neon_vld1_v(__a, 19); })
1397 #define vld1_s8(__a) __extension__ ({ \ 1397 #define vld1_s8(__a) __extension__ ({ \
1398 (int8x8_t)__builtin_neon_vld1_v(__a, 0); }) 1398 (int8x8_t)__builtin_neon_vld1_v(__a, 0); })
1399 #define vld1_s16(__a) __extension__ ({ \ 1399 #define vld1_s16(__a) __extension__ ({ \
1400 (int16x4_t)__builtin_neon_vld1_v(__a, 1); }) 1400 (int16x4_t)__builtin_neon_vld1_v(__a, 1); })
1401 #define vld1_s32(__a) __extension__ ({ \ 1401 #define vld1_s32(__a) __extension__ ({ \
1402 (int32x2_t)__builtin_neon_vld1_v(__a, 2); }) 1402 (int32x2_t)__builtin_neon_vld1_v(__a, 2); })
1403 #define vld1_s64(__a) __extension__ ({ \ 1403 #define vld1_s64(__a) __extension__ ({ \
1404 (int64x1_t)__builtin_neon_vld1_v(__a, 3); }) 1404 (int64x1_t)__builtin_neon_vld1_v(__a, 3); })
1405 #define vld1_f16(__a) __extension__ ({ \ 1405 #define vld1_f16(__a) __extension__ ({ \
1406 (float16x4_t)__builtin_neon_vld1_v(__a, 7); }) 1406 (float16x4_t)__builtin_neon_vld1_v(__a, 6); })
1407 #define vld1_f32(__a) __extension__ ({ \ 1407 #define vld1_f32(__a) __extension__ ({ \
1408 (float32x2_t)__builtin_neon_vld1_v(__a, 4); }) 1408 (float32x2_t)__builtin_neon_vld1_v(__a, 7); })
1409 #define vld1_p8(__a) __extension__ ({ \ 1409 #define vld1_p8(__a) __extension__ ({ \
1410 (poly8x8_t)__builtin_neon_vld1_v(__a, 5); }) 1410 (poly8x8_t)__builtin_neon_vld1_v(__a, 4); })
1411 #define vld1_p16(__a) __extension__ ({ \ 1411 #define vld1_p16(__a) __extension__ ({ \
1412 (poly16x4_t)__builtin_neon_vld1_v(__a, 6); }) 1412 (poly16x4_t)__builtin_neon_vld1_v(__a, 5); })
1413 1413
1414 #define vld1q_dup_u8(__a) __extension__ ({ \ 1414 #define vld1q_dup_u8(__a) __extension__ ({ \
1415 (uint8x16_t)__builtin_neon_vld1q_dup_v(__a, 24); }) 1415 (uint8x16_t)__builtin_neon_vld1q_dup_v(__a, 48); })
1416 #define vld1q_dup_u16(__a) __extension__ ({ \ 1416 #define vld1q_dup_u16(__a) __extension__ ({ \
1417 (uint16x8_t)__builtin_neon_vld1q_dup_v(__a, 25); }) 1417 (uint16x8_t)__builtin_neon_vld1q_dup_v(__a, 49); })
1418 #define vld1q_dup_u32(__a) __extension__ ({ \ 1418 #define vld1q_dup_u32(__a) __extension__ ({ \
1419 (uint32x4_t)__builtin_neon_vld1q_dup_v(__a, 26); }) 1419 (uint32x4_t)__builtin_neon_vld1q_dup_v(__a, 50); })
1420 #define vld1q_dup_u64(__a) __extension__ ({ \ 1420 #define vld1q_dup_u64(__a) __extension__ ({ \
1421 (uint64x2_t)__builtin_neon_vld1q_dup_v(__a, 27); }) 1421 (uint64x2_t)__builtin_neon_vld1q_dup_v(__a, 51); })
1422 #define vld1q_dup_s8(__a) __extension__ ({ \ 1422 #define vld1q_dup_s8(__a) __extension__ ({ \
1423 (int8x16_t)__builtin_neon_vld1q_dup_v(__a, 16); }) 1423 (int8x16_t)__builtin_neon_vld1q_dup_v(__a, 32); })
1424 #define vld1q_dup_s16(__a) __extension__ ({ \ 1424 #define vld1q_dup_s16(__a) __extension__ ({ \
1425 (int16x8_t)__builtin_neon_vld1q_dup_v(__a, 17); }) 1425 (int16x8_t)__builtin_neon_vld1q_dup_v(__a, 33); })
1426 #define vld1q_dup_s32(__a) __extension__ ({ \ 1426 #define vld1q_dup_s32(__a) __extension__ ({ \
1427 (int32x4_t)__builtin_neon_vld1q_dup_v(__a, 18); }) 1427 (int32x4_t)__builtin_neon_vld1q_dup_v(__a, 34); })
1428 #define vld1q_dup_s64(__a) __extension__ ({ \ 1428 #define vld1q_dup_s64(__a) __extension__ ({ \
1429 (int64x2_t)__builtin_neon_vld1q_dup_v(__a, 19); }) 1429 (int64x2_t)__builtin_neon_vld1q_dup_v(__a, 35); })
1430 #define vld1q_dup_f16(__a) __extension__ ({ \ 1430 #define vld1q_dup_f16(__a) __extension__ ({ \
1431 (float16x8_t)__builtin_neon_vld1q_dup_v(__a, 23); }) 1431 (float16x8_t)__builtin_neon_vld1q_dup_v(__a, 38); })
1432 #define vld1q_dup_f32(__a) __extension__ ({ \ 1432 #define vld1q_dup_f32(__a) __extension__ ({ \
1433 (float32x4_t)__builtin_neon_vld1q_dup_v(__a, 20); }) 1433 (float32x4_t)__builtin_neon_vld1q_dup_v(__a, 39); })
1434 #define vld1q_dup_p8(__a) __extension__ ({ \ 1434 #define vld1q_dup_p8(__a) __extension__ ({ \
1435 (poly8x16_t)__builtin_neon_vld1q_dup_v(__a, 21); }) 1435 (poly8x16_t)__builtin_neon_vld1q_dup_v(__a, 36); })
1436 #define vld1q_dup_p16(__a) __extension__ ({ \ 1436 #define vld1q_dup_p16(__a) __extension__ ({ \
1437 (poly16x8_t)__builtin_neon_vld1q_dup_v(__a, 22); }) 1437 (poly16x8_t)__builtin_neon_vld1q_dup_v(__a, 37); })
1438 #define vld1_dup_u8(__a) __extension__ ({ \ 1438 #define vld1_dup_u8(__a) __extension__ ({ \
1439 (uint8x8_t)__builtin_neon_vld1_dup_v(__a, 8); }) 1439 (uint8x8_t)__builtin_neon_vld1_dup_v(__a, 16); })
1440 #define vld1_dup_u16(__a) __extension__ ({ \ 1440 #define vld1_dup_u16(__a) __extension__ ({ \
1441 (uint16x4_t)__builtin_neon_vld1_dup_v(__a, 9); }) 1441 (uint16x4_t)__builtin_neon_vld1_dup_v(__a, 17); })
1442 #define vld1_dup_u32(__a) __extension__ ({ \ 1442 #define vld1_dup_u32(__a) __extension__ ({ \
1443 (uint32x2_t)__builtin_neon_vld1_dup_v(__a, 10); }) 1443 (uint32x2_t)__builtin_neon_vld1_dup_v(__a, 18); })
1444 #define vld1_dup_u64(__a) __extension__ ({ \ 1444 #define vld1_dup_u64(__a) __extension__ ({ \
1445 (uint64x1_t)__builtin_neon_vld1_dup_v(__a, 11); }) 1445 (uint64x1_t)__builtin_neon_vld1_dup_v(__a, 19); })
1446 #define vld1_dup_s8(__a) __extension__ ({ \ 1446 #define vld1_dup_s8(__a) __extension__ ({ \
1447 (int8x8_t)__builtin_neon_vld1_dup_v(__a, 0); }) 1447 (int8x8_t)__builtin_neon_vld1_dup_v(__a, 0); })
1448 #define vld1_dup_s16(__a) __extension__ ({ \ 1448 #define vld1_dup_s16(__a) __extension__ ({ \
1449 (int16x4_t)__builtin_neon_vld1_dup_v(__a, 1); }) 1449 (int16x4_t)__builtin_neon_vld1_dup_v(__a, 1); })
1450 #define vld1_dup_s32(__a) __extension__ ({ \ 1450 #define vld1_dup_s32(__a) __extension__ ({ \
1451 (int32x2_t)__builtin_neon_vld1_dup_v(__a, 2); }) 1451 (int32x2_t)__builtin_neon_vld1_dup_v(__a, 2); })
1452 #define vld1_dup_s64(__a) __extension__ ({ \ 1452 #define vld1_dup_s64(__a) __extension__ ({ \
1453 (int64x1_t)__builtin_neon_vld1_dup_v(__a, 3); }) 1453 (int64x1_t)__builtin_neon_vld1_dup_v(__a, 3); })
1454 #define vld1_dup_f16(__a) __extension__ ({ \ 1454 #define vld1_dup_f16(__a) __extension__ ({ \
1455 (float16x4_t)__builtin_neon_vld1_dup_v(__a, 7); }) 1455 (float16x4_t)__builtin_neon_vld1_dup_v(__a, 6); })
1456 #define vld1_dup_f32(__a) __extension__ ({ \ 1456 #define vld1_dup_f32(__a) __extension__ ({ \
1457 (float32x2_t)__builtin_neon_vld1_dup_v(__a, 4); }) 1457 (float32x2_t)__builtin_neon_vld1_dup_v(__a, 7); })
1458 #define vld1_dup_p8(__a) __extension__ ({ \ 1458 #define vld1_dup_p8(__a) __extension__ ({ \
1459 (poly8x8_t)__builtin_neon_vld1_dup_v(__a, 5); }) 1459 (poly8x8_t)__builtin_neon_vld1_dup_v(__a, 4); })
1460 #define vld1_dup_p16(__a) __extension__ ({ \ 1460 #define vld1_dup_p16(__a) __extension__ ({ \
1461 (poly16x4_t)__builtin_neon_vld1_dup_v(__a, 6); }) 1461 (poly16x4_t)__builtin_neon_vld1_dup_v(__a, 5); })
1462 1462
1463 #define vld1q_lane_u8(__a, b, __c) __extension__ ({ \ 1463 #define vld1q_lane_u8(__a, b, __c) __extension__ ({ \
1464 uint8x16_t __b = (b); \ 1464 uint8x16_t __b = (b); \
1465 (uint8x16_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 24); }) 1465 (uint8x16_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 48); })
1466 #define vld1q_lane_u16(__a, b, __c) __extension__ ({ \ 1466 #define vld1q_lane_u16(__a, b, __c) __extension__ ({ \
1467 uint16x8_t __b = (b); \ 1467 uint16x8_t __b = (b); \
1468 (uint16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 25); }) 1468 (uint16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 49); })
1469 #define vld1q_lane_u32(__a, b, __c) __extension__ ({ \ 1469 #define vld1q_lane_u32(__a, b, __c) __extension__ ({ \
1470 uint32x4_t __b = (b); \ 1470 uint32x4_t __b = (b); \
1471 (uint32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 26); }) 1471 (uint32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 50); })
1472 #define vld1q_lane_u64(__a, b, __c) __extension__ ({ \ 1472 #define vld1q_lane_u64(__a, b, __c) __extension__ ({ \
1473 uint64x2_t __b = (b); \ 1473 uint64x2_t __b = (b); \
1474 (uint64x2_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 27); }) 1474 (uint64x2_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 51); })
1475 #define vld1q_lane_s8(__a, b, __c) __extension__ ({ \ 1475 #define vld1q_lane_s8(__a, b, __c) __extension__ ({ \
1476 int8x16_t __b = (b); \ 1476 int8x16_t __b = (b); \
1477 (int8x16_t)__builtin_neon_vld1q_lane_v(__a, __b, __c, 16); }) 1477 (int8x16_t)__builtin_neon_vld1q_lane_v(__a, __b, __c, 32); })
1478 #define vld1q_lane_s16(__a, b, __c) __extension__ ({ \ 1478 #define vld1q_lane_s16(__a, b, __c) __extension__ ({ \
1479 int16x8_t __b = (b); \ 1479 int16x8_t __b = (b); \
1480 (int16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 17); }) 1480 (int16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 33); })
1481 #define vld1q_lane_s32(__a, b, __c) __extension__ ({ \ 1481 #define vld1q_lane_s32(__a, b, __c) __extension__ ({ \
1482 int32x4_t __b = (b); \ 1482 int32x4_t __b = (b); \
1483 (int32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 18); }) 1483 (int32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 34); })
1484 #define vld1q_lane_s64(__a, b, __c) __extension__ ({ \ 1484 #define vld1q_lane_s64(__a, b, __c) __extension__ ({ \
1485 int64x2_t __b = (b); \ 1485 int64x2_t __b = (b); \
1486 (int64x2_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 19); }) 1486 (int64x2_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 35); })
1487 #define vld1q_lane_f16(__a, b, __c) __extension__ ({ \ 1487 #define vld1q_lane_f16(__a, b, __c) __extension__ ({ \
1488 float16x8_t __b = (b); \ 1488 float16x8_t __b = (b); \
1489 (float16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 23); }) 1489 (float16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 38); })
1490 #define vld1q_lane_f32(__a, b, __c) __extension__ ({ \ 1490 #define vld1q_lane_f32(__a, b, __c) __extension__ ({ \
1491 float32x4_t __b = (b); \ 1491 float32x4_t __b = (b); \
1492 (float32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 20); }) 1492 (float32x4_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 39); })
1493 #define vld1q_lane_p8(__a, b, __c) __extension__ ({ \ 1493 #define vld1q_lane_p8(__a, b, __c) __extension__ ({ \
1494 poly8x16_t __b = (b); \ 1494 poly8x16_t __b = (b); \
1495 (poly8x16_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 21); }) 1495 (poly8x16_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 36); })
1496 #define vld1q_lane_p16(__a, b, __c) __extension__ ({ \ 1496 #define vld1q_lane_p16(__a, b, __c) __extension__ ({ \
1497 poly16x8_t __b = (b); \ 1497 poly16x8_t __b = (b); \
1498 (poly16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 22); }) 1498 (poly16x8_t)__builtin_neon_vld1q_lane_v(__a, (int8x16_t)__b, __c, 37); })
1499 #define vld1_lane_u8(__a, b, __c) __extension__ ({ \ 1499 #define vld1_lane_u8(__a, b, __c) __extension__ ({ \
1500 uint8x8_t __b = (b); \ 1500 uint8x8_t __b = (b); \
1501 (uint8x8_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 8); }) 1501 (uint8x8_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 16); })
1502 #define vld1_lane_u16(__a, b, __c) __extension__ ({ \ 1502 #define vld1_lane_u16(__a, b, __c) __extension__ ({ \
1503 uint16x4_t __b = (b); \ 1503 uint16x4_t __b = (b); \
1504 (uint16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 9); }) 1504 (uint16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 17); })
1505 #define vld1_lane_u32(__a, b, __c) __extension__ ({ \ 1505 #define vld1_lane_u32(__a, b, __c) __extension__ ({ \
1506 uint32x2_t __b = (b); \ 1506 uint32x2_t __b = (b); \
1507 (uint32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 10); }) 1507 (uint32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 18); })
1508 #define vld1_lane_u64(__a, b, __c) __extension__ ({ \ 1508 #define vld1_lane_u64(__a, b, __c) __extension__ ({ \
1509 uint64x1_t __b = (b); \ 1509 uint64x1_t __b = (b); \
1510 (uint64x1_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 11); }) 1510 (uint64x1_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 19); })
1511 #define vld1_lane_s8(__a, b, __c) __extension__ ({ \ 1511 #define vld1_lane_s8(__a, b, __c) __extension__ ({ \
1512 int8x8_t __b = (b); \ 1512 int8x8_t __b = (b); \
1513 (int8x8_t)__builtin_neon_vld1_lane_v(__a, __b, __c, 0); }) 1513 (int8x8_t)__builtin_neon_vld1_lane_v(__a, __b, __c, 0); })
1514 #define vld1_lane_s16(__a, b, __c) __extension__ ({ \ 1514 #define vld1_lane_s16(__a, b, __c) __extension__ ({ \
1515 int16x4_t __b = (b); \ 1515 int16x4_t __b = (b); \
1516 (int16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 1); }) 1516 (int16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 1); })
1517 #define vld1_lane_s32(__a, b, __c) __extension__ ({ \ 1517 #define vld1_lane_s32(__a, b, __c) __extension__ ({ \
1518 int32x2_t __b = (b); \ 1518 int32x2_t __b = (b); \
1519 (int32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 2); }) 1519 (int32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 2); })
1520 #define vld1_lane_s64(__a, b, __c) __extension__ ({ \ 1520 #define vld1_lane_s64(__a, b, __c) __extension__ ({ \
1521 int64x1_t __b = (b); \ 1521 int64x1_t __b = (b); \
1522 (int64x1_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 3); }) 1522 (int64x1_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 3); })
1523 #define vld1_lane_f16(__a, b, __c) __extension__ ({ \ 1523 #define vld1_lane_f16(__a, b, __c) __extension__ ({ \
1524 float16x4_t __b = (b); \ 1524 float16x4_t __b = (b); \
1525 (float16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 7); }) 1525 (float16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 6); })
1526 #define vld1_lane_f32(__a, b, __c) __extension__ ({ \ 1526 #define vld1_lane_f32(__a, b, __c) __extension__ ({ \
1527 float32x2_t __b = (b); \ 1527 float32x2_t __b = (b); \
1528 (float32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 4); }) 1528 (float32x2_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 7); })
1529 #define vld1_lane_p8(__a, b, __c) __extension__ ({ \ 1529 #define vld1_lane_p8(__a, b, __c) __extension__ ({ \
1530 poly8x8_t __b = (b); \ 1530 poly8x8_t __b = (b); \
1531 (poly8x8_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 5); }) 1531 (poly8x8_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 4); })
1532 #define vld1_lane_p16(__a, b, __c) __extension__ ({ \ 1532 #define vld1_lane_p16(__a, b, __c) __extension__ ({ \
1533 poly16x4_t __b = (b); \ 1533 poly16x4_t __b = (b); \
1534 (poly16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 6); }) 1534 (poly16x4_t)__builtin_neon_vld1_lane_v(__a, (int8x8_t)__b, __c, 5); })
1535 1535
1536 #define vld2q_u8(__a) __extension__ ({ \ 1536 #define vld2q_u8(__a) __extension__ ({ \
1537 uint8x16x2_t r; __builtin_neon_vld2q_v(&r, __a, 24); r; }) 1537 uint8x16x2_t r; __builtin_neon_vld2q_v(&r, __a, 48); r; })
1538 #define vld2q_u16(__a) __extension__ ({ \ 1538 #define vld2q_u16(__a) __extension__ ({ \
1539 uint16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 25); r; }) 1539 uint16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 49); r; })
1540 #define vld2q_u32(__a) __extension__ ({ \ 1540 #define vld2q_u32(__a) __extension__ ({ \
1541 uint32x4x2_t r; __builtin_neon_vld2q_v(&r, __a, 26); r; }) 1541 uint32x4x2_t r; __builtin_neon_vld2q_v(&r, __a, 50); r; })
1542 #define vld2q_s8(__a) __extension__ ({ \ 1542 #define vld2q_s8(__a) __extension__ ({ \
1543 int8x16x2_t r; __builtin_neon_vld2q_v(&r, __a, 16); r; }) 1543 int8x16x2_t r; __builtin_neon_vld2q_v(&r, __a, 32); r; })
1544 #define vld2q_s16(__a) __extension__ ({ \ 1544 #define vld2q_s16(__a) __extension__ ({ \
1545 int16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 17); r; }) 1545 int16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 33); r; })
1546 #define vld2q_s32(__a) __extension__ ({ \ 1546 #define vld2q_s32(__a) __extension__ ({ \
1547 int32x4x2_t r; __builtin_neon_vld2q_v(&r, __a, 18); r; }) 1547 int32x4x2_t r; __builtin_neon_vld2q_v(&r, __a, 34); r; })
1548 #define vld2q_f16(__a) __extension__ ({ \ 1548 #define vld2q_f16(__a) __extension__ ({ \
1549 float16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 23); r; }) 1549 float16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 38); r; })
1550 #define vld2q_f32(__a) __extension__ ({ \ 1550 #define vld2q_f32(__a) __extension__ ({ \
1551 float32x4x2_t r; __builtin_neon_vld2q_v(&r, __a, 20); r; }) 1551 float32x4x2_t r; __builtin_neon_vld2q_v(&r, __a, 39); r; })
1552 #define vld2q_p8(__a) __extension__ ({ \ 1552 #define vld2q_p8(__a) __extension__ ({ \
1553 poly8x16x2_t r; __builtin_neon_vld2q_v(&r, __a, 21); r; }) 1553 poly8x16x2_t r; __builtin_neon_vld2q_v(&r, __a, 36); r; })
1554 #define vld2q_p16(__a) __extension__ ({ \ 1554 #define vld2q_p16(__a) __extension__ ({ \
1555 poly16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 22); r; }) 1555 poly16x8x2_t r; __builtin_neon_vld2q_v(&r, __a, 37); r; })
1556 #define vld2_u8(__a) __extension__ ({ \ 1556 #define vld2_u8(__a) __extension__ ({ \
1557 uint8x8x2_t r; __builtin_neon_vld2_v(&r, __a, 8); r; }) 1557 uint8x8x2_t r; __builtin_neon_vld2_v(&r, __a, 16); r; })
1558 #define vld2_u16(__a) __extension__ ({ \ 1558 #define vld2_u16(__a) __extension__ ({ \
1559 uint16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 9); r; }) 1559 uint16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 17); r; })
1560 #define vld2_u32(__a) __extension__ ({ \ 1560 #define vld2_u32(__a) __extension__ ({ \
1561 uint32x2x2_t r; __builtin_neon_vld2_v(&r, __a, 10); r; }) 1561 uint32x2x2_t r; __builtin_neon_vld2_v(&r, __a, 18); r; })
1562 #define vld2_u64(__a) __extension__ ({ \ 1562 #define vld2_u64(__a) __extension__ ({ \
1563 uint64x1x2_t r; __builtin_neon_vld2_v(&r, __a, 11); r; }) 1563 uint64x1x2_t r; __builtin_neon_vld2_v(&r, __a, 19); r; })
1564 #define vld2_s8(__a) __extension__ ({ \ 1564 #define vld2_s8(__a) __extension__ ({ \
1565 int8x8x2_t r; __builtin_neon_vld2_v(&r, __a, 0); r; }) 1565 int8x8x2_t r; __builtin_neon_vld2_v(&r, __a, 0); r; })
1566 #define vld2_s16(__a) __extension__ ({ \ 1566 #define vld2_s16(__a) __extension__ ({ \
1567 int16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 1); r; }) 1567 int16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 1); r; })
1568 #define vld2_s32(__a) __extension__ ({ \ 1568 #define vld2_s32(__a) __extension__ ({ \
1569 int32x2x2_t r; __builtin_neon_vld2_v(&r, __a, 2); r; }) 1569 int32x2x2_t r; __builtin_neon_vld2_v(&r, __a, 2); r; })
1570 #define vld2_s64(__a) __extension__ ({ \ 1570 #define vld2_s64(__a) __extension__ ({ \
1571 int64x1x2_t r; __builtin_neon_vld2_v(&r, __a, 3); r; }) 1571 int64x1x2_t r; __builtin_neon_vld2_v(&r, __a, 3); r; })
1572 #define vld2_f16(__a) __extension__ ({ \ 1572 #define vld2_f16(__a) __extension__ ({ \
1573 float16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 7); r; }) 1573 float16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 6); r; })
1574 #define vld2_f32(__a) __extension__ ({ \ 1574 #define vld2_f32(__a) __extension__ ({ \
1575 float32x2x2_t r; __builtin_neon_vld2_v(&r, __a, 4); r; }) 1575 float32x2x2_t r; __builtin_neon_vld2_v(&r, __a, 7); r; })
1576 #define vld2_p8(__a) __extension__ ({ \ 1576 #define vld2_p8(__a) __extension__ ({ \
1577 poly8x8x2_t r; __builtin_neon_vld2_v(&r, __a, 5); r; }) 1577 poly8x8x2_t r; __builtin_neon_vld2_v(&r, __a, 4); r; })
1578 #define vld2_p16(__a) __extension__ ({ \ 1578 #define vld2_p16(__a) __extension__ ({ \
1579 poly16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 6); r; }) 1579 poly16x4x2_t r; __builtin_neon_vld2_v(&r, __a, 5); r; })
1580 1580
1581 #define vld2_dup_u8(__a) __extension__ ({ \ 1581 #define vld2_dup_u8(__a) __extension__ ({ \
1582 uint8x8x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 8); r; }) 1582 uint8x8x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 16); r; })
1583 #define vld2_dup_u16(__a) __extension__ ({ \ 1583 #define vld2_dup_u16(__a) __extension__ ({ \
1584 uint16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 9); r; }) 1584 uint16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 17); r; })
1585 #define vld2_dup_u32(__a) __extension__ ({ \ 1585 #define vld2_dup_u32(__a) __extension__ ({ \
1586 uint32x2x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 10); r; }) 1586 uint32x2x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 18); r; })
1587 #define vld2_dup_u64(__a) __extension__ ({ \ 1587 #define vld2_dup_u64(__a) __extension__ ({ \
1588 uint64x1x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 11); r; }) 1588 uint64x1x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 19); r; })
1589 #define vld2_dup_s8(__a) __extension__ ({ \ 1589 #define vld2_dup_s8(__a) __extension__ ({ \
1590 int8x8x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 0); r; }) 1590 int8x8x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 0); r; })
1591 #define vld2_dup_s16(__a) __extension__ ({ \ 1591 #define vld2_dup_s16(__a) __extension__ ({ \
1592 int16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 1); r; }) 1592 int16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 1); r; })
1593 #define vld2_dup_s32(__a) __extension__ ({ \ 1593 #define vld2_dup_s32(__a) __extension__ ({ \
1594 int32x2x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 2); r; }) 1594 int32x2x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 2); r; })
1595 #define vld2_dup_s64(__a) __extension__ ({ \ 1595 #define vld2_dup_s64(__a) __extension__ ({ \
1596 int64x1x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 3); r; }) 1596 int64x1x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 3); r; })
1597 #define vld2_dup_f16(__a) __extension__ ({ \ 1597 #define vld2_dup_f16(__a) __extension__ ({ \
1598 float16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 7); r; }) 1598 float16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 6); r; })
1599 #define vld2_dup_f32(__a) __extension__ ({ \ 1599 #define vld2_dup_f32(__a) __extension__ ({ \
1600 float32x2x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 4); r; }) 1600 float32x2x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 7); r; })
1601 #define vld2_dup_p8(__a) __extension__ ({ \ 1601 #define vld2_dup_p8(__a) __extension__ ({ \
1602 poly8x8x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 5); r; }) 1602 poly8x8x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 4); r; })
1603 #define vld2_dup_p16(__a) __extension__ ({ \ 1603 #define vld2_dup_p16(__a) __extension__ ({ \
1604 poly16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 6); r; }) 1604 poly16x4x2_t r; __builtin_neon_vld2_dup_v(&r, __a, 5); r; })
1605 1605
1606 #define vld2q_lane_u16(__a, b, __c) __extension__ ({ \ 1606 #define vld2q_lane_u16(__a, b, __c) __extension__ ({ \
1607 uint16x8x2_t __b = (b); \ 1607 uint16x8x2_t __b = (b); \
1608 uint16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 25); r; }) 1608 uint16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 49); r; })
1609 #define vld2q_lane_u32(__a, b, __c) __extension__ ({ \ 1609 #define vld2q_lane_u32(__a, b, __c) __extension__ ({ \
1610 uint32x4x2_t __b = (b); \ 1610 uint32x4x2_t __b = (b); \
1611 uint32x4x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 26); r; }) 1611 uint32x4x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 50); r; })
1612 #define vld2q_lane_s16(__a, b, __c) __extension__ ({ \ 1612 #define vld2q_lane_s16(__a, b, __c) __extension__ ({ \
1613 int16x8x2_t __b = (b); \ 1613 int16x8x2_t __b = (b); \
1614 int16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 17); r; }) 1614 int16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 33); r; })
1615 #define vld2q_lane_s32(__a, b, __c) __extension__ ({ \ 1615 #define vld2q_lane_s32(__a, b, __c) __extension__ ({ \
1616 int32x4x2_t __b = (b); \ 1616 int32x4x2_t __b = (b); \
1617 int32x4x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 18); r; }) 1617 int32x4x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 34); r; })
1618 #define vld2q_lane_f16(__a, b, __c) __extension__ ({ \ 1618 #define vld2q_lane_f16(__a, b, __c) __extension__ ({ \
1619 float16x8x2_t __b = (b); \ 1619 float16x8x2_t __b = (b); \
1620 float16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 23); r; }) 1620 float16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 38); r; })
1621 #define vld2q_lane_f32(__a, b, __c) __extension__ ({ \ 1621 #define vld2q_lane_f32(__a, b, __c) __extension__ ({ \
1622 float32x4x2_t __b = (b); \ 1622 float32x4x2_t __b = (b); \
1623 float32x4x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 20); r; }) 1623 float32x4x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 39); r; })
1624 #define vld2q_lane_p16(__a, b, __c) __extension__ ({ \ 1624 #define vld2q_lane_p16(__a, b, __c) __extension__ ({ \
1625 poly16x8x2_t __b = (b); \ 1625 poly16x8x2_t __b = (b); \
1626 poly16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 22); r; }) 1626 poly16x8x2_t r; __builtin_neon_vld2q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 37); r; })
1627 #define vld2_lane_u8(__a, b, __c) __extension__ ({ \ 1627 #define vld2_lane_u8(__a, b, __c) __extension__ ({ \
1628 uint8x8x2_t __b = (b); \ 1628 uint8x8x2_t __b = (b); \
1629 uint8x8x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 8); r; }) 1629 uint8x8x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 16); r; })
1630 #define vld2_lane_u16(__a, b, __c) __extension__ ({ \ 1630 #define vld2_lane_u16(__a, b, __c) __extension__ ({ \
1631 uint16x4x2_t __b = (b); \ 1631 uint16x4x2_t __b = (b); \
1632 uint16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 9); r; }) 1632 uint16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 17); r; })
1633 #define vld2_lane_u32(__a, b, __c) __extension__ ({ \ 1633 #define vld2_lane_u32(__a, b, __c) __extension__ ({ \
1634 uint32x2x2_t __b = (b); \ 1634 uint32x2x2_t __b = (b); \
1635 uint32x2x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 10); r; }) 1635 uint32x2x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 18); r; })
1636 #define vld2_lane_s8(__a, b, __c) __extension__ ({ \ 1636 #define vld2_lane_s8(__a, b, __c) __extension__ ({ \
1637 int8x8x2_t __b = (b); \ 1637 int8x8x2_t __b = (b); \
1638 int8x8x2_t r; __builtin_neon_vld2_lane_v(&r, __a, __b.val[0], __b.val[1], __c, 0); r; }) 1638 int8x8x2_t r; __builtin_neon_vld2_lane_v(&r, __a, __b.val[0], __b.val[1], __c, 0); r; })
1639 #define vld2_lane_s16(__a, b, __c) __extension__ ({ \ 1639 #define vld2_lane_s16(__a, b, __c) __extension__ ({ \
1640 int16x4x2_t __b = (b); \ 1640 int16x4x2_t __b = (b); \
1641 int16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 1); r; }) 1641 int16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 1); r; })
1642 #define vld2_lane_s32(__a, b, __c) __extension__ ({ \ 1642 #define vld2_lane_s32(__a, b, __c) __extension__ ({ \
1643 int32x2x2_t __b = (b); \ 1643 int32x2x2_t __b = (b); \
1644 int32x2x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 2); r; }) 1644 int32x2x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 2); r; })
1645 #define vld2_lane_f16(__a, b, __c) __extension__ ({ \ 1645 #define vld2_lane_f16(__a, b, __c) __extension__ ({ \
1646 float16x4x2_t __b = (b); \ 1646 float16x4x2_t __b = (b); \
1647 float16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 7); r; }) 1647 float16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 6); r; })
1648 #define vld2_lane_f32(__a, b, __c) __extension__ ({ \ 1648 #define vld2_lane_f32(__a, b, __c) __extension__ ({ \
1649 float32x2x2_t __b = (b); \ 1649 float32x2x2_t __b = (b); \
1650 float32x2x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 4); r; }) 1650 float32x2x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 7); r; })
1651 #define vld2_lane_p8(__a, b, __c) __extension__ ({ \ 1651 #define vld2_lane_p8(__a, b, __c) __extension__ ({ \
1652 poly8x8x2_t __b = (b); \ 1652 poly8x8x2_t __b = (b); \
1653 poly8x8x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 5); r; }) 1653 poly8x8x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 4); r; })
1654 #define vld2_lane_p16(__a, b, __c) __extension__ ({ \ 1654 #define vld2_lane_p16(__a, b, __c) __extension__ ({ \
1655 poly16x4x2_t __b = (b); \ 1655 poly16x4x2_t __b = (b); \
1656 poly16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 6); r; }) 1656 poly16x4x2_t r; __builtin_neon_vld2_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 5); r; })
1657 1657
1658 #define vld3q_u8(__a) __extension__ ({ \ 1658 #define vld3q_u8(__a) __extension__ ({ \
1659 uint8x16x3_t r; __builtin_neon_vld3q_v(&r, __a, 24); r; }) 1659 uint8x16x3_t r; __builtin_neon_vld3q_v(&r, __a, 48); r; })
1660 #define vld3q_u16(__a) __extension__ ({ \ 1660 #define vld3q_u16(__a) __extension__ ({ \
1661 uint16x8x3_t r; __builtin_neon_vld3q_v(&r, __a, 25); r; }) 1661 uint16x8x3_t r; __builtin_neon_vld3q_v(&r, __a, 49); r; })
1662 #define vld3q_u32(__a) __extension__ ({ \ 1662 #define vld3q_u32(__a) __extension__ ({ \
1663 uint32x4x3_t r; __builtin_neon_vld3q_v(&r, __a, 26); r; }) 1663 uint32x4x3_t r; __builtin_neon_vld3q_v(&r, __a, 50); r; })
1664 #define vld3q_s8(__a) __extension__ ({ \ 1664 #define vld3q_s8(__a) __extension__ ({ \
1665 int8x16x3_t r; __builtin_neon_vld3q_v(&r, __a, 16); r; }) 1665 int8x16x3_t r; __builtin_neon_vld3q_v(&r, __a, 32); r; })
1666 #define vld3q_s16(__a) __extension__ ({ \ 1666 #define vld3q_s16(__a) __extension__ ({ \
1667 int16x8x3_t r; __builtin_neon_vld3q_v(&r, __a, 17); r; }) 1667 int16x8x3_t r; __builtin_neon_vld3q_v(&r, __a, 33); r; })
1668 #define vld3q_s32(__a) __extension__ ({ \ 1668 #define vld3q_s32(__a) __extension__ ({ \
1669 int32x4x3_t r; __builtin_neon_vld3q_v(&r, __a, 18); r; }) 1669 int32x4x3_t r; __builtin_neon_vld3q_v(&r, __a, 34); r; })
1670 #define vld3q_f16(__a) __extension__ ({ \ 1670 #define vld3q_f16(__a) __extension__ ({ \
1671 float16x8x3_t r; __builtin_neon_vld3q_v(&r, __a, 23); r; }) 1671 float16x8x3_t r; __builtin_neon_vld3q_v(&r, __a, 38); r; })
1672 #define vld3q_f32(__a) __extension__ ({ \ 1672 #define vld3q_f32(__a) __extension__ ({ \
1673 float32x4x3_t r; __builtin_neon_vld3q_v(&r, __a, 20); r; }) 1673 float32x4x3_t r; __builtin_neon_vld3q_v(&r, __a, 39); r; })
1674 #define vld3q_p8(__a) __extension__ ({ \ 1674 #define vld3q_p8(__a) __extension__ ({ \
1675 poly8x16x3_t r; __builtin_neon_vld3q_v(&r, __a, 21); r; }) 1675 poly8x16x3_t r; __builtin_neon_vld3q_v(&r, __a, 36); r; })
1676 #define vld3q_p16(__a) __extension__ ({ \ 1676 #define vld3q_p16(__a) __extension__ ({ \
1677 poly16x8x3_t r; __builtin_neon_vld3q_v(&r, __a, 22); r; }) 1677 poly16x8x3_t r; __builtin_neon_vld3q_v(&r, __a, 37); r; })
1678 #define vld3_u8(__a) __extension__ ({ \ 1678 #define vld3_u8(__a) __extension__ ({ \
1679 uint8x8x3_t r; __builtin_neon_vld3_v(&r, __a, 8); r; }) 1679 uint8x8x3_t r; __builtin_neon_vld3_v(&r, __a, 16); r; })
1680 #define vld3_u16(__a) __extension__ ({ \ 1680 #define vld3_u16(__a) __extension__ ({ \
1681 uint16x4x3_t r; __builtin_neon_vld3_v(&r, __a, 9); r; }) 1681 uint16x4x3_t r; __builtin_neon_vld3_v(&r, __a, 17); r; })
1682 #define vld3_u32(__a) __extension__ ({ \ 1682 #define vld3_u32(__a) __extension__ ({ \
1683 uint32x2x3_t r; __builtin_neon_vld3_v(&r, __a, 10); r; }) 1683 uint32x2x3_t r; __builtin_neon_vld3_v(&r, __a, 18); r; })
1684 #define vld3_u64(__a) __extension__ ({ \ 1684 #define vld3_u64(__a) __extension__ ({ \
1685 uint64x1x3_t r; __builtin_neon_vld3_v(&r, __a, 11); r; }) 1685 uint64x1x3_t r; __builtin_neon_vld3_v(&r, __a, 19); r; })
1686 #define vld3_s8(__a) __extension__ ({ \ 1686 #define vld3_s8(__a) __extension__ ({ \
1687 int8x8x3_t r; __builtin_neon_vld3_v(&r, __a, 0); r; }) 1687 int8x8x3_t r; __builtin_neon_vld3_v(&r, __a, 0); r; })
1688 #define vld3_s16(__a) __extension__ ({ \ 1688 #define vld3_s16(__a) __extension__ ({ \
1689 int16x4x3_t r; __builtin_neon_vld3_v(&r, __a, 1); r; }) 1689 int16x4x3_t r; __builtin_neon_vld3_v(&r, __a, 1); r; })
1690 #define vld3_s32(__a) __extension__ ({ \ 1690 #define vld3_s32(__a) __extension__ ({ \
1691 int32x2x3_t r; __builtin_neon_vld3_v(&r, __a, 2); r; }) 1691 int32x2x3_t r; __builtin_neon_vld3_v(&r, __a, 2); r; })
1692 #define vld3_s64(__a) __extension__ ({ \ 1692 #define vld3_s64(__a) __extension__ ({ \
1693 int64x1x3_t r; __builtin_neon_vld3_v(&r, __a, 3); r; }) 1693 int64x1x3_t r; __builtin_neon_vld3_v(&r, __a, 3); r; })
1694 #define vld3_f16(__a) __extension__ ({ \ 1694 #define vld3_f16(__a) __extension__ ({ \
1695 float16x4x3_t r; __builtin_neon_vld3_v(&r, __a, 7); r; }) 1695 float16x4x3_t r; __builtin_neon_vld3_v(&r, __a, 6); r; })
1696 #define vld3_f32(__a) __extension__ ({ \ 1696 #define vld3_f32(__a) __extension__ ({ \
1697 float32x2x3_t r; __builtin_neon_vld3_v(&r, __a, 4); r; }) 1697 float32x2x3_t r; __builtin_neon_vld3_v(&r, __a, 7); r; })
1698 #define vld3_p8(__a) __extension__ ({ \ 1698 #define vld3_p8(__a) __extension__ ({ \
1699 poly8x8x3_t r; __builtin_neon_vld3_v(&r, __a, 5); r; }) 1699 poly8x8x3_t r; __builtin_neon_vld3_v(&r, __a, 4); r; })
1700 #define vld3_p16(__a) __extension__ ({ \ 1700 #define vld3_p16(__a) __extension__ ({ \
1701 poly16x4x3_t r; __builtin_neon_vld3_v(&r, __a, 6); r; }) 1701 poly16x4x3_t r; __builtin_neon_vld3_v(&r, __a, 5); r; })
1702 1702
1703 #define vld3_dup_u8(__a) __extension__ ({ \ 1703 #define vld3_dup_u8(__a) __extension__ ({ \
1704 uint8x8x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 8); r; }) 1704 uint8x8x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 16); r; })
1705 #define vld3_dup_u16(__a) __extension__ ({ \ 1705 #define vld3_dup_u16(__a) __extension__ ({ \
1706 uint16x4x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 9); r; }) 1706 uint16x4x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 17); r; })
1707 #define vld3_dup_u32(__a) __extension__ ({ \ 1707 #define vld3_dup_u32(__a) __extension__ ({ \
1708 uint32x2x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 10); r; }) 1708 uint32x2x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 18); r; })
1709 #define vld3_dup_u64(__a) __extension__ ({ \ 1709 #define vld3_dup_u64(__a) __extension__ ({ \
1710 uint64x1x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 11); r; }) 1710 uint64x1x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 19); r; })
1711 #define vld3_dup_s8(__a) __extension__ ({ \ 1711 #define vld3_dup_s8(__a) __extension__ ({ \
1712 int8x8x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 0); r; }) 1712 int8x8x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 0); r; })
1713 #define vld3_dup_s16(__a) __extension__ ({ \ 1713 #define vld3_dup_s16(__a) __extension__ ({ \
1714 int16x4x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 1); r; }) 1714 int16x4x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 1); r; })
1715 #define vld3_dup_s32(__a) __extension__ ({ \ 1715 #define vld3_dup_s32(__a) __extension__ ({ \
1716 int32x2x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 2); r; }) 1716 int32x2x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 2); r; })
1717 #define vld3_dup_s64(__a) __extension__ ({ \ 1717 #define vld3_dup_s64(__a) __extension__ ({ \
1718 int64x1x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 3); r; }) 1718 int64x1x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 3); r; })
1719 #define vld3_dup_f16(__a) __extension__ ({ \ 1719 #define vld3_dup_f16(__a) __extension__ ({ \
1720 float16x4x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 7); r; }) 1720 float16x4x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 6); r; })
1721 #define vld3_dup_f32(__a) __extension__ ({ \ 1721 #define vld3_dup_f32(__a) __extension__ ({ \
1722 float32x2x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 4); r; }) 1722 float32x2x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 7); r; })
1723 #define vld3_dup_p8(__a) __extension__ ({ \ 1723 #define vld3_dup_p8(__a) __extension__ ({ \
1724 poly8x8x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 5); r; }) 1724 poly8x8x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 4); r; })
1725 #define vld3_dup_p16(__a) __extension__ ({ \ 1725 #define vld3_dup_p16(__a) __extension__ ({ \
1726 poly16x4x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 6); r; }) 1726 poly16x4x3_t r; __builtin_neon_vld3_dup_v(&r, __a, 5); r; })
1727 1727
1728 #define vld3q_lane_u16(__a, b, __c) __extension__ ({ \ 1728 #define vld3q_lane_u16(__a, b, __c) __extension__ ({ \
1729 uint16x8x3_t __b = (b); \ 1729 uint16x8x3_t __b = (b); \
1730 uint16x8x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 25); r; }) 1730 uint16x8x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 49); r; })
1731 #define vld3q_lane_u32(__a, b, __c) __extension__ ({ \ 1731 #define vld3q_lane_u32(__a, b, __c) __extension__ ({ \
1732 uint32x4x3_t __b = (b); \ 1732 uint32x4x3_t __b = (b); \
1733 uint32x4x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 26); r; }) 1733 uint32x4x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 50); r; })
1734 #define vld3q_lane_s16(__a, b, __c) __extension__ ({ \ 1734 #define vld3q_lane_s16(__a, b, __c) __extension__ ({ \
1735 int16x8x3_t __b = (b); \ 1735 int16x8x3_t __b = (b); \
1736 int16x8x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 17); r; }) 1736 int16x8x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 33); r; })
1737 #define vld3q_lane_s32(__a, b, __c) __extension__ ({ \ 1737 #define vld3q_lane_s32(__a, b, __c) __extension__ ({ \
1738 int32x4x3_t __b = (b); \ 1738 int32x4x3_t __b = (b); \
1739 int32x4x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 18); r; }) 1739 int32x4x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 34); r; })
1740 #define vld3q_lane_f16(__a, b, __c) __extension__ ({ \ 1740 #define vld3q_lane_f16(__a, b, __c) __extension__ ({ \
1741 float16x8x3_t __b = (b); \ 1741 float16x8x3_t __b = (b); \
1742 float16x8x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 23); r; }) 1742 float16x8x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 38); r; })
1743 #define vld3q_lane_f32(__a, b, __c) __extension__ ({ \ 1743 #define vld3q_lane_f32(__a, b, __c) __extension__ ({ \
1744 float32x4x3_t __b = (b); \ 1744 float32x4x3_t __b = (b); \
1745 float32x4x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 20); r; }) 1745 float32x4x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 39); r; })
1746 #define vld3q_lane_p16(__a, b, __c) __extension__ ({ \ 1746 #define vld3q_lane_p16(__a, b, __c) __extension__ ({ \
1747 poly16x8x3_t __b = (b); \ 1747 poly16x8x3_t __b = (b); \
1748 poly16x8x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 22); r; }) 1748 poly16x8x3_t r; __builtin_neon_vld3q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 37); r; })
1749 #define vld3_lane_u8(__a, b, __c) __extension__ ({ \ 1749 #define vld3_lane_u8(__a, b, __c) __extension__ ({ \
1750 uint8x8x3_t __b = (b); \ 1750 uint8x8x3_t __b = (b); \
1751 uint8x8x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 8); r; }) 1751 uint8x8x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 16); r; })
1752 #define vld3_lane_u16(__a, b, __c) __extension__ ({ \ 1752 #define vld3_lane_u16(__a, b, __c) __extension__ ({ \
1753 uint16x4x3_t __b = (b); \ 1753 uint16x4x3_t __b = (b); \
1754 uint16x4x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 9); r; }) 1754 uint16x4x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 17); r; })
1755 #define vld3_lane_u32(__a, b, __c) __extension__ ({ \ 1755 #define vld3_lane_u32(__a, b, __c) __extension__ ({ \
1756 uint32x2x3_t __b = (b); \ 1756 uint32x2x3_t __b = (b); \
1757 uint32x2x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 10); r; }) 1757 uint32x2x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 18); r; })
1758 #define vld3_lane_s8(__a, b, __c) __extension__ ({ \ 1758 #define vld3_lane_s8(__a, b, __c) __extension__ ({ \
1759 int8x8x3_t __b = (b); \ 1759 int8x8x3_t __b = (b); \
1760 int8x8x3_t r; __builtin_neon_vld3_lane_v(&r, __a, __b.val[0], __b.val[1], __b.val[2], __c, 0); r; }) 1760 int8x8x3_t r; __builtin_neon_vld3_lane_v(&r, __a, __b.val[0], __b.val[1], __b.val[2], __c, 0); r; })
1761 #define vld3_lane_s16(__a, b, __c) __extension__ ({ \ 1761 #define vld3_lane_s16(__a, b, __c) __extension__ ({ \
1762 int16x4x3_t __b = (b); \ 1762 int16x4x3_t __b = (b); \
1763 int16x4x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 1); r; }) 1763 int16x4x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 1); r; })
1764 #define vld3_lane_s32(__a, b, __c) __extension__ ({ \ 1764 #define vld3_lane_s32(__a, b, __c) __extension__ ({ \
1765 int32x2x3_t __b = (b); \ 1765 int32x2x3_t __b = (b); \
1766 int32x2x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 2); r; }) 1766 int32x2x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 2); r; })
1767 #define vld3_lane_f16(__a, b, __c) __extension__ ({ \ 1767 #define vld3_lane_f16(__a, b, __c) __extension__ ({ \
1768 float16x4x3_t __b = (b); \ 1768 float16x4x3_t __b = (b); \
1769 float16x4x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 7); r; }) 1769 float16x4x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 6); r; })
1770 #define vld3_lane_f32(__a, b, __c) __extension__ ({ \ 1770 #define vld3_lane_f32(__a, b, __c) __extension__ ({ \
1771 float32x2x3_t __b = (b); \ 1771 float32x2x3_t __b = (b); \
1772 float32x2x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 4); r; }) 1772 float32x2x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 7); r; })
1773 #define vld3_lane_p8(__a, b, __c) __extension__ ({ \ 1773 #define vld3_lane_p8(__a, b, __c) __extension__ ({ \
1774 poly8x8x3_t __b = (b); \ 1774 poly8x8x3_t __b = (b); \
1775 poly8x8x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 5); r; }) 1775 poly8x8x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 4); r; })
1776 #define vld3_lane_p16(__a, b, __c) __extension__ ({ \ 1776 #define vld3_lane_p16(__a, b, __c) __extension__ ({ \
1777 poly16x4x3_t __b = (b); \ 1777 poly16x4x3_t __b = (b); \
1778 poly16x4x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 6); r; }) 1778 poly16x4x3_t r; __builtin_neon_vld3_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 5); r; })
1779 1779
1780 #define vld4q_u8(__a) __extension__ ({ \ 1780 #define vld4q_u8(__a) __extension__ ({ \
1781 uint8x16x4_t r; __builtin_neon_vld4q_v(&r, __a, 24); r; }) 1781 uint8x16x4_t r; __builtin_neon_vld4q_v(&r, __a, 48); r; })
1782 #define vld4q_u16(__a) __extension__ ({ \ 1782 #define vld4q_u16(__a) __extension__ ({ \
1783 uint16x8x4_t r; __builtin_neon_vld4q_v(&r, __a, 25); r; }) 1783 uint16x8x4_t r; __builtin_neon_vld4q_v(&r, __a, 49); r; })
1784 #define vld4q_u32(__a) __extension__ ({ \ 1784 #define vld4q_u32(__a) __extension__ ({ \
1785 uint32x4x4_t r; __builtin_neon_vld4q_v(&r, __a, 26); r; }) 1785 uint32x4x4_t r; __builtin_neon_vld4q_v(&r, __a, 50); r; })
1786 #define vld4q_s8(__a) __extension__ ({ \ 1786 #define vld4q_s8(__a) __extension__ ({ \
1787 int8x16x4_t r; __builtin_neon_vld4q_v(&r, __a, 16); r; }) 1787 int8x16x4_t r; __builtin_neon_vld4q_v(&r, __a, 32); r; })
1788 #define vld4q_s16(__a) __extension__ ({ \ 1788 #define vld4q_s16(__a) __extension__ ({ \
1789 int16x8x4_t r; __builtin_neon_vld4q_v(&r, __a, 17); r; }) 1789 int16x8x4_t r; __builtin_neon_vld4q_v(&r, __a, 33); r; })
1790 #define vld4q_s32(__a) __extension__ ({ \ 1790 #define vld4q_s32(__a) __extension__ ({ \
1791 int32x4x4_t r; __builtin_neon_vld4q_v(&r, __a, 18); r; }) 1791 int32x4x4_t r; __builtin_neon_vld4q_v(&r, __a, 34); r; })
1792 #define vld4q_f16(__a) __extension__ ({ \ 1792 #define vld4q_f16(__a) __extension__ ({ \
1793 float16x8x4_t r; __builtin_neon_vld4q_v(&r, __a, 23); r; }) 1793 float16x8x4_t r; __builtin_neon_vld4q_v(&r, __a, 38); r; })
1794 #define vld4q_f32(__a) __extension__ ({ \ 1794 #define vld4q_f32(__a) __extension__ ({ \
1795 float32x4x4_t r; __builtin_neon_vld4q_v(&r, __a, 20); r; }) 1795 float32x4x4_t r; __builtin_neon_vld4q_v(&r, __a, 39); r; })
1796 #define vld4q_p8(__a) __extension__ ({ \ 1796 #define vld4q_p8(__a) __extension__ ({ \
1797 poly8x16x4_t r; __builtin_neon_vld4q_v(&r, __a, 21); r; }) 1797 poly8x16x4_t r; __builtin_neon_vld4q_v(&r, __a, 36); r; })
1798 #define vld4q_p16(__a) __extension__ ({ \ 1798 #define vld4q_p16(__a) __extension__ ({ \
1799 poly16x8x4_t r; __builtin_neon_vld4q_v(&r, __a, 22); r; }) 1799 poly16x8x4_t r; __builtin_neon_vld4q_v(&r, __a, 37); r; })
1800 #define vld4_u8(__a) __extension__ ({ \ 1800 #define vld4_u8(__a) __extension__ ({ \
1801 uint8x8x4_t r; __builtin_neon_vld4_v(&r, __a, 8); r; }) 1801 uint8x8x4_t r; __builtin_neon_vld4_v(&r, __a, 16); r; })
1802 #define vld4_u16(__a) __extension__ ({ \ 1802 #define vld4_u16(__a) __extension__ ({ \
1803 uint16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 9); r; }) 1803 uint16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 17); r; })
1804 #define vld4_u32(__a) __extension__ ({ \ 1804 #define vld4_u32(__a) __extension__ ({ \
1805 uint32x2x4_t r; __builtin_neon_vld4_v(&r, __a, 10); r; }) 1805 uint32x2x4_t r; __builtin_neon_vld4_v(&r, __a, 18); r; })
1806 #define vld4_u64(__a) __extension__ ({ \ 1806 #define vld4_u64(__a) __extension__ ({ \
1807 uint64x1x4_t r; __builtin_neon_vld4_v(&r, __a, 11); r; }) 1807 uint64x1x4_t r; __builtin_neon_vld4_v(&r, __a, 19); r; })
1808 #define vld4_s8(__a) __extension__ ({ \ 1808 #define vld4_s8(__a) __extension__ ({ \
1809 int8x8x4_t r; __builtin_neon_vld4_v(&r, __a, 0); r; }) 1809 int8x8x4_t r; __builtin_neon_vld4_v(&r, __a, 0); r; })
1810 #define vld4_s16(__a) __extension__ ({ \ 1810 #define vld4_s16(__a) __extension__ ({ \
1811 int16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 1); r; }) 1811 int16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 1); r; })
1812 #define vld4_s32(__a) __extension__ ({ \ 1812 #define vld4_s32(__a) __extension__ ({ \
1813 int32x2x4_t r; __builtin_neon_vld4_v(&r, __a, 2); r; }) 1813 int32x2x4_t r; __builtin_neon_vld4_v(&r, __a, 2); r; })
1814 #define vld4_s64(__a) __extension__ ({ \ 1814 #define vld4_s64(__a) __extension__ ({ \
1815 int64x1x4_t r; __builtin_neon_vld4_v(&r, __a, 3); r; }) 1815 int64x1x4_t r; __builtin_neon_vld4_v(&r, __a, 3); r; })
1816 #define vld4_f16(__a) __extension__ ({ \ 1816 #define vld4_f16(__a) __extension__ ({ \
1817 float16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 7); r; }) 1817 float16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 6); r; })
1818 #define vld4_f32(__a) __extension__ ({ \ 1818 #define vld4_f32(__a) __extension__ ({ \
1819 float32x2x4_t r; __builtin_neon_vld4_v(&r, __a, 4); r; }) 1819 float32x2x4_t r; __builtin_neon_vld4_v(&r, __a, 7); r; })
1820 #define vld4_p8(__a) __extension__ ({ \ 1820 #define vld4_p8(__a) __extension__ ({ \
1821 poly8x8x4_t r; __builtin_neon_vld4_v(&r, __a, 5); r; }) 1821 poly8x8x4_t r; __builtin_neon_vld4_v(&r, __a, 4); r; })
1822 #define vld4_p16(__a) __extension__ ({ \ 1822 #define vld4_p16(__a) __extension__ ({ \
1823 poly16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 6); r; }) 1823 poly16x4x4_t r; __builtin_neon_vld4_v(&r, __a, 5); r; })
1824 1824
1825 #define vld4_dup_u8(__a) __extension__ ({ \ 1825 #define vld4_dup_u8(__a) __extension__ ({ \
1826 uint8x8x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 8); r; }) 1826 uint8x8x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 16); r; })
1827 #define vld4_dup_u16(__a) __extension__ ({ \ 1827 #define vld4_dup_u16(__a) __extension__ ({ \
1828 uint16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 9); r; }) 1828 uint16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 17); r; })
1829 #define vld4_dup_u32(__a) __extension__ ({ \ 1829 #define vld4_dup_u32(__a) __extension__ ({ \
1830 uint32x2x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 10); r; }) 1830 uint32x2x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 18); r; })
1831 #define vld4_dup_u64(__a) __extension__ ({ \ 1831 #define vld4_dup_u64(__a) __extension__ ({ \
1832 uint64x1x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 11); r; }) 1832 uint64x1x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 19); r; })
1833 #define vld4_dup_s8(__a) __extension__ ({ \ 1833 #define vld4_dup_s8(__a) __extension__ ({ \
1834 int8x8x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 0); r; }) 1834 int8x8x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 0); r; })
1835 #define vld4_dup_s16(__a) __extension__ ({ \ 1835 #define vld4_dup_s16(__a) __extension__ ({ \
1836 int16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 1); r; }) 1836 int16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 1); r; })
1837 #define vld4_dup_s32(__a) __extension__ ({ \ 1837 #define vld4_dup_s32(__a) __extension__ ({ \
1838 int32x2x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 2); r; }) 1838 int32x2x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 2); r; })
1839 #define vld4_dup_s64(__a) __extension__ ({ \ 1839 #define vld4_dup_s64(__a) __extension__ ({ \
1840 int64x1x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 3); r; }) 1840 int64x1x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 3); r; })
1841 #define vld4_dup_f16(__a) __extension__ ({ \ 1841 #define vld4_dup_f16(__a) __extension__ ({ \
1842 float16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 7); r; }) 1842 float16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 6); r; })
1843 #define vld4_dup_f32(__a) __extension__ ({ \ 1843 #define vld4_dup_f32(__a) __extension__ ({ \
1844 float32x2x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 4); r; }) 1844 float32x2x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 7); r; })
1845 #define vld4_dup_p8(__a) __extension__ ({ \ 1845 #define vld4_dup_p8(__a) __extension__ ({ \
1846 poly8x8x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 5); r; }) 1846 poly8x8x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 4); r; })
1847 #define vld4_dup_p16(__a) __extension__ ({ \ 1847 #define vld4_dup_p16(__a) __extension__ ({ \
1848 poly16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 6); r; }) 1848 poly16x4x4_t r; __builtin_neon_vld4_dup_v(&r, __a, 5); r; })
1849 1849
1850 #define vld4q_lane_u16(__a, b, __c) __extension__ ({ \ 1850 #define vld4q_lane_u16(__a, b, __c) __extension__ ({ \
1851 uint16x8x4_t __b = (b); \ 1851 uint16x8x4_t __b = (b); \
1852 uint16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 25); r; }) 1852 uint16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 49); r; })
1853 #define vld4q_lane_u32(__a, b, __c) __extension__ ({ \ 1853 #define vld4q_lane_u32(__a, b, __c) __extension__ ({ \
1854 uint32x4x4_t __b = (b); \ 1854 uint32x4x4_t __b = (b); \
1855 uint32x4x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 26); r; }) 1855 uint32x4x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 50); r; })
1856 #define vld4q_lane_s16(__a, b, __c) __extension__ ({ \ 1856 #define vld4q_lane_s16(__a, b, __c) __extension__ ({ \
1857 int16x8x4_t __b = (b); \ 1857 int16x8x4_t __b = (b); \
1858 int16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 17); r; }) 1858 int16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 33); r; })
1859 #define vld4q_lane_s32(__a, b, __c) __extension__ ({ \ 1859 #define vld4q_lane_s32(__a, b, __c) __extension__ ({ \
1860 int32x4x4_t __b = (b); \ 1860 int32x4x4_t __b = (b); \
1861 int32x4x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 18); r; }) 1861 int32x4x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 34); r; })
1862 #define vld4q_lane_f16(__a, b, __c) __extension__ ({ \ 1862 #define vld4q_lane_f16(__a, b, __c) __extension__ ({ \
1863 float16x8x4_t __b = (b); \ 1863 float16x8x4_t __b = (b); \
1864 float16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 23); r; }) 1864 float16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 38); r; })
1865 #define vld4q_lane_f32(__a, b, __c) __extension__ ({ \ 1865 #define vld4q_lane_f32(__a, b, __c) __extension__ ({ \
1866 float32x4x4_t __b = (b); \ 1866 float32x4x4_t __b = (b); \
1867 float32x4x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 20); r; }) 1867 float32x4x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 39); r; })
1868 #define vld4q_lane_p16(__a, b, __c) __extension__ ({ \ 1868 #define vld4q_lane_p16(__a, b, __c) __extension__ ({ \
1869 poly16x8x4_t __b = (b); \ 1869 poly16x8x4_t __b = (b); \
1870 poly16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 22); r; }) 1870 poly16x8x4_t r; __builtin_neon_vld4q_lane_v(&r, __a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 37); r; })
1871 #define vld4_lane_u8(__a, b, __c) __extension__ ({ \ 1871 #define vld4_lane_u8(__a, b, __c) __extension__ ({ \
1872 uint8x8x4_t __b = (b); \ 1872 uint8x8x4_t __b = (b); \
1873 uint8x8x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 8); r; }) 1873 uint8x8x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 16); r; })
1874 #define vld4_lane_u16(__a, b, __c) __extension__ ({ \ 1874 #define vld4_lane_u16(__a, b, __c) __extension__ ({ \
1875 uint16x4x4_t __b = (b); \ 1875 uint16x4x4_t __b = (b); \
1876 uint16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 9); r; }) 1876 uint16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 17); r; })
1877 #define vld4_lane_u32(__a, b, __c) __extension__ ({ \ 1877 #define vld4_lane_u32(__a, b, __c) __extension__ ({ \
1878 uint32x2x4_t __b = (b); \ 1878 uint32x2x4_t __b = (b); \
1879 uint32x2x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 10); r; }) 1879 uint32x2x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 18); r; })
1880 #define vld4_lane_s8(__a, b, __c) __extension__ ({ \ 1880 #define vld4_lane_s8(__a, b, __c) __extension__ ({ \
1881 int8x8x4_t __b = (b); \ 1881 int8x8x4_t __b = (b); \
1882 int8x8x4_t r; __builtin_neon_vld4_lane_v(&r, __a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); r; }) 1882 int8x8x4_t r; __builtin_neon_vld4_lane_v(&r, __a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); r; })
1883 #define vld4_lane_s16(__a, b, __c) __extension__ ({ \ 1883 #define vld4_lane_s16(__a, b, __c) __extension__ ({ \
1884 int16x4x4_t __b = (b); \ 1884 int16x4x4_t __b = (b); \
1885 int16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 1); r; }) 1885 int16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 1); r; })
1886 #define vld4_lane_s32(__a, b, __c) __extension__ ({ \ 1886 #define vld4_lane_s32(__a, b, __c) __extension__ ({ \
1887 int32x2x4_t __b = (b); \ 1887 int32x2x4_t __b = (b); \
1888 int32x2x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 2); r; }) 1888 int32x2x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 2); r; })
1889 #define vld4_lane_f16(__a, b, __c) __extension__ ({ \ 1889 #define vld4_lane_f16(__a, b, __c) __extension__ ({ \
1890 float16x4x4_t __b = (b); \ 1890 float16x4x4_t __b = (b); \
1891 float16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 7); r; }) 1891 float16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 6); r; })
1892 #define vld4_lane_f32(__a, b, __c) __extension__ ({ \ 1892 #define vld4_lane_f32(__a, b, __c) __extension__ ({ \
1893 float32x2x4_t __b = (b); \ 1893 float32x2x4_t __b = (b); \
1894 float32x2x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 4); r; }) 1894 float32x2x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 7); r; })
1895 #define vld4_lane_p8(__a, b, __c) __extension__ ({ \ 1895 #define vld4_lane_p8(__a, b, __c) __extension__ ({ \
1896 poly8x8x4_t __b = (b); \ 1896 poly8x8x4_t __b = (b); \
1897 poly8x8x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 5); r; }) 1897 poly8x8x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 4); r; })
1898 #define vld4_lane_p16(__a, b, __c) __extension__ ({ \ 1898 #define vld4_lane_p16(__a, b, __c) __extension__ ({ \
1899 poly16x4x4_t __b = (b); \ 1899 poly16x4x4_t __b = (b); \
1900 poly16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 6); r; }) 1900 poly16x4x4_t r; __builtin_neon_vld4_lane_v(&r, __a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 5); r; })
1901 1901
1902 __ai int8x8_t vmax_s8(int8x8_t __a, int8x8_t __b) { \ 1902 __ai int8x8_t vmax_s8(int8x8_t __a, int8x8_t __b) { \
1903 return (int8x8_t)__builtin_neon_vmax_v(__a, __b, 0); } 1903 return (int8x8_t)__builtin_neon_vmax_v(__a, __b, 0); }
1904 __ai int16x4_t vmax_s16(int16x4_t __a, int16x4_t __b) { \ 1904 __ai int16x4_t vmax_s16(int16x4_t __a, int16x4_t __b) { \
1905 return (int16x4_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 1); } 1905 return (int16x4_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 1); }
1906 __ai int32x2_t vmax_s32(int32x2_t __a, int32x2_t __b) { \ 1906 __ai int32x2_t vmax_s32(int32x2_t __a, int32x2_t __b) { \
1907 return (int32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 2); } 1907 return (int32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 2); }
1908 __ai uint8x8_t vmax_u8(uint8x8_t __a, uint8x8_t __b) { \ 1908 __ai uint8x8_t vmax_u8(uint8x8_t __a, uint8x8_t __b) { \
1909 return (uint8x8_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 8); } 1909 return (uint8x8_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 16); }
1910 __ai uint16x4_t vmax_u16(uint16x4_t __a, uint16x4_t __b) { \ 1910 __ai uint16x4_t vmax_u16(uint16x4_t __a, uint16x4_t __b) { \
1911 return (uint16x4_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 9); } 1911 return (uint16x4_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 17); }
1912 __ai uint32x2_t vmax_u32(uint32x2_t __a, uint32x2_t __b) { \ 1912 __ai uint32x2_t vmax_u32(uint32x2_t __a, uint32x2_t __b) { \
1913 return (uint32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 10); } 1913 return (uint32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 18); }
1914 __ai float32x2_t vmax_f32(float32x2_t __a, float32x2_t __b) { \ 1914 __ai float32x2_t vmax_f32(float32x2_t __a, float32x2_t __b) { \
1915 return (float32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 4); } 1915 return (float32x2_t)__builtin_neon_vmax_v((int8x8_t)__a, (int8x8_t)__b, 7); }
1916 __ai int8x16_t vmaxq_s8(int8x16_t __a, int8x16_t __b) { \ 1916 __ai int8x16_t vmaxq_s8(int8x16_t __a, int8x16_t __b) { \
1917 return (int8x16_t)__builtin_neon_vmaxq_v(__a, __b, 16); } 1917 return (int8x16_t)__builtin_neon_vmaxq_v(__a, __b, 32); }
1918 __ai int16x8_t vmaxq_s16(int16x8_t __a, int16x8_t __b) { \ 1918 __ai int16x8_t vmaxq_s16(int16x8_t __a, int16x8_t __b) { \
1919 return (int16x8_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 17); } 1919 return (int16x8_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
1920 __ai int32x4_t vmaxq_s32(int32x4_t __a, int32x4_t __b) { \ 1920 __ai int32x4_t vmaxq_s32(int32x4_t __a, int32x4_t __b) { \
1921 return (int32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 18); } 1921 return (int32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
1922 __ai uint8x16_t vmaxq_u8(uint8x16_t __a, uint8x16_t __b) { \ 1922 __ai uint8x16_t vmaxq_u8(uint8x16_t __a, uint8x16_t __b) { \
1923 return (uint8x16_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 24); } 1923 return (uint8x16_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
1924 __ai uint16x8_t vmaxq_u16(uint16x8_t __a, uint16x8_t __b) { \ 1924 __ai uint16x8_t vmaxq_u16(uint16x8_t __a, uint16x8_t __b) { \
1925 return (uint16x8_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 25); } 1925 return (uint16x8_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
1926 __ai uint32x4_t vmaxq_u32(uint32x4_t __a, uint32x4_t __b) { \ 1926 __ai uint32x4_t vmaxq_u32(uint32x4_t __a, uint32x4_t __b) { \
1927 return (uint32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 1927 return (uint32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
1928 __ai float32x4_t vmaxq_f32(float32x4_t __a, float32x4_t __b) { \ 1928 __ai float32x4_t vmaxq_f32(float32x4_t __a, float32x4_t __b) { \
1929 return (float32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 20); } 1929 return (float32x4_t)__builtin_neon_vmaxq_v((int8x16_t)__a, (int8x16_t)__b, 39); }
1930 1930
1931 __ai int8x8_t vmin_s8(int8x8_t __a, int8x8_t __b) { \ 1931 __ai int8x8_t vmin_s8(int8x8_t __a, int8x8_t __b) { \
1932 return (int8x8_t)__builtin_neon_vmin_v(__a, __b, 0); } 1932 return (int8x8_t)__builtin_neon_vmin_v(__a, __b, 0); }
1933 __ai int16x4_t vmin_s16(int16x4_t __a, int16x4_t __b) { \ 1933 __ai int16x4_t vmin_s16(int16x4_t __a, int16x4_t __b) { \
1934 return (int16x4_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 1); } 1934 return (int16x4_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 1); }
1935 __ai int32x2_t vmin_s32(int32x2_t __a, int32x2_t __b) { \ 1935 __ai int32x2_t vmin_s32(int32x2_t __a, int32x2_t __b) { \
1936 return (int32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 2); } 1936 return (int32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 2); }
1937 __ai uint8x8_t vmin_u8(uint8x8_t __a, uint8x8_t __b) { \ 1937 __ai uint8x8_t vmin_u8(uint8x8_t __a, uint8x8_t __b) { \
1938 return (uint8x8_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 8); } 1938 return (uint8x8_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 16); }
1939 __ai uint16x4_t vmin_u16(uint16x4_t __a, uint16x4_t __b) { \ 1939 __ai uint16x4_t vmin_u16(uint16x4_t __a, uint16x4_t __b) { \
1940 return (uint16x4_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 9); } 1940 return (uint16x4_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 17); }
1941 __ai uint32x2_t vmin_u32(uint32x2_t __a, uint32x2_t __b) { \ 1941 __ai uint32x2_t vmin_u32(uint32x2_t __a, uint32x2_t __b) { \
1942 return (uint32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 10); } 1942 return (uint32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 18); }
1943 __ai float32x2_t vmin_f32(float32x2_t __a, float32x2_t __b) { \ 1943 __ai float32x2_t vmin_f32(float32x2_t __a, float32x2_t __b) { \
1944 return (float32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 4); } 1944 return (float32x2_t)__builtin_neon_vmin_v((int8x8_t)__a, (int8x8_t)__b, 7); }
1945 __ai int8x16_t vminq_s8(int8x16_t __a, int8x16_t __b) { \ 1945 __ai int8x16_t vminq_s8(int8x16_t __a, int8x16_t __b) { \
1946 return (int8x16_t)__builtin_neon_vminq_v(__a, __b, 16); } 1946 return (int8x16_t)__builtin_neon_vminq_v(__a, __b, 32); }
1947 __ai int16x8_t vminq_s16(int16x8_t __a, int16x8_t __b) { \ 1947 __ai int16x8_t vminq_s16(int16x8_t __a, int16x8_t __b) { \
1948 return (int16x8_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 17); } 1948 return (int16x8_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
1949 __ai int32x4_t vminq_s32(int32x4_t __a, int32x4_t __b) { \ 1949 __ai int32x4_t vminq_s32(int32x4_t __a, int32x4_t __b) { \
1950 return (int32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 18); } 1950 return (int32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
1951 __ai uint8x16_t vminq_u8(uint8x16_t __a, uint8x16_t __b) { \ 1951 __ai uint8x16_t vminq_u8(uint8x16_t __a, uint8x16_t __b) { \
1952 return (uint8x16_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 24); } 1952 return (uint8x16_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
1953 __ai uint16x8_t vminq_u16(uint16x8_t __a, uint16x8_t __b) { \ 1953 __ai uint16x8_t vminq_u16(uint16x8_t __a, uint16x8_t __b) { \
1954 return (uint16x8_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 25); } 1954 return (uint16x8_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
1955 __ai uint32x4_t vminq_u32(uint32x4_t __a, uint32x4_t __b) { \ 1955 __ai uint32x4_t vminq_u32(uint32x4_t __a, uint32x4_t __b) { \
1956 return (uint32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 1956 return (uint32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
1957 __ai float32x4_t vminq_f32(float32x4_t __a, float32x4_t __b) { \ 1957 __ai float32x4_t vminq_f32(float32x4_t __a, float32x4_t __b) { \
1958 return (float32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 20); } 1958 return (float32x4_t)__builtin_neon_vminq_v((int8x16_t)__a, (int8x16_t)__b, 39); }
1959 1959
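Editorial aside (not part of the header diff): the vmax/vmin wrappers above only change the internal __builtin_neon_* type code, so existing callers are unaffected. A minimal, illustrative caller sketch (the helper name is ours, assuming a NEON-enabled toolchain):

#include <arm_neon.h>

/* Clamp each float lane of v into [lo, hi] using the per-lane max/min intrinsics. */
static float32x4_t clamp_f32x4(float32x4_t v, float32x4_t lo, float32x4_t hi) {
  return vminq_f32(vmaxq_f32(v, lo), hi);
}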
1960 __ai int8x8_t vmla_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { \ 1960 __ai int8x8_t vmla_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { \
1961 return __a + (__b * __c); } 1961 return __a + (__b * __c); }
1962 __ai int16x4_t vmla_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) { \ 1962 __ai int16x4_t vmla_s16(int16x4_t __a, int16x4_t __b, int16x4_t __c) { \
1963 return __a + (__b * __c); } 1963 return __a + (__b * __c); }
1964 __ai int32x2_t vmla_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) { \ 1964 __ai int32x2_t vmla_s32(int32x2_t __a, int32x2_t __b, int32x2_t __c) { \
1965 return __a + (__b * __c); } 1965 return __a + (__b * __c); }
1966 __ai float32x2_t vmla_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) { \ 1966 __ai float32x2_t vmla_f32(float32x2_t __a, float32x2_t __b, float32x2_t __c) { \
1967 return __a + (__b * __c); } 1967 return __a + (__b * __c); }
1968 __ai uint8x8_t vmla_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { \ 1968 __ai uint8x8_t vmla_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { \
(...skipping 220 matching lines...)
2189 __ai float32x4_t vmlsq_n_f32(float32x4_t __a, float32x4_t __b, float32_t __c) { \ 2189 __ai float32x4_t vmlsq_n_f32(float32x4_t __a, float32x4_t __b, float32_t __c) { \
2190 return __a - (__b * (float32x4_t){ __c, __c, __c, __c }); } 2190 return __a - (__b * (float32x4_t){ __c, __c, __c, __c }); }
2191 2191
2192 __ai int8x8_t vmovn_s16(int16x8_t __a) { \ 2192 __ai int8x8_t vmovn_s16(int16x8_t __a) { \
2193 return (int8x8_t)__builtin_neon_vmovn_v((int8x16_t)__a, 0); } 2193 return (int8x8_t)__builtin_neon_vmovn_v((int8x16_t)__a, 0); }
2194 __ai int16x4_t vmovn_s32(int32x4_t __a) { \ 2194 __ai int16x4_t vmovn_s32(int32x4_t __a) { \
2195 return (int16x4_t)__builtin_neon_vmovn_v((int8x16_t)__a, 1); } 2195 return (int16x4_t)__builtin_neon_vmovn_v((int8x16_t)__a, 1); }
2196 __ai int32x2_t vmovn_s64(int64x2_t __a) { \ 2196 __ai int32x2_t vmovn_s64(int64x2_t __a) { \
2197 return (int32x2_t)__builtin_neon_vmovn_v((int8x16_t)__a, 2); } 2197 return (int32x2_t)__builtin_neon_vmovn_v((int8x16_t)__a, 2); }
2198 __ai uint8x8_t vmovn_u16(uint16x8_t __a) { \ 2198 __ai uint8x8_t vmovn_u16(uint16x8_t __a) { \
2199 return (uint8x8_t)__builtin_neon_vmovn_v((int8x16_t)__a, 8); } 2199 return (uint8x8_t)__builtin_neon_vmovn_v((int8x16_t)__a, 16); }
2200 __ai uint16x4_t vmovn_u32(uint32x4_t __a) { \ 2200 __ai uint16x4_t vmovn_u32(uint32x4_t __a) { \
2201 return (uint16x4_t)__builtin_neon_vmovn_v((int8x16_t)__a, 9); } 2201 return (uint16x4_t)__builtin_neon_vmovn_v((int8x16_t)__a, 17); }
2202 __ai uint32x2_t vmovn_u64(uint64x2_t __a) { \ 2202 __ai uint32x2_t vmovn_u64(uint64x2_t __a) { \
2203 return (uint32x2_t)__builtin_neon_vmovn_v((int8x16_t)__a, 10); } 2203 return (uint32x2_t)__builtin_neon_vmovn_v((int8x16_t)__a, 18); }
2204 2204
2205 __ai uint8x8_t vmov_n_u8(uint8_t __a) { \ 2205 __ai uint8x8_t vmov_n_u8(uint8_t __a) { \
2206 return (uint8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } 2206 return (uint8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
2207 __ai uint16x4_t vmov_n_u16(uint16_t __a) { \ 2207 __ai uint16x4_t vmov_n_u16(uint16_t __a) { \
2208 return (uint16x4_t){ __a, __a, __a, __a }; } 2208 return (uint16x4_t){ __a, __a, __a, __a }; }
2209 __ai uint32x2_t vmov_n_u32(uint32_t __a) { \ 2209 __ai uint32x2_t vmov_n_u32(uint32_t __a) { \
2210 return (uint32x2_t){ __a, __a }; } 2210 return (uint32x2_t){ __a, __a }; }
2211 __ai int8x8_t vmov_n_s8(int8_t __a) { \ 2211 __ai int8x8_t vmov_n_s8(int8_t __a) { \
2212 return (int8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; } 2212 return (int8x8_t){ __a, __a, __a, __a, __a, __a, __a, __a }; }
2213 __ai int16x4_t vmov_n_s16(int16_t __a) { \ 2213 __ai int16x4_t vmov_n_s16(int16_t __a) { \
(...skipping 69 matching lines...)
2283 int32x2_t __a = (a); int32x2_t __b = (b); \ 2283 int32x2_t __a = (a); int32x2_t __b = (b); \
2284 vmull_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) 2284 vmull_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); })
2285 #define vmull_lane_u16(a, b, __c) __extension__ ({ \ 2285 #define vmull_lane_u16(a, b, __c) __extension__ ({ \
2286 uint16x4_t __a = (a); uint16x4_t __b = (b); \ 2286 uint16x4_t __a = (a); uint16x4_t __b = (b); \
2287 vmull_u16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) 2287 vmull_u16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })
2288 #define vmull_lane_u32(a, b, __c) __extension__ ({ \ 2288 #define vmull_lane_u32(a, b, __c) __extension__ ({ \
2289 uint32x2_t __a = (a); uint32x2_t __b = (b); \ 2289 uint32x2_t __a = (a); uint32x2_t __b = (b); \
2290 vmull_u32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) 2290 vmull_u32(__a, __builtin_shufflevector(__b, __b, __c, __c)); })
2291 2291
2292 __ai int32x4_t vmull_n_s16(int16x4_t __a, int16_t __b) { \ 2292 __ai int32x4_t vmull_n_s16(int16x4_t __a, int16_t __b) { \
2293 return (int32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 18); } 2293 return (int32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 34); }
2294 __ai int64x2_t vmull_n_s32(int32x2_t __a, int32_t __b) { \ 2294 __ai int64x2_t vmull_n_s32(int32x2_t __a, int32_t __b) { \
2295 return (int64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 19); } 2295 return (int64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 35); }
2296 __ai uint32x4_t vmull_n_u16(uint16x4_t __a, uint16_t __b) { \ 2296 __ai uint32x4_t vmull_n_u16(uint16x4_t __a, uint16_t __b) { \
2297 return (uint32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(uint16x4_t){ __b, __b, __b, __b }, 26); } 2297 return (uint32x4_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(uint16x4_t){ __b, __b, __b, __b }, 50); }
2298 __ai uint64x2_t vmull_n_u32(uint32x2_t __a, uint32_t __b) { \ 2298 __ai uint64x2_t vmull_n_u32(uint32x2_t __a, uint32_t __b) { \
2299 return (uint64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(uint32x2_t){ __b, __b }, 27); } 2299 return (uint64x2_t)__builtin_neon_vmull_v((int8x8_t)__a, (int8x8_t)(uint32x2_t){ __b, __b }, 51); }
2300 2300
2301 __ai poly8x8_t vmul_p8(poly8x8_t __a, poly8x8_t __b) { \ 2301 __ai poly8x8_t vmul_p8(poly8x8_t __a, poly8x8_t __b) { \
2302 return (poly8x8_t)__builtin_neon_vmul_v((int8x8_t)__a, (int8x8_t)__b, 5); } 2302 return (poly8x8_t)__builtin_neon_vmul_v((int8x8_t)__a, (int8x8_t)__b, 4); }
2303 __ai poly8x16_t vmulq_p8(poly8x16_t __a, poly8x16_t __b) { \ 2303 __ai poly8x16_t vmulq_p8(poly8x16_t __a, poly8x16_t __b) { \
2304 return (poly8x16_t)__builtin_neon_vmulq_v((int8x16_t)__a, (int8x16_t)__b, 21); } 2304 return (poly8x16_t)__builtin_neon_vmulq_v((int8x16_t)__a, (int8x16_t)__b, 36); }
2305 2305
2306 #define vmul_lane_s16(a, b, __c) __extension__ ({ \ 2306 #define vmul_lane_s16(a, b, __c) __extension__ ({ \
2307 int16x4_t __a = (a); int16x4_t __b = (b); \ 2307 int16x4_t __a = (a); int16x4_t __b = (b); \
2308 __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); }) 2308 __a * __builtin_shufflevector(__b, __b, __c, __c, __c, __c); })
2309 #define vmul_lane_s32(a, b, __c) __extension__ ({ \ 2309 #define vmul_lane_s32(a, b, __c) __extension__ ({ \
2310 int32x2_t __a = (a); int32x2_t __b = (b); \ 2310 int32x2_t __a = (a); int32x2_t __b = (b); \
2311 __a * __builtin_shufflevector(__b, __b, __c, __c); }) 2311 __a * __builtin_shufflevector(__b, __b, __c, __c); })
2312 #define vmul_lane_f32(a, b, __c) __extension__ ({ \ 2312 #define vmul_lane_f32(a, b, __c) __extension__ ({ \
2313 float32x2_t __a = (a); float32x2_t __b = (b); \ 2313 float32x2_t __a = (a); float32x2_t __b = (b); \
2314 __a * __builtin_shufflevector(__b, __b, __c, __c); }) 2314 __a * __builtin_shufflevector(__b, __b, __c, __c); })
(...skipping 152 matching lines...)
2467 __ai uint64x2_t vorrq_u64(uint64x2_t __a, uint64x2_t __b) { \ 2467 __ai uint64x2_t vorrq_u64(uint64x2_t __a, uint64x2_t __b) { \
2468 return __a | __b; } 2468 return __a | __b; }
2469 2469
2470 __ai int16x4_t vpadal_s8(int16x4_t __a, int8x8_t __b) { \ 2470 __ai int16x4_t vpadal_s8(int16x4_t __a, int8x8_t __b) { \
2471 return (int16x4_t)__builtin_neon_vpadal_v((int8x8_t)__a, __b, 1); } 2471 return (int16x4_t)__builtin_neon_vpadal_v((int8x8_t)__a, __b, 1); }
2472 __ai int32x2_t vpadal_s16(int32x2_t __a, int16x4_t __b) { \ 2472 __ai int32x2_t vpadal_s16(int32x2_t __a, int16x4_t __b) { \
2473 return (int32x2_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2473 return (int32x2_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2474 __ai int64x1_t vpadal_s32(int64x1_t __a, int32x2_t __b) { \ 2474 __ai int64x1_t vpadal_s32(int64x1_t __a, int32x2_t __b) { \
2475 return (int64x1_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 3); } 2475 return (int64x1_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 3); }
2476 __ai uint16x4_t vpadal_u8(uint16x4_t __a, uint8x8_t __b) { \ 2476 __ai uint16x4_t vpadal_u8(uint16x4_t __a, uint8x8_t __b) { \
2477 return (uint16x4_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 9); } 2477 return (uint16x4_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2478 __ai uint32x2_t vpadal_u16(uint32x2_t __a, uint16x4_t __b) { \ 2478 __ai uint32x2_t vpadal_u16(uint32x2_t __a, uint16x4_t __b) { \
2479 return (uint32x2_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 10); } 2479 return (uint32x2_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2480 __ai uint64x1_t vpadal_u32(uint64x1_t __a, uint32x2_t __b) { \ 2480 __ai uint64x1_t vpadal_u32(uint64x1_t __a, uint32x2_t __b) { \
2481 return (uint64x1_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 11); } 2481 return (uint64x1_t)__builtin_neon_vpadal_v((int8x8_t)__a, (int8x8_t)__b, 19); }
2482 __ai int16x8_t vpadalq_s8(int16x8_t __a, int8x16_t __b) { \ 2482 __ai int16x8_t vpadalq_s8(int16x8_t __a, int8x16_t __b) { \
2483 return (int16x8_t)__builtin_neon_vpadalq_v((int8x16_t)__a, __b, 17); } 2483 return (int16x8_t)__builtin_neon_vpadalq_v((int8x16_t)__a, __b, 33); }
2484 __ai int32x4_t vpadalq_s16(int32x4_t __a, int16x8_t __b) { \ 2484 __ai int32x4_t vpadalq_s16(int32x4_t __a, int16x8_t __b) { \
2485 return (int32x4_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 18); } 2485 return (int32x4_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2486 __ai int64x2_t vpadalq_s32(int64x2_t __a, int32x4_t __b) { \ 2486 __ai int64x2_t vpadalq_s32(int64x2_t __a, int32x4_t __b) { \
2487 return (int64x2_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 19); } 2487 return (int64x2_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
2488 __ai uint16x8_t vpadalq_u8(uint16x8_t __a, uint8x16_t __b) { \ 2488 __ai uint16x8_t vpadalq_u8(uint16x8_t __a, uint8x16_t __b) { \
2489 return (uint16x8_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 25); } 2489 return (uint16x8_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
2490 __ai uint32x4_t vpadalq_u16(uint32x4_t __a, uint16x8_t __b) { \ 2490 __ai uint32x4_t vpadalq_u16(uint32x4_t __a, uint16x8_t __b) { \
2491 return (uint32x4_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 2491 return (uint32x4_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
2492 __ai uint64x2_t vpadalq_u32(uint64x2_t __a, uint32x4_t __b) { \ 2492 __ai uint64x2_t vpadalq_u32(uint64x2_t __a, uint32x4_t __b) { \
2493 return (uint64x2_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 27); } 2493 return (uint64x2_t)__builtin_neon_vpadalq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
2494 2494
2495 __ai int8x8_t vpadd_s8(int8x8_t __a, int8x8_t __b) { \ 2495 __ai int8x8_t vpadd_s8(int8x8_t __a, int8x8_t __b) { \
2496 return (int8x8_t)__builtin_neon_vpadd_v(__a, __b, 0); } 2496 return (int8x8_t)__builtin_neon_vpadd_v(__a, __b, 0); }
2497 __ai int16x4_t vpadd_s16(int16x4_t __a, int16x4_t __b) { \ 2497 __ai int16x4_t vpadd_s16(int16x4_t __a, int16x4_t __b) { \
2498 return (int16x4_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } 2498 return (int16x4_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2499 __ai int32x2_t vpadd_s32(int32x2_t __a, int32x2_t __b) { \ 2499 __ai int32x2_t vpadd_s32(int32x2_t __a, int32x2_t __b) { \
2500 return (int32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2500 return (int32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2501 __ai uint8x8_t vpadd_u8(uint8x8_t __a, uint8x8_t __b) { \ 2501 __ai uint8x8_t vpadd_u8(uint8x8_t __a, uint8x8_t __b) { \
2502 return (uint8x8_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 8); } 2502 return (uint8x8_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 16); }
2503 __ai uint16x4_t vpadd_u16(uint16x4_t __a, uint16x4_t __b) { \ 2503 __ai uint16x4_t vpadd_u16(uint16x4_t __a, uint16x4_t __b) { \
2504 return (uint16x4_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 9); } 2504 return (uint16x4_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2505 __ai uint32x2_t vpadd_u32(uint32x2_t __a, uint32x2_t __b) { \ 2505 __ai uint32x2_t vpadd_u32(uint32x2_t __a, uint32x2_t __b) { \
2506 return (uint32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 10); } 2506 return (uint32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2507 __ai float32x2_t vpadd_f32(float32x2_t __a, float32x2_t __b) { \ 2507 __ai float32x2_t vpadd_f32(float32x2_t __a, float32x2_t __b) { \
2508 return (float32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 4); } 2508 return (float32x2_t)__builtin_neon_vpadd_v((int8x8_t)__a, (int8x8_t)__b, 7); }
2509 2509
2510 __ai int16x4_t vpaddl_s8(int8x8_t __a) { \ 2510 __ai int16x4_t vpaddl_s8(int8x8_t __a) { \
2511 return (int16x4_t)__builtin_neon_vpaddl_v(__a, 1); } 2511 return (int16x4_t)__builtin_neon_vpaddl_v(__a, 1); }
2512 __ai int32x2_t vpaddl_s16(int16x4_t __a) { \ 2512 __ai int32x2_t vpaddl_s16(int16x4_t __a) { \
2513 return (int32x2_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 2); } 2513 return (int32x2_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 2); }
2514 __ai int64x1_t vpaddl_s32(int32x2_t __a) { \ 2514 __ai int64x1_t vpaddl_s32(int32x2_t __a) { \
2515 return (int64x1_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 3); } 2515 return (int64x1_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 3); }
2516 __ai uint16x4_t vpaddl_u8(uint8x8_t __a) { \ 2516 __ai uint16x4_t vpaddl_u8(uint8x8_t __a) { \
2517 return (uint16x4_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 9); } 2517 return (uint16x4_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 17); }
2518 __ai uint32x2_t vpaddl_u16(uint16x4_t __a) { \ 2518 __ai uint32x2_t vpaddl_u16(uint16x4_t __a) { \
2519 return (uint32x2_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 10); } 2519 return (uint32x2_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 18); }
2520 __ai uint64x1_t vpaddl_u32(uint32x2_t __a) { \ 2520 __ai uint64x1_t vpaddl_u32(uint32x2_t __a) { \
2521 return (uint64x1_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 11); } 2521 return (uint64x1_t)__builtin_neon_vpaddl_v((int8x8_t)__a, 19); }
2522 __ai int16x8_t vpaddlq_s8(int8x16_t __a) { \ 2522 __ai int16x8_t vpaddlq_s8(int8x16_t __a) { \
2523 return (int16x8_t)__builtin_neon_vpaddlq_v(__a, 17); } 2523 return (int16x8_t)__builtin_neon_vpaddlq_v(__a, 33); }
2524 __ai int32x4_t vpaddlq_s16(int16x8_t __a) { \ 2524 __ai int32x4_t vpaddlq_s16(int16x8_t __a) { \
2525 return (int32x4_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 18); } 2525 return (int32x4_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 34); }
2526 __ai int64x2_t vpaddlq_s32(int32x4_t __a) { \ 2526 __ai int64x2_t vpaddlq_s32(int32x4_t __a) { \
2527 return (int64x2_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 19); } 2527 return (int64x2_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 35); }
2528 __ai uint16x8_t vpaddlq_u8(uint8x16_t __a) { \ 2528 __ai uint16x8_t vpaddlq_u8(uint8x16_t __a) { \
2529 return (uint16x8_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 25); } 2529 return (uint16x8_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 49); }
2530 __ai uint32x4_t vpaddlq_u16(uint16x8_t __a) { \ 2530 __ai uint32x4_t vpaddlq_u16(uint16x8_t __a) { \
2531 return (uint32x4_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 26); } 2531 return (uint32x4_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 50); }
2532 __ai uint64x2_t vpaddlq_u32(uint32x4_t __a) { \ 2532 __ai uint64x2_t vpaddlq_u32(uint32x4_t __a) { \
2533 return (uint64x2_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 27); } 2533 return (uint64x2_t)__builtin_neon_vpaddlq_v((int8x16_t)__a, 51); }
2534 2534
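Editorial aside (not part of the diff): the vpaddl/vpadal wrappers above pairwise-add while widening the element type, the usual building block for horizontal reductions. An illustrative sketch (helper name is ours):

#include <arm_neon.h>

/* Sum eight unsigned bytes into a scalar by repeatedly pairwise-adding and widening. */
static uint32_t sum_u8x8(uint8x8_t v) {
  uint16x4_t s16 = vpaddl_u8(v);    /* 8 x u8  -> 4 x u16 */
  uint32x2_t s32 = vpaddl_u16(s16); /* 4 x u16 -> 2 x u32 */
  uint64x1_t s64 = vpaddl_u32(s32); /* 2 x u32 -> 1 x u64 */
  return (uint32_t)vget_lane_u64(s64, 0);
}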
2535 __ai int8x8_t vpmax_s8(int8x8_t __a, int8x8_t __b) { \ 2535 __ai int8x8_t vpmax_s8(int8x8_t __a, int8x8_t __b) { \
2536 return (int8x8_t)__builtin_neon_vpmax_v(__a, __b, 0); } 2536 return (int8x8_t)__builtin_neon_vpmax_v(__a, __b, 0); }
2537 __ai int16x4_t vpmax_s16(int16x4_t __a, int16x4_t __b) { \ 2537 __ai int16x4_t vpmax_s16(int16x4_t __a, int16x4_t __b) { \
2538 return (int16x4_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 1); } 2538 return (int16x4_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2539 __ai int32x2_t vpmax_s32(int32x2_t __a, int32x2_t __b) { \ 2539 __ai int32x2_t vpmax_s32(int32x2_t __a, int32x2_t __b) { \
2540 return (int32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2540 return (int32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2541 __ai uint8x8_t vpmax_u8(uint8x8_t __a, uint8x8_t __b) { \ 2541 __ai uint8x8_t vpmax_u8(uint8x8_t __a, uint8x8_t __b) { \
2542 return (uint8x8_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 8); } 2542 return (uint8x8_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 16); }
2543 __ai uint16x4_t vpmax_u16(uint16x4_t __a, uint16x4_t __b) { \ 2543 __ai uint16x4_t vpmax_u16(uint16x4_t __a, uint16x4_t __b) { \
2544 return (uint16x4_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 9); } 2544 return (uint16x4_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2545 __ai uint32x2_t vpmax_u32(uint32x2_t __a, uint32x2_t __b) { \ 2545 __ai uint32x2_t vpmax_u32(uint32x2_t __a, uint32x2_t __b) { \
2546 return (uint32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 10); } 2546 return (uint32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2547 __ai float32x2_t vpmax_f32(float32x2_t __a, float32x2_t __b) { \ 2547 __ai float32x2_t vpmax_f32(float32x2_t __a, float32x2_t __b) { \
2548 return (float32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 4); } 2548 return (float32x2_t)__builtin_neon_vpmax_v((int8x8_t)__a, (int8x8_t)__b, 7); }
2549 2549
2550 __ai int8x8_t vpmin_s8(int8x8_t __a, int8x8_t __b) { \ 2550 __ai int8x8_t vpmin_s8(int8x8_t __a, int8x8_t __b) { \
2551 return (int8x8_t)__builtin_neon_vpmin_v(__a, __b, 0); } 2551 return (int8x8_t)__builtin_neon_vpmin_v(__a, __b, 0); }
2552 __ai int16x4_t vpmin_s16(int16x4_t __a, int16x4_t __b) { \ 2552 __ai int16x4_t vpmin_s16(int16x4_t __a, int16x4_t __b) { \
2553 return (int16x4_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 1); } 2553 return (int16x4_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2554 __ai int32x2_t vpmin_s32(int32x2_t __a, int32x2_t __b) { \ 2554 __ai int32x2_t vpmin_s32(int32x2_t __a, int32x2_t __b) { \
2555 return (int32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2555 return (int32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2556 __ai uint8x8_t vpmin_u8(uint8x8_t __a, uint8x8_t __b) { \ 2556 __ai uint8x8_t vpmin_u8(uint8x8_t __a, uint8x8_t __b) { \
2557 return (uint8x8_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 8); } 2557 return (uint8x8_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 16); }
2558 __ai uint16x4_t vpmin_u16(uint16x4_t __a, uint16x4_t __b) { \ 2558 __ai uint16x4_t vpmin_u16(uint16x4_t __a, uint16x4_t __b) { \
2559 return (uint16x4_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 9); } 2559 return (uint16x4_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2560 __ai uint32x2_t vpmin_u32(uint32x2_t __a, uint32x2_t __b) { \ 2560 __ai uint32x2_t vpmin_u32(uint32x2_t __a, uint32x2_t __b) { \
2561 return (uint32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 10); } 2561 return (uint32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2562 __ai float32x2_t vpmin_f32(float32x2_t __a, float32x2_t __b) { \ 2562 __ai float32x2_t vpmin_f32(float32x2_t __a, float32x2_t __b) { \
2563 return (float32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 4); } 2563 return (float32x2_t)__builtin_neon_vpmin_v((int8x8_t)__a, (int8x8_t)__b, 7); }
2564 2564
2565 __ai int8x8_t vqabs_s8(int8x8_t __a) { \ 2565 __ai int8x8_t vqabs_s8(int8x8_t __a) { \
2566 return (int8x8_t)__builtin_neon_vqabs_v(__a, 0); } 2566 return (int8x8_t)__builtin_neon_vqabs_v(__a, 0); }
2567 __ai int16x4_t vqabs_s16(int16x4_t __a) { \ 2567 __ai int16x4_t vqabs_s16(int16x4_t __a) { \
2568 return (int16x4_t)__builtin_neon_vqabs_v((int8x8_t)__a, 1); } 2568 return (int16x4_t)__builtin_neon_vqabs_v((int8x8_t)__a, 1); }
2569 __ai int32x2_t vqabs_s32(int32x2_t __a) { \ 2569 __ai int32x2_t vqabs_s32(int32x2_t __a) { \
2570 return (int32x2_t)__builtin_neon_vqabs_v((int8x8_t)__a, 2); } 2570 return (int32x2_t)__builtin_neon_vqabs_v((int8x8_t)__a, 2); }
2571 __ai int8x16_t vqabsq_s8(int8x16_t __a) { \ 2571 __ai int8x16_t vqabsq_s8(int8x16_t __a) { \
2572 return (int8x16_t)__builtin_neon_vqabsq_v(__a, 16); } 2572 return (int8x16_t)__builtin_neon_vqabsq_v(__a, 32); }
2573 __ai int16x8_t vqabsq_s16(int16x8_t __a) { \ 2573 __ai int16x8_t vqabsq_s16(int16x8_t __a) { \
2574 return (int16x8_t)__builtin_neon_vqabsq_v((int8x16_t)__a, 17); } 2574 return (int16x8_t)__builtin_neon_vqabsq_v((int8x16_t)__a, 33); }
2575 __ai int32x4_t vqabsq_s32(int32x4_t __a) { \ 2575 __ai int32x4_t vqabsq_s32(int32x4_t __a) { \
2576 return (int32x4_t)__builtin_neon_vqabsq_v((int8x16_t)__a, 18); } 2576 return (int32x4_t)__builtin_neon_vqabsq_v((int8x16_t)__a, 34); }
2577 2577
2578 __ai int8x8_t vqadd_s8(int8x8_t __a, int8x8_t __b) { \ 2578 __ai int8x8_t vqadd_s8(int8x8_t __a, int8x8_t __b) { \
2579 return (int8x8_t)__builtin_neon_vqadd_v(__a, __b, 0); } 2579 return (int8x8_t)__builtin_neon_vqadd_v(__a, __b, 0); }
2580 __ai int16x4_t vqadd_s16(int16x4_t __a, int16x4_t __b) { \ 2580 __ai int16x4_t vqadd_s16(int16x4_t __a, int16x4_t __b) { \
2581 return (int16x4_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } 2581 return (int16x4_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2582 __ai int32x2_t vqadd_s32(int32x2_t __a, int32x2_t __b) { \ 2582 __ai int32x2_t vqadd_s32(int32x2_t __a, int32x2_t __b) { \
2583 return (int32x2_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2583 return (int32x2_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2584 __ai int64x1_t vqadd_s64(int64x1_t __a, int64x1_t __b) { \ 2584 __ai int64x1_t vqadd_s64(int64x1_t __a, int64x1_t __b) { \
2585 return (int64x1_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 3); } 2585 return (int64x1_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 3); }
2586 __ai uint8x8_t vqadd_u8(uint8x8_t __a, uint8x8_t __b) { \ 2586 __ai uint8x8_t vqadd_u8(uint8x8_t __a, uint8x8_t __b) { \
2587 return (uint8x8_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 8); } 2587 return (uint8x8_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 16); }
2588 __ai uint16x4_t vqadd_u16(uint16x4_t __a, uint16x4_t __b) { \ 2588 __ai uint16x4_t vqadd_u16(uint16x4_t __a, uint16x4_t __b) { \
2589 return (uint16x4_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 9); } 2589 return (uint16x4_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2590 __ai uint32x2_t vqadd_u32(uint32x2_t __a, uint32x2_t __b) { \ 2590 __ai uint32x2_t vqadd_u32(uint32x2_t __a, uint32x2_t __b) { \
2591 return (uint32x2_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 10); } 2591 return (uint32x2_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2592 __ai uint64x1_t vqadd_u64(uint64x1_t __a, uint64x1_t __b) { \ 2592 __ai uint64x1_t vqadd_u64(uint64x1_t __a, uint64x1_t __b) { \
2593 return (uint64x1_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 11); } 2593 return (uint64x1_t)__builtin_neon_vqadd_v((int8x8_t)__a, (int8x8_t)__b, 19); }
2594 __ai int8x16_t vqaddq_s8(int8x16_t __a, int8x16_t __b) { \ 2594 __ai int8x16_t vqaddq_s8(int8x16_t __a, int8x16_t __b) { \
2595 return (int8x16_t)__builtin_neon_vqaddq_v(__a, __b, 16); } 2595 return (int8x16_t)__builtin_neon_vqaddq_v(__a, __b, 32); }
2596 __ai int16x8_t vqaddq_s16(int16x8_t __a, int16x8_t __b) { \ 2596 __ai int16x8_t vqaddq_s16(int16x8_t __a, int16x8_t __b) { \
2597 return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 17); } 2597 return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2598 __ai int32x4_t vqaddq_s32(int32x4_t __a, int32x4_t __b) { \ 2598 __ai int32x4_t vqaddq_s32(int32x4_t __a, int32x4_t __b) { \
2599 return (int32x4_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 18); } 2599 return (int32x4_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2600 __ai int64x2_t vqaddq_s64(int64x2_t __a, int64x2_t __b) { \ 2600 __ai int64x2_t vqaddq_s64(int64x2_t __a, int64x2_t __b) { \
2601 return (int64x2_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 19); } 2601 return (int64x2_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
2602 __ai uint8x16_t vqaddq_u8(uint8x16_t __a, uint8x16_t __b) { \ 2602 __ai uint8x16_t vqaddq_u8(uint8x16_t __a, uint8x16_t __b) { \
2603 return (uint8x16_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 24); } 2603 return (uint8x16_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
2604 __ai uint16x8_t vqaddq_u16(uint16x8_t __a, uint16x8_t __b) { \ 2604 __ai uint16x8_t vqaddq_u16(uint16x8_t __a, uint16x8_t __b) { \
2605 return (uint16x8_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 25); } 2605 return (uint16x8_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
2606 __ai uint32x4_t vqaddq_u32(uint32x4_t __a, uint32x4_t __b) { \ 2606 __ai uint32x4_t vqaddq_u32(uint32x4_t __a, uint32x4_t __b) { \
2607 return (uint32x4_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 2607 return (uint32x4_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
2608 __ai uint64x2_t vqaddq_u64(uint64x2_t __a, uint64x2_t __b) { \ 2608 __ai uint64x2_t vqaddq_u64(uint64x2_t __a, uint64x2_t __b) { \
2609 return (uint64x2_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 27); } 2609 return (uint64x2_t)__builtin_neon_vqaddq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
2610 2610
2611 __ai int32x4_t vqdmlal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { \ 2611 __ai int32x4_t vqdmlal_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { \
2612 return (int32x4_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 18); } 2612 return (int32x4_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 34); }
2613 __ai int64x2_t vqdmlal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { \ 2613 __ai int64x2_t vqdmlal_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { \
2614 return (int64x2_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 19); } 2614 return (int64x2_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 35); }
2615 2615
2616 #define vqdmlal_lane_s16(a, b, c, __d) __extension__ ({ \ 2616 #define vqdmlal_lane_s16(a, b, c, __d) __extension__ ({ \
2617 int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ 2617 int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \
2618 vqdmlal_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 2618 vqdmlal_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
2619 #define vqdmlal_lane_s32(a, b, c, __d) __extension__ ({ \ 2619 #define vqdmlal_lane_s32(a, b, c, __d) __extension__ ({ \
2620 int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ 2620 int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \
2621 vqdmlal_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); }) 2621 vqdmlal_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); })
2622 2622
2623 __ai int32x4_t vqdmlal_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { \ 2623 __ai int32x4_t vqdmlal_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { \
2624 return (int32x4_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int16x4_t){ __c, __c, __c, __c }, 18); } 2624 return (int32x4_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int16x4_t){ __c, __c, __c, __c }, 34); }
2625 __ai int64x2_t vqdmlal_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { \ 2625 __ai int64x2_t vqdmlal_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { \
2626 return (int64x2_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int32x2_t){ __c, __c }, 19); } 2626 return (int64x2_t)__builtin_neon_vqdmlal_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int32x2_t){ __c, __c }, 35); }
2627 2627
2628 __ai int32x4_t vqdmlsl_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { \ 2628 __ai int32x4_t vqdmlsl_s16(int32x4_t __a, int16x4_t __b, int16x4_t __c) { \
2629 return (int32x4_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 18); } 2629 return (int32x4_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 34); }
2630 __ai int64x2_t vqdmlsl_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { \ 2630 __ai int64x2_t vqdmlsl_s32(int64x2_t __a, int32x2_t __b, int32x2_t __c) { \
2631 return (int64x2_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 19); } 2631 return (int64x2_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)__c, 35); }
2632 2632
2633 #define vqdmlsl_lane_s16(a, b, c, __d) __extension__ ({ \ 2633 #define vqdmlsl_lane_s16(a, b, c, __d) __extension__ ({ \
2634 int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \ 2634 int32x4_t __a = (a); int16x4_t __b = (b); int16x4_t __c = (c); \
2635 vqdmlsl_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); }) 2635 vqdmlsl_s16(__a, __b, __builtin_shufflevector(__c, __c, __d, __d, __d, __d)); })
2636 #define vqdmlsl_lane_s32(a, b, c, __d) __extension__ ({ \ 2636 #define vqdmlsl_lane_s32(a, b, c, __d) __extension__ ({ \
2637 int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \ 2637 int64x2_t __a = (a); int32x2_t __b = (b); int32x2_t __c = (c); \
2638 vqdmlsl_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); }) 2638 vqdmlsl_s32(__a, __b, __builtin_shufflevector(__c, __c, __d, __d)); })
2639 2639
2640 __ai int32x4_t vqdmlsl_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { \ 2640 __ai int32x4_t vqdmlsl_n_s16(int32x4_t __a, int16x4_t __b, int16_t __c) { \
2641 return (int32x4_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int16x4_t){ __c, __c, __c, __c }, 18); } 2641 return (int32x4_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int16x4_t){ __c, __c, __c, __c }, 34); }
2642 __ai int64x2_t vqdmlsl_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { \ 2642 __ai int64x2_t vqdmlsl_n_s32(int64x2_t __a, int32x2_t __b, int32_t __c) { \
2643 return (int64x2_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int32x2_t){ __c, __c }, 19); } 2643 return (int64x2_t)__builtin_neon_vqdmlsl_v((int8x16_t)__a, (int8x8_t)__b, (int8x8_t)(int32x2_t){ __c, __c }, 35); }
2644 2644
2645 __ai int16x4_t vqdmulh_s16(int16x4_t __a, int16x4_t __b) { \ 2645 __ai int16x4_t vqdmulh_s16(int16x4_t __a, int16x4_t __b) { \
2646 return (int16x4_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)__b, 1); } 2646 return (int16x4_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2647 __ai int32x2_t vqdmulh_s32(int32x2_t __a, int32x2_t __b) { \ 2647 __ai int32x2_t vqdmulh_s32(int32x2_t __a, int32x2_t __b) { \
2648 return (int32x2_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2648 return (int32x2_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2649 __ai int16x8_t vqdmulhq_s16(int16x8_t __a, int16x8_t __b) { \ 2649 __ai int16x8_t vqdmulhq_s16(int16x8_t __a, int16x8_t __b) { \
2650 return (int16x8_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 17); } 2650 return (int16x8_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2651 __ai int32x4_t vqdmulhq_s32(int32x4_t __a, int32x4_t __b) { \ 2651 __ai int32x4_t vqdmulhq_s32(int32x4_t __a, int32x4_t __b) { \
2652 return (int32x4_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 18); } 2652 return (int32x4_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2653 2653
2654 #define vqdmulh_lane_s16(a, b, __c) __extension__ ({ \ 2654 #define vqdmulh_lane_s16(a, b, __c) __extension__ ({ \
2655 int16x4_t __a = (a); int16x4_t __b = (b); \ 2655 int16x4_t __a = (a); int16x4_t __b = (b); \
2656 vqdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) 2656 vqdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })
2657 #define vqdmulh_lane_s32(a, b, __c) __extension__ ({ \ 2657 #define vqdmulh_lane_s32(a, b, __c) __extension__ ({ \
2658 int32x2_t __a = (a); int32x2_t __b = (b); \ 2658 int32x2_t __a = (a); int32x2_t __b = (b); \
2659 vqdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) 2659 vqdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); })
2660 #define vqdmulhq_lane_s16(a, b, __c) __extension__ ({ \ 2660 #define vqdmulhq_lane_s16(a, b, __c) __extension__ ({ \
2661 int16x8_t __a = (a); int16x4_t __b = (b); \ 2661 int16x8_t __a = (a); int16x4_t __b = (b); \
2662 vqdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c)); }) 2662 vqdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c)); })
2663 #define vqdmulhq_lane_s32(a, b, __c) __extension__ ({ \ 2663 #define vqdmulhq_lane_s32(a, b, __c) __extension__ ({ \
2664 int32x4_t __a = (a); int32x2_t __b = (b); \ 2664 int32x4_t __a = (a); int32x2_t __b = (b); \
2665 vqdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) 2665 vqdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })
2666 2666
2667 __ai int16x4_t vqdmulh_n_s16(int16x4_t __a, int16_t __b) { \ 2667 __ai int16x4_t vqdmulh_n_s16(int16x4_t __a, int16_t __b) { \
2668 return (int16x4_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 1); } 2668 return (int16x4_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 1); }
2669 __ai int32x2_t vqdmulh_n_s32(int32x2_t __a, int32_t __b) { \ 2669 __ai int32x2_t vqdmulh_n_s32(int32x2_t __a, int32_t __b) { \
2670 return (int32x2_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 2); } 2670 return (int32x2_t)__builtin_neon_vqdmulh_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 2); }
2671 __ai int16x8_t vqdmulhq_n_s16(int16x8_t __a, int16_t __b) { \ 2671 __ai int16x8_t vqdmulhq_n_s16(int16x8_t __a, int16_t __b) { \
2672 return (int16x8_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)(int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }, 17); } 2672 return (int16x8_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)(int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }, 33); }
2673 __ai int32x4_t vqdmulhq_n_s32(int32x4_t __a, int32_t __b) { \ 2673 __ai int32x4_t vqdmulhq_n_s32(int32x4_t __a, int32_t __b) { \
2674 return (int32x4_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)(int32x4_t){ __b, __b, __b, __b }, 18); } 2674 return (int32x4_t)__builtin_neon_vqdmulhq_v((int8x16_t)__a, (int8x16_t)(int32x4_t){ __b, __b, __b, __b }, 34); }
2675 2675
2676 __ai int32x4_t vqdmull_s16(int16x4_t __a, int16x4_t __b) { \ 2676 __ai int32x4_t vqdmull_s16(int16x4_t __a, int16x4_t __b) { \
2677 return (int32x4_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)__b, 18); } 2677 return (int32x4_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)__b, 34); }
2678 __ai int64x2_t vqdmull_s32(int32x2_t __a, int32x2_t __b) { \ 2678 __ai int64x2_t vqdmull_s32(int32x2_t __a, int32x2_t __b) { \
2679 return (int64x2_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)__b, 19); } 2679 return (int64x2_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)__b, 35); }
2680 2680
2681 #define vqdmull_lane_s16(a, b, __c) __extension__ ({ \ 2681 #define vqdmull_lane_s16(a, b, __c) __extension__ ({ \
2682 int16x4_t __a = (a); int16x4_t __b = (b); \ 2682 int16x4_t __a = (a); int16x4_t __b = (b); \
2683 vqdmull_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) 2683 vqdmull_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })
2684 #define vqdmull_lane_s32(a, b, __c) __extension__ ({ \ 2684 #define vqdmull_lane_s32(a, b, __c) __extension__ ({ \
2685 int32x2_t __a = (a); int32x2_t __b = (b); \ 2685 int32x2_t __a = (a); int32x2_t __b = (b); \
2686 vqdmull_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) 2686 vqdmull_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); })
2687 2687
2688 __ai int32x4_t vqdmull_n_s16(int16x4_t __a, int16_t __b) { \ 2688 __ai int32x4_t vqdmull_n_s16(int16x4_t __a, int16_t __b) { \
2689 return (int32x4_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 18); } 2689 return (int32x4_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 34); }
2690 __ai int64x2_t vqdmull_n_s32(int32x2_t __a, int32_t __b) { \ 2690 __ai int64x2_t vqdmull_n_s32(int32x2_t __a, int32_t __b) { \
2691 return (int64x2_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 19); } 2691 return (int64x2_t)__builtin_neon_vqdmull_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 35); }
2692 2692
2693 __ai int8x8_t vqmovn_s16(int16x8_t __a) { \ 2693 __ai int8x8_t vqmovn_s16(int16x8_t __a) { \
2694 return (int8x8_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 0); } 2694 return (int8x8_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 0); }
2695 __ai int16x4_t vqmovn_s32(int32x4_t __a) { \ 2695 __ai int16x4_t vqmovn_s32(int32x4_t __a) { \
2696 return (int16x4_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 1); } 2696 return (int16x4_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 1); }
2697 __ai int32x2_t vqmovn_s64(int64x2_t __a) { \ 2697 __ai int32x2_t vqmovn_s64(int64x2_t __a) { \
2698 return (int32x2_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 2); } 2698 return (int32x2_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 2); }
2699 __ai uint8x8_t vqmovn_u16(uint16x8_t __a) { \ 2699 __ai uint8x8_t vqmovn_u16(uint16x8_t __a) { \
2700 return (uint8x8_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 8); } 2700 return (uint8x8_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 16); }
2701 __ai uint16x4_t vqmovn_u32(uint32x4_t __a) { \ 2701 __ai uint16x4_t vqmovn_u32(uint32x4_t __a) { \
2702 return (uint16x4_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 9); } 2702 return (uint16x4_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 17); }
2703 __ai uint32x2_t vqmovn_u64(uint64x2_t __a) { \ 2703 __ai uint32x2_t vqmovn_u64(uint64x2_t __a) { \
2704 return (uint32x2_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 10); } 2704 return (uint32x2_t)__builtin_neon_vqmovn_v((int8x16_t)__a, 18); }
2705 2705
2706 __ai uint8x8_t vqmovun_s16(int16x8_t __a) { \ 2706 __ai uint8x8_t vqmovun_s16(int16x8_t __a) { \
2707 return (uint8x8_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 8); } 2707 return (uint8x8_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 16); }
2708 __ai uint16x4_t vqmovun_s32(int32x4_t __a) { \ 2708 __ai uint16x4_t vqmovun_s32(int32x4_t __a) { \
2709 return (uint16x4_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 9); } 2709 return (uint16x4_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 17); }
2710 __ai uint32x2_t vqmovun_s64(int64x2_t __a) { \ 2710 __ai uint32x2_t vqmovun_s64(int64x2_t __a) { \
2711 return (uint32x2_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 10); } 2711 return (uint32x2_t)__builtin_neon_vqmovun_v((int8x16_t)__a, 18); }
2712 2712
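Editorial aside (not part of the diff): vqmovn/vqmovun above narrow with saturation, e.g. packing signed 16-bit intermediates into unsigned bytes. Illustrative sketch (helper name is ours):

#include <arm_neon.h>

/* Saturate eight signed 16-bit lanes into unsigned bytes: negatives clamp to 0, values above 255 clamp to 255. */
static uint8x8_t pack_s16_to_u8(int16x8_t v) {
  return vqmovun_s16(v);
}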
2713 __ai int8x8_t vqneg_s8(int8x8_t __a) { \ 2713 __ai int8x8_t vqneg_s8(int8x8_t __a) { \
2714 return (int8x8_t)__builtin_neon_vqneg_v(__a, 0); } 2714 return (int8x8_t)__builtin_neon_vqneg_v(__a, 0); }
2715 __ai int16x4_t vqneg_s16(int16x4_t __a) { \ 2715 __ai int16x4_t vqneg_s16(int16x4_t __a) { \
2716 return (int16x4_t)__builtin_neon_vqneg_v((int8x8_t)__a, 1); } 2716 return (int16x4_t)__builtin_neon_vqneg_v((int8x8_t)__a, 1); }
2717 __ai int32x2_t vqneg_s32(int32x2_t __a) { \ 2717 __ai int32x2_t vqneg_s32(int32x2_t __a) { \
2718 return (int32x2_t)__builtin_neon_vqneg_v((int8x8_t)__a, 2); } 2718 return (int32x2_t)__builtin_neon_vqneg_v((int8x8_t)__a, 2); }
2719 __ai int8x16_t vqnegq_s8(int8x16_t __a) { \ 2719 __ai int8x16_t vqnegq_s8(int8x16_t __a) { \
2720 return (int8x16_t)__builtin_neon_vqnegq_v(__a, 16); } 2720 return (int8x16_t)__builtin_neon_vqnegq_v(__a, 32); }
2721 __ai int16x8_t vqnegq_s16(int16x8_t __a) { \ 2721 __ai int16x8_t vqnegq_s16(int16x8_t __a) { \
2722 return (int16x8_t)__builtin_neon_vqnegq_v((int8x16_t)__a, 17); } 2722 return (int16x8_t)__builtin_neon_vqnegq_v((int8x16_t)__a, 33); }
2723 __ai int32x4_t vqnegq_s32(int32x4_t __a) { \ 2723 __ai int32x4_t vqnegq_s32(int32x4_t __a) { \
2724 return (int32x4_t)__builtin_neon_vqnegq_v((int8x16_t)__a, 18); } 2724 return (int32x4_t)__builtin_neon_vqnegq_v((int8x16_t)__a, 34); }
2725 2725
2726 __ai int16x4_t vqrdmulh_s16(int16x4_t __a, int16x4_t __b) { \ 2726 __ai int16x4_t vqrdmulh_s16(int16x4_t __a, int16x4_t __b) { \
2727 return (int16x4_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)__b, 1); } 2727 return (int16x4_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2728 __ai int32x2_t vqrdmulh_s32(int32x2_t __a, int32x2_t __b) { \ 2728 __ai int32x2_t vqrdmulh_s32(int32x2_t __a, int32x2_t __b) { \
2729 return (int32x2_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2729 return (int32x2_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2730 __ai int16x8_t vqrdmulhq_s16(int16x8_t __a, int16x8_t __b) { \ 2730 __ai int16x8_t vqrdmulhq_s16(int16x8_t __a, int16x8_t __b) { \
2731 return (int16x8_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 17); } 2731 return (int16x8_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2732 __ai int32x4_t vqrdmulhq_s32(int32x4_t __a, int32x4_t __b) { \ 2732 __ai int32x4_t vqrdmulhq_s32(int32x4_t __a, int32x4_t __b) { \
2733 return (int32x4_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 18); } 2733 return (int32x4_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2734 2734
2735 #define vqrdmulh_lane_s16(a, b, __c) __extension__ ({ \ 2735 #define vqrdmulh_lane_s16(a, b, __c) __extension__ ({ \
2736 int16x4_t __a = (a); int16x4_t __b = (b); \ 2736 int16x4_t __a = (a); int16x4_t __b = (b); \
2737 vqrdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) 2737 vqrdmulh_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })
2738 #define vqrdmulh_lane_s32(a, b, __c) __extension__ ({ \ 2738 #define vqrdmulh_lane_s32(a, b, __c) __extension__ ({ \
2739 int32x2_t __a = (a); int32x2_t __b = (b); \ 2739 int32x2_t __a = (a); int32x2_t __b = (b); \
2740 vqrdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); }) 2740 vqrdmulh_s32(__a, __builtin_shufflevector(__b, __b, __c, __c)); })
2741 #define vqrdmulhq_lane_s16(a, b, __c) __extension__ ({ \ 2741 #define vqrdmulhq_lane_s16(a, b, __c) __extension__ ({ \
2742 int16x8_t __a = (a); int16x4_t __b = (b); \ 2742 int16x8_t __a = (a); int16x4_t __b = (b); \
2743 vqrdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c)); }) 2743 vqrdmulhq_s16(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c, __c, __c, __c, __c)); })
2744 #define vqrdmulhq_lane_s32(a, b, __c) __extension__ ({ \ 2744 #define vqrdmulhq_lane_s32(a, b, __c) __extension__ ({ \
2745 int32x4_t __a = (a); int32x2_t __b = (b); \ 2745 int32x4_t __a = (a); int32x2_t __b = (b); \
2746 vqrdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); }) 2746 vqrdmulhq_s32(__a, __builtin_shufflevector(__b, __b, __c, __c, __c, __c)); })
2747 2747
2748 __ai int16x4_t vqrdmulh_n_s16(int16x4_t __a, int16_t __b) { \ 2748 __ai int16x4_t vqrdmulh_n_s16(int16x4_t __a, int16_t __b) { \
2749 return (int16x4_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 1); } 2749 return (int16x4_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)(int16x4_t){ __b, __b, __b, __b }, 1); }
2750 __ai int32x2_t vqrdmulh_n_s32(int32x2_t __a, int32_t __b) { \ 2750 __ai int32x2_t vqrdmulh_n_s32(int32x2_t __a, int32_t __b) { \
2751 return (int32x2_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 2); } 2751 return (int32x2_t)__builtin_neon_vqrdmulh_v((int8x8_t)__a, (int8x8_t)(int32x2_t){ __b, __b }, 2); }
2752 __ai int16x8_t vqrdmulhq_n_s16(int16x8_t __a, int16_t __b) { \ 2752 __ai int16x8_t vqrdmulhq_n_s16(int16x8_t __a, int16_t __b) { \
2753 return (int16x8_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)(int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }, 17); } 2753 return (int16x8_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)(int16x8_t){ __b, __b, __b, __b, __b, __b, __b, __b }, 33); }
2754 __ai int32x4_t vqrdmulhq_n_s32(int32x4_t __a, int32_t __b) { \ 2754 __ai int32x4_t vqrdmulhq_n_s32(int32x4_t __a, int32_t __b) { \
2755 return (int32x4_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)(int32x4_t){ __b, __b, __b, __b }, 18); } 2755 return (int32x4_t)__builtin_neon_vqrdmulhq_v((int8x16_t)__a, (int8x16_t)(int32x4_t){ __b, __b, __b, __b }, 34); }
2756 2756
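The vqrdmulh* intrinsics above perform a saturating, rounding, doubling multiply that returns the high half, i.e. the usual Q15/Q31 fixed-point multiply. A minimal usage sketch, not part of this diff, assuming an ARM target compiled with NEON support (names are illustrative):

#include <arm_neon.h>

/* Illustrative sketch: multiply two Q15 fixed-point vectors.
 * vqrdmulhq_s16 computes saturate((2*a*b + 0x8000) >> 16) per lane. */
static inline int16x8_t q15_mul(int16x8_t a, int16x8_t b) {
  return vqrdmulhq_s16(a, b);
}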
2757 __ai int8x8_t vqrshl_s8(int8x8_t __a, int8x8_t __b) { \ 2757 __ai int8x8_t vqrshl_s8(int8x8_t __a, int8x8_t __b) { \
2758 return (int8x8_t)__builtin_neon_vqrshl_v(__a, __b, 0); } 2758 return (int8x8_t)__builtin_neon_vqrshl_v(__a, __b, 0); }
2759 __ai int16x4_t vqrshl_s16(int16x4_t __a, int16x4_t __b) { \ 2759 __ai int16x4_t vqrshl_s16(int16x4_t __a, int16x4_t __b) { \
2760 return (int16x4_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 1); } 2760 return (int16x4_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2761 __ai int32x2_t vqrshl_s32(int32x2_t __a, int32x2_t __b) { \ 2761 __ai int32x2_t vqrshl_s32(int32x2_t __a, int32x2_t __b) { \
2762 return (int32x2_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2762 return (int32x2_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2763 __ai int64x1_t vqrshl_s64(int64x1_t __a, int64x1_t __b) { \ 2763 __ai int64x1_t vqrshl_s64(int64x1_t __a, int64x1_t __b) { \
2764 return (int64x1_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 3); } 2764 return (int64x1_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 3); }
2765 __ai uint8x8_t vqrshl_u8(uint8x8_t __a, int8x8_t __b) { \ 2765 __ai uint8x8_t vqrshl_u8(uint8x8_t __a, int8x8_t __b) { \
2766 return (uint8x8_t)__builtin_neon_vqrshl_v((int8x8_t)__a, __b, 8); } 2766 return (uint8x8_t)__builtin_neon_vqrshl_v((int8x8_t)__a, __b, 16); }
2767 __ai uint16x4_t vqrshl_u16(uint16x4_t __a, int16x4_t __b) { \ 2767 __ai uint16x4_t vqrshl_u16(uint16x4_t __a, int16x4_t __b) { \
2768 return (uint16x4_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 9); } 2768 return (uint16x4_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2769 __ai uint32x2_t vqrshl_u32(uint32x2_t __a, int32x2_t __b) { \ 2769 __ai uint32x2_t vqrshl_u32(uint32x2_t __a, int32x2_t __b) { \
2770 return (uint32x2_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 10); } 2770 return (uint32x2_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2771 __ai uint64x1_t vqrshl_u64(uint64x1_t __a, int64x1_t __b) { \ 2771 __ai uint64x1_t vqrshl_u64(uint64x1_t __a, int64x1_t __b) { \
2772 return (uint64x1_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 11); } 2772 return (uint64x1_t)__builtin_neon_vqrshl_v((int8x8_t)__a, (int8x8_t)__b, 19); }
2773 __ai int8x16_t vqrshlq_s8(int8x16_t __a, int8x16_t __b) { \ 2773 __ai int8x16_t vqrshlq_s8(int8x16_t __a, int8x16_t __b) { \
2774 return (int8x16_t)__builtin_neon_vqrshlq_v(__a, __b, 16); } 2774 return (int8x16_t)__builtin_neon_vqrshlq_v(__a, __b, 32); }
2775 __ai int16x8_t vqrshlq_s16(int16x8_t __a, int16x8_t __b) { \ 2775 __ai int16x8_t vqrshlq_s16(int16x8_t __a, int16x8_t __b) { \
2776 return (int16x8_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 17); } 2776 return (int16x8_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2777 __ai int32x4_t vqrshlq_s32(int32x4_t __a, int32x4_t __b) { \ 2777 __ai int32x4_t vqrshlq_s32(int32x4_t __a, int32x4_t __b) { \
2778 return (int32x4_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 18); } 2778 return (int32x4_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2779 __ai int64x2_t vqrshlq_s64(int64x2_t __a, int64x2_t __b) { \ 2779 __ai int64x2_t vqrshlq_s64(int64x2_t __a, int64x2_t __b) { \
2780 return (int64x2_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 19); } 2780 return (int64x2_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
2781 __ai uint8x16_t vqrshlq_u8(uint8x16_t __a, int8x16_t __b) { \ 2781 __ai uint8x16_t vqrshlq_u8(uint8x16_t __a, int8x16_t __b) { \
2782 return (uint8x16_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, __b, 24); } 2782 return (uint8x16_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, __b, 48); }
2783 __ai uint16x8_t vqrshlq_u16(uint16x8_t __a, int16x8_t __b) { \ 2783 __ai uint16x8_t vqrshlq_u16(uint16x8_t __a, int16x8_t __b) { \
2784 return (uint16x8_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 25); } 2784 return (uint16x8_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
2785 __ai uint32x4_t vqrshlq_u32(uint32x4_t __a, int32x4_t __b) { \ 2785 __ai uint32x4_t vqrshlq_u32(uint32x4_t __a, int32x4_t __b) { \
2786 return (uint32x4_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 2786 return (uint32x4_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
2787 __ai uint64x2_t vqrshlq_u64(uint64x2_t __a, int64x2_t __b) { \ 2787 __ai uint64x2_t vqrshlq_u64(uint64x2_t __a, int64x2_t __b) { \
2788 return (uint64x2_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 27); } 2788 return (uint64x2_t)__builtin_neon_vqrshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
2789 2789
2790 #define vqrshrn_n_s16(a, __b) __extension__ ({ \ 2790 #define vqrshrn_n_s16(a, __b) __extension__ ({ \
2791 int16x8_t __a = (a); \ 2791 int16x8_t __a = (a); \
2792 (int8x8_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 0); }) 2792 (int8x8_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 0); })
2793 #define vqrshrn_n_s32(a, __b) __extension__ ({ \ 2793 #define vqrshrn_n_s32(a, __b) __extension__ ({ \
2794 int32x4_t __a = (a); \ 2794 int32x4_t __a = (a); \
2795 (int16x4_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 1); }) 2795 (int16x4_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 1); })
2796 #define vqrshrn_n_s64(a, __b) __extension__ ({ \ 2796 #define vqrshrn_n_s64(a, __b) __extension__ ({ \
2797 int64x2_t __a = (a); \ 2797 int64x2_t __a = (a); \
2798 (int32x2_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 2); }) 2798 (int32x2_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 2); })
2799 #define vqrshrn_n_u16(a, __b) __extension__ ({ \ 2799 #define vqrshrn_n_u16(a, __b) __extension__ ({ \
2800 uint16x8_t __a = (a); \ 2800 uint16x8_t __a = (a); \
2801 (uint8x8_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 8); }) 2801 (uint8x8_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 16); })
2802 #define vqrshrn_n_u32(a, __b) __extension__ ({ \ 2802 #define vqrshrn_n_u32(a, __b) __extension__ ({ \
2803 uint32x4_t __a = (a); \ 2803 uint32x4_t __a = (a); \
2804 (uint16x4_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 9); }) 2804 (uint16x4_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 17); })
2805 #define vqrshrn_n_u64(a, __b) __extension__ ({ \ 2805 #define vqrshrn_n_u64(a, __b) __extension__ ({ \
2806 uint64x2_t __a = (a); \ 2806 uint64x2_t __a = (a); \
2807 (uint32x2_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 10); }) 2807 (uint32x2_t)__builtin_neon_vqrshrn_n_v((int8x16_t)__a, __b, 18); })
2808 2808
2809 #define vqrshrun_n_s16(a, __b) __extension__ ({ \ 2809 #define vqrshrun_n_s16(a, __b) __extension__ ({ \
2810 int16x8_t __a = (a); \ 2810 int16x8_t __a = (a); \
2811 (uint8x8_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 8); }) 2811 (uint8x8_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 16); })
2812 #define vqrshrun_n_s32(a, __b) __extension__ ({ \ 2812 #define vqrshrun_n_s32(a, __b) __extension__ ({ \
2813 int32x4_t __a = (a); \ 2813 int32x4_t __a = (a); \
2814 (uint16x4_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 9); }) 2814 (uint16x4_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 17); })
2815 #define vqrshrun_n_s64(a, __b) __extension__ ({ \ 2815 #define vqrshrun_n_s64(a, __b) __extension__ ({ \
2816 int64x2_t __a = (a); \ 2816 int64x2_t __a = (a); \
2817 (uint32x2_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 10); }) 2817 (uint32x2_t)__builtin_neon_vqrshrun_n_v((int8x16_t)__a, __b, 18); })
2818 2818
2819 __ai int8x8_t vqshl_s8(int8x8_t __a, int8x8_t __b) { \ 2819 __ai int8x8_t vqshl_s8(int8x8_t __a, int8x8_t __b) { \
2820 return (int8x8_t)__builtin_neon_vqshl_v(__a, __b, 0); } 2820 return (int8x8_t)__builtin_neon_vqshl_v(__a, __b, 0); }
2821 __ai int16x4_t vqshl_s16(int16x4_t __a, int16x4_t __b) { \ 2821 __ai int16x4_t vqshl_s16(int16x4_t __a, int16x4_t __b) { \
2822 return (int16x4_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 1); } 2822 return (int16x4_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2823 __ai int32x2_t vqshl_s32(int32x2_t __a, int32x2_t __b) { \ 2823 __ai int32x2_t vqshl_s32(int32x2_t __a, int32x2_t __b) { \
2824 return (int32x2_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2824 return (int32x2_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2825 __ai int64x1_t vqshl_s64(int64x1_t __a, int64x1_t __b) { \ 2825 __ai int64x1_t vqshl_s64(int64x1_t __a, int64x1_t __b) { \
2826 return (int64x1_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 3); } 2826 return (int64x1_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 3); }
2827 __ai uint8x8_t vqshl_u8(uint8x8_t __a, int8x8_t __b) { \ 2827 __ai uint8x8_t vqshl_u8(uint8x8_t __a, int8x8_t __b) { \
2828 return (uint8x8_t)__builtin_neon_vqshl_v((int8x8_t)__a, __b, 8); } 2828 return (uint8x8_t)__builtin_neon_vqshl_v((int8x8_t)__a, __b, 16); }
2829 __ai uint16x4_t vqshl_u16(uint16x4_t __a, int16x4_t __b) { \ 2829 __ai uint16x4_t vqshl_u16(uint16x4_t __a, int16x4_t __b) { \
2830 return (uint16x4_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 9); } 2830 return (uint16x4_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2831 __ai uint32x2_t vqshl_u32(uint32x2_t __a, int32x2_t __b) { \ 2831 __ai uint32x2_t vqshl_u32(uint32x2_t __a, int32x2_t __b) { \
2832 return (uint32x2_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 10); } 2832 return (uint32x2_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2833 __ai uint64x1_t vqshl_u64(uint64x1_t __a, int64x1_t __b) { \ 2833 __ai uint64x1_t vqshl_u64(uint64x1_t __a, int64x1_t __b) { \
2834 return (uint64x1_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 11); } 2834 return (uint64x1_t)__builtin_neon_vqshl_v((int8x8_t)__a, (int8x8_t)__b, 19); }
2835 __ai int8x16_t vqshlq_s8(int8x16_t __a, int8x16_t __b) { \ 2835 __ai int8x16_t vqshlq_s8(int8x16_t __a, int8x16_t __b) { \
2836 return (int8x16_t)__builtin_neon_vqshlq_v(__a, __b, 16); } 2836 return (int8x16_t)__builtin_neon_vqshlq_v(__a, __b, 32); }
2837 __ai int16x8_t vqshlq_s16(int16x8_t __a, int16x8_t __b) { \ 2837 __ai int16x8_t vqshlq_s16(int16x8_t __a, int16x8_t __b) { \
2838 return (int16x8_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 17); } 2838 return (int16x8_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2839 __ai int32x4_t vqshlq_s32(int32x4_t __a, int32x4_t __b) { \ 2839 __ai int32x4_t vqshlq_s32(int32x4_t __a, int32x4_t __b) { \
2840 return (int32x4_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 18); } 2840 return (int32x4_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2841 __ai int64x2_t vqshlq_s64(int64x2_t __a, int64x2_t __b) { \ 2841 __ai int64x2_t vqshlq_s64(int64x2_t __a, int64x2_t __b) { \
2842 return (int64x2_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 19); } 2842 return (int64x2_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
2843 __ai uint8x16_t vqshlq_u8(uint8x16_t __a, int8x16_t __b) { \ 2843 __ai uint8x16_t vqshlq_u8(uint8x16_t __a, int8x16_t __b) { \
2844 return (uint8x16_t)__builtin_neon_vqshlq_v((int8x16_t)__a, __b, 24); } 2844 return (uint8x16_t)__builtin_neon_vqshlq_v((int8x16_t)__a, __b, 48); }
2845 __ai uint16x8_t vqshlq_u16(uint16x8_t __a, int16x8_t __b) { \ 2845 __ai uint16x8_t vqshlq_u16(uint16x8_t __a, int16x8_t __b) { \
2846 return (uint16x8_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 25); } 2846 return (uint16x8_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
2847 __ai uint32x4_t vqshlq_u32(uint32x4_t __a, int32x4_t __b) { \ 2847 __ai uint32x4_t vqshlq_u32(uint32x4_t __a, int32x4_t __b) { \
2848 return (uint32x4_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 2848 return (uint32x4_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
2849 __ai uint64x2_t vqshlq_u64(uint64x2_t __a, int64x2_t __b) { \ 2849 __ai uint64x2_t vqshlq_u64(uint64x2_t __a, int64x2_t __b) { \
2850 return (uint64x2_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 27); } 2850 return (uint64x2_t)__builtin_neon_vqshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
2851 2851
2852 #define vqshlu_n_s8(a, __b) __extension__ ({ \ 2852 #define vqshlu_n_s8(a, __b) __extension__ ({ \
2853 int8x8_t __a = (a); \ 2853 int8x8_t __a = (a); \
2854 (uint8x8_t)__builtin_neon_vqshlu_n_v(__a, __b, 8); }) 2854 (uint8x8_t)__builtin_neon_vqshlu_n_v(__a, __b, 16); })
2855 #define vqshlu_n_s16(a, __b) __extension__ ({ \ 2855 #define vqshlu_n_s16(a, __b) __extension__ ({ \
2856 int16x4_t __a = (a); \ 2856 int16x4_t __a = (a); \
2857 (uint16x4_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 9); }) 2857 (uint16x4_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 17); })
2858 #define vqshlu_n_s32(a, __b) __extension__ ({ \ 2858 #define vqshlu_n_s32(a, __b) __extension__ ({ \
2859 int32x2_t __a = (a); \ 2859 int32x2_t __a = (a); \
2860 (uint32x2_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 10); }) 2860 (uint32x2_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 18); })
2861 #define vqshlu_n_s64(a, __b) __extension__ ({ \ 2861 #define vqshlu_n_s64(a, __b) __extension__ ({ \
2862 int64x1_t __a = (a); \ 2862 int64x1_t __a = (a); \
2863 (uint64x1_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 11); }) 2863 (uint64x1_t)__builtin_neon_vqshlu_n_v((int8x8_t)__a, __b, 19); })
2864 #define vqshluq_n_s8(a, __b) __extension__ ({ \ 2864 #define vqshluq_n_s8(a, __b) __extension__ ({ \
2865 int8x16_t __a = (a); \ 2865 int8x16_t __a = (a); \
2866 (uint8x16_t)__builtin_neon_vqshluq_n_v(__a, __b, 24); }) 2866 (uint8x16_t)__builtin_neon_vqshluq_n_v(__a, __b, 48); })
2867 #define vqshluq_n_s16(a, __b) __extension__ ({ \ 2867 #define vqshluq_n_s16(a, __b) __extension__ ({ \
2868 int16x8_t __a = (a); \ 2868 int16x8_t __a = (a); \
2869 (uint16x8_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 25); }) 2869 (uint16x8_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 49); })
2870 #define vqshluq_n_s32(a, __b) __extension__ ({ \ 2870 #define vqshluq_n_s32(a, __b) __extension__ ({ \
2871 int32x4_t __a = (a); \ 2871 int32x4_t __a = (a); \
2872 (uint32x4_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 26); }) 2872 (uint32x4_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 50); })
2873 #define vqshluq_n_s64(a, __b) __extension__ ({ \ 2873 #define vqshluq_n_s64(a, __b) __extension__ ({ \
2874 int64x2_t __a = (a); \ 2874 int64x2_t __a = (a); \
2875 (uint64x2_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 27); }) 2875 (uint64x2_t)__builtin_neon_vqshluq_n_v((int8x16_t)__a, __b, 51); })
2876 2876
2877 #define vqshl_n_s8(a, __b) __extension__ ({ \ 2877 #define vqshl_n_s8(a, __b) __extension__ ({ \
2878 int8x8_t __a = (a); \ 2878 int8x8_t __a = (a); \
2879 (int8x8_t)__builtin_neon_vqshl_n_v(__a, __b, 0); }) 2879 (int8x8_t)__builtin_neon_vqshl_n_v(__a, __b, 0); })
2880 #define vqshl_n_s16(a, __b) __extension__ ({ \ 2880 #define vqshl_n_s16(a, __b) __extension__ ({ \
2881 int16x4_t __a = (a); \ 2881 int16x4_t __a = (a); \
2882 (int16x4_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 1); }) 2882 (int16x4_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 1); })
2883 #define vqshl_n_s32(a, __b) __extension__ ({ \ 2883 #define vqshl_n_s32(a, __b) __extension__ ({ \
2884 int32x2_t __a = (a); \ 2884 int32x2_t __a = (a); \
2885 (int32x2_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 2); }) 2885 (int32x2_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 2); })
2886 #define vqshl_n_s64(a, __b) __extension__ ({ \ 2886 #define vqshl_n_s64(a, __b) __extension__ ({ \
2887 int64x1_t __a = (a); \ 2887 int64x1_t __a = (a); \
2888 (int64x1_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 3); }) 2888 (int64x1_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 3); })
2889 #define vqshl_n_u8(a, __b) __extension__ ({ \ 2889 #define vqshl_n_u8(a, __b) __extension__ ({ \
2890 uint8x8_t __a = (a); \ 2890 uint8x8_t __a = (a); \
2891 (uint8x8_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 8); }) 2891 (uint8x8_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 16); })
2892 #define vqshl_n_u16(a, __b) __extension__ ({ \ 2892 #define vqshl_n_u16(a, __b) __extension__ ({ \
2893 uint16x4_t __a = (a); \ 2893 uint16x4_t __a = (a); \
2894 (uint16x4_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 9); }) 2894 (uint16x4_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 17); })
2895 #define vqshl_n_u32(a, __b) __extension__ ({ \ 2895 #define vqshl_n_u32(a, __b) __extension__ ({ \
2896 uint32x2_t __a = (a); \ 2896 uint32x2_t __a = (a); \
2897 (uint32x2_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 10); }) 2897 (uint32x2_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 18); })
2898 #define vqshl_n_u64(a, __b) __extension__ ({ \ 2898 #define vqshl_n_u64(a, __b) __extension__ ({ \
2899 uint64x1_t __a = (a); \ 2899 uint64x1_t __a = (a); \
2900 (uint64x1_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 11); }) 2900 (uint64x1_t)__builtin_neon_vqshl_n_v((int8x8_t)__a, __b, 19); })
2901 #define vqshlq_n_s8(a, __b) __extension__ ({ \ 2901 #define vqshlq_n_s8(a, __b) __extension__ ({ \
2902 int8x16_t __a = (a); \ 2902 int8x16_t __a = (a); \
2903 (int8x16_t)__builtin_neon_vqshlq_n_v(__a, __b, 16); }) 2903 (int8x16_t)__builtin_neon_vqshlq_n_v(__a, __b, 32); })
2904 #define vqshlq_n_s16(a, __b) __extension__ ({ \ 2904 #define vqshlq_n_s16(a, __b) __extension__ ({ \
2905 int16x8_t __a = (a); \ 2905 int16x8_t __a = (a); \
2906 (int16x8_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 17); }) 2906 (int16x8_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 33); })
2907 #define vqshlq_n_s32(a, __b) __extension__ ({ \ 2907 #define vqshlq_n_s32(a, __b) __extension__ ({ \
2908 int32x4_t __a = (a); \ 2908 int32x4_t __a = (a); \
2909 (int32x4_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 18); }) 2909 (int32x4_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 34); })
2910 #define vqshlq_n_s64(a, __b) __extension__ ({ \ 2910 #define vqshlq_n_s64(a, __b) __extension__ ({ \
2911 int64x2_t __a = (a); \ 2911 int64x2_t __a = (a); \
2912 (int64x2_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 19); }) 2912 (int64x2_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 35); })
2913 #define vqshlq_n_u8(a, __b) __extension__ ({ \ 2913 #define vqshlq_n_u8(a, __b) __extension__ ({ \
2914 uint8x16_t __a = (a); \ 2914 uint8x16_t __a = (a); \
2915 (uint8x16_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 24); }) 2915 (uint8x16_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 48); })
2916 #define vqshlq_n_u16(a, __b) __extension__ ({ \ 2916 #define vqshlq_n_u16(a, __b) __extension__ ({ \
2917 uint16x8_t __a = (a); \ 2917 uint16x8_t __a = (a); \
2918 (uint16x8_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 25); }) 2918 (uint16x8_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 49); })
2919 #define vqshlq_n_u32(a, __b) __extension__ ({ \ 2919 #define vqshlq_n_u32(a, __b) __extension__ ({ \
2920 uint32x4_t __a = (a); \ 2920 uint32x4_t __a = (a); \
2921 (uint32x4_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 26); }) 2921 (uint32x4_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 50); })
2922 #define vqshlq_n_u64(a, __b) __extension__ ({ \ 2922 #define vqshlq_n_u64(a, __b) __extension__ ({ \
2923 uint64x2_t __a = (a); \ 2923 uint64x2_t __a = (a); \
2924 (uint64x2_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 27); }) 2924 (uint64x2_t)__builtin_neon_vqshlq_n_v((int8x16_t)__a, __b, 51); })
2925 2925
2926 #define vqshrn_n_s16(a, __b) __extension__ ({ \ 2926 #define vqshrn_n_s16(a, __b) __extension__ ({ \
2927 int16x8_t __a = (a); \ 2927 int16x8_t __a = (a); \
2928 (int8x8_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 0); }) 2928 (int8x8_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 0); })
2929 #define vqshrn_n_s32(a, __b) __extension__ ({ \ 2929 #define vqshrn_n_s32(a, __b) __extension__ ({ \
2930 int32x4_t __a = (a); \ 2930 int32x4_t __a = (a); \
2931 (int16x4_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 1); }) 2931 (int16x4_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 1); })
2932 #define vqshrn_n_s64(a, __b) __extension__ ({ \ 2932 #define vqshrn_n_s64(a, __b) __extension__ ({ \
2933 int64x2_t __a = (a); \ 2933 int64x2_t __a = (a); \
2934 (int32x2_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 2); }) 2934 (int32x2_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 2); })
2935 #define vqshrn_n_u16(a, __b) __extension__ ({ \ 2935 #define vqshrn_n_u16(a, __b) __extension__ ({ \
2936 uint16x8_t __a = (a); \ 2936 uint16x8_t __a = (a); \
2937 (uint8x8_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 8); }) 2937 (uint8x8_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 16); })
2938 #define vqshrn_n_u32(a, __b) __extension__ ({ \ 2938 #define vqshrn_n_u32(a, __b) __extension__ ({ \
2939 uint32x4_t __a = (a); \ 2939 uint32x4_t __a = (a); \
2940 (uint16x4_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 9); }) 2940 (uint16x4_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 17); })
2941 #define vqshrn_n_u64(a, __b) __extension__ ({ \ 2941 #define vqshrn_n_u64(a, __b) __extension__ ({ \
2942 uint64x2_t __a = (a); \ 2942 uint64x2_t __a = (a); \
2943 (uint32x2_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 10); }) 2943 (uint32x2_t)__builtin_neon_vqshrn_n_v((int8x16_t)__a, __b, 18); })
2944 2944
2945 #define vqshrun_n_s16(a, __b) __extension__ ({ \ 2945 #define vqshrun_n_s16(a, __b) __extension__ ({ \
2946 int16x8_t __a = (a); \ 2946 int16x8_t __a = (a); \
2947 (uint8x8_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 8); }) 2947 (uint8x8_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 16); })
2948 #define vqshrun_n_s32(a, __b) __extension__ ({ \ 2948 #define vqshrun_n_s32(a, __b) __extension__ ({ \
2949 int32x4_t __a = (a); \ 2949 int32x4_t __a = (a); \
2950 (uint16x4_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 9); }) 2950 (uint16x4_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 17); })
2951 #define vqshrun_n_s64(a, __b) __extension__ ({ \ 2951 #define vqshrun_n_s64(a, __b) __extension__ ({ \
2952 int64x2_t __a = (a); \ 2952 int64x2_t __a = (a); \
2953 (uint32x2_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 10); }) 2953 (uint32x2_t)__builtin_neon_vqshrun_n_v((int8x16_t)__a, __b, 18); })
2954 2954
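The vqshrn_n/vqshrun_n macros above shift right by an immediate and narrow to half-width elements with saturation (vqshrun_n takes signed input and produces unsigned output). A minimal usage sketch, not part of this diff, assuming an ARM/NEON target and an arbitrary scale factor of 16:

#include <arm_neon.h>

/* Illustrative sketch: scale a 16-bit accumulator down by 16 and pack it
 * into unsigned 8-bit lanes, saturating to [0, 255]. */
static inline uint8x8_t pack_acc_to_u8(int16x8_t acc) {
  return vqshrun_n_s16(acc, 4);
}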
2955 __ai int8x8_t vqsub_s8(int8x8_t __a, int8x8_t __b) { \ 2955 __ai int8x8_t vqsub_s8(int8x8_t __a, int8x8_t __b) { \
2956 return (int8x8_t)__builtin_neon_vqsub_v(__a, __b, 0); } 2956 return (int8x8_t)__builtin_neon_vqsub_v(__a, __b, 0); }
2957 __ai int16x4_t vqsub_s16(int16x4_t __a, int16x4_t __b) { \ 2957 __ai int16x4_t vqsub_s16(int16x4_t __a, int16x4_t __b) { \
2958 return (int16x4_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 1); } 2958 return (int16x4_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 1); }
2959 __ai int32x2_t vqsub_s32(int32x2_t __a, int32x2_t __b) { \ 2959 __ai int32x2_t vqsub_s32(int32x2_t __a, int32x2_t __b) { \
2960 return (int32x2_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 2); } 2960 return (int32x2_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 2); }
2961 __ai int64x1_t vqsub_s64(int64x1_t __a, int64x1_t __b) { \ 2961 __ai int64x1_t vqsub_s64(int64x1_t __a, int64x1_t __b) { \
2962 return (int64x1_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 3); } 2962 return (int64x1_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 3); }
2963 __ai uint8x8_t vqsub_u8(uint8x8_t __a, uint8x8_t __b) { \ 2963 __ai uint8x8_t vqsub_u8(uint8x8_t __a, uint8x8_t __b) { \
2964 return (uint8x8_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 8); } 2964 return (uint8x8_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 16); }
2965 __ai uint16x4_t vqsub_u16(uint16x4_t __a, uint16x4_t __b) { \ 2965 __ai uint16x4_t vqsub_u16(uint16x4_t __a, uint16x4_t __b) { \
2966 return (uint16x4_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 9); } 2966 return (uint16x4_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 17); }
2967 __ai uint32x2_t vqsub_u32(uint32x2_t __a, uint32x2_t __b) { \ 2967 __ai uint32x2_t vqsub_u32(uint32x2_t __a, uint32x2_t __b) { \
2968 return (uint32x2_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 10); } 2968 return (uint32x2_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 18); }
2969 __ai uint64x1_t vqsub_u64(uint64x1_t __a, uint64x1_t __b) { \ 2969 __ai uint64x1_t vqsub_u64(uint64x1_t __a, uint64x1_t __b) { \
2970 return (uint64x1_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 11); } 2970 return (uint64x1_t)__builtin_neon_vqsub_v((int8x8_t)__a, (int8x8_t)__b, 19); }
2971 __ai int8x16_t vqsubq_s8(int8x16_t __a, int8x16_t __b) { \ 2971 __ai int8x16_t vqsubq_s8(int8x16_t __a, int8x16_t __b) { \
2972 return (int8x16_t)__builtin_neon_vqsubq_v(__a, __b, 16); } 2972 return (int8x16_t)__builtin_neon_vqsubq_v(__a, __b, 32); }
2973 __ai int16x8_t vqsubq_s16(int16x8_t __a, int16x8_t __b) { \ 2973 __ai int16x8_t vqsubq_s16(int16x8_t __a, int16x8_t __b) { \
2974 return (int16x8_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 17); } 2974 return (int16x8_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
2975 __ai int32x4_t vqsubq_s32(int32x4_t __a, int32x4_t __b) { \ 2975 __ai int32x4_t vqsubq_s32(int32x4_t __a, int32x4_t __b) { \
2976 return (int32x4_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 18); } 2976 return (int32x4_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
2977 __ai int64x2_t vqsubq_s64(int64x2_t __a, int64x2_t __b) { \ 2977 __ai int64x2_t vqsubq_s64(int64x2_t __a, int64x2_t __b) { \
2978 return (int64x2_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 19); } 2978 return (int64x2_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
2979 __ai uint8x16_t vqsubq_u8(uint8x16_t __a, uint8x16_t __b) { \ 2979 __ai uint8x16_t vqsubq_u8(uint8x16_t __a, uint8x16_t __b) { \
2980 return (uint8x16_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 24); } 2980 return (uint8x16_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
2981 __ai uint16x8_t vqsubq_u16(uint16x8_t __a, uint16x8_t __b) { \ 2981 __ai uint16x8_t vqsubq_u16(uint16x8_t __a, uint16x8_t __b) { \
2982 return (uint16x8_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 25); } 2982 return (uint16x8_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
2983 __ai uint32x4_t vqsubq_u32(uint32x4_t __a, uint32x4_t __b) { \ 2983 __ai uint32x4_t vqsubq_u32(uint32x4_t __a, uint32x4_t __b) { \
2984 return (uint32x4_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 2984 return (uint32x4_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
2985 __ai uint64x2_t vqsubq_u64(uint64x2_t __a, uint64x2_t __b) { \ 2985 __ai uint64x2_t vqsubq_u64(uint64x2_t __a, uint64x2_t __b) { \
2986 return (uint64x2_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 27); } 2986 return (uint64x2_t)__builtin_neon_vqsubq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
2987 2987
2988 __ai int8x8_t vraddhn_s16(int16x8_t __a, int16x8_t __b) { \ 2988 __ai int8x8_t vraddhn_s16(int16x8_t __a, int16x8_t __b) { \
2989 return (int8x8_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 0); } 2989 return (int8x8_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 0); }
2990 __ai int16x4_t vraddhn_s32(int32x4_t __a, int32x4_t __b) { \ 2990 __ai int16x4_t vraddhn_s32(int32x4_t __a, int32x4_t __b) { \
2991 return (int16x4_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 1); } 2991 return (int16x4_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 1); }
2992 __ai int32x2_t vraddhn_s64(int64x2_t __a, int64x2_t __b) { \ 2992 __ai int32x2_t vraddhn_s64(int64x2_t __a, int64x2_t __b) { \
2993 return (int32x2_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 2); } 2993 return (int32x2_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 2); }
2994 __ai uint8x8_t vraddhn_u16(uint16x8_t __a, uint16x8_t __b) { \ 2994 __ai uint8x8_t vraddhn_u16(uint16x8_t __a, uint16x8_t __b) { \
2995 return (uint8x8_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 8); } 2995 return (uint8x8_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 16); }
2996 __ai uint16x4_t vraddhn_u32(uint32x4_t __a, uint32x4_t __b) { \ 2996 __ai uint16x4_t vraddhn_u32(uint32x4_t __a, uint32x4_t __b) { \
2997 return (uint16x4_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 9); } 2997 return (uint16x4_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 17); }
2998 __ai uint32x2_t vraddhn_u64(uint64x2_t __a, uint64x2_t __b) { \ 2998 __ai uint32x2_t vraddhn_u64(uint64x2_t __a, uint64x2_t __b) { \
2999 return (uint32x2_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 10); } 2999 return (uint32x2_t)__builtin_neon_vraddhn_v((int8x16_t)__a, (int8x16_t)__b, 18); }
3000 3000
3001 __ai float32x2_t vrecpe_f32(float32x2_t __a) { \ 3001 __ai float32x2_t vrecpe_f32(float32x2_t __a) { \
3002 return (float32x2_t)__builtin_neon_vrecpe_v((int8x8_t)__a, 4); } 3002 return (float32x2_t)__builtin_neon_vrecpe_v((int8x8_t)__a, 7); }
3003 __ai uint32x2_t vrecpe_u32(uint32x2_t __a) { \ 3003 __ai uint32x2_t vrecpe_u32(uint32x2_t __a) { \
3004 return (uint32x2_t)__builtin_neon_vrecpe_v((int8x8_t)__a, 10); } 3004 return (uint32x2_t)__builtin_neon_vrecpe_v((int8x8_t)__a, 18); }
3005 __ai float32x4_t vrecpeq_f32(float32x4_t __a) { \ 3005 __ai float32x4_t vrecpeq_f32(float32x4_t __a) { \
3006 return (float32x4_t)__builtin_neon_vrecpeq_v((int8x16_t)__a, 20); } 3006 return (float32x4_t)__builtin_neon_vrecpeq_v((int8x16_t)__a, 39); }
3007 __ai uint32x4_t vrecpeq_u32(uint32x4_t __a) { \ 3007 __ai uint32x4_t vrecpeq_u32(uint32x4_t __a) { \
3008 return (uint32x4_t)__builtin_neon_vrecpeq_v((int8x16_t)__a, 26); } 3008 return (uint32x4_t)__builtin_neon_vrecpeq_v((int8x16_t)__a, 50); }
3009 3009
3010 __ai float32x2_t vrecps_f32(float32x2_t __a, float32x2_t __b) { \ 3010 __ai float32x2_t vrecps_f32(float32x2_t __a, float32x2_t __b) { \
3011 return (float32x2_t)__builtin_neon_vrecps_v((int8x8_t)__a, (int8x8_t)__b, 4); } 3011 return (float32x2_t)__builtin_neon_vrecps_v((int8x8_t)__a, (int8x8_t)__b, 7); }
3012 __ai float32x4_t vrecpsq_f32(float32x4_t __a, float32x4_t __b) { \ 3012 __ai float32x4_t vrecpsq_f32(float32x4_t __a, float32x4_t __b) { \
3013 return (float32x4_t)__builtin_neon_vrecpsq_v((int8x16_t)__a, (int8x16_t)__b, 20); } 3013 return (float32x4_t)__builtin_neon_vrecpsq_v((int8x16_t)__a, (int8x16_t)__b, 39); }
3014 3014
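vrecpe_f32 only produces a coarse estimate of the reciprocal, and vrecps_f32 returns the Newton-Raphson correction factor 2 - d*x used to refine it. A minimal sketch of the conventional refinement, not part of this diff, assuming an ARM/NEON target:

#include <arm_neon.h>

/* Illustrative sketch: approximate 1/d with two Newton-Raphson steps. */
static inline float32x2_t recip_f32(float32x2_t d) {
  float32x2_t x = vrecpe_f32(d);       /* coarse estimate of 1/d */
  x = vmul_f32(x, vrecps_f32(d, x));   /* x *= (2 - d*x)         */
  x = vmul_f32(x, vrecps_f32(d, x));   /* second refinement step */
  return x;
}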
3015 __ai int8x8_t vreinterpret_s8_s16(int16x4_t __a) { \ 3015 __ai int8x8_t vreinterpret_s8_s16(int16x4_t __a) { \
3016 return (int8x8_t)__a; } 3016 return (int8x8_t)__a; }
3017 __ai int8x8_t vreinterpret_s8_s32(int32x2_t __a) { \ 3017 __ai int8x8_t vreinterpret_s8_s32(int32x2_t __a) { \
3018 return (int8x8_t)__a; } 3018 return (int8x8_t)__a; }
3019 __ai int8x8_t vreinterpret_s8_s64(int64x1_t __a) { \ 3019 __ai int8x8_t vreinterpret_s8_s64(int64x1_t __a) { \
3020 return (int8x8_t)__a; } 3020 return (int8x8_t)__a; }
3021 __ai int8x8_t vreinterpret_s8_u8(uint8x8_t __a) { \ 3021 __ai int8x8_t vreinterpret_s8_u8(uint8x8_t __a) { \
3022 return (int8x8_t)__a; } 3022 return (int8x8_t)__a; }
3023 __ai int8x8_t vreinterpret_s8_u16(uint16x4_t __a) { \ 3023 __ai int8x8_t vreinterpret_s8_u16(uint16x4_t __a) { \
(...skipping 592 matching lines...)
3616 __ai float32x4_t vrev64q_f32(float32x4_t __a) { \ 3616 __ai float32x4_t vrev64q_f32(float32x4_t __a) { \
3617 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); } 3617 return __builtin_shufflevector(__a, __a, 1, 0, 3, 2); }
3618 3618
3619 __ai int8x8_t vrhadd_s8(int8x8_t __a, int8x8_t __b) { \ 3619 __ai int8x8_t vrhadd_s8(int8x8_t __a, int8x8_t __b) { \
3620 return (int8x8_t)__builtin_neon_vrhadd_v(__a, __b, 0); } 3620 return (int8x8_t)__builtin_neon_vrhadd_v(__a, __b, 0); }
3621 __ai int16x4_t vrhadd_s16(int16x4_t __a, int16x4_t __b) { \ 3621 __ai int16x4_t vrhadd_s16(int16x4_t __a, int16x4_t __b) { \
3622 return (int16x4_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 1); } 3622 return (int16x4_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 1); }
3623 __ai int32x2_t vrhadd_s32(int32x2_t __a, int32x2_t __b) { \ 3623 __ai int32x2_t vrhadd_s32(int32x2_t __a, int32x2_t __b) { \
3624 return (int32x2_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 2); } 3624 return (int32x2_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 2); }
3625 __ai uint8x8_t vrhadd_u8(uint8x8_t __a, uint8x8_t __b) { \ 3625 __ai uint8x8_t vrhadd_u8(uint8x8_t __a, uint8x8_t __b) { \
3626 return (uint8x8_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 8); } 3626 return (uint8x8_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 16); }
3627 __ai uint16x4_t vrhadd_u16(uint16x4_t __a, uint16x4_t __b) { \ 3627 __ai uint16x4_t vrhadd_u16(uint16x4_t __a, uint16x4_t __b) { \
3628 return (uint16x4_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 9); } 3628 return (uint16x4_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 17); }
3629 __ai uint32x2_t vrhadd_u32(uint32x2_t __a, uint32x2_t __b) { \ 3629 __ai uint32x2_t vrhadd_u32(uint32x2_t __a, uint32x2_t __b) { \
3630 return (uint32x2_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 10); } 3630 return (uint32x2_t)__builtin_neon_vrhadd_v((int8x8_t)__a, (int8x8_t)__b, 18); }
3631 __ai int8x16_t vrhaddq_s8(int8x16_t __a, int8x16_t __b) { \ 3631 __ai int8x16_t vrhaddq_s8(int8x16_t __a, int8x16_t __b) { \
3632 return (int8x16_t)__builtin_neon_vrhaddq_v(__a, __b, 16); } 3632 return (int8x16_t)__builtin_neon_vrhaddq_v(__a, __b, 32); }
3633 __ai int16x8_t vrhaddq_s16(int16x8_t __a, int16x8_t __b) { \ 3633 __ai int16x8_t vrhaddq_s16(int16x8_t __a, int16x8_t __b) { \
3634 return (int16x8_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 17); } 3634 return (int16x8_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
3635 __ai int32x4_t vrhaddq_s32(int32x4_t __a, int32x4_t __b) { \ 3635 __ai int32x4_t vrhaddq_s32(int32x4_t __a, int32x4_t __b) { \
3636 return (int32x4_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 18); } 3636 return (int32x4_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
3637 __ai uint8x16_t vrhaddq_u8(uint8x16_t __a, uint8x16_t __b) { \ 3637 __ai uint8x16_t vrhaddq_u8(uint8x16_t __a, uint8x16_t __b) { \
3638 return (uint8x16_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 24); } 3638 return (uint8x16_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
3639 __ai uint16x8_t vrhaddq_u16(uint16x8_t __a, uint16x8_t __b) { \ 3639 __ai uint16x8_t vrhaddq_u16(uint16x8_t __a, uint16x8_t __b) { \
3640 return (uint16x8_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 25); } 3640 return (uint16x8_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
3641 __ai uint32x4_t vrhaddq_u32(uint32x4_t __a, uint32x4_t __b) { \ 3641 __ai uint32x4_t vrhaddq_u32(uint32x4_t __a, uint32x4_t __b) { \
3642 return (uint32x4_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 3642 return (uint32x4_t)__builtin_neon_vrhaddq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
3643 3643
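vrhadd* computes a rounding halving add, (a + b + 1) >> 1 per lane, without intermediate overflow. A minimal usage sketch, not part of this diff, assuming an ARM/NEON target:

#include <arm_neon.h>

/* Illustrative sketch: average two rows of 8-bit pixels with rounding. */
static inline uint8x8_t avg_pixels(uint8x8_t a, uint8x8_t b) {
  return vrhadd_u8(a, b);
}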
3644 __ai int8x8_t vrshl_s8(int8x8_t __a, int8x8_t __b) { \ 3644 __ai int8x8_t vrshl_s8(int8x8_t __a, int8x8_t __b) { \
3645 return (int8x8_t)__builtin_neon_vrshl_v(__a, __b, 0); } 3645 return (int8x8_t)__builtin_neon_vrshl_v(__a, __b, 0); }
3646 __ai int16x4_t vrshl_s16(int16x4_t __a, int16x4_t __b) { \ 3646 __ai int16x4_t vrshl_s16(int16x4_t __a, int16x4_t __b) { \
3647 return (int16x4_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 1); } 3647 return (int16x4_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 1); }
3648 __ai int32x2_t vrshl_s32(int32x2_t __a, int32x2_t __b) { \ 3648 __ai int32x2_t vrshl_s32(int32x2_t __a, int32x2_t __b) { \
3649 return (int32x2_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 2); } 3649 return (int32x2_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 2); }
3650 __ai int64x1_t vrshl_s64(int64x1_t __a, int64x1_t __b) { \ 3650 __ai int64x1_t vrshl_s64(int64x1_t __a, int64x1_t __b) { \
3651 return (int64x1_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 3); } 3651 return (int64x1_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 3); }
3652 __ai uint8x8_t vrshl_u8(uint8x8_t __a, int8x8_t __b) { \ 3652 __ai uint8x8_t vrshl_u8(uint8x8_t __a, int8x8_t __b) { \
3653 return (uint8x8_t)__builtin_neon_vrshl_v((int8x8_t)__a, __b, 8); } 3653 return (uint8x8_t)__builtin_neon_vrshl_v((int8x8_t)__a, __b, 16); }
3654 __ai uint16x4_t vrshl_u16(uint16x4_t __a, int16x4_t __b) { \ 3654 __ai uint16x4_t vrshl_u16(uint16x4_t __a, int16x4_t __b) { \
3655 return (uint16x4_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 9); } 3655 return (uint16x4_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 17); }
3656 __ai uint32x2_t vrshl_u32(uint32x2_t __a, int32x2_t __b) { \ 3656 __ai uint32x2_t vrshl_u32(uint32x2_t __a, int32x2_t __b) { \
3657 return (uint32x2_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 10); } 3657 return (uint32x2_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 18); }
3658 __ai uint64x1_t vrshl_u64(uint64x1_t __a, int64x1_t __b) { \ 3658 __ai uint64x1_t vrshl_u64(uint64x1_t __a, int64x1_t __b) { \
3659 return (uint64x1_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 11); } 3659 return (uint64x1_t)__builtin_neon_vrshl_v((int8x8_t)__a, (int8x8_t)__b, 19); }
3660 __ai int8x16_t vrshlq_s8(int8x16_t __a, int8x16_t __b) { \ 3660 __ai int8x16_t vrshlq_s8(int8x16_t __a, int8x16_t __b) { \
3661 return (int8x16_t)__builtin_neon_vrshlq_v(__a, __b, 16); } 3661 return (int8x16_t)__builtin_neon_vrshlq_v(__a, __b, 32); }
3662 __ai int16x8_t vrshlq_s16(int16x8_t __a, int16x8_t __b) { \ 3662 __ai int16x8_t vrshlq_s16(int16x8_t __a, int16x8_t __b) { \
3663 return (int16x8_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 17); } 3663 return (int16x8_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
3664 __ai int32x4_t vrshlq_s32(int32x4_t __a, int32x4_t __b) { \ 3664 __ai int32x4_t vrshlq_s32(int32x4_t __a, int32x4_t __b) { \
3665 return (int32x4_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 18); } 3665 return (int32x4_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
3666 __ai int64x2_t vrshlq_s64(int64x2_t __a, int64x2_t __b) { \ 3666 __ai int64x2_t vrshlq_s64(int64x2_t __a, int64x2_t __b) { \
3667 return (int64x2_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 19); } 3667 return (int64x2_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
3668 __ai uint8x16_t vrshlq_u8(uint8x16_t __a, int8x16_t __b) { \ 3668 __ai uint8x16_t vrshlq_u8(uint8x16_t __a, int8x16_t __b) { \
3669 return (uint8x16_t)__builtin_neon_vrshlq_v((int8x16_t)__a, __b, 24); } 3669 return (uint8x16_t)__builtin_neon_vrshlq_v((int8x16_t)__a, __b, 48); }
3670 __ai uint16x8_t vrshlq_u16(uint16x8_t __a, int16x8_t __b) { \ 3670 __ai uint16x8_t vrshlq_u16(uint16x8_t __a, int16x8_t __b) { \
3671 return (uint16x8_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 25); } 3671 return (uint16x8_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
3672 __ai uint32x4_t vrshlq_u32(uint32x4_t __a, int32x4_t __b) { \ 3672 __ai uint32x4_t vrshlq_u32(uint32x4_t __a, int32x4_t __b) { \
3673 return (uint32x4_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 3673 return (uint32x4_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
3674 __ai uint64x2_t vrshlq_u64(uint64x2_t __a, int64x2_t __b) { \ 3674 __ai uint64x2_t vrshlq_u64(uint64x2_t __a, int64x2_t __b) { \
3675 return (uint64x2_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 27); } 3675 return (uint64x2_t)__builtin_neon_vrshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
3676 3676
3677 #define vrshrn_n_s16(a, __b) __extension__ ({ \ 3677 #define vrshrn_n_s16(a, __b) __extension__ ({ \
3678 int16x8_t __a = (a); \ 3678 int16x8_t __a = (a); \
3679 (int8x8_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 0); }) 3679 (int8x8_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 0); })
3680 #define vrshrn_n_s32(a, __b) __extension__ ({ \ 3680 #define vrshrn_n_s32(a, __b) __extension__ ({ \
3681 int32x4_t __a = (a); \ 3681 int32x4_t __a = (a); \
3682 (int16x4_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 1); }) 3682 (int16x4_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 1); })
3683 #define vrshrn_n_s64(a, __b) __extension__ ({ \ 3683 #define vrshrn_n_s64(a, __b) __extension__ ({ \
3684 int64x2_t __a = (a); \ 3684 int64x2_t __a = (a); \
3685 (int32x2_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 2); }) 3685 (int32x2_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 2); })
3686 #define vrshrn_n_u16(a, __b) __extension__ ({ \ 3686 #define vrshrn_n_u16(a, __b) __extension__ ({ \
3687 uint16x8_t __a = (a); \ 3687 uint16x8_t __a = (a); \
3688 (uint8x8_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 8); }) 3688 (uint8x8_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 16); })
3689 #define vrshrn_n_u32(a, __b) __extension__ ({ \ 3689 #define vrshrn_n_u32(a, __b) __extension__ ({ \
3690 uint32x4_t __a = (a); \ 3690 uint32x4_t __a = (a); \
3691 (uint16x4_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 9); }) 3691 (uint16x4_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 17); })
3692 #define vrshrn_n_u64(a, __b) __extension__ ({ \ 3692 #define vrshrn_n_u64(a, __b) __extension__ ({ \
3693 uint64x2_t __a = (a); \ 3693 uint64x2_t __a = (a); \
3694 (uint32x2_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 10); }) 3694 (uint32x2_t)__builtin_neon_vrshrn_n_v((int8x16_t)__a, __b, 18); })
3695 3695
3696 #define vrshr_n_s8(a, __b) __extension__ ({ \ 3696 #define vrshr_n_s8(a, __b) __extension__ ({ \
3697 int8x8_t __a = (a); \ 3697 int8x8_t __a = (a); \
3698 (int8x8_t)__builtin_neon_vrshr_n_v(__a, __b, 0); }) 3698 (int8x8_t)__builtin_neon_vrshr_n_v(__a, __b, 0); })
3699 #define vrshr_n_s16(a, __b) __extension__ ({ \ 3699 #define vrshr_n_s16(a, __b) __extension__ ({ \
3700 int16x4_t __a = (a); \ 3700 int16x4_t __a = (a); \
3701 (int16x4_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 1); }) 3701 (int16x4_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 1); })
3702 #define vrshr_n_s32(a, __b) __extension__ ({ \ 3702 #define vrshr_n_s32(a, __b) __extension__ ({ \
3703 int32x2_t __a = (a); \ 3703 int32x2_t __a = (a); \
3704 (int32x2_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 2); }) 3704 (int32x2_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 2); })
3705 #define vrshr_n_s64(a, __b) __extension__ ({ \ 3705 #define vrshr_n_s64(a, __b) __extension__ ({ \
3706 int64x1_t __a = (a); \ 3706 int64x1_t __a = (a); \
3707 (int64x1_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 3); }) 3707 (int64x1_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 3); })
3708 #define vrshr_n_u8(a, __b) __extension__ ({ \ 3708 #define vrshr_n_u8(a, __b) __extension__ ({ \
3709 uint8x8_t __a = (a); \ 3709 uint8x8_t __a = (a); \
3710 (uint8x8_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 8); }) 3710 (uint8x8_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 16); })
3711 #define vrshr_n_u16(a, __b) __extension__ ({ \ 3711 #define vrshr_n_u16(a, __b) __extension__ ({ \
3712 uint16x4_t __a = (a); \ 3712 uint16x4_t __a = (a); \
3713 (uint16x4_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 9); }) 3713 (uint16x4_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 17); })
3714 #define vrshr_n_u32(a, __b) __extension__ ({ \ 3714 #define vrshr_n_u32(a, __b) __extension__ ({ \
3715 uint32x2_t __a = (a); \ 3715 uint32x2_t __a = (a); \
3716 (uint32x2_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 10); }) 3716 (uint32x2_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 18); })
3717 #define vrshr_n_u64(a, __b) __extension__ ({ \ 3717 #define vrshr_n_u64(a, __b) __extension__ ({ \
3718 uint64x1_t __a = (a); \ 3718 uint64x1_t __a = (a); \
3719 (uint64x1_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 11); }) 3719 (uint64x1_t)__builtin_neon_vrshr_n_v((int8x8_t)__a, __b, 19); })
3720 #define vrshrq_n_s8(a, __b) __extension__ ({ \ 3720 #define vrshrq_n_s8(a, __b) __extension__ ({ \
3721 int8x16_t __a = (a); \ 3721 int8x16_t __a = (a); \
3722 (int8x16_t)__builtin_neon_vrshrq_n_v(__a, __b, 16); }) 3722 (int8x16_t)__builtin_neon_vrshrq_n_v(__a, __b, 32); })
3723 #define vrshrq_n_s16(a, __b) __extension__ ({ \ 3723 #define vrshrq_n_s16(a, __b) __extension__ ({ \
3724 int16x8_t __a = (a); \ 3724 int16x8_t __a = (a); \
3725 (int16x8_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 17); }) 3725 (int16x8_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 33); })
3726 #define vrshrq_n_s32(a, __b) __extension__ ({ \ 3726 #define vrshrq_n_s32(a, __b) __extension__ ({ \
3727 int32x4_t __a = (a); \ 3727 int32x4_t __a = (a); \
3728 (int32x4_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 18); }) 3728 (int32x4_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 34); })
3729 #define vrshrq_n_s64(a, __b) __extension__ ({ \ 3729 #define vrshrq_n_s64(a, __b) __extension__ ({ \
3730 int64x2_t __a = (a); \ 3730 int64x2_t __a = (a); \
3731 (int64x2_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 19); }) 3731 (int64x2_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 35); })
3732 #define vrshrq_n_u8(a, __b) __extension__ ({ \ 3732 #define vrshrq_n_u8(a, __b) __extension__ ({ \
3733 uint8x16_t __a = (a); \ 3733 uint8x16_t __a = (a); \
3734 (uint8x16_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 24); }) 3734 (uint8x16_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 48); })
3735 #define vrshrq_n_u16(a, __b) __extension__ ({ \ 3735 #define vrshrq_n_u16(a, __b) __extension__ ({ \
3736 uint16x8_t __a = (a); \ 3736 uint16x8_t __a = (a); \
3737 (uint16x8_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 25); }) 3737 (uint16x8_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 49); })
3738 #define vrshrq_n_u32(a, __b) __extension__ ({ \ 3738 #define vrshrq_n_u32(a, __b) __extension__ ({ \
3739 uint32x4_t __a = (a); \ 3739 uint32x4_t __a = (a); \
3740 (uint32x4_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 26); }) 3740 (uint32x4_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 50); })
3741 #define vrshrq_n_u64(a, __b) __extension__ ({ \ 3741 #define vrshrq_n_u64(a, __b) __extension__ ({ \
3742 uint64x2_t __a = (a); \ 3742 uint64x2_t __a = (a); \
3743 (uint64x2_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 27); }) 3743 (uint64x2_t)__builtin_neon_vrshrq_n_v((int8x16_t)__a, __b, 51); })
3744 3744
3745 __ai float32x2_t vrsqrte_f32(float32x2_t __a) { \ 3745 __ai float32x2_t vrsqrte_f32(float32x2_t __a) { \
3746 return (float32x2_t)__builtin_neon_vrsqrte_v((int8x8_t)__a, 4); } 3746 return (float32x2_t)__builtin_neon_vrsqrte_v((int8x8_t)__a, 7); }
3747 __ai uint32x2_t vrsqrte_u32(uint32x2_t __a) { \ 3747 __ai uint32x2_t vrsqrte_u32(uint32x2_t __a) { \
3748 return (uint32x2_t)__builtin_neon_vrsqrte_v((int8x8_t)__a, 10); } 3748 return (uint32x2_t)__builtin_neon_vrsqrte_v((int8x8_t)__a, 18); }
3749 __ai float32x4_t vrsqrteq_f32(float32x4_t __a) { \ 3749 __ai float32x4_t vrsqrteq_f32(float32x4_t __a) { \
3750 return (float32x4_t)__builtin_neon_vrsqrteq_v((int8x16_t)__a, 20); } 3750 return (float32x4_t)__builtin_neon_vrsqrteq_v((int8x16_t)__a, 39); }
3751 __ai uint32x4_t vrsqrteq_u32(uint32x4_t __a) { \ 3751 __ai uint32x4_t vrsqrteq_u32(uint32x4_t __a) { \
3752 return (uint32x4_t)__builtin_neon_vrsqrteq_v((int8x16_t)__a, 26); } 3752 return (uint32x4_t)__builtin_neon_vrsqrteq_v((int8x16_t)__a, 50); }
3753 3753
3754 __ai float32x2_t vrsqrts_f32(float32x2_t __a, float32x2_t __b) { \ 3754 __ai float32x2_t vrsqrts_f32(float32x2_t __a, float32x2_t __b) { \
3755 return (float32x2_t)__builtin_neon_vrsqrts_v((int8x8_t)__a, (int8x8_t)__b, 4); } 3755 return (float32x2_t)__builtin_neon_vrsqrts_v((int8x8_t)__a, (int8x8_t)__b, 7); }
3756 __ai float32x4_t vrsqrtsq_f32(float32x4_t __a, float32x4_t __b) { \ 3756 __ai float32x4_t vrsqrtsq_f32(float32x4_t __a, float32x4_t __b) { \
3757 return (float32x4_t)__builtin_neon_vrsqrtsq_v((int8x16_t)__a, (int8x16_t)__b, 20); } 3757 return (float32x4_t)__builtin_neon_vrsqrtsq_v((int8x16_t)__a, (int8x16_t)__b, 39); }
3758 3758
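Likewise, vrsqrte_f32 gives a rough estimate of 1/sqrt(d), and vrsqrts_f32 returns the correction term (3 - a*b) / 2 used to refine it. A minimal sketch, not part of this diff, assuming an ARM/NEON target:

#include <arm_neon.h>

/* Illustrative sketch: approximate 1/sqrt(d) with one refinement step. */
static inline float32x2_t rsqrt_f32(float32x2_t d) {
  float32x2_t x = vrsqrte_f32(d);                    /* coarse estimate      */
  x = vmul_f32(x, vrsqrts_f32(vmul_f32(d, x), x));   /* x *= (3 - d*x*x) / 2 */
  return x;
}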
3759 #define vrsra_n_s8(a, b, __c) __extension__ ({ \ 3759 #define vrsra_n_s8(a, b, __c) __extension__ ({ \
3760 int8x8_t __a = (a); int8x8_t __b = (b); \ 3760 int8x8_t __a = (a); int8x8_t __b = (b); \
3761 (int8x8_t)__builtin_neon_vrsra_n_v(__a, __b, __c, 0); }) 3761 (int8x8_t)__builtin_neon_vrsra_n_v(__a, __b, __c, 0); })
3762 #define vrsra_n_s16(a, b, __c) __extension__ ({ \ 3762 #define vrsra_n_s16(a, b, __c) __extension__ ({ \
3763 int16x4_t __a = (a); int16x4_t __b = (b); \ 3763 int16x4_t __a = (a); int16x4_t __b = (b); \
3764 (int16x4_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) 3764 (int16x4_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
3765 #define vrsra_n_s32(a, b, __c) __extension__ ({ \ 3765 #define vrsra_n_s32(a, b, __c) __extension__ ({ \
3766 int32x2_t __a = (a); int32x2_t __b = (b); \ 3766 int32x2_t __a = (a); int32x2_t __b = (b); \
3767 (int32x2_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) 3767 (int32x2_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
3768 #define vrsra_n_s64(a, b, __c) __extension__ ({ \ 3768 #define vrsra_n_s64(a, b, __c) __extension__ ({ \
3769 int64x1_t __a = (a); int64x1_t __b = (b); \ 3769 int64x1_t __a = (a); int64x1_t __b = (b); \
3770 (int64x1_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) 3770 (int64x1_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
3771 #define vrsra_n_u8(a, b, __c) __extension__ ({ \ 3771 #define vrsra_n_u8(a, b, __c) __extension__ ({ \
3772 uint8x8_t __a = (a); uint8x8_t __b = (b); \ 3772 uint8x8_t __a = (a); uint8x8_t __b = (b); \
3773 (uint8x8_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 8); }) 3773 (uint8x8_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); })
3774 #define vrsra_n_u16(a, b, __c) __extension__ ({ \ 3774 #define vrsra_n_u16(a, b, __c) __extension__ ({ \
3775 uint16x4_t __a = (a); uint16x4_t __b = (b); \ 3775 uint16x4_t __a = (a); uint16x4_t __b = (b); \
3776 (uint16x4_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 9); }) 3776 (uint16x4_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); })
3777 #define vrsra_n_u32(a, b, __c) __extension__ ({ \ 3777 #define vrsra_n_u32(a, b, __c) __extension__ ({ \
3778 uint32x2_t __a = (a); uint32x2_t __b = (b); \ 3778 uint32x2_t __a = (a); uint32x2_t __b = (b); \
3779 (uint32x2_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 10); }) 3779 (uint32x2_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); })
3780 #define vrsra_n_u64(a, b, __c) __extension__ ({ \ 3780 #define vrsra_n_u64(a, b, __c) __extension__ ({ \
3781 uint64x1_t __a = (a); uint64x1_t __b = (b); \ 3781 uint64x1_t __a = (a); uint64x1_t __b = (b); \
3782 (uint64x1_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 11); }) 3782 (uint64x1_t)__builtin_neon_vrsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); })
3783 #define vrsraq_n_s8(a, b, __c) __extension__ ({ \ 3783 #define vrsraq_n_s8(a, b, __c) __extension__ ({ \
3784 int8x16_t __a = (a); int8x16_t __b = (b); \ 3784 int8x16_t __a = (a); int8x16_t __b = (b); \
3785 (int8x16_t)__builtin_neon_vrsraq_n_v(__a, __b, __c, 16); }) 3785 (int8x16_t)__builtin_neon_vrsraq_n_v(__a, __b, __c, 32); })
3786 #define vrsraq_n_s16(a, b, __c) __extension__ ({ \ 3786 #define vrsraq_n_s16(a, b, __c) __extension__ ({ \
3787 int16x8_t __a = (a); int16x8_t __b = (b); \ 3787 int16x8_t __a = (a); int16x8_t __b = (b); \
3788 (int16x8_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 17); }) 3788 (int16x8_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); })
3789 #define vrsraq_n_s32(a, b, __c) __extension__ ({ \ 3789 #define vrsraq_n_s32(a, b, __c) __extension__ ({ \
3790 int32x4_t __a = (a); int32x4_t __b = (b); \ 3790 int32x4_t __a = (a); int32x4_t __b = (b); \
3791 (int32x4_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 18); }) 3791 (int32x4_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); })
3792 #define vrsraq_n_s64(a, b, __c) __extension__ ({ \ 3792 #define vrsraq_n_s64(a, b, __c) __extension__ ({ \
3793 int64x2_t __a = (a); int64x2_t __b = (b); \ 3793 int64x2_t __a = (a); int64x2_t __b = (b); \
3794 (int64x2_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 19); }) 3794 (int64x2_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); })
3795 #define vrsraq_n_u8(a, b, __c) __extension__ ({ \ 3795 #define vrsraq_n_u8(a, b, __c) __extension__ ({ \
3796 uint8x16_t __a = (a); uint8x16_t __b = (b); \ 3796 uint8x16_t __a = (a); uint8x16_t __b = (b); \
3797 (uint8x16_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 24); }) 3797 (uint8x16_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); })
3798 #define vrsraq_n_u16(a, b, __c) __extension__ ({ \ 3798 #define vrsraq_n_u16(a, b, __c) __extension__ ({ \
3799 uint16x8_t __a = (a); uint16x8_t __b = (b); \ 3799 uint16x8_t __a = (a); uint16x8_t __b = (b); \
3800 (uint16x8_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 25); }) 3800 (uint16x8_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); })
3801 #define vrsraq_n_u32(a, b, __c) __extension__ ({ \ 3801 #define vrsraq_n_u32(a, b, __c) __extension__ ({ \
3802 uint32x4_t __a = (a); uint32x4_t __b = (b); \ 3802 uint32x4_t __a = (a); uint32x4_t __b = (b); \
3803 (uint32x4_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 26); }) 3803 (uint32x4_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); })
3804 #define vrsraq_n_u64(a, b, __c) __extension__ ({ \ 3804 #define vrsraq_n_u64(a, b, __c) __extension__ ({ \
3805 uint64x2_t __a = (a); uint64x2_t __b = (b); \ 3805 uint64x2_t __a = (a); uint64x2_t __b = (b); \
3806 (uint64x2_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 27); }) 3806 (uint64x2_t)__builtin_neon_vrsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); })
3807 3807
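The vrsra_n/vrsraq_n macros above perform a rounding shift right by an immediate and accumulate, i.e. a + ((b + (1 << (n-1))) >> n) per lane. A minimal usage sketch, not part of this diff, assuming an ARM/NEON target and an arbitrary shift of 4:

#include <arm_neon.h>

/* Illustrative sketch: add a rounded, right-shifted delta to an accumulator. */
static inline uint16x4_t accumulate_scaled(uint16x4_t acc, uint16x4_t delta) {
  return vrsra_n_u16(acc, delta, 4);   /* acc + ((delta + 8) >> 4) per lane */
}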
3808 __ai int8x8_t vrsubhn_s16(int16x8_t __a, int16x8_t __b) { \ 3808 __ai int8x8_t vrsubhn_s16(int16x8_t __a, int16x8_t __b) { \
3809 return (int8x8_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 0); } 3809 return (int8x8_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 0); }
3810 __ai int16x4_t vrsubhn_s32(int32x4_t __a, int32x4_t __b) { \ 3810 __ai int16x4_t vrsubhn_s32(int32x4_t __a, int32x4_t __b) { \
3811 return (int16x4_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 1); } 3811 return (int16x4_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 1); }
3812 __ai int32x2_t vrsubhn_s64(int64x2_t __a, int64x2_t __b) { \ 3812 __ai int32x2_t vrsubhn_s64(int64x2_t __a, int64x2_t __b) { \
3813 return (int32x2_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 2); } 3813 return (int32x2_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 2); }
3814 __ai uint8x8_t vrsubhn_u16(uint16x8_t __a, uint16x8_t __b) { \ 3814 __ai uint8x8_t vrsubhn_u16(uint16x8_t __a, uint16x8_t __b) { \
3815 return (uint8x8_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 8); } 3815 return (uint8x8_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 16); }
3816 __ai uint16x4_t vrsubhn_u32(uint32x4_t __a, uint32x4_t __b) { \ 3816 __ai uint16x4_t vrsubhn_u32(uint32x4_t __a, uint32x4_t __b) { \
3817 return (uint16x4_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 9); } 3817 return (uint16x4_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 17); }
3818 __ai uint32x2_t vrsubhn_u64(uint64x2_t __a, uint64x2_t __b) { \ 3818 __ai uint32x2_t vrsubhn_u64(uint64x2_t __a, uint64x2_t __b) { \
3819 return (uint32x2_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 10); } 3819 return (uint32x2_t)__builtin_neon_vrsubhn_v((int8x16_t)__a, (int8x16_t)__b, 18); }
3820 3820
3821 #define vset_lane_u8(a, b, __c) __extension__ ({ \ 3821 #define vset_lane_u8(a, b, __c) __extension__ ({ \
3822 uint8_t __a = (a); uint8x8_t __b = (b); \ 3822 uint8_t __a = (a); uint8x8_t __b = (b); \
3823 (uint8x8_t)__builtin_neon_vset_lane_i8(__a, (int8x8_t)__b, __c); }) 3823 (uint8x8_t)__builtin_neon_vset_lane_i8(__a, (int8x8_t)__b, __c); })
3824 #define vset_lane_u16(a, b, __c) __extension__ ({ \ 3824 #define vset_lane_u16(a, b, __c) __extension__ ({ \
3825 uint16_t __a = (a); uint16x4_t __b = (b); \ 3825 uint16_t __a = (a); uint16x4_t __b = (b); \
3826 (uint16x4_t)__builtin_neon_vset_lane_i16(__a, (int16x4_t)__b, __c); }) 3826 (uint16x4_t)__builtin_neon_vset_lane_i16(__a, (int16x4_t)__b, __c); })
3827 #define vset_lane_u32(a, b, __c) __extension__ ({ \ 3827 #define vset_lane_u32(a, b, __c) __extension__ ({ \
3828 uint32_t __a = (a); uint32x2_t __b = (b); \ 3828 uint32_t __a = (a); uint32x2_t __b = (b); \
3829 (uint32x2_t)__builtin_neon_vset_lane_i32(__a, (int32x2_t)__b, __c); }) 3829 (uint32x2_t)__builtin_neon_vset_lane_i32(__a, (int32x2_t)__b, __c); })
(...skipping 57 matching lines...)
3887 3887
3888 __ai int8x8_t vshl_s8(int8x8_t __a, int8x8_t __b) { \ 3888 __ai int8x8_t vshl_s8(int8x8_t __a, int8x8_t __b) { \
3889 return (int8x8_t)__builtin_neon_vshl_v(__a, __b, 0); } 3889 return (int8x8_t)__builtin_neon_vshl_v(__a, __b, 0); }
3890 __ai int16x4_t vshl_s16(int16x4_t __a, int16x4_t __b) { \ 3890 __ai int16x4_t vshl_s16(int16x4_t __a, int16x4_t __b) { \
3891 return (int16x4_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 1); } 3891 return (int16x4_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 1); }
3892 __ai int32x2_t vshl_s32(int32x2_t __a, int32x2_t __b) { \ 3892 __ai int32x2_t vshl_s32(int32x2_t __a, int32x2_t __b) { \
3893 return (int32x2_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 2); } 3893 return (int32x2_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 2); }
3894 __ai int64x1_t vshl_s64(int64x1_t __a, int64x1_t __b) { \ 3894 __ai int64x1_t vshl_s64(int64x1_t __a, int64x1_t __b) { \
3895 return (int64x1_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 3); } 3895 return (int64x1_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 3); }
3896 __ai uint8x8_t vshl_u8(uint8x8_t __a, int8x8_t __b) { \ 3896 __ai uint8x8_t vshl_u8(uint8x8_t __a, int8x8_t __b) { \
3897 return (uint8x8_t)__builtin_neon_vshl_v((int8x8_t)__a, __b, 8); } 3897 return (uint8x8_t)__builtin_neon_vshl_v((int8x8_t)__a, __b, 16); }
3898 __ai uint16x4_t vshl_u16(uint16x4_t __a, int16x4_t __b) { \ 3898 __ai uint16x4_t vshl_u16(uint16x4_t __a, int16x4_t __b) { \
3899 return (uint16x4_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 9); } 3899 return (uint16x4_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 17); }
3900 __ai uint32x2_t vshl_u32(uint32x2_t __a, int32x2_t __b) { \ 3900 __ai uint32x2_t vshl_u32(uint32x2_t __a, int32x2_t __b) { \
3901 return (uint32x2_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 10); } 3901 return (uint32x2_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 18); }
3902 __ai uint64x1_t vshl_u64(uint64x1_t __a, int64x1_t __b) { \ 3902 __ai uint64x1_t vshl_u64(uint64x1_t __a, int64x1_t __b) { \
3903 return (uint64x1_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 11); } 3903 return (uint64x1_t)__builtin_neon_vshl_v((int8x8_t)__a, (int8x8_t)__b, 19); }
3904 __ai int8x16_t vshlq_s8(int8x16_t __a, int8x16_t __b) { \ 3904 __ai int8x16_t vshlq_s8(int8x16_t __a, int8x16_t __b) { \
3905 return (int8x16_t)__builtin_neon_vshlq_v(__a, __b, 16); } 3905 return (int8x16_t)__builtin_neon_vshlq_v(__a, __b, 32); }
3906 __ai int16x8_t vshlq_s16(int16x8_t __a, int16x8_t __b) { \ 3906 __ai int16x8_t vshlq_s16(int16x8_t __a, int16x8_t __b) { \
3907 return (int16x8_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 17); } 3907 return (int16x8_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 33); }
3908 __ai int32x4_t vshlq_s32(int32x4_t __a, int32x4_t __b) { \ 3908 __ai int32x4_t vshlq_s32(int32x4_t __a, int32x4_t __b) { \
3909 return (int32x4_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 18); } 3909 return (int32x4_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 34); }
3910 __ai int64x2_t vshlq_s64(int64x2_t __a, int64x2_t __b) { \ 3910 __ai int64x2_t vshlq_s64(int64x2_t __a, int64x2_t __b) { \
3911 return (int64x2_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 19); } 3911 return (int64x2_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 35); }
3912 __ai uint8x16_t vshlq_u8(uint8x16_t __a, int8x16_t __b) { \ 3912 __ai uint8x16_t vshlq_u8(uint8x16_t __a, int8x16_t __b) { \
3913 return (uint8x16_t)__builtin_neon_vshlq_v((int8x16_t)__a, __b, 24); } 3913 return (uint8x16_t)__builtin_neon_vshlq_v((int8x16_t)__a, __b, 48); }
3914 __ai uint16x8_t vshlq_u16(uint16x8_t __a, int16x8_t __b) { \ 3914 __ai uint16x8_t vshlq_u16(uint16x8_t __a, int16x8_t __b) { \
3915 return (uint16x8_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 25); } 3915 return (uint16x8_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
3916 __ai uint32x4_t vshlq_u32(uint32x4_t __a, int32x4_t __b) { \ 3916 __ai uint32x4_t vshlq_u32(uint32x4_t __a, int32x4_t __b) { \
3917 return (uint32x4_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 3917 return (uint32x4_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
3918 __ai uint64x2_t vshlq_u64(uint64x2_t __a, int64x2_t __b) { \ 3918 __ai uint64x2_t vshlq_u64(uint64x2_t __a, int64x2_t __b) { \
3919 return (uint64x2_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 27); } 3919 return (uint64x2_t)__builtin_neon_vshlq_v((int8x16_t)__a, (int8x16_t)__b, 51); }
3920 3920
3921 #define vshll_n_s8(a, __b) __extension__ ({ \ 3921 #define vshll_n_s8(a, __b) __extension__ ({ \
3922 int8x8_t __a = (a); \ 3922 int8x8_t __a = (a); \
3923 (int16x8_t)__builtin_neon_vshll_n_v(__a, __b, 17); }) 3923 (int16x8_t)__builtin_neon_vshll_n_v(__a, __b, 33); })
3924 #define vshll_n_s16(a, __b) __extension__ ({ \ 3924 #define vshll_n_s16(a, __b) __extension__ ({ \
3925 int16x4_t __a = (a); \ 3925 int16x4_t __a = (a); \
3926 (int32x4_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 18); }) 3926 (int32x4_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 34); })
3927 #define vshll_n_s32(a, __b) __extension__ ({ \ 3927 #define vshll_n_s32(a, __b) __extension__ ({ \
3928 int32x2_t __a = (a); \ 3928 int32x2_t __a = (a); \
3929 (int64x2_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 19); }) 3929 (int64x2_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 35); })
3930 #define vshll_n_u8(a, __b) __extension__ ({ \ 3930 #define vshll_n_u8(a, __b) __extension__ ({ \
3931 uint8x8_t __a = (a); \ 3931 uint8x8_t __a = (a); \
3932 (uint16x8_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 25); }) 3932 (uint16x8_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 49); })
3933 #define vshll_n_u16(a, __b) __extension__ ({ \ 3933 #define vshll_n_u16(a, __b) __extension__ ({ \
3934 uint16x4_t __a = (a); \ 3934 uint16x4_t __a = (a); \
3935 (uint32x4_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 26); }) 3935 (uint32x4_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 50); })
3936 #define vshll_n_u32(a, __b) __extension__ ({ \ 3936 #define vshll_n_u32(a, __b) __extension__ ({ \
3937 uint32x2_t __a = (a); \ 3937 uint32x2_t __a = (a); \
3938 (uint64x2_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 27); }) 3938 (uint64x2_t)__builtin_neon_vshll_n_v((int8x8_t)__a, __b, 51); })
3939 3939
3940 #define vshl_n_s8(a, __b) __extension__ ({ \ 3940 #define vshl_n_s8(a, __b) __extension__ ({ \
3941 int8x8_t __a = (a); \ 3941 int8x8_t __a = (a); \
3942 (int8x8_t)__builtin_neon_vshl_n_v(__a, __b, 0); }) 3942 (int8x8_t)__builtin_neon_vshl_n_v(__a, __b, 0); })
3943 #define vshl_n_s16(a, __b) __extension__ ({ \ 3943 #define vshl_n_s16(a, __b) __extension__ ({ \
3944 int16x4_t __a = (a); \ 3944 int16x4_t __a = (a); \
3945 (int16x4_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 1); }) 3945 (int16x4_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 1); })
3946 #define vshl_n_s32(a, __b) __extension__ ({ \ 3946 #define vshl_n_s32(a, __b) __extension__ ({ \
3947 int32x2_t __a = (a); \ 3947 int32x2_t __a = (a); \
3948 (int32x2_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 2); }) 3948 (int32x2_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 2); })
3949 #define vshl_n_s64(a, __b) __extension__ ({ \ 3949 #define vshl_n_s64(a, __b) __extension__ ({ \
3950 int64x1_t __a = (a); \ 3950 int64x1_t __a = (a); \
3951 (int64x1_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 3); }) 3951 (int64x1_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 3); })
3952 #define vshl_n_u8(a, __b) __extension__ ({ \ 3952 #define vshl_n_u8(a, __b) __extension__ ({ \
3953 uint8x8_t __a = (a); \ 3953 uint8x8_t __a = (a); \
3954 (uint8x8_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 8); }) 3954 (uint8x8_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 16); })
3955 #define vshl_n_u16(a, __b) __extension__ ({ \ 3955 #define vshl_n_u16(a, __b) __extension__ ({ \
3956 uint16x4_t __a = (a); \ 3956 uint16x4_t __a = (a); \
3957 (uint16x4_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 9); }) 3957 (uint16x4_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 17); })
3958 #define vshl_n_u32(a, __b) __extension__ ({ \ 3958 #define vshl_n_u32(a, __b) __extension__ ({ \
3959 uint32x2_t __a = (a); \ 3959 uint32x2_t __a = (a); \
3960 (uint32x2_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 10); }) 3960 (uint32x2_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 18); })
3961 #define vshl_n_u64(a, __b) __extension__ ({ \ 3961 #define vshl_n_u64(a, __b) __extension__ ({ \
3962 uint64x1_t __a = (a); \ 3962 uint64x1_t __a = (a); \
3963 (uint64x1_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 11); }) 3963 (uint64x1_t)__builtin_neon_vshl_n_v((int8x8_t)__a, __b, 19); })
3964 #define vshlq_n_s8(a, __b) __extension__ ({ \ 3964 #define vshlq_n_s8(a, __b) __extension__ ({ \
3965 int8x16_t __a = (a); \ 3965 int8x16_t __a = (a); \
3966 (int8x16_t)__builtin_neon_vshlq_n_v(__a, __b, 16); }) 3966 (int8x16_t)__builtin_neon_vshlq_n_v(__a, __b, 32); })
3967 #define vshlq_n_s16(a, __b) __extension__ ({ \ 3967 #define vshlq_n_s16(a, __b) __extension__ ({ \
3968 int16x8_t __a = (a); \ 3968 int16x8_t __a = (a); \
3969 (int16x8_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 17); }) 3969 (int16x8_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 33); })
3970 #define vshlq_n_s32(a, __b) __extension__ ({ \ 3970 #define vshlq_n_s32(a, __b) __extension__ ({ \
3971 int32x4_t __a = (a); \ 3971 int32x4_t __a = (a); \
3972 (int32x4_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 18); }) 3972 (int32x4_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 34); })
3973 #define vshlq_n_s64(a, __b) __extension__ ({ \ 3973 #define vshlq_n_s64(a, __b) __extension__ ({ \
3974 int64x2_t __a = (a); \ 3974 int64x2_t __a = (a); \
3975 (int64x2_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 19); }) 3975 (int64x2_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 35); })
3976 #define vshlq_n_u8(a, __b) __extension__ ({ \ 3976 #define vshlq_n_u8(a, __b) __extension__ ({ \
3977 uint8x16_t __a = (a); \ 3977 uint8x16_t __a = (a); \
3978 (uint8x16_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 24); }) 3978 (uint8x16_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 48); })
3979 #define vshlq_n_u16(a, __b) __extension__ ({ \ 3979 #define vshlq_n_u16(a, __b) __extension__ ({ \
3980 uint16x8_t __a = (a); \ 3980 uint16x8_t __a = (a); \
3981 (uint16x8_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 25); }) 3981 (uint16x8_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 49); })
3982 #define vshlq_n_u32(a, __b) __extension__ ({ \ 3982 #define vshlq_n_u32(a, __b) __extension__ ({ \
3983 uint32x4_t __a = (a); \ 3983 uint32x4_t __a = (a); \
3984 (uint32x4_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 26); }) 3984 (uint32x4_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 50); })
3985 #define vshlq_n_u64(a, __b) __extension__ ({ \ 3985 #define vshlq_n_u64(a, __b) __extension__ ({ \
3986 uint64x2_t __a = (a); \ 3986 uint64x2_t __a = (a); \
3987 (uint64x2_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 27); }) 3987 (uint64x2_t)__builtin_neon_vshlq_n_v((int8x16_t)__a, __b, 51); })
3988 3988
3989 #define vshrn_n_s16(a, __b) __extension__ ({ \ 3989 #define vshrn_n_s16(a, __b) __extension__ ({ \
3990 int16x8_t __a = (a); \ 3990 int16x8_t __a = (a); \
3991 (int8x8_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 0); }) 3991 (int8x8_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 0); })
3992 #define vshrn_n_s32(a, __b) __extension__ ({ \ 3992 #define vshrn_n_s32(a, __b) __extension__ ({ \
3993 int32x4_t __a = (a); \ 3993 int32x4_t __a = (a); \
3994 (int16x4_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 1); }) 3994 (int16x4_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 1); })
3995 #define vshrn_n_s64(a, __b) __extension__ ({ \ 3995 #define vshrn_n_s64(a, __b) __extension__ ({ \
3996 int64x2_t __a = (a); \ 3996 int64x2_t __a = (a); \
3997 (int32x2_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 2); }) 3997 (int32x2_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 2); })
3998 #define vshrn_n_u16(a, __b) __extension__ ({ \ 3998 #define vshrn_n_u16(a, __b) __extension__ ({ \
3999 uint16x8_t __a = (a); \ 3999 uint16x8_t __a = (a); \
4000 (uint8x8_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 8); }) 4000 (uint8x8_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 16); })
4001 #define vshrn_n_u32(a, __b) __extension__ ({ \ 4001 #define vshrn_n_u32(a, __b) __extension__ ({ \
4002 uint32x4_t __a = (a); \ 4002 uint32x4_t __a = (a); \
4003 (uint16x4_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 9); }) 4003 (uint16x4_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 17); })
4004 #define vshrn_n_u64(a, __b) __extension__ ({ \ 4004 #define vshrn_n_u64(a, __b) __extension__ ({ \
4005 uint64x2_t __a = (a); \ 4005 uint64x2_t __a = (a); \
4006 (uint32x2_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 10); }) 4006 (uint32x2_t)__builtin_neon_vshrn_n_v((int8x16_t)__a, __b, 18); })
4007 4007
4008 #define vshr_n_s8(a, __b) __extension__ ({ \ 4008 #define vshr_n_s8(a, __b) __extension__ ({ \
4009 int8x8_t __a = (a); \ 4009 int8x8_t __a = (a); \
4010 (int8x8_t)__builtin_neon_vshr_n_v(__a, __b, 0); }) 4010 (int8x8_t)__builtin_neon_vshr_n_v(__a, __b, 0); })
4011 #define vshr_n_s16(a, __b) __extension__ ({ \ 4011 #define vshr_n_s16(a, __b) __extension__ ({ \
4012 int16x4_t __a = (a); \ 4012 int16x4_t __a = (a); \
4013 (int16x4_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 1); }) 4013 (int16x4_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 1); })
4014 #define vshr_n_s32(a, __b) __extension__ ({ \ 4014 #define vshr_n_s32(a, __b) __extension__ ({ \
4015 int32x2_t __a = (a); \ 4015 int32x2_t __a = (a); \
4016 (int32x2_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 2); }) 4016 (int32x2_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 2); })
4017 #define vshr_n_s64(a, __b) __extension__ ({ \ 4017 #define vshr_n_s64(a, __b) __extension__ ({ \
4018 int64x1_t __a = (a); \ 4018 int64x1_t __a = (a); \
4019 (int64x1_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 3); }) 4019 (int64x1_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 3); })
4020 #define vshr_n_u8(a, __b) __extension__ ({ \ 4020 #define vshr_n_u8(a, __b) __extension__ ({ \
4021 uint8x8_t __a = (a); \ 4021 uint8x8_t __a = (a); \
4022 (uint8x8_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 8); }) 4022 (uint8x8_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 16); })
4023 #define vshr_n_u16(a, __b) __extension__ ({ \ 4023 #define vshr_n_u16(a, __b) __extension__ ({ \
4024 uint16x4_t __a = (a); \ 4024 uint16x4_t __a = (a); \
4025 (uint16x4_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 9); }) 4025 (uint16x4_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 17); })
4026 #define vshr_n_u32(a, __b) __extension__ ({ \ 4026 #define vshr_n_u32(a, __b) __extension__ ({ \
4027 uint32x2_t __a = (a); \ 4027 uint32x2_t __a = (a); \
4028 (uint32x2_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 10); }) 4028 (uint32x2_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 18); })
4029 #define vshr_n_u64(a, __b) __extension__ ({ \ 4029 #define vshr_n_u64(a, __b) __extension__ ({ \
4030 uint64x1_t __a = (a); \ 4030 uint64x1_t __a = (a); \
4031 (uint64x1_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 11); }) 4031 (uint64x1_t)__builtin_neon_vshr_n_v((int8x8_t)__a, __b, 19); })
4032 #define vshrq_n_s8(a, __b) __extension__ ({ \ 4032 #define vshrq_n_s8(a, __b) __extension__ ({ \
4033 int8x16_t __a = (a); \ 4033 int8x16_t __a = (a); \
4034 (int8x16_t)__builtin_neon_vshrq_n_v(__a, __b, 16); }) 4034 (int8x16_t)__builtin_neon_vshrq_n_v(__a, __b, 32); })
4035 #define vshrq_n_s16(a, __b) __extension__ ({ \ 4035 #define vshrq_n_s16(a, __b) __extension__ ({ \
4036 int16x8_t __a = (a); \ 4036 int16x8_t __a = (a); \
4037 (int16x8_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 17); }) 4037 (int16x8_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 33); })
4038 #define vshrq_n_s32(a, __b) __extension__ ({ \ 4038 #define vshrq_n_s32(a, __b) __extension__ ({ \
4039 int32x4_t __a = (a); \ 4039 int32x4_t __a = (a); \
4040 (int32x4_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 18); }) 4040 (int32x4_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 34); })
4041 #define vshrq_n_s64(a, __b) __extension__ ({ \ 4041 #define vshrq_n_s64(a, __b) __extension__ ({ \
4042 int64x2_t __a = (a); \ 4042 int64x2_t __a = (a); \
4043 (int64x2_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 19); }) 4043 (int64x2_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 35); })
4044 #define vshrq_n_u8(a, __b) __extension__ ({ \ 4044 #define vshrq_n_u8(a, __b) __extension__ ({ \
4045 uint8x16_t __a = (a); \ 4045 uint8x16_t __a = (a); \
4046 (uint8x16_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 24); }) 4046 (uint8x16_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 48); })
4047 #define vshrq_n_u16(a, __b) __extension__ ({ \ 4047 #define vshrq_n_u16(a, __b) __extension__ ({ \
4048 uint16x8_t __a = (a); \ 4048 uint16x8_t __a = (a); \
4049 (uint16x8_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 25); }) 4049 (uint16x8_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 49); })
4050 #define vshrq_n_u32(a, __b) __extension__ ({ \ 4050 #define vshrq_n_u32(a, __b) __extension__ ({ \
4051 uint32x4_t __a = (a); \ 4051 uint32x4_t __a = (a); \
4052 (uint32x4_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 26); }) 4052 (uint32x4_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 50); })
4053 #define vshrq_n_u64(a, __b) __extension__ ({ \ 4053 #define vshrq_n_u64(a, __b) __extension__ ({ \
4054 uint64x2_t __a = (a); \ 4054 uint64x2_t __a = (a); \
4055 (uint64x2_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 27); }) 4055 (uint64x2_t)__builtin_neon_vshrq_n_v((int8x16_t)__a, __b, 51); })
4056 4056
4057 #define vsli_n_s8(a, b, __c) __extension__ ({ \ 4057 #define vsli_n_s8(a, b, __c) __extension__ ({ \
4058 int8x8_t __a = (a); int8x8_t __b = (b); \ 4058 int8x8_t __a = (a); int8x8_t __b = (b); \
4059 (int8x8_t)__builtin_neon_vsli_n_v(__a, __b, __c, 0); }) 4059 (int8x8_t)__builtin_neon_vsli_n_v(__a, __b, __c, 0); })
4060 #define vsli_n_s16(a, b, __c) __extension__ ({ \ 4060 #define vsli_n_s16(a, b, __c) __extension__ ({ \
4061 int16x4_t __a = (a); int16x4_t __b = (b); \ 4061 int16x4_t __a = (a); int16x4_t __b = (b); \
4062 (int16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) 4062 (int16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
4063 #define vsli_n_s32(a, b, __c) __extension__ ({ \ 4063 #define vsli_n_s32(a, b, __c) __extension__ ({ \
4064 int32x2_t __a = (a); int32x2_t __b = (b); \ 4064 int32x2_t __a = (a); int32x2_t __b = (b); \
4065 (int32x2_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) 4065 (int32x2_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
4066 #define vsli_n_s64(a, b, __c) __extension__ ({ \ 4066 #define vsli_n_s64(a, b, __c) __extension__ ({ \
4067 int64x1_t __a = (a); int64x1_t __b = (b); \ 4067 int64x1_t __a = (a); int64x1_t __b = (b); \
4068 (int64x1_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) 4068 (int64x1_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
4069 #define vsli_n_u8(a, b, __c) __extension__ ({ \ 4069 #define vsli_n_u8(a, b, __c) __extension__ ({ \
4070 uint8x8_t __a = (a); uint8x8_t __b = (b); \ 4070 uint8x8_t __a = (a); uint8x8_t __b = (b); \
4071 (uint8x8_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 8); }) 4071 (uint8x8_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); })
4072 #define vsli_n_u16(a, b, __c) __extension__ ({ \ 4072 #define vsli_n_u16(a, b, __c) __extension__ ({ \
4073 uint16x4_t __a = (a); uint16x4_t __b = (b); \ 4073 uint16x4_t __a = (a); uint16x4_t __b = (b); \
4074 (uint16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 9); }) 4074 (uint16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); })
4075 #define vsli_n_u32(a, b, __c) __extension__ ({ \ 4075 #define vsli_n_u32(a, b, __c) __extension__ ({ \
4076 uint32x2_t __a = (a); uint32x2_t __b = (b); \ 4076 uint32x2_t __a = (a); uint32x2_t __b = (b); \
4077 (uint32x2_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 10); }) 4077 (uint32x2_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); })
4078 #define vsli_n_u64(a, b, __c) __extension__ ({ \ 4078 #define vsli_n_u64(a, b, __c) __extension__ ({ \
4079 uint64x1_t __a = (a); uint64x1_t __b = (b); \ 4079 uint64x1_t __a = (a); uint64x1_t __b = (b); \
4080 (uint64x1_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 11); }) 4080 (uint64x1_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); })
4081 #define vsli_n_p8(a, b, __c) __extension__ ({ \ 4081 #define vsli_n_p8(a, b, __c) __extension__ ({ \
4082 poly8x8_t __a = (a); poly8x8_t __b = (b); \ 4082 poly8x8_t __a = (a); poly8x8_t __b = (b); \
4083 (poly8x8_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); }) 4083 (poly8x8_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); })
4084 #define vsli_n_p16(a, b, __c) __extension__ ({ \ 4084 #define vsli_n_p16(a, b, __c) __extension__ ({ \
4085 poly16x4_t __a = (a); poly16x4_t __b = (b); \ 4085 poly16x4_t __a = (a); poly16x4_t __b = (b); \
4086 (poly16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 6); }) 4086 (poly16x4_t)__builtin_neon_vsli_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); })
4087 #define vsliq_n_s8(a, b, __c) __extension__ ({ \ 4087 #define vsliq_n_s8(a, b, __c) __extension__ ({ \
4088 int8x16_t __a = (a); int8x16_t __b = (b); \ 4088 int8x16_t __a = (a); int8x16_t __b = (b); \
4089 (int8x16_t)__builtin_neon_vsliq_n_v(__a, __b, __c, 16); }) 4089 (int8x16_t)__builtin_neon_vsliq_n_v(__a, __b, __c, 32); })
4090 #define vsliq_n_s16(a, b, __c) __extension__ ({ \ 4090 #define vsliq_n_s16(a, b, __c) __extension__ ({ \
4091 int16x8_t __a = (a); int16x8_t __b = (b); \ 4091 int16x8_t __a = (a); int16x8_t __b = (b); \
4092 (int16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 17); }) 4092 (int16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); })
4093 #define vsliq_n_s32(a, b, __c) __extension__ ({ \ 4093 #define vsliq_n_s32(a, b, __c) __extension__ ({ \
4094 int32x4_t __a = (a); int32x4_t __b = (b); \ 4094 int32x4_t __a = (a); int32x4_t __b = (b); \
4095 (int32x4_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 18); }) 4095 (int32x4_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); })
4096 #define vsliq_n_s64(a, b, __c) __extension__ ({ \ 4096 #define vsliq_n_s64(a, b, __c) __extension__ ({ \
4097 int64x2_t __a = (a); int64x2_t __b = (b); \ 4097 int64x2_t __a = (a); int64x2_t __b = (b); \
4098 (int64x2_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 19); }) 4098 (int64x2_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); })
4099 #define vsliq_n_u8(a, b, __c) __extension__ ({ \ 4099 #define vsliq_n_u8(a, b, __c) __extension__ ({ \
4100 uint8x16_t __a = (a); uint8x16_t __b = (b); \ 4100 uint8x16_t __a = (a); uint8x16_t __b = (b); \
4101 (uint8x16_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 24); }) 4101 (uint8x16_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); })
4102 #define vsliq_n_u16(a, b, __c) __extension__ ({ \ 4102 #define vsliq_n_u16(a, b, __c) __extension__ ({ \
4103 uint16x8_t __a = (a); uint16x8_t __b = (b); \ 4103 uint16x8_t __a = (a); uint16x8_t __b = (b); \
4104 (uint16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 25); }) 4104 (uint16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); })
4105 #define vsliq_n_u32(a, b, __c) __extension__ ({ \ 4105 #define vsliq_n_u32(a, b, __c) __extension__ ({ \
4106 uint32x4_t __a = (a); uint32x4_t __b = (b); \ 4106 uint32x4_t __a = (a); uint32x4_t __b = (b); \
4107 (uint32x4_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 26); }) 4107 (uint32x4_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); })
4108 #define vsliq_n_u64(a, b, __c) __extension__ ({ \ 4108 #define vsliq_n_u64(a, b, __c) __extension__ ({ \
4109 uint64x2_t __a = (a); uint64x2_t __b = (b); \ 4109 uint64x2_t __a = (a); uint64x2_t __b = (b); \
4110 (uint64x2_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 27); }) 4110 (uint64x2_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); })
4111 #define vsliq_n_p8(a, b, __c) __extension__ ({ \ 4111 #define vsliq_n_p8(a, b, __c) __extension__ ({ \
4112 poly8x16_t __a = (a); poly8x16_t __b = (b); \ 4112 poly8x16_t __a = (a); poly8x16_t __b = (b); \
4113 (poly8x16_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 21); }) 4113 (poly8x16_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 36); })
4114 #define vsliq_n_p16(a, b, __c) __extension__ ({ \ 4114 #define vsliq_n_p16(a, b, __c) __extension__ ({ \
4115 poly16x8_t __a = (a); poly16x8_t __b = (b); \ 4115 poly16x8_t __a = (a); poly16x8_t __b = (b); \
4116 (poly16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 22); }) 4116 (poly16x8_t)__builtin_neon_vsliq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 37); })
4117 4117
4118 #define vsra_n_s8(a, b, __c) __extension__ ({ \ 4118 #define vsra_n_s8(a, b, __c) __extension__ ({ \
4119 int8x8_t __a = (a); int8x8_t __b = (b); \ 4119 int8x8_t __a = (a); int8x8_t __b = (b); \
4120 (int8x8_t)__builtin_neon_vsra_n_v(__a, __b, __c, 0); }) 4120 (int8x8_t)__builtin_neon_vsra_n_v(__a, __b, __c, 0); })
4121 #define vsra_n_s16(a, b, __c) __extension__ ({ \ 4121 #define vsra_n_s16(a, b, __c) __extension__ ({ \
4122 int16x4_t __a = (a); int16x4_t __b = (b); \ 4122 int16x4_t __a = (a); int16x4_t __b = (b); \
4123 (int16x4_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) 4123 (int16x4_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
4124 #define vsra_n_s32(a, b, __c) __extension__ ({ \ 4124 #define vsra_n_s32(a, b, __c) __extension__ ({ \
4125 int32x2_t __a = (a); int32x2_t __b = (b); \ 4125 int32x2_t __a = (a); int32x2_t __b = (b); \
4126 (int32x2_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) 4126 (int32x2_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
4127 #define vsra_n_s64(a, b, __c) __extension__ ({ \ 4127 #define vsra_n_s64(a, b, __c) __extension__ ({ \
4128 int64x1_t __a = (a); int64x1_t __b = (b); \ 4128 int64x1_t __a = (a); int64x1_t __b = (b); \
4129 (int64x1_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) 4129 (int64x1_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
4130 #define vsra_n_u8(a, b, __c) __extension__ ({ \ 4130 #define vsra_n_u8(a, b, __c) __extension__ ({ \
4131 uint8x8_t __a = (a); uint8x8_t __b = (b); \ 4131 uint8x8_t __a = (a); uint8x8_t __b = (b); \
4132 (uint8x8_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 8); }) 4132 (uint8x8_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); })
4133 #define vsra_n_u16(a, b, __c) __extension__ ({ \ 4133 #define vsra_n_u16(a, b, __c) __extension__ ({ \
4134 uint16x4_t __a = (a); uint16x4_t __b = (b); \ 4134 uint16x4_t __a = (a); uint16x4_t __b = (b); \
4135 (uint16x4_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 9); }) 4135 (uint16x4_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); })
4136 #define vsra_n_u32(a, b, __c) __extension__ ({ \ 4136 #define vsra_n_u32(a, b, __c) __extension__ ({ \
4137 uint32x2_t __a = (a); uint32x2_t __b = (b); \ 4137 uint32x2_t __a = (a); uint32x2_t __b = (b); \
4138 (uint32x2_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 10); }) 4138 (uint32x2_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); })
4139 #define vsra_n_u64(a, b, __c) __extension__ ({ \ 4139 #define vsra_n_u64(a, b, __c) __extension__ ({ \
4140 uint64x1_t __a = (a); uint64x1_t __b = (b); \ 4140 uint64x1_t __a = (a); uint64x1_t __b = (b); \
4141 (uint64x1_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 11); }) 4141 (uint64x1_t)__builtin_neon_vsra_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); })
4142 #define vsraq_n_s8(a, b, __c) __extension__ ({ \ 4142 #define vsraq_n_s8(a, b, __c) __extension__ ({ \
4143 int8x16_t __a = (a); int8x16_t __b = (b); \ 4143 int8x16_t __a = (a); int8x16_t __b = (b); \
4144 (int8x16_t)__builtin_neon_vsraq_n_v(__a, __b, __c, 16); }) 4144 (int8x16_t)__builtin_neon_vsraq_n_v(__a, __b, __c, 32); })
4145 #define vsraq_n_s16(a, b, __c) __extension__ ({ \ 4145 #define vsraq_n_s16(a, b, __c) __extension__ ({ \
4146 int16x8_t __a = (a); int16x8_t __b = (b); \ 4146 int16x8_t __a = (a); int16x8_t __b = (b); \
4147 (int16x8_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 17); }) 4147 (int16x8_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); })
4148 #define vsraq_n_s32(a, b, __c) __extension__ ({ \ 4148 #define vsraq_n_s32(a, b, __c) __extension__ ({ \
4149 int32x4_t __a = (a); int32x4_t __b = (b); \ 4149 int32x4_t __a = (a); int32x4_t __b = (b); \
4150 (int32x4_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 18); }) 4150 (int32x4_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); })
4151 #define vsraq_n_s64(a, b, __c) __extension__ ({ \ 4151 #define vsraq_n_s64(a, b, __c) __extension__ ({ \
4152 int64x2_t __a = (a); int64x2_t __b = (b); \ 4152 int64x2_t __a = (a); int64x2_t __b = (b); \
4153 (int64x2_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 19); }) 4153 (int64x2_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); })
4154 #define vsraq_n_u8(a, b, __c) __extension__ ({ \ 4154 #define vsraq_n_u8(a, b, __c) __extension__ ({ \
4155 uint8x16_t __a = (a); uint8x16_t __b = (b); \ 4155 uint8x16_t __a = (a); uint8x16_t __b = (b); \
4156 (uint8x16_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 24); }) 4156 (uint8x16_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); })
4157 #define vsraq_n_u16(a, b, __c) __extension__ ({ \ 4157 #define vsraq_n_u16(a, b, __c) __extension__ ({ \
4158 uint16x8_t __a = (a); uint16x8_t __b = (b); \ 4158 uint16x8_t __a = (a); uint16x8_t __b = (b); \
4159 (uint16x8_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 25); }) 4159 (uint16x8_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); })
4160 #define vsraq_n_u32(a, b, __c) __extension__ ({ \ 4160 #define vsraq_n_u32(a, b, __c) __extension__ ({ \
4161 uint32x4_t __a = (a); uint32x4_t __b = (b); \ 4161 uint32x4_t __a = (a); uint32x4_t __b = (b); \
4162 (uint32x4_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 26); }) 4162 (uint32x4_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); })
4163 #define vsraq_n_u64(a, b, __c) __extension__ ({ \ 4163 #define vsraq_n_u64(a, b, __c) __extension__ ({ \
4164 uint64x2_t __a = (a); uint64x2_t __b = (b); \ 4164 uint64x2_t __a = (a); uint64x2_t __b = (b); \
4165 (uint64x2_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 27); }) 4165 (uint64x2_t)__builtin_neon_vsraq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); })
4166 4166
4167 #define vsri_n_s8(a, b, __c) __extension__ ({ \ 4167 #define vsri_n_s8(a, b, __c) __extension__ ({ \
4168 int8x8_t __a = (a); int8x8_t __b = (b); \ 4168 int8x8_t __a = (a); int8x8_t __b = (b); \
4169 (int8x8_t)__builtin_neon_vsri_n_v(__a, __b, __c, 0); }) 4169 (int8x8_t)__builtin_neon_vsri_n_v(__a, __b, __c, 0); })
4170 #define vsri_n_s16(a, b, __c) __extension__ ({ \ 4170 #define vsri_n_s16(a, b, __c) __extension__ ({ \
4171 int16x4_t __a = (a); int16x4_t __b = (b); \ 4171 int16x4_t __a = (a); int16x4_t __b = (b); \
4172 (int16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); }) 4172 (int16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 1); })
4173 #define vsri_n_s32(a, b, __c) __extension__ ({ \ 4173 #define vsri_n_s32(a, b, __c) __extension__ ({ \
4174 int32x2_t __a = (a); int32x2_t __b = (b); \ 4174 int32x2_t __a = (a); int32x2_t __b = (b); \
4175 (int32x2_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); }) 4175 (int32x2_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 2); })
4176 #define vsri_n_s64(a, b, __c) __extension__ ({ \ 4176 #define vsri_n_s64(a, b, __c) __extension__ ({ \
4177 int64x1_t __a = (a); int64x1_t __b = (b); \ 4177 int64x1_t __a = (a); int64x1_t __b = (b); \
4178 (int64x1_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); }) 4178 (int64x1_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 3); })
4179 #define vsri_n_u8(a, b, __c) __extension__ ({ \ 4179 #define vsri_n_u8(a, b, __c) __extension__ ({ \
4180 uint8x8_t __a = (a); uint8x8_t __b = (b); \ 4180 uint8x8_t __a = (a); uint8x8_t __b = (b); \
4181 (uint8x8_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 8); }) 4181 (uint8x8_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 16); })
4182 #define vsri_n_u16(a, b, __c) __extension__ ({ \ 4182 #define vsri_n_u16(a, b, __c) __extension__ ({ \
4183 uint16x4_t __a = (a); uint16x4_t __b = (b); \ 4183 uint16x4_t __a = (a); uint16x4_t __b = (b); \
4184 (uint16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 9); }) 4184 (uint16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 17); })
4185 #define vsri_n_u32(a, b, __c) __extension__ ({ \ 4185 #define vsri_n_u32(a, b, __c) __extension__ ({ \
4186 uint32x2_t __a = (a); uint32x2_t __b = (b); \ 4186 uint32x2_t __a = (a); uint32x2_t __b = (b); \
4187 (uint32x2_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 10); }) 4187 (uint32x2_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 18); })
4188 #define vsri_n_u64(a, b, __c) __extension__ ({ \ 4188 #define vsri_n_u64(a, b, __c) __extension__ ({ \
4189 uint64x1_t __a = (a); uint64x1_t __b = (b); \ 4189 uint64x1_t __a = (a); uint64x1_t __b = (b); \
4190 (uint64x1_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 11); }) 4190 (uint64x1_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 19); })
4191 #define vsri_n_p8(a, b, __c) __extension__ ({ \ 4191 #define vsri_n_p8(a, b, __c) __extension__ ({ \
4192 poly8x8_t __a = (a); poly8x8_t __b = (b); \ 4192 poly8x8_t __a = (a); poly8x8_t __b = (b); \
4193 (poly8x8_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); }) 4193 (poly8x8_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 4); })
4194 #define vsri_n_p16(a, b, __c) __extension__ ({ \ 4194 #define vsri_n_p16(a, b, __c) __extension__ ({ \
4195 poly16x4_t __a = (a); poly16x4_t __b = (b); \ 4195 poly16x4_t __a = (a); poly16x4_t __b = (b); \
4196 (poly16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 6); }) 4196 (poly16x4_t)__builtin_neon_vsri_n_v((int8x8_t)__a, (int8x8_t)__b, __c, 5); })
4197 #define vsriq_n_s8(a, b, __c) __extension__ ({ \ 4197 #define vsriq_n_s8(a, b, __c) __extension__ ({ \
4198 int8x16_t __a = (a); int8x16_t __b = (b); \ 4198 int8x16_t __a = (a); int8x16_t __b = (b); \
4199 (int8x16_t)__builtin_neon_vsriq_n_v(__a, __b, __c, 16); }) 4199 (int8x16_t)__builtin_neon_vsriq_n_v(__a, __b, __c, 32); })
4200 #define vsriq_n_s16(a, b, __c) __extension__ ({ \ 4200 #define vsriq_n_s16(a, b, __c) __extension__ ({ \
4201 int16x8_t __a = (a); int16x8_t __b = (b); \ 4201 int16x8_t __a = (a); int16x8_t __b = (b); \
4202 (int16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 17); }) 4202 (int16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 33); })
4203 #define vsriq_n_s32(a, b, __c) __extension__ ({ \ 4203 #define vsriq_n_s32(a, b, __c) __extension__ ({ \
4204 int32x4_t __a = (a); int32x4_t __b = (b); \ 4204 int32x4_t __a = (a); int32x4_t __b = (b); \
4205 (int32x4_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 18); }) 4205 (int32x4_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 34); })
4206 #define vsriq_n_s64(a, b, __c) __extension__ ({ \ 4206 #define vsriq_n_s64(a, b, __c) __extension__ ({ \
4207 int64x2_t __a = (a); int64x2_t __b = (b); \ 4207 int64x2_t __a = (a); int64x2_t __b = (b); \
4208 (int64x2_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 19); }) 4208 (int64x2_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 35); })
4209 #define vsriq_n_u8(a, b, __c) __extension__ ({ \ 4209 #define vsriq_n_u8(a, b, __c) __extension__ ({ \
4210 uint8x16_t __a = (a); uint8x16_t __b = (b); \ 4210 uint8x16_t __a = (a); uint8x16_t __b = (b); \
4211 (uint8x16_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 24); }) 4211 (uint8x16_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 48); })
4212 #define vsriq_n_u16(a, b, __c) __extension__ ({ \ 4212 #define vsriq_n_u16(a, b, __c) __extension__ ({ \
4213 uint16x8_t __a = (a); uint16x8_t __b = (b); \ 4213 uint16x8_t __a = (a); uint16x8_t __b = (b); \
4214 (uint16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 25); }) 4214 (uint16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 49); })
4215 #define vsriq_n_u32(a, b, __c) __extension__ ({ \ 4215 #define vsriq_n_u32(a, b, __c) __extension__ ({ \
4216 uint32x4_t __a = (a); uint32x4_t __b = (b); \ 4216 uint32x4_t __a = (a); uint32x4_t __b = (b); \
4217 (uint32x4_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 26); }) 4217 (uint32x4_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 50); })
4218 #define vsriq_n_u64(a, b, __c) __extension__ ({ \ 4218 #define vsriq_n_u64(a, b, __c) __extension__ ({ \
4219 uint64x2_t __a = (a); uint64x2_t __b = (b); \ 4219 uint64x2_t __a = (a); uint64x2_t __b = (b); \
4220 (uint64x2_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 27); }) 4220 (uint64x2_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 51); })
4221 #define vsriq_n_p8(a, b, __c) __extension__ ({ \ 4221 #define vsriq_n_p8(a, b, __c) __extension__ ({ \
4222 poly8x16_t __a = (a); poly8x16_t __b = (b); \ 4222 poly8x16_t __a = (a); poly8x16_t __b = (b); \
4223 (poly8x16_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 21); }) 4223 (poly8x16_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 36); })
4224 #define vsriq_n_p16(a, b, __c) __extension__ ({ \ 4224 #define vsriq_n_p16(a, b, __c) __extension__ ({ \
4225 poly16x8_t __a = (a); poly16x8_t __b = (b); \ 4225 poly16x8_t __a = (a); poly16x8_t __b = (b); \
4226 (poly16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 22); }) 4226 (poly16x8_t)__builtin_neon_vsriq_n_v((int8x16_t)__a, (int8x16_t)__b, __c, 37); })
4227 4227
4228 #define vst1q_u8(__a, b) __extension__ ({ \ 4228 #define vst1q_u8(__a, b) __extension__ ({ \
4229 uint8x16_t __b = (b); \ 4229 uint8x16_t __b = (b); \
4230 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 24); }) 4230 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 48); })
4231 #define vst1q_u16(__a, b) __extension__ ({ \ 4231 #define vst1q_u16(__a, b) __extension__ ({ \
4232 uint16x8_t __b = (b); \ 4232 uint16x8_t __b = (b); \
4233 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 25); }) 4233 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 49); })
4234 #define vst1q_u32(__a, b) __extension__ ({ \ 4234 #define vst1q_u32(__a, b) __extension__ ({ \
4235 uint32x4_t __b = (b); \ 4235 uint32x4_t __b = (b); \
4236 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 26); }) 4236 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 50); })
4237 #define vst1q_u64(__a, b) __extension__ ({ \ 4237 #define vst1q_u64(__a, b) __extension__ ({ \
4238 uint64x2_t __b = (b); \ 4238 uint64x2_t __b = (b); \
4239 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 27); }) 4239 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 51); })
4240 #define vst1q_s8(__a, b) __extension__ ({ \ 4240 #define vst1q_s8(__a, b) __extension__ ({ \
4241 int8x16_t __b = (b); \ 4241 int8x16_t __b = (b); \
4242 __builtin_neon_vst1q_v(__a, __b, 16); }) 4242 __builtin_neon_vst1q_v(__a, __b, 32); })
4243 #define vst1q_s16(__a, b) __extension__ ({ \ 4243 #define vst1q_s16(__a, b) __extension__ ({ \
4244 int16x8_t __b = (b); \ 4244 int16x8_t __b = (b); \
4245 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 17); }) 4245 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 33); })
4246 #define vst1q_s32(__a, b) __extension__ ({ \ 4246 #define vst1q_s32(__a, b) __extension__ ({ \
4247 int32x4_t __b = (b); \ 4247 int32x4_t __b = (b); \
4248 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 18); }) 4248 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 34); })
4249 #define vst1q_s64(__a, b) __extension__ ({ \ 4249 #define vst1q_s64(__a, b) __extension__ ({ \
4250 int64x2_t __b = (b); \ 4250 int64x2_t __b = (b); \
4251 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 19); }) 4251 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 35); })
4252 #define vst1q_f16(__a, b) __extension__ ({ \ 4252 #define vst1q_f16(__a, b) __extension__ ({ \
4253 float16x8_t __b = (b); \ 4253 float16x8_t __b = (b); \
4254 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 23); }) 4254 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 38); })
4255 #define vst1q_f32(__a, b) __extension__ ({ \ 4255 #define vst1q_f32(__a, b) __extension__ ({ \
4256 float32x4_t __b = (b); \ 4256 float32x4_t __b = (b); \
4257 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 20); }) 4257 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 39); })
4258 #define vst1q_p8(__a, b) __extension__ ({ \ 4258 #define vst1q_p8(__a, b) __extension__ ({ \
4259 poly8x16_t __b = (b); \ 4259 poly8x16_t __b = (b); \
4260 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 21); }) 4260 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 36); })
4261 #define vst1q_p16(__a, b) __extension__ ({ \ 4261 #define vst1q_p16(__a, b) __extension__ ({ \
4262 poly16x8_t __b = (b); \ 4262 poly16x8_t __b = (b); \
4263 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 22); }) 4263 __builtin_neon_vst1q_v(__a, (int8x16_t)__b, 37); })
4264 #define vst1_u8(__a, b) __extension__ ({ \ 4264 #define vst1_u8(__a, b) __extension__ ({ \
4265 uint8x8_t __b = (b); \ 4265 uint8x8_t __b = (b); \
4266 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 8); }) 4266 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 16); })
4267 #define vst1_u16(__a, b) __extension__ ({ \ 4267 #define vst1_u16(__a, b) __extension__ ({ \
4268 uint16x4_t __b = (b); \ 4268 uint16x4_t __b = (b); \
4269 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 9); }) 4269 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 17); })
4270 #define vst1_u32(__a, b) __extension__ ({ \ 4270 #define vst1_u32(__a, b) __extension__ ({ \
4271 uint32x2_t __b = (b); \ 4271 uint32x2_t __b = (b); \
4272 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 10); }) 4272 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 18); })
4273 #define vst1_u64(__a, b) __extension__ ({ \ 4273 #define vst1_u64(__a, b) __extension__ ({ \
4274 uint64x1_t __b = (b); \ 4274 uint64x1_t __b = (b); \
4275 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 11); }) 4275 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 19); })
4276 #define vst1_s8(__a, b) __extension__ ({ \ 4276 #define vst1_s8(__a, b) __extension__ ({ \
4277 int8x8_t __b = (b); \ 4277 int8x8_t __b = (b); \
4278 __builtin_neon_vst1_v(__a, __b, 0); }) 4278 __builtin_neon_vst1_v(__a, __b, 0); })
4279 #define vst1_s16(__a, b) __extension__ ({ \ 4279 #define vst1_s16(__a, b) __extension__ ({ \
4280 int16x4_t __b = (b); \ 4280 int16x4_t __b = (b); \
4281 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 1); }) 4281 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 1); })
4282 #define vst1_s32(__a, b) __extension__ ({ \ 4282 #define vst1_s32(__a, b) __extension__ ({ \
4283 int32x2_t __b = (b); \ 4283 int32x2_t __b = (b); \
4284 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 2); }) 4284 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 2); })
4285 #define vst1_s64(__a, b) __extension__ ({ \ 4285 #define vst1_s64(__a, b) __extension__ ({ \
4286 int64x1_t __b = (b); \ 4286 int64x1_t __b = (b); \
4287 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 3); }) 4287 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 3); })
4288 #define vst1_f16(__a, b) __extension__ ({ \ 4288 #define vst1_f16(__a, b) __extension__ ({ \
4289 float16x4_t __b = (b); \ 4289 float16x4_t __b = (b); \
4290 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 7); }) 4290 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 6); })
4291 #define vst1_f32(__a, b) __extension__ ({ \ 4291 #define vst1_f32(__a, b) __extension__ ({ \
4292 float32x2_t __b = (b); \ 4292 float32x2_t __b = (b); \
4293 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 4); }) 4293 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 7); })
4294 #define vst1_p8(__a, b) __extension__ ({ \ 4294 #define vst1_p8(__a, b) __extension__ ({ \
4295 poly8x8_t __b = (b); \ 4295 poly8x8_t __b = (b); \
4296 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 5); }) 4296 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 4); })
4297 #define vst1_p16(__a, b) __extension__ ({ \ 4297 #define vst1_p16(__a, b) __extension__ ({ \
4298 poly16x4_t __b = (b); \ 4298 poly16x4_t __b = (b); \
4299 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 6); }) 4299 __builtin_neon_vst1_v(__a, (int8x8_t)__b, 5); })
4300 4300
4301 #define vst1q_lane_u8(__a, b, __c) __extension__ ({ \ 4301 #define vst1q_lane_u8(__a, b, __c) __extension__ ({ \
4302 uint8x16_t __b = (b); \ 4302 uint8x16_t __b = (b); \
4303 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 24); }) 4303 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 48); })
4304 #define vst1q_lane_u16(__a, b, __c) __extension__ ({ \ 4304 #define vst1q_lane_u16(__a, b, __c) __extension__ ({ \
4305 uint16x8_t __b = (b); \ 4305 uint16x8_t __b = (b); \
4306 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 25); }) 4306 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 49); })
4307 #define vst1q_lane_u32(__a, b, __c) __extension__ ({ \ 4307 #define vst1q_lane_u32(__a, b, __c) __extension__ ({ \
4308 uint32x4_t __b = (b); \ 4308 uint32x4_t __b = (b); \
4309 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 26); }) 4309 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 50); })
4310 #define vst1q_lane_u64(__a, b, __c) __extension__ ({ \ 4310 #define vst1q_lane_u64(__a, b, __c) __extension__ ({ \
4311 uint64x2_t __b = (b); \ 4311 uint64x2_t __b = (b); \
4312 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 27); }) 4312 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 51); })
4313 #define vst1q_lane_s8(__a, b, __c) __extension__ ({ \ 4313 #define vst1q_lane_s8(__a, b, __c) __extension__ ({ \
4314 int8x16_t __b = (b); \ 4314 int8x16_t __b = (b); \
4315 __builtin_neon_vst1q_lane_v(__a, __b, __c, 16); }) 4315 __builtin_neon_vst1q_lane_v(__a, __b, __c, 32); })
4316 #define vst1q_lane_s16(__a, b, __c) __extension__ ({ \ 4316 #define vst1q_lane_s16(__a, b, __c) __extension__ ({ \
4317 int16x8_t __b = (b); \ 4317 int16x8_t __b = (b); \
4318 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 17); }) 4318 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 33); })
4319 #define vst1q_lane_s32(__a, b, __c) __extension__ ({ \ 4319 #define vst1q_lane_s32(__a, b, __c) __extension__ ({ \
4320 int32x4_t __b = (b); \ 4320 int32x4_t __b = (b); \
4321 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 18); }) 4321 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 34); })
4322 #define vst1q_lane_s64(__a, b, __c) __extension__ ({ \ 4322 #define vst1q_lane_s64(__a, b, __c) __extension__ ({ \
4323 int64x2_t __b = (b); \ 4323 int64x2_t __b = (b); \
4324 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 19); }) 4324 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 35); })
4325 #define vst1q_lane_f16(__a, b, __c) __extension__ ({ \ 4325 #define vst1q_lane_f16(__a, b, __c) __extension__ ({ \
4326 float16x8_t __b = (b); \ 4326 float16x8_t __b = (b); \
4327 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 23); }) 4327 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 38); })
4328 #define vst1q_lane_f32(__a, b, __c) __extension__ ({ \ 4328 #define vst1q_lane_f32(__a, b, __c) __extension__ ({ \
4329 float32x4_t __b = (b); \ 4329 float32x4_t __b = (b); \
4330 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 20); }) 4330 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 39); })
4331 #define vst1q_lane_p8(__a, b, __c) __extension__ ({ \ 4331 #define vst1q_lane_p8(__a, b, __c) __extension__ ({ \
4332 poly8x16_t __b = (b); \ 4332 poly8x16_t __b = (b); \
4333 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 21); }) 4333 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 36); })
4334 #define vst1q_lane_p16(__a, b, __c) __extension__ ({ \ 4334 #define vst1q_lane_p16(__a, b, __c) __extension__ ({ \
4335 poly16x8_t __b = (b); \ 4335 poly16x8_t __b = (b); \
4336 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 22); }) 4336 __builtin_neon_vst1q_lane_v(__a, (int8x16_t)__b, __c, 37); })
4337 #define vst1_lane_u8(__a, b, __c) __extension__ ({ \ 4337 #define vst1_lane_u8(__a, b, __c) __extension__ ({ \
4338 uint8x8_t __b = (b); \ 4338 uint8x8_t __b = (b); \
4339 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 8); }) 4339 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 16); })
4340 #define vst1_lane_u16(__a, b, __c) __extension__ ({ \ 4340 #define vst1_lane_u16(__a, b, __c) __extension__ ({ \
4341 uint16x4_t __b = (b); \ 4341 uint16x4_t __b = (b); \
4342 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 9); }) 4342 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 17); })
4343 #define vst1_lane_u32(__a, b, __c) __extension__ ({ \ 4343 #define vst1_lane_u32(__a, b, __c) __extension__ ({ \
4344 uint32x2_t __b = (b); \ 4344 uint32x2_t __b = (b); \
4345 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 10); }) 4345 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 18); })
4346 #define vst1_lane_u64(__a, b, __c) __extension__ ({ \ 4346 #define vst1_lane_u64(__a, b, __c) __extension__ ({ \
4347 uint64x1_t __b = (b); \ 4347 uint64x1_t __b = (b); \
4348 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 11); }) 4348 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 19); })
4349 #define vst1_lane_s8(__a, b, __c) __extension__ ({ \ 4349 #define vst1_lane_s8(__a, b, __c) __extension__ ({ \
4350 int8x8_t __b = (b); \ 4350 int8x8_t __b = (b); \
4351 __builtin_neon_vst1_lane_v(__a, __b, __c, 0); }) 4351 __builtin_neon_vst1_lane_v(__a, __b, __c, 0); })
4352 #define vst1_lane_s16(__a, b, __c) __extension__ ({ \ 4352 #define vst1_lane_s16(__a, b, __c) __extension__ ({ \
4353 int16x4_t __b = (b); \ 4353 int16x4_t __b = (b); \
4354 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 1); }) 4354 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 1); })
4355 #define vst1_lane_s32(__a, b, __c) __extension__ ({ \ 4355 #define vst1_lane_s32(__a, b, __c) __extension__ ({ \
4356 int32x2_t __b = (b); \ 4356 int32x2_t __b = (b); \
4357 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 2); }) 4357 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 2); })
4358 #define vst1_lane_s64(__a, b, __c) __extension__ ({ \ 4358 #define vst1_lane_s64(__a, b, __c) __extension__ ({ \
4359 int64x1_t __b = (b); \ 4359 int64x1_t __b = (b); \
4360 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 3); }) 4360 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 3); })
4361 #define vst1_lane_f16(__a, b, __c) __extension__ ({ \ 4361 #define vst1_lane_f16(__a, b, __c) __extension__ ({ \
4362 float16x4_t __b = (b); \ 4362 float16x4_t __b = (b); \
4363 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 7); }) 4363 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 6); })
4364 #define vst1_lane_f32(__a, b, __c) __extension__ ({ \ 4364 #define vst1_lane_f32(__a, b, __c) __extension__ ({ \
4365 float32x2_t __b = (b); \ 4365 float32x2_t __b = (b); \
4366 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 4); }) 4366 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 7); })
4367 #define vst1_lane_p8(__a, b, __c) __extension__ ({ \ 4367 #define vst1_lane_p8(__a, b, __c) __extension__ ({ \
4368 poly8x8_t __b = (b); \ 4368 poly8x8_t __b = (b); \
4369 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 5); }) 4369 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 4); })
4370 #define vst1_lane_p16(__a, b, __c) __extension__ ({ \ 4370 #define vst1_lane_p16(__a, b, __c) __extension__ ({ \
4371 poly16x4_t __b = (b); \ 4371 poly16x4_t __b = (b); \
4372 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 6); }) 4372 __builtin_neon_vst1_lane_v(__a, (int8x8_t)__b, __c, 5); })
4373 4373
4374 #define vst2q_u8(__a, b) __extension__ ({ \ 4374 #define vst2q_u8(__a, b) __extension__ ({ \
4375 uint8x16x2_t __b = (b); \ 4375 uint8x16x2_t __b = (b); \
4376 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 24); }) 4376 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 48); })
4377 #define vst2q_u16(__a, b) __extension__ ({ \ 4377 #define vst2q_u16(__a, b) __extension__ ({ \
4378 uint16x8x2_t __b = (b); \ 4378 uint16x8x2_t __b = (b); \
4379 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 25); }) 4379 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 49); })
4380 #define vst2q_u32(__a, b) __extension__ ({ \ 4380 #define vst2q_u32(__a, b) __extension__ ({ \
4381 uint32x4x2_t __b = (b); \ 4381 uint32x4x2_t __b = (b); \
4382 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 26); }) 4382 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 50); })
4383 #define vst2q_s8(__a, b) __extension__ ({ \ 4383 #define vst2q_s8(__a, b) __extension__ ({ \
4384 int8x16x2_t __b = (b); \ 4384 int8x16x2_t __b = (b); \
4385 __builtin_neon_vst2q_v(__a, __b.val[0], __b.val[1], 16); }) 4385 __builtin_neon_vst2q_v(__a, __b.val[0], __b.val[1], 32); })
4386 #define vst2q_s16(__a, b) __extension__ ({ \ 4386 #define vst2q_s16(__a, b) __extension__ ({ \
4387 int16x8x2_t __b = (b); \ 4387 int16x8x2_t __b = (b); \
4388 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 17); }) 4388 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 33); })
4389 #define vst2q_s32(__a, b) __extension__ ({ \ 4389 #define vst2q_s32(__a, b) __extension__ ({ \
4390 int32x4x2_t __b = (b); \ 4390 int32x4x2_t __b = (b); \
4391 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 18); }) 4391 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 34); })
4392 #define vst2q_f16(__a, b) __extension__ ({ \ 4392 #define vst2q_f16(__a, b) __extension__ ({ \
4393 float16x8x2_t __b = (b); \ 4393 float16x8x2_t __b = (b); \
4394 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 23); }) 4394 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 38); })
4395 #define vst2q_f32(__a, b) __extension__ ({ \ 4395 #define vst2q_f32(__a, b) __extension__ ({ \
4396 float32x4x2_t __b = (b); \ 4396 float32x4x2_t __b = (b); \
4397 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 20); }) 4397 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 39); })
4398 #define vst2q_p8(__a, b) __extension__ ({ \ 4398 #define vst2q_p8(__a, b) __extension__ ({ \
4399 poly8x16x2_t __b = (b); \ 4399 poly8x16x2_t __b = (b); \
4400 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 21); }) 4400 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 36); })
4401 #define vst2q_p16(__a, b) __extension__ ({ \ 4401 #define vst2q_p16(__a, b) __extension__ ({ \
4402 poly16x8x2_t __b = (b); \ 4402 poly16x8x2_t __b = (b); \
4403 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 22); }) 4403 __builtin_neon_vst2q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], 37); })
4404 #define vst2_u8(__a, b) __extension__ ({ \ 4404 #define vst2_u8(__a, b) __extension__ ({ \
4405 uint8x8x2_t __b = (b); \ 4405 uint8x8x2_t __b = (b); \
4406 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 8); }) 4406 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 16); })
4407 #define vst2_u16(__a, b) __extension__ ({ \ 4407 #define vst2_u16(__a, b) __extension__ ({ \
4408 uint16x4x2_t __b = (b); \ 4408 uint16x4x2_t __b = (b); \
4409 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 9); }) 4409 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 17); })
4410 #define vst2_u32(__a, b) __extension__ ({ \ 4410 #define vst2_u32(__a, b) __extension__ ({ \
4411 uint32x2x2_t __b = (b); \ 4411 uint32x2x2_t __b = (b); \
4412 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 10); }) 4412 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 18); })
4413 #define vst2_u64(__a, b) __extension__ ({ \ 4413 #define vst2_u64(__a, b) __extension__ ({ \
4414 uint64x1x2_t __b = (b); \ 4414 uint64x1x2_t __b = (b); \
4415 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 11); }) 4415 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 19); })
4416 #define vst2_s8(__a, b) __extension__ ({ \ 4416 #define vst2_s8(__a, b) __extension__ ({ \
4417 int8x8x2_t __b = (b); \ 4417 int8x8x2_t __b = (b); \
4418 __builtin_neon_vst2_v(__a, __b.val[0], __b.val[1], 0); }) 4418 __builtin_neon_vst2_v(__a, __b.val[0], __b.val[1], 0); })
4419 #define vst2_s16(__a, b) __extension__ ({ \ 4419 #define vst2_s16(__a, b) __extension__ ({ \
4420 int16x4x2_t __b = (b); \ 4420 int16x4x2_t __b = (b); \
4421 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 1); }) 4421 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 1); })
4422 #define vst2_s32(__a, b) __extension__ ({ \ 4422 #define vst2_s32(__a, b) __extension__ ({ \
4423 int32x2x2_t __b = (b); \ 4423 int32x2x2_t __b = (b); \
4424 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 2); }) 4424 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 2); })
4425 #define vst2_s64(__a, b) __extension__ ({ \ 4425 #define vst2_s64(__a, b) __extension__ ({ \
4426 int64x1x2_t __b = (b); \ 4426 int64x1x2_t __b = (b); \
4427 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 3); }) 4427 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 3); })
4428 #define vst2_f16(__a, b) __extension__ ({ \ 4428 #define vst2_f16(__a, b) __extension__ ({ \
4429 float16x4x2_t __b = (b); \ 4429 float16x4x2_t __b = (b); \
4430 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 7); }) 4430 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 6); })
4431 #define vst2_f32(__a, b) __extension__ ({ \ 4431 #define vst2_f32(__a, b) __extension__ ({ \
4432 float32x2x2_t __b = (b); \ 4432 float32x2x2_t __b = (b); \
4433 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 4); }) 4433 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 7); })
4434 #define vst2_p8(__a, b) __extension__ ({ \ 4434 #define vst2_p8(__a, b) __extension__ ({ \
4435 poly8x8x2_t __b = (b); \ 4435 poly8x8x2_t __b = (b); \
4436 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 5); }) 4436 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 4); })
4437 #define vst2_p16(__a, b) __extension__ ({ \ 4437 #define vst2_p16(__a, b) __extension__ ({ \
4438 poly16x4x2_t __b = (b); \ 4438 poly16x4x2_t __b = (b); \
4439 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 6); }) 4439 __builtin_neon_vst2_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], 5); })
4440 4440
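The vst2*/vst3*/vst4* forms defined above and below store two, three or four registers with their elements interleaved in memory; the bare integer at the end of each __builtin_neon_* call is an internal element-type code consumed by the builtin (it is these codes that this patch renumbers), not a value user code supplies. A minimal usage sketch, not part of the header, assuming the matching vld2q_u8 loader defined earlier in this file and compilation for ARM with NEON enabled; the function name and loop are illustrative only:

  #include <arm_neon.h>

  /* Sketch only: swap the two interleaved byte channels of a buffer.
     vld2q_u8 deinterleaves 32 bytes into val[0]/val[1]; vst2q_u8
     re-interleaves them when storing. */
  static void swap_channels(uint8_t *buf, int n) {
    for (int i = 0; i + 32 <= n; i += 32) {
      uint8x16x2_t v = vld2q_u8(buf + i);
      uint8x16_t t = v.val[0];
      v.val[0] = v.val[1];
      v.val[1] = t;
      vst2q_u8(buf + i, v);
    }
  }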
4441 #define vst2q_lane_u16(__a, b, __c) __extension__ ({ \ 4441 #define vst2q_lane_u16(__a, b, __c) __extension__ ({ \
4442 uint16x8x2_t __b = (b); \ 4442 uint16x8x2_t __b = (b); \
4443 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 25); }) 4443 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 49); })
4444 #define vst2q_lane_u32(__a, b, __c) __extension__ ({ \ 4444 #define vst2q_lane_u32(__a, b, __c) __extension__ ({ \
4445 uint32x4x2_t __b = (b); \ 4445 uint32x4x2_t __b = (b); \
4446 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 26); }) 4446 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 50); })
4447 #define vst2q_lane_s16(__a, b, __c) __extension__ ({ \ 4447 #define vst2q_lane_s16(__a, b, __c) __extension__ ({ \
4448 int16x8x2_t __b = (b); \ 4448 int16x8x2_t __b = (b); \
4449 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 17); }) 4449 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 33); })
4450 #define vst2q_lane_s32(__a, b, __c) __extension__ ({ \ 4450 #define vst2q_lane_s32(__a, b, __c) __extension__ ({ \
4451 int32x4x2_t __b = (b); \ 4451 int32x4x2_t __b = (b); \
4452 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 18); }) 4452 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 34); })
4453 #define vst2q_lane_f16(__a, b, __c) __extension__ ({ \ 4453 #define vst2q_lane_f16(__a, b, __c) __extension__ ({ \
4454 float16x8x2_t __b = (b); \ 4454 float16x8x2_t __b = (b); \
4455 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 23); }) 4455 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 38); })
4456 #define vst2q_lane_f32(__a, b, __c) __extension__ ({ \ 4456 #define vst2q_lane_f32(__a, b, __c) __extension__ ({ \
4457 float32x4x2_t __b = (b); \ 4457 float32x4x2_t __b = (b); \
4458 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 20); }) 4458 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 39); })
4459 #define vst2q_lane_p16(__a, b, __c) __extension__ ({ \ 4459 #define vst2q_lane_p16(__a, b, __c) __extension__ ({ \
4460 poly16x8x2_t __b = (b); \ 4460 poly16x8x2_t __b = (b); \
4461 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 22); }) 4461 __builtin_neon_vst2q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], __c, 37); })
4462 #define vst2_lane_u8(__a, b, __c) __extension__ ({ \ 4462 #define vst2_lane_u8(__a, b, __c) __extension__ ({ \
4463 uint8x8x2_t __b = (b); \ 4463 uint8x8x2_t __b = (b); \
4464 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 8); }) 4464 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 16); })
4465 #define vst2_lane_u16(__a, b, __c) __extension__ ({ \ 4465 #define vst2_lane_u16(__a, b, __c) __extension__ ({ \
4466 uint16x4x2_t __b = (b); \ 4466 uint16x4x2_t __b = (b); \
4467 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 9); }) 4467 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 17); })
4468 #define vst2_lane_u32(__a, b, __c) __extension__ ({ \ 4468 #define vst2_lane_u32(__a, b, __c) __extension__ ({ \
4469 uint32x2x2_t __b = (b); \ 4469 uint32x2x2_t __b = (b); \
4470 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 10); }) 4470 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 18); })
4471 #define vst2_lane_s8(__a, b, __c) __extension__ ({ \ 4471 #define vst2_lane_s8(__a, b, __c) __extension__ ({ \
4472 int8x8x2_t __b = (b); \ 4472 int8x8x2_t __b = (b); \
4473 __builtin_neon_vst2_lane_v(__a, __b.val[0], __b.val[1], __c, 0); }) 4473 __builtin_neon_vst2_lane_v(__a, __b.val[0], __b.val[1], __c, 0); })
4474 #define vst2_lane_s16(__a, b, __c) __extension__ ({ \ 4474 #define vst2_lane_s16(__a, b, __c) __extension__ ({ \
4475 int16x4x2_t __b = (b); \ 4475 int16x4x2_t __b = (b); \
4476 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 1); }) 4476 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 1); })
4477 #define vst2_lane_s32(__a, b, __c) __extension__ ({ \ 4477 #define vst2_lane_s32(__a, b, __c) __extension__ ({ \
4478 int32x2x2_t __b = (b); \ 4478 int32x2x2_t __b = (b); \
4479 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 2); }) 4479 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 2); })
4480 #define vst2_lane_f16(__a, b, __c) __extension__ ({ \ 4480 #define vst2_lane_f16(__a, b, __c) __extension__ ({ \
4481 float16x4x2_t __b = (b); \ 4481 float16x4x2_t __b = (b); \
4482 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 7); }) 4482 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 6); })
4483 #define vst2_lane_f32(__a, b, __c) __extension__ ({ \ 4483 #define vst2_lane_f32(__a, b, __c) __extension__ ({ \
4484 float32x2x2_t __b = (b); \ 4484 float32x2x2_t __b = (b); \
4485 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 4); }) 4485 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 7); })
4486 #define vst2_lane_p8(__a, b, __c) __extension__ ({ \ 4486 #define vst2_lane_p8(__a, b, __c) __extension__ ({ \
4487 poly8x8x2_t __b = (b); \ 4487 poly8x8x2_t __b = (b); \
4488 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 5); }) 4488 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 4); })
4489 #define vst2_lane_p16(__a, b, __c) __extension__ ({ \ 4489 #define vst2_lane_p16(__a, b, __c) __extension__ ({ \
4490 poly16x4x2_t __b = (b); \ 4490 poly16x4x2_t __b = (b); \
4491 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 6); }) 4491 __builtin_neon_vst2_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], __c, 5); })
4492 4492
4493 #define vst3q_u8(__a, b) __extension__ ({ \ 4493 #define vst3q_u8(__a, b) __extension__ ({ \
4494 uint8x16x3_t __b = (b); \ 4494 uint8x16x3_t __b = (b); \
4495 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 24); }) 4495 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 48); })
4496 #define vst3q_u16(__a, b) __extension__ ({ \ 4496 #define vst3q_u16(__a, b) __extension__ ({ \
4497 uint16x8x3_t __b = (b); \ 4497 uint16x8x3_t __b = (b); \
4498 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 25); }) 4498 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 49); })
4499 #define vst3q_u32(__a, b) __extension__ ({ \ 4499 #define vst3q_u32(__a, b) __extension__ ({ \
4500 uint32x4x3_t __b = (b); \ 4500 uint32x4x3_t __b = (b); \
4501 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 26); }) 4501 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 50); })
4502 #define vst3q_s8(__a, b) __extension__ ({ \ 4502 #define vst3q_s8(__a, b) __extension__ ({ \
4503 int8x16x3_t __b = (b); \ 4503 int8x16x3_t __b = (b); \
4504 __builtin_neon_vst3q_v(__a, __b.val[0], __b.val[1], __b.val[2], 16); }) 4504 __builtin_neon_vst3q_v(__a, __b.val[0], __b.val[1], __b.val[2], 32); })
4505 #define vst3q_s16(__a, b) __extension__ ({ \ 4505 #define vst3q_s16(__a, b) __extension__ ({ \
4506 int16x8x3_t __b = (b); \ 4506 int16x8x3_t __b = (b); \
4507 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 17); }) 4507 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 33); })
4508 #define vst3q_s32(__a, b) __extension__ ({ \ 4508 #define vst3q_s32(__a, b) __extension__ ({ \
4509 int32x4x3_t __b = (b); \ 4509 int32x4x3_t __b = (b); \
4510 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 18); }) 4510 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 34); })
4511 #define vst3q_f16(__a, b) __extension__ ({ \ 4511 #define vst3q_f16(__a, b) __extension__ ({ \
4512 float16x8x3_t __b = (b); \ 4512 float16x8x3_t __b = (b); \
4513 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 23); }) 4513 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 38); })
4514 #define vst3q_f32(__a, b) __extension__ ({ \ 4514 #define vst3q_f32(__a, b) __extension__ ({ \
4515 float32x4x3_t __b = (b); \ 4515 float32x4x3_t __b = (b); \
4516 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 20); }) 4516 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 39); })
4517 #define vst3q_p8(__a, b) __extension__ ({ \ 4517 #define vst3q_p8(__a, b) __extension__ ({ \
4518 poly8x16x3_t __b = (b); \ 4518 poly8x16x3_t __b = (b); \
4519 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 21); }) 4519 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 36); })
4520 #define vst3q_p16(__a, b) __extension__ ({ \ 4520 #define vst3q_p16(__a, b) __extension__ ({ \
4521 poly16x8x3_t __b = (b); \ 4521 poly16x8x3_t __b = (b); \
4522 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 22); }) 4522 __builtin_neon_vst3q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], 37); })
4523 #define vst3_u8(__a, b) __extension__ ({ \ 4523 #define vst3_u8(__a, b) __extension__ ({ \
4524 uint8x8x3_t __b = (b); \ 4524 uint8x8x3_t __b = (b); \
4525 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 8); }) 4525 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 16); })
4526 #define vst3_u16(__a, b) __extension__ ({ \ 4526 #define vst3_u16(__a, b) __extension__ ({ \
4527 uint16x4x3_t __b = (b); \ 4527 uint16x4x3_t __b = (b); \
4528 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 9); }) 4528 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 17); })
4529 #define vst3_u32(__a, b) __extension__ ({ \ 4529 #define vst3_u32(__a, b) __extension__ ({ \
4530 uint32x2x3_t __b = (b); \ 4530 uint32x2x3_t __b = (b); \
4531 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 10); }) 4531 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 18); })
4532 #define vst3_u64(__a, b) __extension__ ({ \ 4532 #define vst3_u64(__a, b) __extension__ ({ \
4533 uint64x1x3_t __b = (b); \ 4533 uint64x1x3_t __b = (b); \
4534 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 11); }) 4534 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 19); })
4535 #define vst3_s8(__a, b) __extension__ ({ \ 4535 #define vst3_s8(__a, b) __extension__ ({ \
4536 int8x8x3_t __b = (b); \ 4536 int8x8x3_t __b = (b); \
4537 __builtin_neon_vst3_v(__a, __b.val[0], __b.val[1], __b.val[2], 0); }) 4537 __builtin_neon_vst3_v(__a, __b.val[0], __b.val[1], __b.val[2], 0); })
4538 #define vst3_s16(__a, b) __extension__ ({ \ 4538 #define vst3_s16(__a, b) __extension__ ({ \
4539 int16x4x3_t __b = (b); \ 4539 int16x4x3_t __b = (b); \
4540 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 1); }) 4540 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 1); })
4541 #define vst3_s32(__a, b) __extension__ ({ \ 4541 #define vst3_s32(__a, b) __extension__ ({ \
4542 int32x2x3_t __b = (b); \ 4542 int32x2x3_t __b = (b); \
4543 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 2); }) 4543 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 2); })
4544 #define vst3_s64(__a, b) __extension__ ({ \ 4544 #define vst3_s64(__a, b) __extension__ ({ \
4545 int64x1x3_t __b = (b); \ 4545 int64x1x3_t __b = (b); \
4546 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 3); }) 4546 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 3); })
4547 #define vst3_f16(__a, b) __extension__ ({ \ 4547 #define vst3_f16(__a, b) __extension__ ({ \
4548 float16x4x3_t __b = (b); \ 4548 float16x4x3_t __b = (b); \
4549 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 7); }) 4549 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 6); })
4550 #define vst3_f32(__a, b) __extension__ ({ \ 4550 #define vst3_f32(__a, b) __extension__ ({ \
4551 float32x2x3_t __b = (b); \ 4551 float32x2x3_t __b = (b); \
4552 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 4); }) 4552 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 7); })
4553 #define vst3_p8(__a, b) __extension__ ({ \ 4553 #define vst3_p8(__a, b) __extension__ ({ \
4554 poly8x8x3_t __b = (b); \ 4554 poly8x8x3_t __b = (b); \
4555 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 5); }) 4555 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 4); })
4556 #define vst3_p16(__a, b) __extension__ ({ \ 4556 #define vst3_p16(__a, b) __extension__ ({ \
4557 poly16x4x3_t __b = (b); \ 4557 poly16x4x3_t __b = (b); \
4558 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 6); }) 4558 __builtin_neon_vst3_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], 5); })
4559 4559
4560 #define vst3q_lane_u16(__a, b, __c) __extension__ ({ \ 4560 #define vst3q_lane_u16(__a, b, __c) __extension__ ({ \
4561 uint16x8x3_t __b = (b); \ 4561 uint16x8x3_t __b = (b); \
4562 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 25); }) 4562 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 49); })
4563 #define vst3q_lane_u32(__a, b, __c) __extension__ ({ \ 4563 #define vst3q_lane_u32(__a, b, __c) __extension__ ({ \
4564 uint32x4x3_t __b = (b); \ 4564 uint32x4x3_t __b = (b); \
4565 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 26); }) 4565 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 50); })
4566 #define vst3q_lane_s16(__a, b, __c) __extension__ ({ \ 4566 #define vst3q_lane_s16(__a, b, __c) __extension__ ({ \
4567 int16x8x3_t __b = (b); \ 4567 int16x8x3_t __b = (b); \
4568 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 17); }) 4568 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 33); })
4569 #define vst3q_lane_s32(__a, b, __c) __extension__ ({ \ 4569 #define vst3q_lane_s32(__a, b, __c) __extension__ ({ \
4570 int32x4x3_t __b = (b); \ 4570 int32x4x3_t __b = (b); \
4571 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 18); }) 4571 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 34); })
4572 #define vst3q_lane_f16(__a, b, __c) __extension__ ({ \ 4572 #define vst3q_lane_f16(__a, b, __c) __extension__ ({ \
4573 float16x8x3_t __b = (b); \ 4573 float16x8x3_t __b = (b); \
4574 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 23); }) 4574 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 38); })
4575 #define vst3q_lane_f32(__a, b, __c) __extension__ ({ \ 4575 #define vst3q_lane_f32(__a, b, __c) __extension__ ({ \
4576 float32x4x3_t __b = (b); \ 4576 float32x4x3_t __b = (b); \
4577 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 20); }) 4577 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 39); })
4578 #define vst3q_lane_p16(__a, b, __c) __extension__ ({ \ 4578 #define vst3q_lane_p16(__a, b, __c) __extension__ ({ \
4579 poly16x8x3_t __b = (b); \ 4579 poly16x8x3_t __b = (b); \
4580 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 22); }) 4580 __builtin_neon_vst3q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], __c, 37); })
4581 #define vst3_lane_u8(__a, b, __c) __extension__ ({ \ 4581 #define vst3_lane_u8(__a, b, __c) __extension__ ({ \
4582 uint8x8x3_t __b = (b); \ 4582 uint8x8x3_t __b = (b); \
4583 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 8); }) 4583 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 16); })
4584 #define vst3_lane_u16(__a, b, __c) __extension__ ({ \ 4584 #define vst3_lane_u16(__a, b, __c) __extension__ ({ \
4585 uint16x4x3_t __b = (b); \ 4585 uint16x4x3_t __b = (b); \
4586 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 9); }) 4586 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 17); })
4587 #define vst3_lane_u32(__a, b, __c) __extension__ ({ \ 4587 #define vst3_lane_u32(__a, b, __c) __extension__ ({ \
4588 uint32x2x3_t __b = (b); \ 4588 uint32x2x3_t __b = (b); \
4589 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 10); }) 4589 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 18); })
4590 #define vst3_lane_s8(__a, b, __c) __extension__ ({ \ 4590 #define vst3_lane_s8(__a, b, __c) __extension__ ({ \
4591 int8x8x3_t __b = (b); \ 4591 int8x8x3_t __b = (b); \
4592 __builtin_neon_vst3_lane_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 0); }) 4592 __builtin_neon_vst3_lane_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 0); })
4593 #define vst3_lane_s16(__a, b, __c) __extension__ ({ \ 4593 #define vst3_lane_s16(__a, b, __c) __extension__ ({ \
4594 int16x4x3_t __b = (b); \ 4594 int16x4x3_t __b = (b); \
4595 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 1); }) 4595 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 1); })
4596 #define vst3_lane_s32(__a, b, __c) __extension__ ({ \ 4596 #define vst3_lane_s32(__a, b, __c) __extension__ ({ \
4597 int32x2x3_t __b = (b); \ 4597 int32x2x3_t __b = (b); \
4598 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 2); }) 4598 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 2); })
4599 #define vst3_lane_f16(__a, b, __c) __extension__ ({ \ 4599 #define vst3_lane_f16(__a, b, __c) __extension__ ({ \
4600 float16x4x3_t __b = (b); \ 4600 float16x4x3_t __b = (b); \
4601 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 7); }) 4601 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 6); })
4602 #define vst3_lane_f32(__a, b, __c) __extension__ ({ \ 4602 #define vst3_lane_f32(__a, b, __c) __extension__ ({ \
4603 float32x2x3_t __b = (b); \ 4603 float32x2x3_t __b = (b); \
4604 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 4); }) 4604 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 7); })
4605 #define vst3_lane_p8(__a, b, __c) __extension__ ({ \ 4605 #define vst3_lane_p8(__a, b, __c) __extension__ ({ \
4606 poly8x8x3_t __b = (b); \ 4606 poly8x8x3_t __b = (b); \
4607 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 5); }) 4607 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 4); })
4608 #define vst3_lane_p16(__a, b, __c) __extension__ ({ \ 4608 #define vst3_lane_p16(__a, b, __c) __extension__ ({ \
4609 poly16x4x3_t __b = (b); \ 4609 poly16x4x3_t __b = (b); \
4610 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 6); }) 4610 __builtin_neon_vst3_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], __c, 5); })
4611 4611
4612 #define vst4q_u8(__a, b) __extension__ ({ \ 4612 #define vst4q_u8(__a, b) __extension__ ({ \
4613 uint8x16x4_t __b = (b); \ 4613 uint8x16x4_t __b = (b); \
4614 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 24); }) 4614 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 48); })
4615 #define vst4q_u16(__a, b) __extension__ ({ \ 4615 #define vst4q_u16(__a, b) __extension__ ({ \
4616 uint16x8x4_t __b = (b); \ 4616 uint16x8x4_t __b = (b); \
4617 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 25); }) 4617 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 49); })
4618 #define vst4q_u32(__a, b) __extension__ ({ \ 4618 #define vst4q_u32(__a, b) __extension__ ({ \
4619 uint32x4x4_t __b = (b); \ 4619 uint32x4x4_t __b = (b); \
4620 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 26); }) 4620 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 50); })
4621 #define vst4q_s8(__a, b) __extension__ ({ \ 4621 #define vst4q_s8(__a, b) __extension__ ({ \
4622 int8x16x4_t __b = (b); \ 4622 int8x16x4_t __b = (b); \
4623 __builtin_neon_vst4q_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], 16); }) 4623 __builtin_neon_vst4q_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], 32); })
4624 #define vst4q_s16(__a, b) __extension__ ({ \ 4624 #define vst4q_s16(__a, b) __extension__ ({ \
4625 int16x8x4_t __b = (b); \ 4625 int16x8x4_t __b = (b); \
4626 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 17); }) 4626 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 33); })
4627 #define vst4q_s32(__a, b) __extension__ ({ \ 4627 #define vst4q_s32(__a, b) __extension__ ({ \
4628 int32x4x4_t __b = (b); \ 4628 int32x4x4_t __b = (b); \
4629 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 18); }) 4629 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 34); })
4630 #define vst4q_f16(__a, b) __extension__ ({ \ 4630 #define vst4q_f16(__a, b) __extension__ ({ \
4631 float16x8x4_t __b = (b); \ 4631 float16x8x4_t __b = (b); \
4632 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 23); }) 4632 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 38); })
4633 #define vst4q_f32(__a, b) __extension__ ({ \ 4633 #define vst4q_f32(__a, b) __extension__ ({ \
4634 float32x4x4_t __b = (b); \ 4634 float32x4x4_t __b = (b); \
4635 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 20); }) 4635 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 39); })
4636 #define vst4q_p8(__a, b) __extension__ ({ \ 4636 #define vst4q_p8(__a, b) __extension__ ({ \
4637 poly8x16x4_t __b = (b); \ 4637 poly8x16x4_t __b = (b); \
4638 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 21); }) 4638 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 36); })
4639 #define vst4q_p16(__a, b) __extension__ ({ \ 4639 #define vst4q_p16(__a, b) __extension__ ({ \
4640 poly16x8x4_t __b = (b); \ 4640 poly16x8x4_t __b = (b); \
4641 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 22); }) 4641 __builtin_neon_vst4q_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], 37); })
4642 #define vst4_u8(__a, b) __extension__ ({ \ 4642 #define vst4_u8(__a, b) __extension__ ({ \
4643 uint8x8x4_t __b = (b); \ 4643 uint8x8x4_t __b = (b); \
4644 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 8); }) 4644 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 16); })
4645 #define vst4_u16(__a, b) __extension__ ({ \ 4645 #define vst4_u16(__a, b) __extension__ ({ \
4646 uint16x4x4_t __b = (b); \ 4646 uint16x4x4_t __b = (b); \
4647 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 9); }) 4647 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 17); })
4648 #define vst4_u32(__a, b) __extension__ ({ \ 4648 #define vst4_u32(__a, b) __extension__ ({ \
4649 uint32x2x4_t __b = (b); \ 4649 uint32x2x4_t __b = (b); \
4650 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 10); }) 4650 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 18); })
4651 #define vst4_u64(__a, b) __extension__ ({ \ 4651 #define vst4_u64(__a, b) __extension__ ({ \
4652 uint64x1x4_t __b = (b); \ 4652 uint64x1x4_t __b = (b); \
4653 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 11); }) 4653 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 19); })
4654 #define vst4_s8(__a, b) __extension__ ({ \ 4654 #define vst4_s8(__a, b) __extension__ ({ \
4655 int8x8x4_t __b = (b); \ 4655 int8x8x4_t __b = (b); \
4656 __builtin_neon_vst4_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], 0); }) 4656 __builtin_neon_vst4_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], 0); })
4657 #define vst4_s16(__a, b) __extension__ ({ \ 4657 #define vst4_s16(__a, b) __extension__ ({ \
4658 int16x4x4_t __b = (b); \ 4658 int16x4x4_t __b = (b); \
4659 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 1); }) 4659 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 1); })
4660 #define vst4_s32(__a, b) __extension__ ({ \ 4660 #define vst4_s32(__a, b) __extension__ ({ \
4661 int32x2x4_t __b = (b); \ 4661 int32x2x4_t __b = (b); \
4662 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 2); }) 4662 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 2); })
4663 #define vst4_s64(__a, b) __extension__ ({ \ 4663 #define vst4_s64(__a, b) __extension__ ({ \
4664 int64x1x4_t __b = (b); \ 4664 int64x1x4_t __b = (b); \
4665 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 3); }) 4665 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 3); })
4666 #define vst4_f16(__a, b) __extension__ ({ \ 4666 #define vst4_f16(__a, b) __extension__ ({ \
4667 float16x4x4_t __b = (b); \ 4667 float16x4x4_t __b = (b); \
4668 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 7); }) 4668 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 6); })
4669 #define vst4_f32(__a, b) __extension__ ({ \ 4669 #define vst4_f32(__a, b) __extension__ ({ \
4670 float32x2x4_t __b = (b); \ 4670 float32x2x4_t __b = (b); \
4671 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 4); }) 4671 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 7); })
4672 #define vst4_p8(__a, b) __extension__ ({ \ 4672 #define vst4_p8(__a, b) __extension__ ({ \
4673 poly8x8x4_t __b = (b); \ 4673 poly8x8x4_t __b = (b); \
4674 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 5); }) 4674 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 4); })
4675 #define vst4_p16(__a, b) __extension__ ({ \ 4675 #define vst4_p16(__a, b) __extension__ ({ \
4676 poly16x4x4_t __b = (b); \ 4676 poly16x4x4_t __b = (b); \
4677 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 6); }) 4677 __builtin_neon_vst4_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], 5); })
4678 4678
4679 #define vst4q_lane_u16(__a, b, __c) __extension__ ({ \ 4679 #define vst4q_lane_u16(__a, b, __c) __extension__ ({ \
4680 uint16x8x4_t __b = (b); \ 4680 uint16x8x4_t __b = (b); \
4681 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 25); }) 4681 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 49); })
4682 #define vst4q_lane_u32(__a, b, __c) __extension__ ({ \ 4682 #define vst4q_lane_u32(__a, b, __c) __extension__ ({ \
4683 uint32x4x4_t __b = (b); \ 4683 uint32x4x4_t __b = (b); \
4684 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 26); }) 4684 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 50); })
4685 #define vst4q_lane_s16(__a, b, __c) __extension__ ({ \ 4685 #define vst4q_lane_s16(__a, b, __c) __extension__ ({ \
4686 int16x8x4_t __b = (b); \ 4686 int16x8x4_t __b = (b); \
4687 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 17); }) 4687 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 33); })
4688 #define vst4q_lane_s32(__a, b, __c) __extension__ ({ \ 4688 #define vst4q_lane_s32(__a, b, __c) __extension__ ({ \
4689 int32x4x4_t __b = (b); \ 4689 int32x4x4_t __b = (b); \
4690 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 18); }) 4690 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 34); })
4691 #define vst4q_lane_f16(__a, b, __c) __extension__ ({ \ 4691 #define vst4q_lane_f16(__a, b, __c) __extension__ ({ \
4692 float16x8x4_t __b = (b); \ 4692 float16x8x4_t __b = (b); \
4693 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 23); }) 4693 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 38); })
4694 #define vst4q_lane_f32(__a, b, __c) __extension__ ({ \ 4694 #define vst4q_lane_f32(__a, b, __c) __extension__ ({ \
4695 float32x4x4_t __b = (b); \ 4695 float32x4x4_t __b = (b); \
4696 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 20); }) 4696 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 39); })
4697 #define vst4q_lane_p16(__a, b, __c) __extension__ ({ \ 4697 #define vst4q_lane_p16(__a, b, __c) __extension__ ({ \
4698 poly16x8x4_t __b = (b); \ 4698 poly16x8x4_t __b = (b); \
4699 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 22); }) 4699 __builtin_neon_vst4q_lane_v(__a, (int8x16_t)__b.val[0], (int8x16_t)__b.val[1], (int8x16_t)__b.val[2], (int8x16_t)__b.val[3], __c, 37); })
4700 #define vst4_lane_u8(__a, b, __c) __extension__ ({ \ 4700 #define vst4_lane_u8(__a, b, __c) __extension__ ({ \
4701 uint8x8x4_t __b = (b); \ 4701 uint8x8x4_t __b = (b); \
4702 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 8); }) 4702 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 16); })
4703 #define vst4_lane_u16(__a, b, __c) __extension__ ({ \ 4703 #define vst4_lane_u16(__a, b, __c) __extension__ ({ \
4704 uint16x4x4_t __b = (b); \ 4704 uint16x4x4_t __b = (b); \
4705 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 9); }) 4705 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 17); })
4706 #define vst4_lane_u32(__a, b, __c) __extension__ ({ \ 4706 #define vst4_lane_u32(__a, b, __c) __extension__ ({ \
4707 uint32x2x4_t __b = (b); \ 4707 uint32x2x4_t __b = (b); \
4708 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 10); }) 4708 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 18); })
4709 #define vst4_lane_s8(__a, b, __c) __extension__ ({ \ 4709 #define vst4_lane_s8(__a, b, __c) __extension__ ({ \
4710 int8x8x4_t __b = (b); \ 4710 int8x8x4_t __b = (b); \
4711 __builtin_neon_vst4_lane_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); }) 4711 __builtin_neon_vst4_lane_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); })
4712 #define vst4_lane_s16(__a, b, __c) __extension__ ({ \ 4712 #define vst4_lane_s16(__a, b, __c) __extension__ ({ \
4713 int16x4x4_t __b = (b); \ 4713 int16x4x4_t __b = (b); \
4714 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 1); }) 4714 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 1); })
4715 #define vst4_lane_s32(__a, b, __c) __extension__ ({ \ 4715 #define vst4_lane_s32(__a, b, __c) __extension__ ({ \
4716 int32x2x4_t __b = (b); \ 4716 int32x2x4_t __b = (b); \
4717 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 2); }) 4717 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 2); })
4718 #define vst4_lane_f16(__a, b, __c) __extension__ ({ \ 4718 #define vst4_lane_f16(__a, b, __c) __extension__ ({ \
4719 float16x4x4_t __b = (b); \ 4719 float16x4x4_t __b = (b); \
4720 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 7); }) 4720 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 6); })
4721 #define vst4_lane_f32(__a, b, __c) __extension__ ({ \ 4721 #define vst4_lane_f32(__a, b, __c) __extension__ ({ \
4722 float32x2x4_t __b = (b); \ 4722 float32x2x4_t __b = (b); \
4723 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 4); }) 4723 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 7); })
4724 #define vst4_lane_p8(__a, b, __c) __extension__ ({ \ 4724 #define vst4_lane_p8(__a, b, __c) __extension__ ({ \
4725 poly8x8x4_t __b = (b); \ 4725 poly8x8x4_t __b = (b); \
4726 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 5); }) 4726 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 4); })
4727 #define vst4_lane_p16(__a, b, __c) __extension__ ({ \ 4727 #define vst4_lane_p16(__a, b, __c) __extension__ ({ \
4728 poly16x4x4_t __b = (b); \ 4728 poly16x4x4_t __b = (b); \
4729 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 6); }) 4729 __builtin_neon_vst4_lane_v(__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], __c, 5); })
4730 4730
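The *_lane variants above store only lane __c of each register instead of the whole group. A small sketch, not part of the header, again assuming <arm_neon.h> is included and using the vld2_u16 loader defined earlier in the file; the function and buffer names are illustrative:

  /* Sketch only: write val[0][3] followed by val[1][3] to two adjacent
     uint16_t. src must provide at least 8 uint16_t for the vld2_u16 load. */
  static void store_pair_lane3(uint16_t *dst, const uint16_t *src) {
    uint16x4x2_t pair = vld2_u16(src);
    vst2_lane_u16(dst, pair, 3);
  }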
4731 __ai int8x8_t vsub_s8(int8x8_t __a, int8x8_t __b) { \ 4731 __ai int8x8_t vsub_s8(int8x8_t __a, int8x8_t __b) { \
4732 return __a - __b; } 4732 return __a - __b; }
4733 __ai int16x4_t vsub_s16(int16x4_t __a, int16x4_t __b) { \ 4733 __ai int16x4_t vsub_s16(int16x4_t __a, int16x4_t __b) { \
4734 return __a - __b; } 4734 return __a - __b; }
4735 __ai int32x2_t vsub_s32(int32x2_t __a, int32x2_t __b) { \ 4735 __ai int32x2_t vsub_s32(int32x2_t __a, int32x2_t __b) { \
4736 return __a - __b; } 4736 return __a - __b; }
4737 __ai int64x1_t vsub_s64(int64x1_t __a, int64x1_t __b) { \ 4737 __ai int64x1_t vsub_s64(int64x1_t __a, int64x1_t __b) { \
4738 return __a - __b; } 4738 return __a - __b; }
4739 __ai float32x2_t vsub_f32(float32x2_t __a, float32x2_t __b) { \ 4739 __ai float32x2_t vsub_f32(float32x2_t __a, float32x2_t __b) { \
(...skipping 25 matching lines...)
4765 __ai uint64x2_t vsubq_u64(uint64x2_t __a, uint64x2_t __b) { \ 4765 __ai uint64x2_t vsubq_u64(uint64x2_t __a, uint64x2_t __b) { \
4766 return __a - __b; } 4766 return __a - __b; }
4767 4767
4768 __ai int8x8_t vsubhn_s16(int16x8_t __a, int16x8_t __b) { \ 4768 __ai int8x8_t vsubhn_s16(int16x8_t __a, int16x8_t __b) { \
4769 return (int8x8_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 0); } 4769 return (int8x8_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 0); }
4770 __ai int16x4_t vsubhn_s32(int32x4_t __a, int32x4_t __b) { \ 4770 __ai int16x4_t vsubhn_s32(int32x4_t __a, int32x4_t __b) { \
4771 return (int16x4_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 1); } 4771 return (int16x4_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 1); }
4772 __ai int32x2_t vsubhn_s64(int64x2_t __a, int64x2_t __b) { \ 4772 __ai int32x2_t vsubhn_s64(int64x2_t __a, int64x2_t __b) { \
4773 return (int32x2_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 2); } 4773 return (int32x2_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 2); }
4774 __ai uint8x8_t vsubhn_u16(uint16x8_t __a, uint16x8_t __b) { \ 4774 __ai uint8x8_t vsubhn_u16(uint16x8_t __a, uint16x8_t __b) { \
4775 return (uint8x8_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 8); } 4775 return (uint8x8_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 16); }
4776 __ai uint16x4_t vsubhn_u32(uint32x4_t __a, uint32x4_t __b) { \ 4776 __ai uint16x4_t vsubhn_u32(uint32x4_t __a, uint32x4_t __b) { \
4777 return (uint16x4_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 9); } 4777 return (uint16x4_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 17); }
4778 __ai uint32x2_t vsubhn_u64(uint64x2_t __a, uint64x2_t __b) { \ 4778 __ai uint32x2_t vsubhn_u64(uint64x2_t __a, uint64x2_t __b) { \
4779 return (uint32x2_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 10); } 4779 return (uint32x2_t)__builtin_neon_vsubhn_v((int8x16_t)__a, (int8x16_t)__b, 18); }
4780 4780
4781 __ai int16x8_t vsubl_s8(int8x8_t __a, int8x8_t __b) { \ 4781 __ai int16x8_t vsubl_s8(int8x8_t __a, int8x8_t __b) { \
4782 return vmovl_s8(__a) - vmovl_s8(__b); } 4782 return vmovl_s8(__a) - vmovl_s8(__b); }
4783 __ai int32x4_t vsubl_s16(int16x4_t __a, int16x4_t __b) { \ 4783 __ai int32x4_t vsubl_s16(int16x4_t __a, int16x4_t __b) { \
4784 return vmovl_s16(__a) - vmovl_s16(__b); } 4784 return vmovl_s16(__a) - vmovl_s16(__b); }
4785 __ai int64x2_t vsubl_s32(int32x2_t __a, int32x2_t __b) { \ 4785 __ai int64x2_t vsubl_s32(int32x2_t __a, int32x2_t __b) { \
4786 return vmovl_s32(__a) - vmovl_s32(__b); } 4786 return vmovl_s32(__a) - vmovl_s32(__b); }
4787 __ai uint16x8_t vsubl_u8(uint8x8_t __a, uint8x8_t __b) { \ 4787 __ai uint16x8_t vsubl_u8(uint8x8_t __a, uint8x8_t __b) { \
4788 return vmovl_u8(__a) - vmovl_u8(__b); } 4788 return vmovl_u8(__a) - vmovl_u8(__b); }
4789 __ai uint32x4_t vsubl_u16(uint16x4_t __a, uint16x4_t __b) { \ 4789 __ai uint32x4_t vsubl_u16(uint16x4_t __a, uint16x4_t __b) { \
4790 return vmovl_u16(__a) - vmovl_u16(__b); } 4790 return vmovl_u16(__a) - vmovl_u16(__b); }
4791 __ai uint64x2_t vsubl_u32(uint32x2_t __a, uint32x2_t __b) { \ 4791 __ai uint64x2_t vsubl_u32(uint32x2_t __a, uint32x2_t __b) { \
4792 return vmovl_u32(__a) - vmovl_u32(__b); } 4792 return vmovl_u32(__a) - vmovl_u32(__b); }
4793 4793
4794 __ai int16x8_t vsubw_s8(int16x8_t __a, int8x8_t __b) { \ 4794 __ai int16x8_t vsubw_s8(int16x8_t __a, int8x8_t __b) { \
4795 return __a - vmovl_s8(__b); } 4795 return __a - vmovl_s8(__b); }
4796 __ai int32x4_t vsubw_s16(int32x4_t __a, int16x4_t __b) { \ 4796 __ai int32x4_t vsubw_s16(int32x4_t __a, int16x4_t __b) { \
4797 return __a - vmovl_s16(__b); } 4797 return __a - vmovl_s16(__b); }
4798 __ai int64x2_t vsubw_s32(int64x2_t __a, int32x2_t __b) { \ 4798 __ai int64x2_t vsubw_s32(int64x2_t __a, int32x2_t __b) { \
4799 return __a - vmovl_s32(__b); } 4799 return __a - vmovl_s32(__b); }
4800 __ai uint16x8_t vsubw_u8(uint16x8_t __a, uint8x8_t __b) { \ 4800 __ai uint16x8_t vsubw_u8(uint16x8_t __a, uint8x8_t __b) { \
4801 return __a - vmovl_u8(__b); } 4801 return __a - vmovl_u8(__b); }
4802 __ai uint32x4_t vsubw_u16(uint32x4_t __a, uint16x4_t __b) { \ 4802 __ai uint32x4_t vsubw_u16(uint32x4_t __a, uint16x4_t __b) { \
4803 return __a - vmovl_u16(__b); } 4803 return __a - vmovl_u16(__b); }
4804 __ai uint64x2_t vsubw_u32(uint64x2_t __a, uint32x2_t __b) { \ 4804 __ai uint64x2_t vsubw_u32(uint64x2_t __a, uint32x2_t __b) { \
4805 return __a - vmovl_u32(__b); } 4805 return __a - vmovl_u32(__b); }
4806 4806
4807 __ai uint8x8_t vtbl1_u8(uint8x8_t __a, uint8x8_t __b) { \ 4807 __ai uint8x8_t vtbl1_u8(uint8x8_t __a, uint8x8_t __b) { \
4808 return (uint8x8_t)__builtin_neon_vtbl1_v((int8x8_t)__a, (int8x8_t)__b, 8); } 4808 return (uint8x8_t)__builtin_neon_vtbl1_v((int8x8_t)__a, (int8x8_t)__b, 16); }
4809 __ai int8x8_t vtbl1_s8(int8x8_t __a, int8x8_t __b) { \ 4809 __ai int8x8_t vtbl1_s8(int8x8_t __a, int8x8_t __b) { \
4810 return (int8x8_t)__builtin_neon_vtbl1_v(__a, __b, 0); } 4810 return (int8x8_t)__builtin_neon_vtbl1_v(__a, __b, 0); }
4811 __ai poly8x8_t vtbl1_p8(poly8x8_t __a, uint8x8_t __b) { \ 4811 __ai poly8x8_t vtbl1_p8(poly8x8_t __a, uint8x8_t __b) { \
4812 return (poly8x8_t)__builtin_neon_vtbl1_v((int8x8_t)__a, (int8x8_t)__b, 5); } 4812 return (poly8x8_t)__builtin_neon_vtbl1_v((int8x8_t)__a, (int8x8_t)__b, 4); }
4813 4813
4814 __ai uint8x8_t vtbl2_u8(uint8x8x2_t __a, uint8x8_t __b) { \ 4814 __ai uint8x8_t vtbl2_u8(uint8x8x2_t __a, uint8x8_t __b) { \
4815 return (uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__b, 8); } 4815 return (uint8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__b, 16); }
4816 __ai int8x8_t vtbl2_s8(int8x8x2_t __a, int8x8_t __b) { \ 4816 __ai int8x8_t vtbl2_s8(int8x8x2_t __a, int8x8_t __b) { \
4817 return (int8x8_t)__builtin_neon_vtbl2_v(__a.val[0], __a.val[1], __b, 0); } 4817 return (int8x8_t)__builtin_neon_vtbl2_v(__a.val[0], __a.val[1], __b, 0); }
4818 __ai poly8x8_t vtbl2_p8(poly8x8x2_t __a, uint8x8_t __b) { \ 4818 __ai poly8x8_t vtbl2_p8(poly8x8x2_t __a, uint8x8_t __b) { \
4819 return (poly8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__b, 5); } 4819 return (poly8x8_t)__builtin_neon_vtbl2_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__b, 4); }
4820 4820
4821 __ai uint8x8_t vtbl3_u8(uint8x8x3_t __a, uint8x8_t __b) { \ 4821 __ai uint8x8_t vtbl3_u8(uint8x8x3_t __a, uint8x8_t __b) { \
4822 return (uint8x8_t)__builtin_neon_vtbl3_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__b, 8); } 4822 return (uint8x8_t)__builtin_neon_vtbl3_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__b, 16); }
4823 __ai int8x8_t vtbl3_s8(int8x8x3_t __a, int8x8_t __b) { \ 4823 __ai int8x8_t vtbl3_s8(int8x8x3_t __a, int8x8_t __b) { \
4824 return (int8x8_t)__builtin_neon_vtbl3_v(__a.val[0], __a.val[1], __a.val[2], __b, 0); } 4824 return (int8x8_t)__builtin_neon_vtbl3_v(__a.val[0], __a.val[1], __a.val[2], __b, 0); }
4825 __ai poly8x8_t vtbl3_p8(poly8x8x3_t __a, uint8x8_t __b) { \ 4825 __ai poly8x8_t vtbl3_p8(poly8x8x3_t __a, uint8x8_t __b) { \
4826 return (poly8x8_t)__builtin_neon_vtbl3_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__b, 5); } 4826 return (poly8x8_t)__builtin_neon_vtbl3_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__b, 4); }
4827 4827
4828 __ai uint8x8_t vtbl4_u8(uint8x8x4_t __a, uint8x8_t __b) { \ 4828 __ai uint8x8_t vtbl4_u8(uint8x8x4_t __a, uint8x8_t __b) { \
4829 return (uint8x8_t)__builtin_neon_vtbl4_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__a.val[3], (int8x8_t)__b, 8); } 4829 return (uint8x8_t)__builtin_neon_vtbl4_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__a.val[3], (int8x8_t)__b, 16); }
4830 __ai int8x8_t vtbl4_s8(int8x8x4_t __a, int8x8_t __b) { \ 4830 __ai int8x8_t vtbl4_s8(int8x8x4_t __a, int8x8_t __b) { \
4831 return (int8x8_t)__builtin_neon_vtbl4_v(__a.val[0], __a.val[1], __a.val[2], __a.val[3], __b, 0); } 4831 return (int8x8_t)__builtin_neon_vtbl4_v(__a.val[0], __a.val[1], __a.val[2], __a.val[3], __b, 0); }
4832 __ai poly8x8_t vtbl4_p8(poly8x8x4_t __a, uint8x8_t __b) { \ 4832 __ai poly8x8_t vtbl4_p8(poly8x8x4_t __a, uint8x8_t __b) { \
4833 return (poly8x8_t)__builtin_neon_vtbl4_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__a.val[3], (int8x8_t)__b, 5); } 4833 return (poly8x8_t)__builtin_neon_vtbl4_v((int8x8_t)__a.val[0], (int8x8_t)__a.val[1], (int8x8_t)__a.val[2], (int8x8_t)__a.val[3], (int8x8_t)__b, 4); }
4834 4834
4835 __ai uint8x8_t vtbx1_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { \ 4835 __ai uint8x8_t vtbx1_u8(uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) { \
4836 return (uint8x8_t)__builtin_neon_vtbx1_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 8); } 4836 return (uint8x8_t)__builtin_neon_vtbx1_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 16); }
4837 __ai int8x8_t vtbx1_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { \ 4837 __ai int8x8_t vtbx1_s8(int8x8_t __a, int8x8_t __b, int8x8_t __c) { \
4838 return (int8x8_t)__builtin_neon_vtbx1_v(__a, __b, __c, 0); } 4838 return (int8x8_t)__builtin_neon_vtbx1_v(__a, __b, __c, 0); }
4839 __ai poly8x8_t vtbx1_p8(poly8x8_t __a, poly8x8_t __b, uint8x8_t __c) { \ 4839 __ai poly8x8_t vtbx1_p8(poly8x8_t __a, poly8x8_t __b, uint8x8_t __c) { \
4840 return (poly8x8_t)__builtin_neon_vtbx1_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 5); } 4840 return (poly8x8_t)__builtin_neon_vtbx1_v((int8x8_t)__a, (int8x8_t)__b, (int8x8_t)__c, 4); }
4841 4841
4842 __ai uint8x8_t vtbx2_u8(uint8x8_t __a, uint8x8x2_t __b, uint8x8_t __c) { \ 4842 __ai uint8x8_t vtbx2_u8(uint8x8_t __a, uint8x8x2_t __b, uint8x8_t __c) { \
4843 return (uint8x8_t)__builtin_neon_vtbx2_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__c, 8); } 4843 return (uint8x8_t)__builtin_neon_vtbx2_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__c, 16); }
4844 __ai int8x8_t vtbx2_s8(int8x8_t __a, int8x8x2_t __b, int8x8_t __c) { \ 4844 __ai int8x8_t vtbx2_s8(int8x8_t __a, int8x8x2_t __b, int8x8_t __c) { \
4845 return (int8x8_t)__builtin_neon_vtbx2_v(__a, __b.val[0], __b.val[1], __c, 0); } 4845 return (int8x8_t)__builtin_neon_vtbx2_v(__a, __b.val[0], __b.val[1], __c, 0); }
4846 __ai poly8x8_t vtbx2_p8(poly8x8_t __a, poly8x8x2_t __b, uint8x8_t __c) { \ 4846 __ai poly8x8_t vtbx2_p8(poly8x8_t __a, poly8x8x2_t __b, uint8x8_t __c) { \
4847 return (poly8x8_t)__builtin_neon_vtbx2_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__c, 5); } 4847 return (poly8x8_t)__builtin_neon_vtbx2_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__c, 4); }
4848 4848
4849 __ai uint8x8_t vtbx3_u8(uint8x8_t __a, uint8x8x3_t __b, uint8x8_t __c) { \ 4849 __ai uint8x8_t vtbx3_u8(uint8x8_t __a, uint8x8x3_t __b, uint8x8_t __c) { \
4850 return (uint8x8_t)__builtin_neon_vtbx3_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__c, 8); } 4850 return (uint8x8_t)__builtin_neon_vtbx3_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__c, 16); }
4851 __ai int8x8_t vtbx3_s8(int8x8_t __a, int8x8x3_t __b, int8x8_t __c) { \ 4851 __ai int8x8_t vtbx3_s8(int8x8_t __a, int8x8x3_t __b, int8x8_t __c) { \
4852 return (int8x8_t)__builtin_neon_vtbx3_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 0); } 4852 return (int8x8_t)__builtin_neon_vtbx3_v(__a, __b.val[0], __b.val[1], __b.val[2], __c, 0); }
4853 __ai poly8x8_t vtbx3_p8(poly8x8_t __a, poly8x8x3_t __b, uint8x8_t __c) { \ 4853 __ai poly8x8_t vtbx3_p8(poly8x8_t __a, poly8x8x3_t __b, uint8x8_t __c) { \
4854 return (poly8x8_t)__builtin_neon_vtbx3_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__c, 5); } 4854 return (poly8x8_t)__builtin_neon_vtbx3_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__c, 4); }
4855 4855
4856 __ai uint8x8_t vtbx4_u8(uint8x8_t __a, uint8x8x4_t __b, uint8x8_t __c) { \ 4856 __ai uint8x8_t vtbx4_u8(uint8x8_t __a, uint8x8x4_t __b, uint8x8_t __c) { \
4857 return (uint8x8_t)__builtin_neon_vtbx4_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], (int8x8_t)__c, 8); } 4857 return (uint8x8_t)__builtin_neon_vtbx4_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], (int8x8_t)__c, 16); }
4858 __ai int8x8_t vtbx4_s8(int8x8_t __a, int8x8x4_t __b, int8x8_t __c) { \ 4858 __ai int8x8_t vtbx4_s8(int8x8_t __a, int8x8x4_t __b, int8x8_t __c) { \
4859 return (int8x8_t)__builtin_neon_vtbx4_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); } 4859 return (int8x8_t)__builtin_neon_vtbx4_v(__a, __b.val[0], __b.val[1], __b.val[2], __b.val[3], __c, 0); }
4860 __ai poly8x8_t vtbx4_p8(poly8x8_t __a, poly8x8x4_t __b, uint8x8_t __c) { \ 4860 __ai poly8x8_t vtbx4_p8(poly8x8_t __a, poly8x8x4_t __b, uint8x8_t __c) { \
4861 return (poly8x8_t)__builtin_neon_vtbx4_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], (int8x8_t)__c, 5); } 4861 return (poly8x8_t)__builtin_neon_vtbx4_v((int8x8_t)__a, (int8x8_t)__b.val[0], (int8x8_t)__b.val[1], (int8x8_t)__b.val[2], (int8x8_t)__b.val[3], (int8x8_t)__c, 4); }
4862 4862
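The vtbl*/vtbx* intrinsics above perform a byte table lookup: each index byte in the last vector argument selects a byte from the concatenated table registers; an out-of-range index yields 0 with vtbl and leaves the corresponding destination byte unchanged with vtbx. A short sketch, not part of the header, assuming <arm_neon.h>:

  /* Sketch only: reverse the 8 bytes of a vector with a table lookup.
     An index >= 8 would produce 0 here; vtbx1_u8 would instead keep the
     corresponding byte of its first argument. */
  static uint8x8_t reverse_bytes(uint8x8_t v) {
    static const uint8_t idx[8] = {7, 6, 5, 4, 3, 2, 1, 0};
    return vtbl1_u8(v, vld1_u8(idx));
  }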
4863 __ai int8x8x2_t vtrn_s8(int8x8_t __a, int8x8_t __b) { \ 4863 __ai int8x8x2_t vtrn_s8(int8x8_t __a, int8x8_t __b) { \
4864 int8x8x2_t r; __builtin_neon_vtrn_v(&r, __a, __b, 0); return r; } 4864 int8x8x2_t r; __builtin_neon_vtrn_v(&r, __a, __b, 0); return r; }
4865 __ai int16x4x2_t vtrn_s16(int16x4_t __a, int16x4_t __b) { \ 4865 __ai int16x4x2_t vtrn_s16(int16x4_t __a, int16x4_t __b) { \
4866 int16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; } 4866 int16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; }
4867 __ai int32x2x2_t vtrn_s32(int32x2_t __a, int32x2_t __b) { \ 4867 __ai int32x2x2_t vtrn_s32(int32x2_t __a, int32x2_t __b) { \
4868 int32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; } 4868 int32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; }
4869 __ai uint8x8x2_t vtrn_u8(uint8x8_t __a, uint8x8_t __b) { \ 4869 __ai uint8x8x2_t vtrn_u8(uint8x8_t __a, uint8x8_t __b) { \
4870 uint8x8x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 8); return r; } 4870 uint8x8x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 16); return r; }
4871 __ai uint16x4x2_t vtrn_u16(uint16x4_t __a, uint16x4_t __b) { \ 4871 __ai uint16x4x2_t vtrn_u16(uint16x4_t __a, uint16x4_t __b) { \
4872 uint16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 9); return r; } 4872 uint16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 17); return r; }
4873 __ai uint32x2x2_t vtrn_u32(uint32x2_t __a, uint32x2_t __b) { \ 4873 __ai uint32x2x2_t vtrn_u32(uint32x2_t __a, uint32x2_t __b) { \
4874 uint32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 10); return r; } 4874 uint32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 18); return r; }
4875 __ai float32x2x2_t vtrn_f32(float32x2_t __a, float32x2_t __b) { \ 4875 __ai float32x2x2_t vtrn_f32(float32x2_t __a, float32x2_t __b) { \
4876 float32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); return r; } 4876 float32x2x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 7); return r; }
4877 __ai poly8x8x2_t vtrn_p8(poly8x8_t __a, poly8x8_t __b) { \ 4877 __ai poly8x8x2_t vtrn_p8(poly8x8_t __a, poly8x8_t __b) { \
4878 poly8x8x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; } 4878 poly8x8x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); return r; }
4879 __ai poly16x4x2_t vtrn_p16(poly16x4_t __a, poly16x4_t __b) { \ 4879 __ai poly16x4x2_t vtrn_p16(poly16x4_t __a, poly16x4_t __b) { \
4880 poly16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 6); return r; } 4880 poly16x4x2_t r; __builtin_neon_vtrn_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; }
4881 __ai int8x16x2_t vtrnq_s8(int8x16_t __a, int8x16_t __b) { \ 4881 __ai int8x16x2_t vtrnq_s8(int8x16_t __a, int8x16_t __b) { \
4882 int8x16x2_t r; __builtin_neon_vtrnq_v(&r, __a, __b, 16); return r; } 4882 int8x16x2_t r; __builtin_neon_vtrnq_v(&r, __a, __b, 32); return r; }
4883 __ai int16x8x2_t vtrnq_s16(int16x8_t __a, int16x8_t __b) { \ 4883 __ai int16x8x2_t vtrnq_s16(int16x8_t __a, int16x8_t __b) { \
4884 int16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 17); return r; } 4884 int16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 33); return r; }
4885 __ai int32x4x2_t vtrnq_s32(int32x4_t __a, int32x4_t __b) { \ 4885 __ai int32x4x2_t vtrnq_s32(int32x4_t __a, int32x4_t __b) { \
4886 int32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 18); return r; } 4886 int32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 34); return r; }
4887 __ai uint8x16x2_t vtrnq_u8(uint8x16_t __a, uint8x16_t __b) { \ 4887 __ai uint8x16x2_t vtrnq_u8(uint8x16_t __a, uint8x16_t __b) { \
4888 uint8x16x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 24); return r; } 4888 uint8x16x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 48); return r; }
4889 __ai uint16x8x2_t vtrnq_u16(uint16x8_t __a, uint16x8_t __b) { \ 4889 __ai uint16x8x2_t vtrnq_u16(uint16x8_t __a, uint16x8_t __b) { \
4890 uint16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 25); return r; } 4890 uint16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 49); return r; }
4891 __ai uint32x4x2_t vtrnq_u32(uint32x4_t __a, uint32x4_t __b) { \ 4891 __ai uint32x4x2_t vtrnq_u32(uint32x4_t __a, uint32x4_t __b) { \
4892 uint32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 26); return r; } 4892 uint32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 50); return r; }
4893 __ai float32x4x2_t vtrnq_f32(float32x4_t __a, float32x4_t __b) { \ 4893 __ai float32x4x2_t vtrnq_f32(float32x4_t __a, float32x4_t __b) { \
4894 float32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 20); return r; } 4894 float32x4x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 39); return r; }
4895 __ai poly8x16x2_t vtrnq_p8(poly8x16_t __a, poly8x16_t __b) { \ 4895 __ai poly8x16x2_t vtrnq_p8(poly8x16_t __a, poly8x16_t __b) { \
4896 poly8x16x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 21); return r; } 4896 poly8x16x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 36); return r; }
4897 __ai poly16x8x2_t vtrnq_p16(poly16x8_t __a, poly16x8_t __b) { \ 4897 __ai poly16x8x2_t vtrnq_p16(poly16x8_t __a, poly16x8_t __b) { \
4898 poly16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 22); return r; } 4898 poly16x8x2_t r; __builtin_neon_vtrnq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 37); return r; }
4899 4899
4900 __ai uint8x8_t vtst_s8(int8x8_t __a, int8x8_t __b) { \ 4900 __ai uint8x8_t vtst_s8(int8x8_t __a, int8x8_t __b) { \
4901 return (uint8x8_t)__builtin_neon_vtst_v(__a, __b, 8); } 4901 return (uint8x8_t)__builtin_neon_vtst_v(__a, __b, 16); }
4902 __ai uint16x4_t vtst_s16(int16x4_t __a, int16x4_t __b) { \ 4902 __ai uint16x4_t vtst_s16(int16x4_t __a, int16x4_t __b) { \
4903 return (uint16x4_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 9); } 4903 return (uint16x4_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 17); }
4904 __ai uint32x2_t vtst_s32(int32x2_t __a, int32x2_t __b) { \ 4904 __ai uint32x2_t vtst_s32(int32x2_t __a, int32x2_t __b) { \
4905 return (uint32x2_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 10); } 4905 return (uint32x2_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 18); }
4906 __ai uint8x8_t vtst_u8(uint8x8_t __a, uint8x8_t __b) { \ 4906 __ai uint8x8_t vtst_u8(uint8x8_t __a, uint8x8_t __b) { \
4907 return (uint8x8_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 8); } 4907 return (uint8x8_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 16); }
4908 __ai uint16x4_t vtst_u16(uint16x4_t __a, uint16x4_t __b) { \ 4908 __ai uint16x4_t vtst_u16(uint16x4_t __a, uint16x4_t __b) { \
4909 return (uint16x4_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 9); } 4909 return (uint16x4_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 17); }
4910 __ai uint32x2_t vtst_u32(uint32x2_t __a, uint32x2_t __b) { \ 4910 __ai uint32x2_t vtst_u32(uint32x2_t __a, uint32x2_t __b) { \
4911 return (uint32x2_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 10); } 4911 return (uint32x2_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 18); }
4912 __ai uint8x8_t vtst_p8(poly8x8_t __a, poly8x8_t __b) { \ 4912 __ai uint8x8_t vtst_p8(poly8x8_t __a, poly8x8_t __b) { \
4913 return (uint8x8_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 8); } 4913 return (uint8x8_t)__builtin_neon_vtst_v((int8x8_t)__a, (int8x8_t)__b, 16); }
4914 __ai uint8x16_t vtstq_s8(int8x16_t __a, int8x16_t __b) { \ 4914 __ai uint8x16_t vtstq_s8(int8x16_t __a, int8x16_t __b) { \
4915 return (uint8x16_t)__builtin_neon_vtstq_v(__a, __b, 24); } 4915 return (uint8x16_t)__builtin_neon_vtstq_v(__a, __b, 48); }
4916 __ai uint16x8_t vtstq_s16(int16x8_t __a, int16x8_t __b) { \ 4916 __ai uint16x8_t vtstq_s16(int16x8_t __a, int16x8_t __b) { \
4917 return (uint16x8_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 25); } 4917 return (uint16x8_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
4918 __ai uint32x4_t vtstq_s32(int32x4_t __a, int32x4_t __b) { \ 4918 __ai uint32x4_t vtstq_s32(int32x4_t __a, int32x4_t __b) { \
4919 return (uint32x4_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 4919 return (uint32x4_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
4920 __ai uint8x16_t vtstq_u8(uint8x16_t __a, uint8x16_t __b) { \ 4920 __ai uint8x16_t vtstq_u8(uint8x16_t __a, uint8x16_t __b) { \
4921 return (uint8x16_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 24); } 4921 return (uint8x16_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
4922 __ai uint16x8_t vtstq_u16(uint16x8_t __a, uint16x8_t __b) { \ 4922 __ai uint16x8_t vtstq_u16(uint16x8_t __a, uint16x8_t __b) { \
4923 return (uint16x8_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 25); } 4923 return (uint16x8_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 49); }
4924 __ai uint32x4_t vtstq_u32(uint32x4_t __a, uint32x4_t __b) { \ 4924 __ai uint32x4_t vtstq_u32(uint32x4_t __a, uint32x4_t __b) { \
4925 return (uint32x4_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 26); } 4925 return (uint32x4_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 50); }
4926 __ai uint8x16_t vtstq_p8(poly8x16_t __a, poly8x16_t __b) { \ 4926 __ai uint8x16_t vtstq_p8(poly8x16_t __a, poly8x16_t __b) { \
4927 return (uint8x16_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 24); } 4927 return (uint8x16_t)__builtin_neon_vtstq_v((int8x16_t)__a, (int8x16_t)__b, 48); }
4928 4928
4929 __ai int8x8x2_t vuzp_s8(int8x8_t __a, int8x8_t __b) { \ 4929 __ai int8x8x2_t vuzp_s8(int8x8_t __a, int8x8_t __b) { \
4930 int8x8x2_t r; __builtin_neon_vuzp_v(&r, __a, __b, 0); return r; } 4930 int8x8x2_t r; __builtin_neon_vuzp_v(&r, __a, __b, 0); return r; }
4931 __ai int16x4x2_t vuzp_s16(int16x4_t __a, int16x4_t __b) { \ 4931 __ai int16x4x2_t vuzp_s16(int16x4_t __a, int16x4_t __b) { \
4932 int16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; } 4932 int16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; }
4933 __ai int32x2x2_t vuzp_s32(int32x2_t __a, int32x2_t __b) { \ 4933 __ai int32x2x2_t vuzp_s32(int32x2_t __a, int32x2_t __b) { \
4934 int32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; } 4934 int32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; }
4935 __ai uint8x8x2_t vuzp_u8(uint8x8_t __a, uint8x8_t __b) { \ 4935 __ai uint8x8x2_t vuzp_u8(uint8x8_t __a, uint8x8_t __b) { \
4936 uint8x8x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 8); return r; } 4936 uint8x8x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 16); return r; }
4937 __ai uint16x4x2_t vuzp_u16(uint16x4_t __a, uint16x4_t __b) { \ 4937 __ai uint16x4x2_t vuzp_u16(uint16x4_t __a, uint16x4_t __b) { \
4938 uint16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 9); return r; } 4938 uint16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 17); return r; }
4939 __ai uint32x2x2_t vuzp_u32(uint32x2_t __a, uint32x2_t __b) { \ 4939 __ai uint32x2x2_t vuzp_u32(uint32x2_t __a, uint32x2_t __b) { \
4940 uint32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 10); return r; } 4940 uint32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 18); return r; }
4941 __ai float32x2x2_t vuzp_f32(float32x2_t __a, float32x2_t __b) { \ 4941 __ai float32x2x2_t vuzp_f32(float32x2_t __a, float32x2_t __b) { \
4942 float32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); return r; } 4942 float32x2x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 7); return r; }
4943 __ai poly8x8x2_t vuzp_p8(poly8x8_t __a, poly8x8_t __b) { \ 4943 __ai poly8x8x2_t vuzp_p8(poly8x8_t __a, poly8x8_t __b) { \
4944 poly8x8x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; } 4944 poly8x8x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); return r; }
4945 __ai poly16x4x2_t vuzp_p16(poly16x4_t __a, poly16x4_t __b) { \ 4945 __ai poly16x4x2_t vuzp_p16(poly16x4_t __a, poly16x4_t __b) { \
4946 poly16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 6); return r; } 4946 poly16x4x2_t r; __builtin_neon_vuzp_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; }
4947 __ai int8x16x2_t vuzpq_s8(int8x16_t __a, int8x16_t __b) { \ 4947 __ai int8x16x2_t vuzpq_s8(int8x16_t __a, int8x16_t __b) { \
4948 int8x16x2_t r; __builtin_neon_vuzpq_v(&r, __a, __b, 16); return r; } 4948 int8x16x2_t r; __builtin_neon_vuzpq_v(&r, __a, __b, 32); return r; }
4949 __ai int16x8x2_t vuzpq_s16(int16x8_t __a, int16x8_t __b) { \ 4949 __ai int16x8x2_t vuzpq_s16(int16x8_t __a, int16x8_t __b) { \
4950 int16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 17); return r; } 4950 int16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 33); return r; }
4951 __ai int32x4x2_t vuzpq_s32(int32x4_t __a, int32x4_t __b) { \ 4951 __ai int32x4x2_t vuzpq_s32(int32x4_t __a, int32x4_t __b) { \
4952 int32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 18); return r; } 4952 int32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 34); return r; }
4953 __ai uint8x16x2_t vuzpq_u8(uint8x16_t __a, uint8x16_t __b) { \ 4953 __ai uint8x16x2_t vuzpq_u8(uint8x16_t __a, uint8x16_t __b) { \
4954 uint8x16x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 24); return r; } 4954 uint8x16x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 48); return r; }
4955 __ai uint16x8x2_t vuzpq_u16(uint16x8_t __a, uint16x8_t __b) { \ 4955 __ai uint16x8x2_t vuzpq_u16(uint16x8_t __a, uint16x8_t __b) { \
4956 uint16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 25); return r; } 4956 uint16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 49); return r; }
4957 __ai uint32x4x2_t vuzpq_u32(uint32x4_t __a, uint32x4_t __b) { \ 4957 __ai uint32x4x2_t vuzpq_u32(uint32x4_t __a, uint32x4_t __b) { \
4958 uint32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 26); return r; } 4958 uint32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 50); return r; }
4959 __ai float32x4x2_t vuzpq_f32(float32x4_t __a, float32x4_t __b) { \ 4959 __ai float32x4x2_t vuzpq_f32(float32x4_t __a, float32x4_t __b) { \
4960 float32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 20); return r; } 4960 float32x4x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 39); return r; }
4961 __ai poly8x16x2_t vuzpq_p8(poly8x16_t __a, poly8x16_t __b) { \ 4961 __ai poly8x16x2_t vuzpq_p8(poly8x16_t __a, poly8x16_t __b) { \
4962 poly8x16x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 21); return r; } 4962 poly8x16x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 36); return r; }
4963 __ai poly16x8x2_t vuzpq_p16(poly16x8_t __a, poly16x8_t __b) { \ 4963 __ai poly16x8x2_t vuzpq_p16(poly16x8_t __a, poly16x8_t __b) { \
4964 poly16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 22); return r; } 4964 poly16x8x2_t r; __builtin_neon_vuzpq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 37); return r; }
4965 4965
4966 __ai int8x8x2_t vzip_s8(int8x8_t __a, int8x8_t __b) { \ 4966 __ai int8x8x2_t vzip_s8(int8x8_t __a, int8x8_t __b) { \
4967 int8x8x2_t r; __builtin_neon_vzip_v(&r, __a, __b, 0); return r; } 4967 int8x8x2_t r; __builtin_neon_vzip_v(&r, __a, __b, 0); return r; }
4968 __ai int16x4x2_t vzip_s16(int16x4_t __a, int16x4_t __b) { \ 4968 __ai int16x4x2_t vzip_s16(int16x4_t __a, int16x4_t __b) { \
4969 int16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; } 4969 int16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 1); return r; }
4970 __ai int32x2x2_t vzip_s32(int32x2_t __a, int32x2_t __b) { \ 4970 __ai int32x2x2_t vzip_s32(int32x2_t __a, int32x2_t __b) { \
4971 int32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; } 4971 int32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 2); return r; }
4972 __ai uint8x8x2_t vzip_u8(uint8x8_t __a, uint8x8_t __b) { \ 4972 __ai uint8x8x2_t vzip_u8(uint8x8_t __a, uint8x8_t __b) { \
4973 uint8x8x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 8); return r; } 4973 uint8x8x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 16); return r; }
4974 __ai uint16x4x2_t vzip_u16(uint16x4_t __a, uint16x4_t __b) { \ 4974 __ai uint16x4x2_t vzip_u16(uint16x4_t __a, uint16x4_t __b) { \
4975 uint16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 9); return r; } 4975 uint16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 17); return r; }
4976 __ai uint32x2x2_t vzip_u32(uint32x2_t __a, uint32x2_t __b) { \ 4976 __ai uint32x2x2_t vzip_u32(uint32x2_t __a, uint32x2_t __b) { \
4977 uint32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 10); return r; } 4977 uint32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 18); return r; }
4978 __ai float32x2x2_t vzip_f32(float32x2_t __a, float32x2_t __b) { \ 4978 __ai float32x2x2_t vzip_f32(float32x2_t __a, float32x2_t __b) { \
4979 float32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); return r; } 4979 float32x2x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 7); return r; }
4980 __ai poly8x8x2_t vzip_p8(poly8x8_t __a, poly8x8_t __b) { \ 4980 __ai poly8x8x2_t vzip_p8(poly8x8_t __a, poly8x8_t __b) { \
4981 poly8x8x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; } 4981 poly8x8x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 4); return r; }
4982 __ai poly16x4x2_t vzip_p16(poly16x4_t __a, poly16x4_t __b) { \ 4982 __ai poly16x4x2_t vzip_p16(poly16x4_t __a, poly16x4_t __b) { \
4983 poly16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 6); return r; } 4983 poly16x4x2_t r; __builtin_neon_vzip_v(&r, (int8x8_t)__a, (int8x8_t)__b, 5); return r; }
4984 __ai int8x16x2_t vzipq_s8(int8x16_t __a, int8x16_t __b) { \ 4984 __ai int8x16x2_t vzipq_s8(int8x16_t __a, int8x16_t __b) { \
4985 int8x16x2_t r; __builtin_neon_vzipq_v(&r, __a, __b, 16); return r; } 4985 int8x16x2_t r; __builtin_neon_vzipq_v(&r, __a, __b, 32); return r; }
4986 __ai int16x8x2_t vzipq_s16(int16x8_t __a, int16x8_t __b) { \ 4986 __ai int16x8x2_t vzipq_s16(int16x8_t __a, int16x8_t __b) { \
4987 int16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 17); return r; } 4987 int16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 33); return r; }
4988 __ai int32x4x2_t vzipq_s32(int32x4_t __a, int32x4_t __b) { \ 4988 __ai int32x4x2_t vzipq_s32(int32x4_t __a, int32x4_t __b) { \
4989 int32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 18); return r; } 4989 int32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 34); return r; }
4990 __ai uint8x16x2_t vzipq_u8(uint8x16_t __a, uint8x16_t __b) { \ 4990 __ai uint8x16x2_t vzipq_u8(uint8x16_t __a, uint8x16_t __b) { \
4991 uint8x16x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 24); return r; } 4991 uint8x16x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 48); return r; }
4992 __ai uint16x8x2_t vzipq_u16(uint16x8_t __a, uint16x8_t __b) { \ 4992 __ai uint16x8x2_t vzipq_u16(uint16x8_t __a, uint16x8_t __b) { \
4993 uint16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 25); return r; } 4993 uint16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 49); return r; }
4994 __ai uint32x4x2_t vzipq_u32(uint32x4_t __a, uint32x4_t __b) { \ 4994 __ai uint32x4x2_t vzipq_u32(uint32x4_t __a, uint32x4_t __b) { \
4995 uint32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 26); return r; } 4995 uint32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 50); return r; }
4996 __ai float32x4x2_t vzipq_f32(float32x4_t __a, float32x4_t __b) { \ 4996 __ai float32x4x2_t vzipq_f32(float32x4_t __a, float32x4_t __b) { \
4997 float32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 20); return r; } 4997 float32x4x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 39); return r; }
4998 __ai poly8x16x2_t vzipq_p8(poly8x16_t __a, poly8x16_t __b) { \ 4998 __ai poly8x16x2_t vzipq_p8(poly8x16_t __a, poly8x16_t __b) { \
4999 poly8x16x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 21); return r; } 4999 poly8x16x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 36); return r; }
5000 __ai poly16x8x2_t vzipq_p16(poly16x8_t __a, poly16x8_t __b) { \ 5000 __ai poly16x8x2_t vzipq_p16(poly16x8_t __a, poly16x8_t __b) { \
5001 poly16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 22); return r; } 5001 poly16x8x2_t r; __builtin_neon_vzipq_v(&r, (int8x16_t)__a, (int8x16_t)__b, 37); return r; }
5002 5002
5003 #undef __ai 5003 #undef __ai
5004 5004
5005 #endif /* __ARM_NEON_H */ 5005 #endif /* __ARM_NEON_H */
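
Note (not part of the patch): in the hunks shown here the C-level signatures of the vtbx/vtrn/vtst/vuzp/vzip intrinsics are unchanged; only the internal type-code constants passed to the __builtin_neon_* builtins differ between the old and new header. A minimal usage sketch of two of these intrinsics follows, assuming an ARM target compiled with NEON enabled (e.g. -mfpu=neon); the input values are made up for illustration.

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
  /* Hypothetical input data, purely for illustration. */
  const uint8_t lhs[8] = {1, 2, 0, 4, 0, 6, 7, 0};
  const uint8_t rhs[8] = {1, 0, 0, 4, 5, 6, 0, 0};

  uint8x8_t a = vld1_u8(lhs);
  uint8x8_t b = vld1_u8(rhs);

  /* vtst_u8: each lane becomes all-ones where (a & b) != 0, else zero. */
  uint8x8_t mask = vtst_u8(a, b);

  /* vzip_u8: interleaves the lanes of a and b into a two-vector result. */
  uint8x8x2_t zipped = vzip_u8(a, b);

  uint8_t out[8];
  vst1_u8(out, mask);
  for (int i = 0; i < 8; ++i)
    printf("%u ", out[i]);
  printf("\n");

  (void)zipped; /* interleaved halves are in zipped.val[0] and zipped.val[1] */
  return 0;
}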