| OLD | NEW |
| 1 /*===---- smmintrin.h - SSE4 intrinsics ------------------------------------=== | 1 /*===---- smmintrin.h - SSE4 intrinsics ------------------------------------=== |
| 2 * | 2 * |
| 3 * Permission is hereby granted, free of charge, to any person obtaining a copy | 3 * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 4 * of this software and associated documentation files (the "Software"), to deal | 4 * of this software and associated documentation files (the "Software"), to deal |
| 5 * in the Software without restriction, including without limitation the rights | 5 * in the Software without restriction, including without limitation the rights |
| 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 7 * copies of the Software, and to permit persons to whom the Software is | 7 * copies of the Software, and to permit persons to whom the Software is |
| 8 * furnished to do so, subject to the following conditions: | 8 * furnished to do so, subject to the following conditions: |
| 9 * | 9 * |
| 10 * The above copyright notice and this permission notice shall be included in | 10 * The above copyright notice and this permission notice shall be included in |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 56 #define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR) | 56 #define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR) /* Expands to _mm_round_pd on X with _MM_FROUND_FLOOR as the last argument. */ |
| 57 #define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR) | 57 #define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR) /* Expands to _mm_round_ss on X and Y with _MM_FROUND_FLOOR as the last argument. */ |
| 58 #define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR) | 58 #define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR) /* Expands to _mm_round_sd on X and Y with _MM_FROUND_FLOOR as the last argument. */ |
| 59 | 59 |
| 60 #define _mm_round_ps(X, Y) __builtin_ia32_roundps((X), (Y)) | 60 #define _mm_round_ps(X, M) __builtin_ia32_roundps((X), (M)) /* Forwards X and M to __builtin_ia32_roundps; the last operand is named M as in _mm_round_pd. */ |
| 61 #define _mm_round_ss(X, Y, M) __builtin_ia32_roundss((X), (Y), (M)) | 61 #define _mm_round_ss(X, Y, M) __builtin_ia32_roundss((X), (Y), (M)) /* Forwards X, Y and M to __builtin_ia32_roundss. */ |
| 62 #define _mm_round_pd(X, M) __builtin_ia32_roundpd((X), (M)) | 62 #define _mm_round_pd(X, M) __builtin_ia32_roundpd((X), (M)) /* Forwards X and M to __builtin_ia32_roundpd. */ |
| 63 #define _mm_round_sd(X, Y, M) __builtin_ia32_roundsd((X), (Y), (M)) | 63 #define _mm_round_sd(X, Y, M) __builtin_ia32_roundsd((X), (Y), (M)) /* Forwards X, Y and M to __builtin_ia32_roundsd. */ |
| 64 | 64 |
| 65 /* SSE4 Packed Blending Intrinsics. */ | 65 /* SSE4 Packed Blending Intrinsics. */ |
| 66 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) | 66 /* Macro form of _mm_blend_pd: V1 and V2 are each evaluated once into typed locals, and M is substituted directly into the __builtin_ia32_blendpd call (presumably so it stays a compile-time constant; confirm). */ #define _mm_blend_pd(V1, V2, M) __extension__ ({ \ |
| 67 _mm_blend_pd (__m128d __V1, __m128d __V2, const int __M) | 67 __m128d __V1 = (V1); \ |
| 68 { | 68 __m128d __V2 = (V2); \ |
| 69 return (__m128d) __builtin_ia32_blendpd ((__v2df)__V1, (__v2df)__V2, __M); | 69 (__m128d) __builtin_ia32_blendpd ((__v2df)__V1, (__v2df)__V2, (M)); }) |
| 70 } | |
| 71 | 70 |
| 72 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | 71 /* Macro form of _mm_blend_ps: V1 and V2 are each evaluated once into typed locals, and M is substituted directly into the __builtin_ia32_blendps call (presumably so it stays a compile-time constant; confirm). */ #define _mm_blend_ps(V1, V2, M) __extension__ ({ \ |
| 73 _mm_blend_ps (__m128 __V1, __m128 __V2, const int __M) | 72 __m128 __V1 = (V1); \ |
| 74 { | 73 __m128 __V2 = (V2); \ |
| 75 return (__m128) __builtin_ia32_blendps ((__v4sf)__V1, (__v4sf)__V2, __M); | 74 (__m128) __builtin_ia32_blendps ((__v4sf)__V1, (__v4sf)__V2, (M)); }) |
| 76 } | |
| 77 | 75 |
| 78 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) | 76 /* Casts __V1, __V2 and the mask __M to __v2df, calls __builtin_ia32_blendvpd on them, and returns the result as __m128d. */ static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) |
| 79 _mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M) | 77 _mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M) |
| 80 { | 78 { |
| 81 return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2, | 79 return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2, |
| 82 (__v2df)__M); | 80 (__v2df)__M); |
| 83 } | 81 } |
| 84 | 82 |
| 85 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | 83 /* Casts __V1, __V2 and the mask __M to __v4sf, calls __builtin_ia32_blendvps on them, and returns the result as __m128. */ static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) |
| 86 _mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M) | 84 _mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M) |
| 87 { | 85 { |
| 88 return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2, | 86 return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2, |
| 89 (__v4sf)__M); | 87 (__v4sf)__M); |
| 90 } | 88 } |
| 91 | 89 |
| 92 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) | 90 /* Casts __V1, __V2 and the mask __M to __v16qi, calls __builtin_ia32_pblendvb128 on them, and returns the result as __m128i. */ static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) |
| 93 _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) | 91 _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) |
| 94 { | 92 { |
| 95 return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2, | 93 return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2, |
| 96 (__v16qi)__M); | 94 (__v16qi)__M); |
| 97 } | 95 } |
| 98 | 96 |
| 99 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) | 97 /* Macro form of _mm_blend_epi16: V1 and V2 are each evaluated once into typed locals, and M is substituted directly into the __builtin_ia32_pblendw128 call (presumably so it stays a compile-time constant; confirm). */ #define _mm_blend_epi16(V1, V2, M) __extension__ ({ \ |
| 100 _mm_blend_epi16 (__m128i __V1, __m128i __V2, const int __M) | 98 __m128i __V1 = (V1); \ |
| 101 { | 99 __m128i __V2 = (V2); \ |
| 102 return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__V1, (__v8hi)__V2, __M); | 100 (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__V1, (__v8hi)__V2, (M)); }) |
| 103 } | |
| 104 | 101 |
| 105 /* SSE4 Dword Multiply Instructions. */ | 102 /* SSE4 Dword Multiply Instructions. */ |
| 106 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) | 103 /* Reinterprets both operands as __v4si, multiplies them with the vector multiplication operator, and returns the product cast back to __m128i. */ static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) |
| 107 _mm_mullo_epi32 (__m128i __V1, __m128i __V2) | 104 _mm_mullo_epi32 (__m128i __V1, __m128i __V2) |
| 108 { | 105 { |
| 109 return (__m128i) ((__v4si)__V1 * (__v4si)__V2); | 106 return (__m128i) ((__v4si)__V1 * (__v4si)__V2); |
| 110 } | 107 } |
| 111 | 108 |
| 112 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) | 109 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) |
| 113 _mm_mul_epi32 (__m128i __V1, __m128i __V2) | 110 _mm_mul_epi32 (__m128i __V1, __m128i __V2) |
| (...skipping 254 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 368 /* SSE4.2 Packed Comparison Intrinsics. */ | 365 /* SSE4.2 Packed Comparison Intrinsics. */ |
| 369 #define _mm_cmpistrm(A, B, M) __builtin_ia32_pcmpistrm128((A), (B), (M)) | 366 #define _mm_cmpistrm(A, B, M) __builtin_ia32_pcmpistrm128((A), (B), (M)) /* Forwards A, B and M to __builtin_ia32_pcmpistrm128. */ |
| 370 #define _mm_cmpistri(A, B, M) __builtin_ia32_pcmpistri128((A), (B), (M)) | 367 #define _mm_cmpistri(A, B, M) __builtin_ia32_pcmpistri128((A), (B), (M)) /* Forwards A, B and M to __builtin_ia32_pcmpistri128. */ |
| 371 | 368 |
| 372 #define _mm_cmpestrm(A, LA, B, LB, M) \ | 369 #define _mm_cmpestrm(A, LA, B, LB, M) /* Forwards all five operands, in order, to __builtin_ia32_pcmpestrm128. */ \ |
| 373 __builtin_ia32_pcmpestrm128((A), (LA), (B), (LB), (M)) | 370 __builtin_ia32_pcmpestrm128((A), (LA), (B), (LB), (M)) |
| 374 #define _mm_cmpestri(X, LX, Y, LY, M) \ | 371 #define _mm_cmpestri(A, LA, B, LB, M) /* Forwards all five operands, in order, to __builtin_ia32_pcmpestri128; the parameter names must match those used in the expansion or the arguments are never substituted. */ \ |
| 375 __builtin_ia32_pcmpestri128((A), (LA), (B), (LB), (M)) | 372 __builtin_ia32_pcmpestri128((A), (LA), (B), (LB), (M)) |
| 376 | 373 |
| 377 /* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */ | 374 /* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */ |
| 378 #define _mm_cmpistra(A, LA, B, LB, M) \ | 375 /* Each of _mm_cmpistra/c/o/s/z takes three operands (A, B, M) and forwards them to the matching __builtin_ia32_pcmpistri{a,c,o,s,z}128 builtin; the previous five-operand parameter lists were dropped. */ #define _mm_cmpistra(A, B, M) \ |
| 379 __builtin_ia32_pcmpistria128((A), (LA), (B), (LB), (M)) | 376 __builtin_ia32_pcmpistria128((A), (B), (M)) |
| 380 #define _mm_cmpistrc(A, LA, B, LB, M) \ | 377 #define _mm_cmpistrc(A, B, M) \ |
| 381 __builtin_ia32_pcmpistric128((A), (LA), (B), (LB), (M)) | 378 __builtin_ia32_pcmpistric128((A), (B), (M)) |
| 382 #define _mm_cmpistro(A, LA, B, LB, M) \ | 379 #define _mm_cmpistro(A, B, M) \ |
| 383 __builtin_ia32_pcmpistrio128((A), (LA), (B), (LB), (M)) | 380 __builtin_ia32_pcmpistrio128((A), (B), (M)) |
| 384 #define _mm_cmpistrs(A, LA, B, LB, M) \ | 381 #define _mm_cmpistrs(A, B, M) \ |
| 385 __builtin_ia32_pcmpistris128((A), (LA), (B), (LB), (M)) | 382 __builtin_ia32_pcmpistris128((A), (B), (M)) |
| 386 #define _mm_cmpistrz(A, LA, B, LB, M) \ | 383 #define _mm_cmpistrz(A, B, M) \ |
| 387 __builtin_ia32_pcmpistriz128((A), (LA), (B), (LB), (M)) | 384 __builtin_ia32_pcmpistriz128((A), (B), (M)) |
| 388 | 385 |
| 389 #define _mm_cmpestra(A, LA, B, LB, M) \ | 386 /* Each of the _mm_cmpestra/c/o/s macros below takes five operands (A, LA, B, LB, M) and forwards them, in order, to the matching __builtin_ia32_pcmpestri{a,c,o,s}128 builtin. */ #define _mm_cmpestra(A, LA, B, LB, M) \ |
| 390 __builtin_ia32_pcmpestria128((A), (LA), (B), (LB), (M)) | 387 __builtin_ia32_pcmpestria128((A), (LA), (B), (LB), (M)) |
| 391 #define _mm_cmpestrc(A, LA, B, LB, M) \ | 388 #define _mm_cmpestrc(A, LA, B, LB, M) \ |
| 392 __builtin_ia32_pcmpestric128((A), (LA), (B), (LB), (M)) | 389 __builtin_ia32_pcmpestric128((A), (LA), (B), (LB), (M)) |
| 393 #define _mm_cmpestro(A, LA, B, LB, M) \ | 390 #define _mm_cmpestro(A, LA, B, LB, M) \ |
| 394 __builtin_ia32_pcmpestrio128((A), (LA), (B), (LB), (M)) | 391 __builtin_ia32_pcmpestrio128((A), (LA), (B), (LB), (M)) |
| 395 #define _mm_cmpestrs(A, LA, B, LB, M) \ | 392 #define _mm_cmpestrs(A, LA, B, LB, M) \ |
| 396 __builtin_ia32_pcmpestris128((A), (LA), (B), (LB), (M)) | 393 __builtin_ia32_pcmpestris128((A), (LA), (B), (LB), (M)) |
| 397 #define _mm_cmpestrz(A, LA, B, LB, M) \ | 394 #define _mm_cmpestrz(A, LA, B, LB, M) \ |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 443 _mm_popcnt_u64(unsigned long long __A) | 440 _mm_popcnt_u64(unsigned long long __A) |
| 444 { | 441 { |
| 445 return __builtin_popcountll(__A); | 442 return __builtin_popcountll(__A); |
| 446 } | 443 } |
| 447 #endif /* __x86_64__ */ | 444 #endif /* __x86_64__ */ |
| 448 | 445 |
| 449 #endif /* __SSE4_2__ */ | 446 #endif /* __SSE4_2__ */ |
| 450 #endif /* __SSE4_1__ */ | 447 #endif /* __SSE4_1__ */ |
| 451 | 448 |
| 452 #endif /* _SMMINTRIN_H */ | 449 #endif /* _SMMINTRIN_H */ |
| OLD | NEW |