OLD | NEW |
1 /*===---- smmintrin.h - SSE4 intrinsics ------------------------------------=== | 1 /*===---- smmintrin.h - SSE4 intrinsics ------------------------------------=== |
2 * | 2 * |
3 * Permission is hereby granted, free of charge, to any person obtaining a copy | 3 * Permission is hereby granted, free of charge, to any person obtaining a copy |
4 * of this software and associated documentation files (the "Software"), to deal | 4 * of this software and associated documentation files (the "Software"), to deal |
5 * in the Software without restriction, including without limitation the rights | 5 * in the Software without restriction, including without limitation the rights |
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
7 * copies of the Software, and to permit persons to whom the Software is | 7 * copies of the Software, and to permit persons to whom the Software is |
8 * furnished to do so, subject to the following conditions: | 8 * furnished to do so, subject to the following conditions: |
9 * | 9 * |
10 * The above copyright notice and this permission notice shall be included in | 10 * The above copyright notice and this permission notice shall be included in |
(...skipping 45 matching lines...)
56 #define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR) | 56 #define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR) |
57 #define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR) | 57 #define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR) |
58 #define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR) | 58 #define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR) |
59 | 59 |
60 #define _mm_round_ps(X, Y) __builtin_ia32_roundps((X), (Y)) | 60 #define _mm_round_ps(X, Y) __builtin_ia32_roundps((X), (Y)) |
61 #define _mm_round_ss(X, Y, M) __builtin_ia32_roundss((X), (Y), (M)) | 61 #define _mm_round_ss(X, Y, M) __builtin_ia32_roundss((X), (Y), (M)) |
62 #define _mm_round_pd(X, M) __builtin_ia32_roundpd((X), (M)) | 62 #define _mm_round_pd(X, M) __builtin_ia32_roundpd((X), (M)) |
63 #define _mm_round_sd(X, Y, M) __builtin_ia32_roundsd((X), (Y), (M)) | 63 #define _mm_round_sd(X, Y, M) __builtin_ia32_roundsd((X), (Y), (M)) |
64 | 64 |
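Note: the scalar floor/ceil forms take two operands: the low lane of the
result is the rounded low lane of Y, and the high lane is copied from X.
A minimal sketch of typical use (the helper name is illustrative):

    #include <smmintrin.h>

    /* Floor only the low double, preserving the high double. */
    __m128d floor_low_lane(__m128d x)
    {
      return _mm_floor_sd(x, x); /* = _mm_round_sd(x, x, _MM_FROUND_FLOOR) */
    }
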
65 /* SSE4 Packed Blending Intrinsics. */ | 65 /* SSE4 Packed Blending Intrinsics. */ |
66 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) | 66 #define _mm_blend_pd(V1, V2, M) __extension__ ({ \ |
67 _mm_blend_pd (__m128d __V1, __m128d __V2, const int __M) | 67 __m128d __V1 = (V1); \ |
68 { | 68 __m128d __V2 = (V2); \ |
69 return (__m128d) __builtin_ia32_blendpd ((__v2df)__V1, (__v2df)__V2, __M); | 69 (__m128d) __builtin_ia32_blendpd ((__v2df)__V1, (__v2df)__V2, M); }) |
70 } | |
71 | 70 |
72 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | 71 #define _mm_blend_ps(V1, V2, M) __extension__ ({ \ |
73 _mm_blend_ps (__m128 __V1, __m128 __V2, const int __M) | 72 __m128 __V1 = (V1); \ |
74 { | 73 __m128 __V2 = (V2); \ |
75 return (__m128) __builtin_ia32_blendps ((__v4sf)__V1, (__v4sf)__V2, __M); | 74 (__m128) __builtin_ia32_blendps ((__v4sf)__V1, (__v4sf)__V2, M); }) |
76 } | |
77 | 75 |
78 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) | 76 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) |
79 _mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M) | 77 _mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M) |
80 { | 78 { |
81 return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2, | 79 return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2, |
82 (__v2df)__M); | 80 (__v2df)__M); |
83 } | 81 } |
84 | 82 |
85 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | 83 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) |
86 _mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M) | 84 _mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M) |
87 { | 85 { |
88 return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2, | 86 return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2, |
89 (__v4sf)__M); | 87 (__v4sf)__M); |
90 } | 88 } |
91 | 89 |
92 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) | 90 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) |
93 _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) | 91 _mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) |
94 { | 92 { |
95 return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2, | 93 return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2, |
96 (__v16qi)__M); | 94 (__v16qi)__M); |
97 } | 95 } |
98 | 96 |
99 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) | 97 #define _mm_blend_epi16(V1, V2, M) __extension__ ({ \ |
100 _mm_blend_epi16 (__m128i __V1, __m128i __V2, const int __M) | 98 __m128i __V1 = (V1); \ |
101 { | 99 __m128i __V2 = (V2); \ |
102 return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__V1, (__v8hi)__V2, __M); | 100 (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__V1, (__v8hi)__V2, M); }) |
103 } | |
104 | 101 |
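Note: these three blend intrinsics move from inline functions to
__extension__ ({ ... }) statement-expression macros because the underlying
BLENDPD/BLENDPS/PBLENDW instructions encode the mask as an 8-bit immediate,
so M must reach the builtin as an integer constant expression; a function
parameter of type const int does not guarantee that. A minimal usage sketch
(the helper name and mask value are illustrative):

    #include <smmintrin.h>

    /* 0xAA = 0b10101010: odd 16-bit lanes from b, even lanes from a.
       The mask must be a compile-time constant, hence the macro form. */
    __m128i interleave_words(__m128i a, __m128i b)
    {
      return _mm_blend_epi16(a, b, 0xAA);
    }
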
105 /* SSE4 Dword Multiply Instructions. */ | 102 /* SSE4 Dword Multiply Instructions. */ |
106 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) | 103 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) |
107 _mm_mullo_epi32 (__m128i __V1, __m128i __V2) | 104 _mm_mullo_epi32 (__m128i __V1, __m128i __V2) |
108 { | 105 { |
109 return (__m128i) ((__v4si)__V1 * (__v4si)__V2); | 106 return (__m128i) ((__v4si)__V1 * (__v4si)__V2); |
110 } | 107 } |
111 | 108 |
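Note: _mm_mullo_epi32 is written with Clang's native vector-extension
multiply rather than a builtin; each 32-bit lane keeps the low 32 bits of
the full product, while _mm_mul_epi32 below produces full 64-bit signed
products of the even lanes. A scalar model of the lane-wise behavior
(assumed equivalence, helper name illustrative):

    #include <stdint.h>

    static void mullo_epi32_model(const int32_t a[4], const int32_t b[4],
                                  int32_t out[4])
    {
      for (int i = 0; i < 4; ++i) /* low half of each 64-bit product */
        out[i] = (int32_t)((int64_t)a[i] * (int64_t)b[i]);
    }
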
112 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) | 109 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) |
113 _mm_mul_epi32 (__m128i __V1, __m128i __V2) | 110 _mm_mul_epi32 (__m128i __V1, __m128i __V2) |
(...skipping 254 matching lines...)
368 /* SSE4.2 Packed Comparison Intrinsics. */ | 365 /* SSE4.2 Packed Comparison Intrinsics. */ |
369 #define _mm_cmpistrm(A, B, M) __builtin_ia32_pcmpistrm128((A), (B), (M)) | 366 #define _mm_cmpistrm(A, B, M) __builtin_ia32_pcmpistrm128((A), (B), (M)) |
370 #define _mm_cmpistri(A, B, M) __builtin_ia32_pcmpistri128((A), (B), (M)) | 367 #define _mm_cmpistri(A, B, M) __builtin_ia32_pcmpistri128((A), (B), (M)) |
371 | 368 |
372 #define _mm_cmpestrm(A, LA, B, LB, M) \ | 369 #define _mm_cmpestrm(A, LA, B, LB, M) \ |
373 __builtin_ia32_pcmpestrm128((A), (LA), (B), (LB), (M)) | 370 __builtin_ia32_pcmpestrm128((A), (LA), (B), (LB), (M)) |
374 #define _mm_cmpestri(A, LA, B, LB, M) \ | 371 #define _mm_cmpestri(A, LA, B, LB, M) \ |
375 __builtin_ia32_pcmpestri128((A), (LA), (B), (LB), (M)) | 372 __builtin_ia32_pcmpestri128((A), (LA), (B), (LB), (M)) |
376 | 373 |
377 /* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */ | 374 /* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */ |
378 #define _mm_cmpistra(A, LA, B, LB, M) \ | 375 #define _mm_cmpistra(A, B, M) \ |
379 __builtin_ia32_pcmpistria128((A), (LA), (B), (LB), (M)) | 376 __builtin_ia32_pcmpistria128((A), (B), (M)) |
380 #define _mm_cmpistrc(A, LA, B, LB, M) \ | 377 #define _mm_cmpistrc(A, B, M) \ |
381 __builtin_ia32_pcmpistric128((A), (LA), (B), (LB), (M)) | 378 __builtin_ia32_pcmpistric128((A), (B), (M)) |
382 #define _mm_cmpistro(A, LA, B, LB, M) \ | 379 #define _mm_cmpistro(A, B, M) \ |
383 __builtin_ia32_pcmpistrio128((A), (LA), (B), (LB), (M)) | 380 __builtin_ia32_pcmpistrio128((A), (B), (M)) |
384 #define _mm_cmpistrs(A, LA, B, LB, M) \ | 381 #define _mm_cmpistrs(A, B, M) \ |
385 __builtin_ia32_pcmpistris128((A), (LA), (B), (LB), (M)) | 382 __builtin_ia32_pcmpistris128((A), (B), (M)) |
386 #define _mm_cmpistrz(A, LA, B, LB, M) \ | 383 #define _mm_cmpistrz(A, B, M) \ |
387 __builtin_ia32_pcmpistriz128((A), (LA), (B), (LB), (M)) | 384 __builtin_ia32_pcmpistriz128((A), (B), (M)) |
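Note: the implicit-length (istr) flag-reading macros previously declared
explicit-length parameters (LA, LB) that the PCMPISTRI builtins do not
take; the new column drops them so these match the two-string-plus-mode
form of _mm_cmpistri above. A hedged usage sketch (helper name
illustrative, and it assumes neither block contains a zero byte, since the
implicit-length forms stop at a NUL):

    #include <smmintrin.h>

    /* Nonzero if any byte of a and b differs: equal-each comparison with
       negated polarity, reading the carry flag via _mm_cmpistrc. */
    int blocks_differ(__m128i a, __m128i b)
    {
      return _mm_cmpistrc(a, b, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH |
                                _SIDD_NEGATIVE_POLARITY);
    }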
388 | 385 |
389 #define _mm_cmpestra(A, LA, B, LB, M) \ | 386 #define _mm_cmpestra(A, LA, B, LB, M) \ |
390 __builtin_ia32_pcmpestria128((A), (LA), (B), (LB), (M)) | 387 __builtin_ia32_pcmpestria128((A), (LA), (B), (LB), (M)) |
391 #define _mm_cmpestrc(A, LA, B, LB, M) \ | 388 #define _mm_cmpestrc(A, LA, B, LB, M) \ |
392 __builtin_ia32_pcmpestric128((A), (LA), (B), (LB), (M)) | 389 __builtin_ia32_pcmpestric128((A), (LA), (B), (LB), (M)) |
393 #define _mm_cmpestro(A, LA, B, LB, M) \ | 390 #define _mm_cmpestro(A, LA, B, LB, M) \ |
394 __builtin_ia32_pcmpestrio128((A), (LA), (B), (LB), (M)) | 391 __builtin_ia32_pcmpestrio128((A), (LA), (B), (LB), (M)) |
395 #define _mm_cmpestrs(A, LA, B, LB, M) \ | 392 #define _mm_cmpestrs(A, LA, B, LB, M) \ |
396 __builtin_ia32_pcmpestris128((A), (LA), (B), (LB), (M)) | 393 __builtin_ia32_pcmpestris128((A), (LA), (B), (LB), (M)) |
397 #define _mm_cmpestrz(A, LA, B, LB, M) \ | 394 #define _mm_cmpestrz(A, LA, B, LB, M) \ |
(...skipping 45 matching lines...)
443 _mm_popcnt_u64(unsigned long long __A) | 440 _mm_popcnt_u64(unsigned long long __A) |
444 { | 441 { |
445 return __builtin_popcountll(__A); | 442 return __builtin_popcountll(__A); |
446 } | 443 } |
447 #endif /* __x86_64__ */ | 444 #endif /* __x86_64__ */ |
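Note: _mm_popcnt_u64 lowers to __builtin_popcountll, i.e. the POPCNT
instruction when SSE4.2 is enabled. A small usage sketch (helper name
illustrative):

    #include <smmintrin.h>

    /* Count set bits across an n-word bitmap. */
    unsigned bits_set(const unsigned long long *words, int n)
    {
      unsigned total = 0;
      for (int i = 0; i < n; ++i)
        total += (unsigned)_mm_popcnt_u64(words[i]);
      return total;
    }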
448 | 445 |
449 #endif /* __SSE4_2__ */ | 446 #endif /* __SSE4_2__ */ |
450 #endif /* __SSE4_1__ */ | 447 #endif /* __SSE4_1__ */ |
451 | 448 |
452 #endif /* _SMMINTRIN_H */ | 449 #endif /* _SMMINTRIN_H */ |