| OLD | NEW |
| 1 /*===---- smmintrin.h - SSE4 intrinsics ------------------------------------=== | 1 /*===---- smmintrin.h - SSE4 intrinsics ------------------------------------=== |
| 2 * | 2 * |
| 3 * Permission is hereby granted, free of charge, to any person obtaining a copy | 3 * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 4 * of this software and associated documentation files (the "Software"), to deal | 4 * of this software and associated documentation files (the "Software"), to deal |
| 5 * in the Software without restriction, including without limitation the rights | 5 * in the Software without restriction, including without limitation the rights |
| 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 7 * copies of the Software, and to permit persons to whom the Software is | 7 * copies of the Software, and to permit persons to whom the Software is |
| 8 * furnished to do so, subject to the following conditions: | 8 * furnished to do so, subject to the following conditions: |
| 9 * | 9 * |
| 10 * The above copyright notice and this permission notice shall be included in | 10 * The above copyright notice and this permission notice shall be included in |
| (...skipping 221 matching lines...) | (...skipping 221 matching lines...) |
| 232 } | 232 } |
| 233 | 233 |
| 234 static __inline__ int __attribute__((__always_inline__, __nodebug__)) | 234 static __inline__ int __attribute__((__always_inline__, __nodebug__)) |
| 235 _mm_testnzc_si128(__m128i __M, __m128i __V) | 235 _mm_testnzc_si128(__m128i __M, __m128i __V) |
| 236 { | 236 { |
| 237 return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V); | 237 return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V); |
| 238 } | 238 } |
| 239 | 239 |
| 240 #define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V))) | 240 #define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V))) |
| 241 #define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V)) | 241 #define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V)) |
| 242 #define _mm_test_all_zeros(M, V) _mm_testz_si128 ((V), (V)) | 242 #define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V)) |
| 243 | 243 |
| 244 /* SSE4 64-bit Packed Integer Comparisons. */ | 244 /* SSE4 64-bit Packed Integer Comparisons. */ |
| 245 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) | 245 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) |
| 246 _mm_cmpeq_epi64(__m128i __V1, __m128i __V2) | 246 _mm_cmpeq_epi64(__m128i __V1, __m128i __V2) |
| 247 { | 247 { |
| 248 return (__m128i) __builtin_ia32_pcmpeqq((__v2di)__V1, (__v2di)__V2); | 248 return (__m128i)((__v2di)__V1 == (__v2di)__V2); |
| 249 } | 249 } |
| 250 | 250 |
| 251 /* SSE4 Packed Integer Sign-Extension. */ | 251 /* SSE4 Packed Integer Sign-Extension. */ |
| 252 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) | 252 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) |
| 253 _mm_cvtepi8_epi16(__m128i __V) | 253 _mm_cvtepi8_epi16(__m128i __V) |
| 254 { | 254 { |
| 255 return (__m128i) __builtin_ia32_pmovsxbw128((__v16qi) __V); | 255 return (__m128i) __builtin_ia32_pmovsxbw128((__v16qi) __V); |
| 256 } | 256 } |
| 257 | 257 |
| 258 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) | 258 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) |
| (...skipping 102 matching lines...) | (...skipping 102 matching lines...) |
| 361 /* These macros are used in _mm_cmpXstri() to specify the return. */ | 361 /* These macros are used in _mm_cmpXstri() to specify the return. */ |
| 362 #define _SIDD_BIT_MASK 0x00 | 362 #define _SIDD_BIT_MASK 0x00 |
| 363 #define _SIDD_UNIT_MASK 0x40 | 363 #define _SIDD_UNIT_MASK 0x40 |
| 364 | 364 |
| 365 /* SSE4.2 Packed Comparison Intrinsics. */ | 365 /* SSE4.2 Packed Comparison Intrinsics. */ |
| 366 #define _mm_cmpistrm(A, B, M) __builtin_ia32_pcmpistrm128((A), (B), (M)) | 366 #define _mm_cmpistrm(A, B, M) __builtin_ia32_pcmpistrm128((A), (B), (M)) |
| 367 #define _mm_cmpistri(A, B, M) __builtin_ia32_pcmpistri128((A), (B), (M)) | 367 #define _mm_cmpistri(A, B, M) __builtin_ia32_pcmpistri128((A), (B), (M)) |
| 368 | 368 |
| 369 #define _mm_cmpestrm(A, LA, B, LB, M) \ | 369 #define _mm_cmpestrm(A, LA, B, LB, M) \ |
| 370 __builtin_ia32_pcmpestrm128((A), (LA), (B), (LB), (M)) | 370 __builtin_ia32_pcmpestrm128((A), (LA), (B), (LB), (M)) |
| 371 #define _mm_cmpestri(X, LX, Y, LY, M) \ | 371 #define _mm_cmpestri(A, LA, B, LB, M) \ |
| 372 __builtin_ia32_pcmpestri128((A), (LA), (B), (LB), (M)) | 372 __builtin_ia32_pcmpestri128((A), (LA), (B), (LB), (M)) |
| 373 | 373 |
| 374 /* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */ | 374 /* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */ |
| 375 #define _mm_cmpistra(A, B, M) \ | 375 #define _mm_cmpistra(A, B, M) \ |
| 376 __builtin_ia32_pcmpistria128((A), (B), (M)) | 376 __builtin_ia32_pcmpistria128((A), (B), (M)) |
| 377 #define _mm_cmpistrc(A, B, M) \ | 377 #define _mm_cmpistrc(A, B, M) \ |
| 378 __builtin_ia32_pcmpistric128((A), (B), (M)) | 378 __builtin_ia32_pcmpistric128((A), (B), (M)) |
| 379 #define _mm_cmpistro(A, B, M) \ | 379 #define _mm_cmpistro(A, B, M) \ |
| 380 __builtin_ia32_pcmpistrio128((A), (B), (M)) | 380 __builtin_ia32_pcmpistrio128((A), (B), (M)) |
| 381 #define _mm_cmpistrs(A, B, M) \ | 381 #define _mm_cmpistrs(A, B, M) \ |
| 382 __builtin_ia32_pcmpistris128((A), (B), (M)) | 382 __builtin_ia32_pcmpistris128((A), (B), (M)) |
| 383 #define _mm_cmpistrz(A, B, M) \ | 383 #define _mm_cmpistrz(A, B, M) \ |
| 384 __builtin_ia32_pcmpistriz128((A), (B), (M)) | 384 __builtin_ia32_pcmpistriz128((A), (B), (M)) |
| 385 | 385 |
| 386 #define _mm_cmpestra(A, LA, B, LB, M) \ | 386 #define _mm_cmpestra(A, LA, B, LB, M) \ |
| 387 __builtin_ia32_pcmpestria128((A), (LA), (B), (LB), (M)) | 387 __builtin_ia32_pcmpestria128((A), (LA), (B), (LB), (M)) |
| 388 #define _mm_cmpestrc(A, LA, B, LB, M) \ | 388 #define _mm_cmpestrc(A, LA, B, LB, M) \ |
| 389 __builtin_ia32_pcmpestric128((A), (LA), (B), (LB), (M)) | 389 __builtin_ia32_pcmpestric128((A), (LA), (B), (LB), (M)) |
| 390 #define _mm_cmpestro(A, LA, B, LB, M) \ | 390 #define _mm_cmpestro(A, LA, B, LB, M) \ |
| 391 __builtin_ia32_pcmpestrio128((A), (LA), (B), (LB), (M)) | 391 __builtin_ia32_pcmpestrio128((A), (LA), (B), (LB), (M)) |
| 392 #define _mm_cmpestrs(A, LA, B, LB, M) \ | 392 #define _mm_cmpestrs(A, LA, B, LB, M) \ |
| 393 __builtin_ia32_pcmpestris128((A), (LA), (B), (LB), (M)) | 393 __builtin_ia32_pcmpestris128((A), (LA), (B), (LB), (M)) |
| 394 #define _mm_cmpestrz(A, LA, B, LB, M) \ | 394 #define _mm_cmpestrz(A, LA, B, LB, M) \ |
| 395 __builtin_ia32_pcmpestriz128((A), (LA), (B), (LB), (M)) | 395 __builtin_ia32_pcmpestriz128((A), (LA), (B), (LB), (M)) |
| 396 | 396 |
| 397 /* SSE4.2 Compare Packed Data -- Greater Than. */ | 397 /* SSE4.2 Compare Packed Data -- Greater Than. */ |
| 398 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) | 398 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) |
| 399 _mm_cmpgt_epi64(__m128i __V1, __m128i __V2) | 399 _mm_cmpgt_epi64(__m128i __V1, __m128i __V2) |
| 400 { | 400 { |
| 401 return __builtin_ia32_pcmpgtq((__v2di)__V1, (__v2di)__V2); | 401 return (__m128i)((__v2di)__V1 > (__v2di)__V2); |
| 402 } | 402 } |
| 403 | 403 |
| 404 /* SSE4.2 Accumulate CRC32. */ | 404 /* SSE4.2 Accumulate CRC32. */ |
| 405 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) | 405 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) |
| 406 _mm_crc32_u8(unsigned int __C, unsigned char __D) | 406 _mm_crc32_u8(unsigned int __C, unsigned char __D) |
| 407 { | 407 { |
| 408 return __builtin_ia32_crc32qi(__C, __D); | 408 return __builtin_ia32_crc32qi(__C, __D); |
| 409 } | 409 } |
| 410 | 410 |
| 411 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) | 411 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) |
| (...skipping 28 matching lines...) | (...skipping 28 matching lines...) |
| 440 _mm_popcnt_u64(unsigned long long __A) | 440 _mm_popcnt_u64(unsigned long long __A) |
| 441 { | 441 { |
| 442 return __builtin_popcountll(__A); | 442 return __builtin_popcountll(__A); |
| 443 } | 443 } |
| 444 #endif /* __x86_64__ */ | 444 #endif /* __x86_64__ */ |
| 445 | 445 |
| 446 #endif /* __SSE4_2__ */ | 446 #endif /* __SSE4_2__ */ |
| 447 #endif /* __SSE4_1__ */ | 447 #endif /* __SSE4_1__ */ |
| 448 | 448 |
| 449 #endif /* _SMMINTRIN_H */ | 449 #endif /* _SMMINTRIN_H */ |
| OLD | NEW |