| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "libyuv/row.h" | 11 #include "libyuv/row.h" |
| 12 | 12 |
| 13 #if defined (_M_X64) | 13 #if defined (_M_X64) && !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) |
| 14 #include <emmintrin.h> | 14 #include <emmintrin.h> |
| 15 #include <tmmintrin.h> // For _mm_maddubs_epi16 | 15 #include <tmmintrin.h> // For _mm_maddubs_epi16 |
| 16 #endif | 16 #endif |
| 17 | 17 |
| 18 #ifdef __cplusplus | 18 #ifdef __cplusplus |
| 19 namespace libyuv { | 19 namespace libyuv { |
| 20 extern "C" { | 20 extern "C" { |
| 21 #endif | 21 #endif |
| 22 | 22 |
| 23 // This module is for Visual C. | 23 // This module is for Visual C targeting x86 (32 bit) or x64 (64 bit). |
| 24 #if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) | 24 #if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \ |
| 25 (defined(_M_IX86) || defined(_M_X64)) |
| 25 | 26 |
| 26 #define YG 74 /* (int8)(1.164 * 64 + 0.5) */ | 27 #define YG 74 /* (int8)(1.164 * 64 + 0.5) */ |
| 27 | 28 |
| 28 #define UB 127 /* min(127,(int8)(2.018 * 64)) */ | 29 #define UB 127 /* min(127,(int8)(2.018 * 64)) */ |
| 29 #define UG -25 /* (int8)(-0.391 * 64 - 0.5) */ | 30 #define UG -25 /* (int8)(-0.391 * 64 - 0.5) */ |
| 30 #define UR 0 | 31 #define UR 0 |
| 31 | 32 |
| 32 #define VB 0 | 33 #define VB 0 |
| 33 #define VG -52 /* (int8)(-0.813 * 64 - 0.5) */ | 34 #define VG -52 /* (int8)(-0.813 * 64 - 0.5) */ |
| 34 #define VR 102 /* (int8)(1.596 * 64 + 0.5) */ | 35 #define VR 102 /* (int8)(1.596 * 64 + 0.5) */ |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 71 // 64 bit | 72 // 64 bit |
| 72 #if defined(_M_X64) | 73 #if defined(_M_X64) |
| 73 | 74 |
| 74 // Aligned destination version. | 75 // Aligned destination version. |
| 75 __declspec(align(16)) | 76 __declspec(align(16)) |
| 76 void I422ToARGBRow_SSSE3(const uint8* y_buf, | 77 void I422ToARGBRow_SSSE3(const uint8* y_buf, |
| 77 const uint8* u_buf, | 78 const uint8* u_buf, |
| 78 const uint8* v_buf, | 79 const uint8* v_buf, |
| 79 uint8* dst_argb, | 80 uint8* dst_argb, |
| 80 int width) { | 81 int width) { |
| 81 | |
| 82 __m128i xmm0, xmm1, xmm2, xmm3; | 82 __m128i xmm0, xmm1, xmm2, xmm3; |
| 83 const __m128i xmm5 = _mm_set1_epi8(-1); | 83 const __m128i xmm5 = _mm_set1_epi8(-1); |
| 84 const __m128i xmm4 = _mm_setzero_si128(); | 84 const __m128i xmm4 = _mm_setzero_si128(); |
| 85 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; | 85 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; |
| 86 | 86 |
| 87 while (width > 0) { | 87 while (width > 0) { |
| 88 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); | 88 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); |
| 89 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); | 89 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); |
| 90 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); | 90 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); |
| 91 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); | 91 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 125 width -= 8; | 125 width -= 8; |
| 126 } | 126 } |
| 127 } | 127 } |
| 128 | 128 |
| 129 // Unaligned destination version. | 129 // Unaligned destination version. |
| 130 void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, | 130 void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, |
| 131 const uint8* u_buf, | 131 const uint8* u_buf, |
| 132 const uint8* v_buf, | 132 const uint8* v_buf, |
| 133 uint8* dst_argb, | 133 uint8* dst_argb, |
| 134 int width) { | 134 int width) { |
| 135 | |
| 136 __m128i xmm0, xmm1, xmm2, xmm3; | 135 __m128i xmm0, xmm1, xmm2, xmm3; |
| 137 const __m128i xmm5 = _mm_set1_epi8(-1); | 136 const __m128i xmm5 = _mm_set1_epi8(-1); |
| 138 const __m128i xmm4 = _mm_setzero_si128(); | 137 const __m128i xmm4 = _mm_setzero_si128(); |
| 139 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; | 138 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; |
| 140 | 139 |
| 141 while (width > 0) { | 140 while (width > 0) { |
| 142 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); | 141 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); |
| 143 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); | 142 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); |
| 144 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); | 143 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); |
| 145 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); | 144 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); |
| (...skipping 7249 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 7395 } | 7394 } |
| 7396 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 7395 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
| 7397 | 7396 |
| 7398 #endif // defined(_M_X64) | 7397 #endif // defined(_M_X64) |
| 7399 #endif // !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) | 7398 #endif // !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) |
| 7400 | 7399 |
| 7401 #ifdef __cplusplus | 7400 #ifdef __cplusplus |
| 7402 } // extern "C" | 7401 } // extern "C" |
| 7403 } // namespace libyuv | 7402 } // namespace libyuv |
| 7404 #endif | 7403 #endif |
| OLD | NEW |