OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "libyuv/row.h" | 11 #include "libyuv/row.h" |
12 | 12 |
13 #if defined (_M_X64) | 13 #if defined (_M_X64) && !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) |
14 #include <emmintrin.h> | 14 #include <emmintrin.h> |
15 #include <tmmintrin.h> // For _mm_maddubs_epi16 | 15 #include <tmmintrin.h> // For _mm_maddubs_epi16 |
16 #endif | 16 #endif |
17 | 17 |
18 #ifdef __cplusplus | 18 #ifdef __cplusplus |
19 namespace libyuv { | 19 namespace libyuv { |
20 extern "C" { | 20 extern "C" { |
21 #endif | 21 #endif |
22 | 22 |
23 // This module is for Visual C. | 23 // This module is for Visual C. |
24 #if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) | 24 #if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \ |
| 25 (defined(_M_IX86) || defined(_M_X64)) |
25 | 26 |
26 #define YG 74 /* (int8)(1.164 * 64 + 0.5) */ | 27 #define YG 74 /* (int8)(1.164 * 64 + 0.5) */ |
27 | 28 |
28 #define UB 127 /* min(127,(int8)(2.018 * 64)) */ | 29 #define UB 127 /* min(127,(int8)(2.018 * 64)) */ |
29 #define UG -25 /* (int8)(-0.391 * 64 - 0.5) */ | 30 #define UG -25 /* (int8)(-0.391 * 64 - 0.5) */ |
30 #define UR 0 | 31 #define UR 0 |
31 | 32 |
32 #define VB 0 | 33 #define VB 0 |
33 #define VG -52 /* (int8)(-0.813 * 64 - 0.5) */ | 34 #define VG -52 /* (int8)(-0.813 * 64 - 0.5) */ |
34 #define VR 102 /* (int8)(1.596 * 64 + 0.5) */ | 35 #define VR 102 /* (int8)(1.596 * 64 + 0.5) */ |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
71 // 64 bit | 72 // 64 bit |
72 #if defined(_M_X64) | 73 #if defined(_M_X64) |
73 | 74 |
74 // Aligned destination version. | 75 // Aligned destination version. |
75 __declspec(align(16)) | 76 __declspec(align(16)) |
76 void I422ToARGBRow_SSSE3(const uint8* y_buf, | 77 void I422ToARGBRow_SSSE3(const uint8* y_buf, |
77 const uint8* u_buf, | 78 const uint8* u_buf, |
78 const uint8* v_buf, | 79 const uint8* v_buf, |
79 uint8* dst_argb, | 80 uint8* dst_argb, |
80 int width) { | 81 int width) { |
81 | |
82 __m128i xmm0, xmm1, xmm2, xmm3; | 82 __m128i xmm0, xmm1, xmm2, xmm3; |
83 const __m128i xmm5 = _mm_set1_epi8(-1); | 83 const __m128i xmm5 = _mm_set1_epi8(-1); |
84 const __m128i xmm4 = _mm_setzero_si128(); | 84 const __m128i xmm4 = _mm_setzero_si128(); |
85 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; | 85 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; |
86 | 86 |
87 while (width > 0) { | 87 while (width > 0) { |
88 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); | 88 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); |
89 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); | 89 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); |
90 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); | 90 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); |
91 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); | 91 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
125 width -= 8; | 125 width -= 8; |
126 } | 126 } |
127 } | 127 } |
128 | 128 |
129 // Unaligned destination version. | 129 // Unaligned destination version. |
130 void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, | 130 void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, |
131 const uint8* u_buf, | 131 const uint8* u_buf, |
132 const uint8* v_buf, | 132 const uint8* v_buf, |
133 uint8* dst_argb, | 133 uint8* dst_argb, |
134 int width) { | 134 int width) { |
135 | |
136 __m128i xmm0, xmm1, xmm2, xmm3; | 135 __m128i xmm0, xmm1, xmm2, xmm3; |
137 const __m128i xmm5 = _mm_set1_epi8(-1); | 136 const __m128i xmm5 = _mm_set1_epi8(-1); |
138 const __m128i xmm4 = _mm_setzero_si128(); | 137 const __m128i xmm4 = _mm_setzero_si128(); |
139 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; | 138 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; |
140 | 139 |
141 while (width > 0) { | 140 while (width > 0) { |
142 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); | 141 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); |
143 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); | 142 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); |
144 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); | 143 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); |
145 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); | 144 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); |
(...skipping 7249 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
7395 } | 7394 } |
7396 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 7395 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
7397 | 7396 |
7398 #endif // defined(_M_X64) | 7397 #endif // defined(_M_X64) |
7399 #endif // !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) | 7398 #endif // !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) |
7400 | 7399 |
7401 #ifdef __cplusplus | 7400 #ifdef __cplusplus |
7402 } // extern "C" | 7401 } // extern "C" |
7403 } // namespace libyuv | 7402 } // namespace libyuv |
7404 #endif | 7403 #endif |
OLD | NEW |