OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "libyuv/row.h" | 11 #include "libyuv/row.h" |
12 | 12 |
13 #if !defined(LIBYUV_DISABLE_X86) && defined(_M_X64) && \ | 13 #if !defined(LIBYUV_DISABLE_X86) && defined(_M_X64) && \ |
14 defined(_MSC_VER) && !defined(__clang__) | 14 defined(_MSC_VER) && !defined(__clang__) |
15 #include <emmintrin.h> | 15 #include <emmintrin.h> |
16 #include <tmmintrin.h> // For _mm_maddubs_epi16 | 16 #include <tmmintrin.h> // For _mm_maddubs_epi16 |
17 #endif | 17 #endif |
18 | 18 |
19 #ifdef __cplusplus | 19 #ifdef __cplusplus |
20 namespace libyuv { | 20 namespace libyuv { |
21 extern "C" { | 21 extern "C" { |
22 #endif | 22 #endif |
23 | 23 |
24 // This module is for Visual C 32/64 bit and clangcl 32 bit | 24 // This module is for Visual C 32/64 bit and clangcl 32 bit |
25 #if !defined(LIBYUV_DISABLE_X86) && \ | 25 #if !defined(LIBYUV_DISABLE_X86) && \ |
26 (defined(_M_IX86) || (defined(_M_X64) && !defined(__clang__))) | 26 (defined(_M_IX86) || (defined(_M_X64) && !defined(__clang__))) |
27 | 27 |
28 #define KUVTOB 0 | |
29 #define KUVTOG 32 | |
30 #define KUVTOR 64 | |
31 #define KUVBIASB 96 | |
32 #define KUVBIASG 128 | |
33 #define KUVBIASR 160 | |
34 #define KYTORGB 192 | |
35 | |
36 // BT.601 YUV to RGB reference | |
37 // R = (Y - 16) * 1.164 - V * -1.596 | |
38 // G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813 | |
39 // B = (Y - 16) * 1.164 - U * -2.018 | |
40 | |
41 // Y contribution to R,G,B. Scale and bias. | |
42 // TODO(fbarchard): Consider moving constants into a common header. | |
43 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ | |
44 #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ | |
45 | |
46 // U and V contributions to R,G,B. | |
47 #define UB -128 /* max(-128, round(-2.018 * 64)) */ | |
48 #define UG 25 /* round(0.391 * 64) */ | |
49 #define VG 52 /* round(0.813 * 64) */ | |
50 #define VR -102 /* round(-1.596 * 64) */ | |
51 | |
52 // Bias values to subtract 16 from Y and 128 from U and V. | |
53 #define BB (UB * 128 + YGB) | |
54 #define BG (UG * 128 + VG * 128 + YGB) | |
55 #define BR (VR * 128 + YGB) | |
56 | |
57 // BT.601 constants for YUV to RGB. | |
58 YuvConstants SIMD_ALIGNED(kYuvConstants) = { | |
59 { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, | |
60 UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 }, | |
61 { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, | |
62 UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG }, | |
63 { 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, | |
64 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR }, | |
65 { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB }, | |
66 { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG }, | |
67 { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR }, | |
68 { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG } | |
69 }; | |
70 | |
71 // BT.601 constants for NV21 where chroma plane is VU instead of UV. | |
72 YuvConstants SIMD_ALIGNED(kYvuConstants) = { | |
73 { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, | |
74 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB }, | |
75 { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, | |
76 VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG }, | |
77 { VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, | |
78 VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 }, | |
79 { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB }, | |
80 { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG }, | |
81 { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR }, | |
82 { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG } | |
83 }; | |
84 | |
85 #undef YG | |
86 #undef YGB | |
87 #undef UB | |
88 #undef UG | |
89 #undef VG | |
90 #undef VR | |
91 #undef BB | |
92 #undef BG | |
93 #undef BR | |
94 | |
95 // JPEG YUV to RGB reference | |
96 // * R = Y - V * -1.40200 | |
97 // * G = Y - U * 0.34414 - V * 0.71414 | |
98 // * B = Y - U * -1.77200 | |
99 | |
100 // Y contribution to R,G,B. Scale and bias. | |
101 // TODO(fbarchard): Consider moving constants into a common header. | |
102 #define YGJ 16320 /* round(1.000 * 64 * 256 * 256 / 257) */ | |
103 #define YGBJ 32 /* 64 / 2 */ | |
104 | |
105 // U and V contributions to R,G,B. | |
106 #define UBJ -113 /* round(-1.77200 * 64) */ | |
107 #define UGJ 22 /* round(0.34414 * 64) */ | |
108 #define VGJ 46 /* round(0.71414 * 64) */ | |
109 #define VRJ -90 /* round(-1.40200 * 64) */ | |
110 | |
111 // Bias values to round, and subtract 128 from U and V. | |
112 #define BBJ (UBJ * 128 + YGBJ) | |
113 #define BGJ (UGJ * 128 + VGJ * 128 + YGBJ) | |
114 #define BRJ (VRJ * 128 + YGBJ) | |
115 | |
116 // JPEG constants for YUV to RGB. | |
117 YuvConstants SIMD_ALIGNED(kYuvJConstants) = { | |
118 { UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, | |
119 UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0 }, | |
120 { UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, | |
121 UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, | |
122 UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, | |
123 UGJ, VGJ, UGJ, VGJ, UGJ, VGJ, UGJ, VGJ }, | |
124 { 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, | |
125 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ, 0, VRJ }, | |
126 { BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, | |
127 BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ, BBJ }, | |
128 { BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, | |
129 BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ, BGJ }, | |
130 { BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, | |
131 BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ, BRJ }, | |
132 { YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, | |
133 YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ } | |
134 }; | |
135 | |
136 #undef YGJ | |
137 #undef YGBJ | |
138 #undef UBJ | |
139 #undef UGJ | |
140 #undef VGJ | |
141 #undef VRJ | |
142 #undef BBJ | |
143 #undef BGJ | |
144 #undef BRJ | |
145 | |
146 // BT.709 YUV to RGB reference | |
147 // * R = Y - V * -1.28033 | |
148 // * G = Y - U * 0.21482 - V * 0.38059 | |
149 // * B = Y - U * -2.12798 | |
150 | |
151 // Y contribution to R,G,B. Scale and bias. | |
152 // TODO(fbarchard): Consider moving constants into a common header. | |
153 #define YGH 16320 /* round(1.000 * 64 * 256 * 256 / 257) */ | |
154 #define YGBH 32 /* 64 / 2 */ | |
155 | |
156 // U and V contributions to R,G,B. | |
157 #define UBH -128 /* max(-128, round(-2.12798 * 64)) */ | |
158 #define UGH 14 /* round(0.21482 * 64) */ | |
159 #define VGH 24 /* round(0.38059 * 64) */ | |
160 #define VRH -82 /* round(-1.28033 * 64) */ | |
161 | |
162 // Bias values to round, and subtract 128 from U and V. | |
163 #define BBH (UBH * 128 + YGBH) | |
164 #define BGH (UGH * 128 + VGH * 128 + YGBH) | |
165 #define BRH (VRH * 128 + YGBH) | |
166 | |
167 // BT.709 constants for YUV to RGB. | |
168 YuvConstants SIMD_ALIGNED(kYuvHConstants) = { | |
169 { UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0, | |
170 UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0 }, | |
171 { UGH, VGH, UGH, VGH, UGH, VGH, UGH, VGH, | |
172 UGH, VGH, UGH, VGH, UGH, VGH, UGH, VGH, | |
173 UGH, VGH, UGH, VGH, UGH, VGH, UGH, VGH, | |
174 UGH, VGH, UGH, VGH, UGH, VGH, UGH, VGH }, | |
175 { 0, VRH, 0, VRH, 0, VRH, 0, VRH, 0, VRH, 0, VRH, 0, VRH, 0, VRH, | |
176 0, VRH, 0, VRH, 0, VRH, 0, VRH, 0, VRH, 0, VRH, 0, VRH, 0, VRH }, | |
177 { BBH, BBH, BBH, BBH, BBH, BBH, BBH, BBH, | |
178 BBH, BBH, BBH, BBH, BBH, BBH, BBH, BBH }, | |
179 { BGH, BGH, BGH, BGH, BGH, BGH, BGH, BGH, | |
180 BGH, BGH, BGH, BGH, BGH, BGH, BGH, BGH }, | |
181 { BRH, BRH, BRH, BRH, BRH, BRH, BRH, BRH, | |
182 BRH, BRH, BRH, BRH, BRH, BRH, BRH, BRH }, | |
183 { YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH, | |
184 YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH } | |
185 }; | |
186 | |
187 #undef YGH | |
188 #undef YGBH | |
189 #undef UBH | |
190 #undef UGH | |
191 #undef VGH | |
192 #undef VRH | |
193 #undef BBH | |
194 #undef BGH | |
195 #undef BRH | |
196 | |
197 // 64 bit | 28 // 64 bit |
198 #if defined(_M_X64) | 29 #if defined(_M_X64) |
199 | 30 |
200 // Read 4 UV from 422, upsample to 8 UV. | 31 // Read 4 UV from 422, upsample to 8 UV. |
201 #define READYUV422 \ | 32 #define READYUV422 \ |
202 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ | 33 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ |
203 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ | 34 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ |
204 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ | 35 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ |
205 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ | 36 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ |
206 u_buf += 4; | 37 u_buf += 4; |
(...skipping 6240 matching lines...)
6447 } | 6278 } |
6448 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6279 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
6449 | 6280 |
6450 #endif // defined(_M_X64) | 6281 #endif // defined(_M_X64) |
6451 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6282 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) |
6452 | 6283 |
6453 #ifdef __cplusplus | 6284 #ifdef __cplusplus |
6454 } // extern "C" | 6285 } // extern "C" |
6455 } // namespace libyuv | 6286 } // namespace libyuv |
6456 #endif | 6287 #endif |
OLD | NEW |