OLD | NEW |
1 // Copyright 2010 Google Inc. All Rights Reserved. | 1 // Copyright 2010 Google Inc. All Rights Reserved. |
2 // | 2 // |
3 // Use of this source code is governed by a BSD-style license | 3 // Use of this source code is governed by a BSD-style license |
4 // that can be found in the COPYING file in the root of the source | 4 // that can be found in the COPYING file in the root of the source |
5 // tree. An additional intellectual property rights grant can be found | 5 // tree. An additional intellectual property rights grant can be found |
6 // in the file PATENTS. All contributing project authors may | 6 // in the file PATENTS. All contributing project authors may |
7 // be found in the AUTHORS file in the root of the source tree. | 7 // be found in the AUTHORS file in the root of the source tree. |
8 // ----------------------------------------------------------------------------- | 8 // ----------------------------------------------------------------------------- |
9 // | 9 // |
10 // inline YUV<->RGB conversion function | 10 // inline YUV<->RGB conversion function |
11 // | 11 // |
12 // The exact naming is Y'CbCr, following the ITU-R BT.601 standard. | 12 // The exact naming is Y'CbCr, following the ITU-R BT.601 standard. |
13 // More information at: http://en.wikipedia.org/wiki/YCbCr | 13 // More information at: http://en.wikipedia.org/wiki/YCbCr |
14 // Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16 | 14 // Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16 |
15 // U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128 | 15 // U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128 |
16 // V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128 | 16 // V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128 |
17 // We use 16bit fixed point operations for RGB->YUV conversion (YUV_FIX). | 17 // We use 16bit fixed point operations for RGB->YUV conversion (YUV_FIX). |
18 // | 18 // |
19 // For the Y'CbCr to RGB conversion, the BT.601 specification reads: | 19 // For the Y'CbCr to RGB conversion, the BT.601 specification reads: |
20 // R = 1.164 * (Y-16) + 1.596 * (V-128) | 20 // R = 1.164 * (Y-16) + 1.596 * (V-128) |
21 // G = 1.164 * (Y-16) - 0.813 * (V-128) - 0.391 * (U-128) | 21 // G = 1.164 * (Y-16) - 0.813 * (V-128) - 0.391 * (U-128) |
22 // B = 1.164 * (Y-16) + 2.018 * (U-128) | 22 // B = 1.164 * (Y-16) + 2.018 * (U-128) |
23 // where Y is in the [16,235] range, and U/V in the [16,240] range. | 23 // where Y is in the [16,235] range, and U/V in the [16,240] range. |
24 // In the table-lookup version (WEBP_YUV_USE_TABLE), the common factor | |
25 // "1.164 * (Y-16)" can be handled as an offset in the VP8kClip[] table. | |
26 // So in this case the formulae should read: | |
27 // R = 1.164 * [Y + 1.371 * (V-128) ] - 18.624 | |
28 // G = 1.164 * [Y - 0.698 * (V-128) - 0.336 * (U-128)] - 18.624 | |
29 // B = 1.164 * [Y + 1.733 * (U-128)] - 18.624 | |
30 // once factorized. | |
31 // For YUV->RGB conversion, only 14bit fixed precision is used (YUV_FIX2). | |
32 // That's the maximum possible for a convenient ARM implementation. | |
33 // | 24 // |
| 25 // The fixed-point implementation used here is: |
| 26 // R = (19077 . y + 26149 . v - 14234) >> 6 |
| 27 // G = (19077 . y - 6419 . u - 13320 . v + 8708) >> 6 |
| 28 // B = (19077 . y + 33050 . u - 17685) >> 6 |
| 29 // where the '.' operator is the mulhi_epu16 variant: |
| 30 // a . b = ((a << 8) * b) >> 16 |
| 31 // that preserves 8 bits of fractional precision before final descaling. |
| 32 |
34 // Author: Skal (pascal.massimino@gmail.com) | 33 // Author: Skal (pascal.massimino@gmail.com) |
35 | 34 |
36 #ifndef WEBP_DSP_YUV_H_ | 35 #ifndef WEBP_DSP_YUV_H_ |
37 #define WEBP_DSP_YUV_H_ | 36 #define WEBP_DSP_YUV_H_ |
38 | 37 |
39 #include "./dsp.h" | 38 #include "./dsp.h" |
40 #include "../dec/decode_vp8.h" | 39 #include "../dec/decode_vp8.h" |
41 | 40 |
42 // Define the following to use the LUT-based code: | |
43 // #define WEBP_YUV_USE_TABLE | |
44 | |
45 #if defined(WEBP_EXPERIMENTAL_FEATURES) | 41 #if defined(WEBP_EXPERIMENTAL_FEATURES) |
46 // Do NOT activate this feature for real compression. This is only experimental! | 42 // Do NOT activate this feature for real compression. This is only experimental! |
47 // This flag is for comparison purpose against JPEG's "YUVj" natural colorspace. | 43 // This flag is for comparison purpose against JPEG's "YUVj" natural colorspace. |
48 // This colorspace is close to Rec.601's Y'CbCr model with the notable | 44 // This colorspace is close to Rec.601's Y'CbCr model with the notable |
49 // difference of allowing larger range for luma/chroma. | 45 // difference of allowing larger range for luma/chroma. |
50 // See http://en.wikipedia.org/wiki/YCbCr#JPEG_conversion paragraph, and its | 46 // See http://en.wikipedia.org/wiki/YCbCr#JPEG_conversion paragraph, and its |
51 // difference with http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion | 47 // difference with http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion |
52 // #define USE_YUVj | 48 // #define USE_YUVj |
53 #endif | 49 #endif |
54 | 50 |
55 //------------------------------------------------------------------------------ | 51 //------------------------------------------------------------------------------ |
56 // YUV -> RGB conversion | 52 // YUV -> RGB conversion |
57 | 53 |
58 #ifdef __cplusplus | 54 #ifdef __cplusplus |
59 extern "C" { | 55 extern "C" { |
60 #endif | 56 #endif |
61 | 57 |
62 enum { | 58 enum { |
63 YUV_FIX = 16, // fixed-point precision for RGB->YUV | 59 YUV_FIX = 16, // fixed-point precision for RGB->YUV |
64 YUV_HALF = 1 << (YUV_FIX - 1), | 60 YUV_HALF = 1 << (YUV_FIX - 1), |
65 YUV_MASK = (256 << YUV_FIX) - 1, | 61 YUV_MASK = (256 << YUV_FIX) - 1, |
66 YUV_RANGE_MIN = -227, // min value of r/g/b output | 62 YUV_RANGE_MIN = -227, // min value of r/g/b output |
67 YUV_RANGE_MAX = 256 + 226, // max value of r/g/b output | 63 YUV_RANGE_MAX = 256 + 226, // max value of r/g/b output |
68 | 64 |
69 YUV_FIX2 = 14, // fixed-point precision for YUV->RGB | 65 YUV_FIX2 = 6, // fixed-point precision for YUV->RGB |
70 YUV_HALF2 = 1 << (YUV_FIX2 - 1), | 66 YUV_HALF2 = 1 << YUV_FIX2 >> 1, |
71 YUV_MASK2 = (256 << YUV_FIX2) - 1 | 67 YUV_MASK2 = (256 << YUV_FIX2) - 1 |
72 }; | 68 }; |
73 | 69 |
74 // These constants are 14b fixed-point version of ITU-R BT.601 constants. | 70 //------------------------------------------------------------------------------ |
75 #define kYScale 19077 // 1.164 = 255 / 219 | 71 // slower on x86 by ~7-8%, but bit-exact with the SSE2/NEON version |
76 #define kVToR 26149 // 1.596 = 255 / 112 * 0.701 | |
77 #define kUToG 6419 // 0.391 = 255 / 112 * 0.886 * 0.114 / 0.587 | |
78 #define kVToG 13320 // 0.813 = 255 / 112 * 0.701 * 0.299 / 0.587 | |
79 #define kUToB 33050 // 2.018 = 255 / 112 * 0.886 | |
80 #define kRCst (-kYScale * 16 - kVToR * 128 + YUV_HALF2) | |
81 #define kGCst (-kYScale * 16 + kUToG * 128 + kVToG * 128 + YUV_HALF2) | |
82 #define kBCst (-kYScale * 16 - kUToB * 128 + YUV_HALF2) | |
83 | 72 |
84 //------------------------------------------------------------------------------ | 73 static WEBP_INLINE int MultHi(int v, int coeff) { // _mm_mulhi_epu16 emulation |
85 | 74 return (v * coeff) >> 8; |
86 #if !defined(WEBP_YUV_USE_TABLE) | 75 } |
87 | |
88 // slower on x86 by ~7-8%, but bit-exact with the SSE2 version | |
89 | 76 |
90 static WEBP_INLINE int VP8Clip8(int v) { | 77 static WEBP_INLINE int VP8Clip8(int v) { |
91 return ((v & ~YUV_MASK2) == 0) ? (v >> YUV_FIX2) : (v < 0) ? 0 : 255; | 78 return ((v & ~YUV_MASK2) == 0) ? (v >> YUV_FIX2) : (v < 0) ? 0 : 255; |
92 } | 79 } |
93 | 80 |
94 static WEBP_INLINE int VP8YUVToR(int y, int v) { | 81 static WEBP_INLINE int VP8YUVToR(int y, int v) { |
95 return VP8Clip8(kYScale * y + kVToR * v + kRCst); | 82 return VP8Clip8(MultHi(y, 19077) + MultHi(v, 26149) - 14234); |
96 } | 83 } |
97 | 84 |
98 static WEBP_INLINE int VP8YUVToG(int y, int u, int v) { | 85 static WEBP_INLINE int VP8YUVToG(int y, int u, int v) { |
99 return VP8Clip8(kYScale * y - kUToG * u - kVToG * v + kGCst); | 86 return VP8Clip8(MultHi(y, 19077) - MultHi(u, 6419) - MultHi(v, 13320) + 8708); |
100 } | 87 } |
101 | 88 |
102 static WEBP_INLINE int VP8YUVToB(int y, int u) { | 89 static WEBP_INLINE int VP8YUVToB(int y, int u) { |
103 return VP8Clip8(kYScale * y + kUToB * u + kBCst); | 90 return VP8Clip8(MultHi(y, 19077) + MultHi(u, 33050) - 17685); |
104 } | 91 } |
105 | 92 |
106 static WEBP_INLINE void VP8YuvToRgb(int y, int u, int v, | 93 static WEBP_INLINE void VP8YuvToRgb(int y, int u, int v, |
107 uint8_t* const rgb) { | 94 uint8_t* const rgb) { |
108 rgb[0] = VP8YUVToR(y, v); | 95 rgb[0] = VP8YUVToR(y, v); |
109 rgb[1] = VP8YUVToG(y, u, v); | 96 rgb[1] = VP8YUVToG(y, u, v); |
110 rgb[2] = VP8YUVToB(y, u); | 97 rgb[2] = VP8YUVToB(y, u); |
111 } | 98 } |
112 | 99 |
113 static WEBP_INLINE void VP8YuvToBgr(int y, int u, int v, | 100 static WEBP_INLINE void VP8YuvToBgr(int y, int u, int v, |
(...skipping 28 matching lines...) |
142 const int ba = (b & 0xf0) | 0x0f; // overwrite the lower 4 bits | 129 const int ba = (b & 0xf0) | 0x0f; // overwrite the lower 4 bits |
143 #ifdef WEBP_SWAP_16BIT_CSP | 130 #ifdef WEBP_SWAP_16BIT_CSP |
144 argb[0] = ba; | 131 argb[0] = ba; |
145 argb[1] = rg; | 132 argb[1] = rg; |
146 #else | 133 #else |
147 argb[0] = rg; | 134 argb[0] = rg; |
148 argb[1] = ba; | 135 argb[1] = ba; |
149 #endif | 136 #endif |
150 } | 137 } |
151 | 138 |
152 #else | |
153 | |
154 // Table-based version, not totally equivalent to the SSE2 version. | |
155 // Rounding diff is only +/-1 though. | |
156 | |
157 extern int16_t VP8kVToR[256], VP8kUToB[256]; | |
158 extern int32_t VP8kVToG[256], VP8kUToG[256]; | |
159 extern uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN]; | |
160 extern uint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN]; | |
161 | |
162 static WEBP_INLINE void VP8YuvToRgb(int y, int u, int v, | |
163 uint8_t* const rgb) { | |
164 const int r_off = VP8kVToR[v]; | |
165 const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX; | |
166 const int b_off = VP8kUToB[u]; | |
167 rgb[0] = VP8kClip[y + r_off - YUV_RANGE_MIN]; | |
168 rgb[1] = VP8kClip[y + g_off - YUV_RANGE_MIN]; | |
169 rgb[2] = VP8kClip[y + b_off - YUV_RANGE_MIN]; | |
170 } | |
171 | |
172 static WEBP_INLINE void VP8YuvToBgr(int y, int u, int v, | |
173 uint8_t* const bgr) { | |
174 const int r_off = VP8kVToR[v]; | |
175 const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX; | |
176 const int b_off = VP8kUToB[u]; | |
177 bgr[0] = VP8kClip[y + b_off - YUV_RANGE_MIN]; | |
178 bgr[1] = VP8kClip[y + g_off - YUV_RANGE_MIN]; | |
179 bgr[2] = VP8kClip[y + r_off - YUV_RANGE_MIN]; | |
180 } | |
181 | |
182 static WEBP_INLINE void VP8YuvToRgb565(int y, int u, int v, | |
183 uint8_t* const rgb) { | |
184 const int r_off = VP8kVToR[v]; | |
185 const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX; | |
186 const int b_off = VP8kUToB[u]; | |
187 const int rg = ((VP8kClip[y + r_off - YUV_RANGE_MIN] & 0xf8) | | |
188 (VP8kClip[y + g_off - YUV_RANGE_MIN] >> 5)); | |
189 const int gb = (((VP8kClip[y + g_off - YUV_RANGE_MIN] << 3) & 0xe0) | | |
190 (VP8kClip[y + b_off - YUV_RANGE_MIN] >> 3)); | |
191 #ifdef WEBP_SWAP_16BIT_CSP | |
192 rgb[0] = gb; | |
193 rgb[1] = rg; | |
194 #else | |
195 rgb[0] = rg; | |
196 rgb[1] = gb; | |
197 #endif | |
198 } | |
199 | |
200 static WEBP_INLINE void VP8YuvToRgba4444(int y, int u, int v, | |
201 uint8_t* const argb) { | |
202 const int r_off = VP8kVToR[v]; | |
203 const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX; | |
204 const int b_off = VP8kUToB[u]; | |
205 const int rg = ((VP8kClip4Bits[y + r_off - YUV_RANGE_MIN] << 4) | | |
206 VP8kClip4Bits[y + g_off - YUV_RANGE_MIN]); | |
207 const int ba = (VP8kClip4Bits[y + b_off - YUV_RANGE_MIN] << 4) | 0x0f; | |
208 #ifdef WEBP_SWAP_16BIT_CSP | |
209 argb[0] = ba; | |
210 argb[1] = rg; | |
211 #else | |
212 argb[0] = rg; | |
213 argb[1] = ba; | |
214 #endif | |
215 } | |
216 | |
217 #endif // WEBP_YUV_USE_TABLE | |
218 | |
219 //----------------------------------------------------------------------------- | 139 //----------------------------------------------------------------------------- |
220 // Alpha handling variants | 140 // Alpha handling variants |
221 | 141 |
222 static WEBP_INLINE void VP8YuvToArgb(uint8_t y, uint8_t u, uint8_t v, | 142 static WEBP_INLINE void VP8YuvToArgb(uint8_t y, uint8_t u, uint8_t v, |
223 uint8_t* const argb) { | 143 uint8_t* const argb) { |
224 argb[0] = 0xff; | 144 argb[0] = 0xff; |
225 VP8YuvToRgb(y, u, v, argb + 1); | 145 VP8YuvToRgb(y, u, v, argb + 1); |
226 } | 146 } |
227 | 147 |
228 static WEBP_INLINE void VP8YuvToBgra(uint8_t y, uint8_t u, uint8_t v, | 148 static WEBP_INLINE void VP8YuvToBgra(uint8_t y, uint8_t u, uint8_t v, |
229 uint8_t* const bgra) { | 149 uint8_t* const bgra) { |
230 VP8YuvToBgr(y, u, v, bgra); | 150 VP8YuvToBgr(y, u, v, bgra); |
231 bgra[3] = 0xff; | 151 bgra[3] = 0xff; |
232 } | 152 } |
233 | 153 |
234 static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v, | 154 static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v, |
235 uint8_t* const rgba) { | 155 uint8_t* const rgba) { |
236 VP8YuvToRgb(y, u, v, rgba); | 156 VP8YuvToRgb(y, u, v, rgba); |
237 rgba[3] = 0xff; | 157 rgba[3] = 0xff; |
238 } | 158 } |
239 | 159 |
240 // Must be called before everything, to initialize the tables. | 160 // Must be called before everything, to initialize the tables. |
241 void VP8YUVInit(void); | 161 void VP8YUVInit(void); |
242 | 162 |
243 //----------------------------------------------------------------------------- | 163 //----------------------------------------------------------------------------- |
244 // SSE2 extra functions (mostly for upsampling_sse2.c) | 164 // SSE2 extra functions (mostly for upsampling_sse2.c) |
245 | 165 |
246 #if defined(WEBP_USE_SSE2) | 166 #if defined(WEBP_USE_SSE2) |
247 | 167 |
248 // When the following is defined, tables are initialized statically, adding ~12k | 168 // Process 32 pixels and store the result (16b, 24b or 32b per pixel) in *dst. |
249 // to the binary size. Otherwise, they are initialized at run-time (small cost). | |
250 #define WEBP_YUV_USE_SSE2_TABLES | |
251 | |
252 #if defined(FANCY_UPSAMPLING) | |
253 // Process 32 pixels and store the result (24b or 32b per pixel) in *dst. | |
254 void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v, | 169 void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v, |
255 uint8_t* dst); | 170 uint8_t* dst); |
256 void VP8YuvToRgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v, | 171 void VP8YuvToRgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v, |
257 uint8_t* dst); | 172 uint8_t* dst); |
258 void VP8YuvToBgra32(const uint8_t* y, const uint8_t* u, const uint8_t* v, | 173 void VP8YuvToBgra32(const uint8_t* y, const uint8_t* u, const uint8_t* v, |
259 uint8_t* dst); | 174 uint8_t* dst); |
260 void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v, | 175 void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v, |
261 uint8_t* dst); | 176 uint8_t* dst); |
262 #endif // FANCY_UPSAMPLING | 177 void VP8YuvToArgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v, |
263 | 178 uint8_t* dst); |
264 // Must be called to initialize tables before using the functions. | 179 void VP8YuvToRgba444432(const uint8_t* y, const uint8_t* u, const uint8_t* v, |
265 void VP8YUVInitSSE2(void); | 180 uint8_t* dst); |
| 181 void VP8YuvToRgb56532(const uint8_t* y, const uint8_t* u, const uint8_t* v, |
| 182 uint8_t* dst); |
266 | 183 |
267 #endif // WEBP_USE_SSE2 | 184 #endif // WEBP_USE_SSE2 |
268 | 185 |
269 //------------------------------------------------------------------------------ | 186 //------------------------------------------------------------------------------ |
270 // RGB -> YUV conversion | 187 // RGB -> YUV conversion |
271 | 188 |
272 // Stub functions that can be called with various rounding values: | 189 // Stub functions that can be called with various rounding values: |
273 static WEBP_INLINE int VP8ClipUV(int uv, int rounding) { | 190 static WEBP_INLINE int VP8ClipUV(int uv, int rounding) { |
274 uv = (uv + rounding + (128 << (YUV_FIX + 2))) >> (YUV_FIX + 2); | 191 uv = (uv + rounding + (128 << (YUV_FIX + 2))) >> (YUV_FIX + 2); |
275 return ((uv & ~0xff) == 0) ? uv : (uv < 0) ? 0 : 255; | 192 return ((uv & ~0xff) == 0) ? uv : (uv < 0) ? 0 : 255; |
(...skipping 36 matching lines...) |
312 return VP8ClipUV(v, rounding); | 229 return VP8ClipUV(v, rounding); |
313 } | 230 } |
314 | 231 |
315 #endif // USE_YUVj | 232 #endif // USE_YUVj |
316 | 233 |
317 #ifdef __cplusplus | 234 #ifdef __cplusplus |
318 } // extern "C" | 235 } // extern "C" |
319 #endif | 236 #endif |
320 | 237 |
321 #endif /* WEBP_DSP_YUV_H_ */ | 238 #endif /* WEBP_DSP_YUV_H_ */ |
OLD | NEW |