OLD | NEW |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // This webpage shows layout of YV12 and other YUV formats | 5 // This webpage shows layout of YV12 and other YUV formats |
6 // http://www.fourcc.org/yuv.php | 6 // http://www.fourcc.org/yuv.php |
7 // The actual conversion is best described here | 7 // The actual conversion is best described here |
8 // http://en.wikipedia.org/wiki/YUV | 8 // http://en.wikipedia.org/wiki/YUV |
9 // excerpt from wiki: | |
10 // These formulae are based on the NTSC standard; | |
11 // Y' = 0.299 x R + 0.587 x G + 0.114 x B | |
12 // U = -0.147 x R - 0.289 x G + 0.436 x B | |
13 // V = 0.615 x R - 0.515 x G - 0.100 x B | |
14 // On older, non-SIMD architectures, floating point arithmetic is much | |
15 // slower than using fixed-point arithmetic, so an alternative formulation | |
16 // is: | |
17 // c = Y' - 16 | |
18 // d = U - 128 | |
19 // e = V - 128 | |
20 // Using the previous coefficients and noting that clip() denotes clipping a | |
21 // value to the range of 0 to 255, the following formulae provide the | |
22 // conversion from Y'UV to RGB (NTSC version): | |
23 // R = clip((298 x c + 409 x e + 128) >> 8) | |
24 // G = clip((298 x c - 100 x d - 208 x e + 128) >> 8) | |
25 // B = clip((298 x c + 516 x d + 128) >> 8) | |
26 // | |
27 // An article on optimizing YUV conversion using tables instead of multiplies | 9 // An article on optimizing YUV conversion using tables instead of multiplies |
28 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf | 10 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf |
29 // | 11 // |
30 // YV12 is a full plane of Y and a half height, half width chroma planes | 12 // YV12 is a full plane of Y and a half height, half width chroma planes |
31 // YV16 is a full plane of Y and a full height, half width chroma planes | 13 // YV16 is a full plane of Y and a full height, half width chroma planes |
32 // | 14 // |
33 // Implementation notes | 15 // ARGB pixel format is output, which on little endian is stored as BGRA. |
34 // This version uses MMX for Visual C and GCC, which should cover all | 16 // The alpha is set to 255, allowing the application to use RGBA or RGB32. |
35 // current platforms. C++ is included for reference and future platforms. | |
36 // | |
37 // ARGB pixel format is output, which on little endian is stored as BGRA. | |
38 // The alpha is filled in, allowing the application to use RGBA or RGB32. | |
39 // | |
40 // The Visual C assembler is considered the source. | |
41 // The GCC asm was created by compiling with Visual C and disassembling | |
42 // with GNU objdump. | |
43 // cl /c /Ox yuv_convert.cc | |
44 // objdump -d yuv_convert.o | |
45 // The code was almost copy/pasted in, except for the table lookups, which produced | |
46 // movq 0x800(,%eax,8),%mm0 | |
47 // and needed to be changed to cdecl style table names | |
48 // "movq _coefficients_RGB_U(,%eax,8),%mm0\n" | |
49 // extern "C" was used to avoid name mangling. | |
50 // | |
51 // Once compiled with both MinGW GCC and Visual C on PC, performance should | |
52 // be identical. A small difference will occur in the C++ calling code, | |
53 // depending on the frame size. | |
54 // To confirm the same code is being generated | |
55 // g++ -O3 -c yuv_convert.cc | |
56 // dumpbin -disasm yuv_convert.o >gcc.txt | |
57 // cl /Ox /c yuv_convert.cc | |
58 // dumpbin -disasm yuv_convert.obj >vc.txt | |
59 // and compare the files. | |
60 // | |
61 // The GCC function label is inside the assembler to avoid a stack frame | |
62 // push ebp, that may vary depending on compile options. | |
63 | 17 |
64 #include "media/base/yuv_convert.h" | 18 #include "media/base/yuv_convert.h" |
65 | 19 |
66 #ifdef _OPENMP | |
67 #include <omp.h> | |
68 #endif | |
69 | |
70 #ifdef _DEBUG | |
71 #include "base/logging.h" | |
72 #else | |
73 #define DCHECK(a) | |
74 #endif | |
75 | |
76 // Header for low level row functions. | 20 // Header for low level row functions. |
77 #include "media/base/yuv_row.h" | 21 #include "media/base/yuv_row.h" |
78 | 22 |
79 namespace media { | 23 namespace media { |
| 24 |
80 // Convert a frame of YUV to 32 bit ARGB. | 25 // Convert a frame of YUV to 32 bit ARGB. |
81 void ConvertYUVToRGB32(const uint8* y_buf, | 26 void ConvertYUVToRGB32(const uint8* y_buf, |
82 const uint8* u_buf, | 27 const uint8* u_buf, |
83 const uint8* v_buf, | 28 const uint8* v_buf, |
84 uint8* rgb_buf, | 29 uint8* rgb_buf, |
85 int width, | 30 int width, |
86 int height, | 31 int height, |
87 int y_pitch, | 32 int y_pitch, |
88 int uv_pitch, | 33 int uv_pitch, |
89 int rgb_pitch, | 34 int rgb_pitch, |
90 YUVType yuv_type) { | 35 YUVType yuv_type) { |
91 unsigned int y_shift = yuv_type; | 36 unsigned int y_shift = yuv_type; |
92 #ifdef _OPENMP | |
93 #pragma omp parallel for | |
94 #endif | |
95 for (int y = 0; y < height; ++y) { | 37 for (int y = 0; y < height; ++y) { |
96 uint8* rgb_row = rgb_buf + y * rgb_pitch; | 38 uint8* rgb_row = rgb_buf + y * rgb_pitch; |
97 const uint8* y_ptr = y_buf + y * y_pitch; | 39 const uint8* y_ptr = y_buf + y * y_pitch; |
98 const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch; | 40 const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch; |
99 const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch; | 41 const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch; |
100 | 42 |
101 FastConvertYUVToRGB32Row(y_ptr, | 43 FastConvertYUVToRGB32Row(y_ptr, |
102 u_ptr, | 44 u_ptr, |
103 v_ptr, | 45 v_ptr, |
104 rgb_row, | 46 rgb_row, |
105 width); | 47 width); |
106 } | 48 } |
| 49 |
| 50 // MMX used for FastConvertYUVToRGB32Row requires emms instruction. |
107 EMMS(); | 51 EMMS(); |
108 } | 52 } |
109 | 53 |
110 // Scale a frame of YUV to 32 bit ARGB. | 54 // Scale a frame of YUV to 32 bit ARGB. |
111 void ScaleYUVToRGB32(const uint8* y_buf, | 55 void ScaleYUVToRGB32(const uint8* y_buf, |
112 const uint8* u_buf, | 56 const uint8* u_buf, |
113 const uint8* v_buf, | 57 const uint8* v_buf, |
114 uint8* rgb_buf, | 58 uint8* rgb_buf, |
115 int width, | 59 int width, |
116 int height, | 60 int height, |
117 int scaled_width, | 61 int scaled_width, |
118 int scaled_height, | 62 int scaled_height, |
119 int y_pitch, | 63 int y_pitch, |
120 int uv_pitch, | 64 int uv_pitch, |
121 int rgb_pitch, | 65 int rgb_pitch, |
122 YUVType yuv_type, | 66 YUVType yuv_type, |
123 Rotate view_rotate) { | 67 Rotate view_rotate) { |
124 unsigned int y_shift = yuv_type; | 68 unsigned int y_shift = yuv_type; |
125 // Diagram showing origin and direction of source sampling. | 69 // Diagram showing origin and direction of source sampling. |
126 // ->0 4<- | 70 // ->0 4<- |
127 // 7 3 | 71 // 7 3 |
128 // | 72 // |
129 // 6 5 | 73 // 6 5 |
130 // ->1 2<- | 74 // ->1 2<- |
131 // Rotations that start at right side of image. | 75 // Rotations that start at right side of image. |
132 if ((view_rotate == ROTATE_180) || | 76 if ((view_rotate == ROTATE_180) || |
133 (view_rotate == ROTATE_270) || | 77 (view_rotate == ROTATE_270) || |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
173 if (view_rotate == ROTATE_90) { | 117 if (view_rotate == ROTATE_90) { |
174 y_pitch = -1; | 118 y_pitch = -1; |
175 uv_pitch = -1; | 119 uv_pitch = -1; |
176 height = -height; | 120 height = -height; |
177 } else { | 121 } else { |
178 y_pitch = 1; | 122 y_pitch = 1; |
179 uv_pitch = 1; | 123 uv_pitch = 1; |
180 } | 124 } |
181 } | 125 } |
182 | 126 |
183 #ifdef _OPENMP | |
184 #pragma omp parallel for | |
185 #endif | |
186 for (int y = 0; y < scaled_height; ++y) { | 127 for (int y = 0; y < scaled_height; ++y) { |
187 uint8* dest_pixel = rgb_buf + y * rgb_pitch; | 128 uint8* dest_pixel = rgb_buf + y * rgb_pitch; |
188 int scaled_y = (y * height / scaled_height); | 129 int scaled_y = (y * height / scaled_height); |
189 const uint8* y_ptr = y_buf + scaled_y * y_pitch; | 130 const uint8* y_ptr = y_buf + scaled_y * y_pitch; |
190 const uint8* u_ptr = u_buf + (scaled_y >> y_shift) * uv_pitch; | 131 const uint8* u_ptr = u_buf + (scaled_y >> y_shift) * uv_pitch; |
191 const uint8* v_ptr = v_buf + (scaled_y >> y_shift) * uv_pitch; | 132 const uint8* v_ptr = v_buf + (scaled_y >> y_shift) * uv_pitch; |
192 | 133 |
193 #if USE_MMX | 134 #if USE_MMX |
194 if (scaled_width == (width * 2)) { | 135 if (scaled_width == (width * 2)) { |
195 DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, | 136 DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, |
(...skipping 15 matching lines...) Expand all Loading... |
211 #else | 152 #else |
212 if (scaled_dx == 16) { // Not scaled | 153 if (scaled_dx == 16) { // Not scaled |
213 FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, | 154 FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, |
214 dest_pixel, scaled_width); | 155 dest_pixel, scaled_width); |
215 #endif | 156 #endif |
216 } else { | 157 } else { |
217 ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, | 158 ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, |
218 dest_pixel, scaled_width, scaled_dx); | 159 dest_pixel, scaled_width, scaled_dx); |
219 } | 160 } |
220 } | 161 } |
| 162 |
| 163 // MMX used by the row functions called above (e.g. FastConvertYUVToRGB32Row) requires emms instruction. |
221 EMMS(); | 164 EMMS(); |
222 } | 165 } |
223 | 166 |
224 } // namespace media | 167 } // namespace media |
OLD | NEW |