OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // This webpage shows layout of YV12 and other YUV formats | 5 // This webpage shows layout of YV12 and other YUV formats |
6 // http://www.fourcc.org/yuv.php | 6 // http://www.fourcc.org/yuv.php |
7 // The actual conversion is best described here | 7 // The actual conversion is best described here |
8 // http://en.wikipedia.org/wiki/YUV | 8 // http://en.wikipedia.org/wiki/YUV |
9 // An article on optimizing YUV conversion using tables instead of multiplies | 9 // An article on optimizing YUV conversion using tables instead of multiplies |
10 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf | 10 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf |
11 // | 11 // |
12 // YV12 is a full plane of Y and half-height, half-width chroma planes | 12 // YV12 is a full plane of Y and half-height, half-width chroma planes |
13 // YV16 is a full plane of Y and full-height, half-width chroma planes | 13 // YV16 is a full plane of Y and full-height, half-width chroma planes |
14 // | 14 // |
15 // ARGB pixel format is output, which on little endian is stored as BGRA. | 15 // ARGB pixel format is output, which on little endian is stored as BGRA. |
16 // The alpha is set to 255, allowing the application to use RGBA or RGB32. | 16 // The alpha is set to 255, allowing the application to use RGBA or RGB32. |
17 | 17 |
18 #include "media/base/yuv_convert.h" | 18 #include "media/base/yuv_convert.h" |
19 | 19 |
| 20 #include <stddef.h> |
| 21 |
20 #include <algorithm> | 22 #include <algorithm> |
21 | 23 |
22 #include "base/cpu.h" | 24 #include "base/cpu.h" |
23 #include "base/lazy_instance.h" | 25 #include "base/lazy_instance.h" |
24 #include "base/logging.h" | 26 #include "base/logging.h" |
25 #include "base/macros.h" | 27 #include "base/macros.h" |
26 #include "base/memory/aligned_memory.h" | 28 #include "base/memory/aligned_memory.h" |
27 #include "base/memory/scoped_ptr.h" | 29 #include "base/memory/scoped_ptr.h" |
28 #include "base/third_party/dynamic_annotations/dynamic_annotations.h" | 30 #include "base/third_party/dynamic_annotations/dynamic_annotations.h" |
29 #include "build/build_config.h" | 31 #include "build/build_config.h" |
30 #include "media/base/simd/convert_rgb_to_yuv.h" | 32 #include "media/base/simd/convert_rgb_to_yuv.h" |
31 #include "media/base/simd/convert_yuv_to_rgb.h" | 33 #include "media/base/simd/convert_yuv_to_rgb.h" |
32 #include "media/base/simd/filter_yuv.h" | 34 #include "media/base/simd/filter_yuv.h" |
33 | 35 |
34 #if defined(ARCH_CPU_X86_FAMILY) | 36 #if defined(ARCH_CPU_X86_FAMILY) |
35 #if defined(COMPILER_MSVC) | 37 #if defined(COMPILER_MSVC) |
36 #include <intrin.h> | 38 #include <intrin.h> |
37 #else | 39 #else |
38 #include <mmintrin.h> | 40 #include <mmintrin.h> |
39 #endif | 41 #endif |
40 #endif | 42 #endif |
41 | 43 |
42 // Assembly functions are declared without namespace. | 44 // Assembly functions are declared without namespace. |
43 extern "C" { void EmptyRegisterState_MMX(); } // extern "C" | 45 extern "C" { void EmptyRegisterState_MMX(); } // extern "C" |
44 | 46 |
45 namespace media { | 47 namespace media { |
46 | 48 |
47 typedef void (*FilterYUVRowsProc)(uint8*, | 49 typedef void ( |
48 const uint8*, | 50 *FilterYUVRowsProc)(uint8_t*, const uint8_t*, const uint8_t*, int, uint8_t); |
49 const uint8*, | |
50 int, | |
51 uint8); | |
52 | 51 |
53 typedef void (*ConvertRGBToYUVProc)(const uint8*, | 52 typedef void (*ConvertRGBToYUVProc)(const uint8_t*, |
54 uint8*, | 53 uint8_t*, |
55 uint8*, | 54 uint8_t*, |
56 uint8*, | 55 uint8_t*, |
57 int, | 56 int, |
58 int, | 57 int, |
59 int, | 58 int, |
60 int, | 59 int, |
61 int); | 60 int); |
62 | 61 |
63 typedef void (*ConvertYUVToRGB32Proc)(const uint8*, | 62 typedef void (*ConvertYUVToRGB32Proc)(const uint8_t*, |
64 const uint8*, | 63 const uint8_t*, |
65 const uint8*, | 64 const uint8_t*, |
66 uint8*, | 65 uint8_t*, |
67 int, | 66 int, |
68 int, | 67 int, |
69 int, | 68 int, |
70 int, | 69 int, |
71 int, | 70 int, |
72 YUVType); | 71 YUVType); |
73 | 72 |
74 typedef void (*ConvertYUVAToARGBProc)(const uint8*, | 73 typedef void (*ConvertYUVAToARGBProc)(const uint8_t*, |
75 const uint8*, | 74 const uint8_t*, |
76 const uint8*, | 75 const uint8_t*, |
77 const uint8*, | 76 const uint8_t*, |
78 uint8*, | 77 uint8_t*, |
79 int, | 78 int, |
80 int, | 79 int, |
81 int, | 80 int, |
82 int, | 81 int, |
83 int, | 82 int, |
84 int, | 83 int, |
85 YUVType); | 84 YUVType); |
86 | 85 |
87 typedef void (*ConvertYUVToRGB32RowProc)(const uint8*, | 86 typedef void (*ConvertYUVToRGB32RowProc)(const uint8_t*, |
88 const uint8*, | 87 const uint8_t*, |
89 const uint8*, | 88 const uint8_t*, |
90 uint8*, | 89 uint8_t*, |
91 ptrdiff_t, | 90 ptrdiff_t, |
92 const int16*); | 91 const int16_t*); |
93 | 92 |
94 typedef void (*ConvertYUVAToARGBRowProc)(const uint8*, | 93 typedef void (*ConvertYUVAToARGBRowProc)(const uint8_t*, |
95 const uint8*, | 94 const uint8_t*, |
96 const uint8*, | 95 const uint8_t*, |
97 const uint8*, | 96 const uint8_t*, |
98 uint8*, | 97 uint8_t*, |
99 ptrdiff_t, | 98 ptrdiff_t, |
100 const int16*); | 99 const int16_t*); |
101 | 100 |
102 typedef void (*ScaleYUVToRGB32RowProc)(const uint8*, | 101 typedef void (*ScaleYUVToRGB32RowProc)(const uint8_t*, |
103 const uint8*, | 102 const uint8_t*, |
104 const uint8*, | 103 const uint8_t*, |
105 uint8*, | 104 uint8_t*, |
106 ptrdiff_t, | 105 ptrdiff_t, |
107 ptrdiff_t, | 106 ptrdiff_t, |
108 const int16*); | 107 const int16_t*); |
109 | 108 |
110 static FilterYUVRowsProc g_filter_yuv_rows_proc_ = NULL; | 109 static FilterYUVRowsProc g_filter_yuv_rows_proc_ = NULL; |
111 static ConvertYUVToRGB32RowProc g_convert_yuv_to_rgb32_row_proc_ = NULL; | 110 static ConvertYUVToRGB32RowProc g_convert_yuv_to_rgb32_row_proc_ = NULL; |
112 static ScaleYUVToRGB32RowProc g_scale_yuv_to_rgb32_row_proc_ = NULL; | 111 static ScaleYUVToRGB32RowProc g_scale_yuv_to_rgb32_row_proc_ = NULL; |
113 static ScaleYUVToRGB32RowProc g_linear_scale_yuv_to_rgb32_row_proc_ = NULL; | 112 static ScaleYUVToRGB32RowProc g_linear_scale_yuv_to_rgb32_row_proc_ = NULL; |
114 static ConvertRGBToYUVProc g_convert_rgb32_to_yuv_proc_ = NULL; | 113 static ConvertRGBToYUVProc g_convert_rgb32_to_yuv_proc_ = NULL; |
115 static ConvertRGBToYUVProc g_convert_rgb24_to_yuv_proc_ = NULL; | 114 static ConvertRGBToYUVProc g_convert_rgb24_to_yuv_proc_ = NULL; |
116 static ConvertYUVToRGB32Proc g_convert_yuv_to_rgb32_proc_ = NULL; | 115 static ConvertYUVToRGB32Proc g_convert_yuv_to_rgb32_proc_ = NULL; |
117 static ConvertYUVAToARGBProc g_convert_yuva_to_argb_proc_ = NULL; | 116 static ConvertYUVAToARGBProc g_convert_yuva_to_argb_proc_ = NULL; |
118 | 117 |
119 static const int kYUVToRGBTableSize = 256 * 4 * 4 * sizeof(int16); | 118 static const int kYUVToRGBTableSize = 256 * 4 * 4 * sizeof(int16_t); |
120 | 119 |
121 // base::AlignedMemory has a private operator new(), so wrap it in a struct so | 120 // base::AlignedMemory has a private operator new(), so wrap it in a struct so |
122 // that we can put it in a LazyInstance::Leaky. | 121 // that we can put it in a LazyInstance::Leaky. |
123 struct YUVToRGBTableWrapper { | 122 struct YUVToRGBTableWrapper { |
124 base::AlignedMemory<kYUVToRGBTableSize, 16> table; | 123 base::AlignedMemory<kYUVToRGBTableSize, 16> table; |
125 }; | 124 }; |
126 | 125 |
127 typedef base::LazyInstance<YUVToRGBTableWrapper>::Leaky | 126 typedef base::LazyInstance<YUVToRGBTableWrapper>::Leaky |
128 YUVToRGBTable; | 127 YUVToRGBTable; |
129 static YUVToRGBTable g_table_rec601 = LAZY_INSTANCE_INITIALIZER; | 128 static YUVToRGBTable g_table_rec601 = LAZY_INSTANCE_INITIALIZER; |
130 static YUVToRGBTable g_table_jpeg = LAZY_INSTANCE_INITIALIZER; | 129 static YUVToRGBTable g_table_jpeg = LAZY_INSTANCE_INITIALIZER; |
131 static YUVToRGBTable g_table_rec709 = LAZY_INSTANCE_INITIALIZER; | 130 static YUVToRGBTable g_table_rec709 = LAZY_INSTANCE_INITIALIZER; |
132 static const int16* g_table_rec601_ptr = NULL; | 131 static const int16_t* g_table_rec601_ptr = NULL; |
133 static const int16* g_table_jpeg_ptr = NULL; | 132 static const int16_t* g_table_jpeg_ptr = NULL; |
134 static const int16* g_table_rec709_ptr = NULL; | 133 static const int16_t* g_table_rec709_ptr = NULL; |
135 | 134 |
136 // Empty SIMD registers state after using them. | 135 // Empty SIMD registers state after using them. |
137 void EmptyRegisterStateStub() {} | 136 void EmptyRegisterStateStub() {} |
138 #if defined(MEDIA_MMX_INTRINSICS_AVAILABLE) | 137 #if defined(MEDIA_MMX_INTRINSICS_AVAILABLE) |
139 void EmptyRegisterStateIntrinsic() { _mm_empty(); } | 138 void EmptyRegisterStateIntrinsic() { _mm_empty(); } |
140 #endif | 139 #endif |
141 typedef void (*EmptyRegisterStateProc)(); | 140 typedef void (*EmptyRegisterStateProc)(); |
142 static EmptyRegisterStateProc g_empty_register_state_proc_ = NULL; | 141 static EmptyRegisterStateProc g_empty_register_state_proc_ = NULL; |
143 | 142 |
144 // Get the appropriate value to bitshift by for vertical indices. | 143 // Get the appropriate value to bitshift by for vertical indices. |
145 int GetVerticalShift(YUVType type) { | 144 int GetVerticalShift(YUVType type) { |
146 switch (type) { | 145 switch (type) { |
147 case YV16: | 146 case YV16: |
148 return 0; | 147 return 0; |
149 case YV12: | 148 case YV12: |
150 case YV12J: | 149 case YV12J: |
151 case YV12HD: | 150 case YV12HD: |
152 return 1; | 151 return 1; |
153 } | 152 } |
154 NOTREACHED(); | 153 NOTREACHED(); |
155 return 0; | 154 return 0; |
156 } | 155 } |
157 | 156 |
158 const int16* GetLookupTable(YUVType type) { | 157 const int16_t* GetLookupTable(YUVType type) { |
159 switch (type) { | 158 switch (type) { |
160 case YV12: | 159 case YV12: |
161 case YV16: | 160 case YV16: |
162 return g_table_rec601_ptr; | 161 return g_table_rec601_ptr; |
163 case YV12J: | 162 case YV12J: |
164 return g_table_jpeg_ptr; | 163 return g_table_jpeg_ptr; |
165 case YV12HD: | 164 case YV12HD: |
166 return g_table_rec709_ptr; | 165 return g_table_rec709_ptr; |
167 } | 166 } |
168 NOTREACHED(); | 167 NOTREACHED(); |
169 return NULL; | 168 return NULL; |
170 } | 169 } |
171 | 170 |
172 // Populates a pre-allocated lookup table from a YUV->RGB matrix. | 171 // Populates a pre-allocated lookup table from a YUV->RGB matrix. |
173 const int16* PopulateYUVToRGBTable(const double matrix[3][3], | 172 const int16_t* PopulateYUVToRGBTable(const double matrix[3][3], |
174 bool full_range, | 173 bool full_range, |
175 int16* table) { | 174 int16_t* table) { |
176 // We'll have 4 sub-tables that lie contiguous in memory, one for each of Y, | 175 // We'll have 4 sub-tables that lie contiguous in memory, one for each of Y, |
177 // U, V and A. | 176 // U, V and A. |
178 const int kNumTables = 4; | 177 const int kNumTables = 4; |
179 // Each table has 256 rows (for all possible 8-bit values). | 178 // Each table has 256 rows (for all possible 8-bit values). |
180 const int kNumRows = 256; | 179 const int kNumRows = 256; |
181 // Each row has 4 columns, for contributions to each of R, G, B and A. | 180 // Each row has 4 columns, for contributions to each of R, G, B and A. |
182 const int kNumColumns = 4; | 181 const int kNumColumns = 4; |
183 // Each element is a fixed-point (10.6) 16-bit signed value. | 182 // Each element is a fixed-point (10.6) 16-bit signed value. |
184 const int kElementSize = sizeof(int16); | 183 const int kElementSize = sizeof(int16_t); |
185 | 184 |
186 // Sanity check that our constants here match the size of the statically | 185 // Sanity check that our constants here match the size of the statically |
187 // allocated tables. | 186 // allocated tables. |
188 static_assert( | 187 static_assert( |
189 kNumTables * kNumRows * kNumColumns * kElementSize == kYUVToRGBTableSize, | 188 kNumTables * kNumRows * kNumColumns * kElementSize == kYUVToRGBTableSize, |
190 "YUV lookup table size doesn't match expectation."); | 189 "YUV lookup table size doesn't match expectation."); |
191 | 190 |
192 // Y needs an offset of -16 for color ranges that ignore the lower 16 values, | 191 // Y needs an offset of -16 for color ranges that ignore the lower 16 values, |
193 // U and V get -128 to put them in [-128, 127] from [0, 255]. | 192 // U and V get -128 to put them in [-128, 127] from [0, 255]. |
194 int offsets[3] = {(full_range ? 0 : -16), -128, -128}; | 193 int offsets[3] = {(full_range ? 0 : -16), -128, -128}; |
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
297 {1.0, 1.0, 1.0}, {0.0, -0.34414, 1.772}, {1.402, -0.71414, 0.0}, | 296 {1.0, 1.0, 1.0}, {0.0, -0.34414, 1.772}, {1.402, -0.71414, 0.0}, |
298 }; | 297 }; |
299 | 298 |
300 // Rec709 "HD" color space, values from: | 299 // Rec709 "HD" color space, values from: |
301 // http://www.equasys.de/colorconversion.html | 300 // http://www.equasys.de/colorconversion.html |
302 const double kRec709ConvertMatrix[3][3] = { | 301 const double kRec709ConvertMatrix[3][3] = { |
303 {1.164, 1.164, 1.164}, {0.0, -0.213, 2.112}, {1.793, -0.533, 0.0}, | 302 {1.164, 1.164, 1.164}, {0.0, -0.213, 2.112}, {1.793, -0.533, 0.0}, |
304 }; | 303 }; |
305 | 304 |
306 PopulateYUVToRGBTable(kRec601ConvertMatrix, false, | 305 PopulateYUVToRGBTable(kRec601ConvertMatrix, false, |
307 g_table_rec601.Get().table.data_as<int16>()); | 306 g_table_rec601.Get().table.data_as<int16_t>()); |
308 PopulateYUVToRGBTable(kJPEGConvertMatrix, true, | 307 PopulateYUVToRGBTable(kJPEGConvertMatrix, true, |
309 g_table_jpeg.Get().table.data_as<int16>()); | 308 g_table_jpeg.Get().table.data_as<int16_t>()); |
310 PopulateYUVToRGBTable(kRec709ConvertMatrix, false, | 309 PopulateYUVToRGBTable(kRec709ConvertMatrix, false, |
311 g_table_rec709.Get().table.data_as<int16>()); | 310 g_table_rec709.Get().table.data_as<int16_t>()); |
312 g_table_rec601_ptr = g_table_rec601.Get().table.data_as<int16>(); | 311 g_table_rec601_ptr = g_table_rec601.Get().table.data_as<int16_t>(); |
313 g_table_rec709_ptr = g_table_rec709.Get().table.data_as<int16>(); | 312 g_table_rec709_ptr = g_table_rec709.Get().table.data_as<int16_t>(); |
314 g_table_jpeg_ptr = g_table_jpeg.Get().table.data_as<int16>(); | 313 g_table_jpeg_ptr = g_table_jpeg.Get().table.data_as<int16_t>(); |
315 } | 314 } |
316 | 315 |
317 // Empty SIMD registers state after using them. | 316 // Empty SIMD registers state after using them. |
318 void EmptyRegisterState() { g_empty_register_state_proc_(); } | 317 void EmptyRegisterState() { g_empty_register_state_proc_(); } |
319 | 318 |
320 // 16.16 fixed point arithmetic | 319 // 16.16 fixed point arithmetic |
321 const int kFractionBits = 16; | 320 const int kFractionBits = 16; |
322 const int kFractionMax = 1 << kFractionBits; | 321 const int kFractionMax = 1 << kFractionBits; |
323 const int kFractionMask = ((1 << kFractionBits) - 1); | 322 const int kFractionMask = ((1 << kFractionBits) - 1); |
324 | 323 |
325 // Scale a frame of YUV to 32 bit ARGB. | 324 // Scale a frame of YUV to 32 bit ARGB. |
326 void ScaleYUVToRGB32(const uint8* y_buf, | 325 void ScaleYUVToRGB32(const uint8_t* y_buf, |
327 const uint8* u_buf, | 326 const uint8_t* u_buf, |
328 const uint8* v_buf, | 327 const uint8_t* v_buf, |
329 uint8* rgb_buf, | 328 uint8_t* rgb_buf, |
330 int source_width, | 329 int source_width, |
331 int source_height, | 330 int source_height, |
332 int width, | 331 int width, |
333 int height, | 332 int height, |
334 int y_pitch, | 333 int y_pitch, |
335 int uv_pitch, | 334 int uv_pitch, |
336 int rgb_pitch, | 335 int rgb_pitch, |
337 YUVType yuv_type, | 336 YUVType yuv_type, |
338 Rotate view_rotate, | 337 Rotate view_rotate, |
339 ScaleFilter filter) { | 338 ScaleFilter filter) { |
340 // Handle zero sized sources and destinations. | 339 // Handle zero sized sources and destinations. |
341 if ((yuv_type == YV12 && (source_width < 2 || source_height < 2)) || | 340 if ((yuv_type == YV12 && (source_width < 2 || source_height < 2)) || |
342 (yuv_type == YV16 && (source_width < 2 || source_height < 1)) || | 341 (yuv_type == YV16 && (source_width < 2 || source_height < 1)) || |
343 width == 0 || height == 0) | 342 width == 0 || height == 0) |
344 return; | 343 return; |
345 | 344 |
346 const int16* lookup_table = GetLookupTable(yuv_type); | 345 const int16_t* lookup_table = GetLookupTable(yuv_type); |
347 | 346 |
348 // 4096 allows 3 buffers to fit in 12k. | 347 // 4096 allows 3 buffers to fit in 12k. |
349 // Helps performance on CPU with 16K L1 cache. | 348 // Helps performance on CPU with 16K L1 cache. |
349 // Large enough for 3840x2160 and 30" displays which are 2560x1600. | 350 // Large enough for 3840x2160 and 30" displays which are 2560x1600. |
351 const int kFilterBufferSize = 4096; | 350 const int kFilterBufferSize = 4096; |
352 // Disable filtering if the screen is too big (to avoid buffer overflows). | 351 // Disable filtering if the screen is too big (to avoid buffer overflows). |
353 // This should never happen to regular users: they don't have monitors | 352 // This should never happen to regular users: they don't have monitors |
354 // wider than 4096 pixels. | 353 // wider than 4096 pixels. |
355 // TODO(fbarchard): Allow rotated videos to filter. | 354 // TODO(fbarchard): Allow rotated videos to filter. |
356 if (source_width > kFilterBufferSize || view_rotate) | 355 if (source_width > kFilterBufferSize || view_rotate) |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
396 uv_pitch = -1; | 395 uv_pitch = -1; |
397 source_height = -source_height; | 396 source_height = -source_height; |
398 } else { | 397 } else { |
399 y_pitch = 1; | 398 y_pitch = 1; |
400 uv_pitch = 1; | 399 uv_pitch = 1; |
401 } | 400 } |
402 } | 401 } |
403 | 402 |
404 // Need padding because FilterRows() will write 1 to 16 extra pixels | 403 // Need padding because FilterRows() will write 1 to 16 extra pixels |
405 // after the end for SSE2 version. | 404 // after the end for SSE2 version. |
406 uint8 yuvbuf[16 + kFilterBufferSize * 3 + 16]; | 405 uint8_t yuvbuf[16 + kFilterBufferSize * 3 + 16]; |
407 uint8* ybuf = | 406 uint8_t* ybuf = reinterpret_cast<uint8_t*>( |
408 reinterpret_cast<uint8*>(reinterpret_cast<uintptr_t>(yuvbuf + 15) & ~15); | 407 reinterpret_cast<uintptr_t>(yuvbuf + 15) & ~15); |
409 uint8* ubuf = ybuf + kFilterBufferSize; | 408 uint8_t* ubuf = ybuf + kFilterBufferSize; |
410 uint8* vbuf = ubuf + kFilterBufferSize; | 409 uint8_t* vbuf = ubuf + kFilterBufferSize; |
411 | 410 |
412 // TODO(fbarchard): Fixed point math is off by 1 on negatives. | 411 // TODO(fbarchard): Fixed point math is off by 1 on negatives. |
413 | 412 |
414 // We take a y-coordinate in [0,1] space in the source image space, and | 413 // We take a y-coordinate in [0,1] space in the source image space, and |
415 // transform to a y-coordinate in [0,1] space in the destination image space. | 414 // transform to a y-coordinate in [0,1] space in the destination image space. |
416 // Note that the coordinate endpoints lie on pixel boundaries, not on pixel | 415 // Note that the coordinate endpoints lie on pixel boundaries, not on pixel |
417 // centers: e.g. a two-pixel-high image will have pixel centers at 0.25 and | 416 // centers: e.g. a two-pixel-high image will have pixel centers at 0.25 and |
418 // 0.75. The formula is as follows (in fixed-point arithmetic): | 417 // 0.75. The formula is as follows (in fixed-point arithmetic): |
419 // y_dst = dst_height * ((y_src + 0.5) / src_height) | 418 // y_dst = dst_height * ((y_src + 0.5) / src_height) |
420 // dst_pixel = clamp([0, dst_height - 1], floor(y_dst - 0.5)) | 419 // dst_pixel = clamp([0, dst_height - 1], floor(y_dst - 0.5)) |
421 // Implement this here as an accumulator + delta, to avoid expensive math | 420 // Implement this here as an accumulator + delta, to avoid expensive math |
422 // in the loop. | 421 // in the loop. |
423 int source_y_subpixel_accum = | 422 int source_y_subpixel_accum = |
424 ((kFractionMax / 2) * source_height) / height - (kFractionMax / 2); | 423 ((kFractionMax / 2) * source_height) / height - (kFractionMax / 2); |
425 int source_y_subpixel_delta = ((1 << kFractionBits) * source_height) / height; | 424 int source_y_subpixel_delta = ((1 << kFractionBits) * source_height) / height; |
426 | 425 |
427 // TODO(fbarchard): Split this into separate function for better efficiency. | 426 // TODO(fbarchard): Split this into separate function for better efficiency. |
428 for (int y = 0; y < height; ++y) { | 427 for (int y = 0; y < height; ++y) { |
429 uint8* dest_pixel = rgb_buf + y * rgb_pitch; | 428 uint8_t* dest_pixel = rgb_buf + y * rgb_pitch; |
430 int source_y_subpixel = source_y_subpixel_accum; | 429 int source_y_subpixel = source_y_subpixel_accum; |
431 source_y_subpixel_accum += source_y_subpixel_delta; | 430 source_y_subpixel_accum += source_y_subpixel_delta; |
432 if (source_y_subpixel < 0) | 431 if (source_y_subpixel < 0) |
433 source_y_subpixel = 0; | 432 source_y_subpixel = 0; |
434 else if (source_y_subpixel > ((source_height - 1) << kFractionBits)) | 433 else if (source_y_subpixel > ((source_height - 1) << kFractionBits)) |
435 source_y_subpixel = (source_height - 1) << kFractionBits; | 434 source_y_subpixel = (source_height - 1) << kFractionBits; |
436 | 435 |
437 const uint8* y_ptr = NULL; | 436 const uint8_t* y_ptr = NULL; |
438 const uint8* u_ptr = NULL; | 437 const uint8_t* u_ptr = NULL; |
439 const uint8* v_ptr = NULL; | 438 const uint8_t* v_ptr = NULL; |
440 // Apply vertical filtering if necessary. | 439 // Apply vertical filtering if necessary. |
441 // TODO(fbarchard): Remove memcpy when not necessary. | 440 // TODO(fbarchard): Remove memcpy when not necessary. |
442 if (filter & media::FILTER_BILINEAR_V) { | 441 if (filter & media::FILTER_BILINEAR_V) { |
443 int source_y = source_y_subpixel >> kFractionBits; | 442 int source_y = source_y_subpixel >> kFractionBits; |
444 y_ptr = y_buf + source_y * y_pitch; | 443 y_ptr = y_buf + source_y * y_pitch; |
445 u_ptr = u_buf + (source_y >> y_shift) * uv_pitch; | 444 u_ptr = u_buf + (source_y >> y_shift) * uv_pitch; |
446 v_ptr = v_buf + (source_y >> y_shift) * uv_pitch; | 445 v_ptr = v_buf + (source_y >> y_shift) * uv_pitch; |
447 | 446 |
448 // Vertical scaler uses 16.8 fixed point. | 447 // Vertical scaler uses 16.8 fixed point. |
449 uint8 source_y_fraction = (source_y_subpixel & kFractionMask) >> 8; | 448 uint8_t source_y_fraction = (source_y_subpixel & kFractionMask) >> 8; |
450 if (source_y_fraction != 0) { | 449 if (source_y_fraction != 0) { |
451 g_filter_yuv_rows_proc_( | 450 g_filter_yuv_rows_proc_( |
452 ybuf, y_ptr, y_ptr + y_pitch, source_width, source_y_fraction); | 451 ybuf, y_ptr, y_ptr + y_pitch, source_width, source_y_fraction); |
453 } else { | 452 } else { |
454 memcpy(ybuf, y_ptr, source_width); | 453 memcpy(ybuf, y_ptr, source_width); |
455 } | 454 } |
456 y_ptr = ybuf; | 455 y_ptr = ybuf; |
457 ybuf[source_width] = ybuf[source_width - 1]; | 456 ybuf[source_width] = ybuf[source_width - 1]; |
458 | 457 |
459 int uv_source_width = (source_width + 1) / 2; | 458 int uv_source_width = (source_width + 1) / 2; |
460 uint8 source_uv_fraction; | 459 uint8_t source_uv_fraction; |
461 | 460 |
462 // For formats with half-height UV planes, each even-numbered pixel row | 461 // For formats with half-height UV planes, each even-numbered pixel row |
463 // should not interpolate, since the next row to interpolate from should | 462 // should not interpolate, since the next row to interpolate from should |
464 // be a duplicate of the current row. | 463 // be a duplicate of the current row. |
465 if (y_shift && (source_y & 0x1) == 0) | 464 if (y_shift && (source_y & 0x1) == 0) |
466 source_uv_fraction = 0; | 465 source_uv_fraction = 0; |
467 else | 466 else |
468 source_uv_fraction = source_y_fraction; | 467 source_uv_fraction = source_y_fraction; |
469 | 468 |
470 if (source_uv_fraction != 0) { | 469 if (source_uv_fraction != 0) { |
(...skipping 28 matching lines...) Expand all Loading... |
499 g_scale_yuv_to_rgb32_row_proc_(y_ptr, u_ptr, v_ptr, dest_pixel, width, | 498 g_scale_yuv_to_rgb32_row_proc_(y_ptr, u_ptr, v_ptr, dest_pixel, width, |
500 source_dx, lookup_table); | 499 source_dx, lookup_table); |
501 } | 500 } |
502 } | 501 } |
503 } | 502 } |
504 | 503 |
505 g_empty_register_state_proc_(); | 504 g_empty_register_state_proc_(); |
506 } | 505 } |
507 | 506 |
508 // Scale a frame of YV12 to 32 bit ARGB for a specific rectangle. | 507 // Scale a frame of YV12 to 32 bit ARGB for a specific rectangle. |
509 void ScaleYUVToRGB32WithRect(const uint8* y_buf, | 508 void ScaleYUVToRGB32WithRect(const uint8_t* y_buf, |
510 const uint8* u_buf, | 509 const uint8_t* u_buf, |
511 const uint8* v_buf, | 510 const uint8_t* v_buf, |
512 uint8* rgb_buf, | 511 uint8_t* rgb_buf, |
513 int source_width, | 512 int source_width, |
514 int source_height, | 513 int source_height, |
515 int dest_width, | 514 int dest_width, |
516 int dest_height, | 515 int dest_height, |
517 int dest_rect_left, | 516 int dest_rect_left, |
518 int dest_rect_top, | 517 int dest_rect_top, |
519 int dest_rect_right, | 518 int dest_rect_right, |
520 int dest_rect_bottom, | 519 int dest_rect_bottom, |
521 int y_pitch, | 520 int y_pitch, |
522 int uv_pitch, | 521 int uv_pitch, |
523 int rgb_pitch) { | 522 int rgb_pitch) { |
524 // This routine doesn't currently support up-scaling. | 523 // This routine doesn't currently support up-scaling. |
525 CHECK_LE(dest_width, source_width); | 524 CHECK_LE(dest_width, source_width); |
526 CHECK_LE(dest_height, source_height); | 525 CHECK_LE(dest_height, source_height); |
527 | 526 |
528 // Sanity-check the destination rectangle. | 527 // Sanity-check the destination rectangle. |
529 DCHECK(dest_rect_left >= 0 && dest_rect_right <= dest_width); | 528 DCHECK(dest_rect_left >= 0 && dest_rect_right <= dest_width); |
530 DCHECK(dest_rect_top >= 0 && dest_rect_bottom <= dest_height); | 529 DCHECK(dest_rect_top >= 0 && dest_rect_bottom <= dest_height); |
531 DCHECK(dest_rect_right > dest_rect_left); | 530 DCHECK(dest_rect_right > dest_rect_left); |
532 DCHECK(dest_rect_bottom > dest_rect_top); | 531 DCHECK(dest_rect_bottom > dest_rect_top); |
533 | 532 |
534 const int16* lookup_table = GetLookupTable(YV12); | 533 const int16_t* lookup_table = GetLookupTable(YV12); |
535 | 534 |
536 // Fixed-point value of vertical and horizontal scale down factor. | 535 // Fixed-point value of vertical and horizontal scale down factor. |
537 // Values are in the format 16.16. | 536 // Values are in the format 16.16. |
538 int y_step = kFractionMax * source_height / dest_height; | 537 int y_step = kFractionMax * source_height / dest_height; |
539 int x_step = kFractionMax * source_width / dest_width; | 538 int x_step = kFractionMax * source_width / dest_width; |
540 | 539 |
541 // Determine the coordinates of the rectangle in 16.16 coords. | 540 // Determine the coordinates of the rectangle in 16.16 coords. |
542 // NB: Our origin is the *center* of the top/left pixel, NOT its top/left. | 541 // NB: Our origin is the *center* of the top/left pixel, NOT its top/left. |
543 // If we're down-scaling by more than a factor of two, we start with a 50% | 542 // If we're down-scaling by more than a factor of two, we start with a 50% |
544 // fraction to avoid degenerating to point-sampling - we should really just | 543 // fraction to avoid degenerating to point-sampling - we should really just |
(...skipping 30 matching lines...) Expand all Loading... |
575 int dest_rect_width = dest_rect_right - dest_rect_left; | 574 int dest_rect_width = dest_rect_right - dest_rect_left; |
576 | 575 |
577 // Intermediate buffer for vertical interpolation. | 576 // Intermediate buffer for vertical interpolation. |
578 // 4096 bytes allows 3 buffers to fit in 12k, which fits in a 16K L1 cache, | 577 // 4096 bytes allows 3 buffers to fit in 12k, which fits in a 16K L1 cache, |
579 // and is bigger than most users will generally need. | 578 // and is bigger than most users will generally need. |
580 // The buffer is 16-byte aligned and padded with 16 extra bytes; some of the | 579 // The buffer is 16-byte aligned and padded with 16 extra bytes; some of the |
581 // FilterYUVRowsProcs have alignment requirements, and the SSE version can | 580 // FilterYUVRowsProcs have alignment requirements, and the SSE version can |
582 // write up to 16 bytes past the end of the buffer. | 581 // write up to 16 bytes past the end of the buffer. |
583 const int kFilterBufferSize = 4096; | 582 const int kFilterBufferSize = 4096; |
584 const bool kAvoidUsingOptimizedFilter = source_width > kFilterBufferSize; | 583 const bool kAvoidUsingOptimizedFilter = source_width > kFilterBufferSize; |
585 uint8 yuv_temp[16 + kFilterBufferSize * 3 + 16]; | 584 uint8_t yuv_temp[16 + kFilterBufferSize * 3 + 16]; |
586 // memset() yuv_temp to 0 to avoid bogus warnings when running on Valgrind. | 585 // memset() yuv_temp to 0 to avoid bogus warnings when running on Valgrind. |
587 if (RunningOnValgrind()) | 586 if (RunningOnValgrind()) |
588 memset(yuv_temp, 0, sizeof(yuv_temp)); | 587 memset(yuv_temp, 0, sizeof(yuv_temp)); |
589 uint8* y_temp = reinterpret_cast<uint8*>( | 588 uint8_t* y_temp = reinterpret_cast<uint8_t*>( |
590 reinterpret_cast<uintptr_t>(yuv_temp + 15) & ~15); | 589 reinterpret_cast<uintptr_t>(yuv_temp + 15) & ~15); |
591 uint8* u_temp = y_temp + kFilterBufferSize; | 590 uint8_t* u_temp = y_temp + kFilterBufferSize; |
592 uint8* v_temp = u_temp + kFilterBufferSize; | 591 uint8_t* v_temp = u_temp + kFilterBufferSize; |
593 | 592 |
594 // Move to the top-left pixel of output. | 593 // Move to the top-left pixel of output. |
595 rgb_buf += dest_rect_top * rgb_pitch; | 594 rgb_buf += dest_rect_top * rgb_pitch; |
596 rgb_buf += dest_rect_left * 4; | 595 rgb_buf += dest_rect_left * 4; |
597 | 596 |
598 // For each destination row perform interpolation and color space | 597 // For each destination row perform interpolation and color space |
599 // conversion to produce the output. | 598 // conversion to produce the output. |
600 for (int row = dest_rect_top; row < dest_rect_bottom; ++row) { | 599 for (int row = dest_rect_top; row < dest_rect_bottom; ++row) { |
601 // Round the fixed-point y position to get the current row. | 600 // Round the fixed-point y position to get the current row. |
602 int source_row = source_top >> kFractionBits; | 601 int source_row = source_top >> kFractionBits; |
603 int source_uv_row = source_row / 2; | 602 int source_uv_row = source_row / 2; |
604 DCHECK(source_row < source_height); | 603 DCHECK(source_row < source_height); |
605 | 604 |
606 // Locate the first row for each plane for interpolation. | 605 // Locate the first row for each plane for interpolation. |
607 const uint8* y0_ptr = y_buf + y_pitch * source_row + source_y_left; | 606 const uint8_t* y0_ptr = y_buf + y_pitch * source_row + source_y_left; |
608 const uint8* u0_ptr = u_buf + uv_pitch * source_uv_row + source_uv_left; | 607 const uint8_t* u0_ptr = u_buf + uv_pitch * source_uv_row + source_uv_left; |
609 const uint8* v0_ptr = v_buf + uv_pitch * source_uv_row + source_uv_left; | 608 const uint8_t* v0_ptr = v_buf + uv_pitch * source_uv_row + source_uv_left; |
610 const uint8* y1_ptr = NULL; | 609 const uint8_t* y1_ptr = NULL; |
611 const uint8* u1_ptr = NULL; | 610 const uint8_t* u1_ptr = NULL; |
612 const uint8* v1_ptr = NULL; | 611 const uint8_t* v1_ptr = NULL; |
613 | 612 |
614 // Locate the second row for interpolation, being careful not to overrun. | 613 // Locate the second row for interpolation, being careful not to overrun. |
615 if (source_row + 1 >= source_height) { | 614 if (source_row + 1 >= source_height) { |
616 y1_ptr = y0_ptr; | 615 y1_ptr = y0_ptr; |
617 } else { | 616 } else { |
618 y1_ptr = y0_ptr + y_pitch; | 617 y1_ptr = y0_ptr + y_pitch; |
619 } | 618 } |
620 if (source_uv_row + 1 >= (source_height + 1) / 2) { | 619 if (source_uv_row + 1 >= (source_height + 1) / 2) { |
621 u1_ptr = u0_ptr; | 620 u1_ptr = u0_ptr; |
622 v1_ptr = v0_ptr; | 621 v1_ptr = v0_ptr; |
623 } else { | 622 } else { |
624 u1_ptr = u0_ptr + uv_pitch; | 623 u1_ptr = u0_ptr + uv_pitch; |
625 v1_ptr = v0_ptr + uv_pitch; | 624 v1_ptr = v0_ptr + uv_pitch; |
626 } | 625 } |
627 | 626 |
628 if (!kAvoidUsingOptimizedFilter) { | 627 if (!kAvoidUsingOptimizedFilter) { |
629 // Vertical scaler uses 16.8 fixed point. | 628 // Vertical scaler uses 16.8 fixed point. |
630 uint8 fraction = (source_top & kFractionMask) >> 8; | 629 uint8_t fraction = (source_top & kFractionMask) >> 8; |
631 g_filter_yuv_rows_proc_( | 630 g_filter_yuv_rows_proc_( |
632 y_temp + source_y_left, y0_ptr, y1_ptr, source_y_width, fraction); | 631 y_temp + source_y_left, y0_ptr, y1_ptr, source_y_width, fraction); |
633 g_filter_yuv_rows_proc_( | 632 g_filter_yuv_rows_proc_( |
634 u_temp + source_uv_left, u0_ptr, u1_ptr, source_uv_width, fraction); | 633 u_temp + source_uv_left, u0_ptr, u1_ptr, source_uv_width, fraction); |
635 g_filter_yuv_rows_proc_( | 634 g_filter_yuv_rows_proc_( |
636 v_temp + source_uv_left, v0_ptr, v1_ptr, source_uv_width, fraction); | 635 v_temp + source_uv_left, v0_ptr, v1_ptr, source_uv_width, fraction); |
637 | 636 |
638 // Perform horizontal interpolation and color space conversion. | 637 // Perform horizontal interpolation and color space conversion. |
639 // TODO(hclam): Use the MMX version after more testing. | 638 // TODO(hclam): Use the MMX version after more testing. |
640 LinearScaleYUVToRGB32RowWithRange_C(y_temp, u_temp, v_temp, rgb_buf, | 639 LinearScaleYUVToRGB32RowWithRange_C(y_temp, u_temp, v_temp, rgb_buf, |
641 dest_rect_width, source_left, x_step, | 640 dest_rect_width, source_left, x_step, |
642 lookup_table); | 641 lookup_table); |
643 } else { | 642 } else { |
644 // If the frame is too large then we linear scale a single row. | 643 // If the frame is too large then we linear scale a single row. |
645 LinearScaleYUVToRGB32RowWithRange_C(y0_ptr, u0_ptr, v0_ptr, rgb_buf, | 644 LinearScaleYUVToRGB32RowWithRange_C(y0_ptr, u0_ptr, v0_ptr, rgb_buf, |
646 dest_rect_width, source_left, x_step, | 645 dest_rect_width, source_left, x_step, |
647 lookup_table); | 646 lookup_table); |
648 } | 647 } |
649 | 648 |
650 // Advance vertically in the source and destination image. | 649 // Advance vertically in the source and destination image. |
651 source_top += y_step; | 650 source_top += y_step; |
652 rgb_buf += rgb_pitch; | 651 rgb_buf += rgb_pitch; |
653 } | 652 } |
654 | 653 |
655 g_empty_register_state_proc_(); | 654 g_empty_register_state_proc_(); |
656 } | 655 } |
657 | 656 |
// Public entry point for RGB32 -> YUV conversion. This wrapper performs no
// work of its own: it forwards all nine arguments, unchanged and in the same
// order, to g_convert_rgb32_to_yuv_proc_.
// NOTE(review): g_convert_rgb32_to_yuv_proc_ is defined outside this view;
// presumably a function pointer chosen during initialization -- confirm.
// Parameters (meanings inferred from the names -- TODO confirm in the header):
//   rgbframe              - source pixel buffer; const, so only read.
//   yplane/uplane/vplane  - destination Y, U and V planes.
//   width, height         - frame dimensions.
//   rgbstride, ystride, uvstride - row strides of the corresponding buffers
//                           (units not visible here; presumably bytes).
658 void ConvertRGB32ToYUV(const uint8* rgbframe, | 657 void ConvertRGB32ToYUV(const uint8_t* rgbframe, |
659 uint8* yplane, | 658 uint8_t* yplane, |
660 uint8* uplane, | 659 uint8_t* uplane, |
661 uint8* vplane, | 660 uint8_t* vplane, |
662 int width, | 661 int width, |
663 int height, | 662 int height, |
664 int rgbstride, | 663 int rgbstride, |
665 int ystride, | 664 int ystride, |
666 int uvstride) { | 665 int uvstride) { |
667 g_convert_rgb32_to_yuv_proc_(rgbframe, | 666 g_convert_rgb32_to_yuv_proc_(rgbframe, |
668 yplane, | 667 yplane, |
669 uplane, | 668 uplane, |
670 vplane, | 669 vplane, |
671 width, | 670 width, |
672 height, | 671 height, |
673 rgbstride, | 672 rgbstride, |
674 ystride, | 673 ystride, |
675 uvstride); | 674 uvstride); |
676 } | 675 } |
677 | 676 |
// Public entry point for RGB24 -> YUV conversion. Like its RGB32 counterpart
// this is a pure forwarder: every argument is passed through, unchanged and
// in the same order, to g_convert_rgb24_to_yuv_proc_.
// NOTE(review): g_convert_rgb24_to_yuv_proc_ is defined outside this view;
// presumably a function pointer chosen during initialization -- confirm.
// Parameters (meanings inferred from the names -- TODO confirm in the header):
//   rgbframe              - source pixel buffer; const, so only read.
//   yplane/uplane/vplane  - destination Y, U and V planes.
//   width, height         - frame dimensions.
//   rgbstride, ystride, uvstride - row strides of the corresponding buffers
//                           (units not visible here; presumably bytes).
678 void ConvertRGB24ToYUV(const uint8* rgbframe, | 677 void ConvertRGB24ToYUV(const uint8_t* rgbframe, |
679 uint8* yplane, | 678 uint8_t* yplane, |
680 uint8* uplane, | 679 uint8_t* uplane, |
681 uint8* vplane, | 680 uint8_t* vplane, |
682 int width, | 681 int width, |
683 int height, | 682 int height, |
684 int rgbstride, | 683 int rgbstride, |
685 int ystride, | 684 int ystride, |
686 int uvstride) { | 685 int uvstride) { |
687 g_convert_rgb24_to_yuv_proc_(rgbframe, | 686 g_convert_rgb24_to_yuv_proc_(rgbframe, |
688 yplane, | 687 yplane, |
689 uplane, | 688 uplane, |
690 vplane, | 689 vplane, |
691 width, | 690 width, |
692 height, | 691 height, |
693 rgbstride, | 692 rgbstride, |
694 ystride, | 693 ystride, |
695 uvstride); | 694 uvstride); |
696 } | 695 } |
697 | 696 |
// Public entry point for planar YUV -> RGB32 conversion. The wrapper only
// forwards its ten arguments, unchanged and in the same order, to
// g_convert_yuv_to_rgb32_proc_.
// NOTE(review): g_convert_yuv_to_rgb32_proc_ is defined outside this view;
// presumably a function pointer chosen during initialization -- confirm.
// Parameters (meanings inferred from the names -- TODO confirm in the header):
//   yplane/uplane/vplane  - source Y, U and V planes; const, so only read.
//   rgbframe              - destination pixel buffer.
//   width, height         - frame dimensions.
//   ystride, uvstride, rgbstride - row strides of the corresponding buffers
//                           (units not visible here; presumably bytes).
//   yuv_type              - YUVType value passed through untouched; its
//                           interpretation is up to the callee.
698 void ConvertYUVToRGB32(const uint8* yplane, | 697 void ConvertYUVToRGB32(const uint8_t* yplane, |
699 const uint8* uplane, | 698 const uint8_t* uplane, |
700 const uint8* vplane, | 699 const uint8_t* vplane, |
701 uint8* rgbframe, | 700 uint8_t* rgbframe, |
702 int width, | 701 int width, |
703 int height, | 702 int height, |
704 int ystride, | 703 int ystride, |
705 int uvstride, | 704 int uvstride, |
706 int rgbstride, | 705 int rgbstride, |
707 YUVType yuv_type) { | 706 YUVType yuv_type) { |
708 g_convert_yuv_to_rgb32_proc_(yplane, | 707 g_convert_yuv_to_rgb32_proc_(yplane, |
709 uplane, | 708 uplane, |
710 vplane, | 709 vplane, |
711 rgbframe, | 710 rgbframe, |
712 width, | 711 width, |
713 height, | 712 height, |
714 ystride, | 713 ystride, |
715 uvstride, | 714 uvstride, |
716 rgbstride, | 715 rgbstride, |
717 yuv_type); | 716 yuv_type); |
718 } | 717 } |
719 | 718 |
// Public entry point for planar YUV-plus-alpha -> ARGB conversion. The
// wrapper only forwards its twelve arguments, unchanged and in the same
// order, to g_convert_yuva_to_argb_proc_.
// NOTE(review): g_convert_yuva_to_argb_proc_ is defined outside this view;
// presumably a function pointer chosen during initialization -- confirm.
// Parameters (meanings inferred from the names -- TODO confirm in the header):
//   yplane/uplane/vplane  - source Y, U and V planes; const, so only read.
//   aplane                - source alpha plane; const, so only read.
//   rgbframe              - destination pixel buffer.
//   width, height         - frame dimensions.
//   ystride, uvstride, astride, rgbstride - row strides of the corresponding
//                           buffers (units not visible here; presumably bytes).
//   yuv_type              - YUVType value passed through untouched; its
//                           interpretation is up to the callee.
720 void ConvertYUVAToARGB(const uint8* yplane, | 719 void ConvertYUVAToARGB(const uint8_t* yplane, |
721 const uint8* uplane, | 720 const uint8_t* uplane, |
722 const uint8* vplane, | 721 const uint8_t* vplane, |
723 const uint8* aplane, | 722 const uint8_t* aplane, |
724 uint8* rgbframe, | 723 uint8_t* rgbframe, |
725 int width, | 724 int width, |
726 int height, | 725 int height, |
727 int ystride, | 726 int ystride, |
728 int uvstride, | 727 int uvstride, |
729 int astride, | 728 int astride, |
730 int rgbstride, | 729 int rgbstride, |
731 YUVType yuv_type) { | 730 YUVType yuv_type) { |
732 g_convert_yuva_to_argb_proc_(yplane, | 731 g_convert_yuva_to_argb_proc_(yplane, |
733 uplane, | 732 uplane, |
734 vplane, | 733 vplane, |
735 aplane, | 734 aplane, |
736 rgbframe, | 735 rgbframe, |
737 width, | 736 width, |
738 height, | 737 height, |
739 ystride, | 738 ystride, |
740 uvstride, | 739 uvstride, |
741 astride, | 740 astride, |
742 rgbstride, | 741 rgbstride, |
743 yuv_type); | 742 yuv_type); |
744 } | 743 } |
745 | 744 |
746 } // namespace media | 745 } // namespace media |
OLD | NEW |