OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // This webpage shows layout of YV12 and other YUV formats | 5 // This webpage shows layout of YV12 and other YUV formats |
6 // http://www.fourcc.org/yuv.php | 6 // http://www.fourcc.org/yuv.php |
7 // The actual conversion is best described here | 7 // The actual conversion is best described here |
8 // http://en.wikipedia.org/wiki/YUV | 8 // http://en.wikipedia.org/wiki/YUV |
9 // An article on optimizing YUV conversion using tables instead of multiplies | 9 // An article on optimizing YUV conversion using tables instead of multiplies |
10 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf | 10 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf |
(...skipping 26 matching lines...) Expand all Loading... |
37 #else | 37 #else |
38 #include <mmintrin.h> | 38 #include <mmintrin.h> |
39 #endif | 39 #endif |
40 #endif | 40 #endif |
41 | 41 |
// Assembly functions are declared without namespace. The extern "C" linkage
// keeps the symbol name unmangled so it can be resolved against the
// assembly-side definition.
extern "C" { void EmptyRegisterState_MMX(); }  // extern "C"
44 | 44 |
45 namespace media { | 45 namespace media { |
46 | 46 |
// Signature of a vertical row-blending routine. As used by the callers in
// this file the arguments are: destination row, first source row, second
// source row, number of bytes to process, and an 8-bit blend fraction.
using FilterYUVRowsProc = void (*)(uint8_t* dest,
                                   const uint8_t* row0,
                                   const uint8_t* row1,
                                   int width,
                                   uint8_t fraction);
52 | 49 |
// Signature shared by the RGB32 -> YUV and RGB24 -> YUV frame converters.
// Parameter names mirror those of ConvertRGB32ToYUV(), which forwards its
// arguments to a pointer of this type in the same order.
using ConvertRGBToYUVProc = void (*)(const uint8_t* rgbframe,
                                     uint8_t* yplane,
                                     uint8_t* uplane,
                                     uint8_t* vplane,
                                     int width,
                                     int height,
                                     int rgbstride,
                                     int ystride,
                                     int uvstride);
62 | 59 |
63 typedef void (*ConvertYUVToRGB32Proc)(const uint8*, | 60 typedef void (*ConvertYUVToRGB32Proc)(const uint8_t*, |
64 const uint8*, | 61 const uint8_t*, |
65 const uint8*, | 62 const uint8_t*, |
66 uint8*, | 63 uint8_t*, |
67 int, | 64 int, |
68 int, | 65 int, |
69 int, | 66 int, |
70 int, | 67 int, |
71 int, | 68 int, |
72 YUVType); | 69 YUVType); |
73 | 70 |
74 typedef void (*ConvertYUVAToARGBProc)(const uint8*, | 71 typedef void (*ConvertYUVAToARGBProc)(const uint8_t*, |
75 const uint8*, | 72 const uint8_t*, |
76 const uint8*, | 73 const uint8_t*, |
77 const uint8*, | 74 const uint8_t*, |
78 uint8*, | 75 uint8_t*, |
79 int, | 76 int, |
80 int, | 77 int, |
81 int, | 78 int, |
82 int, | 79 int, |
83 int, | 80 int, |
84 int, | 81 int, |
85 YUVType); | 82 YUVType); |
86 | 83 |
// Signature of a single-row YUV -> RGB32 converter: three read-only source
// rows, one writable destination row, a ptrdiff_t count and a pointer to
// int16_t data (presumably the YUV->RGB lookup table - confirm against the
// row implementations).
using ConvertYUVToRGB32RowProc = void (*)(const uint8_t*,
                                          const uint8_t*,
                                          const uint8_t*,
                                          uint8_t*,
                                          ptrdiff_t,
                                          const int16_t*);
93 | 90 |
// Signature of a single-row YUVA -> ARGB converter: four read-only source
// rows, one writable destination row, a ptrdiff_t count and a pointer to
// int16_t data (presumably the YUV->RGB lookup table - confirm against the
// row implementations).
using ConvertYUVAToARGBRowProc = void (*)(const uint8_t*,
                                          const uint8_t*,
                                          const uint8_t*,
                                          const uint8_t*,
                                          uint8_t*,
                                          ptrdiff_t,
                                          const int16_t*);
101 | 98 |
// Signature of a single-row scaling YUV -> RGB32 converter. As invoked from
// ScaleYUVToRGB32() the arguments are: Y row, U row, V row, destination
// pixels, destination width, horizontal source step, and the lookup table.
using ScaleYUVToRGB32RowProc = void (*)(const uint8_t* y_row,
                                        const uint8_t* u_row,
                                        const uint8_t* v_row,
                                        uint8_t* dest,
                                        ptrdiff_t width,
                                        ptrdiff_t source_dx,
                                        const int16_t* lookup_table);
109 | 106 |
110 static FilterYUVRowsProc g_filter_yuv_rows_proc_ = NULL; | 107 static FilterYUVRowsProc g_filter_yuv_rows_proc_ = NULL; |
111 static ConvertYUVToRGB32RowProc g_convert_yuv_to_rgb32_row_proc_ = NULL; | 108 static ConvertYUVToRGB32RowProc g_convert_yuv_to_rgb32_row_proc_ = NULL; |
112 static ScaleYUVToRGB32RowProc g_scale_yuv_to_rgb32_row_proc_ = NULL; | 109 static ScaleYUVToRGB32RowProc g_scale_yuv_to_rgb32_row_proc_ = NULL; |
113 static ScaleYUVToRGB32RowProc g_linear_scale_yuv_to_rgb32_row_proc_ = NULL; | 110 static ScaleYUVToRGB32RowProc g_linear_scale_yuv_to_rgb32_row_proc_ = NULL; |
114 static ConvertRGBToYUVProc g_convert_rgb32_to_yuv_proc_ = NULL; | 111 static ConvertRGBToYUVProc g_convert_rgb32_to_yuv_proc_ = NULL; |
115 static ConvertRGBToYUVProc g_convert_rgb24_to_yuv_proc_ = NULL; | 112 static ConvertRGBToYUVProc g_convert_rgb24_to_yuv_proc_ = NULL; |
116 static ConvertYUVToRGB32Proc g_convert_yuv_to_rgb32_proc_ = NULL; | 113 static ConvertYUVToRGB32Proc g_convert_yuv_to_rgb32_proc_ = NULL; |
117 static ConvertYUVAToARGBProc g_convert_yuva_to_argb_proc_ = NULL; | 114 static ConvertYUVAToARGBProc g_convert_yuva_to_argb_proc_ = NULL; |
118 | 115 |
// Size in bytes of one YUV->RGB lookup table: 4 sub-tables x 256 rows x
// 4 columns of int16_t (the same factors PopulateYUVToRGBTable()
// static_asserts against).
static const int kYUVToRGBTableSize = 4 * 256 * 4 * sizeof(int16_t);
120 | 117 |
// base::AlignedMemory has a private operator new(), so wrap it in a struct so
// that we can put it in a LazyInstance::Leaky.
// The wrapper holds the raw storage for one lookup table of
// kYUVToRGBTableSize bytes; the second template argument (16) is presumably
// the required byte alignment - confirm against base::AlignedMemory.
struct YUVToRGBTableWrapper {
  base::AlignedMemory<kYUVToRGBTableSize, 16> table;
};
126 | 123 |
127 typedef base::LazyInstance<YUVToRGBTableWrapper>::Leaky | 124 typedef base::LazyInstance<YUVToRGBTableWrapper>::Leaky |
128 YUVToRGBTable; | 125 YUVToRGBTable; |
129 static YUVToRGBTable g_table_rec601 = LAZY_INSTANCE_INITIALIZER; | 126 static YUVToRGBTable g_table_rec601 = LAZY_INSTANCE_INITIALIZER; |
130 static YUVToRGBTable g_table_jpeg = LAZY_INSTANCE_INITIALIZER; | 127 static YUVToRGBTable g_table_jpeg = LAZY_INSTANCE_INITIALIZER; |
131 static YUVToRGBTable g_table_rec709 = LAZY_INSTANCE_INITIALIZER; | 128 static YUVToRGBTable g_table_rec709 = LAZY_INSTANCE_INITIALIZER; |
132 static const int16* g_table_rec601_ptr = NULL; | 129 static const int16_t* g_table_rec601_ptr = NULL; |
133 static const int16* g_table_jpeg_ptr = NULL; | 130 static const int16_t* g_table_jpeg_ptr = NULL; |
134 static const int16* g_table_rec709_ptr = NULL; | 131 static const int16_t* g_table_rec709_ptr = NULL; |
135 | 132 |
// Candidate implementations for "empty the SIMD register state after use".

// No-op variant.
void EmptyRegisterStateStub() {
}

#if defined(MEDIA_MMX_INTRINSICS_AVAILABLE)
// Variant that issues the _mm_empty() intrinsic.
void EmptyRegisterStateIntrinsic() {
  _mm_empty();
}
#endif

// Pointer to whichever variant is selected; null until one is assigned.
using EmptyRegisterStateProc = void (*)();
static EmptyRegisterStateProc g_empty_register_state_proc_ = nullptr;
143 | 140 |
144 // Get the appropriate value to bitshift by for vertical indices. | 141 // Get the appropriate value to bitshift by for vertical indices. |
145 int GetVerticalShift(YUVType type) { | 142 int GetVerticalShift(YUVType type) { |
146 switch (type) { | 143 switch (type) { |
147 case YV16: | 144 case YV16: |
148 return 0; | 145 return 0; |
149 case YV12: | 146 case YV12: |
150 case YV12J: | 147 case YV12J: |
151 case YV12HD: | 148 case YV12HD: |
152 return 1; | 149 return 1; |
153 } | 150 } |
154 NOTREACHED(); | 151 NOTREACHED(); |
155 return 0; | 152 return 0; |
156 } | 153 } |
157 | 154 |
158 const int16* GetLookupTable(YUVType type) { | 155 const int16_t* GetLookupTable(YUVType type) { |
159 switch (type) { | 156 switch (type) { |
160 case YV12: | 157 case YV12: |
161 case YV16: | 158 case YV16: |
162 return g_table_rec601_ptr; | 159 return g_table_rec601_ptr; |
163 case YV12J: | 160 case YV12J: |
164 return g_table_jpeg_ptr; | 161 return g_table_jpeg_ptr; |
165 case YV12HD: | 162 case YV12HD: |
166 return g_table_rec709_ptr; | 163 return g_table_rec709_ptr; |
167 } | 164 } |
168 NOTREACHED(); | 165 NOTREACHED(); |
169 return NULL; | 166 return NULL; |
170 } | 167 } |
171 | 168 |
172 // Populates a pre-allocated lookup table from a YUV->RGB matrix. | 169 // Populates a pre-allocated lookup table from a YUV->RGB matrix. |
173 const int16* PopulateYUVToRGBTable(const double matrix[3][3], | 170 const int16_t* PopulateYUVToRGBTable(const double matrix[3][3], |
174 bool full_range, | 171 bool full_range, |
175 int16* table) { | 172 int16_t* table) { |
176 // We'll have 4 sub-tables that lie contiguous in memory, one for each of Y, | 173 // We'll have 4 sub-tables that lie contiguous in memory, one for each of Y, |
177 // U, V and A. | 174 // U, V and A. |
178 const int kNumTables = 4; | 175 const int kNumTables = 4; |
179 // Each table has 256 rows (for all possible 8-bit values). | 176 // Each table has 256 rows (for all possible 8-bit values). |
180 const int kNumRows = 256; | 177 const int kNumRows = 256; |
181 // Each row has 4 columns, for contributions to each of R, G, B and A. | 178 // Each row has 4 columns, for contributions to each of R, G, B and A. |
182 const int kNumColumns = 4; | 179 const int kNumColumns = 4; |
183 // Each element is a fixed-point (10.6) 16-bit signed value. | 180 // Each element is a fixed-point (10.6) 16-bit signed value. |
184 const int kElementSize = sizeof(int16); | 181 const int kElementSize = sizeof(int16_t); |
185 | 182 |
186 // Sanity check that our constants here match the size of the statically | 183 // Sanity check that our constants here match the size of the statically |
187 // allocated tables. | 184 // allocated tables. |
188 static_assert( | 185 static_assert( |
189 kNumTables * kNumRows * kNumColumns * kElementSize == kYUVToRGBTableSize, | 186 kNumTables * kNumRows * kNumColumns * kElementSize == kYUVToRGBTableSize, |
190 "YUV lookup table size doesn't match expectation."); | 187 "YUV lookup table size doesn't match expectation."); |
191 | 188 |
192 // Y needs an offset of -16 for color ranges that ignore the lower 16 values, | 189 // Y needs an offset of -16 for color ranges that ignore the lower 16 values, |
193 // U and V get -128 to put them in [-128, 127] from [0, 255]. | 190 // U and V get -128 to put them in [-128, 127] from [0, 255]. |
194 int offsets[3] = {(full_range ? 0 : -16), -128, -128}; | 191 int offsets[3] = {(full_range ? 0 : -16), -128, -128}; |
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
297 {1.0, 1.0, 1.0}, {0.0, -0.34414, 1.772}, {1.402, -0.71414, 0.0}, | 294 {1.0, 1.0, 1.0}, {0.0, -0.34414, 1.772}, {1.402, -0.71414, 0.0}, |
298 }; | 295 }; |
299 | 296 |
300 // Rec709 "HD" color space, values from: | 297 // Rec709 "HD" color space, values from: |
301 // http://www.equasys.de/colorconversion.html | 298 // http://www.equasys.de/colorconversion.html |
302 const double kRec709ConvertMatrix[3][3] = { | 299 const double kRec709ConvertMatrix[3][3] = { |
303 {1.164, 1.164, 1.164}, {0.0, -0.213, 2.112}, {1.793, -0.533, 0.0}, | 300 {1.164, 1.164, 1.164}, {0.0, -0.213, 2.112}, {1.793, -0.533, 0.0}, |
304 }; | 301 }; |
305 | 302 |
306 PopulateYUVToRGBTable(kRec601ConvertMatrix, false, | 303 PopulateYUVToRGBTable(kRec601ConvertMatrix, false, |
307 g_table_rec601.Get().table.data_as<int16>()); | 304 g_table_rec601.Get().table.data_as<int16_t>()); |
308 PopulateYUVToRGBTable(kJPEGConvertMatrix, true, | 305 PopulateYUVToRGBTable(kJPEGConvertMatrix, true, |
309 g_table_jpeg.Get().table.data_as<int16>()); | 306 g_table_jpeg.Get().table.data_as<int16_t>()); |
310 PopulateYUVToRGBTable(kRec709ConvertMatrix, false, | 307 PopulateYUVToRGBTable(kRec709ConvertMatrix, false, |
311 g_table_rec709.Get().table.data_as<int16>()); | 308 g_table_rec709.Get().table.data_as<int16_t>()); |
312 g_table_rec601_ptr = g_table_rec601.Get().table.data_as<int16>(); | 309 g_table_rec601_ptr = g_table_rec601.Get().table.data_as<int16_t>(); |
313 g_table_rec709_ptr = g_table_rec709.Get().table.data_as<int16>(); | 310 g_table_rec709_ptr = g_table_rec709.Get().table.data_as<int16_t>(); |
314 g_table_jpeg_ptr = g_table_jpeg.Get().table.data_as<int16>(); | 311 g_table_jpeg_ptr = g_table_jpeg.Get().table.data_as<int16_t>(); |
315 } | 312 } |
316 | 313 |
317 // Empty SIMD registers state after using them. | 314 // Empty SIMD registers state after using them. |
318 void EmptyRegisterState() { g_empty_register_state_proc_(); } | 315 void EmptyRegisterState() { g_empty_register_state_proc_(); } |
319 | 316 |
// 16.16 fixed point arithmetic.
// kFractionBits: number of fractional bits.
// kFractionMax:  the value 1.0 in this representation.
// kFractionMask: selects the fractional bits of a fixed-point value.
const int kFractionBits = 16;
const int kFractionMax = 1 << kFractionBits;
const int kFractionMask = kFractionMax - 1;
324 | 321 |
325 // Scale a frame of YUV to 32 bit ARGB. | 322 // Scale a frame of YUV to 32 bit ARGB. |
326 void ScaleYUVToRGB32(const uint8* y_buf, | 323 void ScaleYUVToRGB32(const uint8_t* y_buf, |
327 const uint8* u_buf, | 324 const uint8_t* u_buf, |
328 const uint8* v_buf, | 325 const uint8_t* v_buf, |
329 uint8* rgb_buf, | 326 uint8_t* rgb_buf, |
330 int source_width, | 327 int source_width, |
331 int source_height, | 328 int source_height, |
332 int width, | 329 int width, |
333 int height, | 330 int height, |
334 int y_pitch, | 331 int y_pitch, |
335 int uv_pitch, | 332 int uv_pitch, |
336 int rgb_pitch, | 333 int rgb_pitch, |
337 YUVType yuv_type, | 334 YUVType yuv_type, |
338 Rotate view_rotate, | 335 Rotate view_rotate, |
339 ScaleFilter filter) { | 336 ScaleFilter filter) { |
340 // Handle zero sized sources and destinations. | 337 // Handle zero sized sources and destinations. |
341 if ((yuv_type == YV12 && (source_width < 2 || source_height < 2)) || | 338 if ((yuv_type == YV12 && (source_width < 2 || source_height < 2)) || |
342 (yuv_type == YV16 && (source_width < 2 || source_height < 1)) || | 339 (yuv_type == YV16 && (source_width < 2 || source_height < 1)) || |
343 width == 0 || height == 0) | 340 width == 0 || height == 0) |
344 return; | 341 return; |
345 | 342 |
346 const int16* lookup_table = GetLookupTable(yuv_type); | 343 const int16_t* lookup_table = GetLookupTable(yuv_type); |
347 | 344 |
348 // 4096 allows 3 buffers to fit in 12k. | 345 // 4096 allows 3 buffers to fit in 12k. |
349 // Helps performance on CPU with 16K L1 cache. | 346 // Helps performance on CPU with 16K L1 cache. |
350 // Large enough for 3840x2160 and 30" displays which are 2560x1600. | 347 // Large enough for 3840x2160 and 30" displays which are 2560x1600. |
351 const int kFilterBufferSize = 4096; | 348 const int kFilterBufferSize = 4096; |
352 // Disable filtering if the screen is too big (to avoid buffer overflows). | 349 // Disable filtering if the screen is too big (to avoid buffer overflows). |
353 // This should never happen to regular users: they don't have monitors | 350 // This should never happen to regular users: they don't have monitors |
354 // wider than 4096 pixels. | 351 // wider than 4096 pixels. |
355 // TODO(fbarchard): Allow rotated videos to filter. | 352 // TODO(fbarchard): Allow rotated videos to filter. |
356 if (source_width > kFilterBufferSize || view_rotate) | 353 if (source_width > kFilterBufferSize || view_rotate) |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
396 uv_pitch = -1; | 393 uv_pitch = -1; |
397 source_height = -source_height; | 394 source_height = -source_height; |
398 } else { | 395 } else { |
399 y_pitch = 1; | 396 y_pitch = 1; |
400 uv_pitch = 1; | 397 uv_pitch = 1; |
401 } | 398 } |
402 } | 399 } |
403 | 400 |
404 // Need padding because FilterRows() will write 1 to 16 extra pixels | 401 // Need padding because FilterRows() will write 1 to 16 extra pixels |
405 // after the end for SSE2 version. | 402 // after the end for SSE2 version. |
406 uint8 yuvbuf[16 + kFilterBufferSize * 3 + 16]; | 403 uint8_t yuvbuf[16 + kFilterBufferSize * 3 + 16]; |
407 uint8* ybuf = | 404 uint8_t* ybuf = reinterpret_cast<uint8_t*>( |
408 reinterpret_cast<uint8*>(reinterpret_cast<uintptr_t>(yuvbuf + 15) & ~15); | 405 reinterpret_cast<uintptr_t>(yuvbuf + 15) & ~15); |
409 uint8* ubuf = ybuf + kFilterBufferSize; | 406 uint8_t* ubuf = ybuf + kFilterBufferSize; |
410 uint8* vbuf = ubuf + kFilterBufferSize; | 407 uint8_t* vbuf = ubuf + kFilterBufferSize; |
411 | 408 |
412 // TODO(fbarchard): Fixed point math is off by 1 on negatives. | 409 // TODO(fbarchard): Fixed point math is off by 1 on negatives. |
413 | 410 |
414 // We take a y-coordinate in [0,1] space in the source image space, and | 411 // We take a y-coordinate in [0,1] space in the source image space, and |
415 // transform to a y-coordinate in [0,1] space in the destination image space. | 412 // transform to a y-coordinate in [0,1] space in the destination image space. |
416 // Note that the coordinate endpoints lie on pixel boundaries, not on pixel | 413 // Note that the coordinate endpoints lie on pixel boundaries, not on pixel |
417 // centers: e.g. a two-pixel-high image will have pixel centers at 0.25 and | 414 // centers: e.g. a two-pixel-high image will have pixel centers at 0.25 and |
418 // 0.75. The formula is as follows (in fixed-point arithmetic): | 415 // 0.75. The formula is as follows (in fixed-point arithmetic): |
419 // y_dst = dst_height * ((y_src + 0.5) / src_height) | 416 // y_dst = dst_height * ((y_src + 0.5) / src_height) |
420 // dst_pixel = clamp([0, dst_height - 1], floor(y_dst - 0.5)) | 417 // dst_pixel = clamp([0, dst_height - 1], floor(y_dst - 0.5)) |
421 // Implement this here as an accumulator + delta, to avoid expensive math | 418 // Implement this here as an accumulator + delta, to avoid expensive math |
422 // in the loop. | 419 // in the loop. |
423 int source_y_subpixel_accum = | 420 int source_y_subpixel_accum = |
424 ((kFractionMax / 2) * source_height) / height - (kFractionMax / 2); | 421 ((kFractionMax / 2) * source_height) / height - (kFractionMax / 2); |
425 int source_y_subpixel_delta = ((1 << kFractionBits) * source_height) / height; | 422 int source_y_subpixel_delta = ((1 << kFractionBits) * source_height) / height; |
426 | 423 |
427 // TODO(fbarchard): Split this into separate function for better efficiency. | 424 // TODO(fbarchard): Split this into separate function for better efficiency. |
428 for (int y = 0; y < height; ++y) { | 425 for (int y = 0; y < height; ++y) { |
429 uint8* dest_pixel = rgb_buf + y * rgb_pitch; | 426 uint8_t* dest_pixel = rgb_buf + y * rgb_pitch; |
430 int source_y_subpixel = source_y_subpixel_accum; | 427 int source_y_subpixel = source_y_subpixel_accum; |
431 source_y_subpixel_accum += source_y_subpixel_delta; | 428 source_y_subpixel_accum += source_y_subpixel_delta; |
432 if (source_y_subpixel < 0) | 429 if (source_y_subpixel < 0) |
433 source_y_subpixel = 0; | 430 source_y_subpixel = 0; |
434 else if (source_y_subpixel > ((source_height - 1) << kFractionBits)) | 431 else if (source_y_subpixel > ((source_height - 1) << kFractionBits)) |
435 source_y_subpixel = (source_height - 1) << kFractionBits; | 432 source_y_subpixel = (source_height - 1) << kFractionBits; |
436 | 433 |
437 const uint8* y_ptr = NULL; | 434 const uint8_t* y_ptr = NULL; |
438 const uint8* u_ptr = NULL; | 435 const uint8_t* u_ptr = NULL; |
439 const uint8* v_ptr = NULL; | 436 const uint8_t* v_ptr = NULL; |
440 // Apply vertical filtering if necessary. | 437 // Apply vertical filtering if necessary. |
441 // TODO(fbarchard): Remove memcpy when not necessary. | 438 // TODO(fbarchard): Remove memcpy when not necessary. |
442 if (filter & media::FILTER_BILINEAR_V) { | 439 if (filter & media::FILTER_BILINEAR_V) { |
443 int source_y = source_y_subpixel >> kFractionBits; | 440 int source_y = source_y_subpixel >> kFractionBits; |
444 y_ptr = y_buf + source_y * y_pitch; | 441 y_ptr = y_buf + source_y * y_pitch; |
445 u_ptr = u_buf + (source_y >> y_shift) * uv_pitch; | 442 u_ptr = u_buf + (source_y >> y_shift) * uv_pitch; |
446 v_ptr = v_buf + (source_y >> y_shift) * uv_pitch; | 443 v_ptr = v_buf + (source_y >> y_shift) * uv_pitch; |
447 | 444 |
448 // Vertical scaler uses 16.8 fixed point. | 445 // Vertical scaler uses 16.8 fixed point. |
449 uint8 source_y_fraction = (source_y_subpixel & kFractionMask) >> 8; | 446 uint8_t source_y_fraction = (source_y_subpixel & kFractionMask) >> 8; |
450 if (source_y_fraction != 0) { | 447 if (source_y_fraction != 0) { |
451 g_filter_yuv_rows_proc_( | 448 g_filter_yuv_rows_proc_( |
452 ybuf, y_ptr, y_ptr + y_pitch, source_width, source_y_fraction); | 449 ybuf, y_ptr, y_ptr + y_pitch, source_width, source_y_fraction); |
453 } else { | 450 } else { |
454 memcpy(ybuf, y_ptr, source_width); | 451 memcpy(ybuf, y_ptr, source_width); |
455 } | 452 } |
456 y_ptr = ybuf; | 453 y_ptr = ybuf; |
457 ybuf[source_width] = ybuf[source_width - 1]; | 454 ybuf[source_width] = ybuf[source_width - 1]; |
458 | 455 |
459 int uv_source_width = (source_width + 1) / 2; | 456 int uv_source_width = (source_width + 1) / 2; |
460 uint8 source_uv_fraction; | 457 uint8_t source_uv_fraction; |
461 | 458 |
462 // For formats with half-height UV planes, each even-numbered pixel row | 459 // For formats with half-height UV planes, each even-numbered pixel row |
463 // should not interpolate, since the next row to interpolate from should | 460 // should not interpolate, since the next row to interpolate from should |
464 // be a duplicate of the current row. | 461 // be a duplicate of the current row. |
465 if (y_shift && (source_y & 0x1) == 0) | 462 if (y_shift && (source_y & 0x1) == 0) |
466 source_uv_fraction = 0; | 463 source_uv_fraction = 0; |
467 else | 464 else |
468 source_uv_fraction = source_y_fraction; | 465 source_uv_fraction = source_y_fraction; |
469 | 466 |
470 if (source_uv_fraction != 0) { | 467 if (source_uv_fraction != 0) { |
(...skipping 28 matching lines...) Expand all Loading... |
499 g_scale_yuv_to_rgb32_row_proc_(y_ptr, u_ptr, v_ptr, dest_pixel, width, | 496 g_scale_yuv_to_rgb32_row_proc_(y_ptr, u_ptr, v_ptr, dest_pixel, width, |
500 source_dx, lookup_table); | 497 source_dx, lookup_table); |
501 } | 498 } |
502 } | 499 } |
503 } | 500 } |
504 | 501 |
505 g_empty_register_state_proc_(); | 502 g_empty_register_state_proc_(); |
506 } | 503 } |
507 | 504 |
508 // Scale a frame of YV12 to 32 bit ARGB for a specific rectangle. | 505 // Scale a frame of YV12 to 32 bit ARGB for a specific rectangle. |
509 void ScaleYUVToRGB32WithRect(const uint8* y_buf, | 506 void ScaleYUVToRGB32WithRect(const uint8_t* y_buf, |
510 const uint8* u_buf, | 507 const uint8_t* u_buf, |
511 const uint8* v_buf, | 508 const uint8_t* v_buf, |
512 uint8* rgb_buf, | 509 uint8_t* rgb_buf, |
513 int source_width, | 510 int source_width, |
514 int source_height, | 511 int source_height, |
515 int dest_width, | 512 int dest_width, |
516 int dest_height, | 513 int dest_height, |
517 int dest_rect_left, | 514 int dest_rect_left, |
518 int dest_rect_top, | 515 int dest_rect_top, |
519 int dest_rect_right, | 516 int dest_rect_right, |
520 int dest_rect_bottom, | 517 int dest_rect_bottom, |
521 int y_pitch, | 518 int y_pitch, |
522 int uv_pitch, | 519 int uv_pitch, |
523 int rgb_pitch) { | 520 int rgb_pitch) { |
524 // This routine doesn't currently support up-scaling. | 521 // This routine doesn't currently support up-scaling. |
525 CHECK_LE(dest_width, source_width); | 522 CHECK_LE(dest_width, source_width); |
526 CHECK_LE(dest_height, source_height); | 523 CHECK_LE(dest_height, source_height); |
527 | 524 |
528 // Sanity-check the destination rectangle. | 525 // Sanity-check the destination rectangle. |
529 DCHECK(dest_rect_left >= 0 && dest_rect_right <= dest_width); | 526 DCHECK(dest_rect_left >= 0 && dest_rect_right <= dest_width); |
530 DCHECK(dest_rect_top >= 0 && dest_rect_bottom <= dest_height); | 527 DCHECK(dest_rect_top >= 0 && dest_rect_bottom <= dest_height); |
531 DCHECK(dest_rect_right > dest_rect_left); | 528 DCHECK(dest_rect_right > dest_rect_left); |
532 DCHECK(dest_rect_bottom > dest_rect_top); | 529 DCHECK(dest_rect_bottom > dest_rect_top); |
533 | 530 |
534 const int16* lookup_table = GetLookupTable(YV12); | 531 const int16_t* lookup_table = GetLookupTable(YV12); |
535 | 532 |
536 // Fixed-point value of vertical and horizontal scale down factor. | 533 // Fixed-point value of vertical and horizontal scale down factor. |
537 // Values are in the format 16.16. | 534 // Values are in the format 16.16. |
538 int y_step = kFractionMax * source_height / dest_height; | 535 int y_step = kFractionMax * source_height / dest_height; |
539 int x_step = kFractionMax * source_width / dest_width; | 536 int x_step = kFractionMax * source_width / dest_width; |
540 | 537 |
541 // Determine the coordinates of the rectangle in 16.16 coords. | 538 // Determine the coordinates of the rectangle in 16.16 coords. |
542 // NB: Our origin is the *center* of the top/left pixel, NOT its top/left. | 539 // NB: Our origin is the *center* of the top/left pixel, NOT its top/left. |
543 // If we're down-scaling by more than a factor of two, we start with a 50% | 540 // If we're down-scaling by more than a factor of two, we start with a 50% |
544 // fraction to avoid degenerating to point-sampling - we should really just | 541 // fraction to avoid degenerating to point-sampling - we should really just |
(...skipping 30 matching lines...) Expand all Loading... |
575 int dest_rect_width = dest_rect_right - dest_rect_left; | 572 int dest_rect_width = dest_rect_right - dest_rect_left; |
576 | 573 |
577 // Intermediate buffer for vertical interpolation. | 574 // Intermediate buffer for vertical interpolation. |
578 // 4096 bytes allows 3 buffers to fit in 12k, which fits in a 16K L1 cache, | 575 // 4096 bytes allows 3 buffers to fit in 12k, which fits in a 16K L1 cache, |
579 // and is bigger than most users will generally need. | 576 // and is bigger than most users will generally need. |
580 // The buffer is 16-byte aligned and padded with 16 extra bytes; some of the | 577 // The buffer is 16-byte aligned and padded with 16 extra bytes; some of the |
581 // FilterYUVRowsProcs have alignment requirements, and the SSE version can | 578 // FilterYUVRowsProcs have alignment requirements, and the SSE version can |
582 // write up to 16 bytes past the end of the buffer. | 579 // write up to 16 bytes past the end of the buffer. |
583 const int kFilterBufferSize = 4096; | 580 const int kFilterBufferSize = 4096; |
584 const bool kAvoidUsingOptimizedFilter = source_width > kFilterBufferSize; | 581 const bool kAvoidUsingOptimizedFilter = source_width > kFilterBufferSize; |
585 uint8 yuv_temp[16 + kFilterBufferSize * 3 + 16]; | 582 uint8_t yuv_temp[16 + kFilterBufferSize * 3 + 16]; |
586 // memset() yuv_temp to 0 to avoid bogus warnings when running on Valgrind. | 583 // memset() yuv_temp to 0 to avoid bogus warnings when running on Valgrind. |
587 if (RunningOnValgrind()) | 584 if (RunningOnValgrind()) |
588 memset(yuv_temp, 0, sizeof(yuv_temp)); | 585 memset(yuv_temp, 0, sizeof(yuv_temp)); |
589 uint8* y_temp = reinterpret_cast<uint8*>( | 586 uint8_t* y_temp = reinterpret_cast<uint8_t*>( |
590 reinterpret_cast<uintptr_t>(yuv_temp + 15) & ~15); | 587 reinterpret_cast<uintptr_t>(yuv_temp + 15) & ~15); |
591 uint8* u_temp = y_temp + kFilterBufferSize; | 588 uint8_t* u_temp = y_temp + kFilterBufferSize; |
592 uint8* v_temp = u_temp + kFilterBufferSize; | 589 uint8_t* v_temp = u_temp + kFilterBufferSize; |
593 | 590 |
594 // Move to the top-left pixel of output. | 591 // Move to the top-left pixel of output. |
595 rgb_buf += dest_rect_top * rgb_pitch; | 592 rgb_buf += dest_rect_top * rgb_pitch; |
596 rgb_buf += dest_rect_left * 4; | 593 rgb_buf += dest_rect_left * 4; |
597 | 594 |
598 // For each destination row perform interpolation and color space | 595 // For each destination row perform interpolation and color space |
599 // conversion to produce the output. | 596 // conversion to produce the output. |
600 for (int row = dest_rect_top; row < dest_rect_bottom; ++row) { | 597 for (int row = dest_rect_top; row < dest_rect_bottom; ++row) { |
601 // Round the fixed-point y position to get the current row. | 598 // Round the fixed-point y position to get the current row. |
602 int source_row = source_top >> kFractionBits; | 599 int source_row = source_top >> kFractionBits; |
603 int source_uv_row = source_row / 2; | 600 int source_uv_row = source_row / 2; |
604 DCHECK(source_row < source_height); | 601 DCHECK(source_row < source_height); |
605 | 602 |
606 // Locate the first row for each plane for interpolation. | 603 // Locate the first row for each plane for interpolation. |
607 const uint8* y0_ptr = y_buf + y_pitch * source_row + source_y_left; | 604 const uint8_t* y0_ptr = y_buf + y_pitch * source_row + source_y_left; |
608 const uint8* u0_ptr = u_buf + uv_pitch * source_uv_row + source_uv_left; | 605 const uint8_t* u0_ptr = u_buf + uv_pitch * source_uv_row + source_uv_left; |
609 const uint8* v0_ptr = v_buf + uv_pitch * source_uv_row + source_uv_left; | 606 const uint8_t* v0_ptr = v_buf + uv_pitch * source_uv_row + source_uv_left; |
610 const uint8* y1_ptr = NULL; | 607 const uint8_t* y1_ptr = NULL; |
611 const uint8* u1_ptr = NULL; | 608 const uint8_t* u1_ptr = NULL; |
612 const uint8* v1_ptr = NULL; | 609 const uint8_t* v1_ptr = NULL; |
613 | 610 |
614 // Locate the second row for interpolation, being careful not to overrun. | 611 // Locate the second row for interpolation, being careful not to overrun. |
615 if (source_row + 1 >= source_height) { | 612 if (source_row + 1 >= source_height) { |
616 y1_ptr = y0_ptr; | 613 y1_ptr = y0_ptr; |
617 } else { | 614 } else { |
618 y1_ptr = y0_ptr + y_pitch; | 615 y1_ptr = y0_ptr + y_pitch; |
619 } | 616 } |
620 if (source_uv_row + 1 >= (source_height + 1) / 2) { | 617 if (source_uv_row + 1 >= (source_height + 1) / 2) { |
621 u1_ptr = u0_ptr; | 618 u1_ptr = u0_ptr; |
622 v1_ptr = v0_ptr; | 619 v1_ptr = v0_ptr; |
623 } else { | 620 } else { |
624 u1_ptr = u0_ptr + uv_pitch; | 621 u1_ptr = u0_ptr + uv_pitch; |
625 v1_ptr = v0_ptr + uv_pitch; | 622 v1_ptr = v0_ptr + uv_pitch; |
626 } | 623 } |
627 | 624 |
628 if (!kAvoidUsingOptimizedFilter) { | 625 if (!kAvoidUsingOptimizedFilter) { |
629 // Vertical scaler uses 16.8 fixed point. | 626 // Vertical scaler uses 16.8 fixed point. |
630 uint8 fraction = (source_top & kFractionMask) >> 8; | 627 uint8_t fraction = (source_top & kFractionMask) >> 8; |
631 g_filter_yuv_rows_proc_( | 628 g_filter_yuv_rows_proc_( |
632 y_temp + source_y_left, y0_ptr, y1_ptr, source_y_width, fraction); | 629 y_temp + source_y_left, y0_ptr, y1_ptr, source_y_width, fraction); |
633 g_filter_yuv_rows_proc_( | 630 g_filter_yuv_rows_proc_( |
634 u_temp + source_uv_left, u0_ptr, u1_ptr, source_uv_width, fraction); | 631 u_temp + source_uv_left, u0_ptr, u1_ptr, source_uv_width, fraction); |
635 g_filter_yuv_rows_proc_( | 632 g_filter_yuv_rows_proc_( |
636 v_temp + source_uv_left, v0_ptr, v1_ptr, source_uv_width, fraction); | 633 v_temp + source_uv_left, v0_ptr, v1_ptr, source_uv_width, fraction); |
637 | 634 |
638 // Perform horizontal interpolation and color space conversion. | 635 // Perform horizontal interpolation and color space conversion. |
639 // TODO(hclam): Use the MMX version after more testing. | 636 // TODO(hclam): Use the MMX version after more testing. |
640 LinearScaleYUVToRGB32RowWithRange_C(y_temp, u_temp, v_temp, rgb_buf, | 637 LinearScaleYUVToRGB32RowWithRange_C(y_temp, u_temp, v_temp, rgb_buf, |
641 dest_rect_width, source_left, x_step, | 638 dest_rect_width, source_left, x_step, |
642 lookup_table); | 639 lookup_table); |
643 } else { | 640 } else { |
644 // If the frame is too large then we linear scale a single row. | 641 // If the frame is too large then we linear scale a single row. |
645 LinearScaleYUVToRGB32RowWithRange_C(y0_ptr, u0_ptr, v0_ptr, rgb_buf, | 642 LinearScaleYUVToRGB32RowWithRange_C(y0_ptr, u0_ptr, v0_ptr, rgb_buf, |
646 dest_rect_width, source_left, x_step, | 643 dest_rect_width, source_left, x_step, |
647 lookup_table); | 644 lookup_table); |
648 } | 645 } |
649 | 646 |
650 // Advance vertically in the source and destination image. | 647 // Advance vertically in the source and destination image. |
651 source_top += y_step; | 648 source_top += y_step; |
652 rgb_buf += rgb_pitch; | 649 rgb_buf += rgb_pitch; |
653 } | 650 } |
654 | 651 |
655 g_empty_register_state_proc_(); | 652 g_empty_register_state_proc_(); |
656 } | 653 } |
657 | 654 |
658 void ConvertRGB32ToYUV(const uint8* rgbframe, | 655 void ConvertRGB32ToYUV(const uint8_t* rgbframe, |
659 uint8* yplane, | 656 uint8_t* yplane, |
660 uint8* uplane, | 657 uint8_t* uplane, |
661 uint8* vplane, | 658 uint8_t* vplane, |
662 int width, | 659 int width, |
663 int height, | 660 int height, |
664 int rgbstride, | 661 int rgbstride, |
665 int ystride, | 662 int ystride, |
666 int uvstride) { | 663 int uvstride) { |
667 g_convert_rgb32_to_yuv_proc_(rgbframe, | 664 g_convert_rgb32_to_yuv_proc_(rgbframe, |
668 yplane, | 665 yplane, |
669 uplane, | 666 uplane, |
670 vplane, | 667 vplane, |
671 width, | 668 width, |
672 height, | 669 height, |
673 rgbstride, | 670 rgbstride, |
674 ystride, | 671 ystride, |
675 uvstride); | 672 uvstride); |
676 } | 673 } |
677 | 674 |
678 void ConvertRGB24ToYUV(const uint8* rgbframe, | 675 void ConvertRGB24ToYUV(const uint8_t* rgbframe, |
679 uint8* yplane, | 676 uint8_t* yplane, |
680 uint8* uplane, | 677 uint8_t* uplane, |
681 uint8* vplane, | 678 uint8_t* vplane, |
682 int width, | 679 int width, |
683 int height, | 680 int height, |
684 int rgbstride, | 681 int rgbstride, |
685 int ystride, | 682 int ystride, |
686 int uvstride) { | 683 int uvstride) { |
687 g_convert_rgb24_to_yuv_proc_(rgbframe, | 684 g_convert_rgb24_to_yuv_proc_(rgbframe, |
688 yplane, | 685 yplane, |
689 uplane, | 686 uplane, |
690 vplane, | 687 vplane, |
691 width, | 688 width, |
692 height, | 689 height, |
693 rgbstride, | 690 rgbstride, |
694 ystride, | 691 ystride, |
695 uvstride); | 692 uvstride); |
696 } | 693 } |
697 | 694 |
698 void ConvertYUVToRGB32(const uint8* yplane, | 695 void ConvertYUVToRGB32(const uint8_t* yplane, |
699 const uint8* uplane, | 696 const uint8_t* uplane, |
700 const uint8* vplane, | 697 const uint8_t* vplane, |
701 uint8* rgbframe, | 698 uint8_t* rgbframe, |
702 int width, | 699 int width, |
703 int height, | 700 int height, |
704 int ystride, | 701 int ystride, |
705 int uvstride, | 702 int uvstride, |
706 int rgbstride, | 703 int rgbstride, |
707 YUVType yuv_type) { | 704 YUVType yuv_type) { |
708 g_convert_yuv_to_rgb32_proc_(yplane, | 705 g_convert_yuv_to_rgb32_proc_(yplane, |
709 uplane, | 706 uplane, |
710 vplane, | 707 vplane, |
711 rgbframe, | 708 rgbframe, |
712 width, | 709 width, |
713 height, | 710 height, |
714 ystride, | 711 ystride, |
715 uvstride, | 712 uvstride, |
716 rgbstride, | 713 rgbstride, |
717 yuv_type); | 714 yuv_type); |
718 } | 715 } |
719 | 716 |
720 void ConvertYUVAToARGB(const uint8* yplane, | 717 void ConvertYUVAToARGB(const uint8_t* yplane, |
721 const uint8* uplane, | 718 const uint8_t* uplane, |
722 const uint8* vplane, | 719 const uint8_t* vplane, |
723 const uint8* aplane, | 720 const uint8_t* aplane, |
724 uint8* rgbframe, | 721 uint8_t* rgbframe, |
725 int width, | 722 int width, |
726 int height, | 723 int height, |
727 int ystride, | 724 int ystride, |
728 int uvstride, | 725 int uvstride, |
729 int astride, | 726 int astride, |
730 int rgbstride, | 727 int rgbstride, |
731 YUVType yuv_type) { | 728 YUVType yuv_type) { |
732 g_convert_yuva_to_argb_proc_(yplane, | 729 g_convert_yuva_to_argb_proc_(yplane, |
733 uplane, | 730 uplane, |
734 vplane, | 731 vplane, |
735 aplane, | 732 aplane, |
736 rgbframe, | 733 rgbframe, |
737 width, | 734 width, |
738 height, | 735 height, |
739 ystride, | 736 ystride, |
740 uvstride, | 737 uvstride, |
741 astride, | 738 astride, |
742 rgbstride, | 739 rgbstride, |
743 yuv_type); | 740 yuv_type); |
744 } | 741 } |
745 | 742 |
746 } // namespace media | 743 } // namespace media |
OLD | NEW |