OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // This webpage shows layout of YV12 and other YUV formats | |
6 // http://www.fourcc.org/yuv.php | |
7 // The actual conversion is best described here | |
8 // http://en.wikipedia.org/wiki/YUV | |
9 // An article on optimizing YUV conversion using tables instead of multiplies | |
10 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf | |
11 // | |
12 // YV12 is a full plane of Y and a half height, half width chroma planes | |
13 // YV16 is a full plane of Y and a full height, half width chroma planes | |
14 // | |
15 // ARGB pixel format is output, which on little endian is stored as BGRA. | |
16 // The alpha is set to 255, allowing the application to use RGBA or RGB32. | |
17 | |
18 #include "media/base/yuv_convert.h" | |
19 | |
20 #include <stddef.h> | |
21 | |
22 #include <algorithm> | |
23 | |
24 #include "base/cpu.h" | |
25 #include "base/logging.h" | |
26 #include "base/macros.h" | |
27 #include "base/memory/aligned_memory.h" | |
28 #include "base/third_party/dynamic_annotations/dynamic_annotations.h" | |
29 #include "build/build_config.h" | |
30 #include "media/base/simd/convert_rgb_to_yuv.h" | |
31 #include "media/base/simd/convert_yuv_to_rgb.h" | |
32 #include "media/base/simd/filter_yuv.h" | |
33 | |
34 #if defined(ARCH_CPU_X86_FAMILY) | |
35 #if defined(COMPILER_MSVC) | |
36 #include <intrin.h> | |
37 #else | |
38 #include <mmintrin.h> | |
39 #endif | |
40 #endif | |
41 | |
// Routines implemented in assembly use C linkage and are therefore declared
// outside of any namespace.
extern "C" {
void EmptyRegisterState_MMX();
}  // extern "C"
44 | |
45 namespace media { | |
46 | |
47 typedef void ( | |
48 *FilterYUVRowsProc)(uint8_t*, const uint8_t*, const uint8_t*, int, uint8_t); | |
49 | |
50 typedef void (*ConvertRGBToYUVProc)(const uint8_t*, | |
51 uint8_t*, | |
52 uint8_t*, | |
53 uint8_t*, | |
54 int, | |
55 int, | |
56 int, | |
57 int, | |
58 int); | |
59 | |
60 typedef void (*ConvertYUVToRGB32Proc)(const uint8_t*, | |
61 const uint8_t*, | |
62 const uint8_t*, | |
63 uint8_t*, | |
64 int, | |
65 int, | |
66 int, | |
67 int, | |
68 int, | |
69 YUVType); | |
70 | |
71 typedef void (*ConvertYUVAToARGBProc)(const uint8_t*, | |
72 const uint8_t*, | |
73 const uint8_t*, | |
74 const uint8_t*, | |
75 uint8_t*, | |
76 int, | |
77 int, | |
78 int, | |
79 int, | |
80 int, | |
81 int, | |
82 YUVType); | |
83 | |
84 typedef void (*ConvertYUVToRGB32RowProc)(const uint8_t*, | |
85 const uint8_t*, | |
86 const uint8_t*, | |
87 uint8_t*, | |
88 ptrdiff_t, | |
89 const int16_t*); | |
90 | |
91 typedef void (*ConvertYUVAToARGBRowProc)(const uint8_t*, | |
92 const uint8_t*, | |
93 const uint8_t*, | |
94 const uint8_t*, | |
95 uint8_t*, | |
96 ptrdiff_t, | |
97 const int16_t*); | |
98 | |
99 typedef void (*ScaleYUVToRGB32RowProc)(const uint8_t*, | |
100 const uint8_t*, | |
101 const uint8_t*, | |
102 uint8_t*, | |
103 ptrdiff_t, | |
104 ptrdiff_t, | |
105 const int16_t*); | |
106 | |
107 static FilterYUVRowsProc g_filter_yuv_rows_proc_ = NULL; | |
108 static ConvertYUVToRGB32RowProc g_convert_yuv_to_rgb32_row_proc_ = NULL; | |
109 static ScaleYUVToRGB32RowProc g_scale_yuv_to_rgb32_row_proc_ = NULL; | |
110 static ScaleYUVToRGB32RowProc g_linear_scale_yuv_to_rgb32_row_proc_ = NULL; | |
111 static ConvertRGBToYUVProc g_convert_rgb32_to_yuv_proc_ = NULL; | |
112 static ConvertRGBToYUVProc g_convert_rgb24_to_yuv_proc_ = NULL; | |
113 static ConvertYUVToRGB32Proc g_convert_yuv_to_rgb32_proc_ = NULL; | |
114 static ConvertYUVAToARGBProc g_convert_yuva_to_argb_proc_ = NULL; | |
115 | |
116 static const int kYUVToRGBTableSize = 256 * 4 * 4 * sizeof(int16_t); | |
117 | |
118 static int16_t* g_table_rec601 = NULL; | |
119 static int16_t* g_table_jpeg = NULL; | |
120 static int16_t* g_table_rec709 = NULL; | |
121 | |
// Signature shared by every "empty the SIMD register state" implementation.
typedef void (*EmptyRegisterStateProc)();

// Implementation for builds that have no SIMD state to reset: does nothing.
void EmptyRegisterStateStub() {
}

#if defined(MEDIA_MMX_INTRINSICS_AVAILABLE)
// Implementation built on the compiler's MMX intrinsic.
void EmptyRegisterStateIntrinsic() {
  _mm_empty();
}
#endif

// Implementation in use; NULL until initialization has selected one.
static EmptyRegisterStateProc g_empty_register_state_proc_ = NULL;
129 | |
130 // Get the appropriate value to bitshift by for vertical indices. | |
131 int GetVerticalShift(YUVType type) { | |
132 switch (type) { | |
133 case YV16: | |
134 return 0; | |
135 case YV12: | |
136 case YV12J: | |
137 case YV12HD: | |
138 return 1; | |
139 } | |
140 NOTREACHED(); | |
141 return 0; | |
142 } | |
143 | |
144 const int16_t* GetLookupTable(YUVType type) { | |
145 switch (type) { | |
146 case YV12: | |
147 case YV16: | |
148 return g_table_rec601; | |
149 case YV12J: | |
150 return g_table_jpeg; | |
151 case YV12HD: | |
152 return g_table_rec709; | |
153 } | |
154 NOTREACHED(); | |
155 return NULL; | |
156 } | |
157 | |
158 // Populates a pre-allocated lookup table from a YUV->RGB matrix. | |
159 const int16_t* PopulateYUVToRGBTable(const double matrix[3][3], | |
160 bool full_range, | |
161 int16_t* table) { | |
162 // We'll have 4 sub-tables that lie contiguous in memory, one for each of Y, | |
163 // U, V and A. | |
164 const int kNumTables = 4; | |
165 // Each table has 256 rows (for all possible 8-bit values). | |
166 const int kNumRows = 256; | |
167 // Each row has 4 columns, for contributions to each of R, G, B and A. | |
168 const int kNumColumns = 4; | |
169 // Each element is a fixed-point (10.6) 16-bit signed value. | |
170 const int kElementSize = sizeof(int16_t); | |
171 | |
172 // Sanity check that our constants here match the size of the statically | |
173 // allocated tables. | |
174 static_assert( | |
175 kNumTables * kNumRows * kNumColumns * kElementSize == kYUVToRGBTableSize, | |
176 "YUV lookup table size doesn't match expectation."); | |
177 | |
178 // Y needs an offset of -16 for color ranges that ignore the lower 16 values, | |
179 // U and V get -128 to put them in [-128, 127] from [0, 255]. | |
180 int offsets[3] = {(full_range ? 0 : -16), -128, -128}; | |
181 | |
182 for (int i = 0; i < kNumRows; ++i) { | |
183 // Y, U, and V contributions to each of R, G, B and A. | |
184 for (int j = 0; j < 3; ++j) { | |
185 #if defined(OS_ANDROID) | |
186 // Android is RGBA. | |
187 table[(j * kNumRows + i) * kNumColumns + 0] = | |
188 matrix[j][0] * 64 * (i + offsets[j]) + 0.5; | |
189 table[(j * kNumRows + i) * kNumColumns + 1] = | |
190 matrix[j][1] * 64 * (i + offsets[j]) + 0.5; | |
191 table[(j * kNumRows + i) * kNumColumns + 2] = | |
192 matrix[j][2] * 64 * (i + offsets[j]) + 0.5; | |
193 #else | |
194 // Other platforms are BGRA. | |
195 table[(j * kNumRows + i) * kNumColumns + 0] = | |
196 matrix[j][2] * 64 * (i + offsets[j]) + 0.5; | |
197 table[(j * kNumRows + i) * kNumColumns + 1] = | |
198 matrix[j][1] * 64 * (i + offsets[j]) + 0.5; | |
199 table[(j * kNumRows + i) * kNumColumns + 2] = | |
200 matrix[j][0] * 64 * (i + offsets[j]) + 0.5; | |
201 #endif | |
202 // Alpha contributions from Y and V are always 0. U is set such that | |
203 // all values result in a full '255' alpha value. | |
204 table[(j * kNumRows + i) * kNumColumns + 3] = (j == 1) ? 256 * 64 - 1 : 0; | |
205 } | |
206 // And YUVA alpha is passed through as-is. | |
207 for (int k = 0; k < kNumTables; ++k) | |
208 table[((kNumTables - 1) * kNumRows + i) * kNumColumns + k] = i; | |
209 } | |
210 | |
211 return table; | |
212 } | |
213 | |
214 void InitializeCPUSpecificYUVConversions() { | |
215 CHECK(!g_filter_yuv_rows_proc_); | |
216 CHECK(!g_convert_yuv_to_rgb32_row_proc_); | |
217 CHECK(!g_scale_yuv_to_rgb32_row_proc_); | |
218 CHECK(!g_linear_scale_yuv_to_rgb32_row_proc_); | |
219 CHECK(!g_convert_rgb32_to_yuv_proc_); | |
220 CHECK(!g_convert_rgb24_to_yuv_proc_); | |
221 CHECK(!g_convert_yuv_to_rgb32_proc_); | |
222 CHECK(!g_convert_yuva_to_argb_proc_); | |
223 CHECK(!g_empty_register_state_proc_); | |
224 | |
225 g_filter_yuv_rows_proc_ = FilterYUVRows_C; | |
226 g_convert_yuv_to_rgb32_row_proc_ = ConvertYUVToRGB32Row_C; | |
227 g_scale_yuv_to_rgb32_row_proc_ = ScaleYUVToRGB32Row_C; | |
228 g_linear_scale_yuv_to_rgb32_row_proc_ = LinearScaleYUVToRGB32Row_C; | |
229 g_convert_rgb32_to_yuv_proc_ = ConvertRGB32ToYUV_C; | |
230 g_convert_rgb24_to_yuv_proc_ = ConvertRGB24ToYUV_C; | |
231 g_convert_yuv_to_rgb32_proc_ = ConvertYUVToRGB32_C; | |
232 g_convert_yuva_to_argb_proc_ = ConvertYUVAToARGB_C; | |
233 g_empty_register_state_proc_ = EmptyRegisterStateStub; | |
234 | |
235 // Assembly code confuses MemorySanitizer. Also not available in iOS builds. | |
236 #if defined(ARCH_CPU_X86_FAMILY) && !defined(MEMORY_SANITIZER) && \ | |
237 !defined(OS_IOS) | |
238 g_convert_yuva_to_argb_proc_ = ConvertYUVAToARGB_MMX; | |
239 | |
240 #if defined(MEDIA_MMX_INTRINSICS_AVAILABLE) | |
241 g_empty_register_state_proc_ = EmptyRegisterStateIntrinsic; | |
242 #else | |
243 g_empty_register_state_proc_ = EmptyRegisterState_MMX; | |
244 #endif | |
245 | |
246 g_convert_yuv_to_rgb32_row_proc_ = ConvertYUVToRGB32Row_SSE; | |
247 g_convert_yuv_to_rgb32_proc_ = ConvertYUVToRGB32_SSE; | |
248 | |
249 g_filter_yuv_rows_proc_ = FilterYUVRows_SSE2; | |
250 g_convert_rgb32_to_yuv_proc_ = ConvertRGB32ToYUV_SSE2; | |
251 | |
252 #if defined(ARCH_CPU_X86_64) | |
253 g_scale_yuv_to_rgb32_row_proc_ = ScaleYUVToRGB32Row_SSE2_X64; | |
254 | |
255 // Technically this should be in the MMX section, but MSVC will optimize out | |
256 // the export of LinearScaleYUVToRGB32Row_MMX, which is required by the unit | |
257 // tests, if that decision can be made at compile time. Since all X64 CPUs | |
258 // have SSE2, we can hack around this by making the selection here. | |
259 g_linear_scale_yuv_to_rgb32_row_proc_ = LinearScaleYUVToRGB32Row_MMX_X64; | |
260 #else | |
261 g_scale_yuv_to_rgb32_row_proc_ = ScaleYUVToRGB32Row_SSE; | |
262 g_linear_scale_yuv_to_rgb32_row_proc_ = LinearScaleYUVToRGB32Row_SSE; | |
263 #endif | |
264 | |
265 base::CPU cpu; | |
266 if (cpu.has_ssse3()) { | |
267 g_convert_rgb24_to_yuv_proc_ = &ConvertRGB24ToYUV_SSSE3; | |
268 | |
269 // TODO(hclam): Add ConvertRGB32ToYUV_SSSE3 when the cyan problem is solved. | |
270 // See: crbug.com/100462 | |
271 } | |
272 #endif | |
273 | |
274 // Initialize YUV conversion lookup tables. | |
275 | |
276 // SD Rec601 YUV->RGB matrix, see http://www.fourcc.org/fccyvrgb.php | |
277 const double kRec601ConvertMatrix[3][3] = { | |
278 {1.164, 1.164, 1.164}, {0.0, -0.391, 2.018}, {1.596, -0.813, 0.0}, | |
279 }; | |
280 | |
281 // JPEG table, values from above link. | |
282 const double kJPEGConvertMatrix[3][3] = { | |
283 {1.0, 1.0, 1.0}, {0.0, -0.34414, 1.772}, {1.402, -0.71414, 0.0}, | |
284 }; | |
285 | |
286 // Rec709 "HD" color space, values from: | |
287 // http://www.equasys.de/colorconversion.html | |
288 const double kRec709ConvertMatrix[3][3] = { | |
289 {1.164, 1.164, 1.164}, {0.0, -0.213, 2.112}, {1.793, -0.533, 0.0}, | |
290 }; | |
291 | |
292 g_table_rec601 = | |
293 static_cast<int16_t*>(base::AlignedAlloc(kYUVToRGBTableSize, 16)); | |
294 PopulateYUVToRGBTable(kRec601ConvertMatrix, false, g_table_rec601); | |
295 | |
296 g_table_rec709 = | |
297 static_cast<int16_t*>(base::AlignedAlloc(kYUVToRGBTableSize, 16)); | |
298 PopulateYUVToRGBTable(kRec709ConvertMatrix, false, g_table_rec709); | |
299 | |
300 g_table_jpeg = | |
301 static_cast<int16_t*>(base::AlignedAlloc(kYUVToRGBTableSize, 16)); | |
302 PopulateYUVToRGBTable(kJPEGConvertMatrix, true, g_table_jpeg); | |
303 } | |
304 | |
305 // Empty SIMD registers state after using them. | |
306 void EmptyRegisterState() { g_empty_register_state_proc_(); } | |
307 | |
// Source positions and steps are tracked in 16.16 fixed point.
// Number of fractional bits.
const int kFractionBits = 16;
// Fixed-point representation of 1.0.
const int kFractionMax = 1 << kFractionBits;
// Mask that extracts the fractional part of a fixed-point value.
const int kFractionMask = kFractionMax - 1;
312 | |
313 // Scale a frame of YUV to 32 bit ARGB. | |
314 void ScaleYUVToRGB32(const uint8_t* y_buf, | |
315 const uint8_t* u_buf, | |
316 const uint8_t* v_buf, | |
317 uint8_t* rgb_buf, | |
318 int source_width, | |
319 int source_height, | |
320 int width, | |
321 int height, | |
322 int y_pitch, | |
323 int uv_pitch, | |
324 int rgb_pitch, | |
325 YUVType yuv_type, | |
326 Rotate view_rotate, | |
327 ScaleFilter filter) { | |
328 // Handle zero sized sources and destinations. | |
329 if ((yuv_type == YV12 && (source_width < 2 || source_height < 2)) || | |
330 (yuv_type == YV16 && (source_width < 2 || source_height < 1)) || | |
331 width == 0 || height == 0) | |
332 return; | |
333 | |
334 const int16_t* lookup_table = GetLookupTable(yuv_type); | |
335 | |
336 // 4096 allows 3 buffers to fit in 12k. | |
337 // Helps performance on CPU with 16K L1 cache. | |
338 // Large enough for 3830x2160 and 30" displays which are 2560x1600. | |
339 const int kFilterBufferSize = 4096; | |
340 // Disable filtering if the screen is too big (to avoid buffer overflows). | |
341 // This should never happen to regular users: they don't have monitors | |
342 // wider than 4096 pixels. | |
343 // TODO(fbarchard): Allow rotated videos to filter. | |
344 if (source_width > kFilterBufferSize || view_rotate) | |
345 filter = FILTER_NONE; | |
346 | |
347 unsigned int y_shift = GetVerticalShift(yuv_type); | |
348 // Diagram showing origin and direction of source sampling. | |
349 // ->0 4<- | |
350 // 7 3 | |
351 // | |
352 // 6 5 | |
353 // ->1 2<- | |
354 // Rotations that start at right side of image. | |
355 if ((view_rotate == ROTATE_180) || (view_rotate == ROTATE_270) || | |
356 (view_rotate == MIRROR_ROTATE_0) || (view_rotate == MIRROR_ROTATE_90)) { | |
357 y_buf += source_width - 1; | |
358 u_buf += source_width / 2 - 1; | |
359 v_buf += source_width / 2 - 1; | |
360 source_width = -source_width; | |
361 } | |
362 // Rotations that start at bottom of image. | |
363 if ((view_rotate == ROTATE_90) || (view_rotate == ROTATE_180) || | |
364 (view_rotate == MIRROR_ROTATE_90) || (view_rotate == MIRROR_ROTATE_180)) { | |
365 y_buf += (source_height - 1) * y_pitch; | |
366 u_buf += ((source_height >> y_shift) - 1) * uv_pitch; | |
367 v_buf += ((source_height >> y_shift) - 1) * uv_pitch; | |
368 source_height = -source_height; | |
369 } | |
370 | |
371 int source_dx = source_width * kFractionMax / width; | |
372 | |
373 if ((view_rotate == ROTATE_90) || (view_rotate == ROTATE_270)) { | |
374 int tmp = height; | |
375 height = width; | |
376 width = tmp; | |
377 tmp = source_height; | |
378 source_height = source_width; | |
379 source_width = tmp; | |
380 int source_dy = source_height * kFractionMax / height; | |
381 source_dx = ((source_dy >> kFractionBits) * y_pitch) << kFractionBits; | |
382 if (view_rotate == ROTATE_90) { | |
383 y_pitch = -1; | |
384 uv_pitch = -1; | |
385 source_height = -source_height; | |
386 } else { | |
387 y_pitch = 1; | |
388 uv_pitch = 1; | |
389 } | |
390 } | |
391 | |
392 // Need padding because FilterRows() will write 1 to 16 extra pixels | |
393 // after the end for SSE2 version. | |
394 uint8_t yuvbuf[16 + kFilterBufferSize * 3 + 16]; | |
395 uint8_t* ybuf = reinterpret_cast<uint8_t*>( | |
396 reinterpret_cast<uintptr_t>(yuvbuf + 15) & ~15); | |
397 uint8_t* ubuf = ybuf + kFilterBufferSize; | |
398 uint8_t* vbuf = ubuf + kFilterBufferSize; | |
399 | |
400 // TODO(fbarchard): Fixed point math is off by 1 on negatives. | |
401 | |
402 // We take a y-coordinate in [0,1] space in the source image space, and | |
403 // transform to a y-coordinate in [0,1] space in the destination image space. | |
404 // Note that the coordinate endpoints lie on pixel boundaries, not on pixel | |
405 // centers: e.g. a two-pixel-high image will have pixel centers at 0.25 and | |
406 // 0.75. The formula is as follows (in fixed-point arithmetic): | |
407 // y_dst = dst_height * ((y_src + 0.5) / src_height) | |
408 // dst_pixel = clamp([0, dst_height - 1], floor(y_dst - 0.5)) | |
409 // Implement this here as an accumulator + delta, to avoid expensive math | |
410 // in the loop. | |
411 int source_y_subpixel_accum = | |
412 ((kFractionMax / 2) * source_height) / height - (kFractionMax / 2); | |
413 int source_y_subpixel_delta = ((1 << kFractionBits) * source_height) / height; | |
414 | |
415 // TODO(fbarchard): Split this into separate function for better efficiency. | |
416 for (int y = 0; y < height; ++y) { | |
417 uint8_t* dest_pixel = rgb_buf + y * rgb_pitch; | |
418 int source_y_subpixel = source_y_subpixel_accum; | |
419 source_y_subpixel_accum += source_y_subpixel_delta; | |
420 if (source_y_subpixel < 0) | |
421 source_y_subpixel = 0; | |
422 else if (source_y_subpixel > ((source_height - 1) << kFractionBits)) | |
423 source_y_subpixel = (source_height - 1) << kFractionBits; | |
424 | |
425 const uint8_t* y_ptr = NULL; | |
426 const uint8_t* u_ptr = NULL; | |
427 const uint8_t* v_ptr = NULL; | |
428 // Apply vertical filtering if necessary. | |
429 // TODO(fbarchard): Remove memcpy when not necessary. | |
430 if (filter & media::FILTER_BILINEAR_V) { | |
431 int source_y = source_y_subpixel >> kFractionBits; | |
432 y_ptr = y_buf + source_y * y_pitch; | |
433 u_ptr = u_buf + (source_y >> y_shift) * uv_pitch; | |
434 v_ptr = v_buf + (source_y >> y_shift) * uv_pitch; | |
435 | |
436 // Vertical scaler uses 16.8 fixed point. | |
437 uint8_t source_y_fraction = (source_y_subpixel & kFractionMask) >> 8; | |
438 if (source_y_fraction != 0) { | |
439 g_filter_yuv_rows_proc_( | |
440 ybuf, y_ptr, y_ptr + y_pitch, source_width, source_y_fraction); | |
441 } else { | |
442 memcpy(ybuf, y_ptr, source_width); | |
443 } | |
444 y_ptr = ybuf; | |
445 ybuf[source_width] = ybuf[source_width - 1]; | |
446 | |
447 int uv_source_width = (source_width + 1) / 2; | |
448 uint8_t source_uv_fraction; | |
449 | |
450 // For formats with half-height UV planes, each even-numbered pixel row | |
451 // should not interpolate, since the next row to interpolate from should | |
452 // be a duplicate of the current row. | |
453 if (y_shift && (source_y & 0x1) == 0) | |
454 source_uv_fraction = 0; | |
455 else | |
456 source_uv_fraction = source_y_fraction; | |
457 | |
458 if (source_uv_fraction != 0) { | |
459 g_filter_yuv_rows_proc_( | |
460 ubuf, u_ptr, u_ptr + uv_pitch, uv_source_width, source_uv_fraction); | |
461 g_filter_yuv_rows_proc_( | |
462 vbuf, v_ptr, v_ptr + uv_pitch, uv_source_width, source_uv_fraction); | |
463 } else { | |
464 memcpy(ubuf, u_ptr, uv_source_width); | |
465 memcpy(vbuf, v_ptr, uv_source_width); | |
466 } | |
467 u_ptr = ubuf; | |
468 v_ptr = vbuf; | |
469 ubuf[uv_source_width] = ubuf[uv_source_width - 1]; | |
470 vbuf[uv_source_width] = vbuf[uv_source_width - 1]; | |
471 } else { | |
472 // Offset by 1/2 pixel for center sampling. | |
473 int source_y = (source_y_subpixel + (kFractionMax / 2)) >> kFractionBits; | |
474 y_ptr = y_buf + source_y * y_pitch; | |
475 u_ptr = u_buf + (source_y >> y_shift) * uv_pitch; | |
476 v_ptr = v_buf + (source_y >> y_shift) * uv_pitch; | |
477 } | |
478 if (source_dx == kFractionMax) { // Not scaled | |
479 g_convert_yuv_to_rgb32_row_proc_(y_ptr, u_ptr, v_ptr, dest_pixel, width, | |
480 lookup_table); | |
481 } else { | |
482 if (filter & FILTER_BILINEAR_H) { | |
483 g_linear_scale_yuv_to_rgb32_row_proc_(y_ptr, u_ptr, v_ptr, dest_pixel, | |
484 width, source_dx, | |
485 lookup_table); | |
486 } else { | |
487 g_scale_yuv_to_rgb32_row_proc_(y_ptr, u_ptr, v_ptr, dest_pixel, width, | |
488 source_dx, lookup_table); | |
489 } | |
490 } | |
491 } | |
492 | |
493 g_empty_register_state_proc_(); | |
494 } | |
495 | |
496 // Scale a frame of YV12 to 32 bit ARGB for a specific rectangle. | |
497 void ScaleYUVToRGB32WithRect(const uint8_t* y_buf, | |
498 const uint8_t* u_buf, | |
499 const uint8_t* v_buf, | |
500 uint8_t* rgb_buf, | |
501 int source_width, | |
502 int source_height, | |
503 int dest_width, | |
504 int dest_height, | |
505 int dest_rect_left, | |
506 int dest_rect_top, | |
507 int dest_rect_right, | |
508 int dest_rect_bottom, | |
509 int y_pitch, | |
510 int uv_pitch, | |
511 int rgb_pitch) { | |
512 // This routine doesn't currently support up-scaling. | |
513 CHECK_LE(dest_width, source_width); | |
514 CHECK_LE(dest_height, source_height); | |
515 | |
516 // Sanity-check the destination rectangle. | |
517 DCHECK(dest_rect_left >= 0 && dest_rect_right <= dest_width); | |
518 DCHECK(dest_rect_top >= 0 && dest_rect_bottom <= dest_height); | |
519 DCHECK(dest_rect_right > dest_rect_left); | |
520 DCHECK(dest_rect_bottom > dest_rect_top); | |
521 | |
522 const int16_t* lookup_table = GetLookupTable(YV12); | |
523 | |
524 // Fixed-point value of vertical and horizontal scale down factor. | |
525 // Values are in the format 16.16. | |
526 int y_step = kFractionMax * source_height / dest_height; | |
527 int x_step = kFractionMax * source_width / dest_width; | |
528 | |
529 // Determine the coordinates of the rectangle in 16.16 coords. | |
530 // NB: Our origin is the *center* of the top/left pixel, NOT its top/left. | |
531 // If we're down-scaling by more than a factor of two, we start with a 50% | |
532 // fraction to avoid degenerating to point-sampling - we should really just | |
533 // fix the fraction at 50% for all pixels in that case. | |
534 int source_left = dest_rect_left * x_step; | |
535 int source_right = (dest_rect_right - 1) * x_step; | |
536 if (x_step < kFractionMax * 2) { | |
537 source_left += ((x_step - kFractionMax) / 2); | |
538 source_right += ((x_step - kFractionMax) / 2); | |
539 } else { | |
540 source_left += kFractionMax / 2; | |
541 source_right += kFractionMax / 2; | |
542 } | |
543 int source_top = dest_rect_top * y_step; | |
544 if (y_step < kFractionMax * 2) { | |
545 source_top += ((y_step - kFractionMax) / 2); | |
546 } else { | |
547 source_top += kFractionMax / 2; | |
548 } | |
549 | |
550 // Determine the parts of the Y, U and V buffers to interpolate. | |
551 int source_y_left = source_left >> kFractionBits; | |
552 int source_y_right = | |
553 std::min((source_right >> kFractionBits) + 2, source_width + 1); | |
554 | |
555 int source_uv_left = source_y_left / 2; | |
556 int source_uv_right = std::min((source_right >> (kFractionBits + 1)) + 2, | |
557 (source_width + 1) / 2); | |
558 | |
559 int source_y_width = source_y_right - source_y_left; | |
560 int source_uv_width = source_uv_right - source_uv_left; | |
561 | |
562 // Determine number of pixels in each output row. | |
563 int dest_rect_width = dest_rect_right - dest_rect_left; | |
564 | |
565 // Intermediate buffer for vertical interpolation. | |
566 // 4096 bytes allows 3 buffers to fit in 12k, which fits in a 16K L1 cache, | |
567 // and is bigger than most users will generally need. | |
568 // The buffer is 16-byte aligned and padded with 16 extra bytes; some of the | |
569 // FilterYUVRowsProcs have alignment requirements, and the SSE version can | |
570 // write up to 16 bytes past the end of the buffer. | |
571 const int kFilterBufferSize = 4096; | |
572 const bool kAvoidUsingOptimizedFilter = source_width > kFilterBufferSize; | |
573 uint8_t yuv_temp[16 + kFilterBufferSize * 3 + 16]; | |
574 // memset() yuv_temp to 0 to avoid bogus warnings when running on Valgrind. | |
575 if (RunningOnValgrind()) | |
576 memset(yuv_temp, 0, sizeof(yuv_temp)); | |
577 uint8_t* y_temp = reinterpret_cast<uint8_t*>( | |
578 reinterpret_cast<uintptr_t>(yuv_temp + 15) & ~15); | |
579 uint8_t* u_temp = y_temp + kFilterBufferSize; | |
580 uint8_t* v_temp = u_temp + kFilterBufferSize; | |
581 | |
582 // Move to the top-left pixel of output. | |
583 rgb_buf += dest_rect_top * rgb_pitch; | |
584 rgb_buf += dest_rect_left * 4; | |
585 | |
586 // For each destination row perform interpolation and color space | |
587 // conversion to produce the output. | |
588 for (int row = dest_rect_top; row < dest_rect_bottom; ++row) { | |
589 // Round the fixed-point y position to get the current row. | |
590 int source_row = source_top >> kFractionBits; | |
591 int source_uv_row = source_row / 2; | |
592 DCHECK(source_row < source_height); | |
593 | |
594 // Locate the first row for each plane for interpolation. | |
595 const uint8_t* y0_ptr = y_buf + y_pitch * source_row + source_y_left; | |
596 const uint8_t* u0_ptr = u_buf + uv_pitch * source_uv_row + source_uv_left; | |
597 const uint8_t* v0_ptr = v_buf + uv_pitch * source_uv_row + source_uv_left; | |
598 const uint8_t* y1_ptr = NULL; | |
599 const uint8_t* u1_ptr = NULL; | |
600 const uint8_t* v1_ptr = NULL; | |
601 | |
602 // Locate the second row for interpolation, being careful not to overrun. | |
603 if (source_row + 1 >= source_height) { | |
604 y1_ptr = y0_ptr; | |
605 } else { | |
606 y1_ptr = y0_ptr + y_pitch; | |
607 } | |
608 if (source_uv_row + 1 >= (source_height + 1) / 2) { | |
609 u1_ptr = u0_ptr; | |
610 v1_ptr = v0_ptr; | |
611 } else { | |
612 u1_ptr = u0_ptr + uv_pitch; | |
613 v1_ptr = v0_ptr + uv_pitch; | |
614 } | |
615 | |
616 if (!kAvoidUsingOptimizedFilter) { | |
617 // Vertical scaler uses 16.8 fixed point. | |
618 uint8_t fraction = (source_top & kFractionMask) >> 8; | |
619 g_filter_yuv_rows_proc_( | |
620 y_temp + source_y_left, y0_ptr, y1_ptr, source_y_width, fraction); | |
621 g_filter_yuv_rows_proc_( | |
622 u_temp + source_uv_left, u0_ptr, u1_ptr, source_uv_width, fraction); | |
623 g_filter_yuv_rows_proc_( | |
624 v_temp + source_uv_left, v0_ptr, v1_ptr, source_uv_width, fraction); | |
625 | |
626 // Perform horizontal interpolation and color space conversion. | |
627 // TODO(hclam): Use the MMX version after more testing. | |
628 LinearScaleYUVToRGB32RowWithRange_C(y_temp, u_temp, v_temp, rgb_buf, | |
629 dest_rect_width, source_left, x_step, | |
630 lookup_table); | |
631 } else { | |
632 // If the frame is too large then we linear scale a single row. | |
633 LinearScaleYUVToRGB32RowWithRange_C(y0_ptr, u0_ptr, v0_ptr, rgb_buf, | |
634 dest_rect_width, source_left, x_step, | |
635 lookup_table); | |
636 } | |
637 | |
638 // Advance vertically in the source and destination image. | |
639 source_top += y_step; | |
640 rgb_buf += rgb_pitch; | |
641 } | |
642 | |
643 g_empty_register_state_proc_(); | |
644 } | |
645 | |
646 void ConvertRGB32ToYUV(const uint8_t* rgbframe, | |
647 uint8_t* yplane, | |
648 uint8_t* uplane, | |
649 uint8_t* vplane, | |
650 int width, | |
651 int height, | |
652 int rgbstride, | |
653 int ystride, | |
654 int uvstride) { | |
655 g_convert_rgb32_to_yuv_proc_(rgbframe, | |
656 yplane, | |
657 uplane, | |
658 vplane, | |
659 width, | |
660 height, | |
661 rgbstride, | |
662 ystride, | |
663 uvstride); | |
664 } | |
665 | |
666 void ConvertRGB24ToYUV(const uint8_t* rgbframe, | |
667 uint8_t* yplane, | |
668 uint8_t* uplane, | |
669 uint8_t* vplane, | |
670 int width, | |
671 int height, | |
672 int rgbstride, | |
673 int ystride, | |
674 int uvstride) { | |
675 g_convert_rgb24_to_yuv_proc_(rgbframe, | |
676 yplane, | |
677 uplane, | |
678 vplane, | |
679 width, | |
680 height, | |
681 rgbstride, | |
682 ystride, | |
683 uvstride); | |
684 } | |
685 | |
686 void ConvertYUVToRGB32(const uint8_t* yplane, | |
687 const uint8_t* uplane, | |
688 const uint8_t* vplane, | |
689 uint8_t* rgbframe, | |
690 int width, | |
691 int height, | |
692 int ystride, | |
693 int uvstride, | |
694 int rgbstride, | |
695 YUVType yuv_type) { | |
696 g_convert_yuv_to_rgb32_proc_(yplane, | |
697 uplane, | |
698 vplane, | |
699 rgbframe, | |
700 width, | |
701 height, | |
702 ystride, | |
703 uvstride, | |
704 rgbstride, | |
705 yuv_type); | |
706 } | |
707 | |
708 void ConvertYUVAToARGB(const uint8_t* yplane, | |
709 const uint8_t* uplane, | |
710 const uint8_t* vplane, | |
711 const uint8_t* aplane, | |
712 uint8_t* rgbframe, | |
713 int width, | |
714 int height, | |
715 int ystride, | |
716 int uvstride, | |
717 int astride, | |
718 int rgbstride, | |
719 YUVType yuv_type) { | |
720 g_convert_yuva_to_argb_proc_(yplane, | |
721 uplane, | |
722 vplane, | |
723 aplane, | |
724 rgbframe, | |
725 width, | |
726 height, | |
727 ystride, | |
728 uvstride, | |
729 astride, | |
730 rgbstride, | |
731 yuv_type); | |
732 } | |
733 | |
734 } // namespace media | |
OLD | NEW |