| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // This webpage shows layout of YV12 and other YUV formats | |
| 6 // http://www.fourcc.org/yuv.php | |
| 7 // The actual conversion is best described here | |
| 8 // http://en.wikipedia.org/wiki/YUV | |
| 9 // An article on optimizing YUV conversion using tables instead of multiplies | |
| 10 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf | |
| 11 // | |
| 12 // YV12 is a full plane of Y and a half height, half width chroma planes | |
| 13 // YV16 is a full plane of Y and a full height, half width chroma planes | |
| 14 // | |
| 15 // ARGB pixel format is output, which on little endian is stored as BGRA. | |
| 16 // The alpha is set to 255, allowing the application to use RGBA or RGB32. | |
| 17 | |
| 18 #include "media/base/yuv_convert.h" | |
| 19 | |
| 20 #include <stddef.h> | |
| 21 | |
| 22 #include <algorithm> | |
| 23 | |
| 24 #include "base/cpu.h" | |
| 25 #include "base/logging.h" | |
| 26 #include "base/macros.h" | |
| 27 #include "base/memory/aligned_memory.h" | |
| 28 #include "base/third_party/dynamic_annotations/dynamic_annotations.h" | |
| 29 #include "build/build_config.h" | |
| 30 #include "media/base/simd/convert_rgb_to_yuv.h" | |
| 31 #include "media/base/simd/convert_yuv_to_rgb.h" | |
| 32 #include "media/base/simd/filter_yuv.h" | |
| 33 | |
| 34 #if defined(ARCH_CPU_X86_FAMILY) | |
| 35 #if defined(COMPILER_MSVC) | |
| 36 #include <intrin.h> | |
| 37 #else | |
| 38 #include <mmintrin.h> | |
| 39 #endif | |
| 40 #endif | |
| 41 | |
// Routines implemented in assembly are not part of any C++ namespace, so they
// are declared here with C linkage.
extern "C" {
void EmptyRegisterState_MMX();
}  // extern "C"
| 44 | |
| 45 namespace media { | |
| 46 | |
| 47 typedef void ( | |
| 48 *FilterYUVRowsProc)(uint8_t*, const uint8_t*, const uint8_t*, int, uint8_t); | |
| 49 | |
| 50 typedef void (*ConvertRGBToYUVProc)(const uint8_t*, | |
| 51 uint8_t*, | |
| 52 uint8_t*, | |
| 53 uint8_t*, | |
| 54 int, | |
| 55 int, | |
| 56 int, | |
| 57 int, | |
| 58 int); | |
| 59 | |
| 60 typedef void (*ConvertYUVToRGB32Proc)(const uint8_t*, | |
| 61 const uint8_t*, | |
| 62 const uint8_t*, | |
| 63 uint8_t*, | |
| 64 int, | |
| 65 int, | |
| 66 int, | |
| 67 int, | |
| 68 int, | |
| 69 YUVType); | |
| 70 | |
| 71 typedef void (*ConvertYUVAToARGBProc)(const uint8_t*, | |
| 72 const uint8_t*, | |
| 73 const uint8_t*, | |
| 74 const uint8_t*, | |
| 75 uint8_t*, | |
| 76 int, | |
| 77 int, | |
| 78 int, | |
| 79 int, | |
| 80 int, | |
| 81 int, | |
| 82 YUVType); | |
| 83 | |
| 84 typedef void (*ConvertYUVToRGB32RowProc)(const uint8_t*, | |
| 85 const uint8_t*, | |
| 86 const uint8_t*, | |
| 87 uint8_t*, | |
| 88 ptrdiff_t, | |
| 89 const int16_t*); | |
| 90 | |
| 91 typedef void (*ConvertYUVAToARGBRowProc)(const uint8_t*, | |
| 92 const uint8_t*, | |
| 93 const uint8_t*, | |
| 94 const uint8_t*, | |
| 95 uint8_t*, | |
| 96 ptrdiff_t, | |
| 97 const int16_t*); | |
| 98 | |
| 99 typedef void (*ScaleYUVToRGB32RowProc)(const uint8_t*, | |
| 100 const uint8_t*, | |
| 101 const uint8_t*, | |
| 102 uint8_t*, | |
| 103 ptrdiff_t, | |
| 104 ptrdiff_t, | |
| 105 const int16_t*); | |
| 106 | |
| 107 static FilterYUVRowsProc g_filter_yuv_rows_proc_ = NULL; | |
| 108 static ConvertYUVToRGB32RowProc g_convert_yuv_to_rgb32_row_proc_ = NULL; | |
| 109 static ScaleYUVToRGB32RowProc g_scale_yuv_to_rgb32_row_proc_ = NULL; | |
| 110 static ScaleYUVToRGB32RowProc g_linear_scale_yuv_to_rgb32_row_proc_ = NULL; | |
| 111 static ConvertRGBToYUVProc g_convert_rgb32_to_yuv_proc_ = NULL; | |
| 112 static ConvertRGBToYUVProc g_convert_rgb24_to_yuv_proc_ = NULL; | |
| 113 static ConvertYUVToRGB32Proc g_convert_yuv_to_rgb32_proc_ = NULL; | |
| 114 static ConvertYUVAToARGBProc g_convert_yuva_to_argb_proc_ = NULL; | |
| 115 | |
| 116 static const int kYUVToRGBTableSize = 256 * 4 * 4 * sizeof(int16_t); | |
| 117 | |
| 118 static int16_t* g_table_rec601 = NULL; | |
| 119 static int16_t* g_table_jpeg = NULL; | |
| 120 static int16_t* g_table_rec709 = NULL; | |
| 121 | |
// Routines that empty the SIMD register state after it has been used.

// No-op variant; installed as the default by
// InitializeCPUSpecificYUVConversions().
void EmptyRegisterStateStub() {
}

#if defined(MEDIA_MMX_INTRINSICS_AVAILABLE)
// Variant that calls the _mm_empty() compiler intrinsic.
void EmptyRegisterStateIntrinsic() {
  _mm_empty();
}
#endif

using EmptyRegisterStateProc = void (*)();
static EmptyRegisterStateProc g_empty_register_state_proc_ = nullptr;
| 129 | |
| 130 // Get the appropriate value to bitshift by for vertical indices. | |
| 131 int GetVerticalShift(YUVType type) { | |
| 132 switch (type) { | |
| 133 case YV16: | |
| 134 return 0; | |
| 135 case YV12: | |
| 136 case YV12J: | |
| 137 case YV12HD: | |
| 138 return 1; | |
| 139 } | |
| 140 NOTREACHED(); | |
| 141 return 0; | |
| 142 } | |
| 143 | |
| 144 const int16_t* GetLookupTable(YUVType type) { | |
| 145 switch (type) { | |
| 146 case YV12: | |
| 147 case YV16: | |
| 148 return g_table_rec601; | |
| 149 case YV12J: | |
| 150 return g_table_jpeg; | |
| 151 case YV12HD: | |
| 152 return g_table_rec709; | |
| 153 } | |
| 154 NOTREACHED(); | |
| 155 return NULL; | |
| 156 } | |
| 157 | |
| 158 // Populates a pre-allocated lookup table from a YUV->RGB matrix. | |
| 159 const int16_t* PopulateYUVToRGBTable(const double matrix[3][3], | |
| 160 bool full_range, | |
| 161 int16_t* table) { | |
| 162 // We'll have 4 sub-tables that lie contiguous in memory, one for each of Y, | |
| 163 // U, V and A. | |
| 164 const int kNumTables = 4; | |
| 165 // Each table has 256 rows (for all possible 8-bit values). | |
| 166 const int kNumRows = 256; | |
| 167 // Each row has 4 columns, for contributions to each of R, G, B and A. | |
| 168 const int kNumColumns = 4; | |
| 169 // Each element is a fixed-point (10.6) 16-bit signed value. | |
| 170 const int kElementSize = sizeof(int16_t); | |
| 171 | |
| 172 // Sanity check that our constants here match the size of the statically | |
| 173 // allocated tables. | |
| 174 static_assert( | |
| 175 kNumTables * kNumRows * kNumColumns * kElementSize == kYUVToRGBTableSize, | |
| 176 "YUV lookup table size doesn't match expectation."); | |
| 177 | |
| 178 // Y needs an offset of -16 for color ranges that ignore the lower 16 values, | |
| 179 // U and V get -128 to put them in [-128, 127] from [0, 255]. | |
| 180 int offsets[3] = {(full_range ? 0 : -16), -128, -128}; | |
| 181 | |
| 182 for (int i = 0; i < kNumRows; ++i) { | |
| 183 // Y, U, and V contributions to each of R, G, B and A. | |
| 184 for (int j = 0; j < 3; ++j) { | |
| 185 #if defined(OS_ANDROID) | |
| 186 // Android is RGBA. | |
| 187 table[(j * kNumRows + i) * kNumColumns + 0] = | |
| 188 matrix[j][0] * 64 * (i + offsets[j]) + 0.5; | |
| 189 table[(j * kNumRows + i) * kNumColumns + 1] = | |
| 190 matrix[j][1] * 64 * (i + offsets[j]) + 0.5; | |
| 191 table[(j * kNumRows + i) * kNumColumns + 2] = | |
| 192 matrix[j][2] * 64 * (i + offsets[j]) + 0.5; | |
| 193 #else | |
| 194 // Other platforms are BGRA. | |
| 195 table[(j * kNumRows + i) * kNumColumns + 0] = | |
| 196 matrix[j][2] * 64 * (i + offsets[j]) + 0.5; | |
| 197 table[(j * kNumRows + i) * kNumColumns + 1] = | |
| 198 matrix[j][1] * 64 * (i + offsets[j]) + 0.5; | |
| 199 table[(j * kNumRows + i) * kNumColumns + 2] = | |
| 200 matrix[j][0] * 64 * (i + offsets[j]) + 0.5; | |
| 201 #endif | |
| 202 // Alpha contributions from Y and V are always 0. U is set such that | |
| 203 // all values result in a full '255' alpha value. | |
| 204 table[(j * kNumRows + i) * kNumColumns + 3] = (j == 1) ? 256 * 64 - 1 : 0; | |
| 205 } | |
| 206 // And YUVA alpha is passed through as-is. | |
| 207 for (int k = 0; k < kNumTables; ++k) | |
| 208 table[((kNumTables - 1) * kNumRows + i) * kNumColumns + k] = i; | |
| 209 } | |
| 210 | |
| 211 return table; | |
| 212 } | |
| 213 | |
| 214 void InitializeCPUSpecificYUVConversions() { | |
| 215 CHECK(!g_filter_yuv_rows_proc_); | |
| 216 CHECK(!g_convert_yuv_to_rgb32_row_proc_); | |
| 217 CHECK(!g_scale_yuv_to_rgb32_row_proc_); | |
| 218 CHECK(!g_linear_scale_yuv_to_rgb32_row_proc_); | |
| 219 CHECK(!g_convert_rgb32_to_yuv_proc_); | |
| 220 CHECK(!g_convert_rgb24_to_yuv_proc_); | |
| 221 CHECK(!g_convert_yuv_to_rgb32_proc_); | |
| 222 CHECK(!g_convert_yuva_to_argb_proc_); | |
| 223 CHECK(!g_empty_register_state_proc_); | |
| 224 | |
| 225 g_filter_yuv_rows_proc_ = FilterYUVRows_C; | |
| 226 g_convert_yuv_to_rgb32_row_proc_ = ConvertYUVToRGB32Row_C; | |
| 227 g_scale_yuv_to_rgb32_row_proc_ = ScaleYUVToRGB32Row_C; | |
| 228 g_linear_scale_yuv_to_rgb32_row_proc_ = LinearScaleYUVToRGB32Row_C; | |
| 229 g_convert_rgb32_to_yuv_proc_ = ConvertRGB32ToYUV_C; | |
| 230 g_convert_rgb24_to_yuv_proc_ = ConvertRGB24ToYUV_C; | |
| 231 g_convert_yuv_to_rgb32_proc_ = ConvertYUVToRGB32_C; | |
| 232 g_convert_yuva_to_argb_proc_ = ConvertYUVAToARGB_C; | |
| 233 g_empty_register_state_proc_ = EmptyRegisterStateStub; | |
| 234 | |
| 235 // Assembly code confuses MemorySanitizer. Also not available in iOS builds. | |
| 236 #if defined(ARCH_CPU_X86_FAMILY) && !defined(MEMORY_SANITIZER) && \ | |
| 237 !defined(OS_IOS) | |
| 238 g_convert_yuva_to_argb_proc_ = ConvertYUVAToARGB_MMX; | |
| 239 | |
| 240 #if defined(MEDIA_MMX_INTRINSICS_AVAILABLE) | |
| 241 g_empty_register_state_proc_ = EmptyRegisterStateIntrinsic; | |
| 242 #else | |
| 243 g_empty_register_state_proc_ = EmptyRegisterState_MMX; | |
| 244 #endif | |
| 245 | |
| 246 g_convert_yuv_to_rgb32_row_proc_ = ConvertYUVToRGB32Row_SSE; | |
| 247 g_convert_yuv_to_rgb32_proc_ = ConvertYUVToRGB32_SSE; | |
| 248 | |
| 249 g_filter_yuv_rows_proc_ = FilterYUVRows_SSE2; | |
| 250 g_convert_rgb32_to_yuv_proc_ = ConvertRGB32ToYUV_SSE2; | |
| 251 | |
| 252 #if defined(ARCH_CPU_X86_64) | |
| 253 g_scale_yuv_to_rgb32_row_proc_ = ScaleYUVToRGB32Row_SSE2_X64; | |
| 254 | |
| 255 // Technically this should be in the MMX section, but MSVC will optimize out | |
| 256 // the export of LinearScaleYUVToRGB32Row_MMX, which is required by the unit | |
| 257 // tests, if that decision can be made at compile time. Since all X64 CPUs | |
| 258 // have SSE2, we can hack around this by making the selection here. | |
| 259 g_linear_scale_yuv_to_rgb32_row_proc_ = LinearScaleYUVToRGB32Row_MMX_X64; | |
| 260 #else | |
| 261 g_scale_yuv_to_rgb32_row_proc_ = ScaleYUVToRGB32Row_SSE; | |
| 262 g_linear_scale_yuv_to_rgb32_row_proc_ = LinearScaleYUVToRGB32Row_SSE; | |
| 263 #endif | |
| 264 | |
| 265 base::CPU cpu; | |
| 266 if (cpu.has_ssse3()) { | |
| 267 g_convert_rgb24_to_yuv_proc_ = &ConvertRGB24ToYUV_SSSE3; | |
| 268 | |
| 269 // TODO(hclam): Add ConvertRGB32ToYUV_SSSE3 when the cyan problem is solved. | |
| 270 // See: crbug.com/100462 | |
| 271 } | |
| 272 #endif | |
| 273 | |
| 274 // Initialize YUV conversion lookup tables. | |
| 275 | |
| 276 // SD Rec601 YUV->RGB matrix, see http://www.fourcc.org/fccyvrgb.php | |
| 277 const double kRec601ConvertMatrix[3][3] = { | |
| 278 {1.164, 1.164, 1.164}, {0.0, -0.391, 2.018}, {1.596, -0.813, 0.0}, | |
| 279 }; | |
| 280 | |
| 281 // JPEG table, values from above link. | |
| 282 const double kJPEGConvertMatrix[3][3] = { | |
| 283 {1.0, 1.0, 1.0}, {0.0, -0.34414, 1.772}, {1.402, -0.71414, 0.0}, | |
| 284 }; | |
| 285 | |
| 286 // Rec709 "HD" color space, values from: | |
| 287 // http://www.equasys.de/colorconversion.html | |
| 288 const double kRec709ConvertMatrix[3][3] = { | |
| 289 {1.164, 1.164, 1.164}, {0.0, -0.213, 2.112}, {1.793, -0.533, 0.0}, | |
| 290 }; | |
| 291 | |
| 292 g_table_rec601 = | |
| 293 static_cast<int16_t*>(base::AlignedAlloc(kYUVToRGBTableSize, 16)); | |
| 294 PopulateYUVToRGBTable(kRec601ConvertMatrix, false, g_table_rec601); | |
| 295 | |
| 296 g_table_rec709 = | |
| 297 static_cast<int16_t*>(base::AlignedAlloc(kYUVToRGBTableSize, 16)); | |
| 298 PopulateYUVToRGBTable(kRec709ConvertMatrix, false, g_table_rec709); | |
| 299 | |
| 300 g_table_jpeg = | |
| 301 static_cast<int16_t*>(base::AlignedAlloc(kYUVToRGBTableSize, 16)); | |
| 302 PopulateYUVToRGBTable(kJPEGConvertMatrix, true, g_table_jpeg); | |
| 303 } | |
| 304 | |
| 305 // Empty SIMD registers state after using them. | |
| 306 void EmptyRegisterState() { g_empty_register_state_proc_(); } | |
| 307 | |
// 16.16 fixed point arithmetic: 16 fractional bits, the fixed-point value of
// 1.0, and a mask that extracts the fractional part.
const int kFractionBits = 16;
const int kFractionMax = 1 << kFractionBits;
const int kFractionMask = kFractionMax - 1;
| 312 | |
| 313 // Scale a frame of YUV to 32 bit ARGB. | |
| 314 void ScaleYUVToRGB32(const uint8_t* y_buf, | |
| 315 const uint8_t* u_buf, | |
| 316 const uint8_t* v_buf, | |
| 317 uint8_t* rgb_buf, | |
| 318 int source_width, | |
| 319 int source_height, | |
| 320 int width, | |
| 321 int height, | |
| 322 int y_pitch, | |
| 323 int uv_pitch, | |
| 324 int rgb_pitch, | |
| 325 YUVType yuv_type, | |
| 326 Rotate view_rotate, | |
| 327 ScaleFilter filter) { | |
| 328 // Handle zero sized sources and destinations. | |
| 329 if ((yuv_type == YV12 && (source_width < 2 || source_height < 2)) || | |
| 330 (yuv_type == YV16 && (source_width < 2 || source_height < 1)) || | |
| 331 width == 0 || height == 0) | |
| 332 return; | |
| 333 | |
| 334 const int16_t* lookup_table = GetLookupTable(yuv_type); | |
| 335 | |
| 336 // 4096 allows 3 buffers to fit in 12k. | |
| 337 // Helps performance on CPU with 16K L1 cache. | |
| 338 // Large enough for 3830x2160 and 30" displays which are 2560x1600. | |
| 339 const int kFilterBufferSize = 4096; | |
| 340 // Disable filtering if the screen is too big (to avoid buffer overflows). | |
| 341 // This should never happen to regular users: they don't have monitors | |
| 342 // wider than 4096 pixels. | |
| 343 // TODO(fbarchard): Allow rotated videos to filter. | |
| 344 if (source_width > kFilterBufferSize || view_rotate) | |
| 345 filter = FILTER_NONE; | |
| 346 | |
| 347 unsigned int y_shift = GetVerticalShift(yuv_type); | |
| 348 // Diagram showing origin and direction of source sampling. | |
| 349 // ->0 4<- | |
| 350 // 7 3 | |
| 351 // | |
| 352 // 6 5 | |
| 353 // ->1 2<- | |
| 354 // Rotations that start at right side of image. | |
| 355 if ((view_rotate == ROTATE_180) || (view_rotate == ROTATE_270) || | |
| 356 (view_rotate == MIRROR_ROTATE_0) || (view_rotate == MIRROR_ROTATE_90)) { | |
| 357 y_buf += source_width - 1; | |
| 358 u_buf += source_width / 2 - 1; | |
| 359 v_buf += source_width / 2 - 1; | |
| 360 source_width = -source_width; | |
| 361 } | |
| 362 // Rotations that start at bottom of image. | |
| 363 if ((view_rotate == ROTATE_90) || (view_rotate == ROTATE_180) || | |
| 364 (view_rotate == MIRROR_ROTATE_90) || (view_rotate == MIRROR_ROTATE_180)) { | |
| 365 y_buf += (source_height - 1) * y_pitch; | |
| 366 u_buf += ((source_height >> y_shift) - 1) * uv_pitch; | |
| 367 v_buf += ((source_height >> y_shift) - 1) * uv_pitch; | |
| 368 source_height = -source_height; | |
| 369 } | |
| 370 | |
| 371 int source_dx = source_width * kFractionMax / width; | |
| 372 | |
| 373 if ((view_rotate == ROTATE_90) || (view_rotate == ROTATE_270)) { | |
| 374 int tmp = height; | |
| 375 height = width; | |
| 376 width = tmp; | |
| 377 tmp = source_height; | |
| 378 source_height = source_width; | |
| 379 source_width = tmp; | |
| 380 int source_dy = source_height * kFractionMax / height; | |
| 381 source_dx = ((source_dy >> kFractionBits) * y_pitch) << kFractionBits; | |
| 382 if (view_rotate == ROTATE_90) { | |
| 383 y_pitch = -1; | |
| 384 uv_pitch = -1; | |
| 385 source_height = -source_height; | |
| 386 } else { | |
| 387 y_pitch = 1; | |
| 388 uv_pitch = 1; | |
| 389 } | |
| 390 } | |
| 391 | |
| 392 // Need padding because FilterRows() will write 1 to 16 extra pixels | |
| 393 // after the end for SSE2 version. | |
| 394 uint8_t yuvbuf[16 + kFilterBufferSize * 3 + 16]; | |
| 395 uint8_t* ybuf = reinterpret_cast<uint8_t*>( | |
| 396 reinterpret_cast<uintptr_t>(yuvbuf + 15) & ~15); | |
| 397 uint8_t* ubuf = ybuf + kFilterBufferSize; | |
| 398 uint8_t* vbuf = ubuf + kFilterBufferSize; | |
| 399 | |
| 400 // TODO(fbarchard): Fixed point math is off by 1 on negatives. | |
| 401 | |
| 402 // We take a y-coordinate in [0,1] space in the source image space, and | |
| 403 // transform to a y-coordinate in [0,1] space in the destination image space. | |
| 404 // Note that the coordinate endpoints lie on pixel boundaries, not on pixel | |
| 405 // centers: e.g. a two-pixel-high image will have pixel centers at 0.25 and | |
| 406 // 0.75. The formula is as follows (in fixed-point arithmetic): | |
| 407 // y_dst = dst_height * ((y_src + 0.5) / src_height) | |
| 408 // dst_pixel = clamp([0, dst_height - 1], floor(y_dst - 0.5)) | |
| 409 // Implement this here as an accumulator + delta, to avoid expensive math | |
| 410 // in the loop. | |
| 411 int source_y_subpixel_accum = | |
| 412 ((kFractionMax / 2) * source_height) / height - (kFractionMax / 2); | |
| 413 int source_y_subpixel_delta = ((1 << kFractionBits) * source_height) / height; | |
| 414 | |
| 415 // TODO(fbarchard): Split this into separate function for better efficiency. | |
| 416 for (int y = 0; y < height; ++y) { | |
| 417 uint8_t* dest_pixel = rgb_buf + y * rgb_pitch; | |
| 418 int source_y_subpixel = source_y_subpixel_accum; | |
| 419 source_y_subpixel_accum += source_y_subpixel_delta; | |
| 420 if (source_y_subpixel < 0) | |
| 421 source_y_subpixel = 0; | |
| 422 else if (source_y_subpixel > ((source_height - 1) << kFractionBits)) | |
| 423 source_y_subpixel = (source_height - 1) << kFractionBits; | |
| 424 | |
| 425 const uint8_t* y_ptr = NULL; | |
| 426 const uint8_t* u_ptr = NULL; | |
| 427 const uint8_t* v_ptr = NULL; | |
| 428 // Apply vertical filtering if necessary. | |
| 429 // TODO(fbarchard): Remove memcpy when not necessary. | |
| 430 if (filter & media::FILTER_BILINEAR_V) { | |
| 431 int source_y = source_y_subpixel >> kFractionBits; | |
| 432 y_ptr = y_buf + source_y * y_pitch; | |
| 433 u_ptr = u_buf + (source_y >> y_shift) * uv_pitch; | |
| 434 v_ptr = v_buf + (source_y >> y_shift) * uv_pitch; | |
| 435 | |
| 436 // Vertical scaler uses 16.8 fixed point. | |
| 437 uint8_t source_y_fraction = (source_y_subpixel & kFractionMask) >> 8; | |
| 438 if (source_y_fraction != 0) { | |
| 439 g_filter_yuv_rows_proc_( | |
| 440 ybuf, y_ptr, y_ptr + y_pitch, source_width, source_y_fraction); | |
| 441 } else { | |
| 442 memcpy(ybuf, y_ptr, source_width); | |
| 443 } | |
| 444 y_ptr = ybuf; | |
| 445 ybuf[source_width] = ybuf[source_width - 1]; | |
| 446 | |
| 447 int uv_source_width = (source_width + 1) / 2; | |
| 448 uint8_t source_uv_fraction; | |
| 449 | |
| 450 // For formats with half-height UV planes, each even-numbered pixel row | |
| 451 // should not interpolate, since the next row to interpolate from should | |
| 452 // be a duplicate of the current row. | |
| 453 if (y_shift && (source_y & 0x1) == 0) | |
| 454 source_uv_fraction = 0; | |
| 455 else | |
| 456 source_uv_fraction = source_y_fraction; | |
| 457 | |
| 458 if (source_uv_fraction != 0) { | |
| 459 g_filter_yuv_rows_proc_( | |
| 460 ubuf, u_ptr, u_ptr + uv_pitch, uv_source_width, source_uv_fraction); | |
| 461 g_filter_yuv_rows_proc_( | |
| 462 vbuf, v_ptr, v_ptr + uv_pitch, uv_source_width, source_uv_fraction); | |
| 463 } else { | |
| 464 memcpy(ubuf, u_ptr, uv_source_width); | |
| 465 memcpy(vbuf, v_ptr, uv_source_width); | |
| 466 } | |
| 467 u_ptr = ubuf; | |
| 468 v_ptr = vbuf; | |
| 469 ubuf[uv_source_width] = ubuf[uv_source_width - 1]; | |
| 470 vbuf[uv_source_width] = vbuf[uv_source_width - 1]; | |
| 471 } else { | |
| 472 // Offset by 1/2 pixel for center sampling. | |
| 473 int source_y = (source_y_subpixel + (kFractionMax / 2)) >> kFractionBits; | |
| 474 y_ptr = y_buf + source_y * y_pitch; | |
| 475 u_ptr = u_buf + (source_y >> y_shift) * uv_pitch; | |
| 476 v_ptr = v_buf + (source_y >> y_shift) * uv_pitch; | |
| 477 } | |
| 478 if (source_dx == kFractionMax) { // Not scaled | |
| 479 g_convert_yuv_to_rgb32_row_proc_(y_ptr, u_ptr, v_ptr, dest_pixel, width, | |
| 480 lookup_table); | |
| 481 } else { | |
| 482 if (filter & FILTER_BILINEAR_H) { | |
| 483 g_linear_scale_yuv_to_rgb32_row_proc_(y_ptr, u_ptr, v_ptr, dest_pixel, | |
| 484 width, source_dx, | |
| 485 lookup_table); | |
| 486 } else { | |
| 487 g_scale_yuv_to_rgb32_row_proc_(y_ptr, u_ptr, v_ptr, dest_pixel, width, | |
| 488 source_dx, lookup_table); | |
| 489 } | |
| 490 } | |
| 491 } | |
| 492 | |
| 493 g_empty_register_state_proc_(); | |
| 494 } | |
| 495 | |
| 496 // Scale a frame of YV12 to 32 bit ARGB for a specific rectangle. | |
| 497 void ScaleYUVToRGB32WithRect(const uint8_t* y_buf, | |
| 498 const uint8_t* u_buf, | |
| 499 const uint8_t* v_buf, | |
| 500 uint8_t* rgb_buf, | |
| 501 int source_width, | |
| 502 int source_height, | |
| 503 int dest_width, | |
| 504 int dest_height, | |
| 505 int dest_rect_left, | |
| 506 int dest_rect_top, | |
| 507 int dest_rect_right, | |
| 508 int dest_rect_bottom, | |
| 509 int y_pitch, | |
| 510 int uv_pitch, | |
| 511 int rgb_pitch) { | |
| 512 // This routine doesn't currently support up-scaling. | |
| 513 CHECK_LE(dest_width, source_width); | |
| 514 CHECK_LE(dest_height, source_height); | |
| 515 | |
| 516 // Sanity-check the destination rectangle. | |
| 517 DCHECK(dest_rect_left >= 0 && dest_rect_right <= dest_width); | |
| 518 DCHECK(dest_rect_top >= 0 && dest_rect_bottom <= dest_height); | |
| 519 DCHECK(dest_rect_right > dest_rect_left); | |
| 520 DCHECK(dest_rect_bottom > dest_rect_top); | |
| 521 | |
| 522 const int16_t* lookup_table = GetLookupTable(YV12); | |
| 523 | |
| 524 // Fixed-point value of vertical and horizontal scale down factor. | |
| 525 // Values are in the format 16.16. | |
| 526 int y_step = kFractionMax * source_height / dest_height; | |
| 527 int x_step = kFractionMax * source_width / dest_width; | |
| 528 | |
| 529 // Determine the coordinates of the rectangle in 16.16 coords. | |
| 530 // NB: Our origin is the *center* of the top/left pixel, NOT its top/left. | |
| 531 // If we're down-scaling by more than a factor of two, we start with a 50% | |
| 532 // fraction to avoid degenerating to point-sampling - we should really just | |
| 533 // fix the fraction at 50% for all pixels in that case. | |
| 534 int source_left = dest_rect_left * x_step; | |
| 535 int source_right = (dest_rect_right - 1) * x_step; | |
| 536 if (x_step < kFractionMax * 2) { | |
| 537 source_left += ((x_step - kFractionMax) / 2); | |
| 538 source_right += ((x_step - kFractionMax) / 2); | |
| 539 } else { | |
| 540 source_left += kFractionMax / 2; | |
| 541 source_right += kFractionMax / 2; | |
| 542 } | |
| 543 int source_top = dest_rect_top * y_step; | |
| 544 if (y_step < kFractionMax * 2) { | |
| 545 source_top += ((y_step - kFractionMax) / 2); | |
| 546 } else { | |
| 547 source_top += kFractionMax / 2; | |
| 548 } | |
| 549 | |
| 550 // Determine the parts of the Y, U and V buffers to interpolate. | |
| 551 int source_y_left = source_left >> kFractionBits; | |
| 552 int source_y_right = | |
| 553 std::min((source_right >> kFractionBits) + 2, source_width + 1); | |
| 554 | |
| 555 int source_uv_left = source_y_left / 2; | |
| 556 int source_uv_right = std::min((source_right >> (kFractionBits + 1)) + 2, | |
| 557 (source_width + 1) / 2); | |
| 558 | |
| 559 int source_y_width = source_y_right - source_y_left; | |
| 560 int source_uv_width = source_uv_right - source_uv_left; | |
| 561 | |
| 562 // Determine number of pixels in each output row. | |
| 563 int dest_rect_width = dest_rect_right - dest_rect_left; | |
| 564 | |
| 565 // Intermediate buffer for vertical interpolation. | |
| 566 // 4096 bytes allows 3 buffers to fit in 12k, which fits in a 16K L1 cache, | |
| 567 // and is bigger than most users will generally need. | |
| 568 // The buffer is 16-byte aligned and padded with 16 extra bytes; some of the | |
| 569 // FilterYUVRowsProcs have alignment requirements, and the SSE version can | |
| 570 // write up to 16 bytes past the end of the buffer. | |
| 571 const int kFilterBufferSize = 4096; | |
| 572 const bool kAvoidUsingOptimizedFilter = source_width > kFilterBufferSize; | |
| 573 uint8_t yuv_temp[16 + kFilterBufferSize * 3 + 16]; | |
| 574 // memset() yuv_temp to 0 to avoid bogus warnings when running on Valgrind. | |
| 575 if (RunningOnValgrind()) | |
| 576 memset(yuv_temp, 0, sizeof(yuv_temp)); | |
| 577 uint8_t* y_temp = reinterpret_cast<uint8_t*>( | |
| 578 reinterpret_cast<uintptr_t>(yuv_temp + 15) & ~15); | |
| 579 uint8_t* u_temp = y_temp + kFilterBufferSize; | |
| 580 uint8_t* v_temp = u_temp + kFilterBufferSize; | |
| 581 | |
| 582 // Move to the top-left pixel of output. | |
| 583 rgb_buf += dest_rect_top * rgb_pitch; | |
| 584 rgb_buf += dest_rect_left * 4; | |
| 585 | |
| 586 // For each destination row perform interpolation and color space | |
| 587 // conversion to produce the output. | |
| 588 for (int row = dest_rect_top; row < dest_rect_bottom; ++row) { | |
| 589 // Round the fixed-point y position to get the current row. | |
| 590 int source_row = source_top >> kFractionBits; | |
| 591 int source_uv_row = source_row / 2; | |
| 592 DCHECK(source_row < source_height); | |
| 593 | |
| 594 // Locate the first row for each plane for interpolation. | |
| 595 const uint8_t* y0_ptr = y_buf + y_pitch * source_row + source_y_left; | |
| 596 const uint8_t* u0_ptr = u_buf + uv_pitch * source_uv_row + source_uv_left; | |
| 597 const uint8_t* v0_ptr = v_buf + uv_pitch * source_uv_row + source_uv_left; | |
| 598 const uint8_t* y1_ptr = NULL; | |
| 599 const uint8_t* u1_ptr = NULL; | |
| 600 const uint8_t* v1_ptr = NULL; | |
| 601 | |
| 602 // Locate the second row for interpolation, being careful not to overrun. | |
| 603 if (source_row + 1 >= source_height) { | |
| 604 y1_ptr = y0_ptr; | |
| 605 } else { | |
| 606 y1_ptr = y0_ptr + y_pitch; | |
| 607 } | |
| 608 if (source_uv_row + 1 >= (source_height + 1) / 2) { | |
| 609 u1_ptr = u0_ptr; | |
| 610 v1_ptr = v0_ptr; | |
| 611 } else { | |
| 612 u1_ptr = u0_ptr + uv_pitch; | |
| 613 v1_ptr = v0_ptr + uv_pitch; | |
| 614 } | |
| 615 | |
| 616 if (!kAvoidUsingOptimizedFilter) { | |
| 617 // Vertical scaler uses 16.8 fixed point. | |
| 618 uint8_t fraction = (source_top & kFractionMask) >> 8; | |
| 619 g_filter_yuv_rows_proc_( | |
| 620 y_temp + source_y_left, y0_ptr, y1_ptr, source_y_width, fraction); | |
| 621 g_filter_yuv_rows_proc_( | |
| 622 u_temp + source_uv_left, u0_ptr, u1_ptr, source_uv_width, fraction); | |
| 623 g_filter_yuv_rows_proc_( | |
| 624 v_temp + source_uv_left, v0_ptr, v1_ptr, source_uv_width, fraction); | |
| 625 | |
| 626 // Perform horizontal interpolation and color space conversion. | |
| 627 // TODO(hclam): Use the MMX version after more testing. | |
| 628 LinearScaleYUVToRGB32RowWithRange_C(y_temp, u_temp, v_temp, rgb_buf, | |
| 629 dest_rect_width, source_left, x_step, | |
| 630 lookup_table); | |
| 631 } else { | |
| 632 // If the frame is too large then we linear scale a single row. | |
| 633 LinearScaleYUVToRGB32RowWithRange_C(y0_ptr, u0_ptr, v0_ptr, rgb_buf, | |
| 634 dest_rect_width, source_left, x_step, | |
| 635 lookup_table); | |
| 636 } | |
| 637 | |
| 638 // Advance vertically in the source and destination image. | |
| 639 source_top += y_step; | |
| 640 rgb_buf += rgb_pitch; | |
| 641 } | |
| 642 | |
| 643 g_empty_register_state_proc_(); | |
| 644 } | |
| 645 | |
| 646 void ConvertRGB32ToYUV(const uint8_t* rgbframe, | |
| 647 uint8_t* yplane, | |
| 648 uint8_t* uplane, | |
| 649 uint8_t* vplane, | |
| 650 int width, | |
| 651 int height, | |
| 652 int rgbstride, | |
| 653 int ystride, | |
| 654 int uvstride) { | |
| 655 g_convert_rgb32_to_yuv_proc_(rgbframe, | |
| 656 yplane, | |
| 657 uplane, | |
| 658 vplane, | |
| 659 width, | |
| 660 height, | |
| 661 rgbstride, | |
| 662 ystride, | |
| 663 uvstride); | |
| 664 } | |
| 665 | |
| 666 void ConvertRGB24ToYUV(const uint8_t* rgbframe, | |
| 667 uint8_t* yplane, | |
| 668 uint8_t* uplane, | |
| 669 uint8_t* vplane, | |
| 670 int width, | |
| 671 int height, | |
| 672 int rgbstride, | |
| 673 int ystride, | |
| 674 int uvstride) { | |
| 675 g_convert_rgb24_to_yuv_proc_(rgbframe, | |
| 676 yplane, | |
| 677 uplane, | |
| 678 vplane, | |
| 679 width, | |
| 680 height, | |
| 681 rgbstride, | |
| 682 ystride, | |
| 683 uvstride); | |
| 684 } | |
| 685 | |
| 686 void ConvertYUVToRGB32(const uint8_t* yplane, | |
| 687 const uint8_t* uplane, | |
| 688 const uint8_t* vplane, | |
| 689 uint8_t* rgbframe, | |
| 690 int width, | |
| 691 int height, | |
| 692 int ystride, | |
| 693 int uvstride, | |
| 694 int rgbstride, | |
| 695 YUVType yuv_type) { | |
| 696 g_convert_yuv_to_rgb32_proc_(yplane, | |
| 697 uplane, | |
| 698 vplane, | |
| 699 rgbframe, | |
| 700 width, | |
| 701 height, | |
| 702 ystride, | |
| 703 uvstride, | |
| 704 rgbstride, | |
| 705 yuv_type); | |
| 706 } | |
| 707 | |
| 708 void ConvertYUVAToARGB(const uint8_t* yplane, | |
| 709 const uint8_t* uplane, | |
| 710 const uint8_t* vplane, | |
| 711 const uint8_t* aplane, | |
| 712 uint8_t* rgbframe, | |
| 713 int width, | |
| 714 int height, | |
| 715 int ystride, | |
| 716 int uvstride, | |
| 717 int astride, | |
| 718 int rgbstride, | |
| 719 YUVType yuv_type) { | |
| 720 g_convert_yuva_to_argb_proc_(yplane, | |
| 721 uplane, | |
| 722 vplane, | |
| 723 aplane, | |
| 724 rgbframe, | |
| 725 width, | |
| 726 height, | |
| 727 ystride, | |
| 728 uvstride, | |
| 729 astride, | |
| 730 rgbstride, | |
| 731 yuv_type); | |
| 732 } | |
| 733 | |
| 734 } // namespace media | |
| OLD | NEW |