| OLD | NEW |
| 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // This webpage shows layout of YV12 and other YUV formats | 5 // This webpage shows layout of YV12 and other YUV formats |
| 6 // http://www.fourcc.org/yuv.php | 6 // http://www.fourcc.org/yuv.php |
| 7 // The actual conversion is best described here | 7 // The actual conversion is best described here |
| 8 // http://en.wikipedia.org/wiki/YUV | 8 // http://en.wikipedia.org/wiki/YUV |
| 9 // excerpt from wiki: | 9 // excerpt from wiki: |
| 10 // These formulae are based on the NTSC standard; | 10 // These formulae are based on the NTSC standard; |
| 11 // Y' = 0.299 x R + 0.587 x G + 0.114 x B | 11 // Y' = 0.299 x R + 0.587 x G + 0.114 x B |
| 12 // U = -0.147 x R - 0.289 x G + 0.436 x B | 12 // U = -0.147 x R - 0.289 x G + 0.436 x B |
| 13 // V = 0.615 x R - 0.515 x G - 0.100 x B | 13 // V = 0.615 x R - 0.515 x G - 0.100 x B |
| 14 // On older, non-SIMD architectures, floating point arithmetic is much | 14 // On older, non-SIMD architectures, floating point arithmetic is much |
| 15 // slower than using fixed-point arithmetic, so an alternative formulation | 15 // slower than using fixed-point arithmetic, so an alternative formulation |
| 16 // is: | 16 // is: |
| 17 // C = Y' - 16 | 17 // C = Y' - 16 |
| 18 // D = U - 128 | 18 // D = U - 128 |
| 19 // E = V - 128 | 19 // E = V - 128 |
| 20 // Using the previous coefficients and noting that clip() denotes clipping a | 20 // Using the previous coefficients and noting that clip() denotes clipping a |
| 21 // value to the range of 0 to 255, the following formulae provide the | 21 // value to the range of 0 to 255, the following formulae provide the |
| 22 // conversion from Y'UV to RGB (NTSC version): | 22 // conversion from Y'UV to RGB (NTSC version): |
| 23 // R = clip((298 x C + 409 x E + 128) >> 8) | 23 // R = clip((298 x C + 409 x E + 128) >> 8) |
| 24 // G = clip((298 x C - 100 x D - 208 x E + 128) >> 8) | 24 // G = clip((298 x C - 100 x D - 208 x E + 128) >> 8) |
| 25 // B = clip((298 x C + 516 x D + 128) >> 8) | 25 // B = clip((298 x C + 516 x D + 128) >> 8) |
| 26 // | 26 // |
| 27 // An article on optimizing YUV conversion using tables instead of multiplies | 27 // An article on optimizing YUV conversion using tables instead of multiplies |
| 28 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf | 28 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf |
| 29 // | 29 // |
| 30 // YV12 is a full plane of Y and half height, half width chroma planes |
| 31 // YV16 is a full plane of Y and full height, half width chroma planes |
| 32 // |
| 30 // Implementation notes | 33 // Implementation notes |
| 31 // This version uses MMX for Visual C and GCC, which should cover all | 34 // This version uses MMX for Visual C and GCC, which should cover all |
| 32 // current platforms. C++ is included for reference and future platforms. | 35 // current platforms. C++ is included for reference and future platforms. |
| 33 // | 36 // |
| 34 // ARGB pixel format is assumed, which on little endian is stored as BGRA. | 37 // ARGB pixel format is output, which on little endian is stored as BGRA. |
| 35 // The alpha is filled in, allowing the application to use RGBA or RGB32. | 38 // The alpha is filled in, allowing the application to use RGBA or RGB32. |
| 36 // The row based conversion allows for a future YV16 version, and simplifies | |
| 37 // the platform specific portion of the code. | |
| 38 // | 39 // |
| 39 // The Visual C assembler is considered the source. | 40 // The Visual C assembler is considered the source. |
| 40 // The GCC asm was created by compiling with Visual C and disassembling | 41 // The GCC asm was created by compiling with Visual C and disassembling |
| 41 // with GNU objdump. | 42 // with GNU objdump. |
| 42 // cl /c /Ox yuv_convert.cc | 43 // cl /c /Ox yuv_convert.cc |
| 43 // objdump -d yuv_convert.o | 44 // objdump -d yuv_convert.o |
| 44 // The code was almost copy/pasted in, except the table lookups, which produced | 45 // The code was almost copy/pasted in, except the table lookups, which produced |
| 45 // movq 0x800(,%eax,8),%mm0 | 46 // movq 0x800(,%eax,8),%mm0 |
| 46 // and needed to be changed to cdecl style table names | 47 // and needed to be changed to cdecl style table names |
| 47 // "movq _coefficients_RGB_U(,%eax,8),%mm0\n" | 48 // "movq _coefficients_RGB_U(,%eax,8),%mm0\n" |
| (...skipping 328 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 376 asm( | 377 asm( |
| 377 "ConvertYV12ToRGB32Row:\n" | 378 "ConvertYV12ToRGB32Row:\n" |
| 378 "pusha\n" | 379 "pusha\n" |
| 379 "mov 0x24(%esp),%edx\n" | 380 "mov 0x24(%esp),%edx\n" |
| 380 "mov 0x28(%esp),%edi\n" | 381 "mov 0x28(%esp),%edi\n" |
| 381 "mov 0x2c(%esp),%esi\n" | 382 "mov 0x2c(%esp),%esi\n" |
| 382 "mov 0x30(%esp),%ebp\n" | 383 "mov 0x30(%esp),%ebp\n" |
| 383 "mov 0x34(%esp),%ecx\n" | 384 "mov 0x34(%esp),%ecx\n" |
| 384 "shr %ecx\n" | 385 "shr %ecx\n" |
| 385 "1:\n" | 386 "1:\n" |
| 386 "movzbl (%edi),%eax\n" | 387 "movzb (%edi),%eax\n" |
| 387 "add $0x1,%edi\n" | 388 "add $0x1,%edi\n" |
| 388 "movzbl (%esi),%ebx\n" | 389 "movzb (%esi),%ebx\n" |
| 389 "add $0x1,%esi\n" | 390 "add $0x1,%esi\n" |
| 390 "movq coefficients_RGB_U(,%eax,8),%mm0\n" | 391 "movq coefficients_RGB_U(,%eax,8),%mm0\n" |
| 391 "movzbl (%edx),%eax\n" | 392 "movzb (%edx),%eax\n" |
| 392 "paddsw coefficients_RGB_V(,%ebx,8),%mm0\n" | 393 "paddsw coefficients_RGB_V(,%ebx,8),%mm0\n" |
| 393 "movzbl 0x1(%edx),%ebx\n" | 394 "movzb 0x1(%edx),%ebx\n" |
| 394 "movq coefficients_RGB_Y(,%eax,8),%mm1\n" | 395 "movq coefficients_RGB_Y(,%eax,8),%mm1\n" |
| 395 "add $0x2,%edx\n" | 396 "add $0x2,%edx\n" |
| 396 "movq coefficients_RGB_Y(,%ebx,8),%mm2\n" | 397 "movq coefficients_RGB_Y(,%ebx,8),%mm2\n" |
| 397 "paddsw %mm0,%mm1\n" | 398 "paddsw %mm0,%mm1\n" |
| 398 "paddsw %mm0,%mm2\n" | 399 "paddsw %mm0,%mm2\n" |
| 399 "psraw $0x6,%mm1\n" | 400 "psraw $0x6,%mm1\n" |
| 400 "psraw $0x6,%mm2\n" | 401 "psraw $0x6,%mm2\n" |
| 401 "packuswb %mm2,%mm1\n" | 402 "packuswb %mm2,%mm1\n" |
| 402 "movntq %mm1,0x0(%ebp)\n" | 403 "movntq %mm1,0x0(%ebp)\n" |
| 403 "add $0x8,%ebp\n" | 404 "add $0x8,%ebp\n" |
| (...skipping 17 matching lines...) Expand all Loading... |
| 421 "pusha\n" | 422 "pusha\n" |
| 422 "mov 0x24(%esp),%edx\n" | 423 "mov 0x24(%esp),%edx\n" |
| 423 "mov 0x28(%esp),%edi\n" | 424 "mov 0x28(%esp),%edi\n" |
| 424 "mov 0x2c(%esp),%esi\n" | 425 "mov 0x2c(%esp),%esi\n" |
| 425 "mov 0x30(%esp),%ebp\n" | 426 "mov 0x30(%esp),%ebp\n" |
| 426 "mov 0x34(%esp),%ecx\n" | 427 "mov 0x34(%esp),%ecx\n" |
| 427 "shr %ecx\n" | 428 "shr %ecx\n" |
| 428 "xor %eax,%eax\n" | 429 "xor %eax,%eax\n" |
| 429 "xor %ebx,%ebx\n" | 430 "xor %ebx,%ebx\n" |
| 430 "1:\n" | 431 "1:\n" |
| 431 "movzbl (%edi),%eax\n" | 432 "movzb (%edi),%eax\n" |
| 432 "add $0x1,%edi\n" | 433 "add $0x1,%edi\n" |
| 433 "movzbl (%esi),%ebx\n" | 434 "movzb (%esi),%ebx\n" |
| 434 "add $0x1,%esi\n" | 435 "add $0x1,%esi\n" |
| 435 "movq _coefficients_RGB_U(,%eax,8),%mm0\n" | 436 "movq _coefficients_RGB_U(,%eax,8),%mm0\n" |
| 436 "movzbl (%edx),%eax\n" | 437 "movzb (%edx),%eax\n" |
| 437 "paddsw _coefficients_RGB_V(,%ebx,8),%mm0\n" | 438 "paddsw _coefficients_RGB_V(,%ebx,8),%mm0\n" |
| 438 "movzbl 0x1(%edx),%ebx\n" | 439 "movzb 0x1(%edx),%ebx\n" |
| 439 "movq _coefficients_RGB_Y(,%eax,8),%mm1\n" | 440 "movq _coefficients_RGB_Y(,%eax,8),%mm1\n" |
| 440 "add $0x2,%edx\n" | 441 "add $0x2,%edx\n" |
| 441 "movq _coefficients_RGB_Y(,%ebx,8),%mm2\n" | 442 "movq _coefficients_RGB_Y(,%ebx,8),%mm2\n" |
| 442 "paddsw %mm0,%mm1\n" | 443 "paddsw %mm0,%mm1\n" |
| 443 "paddsw %mm0,%mm2\n" | 444 "paddsw %mm0,%mm2\n" |
| 444 "psraw $0x6,%mm1\n" | 445 "psraw $0x6,%mm1\n" |
| 445 "psraw $0x6,%mm2\n" | 446 "psraw $0x6,%mm2\n" |
| 446 "packuswb %mm2,%mm1\n" | 447 "packuswb %mm2,%mm1\n" |
| 447 "movntq %mm1,0x0(%ebp)\n" | 448 "movntq %mm1,0x0(%ebp)\n" |
| 448 "add $0x8,%ebp\n" | 449 "add $0x8,%ebp\n" |
| 449 "sub $0x1,%ecx\n" | 450 "sub $0x1,%ecx\n" |
| 450 "jne 1b\n" | 451 "jne 1b\n" |
| 451 "popa\n" | 452 "popa\n" |
| 452 "ret\n" | 453 "ret\n" |
| 453 ); | 454 ); |
| 454 | 455 |
| 455 #endif // MSC_VER | 456 #endif // MSC_VER |
| 456 } // extern "C" | 457 } // extern "C" |
| 457 | 458 |
| 458 #else // USE_MMX | 459 #else // USE_MMX |
| 459 | 460 |
| 460 void ConvertYV12ToRGB32Row(const uint8* y_buf, | 461 void ConvertYV12ToRGB32Row(const uint8* y_buf, |
| 461 const uint8* u_buf, | 462 const uint8* u_buf, |
| 462 const uint8* v_buf, | 463 const uint8* v_buf, |
| 463 uint8* rgb_buf, | 464 uint8* rgb_buf, |
| 464 size_t width); | 465 size_t width); |
| 465 #endif | 466 #endif |
| 466 | 467 |
| 467 // Convert a frame of YUV to 32 bit ARGB. | 468 // Convert a frame of YV12 (aka YUV420) to 32 bit ARGB. |
| 468 void ConvertYV12ToRGB32(const uint8* y_buf, | 469 void ConvertYV12ToRGB32(const uint8* y_buf, |
| 469 const uint8* u_buf, | 470 const uint8* u_buf, |
| 470 const uint8* v_buf, | 471 const uint8* v_buf, |
| 471 uint8* rgb_buf, | 472 uint8* rgb_buf, |
| 472 size_t width, | 473 size_t width, |
| 473 size_t height, | 474 size_t height, |
| 474 int y_pitch, | 475 int y_pitch, |
| 475 int uv_pitch, | 476 int uv_pitch, |
| 476 int rgb_pitch) { | 477 int rgb_pitch) { |
| 477 // Image must be multiple of 2 in width. | 478 // Image must be multiple of 2 in width. |
| (...skipping 18 matching lines...) Expand all Loading... |
| 496 } | 497 } |
| 497 #if USE_MMX | 498 #if USE_MMX |
| 498 #if defined(_MSC_VER) | 499 #if defined(_MSC_VER) |
| 499 __asm emms; | 500 __asm emms; |
| 500 #else | 501 #else |
| 501 asm("emms"); | 502 asm("emms"); |
| 502 #endif | 503 #endif |
| 503 #endif | 504 #endif |
| 504 } | 505 } |
| 505 | 506 |
| 507 // Convert a frame of YV16 (aka YUV422) to 32 bit ARGB. |
| 508 void ConvertYV16ToRGB32(const uint8* y_buf, |
| 509 const uint8* u_buf, |
| 510 const uint8* v_buf, |
| 511 uint8* rgb_buf, |
| 512 size_t width, |
| 513 size_t height, |
| 514 int y_pitch, |
| 515 int uv_pitch, |
| 516 int rgb_pitch) { |
| 517 // Image must be multiple of 2 in width. |
| 518 DCHECK((width & 1) == 0); |
| 519 // Check alignment. Use memalign to allocate the buffer if you hit this |
| 520 // check: |
| 521 DCHECK((reinterpret_cast<uintptr_t>(rgb_buf) & 7) == 0); |
| 522 #ifdef _OPENMP |
| 523 #pragma omp parallel for |
| 524 #endif |
| 525 for (int y = 0; y < static_cast<int>(height); ++y) { |
| 526 uint8* d1 = rgb_buf + y * rgb_pitch; |
| 527 const uint8* y_ptr = y_buf + y * y_pitch; |
| 528 const uint8* u_ptr = u_buf + y * uv_pitch; |
| 529 const uint8* v_ptr = v_buf + y * uv_pitch; |
| 530 |
| 531 ConvertYV12ToRGB32Row(y_ptr, |
| 532 u_ptr, |
| 533 v_ptr, |
| 534 d1, |
| 535 width); |
| 536 } |
| 537 #if USE_MMX |
| 538 #if defined(_MSC_VER) |
| 539 __asm emms; |
| 540 #else |
| 541 asm("emms"); |
| 542 #endif |
| 543 #endif |
| 544 } |
| 545 |
| 506 //------------------------------------------------------------------------------ | 546 //------------------------------------------------------------------------------ |
| 507 // This is pure C code | 547 // This is pure C code |
| 508 | 548 |
| 509 #if !USE_MMX | 549 #if !USE_MMX |
| 510 | 550 |
| 511 // Reference version of YUV converter. | 551 // Reference version of YUV converter. |
| 512 static const int kClipTableSize = 256; | 552 static const int kClipTableSize = 256; |
| 513 static const int kClipOverflow = 128; | 553 static const int kClipOverflow = 128; |
| 514 | 554 |
| 515 static uint8 g_rgb_clip_table[kClipOverflow | 555 static uint8 g_rgb_clip_table[kClipOverflow |
| (...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 623 | 0xff000000; | 663 | 0xff000000; |
| 624 | 664 |
| 625 rgb_buf += 8; // Advance 2 pixels. | 665 rgb_buf += 8; // Advance 2 pixels. |
| 626 } | 666 } |
| 627 } | 667 } |
| 628 #endif | 668 #endif |
| 629 | 669 |
| 630 | 670 |
| 631 } // namespace media | 671 } // namespace media |
| 632 | 672 |
| OLD | NEW |