OLD | NEW |
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // This webpage shows layout of YV12 and other YUV formats | 5 // This webpage shows layout of YV12 and other YUV formats |
6 // http://www.fourcc.org/yuv.php | 6 // http://www.fourcc.org/yuv.php |
7 // The actual conversion is best described here | 7 // The actual conversion is best described here |
8 // http://en.wikipedia.org/wiki/YUV | 8 // http://en.wikipedia.org/wiki/YUV |
9 // excerpt from wiki: | 9 // excerpt from wiki: |
10 // These formulae are based on the NTSC standard; | 10 // These formulae are based on the NTSC standard; |
11 // Y' = 0.299 x R + 0.587 x G + 0.114 x B | 11 // Y' = 0.299 x R + 0.587 x G + 0.114 x B |
12 // U = -0.147 x R - 0.289 x G + 0.436 x B | 12 // U = -0.147 x R - 0.289 x G + 0.436 x B |
13 // V = 0.615 x R - 0.515 x G - 0.100 x B | 13 // V = 0.615 x R - 0.515 x G - 0.100 x B |
14 // On older, non-SIMD architectures, floating point arithmetic is much | 14 // On older, non-SIMD architectures, floating point arithmetic is much |
15 // slower than using fixed-point arithmetic, so an alternative formulation | 15 // slower than using fixed-point arithmetic, so an alternative formulation |
16 // is: | 16 // is: |
17 // C = Y' - 16 | 17 // C = Y' - 16 |
18 // D = U - 128 | 18 // D = U - 128 |
19 // E = V - 128 | 19 // E = V - 128 |
20 // Using the previous coefficients and noting that clip() denotes clipping a | 20 // Using the previous coefficients and noting that clip() denotes clipping a |
21 // value to the range of 0 to 255, the following formulae provide the | 21 // value to the range of 0 to 255, the following formulae provide the |
22 // conversion from Y'UV to RGB (NTSC version): | 22 // conversion from Y'UV to RGB (NTSC version): |
23 // R = clip((298 x C + 409 x E + 128) >> 8) | 23 // R = clip((298 x C + 409 x E + 128) >> 8) |
24 // G = clip((298 x C - 100 x D - 208 x E + 128) >> 8) | 24 // G = clip((298 x C - 100 x D - 208 x E + 128) >> 8) |
25 // B = clip((298 x C + 516 x D + 128) >> 8) | 25 // B = clip((298 x C + 516 x D + 128) >> 8) |
26 // | 26 // |
27 // An article on optimizing YUV conversion using tables instead of multiplies | 27 // An article on optimizing YUV conversion using tables instead of multiplies |
28 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf | 28 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf |
29 // | 29 // |
| 30 // YV12 is a full plane of Y and a half height, half width chroma planes |
| 31 // YV16 is a full plane of Y and a full height, half width chroma planes |
| 32 // |
30 // Implementation notes | 33 // Implementation notes |
31 // This version uses MMX for Visual C and GCC, which should cover all | 34 // This version uses MMX for Visual C and GCC, which should cover all |
32 // current platforms. C++ is included for reference and future platforms. | 35 // current platforms. C++ is included for reference and future platforms. |
33 // | 36 // |
34 // ARGB pixel format is assumed, which on little endian is stored as BGRA. | 37 // ARGB pixel format is output, which on little endian is stored as BGRA. |
35 // The alpha is filled in, allowing the application to use RGBA or RGB32. | 38 // The alpha is filled in, allowing the application to use RGBA or RGB32. |
36 // The row based conversion allows for a future YV16 version, and simplifies | |
37 // the platform specific portion of the code. | |
38 // | 39 // |
39 // The Visual C assembler is considered the source. | 40 // The Visual C assembler is considered the source. |
40 // The GCC asm was created by compiling with Visual C and disassembling | 41 // The GCC asm was created by compiling with Visual C and disassembling |
41 // with GNU objdump. | 42 // with GNU objdump. |
42 // cl /c /Ox yuv_convert.cc | 43 // cl /c /Ox yuv_convert.cc |
43 // objdump -d yuv_convert.o | 44 // objdump -d yuv_convert.o |
44 // The code was almost copy/pasted in, except for the table lookups, which produced | 45 // The code was almost copy/pasted in, except for the table lookups, which produced |
45 // movq 0x800(,%eax,8),%mm0 | 46 // movq 0x800(,%eax,8),%mm0 |
46 // and needed to be changed to cdecl style table names | 47 // and needed to be changed to cdecl style table names |
47 // "movq _coefficients_RGB_U(,%eax,8),%mm0\n" | 48 // "movq _coefficients_RGB_U(,%eax,8),%mm0\n" |
(...skipping 328 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
376 asm( | 377 asm( |
377 "ConvertYV12ToRGB32Row:\n" | 378 "ConvertYV12ToRGB32Row:\n" |
378 "pusha\n" | 379 "pusha\n" |
379 "mov 0x24(%esp),%edx\n" | 380 "mov 0x24(%esp),%edx\n" |
380 "mov 0x28(%esp),%edi\n" | 381 "mov 0x28(%esp),%edi\n" |
381 "mov 0x2c(%esp),%esi\n" | 382 "mov 0x2c(%esp),%esi\n" |
382 "mov 0x30(%esp),%ebp\n" | 383 "mov 0x30(%esp),%ebp\n" |
383 "mov 0x34(%esp),%ecx\n" | 384 "mov 0x34(%esp),%ecx\n" |
384 "shr %ecx\n" | 385 "shr %ecx\n" |
385 "1:\n" | 386 "1:\n" |
386 "movzbl (%edi),%eax\n" | 387 "movzb (%edi),%eax\n" |
387 "add $0x1,%edi\n" | 388 "add $0x1,%edi\n" |
388 "movzbl (%esi),%ebx\n" | 389 "movzb (%esi),%ebx\n" |
389 "add $0x1,%esi\n" | 390 "add $0x1,%esi\n" |
390 "movq coefficients_RGB_U(,%eax,8),%mm0\n" | 391 "movq coefficients_RGB_U(,%eax,8),%mm0\n" |
391 "movzbl (%edx),%eax\n" | 392 "movzb (%edx),%eax\n" |
392 "paddsw coefficients_RGB_V(,%ebx,8),%mm0\n" | 393 "paddsw coefficients_RGB_V(,%ebx,8),%mm0\n" |
393 "movzbl 0x1(%edx),%ebx\n" | 394 "movzb 0x1(%edx),%ebx\n" |
394 "movq coefficients_RGB_Y(,%eax,8),%mm1\n" | 395 "movq coefficients_RGB_Y(,%eax,8),%mm1\n" |
395 "add $0x2,%edx\n" | 396 "add $0x2,%edx\n" |
396 "movq coefficients_RGB_Y(,%ebx,8),%mm2\n" | 397 "movq coefficients_RGB_Y(,%ebx,8),%mm2\n" |
397 "paddsw %mm0,%mm1\n" | 398 "paddsw %mm0,%mm1\n" |
398 "paddsw %mm0,%mm2\n" | 399 "paddsw %mm0,%mm2\n" |
399 "psraw $0x6,%mm1\n" | 400 "psraw $0x6,%mm1\n" |
400 "psraw $0x6,%mm2\n" | 401 "psraw $0x6,%mm2\n" |
401 "packuswb %mm2,%mm1\n" | 402 "packuswb %mm2,%mm1\n" |
402 "movntq %mm1,0x0(%ebp)\n" | 403 "movntq %mm1,0x0(%ebp)\n" |
403 "add $0x8,%ebp\n" | 404 "add $0x8,%ebp\n" |
(...skipping 17 matching lines...) Expand all Loading... |
421 "pusha\n" | 422 "pusha\n" |
422 "mov 0x24(%esp),%edx\n" | 423 "mov 0x24(%esp),%edx\n" |
423 "mov 0x28(%esp),%edi\n" | 424 "mov 0x28(%esp),%edi\n" |
424 "mov 0x2c(%esp),%esi\n" | 425 "mov 0x2c(%esp),%esi\n" |
425 "mov 0x30(%esp),%ebp\n" | 426 "mov 0x30(%esp),%ebp\n" |
426 "mov 0x34(%esp),%ecx\n" | 427 "mov 0x34(%esp),%ecx\n" |
427 "shr %ecx\n" | 428 "shr %ecx\n" |
428 "xor %eax,%eax\n" | 429 "xor %eax,%eax\n" |
429 "xor %ebx,%ebx\n" | 430 "xor %ebx,%ebx\n" |
430 "1:\n" | 431 "1:\n" |
431 "movzbl (%edi),%eax\n" | 432 "movzb (%edi),%eax\n" |
432 "add $0x1,%edi\n" | 433 "add $0x1,%edi\n" |
433 "movzbl (%esi),%ebx\n" | 434 "movzb (%esi),%ebx\n" |
434 "add $0x1,%esi\n" | 435 "add $0x1,%esi\n" |
435 "movq _coefficients_RGB_U(,%eax,8),%mm0\n" | 436 "movq _coefficients_RGB_U(,%eax,8),%mm0\n" |
436 "movzbl (%edx),%eax\n" | 437 "movzb (%edx),%eax\n" |
437 "paddsw _coefficients_RGB_V(,%ebx,8),%mm0\n" | 438 "paddsw _coefficients_RGB_V(,%ebx,8),%mm0\n" |
438 "movzbl 0x1(%edx),%ebx\n" | 439 "movzb 0x1(%edx),%ebx\n" |
439 "movq _coefficients_RGB_Y(,%eax,8),%mm1\n" | 440 "movq _coefficients_RGB_Y(,%eax,8),%mm1\n" |
440 "add $0x2,%edx\n" | 441 "add $0x2,%edx\n" |
441 "movq _coefficients_RGB_Y(,%ebx,8),%mm2\n" | 442 "movq _coefficients_RGB_Y(,%ebx,8),%mm2\n" |
442 "paddsw %mm0,%mm1\n" | 443 "paddsw %mm0,%mm1\n" |
443 "paddsw %mm0,%mm2\n" | 444 "paddsw %mm0,%mm2\n" |
444 "psraw $0x6,%mm1\n" | 445 "psraw $0x6,%mm1\n" |
445 "psraw $0x6,%mm2\n" | 446 "psraw $0x6,%mm2\n" |
446 "packuswb %mm2,%mm1\n" | 447 "packuswb %mm2,%mm1\n" |
447 "movntq %mm1,0x0(%ebp)\n" | 448 "movntq %mm1,0x0(%ebp)\n" |
448 "add $0x8,%ebp\n" | 449 "add $0x8,%ebp\n" |
449 "sub $0x1,%ecx\n" | 450 "sub $0x1,%ecx\n" |
450 "jne 1b\n" | 451 "jne 1b\n" |
451 "popa\n" | 452 "popa\n" |
452 "ret\n" | 453 "ret\n" |
453 ); | 454 ); |
454 | 455 |
455 #endif // MSC_VER | 456 #endif // MSC_VER |
456 } // extern "C" | 457 } // extern "C" |
457 | 458 |
458 #else // USE_MMX | 459 #else // USE_MMX |
459 | 460 |
460 void ConvertYV12ToRGB32Row(const uint8* y_buf, | 461 void ConvertYV12ToRGB32Row(const uint8* y_buf, |
461 const uint8* u_buf, | 462 const uint8* u_buf, |
462 const uint8* v_buf, | 463 const uint8* v_buf, |
463 uint8* rgb_buf, | 464 uint8* rgb_buf, |
464 size_t width); | 465 size_t width); |
465 #endif | 466 #endif |
466 | 467 |
467 // Convert a frame of YUV to 32 bit ARGB. | 468 // Convert a frame of YV12 (aka YUV420) to 32 bit ARGB. |
468 void ConvertYV12ToRGB32(const uint8* y_buf, | 469 void ConvertYV12ToRGB32(const uint8* y_buf, |
469 const uint8* u_buf, | 470 const uint8* u_buf, |
470 const uint8* v_buf, | 471 const uint8* v_buf, |
471 uint8* rgb_buf, | 472 uint8* rgb_buf, |
472 size_t width, | 473 size_t width, |
473 size_t height, | 474 size_t height, |
474 int y_pitch, | 475 int y_pitch, |
475 int uv_pitch, | 476 int uv_pitch, |
476 int rgb_pitch) { | 477 int rgb_pitch) { |
477 // Image must be multiple of 2 in width. | 478 // Image must be multiple of 2 in width. |
(...skipping 18 matching lines...) Expand all Loading... |
496 } | 497 } |
497 #if USE_MMX | 498 #if USE_MMX |
498 #if defined(_MSC_VER) | 499 #if defined(_MSC_VER) |
499 __asm emms; | 500 __asm emms; |
500 #else | 501 #else |
501 asm("emms"); | 502 asm("emms"); |
502 #endif | 503 #endif |
503 #endif | 504 #endif |
504 } | 505 } |
505 | 506 |
| 507 // Convert a frame of YV16 (aka YUV422) to 32 bit ARGB. Reuses the YV12 row converter, giving every output row its own chroma row (u_buf/v_buf + y * uv_pitch). |
| 508 void ConvertYV16ToRGB32(const uint8* y_buf, |
| 509 const uint8* u_buf, |
| 510 const uint8* v_buf, |
| 511 uint8* rgb_buf, |
| 512 size_t width, |
| 513 size_t height, |
| 514 int y_pitch, |
| 515 int uv_pitch, |
| 516 int rgb_pitch) { |
| 517 // Image width must be a multiple of 2: the row converter emits two pixels per step. |
| 518 DCHECK((width & 1) == 0); |
| 519 // Check alignment: rgb_buf must be 8-byte aligned. Use memalign to allocate the buffer if you hit this |
| 520 // check: |
| 521 DCHECK((reinterpret_cast<uintptr_t>(rgb_buf) & 7) == 0); |
| 522 #ifdef _OPENMP |
| 523 #pragma omp parallel for |
| 524 #endif |
| 525 for (int y = 0; y < static_cast<int>(height); ++y) { |
| 526 uint8* d1 = rgb_buf + y * rgb_pitch; |
| 527 const uint8* y_ptr = y_buf + y * y_pitch; |
| 528 const uint8* u_ptr = u_buf + y * uv_pitch; |
| 529 const uint8* v_ptr = v_buf + y * uv_pitch; |
| 530 |
| 531 ConvertYV12ToRGB32Row(y_ptr, |
| 532 u_ptr, |
| 533 v_ptr, |
| 534 d1, |
| 535 width); |
| 536 } |
| 537 #if USE_MMX |
| 538 #if defined(_MSC_VER) |
| 539 __asm emms; |
| 540 #else |
| 541 asm("emms"); |
| 542 #endif |
| 543 #endif |
| 544 } |
| 545 |
506 //------------------------------------------------------------------------------ | 546 //------------------------------------------------------------------------------ |
507 // This is pure C code | 547 // This is pure C code |
508 | 548 |
509 #if !USE_MMX | 549 #if !USE_MMX |
510 | 550 |
511 // Reference version of YUV converter. | 551 // Reference version of YUV converter. |
512 static const int kClipTableSize = 256; | 552 static const int kClipTableSize = 256; |
513 static const int kClipOverflow = 128; | 553 static const int kClipOverflow = 128; |
514 | 554 |
515 static uint8 g_rgb_clip_table[kClipOverflow | 555 static uint8 g_rgb_clip_table[kClipOverflow |
(...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
623 | 0xff000000; | 663 | 0xff000000; |
624 | 664 |
625 rgb_buf += 8; // Advance 2 pixels. | 665 rgb_buf += 8; // Advance 2 pixels. |
626 } | 666 } |
627 } | 667 } |
628 #endif | 668 #endif |
629 | 669 |
630 | 670 |
631 } // namespace media | 671 } // namespace media |
632 | 672 |
OLD | NEW |