| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "media/base/yuv_row.h" | 5 #include "media/base/yuv_row.h" |
| 6 | 6 |
| 7 #ifndef NDEBUG | 7 #ifndef NDEBUG |
| 8 #include "base/logging.h" | 8 #include "base/logging.h" |
| 9 #else | 9 #else |
| 10 #define DCHECK(a) | 10 #define DCHECK(a) |
| 11 #endif | 11 #endif |
| 12 | 12 |
| 13 extern "C" { | 13 extern "C" { |
| 14 | 14 |
| 15 // Branch 874 specific fix to disable movntq to prevent crashes on Pentium IIs. |
| 16 #define USE_MOVNTQ 0 |
| 17 |
| 15 #if USE_SSE2 && defined(ARCH_CPU_X86_64) | 18 #if USE_SSE2 && defined(ARCH_CPU_X86_64) |
| 16 | 19 |
| 17 // AMD64 ABI uses register parameters. | 20 // AMD64 ABI uses register parameters. |
| 18 void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi | 21 void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi |
| 19 const uint8* u_buf, // rsi | 22 const uint8* u_buf, // rsi |
| 20 const uint8* v_buf, // rdx | 23 const uint8* v_buf, // rdx |
| 21 uint8* rgb_buf, // rcx | 24 uint8* rgb_buf, // rcx |
| 22 int width) { // r8 | 25 int width) { // r8 |
| 23 asm( | 26 asm( |
| 24 "jmp convertend\n" | 27 "jmp convertend\n" |
| (...skipping 266 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 291 "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n" | 294 "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n" |
| 292 "movzbl 0x1(%edx),%ebx\n" | 295 "movzbl 0x1(%edx),%ebx\n" |
| 293 "movq kCoefficientsRgbY(,%eax,8),%mm1\n" | 296 "movq kCoefficientsRgbY(,%eax,8),%mm1\n" |
| 294 "add $0x2,%edx\n" | 297 "add $0x2,%edx\n" |
| 295 "movq kCoefficientsRgbY(,%ebx,8),%mm2\n" | 298 "movq kCoefficientsRgbY(,%ebx,8),%mm2\n" |
| 296 "paddsw %mm0,%mm1\n" | 299 "paddsw %mm0,%mm1\n" |
| 297 "paddsw %mm0,%mm2\n" | 300 "paddsw %mm0,%mm2\n" |
| 298 "psraw $0x6,%mm1\n" | 301 "psraw $0x6,%mm1\n" |
| 299 "psraw $0x6,%mm2\n" | 302 "psraw $0x6,%mm2\n" |
| 300 "packuswb %mm2,%mm1\n" | 303 "packuswb %mm2,%mm1\n" |
| 304 #if USE_MOVNTQ |
| 301 "movntq %mm1,0x0(%ebp)\n" | 305 "movntq %mm1,0x0(%ebp)\n" |
| 306 #else |
| 307 "movq %mm1,0x0(%ebp)\n" |
| 308 #endif |
| 302 "add $0x8,%ebp\n" | 309 "add $0x8,%ebp\n" |
| 303 "convertend:" | 310 "convertend:" |
| 304 "sub $0x2,%ecx\n" | 311 "sub $0x2,%ecx\n" |
| 305 "jns convertloop\n" | 312 "jns convertloop\n" |
| 306 | 313 |
| 307 "and $0x1,%ecx\n" | 314 "and $0x1,%ecx\n" |
| 308 "je convertdone\n" | 315 "je convertdone\n" |
| 309 | 316 |
| 310 "movzbl (%edi),%eax\n" | 317 "movzbl (%edi),%eax\n" |
| 311 "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" | 318 "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 359 "mov %ebx,%eax\n" | 366 "mov %ebx,%eax\n" |
| 360 "add 0x38(%esp),%ebx\n" | 367 "add 0x38(%esp),%ebx\n" |
| 361 "sar $0x10,%eax\n" | 368 "sar $0x10,%eax\n" |
| 362 "movzbl (%edx,%eax,1),%eax\n" | 369 "movzbl (%edx,%eax,1),%eax\n" |
| 363 "movq kCoefficientsRgbY(,%eax,8),%mm2\n" | 370 "movq kCoefficientsRgbY(,%eax,8),%mm2\n" |
| 364 "paddsw %mm0,%mm1\n" | 371 "paddsw %mm0,%mm1\n" |
| 365 "paddsw %mm0,%mm2\n" | 372 "paddsw %mm0,%mm2\n" |
| 366 "psraw $0x6,%mm1\n" | 373 "psraw $0x6,%mm1\n" |
| 367 "psraw $0x6,%mm2\n" | 374 "psraw $0x6,%mm2\n" |
| 368 "packuswb %mm2,%mm1\n" | 375 "packuswb %mm2,%mm1\n" |
| 376 #if USE_MOVNTQ |
| 369 "movntq %mm1,0x0(%ebp)\n" | 377 "movntq %mm1,0x0(%ebp)\n" |
| 378 #else |
| 379 "movq %mm1,0x0(%ebp)\n" |
| 380 #endif |
| 370 "add $0x8,%ebp\n" | 381 "add $0x8,%ebp\n" |
| 371 "scaleend:" | 382 "scaleend:" |
| 372 "sub $0x2,%ecx\n" | 383 "sub $0x2,%ecx\n" |
| 373 "jns scaleloop\n" | 384 "jns scaleloop\n" |
| 374 | 385 |
| 375 "and $0x1,%ecx\n" | 386 "and $0x1,%ecx\n" |
| 376 "je scaledone\n" | 387 "je scaledone\n" |
| 377 | 388 |
| 378 "mov %ebx,%eax\n" | 389 "mov %ebx,%eax\n" |
| 379 "sar $0x11,%eax\n" | 390 "sar $0x11,%eax\n" |
| (...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 483 "imul %eax, %ecx \n" | 494 "imul %eax, %ecx \n" |
| 484 "addl %esi, %ecx \n" | 495 "addl %esi, %ecx \n" |
| 485 "shrl $16, %ecx \n" | 496 "shrl $16, %ecx \n" |
| 486 "movq kCoefficientsRgbY(,%ecx,8),%mm2\n" | 497 "movq kCoefficientsRgbY(,%ecx,8),%mm2\n" |
| 487 | 498 |
| 488 "paddsw %mm0,%mm1\n" | 499 "paddsw %mm0,%mm1\n" |
| 489 "paddsw %mm0,%mm2\n" | 500 "paddsw %mm0,%mm2\n" |
| 490 "psraw $0x6,%mm1\n" | 501 "psraw $0x6,%mm1\n" |
| 491 "psraw $0x6,%mm2\n" | 502 "psraw $0x6,%mm2\n" |
| 492 "packuswb %mm2,%mm1\n" | 503 "packuswb %mm2,%mm1\n" |
| 504 #if USE_MOVNTQ |
| 493 "movntq %mm1,0x0(%ebp)\n" | 505 "movntq %mm1,0x0(%ebp)\n" |
| 506 #else |
| 507 "movq %mm1,0x0(%ebp)\n" |
| 508 #endif |
| 494 "add $0x8,%ebp\n" | 509 "add $0x8,%ebp\n" |
| 495 | 510 |
| 496 ".lscaleend:" | 511 ".lscaleend:" |
| 497 "cmp 0x34(%esp), %ebx\n" | 512 "cmp 0x34(%esp), %ebx\n" |
| 498 "jl .lscaleloop\n" | 513 "jl .lscaleloop\n" |
| 499 "popa\n" | 514 "popa\n" |
| 500 "ret\n" | 515 "ret\n" |
| 501 | 516 |
| 502 ".lscalelastpixel:" | 517 ".lscalelastpixel:" |
| 503 "paddsw %mm0, %mm1\n" | 518 "paddsw %mm0, %mm1\n" |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 542 "paddsw 4096(%ecx,%ebx,8),%mm0\n" | 557 "paddsw 4096(%ecx,%ebx,8),%mm0\n" |
| 543 "movzbl 0x1(%edx),%ebx\n" | 558 "movzbl 0x1(%edx),%ebx\n" |
| 544 "movq 0(%ecx,%eax,8),%mm1\n" | 559 "movq 0(%ecx,%eax,8),%mm1\n" |
| 545 "add $0x2,%edx\n" | 560 "add $0x2,%edx\n" |
| 546 "movq 0(%ecx,%ebx,8),%mm2\n" | 561 "movq 0(%ecx,%ebx,8),%mm2\n" |
| 547 "paddsw %mm0,%mm1\n" | 562 "paddsw %mm0,%mm1\n" |
| 548 "paddsw %mm0,%mm2\n" | 563 "paddsw %mm0,%mm2\n" |
| 549 "psraw $0x6,%mm1\n" | 564 "psraw $0x6,%mm1\n" |
| 550 "psraw $0x6,%mm2\n" | 565 "psraw $0x6,%mm2\n" |
| 551 "packuswb %mm2,%mm1\n" | 566 "packuswb %mm2,%mm1\n" |
| 567 #if USE_MOVNTQ |
| 552 "movntq %mm1,0x0(%ebp)\n" | 568 "movntq %mm1,0x0(%ebp)\n" |
| 569 #else |
| 570 "movq %mm1,0x0(%ebp)\n" |
| 571 #endif |
| 553 "add $0x8,%ebp\n" | 572 "add $0x8,%ebp\n" |
| 554 ".Lconvertend:" | 573 ".Lconvertend:" |
| 555 "subl $0x2,0x34(%esp)\n" | 574 "subl $0x2,0x34(%esp)\n" |
| 556 "jns .Lconvertloop\n" | 575 "jns .Lconvertloop\n" |
| 557 | 576 |
| 558 "andl $0x1,0x34(%esp)\n" | 577 "andl $0x1,0x34(%esp)\n" |
| 559 "je .Lconvertdone\n" | 578 "je .Lconvertdone\n" |
| 560 | 579 |
| 561 "movzbl (%edi),%eax\n" | 580 "movzbl (%edi),%eax\n" |
| 562 "movq 2048(%ecx,%eax,8),%mm0\n" | 581 "movq 2048(%ecx,%eax,8),%mm0\n" |
| (...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 623 "mov %ebx,%eax\n" | 642 "mov %ebx,%eax\n" |
| 624 "add 0x38(%esp),%ebx\n" | 643 "add 0x38(%esp),%ebx\n" |
| 625 "sar $0x10,%eax\n" | 644 "sar $0x10,%eax\n" |
| 626 "movzbl (%edx,%eax,1),%eax\n" | 645 "movzbl (%edx,%eax,1),%eax\n" |
| 627 "movq 0(%ecx,%eax,8),%mm2\n" | 646 "movq 0(%ecx,%eax,8),%mm2\n" |
| 628 "paddsw %mm0,%mm1\n" | 647 "paddsw %mm0,%mm1\n" |
| 629 "paddsw %mm0,%mm2\n" | 648 "paddsw %mm0,%mm2\n" |
| 630 "psraw $0x6,%mm1\n" | 649 "psraw $0x6,%mm1\n" |
| 631 "psraw $0x6,%mm2\n" | 650 "psraw $0x6,%mm2\n" |
| 632 "packuswb %mm2,%mm1\n" | 651 "packuswb %mm2,%mm1\n" |
| 652 #if USE_MOVNTQ |
| 633 "movntq %mm1,0x0(%ebp)\n" | 653 "movntq %mm1,0x0(%ebp)\n" |
| 654 #else |
| 655 "movq %mm1,0x0(%ebp)\n" |
| 656 #endif |
| 634 "add $0x8,%ebp\n" | 657 "add $0x8,%ebp\n" |
| 635 "Lscaleend:" | 658 "Lscaleend:" |
| 636 "subl $0x2,0x34(%esp)\n" | 659 "subl $0x2,0x34(%esp)\n" |
| 637 "jns Lscaleloop\n" | 660 "jns Lscaleloop\n" |
| 638 | 661 |
| 639 "andl $0x1,0x34(%esp)\n" | 662 "andl $0x1,0x34(%esp)\n" |
| 640 "je Lscaledone\n" | 663 "je Lscaledone\n" |
| 641 | 664 |
| 642 "mov %ebx,%eax\n" | 665 "mov %ebx,%eax\n" |
| 643 "sar $0x11,%eax\n" | 666 "sar $0x11,%eax\n" |
| (...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 765 "imul %eax, %ecx \n" | 788 "imul %eax, %ecx \n" |
| 766 "addl %esi, %ecx \n" | 789 "addl %esi, %ecx \n" |
| 767 "shrl $16, %ecx \n" | 790 "shrl $16, %ecx \n" |
| 768 "movq (%edi,%ecx,8),%mm2\n" | 791 "movq (%edi,%ecx,8),%mm2\n" |
| 769 | 792 |
| 770 "paddsw %mm0,%mm1\n" | 793 "paddsw %mm0,%mm1\n" |
| 771 "paddsw %mm0,%mm2\n" | 794 "paddsw %mm0,%mm2\n" |
| 772 "psraw $0x6,%mm1\n" | 795 "psraw $0x6,%mm1\n" |
| 773 "psraw $0x6,%mm2\n" | 796 "psraw $0x6,%mm2\n" |
| 774 "packuswb %mm2,%mm1\n" | 797 "packuswb %mm2,%mm1\n" |
| 798 #if USE_MOVNTQ |
| 775 "movntq %mm1,0x0(%ebp)\n" | 799 "movntq %mm1,0x0(%ebp)\n" |
| 800 #else |
| 801 "movq %mm1,0x0(%ebp)\n" |
| 802 #endif |
| 776 "add $0x8,%ebp\n" | 803 "add $0x8,%ebp\n" |
| 777 | 804 |
| 778 ".lscaleend:" | 805 ".lscaleend:" |
| 779 "cmp %ebx, 0x34(%esp)\n" | 806 "cmp %ebx, 0x34(%esp)\n" |
| 780 "jg .lscaleloop\n" | 807 "jg .lscaleloop\n" |
| 781 "popa\n" | 808 "popa\n" |
| 782 "ret\n" | 809 "ret\n" |
| 783 | 810 |
| 784 ".lscalelastpixel:" | 811 ".lscalelastpixel:" |
| 785 "paddsw %mm0, %mm1\n" | 812 "paddsw %mm0, %mm1\n" |
| (...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 913 y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; | 940 y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; |
| 914 YuvPixel(y, u, v, rgb_buf+4); | 941 YuvPixel(y, u, v, rgb_buf+4); |
| 915 x += source_dx; | 942 x += source_dx; |
| 916 } | 943 } |
| 917 rgb_buf += 8; | 944 rgb_buf += 8; |
| 918 } | 945 } |
| 919 } | 946 } |
| 920 | 947 |
| 921 #endif // USE_MMX | 948 #endif // USE_MMX |
| 922 } // extern "C" | 949 } // extern "C" |
| 923 | |
| OLD | NEW |