OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "media/base/yuv_row.h" | 5 #include "media/base/yuv_row.h" |
6 | 6 |
7 #ifndef NDEBUG | 7 #ifndef NDEBUG |
8 #include "base/logging.h" | 8 #include "base/logging.h" |
9 #else | 9 #else |
10 #define DCHECK(a) | 10 #define DCHECK(a) |
11 #endif | 11 #endif |
12 | 12 |
13 extern "C" { | 13 extern "C" { |
14 | 14 |
| 15 // Branch 874 specific fix to disable movntq to prevent crashes on Pentium IIs. |
| 16 #define USE_MOVNTQ 0 |
| 17 |
15 #if USE_SSE2 && defined(ARCH_CPU_X86_64) | 18 #if USE_SSE2 && defined(ARCH_CPU_X86_64) |
16 | 19 |
17 // AMD64 ABI uses register parameters. | 20 // AMD64 ABI uses register parameters. |
18 void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi | 21 void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi |
19 const uint8* u_buf, // rsi | 22 const uint8* u_buf, // rsi |
20 const uint8* v_buf, // rdx | 23 const uint8* v_buf, // rdx |
21 uint8* rgb_buf, // rcx | 24 uint8* rgb_buf, // rcx |
22 int width) { // r8 | 25 int width) { // r8 |
23 asm( | 26 asm( |
24 "jmp convertend\n" | 27 "jmp convertend\n" |
(...skipping 266 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
291 "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n" | 294 "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n" |
292 "movzbl 0x1(%edx),%ebx\n" | 295 "movzbl 0x1(%edx),%ebx\n" |
293 "movq kCoefficientsRgbY(,%eax,8),%mm1\n" | 296 "movq kCoefficientsRgbY(,%eax,8),%mm1\n" |
294 "add $0x2,%edx\n" | 297 "add $0x2,%edx\n" |
295 "movq kCoefficientsRgbY(,%ebx,8),%mm2\n" | 298 "movq kCoefficientsRgbY(,%ebx,8),%mm2\n" |
296 "paddsw %mm0,%mm1\n" | 299 "paddsw %mm0,%mm1\n" |
297 "paddsw %mm0,%mm2\n" | 300 "paddsw %mm0,%mm2\n" |
298 "psraw $0x6,%mm1\n" | 301 "psraw $0x6,%mm1\n" |
299 "psraw $0x6,%mm2\n" | 302 "psraw $0x6,%mm2\n" |
300 "packuswb %mm2,%mm1\n" | 303 "packuswb %mm2,%mm1\n" |
| 304 #if USE_MOVNTQ |
301 "movntq %mm1,0x0(%ebp)\n" | 305 "movntq %mm1,0x0(%ebp)\n" |
| 306 #else |
| 307 "movq %mm1,0x0(%ebp)\n" |
| 308 #endif |
302 "add $0x8,%ebp\n" | 309 "add $0x8,%ebp\n" |
303 "convertend:" | 310 "convertend:" |
304 "sub $0x2,%ecx\n" | 311 "sub $0x2,%ecx\n" |
305 "jns convertloop\n" | 312 "jns convertloop\n" |
306 | 313 |
307 "and $0x1,%ecx\n" | 314 "and $0x1,%ecx\n" |
308 "je convertdone\n" | 315 "je convertdone\n" |
309 | 316 |
310 "movzbl (%edi),%eax\n" | 317 "movzbl (%edi),%eax\n" |
311 "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" | 318 "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n" |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
359 "mov %ebx,%eax\n" | 366 "mov %ebx,%eax\n" |
360 "add 0x38(%esp),%ebx\n" | 367 "add 0x38(%esp),%ebx\n" |
361 "sar $0x10,%eax\n" | 368 "sar $0x10,%eax\n" |
362 "movzbl (%edx,%eax,1),%eax\n" | 369 "movzbl (%edx,%eax,1),%eax\n" |
363 "movq kCoefficientsRgbY(,%eax,8),%mm2\n" | 370 "movq kCoefficientsRgbY(,%eax,8),%mm2\n" |
364 "paddsw %mm0,%mm1\n" | 371 "paddsw %mm0,%mm1\n" |
365 "paddsw %mm0,%mm2\n" | 372 "paddsw %mm0,%mm2\n" |
366 "psraw $0x6,%mm1\n" | 373 "psraw $0x6,%mm1\n" |
367 "psraw $0x6,%mm2\n" | 374 "psraw $0x6,%mm2\n" |
368 "packuswb %mm2,%mm1\n" | 375 "packuswb %mm2,%mm1\n" |
| 376 #if USE_MOVNTQ |
369 "movntq %mm1,0x0(%ebp)\n" | 377 "movntq %mm1,0x0(%ebp)\n" |
| 378 #else |
| 379 "movq %mm1,0x0(%ebp)\n" |
| 380 #endif |
370 "add $0x8,%ebp\n" | 381 "add $0x8,%ebp\n" |
371 "scaleend:" | 382 "scaleend:" |
372 "sub $0x2,%ecx\n" | 383 "sub $0x2,%ecx\n" |
373 "jns scaleloop\n" | 384 "jns scaleloop\n" |
374 | 385 |
375 "and $0x1,%ecx\n" | 386 "and $0x1,%ecx\n" |
376 "je scaledone\n" | 387 "je scaledone\n" |
377 | 388 |
378 "mov %ebx,%eax\n" | 389 "mov %ebx,%eax\n" |
379 "sar $0x11,%eax\n" | 390 "sar $0x11,%eax\n" |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
483 "imul %eax, %ecx \n" | 494 "imul %eax, %ecx \n" |
484 "addl %esi, %ecx \n" | 495 "addl %esi, %ecx \n" |
485 "shrl $16, %ecx \n" | 496 "shrl $16, %ecx \n" |
486 "movq kCoefficientsRgbY(,%ecx,8),%mm2\n" | 497 "movq kCoefficientsRgbY(,%ecx,8),%mm2\n" |
487 | 498 |
488 "paddsw %mm0,%mm1\n" | 499 "paddsw %mm0,%mm1\n" |
489 "paddsw %mm0,%mm2\n" | 500 "paddsw %mm0,%mm2\n" |
490 "psraw $0x6,%mm1\n" | 501 "psraw $0x6,%mm1\n" |
491 "psraw $0x6,%mm2\n" | 502 "psraw $0x6,%mm2\n" |
492 "packuswb %mm2,%mm1\n" | 503 "packuswb %mm2,%mm1\n" |
| 504 #if USE_MOVNTQ |
493 "movntq %mm1,0x0(%ebp)\n" | 505 "movntq %mm1,0x0(%ebp)\n" |
| 506 #else |
| 507 "movq %mm1,0x0(%ebp)\n" |
| 508 #endif |
494 "add $0x8,%ebp\n" | 509 "add $0x8,%ebp\n" |
495 | 510 |
496 ".lscaleend:" | 511 ".lscaleend:" |
497 "cmp 0x34(%esp), %ebx\n" | 512 "cmp 0x34(%esp), %ebx\n" |
498 "jl .lscaleloop\n" | 513 "jl .lscaleloop\n" |
499 "popa\n" | 514 "popa\n" |
500 "ret\n" | 515 "ret\n" |
501 | 516 |
502 ".lscalelastpixel:" | 517 ".lscalelastpixel:" |
503 "paddsw %mm0, %mm1\n" | 518 "paddsw %mm0, %mm1\n" |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
542 "paddsw 4096(%ecx,%ebx,8),%mm0\n" | 557 "paddsw 4096(%ecx,%ebx,8),%mm0\n" |
543 "movzbl 0x1(%edx),%ebx\n" | 558 "movzbl 0x1(%edx),%ebx\n" |
544 "movq 0(%ecx,%eax,8),%mm1\n" | 559 "movq 0(%ecx,%eax,8),%mm1\n" |
545 "add $0x2,%edx\n" | 560 "add $0x2,%edx\n" |
546 "movq 0(%ecx,%ebx,8),%mm2\n" | 561 "movq 0(%ecx,%ebx,8),%mm2\n" |
547 "paddsw %mm0,%mm1\n" | 562 "paddsw %mm0,%mm1\n" |
548 "paddsw %mm0,%mm2\n" | 563 "paddsw %mm0,%mm2\n" |
549 "psraw $0x6,%mm1\n" | 564 "psraw $0x6,%mm1\n" |
550 "psraw $0x6,%mm2\n" | 565 "psraw $0x6,%mm2\n" |
551 "packuswb %mm2,%mm1\n" | 566 "packuswb %mm2,%mm1\n" |
| 567 #if USE_MOVNTQ |
552 "movntq %mm1,0x0(%ebp)\n" | 568 "movntq %mm1,0x0(%ebp)\n" |
| 569 #else |
| 570 "movq %mm1,0x0(%ebp)\n" |
| 571 #endif |
553 "add $0x8,%ebp\n" | 572 "add $0x8,%ebp\n" |
554 ".Lconvertend:" | 573 ".Lconvertend:" |
555 "subl $0x2,0x34(%esp)\n" | 574 "subl $0x2,0x34(%esp)\n" |
556 "jns .Lconvertloop\n" | 575 "jns .Lconvertloop\n" |
557 | 576 |
558 "andl $0x1,0x34(%esp)\n" | 577 "andl $0x1,0x34(%esp)\n" |
559 "je .Lconvertdone\n" | 578 "je .Lconvertdone\n" |
560 | 579 |
561 "movzbl (%edi),%eax\n" | 580 "movzbl (%edi),%eax\n" |
562 "movq 2048(%ecx,%eax,8),%mm0\n" | 581 "movq 2048(%ecx,%eax,8),%mm0\n" |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
623 "mov %ebx,%eax\n" | 642 "mov %ebx,%eax\n" |
624 "add 0x38(%esp),%ebx\n" | 643 "add 0x38(%esp),%ebx\n" |
625 "sar $0x10,%eax\n" | 644 "sar $0x10,%eax\n" |
626 "movzbl (%edx,%eax,1),%eax\n" | 645 "movzbl (%edx,%eax,1),%eax\n" |
627 "movq 0(%ecx,%eax,8),%mm2\n" | 646 "movq 0(%ecx,%eax,8),%mm2\n" |
628 "paddsw %mm0,%mm1\n" | 647 "paddsw %mm0,%mm1\n" |
629 "paddsw %mm0,%mm2\n" | 648 "paddsw %mm0,%mm2\n" |
630 "psraw $0x6,%mm1\n" | 649 "psraw $0x6,%mm1\n" |
631 "psraw $0x6,%mm2\n" | 650 "psraw $0x6,%mm2\n" |
632 "packuswb %mm2,%mm1\n" | 651 "packuswb %mm2,%mm1\n" |
| 652 #if USE_MOVNTQ |
633 "movntq %mm1,0x0(%ebp)\n" | 653 "movntq %mm1,0x0(%ebp)\n" |
| 654 #else |
| 655 "movq %mm1,0x0(%ebp)\n" |
| 656 #endif |
634 "add $0x8,%ebp\n" | 657 "add $0x8,%ebp\n" |
635 "Lscaleend:" | 658 "Lscaleend:" |
636 "subl $0x2,0x34(%esp)\n" | 659 "subl $0x2,0x34(%esp)\n" |
637 "jns Lscaleloop\n" | 660 "jns Lscaleloop\n" |
638 | 661 |
639 "andl $0x1,0x34(%esp)\n" | 662 "andl $0x1,0x34(%esp)\n" |
640 "je Lscaledone\n" | 663 "je Lscaledone\n" |
641 | 664 |
642 "mov %ebx,%eax\n" | 665 "mov %ebx,%eax\n" |
643 "sar $0x11,%eax\n" | 666 "sar $0x11,%eax\n" |
(...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
765 "imul %eax, %ecx \n" | 788 "imul %eax, %ecx \n" |
766 "addl %esi, %ecx \n" | 789 "addl %esi, %ecx \n" |
767 "shrl $16, %ecx \n" | 790 "shrl $16, %ecx \n" |
768 "movq (%edi,%ecx,8),%mm2\n" | 791 "movq (%edi,%ecx,8),%mm2\n" |
769 | 792 |
770 "paddsw %mm0,%mm1\n" | 793 "paddsw %mm0,%mm1\n" |
771 "paddsw %mm0,%mm2\n" | 794 "paddsw %mm0,%mm2\n" |
772 "psraw $0x6,%mm1\n" | 795 "psraw $0x6,%mm1\n" |
773 "psraw $0x6,%mm2\n" | 796 "psraw $0x6,%mm2\n" |
774 "packuswb %mm2,%mm1\n" | 797 "packuswb %mm2,%mm1\n" |
| 798 #if USE_MOVNTQ |
775 "movntq %mm1,0x0(%ebp)\n" | 799 "movntq %mm1,0x0(%ebp)\n" |
| 800 #else |
| 801 "movq %mm1,0x0(%ebp)\n" |
| 802 #endif |
776 "add $0x8,%ebp\n" | 803 "add $0x8,%ebp\n" |
777 | 804 |
778 ".lscaleend:" | 805 ".lscaleend:" |
779 "cmp %ebx, 0x34(%esp)\n" | 806 "cmp %ebx, 0x34(%esp)\n" |
780 "jg .lscaleloop\n" | 807 "jg .lscaleloop\n" |
781 "popa\n" | 808 "popa\n" |
782 "ret\n" | 809 "ret\n" |
783 | 810 |
784 ".lscalelastpixel:" | 811 ".lscalelastpixel:" |
785 "paddsw %mm0, %mm1\n" | 812 "paddsw %mm0, %mm1\n" |
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
913 y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; | 940 y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16; |
914 YuvPixel(y, u, v, rgb_buf+4); | 941 YuvPixel(y, u, v, rgb_buf+4); |
915 x += source_dx; | 942 x += source_dx; |
916 } | 943 } |
917 rgb_buf += 8; | 944 rgb_buf += 8; |
918 } | 945 } |
919 } | 946 } |
920 | 947 |
921 #endif // USE_MMX | 948 #endif // USE_MMX |
922 } // extern "C" | 949 } // extern "C" |
923 | |
OLD | NEW |