| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| (...skipping 516 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 527 pop rbx | 527 pop rbx |
| 528 ; begin epilog | 528 ; begin epilog |
| 529 pop rdi | 529 pop rdi |
| 530 pop rsi | 530 pop rsi |
| 531 RESTORE_XMM | 531 RESTORE_XMM |
| 532 UNSHADOW_ARGS | 532 UNSHADOW_ARGS |
| 533 pop rbp | 533 pop rbp |
| 534 ret | 534 ret |
| 535 | 535 |
| 536 ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 536 ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| 537 %macro HORIZx4_ROW 2 |
| 538 movdqa %2, %1 |
| 539 pshufb %1, [GLOBAL(shuf_t0t1)] |
| 540 pshufb %2, [GLOBAL(shuf_t2t3)] |
| 541 pmaddubsw %1, xmm6 |
| 542 pmaddubsw %2, xmm7 |
| 543 |
| 544 paddsw %1, %2 |
| 545 movdqa %2, %1 |
| 546 psrldq %2, 8 |
| 547 paddsw %1, %2 |
| 548 paddsw %1, xmm5 |
| 549 psraw %1, 7 |
| 550 packuswb %1, %1 |
| 551 %endm |
| 537 | 552 |
| 538 %macro HORIZx4 1 | 553 %macro HORIZx4 1 |
| 539 mov rdx, arg(5) ;filter ptr | 554 mov rdx, arg(5) ;filter ptr |
| 540 mov rsi, arg(0) ;src_ptr | 555 mov rsi, arg(0) ;src_ptr |
| 541 mov rdi, arg(2) ;output_ptr | 556 mov rdi, arg(2) ;output_ptr |
| 542 mov rcx, 0x0400040 | 557 mov rcx, 0x0400040 |
| 543 | 558 |
| 544 movdqa xmm4, [rdx] ;load filters | 559 movdqa xmm4, [rdx] ;load filters |
| 545 movq xmm5, rcx | 560 movq xmm5, rcx |
| 546 packsswb xmm4, xmm4 | 561 packsswb xmm4, xmm4 |
| 547 pshuflw xmm0, xmm4, 0b ;k0_k1 | 562 pshuflw xmm6, xmm4, 0b ;k0_k1 |
| 548 pshuflw xmm1, xmm4, 01010101b ;k2_k3 | 563 pshufhw xmm6, xmm6, 10101010b ;k0_k1_k4_k5 |
| 549 pshuflw xmm2, xmm4, 10101010b ;k4_k5 | 564 pshuflw xmm7, xmm4, 01010101b ;k2_k3 |
| 550 pshuflw xmm3, xmm4, 11111111b ;k6_k7 | 565 pshufhw xmm7, xmm7, 11111111b ;k2_k3_k6_k7 |
| 551 | 566 pshufd xmm5, xmm5, 0 ;rounding |
| 552 punpcklqdq xmm0, xmm0 | |
| 553 punpcklqdq xmm1, xmm1 | |
| 554 punpcklqdq xmm2, xmm2 | |
| 555 punpcklqdq xmm3, xmm3 | |
| 556 | |
| 557 movdqa k0k1, xmm0 | |
| 558 movdqa k2k3, xmm1 | |
| 559 pshufd xmm5, xmm5, 0 | |
| 560 movdqa k4k5, xmm2 | |
| 561 movdqa k6k7, xmm3 | |
| 562 movdqa krd, xmm5 | |
| 563 | 567 |
| 564 movsxd rax, dword ptr arg(1) ;src_pixels_per_line | 568 movsxd rax, dword ptr arg(1) ;src_pixels_per_line |
| 565 movsxd rdx, dword ptr arg(3) ;output_pitch | 569 movsxd rdx, dword ptr arg(3) ;output_pitch |
| 566 movsxd rcx, dword ptr arg(4) ;output_height | 570 movsxd rcx, dword ptr arg(4) ;output_height |
| 571 shr rcx, 1 |
| 572 .loop: |
| 573 ;Do two rows once |
| 574 movq xmm0, [rsi - 3] ;load src |
| 575 movq xmm1, [rsi + 5] |
| 576 movq xmm2, [rsi + rax - 3] |
| 577 movq xmm3, [rsi + rax + 5] |
| 578 punpcklqdq xmm0, xmm1 |
| 579 punpcklqdq xmm2, xmm3 |
| 567 | 580 |
| 568 .loop: | 581 HORIZx4_ROW xmm0, xmm1 |
| 569 movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4 | 582 HORIZx4_ROW xmm2, xmm3 |
| 583 %if %1 |
| 584 movd xmm1, [rdi] |
| 585 pavgb xmm0, xmm1 |
| 586 movd xmm3, [rdi + rdx] |
| 587 pavgb xmm2, xmm3 |
| 588 %endif |
| 589 movd [rdi], xmm0 |
| 590 movd [rdi +rdx], xmm2 |
| 570 | 591 |
| 571 movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12 | 592 lea rsi, [rsi + rax] |
| 572 punpcklqdq xmm0, xmm3 | 593 prefetcht0 [rsi + 4 * rax - 3] |
| 594 lea rsi, [rsi + rax] |
| 595 lea rdi, [rdi + 2 * rdx] |
| 596 prefetcht0 [rsi + 2 * rax - 3] |
| 573 | 597 |
| 574 movdqa xmm1, xmm0 | 598 dec rcx |
| 575 pshufb xmm0, [GLOBAL(shuf_t0t1)] | 599 jnz .loop |
| 576 pmaddubsw xmm0, k0k1 | |
| 577 | 600 |
| 578 movdqa xmm2, xmm1 | 601 ; Do last row if output_height is odd |
| 579 pshufb xmm1, [GLOBAL(shuf_t2t3)] | 602 movsxd rcx, dword ptr arg(4) ;output_height |
| 580 pmaddubsw xmm1, k2k3 | 603 and rcx, 1 |
| 604 je .done |
| 581 | 605 |
| 582 movdqa xmm4, xmm2 | 606 movq xmm0, [rsi - 3] ; load src |
| 583 pshufb xmm2, [GLOBAL(shuf_t4t5)] | 607 movq xmm1, [rsi + 5] |
| 584 pmaddubsw xmm2, k4k5 | 608 punpcklqdq xmm0, xmm1 |
| 585 | 609 |
| 586 pshufb xmm4, [GLOBAL(shuf_t6t7)] | 610 HORIZx4_ROW xmm0, xmm1 |
| 587 pmaddubsw xmm4, k6k7 | |
| 588 | |
| 589 paddsw xmm0, xmm1 | |
| 590 paddsw xmm0, xmm4 | |
| 591 paddsw xmm0, xmm2 | |
| 592 paddsw xmm0, krd | |
| 593 psraw xmm0, 7 | |
| 594 packuswb xmm0, xmm0 | |
| 595 %if %1 | 611 %if %1 |
| 596 movd xmm1, [rdi] | 612 movd xmm1, [rdi] |
| 597 pavgb xmm0, xmm1 | 613 pavgb xmm0, xmm1 |
| 598 %endif | 614 %endif |
| 599 lea rsi, [rsi + rax] | |
| 600 movd [rdi], xmm0 | 615 movd [rdi], xmm0 |
| 616 .done |
| 617 %endm |
| 601 | 618 |
| 602 lea rdi, [rdi + rdx] | 619 %macro HORIZx8_ROW 4 |
| 603 dec rcx | 620 movdqa %2, %1 |
| 604 jnz .loop | 621 movdqa %3, %1 |
| 622 movdqa %4, %1 |
| 623 |
| 624 pshufb %1, [GLOBAL(shuf_t0t1)] |
| 625 pshufb %2, [GLOBAL(shuf_t2t3)] |
| 626 pshufb %3, [GLOBAL(shuf_t4t5)] |
| 627 pshufb %4, [GLOBAL(shuf_t6t7)] |
| 628 |
| 629 pmaddubsw %1, k0k1 |
| 630 pmaddubsw %2, k2k3 |
| 631 pmaddubsw %3, k4k5 |
| 632 pmaddubsw %4, k6k7 |
| 633 |
| 634 paddsw %1, %2 |
| 635 paddsw %1, %4 |
| 636 paddsw %1, %3 |
| 637 paddsw %1, krd |
| 638 psraw %1, 7 |
| 639 packuswb %1, %1 |
| 605 %endm | 640 %endm |
| 606 | 641 |
| 607 %macro HORIZx8 1 | 642 %macro HORIZx8 1 |
| 608 mov rdx, arg(5) ;filter ptr | 643 mov rdx, arg(5) ;filter ptr |
| 609 mov rsi, arg(0) ;src_ptr | 644 mov rsi, arg(0) ;src_ptr |
| 610 mov rdi, arg(2) ;output_ptr | 645 mov rdi, arg(2) ;output_ptr |
| 611 mov rcx, 0x0400040 | 646 mov rcx, 0x0400040 |
| 612 | 647 |
| 613 movdqa xmm4, [rdx] ;load filters | 648 movdqa xmm4, [rdx] ;load filters |
| 614 movd xmm5, rcx | 649 movd xmm5, rcx |
| (...skipping 11 matching lines...) Expand all Loading... |
| 626 movdqa k0k1, xmm0 | 661 movdqa k0k1, xmm0 |
| 627 movdqa k2k3, xmm1 | 662 movdqa k2k3, xmm1 |
| 628 pshufd xmm5, xmm5, 0 | 663 pshufd xmm5, xmm5, 0 |
| 629 movdqa k4k5, xmm2 | 664 movdqa k4k5, xmm2 |
| 630 movdqa k6k7, xmm3 | 665 movdqa k6k7, xmm3 |
| 631 movdqa krd, xmm5 | 666 movdqa krd, xmm5 |
| 632 | 667 |
| 633 movsxd rax, dword ptr arg(1) ;src_pixels_per_line | 668 movsxd rax, dword ptr arg(1) ;src_pixels_per_line |
| 634 movsxd rdx, dword ptr arg(3) ;output_pitch | 669 movsxd rdx, dword ptr arg(3) ;output_pitch |
| 635 movsxd rcx, dword ptr arg(4) ;output_height | 670 movsxd rcx, dword ptr arg(4) ;output_height |
| 671 shr rcx, 1 |
| 636 | 672 |
| 637 .loop: | 673 .loop: |
| 638 movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4 | 674 movq xmm0, [rsi - 3] ;load src |
| 675 movq xmm3, [rsi + 5] |
| 676 movq xmm4, [rsi + rax - 3] |
| 677 movq xmm7, [rsi + rax + 5] |
| 678 punpcklqdq xmm0, xmm3 |
| 679 punpcklqdq xmm4, xmm7 |
| 639 | 680 |
| 640 movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12 | 681 HORIZx8_ROW xmm0, xmm1, xmm2, xmm3 |
| 682 HORIZx8_ROW xmm4, xmm5, xmm6, xmm7 |
| 683 %if %1 |
| 684 movq xmm1, [rdi] |
| 685 movq xmm2, [rdi + rdx] |
| 686 pavgb xmm0, xmm1 |
| 687 pavgb xmm4, xmm2 |
| 688 %endif |
| 689 movq [rdi], xmm0 |
| 690 movq [rdi + rdx], xmm4 |
| 691 |
| 692 lea rsi, [rsi + rax] |
| 693 prefetcht0 [rsi + 4 * rax - 3] |
| 694 lea rsi, [rsi + rax] |
| 695 lea rdi, [rdi + 2 * rdx] |
| 696 prefetcht0 [rsi + 2 * rax - 3] |
| 697 dec rcx |
| 698 jnz .loop |
| 699 |
| 700 ;Do last row if output_height is odd |
| 701 movsxd rcx, dword ptr arg(4) ;output_height |
| 702 and rcx, 1 |
| 703 je .done |
| 704 |
| 705 movq xmm0, [rsi - 3] |
| 706 movq xmm3, [rsi + 5] |
| 641 punpcklqdq xmm0, xmm3 | 707 punpcklqdq xmm0, xmm3 |
| 642 | 708 |
| 643 movdqa xmm1, xmm0 | 709 HORIZx8_ROW xmm0, xmm1, xmm2, xmm3 |
| 644 pshufb xmm0, [GLOBAL(shuf_t0t1)] | |
| 645 pmaddubsw xmm0, k0k1 | |
| 646 | |
| 647 movdqa xmm2, xmm1 | |
| 648 pshufb xmm1, [GLOBAL(shuf_t2t3)] | |
| 649 pmaddubsw xmm1, k2k3 | |
| 650 | |
| 651 movdqa xmm4, xmm2 | |
| 652 pshufb xmm2, [GLOBAL(shuf_t4t5)] | |
| 653 pmaddubsw xmm2, k4k5 | |
| 654 | |
| 655 pshufb xmm4, [GLOBAL(shuf_t6t7)] | |
| 656 pmaddubsw xmm4, k6k7 | |
| 657 | |
| 658 paddsw xmm0, xmm1 | |
| 659 paddsw xmm0, xmm4 | |
| 660 paddsw xmm0, xmm2 | |
| 661 paddsw xmm0, krd | |
| 662 psraw xmm0, 7 | |
| 663 packuswb xmm0, xmm0 | |
| 664 %if %1 | 710 %if %1 |
| 665 movq xmm1, [rdi] | 711 movq xmm1, [rdi] |
| 666 pavgb xmm0, xmm1 | 712 pavgb xmm0, xmm1 |
| 667 %endif | 713 %endif |
| 668 | |
| 669 lea rsi, [rsi + rax] | |
| 670 movq [rdi], xmm0 | 714 movq [rdi], xmm0 |
| 671 | 715 .done |
| 672 lea rdi, [rdi + rdx] | |
| 673 dec rcx | |
| 674 jnz .loop | |
| 675 %endm | 716 %endm |
| 676 | 717 |
| 677 %macro HORIZx16 1 | 718 %macro HORIZx16 1 |
| 678 mov rdx, arg(5) ;filter ptr | 719 mov rdx, arg(5) ;filter ptr |
| 679 mov rsi, arg(0) ;src_ptr | 720 mov rsi, arg(0) ;src_ptr |
| 680 mov rdi, arg(2) ;output_ptr | 721 mov rdi, arg(2) ;output_ptr |
| 681 mov rcx, 0x0400040 | 722 mov rcx, 0x0400040 |
| 682 | 723 |
| 683 movdqa xmm4, [rdx] ;load filters | 724 movdqa xmm4, [rdx] ;load filters |
| 684 movq xmm5, rcx | 725 movq xmm5, rcx |
| (...skipping 13 matching lines...) Expand all Loading... |
| 698 pshufd xmm5, xmm5, 0 | 739 pshufd xmm5, xmm5, 0 |
| 699 movdqa k4k5, xmm2 | 740 movdqa k4k5, xmm2 |
| 700 movdqa k6k7, xmm3 | 741 movdqa k6k7, xmm3 |
| 701 movdqa krd, xmm5 | 742 movdqa krd, xmm5 |
| 702 | 743 |
| 703 movsxd rax, dword ptr arg(1) ;src_pixels_per_line | 744 movsxd rax, dword ptr arg(1) ;src_pixels_per_line |
| 704 movsxd rdx, dword ptr arg(3) ;output_pitch | 745 movsxd rdx, dword ptr arg(3) ;output_pitch |
| 705 movsxd rcx, dword ptr arg(4) ;output_height | 746 movsxd rcx, dword ptr arg(4) ;output_height |
| 706 | 747 |
| 707 .loop: | 748 .loop: |
| 708 movq xmm0, [rsi - 3] ; -3 -2 -1 0 1 2 3 4 | 749 prefetcht0 [rsi + 2 * rax -3] |
| 709 | 750 |
| 710 movq xmm3, [rsi + 5] ; 5 6 7 8 9 10 11 12 | 751 movq xmm0, [rsi - 3] ;load src data |
| 711 punpcklqdq xmm0, xmm3 | 752 movq xmm4, [rsi + 5] |
| 753 movq xmm7, [rsi + 13] |
| 754 punpcklqdq xmm0, xmm4 |
| 755 punpcklqdq xmm4, xmm7 |
| 712 | 756 |
| 713 movdqa xmm1, xmm0 | 757 movdqa xmm1, xmm0 |
| 758 movdqa xmm2, xmm0 |
| 759 movdqa xmm3, xmm0 |
| 760 movdqa xmm5, xmm4 |
| 761 movdqa xmm6, xmm4 |
| 762 movdqa xmm7, xmm4 |
| 763 |
| 714 pshufb xmm0, [GLOBAL(shuf_t0t1)] | 764 pshufb xmm0, [GLOBAL(shuf_t0t1)] |
| 765 pshufb xmm1, [GLOBAL(shuf_t2t3)] |
| 766 pshufb xmm2, [GLOBAL(shuf_t4t5)] |
| 767 pshufb xmm3, [GLOBAL(shuf_t6t7)] |
| 768 pshufb xmm4, [GLOBAL(shuf_t0t1)] |
| 769 pshufb xmm5, [GLOBAL(shuf_t2t3)] |
| 770 pshufb xmm6, [GLOBAL(shuf_t4t5)] |
| 771 pshufb xmm7, [GLOBAL(shuf_t6t7)] |
| 772 |
| 715 pmaddubsw xmm0, k0k1 | 773 pmaddubsw xmm0, k0k1 |
| 716 | |
| 717 movdqa xmm2, xmm1 | |
| 718 pshufb xmm1, [GLOBAL(shuf_t2t3)] | |
| 719 pmaddubsw xmm1, k2k3 | 774 pmaddubsw xmm1, k2k3 |
| 720 | |
| 721 movdqa xmm4, xmm2 | |
| 722 pshufb xmm2, [GLOBAL(shuf_t4t5)] | |
| 723 pmaddubsw xmm2, k4k5 | 775 pmaddubsw xmm2, k4k5 |
| 724 | 776 pmaddubsw xmm3, k6k7 |
| 725 pshufb xmm4, [GLOBAL(shuf_t6t7)] | 777 pmaddubsw xmm4, k0k1 |
| 726 pmaddubsw xmm4, k6k7 | 778 pmaddubsw xmm5, k2k3 |
| 779 pmaddubsw xmm6, k4k5 |
| 780 pmaddubsw xmm7, k6k7 |
| 727 | 781 |
| 728 paddsw xmm0, xmm1 | 782 paddsw xmm0, xmm1 |
| 729 paddsw xmm0, xmm4 | 783 paddsw xmm0, xmm3 |
| 730 paddsw xmm0, xmm2 | 784 paddsw xmm0, xmm2 |
| 785 paddsw xmm4, xmm5 |
| 786 paddsw xmm4, xmm7 |
| 787 paddsw xmm4, xmm6 |
| 788 |
| 731 paddsw xmm0, krd | 789 paddsw xmm0, krd |
| 790 paddsw xmm4, krd |
| 732 psraw xmm0, 7 | 791 psraw xmm0, 7 |
| 792 psraw xmm4, 7 |
| 733 packuswb xmm0, xmm0 | 793 packuswb xmm0, xmm0 |
| 734 | 794 packuswb xmm4, xmm4 |
| 735 | 795 punpcklqdq xmm0, xmm4 |
| 736 movq xmm3, [rsi + 5] | |
| 737 movq xmm7, [rsi + 13] | |
| 738 punpcklqdq xmm3, xmm7 | |
| 739 | |
| 740 movdqa xmm1, xmm3 | |
| 741 pshufb xmm3, [GLOBAL(shuf_t0t1)] | |
| 742 pmaddubsw xmm3, k0k1 | |
| 743 | |
| 744 movdqa xmm2, xmm1 | |
| 745 pshufb xmm1, [GLOBAL(shuf_t2t3)] | |
| 746 pmaddubsw xmm1, k2k3 | |
| 747 | |
| 748 movdqa xmm4, xmm2 | |
| 749 pshufb xmm2, [GLOBAL(shuf_t4t5)] | |
| 750 pmaddubsw xmm2, k4k5 | |
| 751 | |
| 752 pshufb xmm4, [GLOBAL(shuf_t6t7)] | |
| 753 pmaddubsw xmm4, k6k7 | |
| 754 | |
| 755 paddsw xmm3, xmm1 | |
| 756 paddsw xmm3, xmm4 | |
| 757 paddsw xmm3, xmm2 | |
| 758 paddsw xmm3, krd | |
| 759 psraw xmm3, 7 | |
| 760 packuswb xmm3, xmm3 | |
| 761 punpcklqdq xmm0, xmm3 | |
| 762 %if %1 | 796 %if %1 |
| 763 movdqa xmm1, [rdi] | 797 movdqa xmm1, [rdi] |
| 764 pavgb xmm0, xmm1 | 798 pavgb xmm0, xmm1 |
| 765 %endif | 799 %endif |
| 766 | 800 |
| 767 lea rsi, [rsi + rax] | 801 lea rsi, [rsi + rax] |
| 768 movdqa [rdi], xmm0 | 802 movdqa [rdi], xmm0 |
| 769 | 803 |
| 770 lea rdi, [rdi + rdx] | 804 lea rdi, [rdi + rdx] |
| 771 dec rcx | 805 dec rcx |
| (...skipping 13 matching lines...) Expand all Loading... |
| 785 sym(vp9_filter_block1d4_h8_ssse3): | 819 sym(vp9_filter_block1d4_h8_ssse3): |
| 786 push rbp | 820 push rbp |
| 787 mov rbp, rsp | 821 mov rbp, rsp |
| 788 SHADOW_ARGS_TO_STACK 6 | 822 SHADOW_ARGS_TO_STACK 6 |
| 789 SAVE_XMM 7 | 823 SAVE_XMM 7 |
| 790 GET_GOT rbx | 824 GET_GOT rbx |
| 791 push rsi | 825 push rsi |
| 792 push rdi | 826 push rdi |
| 793 ; end prolog | 827 ; end prolog |
| 794 | 828 |
| 795 ALIGN_STACK 16, rax | |
| 796 sub rsp, 16*5 | |
| 797 %define k0k1 [rsp + 16*0] | |
| 798 %define k2k3 [rsp + 16*1] | |
| 799 %define k4k5 [rsp + 16*2] | |
| 800 %define k6k7 [rsp + 16*3] | |
| 801 %define krd [rsp + 16*4] | |
| 802 | |
| 803 HORIZx4 0 | 829 HORIZx4 0 |
| 804 | 830 |
| 805 add rsp, 16*5 | |
| 806 pop rsp | |
| 807 | |
| 808 ; begin epilog | 831 ; begin epilog |
| 809 pop rdi | 832 pop rdi |
| 810 pop rsi | 833 pop rsi |
| 811 RESTORE_GOT | 834 RESTORE_GOT |
| 812 RESTORE_XMM | 835 RESTORE_XMM |
| 813 UNSHADOW_ARGS | 836 UNSHADOW_ARGS |
| 814 pop rbp | 837 pop rbp |
| 815 ret | 838 ret |
| 816 | 839 |
| 817 ;void vp9_filter_block1d8_h8_ssse3 | 840 ;void vp9_filter_block1d8_h8_ssse3 |
| (...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 902 sym(vp9_filter_block1d4_h8_avg_ssse3): | 925 sym(vp9_filter_block1d4_h8_avg_ssse3): |
| 903 push rbp | 926 push rbp |
| 904 mov rbp, rsp | 927 mov rbp, rsp |
| 905 SHADOW_ARGS_TO_STACK 6 | 928 SHADOW_ARGS_TO_STACK 6 |
| 906 SAVE_XMM 7 | 929 SAVE_XMM 7 |
| 907 GET_GOT rbx | 930 GET_GOT rbx |
| 908 push rsi | 931 push rsi |
| 909 push rdi | 932 push rdi |
| 910 ; end prolog | 933 ; end prolog |
| 911 | 934 |
| 912 ALIGN_STACK 16, rax | |
| 913 sub rsp, 16*5 | |
| 914 %define k0k1 [rsp + 16*0] | |
| 915 %define k2k3 [rsp + 16*1] | |
| 916 %define k4k5 [rsp + 16*2] | |
| 917 %define k6k7 [rsp + 16*3] | |
| 918 %define krd [rsp + 16*4] | |
| 919 | |
| 920 HORIZx4 1 | 935 HORIZx4 1 |
| 921 | 936 |
| 922 add rsp, 16*5 | |
| 923 pop rsp | |
| 924 | |
| 925 ; begin epilog | 937 ; begin epilog |
| 926 pop rdi | 938 pop rdi |
| 927 pop rsi | 939 pop rsi |
| 928 RESTORE_GOT | 940 RESTORE_GOT |
| 929 RESTORE_XMM | 941 RESTORE_XMM |
| 930 UNSHADOW_ARGS | 942 UNSHADOW_ARGS |
| 931 pop rbp | 943 pop rbp |
| 932 ret | 944 ret |
| 933 | 945 |
| 934 global sym(vp9_filter_block1d8_h8_avg_ssse3) PRIVATE | 946 global sym(vp9_filter_block1d8_h8_avg_ssse3) PRIVATE |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1002 db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 | 1014 db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 |
| 1003 align 16 | 1015 align 16 |
| 1004 shuf_t2t3: | 1016 shuf_t2t3: |
| 1005 db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 | 1017 db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 |
| 1006 align 16 | 1018 align 16 |
| 1007 shuf_t4t5: | 1019 shuf_t4t5: |
| 1008 db 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12 | 1020 db 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12 |
| 1009 align 16 | 1021 align 16 |
| 1010 shuf_t6t7: | 1022 shuf_t6t7: |
| 1011 db 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 | 1023 db 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 |
| OLD | NEW |