Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(697)

Side by Side Diff: source/libvpx/vp9/common/x86/vp9_subpixel_8t_ssse3.asm

Issue 812033011: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 ; 1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
11 11
12 %include "vpx_ports/x86_abi_support.asm" 12 %include "vpx_ports/x86_abi_support.asm"
13 13
14 %macro VERTx4 1 14 %macro VERTx4 1
15 mov rdx, arg(5) ;filter ptr 15 mov rdx, arg(5) ;filter ptr
16 mov rsi, arg(0) ;src_ptr 16 mov rsi, arg(0) ;src_ptr
17 mov rdi, arg(2) ;output_ptr 17 mov rdi, arg(2) ;output_ptr
18 mov rcx, 0x0400040 18 mov rcx, 0x0400040
19 19
20 movdqa xmm4, [rdx] ;load filters 20 movdqa xmm4, [rdx] ;load filters
21 movd xmm5, rcx 21 movq xmm5, rcx
22 packsswb xmm4, xmm4 22 packsswb xmm4, xmm4
23 pshuflw xmm0, xmm4, 0b ;k0_k1 23 pshuflw xmm0, xmm4, 0b ;k0_k1
24 pshuflw xmm1, xmm4, 01010101b ;k2_k3 24 pshuflw xmm1, xmm4, 01010101b ;k2_k3
25 pshuflw xmm2, xmm4, 10101010b ;k4_k5 25 pshuflw xmm2, xmm4, 10101010b ;k4_k5
26 pshuflw xmm3, xmm4, 11111111b ;k6_k7 26 pshuflw xmm3, xmm4, 11111111b ;k6_k7
27 27
28 punpcklqdq xmm0, xmm0 28 punpcklqdq xmm0, xmm0
29 punpcklqdq xmm1, xmm1 29 punpcklqdq xmm1, xmm1
30 punpcklqdq xmm2, xmm2 30 punpcklqdq xmm2, xmm2
31 punpcklqdq xmm3, xmm3 31 punpcklqdq xmm3, xmm3
(...skipping 622 matching lines...) Expand 10 before | Expand all | Expand 10 after
654 packuswb %1, %1 654 packuswb %1, %1
655 %endm 655 %endm
656 656
657 %macro HORIZx8 1 657 %macro HORIZx8 1
658 mov rdx, arg(5) ;filter ptr 658 mov rdx, arg(5) ;filter ptr
659 mov rsi, arg(0) ;src_ptr 659 mov rsi, arg(0) ;src_ptr
660 mov rdi, arg(2) ;output_ptr 660 mov rdi, arg(2) ;output_ptr
661 mov rcx, 0x0400040 661 mov rcx, 0x0400040
662 662
663 movdqa xmm4, [rdx] ;load filters 663 movdqa xmm4, [rdx] ;load filters
664 movd xmm5, rcx 664 movq xmm5, rcx
665 packsswb xmm4, xmm4 665 packsswb xmm4, xmm4
666 pshuflw xmm0, xmm4, 0b ;k0_k1 666 pshuflw xmm0, xmm4, 0b ;k0_k1
667 pshuflw xmm1, xmm4, 01010101b ;k2_k3 667 pshuflw xmm1, xmm4, 01010101b ;k2_k3
668 pshuflw xmm2, xmm4, 10101010b ;k4_k5 668 pshuflw xmm2, xmm4, 10101010b ;k4_k5
669 pshuflw xmm3, xmm4, 11111111b ;k6_k7 669 pshuflw xmm3, xmm4, 11111111b ;k6_k7
670 670
671 punpcklqdq xmm0, xmm0 671 punpcklqdq xmm0, xmm0
672 punpcklqdq xmm1, xmm1 672 punpcklqdq xmm1, xmm1
673 punpcklqdq xmm2, xmm2 673 punpcklqdq xmm2, xmm2
674 punpcklqdq xmm3, xmm3 674 punpcklqdq xmm3, xmm3
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
758 758
759 movsxd rax, dword ptr arg(1) ;src_pixels_per_line 759 movsxd rax, dword ptr arg(1) ;src_pixels_per_line
760 movsxd rdx, dword ptr arg(3) ;output_pitch 760 movsxd rdx, dword ptr arg(3) ;output_pitch
761 movsxd rcx, dword ptr arg(4) ;output_height 761 movsxd rcx, dword ptr arg(4) ;output_height
762 762
763 .loop: 763 .loop:
764 prefetcht0 [rsi + 2 * rax -3] 764 prefetcht0 [rsi + 2 * rax -3]
765 765
766 movq xmm0, [rsi - 3] ;load src data 766 movq xmm0, [rsi - 3] ;load src data
767 movq xmm4, [rsi + 5] 767 movq xmm4, [rsi + 5]
768 movq xmm7, [rsi + 13] 768 movq xmm6, [rsi + 13]
769 punpcklqdq xmm0, xmm4 769 punpcklqdq xmm0, xmm4
770 punpcklqdq xmm4, xmm7 770 punpcklqdq xmm4, xmm6
771 771
772 movdqa xmm7, xmm0
773
774 punpcklbw xmm7, xmm7
775 punpckhbw xmm0, xmm0
772 movdqa xmm1, xmm0 776 movdqa xmm1, xmm0
773 movdqa xmm2, xmm0 777 movdqa xmm2, xmm0
774 movdqa xmm3, xmm0 778 movdqa xmm3, xmm0
779
780 palignr xmm0, xmm7, 1
781 palignr xmm1, xmm7, 5
782 pmaddubsw xmm0, k0k1
783 palignr xmm2, xmm7, 9
784 pmaddubsw xmm1, k2k3
785 palignr xmm3, xmm7, 13
786
787 pmaddubsw xmm2, k4k5
788 pmaddubsw xmm3, k6k7
789 paddsw xmm0, xmm3
790
791 movdqa xmm3, xmm4
792 punpcklbw xmm3, xmm3
793 punpckhbw xmm4, xmm4
794
775 movdqa xmm5, xmm4 795 movdqa xmm5, xmm4
776 movdqa xmm6, xmm4 796 movdqa xmm6, xmm4
777 movdqa xmm7, xmm4 797 movdqa xmm7, xmm4
778 798
779 pshufb xmm0, [GLOBAL(shuf_t0t1)] 799 palignr xmm4, xmm3, 1
780 pshufb xmm1, [GLOBAL(shuf_t2t3)] 800 palignr xmm5, xmm3, 5
781 pshufb xmm2, [GLOBAL(shuf_t4t5)] 801 palignr xmm6, xmm3, 9
782 pshufb xmm3, [GLOBAL(shuf_t6t7)] 802 palignr xmm7, xmm3, 13
783 pshufb xmm4, [GLOBAL(shuf_t0t1)]
784 pshufb xmm5, [GLOBAL(shuf_t2t3)]
785 pshufb xmm6, [GLOBAL(shuf_t4t5)]
786 pshufb xmm7, [GLOBAL(shuf_t6t7)]
787 803
788 pmaddubsw xmm0, k0k1 804 movdqa xmm3, xmm1
789 pmaddubsw xmm1, k2k3
790 pmaddubsw xmm2, k4k5
791 pmaddubsw xmm3, k6k7
792 pmaddubsw xmm4, k0k1 805 pmaddubsw xmm4, k0k1
806 pmaxsw xmm1, xmm2
793 pmaddubsw xmm5, k2k3 807 pmaddubsw xmm5, k2k3
808 pminsw xmm2, xmm3
794 pmaddubsw xmm6, k4k5 809 pmaddubsw xmm6, k4k5
810 paddsw xmm0, xmm2
795 pmaddubsw xmm7, k6k7 811 pmaddubsw xmm7, k6k7
796
797 paddsw xmm0, xmm3
798 movdqa xmm3, xmm1
799 pmaxsw xmm1, xmm2
800 pminsw xmm2, xmm3
801 paddsw xmm0, xmm2
802 paddsw xmm0, xmm1 812 paddsw xmm0, xmm1
803 813
804 paddsw xmm4, xmm7 814 paddsw xmm4, xmm7
805 movdqa xmm7, xmm5 815 movdqa xmm7, xmm5
806 pmaxsw xmm5, xmm6 816 pmaxsw xmm5, xmm6
807 pminsw xmm6, xmm7 817 pminsw xmm6, xmm7
808 paddsw xmm4, xmm6 818 paddsw xmm4, xmm6
809 paddsw xmm4, xmm5 819 paddsw xmm4, xmm5
810 820
811 paddsw xmm0, krd 821 paddsw xmm0, krd
(...skipping 240 matching lines...) Expand 10 before | Expand all | Expand 10 after
1052 db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 1062 db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
1053 align 16 1063 align 16
1054 shuf_t2t3: 1064 shuf_t2t3:
1055 db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 1065 db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10
1056 align 16 1066 align 16
1057 shuf_t4t5: 1067 shuf_t4t5:
1058 db 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12 1068 db 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12
1059 align 16 1069 align 16
1060 shuf_t6t7: 1070 shuf_t6t7:
1061 db 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 1071 db 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14
OLD | NEW
« no previous file with comments | « source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c ('k') | source/libvpx/vp9/decoder/vp9_decodeframe.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698