Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(334)

Side by Side Diff: source/libvpx/vp8/encoder/x86/variance_impl_mmx.asm

Issue 7671004: Update libvpx snapshot to v0.9.7-p1 (Cayuga). (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: '' Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 ; 1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
(...skipping 825 matching lines...) Expand 10 before | Expand all | Expand 10 after
836 836
837 ; begin epilog 837 ; begin epilog
838 add rsp, 16 838 add rsp, 16
839 pop rdi 839 pop rdi
840 pop rsi 840 pop rsi
841 RESTORE_GOT 841 RESTORE_GOT
842 UNSHADOW_ARGS 842 UNSHADOW_ARGS
843 pop rbp 843 pop rbp
844 ret 844 ret
845 845
846 ;unsigned int vp8_get16x16pred_error_mmx
847 ;(
848 ; unsigned char *src_ptr,
849 ; int src_stride,
850 ; unsigned char *ref_ptr,
851 ; int ref_stride
852 ;)
853 global sym(vp8_get16x16pred_error_mmx)
854 sym(vp8_get16x16pred_error_mmx): ; returns SSE - ((Sum*Sum) >> 8) over 16 rows of 16 bytes of (src - ref)
855 push rbp
856 mov rbp, rsp
857 SHADOW_ARGS_TO_STACK 4 ; NOTE(review): macro defined outside this view; presumably makes the 4 args reachable via arg(n) - confirm
858 GET_GOT rbx ; NOTE(review): macro defined outside this view; paired with RESTORE_GOT in the epilog
859 push rsi
860 push rdi
861 sub rsp, 16 ; scratch space: [rsp] holds Sum, [rsp+4] holds SSE
862 ; end prolog
863
864 mov rsi, arg(0) ;DWORD PTR [src_ptr]
865 mov rdi, arg(2) ;DWORD PTR [ref_ptr]
866
867 movsxd rax, DWORD PTR arg(1) ;[src_stride], sign-extended to 64 bits
868 movsxd rdx, DWORD PTR arg(3) ;[ref_stride], sign-extended to 64 bits
869
870 pxor mm0, mm0 ; clear mm0 for unpack (zero source when widening bytes to words)
871 pxor mm7, mm7 ; clear mm7 for accumulating diffs (four 16-bit lanes)
872
873 pxor mm6, mm6 ; clear mm6 for accumulating sse (two 32-bit lanes)
874 mov rcx, 16 ; rcx = rows remaining
875
876 var16loop: ; one iteration per row: bytes 0-7, then bytes 8-15
877
878 movq mm1, [rsi] ; src bytes 0-7
879 movq mm2, [rdi] ; ref bytes 0-7
880
881 movq mm3, mm1 ; copies so low and high halves can be unpacked separately
882 movq mm4, mm2
883
884 punpcklbw mm1, mm0 ; src bytes 0-3 -> words
885 punpckhbw mm3, mm0 ; src bytes 4-7 -> words
886
887 punpcklbw mm2, mm0 ; ref bytes 0-3 -> words
888 punpckhbw mm4, mm0 ; ref bytes 4-7 -> words
889
890 psubw mm1, mm2 ; diff = src - ref (signed words)
891 psubw mm3, mm4
892
893 paddw mm7, mm1 ; accumulate diffs before mm1 is overwritten by the squares
894 pmaddwd mm1, mm1 ; square each diff and add adjacent pairs into dwords
895
896 paddw mm7, mm3
897 pmaddwd mm3, mm3
898
899 paddd mm6, mm1 ; accumulate squared diffs
900 paddd mm6, mm3
901
902
903 movq mm1, [rsi+8] ; src bytes 8-15; same sequence as above
904 movq mm2, [rdi+8] ; ref bytes 8-15
905
906 movq mm3, mm1
907 movq mm4, mm2
908
909 punpcklbw mm1, mm0
910 punpckhbw mm3, mm0
911
912 punpcklbw mm2, mm0
913 punpckhbw mm4, mm0
914
915 psubw mm1, mm2
916 psubw mm3, mm4
917
918 paddw mm7, mm1
919 pmaddwd mm1, mm1
920
921 paddw mm7, mm3
922 pmaddwd mm3, mm3
923
924 paddd mm6, mm1
925 paddd mm6, mm3
926
927 add rsi, rax ; advance src by src_stride
928 add rdi, rdx ; advance ref by ref_stride
929
930 sub rcx, 1
931 jnz var16loop ; loop until all 16 rows are processed
932
933
934 movq mm1, mm6 ; mm1 = SSE partial sums (two dwords)
935 pxor mm6, mm6
936
937 pxor mm5, mm5
938 punpcklwd mm6, mm7 ; low two diff words placed in the upper half of each dword
939
940 punpckhwd mm5, mm7 ; high two diff words placed in the upper half of each dword
941 psrad mm5, 16 ; arithmetic shift back down = sign-extend words to dwords
942
943 psrad mm6, 16
944 paddd mm6, mm5 ; mm6 = two dword partial sums of the diffs
945
946 movq mm2, mm1
947 psrlq mm1, 32
948
949 paddd mm2, mm1 ; low dword of mm2 = total SSE
950 movq mm7, mm6
951
952 psrlq mm6, 32
953 paddd mm6, mm7 ; low dword of mm6 = total Sum
954
955 movd DWORD PTR [rsp], mm6 ;Sum
956 movd DWORD PTR [rsp+4], mm2 ;SSE
957
958 ; return (SSE-((Sum*Sum)>>8));
959 movsxd rdx, dword ptr [rsp] ; rdx = Sum (sign-extended)
960 imul rdx, rdx ; rdx = Sum*Sum
961 sar rdx, 8 ; rdx = (Sum*Sum) >> 8
962 movsxd rax, dword ptr [rsp + 4] ; rax = SSE
963 sub rax, rdx ; return value in rax
964
965
966 ; begin epilog
967 add rsp, 16 ; release the scratch space reserved in the prolog
968 pop rdi
969 pop rsi
970 RESTORE_GOT
971 UNSHADOW_ARGS
972 pop rbp
973 ret
974
975
976 846
977 SECTION_RODATA 847 SECTION_RODATA
978 ;short mmx_bi_rd[4] = { 64, 64, 64, 64}; 848 ;short mmx_bi_rd[4] = { 64, 64, 64, 64};
979 align 16 849 align 16
980 mmx_bi_rd: 850 mmx_bi_rd:
981 times 4 dw 64 851 times 4 dw 64
OLDNEW
« no previous file with comments | « source/libvpx/vp8/encoder/x86/temporal_filter_apply_sse2.asm ('k') | source/libvpx/vp8/encoder/x86/variance_impl_sse2.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698