source/libvpx/vp8/encoder/x86/variance_impl_mmx.asm - Issue 7671004: Update libvpx snapshot to v0.9.7-p1 (Cayuga).

Side by Side Diff: source/libvpx/vp8/encoder/x86/variance_impl_mmx.asm

Issue 7671004: Update libvpx snapshot to v0.9.7-p1 (Cayuga). (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/

Patch Set: '' Created 9 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 ;	1 ;

2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.	2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.

3 ;	3 ;

4 ; Use of this source code is governed by a BSD-style license	4 ; Use of this source code is governed by a BSD-style license

5 ; that can be found in the LICENSE file in the root of the source	5 ; that can be found in the LICENSE file in the root of the source

6 ; tree. An additional intellectual property rights grant can be found	6 ; tree. An additional intellectual property rights grant can be found

7 ; in the file PATENTS. All contributing project authors may	7 ; in the file PATENTS. All contributing project authors may

8 ; be found in the AUTHORS file in the root of the source tree.	8 ; be found in the AUTHORS file in the root of the source tree.

9 ;	9 ;

10	10

(...skipping 825 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
836	836

837 ; begin epilog	837 ; begin epilog

838 add rsp, 16	838 add rsp, 16

839 pop rdi	839 pop rdi

840 pop rsi	840 pop rsi

841 RESTORE_GOT	841 RESTORE_GOT

842 UNSHADOW_ARGS	842 UNSHADOW_ARGS

843 pop rbp	843 pop rbp

844 ret	844 ret

845	845

846 ;unsigned int vp8_get16x16pred_error_mmx

847 ;(

848 ; unsigned char *src_ptr,

849 ; int src_stride,

850 ; unsigned char *ref_ptr,

851 ; int ref_stride

852 ;)
;
; Walks 16 rows of 16 bytes from src_ptr and ref_ptr, accumulating
;   Sum = sum of (src - ref) and SSE = sum of (src - ref)^2,
; and returns SSE - ((Sum * Sum) >> 8) in rax.
;
; Register roles inside the loop:
;   rsi/rdi = current src/ref row, rax/rdx = src/ref stride, rcx = rows left
;   mm0 = zero (for byte->word unpack), mm7 = four 16-bit diff sums,
;   mm6 = two 32-bit squared-diff sums, mm1..mm4 = scratch
;
; NOTE(review): no emms is executed before ret in this function; presumably
; the caller is responsible for clearing MMX state - confirm.

853 global sym(vp8_get16x16pred_error_mmx)

854 sym(vp8_get16x16pred_error_mmx):

855 push rbp

856 mov rbp, rsp

857 SHADOW_ARGS_TO_STACK 4

858 GET_GOT rbx

859 push rsi

860 push rdi

861 sub rsp, 16 ; 16 bytes of locals: [rsp] = Sum, [rsp+4] = SSE

862 ; end prolog

863

864 mov rsi, arg(0) ;DWORD PTR [src_ptr]

865 mov rdi, arg(2) ;DWORD PTR [ref_ptr]

866

867 movsxd rax, DWORD PTR arg(1) ;[src_stride]

868 movsxd rdx, DWORD PTR arg(3) ;[ref_stride]

869

870 pxor mm0, mm0 ; clear mm0 for unpack

871 pxor mm7, mm7 ; clear mm7 for accumulating diffs

872

873 pxor mm6, mm6 ; clear mm6 for accumulating sse

874 mov rcx, 16 ; loop counter: 16 rows

875

876 var16loop:

877

878 movq mm1, [rsi] ; bytes 0..7 of the src row

879 movq mm2, [rdi] ; bytes 0..7 of the ref row

880

881 movq mm3, mm1 ; copies so low and high halves can be unpacked separately

882 movq mm4, mm2

883

884 punpcklbw mm1, mm0 ; src bytes 0..3 zero-extended to words

885 punpckhbw mm3, mm0 ; src bytes 4..7 zero-extended to words

886

887 punpcklbw mm2, mm0 ; ref bytes 0..3 zero-extended to words

888 punpckhbw mm4, mm0 ; ref bytes 4..7 zero-extended to words

889

890 psubw mm1, mm2 ; signed word diffs (src - ref), each in [-255, 255]

891 psubw mm3, mm4

892

893 paddw mm7, mm1 ; add diffs into the four 16-bit lane sums

894 pmaddwd mm1, mm1 ; square each diff and add adjacent pairs -> two dwords

895

896 paddw mm7, mm3

897 pmaddwd mm3, mm3

898

899 paddd mm6, mm1 ; add squared diffs into the two 32-bit sse sums

900 paddd mm6, mm3

901

902

903 movq mm1, [rsi+8] ; bytes 8..15 of the src row; same steps as above

904 movq mm2, [rdi+8] ; bytes 8..15 of the ref row

905

906 movq mm3, mm1

907 movq mm4, mm2

908

909 punpcklbw mm1, mm0

910 punpckhbw mm3, mm0

911

912 punpcklbw mm2, mm0

913 punpckhbw mm4, mm0

914

915 psubw mm1, mm2

916 psubw mm3, mm4

917

918 paddw mm7, mm1

919 pmaddwd mm1, mm1

920

921 paddw mm7, mm3

922 pmaddwd mm3, mm3

923

924 paddd mm6, mm1

925 paddd mm6, mm3

926

927 add rsi, rax ; advance src by src_stride

928 add rdi, rdx ; advance ref by ref_stride

929

930 sub rcx, 1

931 jnz var16loop

932

933 ; Each 16-bit lane of mm7 received 4 diffs per row * 16 rows = 64 diffs,
; so its magnitude is at most 64 * 255 = 16320 and paddw did not wrap.

934 movq mm1, mm6 ; keep the sse sums; mm6 is reused below

935 pxor mm6, mm6

936

937 pxor mm5, mm5

938 punpcklwd mm6, mm7 ; low two diff words placed in the upper half of each dword

939

940 punpckhwd mm5, mm7 ; high two diff words placed in the upper half of each dword

941 psrad mm5, 16 ; arithmetic shift sign-extends the words to dwords

942

943 psrad mm6, 16

944 paddd mm6, mm5 ; four lane sums reduced to two dwords

945

946 movq mm2, mm1

947 psrlq mm1, 32

948

949 paddd mm2, mm1 ; low dword of mm2 = total sse

950 movq mm7, mm6

951

952 psrlq mm6, 32

953 paddd mm6, mm7 ; low dword of mm6 = total sum of diffs

954

955 movd DWORD PTR [rsp], mm6 ;Sum

956 movd DWORD PTR [rsp+4], mm2 ;SSE

957

958 ; return (SSE-((Sum*Sum)>>8));

959 movsxd rdx, dword ptr [rsp]

960 imul rdx, rdx

961 sar rdx, 8

962 movsxd rax, dword ptr [rsp + 4]

963 sub rax, rdx

964

965

966 ; begin epilog

967 add rsp, 16

968 pop rdi

969 pop rsi

970 RESTORE_GOT

971 UNSHADOW_ARGS

972 pop rbp

973 ret

974

975

976	846

977 SECTION_RODATA	847 SECTION_RODATA

978 ;short mmx_bi_rd[4] = { 64, 64, 64, 64};	848 ;short mmx_bi_rd[4] = { 64, 64, 64, 64};

979 align 16	849 align 16

980 mmx_bi_rd:	850 mmx_bi_rd:

981 times 4 dw 64	851 times 4 dw 64

OLD	NEW