| OLD | NEW | 
|---|---|
| 1 ; | 1 ; | 
| 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 
| 3 ; | 3 ; | 
| 4 ;  Use of this source code is governed by a BSD-style license | 4 ;  Use of this source code is governed by a BSD-style license | 
| 5 ;  that can be found in the LICENSE file in the root of the source | 5 ;  that can be found in the LICENSE file in the root of the source | 
| 6 ;  tree. An additional intellectual property rights grant can be found | 6 ;  tree. An additional intellectual property rights grant can be found | 
| 7 ;  in the file PATENTS.  All contributing project authors may | 7 ;  in the file PATENTS.  All contributing project authors may | 
| 8 ;  be found in the AUTHORS file in the root of the source tree. | 8 ;  be found in the AUTHORS file in the root of the source tree. | 
| 9 ; | 9 ; | 
| 10 | 10 | 
| (...skipping 825 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 836 | 836 | 
| 837     ; begin epilog | 837     ; begin epilog | 
| 838     add rsp, 16 | 838     add rsp, 16 | 
| 839     pop rdi | 839     pop rdi | 
| 840     pop rsi | 840     pop rsi | 
| 841     RESTORE_GOT | 841     RESTORE_GOT | 
| 842     UNSHADOW_ARGS | 842     UNSHADOW_ARGS | 
| 843     pop         rbp | 843     pop         rbp | 
| 844     ret | 844     ret | 
| 845 | 845 | 
| 846 ;unsigned int vp8_get16x16pred_error_mmx |  | 
| 847 ;( |  | 
| 848 ;    unsigned char *src_ptr, |  | 
| 849 ;    int src_stride, |  | 
| 850 ;    unsigned char *ref_ptr, |  | 
| 851 ;    int ref_stride |  | 
| 852 ;) |  | 
| 853 global sym(vp8_get16x16pred_error_mmx) |  | 
| 854 sym(vp8_get16x16pred_error_mmx): |  | 
| 855     push        rbp |  | 
| 856     mov         rbp, rsp |  | 
| 857     SHADOW_ARGS_TO_STACK 4 |  | 
| 858     GET_GOT     rbx |  | 
| 859     push rsi |  | 
| 860     push rdi |  | 
| 861     sub         rsp, 16 |  | 
| 862     ; end prolog |  | 
| 863 |  | 
| 864         mov         rsi,            arg(0) ;DWORD PTR [src_ptr] |  | 
| 865         mov         rdi,            arg(2) ;DWORD PTR [ref_ptr] |  | 
| 866 |  | 
| 867         movsxd      rax,            DWORD PTR arg(1) ;[src_stride] |  | 
| 868         movsxd      rdx,            DWORD PTR arg(3) ;[ref_stride] |  | 
| 869 |  | 
| 870         pxor        mm0,            mm0                     ; clear xmm0 for unpack |  | 
| 871         pxor        mm7,            mm7                     ; clear xmm7 for accumulating diffs |  | 
| 872 |  | 
| 873         pxor        mm6,            mm6                     ; clear xmm6 for accumulating sse |  | 
| 874         mov         rcx,            16 |  | 
| 875 |  | 
| 876 var16loop: |  | 
| 877 |  | 
| 878         movq        mm1,            [rsi] |  | 
| 879         movq        mm2,            [rdi] |  | 
| 880 |  | 
| 881         movq        mm3,            mm1 |  | 
| 882         movq        mm4,            mm2 |  | 
| 883 |  | 
| 884         punpcklbw   mm1,            mm0 |  | 
| 885         punpckhbw   mm3,            mm0 |  | 
| 886 |  | 
| 887         punpcklbw   mm2,            mm0 |  | 
| 888         punpckhbw   mm4,            mm0 |  | 
| 889 |  | 
| 890         psubw       mm1,            mm2 |  | 
| 891         psubw       mm3,            mm4 |  | 
| 892 |  | 
| 893         paddw       mm7,            mm1 |  | 
| 894         pmaddwd     mm1,            mm1 |  | 
| 895 |  | 
| 896         paddw       mm7,            mm3 |  | 
| 897         pmaddwd     mm3,            mm3 |  | 
| 898 |  | 
| 899         paddd       mm6,            mm1 |  | 
| 900         paddd       mm6,            mm3 |  | 
| 901 |  | 
| 902 |  | 
| 903         movq        mm1,            [rsi+8] |  | 
| 904         movq        mm2,            [rdi+8] |  | 
| 905 |  | 
| 906         movq        mm3,            mm1 |  | 
| 907         movq        mm4,            mm2 |  | 
| 908 |  | 
| 909         punpcklbw   mm1,            mm0 |  | 
| 910         punpckhbw   mm3,            mm0 |  | 
| 911 |  | 
| 912         punpcklbw   mm2,            mm0 |  | 
| 913         punpckhbw   mm4,            mm0 |  | 
| 914 |  | 
| 915         psubw       mm1,            mm2 |  | 
| 916         psubw       mm3,            mm4 |  | 
| 917 |  | 
| 918         paddw       mm7,            mm1 |  | 
| 919         pmaddwd     mm1,            mm1 |  | 
| 920 |  | 
| 921         paddw       mm7,            mm3 |  | 
| 922         pmaddwd     mm3,            mm3 |  | 
| 923 |  | 
| 924         paddd       mm6,            mm1 |  | 
| 925         paddd       mm6,            mm3 |  | 
| 926 |  | 
| 927         add         rsi,            rax |  | 
| 928         add         rdi,            rdx |  | 
| 929 |  | 
| 930         sub         rcx,            1 |  | 
| 931         jnz         var16loop |  | 
| 932 |  | 
| 933 |  | 
| 934         movq        mm1,            mm6 |  | 
| 935         pxor        mm6,            mm6 |  | 
| 936 |  | 
| 937         pxor        mm5,            mm5 |  | 
| 938         punpcklwd   mm6,            mm7 |  | 
| 939 |  | 
| 940         punpckhwd   mm5,            mm7 |  | 
| 941         psrad       mm5,            16 |  | 
| 942 |  | 
| 943         psrad       mm6,            16 |  | 
| 944         paddd       mm6,            mm5 |  | 
| 945 |  | 
| 946         movq        mm2,            mm1 |  | 
| 947         psrlq       mm1,            32 |  | 
| 948 |  | 
| 949         paddd       mm2,            mm1 |  | 
| 950         movq        mm7,            mm6 |  | 
| 951 |  | 
| 952         psrlq       mm6,            32 |  | 
| 953         paddd       mm6,            mm7 |  | 
| 954 |  | 
| 955         movd DWORD PTR [rsp],       mm6  ;Sum |  | 
| 956         movd DWORD PTR [rsp+4],     mm2  ;SSE |  | 
| 957 |  | 
| 958         ; return (SSE-((Sum*Sum)>>8)); |  | 
| 959         movsxd      rdx, dword ptr [rsp] |  | 
| 960         imul        rdx, rdx |  | 
| 961         sar         rdx, 8 |  | 
| 962         movsxd      rax, dword ptr [rsp + 4] |  | 
| 963         sub         rax, rdx |  | 
| 964 |  | 
| 965 |  | 
| 966     ; begin epilog |  | 
| 967     add rsp, 16 |  | 
| 968     pop rdi |  | 
| 969     pop rsi |  | 
| 970     RESTORE_GOT |  | 
| 971     UNSHADOW_ARGS |  | 
| 972     pop         rbp |  | 
| 973     ret |  | 
| 974 |  | 
| 975 |  | 
| 976 | 846 | 
| 977 SECTION_RODATA | 847 SECTION_RODATA | 
| 978 ;short mmx_bi_rd[4] = { 64, 64, 64, 64}; | 848 ;short mmx_bi_rd[4] = { 64, 64, 64, 64}; | 
| 979 align 16 | 849 align 16 | 
| 980 mmx_bi_rd: | 850 mmx_bi_rd: | 
| 981     times 4 dw 64 | 851     times 4 dw 64 | 
| OLD | NEW | 
|---|