| Index: source/libvpx/vp8/encoder/x86/variance_impl_sse2.asm | 
| =================================================================== | 
| --- source/libvpx/vp8/encoder/x86/variance_impl_sse2.asm	(revision 96967) | 
| +++ source/libvpx/vp8/encoder/x86/variance_impl_sse2.asm	(working copy) | 
| @@ -85,6 +85,7 @@ | 
| push        rbp | 
| mov         rbp, rsp | 
| SHADOW_ARGS_TO_STACK 6 | 
| +    SAVE_XMM 7 | 
| push rbx | 
| push rsi | 
| push rdi | 
| @@ -206,127 +207,14 @@ | 
| pop rdi | 
| pop rsi | 
| pop rbx | 
| +    RESTORE_XMM | 
| UNSHADOW_ARGS | 
| pop         rbp | 
| ret | 
|  | 
|  | 
| -;unsigned int vp8_get16x16pred_error_sse2 | 
| -;( | 
| -;   unsigned char *src_ptr, | 
| -;    int src_stride, | 
| -;    unsigned char *ref_ptr, | 
| -;    int ref_stride | 
| -;) | 
| -global sym(vp8_get16x16pred_error_sse2) | 
| -sym(vp8_get16x16pred_error_sse2): | 
| -    push        rbp | 
| -    mov         rbp, rsp | 
| -    SHADOW_ARGS_TO_STACK 4 | 
| -    GET_GOT     rbx | 
| -    push rsi | 
| -    push rdi | 
| -    sub         rsp, 16 | 
| -    ; end prolog | 
|  | 
| -        mov         rsi,            arg(0) ;[src_ptr] | 
| -        mov         rdi,            arg(2) ;[ref_ptr] | 
|  | 
| -        movsxd      rax,            DWORD PTR arg(1) ;[src_stride] | 
| -        movsxd      rdx,            DWORD PTR arg(3) ;[ref_stride] | 
| - | 
| -        pxor        xmm0,           xmm0                        ; clear xmm0 for unpack | 
| -        pxor        xmm7,           xmm7                        ; clear xmm7 for accumulating diffs | 
| - | 
| -        pxor        xmm6,           xmm6                        ; clear xmm6 for accumulating sse | 
| -        mov         rcx,            16 | 
| - | 
| -var16peloop: | 
| -        movdqu      xmm1,           XMMWORD PTR [rsi] | 
| -        movdqu      xmm2,           XMMWORD PTR [rdi] | 
| - | 
| -        movdqa      xmm3,           xmm1 | 
| -        movdqa      xmm4,           xmm2 | 
| - | 
| -        punpcklbw   xmm1,           xmm0 | 
| -        punpckhbw   xmm3,           xmm0 | 
| - | 
| -        punpcklbw   xmm2,           xmm0 | 
| -        punpckhbw   xmm4,           xmm0 | 
| - | 
| -        psubw       xmm1,           xmm2 | 
| -        psubw       xmm3,           xmm4 | 
| - | 
| -        paddw       xmm7,           xmm1 | 
| -        pmaddwd     xmm1,           xmm1 | 
| - | 
| -        paddw       xmm7,           xmm3 | 
| -        pmaddwd     xmm3,           xmm3 | 
| - | 
| -        paddd       xmm6,           xmm1 | 
| -        paddd       xmm6,           xmm3 | 
| - | 
| -        add         rsi,            rax | 
| -        add         rdi,            rdx | 
| - | 
| -        sub         rcx,            1 | 
| -        jnz         var16peloop | 
| - | 
| - | 
| -        movdqa      xmm1,           xmm6 | 
| -        pxor        xmm6,           xmm6 | 
| - | 
| -        pxor        xmm5,           xmm5 | 
| -        punpcklwd   xmm6,           xmm7 | 
| - | 
| -        punpckhwd   xmm5,           xmm7 | 
| -        psrad       xmm5,           16 | 
| - | 
| -        psrad       xmm6,           16 | 
| -        paddd       xmm6,           xmm5 | 
| - | 
| -        movdqa      xmm2,           xmm1 | 
| -        punpckldq   xmm1,           xmm0 | 
| - | 
| -        punpckhdq   xmm2,           xmm0 | 
| -        movdqa      xmm7,           xmm6 | 
| - | 
| -        paddd       xmm1,           xmm2 | 
| -        punpckldq   xmm6,           xmm0 | 
| - | 
| -        punpckhdq   xmm7,           xmm0 | 
| -        paddd       xmm6,           xmm7 | 
| - | 
| -        movdqa      xmm2,           xmm1 | 
| -        movdqa      xmm7,           xmm6 | 
| - | 
| -        psrldq      xmm1,           8 | 
| -        psrldq      xmm6,           8 | 
| - | 
| -        paddd       xmm7,           xmm6 | 
| -        paddd       xmm1,           xmm2 | 
| - | 
| -        movd DWORD PTR [rsp],       xmm7  ;Sum | 
| -        movd DWORD PTR [rsp+4],     xmm1  ;SSE | 
| - | 
| -        ; return (SSE-((Sum*Sum)>>8)); | 
| -        movsxd      rdx, dword ptr [rsp] | 
| -        imul        rdx, rdx | 
| -        sar         rdx, 8 | 
| -        movsxd      rax, dword ptr [rsp + 4] | 
| -        sub         rax, rdx | 
| - | 
| -    ; begin epilog | 
| -    add rsp, 16 | 
| -    pop rdi | 
| -    pop rsi | 
| -    RESTORE_GOT | 
| -    UNSHADOW_ARGS | 
| -    pop         rbp | 
| -    ret | 
| - | 
| - | 
| - | 
| ;unsigned int vp8_get8x8var_sse2 | 
| ;( | 
| ;    unsigned char   *  src_ptr, | 
| @@ -341,6 +229,7 @@ | 
| push        rbp | 
| mov         rbp, rsp | 
| SHADOW_ARGS_TO_STACK 6 | 
| +    SAVE_XMM 7 | 
| GET_GOT     rbx | 
| push rsi | 
| push rdi | 
| @@ -506,6 +395,7 @@ | 
| pop rdi | 
| pop rsi | 
| RESTORE_GOT | 
| +    RESTORE_XMM | 
| UNSHADOW_ARGS | 
| pop         rbp | 
| ret | 
| @@ -528,7 +418,7 @@ | 
| push        rbp | 
| mov         rbp, rsp | 
| SHADOW_ARGS_TO_STACK 9 | 
| -    SAVE_XMM | 
| +    SAVE_XMM 7 | 
| GET_GOT     rbx | 
| push rsi | 
| push rdi | 
| @@ -805,6 +695,7 @@ | 
| push        rbp | 
| mov         rbp, rsp | 
| SHADOW_ARGS_TO_STACK 7 | 
| +    SAVE_XMM 7 | 
| GET_GOT     rbx | 
| push rsi | 
| push rdi | 
| @@ -906,6 +797,7 @@ | 
| pop rdi | 
| pop rsi | 
| RESTORE_GOT | 
| +    RESTORE_XMM | 
| UNSHADOW_ARGS | 
| pop         rbp | 
| ret | 
| @@ -925,7 +817,7 @@ | 
| push        rbp | 
| mov         rbp, rsp | 
| SHADOW_ARGS_TO_STACK 7 | 
| -    SAVE_XMM | 
| +    SAVE_XMM 7 | 
| GET_GOT     rbx | 
| push rsi | 
| push rdi | 
| @@ -1041,6 +933,7 @@ | 
| push        rbp | 
| mov         rbp, rsp | 
| SHADOW_ARGS_TO_STACK 7 | 
| +    SAVE_XMM 7 | 
| GET_GOT     rbx | 
| push rsi | 
| push rdi | 
| @@ -1127,6 +1020,7 @@ | 
| pop rdi | 
| pop rsi | 
| RESTORE_GOT | 
| +    RESTORE_XMM | 
| UNSHADOW_ARGS | 
| pop         rbp | 
| ret | 
| @@ -1146,7 +1040,7 @@ | 
| push        rbp | 
| mov         rbp, rsp | 
| SHADOW_ARGS_TO_STACK 7 | 
| -    SAVE_XMM | 
| +    SAVE_XMM 7 | 
| GET_GOT     rbx | 
| push rsi | 
| push rdi | 
| @@ -1254,6 +1148,7 @@ | 
| push        rbp | 
| mov         rbp, rsp | 
| SHADOW_ARGS_TO_STACK 7 | 
| +    SAVE_XMM 7 | 
| GET_GOT     rbx | 
| push rsi | 
| push rdi | 
| @@ -1338,6 +1233,7 @@ | 
| pop rdi | 
| pop rsi | 
| RESTORE_GOT | 
| +    RESTORE_XMM | 
| UNSHADOW_ARGS | 
| pop         rbp | 
| ret | 
| @@ -1357,7 +1253,7 @@ | 
| push        rbp | 
| mov         rbp, rsp | 
| SHADOW_ARGS_TO_STACK 7 | 
| -    SAVE_XMM | 
| +    SAVE_XMM 7 | 
| GET_GOT     rbx | 
| push rsi | 
| push rdi | 
|  |