| Index: source/libvpx/vp8/encoder/x86/variance_impl_mmx.asm
|
| ===================================================================
|
| --- source/libvpx/vp8/encoder/x86/variance_impl_mmx.asm (revision 96967)
|
| +++ source/libvpx/vp8/encoder/x86/variance_impl_mmx.asm (working copy)
|
| @@ -843,137 +843,7 @@
|
| pop rbp
|
| ret
|
|
|
| -;unsigned int vp8_get16x16pred_error_mmx
|
| -;(
|
| -; unsigned char *src_ptr, ; first byte of the 16x16 source block
|
| -; int src_stride, ; added to src_ptr after each row
|
| -; unsigned char *ref_ptr, ; first byte of the 16x16 reference block
|
| -; int ref_stride ; added to ref_ptr after each row
|
| -;) ; returns SSE - ((Sum * Sum) >> 8), where Sum / SSE are the sum and the sum of squares of (src - ref) over 16 rows x 16 bytes
|
| -global sym(vp8_get16x16pred_error_mmx)
|
| -sym(vp8_get16x16pred_error_mmx):
|
| - push rbp
|
| - mov rbp, rsp
|
| - SHADOW_ARGS_TO_STACK 4 ; macro defined elsewhere; 4 matches the argument count of the prototype above
|
| - GET_GOT rbx ; macro defined elsewhere; paired with RESTORE_GOT in the epilog
|
| - push rsi
|
| - push rdi
|
| - sub rsp, 16 ; 16 bytes of scratch: Sum is stored at [rsp], SSE at [rsp+4]
|
| - ; end prolog
|
|
|
| - mov rsi, arg(0) ; rsi = src_ptr
|
| - mov rdi, arg(2) ; rdi = ref_ptr
|
| -
|
| - movsxd rax, DWORD PTR arg(1) ; rax = src_stride, sign-extended to 64 bits
|
| - movsxd rdx, DWORD PTR arg(3) ; rdx = ref_stride, sign-extended to 64 bits
|
| -
|
| - pxor mm0, mm0 ; mm0 = 0, used as the zero half when unpacking bytes to words
|
| - pxor mm7, mm7 ; mm7 = four 16-bit accumulators for the sum of differences
|
| -
|
| - pxor mm6, mm6 ; mm6 = two 32-bit accumulators for the sum of squared differences
|
| - mov rcx, 16 ; rcx = rows remaining
|
| -
|
| -var16loop:
|
| -; One iteration handles one row: bytes 0..7 first, then bytes 8..15.
|
| - movq mm1, [rsi] ; src bytes 0..7
|
| - movq mm2, [rdi] ; ref bytes 0..7
|
| -
|
| - movq mm3, mm1 ; copies, so low and high halves can be unpacked separately
|
| - movq mm4, mm2
|
| -
|
| - punpcklbw mm1, mm0 ; src bytes 0..3 zero-extended to words
|
| - punpckhbw mm3, mm0 ; src bytes 4..7 zero-extended to words
|
| -
|
| - punpcklbw mm2, mm0 ; ref bytes 0..3 zero-extended to words
|
| - punpckhbw mm4, mm0 ; ref bytes 4..7 zero-extended to words
|
| -
|
| - psubw mm1, mm2 ; signed word differences src - ref
|
| - psubw mm3, mm4
|
| -
|
| - paddw mm7, mm1 ; accumulate differences before they are overwritten by squaring
|
| - pmaddwd mm1, mm1 ; square each difference and add adjacent pairs -> two dwords
|
| -
|
| - paddw mm7, mm3
|
| - pmaddwd mm3, mm3
|
| -
|
| - paddd mm6, mm1 ; accumulate squared differences
|
| - paddd mm6, mm3
|
| -
|
| -; Same sequence for bytes 8..15 of the row.
|
| - movq mm1, [rsi+8] ; src bytes 8..15
|
| - movq mm2, [rdi+8] ; ref bytes 8..15
|
| -
|
| - movq mm3, mm1
|
| - movq mm4, mm2
|
| -
|
| - punpcklbw mm1, mm0
|
| - punpckhbw mm3, mm0
|
| -
|
| - punpcklbw mm2, mm0
|
| - punpckhbw mm4, mm0
|
| -
|
| - psubw mm1, mm2
|
| - psubw mm3, mm4
|
| -
|
| - paddw mm7, mm1
|
| - pmaddwd mm1, mm1
|
| -
|
| - paddw mm7, mm3
|
| - pmaddwd mm3, mm3
|
| -
|
| - paddd mm6, mm1
|
| - paddd mm6, mm3
|
| -
|
| - add rsi, rax ; advance src to the next row
|
| - add rdi, rdx ; advance ref to the next row
|
| -
|
| - sub rcx, 1
|
| - jnz var16loop ; repeat until all 16 rows are done
|
| -
|
| -; Horizontal reduction: fold the packed accumulators into a scalar Sum and a scalar SSE.
|
| - movq mm1, mm6 ; mm1 = SSE partial sums (mm6 is reused below)
|
| - pxor mm6, mm6
|
| -
|
| - pxor mm5, mm5
|
| - punpcklwd mm6, mm7 ; low two difference words placed in the upper half of each dword
|
| -
|
| - punpckhwd mm5, mm7 ; high two difference words placed in the upper half of each dword
|
| - psrad mm5, 16 ; arithmetic shift back down = sign-extend words to dwords
|
| -
|
| - psrad mm6, 16
|
| - paddd mm6, mm5 ; two dword partial sums of the differences
|
| -
|
| - movq mm2, mm1
|
| - psrlq mm1, 32 ; bring the high SSE dword down
|
| -
|
| - paddd mm2, mm1 ; low dword of mm2 = total SSE
|
| - movq mm7, mm6
|
| -
|
| - psrlq mm6, 32 ; bring the high Sum dword down
|
| - paddd mm6, mm7 ; low dword of mm6 = total Sum
|
| -
|
| - movd DWORD PTR [rsp], mm6 ;Sum
|
| - movd DWORD PTR [rsp+4], mm2 ;SSE
|
| -
|
| - ; return (SSE-((Sum*Sum)>>8)); the shift by 8 divides by 256 = 16 rows x 16 bytes
|
| - movsxd rdx, dword ptr [rsp] ; rdx = Sum (signed)
|
| - imul rdx, rdx ; rdx = Sum * Sum
|
| - sar rdx, 8 ; rdx = (Sum * Sum) >> 8
|
| - movsxd rax, dword ptr [rsp + 4] ; rax = SSE
|
| - sub rax, rdx ; rax = result
|
| -
|
| -; NOTE(review): no emms is issued in this function; presumably MMX state is cleared by the caller - confirm.
|
| - ; begin epilog
|
| - add rsp, 16 ; release the Sum/SSE scratch area
|
| - pop rdi
|
| - pop rsi
|
| - RESTORE_GOT
|
| - UNSHADOW_ARGS
|
| - pop rbp
|
| - ret
|
| -
|
| -
|
| -
|
| SECTION_RODATA
|
| ;short mmx_bi_rd[4] = { 64, 64, 64, 64};
|
| align 16
|
|
|