Index: libvpx/source/libvpx/vp8/encoder/x86/encodeopt.asm
diff --git a/libvpx/source/libvpx/vp8/encoder/x86/encodeopt.asm b/libvpx/source/libvpx/vp8/encoder/x86/encodeopt.asm
index 9946294995cef24c4d95293f85f4814bfca43988..c0f06bbbb638f868d799df669b5d55e4371d3289 100644
--- a/libvpx/source/libvpx/vp8/encoder/x86/encodeopt.asm
+++ b/libvpx/source/libvpx/vp8/encoder/x86/encodeopt.asm
@@ -22,33 +22,33 @@ sym(vp8_block_error_xmm):
     ; end prologue

         mov rsi, arg(0) ;coeff_ptr
-        mov rdi, arg(1) ;dcoef_ptr

-        movdqa xmm0, [rsi]
-        movdqa xmm1, [rdi]
+        mov rdi, arg(1) ;dcoef_ptr
+        movdqa xmm3, [rsi]

-        movdqa xmm2, [rsi+16]
-        movdqa xmm3, [rdi+16]
+        movdqa xmm4, [rdi]
+        movdqa xmm5, [rsi+16]

-        psubw xmm0, xmm1
-        psubw xmm2, xmm3
+        movdqa xmm6, [rdi+16]
+        psubw xmm3, xmm4

-        pmaddwd xmm0, xmm0
-        pmaddwd xmm2, xmm2
+        psubw xmm5, xmm6
+        pmaddwd xmm3, xmm3
+        pmaddwd xmm5, xmm5

-        paddd xmm0, xmm2
+        paddd xmm3, xmm5

-        pxor xmm5, xmm5
-        movdqa xmm1, xmm0
+        pxor xmm7, xmm7
+        movdqa xmm0, xmm3

-        punpckldq xmm0, xmm5
-        punpckhdq xmm1, xmm5
+        punpckldq xmm0, xmm7
+        punpckhdq xmm3, xmm7

-        paddd xmm0, xmm1
-        movdqa xmm1, xmm0
+        paddd xmm0, xmm3
+        movdqa xmm3, xmm0

         psrldq xmm0, 8
-        paddd xmm0, xmm1
+        paddd xmm0, xmm3

         movq rax, xmm0

@@ -208,54 +208,53 @@ sym(vp8_mbblock_error_xmm_impl):
     push rbp
     mov rbp, rsp
     SHADOW_ARGS_TO_STACK 3
-    SAVE_XMM 6
     push rsi
     push rdi
     ; end prolog


         mov rsi, arg(0) ;coeff_ptr
-        pxor xmm6, xmm6
+        pxor xmm7, xmm7

         mov rdi, arg(1) ;dcoef_ptr
-        pxor xmm4, xmm4
+        pxor xmm2, xmm2

-        movd xmm5, dword ptr arg(2) ;dc
-        por xmm5, xmm4
+        movd xmm1, dword ptr arg(2) ;dc
+        por xmm1, xmm2

-        pcmpeqw xmm5, xmm6
+        pcmpeqw xmm1, xmm7
         mov rcx, 16

 mberror_loop:
-        movdqa xmm0, [rsi]
-        movdqa xmm1, [rdi]
+        movdqa xmm3, [rsi]
+        movdqa xmm4, [rdi]

-        movdqa xmm2, [rsi+16]
-        movdqa xmm3, [rdi+16]
+        movdqa xmm5, [rsi+16]
+        movdqa xmm6, [rdi+16]


-        psubw xmm2, xmm3
-        pmaddwd xmm2, xmm2
+        psubw xmm5, xmm6
+        pmaddwd xmm5, xmm5

-        psubw xmm0, xmm1
-        pand xmm0, xmm5
+        psubw xmm3, xmm4
+        pand xmm3, xmm1

-        pmaddwd xmm0, xmm0
+        pmaddwd xmm3, xmm3
         add rsi, 32

         add rdi, 32

         sub rcx, 1
-        paddd xmm4, xmm2
+        paddd xmm2, xmm5

-        paddd xmm4, xmm0
+        paddd xmm2, xmm3
         jnz mberror_loop

-        movdqa xmm0, xmm4
-        punpckldq xmm0, xmm6
+        movdqa xmm0, xmm2
+        punpckldq xmm0, xmm7

-        punpckhdq xmm4, xmm6
-        paddd xmm0, xmm4
+        punpckhdq xmm2, xmm7
+        paddd xmm0, xmm2

         movdqa xmm1, xmm0
         psrldq xmm0, 8
@@ -266,7 +265,6 @@ mberror_loop:
     pop rdi
     pop rsi
     ; begin epilog
-    RESTORE_XMM
     UNSHADOW_ARGS
     pop rbp
     ret
@@ -344,7 +342,7 @@ sym(vp8_mbuverror_xmm_impl):
         mov rdi, arg(1) ;d_ptr

         mov rcx, 16
-        pxor xmm3, xmm3
+        pxor xmm7, xmm7

 mbuverror_loop:

@@ -354,7 +352,7 @@ mbuverror_loop:
         psubw xmm1, xmm2
         pmaddwd xmm1, xmm1

-        paddd xmm3, xmm1
+        paddd xmm7, xmm1

         add rsi, 16
         add rdi, 16
@@ -363,7 +361,7 @@ mbuverror_loop:
         jnz mbuverror_loop

         pxor xmm0, xmm0
-        movdqa xmm1, xmm3
+        movdqa xmm1, xmm7

         movdqa xmm2, xmm1
         punpckldq xmm1, xmm0
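
Note: all three kernels touched by this patch compute the same quantity, a sum of squared differences between a block's transform coefficients (coeff_ptr) and its dequantized coefficients (dcoef_ptr): psubw forms the 16-bit differences, pmaddwd squares them and sums adjacent pairs, and the punpckldq/punpckhdq/psrldq/paddd tail reduces the 32-bit partial sums into rax. The patch only reassigns XMM registers, reschedules a few instructions, and drops the SAVE_XMM 6 / RESTORE_XMM pair from vp8_mbblock_error_xmm_impl; the arithmetic is unchanged. A minimal scalar sketch of the per-block computation (block_error_ref is a hypothetical name for illustration, not the libvpx C reference):

    /* Sketch only, not libvpx source: sum of squared differences over one
     * block of 16 short coefficients, mirroring the SSE2 code above. */
    static int block_error_ref(const short *coeff_ptr, const short *dcoef_ptr)
    {
        int i, error = 0;

        for (i = 0; i < 16; i++)
        {
            int diff = coeff_ptr[i] - dcoef_ptr[i];  /* psubw           */
            error += diff * diff;                    /* pmaddwd + paddd */
        }

        return error;                                /* horizontal add, movq rax, xmm0 */
    }

vp8_mbblock_error_xmm_impl accumulates this over a macroblock's sixteen 4x4 blocks; when its dc argument is non-zero, the pcmpeqw/pand pair masks out the DC difference, which in scalar terms amounts to starting the inner loop at i = 1. vp8_mbuverror_xmm_impl applies the same accumulation to the macroblock's chroma (U/V) coefficients.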
|