| Index: libvpx/source/libvpx/vp8/encoder/x86/sad_sse3.asm
|
| diff --git a/libvpx/source/libvpx/vp8/encoder/x86/sad_sse3.asm b/libvpx/source/libvpx/vp8/encoder/x86/sad_sse3.asm
|
| index 9e05521664edb8a147acf033b8cd010b92bf3ed3..f0336ab175f7bda413c7bc0706ec2907fc4bcb21 100644
|
| --- a/libvpx/source/libvpx/vp8/encoder/x86/sad_sse3.asm
|
| +++ b/libvpx/source/libvpx/vp8/encoder/x86/sad_sse3.asm
|
| @@ -20,7 +20,6 @@
|
| %define ret_var rbx
|
| %define result_ptr arg(4)
|
| %define max_err arg(4)
|
| - %define height dword ptr arg(4)
|
| push rbp
|
| mov rbp, rsp
|
| push rsi
|
| @@ -34,16 +33,14 @@
|
| movsxd rdx, dword ptr arg(3) ; ref_stride
|
| %else
|
| %ifidn __OUTPUT_FORMAT__,x64
|
| - SAVE_XMM 7, u
|
| %define src_ptr rcx
|
| %define src_stride rdx
|
| %define ref_ptr r8
|
| %define ref_stride r9
|
| %define end_ptr r10
|
| %define ret_var r11
|
| - %define result_ptr [rsp+xmm_stack_space+8+4*8]
|
| - %define max_err [rsp+xmm_stack_space+8+4*8]
|
| - %define height dword ptr [rsp+xmm_stack_space+8+4*8]
|
| + %define result_ptr [rsp+8+4*8]
|
| + %define max_err [rsp+8+4*8]
|
| %else
|
| %define src_ptr rdi
|
| %define src_stride rsi
|
| @@ -53,7 +50,6 @@
|
| %define ret_var r10
|
| %define result_ptr r8
|
| %define max_err r8
|
| - %define height r8
|
| %endif
|
| %endif
|
|
|
| @@ -68,7 +64,6 @@
|
| %define ret_var
|
| %define result_ptr
|
| %define max_err
|
| - %define height
|
|
|
| %if ABI_IS_32BIT
|
| pop rbx
|
| @@ -77,7 +72,6 @@
|
| pop rbp
|
| %else
|
| %ifidn __OUTPUT_FORMAT__,x64
|
| - RESTORE_XMM
|
| %endif
|
| %endif
|
| ret
|
| @@ -112,7 +106,6 @@
|
| xchg rbx, rax
|
| %else
|
| %ifidn __OUTPUT_FORMAT__,x64
|
| - SAVE_XMM 7, u
|
| %define src_ptr rcx
|
| %define src_stride rdx
|
| %define r0_ptr rsi
|
| @@ -120,7 +113,7 @@
|
| %define r2_ptr r11
|
| %define r3_ptr r8
|
| %define ref_stride r9
|
| - %define result_ptr [rsp+xmm_stack_space+16+4*8]
|
| + %define result_ptr [rsp+16+4*8]
|
| push rsi
|
|
|
| LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr
|
| @@ -158,7 +151,6 @@
|
| %else
|
| %ifidn __OUTPUT_FORMAT__,x64
|
| pop rsi
|
| - RESTORE_XMM
|
| %endif
|
| %endif
|
| ret
|
| @@ -636,67 +628,6 @@ sym(vp8_sad16x16_sse3):
|
|
|
| STACK_FRAME_DESTROY_X3
|
|
|
| -;void vp8_copy32xn_sse3(
|
| -; unsigned char *src_ptr,
|
| -; int src_stride,
|
| -; unsigned char *dst_ptr,
|
| -; int dst_stride,
|
| -; int height);
|
| -global sym(vp8_copy32xn_sse3)
|
| -sym(vp8_copy32xn_sse3):
|
| -
|
| - STACK_FRAME_CREATE_X3
|
| -
|
| -block_copy_sse3_loopx4:
|
| - lea end_ptr, [src_ptr+src_stride*2]
|
| -
|
| - movdqu xmm0, XMMWORD PTR [src_ptr]
|
| - movdqu xmm1, XMMWORD PTR [src_ptr + 16]
|
| - movdqu xmm2, XMMWORD PTR [src_ptr + src_stride]
|
| - movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16]
|
| - movdqu xmm4, XMMWORD PTR [end_ptr]
|
| - movdqu xmm5, XMMWORD PTR [end_ptr + 16]
|
| - movdqu xmm6, XMMWORD PTR [end_ptr + src_stride]
|
| - movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16]
|
| -
|
| - lea src_ptr, [src_ptr+src_stride*4]
|
| -
|
| - lea end_ptr, [ref_ptr+ref_stride*2]
|
| -
|
| - movdqa XMMWORD PTR [ref_ptr], xmm0
|
| - movdqa XMMWORD PTR [ref_ptr + 16], xmm1
|
| - movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2
|
| - movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3
|
| - movdqa XMMWORD PTR [end_ptr], xmm4
|
| - movdqa XMMWORD PTR [end_ptr + 16], xmm5
|
| - movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6
|
| - movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7
|
| -
|
| - lea ref_ptr, [ref_ptr+ref_stride*4]
|
| -
|
| - sub height, 4
|
| - cmp height, 4
|
| - jge block_copy_sse3_loopx4
|
| -
|
| - ;Check to see if there is more rows need to be copied.
|
| - cmp height, 0
|
| - je copy_is_done
|
| -
|
| -block_copy_sse3_loop:
|
| - movdqu xmm0, XMMWORD PTR [src_ptr]
|
| - movdqu xmm1, XMMWORD PTR [src_ptr + 16]
|
| - lea src_ptr, [src_ptr+src_stride]
|
| -
|
| - movdqa XMMWORD PTR [ref_ptr], xmm0
|
| - movdqa XMMWORD PTR [ref_ptr + 16], xmm1
|
| - lea ref_ptr, [ref_ptr+ref_stride]
|
| -
|
| - sub height, 1
|
| - jne block_copy_sse3_loop
|
| -
|
| -copy_is_done:
|
| - STACK_FRAME_DESTROY_X3
|
| -
|
| ;void vp8_sad16x16x4d_sse3(
|
| ; unsigned char *src_ptr,
|
| ; int src_stride,
|
| @@ -957,4 +888,3 @@ sym(vp8_sad4x4x4d_sse3):
|
|
|
|
|
| STACK_FRAME_DESTROY_X4
|
| -
|
|
|