| Index: source/libvpx/vp8/encoder/x86/sad_sse3.asm
|
| ===================================================================
|
| --- source/libvpx/vp8/encoder/x86/sad_sse3.asm (revision 96967)
|
| +++ source/libvpx/vp8/encoder/x86/sad_sse3.asm (working copy)
|
| @@ -20,6 +20,7 @@
|
| %define ret_var rbx
|
| %define result_ptr arg(4)
|
| %define max_err arg(4)
|
| + %define height dword ptr arg(4)
|
| push rbp
|
| mov rbp, rsp
|
| push rsi
|
| @@ -33,14 +34,16 @@
|
| movsxd rdx, dword ptr arg(3) ; ref_stride
|
| %else
|
| %ifidn __OUTPUT_FORMAT__,x64
|
| + SAVE_XMM 7, u
|
| %define src_ptr rcx
|
| %define src_stride rdx
|
| %define ref_ptr r8
|
| %define ref_stride r9
|
| %define end_ptr r10
|
| %define ret_var r11
|
| - %define result_ptr [rsp+8+4*8]
|
| - %define max_err [rsp+8+4*8]
|
| + %define result_ptr [rsp+xmm_stack_space+8+4*8]
|
| + %define max_err [rsp+xmm_stack_space+8+4*8]
|
| + %define height dword ptr [rsp+xmm_stack_space+8+4*8]
|
| %else
|
| %define src_ptr rdi
|
| %define src_stride rsi
|
| @@ -50,6 +53,7 @@
|
| %define ret_var r10
|
| %define result_ptr r8
|
| %define max_err r8
|
| + %define height r8
|
| %endif
|
| %endif
|
|
|
| @@ -64,6 +68,7 @@
|
| %define ret_var
|
| %define result_ptr
|
| %define max_err
|
| + %define height
|
|
|
| %if ABI_IS_32BIT
|
| pop rbx
|
| @@ -72,6 +77,7 @@
|
| pop rbp
|
| %else
|
| %ifidn __OUTPUT_FORMAT__,x64
|
| + RESTORE_XMM
|
| %endif
|
| %endif
|
| ret
|
| @@ -106,6 +112,7 @@
|
| xchg rbx, rax
|
| %else
|
| %ifidn __OUTPUT_FORMAT__,x64
|
| + SAVE_XMM 7, u
|
| %define src_ptr rcx
|
| %define src_stride rdx
|
| %define r0_ptr rsi
|
| @@ -113,7 +120,7 @@
|
| %define r2_ptr r11
|
| %define r3_ptr r8
|
| %define ref_stride r9
|
| - %define result_ptr [rsp+16+4*8]
|
| + %define result_ptr [rsp+xmm_stack_space+16+4*8]
|
| push rsi
|
|
|
| LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr
|
| @@ -151,6 +158,7 @@
|
| %else
|
| %ifidn __OUTPUT_FORMAT__,x64
|
| pop rsi
|
| + RESTORE_XMM
|
| %endif
|
| %endif
|
| ret
|
| @@ -628,6 +636,67 @@
|
|
|
| STACK_FRAME_DESTROY_X3
|
|
|
| +;void vp8_copy32xn_sse3(
|
| +; unsigned char *src_ptr,
|
| +; int src_stride,
|
| +; unsigned char *dst_ptr,
|
| +; int dst_stride,
|
| +; int height);
|
| +; Copies height rows of 32 bytes each. Source rows are loaded with movdqu;
|
| +; destination rows (ref_ptr/ref_stride below) are stored with movdqa, so each
|
| +; destination row must be 16-byte aligned. height <= 0 copies nothing.
|
| +; NOTE(review): the 64-bit non-x64-format path defines height as all of r8; confirm the upper half of the int argument is clean.
|
| +global sym(vp8_copy32xn_sse3)
|
| +sym(vp8_copy32xn_sse3):
|
| + STACK_FRAME_CREATE_X3
|
| + ; Fewer than 4 rows requested: skip the unrolled loop, which always copies 4.
|
| + cmp height, 4
|
| + jl block_copy_sse3_remainder
|
| +
|
| +block_copy_sse3_loopx4:
|
| + lea end_ptr, [src_ptr+src_stride*2] ; end_ptr = source row 2
|
| + movdqu xmm0, XMMWORD PTR [src_ptr]
|
| + movdqu xmm1, XMMWORD PTR [src_ptr + 16]
|
| + movdqu xmm2, XMMWORD PTR [src_ptr + src_stride]
|
| + movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16]
|
| + movdqu xmm4, XMMWORD PTR [end_ptr]
|
| + movdqu xmm5, XMMWORD PTR [end_ptr + 16]
|
| + movdqu xmm6, XMMWORD PTR [end_ptr + src_stride]
|
| + movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16]
|
| + lea src_ptr, [src_ptr+src_stride*4] ; advance source by 4 rows
|
| + lea end_ptr, [ref_ptr+ref_stride*2] ; end_ptr = destination row 2
|
| + movdqa XMMWORD PTR [ref_ptr], xmm0
|
| + movdqa XMMWORD PTR [ref_ptr + 16], xmm1
|
| + movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2
|
| + movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3
|
| + movdqa XMMWORD PTR [end_ptr], xmm4
|
| + movdqa XMMWORD PTR [end_ptr + 16], xmm5
|
| + movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6
|
| + movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7
|
| + lea ref_ptr, [ref_ptr+ref_stride*4] ; advance destination by 4 rows
|
| +
|
| + sub height, 4
|
| + cmp height, 4 ; another full group of 4 rows left?
|
| + jge block_copy_sse3_loopx4
|
| +
|
| +block_copy_sse3_remainder:
|
| + ; Copy any rows still outstanding (fewer than 4) one row per iteration.
|
| + cmp height, 0
|
| + jle copy_is_done
|
| +
|
| +block_copy_sse3_loop:
|
| + movdqu xmm0, XMMWORD PTR [src_ptr]
|
| + movdqu xmm1, XMMWORD PTR [src_ptr + 16]
|
| + lea src_ptr, [src_ptr+src_stride]
|
| + movdqa XMMWORD PTR [ref_ptr], xmm0
|
| + movdqa XMMWORD PTR [ref_ptr + 16], xmm1
|
| + lea ref_ptr, [ref_ptr+ref_stride]
|
| + sub height, 1 ; sets ZF when the last row has been copied
|
| + jne block_copy_sse3_loop
|
| +
|
| +copy_is_done:
|
| + STACK_FRAME_DESTROY_X3
|
| +
|
| ;void vp8_sad16x16x4d_sse3(
|
| ; unsigned char *src_ptr,
|
| ; int src_stride,
|
| @@ -888,3 +957,4 @@
|
|
|
|
|
| STACK_FRAME_DESTROY_X4
|
| +
|
|
|