| Index: source/libvpx/vp9/encoder/x86/vp9_sad_sse3.asm
|
| diff --git a/source/libvpx/vp9/encoder/x86/vp9_sad_sse3.asm b/source/libvpx/vp9/encoder/x86/vp9_sad_sse3.asm
|
| deleted file mode 100644
|
| index 2b90a5d54789fa46787e0e2e7efe1dc6c978fdb8..0000000000000000000000000000000000000000
|
| --- a/source/libvpx/vp9/encoder/x86/vp9_sad_sse3.asm
|
| +++ /dev/null
|
| @@ -1,378 +0,0 @@
|
| -;
|
| -; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
| -;
|
| -; Use of this source code is governed by a BSD-style license
|
| -; that can be found in the LICENSE file in the root of the source
|
| -; tree. An additional intellectual property rights grant can be found
|
| -; in the file PATENTS. All contributing project authors may
|
| -; be found in the AUTHORS file in the root of the source tree.
|
| -;
|
| -
|
| -%include "vpx_ports/x86_abi_support.asm"
|
| -
|
| -%macro STACK_FRAME_CREATE_X3 0
|
| -%if ABI_IS_32BIT
|
| - %define src_ptr rsi
|
| - %define src_stride rax
|
| - %define ref_ptr rdi
|
| - %define ref_stride rdx
|
| - %define end_ptr rcx
|
| - %define ret_var rbx
|
| - %define result_ptr arg(4)
|
| - %define max_err arg(4)
|
| - %define height dword ptr arg(4)
|
| - push rbp
|
| - mov rbp, rsp
|
| - push rsi
|
| - push rdi
|
| - push rbx
|
| -
|
| - mov rsi, arg(0) ; src_ptr
|
| - mov rdi, arg(2) ; ref_ptr
|
| -
|
| - movsxd rax, dword ptr arg(1) ; src_stride
|
| - movsxd rdx, dword ptr arg(3) ; ref_stride
|
| -%else
|
| - %if LIBVPX_YASM_WIN64
|
| - SAVE_XMM 7, u
|
| - %define src_ptr rcx
|
| - %define src_stride rdx
|
| - %define ref_ptr r8
|
| - %define ref_stride r9
|
| - %define end_ptr r10
|
| - %define ret_var r11
|
| - %define result_ptr [rsp+xmm_stack_space+8+4*8]
|
| - %define max_err [rsp+xmm_stack_space+8+4*8]
|
| - %define height dword ptr [rsp+xmm_stack_space+8+4*8]
|
| - %else
|
| - %define src_ptr rdi
|
| - %define src_stride rsi
|
| - %define ref_ptr rdx
|
| - %define ref_stride rcx
|
| - %define end_ptr r9
|
| - %define ret_var r10
|
| - %define result_ptr r8
|
| - %define max_err r8
|
| - %define height r8
|
| - %endif
|
| -%endif
|
| -
|
| -%endmacro
|
| -
|
| -%macro STACK_FRAME_DESTROY_X3 0
|
| - %define src_ptr
|
| - %define src_stride
|
| - %define ref_ptr
|
| - %define ref_stride
|
| - %define end_ptr
|
| - %define ret_var
|
| - %define result_ptr
|
| - %define max_err
|
| - %define height
|
| -
|
| -%if ABI_IS_32BIT
|
| - pop rbx
|
| - pop rdi
|
| - pop rsi
|
| - pop rbp
|
| -%else
|
| - %if LIBVPX_YASM_WIN64
|
| - RESTORE_XMM
|
| - %endif
|
| -%endif
|
| - ret
|
| -%endmacro
|
| -
|
| -%macro PROCESS_16X2X3 5
|
| -%if %1==0
|
| - movdqa xmm0, XMMWORD PTR [%2]
|
| - lddqu xmm5, XMMWORD PTR [%3]
|
| - lddqu xmm6, XMMWORD PTR [%3+1]
|
| - lddqu xmm7, XMMWORD PTR [%3+2]
|
| -
|
| - psadbw xmm5, xmm0
|
| - psadbw xmm6, xmm0
|
| - psadbw xmm7, xmm0
|
| -%else
|
| - movdqa xmm0, XMMWORD PTR [%2]
|
| - lddqu xmm1, XMMWORD PTR [%3]
|
| - lddqu xmm2, XMMWORD PTR [%3+1]
|
| - lddqu xmm3, XMMWORD PTR [%3+2]
|
| -
|
| - psadbw xmm1, xmm0
|
| - psadbw xmm2, xmm0
|
| - psadbw xmm3, xmm0
|
| -
|
| - paddw xmm5, xmm1
|
| - paddw xmm6, xmm2
|
| - paddw xmm7, xmm3
|
| -%endif
|
| - movdqa xmm0, XMMWORD PTR [%2+%4]
|
| - lddqu xmm1, XMMWORD PTR [%3+%5]
|
| - lddqu xmm2, XMMWORD PTR [%3+%5+1]
|
| - lddqu xmm3, XMMWORD PTR [%3+%5+2]
|
| -
|
| -%if %1==0 || %1==1
|
| - lea %2, [%2+%4*2]
|
| - lea %3, [%3+%5*2]
|
| -%endif
|
| -
|
| - psadbw xmm1, xmm0
|
| - psadbw xmm2, xmm0
|
| - psadbw xmm3, xmm0
|
| -
|
| - paddw xmm5, xmm1
|
| - paddw xmm6, xmm2
|
| - paddw xmm7, xmm3
|
| -%endmacro
|
| -
|
| -%macro PROCESS_8X2X3 5
|
| -%if %1==0
|
| - movq mm0, QWORD PTR [%2]
|
| - movq mm5, QWORD PTR [%3]
|
| - movq mm6, QWORD PTR [%3+1]
|
| - movq mm7, QWORD PTR [%3+2]
|
| -
|
| - psadbw mm5, mm0
|
| - psadbw mm6, mm0
|
| - psadbw mm7, mm0
|
| -%else
|
| - movq mm0, QWORD PTR [%2]
|
| - movq mm1, QWORD PTR [%3]
|
| - movq mm2, QWORD PTR [%3+1]
|
| - movq mm3, QWORD PTR [%3+2]
|
| -
|
| - psadbw mm1, mm0
|
| - psadbw mm2, mm0
|
| - psadbw mm3, mm0
|
| -
|
| - paddw mm5, mm1
|
| - paddw mm6, mm2
|
| - paddw mm7, mm3
|
| -%endif
|
| - movq mm0, QWORD PTR [%2+%4]
|
| - movq mm1, QWORD PTR [%3+%5]
|
| - movq mm2, QWORD PTR [%3+%5+1]
|
| - movq mm3, QWORD PTR [%3+%5+2]
|
| -
|
| -%if %1==0 || %1==1
|
| - lea %2, [%2+%4*2]
|
| - lea %3, [%3+%5*2]
|
| -%endif
|
| -
|
| - psadbw mm1, mm0
|
| - psadbw mm2, mm0
|
| - psadbw mm3, mm0
|
| -
|
| - paddw mm5, mm1
|
| - paddw mm6, mm2
|
| - paddw mm7, mm3
|
| -%endmacro
|
| -
|
| -;void int vp9_sad16x16x3_sse3(
|
| -; unsigned char *src_ptr,
|
| -; int src_stride,
|
| -; unsigned char *ref_ptr,
|
| -; int ref_stride,
|
| -; int *results)
|
| -global sym(vp9_sad16x16x3_sse3) PRIVATE
|
| -sym(vp9_sad16x16x3_sse3):
|
| -
|
| - STACK_FRAME_CREATE_X3
|
| -
|
| - PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
|
| -
|
| - mov rcx, result_ptr
|
| -
|
| - movq xmm0, xmm5
|
| - psrldq xmm5, 8
|
| -
|
| - paddw xmm0, xmm5
|
| - movd [rcx], xmm0
|
| -;-
|
| - movq xmm0, xmm6
|
| - psrldq xmm6, 8
|
| -
|
| - paddw xmm0, xmm6
|
| - movd [rcx+4], xmm0
|
| -;-
|
| - movq xmm0, xmm7
|
| - psrldq xmm7, 8
|
| -
|
| - paddw xmm0, xmm7
|
| - movd [rcx+8], xmm0
|
| -
|
| - STACK_FRAME_DESTROY_X3
|
| -
|
| -;void int vp9_sad16x8x3_sse3(
|
| -; unsigned char *src_ptr,
|
| -; int src_stride,
|
| -; unsigned char *ref_ptr,
|
| -; int ref_stride,
|
| -; int *results)
|
| -global sym(vp9_sad16x8x3_sse3) PRIVATE
|
| -sym(vp9_sad16x8x3_sse3):
|
| -
|
| - STACK_FRAME_CREATE_X3
|
| -
|
| - PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
|
| -
|
| - mov rcx, result_ptr
|
| -
|
| - movq xmm0, xmm5
|
| - psrldq xmm5, 8
|
| -
|
| - paddw xmm0, xmm5
|
| - movd [rcx], xmm0
|
| -;-
|
| - movq xmm0, xmm6
|
| - psrldq xmm6, 8
|
| -
|
| - paddw xmm0, xmm6
|
| - movd [rcx+4], xmm0
|
| -;-
|
| - movq xmm0, xmm7
|
| - psrldq xmm7, 8
|
| -
|
| - paddw xmm0, xmm7
|
| - movd [rcx+8], xmm0
|
| -
|
| - STACK_FRAME_DESTROY_X3
|
| -
|
| -;void int vp9_sad8x16x3_sse3(
|
| -; unsigned char *src_ptr,
|
| -; int src_stride,
|
| -; unsigned char *ref_ptr,
|
| -; int ref_stride,
|
| -; int *results)
|
| -global sym(vp9_sad8x16x3_sse3) PRIVATE
|
| -sym(vp9_sad8x16x3_sse3):
|
| -
|
| - STACK_FRAME_CREATE_X3
|
| -
|
| - PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
|
| -
|
| - mov rcx, result_ptr
|
| -
|
| - punpckldq mm5, mm6
|
| -
|
| - movq [rcx], mm5
|
| - movd [rcx+8], mm7
|
| -
|
| - STACK_FRAME_DESTROY_X3
|
| -
|
| -;void int vp9_sad8x8x3_sse3(
|
| -; unsigned char *src_ptr,
|
| -; int src_stride,
|
| -; unsigned char *ref_ptr,
|
| -; int ref_stride,
|
| -; int *results)
|
| -global sym(vp9_sad8x8x3_sse3) PRIVATE
|
| -sym(vp9_sad8x8x3_sse3):
|
| -
|
| - STACK_FRAME_CREATE_X3
|
| -
|
| - PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
|
| - PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
|
| -
|
| - mov rcx, result_ptr
|
| -
|
| - punpckldq mm5, mm6
|
| -
|
| - movq [rcx], mm5
|
| - movd [rcx+8], mm7
|
| -
|
| - STACK_FRAME_DESTROY_X3
|
| -
|
| -;void int vp9_sad4x4x3_sse3(
|
| -; unsigned char *src_ptr,
|
| -; int src_stride,
|
| -; unsigned char *ref_ptr,
|
| -; int ref_stride,
|
| -; int *results)
|
| -global sym(vp9_sad4x4x3_sse3) PRIVATE
|
| -sym(vp9_sad4x4x3_sse3):
|
| -
|
| - STACK_FRAME_CREATE_X3
|
| -
|
| - movd mm0, DWORD PTR [src_ptr]
|
| - movd mm1, DWORD PTR [ref_ptr]
|
| -
|
| - movd mm2, DWORD PTR [src_ptr+src_stride]
|
| - movd mm3, DWORD PTR [ref_ptr+ref_stride]
|
| -
|
| - punpcklbw mm0, mm2
|
| - punpcklbw mm1, mm3
|
| -
|
| - movd mm4, DWORD PTR [ref_ptr+1]
|
| - movd mm5, DWORD PTR [ref_ptr+2]
|
| -
|
| - movd mm2, DWORD PTR [ref_ptr+ref_stride+1]
|
| - movd mm3, DWORD PTR [ref_ptr+ref_stride+2]
|
| -
|
| - psadbw mm1, mm0
|
| -
|
| - punpcklbw mm4, mm2
|
| - punpcklbw mm5, mm3
|
| -
|
| - psadbw mm4, mm0
|
| - psadbw mm5, mm0
|
| -
|
| - lea src_ptr, [src_ptr+src_stride*2]
|
| - lea ref_ptr, [ref_ptr+ref_stride*2]
|
| -
|
| - movd mm0, DWORD PTR [src_ptr]
|
| - movd mm2, DWORD PTR [ref_ptr]
|
| -
|
| - movd mm3, DWORD PTR [src_ptr+src_stride]
|
| - movd mm6, DWORD PTR [ref_ptr+ref_stride]
|
| -
|
| - punpcklbw mm0, mm3
|
| - punpcklbw mm2, mm6
|
| -
|
| - movd mm3, DWORD PTR [ref_ptr+1]
|
| - movd mm7, DWORD PTR [ref_ptr+2]
|
| -
|
| - psadbw mm2, mm0
|
| -
|
| - paddw mm1, mm2
|
| -
|
| - movd mm2, DWORD PTR [ref_ptr+ref_stride+1]
|
| - movd mm6, DWORD PTR [ref_ptr+ref_stride+2]
|
| -
|
| - punpcklbw mm3, mm2
|
| - punpcklbw mm7, mm6
|
| -
|
| - psadbw mm3, mm0
|
| - psadbw mm7, mm0
|
| -
|
| - paddw mm3, mm4
|
| - paddw mm7, mm5
|
| -
|
| - mov rcx, result_ptr
|
| -
|
| - punpckldq mm1, mm3
|
| -
|
| - movq [rcx], mm1
|
| - movd [rcx+8], mm7
|
| -
|
| - STACK_FRAME_DESTROY_X3
|
|
|