source/libvpx/vp9/encoder/x86/vp9_sad_sse4.asm - Issue 1124333011: libvpx: Pull from upstream

Unified Diff: source/libvpx/vp9/encoder/x86/vp9_sad_sse4.asm

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: only update to last nights LKGR Created 5 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/libvpx/vp9/encoder/x86/vp9_sad_sse4.asm

diff --git a/source/libvpx/vp9/encoder/x86/vp9_sad_sse4.asm b/source/libvpx/vp9/encoder/x86/vp9_sad_sse4.asm

deleted file mode 100644

index faf1768a983bff9d25cc973cdb790dce856414c3..0000000000000000000000000000000000000000

--- a/source/libvpx/vp9/encoder/x86/vp9_sad_sse4.asm

+++ /dev/null

@@ -1,359 +0,0 @@

-; Use of this source code is governed by a BSD-style license

-; that can be found in the LICENSE file in the root of the source

-; tree. An additional intellectual property rights grant can be found

-; in the file PATENTS. All contributing project authors may

-; be found in the AUTHORS file in the root of the source tree.

-%include "vpx_ports/x86_abi_support.asm"

-%macro PROCESS_16X2X8 1

-%if %1

- movdqa xmm0, XMMWORD PTR [rsi]

- movq xmm1, MMWORD PTR [rdi]

- movq xmm3, MMWORD PTR [rdi+8]

- movq xmm2, MMWORD PTR [rdi+16]

- punpcklqdq xmm1, xmm3

- punpcklqdq xmm3, xmm2

- movdqa xmm2, xmm1

- mpsadbw xmm1, xmm0, 0x0

- mpsadbw xmm2, xmm0, 0x5

- psrldq xmm0, 8

- movdqa xmm4, xmm3

- mpsadbw xmm3, xmm0, 0x0

- mpsadbw xmm4, xmm0, 0x5

- paddw xmm1, xmm2

- paddw xmm1, xmm3

- paddw xmm1, xmm4

-%else

- movdqa xmm0, XMMWORD PTR [rsi]

- movq xmm5, MMWORD PTR [rdi]

- movq xmm3, MMWORD PTR [rdi+8]

- movq xmm2, MMWORD PTR [rdi+16]

- punpcklqdq xmm5, xmm3

- punpcklqdq xmm3, xmm2

- movdqa xmm2, xmm5

- mpsadbw xmm5, xmm0, 0x0

- mpsadbw xmm2, xmm0, 0x5

- psrldq xmm0, 8

- movdqa xmm4, xmm3

- mpsadbw xmm3, xmm0, 0x0

- mpsadbw xmm4, xmm0, 0x5

- paddw xmm5, xmm2

- paddw xmm5, xmm3

- paddw xmm5, xmm4

- paddw xmm1, xmm5

-%endif

- movdqa xmm0, XMMWORD PTR [rsi + rax]

- movq xmm5, MMWORD PTR [rdi+ rdx]

- movq xmm3, MMWORD PTR [rdi+ rdx+8]

- movq xmm2, MMWORD PTR [rdi+ rdx+16]

- punpcklqdq xmm5, xmm3

- punpcklqdq xmm3, xmm2

- lea rsi, [rsi+rax*2]

- lea rdi, [rdi+rdx*2]

- movdqa xmm2, xmm5

- mpsadbw xmm5, xmm0, 0x0

- mpsadbw xmm2, xmm0, 0x5

- psrldq xmm0, 8

- movdqa xmm4, xmm3

- mpsadbw xmm3, xmm0, 0x0

- mpsadbw xmm4, xmm0, 0x5

- paddw xmm5, xmm2

- paddw xmm5, xmm3

- paddw xmm5, xmm4

- paddw xmm1, xmm5

-%endmacro

-%macro PROCESS_8X2X8 1

-%if %1

- movq xmm0, MMWORD PTR [rsi]

- movq xmm1, MMWORD PTR [rdi]

- movq xmm3, MMWORD PTR [rdi+8]

- punpcklqdq xmm1, xmm3

- movdqa xmm2, xmm1

- mpsadbw xmm1, xmm0, 0x0

- mpsadbw xmm2, xmm0, 0x5

- paddw xmm1, xmm2

-%else

- movq xmm0, MMWORD PTR [rsi]

- movq xmm5, MMWORD PTR [rdi]

- movq xmm3, MMWORD PTR [rdi+8]

- punpcklqdq xmm5, xmm3

- movdqa xmm2, xmm5

- mpsadbw xmm5, xmm0, 0x0

- mpsadbw xmm2, xmm0, 0x5

- paddw xmm5, xmm2

- paddw xmm1, xmm5

-%endif

- movq xmm0, MMWORD PTR [rsi + rax]

- movq xmm5, MMWORD PTR [rdi+ rdx]

- movq xmm3, MMWORD PTR [rdi+ rdx+8]

- punpcklqdq xmm5, xmm3

- lea rsi, [rsi+rax*2]

- lea rdi, [rdi+rdx*2]

- movdqa xmm2, xmm5

- mpsadbw xmm5, xmm0, 0x0

- mpsadbw xmm2, xmm0, 0x5

- paddw xmm5, xmm2

- paddw xmm1, xmm5

-%endmacro

-%macro PROCESS_4X2X8 1

-%if %1

- movd xmm0, [rsi]

- movq xmm1, MMWORD PTR [rdi]

- movq xmm3, MMWORD PTR [rdi+8]

- punpcklqdq xmm1, xmm3

- mpsadbw xmm1, xmm0, 0x0

-%else

- movd xmm0, [rsi]

- movq xmm5, MMWORD PTR [rdi]

- movq xmm3, MMWORD PTR [rdi+8]

- punpcklqdq xmm5, xmm3

- mpsadbw xmm5, xmm0, 0x0

- paddw xmm1, xmm5

-%endif

- movd xmm0, [rsi + rax]

- movq xmm5, MMWORD PTR [rdi+ rdx]

- movq xmm3, MMWORD PTR [rdi+ rdx+8]

- punpcklqdq xmm5, xmm3

- lea rsi, [rsi+rax*2]

- lea rdi, [rdi+rdx*2]

- mpsadbw xmm5, xmm0, 0x0

- paddw xmm1, xmm5

-%endmacro

-%macro WRITE_AS_INTS 0

- mov rdi, arg(4) ;Results

- pxor xmm0, xmm0

- movdqa xmm2, xmm1

- punpcklwd xmm1, xmm0

- punpckhwd xmm2, xmm0

- movdqa [rdi], xmm1

- movdqa [rdi + 16], xmm2

-%endmacro

-;void vp9_sad16x16x8_sse4(

-; const unsigned char *src_ptr,

-; int src_stride,

-; const unsigned char *ref_ptr,

-; int ref_stride,

-; unsigned short *sad_array);

-global sym(vp9_sad16x16x8_sse4) PRIVATE

-sym(vp9_sad16x16x8_sse4):

- push rbp

- mov rbp, rsp

- SHADOW_ARGS_TO_STACK 5

- push rsi

- push rdi

- ; end prolog

- mov rsi, arg(0) ;src_ptr

- mov rdi, arg(2) ;ref_ptr

- movsxd rax, dword ptr arg(1) ;src_stride

- movsxd rdx, dword ptr arg(3) ;ref_stride

- PROCESS_16X2X8 1

- PROCESS_16X2X8 0

- WRITE_AS_INTS

- ; begin epilog

- pop rdi

- pop rsi

- UNSHADOW_ARGS

- pop rbp

- ret

-;void vp9_sad16x8x8_sse4(

-; const unsigned char *src_ptr,

-; int src_stride,

-; const unsigned char *ref_ptr,

-; int ref_stride,

-; unsigned short *sad_array

-;);

-global sym(vp9_sad16x8x8_sse4) PRIVATE

-sym(vp9_sad16x8x8_sse4):

- push rbp

- mov rbp, rsp

- SHADOW_ARGS_TO_STACK 5

- push rsi

- push rdi

- ; end prolog

- mov rsi, arg(0) ;src_ptr

- mov rdi, arg(2) ;ref_ptr

- movsxd rax, dword ptr arg(1) ;src_stride

- movsxd rdx, dword ptr arg(3) ;ref_stride

- PROCESS_16X2X8 1

- PROCESS_16X2X8 0

- WRITE_AS_INTS

- ; begin epilog

- pop rdi

- pop rsi

- UNSHADOW_ARGS

- pop rbp

- ret

-;void vp9_sad8x8x8_sse4(

-; const unsigned char *src_ptr,

-; int src_stride,

-; const unsigned char *ref_ptr,

-; int ref_stride,

-; unsigned short *sad_array

-;);

-global sym(vp9_sad8x8x8_sse4) PRIVATE

-sym(vp9_sad8x8x8_sse4):

- push rbp

- mov rbp, rsp

- SHADOW_ARGS_TO_STACK 5

- push rsi

- push rdi

- ; end prolog

- mov rsi, arg(0) ;src_ptr

- mov rdi, arg(2) ;ref_ptr

- movsxd rax, dword ptr arg(1) ;src_stride

- movsxd rdx, dword ptr arg(3) ;ref_stride

- PROCESS_8X2X8 1

- PROCESS_8X2X8 0

- WRITE_AS_INTS

- ; begin epilog

- pop rdi

- pop rsi

- UNSHADOW_ARGS

- pop rbp

- ret

-;void vp9_sad8x16x8_sse4(

-; const unsigned char *src_ptr,

-; int src_stride,

-; const unsigned char *ref_ptr,

-; int ref_stride,

-; unsigned short *sad_array

-;);

-global sym(vp9_sad8x16x8_sse4) PRIVATE

-sym(vp9_sad8x16x8_sse4):

- push rbp

- mov rbp, rsp

- SHADOW_ARGS_TO_STACK 5

- push rsi

- push rdi

- ; end prolog

- mov rsi, arg(0) ;src_ptr

- mov rdi, arg(2) ;ref_ptr

- movsxd rax, dword ptr arg(1) ;src_stride

- movsxd rdx, dword ptr arg(3) ;ref_stride

- PROCESS_8X2X8 1

- PROCESS_8X2X8 0

- WRITE_AS_INTS

- ; begin epilog

- pop rdi

- pop rsi

- UNSHADOW_ARGS

- pop rbp

- ret

-;void vp9_sad4x4x8_c(

-; const unsigned char *src_ptr,

-; int src_stride,

-; const unsigned char *ref_ptr,

-; int ref_stride,

-; unsigned short *sad_array

-;);

-global sym(vp9_sad4x4x8_sse4) PRIVATE

-sym(vp9_sad4x4x8_sse4):

- push rbp

- mov rbp, rsp

- SHADOW_ARGS_TO_STACK 5

- push rsi

- push rdi

- ; end prolog

- mov rsi, arg(0) ;src_ptr

- mov rdi, arg(2) ;ref_ptr

- movsxd rax, dword ptr arg(1) ;src_stride

- movsxd rdx, dword ptr arg(3) ;ref_stride

- PROCESS_4X2X8 1

- PROCESS_4X2X8 0

- WRITE_AS_INTS

- ; begin epilog

- pop rdi

- pop rsi

- UNSHADOW_ARGS

- pop rbp

- ret

« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_sad_sse3.asm ('k') | source/libvpx/vp9/encoder/x86/vp9_sad_ssse3.asm » ('j') | no next file with comments »