Index: source/libvpx/vp9/common/x86/vp9_recon_mmx.asm |
=================================================================== |
--- source/libvpx/vp9/common/x86/vp9_recon_mmx.asm (revision 0) |
+++ source/libvpx/vp9/common/x86/vp9_recon_mmx.asm (revision 0) |
@@ -0,0 +1,321 @@ |
+; |
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
+; |
+; Use of this source code is governed by a BSD-style license |
+; that can be found in the LICENSE file in the root of the source |
+; tree. An additional intellectual property rights grant can be found |
+; in the file PATENTS. All contributing project authors may |
+; be found in the AUTHORS file in the root of the source tree. |
+; |
+ |
+ |
+%include "vpx_ports/x86_abi_support.asm" |
+;void vp9_recon_b_mmx(unsigned char *s, short *q, unsigned char *d, int stride)
+global sym(vp9_recon_b_mmx)
+sym(vp9_recon_b_mmx):
+    push rbp
+    mov rbp, rsp
+    SHADOW_ARGS_TO_STACK 4
+    push rsi
+    push rdi
+    ; end prolog
+
+    mov rsi, arg(0) ;s: 4x4 predictor block, fixed 16-byte row pitch
+    mov rdi, arg(2) ;d: reconstructed output, 'stride' bytes per row
+    mov rdx, arg(1) ;q: residual, 16 shorts (32 bytes) per row
+    movsxd rax, dword ptr arg(3) ;stride, sign-extended for 64-bit addressing
+    pxor mm0, mm0 ; mm0 = 0, used to widen bytes to words (and as zero pack half)
+
+    movd mm1, [rsi] ; row 0: load 4 predictor bytes
+    punpcklbw mm1, mm0 ; bytes -> 16-bit words
+    paddsw mm1, [rdx] ; add 4 residual words (signed saturating)
+    packuswb mm1, mm0 ; pack words back to bytes with unsigned saturation
+    movd [rdi], mm1 ; store d row 0
+
+    movd mm2, [rsi+16] ; row 1 (predictor row pitch is 16)
+    punpcklbw mm2, mm0
+    paddsw mm2, [rdx+32] ; residual row pitch is 32 bytes
+    packuswb mm2, mm0 ; pack words back to bytes with unsigned saturation
+    movd [rdi+rax], mm2 ; store d row 1
+
+    movd mm3, [rsi+32] ; row 2
+    punpcklbw mm3, mm0
+    paddsw mm3, [rdx+64]
+    packuswb mm3, mm0 ; pack words back to bytes with unsigned saturation
+    movd [rdi+2*rax], mm3 ; store d row 2
+
+    add rdi, rax ; bump dest so rdi+2*rax addresses d row 3
+    movd mm4, [rsi+48] ; row 3
+    punpcklbw mm4, mm0
+    paddsw mm4, [rdx+96]
+    packuswb mm4, mm0 ; pack words back to bytes with unsigned saturation
+    movd [rdi+2*rax], mm4 ; store d row 3 (= d + 3*stride)
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    UNSHADOW_ARGS
+    pop rbp
+    ret
+ |
+ |
+;void vp9_copy_mem8x8_mmx(
+; unsigned char *src,
+; int src_stride,
+; unsigned char *dst,
+; int dst_stride
+; )
+global sym(vp9_copy_mem8x8_mmx)
+sym(vp9_copy_mem8x8_mmx):
+    push rbp
+    mov rbp, rsp
+    SHADOW_ARGS_TO_STACK 4
+    push rsi
+    push rdi
+    ; end prolog
+
+    mov rsi, arg(0) ;src; base of 8x8 source block
+    movq mm0, [rsi] ; row 0
+
+    movsxd rax, dword ptr arg(1) ;src_stride; sign-extended for addressing
+    mov rdi, arg(2) ;dst;
+
+    movq mm1, [rsi+rax] ; row 1
+    movq mm2, [rsi+rax*2] ; row 2
+
+    movsxd rcx, dword ptr arg(3) ;dst_stride
+    lea rsi, [rsi+rax*2] ; rsi -> src row 2
+
+    movq [rdi], mm0 ; store row 0
+    add rsi, rax ; rsi -> src row 3
+
+    movq [rdi+rcx], mm1 ; store row 1
+    movq [rdi+rcx*2], mm2 ; store row 2
+
+
+    lea rdi, [rdi+rcx*2] ; rdi -> dst row 2
+    movq mm3, [rsi] ; row 3
+
+    add rdi, rcx ; rdi -> dst row 3
+    movq mm4, [rsi+rax] ; row 4
+
+    movq mm5, [rsi+rax*2] ; row 5
+    movq [rdi], mm3 ; store row 3
+
+    lea rsi, [rsi+rax*2] ; rsi -> src row 5
+    movq [rdi+rcx], mm4 ; store row 4
+
+    movq [rdi+rcx*2], mm5 ; store row 5
+    lea rdi, [rdi+rcx*2] ; rdi -> dst row 5
+
+    movq mm0, [rsi+rax] ; row 6
+    movq mm1, [rsi+rax*2] ; row 7
+
+    movq [rdi+rcx], mm0 ; store row 6
+    movq [rdi+rcx*2],mm1 ; store row 7
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    UNSHADOW_ARGS
+    pop rbp
+    ret
+ |
+ |
+;void vp9_copy_mem8x4_mmx(
+; unsigned char *src,
+; int src_stride,
+; unsigned char *dst,
+; int dst_stride
+; )
+global sym(vp9_copy_mem8x4_mmx)
+sym(vp9_copy_mem8x4_mmx):
+    push rbp
+    mov rbp, rsp
+    SHADOW_ARGS_TO_STACK 4
+    push rsi
+    push rdi
+    ; end prolog
+
+    mov rsi, arg(0) ;src; base of 8x4 source block
+    movq mm0, [rsi] ; row 0
+
+    movsxd rax, dword ptr arg(1) ;src_stride; sign-extended for addressing
+    mov rdi, arg(2) ;dst;
+
+    movq mm1, [rsi+rax] ; row 1
+    movq mm2, [rsi+rax*2] ; row 2
+
+    movsxd rcx, dword ptr arg(3) ;dst_stride
+    lea rsi, [rsi+rax*2] ; rsi -> src row 2
+
+    movq [rdi], mm0 ; store row 0
+    movq [rdi+rcx], mm1 ; store row 1
+
+    movq [rdi+rcx*2], mm2 ; store row 2
+    lea rdi, [rdi+rcx*2] ; rdi -> dst row 2
+
+    movq mm3, [rsi+rax] ; row 3
+    movq [rdi+rcx], mm3 ; store row 3
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    UNSHADOW_ARGS
+    pop rbp
+    ret
+ |
+ |
+;void vp9_copy_mem16x16_mmx(
+; unsigned char *src,
+; int src_stride,
+; unsigned char *dst,
+; int dst_stride
+; )
+global sym(vp9_copy_mem16x16_mmx)
+sym(vp9_copy_mem16x16_mmx):
+    push rbp
+    mov rbp, rsp
+    SHADOW_ARGS_TO_STACK 4
+    push rsi
+    push rdi
+    ; end prolog
+
+    mov rsi, arg(0) ;src; base of 16x16 source block
+    movsxd rax, dword ptr arg(1) ;src_stride; sign-extended for addressing
+
+    mov rdi, arg(2) ;dst;
+    movsxd rcx, dword ptr arg(3) ;dst_stride
+
+    movq mm0, [rsi] ; rows 0-2: left 8 bytes in mm0-mm2,
+    movq mm3, [rsi+8]; right 8 bytes in mm3-mm5
+
+    movq mm1, [rsi+rax]
+    movq mm4, [rsi+rax+8]
+
+    movq mm2, [rsi+rax*2]
+    movq mm5, [rsi+rax*2+8]
+
+    lea rsi, [rsi+rax*2] ; advance source by 3 rows
+    add rsi, rax
+
+    movq [rdi], mm0 ; store rows 0-2
+    movq [rdi+8], mm3
+
+    movq [rdi+rcx], mm1
+    movq [rdi+rcx+8], mm4
+
+    movq [rdi+rcx*2], mm2
+    movq [rdi+rcx*2+8], mm5
+
+    lea rdi, [rdi+rcx*2] ; advance dest by 3 rows
+    add rdi, rcx
+
+    movq mm0, [rsi] ; rows 3-5
+    movq mm3, [rsi+8];
+
+    movq mm1, [rsi+rax]
+    movq mm4, [rsi+rax+8]
+
+    movq mm2, [rsi+rax*2]
+    movq mm5, [rsi+rax*2+8]
+
+    lea rsi, [rsi+rax*2] ; advance source by 3 rows
+    add rsi, rax
+
+    movq [rdi], mm0 ; store rows 3-5
+    movq [rdi+8], mm3
+
+    movq [rdi+rcx], mm1
+    movq [rdi+rcx+8], mm4
+
+    movq [rdi+rcx*2], mm2
+    movq [rdi+rcx*2+8], mm5
+
+    lea rdi, [rdi+rcx*2] ; advance dest by 3 rows
+    add rdi, rcx
+
+    movq mm0, [rsi] ; rows 6-8
+    movq mm3, [rsi+8];
+
+    movq mm1, [rsi+rax]
+    movq mm4, [rsi+rax+8]
+
+    movq mm2, [rsi+rax*2]
+    movq mm5, [rsi+rax*2+8]
+
+    lea rsi, [rsi+rax*2] ; advance source by 3 rows
+    add rsi, rax
+
+    movq [rdi], mm0 ; store rows 6-8
+    movq [rdi+8], mm3
+
+    movq [rdi+rcx], mm1
+    movq [rdi+rcx+8], mm4
+
+    movq [rdi+rcx*2], mm2
+    movq [rdi+rcx*2+8], mm5
+
+    lea rdi, [rdi+rcx*2] ; advance dest by 3 rows
+    add rdi, rcx
+
+    movq mm0, [rsi] ; rows 9-11
+    movq mm3, [rsi+8];
+
+    movq mm1, [rsi+rax]
+    movq mm4, [rsi+rax+8]
+
+    movq mm2, [rsi+rax*2]
+    movq mm5, [rsi+rax*2+8]
+
+    lea rsi, [rsi+rax*2] ; advance source by 3 rows
+    add rsi, rax
+
+    movq [rdi], mm0 ; store rows 9-11
+    movq [rdi+8], mm3
+
+    movq [rdi+rcx], mm1
+    movq [rdi+rcx+8], mm4
+
+    movq [rdi+rcx*2], mm2
+    movq [rdi+rcx*2+8], mm5
+
+    lea rdi, [rdi+rcx*2] ; advance dest by 3 rows
+    add rdi, rcx
+
+    movq mm0, [rsi] ; rows 12-14
+    movq mm3, [rsi+8];
+
+    movq mm1, [rsi+rax]
+    movq mm4, [rsi+rax+8]
+
+    movq mm2, [rsi+rax*2]
+    movq mm5, [rsi+rax*2+8]
+
+    lea rsi, [rsi+rax*2] ; advance source by 3 rows
+    add rsi, rax
+
+    movq [rdi], mm0 ; store rows 12-14
+    movq [rdi+8], mm3
+
+    movq [rdi+rcx], mm1
+    movq [rdi+rcx+8], mm4
+
+    movq [rdi+rcx*2], mm2
+    movq [rdi+rcx*2+8], mm5
+
+    lea rdi, [rdi+rcx*2] ; advance dest by 3 rows
+    add rdi, rcx
+
+    movq mm0, [rsi] ; row 15 (final row)
+    movq mm3, [rsi+8]; right 8 bytes of row 15
+
+    movq [rdi], mm0 ; store row 15
+    movq [rdi+8], mm3
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    UNSHADOW_ARGS
+    pop rbp
+    ret