| Index: source/libvpx/vp9/encoder/x86/vp9_subtract_sse2.asm
|
| ===================================================================
|
| --- source/libvpx/vp9/encoder/x86/vp9_subtract_sse2.asm (revision 0)
|
| +++ source/libvpx/vp9/encoder/x86/vp9_subtract_sse2.asm (revision 0)
|
| @@ -0,0 +1,356 @@
|
| +;
|
| +; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
| +;
|
| +; Use of this source code is governed by a BSD-style license
|
| +; that can be found in the LICENSE file in the root of the source
|
| +; tree. An additional intellectual property rights grant can be found
|
| +; in the file PATENTS. All contributing project authors may
|
| +; be found in the AUTHORS file in the root of the source tree.
|
| +;
|
| +
|
| +
|
| +%include "vpx_ports/x86_abi_support.asm"
|
| +
|
| +;void vp9_subtract_b_sse2_impl(unsigned char *z, int src_stride,
|
| +; short *diff, unsigned char *Predictor,
|
| +; int pitch);
|
| +global sym(vp9_subtract_b_sse2_impl)
|
| +sym(vp9_subtract_b_sse2_impl):
|
| + push rbp
|
| + mov rbp, rsp
|
| + SHADOW_ARGS_TO_STACK 5
|
| + GET_GOT rbx
|
| + push rsi
|
| + push rdi
|
| + ; end prolog
|
| +
|
| + mov rdi, arg(2) ;diff
|
| + mov rax, arg(3) ;Predictor
|
| + mov rsi, arg(0) ;z
|
| + movsxd rdx, dword ptr arg(1);src_stride;
|
| + movsxd rcx, dword ptr arg(4);pitch
|
| + pxor mm7, mm7
|
| +
|
| + movd mm0, [rsi]
|
| + movd mm1, [rax]
|
| + punpcklbw mm0, mm7
|
| + punpcklbw mm1, mm7
|
| + psubw mm0, mm1
|
| + movq MMWORD PTR [rdi], mm0
|
| +
|
| + movd mm0, [rsi+rdx]
|
| + movd mm1, [rax+rcx]
|
| + punpcklbw mm0, mm7
|
| + punpcklbw mm1, mm7
|
| + psubw mm0, mm1
|
| + movq MMWORD PTR [rdi+rcx*2], mm0
|
| +
|
| + movd mm0, [rsi+rdx*2]
|
| + movd mm1, [rax+rcx*2]
|
| + punpcklbw mm0, mm7
|
| + punpcklbw mm1, mm7
|
| + psubw mm0, mm1
|
| + movq MMWORD PTR [rdi+rcx*4], mm0
|
| +
|
| + lea rsi, [rsi+rdx*2]
|
| + lea rcx, [rcx+rcx*2]
|
| +
|
| + movd mm0, [rsi+rdx]
|
| + movd mm1, [rax+rcx]
|
| + punpcklbw mm0, mm7
|
| + punpcklbw mm1, mm7
|
| + psubw mm0, mm1
|
| + movq MMWORD PTR [rdi+rcx*2], mm0
|
| +
|
| + ; begin epilog
|
| + pop rdi
|
| + pop rsi
|
| + RESTORE_GOT
|
| + UNSHADOW_ARGS
|
| + pop rbp
|
| + ret
|
| +
|
| +
|
| +;void vp9_subtract_mby_sse2(short *diff, unsigned char *src, unsigned char *pred, int stride)
|
| +global sym(vp9_subtract_mby_sse2)
|
| +sym(vp9_subtract_mby_sse2):
|
| + push rbp
|
| + mov rbp, rsp
|
| + SHADOW_ARGS_TO_STACK 4
|
| + SAVE_XMM 7
|
| + GET_GOT rbx
|
| + push rsi
|
| + push rdi
|
| + ; end prolog
|
| +
|
| + mov rsi, arg(1) ;src
|
| + mov rdi, arg(0) ;diff
|
| +
|
| + mov rax, arg(2) ;pred
|
| + movsxd rdx, dword ptr arg(3) ;stride
|
| +
|
| + mov rcx, 8 ; do two lines at one time
|
| +
|
| +.submby_loop:
|
| + movdqa xmm0, XMMWORD PTR [rsi] ; src
|
| + movdqa xmm1, XMMWORD PTR [rax] ; pred
|
| +
|
| + movdqa xmm2, xmm0
|
| + psubb xmm0, xmm1
|
| +
|
| + pxor xmm1, [GLOBAL(t80)] ;convert to signed values
|
| + pxor xmm2, [GLOBAL(t80)]
|
| + pcmpgtb xmm1, xmm2 ; obtain sign information
|
| +
|
| + movdqa xmm2, xmm0
|
| + movdqa xmm3, xmm1
|
| + punpcklbw xmm0, xmm1 ; put sign back to subtraction
|
| + punpckhbw xmm2, xmm3 ; put sign back to subtraction
|
| +
|
| + movdqa XMMWORD PTR [rdi], xmm0
|
| + movdqa XMMWORD PTR [rdi +16], xmm2
|
| +
|
| + movdqa xmm4, XMMWORD PTR [rsi + rdx]
|
| + movdqa xmm5, XMMWORD PTR [rax + 16]
|
| +
|
| + movdqa xmm6, xmm4
|
| + psubb xmm4, xmm5
|
| +
|
| + pxor xmm5, [GLOBAL(t80)] ;convert to signed values
|
| + pxor xmm6, [GLOBAL(t80)]
|
| + pcmpgtb xmm5, xmm6 ; obtain sign information
|
| +
|
| + movdqa xmm6, xmm4
|
| + movdqa xmm7, xmm5
|
| + punpcklbw xmm4, xmm5 ; put sign back to subtraction
|
| + punpckhbw xmm6, xmm7 ; put sign back to subtraction
|
| +
|
| + movdqa XMMWORD PTR [rdi +32], xmm4
|
| + movdqa XMMWORD PTR [rdi +48], xmm6
|
| +
|
| + add rdi, 64
|
| + add rax, 32
|
| + lea rsi, [rsi+rdx*2]
|
| +
|
| + sub rcx, 1
|
| + jnz .submby_loop
|
| +
|
| + pop rdi
|
| + pop rsi
|
| + ; begin epilog
|
| + RESTORE_GOT
|
| + RESTORE_XMM
|
| + UNSHADOW_ARGS
|
| + pop rbp
|
| + ret
|
| +
|
| +
|
| +;void vp9_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
|
| +global sym(vp9_subtract_mbuv_sse2)
|
| +sym(vp9_subtract_mbuv_sse2):
|
| + push rbp
|
| + mov rbp, rsp
|
| + SHADOW_ARGS_TO_STACK 5
|
| + GET_GOT rbx
|
| + push rsi
|
| + push rdi
|
| + ; end prolog
|
| +
|
| + mov rdi, arg(0) ;diff
|
| + mov rax, arg(3) ;pred
|
| + mov rsi, arg(1) ;z = usrc
|
| + add rdi, 256*2 ;diff = diff + 256 (shorts)
|
| + add rax, 256 ;Predictor = pred + 256
|
| + movsxd rdx, dword ptr arg(4) ;stride;
|
| + lea rcx, [rdx + rdx*2]
|
| +
|
| + ;u
|
| + ;line 0 1
|
| + movq xmm0, MMWORD PTR [rsi] ; src
|
| + movq xmm2, MMWORD PTR [rsi+rdx]
|
| + movdqa xmm1, XMMWORD PTR [rax] ; pred
|
| + punpcklqdq xmm0, xmm2
|
| +
|
| + movdqa xmm2, xmm0
|
| + psubb xmm0, xmm1 ; subtraction with sign missed
|
| +
|
| + pxor xmm1, [GLOBAL(t80)] ;convert to signed values
|
| + pxor xmm2, [GLOBAL(t80)]
|
| + pcmpgtb xmm1, xmm2 ; obtain sign information
|
| +
|
| + movdqa xmm2, xmm0
|
| + movdqa xmm3, xmm1
|
| + punpcklbw xmm0, xmm1 ; put sign back to subtraction
|
| + punpckhbw xmm2, xmm3 ; put sign back to subtraction
|
| +
|
| + movdqa XMMWORD PTR [rdi], xmm0
|
| + movdqa XMMWORD PTR [rdi +16], xmm2
|
| +
|
| + ;line 2 3
|
| + movq xmm0, MMWORD PTR [rsi+rdx*2] ; src
|
| + movq xmm2, MMWORD PTR [rsi+rcx]
|
| + movdqa xmm1, XMMWORD PTR [rax+16] ; pred
|
| + punpcklqdq xmm0, xmm2
|
| +
|
| + movdqa xmm2, xmm0
|
| + psubb xmm0, xmm1 ; subtraction with sign missed
|
| +
|
| + pxor xmm1, [GLOBAL(t80)] ;convert to signed values
|
| + pxor xmm2, [GLOBAL(t80)]
|
| + pcmpgtb xmm1, xmm2 ; obtain sign information
|
| +
|
| + movdqa xmm2, xmm0
|
| + movdqa xmm3, xmm1
|
| + punpcklbw xmm0, xmm1 ; put sign back to subtraction
|
| + punpckhbw xmm2, xmm3 ; put sign back to subtraction
|
| +
|
| + movdqa XMMWORD PTR [rdi + 32], xmm0
|
| + movdqa XMMWORD PTR [rdi + 48], xmm2
|
| +
|
| + ;line 4 5
|
| + lea rsi, [rsi + rdx*4]
|
| +
|
| + movq xmm0, MMWORD PTR [rsi] ; src
|
| + movq xmm2, MMWORD PTR [rsi+rdx]
|
| + movdqa xmm1, XMMWORD PTR [rax + 32] ; pred
|
| + punpcklqdq xmm0, xmm2
|
| +
|
| + movdqa xmm2, xmm0
|
| + psubb xmm0, xmm1 ; subtraction with sign missed
|
| +
|
| + pxor xmm1, [GLOBAL(t80)] ;convert to signed values
|
| + pxor xmm2, [GLOBAL(t80)]
|
| + pcmpgtb xmm1, xmm2 ; obtain sign information
|
| +
|
| + movdqa xmm2, xmm0
|
| + movdqa xmm3, xmm1
|
| + punpcklbw xmm0, xmm1 ; put sign back to subtraction
|
| + punpckhbw xmm2, xmm3 ; put sign back to subtraction
|
| +
|
| + movdqa XMMWORD PTR [rdi + 64], xmm0
|
| + movdqa XMMWORD PTR [rdi + 80], xmm2
|
| +
|
| + ;line 6 7
|
| + movq xmm0, MMWORD PTR [rsi+rdx*2] ; src
|
| + movq xmm2, MMWORD PTR [rsi+rcx]
|
| + movdqa xmm1, XMMWORD PTR [rax+ 48] ; pred
|
| + punpcklqdq xmm0, xmm2
|
| +
|
| + movdqa xmm2, xmm0
|
| + psubb xmm0, xmm1 ; subtraction with sign missed
|
| +
|
| + pxor xmm1, [GLOBAL(t80)] ;convert to signed values
|
| + pxor xmm2, [GLOBAL(t80)]
|
| + pcmpgtb xmm1, xmm2 ; obtain sign information
|
| +
|
| + movdqa xmm2, xmm0
|
| + movdqa xmm3, xmm1
|
| + punpcklbw xmm0, xmm1 ; put sign back to subtraction
|
| + punpckhbw xmm2, xmm3 ; put sign back to subtraction
|
| +
|
| + movdqa XMMWORD PTR [rdi + 96], xmm0
|
| + movdqa XMMWORD PTR [rdi + 112], xmm2
|
| +
|
| + ;v
|
| + mov rsi, arg(2) ;z = vsrc
|
| + add rdi, 64*2 ;diff = diff + 320 (shorts)
|
| + add rax, 64 ;Predictor = pred + 320
|
| +
|
| + ;line 0 1
|
| + movq xmm0, MMWORD PTR [rsi] ; src
|
| + movq xmm2, MMWORD PTR [rsi+rdx]
|
| + movdqa xmm1, XMMWORD PTR [rax] ; pred
|
| + punpcklqdq xmm0, xmm2
|
| +
|
| + movdqa xmm2, xmm0
|
| + psubb xmm0, xmm1 ; subtraction with sign missed
|
| +
|
| + pxor xmm1, [GLOBAL(t80)] ;convert to signed values
|
| + pxor xmm2, [GLOBAL(t80)]
|
| + pcmpgtb xmm1, xmm2 ; obtain sign information
|
| +
|
| + movdqa xmm2, xmm0
|
| + movdqa xmm3, xmm1
|
| + punpcklbw xmm0, xmm1 ; put sign back to subtraction
|
| + punpckhbw xmm2, xmm3 ; put sign back to subtraction
|
| +
|
| + movdqa XMMWORD PTR [rdi], xmm0
|
| + movdqa XMMWORD PTR [rdi +16], xmm2
|
| +
|
| + ;line 2 3
|
| + movq xmm0, MMWORD PTR [rsi+rdx*2] ; src
|
| + movq xmm2, MMWORD PTR [rsi+rcx]
|
| + movdqa xmm1, XMMWORD PTR [rax+16] ; pred
|
| + punpcklqdq xmm0, xmm2
|
| +
|
| + movdqa xmm2, xmm0
|
| + psubb xmm0, xmm1 ; subtraction with sign missed
|
| +
|
| + pxor xmm1, [GLOBAL(t80)] ;convert to signed values
|
| + pxor xmm2, [GLOBAL(t80)]
|
| + pcmpgtb xmm1, xmm2 ; obtain sign information
|
| +
|
| + movdqa xmm2, xmm0
|
| + movdqa xmm3, xmm1
|
| + punpcklbw xmm0, xmm1 ; put sign back to subtraction
|
| + punpckhbw xmm2, xmm3 ; put sign back to subtraction
|
| +
|
| + movdqa XMMWORD PTR [rdi + 32], xmm0
|
| + movdqa XMMWORD PTR [rdi + 48], xmm2
|
| +
|
| + ;line 4 5
|
| + lea rsi, [rsi + rdx*4]
|
| +
|
| + movq xmm0, MMWORD PTR [rsi] ; src
|
| + movq xmm2, MMWORD PTR [rsi+rdx]
|
| + movdqa xmm1, XMMWORD PTR [rax + 32] ; pred
|
| + punpcklqdq xmm0, xmm2
|
| +
|
| + movdqa xmm2, xmm0
|
| + psubb xmm0, xmm1 ; subtraction with sign missed
|
| +
|
| + pxor xmm1, [GLOBAL(t80)] ;convert to signed values
|
| + pxor xmm2, [GLOBAL(t80)]
|
| + pcmpgtb xmm1, xmm2 ; obtain sign information
|
| +
|
| + movdqa xmm2, xmm0
|
| + movdqa xmm3, xmm1
|
| + punpcklbw xmm0, xmm1 ; put sign back to subtraction
|
| + punpckhbw xmm2, xmm3 ; put sign back to subtraction
|
| +
|
| + movdqa XMMWORD PTR [rdi + 64], xmm0
|
| + movdqa XMMWORD PTR [rdi + 80], xmm2
|
| +
|
| + ;line 6 7
|
| + movq xmm0, MMWORD PTR [rsi+rdx*2] ; src
|
| + movq xmm2, MMWORD PTR [rsi+rcx]
|
| + movdqa xmm1, XMMWORD PTR [rax+ 48] ; pred
|
| + punpcklqdq xmm0, xmm2
|
| +
|
| + movdqa xmm2, xmm0
|
| + psubb xmm0, xmm1 ; subtraction with sign missed
|
| +
|
| + pxor xmm1, [GLOBAL(t80)] ;convert to signed values
|
| + pxor xmm2, [GLOBAL(t80)]
|
| + pcmpgtb xmm1, xmm2 ; obtain sign information
|
| +
|
| + movdqa xmm2, xmm0
|
| + movdqa xmm3, xmm1
|
| + punpcklbw xmm0, xmm1 ; put sign back to subtraction
|
| + punpckhbw xmm2, xmm3 ; put sign back to subtraction
|
| +
|
| + movdqa XMMWORD PTR [rdi + 96], xmm0
|
| + movdqa XMMWORD PTR [rdi + 112], xmm2
|
| +
|
| + ; begin epilog
|
| + pop rdi
|
| + pop rsi
|
| + RESTORE_GOT
|
| + UNSHADOW_ARGS
|
| + pop rbp
|
| + ret
|
| +
|
| +SECTION_RODATA
|
| +align 16
|
| +t80:
|
| + times 16 db 0x80
|
|
|