| Index: libvpx/source/libvpx/vp8/common/x86/recon_sse2.asm
|
| diff --git a/libvpx/source/libvpx/vp8/common/x86/recon_sse2.asm b/libvpx/source/libvpx/vp8/common/x86/recon_sse2.asm
|
| index 0e23116ce6a34f5c7a5318882ecc7b562b4d034a..4ad3973ecb73505a4956d9ec3eed3232bc9e13b3 100644
|
| --- a/libvpx/source/libvpx/vp8/common/x86/recon_sse2.asm
|
| +++ b/libvpx/source/libvpx/vp8/common/x86/recon_sse2.asm
|
| @@ -67,7 +67,7 @@ sym(vp8_recon4b_sse2):
|
| push rbp
|
| mov rbp, rsp
|
| SHADOW_ARGS_TO_STACK 4
|
| - SAVE_XMM 7
|
| + SAVE_XMM
|
| push rsi
|
| push rdi
|
| ; end prolog
|
| @@ -229,460 +229,3 @@ sym(vp8_copy_mem16x16_sse2):
|
| UNSHADOW_ARGS
|
| pop rbp
|
| ret
|
| -
|
| -
|
| -;void vp8_intra_pred_uv_dc_mmx2(
|
| -; unsigned char *dst,
|
| -; int dst_stride
|
| -; unsigned char *src,
|
| -; int src_stride,
|
| -; )
|
| -global sym(vp8_intra_pred_uv_dc_mmx2)
|
| -sym(vp8_intra_pred_uv_dc_mmx2):
|
| - push rbp
|
| - mov rbp, rsp
|
| - SHADOW_ARGS_TO_STACK 4
|
| - push rsi
|
| - push rdi
|
| - ; end prolog
|
| -
|
| - ; from top
|
| - mov rsi, arg(2) ;src;
|
| - movsxd rax, dword ptr arg(3) ;src_stride;
|
| - sub rsi, rax
|
| - pxor mm0, mm0
|
| - movq mm1, [rsi]
|
| - psadbw mm1, mm0
|
| -
|
| - ; from left
|
| - dec rsi
|
| - lea rdi, [rax*3]
|
| - movzx ecx, byte [rsi+rax]
|
| - movzx edx, byte [rsi+rax*2]
|
| - add ecx, edx
|
| - movzx edx, byte [rsi+rdi]
|
| - add ecx, edx
|
| - lea rsi, [rsi+rax*4]
|
| - movzx edx, byte [rsi]
|
| - add ecx, edx
|
| - movzx edx, byte [rsi+rax]
|
| - add ecx, edx
|
| - movzx edx, byte [rsi+rax*2]
|
| - add ecx, edx
|
| - movzx edx, byte [rsi+rdi]
|
| - add ecx, edx
|
| - movzx edx, byte [rsi+rax*4]
|
| - add ecx, edx
|
| -
|
| - ; add up
|
| - pextrw edx, mm1, 0x0
|
| - lea edx, [edx+ecx+8]
|
| - sar edx, 4
|
| - movd mm1, edx
|
| - pshufw mm1, mm1, 0x0
|
| - packuswb mm1, mm1
|
| -
|
| - ; write out
|
| - mov rdi, arg(0) ;dst;
|
| - movsxd rcx, dword ptr arg(1) ;dst_stride
|
| - lea rax, [rcx*3]
|
| -
|
| - movq [rdi ], mm1
|
| - movq [rdi+rcx ], mm1
|
| - movq [rdi+rcx*2], mm1
|
| - movq [rdi+rax ], mm1
|
| - lea rdi, [rdi+rcx*4]
|
| - movq [rdi ], mm1
|
| - movq [rdi+rcx ], mm1
|
| - movq [rdi+rcx*2], mm1
|
| - movq [rdi+rax ], mm1
|
| -
|
| - ; begin epilog
|
| - pop rdi
|
| - pop rsi
|
| - UNSHADOW_ARGS
|
| - pop rbp
|
| - ret
|
| -
|
| -;void vp8_intra_pred_uv_dctop_mmx2(
|
| -; unsigned char *dst,
|
| -; int dst_stride
|
| -; unsigned char *src,
|
| -; int src_stride,
|
| -; )
|
| -global sym(vp8_intra_pred_uv_dctop_mmx2)
|
| -sym(vp8_intra_pred_uv_dctop_mmx2):
|
| - push rbp
|
| - mov rbp, rsp
|
| - SHADOW_ARGS_TO_STACK 4
|
| - GET_GOT rbx
|
| - push rsi
|
| - push rdi
|
| - ; end prolog
|
| -
|
| - ; from top
|
| - mov rsi, arg(2) ;src;
|
| - movsxd rax, dword ptr arg(3) ;src_stride;
|
| - sub rsi, rax
|
| - pxor mm0, mm0
|
| - movq mm1, [rsi]
|
| - psadbw mm1, mm0
|
| -
|
| - ; add up
|
| - paddw mm1, [GLOBAL(dc_4)]
|
| - psraw mm1, 3
|
| - pshufw mm1, mm1, 0x0
|
| - packuswb mm1, mm1
|
| -
|
| - ; write out
|
| - mov rdi, arg(0) ;dst;
|
| - movsxd rcx, dword ptr arg(1) ;dst_stride
|
| - lea rax, [rcx*3]
|
| -
|
| - movq [rdi ], mm1
|
| - movq [rdi+rcx ], mm1
|
| - movq [rdi+rcx*2], mm1
|
| - movq [rdi+rax ], mm1
|
| - lea rdi, [rdi+rcx*4]
|
| - movq [rdi ], mm1
|
| - movq [rdi+rcx ], mm1
|
| - movq [rdi+rcx*2], mm1
|
| - movq [rdi+rax ], mm1
|
| -
|
| - ; begin epilog
|
| - pop rdi
|
| - pop rsi
|
| - RESTORE_GOT
|
| - UNSHADOW_ARGS
|
| - pop rbp
|
| - ret
|
| -
|
| -;void vp8_intra_pred_uv_dcleft_mmx2(
|
| -; unsigned char *dst,
|
| -; int dst_stride
|
| -; unsigned char *src,
|
| -; int src_stride,
|
| -; )
|
| -global sym(vp8_intra_pred_uv_dcleft_mmx2)
|
| -sym(vp8_intra_pred_uv_dcleft_mmx2):
|
| - push rbp
|
| - mov rbp, rsp
|
| - SHADOW_ARGS_TO_STACK 4
|
| - push rsi
|
| - push rdi
|
| - ; end prolog
|
| -
|
| - ; from left
|
| - mov rsi, arg(2) ;src;
|
| - movsxd rax, dword ptr arg(3) ;src_stride;
|
| - dec rsi
|
| - lea rdi, [rax*3]
|
| - movzx ecx, byte [rsi]
|
| - movzx edx, byte [rsi+rax]
|
| - add ecx, edx
|
| - movzx edx, byte [rsi+rax*2]
|
| - add ecx, edx
|
| - movzx edx, byte [rsi+rdi]
|
| - add ecx, edx
|
| - lea rsi, [rsi+rax*4]
|
| - movzx edx, byte [rsi]
|
| - add ecx, edx
|
| - movzx edx, byte [rsi+rax]
|
| - add ecx, edx
|
| - movzx edx, byte [rsi+rax*2]
|
| - add ecx, edx
|
| - movzx edx, byte [rsi+rdi]
|
| - lea edx, [ecx+edx+4]
|
| -
|
| - ; add up
|
| - shr edx, 3
|
| - movd mm1, edx
|
| - pshufw mm1, mm1, 0x0
|
| - packuswb mm1, mm1
|
| -
|
| - ; write out
|
| - mov rdi, arg(0) ;dst;
|
| - movsxd rcx, dword ptr arg(1) ;dst_stride
|
| - lea rax, [rcx*3]
|
| -
|
| - movq [rdi ], mm1
|
| - movq [rdi+rcx ], mm1
|
| - movq [rdi+rcx*2], mm1
|
| - movq [rdi+rax ], mm1
|
| - lea rdi, [rdi+rcx*4]
|
| - movq [rdi ], mm1
|
| - movq [rdi+rcx ], mm1
|
| - movq [rdi+rcx*2], mm1
|
| - movq [rdi+rax ], mm1
|
| -
|
| - ; begin epilog
|
| - pop rdi
|
| - pop rsi
|
| - UNSHADOW_ARGS
|
| - pop rbp
|
| - ret
|
| -
|
| -;void vp8_intra_pred_uv_dc128_mmx(
|
| -; unsigned char *dst,
|
| -; int dst_stride
|
| -; unsigned char *src,
|
| -; int src_stride,
|
| -; )
|
| -global sym(vp8_intra_pred_uv_dc128_mmx)
|
| -sym(vp8_intra_pred_uv_dc128_mmx):
|
| - push rbp
|
| - mov rbp, rsp
|
| - SHADOW_ARGS_TO_STACK 4
|
| - GET_GOT rbx
|
| - ; end prolog
|
| -
|
| - ; write out
|
| - movq mm1, [GLOBAL(dc_128)]
|
| - mov rax, arg(0) ;dst;
|
| - movsxd rdx, dword ptr arg(1) ;dst_stride
|
| - lea rcx, [rdx*3]
|
| -
|
| - movq [rax ], mm1
|
| - movq [rax+rdx ], mm1
|
| - movq [rax+rdx*2], mm1
|
| - movq [rax+rcx ], mm1
|
| - lea rax, [rax+rdx*4]
|
| - movq [rax ], mm1
|
| - movq [rax+rdx ], mm1
|
| - movq [rax+rdx*2], mm1
|
| - movq [rax+rcx ], mm1
|
| -
|
| - ; begin epilog
|
| - RESTORE_GOT
|
| - UNSHADOW_ARGS
|
| - pop rbp
|
| - ret
|
| -
|
| -;void vp8_intra_pred_uv_tm_sse2(
|
| -; unsigned char *dst,
|
| -; int dst_stride
|
| -; unsigned char *src,
|
| -; int src_stride,
|
| -; )
|
| -%macro vp8_intra_pred_uv_tm 1
|
| -global sym(vp8_intra_pred_uv_tm_%1)
|
| -sym(vp8_intra_pred_uv_tm_%1):
|
| - push rbp
|
| - mov rbp, rsp
|
| - SHADOW_ARGS_TO_STACK 4
|
| - GET_GOT rbx
|
| - push rsi
|
| - push rdi
|
| - ; end prolog
|
| -
|
| - ; read top row
|
| - mov edx, 4
|
| - mov rsi, arg(2) ;src;
|
| - movsxd rax, dword ptr arg(3) ;src_stride;
|
| - sub rsi, rax
|
| - pxor xmm0, xmm0
|
| -%ifidn %1, ssse3
|
| - movdqa xmm2, [GLOBAL(dc_1024)]
|
| -%endif
|
| - movq xmm1, [rsi]
|
| - punpcklbw xmm1, xmm0
|
| -
|
| - ; set up left ptrs ans subtract topleft
|
| - movd xmm3, [rsi-1]
|
| - lea rsi, [rsi+rax-1]
|
| -%ifidn %1, sse2
|
| - punpcklbw xmm3, xmm0
|
| - pshuflw xmm3, xmm3, 0x0
|
| - punpcklqdq xmm3, xmm3
|
| -%else
|
| - pshufb xmm3, xmm2
|
| -%endif
|
| - psubw xmm1, xmm3
|
| -
|
| - ; set up dest ptrs
|
| - mov rdi, arg(0) ;dst;
|
| - movsxd rcx, dword ptr arg(1) ;dst_stride
|
| -
|
| -vp8_intra_pred_uv_tm_%1_loop:
|
| - movd xmm3, [rsi]
|
| - movd xmm5, [rsi+rax]
|
| -%ifidn %1, sse2
|
| - punpcklbw xmm3, xmm0
|
| - punpcklbw xmm5, xmm0
|
| - pshuflw xmm3, xmm3, 0x0
|
| - pshuflw xmm5, xmm5, 0x0
|
| - punpcklqdq xmm3, xmm3
|
| - punpcklqdq xmm5, xmm5
|
| -%else
|
| - pshufb xmm3, xmm2
|
| - pshufb xmm5, xmm2
|
| -%endif
|
| - paddw xmm3, xmm1
|
| - paddw xmm5, xmm1
|
| - packuswb xmm3, xmm5
|
| - movq [rdi ], xmm3
|
| - movhps[rdi+rcx], xmm3
|
| - lea rsi, [rsi+rax*2]
|
| - lea rdi, [rdi+rcx*2]
|
| - dec edx
|
| - jnz vp8_intra_pred_uv_tm_%1_loop
|
| -
|
| - ; begin epilog
|
| - pop rdi
|
| - pop rsi
|
| - RESTORE_GOT
|
| - UNSHADOW_ARGS
|
| - pop rbp
|
| - ret
|
| -%endmacro
|
| -
|
| -vp8_intra_pred_uv_tm sse2
|
| -vp8_intra_pred_uv_tm ssse3
|
| -
|
| -;void vp8_intra_pred_uv_ve_mmx(
|
| -; unsigned char *dst,
|
| -; int dst_stride
|
| -; unsigned char *src,
|
| -; int src_stride,
|
| -; )
|
| -global sym(vp8_intra_pred_uv_ve_mmx)
|
| -sym(vp8_intra_pred_uv_ve_mmx):
|
| - push rbp
|
| - mov rbp, rsp
|
| - SHADOW_ARGS_TO_STACK 4
|
| - ; end prolog
|
| -
|
| - ; read from top
|
| - mov rax, arg(2) ;src;
|
| - movsxd rdx, dword ptr arg(3) ;src_stride;
|
| - sub rax, rdx
|
| - movq mm1, [rax]
|
| -
|
| - ; write out
|
| - mov rax, arg(0) ;dst;
|
| - movsxd rdx, dword ptr arg(1) ;dst_stride
|
| - lea rcx, [rdx*3]
|
| -
|
| - movq [rax ], mm1
|
| - movq [rax+rdx ], mm1
|
| - movq [rax+rdx*2], mm1
|
| - movq [rax+rcx ], mm1
|
| - lea rax, [rax+rdx*4]
|
| - movq [rax ], mm1
|
| - movq [rax+rdx ], mm1
|
| - movq [rax+rdx*2], mm1
|
| - movq [rax+rcx ], mm1
|
| -
|
| - ; begin epilog
|
| - UNSHADOW_ARGS
|
| - pop rbp
|
| - ret
|
| -
|
| -;void vp8_intra_pred_uv_ho_mmx2(
|
| -; unsigned char *dst,
|
| -; int dst_stride
|
| -; unsigned char *src,
|
| -; int src_stride,
|
| -; )
|
| -%macro vp8_intra_pred_uv_ho 1
|
| -global sym(vp8_intra_pred_uv_ho_%1)
|
| -sym(vp8_intra_pred_uv_ho_%1):
|
| - push rbp
|
| - mov rbp, rsp
|
| - SHADOW_ARGS_TO_STACK 4
|
| - push rsi
|
| - push rdi
|
| -%ifidn %1, ssse3
|
| -%ifndef GET_GOT_SAVE_ARG
|
| - push rbx
|
| -%endif
|
| - GET_GOT rbx
|
| -%endif
|
| - ; end prolog
|
| -
|
| - ; read from left and write out
|
| -%ifidn %1, mmx2
|
| - mov edx, 4
|
| -%endif
|
| - mov rsi, arg(2) ;src;
|
| - movsxd rax, dword ptr arg(3) ;src_stride;
|
| - mov rdi, arg(0) ;dst;
|
| - movsxd rcx, dword ptr arg(1) ;dst_stride
|
| -%ifidn %1, ssse3
|
| - lea rdx, [rcx*3]
|
| - movdqa xmm2, [GLOBAL(dc_00001111)]
|
| - lea rbx, [rax*3]
|
| -%endif
|
| - dec rsi
|
| -%ifidn %1, mmx2
|
| -vp8_intra_pred_uv_ho_%1_loop:
|
| - movd mm0, [rsi]
|
| - movd mm1, [rsi+rax]
|
| - punpcklbw mm0, mm0
|
| - punpcklbw mm1, mm1
|
| - pshufw mm0, mm0, 0x0
|
| - pshufw mm1, mm1, 0x0
|
| - movq [rdi ], mm0
|
| - movq [rdi+rcx], mm1
|
| - lea rsi, [rsi+rax*2]
|
| - lea rdi, [rdi+rcx*2]
|
| - dec edx
|
| - jnz vp8_intra_pred_uv_ho_%1_loop
|
| -%else
|
| - movd xmm0, [rsi]
|
| - movd xmm3, [rsi+rax]
|
| - movd xmm1, [rsi+rax*2]
|
| - movd xmm4, [rsi+rbx]
|
| - punpcklbw xmm0, xmm3
|
| - punpcklbw xmm1, xmm4
|
| - pshufb xmm0, xmm2
|
| - pshufb xmm1, xmm2
|
| - movq [rdi ], xmm0
|
| - movhps [rdi+rcx], xmm0
|
| - movq [rdi+rcx*2], xmm1
|
| - movhps [rdi+rdx], xmm1
|
| - lea rsi, [rsi+rax*4]
|
| - lea rdi, [rdi+rcx*4]
|
| - movd xmm0, [rsi]
|
| - movd xmm3, [rsi+rax]
|
| - movd xmm1, [rsi+rax*2]
|
| - movd xmm4, [rsi+rbx]
|
| - punpcklbw xmm0, xmm3
|
| - punpcklbw xmm1, xmm4
|
| - pshufb xmm0, xmm2
|
| - pshufb xmm1, xmm2
|
| - movq [rdi ], xmm0
|
| - movhps [rdi+rcx], xmm0
|
| - movq [rdi+rcx*2], xmm1
|
| - movhps [rdi+rdx], xmm1
|
| -%endif
|
| -
|
| - ; begin epilog
|
| -%ifidn %1, ssse3
|
| - RESTORE_GOT
|
| -%ifndef GET_GOT_SAVE_ARG
|
| - pop rbx
|
| -%endif
|
| -%endif
|
| - pop rdi
|
| - pop rsi
|
| - UNSHADOW_ARGS
|
| - pop rbp
|
| - ret
|
| -%endmacro
|
| -
|
| -vp8_intra_pred_uv_ho mmx2
|
| -vp8_intra_pred_uv_ho ssse3
|
| -
|
| -SECTION_RODATA
|
| -dc_128:
|
| - times 8 db 128
|
| -dc_4:
|
| - times 4 dw 4
|
| -align 16
|
| -dc_1024:
|
| - times 8 dw 0x400
|
| -align 16
|
| -dc_00001111:
|
| - times 8 db 0
|
| - times 8 db 1
|
|
|