| Index: libvpx/source/libvpx/vp8/common/x86/subpixel_ssse3.asm
|
| diff --git a/libvpx/source/libvpx/vp8/common/x86/subpixel_ssse3.asm b/libvpx/source/libvpx/vp8/common/x86/subpixel_ssse3.asm
|
| index 1ddbc54bd6ae99466f825ea40eb813ceb294e68a..7f6fd93e4eb8a30b3802b818b746c3d5a755eec0 100644
|
| --- a/libvpx/source/libvpx/vp8/common/x86/subpixel_ssse3.asm
|
| +++ b/libvpx/source/libvpx/vp8/common/x86/subpixel_ssse3.asm
|
| @@ -39,7 +39,6 @@ sym(vp8_filter_block1d8_h6_ssse3):
|
| push rbp
|
| mov rbp, rsp
|
| SHADOW_ARGS_TO_STACK 6
|
| - SAVE_XMM 7
|
| GET_GOT rbx
|
| push rsi
|
| push rdi
|
| @@ -108,7 +107,6 @@ filter_block1d8_h6_rowloop_ssse3:
|
| pop rdi
|
| pop rsi
|
| RESTORE_GOT
|
| - RESTORE_XMM
|
| UNSHADOW_ARGS
|
| pop rbp
|
| ret
|
| @@ -164,7 +162,6 @@ filter_block1d8_h4_rowloop_ssse3:
|
| pop rdi
|
| pop rsi
|
| RESTORE_GOT
|
| - RESTORE_XMM
|
| UNSHADOW_ARGS
|
| pop rbp
|
| ret
|
| @@ -182,7 +179,7 @@ sym(vp8_filter_block1d16_h6_ssse3):
|
| push rbp
|
| mov rbp, rsp
|
| SHADOW_ARGS_TO_STACK 6
|
| - SAVE_XMM 7
|
| + SAVE_XMM
|
| GET_GOT rbx
|
| push rsi
|
| push rdi
|
| @@ -197,6 +194,10 @@ sym(vp8_filter_block1d16_h6_ssse3):
|
|
|
| mov rdi, arg(2) ;output_ptr
|
|
|
| +;;
|
| +;; cmp esi, DWORD PTR [rax]
|
| +;; je vp8_filter_block1d16_h4_ssse3
|
| +
|
| mov rsi, arg(0) ;src_ptr
|
|
|
| movdqa xmm4, XMMWORD PTR [rax] ;k0_k5
|
| @@ -270,7 +271,61 @@ filter_block1d16_h6_rowloop_ssse3:
|
| pop rdi
|
| pop rsi
|
| RESTORE_GOT
|
| - RESTORE_XMM
|
| + UNSHADOW_ARGS
|
| + pop rbp
|
| + ret
|
| +
|
| +vp8_filter_block1d16_h4_ssse3: ;4-tap row filter, 16 output bytes per row; reads rax/rdi set before entry. NOTE(review): the only visible branch to this label is commented out
|
| + movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
|
| + movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
|
| +
|
| + mov rsi, arg(0) ;src_ptr
|
| + movsxd rax, dword ptr arg(1) ;src_pixels_per_line
|
| + movsxd rcx, dword ptr arg(4) ;output_height
|
| + movsxd rdx, dword ptr arg(3) ;output_pitch
|
| +
|
| +filter_block1d16_h4_rowloop_ssse3: ;one iteration per output row; rcx counts rows remaining
|
| + movdqu xmm1, XMMWORD PTR [rsi - 2] ;unaligned 16-byte load starting 2 bytes before src
|
| +
|
| + movdqa xmm2, xmm1 ;keep a second copy so each shuffle mask gets the same input
|
| + pshufb xmm1, [GLOBAL(shuf2b)] ;reorder bytes with the shuf2b mask (paired with k2_k4 below)
|
| + pshufb xmm2, [GLOBAL(shuf3b)] ;reorder bytes with the shuf3b mask (paired with k1_k3 below)
|
| + pmaddubsw xmm1, xmm5 ;unsigned*signed byte multiply, adjacent pairs summed into words (k2_k4)
|
| +
|
| + movdqu xmm3, XMMWORD PTR [rsi + 6] ;second unaligned load, 8 bytes past the first one
|
| +
|
| + pmaddubsw xmm2, xmm6 ;same multiply-add with k1_k3
|
| + movdqa xmm0, xmm3 ;copy of the second load for the other shuffle
|
| + pshufb xmm3, [GLOBAL(shuf3b)] ;second half, shuf3b ordering
|
| + pshufb xmm0, [GLOBAL(shuf2b)] ;second half, shuf2b ordering
|
| +
|
| + paddsw xmm1, [GLOBAL(rd)] ;add rd with signed saturation (presumably the rounding term for the >>7 below - confirm)
|
| + paddsw xmm1, xmm2 ;first half: sum of both coefficient-pair products
|
| +
|
| + pmaddubsw xmm0, xmm5 ;second half * k2_k4
|
| + pmaddubsw xmm3, xmm6 ;second half * k1_k3
|
| +
|
| + psraw xmm1, 7 ;arithmetic shift right by 7 on each word
|
| + packuswb xmm1, xmm1 ;saturate words to unsigned bytes; 8 results in the low qword
|
| + lea rsi, [rsi + rax] ;advance src by src_pixels_per_line (flags untouched)
|
| + paddsw xmm3, xmm0 ;second half: sum of both coefficient-pair products
|
| + paddsw xmm3, [GLOBAL(rd)] ;add rd, as for the first half
|
| + psraw xmm3, 7 ;arithmetic shift right by 7 on each word
|
| + packuswb xmm3, xmm3 ;saturate words to unsigned bytes; 8 results in the low qword
|
| +
|
| + punpcklqdq xmm1, xmm3 ;xmm1 = first 8 results (low qword) : second 8 results (high qword)
|
| +
|
| + movdqa XMMWORD Ptr [rdi], xmm1 ;aligned 16-byte store; rdi must be 16-byte aligned
|
| +
|
| + add rdi, rdx ;advance output by output_pitch
|
| + dec rcx ;one row done
|
| + jnz filter_block1d16_h4_rowloop_ssse3 ;repeat until output_height rows are written
|
| +
|
| +
|
| + ; begin epilog
|
| + pop rdi ;restores the rdi pushed before this label (push is not in this block)
|
| + pop rsi ;restores the rsi pushed before this label (push is not in this block)
|
| + RESTORE_GOT ;NOTE(review): no RESTORE_XMM follows although the h6 prologue hunk adds SAVE_XMM - confirm stack balance on Win64
|
| UNSHADOW_ARGS
|
| pop rbp
|
| ret
|
| @@ -289,7 +344,6 @@ sym(vp8_filter_block1d4_h6_ssse3):
|
| push rbp
|
| mov rbp, rsp
|
| SHADOW_ARGS_TO_STACK 6
|
| - SAVE_XMM 7
|
| GET_GOT rbx
|
| push rsi
|
| push rdi
|
| @@ -397,7 +451,6 @@ filter_block1d4_h4_rowloop_ssse3:
|
| pop rdi
|
| pop rsi
|
| RESTORE_GOT
|
| - RESTORE_XMM
|
| UNSHADOW_ARGS
|
| pop rbp
|
| ret
|
| @@ -418,7 +471,6 @@ sym(vp8_filter_block1d16_v6_ssse3):
|
| push rbp
|
| mov rbp, rsp
|
| SHADOW_ARGS_TO_STACK 6
|
| - SAVE_XMM 7
|
| GET_GOT rbx
|
| push rsi
|
| push rdi
|
| @@ -514,7 +566,6 @@ vp8_filter_block1d16_v6_ssse3_loop:
|
| pop rdi
|
| pop rsi
|
| RESTORE_GOT
|
| - RESTORE_XMM
|
| UNSHADOW_ARGS
|
| pop rbp
|
| ret
|
| @@ -587,7 +638,6 @@ vp8_filter_block1d16_v4_ssse3_loop:
|
| pop rdi
|
| pop rsi
|
| RESTORE_GOT
|
| - RESTORE_XMM
|
| UNSHADOW_ARGS
|
| pop rbp
|
| ret
|
| @@ -606,7 +656,6 @@ sym(vp8_filter_block1d8_v6_ssse3):
|
| push rbp
|
| mov rbp, rsp
|
| SHADOW_ARGS_TO_STACK 6
|
| - SAVE_XMM 7
|
| GET_GOT rbx
|
| push rsi
|
| push rdi
|
| @@ -679,7 +728,6 @@ vp8_filter_block1d8_v6_ssse3_loop:
|
| pop rdi
|
| pop rsi
|
| RESTORE_GOT
|
| - RESTORE_XMM
|
| UNSHADOW_ARGS
|
| pop rbp
|
| ret
|
| @@ -728,7 +776,6 @@ vp8_filter_block1d8_v4_ssse3_loop:
|
| pop rdi
|
| pop rsi
|
| RESTORE_GOT
|
| - RESTORE_XMM
|
| UNSHADOW_ARGS
|
| pop rbp
|
| ret
|
| @@ -885,7 +932,7 @@ sym(vp8_bilinear_predict16x16_ssse3):
|
| push rbp
|
| mov rbp, rsp
|
| SHADOW_ARGS_TO_STACK 6
|
| - SAVE_XMM 7
|
| + SAVE_XMM
|
| GET_GOT rbx
|
| push rsi
|
| push rdi
|
| @@ -1148,7 +1195,7 @@ sym(vp8_bilinear_predict8x8_ssse3):
|
| push rbp
|
| mov rbp, rsp
|
| SHADOW_ARGS_TO_STACK 6
|
| - SAVE_XMM 7
|
| + SAVE_XMM
|
| GET_GOT rbx
|
| push rsi
|
| push rdi
|
|
|