| Index: libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.asm
|
| diff --git a/libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.asm b/libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.asm
|
| index 912007e02d7682697ce33af594954abb4e3d645e..2f33199e54bfeeb0b5c47e24e746ef3178c844b5 100644
|
| --- a/libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.asm
|
| +++ b/libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.asm
|
| @@ -9,62 +9,38 @@
|
|
|
|
|
| %include "vpx_ports/x86_abi_support.asm"
|
| -%include "asm_enc_offsets.asm"
|
|
|
|
|
| -; void vp8_fast_quantize_b_ssse3 | arg
|
| -; (BLOCK *b, | 0
|
| -; BLOCKD *d) | 1
|
| +;int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
|
| +; short *qcoeff_ptr,short *dequant_ptr,
|
| +; short *round_ptr,
|
| +; short *quant_ptr, short *dqcoeff_ptr);
|
| ;
|
| -
|
| -global sym(vp8_fast_quantize_b_ssse3)
|
| -sym(vp8_fast_quantize_b_ssse3):
|
| +global sym(vp8_fast_quantize_b_impl_ssse3)
|
| +sym(vp8_fast_quantize_b_impl_ssse3):
|
| push rbp
|
| mov rbp, rsp
|
| + SHADOW_ARGS_TO_STACK 6
|
| GET_GOT rbx
|
| -
|
| -%if ABI_IS_32BIT
|
| - push rdi
|
| push rsi
|
| -%else
|
| - %ifidn __OUTPUT_FORMAT__,x64
|
| push rdi
|
| - push rsi
|
| - %endif
|
| -%endif
|
| ; end prolog
|
|
|
| -%if ABI_IS_32BIT
|
| - mov rdi, arg(0) ; BLOCK *b
|
| - mov rsi, arg(1) ; BLOCKD *d
|
| -%else
|
| - %ifidn __OUTPUT_FORMAT__,x64
|
| - mov rdi, rcx ; BLOCK *b
|
| - mov rsi, rdx ; BLOCKD *d
|
| - %else
|
| - ;mov rdi, rdi ; BLOCK *b
|
| - ;mov rsi, rsi ; BLOCKD *d
|
| - %endif
|
| -%endif
|
| -
|
| - mov rax, [rdi + vp8_block_coeff]
|
| - mov rcx, [rdi + vp8_block_round]
|
| - mov rdx, [rdi + vp8_block_quant_fast]
|
| -
|
| - ; coeff
|
| - movdqa xmm0, [rax]
|
| - movdqa xmm4, [rax + 16]
|
| -
|
| - ; round
|
| - movdqa xmm2, [rcx]
|
| - movdqa xmm3, [rcx + 16]
|
| + mov rdx, arg(0) ;coeff_ptr
|
| + mov rdi, arg(3) ;round_ptr
|
| + mov rsi, arg(4) ;quant_ptr
|
| +
|
| + movdqa xmm0, [rdx]
|
| + movdqa xmm4, [rdx + 16]
|
| +
|
| + movdqa xmm2, [rdi] ;round lo
|
| + movdqa xmm3, [rdi + 16] ;round hi
|
|
|
| movdqa xmm1, xmm0
|
| movdqa xmm5, xmm4
|
|
|
| - ; sz = z >> 15
|
| - psraw xmm0, 15
|
| - psraw xmm4, 15
|
| + psraw xmm0, 15 ;sign of z (aka sz)
|
| + psraw xmm4, 15 ;sign of z (aka sz)
|
|
|
| pabsw xmm1, xmm1
|
| pabsw xmm5, xmm5
|
| @@ -72,24 +48,23 @@ sym(vp8_fast_quantize_b_ssse3):
|
| paddw xmm1, xmm2
|
| paddw xmm5, xmm3
|
|
|
| - ; quant_fast
|
| - pmulhw xmm1, [rdx]
|
| - pmulhw xmm5, [rdx + 16]
|
| + pmulhw xmm1, [rsi]
|
| + pmulhw xmm5, [rsi + 16]
|
|
|
| - mov rax, [rsi + vp8_blockd_qcoeff]
|
| - mov rdi, [rsi + vp8_blockd_dequant]
|
| - mov rcx, [rsi + vp8_blockd_dqcoeff]
|
| + mov rdi, arg(1) ;qcoeff_ptr
|
| + mov rcx, arg(2) ;dequant_ptr
|
| + mov rsi, arg(5) ;dqcoeff_ptr
|
|
|
| pxor xmm1, xmm0
|
| pxor xmm5, xmm4
|
| psubw xmm1, xmm0
|
| psubw xmm5, xmm4
|
|
|
| - movdqa [rax], xmm1
|
| - movdqa [rax + 16], xmm5
|
| + movdqa [rdi], xmm1
|
| + movdqa [rdi + 16], xmm5
|
|
|
| - movdqa xmm2, [rdi]
|
| - movdqa xmm3, [rdi + 16]
|
| + movdqa xmm2, [rcx]
|
| + movdqa xmm3, [rcx + 16]
|
|
|
| pxor xmm4, xmm4
|
| pmullw xmm2, xmm1
|
| @@ -98,37 +73,38 @@ sym(vp8_fast_quantize_b_ssse3):
|
| pcmpeqw xmm1, xmm4 ;non zero mask
|
| pcmpeqw xmm5, xmm4 ;non zero mask
|
| packsswb xmm1, xmm5
|
| - pshufb xmm1, [GLOBAL(zz_shuf)]
|
| + pshufb xmm1, [ GLOBAL(zz_shuf)]
|
|
|
| pmovmskb edx, xmm1
|
|
|
| +; xor ecx, ecx
|
| +; mov eax, -1
|
| +;find_eob_loop:
|
| +; shr edx, 1
|
| +; jc fq_skip
|
| +; mov eax, ecx
|
| +;fq_skip:
|
| +; inc ecx
|
| +; cmp ecx, 16
|
| +; jne find_eob_loop
|
| xor rdi, rdi
|
| mov eax, -1
|
| xor dx, ax ;flip the bits for bsr
|
| bsr eax, edx
|
|
|
| - movdqa [rcx], xmm2 ;store dqcoeff
|
| - movdqa [rcx + 16], xmm3 ;store dqcoeff
|
| + movdqa [rsi], xmm2 ;store dqcoeff
|
| + movdqa [rsi + 16], xmm3 ;store dqcoeff
|
|
|
| sub edi, edx ;check for all zeros in bit mask
|
| sar edi, 31 ;0 or -1
|
| add eax, 1
|
| and eax, edi ;if the bit mask was all zero,
|
| ;then eob = 0
|
| - mov [rsi + vp8_blockd_eob], eax
|
| -
|
| ; begin epilog
|
| -%if ABI_IS_32BIT
|
| - pop rsi
|
| pop rdi
|
| -%else
|
| - %ifidn __OUTPUT_FORMAT__,x64
|
| pop rsi
|
| - pop rdi
|
| - %endif
|
| -%endif
|
| -
|
| RESTORE_GOT
|
| + UNSHADOW_ARGS
|
| pop rbp
|
| ret
|
|
|
|
|