Index: source/libvpx/vp8/encoder/x86/quantize_ssse3.asm
===================================================================
--- source/libvpx/vp8/encoder/x86/quantize_ssse3.asm	(revision 96967)
+++ source/libvpx/vp8/encoder/x86/quantize_ssse3.asm	(working copy)
@@ -9,38 +9,62 @@


 %include "vpx_ports/x86_abi_support.asm"
+%include "asm_enc_offsets.asm"


-;int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr
-;                           short *qcoeff_ptr,short *dequant_ptr,
-;                           short *round_ptr,
-;                           short *quant_ptr, short *dqcoeff_ptr);
+; void vp8_fast_quantize_b_ssse3 | arg
+;  (BLOCK  *b,                   |  0
+;   BLOCKD *d)                   |  1
 ;
-global sym(vp8_fast_quantize_b_impl_ssse3)
-sym(vp8_fast_quantize_b_impl_ssse3):
+
+global sym(vp8_fast_quantize_b_ssse3)
+sym(vp8_fast_quantize_b_ssse3):
     push        rbp
     mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 6
     GET_GOT     rbx
+
+%if ABI_IS_32BIT
+    push        rdi
     push        rsi
+%else
+  %ifidn __OUTPUT_FORMAT__,x64
     push        rdi
+    push        rsi
+  %endif
+%endif
     ; end prolog

-    mov         rdx, arg(0)                 ;coeff_ptr
-    mov         rdi, arg(3)                 ;round_ptr
-    mov         rsi, arg(4)                 ;quant_ptr
+%if ABI_IS_32BIT
+    mov         rdi, arg(0)                 ; BLOCK *b
+    mov         rsi, arg(1)                 ; BLOCKD *d
+%else
+  %ifidn __OUTPUT_FORMAT__,x64
+    mov         rdi, rcx                    ; BLOCK *b
+    mov         rsi, rdx                    ; BLOCKD *d
+  %else
+    ;mov        rdi, rdi                    ; BLOCK *b
+    ;mov        rsi, rsi                    ; BLOCKD *d
+  %endif
+%endif

-    movdqa      xmm0, [rdx]
-    movdqa      xmm4, [rdx + 16]
+    mov         rax, [rdi + vp8_block_coeff]
+    mov         rcx, [rdi + vp8_block_round]
+    mov         rdx, [rdi + vp8_block_quant_fast]

-    movdqa      xmm2, [rdi]                 ;round lo
-    movdqa      xmm3, [rdi + 16]            ;round hi
+    ; coeff
+    movdqa      xmm0, [rax]
+    movdqa      xmm4, [rax + 16]

+    ; round
+    movdqa      xmm2, [rcx]
+    movdqa      xmm3, [rcx + 16]
+
     movdqa      xmm1, xmm0
     movdqa      xmm5, xmm4

-    psraw       xmm0, 15                    ;sign of z (aka sz)
-    psraw       xmm4, 15                    ;sign of z (aka sz)
+    ; sz = z >> 15
+    psraw       xmm0, 15
+    psraw       xmm4, 15

     pabsw       xmm1, xmm1
     pabsw       xmm5, xmm5
@@ -48,23 +72,24 @@
     paddw       xmm1, xmm2
     paddw       xmm5, xmm3

-    pmulhw      xmm1, [rsi]
-    pmulhw      xmm5, [rsi + 16]
+    ; quant_fast
+    pmulhw      xmm1, [rdx]
+    pmulhw      xmm5, [rdx + 16]

-    mov         rdi, arg(1)                 ;qcoeff_ptr
-    mov         rcx, arg(2)                 ;dequant_ptr
-    mov         rsi, arg(5)                 ;dqcoeff_ptr
+    mov         rax, [rsi + vp8_blockd_qcoeff]
+    mov         rdi, [rsi + vp8_blockd_dequant]
+    mov         rcx, [rsi + vp8_blockd_dqcoeff]

     pxor        xmm1, xmm0
     pxor        xmm5, xmm4
     psubw       xmm1, xmm0
     psubw       xmm5, xmm4

-    movdqa      [rdi], xmm1
-    movdqa      [rdi + 16], xmm5
+    movdqa      [rax], xmm1
+    movdqa      [rax + 16], xmm5

-    movdqa      xmm2, [rcx]
-    movdqa      xmm3, [rcx + 16]
+    movdqa      xmm2, [rdi]
+    movdqa      xmm3, [rdi + 16]

     pxor        xmm4, xmm4
     pmullw      xmm2, xmm1
@@ -73,38 +98,37 @@
     pcmpeqw     xmm1, xmm4                  ;non zero mask
     pcmpeqw     xmm5, xmm4                  ;non zero mask
     packsswb    xmm1, xmm5
-    pshufb      xmm1, [ GLOBAL(zz_shuf)]
+    pshufb      xmm1, [GLOBAL(zz_shuf)]

     pmovmskb    edx, xmm1

-;   xor         ecx, ecx
-;   mov         eax, -1
-;find_eob_loop:
-;   shr         edx, 1
-;   jc          fq_skip
-;   mov         eax, ecx
-;fq_skip:
-;   inc         ecx
-;   cmp         ecx, 16
-;   jne         find_eob_loop
     xor         rdi, rdi
     mov         eax, -1
     xor         dx, ax                      ;flip the bits for bsr
     bsr         eax, edx

-    movdqa      [rsi], xmm2                 ;store dqcoeff
-    movdqa      [rsi + 16], xmm3            ;store dqcoeff
+    movdqa      [rcx], xmm2                 ;store dqcoeff
+    movdqa      [rcx + 16], xmm3            ;store dqcoeff

     sub         edi, edx                    ;check for all zeros in bit mask
     sar         edi, 31                     ;0 or -1
     add         eax, 1
     and         eax, edi                    ;if the bit mask was all zero,
                                             ;then eob = 0
+    mov         [rsi + vp8_blockd_eob], eax
+
     ; begin epilog
+%if ABI_IS_32BIT
+    pop         rsi
     pop         rdi
+%else
+  %ifidn __OUTPUT_FORMAT__,x64
     pop         rsi
+    pop         rdi
+  %endif
+%endif
+
     RESTORE_GOT
-    UNSHADOW_ARGS
     pop         rbp
     ret

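
For reference, the math the patched routine performs per 16-coefficient block is: x = abs(z) + round; y = (x * quant_fast) >> 16 (pmulhw keeps the signed high halves); the pxor/psubw pair then reapplies the sign of z, and dqcoeff = qcoeff * dequant. A minimal scalar sketch in C, assuming simplified stand-in structs (the real BLOCK/BLOCKD live in vp8/common/blockd.h, and the asm reaches their fields through the generated offsets, not through these definitions):

#include <stdint.h>
#include <stdlib.h>

/* Illustrative stand-ins, not libvpx's real struct layouts. */
typedef struct { int16_t *coeff, *round, *quant_fast; } BLOCK_SKETCH;
typedef struct { int16_t *qcoeff, *dqcoeff, *dequant; int eob; } BLOCKD_SKETCH;

/* Raster position of each zigzag index (vp8_default_zig_zag1d). */
static const int zigzag[16] = { 0, 1, 4, 8, 5, 2, 3, 6,
                                9, 12, 13, 10, 7, 11, 14, 15 };

static void fast_quantize_b_sketch(const BLOCK_SKETCH *b, BLOCKD_SKETCH *d)
{
    int eob = -1;                               /* stays -1 if all lanes quantize to zero */
    for (int i = 0; i < 16; i++) {
        int rc = zigzag[i];                     /* scan in zigzag order to track eob */
        int z  = b->coeff[rc];
        int x  = abs(z) + b->round[rc];         /* pabsw + paddw */
        int y  = (x * b->quant_fast[rc]) >> 16; /* pmulhw: signed high 16 bits */
        if (z < 0) y = -y;                      /* pxor/psubw: (y ^ sz) - sz */
        d->qcoeff[rc]  = (int16_t)y;
        d->dqcoeff[rc] = (int16_t)(y * d->dequant[rc]); /* pmullw */
        if (y) eob = i;
    }
    d->eob = eob + 1;                           /* 0 when the block is empty */
}

The scalar version can track eob directly because it scans in zigzag order; the asm instead reorders a lane mask with pshufb (zz_shuf) and derives eob from it, as sketched next.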
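
The removed find_eob_loop is replaced by a branchless sequence: pmovmskb collapses the pcmpeqw results into a 16-bit mask with a 1 for each lane that quantized to zero, xor dx, ax flips it into a nonzero mask, bsr finds the highest set bit (the last nonzero zigzag position), and the sub/sar/and tail forces eob to 0 when the mask is empty (bsr leaves eax untouched at -1 in that case). The same computation in C, with an explicit branch standing in for the sign trick; __builtin_clz is a GCC/Clang builtin, an assumption here rather than libvpx code:

/* eob from the pmovmskb result: bit i set => zigzag lane i quantized to zero. */
static int eob_from_zero_mask(unsigned zero_mask)
{
    unsigned nz = (zero_mask ^ 0xFFFFu) & 0xFFFFu; /* xor dx, ax: 1 => nonzero  */
    if (nz == 0)                                   /* asm: sub/sar/and sign trick */
        return 0;
    return (31 - __builtin_clz(nz)) + 1;           /* bsr eax, edx; add eax, 1  */
}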
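
The new %include "asm_enc_offsets.asm" is what makes the BLOCK/BLOCKD field accesses possible: constants such as vp8_block_coeff and vp8_blockd_eob are struct member offsets generated at build time from the C headers, which keeps the asm in step with blockd.h. A hypothetical sketch of the idea using offsetof (the actual libvpx generator works differently, through a DEFINE macro and a build step, and the struct layouts below are illustrative only):

#include <stddef.h>
#include <stdio.h>

/* Hypothetical layouts; the real structs are in vp8/common/blockd.h. */
typedef struct { short *coeff; short *round; short *quant_fast; } BLOCK;
typedef struct { short *qcoeff; short *dqcoeff; short *dequant; int eob; } BLOCKD;

int main(void)
{
    /* Emitting "name equ offset" lines yields a file the asm can %include. */
    printf("vp8_block_coeff equ %zu\n",      offsetof(BLOCK, coeff));
    printf("vp8_block_round equ %zu\n",      offsetof(BLOCK, round));
    printf("vp8_block_quant_fast equ %zu\n", offsetof(BLOCK, quant_fast));
    printf("vp8_blockd_qcoeff equ %zu\n",    offsetof(BLOCKD, qcoeff));
    printf("vp8_blockd_dequant equ %zu\n",   offsetof(BLOCKD, dequant));
    printf("vp8_blockd_dqcoeff equ %zu\n",   offsetof(BLOCKD, dqcoeff));
    printf("vp8_blockd_eob equ %zu\n",       offsetof(BLOCKD, eob));
    return 0;
}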