Index: libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.asm
diff --git a/libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.asm b/libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.asm
index 912007e02d7682697ce33af594954abb4e3d645e..2f33199e54bfeeb0b5c47e24e746ef3178c844b5 100644
--- a/libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.asm
+++ b/libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.asm
@@ -9,62 +9,38 @@
 %include "vpx_ports/x86_abi_support.asm"
-%include "asm_enc_offsets.asm"
-; void vp8_fast_quantize_b_ssse3 | arg
-;  (BLOCK *b,                    |  0
-;   BLOCKD *d)                   |  1
+;int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
+;                                   short *qcoeff_ptr, short *dequant_ptr,
+;                                   short *round_ptr,
+;                                   short *quant_ptr, short *dqcoeff_ptr);
 ;
-
-global sym(vp8_fast_quantize_b_ssse3)
-sym(vp8_fast_quantize_b_ssse3):
+global sym(vp8_fast_quantize_b_impl_ssse3)
+sym(vp8_fast_quantize_b_impl_ssse3):
     push        rbp
     mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
     GET_GOT     rbx
-
-%if ABI_IS_32BIT
-    push        rdi
     push        rsi
-%else
-  %ifidn __OUTPUT_FORMAT__,x64
     push        rdi
-    push        rsi
-  %endif
-%endif
     ; end prolog
-%if ABI_IS_32BIT
-    mov         rdi, arg(0)                 ; BLOCK *b
-    mov         rsi, arg(1)                 ; BLOCKD *d
-%else
-  %ifidn __OUTPUT_FORMAT__,x64
-    mov         rdi, rcx                    ; BLOCK *b
-    mov         rsi, rdx                    ; BLOCKD *d
-  %else
-    ;mov        rdi, rdi                    ; BLOCK *b
-    ;mov        rsi, rsi                    ; BLOCKD *d
-  %endif
-%endif
-
-    mov         rax, [rdi + vp8_block_coeff]
-    mov         rcx, [rdi + vp8_block_round]
-    mov         rdx, [rdi + vp8_block_quant_fast]
-
-    ; coeff
-    movdqa      xmm0, [rax]
-    movdqa      xmm4, [rax + 16]
-
-    ; round
-    movdqa      xmm2, [rcx]
-    movdqa      xmm3, [rcx + 16]
+    mov         rdx, arg(0)                 ;coeff_ptr
+    mov         rdi, arg(3)                 ;round_ptr
+    mov         rsi, arg(4)                 ;quant_ptr
+
+    movdqa      xmm0, [rdx]
+    movdqa      xmm4, [rdx + 16]
+
+    movdqa      xmm2, [rdi]                 ;round lo
+    movdqa      xmm3, [rdi + 16]            ;round hi
     movdqa      xmm1, xmm0
     movdqa      xmm5, xmm4
-    ; sz = z >> 15
-    psraw       xmm0, 15
-    psraw       xmm4, 15
+    psraw       xmm0, 15                    ;sign of z (aka sz)
+    psraw       xmm4, 15                    ;sign of z (aka sz)
     pabsw       xmm1, xmm1
     pabsw       xmm5, xmm5
@@ -72,24 +48,23 @@ sym(vp8_fast_quantize_b_ssse3):
     paddw       xmm1, xmm2
     paddw       xmm5, xmm3
-    ; quant_fast
-    pmulhw      xmm1, [rdx]
-    pmulhw      xmm5, [rdx + 16]
+    pmulhw      xmm1, [rsi]
+    pmulhw      xmm5, [rsi + 16]
-    mov         rax, [rsi + vp8_blockd_qcoeff]
-    mov         rdi, [rsi + vp8_blockd_dequant]
-    mov         rcx, [rsi + vp8_blockd_dqcoeff]
+    mov         rdi, arg(1)                 ;qcoeff_ptr
+    mov         rcx, arg(2)                 ;dequant_ptr
+    mov         rsi, arg(5)                 ;dqcoeff_ptr
     pxor        xmm1, xmm0
     pxor        xmm5, xmm4
     psubw       xmm1, xmm0
     psubw       xmm5, xmm4
-    movdqa      [rax], xmm1
-    movdqa      [rax + 16], xmm5
+    movdqa      [rdi], xmm1
+    movdqa      [rdi + 16], xmm5
-    movdqa      xmm2, [rdi]
-    movdqa      xmm3, [rdi + 16]
+    movdqa      xmm2, [rcx]
+    movdqa      xmm3, [rcx + 16]
     pxor        xmm4, xmm4
     pmullw      xmm2, xmm1
@@ -98,37 +73,38 @@ sym(vp8_fast_quantize_b_ssse3):
     pcmpeqw     xmm1, xmm4                  ;non zero mask
     pcmpeqw     xmm5, xmm4                  ;non zero mask
     packsswb    xmm1, xmm5
     pshufb      xmm1, [GLOBAL(zz_shuf)]
     pmovmskb    edx, xmm1
+;   xor         ecx, ecx
+;   mov         eax, -1
+;find_eob_loop:
+;   shr         edx, 1
+;   jc          fq_skip
+;   mov         eax, ecx
+;fq_skip:
+;   inc         ecx
+;   cmp         ecx, 16
+;   jne         find_eob_loop
     xor         rdi, rdi
     mov         eax, -1
     xor         dx, ax                      ;flip the bits for bsr
     bsr         eax, edx
-    movdqa      [rcx], xmm2                 ;store dqcoeff
-    movdqa      [rcx + 16], xmm3            ;store dqcoeff
+    movdqa      [rsi], xmm2                 ;store dqcoeff
+    movdqa      [rsi + 16], xmm3            ;store dqcoeff
     sub         edi, edx                    ;check for all zeros in bit mask
     sar         edi, 31                     ;0 or -1
     add         eax, 1
     and         eax, edi                    ;if the bit mask was all zero,
                                             ;then eob = 0
-    mov         [rsi + vp8_blockd_eob], eax
-
     ; begin epilog
-%if ABI_IS_32BIT
-    pop         rsi
     pop         rdi
-%else
-  %ifidn __OUTPUT_FORMAT__,x64
     pop         rsi
-    pop         rdi
-  %endif
-%endif
-
     RESTORE_GOT
+    UNSHADOW_ARGS
     pop         rbp
     ret
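
The change above trades the BLOCK*/BLOCKD* entry point, which reached into
the structs through the asm_enc_offsets constants, for an impl function that
takes six raw pointers and returns the eob in eax. That means the encoder
needs a thin C wrapper with the old signature at the call site. The sketch
below shows the expected shape of that wrapper; the BLOCK/BLOCKD field names
follow vp8/encoder/block.h and vp8/common/blockd.h, and eob is assumed to be
a plain int member (matching the 32-bit store the removed asm performed), so
treat it as an illustration rather than code from this patch:

    /* Hypothetical wrapper restoring the (BLOCK *, BLOCKD *) shape. */
    extern int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
                                              short *qcoeff_ptr,
                                              short *dequant_ptr,
                                              short *round_ptr,
                                              short *quant_ptr,
                                              short *dqcoeff_ptr);

    static void fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
    {
        /* Argument order matches arg(0)..arg(5) in the asm. */
        d->eob = vp8_fast_quantize_b_impl_ssse3(b->coeff, d->qcoeff,
                                                d->dequant, b->round,
                                                b->quant_fast, d->dqcoeff);
    }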
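Arithmetically, the routine is VP8's fast quantizer: pabsw strips the sign,
paddw adds the rounding term, pmulhw keeps the high 16 bits of the multiply
by the quantizer (an implicit >> 16), the pxor/psubw pair puts the sign
back, and pmullw rebuilds the dequantized value. The (x ^ sz) - sz identity
is a no-op when sz == 0 and a two's-complement negation when sz == -1,
which is what lets all 16 lanes share one branch-free path. A scalar C
sketch of the same arithmetic, assuming the standard VP8 4x4 zigzag scan
(illustrative only, not part of the patch; paddw/pmulhw wrap at 16 bits,
which the int math below ignores because in-range inputs never overflow):

    static const int zigzag[16] = {
        0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
    };

    int fast_quantize_b_sketch(short *coeff_ptr, short *qcoeff_ptr,
                               short *dequant_ptr, short *round_ptr,
                               short *quant_ptr, short *dqcoeff_ptr)
    {
        int eob = 0;

        for (int i = 0; i < 16; i++) {
            int rc = zigzag[i];              /* scan position -> raster  */
            int z  = coeff_ptr[rc];
            int sz = z >> 31;                /* psraw 15: 0 or -1        */
            int x  = (z ^ sz) - sz;          /* pabsw: |z|               */
            int y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; /* pmulhw */
            x = (y ^ sz) - sz;               /* pxor/psubw: re-sign      */
            qcoeff_ptr[rc]  = (short)x;
            dqcoeff_ptr[rc] = (short)(x * dequant_ptr[rc]);  /* pmullw   */
            if (x)
                eob = i + 1;                 /* last nonzero + 1         */
        }
        return eob;
    }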
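The eob extraction after pmovmskb is the subtle part, and the commented-out
find_eob_loop shows the scalar version it replaces. pcmpeqw against zero
builds a per-word "coefficient is zero" mask, packsswb narrows it to bytes,
pshufb with zz_shuf permutes the bytes into zigzag scan order, and pmovmskb
collapses the sign bits into a 16-bit mask in edx with bit i set when scan
position i is zero. The xor dx, ax then flips it into a nonzero mask so bsr
can find the last nonzero scan position, and the sub/sar/and sequence
forces eax to 0 for an all-zero block, because bsr leaves its destination
undefined when the source is zero. The same tail in C, with the GCC/Clang
__builtin_clz standing in for bsr (a sketch, not part of the patch):

    /* zero_mask: pmovmskb result, bit i set when zigzag position i
     * holds a zero coefficient. */
    int eob_from_zero_mask(unsigned zero_mask)
    {
        int edx = (int)(zero_mask ^ 0xFFFF);    /* xor dx, ax           */
        int edi = -edx >> 31;                   /* sub/sar: -1 or 0     */
        int eax = edx ? 31 - __builtin_clz((unsigned)edx)   /* bsr      */
                      : -1;                     /* bsr undefined on 0   */

        return (eax + 1) & edi;                 /* all zero -> eob = 0  */
    }

For example, a block whose only nonzero quantized coefficient sits at
zigzag position 3 yields zero_mask = 0xFFF7, so edx = 0x0008, bsr gives 3,
and eob comes out as 4; an empty block yields edx = 0 and eob = 0.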