| Index: source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
|
| ===================================================================
|
| --- source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm (revision 282873)
|
| +++ source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm (working copy)
|
| @@ -234,21 +234,18 @@
|
| movifnidn quantq, quantmp
|
| mova m1, [roundq] ; m1 = round
|
| mova m2, [quantq] ; m2 = quant
|
| -%ifidn %1, b_32x32
|
| -; TODO(jingning) to be continued with 32x32 quantization process
|
| +%ifidn %1, fp_32x32
|
| pcmpeqw m5, m5
|
| psrlw m5, 15
|
| - paddw m0, m5
|
| paddw m1, m5
|
| - psrlw m0, 1 ; m0 = (m0 + 1) / 2
|
| psrlw m1, 1 ; m1 = (m1 + 1) / 2
|
| %endif
|
| mova m3, [r2q] ; m3 = dequant
|
| mov r3, qcoeffmp
|
| mov r4, dqcoeffmp
|
| mov r5, iscanmp
|
| -%ifidn %1, b_32x32
|
| - psllw m4, 1
|
| +%ifidn %1, fp_32x32
|
| + psllw m2, 1 ; m2 = quant << 1 per word (fp_32x32 variant only)
|
| %endif
|
| pxor m5, m5 ; m5 = dedicated zero
|
| DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob
|
| @@ -275,18 +272,19 @@
|
| psignw m13, m10 ; m13 = reinsert sign
|
| mova [qcoeffq+ncoeffq*2+ 0], m8
|
| mova [qcoeffq+ncoeffq*2+16], m13
|
| -%ifidn %1, b_32x32
|
| +%ifidn %1, fp_32x32
|
| pabsw m8, m8
|
| pabsw m13, m13
|
| %endif
|
| pmullw m8, m3 ; dqc[i] = qc[i] * q
|
| punpckhqdq m3, m3
|
| pmullw m13, m3 ; dqc[i] = qc[i] * q
|
| -%ifidn %1, b_32x32
|
| +%ifidn %1, fp_32x32
|
| psrlw m8, 1
|
| psrlw m13, 1
|
| psignw m8, m9
|
| psignw m13, m10
|
| + psrlw m0, m3, 2 ; m0 = m3 >> 2 per word; threshold for the abs(coeff) skip test (pcmpgtw ..., m0)
|
| %endif
|
| mova [dqcoeffq+ncoeffq*2+ 0], m8
|
| mova [dqcoeffq+ncoeffq*2+16], m13
|
| @@ -307,13 +305,17 @@
|
| mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
|
| pabsw m6, m9 ; m6 = abs(m9)
|
| pabsw m11, m10 ; m11 = abs(m10)
|
| - pcmpeqw m7, m7
|
| -%ifidn %1, b_32x32
|
| +%ifidn %1, fp_32x32
|
| + pcmpgtw m7, m6, m0 ; m7 = (m6 > m0) per word, signed compare; m6 = abs(m9)
|
| + pcmpgtw m12, m11, m0 ; m12 = (m11 > m0) per word, signed compare; m11 = abs(m10)
|
| pmovmskb r6, m7
|
| - pmovmskb r2, m7
|
| + pmovmskb r2, m12 ; r2 = byte mask of m12; OR-ed with r6 so jz .skip_iter fires only when both masks are zero
|
| +
|
| or r6, r2
|
| jz .skip_iter
|
| %endif
|
| + pcmpeqw m7, m7 ; m7 = all ones; set after the fp_32x32 skip test because that test overwrites m7
|
| +
|
| paddsw m6, m1 ; m6 += round
|
| paddsw m11, m1 ; m11 += round
|
| pmulhw m14, m6, m2 ; m14 = m6*q>>16
|
| @@ -322,13 +324,13 @@
|
| psignw m13, m10 ; m13 = reinsert sign
|
| mova [qcoeffq+ncoeffq*2+ 0], m14
|
| mova [qcoeffq+ncoeffq*2+16], m13
|
| -%ifidn %1, b_32x32
|
| +%ifidn %1, fp_32x32
|
| pabsw m14, m14
|
| pabsw m13, m13
|
| %endif
|
| pmullw m14, m3 ; dqc[i] = qc[i] * q
|
| pmullw m13, m3 ; dqc[i] = qc[i] * q
|
| -%ifidn %1, b_32x32
|
| +%ifidn %1, fp_32x32
|
| psrlw m14, 1
|
| psrlw m13, 1
|
| psignw m14, m9
|
| @@ -349,7 +351,7 @@
|
| add ncoeffq, mmsize
|
| jl .ac_only_loop
|
|
|
| -%ifidn %1, b_32x32
|
| +%ifidn %1, fp_32x32
|
| jmp .accumulate_eob
|
| .skip_iter:
|
| mova [qcoeffq+ncoeffq*2+ 0], m5
|
| @@ -397,3 +399,4 @@
|
|
|
| INIT_XMM ssse3
|
| QUANTIZE_FP fp, 7
|
| +QUANTIZE_FP fp_32x32, 7 ; second instantiation; %1 = fp_32x32 enables the %ifidn %1, fp_32x32 paths in the macro
|
|
|