| Index: source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
|
| diff --git a/source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm b/source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
|
| index 72e01d646c1864492c4d2fc9dc98b6ded4485ea7..c35eb3603ce26d2450fc01fa8b322dcf96931abd 100644
|
| --- a/source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
|
| +++ b/source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
|
| @@ -15,6 +15,7 @@ pw_1: times 8 dw 1
|
|
|
| SECTION .text
|
|
|
| +; TODO(yunqingwang): fix quantize_b code for the skip=1 case.
|
| %macro QUANTIZE_FN 2
|
| cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
| shift, qcoeff, dqcoeff, dequant, \
|
| @@ -244,11 +245,11 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
| psllw m2, 1
|
| %endif
|
| pxor m5, m5 ; m5 = dedicated zero
|
| - DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob
|
| +
|
| lea coeffq, [ coeffq+ncoeffq*2]
|
| - lea iscanq, [ iscanq+ncoeffq*2]
|
| - lea qcoeffq, [ qcoeffq+ncoeffq*2]
|
| - lea dqcoeffq, [dqcoeffq+ncoeffq*2]
|
| + lea r5q, [ r5q+ncoeffq*2]
|
| + lea r3q, [ r3q+ncoeffq*2]
|
| + lea r4q, [r4q+ncoeffq*2]
|
| neg ncoeffq
|
|
|
| ; get DC and first 15 AC coeffs
|
| @@ -266,15 +267,15 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
| pmulhw m13, m11, m2 ; m13 = m11*q>>16
|
| psignw m8, m9 ; m8 = reinsert sign
|
| psignw m13, m10 ; m13 = reinsert sign
|
| - mova [qcoeffq+ncoeffq*2+ 0], m8
|
| - mova [qcoeffq+ncoeffq*2+16], m13
|
| + mova [r3q+ncoeffq*2+ 0], m8
|
| + mova [r3q+ncoeffq*2+16], m13
|
| %ifidn %1, fp_32x32
|
| pabsw m8, m8
|
| pabsw m13, m13
|
| %endif
|
| - pmullw m8, m3 ; dqc[i] = qc[i] * q
|
| + pmullw m8, m3 ; dqcoeff[i] = qcoeff[i] * q
|
| punpckhqdq m3, m3
|
| - pmullw m13, m3 ; dqc[i] = qc[i] * q
|
| + pmullw m13, m3 ; dqcoeff[i] = qcoeff[i] * q
|
| %ifidn %1, fp_32x32
|
| psrlw m8, 1
|
| psrlw m13, 1
|
| @@ -282,12 +283,12 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
| psignw m13, m10
|
| psrlw m0, m3, 2
|
| %endif
|
| - mova [dqcoeffq+ncoeffq*2+ 0], m8
|
| - mova [dqcoeffq+ncoeffq*2+16], m13
|
| + mova [r4q+ncoeffq*2+ 0], m8
|
| + mova [r4q+ncoeffq*2+16], m13
|
| pcmpeqw m8, m5 ; m8 = c[i] == 0
|
| pcmpeqw m13, m5 ; m13 = c[i] == 0
|
| - mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
|
| - mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
|
| + mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = scan[i]
|
| + mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i]
|
| psubw m6, m7 ; m6 = scan[i] + 1
|
| psubw m11, m7 ; m11 = scan[i] + 1
|
| pandn m8, m6 ; m8 = max(eob)
|
| @@ -318,26 +319,26 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
| pmulhw m13, m11, m2 ; m13 = m11*q>>16
|
| psignw m14, m9 ; m14 = reinsert sign
|
| psignw m13, m10 ; m13 = reinsert sign
|
| - mova [qcoeffq+ncoeffq*2+ 0], m14
|
| - mova [qcoeffq+ncoeffq*2+16], m13
|
| + mova [r3q+ncoeffq*2+ 0], m14
|
| + mova [r3q+ncoeffq*2+16], m13
|
| %ifidn %1, fp_32x32
|
| pabsw m14, m14
|
| pabsw m13, m13
|
| %endif
|
| - pmullw m14, m3 ; dqc[i] = qc[i] * q
|
| - pmullw m13, m3 ; dqc[i] = qc[i] * q
|
| + pmullw m14, m3 ; dqcoeff[i] = qcoeff[i] * q
|
| + pmullw m13, m3 ; dqcoeff[i] = qcoeff[i] * q
|
| %ifidn %1, fp_32x32
|
| psrlw m14, 1
|
| psrlw m13, 1
|
| psignw m14, m9
|
| psignw m13, m10
|
| %endif
|
| - mova [dqcoeffq+ncoeffq*2+ 0], m14
|
| - mova [dqcoeffq+ncoeffq*2+16], m13
|
| + mova [r4q+ncoeffq*2+ 0], m14
|
| + mova [r4q+ncoeffq*2+16], m13
|
| pcmpeqw m14, m5 ; m14 = c[i] == 0
|
| pcmpeqw m13, m5 ; m13 = c[i] == 0
|
| - mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
|
| - mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
|
| + mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = scan[i]
|
| + mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i]
|
| psubw m6, m7 ; m6 = scan[i] + 1
|
| psubw m11, m7 ; m11 = scan[i] + 1
|
| pandn m14, m6 ; m14 = max(eob)
|
| @@ -350,10 +351,10 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
| %ifidn %1, fp_32x32
|
| jmp .accumulate_eob
|
| .skip_iter:
|
| - mova [qcoeffq+ncoeffq*2+ 0], m5
|
| - mova [qcoeffq+ncoeffq*2+16], m5
|
| - mova [dqcoeffq+ncoeffq*2+ 0], m5
|
| - mova [dqcoeffq+ncoeffq*2+16], m5
|
| + mova [r3q+ncoeffq*2+ 0], m5
|
| + mova [r3q+ncoeffq*2+16], m5
|
| + mova [r4q+ncoeffq*2+ 0], m5
|
| + mova [r4q+ncoeffq*2+16], m5
|
| add ncoeffq, mmsize
|
| jl .ac_only_loop
|
| %endif
|
| @@ -368,7 +369,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
| pshuflw m7, m8, 0x1
|
| pmaxsw m8, m7
|
| pextrw r6, m8, 0
|
| - mov [r2], r6
|
| + mov [r2], r6
|
| RET
|
|
|
| ; skip-block, i.e. just write all zeroes
|
| @@ -377,19 +378,19 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
|
| movifnidn ncoeffq, ncoeffmp
|
| mov r2, qcoeffmp
|
| mov r3, eobmp
|
| - DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob
|
| - lea dqcoeffq, [dqcoeffq+ncoeffq*2]
|
| - lea qcoeffq, [ qcoeffq+ncoeffq*2]
|
| +
|
| + lea r0q, [r0q+ncoeffq*2]
|
| + lea r2q, [r2q+ncoeffq*2]
|
| neg ncoeffq
|
| pxor m7, m7
|
| .blank_loop:
|
| - mova [dqcoeffq+ncoeffq*2+ 0], m7
|
| - mova [dqcoeffq+ncoeffq*2+16], m7
|
| - mova [qcoeffq+ncoeffq*2+ 0], m7
|
| - mova [qcoeffq+ncoeffq*2+16], m7
|
| + mova [r0q+ncoeffq*2+ 0], m7
|
| + mova [r0q+ncoeffq*2+16], m7
|
| + mova [r2q+ncoeffq*2+ 0], m7
|
| + mova [r2q+ncoeffq*2+16], m7
|
| add ncoeffq, mmsize
|
| jl .blank_loop
|
| - mov word [eobq], 0
|
| + mov word [r3q], 0
|
| RET
|
| %endmacro
|
|
|
|
|