Index: source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm |
diff --git a/source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm b/source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm |
index 72e01d646c1864492c4d2fc9dc98b6ded4485ea7..c35eb3603ce26d2450fc01fa8b322dcf96931abd 100644 |
--- a/source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm |
+++ b/source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm |
@@ -15,6 +15,7 @@ pw_1: times 8 dw 1 |
SECTION .text |
+; TODO(yunqingwang): fix the quantize_b code for the skip=1 case. |
%macro QUANTIZE_FN 2 |
cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ |
shift, qcoeff, dqcoeff, dequant, \ |
@@ -244,11 +245,11 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ |
psllw m2, 1 |
%endif |
pxor m5, m5 ; m5 = dedicated zero |
- DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob |
+ |
lea coeffq, [ coeffq+ncoeffq*2] |
- lea iscanq, [ iscanq+ncoeffq*2] |
- lea qcoeffq, [ qcoeffq+ncoeffq*2] |
- lea dqcoeffq, [dqcoeffq+ncoeffq*2] |
+ lea r5q, [ r5q+ncoeffq*2] |
+ lea r3q, [ r3q+ncoeffq*2] |
+ lea r4q, [r4q+ncoeffq*2] |
neg ncoeffq |
; get DC and first 15 AC coeffs |
@@ -266,15 +267,15 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ |
pmulhw m13, m11, m2 ; m13 = m11*q>>16 |
psignw m8, m9 ; m8 = reinsert sign |
psignw m13, m10 ; m13 = reinsert sign |
- mova [qcoeffq+ncoeffq*2+ 0], m8 |
- mova [qcoeffq+ncoeffq*2+16], m13 |
+ mova [r3q+ncoeffq*2+ 0], m8 |
+ mova [r3q+ncoeffq*2+16], m13 |
%ifidn %1, fp_32x32 |
pabsw m8, m8 |
pabsw m13, m13 |
%endif |
- pmullw m8, m3 ; dqc[i] = qc[i] * q |
+ pmullw m8, m3 ; dqcoeff[i] = qcoeff[i] * q (r3 = qcoeff ptr, r4 = dqcoeff ptr) |
punpckhqdq m3, m3 |
- pmullw m13, m3 ; dqc[i] = qc[i] * q |
+ pmullw m13, m3 ; dqcoeff[i] = qcoeff[i] * q (r3 = qcoeff ptr, r4 = dqcoeff ptr) |
%ifidn %1, fp_32x32 |
psrlw m8, 1 |
psrlw m13, 1 |
@@ -282,12 +283,12 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ |
psignw m13, m10 |
psrlw m0, m3, 2 |
%endif |
- mova [dqcoeffq+ncoeffq*2+ 0], m8 |
- mova [dqcoeffq+ncoeffq*2+16], m13 |
+ mova [r4q+ncoeffq*2+ 0], m8 |
+ mova [r4q+ncoeffq*2+16], m13 |
pcmpeqw m8, m5 ; m8 = c[i] == 0 |
pcmpeqw m13, m5 ; m13 = c[i] == 0 |
- mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i] |
- mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i] |
+ mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = scan[i], read through r5 (iscan ptr) |
+ mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i], read through r5 (iscan ptr) |
psubw m6, m7 ; m6 = scan[i] + 1 |
psubw m11, m7 ; m11 = scan[i] + 1 |
pandn m8, m6 ; m8 = max(eob) |
@@ -318,26 +319,26 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ |
pmulhw m13, m11, m2 ; m13 = m11*q>>16 |
psignw m14, m9 ; m14 = reinsert sign |
psignw m13, m10 ; m13 = reinsert sign |
- mova [qcoeffq+ncoeffq*2+ 0], m14 |
- mova [qcoeffq+ncoeffq*2+16], m13 |
+ mova [r3q+ncoeffq*2+ 0], m14 |
+ mova [r3q+ncoeffq*2+16], m13 |
%ifidn %1, fp_32x32 |
pabsw m14, m14 |
pabsw m13, m13 |
%endif |
- pmullw m14, m3 ; dqc[i] = qc[i] * q |
- pmullw m13, m3 ; dqc[i] = qc[i] * q |
+ pmullw m14, m3 ; dqcoeff[i] = qcoeff[i] * q (r3 = qcoeff ptr, r4 = dqcoeff ptr) |
+ pmullw m13, m3 ; dqcoeff[i] = qcoeff[i] * q (r3 = qcoeff ptr, r4 = dqcoeff ptr) |
%ifidn %1, fp_32x32 |
psrlw m14, 1 |
psrlw m13, 1 |
psignw m14, m9 |
psignw m13, m10 |
%endif |
- mova [dqcoeffq+ncoeffq*2+ 0], m14 |
- mova [dqcoeffq+ncoeffq*2+16], m13 |
+ mova [r4q+ncoeffq*2+ 0], m14 |
+ mova [r4q+ncoeffq*2+16], m13 |
pcmpeqw m14, m5 ; m14 = c[i] == 0 |
pcmpeqw m13, m5 ; m13 = c[i] == 0 |
- mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i] |
- mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i] |
+ mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = scan[i], read through r5 (iscan ptr) |
+ mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i], read through r5 (iscan ptr) |
psubw m6, m7 ; m6 = scan[i] + 1 |
psubw m11, m7 ; m11 = scan[i] + 1 |
pandn m14, m6 ; m14 = max(eob) |
@@ -350,10 +351,10 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ |
%ifidn %1, fp_32x32 |
jmp .accumulate_eob |
.skip_iter: |
- mova [qcoeffq+ncoeffq*2+ 0], m5 |
- mova [qcoeffq+ncoeffq*2+16], m5 |
- mova [dqcoeffq+ncoeffq*2+ 0], m5 |
- mova [dqcoeffq+ncoeffq*2+16], m5 |
+ mova [r3q+ncoeffq*2+ 0], m5 |
+ mova [r3q+ncoeffq*2+16], m5 |
+ mova [r4q+ncoeffq*2+ 0], m5 |
+ mova [r4q+ncoeffq*2+16], m5 |
add ncoeffq, mmsize |
jl .ac_only_loop |
%endif |
@@ -368,7 +369,7 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ |
pshuflw m7, m8, 0x1 |
pmaxsw m8, m7 |
pextrw r6, m8, 0 |
- mov [r2], r6 |
+ mov [r2], r6 |
RET |
; skip-block, i.e. just write all zeroes |
@@ -377,19 +378,19 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ |
movifnidn ncoeffq, ncoeffmp |
mov r2, qcoeffmp |
mov r3, eobmp |
- DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob |
- lea dqcoeffq, [dqcoeffq+ncoeffq*2] |
- lea qcoeffq, [ qcoeffq+ncoeffq*2] |
+ |
+ lea r0q, [r0q+ncoeffq*2] |
+ lea r2q, [r2q+ncoeffq*2] |
neg ncoeffq |
pxor m7, m7 |
.blank_loop: |
- mova [dqcoeffq+ncoeffq*2+ 0], m7 |
- mova [dqcoeffq+ncoeffq*2+16], m7 |
- mova [qcoeffq+ncoeffq*2+ 0], m7 |
- mova [qcoeffq+ncoeffq*2+16], m7 |
+ mova [r0q+ncoeffq*2+ 0], m7 |
+ mova [r0q+ncoeffq*2+16], m7 |
+ mova [r2q+ncoeffq*2+ 0], m7 |
+ mova [r2q+ncoeffq*2+16], m7 |
add ncoeffq, mmsize |
jl .blank_loop |
- mov word [eobq], 0 |
+ mov word [r3q], 0 |
RET |
%endmacro |