| Index: source/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
|
| diff --git a/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm b/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
|
| index 28458dcdd52291dd18d2717958672008e6a4ee2a..3a29aba6f27839f6133b01ddbdac711a275a702a 100644
|
| --- a/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
|
| +++ b/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm
|
| @@ -179,4 +179,77 @@ cglobal fdct8x8, 3, 5, 13, input, output, stride
|
| mova [outputq + 112], m7
|
|
|
| RET
|
| +; HMD8_1D: in-place 8-point add/subtract butterfly network (3 stages) across m0-m7 on 16-bit lanes; m8/m9 are scratch.
|
| +%macro HMD8_1D 0 ; takes no macro parameters; operates on fixed registers m0-m9
|
| + psubw m8, m0, m1 ; stage 1 (adjacent pairs): m8 = m0 - m1
|
| + psubw m9, m2, m3 ; m9 = m2 - m3
|
| + paddw m0, m1 ; m0 = m0 + m1 (difference was taken first, from the unmodified inputs)
|
| + paddw m2, m3 ; m2 = m2 + m3
|
| + SWAP 1, 8 ; x86inc register rename: m1 now names the (m0 - m1) difference
|
| + SWAP 3, 9 ; m3 now names the (m2 - m3) difference
|
| + psubw m8, m4, m5 ; same pattern for pairs (4,5) and (6,7)
|
| + psubw m9, m6, m7
|
| + paddw m4, m5
|
| + paddw m6, m7
|
| + SWAP 5, 8 ; m5 = difference of pair (4,5)
|
| + SWAP 7, 9 ; m7 = difference of pair (6,7)
|
| + ; stage 2: pairs two apart - (0,2), (1,3), (4,6), (5,7); sum stays in the lower register, difference goes to the higher
|
| + psubw m8, m0, m2
|
| + psubw m9, m1, m3
|
| + paddw m0, m2
|
| + paddw m1, m3
|
| + SWAP 2, 8 ; m2 = difference of pair (0,2)
|
| + SWAP 3, 9 ; m3 = difference of pair (1,3)
|
| + psubw m8, m4, m6
|
| + psubw m9, m5, m7
|
| + paddw m4, m6
|
| + paddw m5, m7
|
| + SWAP 6, 8 ; m6 = difference of pair (4,6)
|
| + SWAP 7, 9 ; m7 = difference of pair (5,7)
|
| + ; stage 3: pairs four apart - (0,4), (1,5), (2,6), (3,7); same sum/difference placement
|
| + psubw m8, m0, m4
|
| + psubw m9, m1, m5
|
| + paddw m0, m4
|
| + paddw m1, m5
|
| + SWAP 4, 8 ; m4 = difference of pair (0,4)
|
| + SWAP 5, 9 ; m5 = difference of pair (1,5)
|
| + psubw m8, m2, m6
|
| + psubw m9, m3, m7
|
| + paddw m2, m6
|
| + paddw m3, m7
|
| + SWAP 6, 8 ; m6 = difference of pair (2,6)
|
| + SWAP 7, 9 ; m7 = difference of pair (3,7); m8/m9 hold stale values on exit
|
| +%endmacro
|
| +; hadamard_8x8(input, stride, output): load 8 vectors from input, run HMD8_1D, transpose, run HMD8_1D again, store 8 vectors to output.
|
| +INIT_XMM ssse3 ; x86inc: the function below is assembled with m# mapped to XMM registers
|
| +cglobal hadamard_8x8, 3, 5, 10, input, stride, output ; x86inc: 3 named args, 5 GPRs (r0-r4), 10 vector regs (m0-m9)
|
| + lea r3, [2 * strideq] ; r3 = 2 * stride: byte distance between consecutive loads
|
| + lea r4, [4 * strideq] ; r4 = 4 * stride = 2 * r3: pointer advance after every two loads
|
| + ; NOTE(review): loads are 2*stride bytes apart, so stride is presumably in int16 units; full 64-bit strideq is used - confirm caller's stride type. mova presumably needs 16-byte alignment.
|
| + mova m0, [inputq] ; row 0
|
| + mova m1, [inputq + r3] ; row 1
|
| + lea inputq, [inputq + r4] ; step past two rows
|
| + mova m2, [inputq] ; row 2
|
| + mova m3, [inputq + r3] ; row 3
|
| + lea inputq, [inputq + r4] ; step past two rows
|
| + mova m4, [inputq] ; row 4
|
| + mova m5, [inputq + r3] ; row 5
|
| + lea inputq, [inputq + r4] ; step past two rows (inputq is modified, not restored)
|
| + mova m6, [inputq] ; row 6
|
| + mova m7, [inputq + r3] ; row 7
|
| + ; two butterfly passes with a transpose in between, so both dimensions of the 8x8 block get transformed
|
| + HMD8_1D ; first pass: combines values across the eight loaded rows, lane by lane
|
| + TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 ; defined elsewhere; presumably transposes the 8x8 words in m0-m7 with m9 as scratch - confirm
|
| + HMD8_1D ; second pass: same butterflies on the transposed data
|
| + ; store m0-m7 to output at consecutive 16-byte offsets (128 bytes in total)
|
| + mova [outputq + 0], m0
|
| + mova [outputq + 16], m1
|
| + mova [outputq + 32], m2
|
| + mova [outputq + 48], m3
|
| + mova [outputq + 64], m4
|
| + mova [outputq + 80], m5
|
| + mova [outputq + 96], m6
|
| + mova [outputq + 112], m7
|
| +
|
| + RET
|
| %endif
|
|
|