| Index: source/libvpx/vp9/common/x86/vp9_subpixel_8t_ssse3.asm
|
| ===================================================================
|
| --- source/libvpx/vp9/common/x86/vp9_subpixel_8t_ssse3.asm (revision 293588)
|
| +++ source/libvpx/vp9/common/x86/vp9_subpixel_8t_ssse3.asm (working copy)
|
| @@ -18,7 +18,7 @@
|
| mov rcx, 0x0400040
|
|
|
| movdqa xmm4, [rdx] ;load filters
|
| - movd xmm5, rcx
|
| + movq xmm5, rcx
|
| packsswb xmm4, xmm4
|
| pshuflw xmm0, xmm4, 0b ;k0_k1
|
| pshuflw xmm1, xmm4, 01010101b ;k2_k3
|
| @@ -661,7 +661,7 @@
|
| mov rcx, 0x0400040
|
|
|
| movdqa xmm4, [rdx] ;load filters
|
| - movd xmm5, rcx
|
| + movq xmm5, rcx
|
| packsswb xmm4, xmm4
|
| pshuflw xmm0, xmm4, 0b ;k0_k1
|
| pshuflw xmm1, xmm4, 01010101b ;k2_k3
|
| @@ -765,40 +765,50 @@
|
|
|
| movq xmm0, [rsi - 3] ;load src data
|
| movq xmm4, [rsi + 5]
|
| - movq xmm7, [rsi + 13]
|
| + movq xmm6, [rsi + 13]
|
| punpcklqdq xmm0, xmm4
|
| - punpcklqdq xmm4, xmm7
|
| + punpcklqdq xmm4, xmm6
|
|
|
| + movdqa xmm7, xmm0
|
| +
|
| + punpcklbw xmm7, xmm7
|
| + punpckhbw xmm0, xmm0
|
| movdqa xmm1, xmm0
|
| movdqa xmm2, xmm0
|
| movdqa xmm3, xmm0
|
| - movdqa xmm5, xmm4
|
| - movdqa xmm6, xmm4
|
| - movdqa xmm7, xmm4
|
|
|
| - pshufb xmm0, [GLOBAL(shuf_t0t1)]
|
| - pshufb xmm1, [GLOBAL(shuf_t2t3)]
|
| - pshufb xmm2, [GLOBAL(shuf_t4t5)]
|
| - pshufb xmm3, [GLOBAL(shuf_t6t7)]
|
| - pshufb xmm4, [GLOBAL(shuf_t0t1)]
|
| - pshufb xmm5, [GLOBAL(shuf_t2t3)]
|
| - pshufb xmm6, [GLOBAL(shuf_t4t5)]
|
| - pshufb xmm7, [GLOBAL(shuf_t6t7)]
|
| -
|
| + palignr xmm0, xmm7, 1
|
| + palignr xmm1, xmm7, 5
|
| pmaddubsw xmm0, k0k1
|
| + palignr xmm2, xmm7, 9
|
| pmaddubsw xmm1, k2k3
|
| + palignr xmm3, xmm7, 13
|
| +
|
| pmaddubsw xmm2, k4k5
|
| pmaddubsw xmm3, k6k7
|
| - pmaddubsw xmm4, k0k1
|
| - pmaddubsw xmm5, k2k3
|
| - pmaddubsw xmm6, k4k5
|
| - pmaddubsw xmm7, k6k7
|
| + paddsw xmm0, xmm3
|
|
|
| - paddsw xmm0, xmm3
|
| + movdqa xmm3, xmm4
|
| + punpcklbw xmm3, xmm3
|
| + punpckhbw xmm4, xmm4
|
| +
|
| + movdqa xmm5, xmm4
|
| + movdqa xmm6, xmm4
|
| + movdqa xmm7, xmm4
|
| +
|
| + palignr xmm4, xmm3, 1
|
| + palignr xmm5, xmm3, 5
|
| + palignr xmm6, xmm3, 9
|
| + palignr xmm7, xmm3, 13
|
| +
|
| movdqa xmm3, xmm1
|
| + pmaddubsw xmm4, k0k1
|
| pmaxsw xmm1, xmm2
|
| + pmaddubsw xmm5, k2k3
|
| pminsw xmm2, xmm3
|
| + pmaddubsw xmm6, k4k5
|
| paddsw xmm0, xmm2
|
| + pmaddubsw xmm7, k6k7
|
| paddsw xmm0, xmm1
|
|
|
| paddsw xmm4, xmm7
|
|
|