Index: source/libvpx/vp9/common/x86/vp9_intrapred_sse2.asm |
diff --git a/source/libvpx/vp9/common/x86/vp9_intrapred_sse2.asm b/source/libvpx/vp9/common/x86/vp9_intrapred_sse2.asm |
index 69b07f6457508bc7d37afc29def550825b864ce1..22b5731886c530979e03716d354ea01b8308ca37 100644 |
--- a/source/libvpx/vp9/common/x86/vp9_intrapred_sse2.asm |
+++ b/source/libvpx/vp9/common/x86/vp9_intrapred_sse2.asm |
@@ -15,6 +15,11 @@ pw_4: times 8 dw 4 |
pw_8: times 8 dw 8 |
pw_16: times 8 dw 16 |
pw_32: times 8 dw 32 |
+; Fill value used by the dc_128 predictors: sixteen bytes, each 128. |
+dc_128: times 16 db 0x80 |
+; pw2_N: eight words holding N/2. The dc_top/dc_left predictors add this |
+; rounding term to an N-byte sum before shifting right by log2(N). |
+pw2_4: times 8 dw 4/2 |
+pw2_8: times 8 dw 8/2 |
+pw2_16: times 8 dw 16/2 |
+pw2_32: times 8 dw 32/2 |
SECTION .text |
@@ -40,6 +45,46 @@ cglobal dc_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset |
RET |
INIT_MMX sse |
+; dc_left_predictor_4x4: arguments dst, stride, above, left. |
+; Writes 4 rows of 4 bytes starting at dst, rows stride bytes apart. Every |
+; byte written is (sum of the 4 bytes at left + 2) >> 2. above is not read. |
+; GET_GOT / GLOBAL / RESTORE_GOT are macros defined outside this chunk; they |
+; bracket the access to the pw2_4 constant. |
+cglobal dc_left_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset |
+ GET_GOT goffsetq |
+ |
+ movd m0, [leftq] ; 4 source bytes, rest of m0 cleared |
+ pxor m1, m1 ; all-zero operand for psadbw |
+ psadbw m0, m1 ; SAD against zero = plain byte sum |
+ paddw m0, [GLOBAL(pw2_4)] ; rounding term (2) |
+ psraw m0, 2 ; divide by 4 |
+ pshufw m0, m0, 0 ; replicate word 0 into all four words |
+ packuswb m0, m0 ; words to bytes: every byte is the DC value |
+ |
+ movd [dstq ], m0 ; row 0 |
+ movd [dstq+strideq ], m0 ; row 1 |
+ movd [dstq+strideq*2], m0 ; row 2 |
+ lea dstq, [dstq+strideq*2] |
+ movd [dstq+strideq ], m0 ; row 3 |
+ |
+ RESTORE_GOT |
+ RET |
+ |
+INIT_MMX sse |
+; dc_top_predictor_4x4: arguments dst, stride, above, left. |
+; Writes 4 rows of 4 bytes starting at dst, rows stride bytes apart. Every |
+; byte written is (sum of the 4 bytes at above + 2) >> 2. left is not read. |
+; GET_GOT / GLOBAL / RESTORE_GOT are macros defined outside this chunk; they |
+; bracket the access to the pw2_4 constant. |
+cglobal dc_top_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset |
+ GET_GOT goffsetq |
+ |
+ movd m0, [aboveq] ; 4 source bytes, rest of m0 cleared |
+ pxor m1, m1 ; all-zero operand for psadbw |
+ psadbw m0, m1 ; SAD against zero = plain byte sum |
+ paddw m0, [GLOBAL(pw2_4)] ; rounding term (2) |
+ psraw m0, 2 ; divide by 4 |
+ pshufw m0, m0, 0 ; replicate word 0 into all four words |
+ packuswb m0, m0 ; words to bytes: every byte is the DC value |
+ |
+ movd [dstq ], m0 ; row 0 |
+ movd [dstq+strideq ], m0 ; row 1 |
+ movd [dstq+strideq*2], m0 ; row 2 |
+ lea dstq, [dstq+strideq*2] |
+ movd [dstq+strideq ], m0 ; row 3 |
+ |
+ RESTORE_GOT |
+ RET |
+ |
+INIT_MMX sse |
cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset |
GET_GOT goffsetq |
@@ -68,6 +113,91 @@ cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset |
RESTORE_GOT |
RET |
+INIT_MMX sse |
+; dc_top_predictor_8x8: arguments dst, stride, above, left. |
+; Writes 8 rows of 8 bytes starting at dst, rows stride bytes apart. Every |
+; byte written is (sum of the 8 bytes at above + 4) >> 3. left is not read. |
+cglobal dc_top_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset |
+ GET_GOT goffsetq |
+ |
+ ; Load before DEFINE_ARGS: the argument names change after it (presumably |
+ ; stride_x3 takes over the register that held above -- confirm in x86inc). |
+ movq m0, [aboveq] |
+ pxor m1, m1 ; all-zero operand for psadbw |
+ DEFINE_ARGS dst, stride, stride_x3 |
+ psadbw m0, m1 ; SAD against zero = plain byte sum |
+ paddw m0, [GLOBAL(pw2_8)] ; rounding term (4) |
+ psraw m0, 3 ; divide by 8 |
+ pshufw m0, m0, 0 ; replicate word 0 into all four words |
+ packuswb m0, m0 ; words to bytes: every byte is the DC value |
+ lea stride_x3q, [strideq*3] |
+ |
+ ; rows 0-3 |
+ movq [dstq ], m0 |
+ movq [dstq+strideq ], m0 |
+ movq [dstq+strideq*2], m0 |
+ movq [dstq+stride_x3q], m0 |
+ ; rows 4-7 |
+ lea dstq, [dstq+strideq*4] |
+ movq [dstq ], m0 |
+ movq [dstq+strideq ], m0 |
+ movq [dstq+strideq*2], m0 |
+ movq [dstq+stride_x3q], m0 |
+ |
+ RESTORE_GOT |
+ RET |
+ |
+INIT_MMX sse |
+; dc_left_predictor_8x8: arguments dst, stride, above, left. |
+; Writes 8 rows of 8 bytes starting at dst, rows stride bytes apart. Every |
+; byte written is (sum of the 8 bytes at left + 4) >> 3. above is not read. |
+cglobal dc_left_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset |
+ GET_GOT goffsetq |
+ |
+ ; Load before DEFINE_ARGS: the name leftq is no longer listed after it. |
+ movq m0, [leftq] |
+ pxor m1, m1 ; all-zero operand for psadbw |
+ DEFINE_ARGS dst, stride, stride_x3 |
+ psadbw m0, m1 ; SAD against zero = plain byte sum |
+ paddw m0, [GLOBAL(pw2_8)] ; rounding term (4) |
+ psraw m0, 3 ; divide by 8 |
+ pshufw m0, m0, 0 ; replicate word 0 into all four words |
+ packuswb m0, m0 ; words to bytes: every byte is the DC value |
+ lea stride_x3q, [strideq*3] |
+ |
+ ; rows 0-3 |
+ movq [dstq ], m0 |
+ movq [dstq+strideq ], m0 |
+ movq [dstq+strideq*2], m0 |
+ movq [dstq+stride_x3q], m0 |
+ ; rows 4-7 |
+ lea dstq, [dstq+strideq*4] |
+ movq [dstq ], m0 |
+ movq [dstq+strideq ], m0 |
+ movq [dstq+strideq*2], m0 |
+ movq [dstq+stride_x3q], m0 |
+ |
+ RESTORE_GOT |
+ RET |
+ |
+INIT_MMX sse |
+; dc_128_predictor_4x4: arguments dst, stride, above, left. |
+; Writes 4 rows of 4 bytes starting at dst, rows stride bytes apart, each |
+; byte taken from the dc_128 constant (128). above and left are not read. |
+cglobal dc_128_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset |
+ GET_GOT goffsetq |
+ |
+ DEFINE_ARGS dst, stride, stride_x3 |
+ movd m0, [GLOBAL(dc_128)] ; first 4 bytes of the constant |
+ lea stride_x3q, [strideq*3] |
+ |
+ movd [dstq ], m0 ; row 0 |
+ movd [dstq+strideq ], m0 ; row 1 |
+ movd [dstq+strideq*2], m0 ; row 2 |
+ movd [dstq+stride_x3q], m0 ; row 3 |
+ RESTORE_GOT |
+ RET |
+ |
+INIT_MMX sse |
+; dc_128_predictor_8x8: arguments dst, stride, above, left. |
+; Writes 8 rows of 8 bytes starting at dst, rows stride bytes apart, each |
+; byte taken from the dc_128 constant (128). above and left are not read. |
+cglobal dc_128_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset |
+ GET_GOT goffsetq |
+ |
+ DEFINE_ARGS dst, stride, stride_x3 |
+ movq m0, [GLOBAL(dc_128)] ; first 8 bytes of the constant |
+ lea stride_x3q, [strideq*3] |
+ |
+ ; rows 0-3 |
+ movq [dstq ], m0 |
+ movq [dstq+strideq ], m0 |
+ movq [dstq+strideq*2], m0 |
+ movq [dstq+stride_x3q], m0 |
+ ; rows 4-7 |
+ lea dstq, [dstq+strideq*4] |
+ movq [dstq ], m0 |
+ movq [dstq+strideq ], m0 |
+ movq [dstq+strideq*2], m0 |
+ movq [dstq+stride_x3q], m0 |
+ RESTORE_GOT |
+ RET |
+ |
INIT_XMM sse2 |
cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset |
GET_GOT goffsetq |
@@ -100,6 +230,91 @@ cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset |
RESTORE_GOT |
REP_RET |
+ |
+INIT_XMM sse2 |
+; dc_top_predictor_16x16: arguments dst, stride, above, left. |
+; Writes 16 rows of 16 bytes starting at dst, rows stride bytes apart. Every |
+; byte written is (sum of the 16 bytes at above + 8) >> 4. left is not read. |
+; mova is used for the load and the stores; presumably this requires above |
+; and every dst row to be 16-byte aligned -- confirm against the callers. |
+cglobal dc_top_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset |
+ GET_GOT goffsetq |
+ |
+ pxor m1, m1 ; all-zero operand for psadbw |
+ mova m0, [aboveq] ; must precede DEFINE_ARGS (names change after it) |
+ DEFINE_ARGS dst, stride, stride3, lines4 |
+ lea stride3q, [strideq*3] |
+ mov lines4d, 4 ; 4 iterations of 4 rows |
+ psadbw m0, m1 ; two partial sums, one per 8-byte half |
+ ; Only one edge is summed here, so no second psadbw/paddw is needed. |
+ ; movhlps leaves the upper half of m2 unspecified; that is harmless because |
+ ; pshuflw/punpcklqdq below rebuild the upper half of m0 from its low word. |
+ movhlps m2, m0 ; bring the upper partial sum down |
+ paddw m0, m2 ; word 0 = sum of all 16 bytes |
+ paddw m0, [GLOBAL(pw2_16)] ; rounding term (8) |
+ psraw m0, 4 ; divide by 16 |
+ pshuflw m0, m0, 0x0 ; replicate word 0 across the low four words |
+ punpcklqdq m0, m0 ; copy the low half into the high half |
+ packuswb m0, m0 ; words to bytes: every byte is the DC value |
+.loop: |
+ mova [dstq ], m0 |
+ mova [dstq+strideq ], m0 |
+ mova [dstq+strideq*2], m0 |
+ mova [dstq+stride3q ], m0 |
+ lea dstq, [dstq+strideq*4] |
+ dec lines4d |
+ jnz .loop |
+ |
+ RESTORE_GOT |
+ REP_RET |
+ |
+INIT_XMM sse2 |
+; dc_left_predictor_16x16: arguments dst, stride, above, left. |
+; Writes 16 rows of 16 bytes starting at dst, rows stride bytes apart. Every |
+; byte written is (sum of the 16 bytes at left + 8) >> 4. above is not read. |
+; mova is used for the load and the stores; presumably this requires left |
+; and every dst row to be 16-byte aligned -- confirm against the callers. |
+cglobal dc_left_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset |
+ GET_GOT goffsetq |
+ |
+ pxor m1, m1 ; all-zero operand for psadbw |
+ mova m0, [leftq] ; must precede DEFINE_ARGS (names change after it) |
+ DEFINE_ARGS dst, stride, stride3, lines4 |
+ lea stride3q, [strideq*3] |
+ mov lines4d, 4 ; 4 iterations of 4 rows |
+ psadbw m0, m1 ; two partial sums, one per 8-byte half |
+ ; Only one edge is summed here, so no second psadbw/paddw is needed. |
+ ; movhlps leaves the upper half of m2 unspecified; that is harmless because |
+ ; pshuflw/punpcklqdq below rebuild the upper half of m0 from its low word. |
+ movhlps m2, m0 ; bring the upper partial sum down |
+ paddw m0, m2 ; word 0 = sum of all 16 bytes |
+ paddw m0, [GLOBAL(pw2_16)] ; rounding term (8) |
+ psraw m0, 4 ; divide by 16 |
+ pshuflw m0, m0, 0x0 ; replicate word 0 across the low four words |
+ punpcklqdq m0, m0 ; copy the low half into the high half |
+ packuswb m0, m0 ; words to bytes: every byte is the DC value |
+.loop: |
+ mova [dstq ], m0 |
+ mova [dstq+strideq ], m0 |
+ mova [dstq+strideq*2], m0 |
+ mova [dstq+stride3q ], m0 |
+ lea dstq, [dstq+strideq*4] |
+ dec lines4d |
+ jnz .loop |
+ |
+ RESTORE_GOT |
+ REP_RET |
+ |
+INIT_XMM sse2 |
+; dc_128_predictor_16x16: arguments dst, stride, above, left. |
+; Writes 16 rows of 16 bytes starting at dst, rows stride bytes apart, each |
+; byte taken from the dc_128 constant (128). above and left are not read. |
+; mova is used for the stores; presumably every dst row must be 16-byte |
+; aligned -- confirm against the callers. |
+cglobal dc_128_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset |
+ GET_GOT goffsetq |
+ |
+ DEFINE_ARGS dst, stride, stride3, lines4 |
+ lea stride3q, [strideq*3] |
+ mov lines4d, 4 ; 4 iterations of 4 rows |
+ mova m0, [GLOBAL(dc_128)] |
+.loop: |
+ mova [dstq ], m0 |
+ mova [dstq+strideq ], m0 |
+ mova [dstq+strideq*2], m0 |
+ mova [dstq+stride3q ], m0 |
+ lea dstq, [dstq+strideq*4] |
+ dec lines4d |
+ jnz .loop |
+ RESTORE_GOT |
+ ; REP_RET to match the other looped predictors in this file: the return |
+ ; may directly follow the loop branch. |
+ REP_RET |
+ |
+ |
INIT_XMM sse2 |
cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset |
GET_GOT goffsetq |
@@ -142,6 +357,101 @@ cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset |
RESTORE_GOT |
REP_RET |
+INIT_XMM sse2 |
+; dc_top_predictor_32x32: arguments dst, stride, above, left. |
+; Writes 32 rows of 32 bytes starting at dst, rows stride bytes apart. Every |
+; byte written is (sum of the 32 bytes at above + 16) >> 5. left is not read. |
+; mova is used for loads and stores; presumably above and every dst row must |
+; be 16-byte aligned -- confirm against the callers. |
+cglobal dc_top_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset |
+ GET_GOT goffsetq |
+ |
+ ; Both loads come before DEFINE_ARGS, which changes the argument names. |
+ mova m2, [aboveq+16] ; bytes 16-31 |
+ mova m0, [aboveq] ; bytes 0-15 |
+ pxor m1, m1 ; all-zero operand for psadbw |
+ DEFINE_ARGS dst, stride, stride_x3, rows4 |
+ psadbw m0, m1 ; partial sums of bytes 0-15 |
+ psadbw m2, m1 ; partial sums of bytes 16-31 |
+ paddw m0, m2 |
+ movhlps m2, m0 ; bring the upper partial sum down |
+ paddw m0, m2 ; word 0 = sum of all 32 bytes |
+ paddw m0, [GLOBAL(pw2_32)] ; rounding term (16) |
+ psraw m0, 5 ; divide by 32 |
+ pshuflw m0, m0, 0 ; replicate word 0 across the low four words |
+ punpcklqdq m0, m0 ; copy the low half into the high half |
+ packuswb m0, m0 ; words to bytes: every byte is the DC value |
+ lea stride_x3q, [strideq*3] |
+ mov rows4d, 8 ; 8 iterations of 4 rows |
+.fill_rows: |
+ mova [dstq ], m0 |
+ mova [dstq +16], m0 |
+ mova [dstq+strideq ], m0 |
+ mova [dstq+strideq +16], m0 |
+ mova [dstq+strideq*2 ], m0 |
+ mova [dstq+strideq*2+16], m0 |
+ mova [dstq+stride_x3q ], m0 |
+ mova [dstq+stride_x3q+16], m0 |
+ lea dstq, [dstq+strideq*4] |
+ dec rows4d |
+ jnz .fill_rows |
+ |
+ RESTORE_GOT |
+ REP_RET |
+ |
+INIT_XMM sse2 |
+; dc_left_predictor_32x32: arguments dst, stride, above, left. |
+; Writes 32 rows of 32 bytes starting at dst, rows stride bytes apart. Every |
+; byte written is (sum of the 32 bytes at left + 16) >> 5. above is not read. |
+; mova is used for loads and stores; presumably left and every dst row must |
+; be 16-byte aligned -- confirm against the callers. |
+cglobal dc_left_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset |
+ GET_GOT goffsetq |
+ |
+ ; Both loads come before DEFINE_ARGS, which changes the argument names. |
+ mova m2, [leftq+16] ; bytes 16-31 |
+ mova m0, [leftq] ; bytes 0-15 |
+ pxor m1, m1 ; all-zero operand for psadbw |
+ DEFINE_ARGS dst, stride, stride_x3, rows4 |
+ psadbw m0, m1 ; partial sums of bytes 0-15 |
+ psadbw m2, m1 ; partial sums of bytes 16-31 |
+ paddw m0, m2 |
+ movhlps m2, m0 ; bring the upper partial sum down |
+ paddw m0, m2 ; word 0 = sum of all 32 bytes |
+ paddw m0, [GLOBAL(pw2_32)] ; rounding term (16) |
+ psraw m0, 5 ; divide by 32 |
+ pshuflw m0, m0, 0 ; replicate word 0 across the low four words |
+ punpcklqdq m0, m0 ; copy the low half into the high half |
+ packuswb m0, m0 ; words to bytes: every byte is the DC value |
+ lea stride_x3q, [strideq*3] |
+ mov rows4d, 8 ; 8 iterations of 4 rows |
+.fill_rows: |
+ mova [dstq ], m0 |
+ mova [dstq +16], m0 |
+ mova [dstq+strideq ], m0 |
+ mova [dstq+strideq +16], m0 |
+ mova [dstq+strideq*2 ], m0 |
+ mova [dstq+strideq*2+16], m0 |
+ mova [dstq+stride_x3q ], m0 |
+ mova [dstq+stride_x3q+16], m0 |
+ lea dstq, [dstq+strideq*4] |
+ dec rows4d |
+ jnz .fill_rows |
+ |
+ RESTORE_GOT |
+ REP_RET |
+ |
+INIT_XMM sse2 |
+; dc_128_predictor_32x32: arguments dst, stride, above, left. |
+; Writes 32 rows of 32 bytes starting at dst, rows stride bytes apart, each |
+; byte taken from the dc_128 constant (128). above and left are not read. |
+; mova is used for the stores; presumably every dst row must be 16-byte |
+; aligned -- confirm against the callers. |
+cglobal dc_128_predictor_32x32, 4, 5, 3, dst, stride, above, left, goffset |
+ GET_GOT goffsetq |
+ |
+ DEFINE_ARGS dst, stride, stride3, lines4 |
+ lea stride3q, [strideq*3] |
+ mov lines4d, 8 ; 8 iterations of 4 rows |
+ mova m0, [GLOBAL(dc_128)] |
+.loop: |
+ mova [dstq ], m0 |
+ mova [dstq +16], m0 |
+ mova [dstq+strideq ], m0 |
+ mova [dstq+strideq +16], m0 |
+ mova [dstq+strideq*2 ], m0 |
+ mova [dstq+strideq*2+16], m0 |
+ mova [dstq+stride3q ], m0 |
+ mova [dstq+stride3q +16], m0 |
+ lea dstq, [dstq+strideq*4] |
+ dec lines4d |
+ jnz .loop |
+ RESTORE_GOT |
+ ; REP_RET to match the other looped predictors in this file: the return |
+ ; may directly follow the loop branch. |
+ REP_RET |
+ |
INIT_MMX sse |
cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above |
movd m0, [aboveq] |