Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(135)

Unified Diff: source/libvpx/vp9/common/x86/vp9_intrapred_sse2.asm

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: only update to last night's LKGR Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: source/libvpx/vp9/common/x86/vp9_intrapred_sse2.asm
diff --git a/source/libvpx/vp9/common/x86/vp9_intrapred_sse2.asm b/source/libvpx/vp9/common/x86/vp9_intrapred_sse2.asm
index 69b07f6457508bc7d37afc29def550825b864ce1..22b5731886c530979e03716d354ea01b8308ca37 100644
--- a/source/libvpx/vp9/common/x86/vp9_intrapred_sse2.asm
+++ b/source/libvpx/vp9/common/x86/vp9_intrapred_sse2.asm
@@ -15,6 +15,11 @@ pw_4: times 8 dw 4
pw_8: times 8 dw 8
pw_16: times 8 dw 16
pw_32: times 8 dw 32
+dc_128: times 16 db 128 ; 16 bytes of 128: the fill pattern loaded by the dc_128_predictor_* routines
+pw2_4: times 8 dw 2 ; rounding bias added before `psraw 2` in the 4x4 top/left DC predictors
+pw2_8: times 8 dw 4 ; rounding bias added before `psraw 3` in the 8x8 top/left DC predictors
+pw2_16: times 8 dw 8 ; rounding bias added before `psraw 4` in the 16x16 top/left DC predictors
+pw2_32: times 8 dw 16 ; rounding bias added before `psraw 5` in the 32x32 top/left DC predictors
SECTION .text
@@ -40,6 +45,46 @@ cglobal dc_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
RET
INIT_MMX sse
+; dc_left_predictor_4x4(dst, stride, above, left)
+; Fills the 4x4 byte block at dst (rows `stride` bytes apart) with
+; (left[0] + left[1] + left[2] + left[3] + 2) >> 2.  `above` is not read.
+cglobal dc_left_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
+    GET_GOT     goffsetq
+
+    movd        m0, [leftq]              ; four edge bytes; rest of m0 is zeroed
+    pxor        m1, m1                   ; zero operand for the byte sum
+    psadbw      m0, m1                   ; sum of |byte - 0| -> word 0 holds the total
+    paddw       m0, [GLOBAL(pw2_4)]      ; +2 so the shift rounds to nearest
+    psraw       m0, 2                    ; total / 4
+    pshufw      m0, m0, 0                ; copy word 0 into all four words
+    packuswb    m0, m0                   ; words -> bytes: DC value in every byte
+
+    movd        [dstq], m0               ; row 0
+    movd        [dstq+strideq], m0       ; row 1
+    lea         dstq, [dstq+strideq*2]   ; advance to row 2
+    movd        [dstq], m0               ; row 2
+    movd        [dstq+strideq], m0       ; row 3
+
+    RESTORE_GOT
+    RET
+
+; dc_top_predictor_4x4(dst, stride, above, left)
+; Fills the 4x4 byte block at dst (rows `stride` bytes apart) with
+; (above[0] + above[1] + above[2] + above[3] + 2) >> 2.  `left` is not read.
+INIT_MMX sse
+cglobal dc_top_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
+    GET_GOT     goffsetq
+
+    movd        m0, [aboveq]             ; four edge bytes; rest of m0 is zeroed
+    pxor        m1, m1                   ; zero operand for the byte sum
+    psadbw      m0, m1                   ; sum of |byte - 0| -> word 0 holds the total
+    paddw       m0, [GLOBAL(pw2_4)]      ; +2 so the shift rounds to nearest
+    psraw       m0, 2                    ; total / 4
+    pshufw      m0, m0, 0                ; copy word 0 into all four words
+    packuswb    m0, m0                   ; words -> bytes: DC value in every byte
+
+    movd        [dstq], m0               ; row 0
+    movd        [dstq+strideq], m0       ; row 1
+    lea         dstq, [dstq+strideq*2]   ; advance to row 2
+    movd        [dstq], m0               ; row 2
+    movd        [dstq+strideq], m0       ; row 3
+
+    RESTORE_GOT
+    RET
+
+INIT_MMX sse
cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
GET_GOT goffsetq
@@ -68,6 +113,91 @@ cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
RESTORE_GOT
RET
+; dc_top_predictor_8x8(dst, stride, above, left)
+; Fills the 8x8 byte block at dst (rows `stride` bytes apart) with
+; (sum of above[0..7] + 4) >> 3.  `left` is not read.
+INIT_MMX sse
+cglobal dc_top_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
+    GET_GOT     goffsetq
+
+    movq        m0, [aboveq]             ; must load before stride3 reuses above's register
+    pxor        m1, m1                   ; zero operand for the byte sum
+    DEFINE_ARGS dst, stride, stride3
+    psadbw      m0, m1                   ; word 0 = sum of the eight edge bytes
+    paddw       m0, [GLOBAL(pw2_8)]      ; +4 so the shift rounds to nearest
+    psraw       m0, 3                    ; total / 8
+    pshufw      m0, m0, 0                ; copy word 0 into all four words
+    packuswb    m0, m0                   ; words -> bytes: DC value in every byte
+    lea         stride3q, [strideq*3]    ; offset of the fourth row in each group
+
+    ; rows 0-3
+    movq        [dstq], m0
+    movq        [dstq+strideq], m0
+    movq        [dstq+strideq*2], m0
+    movq        [dstq+stride3q], m0
+    lea         dstq, [dstq+strideq*4]
+    ; rows 4-7
+    movq        [dstq], m0
+    movq        [dstq+strideq], m0
+    movq        [dstq+strideq*2], m0
+    movq        [dstq+stride3q], m0
+
+    RESTORE_GOT
+    RET
+
+; dc_left_predictor_8x8(dst, stride, above, left)
+; Fills the 8x8 byte block at dst (rows `stride` bytes apart) with
+; (sum of left[0..7] + 4) >> 3.  `above` is not read.
+INIT_MMX sse
+cglobal dc_left_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
+    GET_GOT     goffsetq
+
+    movq        m0, [leftq]              ; load while the `left` name is still defined
+    pxor        m1, m1                   ; zero operand for the byte sum
+    DEFINE_ARGS dst, stride, stride3
+    psadbw      m0, m1                   ; word 0 = sum of the eight edge bytes
+    paddw       m0, [GLOBAL(pw2_8)]      ; +4 so the shift rounds to nearest
+    psraw       m0, 3                    ; total / 8
+    pshufw      m0, m0, 0                ; copy word 0 into all four words
+    packuswb    m0, m0                   ; words -> bytes: DC value in every byte
+    lea         stride3q, [strideq*3]    ; offset of the fourth row in each group
+
+    ; rows 0-3
+    movq        [dstq], m0
+    movq        [dstq+strideq], m0
+    movq        [dstq+strideq*2], m0
+    movq        [dstq+stride3q], m0
+    lea         dstq, [dstq+strideq*4]
+    ; rows 4-7
+    movq        [dstq], m0
+    movq        [dstq+strideq], m0
+    movq        [dstq+strideq*2], m0
+    movq        [dstq+stride3q], m0
+
+    RESTORE_GOT
+    RET
+
+; dc_128_predictor_4x4(dst, stride, above, left)
+; Writes the byte value 128 to every position of the 4x4 block at dst
+; (rows `stride` bytes apart).  Neither `above` nor `left` is read.
+INIT_MMX sse
+cglobal dc_128_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset
+    GET_GOT     goffsetq
+
+    DEFINE_ARGS dst, stride, stride3
+    movd        m0, [GLOBAL(dc_128)]     ; four bytes of 128
+    lea         stride3q, [strideq*3]    ; offset of row 3
+    movd        [dstq], m0               ; row 0
+    movd        [dstq+strideq], m0       ; row 1
+    movd        [dstq+strideq*2], m0     ; row 2
+    movd        [dstq+stride3q], m0      ; row 3
+    RESTORE_GOT
+    RET
+
+; dc_128_predictor_8x8(dst, stride, above, left)
+; Writes the byte value 128 to every position of the 8x8 block at dst
+; (rows `stride` bytes apart).  Neither `above` nor `left` is read.
+INIT_MMX sse
+cglobal dc_128_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
+    GET_GOT     goffsetq
+
+    DEFINE_ARGS dst, stride, stride3
+    movq        m0, [GLOBAL(dc_128)]     ; eight bytes of 128
+    lea         stride3q, [strideq*3]    ; offset of the fourth row in each group
+    ; rows 0-3
+    movq        [dstq], m0
+    movq        [dstq+strideq], m0
+    movq        [dstq+strideq*2], m0
+    movq        [dstq+stride3q], m0
+    lea         dstq, [dstq+strideq*4]
+    ; rows 4-7
+    movq        [dstq], m0
+    movq        [dstq+strideq], m0
+    movq        [dstq+strideq*2], m0
+    movq        [dstq+stride3q], m0
+    RESTORE_GOT
+    RET
+
INIT_XMM sse2
cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
GET_GOT goffsetq
@@ -100,6 +230,91 @@ cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
RESTORE_GOT
REP_RET
+
+; dc_top_predictor_16x16(dst, stride, above, left)
+; Fills the 16x16 byte block at dst (rows `stride` bytes apart) with
+; (sum of above[0..15] + 8) >> 4.  `left` is not read.
+; `mova` is used for the edge load and the row stores, so above and every
+; dst row are accessed as 16-byte-aligned memory.
+INIT_XMM sse2
+cglobal dc_top_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
+    GET_GOT     goffsetq
+
+    pxor        m1, m1                   ; zero: psadbw operand and clean fold target
+    mova        m0, [aboveq]             ; load before stride3 reuses above's register
+    DEFINE_ARGS dst, stride, stride3, lines4
+    lea         stride3q, [strideq*3]
+    mov         lines4d, 4               ; 4 iterations x 4 rows = 16 rows
+    psadbw      m0, m1                   ; two partial sums: bytes 0-7 and bytes 8-15
+    ; Only one edge is summed here, so no second psadbw/paddw term is needed;
+    ; fold the high partial sum straight onto the low one.
+    movhlps     m1, m0                   ; m1 = { high partial sum, 0 }
+    paddw       m0, m1                   ; word 0 = sum of all 16 bytes
+    paddw       m0, [GLOBAL(pw2_16)]     ; +8 so the shift rounds to nearest
+    psraw       m0, 4                    ; total / 16
+    pshuflw     m0, m0, 0x0              ; broadcast word 0 across the low four words
+    punpcklqdq  m0, m0                   ; ...and into the high four words
+    packuswb    m0, m0                   ; words -> bytes: DC value in all 16 bytes
+.loop:
+    mova        [dstq], m0
+    mova        [dstq+strideq], m0
+    mova        [dstq+strideq*2], m0
+    mova        [dstq+stride3q], m0
+    lea         dstq, [dstq+strideq*4]
+    dec         lines4d
+    jnz         .loop
+
+    RESTORE_GOT
+    REP_RET
+
+; dc_left_predictor_16x16(dst, stride, above, left)
+; Fills the 16x16 byte block at dst (rows `stride` bytes apart) with
+; (sum of left[0..15] + 8) >> 4.  `above` is not read.
+; `mova` is used for the edge load and the row stores, so left and every
+; dst row are accessed as 16-byte-aligned memory.
+INIT_XMM sse2
+cglobal dc_left_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
+    GET_GOT     goffsetq
+
+    pxor        m1, m1                   ; zero: psadbw operand and clean fold target
+    mova        m0, [leftq]              ; load before lines4 reuses left's register
+    DEFINE_ARGS dst, stride, stride3, lines4
+    lea         stride3q, [strideq*3]
+    mov         lines4d, 4               ; 4 iterations x 4 rows = 16 rows
+    psadbw      m0, m1                   ; two partial sums: bytes 0-7 and bytes 8-15
+    ; Only one edge is summed here, so no second psadbw/paddw term is needed;
+    ; fold the high partial sum straight onto the low one.
+    movhlps     m1, m0                   ; m1 = { high partial sum, 0 }
+    paddw       m0, m1                   ; word 0 = sum of all 16 bytes
+    paddw       m0, [GLOBAL(pw2_16)]     ; +8 so the shift rounds to nearest
+    psraw       m0, 4                    ; total / 16
+    pshuflw     m0, m0, 0x0              ; broadcast word 0 across the low four words
+    punpcklqdq  m0, m0                   ; ...and into the high four words
+    packuswb    m0, m0                   ; words -> bytes: DC value in all 16 bytes
+.loop:
+    mova        [dstq], m0
+    mova        [dstq+strideq], m0
+    mova        [dstq+strideq*2], m0
+    mova        [dstq+stride3q], m0
+    lea         dstq, [dstq+strideq*4]
+    dec         lines4d
+    jnz         .loop
+
+    RESTORE_GOT
+    REP_RET
+
+; dc_128_predictor_16x16(dst, stride, above, left)
+; Writes the byte value 128 to every position of the 16x16 block at dst
+; (rows `stride` bytes apart).  Neither `above` nor `left` is read.
+; Row stores use `mova`, so every dst row is accessed as 16-byte-aligned memory.
+INIT_XMM sse2
+cglobal dc_128_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
+    GET_GOT     goffsetq
+
+    DEFINE_ARGS dst, stride, stride3, lines4
+    lea         stride3q, [strideq*3]
+    mov         lines4d, 4               ; 4 iterations x 4 rows = 16 rows
+    mova        m0, [GLOBAL(dc_128)]     ; 16 bytes of 128
+.loop:
+    mova        [dstq], m0
+    mova        [dstq+strideq], m0
+    mova        [dstq+strideq*2], m0
+    mova        [dstq+stride3q], m0
+    lea         dstq, [dstq+strideq*4]
+    dec         lines4d
+    jnz         .loop
+    RESTORE_GOT
+    ; The return can directly follow the loop branch (RESTORE_GOT may expand
+    ; to nothing), so use REP_RET like the other looped predictors in this file.
+    REP_RET
+
+
INIT_XMM sse2
cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
GET_GOT goffsetq
@@ -142,6 +357,101 @@ cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
RESTORE_GOT
REP_RET
+; dc_top_predictor_32x32(dst, stride, above, left)
+; Fills the 32x32 byte block at dst (rows `stride` bytes apart) with
+; (sum of above[0..31] + 16) >> 5.  `left` is not read.
+; `mova` is used for the edge loads and the row stores, so above and every
+; dst row are accessed as 16-byte-aligned memory.
+INIT_XMM sse2
+cglobal dc_top_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
+    GET_GOT     goffsetq
+
+    ; Both edge loads come first: stride3 reuses above's register below.
+    mova        m0, [aboveq]             ; edge bytes 0-15
+    mova        m2, [aboveq+16]          ; edge bytes 16-31
+    pxor        m1, m1                   ; zero operand for the byte sums
+    DEFINE_ARGS dst, stride, stride3, rows4
+    psadbw      m0, m1                   ; partial sums of bytes 0-7 / 8-15
+    psadbw      m2, m1                   ; partial sums of bytes 16-23 / 24-31
+    paddw       m0, m2
+    movhlps     m2, m0                   ; bring the high partial sum down
+    paddw       m0, m2                   ; word 0 = sum of all 32 bytes
+    paddw       m0, [GLOBAL(pw2_32)]     ; +16 so the shift rounds to nearest
+    psraw       m0, 5                    ; total / 32
+    pshuflw     m0, m0, 0                ; broadcast word 0 across the low four words
+    punpcklqdq  m0, m0                   ; ...and into the high four words
+    packuswb    m0, m0                   ; words -> bytes: DC value in all 16 bytes
+    lea         stride3q, [strideq*3]
+    mov         rows4d, 8                ; 8 iterations x 4 rows = 32 rows
+.next4rows:
+    mova        [dstq], m0
+    mova        [dstq+16], m0
+    mova        [dstq+strideq], m0
+    mova        [dstq+strideq+16], m0
+    mova        [dstq+strideq*2], m0
+    mova        [dstq+strideq*2+16], m0
+    mova        [dstq+stride3q], m0
+    mova        [dstq+stride3q+16], m0
+    lea         dstq, [dstq+strideq*4]
+    dec         rows4d
+    jnz         .next4rows
+
+    RESTORE_GOT
+    REP_RET
+
+; dc_left_predictor_32x32(dst, stride, above, left)
+; Fills the 32x32 byte block at dst (rows `stride` bytes apart) with
+; (sum of left[0..31] + 16) >> 5.  `above` is not read.
+; `mova` is used for the edge loads and the row stores, so left and every
+; dst row are accessed as 16-byte-aligned memory.
+INIT_XMM sse2
+cglobal dc_left_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
+    GET_GOT     goffsetq
+
+    ; Both edge loads come first: rows4 reuses left's register below.
+    mova        m0, [leftq]              ; edge bytes 0-15
+    mova        m2, [leftq+16]           ; edge bytes 16-31
+    pxor        m1, m1                   ; zero operand for the byte sums
+    DEFINE_ARGS dst, stride, stride3, rows4
+    psadbw      m0, m1                   ; partial sums of bytes 0-7 / 8-15
+    psadbw      m2, m1                   ; partial sums of bytes 16-23 / 24-31
+    paddw       m0, m2
+    movhlps     m2, m0                   ; bring the high partial sum down
+    paddw       m0, m2                   ; word 0 = sum of all 32 bytes
+    paddw       m0, [GLOBAL(pw2_32)]     ; +16 so the shift rounds to nearest
+    psraw       m0, 5                    ; total / 32
+    pshuflw     m0, m0, 0                ; broadcast word 0 across the low four words
+    punpcklqdq  m0, m0                   ; ...and into the high four words
+    packuswb    m0, m0                   ; words -> bytes: DC value in all 16 bytes
+    lea         stride3q, [strideq*3]
+    mov         rows4d, 8                ; 8 iterations x 4 rows = 32 rows
+.next4rows:
+    mova        [dstq], m0
+    mova        [dstq+16], m0
+    mova        [dstq+strideq], m0
+    mova        [dstq+strideq+16], m0
+    mova        [dstq+strideq*2], m0
+    mova        [dstq+strideq*2+16], m0
+    mova        [dstq+stride3q], m0
+    mova        [dstq+stride3q+16], m0
+    lea         dstq, [dstq+strideq*4]
+    dec         rows4d
+    jnz         .next4rows
+
+    RESTORE_GOT
+    REP_RET
+
+; dc_128_predictor_32x32(dst, stride, above, left)
+; Writes the byte value 128 to every position of the 32x32 block at dst
+; (rows `stride` bytes apart).  Neither `above` nor `left` is read.
+; Row stores use `mova`, so every dst row is accessed as 16-byte-aligned memory.
+INIT_XMM sse2
+cglobal dc_128_predictor_32x32, 4, 5, 3, dst, stride, above, left, goffset
+    GET_GOT     goffsetq
+
+    DEFINE_ARGS dst, stride, stride3, lines4
+    lea         stride3q, [strideq*3]
+    mov         lines4d, 8               ; 8 iterations x 4 rows = 32 rows
+    mova        m0, [GLOBAL(dc_128)]     ; 16 bytes of 128
+.loop:
+    mova        [dstq], m0
+    mova        [dstq+16], m0
+    mova        [dstq+strideq], m0
+    mova        [dstq+strideq+16], m0
+    mova        [dstq+strideq*2], m0
+    mova        [dstq+strideq*2+16], m0
+    mova        [dstq+stride3q], m0
+    mova        [dstq+stride3q+16], m0
+    lea         dstq, [dstq+strideq*4]
+    dec         lines4d
+    jnz         .loop
+    RESTORE_GOT
+    ; The return can directly follow the loop branch (RESTORE_GOT may expand
+    ; to nothing), so use REP_RET like the other looped predictors in this file.
+    REP_RET
+
INIT_MMX sse
cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above
movd m0, [aboveq]
« no previous file with comments | « source/libvpx/vp9/common/x86/vp9_idct_intrin_ssse3.c ('k') | source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698