Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2137)

Unified Diff: source/libvpx/vp9/encoder/x86/vp9_error_sse2.asm

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: only update to last night's LKGR Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: source/libvpx/vp9/encoder/x86/vp9_error_sse2.asm
diff --git a/source/libvpx/vp9/encoder/x86/vp9_error_sse2.asm b/source/libvpx/vp9/encoder/x86/vp9_error_sse2.asm
index 1126fdb61640e40cb820dc3928ed2e3ecbc9553a..56373e897c9d2c41bbda1dfe87a6768e96750783 100644
--- a/source/libvpx/vp9/encoder/x86/vp9_error_sse2.asm
+++ b/source/libvpx/vp9/encoder/x86/vp9_error_sse2.asm
@@ -72,3 +72,49 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
movd edx, m5
%endif
RET
+
+; Compute the sum of squared differences between two int16_t vectors (64-bit result).
+; int64_t vp9_block_error_fp(int16_t *coeff, int16_t *dqcoeff,
+; intptr_t block_size)
+
+INIT_XMM sse2
+cglobal block_error_fp, 3, 3, 6, uqc, dqc, size ; x86inc: 3 args, 3 GPRs, 6 vector regs (m0-m5)
+ pxor m4, m4 ; sse accumulator (two 64-bit partial sums)
+ pxor m5, m5 ; dedicated zero register
+ lea uqcq, [uqcq+sizeq*2] ; uqc -> one past the last int16_t element
+ lea dqcq, [dqcq+sizeq*2] ; dqc -> one past the last int16_t element
+ neg sizeq ; index now runs upward from -size towards 0
+.loop: ; body runs before the exit test: at least one pass, no tail handling
+ mova m2, [uqcq+sizeq*2] ; uqc, first vector (mova: presumably requires aligned data)
+ mova m0, [dqcq+sizeq*2] ; dqc, first vector
+ mova m3, [uqcq+sizeq*2+mmsize] ; uqc, second vector
+ mova m1, [dqcq+sizeq*2+mmsize] ; dqc, second vector
+ psubw m0, m2 ; m0 = dqc - uqc per 16-bit lane (first vector)
+ psubw m1, m3 ; m1 = dqc - uqc per 16-bit lane (second vector)
+ ; individual errors are max. 15bit+sign, so squares are 30bit, and
+ ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit)
+ pmaddwd m0, m0 ; square each lane, add adjacent pairs into 32-bit lanes
+ pmaddwd m1, m1 ; same for the second vector
+ ; accumulate in 64bit: interleaving with the zero register widens each 32-bit sum
+ punpckldq m3, m0, m5 ; m3 = low two dwords of m0, zero-extended to qwords
+ punpckhdq m0, m5 ; m0 = high two dwords of m0, zero-extended to qwords
+ paddq m4, m3
+ punpckldq m3, m1, m5 ; m3 = low two dwords of m1, zero-extended to qwords
+ paddq m4, m0
+ punpckhdq m1, m5 ; m1 = high two dwords of m1, zero-extended to qwords
+ paddq m4, m3
+ paddq m4, m1
+ add sizeq, mmsize ; two vectors of int16_t per pass = mmsize elements consumed
+ jl .loop ; continue while the index is still negative
+
+ ; accumulate horizontally and store in return value
+ movhlps m5, m4 ; low qword of m5 = high qword of m4
+ paddq m4, m5 ; low qword of m4 = total of both partial sums
+%if ARCH_X86_64
+ movq rax, m4 ; 64-bit result returned in rax
+%else
+ pshufd m5, m4, 0x1 ; dword 0 of m5 = dword 1 of m4 (upper half of the total)
+ movd eax, m4 ; eax = low 32 bits of the result
+ movd edx, m5 ; edx = high 32 bits of the result
+%endif
+ RET
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c ('k') | source/libvpx/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698