source/libvpx/vp9/encoder/x86/vp9_error_sse2.asm - Issue 1124333011: libvpx: Pull from upstream

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: source/libvpx/vp9/encoder/x86/vp9_error_sse2.asm

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: only update to last nights LKGR Created 5 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c ('k') | source/libvpx/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: source/libvpx/vp9/encoder/x86/vp9_error_sse2.asm

diff --git a/source/libvpx/vp9/encoder/x86/vp9_error_sse2.asm b/source/libvpx/vp9/encoder/x86/vp9_error_sse2.asm

index 1126fdb61640e40cb820dc3928ed2e3ecbc9553a..56373e897c9d2c41bbda1dfe87a6768e96750783 100644

--- a/source/libvpx/vp9/encoder/x86/vp9_error_sse2.asm

+++ b/source/libvpx/vp9/encoder/x86/vp9_error_sse2.asm

@@ -72,3 +72,49 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz

movd edx, m5

%endif

RET

+; Compute the sum of squared differences between two int16_t vectors.

+; int64_t vp9_block_error_fp(int16_t *coeff, int16_t *dqcoeff,

+; intptr_t block_size)

+INIT_XMM sse2

+cglobal block_error_fp, 3, 3, 6, uqc, dqc, size ; presumably x86inc order: 3 args, 3 GPRs, 6 vector regs (m0-m5 are used) - confirm

+ pxor m4, m4 ; sse accumulator (two 64-bit lanes, see paddq below)

+ pxor m5, m5 ; dedicated zero register (high halves when widening dwords to qwords)

+ lea uqcq, [uqcq+sizeq*2] ; point past the end of coeff (size elements * 2 bytes)

+ lea dqcq, [dqcq+sizeq*2] ; point past the end of dqcoeff

+ neg sizeq ; index now runs from -size up to 0

+.loop:

+ mova m2, [uqcq+sizeq*2] ; first mmsize bytes of coeff; NOTE(review): mova presumably needs aligned buffers - confirm

+ mova m0, [dqcq+sizeq*2] ; first mmsize bytes of dqcoeff

+ mova m3, [uqcq+sizeq*2+mmsize] ; second mmsize bytes of coeff

+ mova m1, [dqcq+sizeq*2+mmsize] ; second mmsize bytes of dqcoeff

+ psubw m0, m2 ; per-word difference dqcoeff - coeff

+ psubw m1, m3 ; m3 is free to be reused as a temporary after this

+ ; individual errors are max. 15bit+sign, so squares are 30bit, and

+ ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit)

+ pmaddwd m0, m0 ; square each word and add adjacent pairs -> dword sums

+ pmaddwd m1, m1

+ ; accumulate in 64bit

+ punpckldq m3, m0, m5 ; low dwords of m0 interleaved with zero = zero-extended qwords (3-operand form, presumably expanded by x86inc)

+ punpckhdq m0, m5 ; high dwords of m0 zero-extended to qwords

+ paddq m4, m3

+ punpckldq m3, m1, m5 ; same widening for the second vector

+ paddq m4, m0

+ punpckhdq m1, m5

+ paddq m4, m3

+ paddq m4, m1

+ add sizeq, mmsize ; 2*mmsize bytes consumed per iteration = mmsize int16 elements

+ jl .loop ; body runs before the test and has no tail path: size is assumed to be a positive multiple of mmsize

+ ; accumulate horizontally and store in return value

+ movhlps m5, m4 ; high qword of the accumulator -> low qword of m5

+ paddq m4, m5 ; low qword of m4 = total

+%if ARCH_X86_64

+ movq rax, m4 ; 64-bit result returned in rax

+%else

+ pshufd m5, m4, 0x1 ; bring dword 1 (upper 32 bits of the total) into lane 0

+ movd eax, m4 ; low 32 bits of the result

+ movd edx, m5 ; high 32 bits of the result (edx:eax pair)

+%endif

+ RET