source/libvpx/third_party/libyuv/source/compare_win.cc - Issue 996503002: libvpx: Pull from upstream

Unified Diff: source/libvpx/third_party/libyuv/source/compare_win.cc

Issue 996503002: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « source/libvpx/third_party/libyuv/source/compare_posix.cc ('k') | source/libvpx/third_party/libyuv/source/convert.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: source/libvpx/third_party/libyuv/source/compare_win.cc

diff --git a/source/libvpx/third_party/libyuv/source/compare_win.cc b/source/libvpx/third_party/libyuv/source/compare_win.cc

index 99831651f5f58d723dc044dd0f447bc13dcfe7a1..e99009a21dff24c63f341f9ef3344068feffaeb0 100644

--- a/source/libvpx/third_party/libyuv/source/compare_win.cc

+++ b/source/libvpx/third_party/libyuv/source/compare_win.cc

@@ -27,13 +27,11 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {

pxor xmm0, xmm0

pxor xmm5, xmm5

- align 4

wloop:

- movdqa xmm1, [eax]

+ movdqu xmm1, [eax]

lea eax, [eax + 16]

- movdqa xmm2, [edx]

+ movdqu xmm2, [edx]

lea edx, [edx + 16]

- sub ecx, 16

movdqa xmm3, xmm1 // abs trick

psubusb xmm1, xmm2

psubusb xmm2, xmm3

@@ -45,6 +43,7 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {

pmaddwd xmm2, xmm2

paddd xmm0, xmm1

paddd xmm0, xmm2

+ sub ecx, 16

jg wloop

pshufd xmm1, xmm0, 0xee

@@ -70,12 +69,10 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {

vpxor ymm5, ymm5, ymm5 // constant 0 for unpck

sub edx, eax

- align 4

wloop:

vmovdqu ymm1, [eax]

vmovdqu ymm2, [eax + edx]

lea eax, [eax + 32]

- sub ecx, 32

vpsubusb ymm3, ymm1, ymm2 // abs difference trick

vpsubusb ymm2, ymm2, ymm1

vpor ymm1, ymm2, ymm3

@@ -85,6 +82,7 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {

vpmaddwd ymm1, ymm1, ymm1

vpaddd ymm0, ymm0, ymm1

vpaddd ymm0, ymm0, ymm2

+ sub ecx, 32

jg wloop

vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes.

@@ -145,7 +143,6 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {

pxor xmm7, xmm7 // constant 0 for unpck

movdqa xmm6, kHash16x33

- align 4

wloop:

movdqu xmm1, [eax] // src[0-15]

lea eax, [eax + 16]

@@ -170,7 +167,6 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {

pmulld(0xcd) // pmulld xmm1, xmm5

paddd xmm3, xmm4 // add 16 results

paddd xmm1, xmm2

- sub ecx, 16

paddd xmm1, xmm3

pshufd xmm2, xmm1, 0x0e // upper 2 dwords

@@ -178,6 +174,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {

pshufd xmm2, xmm1, 0x01

paddd xmm1, xmm2

paddd xmm0, xmm1

+ sub ecx, 16

jg wloop

movd eax, xmm0 // return hash

@@ -195,7 +192,6 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {

movd xmm0, [esp + 12] // seed

movdqa xmm6, kHash16x33

- align 4

wloop:

vpmovzxbd xmm3, dword ptr [eax] // src[0-3]

pmulld xmm0, xmm6 // hash *= 33 ^ 16

@@ -209,13 +205,13 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {

pmulld xmm1, kHashMul3

paddd xmm3, xmm4 // add 16 results

paddd xmm1, xmm2

- sub ecx, 16

paddd xmm1, xmm3

pshufd xmm2, xmm1, 0x0e // upper 2 dwords

paddd xmm1, xmm2

pshufd xmm2, xmm1, 0x01

paddd xmm1, xmm2

paddd xmm0, xmm1

+ sub ecx, 16

jg wloop

movd eax, xmm0 // return hash

« no previous file with comments | « source/libvpx/third_party/libyuv/source/compare_posix.cc ('k') | source/libvpx/third_party/libyuv/source/convert.cc » ('j') | no next file with comments »