Index: third_party/libwebp/dsp/enc_neon.c
diff --git a/third_party/libwebp/dsp/enc_neon.c b/third_party/libwebp/dsp/enc_neon.c
index 46f6bf9a33f8d3fcf16b9bbebd9b11161a9c15d9..6a078d632d89572df9aa7f85400b3ae7cfb42c7c 100644
--- a/third_party/libwebp/dsp/enc_neon.c
+++ b/third_party/libwebp/dsp/enc_neon.c
@@ -18,7 +18,7 @@
 #include <assert.h>
 
 #include "./neon.h"
-#include "../enc/vp8enci.h"
+#include "../enc/vp8i_enc.h"
 
 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)
@@ -746,9 +746,14 @@ static WEBP_INLINE void AccumulateSSE16(const uint8_t* const a,
   const uint8x16_t a0 = vld1q_u8(a);
   const uint8x16_t b0 = vld1q_u8(b);
   const uint8x16_t abs_diff = vabdq_u8(a0, b0);
-  uint16x8_t prod = vmull_u8(vget_low_u8(abs_diff), vget_low_u8(abs_diff));
-  prod = vmlal_u8(prod, vget_high_u8(abs_diff), vget_high_u8(abs_diff));
-  *sum = vpadalq_u16(*sum, prod);      // pair-wise add and accumulate
+  const uint16x8_t prod1 = vmull_u8(vget_low_u8(abs_diff),
+                                    vget_low_u8(abs_diff));
+  const uint16x8_t prod2 = vmull_u8(vget_high_u8(abs_diff),
+                                    vget_high_u8(abs_diff));
+  /* pair-wise adds and widen */
+  const uint32x4_t sum1 = vpaddlq_u16(prod1);
+  const uint32x4_t sum2 = vpaddlq_u16(prod2);
+  *sum = vaddq_u32(*sum, vaddq_u32(sum1, sum2));
 }
 
 // Horizontal sum of all four uint32_t values in 'sum'.
@@ -758,7 +763,7 @@ static int SumToInt(uint32x4_t sum) {
   return (int)sum3;
 }
 
-static int SSE16x16(const uint8_t* a, const uint8_t* b) {
+static int SSE16x16_NEON(const uint8_t* a, const uint8_t* b) {
   uint32x4_t sum = vdupq_n_u32(0);
   int y;
   for (y = 0; y < 16; ++y) {
@@ -767,7 +772,7 @@ static int SSE16x16(const uint8_t* a, const uint8_t* b) {
   return SumToInt(sum);
 }
 
-static int SSE16x8(const uint8_t* a, const uint8_t* b) {
+static int SSE16x8_NEON(const uint8_t* a, const uint8_t* b) {
   uint32x4_t sum = vdupq_n_u32(0);
   int y;
   for (y = 0; y < 8; ++y) {
@@ -776,7 +781,7 @@ static int SSE16x8(const uint8_t* a, const uint8_t* b) {
   return SumToInt(sum);
 }
 
-static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+static int SSE8x8_NEON(const uint8_t* a, const uint8_t* b) {
   uint32x4_t sum = vdupq_n_u32(0);
   int y;
   for (y = 0; y < 8; ++y) {
@@ -789,13 +794,18 @@ static int SSE8x8(const uint8_t* a, const uint8_t* b) {
   return SumToInt(sum);
 }
 
-static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+static int SSE4x4_NEON(const uint8_t* a, const uint8_t* b) {
   const uint8x16_t a0 = Load4x4(a);
   const uint8x16_t b0 = Load4x4(b);
   const uint8x16_t abs_diff = vabdq_u8(a0, b0);
-  uint16x8_t prod = vmull_u8(vget_low_u8(abs_diff), vget_low_u8(abs_diff));
-  prod = vmlal_u8(prod, vget_high_u8(abs_diff), vget_high_u8(abs_diff));
-  return SumToInt(vpaddlq_u16(prod));
+  const uint16x8_t prod1 = vmull_u8(vget_low_u8(abs_diff),
+                                    vget_low_u8(abs_diff));
+  const uint16x8_t prod2 = vmull_u8(vget_high_u8(abs_diff),
+                                    vget_high_u8(abs_diff));
+  /* pair-wise adds and widen */
+  const uint32x4_t sum1 = vpaddlq_u16(prod1);
+  const uint32x4_t sum2 = vpaddlq_u16(prod2);
+  return SumToInt(vaddq_u32(sum1, sum2));
 }
 
 //------------------------------------------------------------------------------
@@ -903,10 +913,12 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitNEON(void) {
   VP8TDisto4x4 = Disto4x4;
   VP8TDisto16x16 = Disto16x16;
   VP8CollectHistogram = CollectHistogram;
-  VP8SSE16x16 = SSE16x16;
-  VP8SSE16x8 = SSE16x8;
-  VP8SSE8x8 = SSE8x8;
-  VP8SSE4x4 = SSE4x4;
+
+  VP8SSE16x16 = SSE16x16_NEON;
+  VP8SSE16x8 = SSE16x8_NEON;
+  VP8SSE8x8 = SSE8x8_NEON;
+  VP8SSE4x4 = SSE4x4_NEON;
+
 #if !defined(WORK_AROUND_GCC)
   VP8EncQuantizeBlock = QuantizeBlock;
   VP8EncQuantize2Blocks = Quantize2Blocks;