third_party/libwebp/dsp/upsampling_sse2.c - Issue 116213006: Update libwebp to 0.4.0

Unified Diff: third_party/libwebp/dsp/upsampling_sse2.c

Issue 116213006: Update libwebp to 0.4.0 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: After Blink Roll Created 6 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: third_party/libwebp/dsp/upsampling_sse2.c

diff --git a/third_party/libwebp/dsp/upsampling_sse2.c b/third_party/libwebp/dsp/upsampling_sse2.c

index f31d04845e4a2b2902350fecfcaf556f6987b13f..0db0798c6d0655a25b76153914b65ba566c1c317 100644

--- a/third_party/libwebp/dsp/upsampling_sse2.c

+++ b/third_party/libwebp/dsp/upsampling_sse2.c

@@ -13,10 +13,6 @@

#include "./dsp.h"

-#if defined(__cplusplus) || defined(c_plusplus)

-extern "C" {

-#endif

#if defined(WEBP_USE_SSE2)

#include <assert.h>

@@ -51,7 +47,7 @@ extern "C" {

(out) = _mm_sub_epi8(tmp0, tmp4); /* (k + in + 1) / 2 - lsb_correction */ \

} while (0)

-// pack and store two alterning pixel rows

+// pack and store two alternating pixel rows

#define PACK_AND_STORE(a, b, da, db, out) do { \

const __m128i t_a = _mm_avg_epu8(a, da); /* (9a + 3b + 3c + d + 8) / 16 */ \

const __m128i t_b = _mm_avg_epu8(b, db); /* (3a + 9b + c + 3d + 8) / 16 */ \

@@ -87,8 +83,8 @@ extern "C" {

GET_M(ad, s, diag2); /* diag2 = (3a + b + c + 3d) / 8 */ \

/* pack the alternate pixels */ \

- PACK_AND_STORE(a, b, diag1, diag2, &(out)[0 * 32]); \

- PACK_AND_STORE(c, d, diag2, diag1, &(out)[2 * 32]); \

+ PACK_AND_STORE(a, b, diag1, diag2, out + 0); /* store top */ \

+ PACK_AND_STORE(c, d, diag2, diag1, out + 2 * 32); /* store bottom */ \

}

// Turn the macro into a function for reducing code-size when non-critical

@@ -108,69 +104,68 @@ static void Upsample32Pixels(const uint8_t r1[], const uint8_t r2[],

Upsample32Pixels(r1, r2, out); \

}

-#define CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, uv, \

+#define CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, \

top_dst, bottom_dst, cur_x, num_pixels) { \

int n; \

- if (top_y) { \

- for (n = 0; n < (num_pixels); ++n) { \

- FUNC(top_y[(cur_x) + n], (uv)[n], (uv)[32 + n], \

- top_dst + ((cur_x) + n) * XSTEP); \

- } \

+ for (n = 0; n < (num_pixels); ++n) { \

+ FUNC(top_y[(cur_x) + n], r_u[n], r_v[n], \

+ top_dst + ((cur_x) + n) * XSTEP); \

} \

- if (bottom_y) { \

+ if (bottom_y != NULL) { \

for (n = 0; n < (num_pixels); ++n) { \

- FUNC(bottom_y[(cur_x) + n], (uv)[64 + n], (uv)[64 + 32 + n], \

+ FUNC(bottom_y[(cur_x) + n], r_u[64 + n], r_v[64 + n], \

bottom_dst + ((cur_x) + n) * XSTEP); \

} \

}

+#define CONVERT2RGB_32(FUNC, XSTEP, top_y, bottom_y, \

+ top_dst, bottom_dst, cur_x) do { \

+ FUNC##32(top_y + (cur_x), r_u, r_v, top_dst + (cur_x) * XSTEP); \

+ if (bottom_y != NULL) { \

+ FUNC##32(bottom_y + (cur_x), r_u + 64, r_v + 64, \

+ bottom_dst + (cur_x) * XSTEP); \

+ } \

+} while (0)

#define SSE2_UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \

static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \

const uint8_t* top_u, const uint8_t* top_v, \

const uint8_t* cur_u, const uint8_t* cur_v, \

uint8_t* top_dst, uint8_t* bottom_dst, int len) { \

- int block; \

- /* 16 byte aligned array to cache reconstructed u and v */ \

+ int uv_pos, pos; \

+ /* 16byte-aligned array to cache reconstructed u and v */ \

uint8_t uv_buf[4 * 32 + 15]; \

- uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \

- const int uv_len = (len + 1) >> 1; \

- /* 17 pixels must be read-able for each block */ \

- const int num_blocks = (uv_len - 1) >> 4; \

- const int leftover = uv_len - num_blocks * 16; \

- const int last_pos = 1 + 32 * num_blocks; \

- \

- const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1; \

- const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1; \

+ uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \

+ uint8_t* const r_v = r_u + 32; \

- assert(len > 0); \

- /* Treat the first pixel in regular way */ \

- if (top_y) { \

- const int u0 = (top_u[0] + u_diag) >> 1; \

- const int v0 = (top_v[0] + v_diag) >> 1; \

- FUNC(top_y[0], u0, v0, top_dst); \

+ assert(top_y != NULL); \

+ { /* Treat the first pixel in regular way */ \

+ const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1; \

+ const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1; \

+ const int u0_t = (top_u[0] + u_diag) >> 1; \

+ const int v0_t = (top_v[0] + v_diag) >> 1; \

+ FUNC(top_y[0], u0_t, v0_t, top_dst); \

+ if (bottom_y != NULL) { \

+ const int u0_b = (cur_u[0] + u_diag) >> 1; \

+ const int v0_b = (cur_v[0] + v_diag) >> 1; \

+ FUNC(bottom_y[0], u0_b, v0_b, bottom_dst); \

+ } \

} \

- if (bottom_y) { \

- const int u0 = (cur_u[0] + u_diag) >> 1; \

- const int v0 = (cur_v[0] + v_diag) >> 1; \

- FUNC(bottom_y[0], u0, v0, bottom_dst); \

+ /* For UPSAMPLE_32PIXELS, 17 u/v values must be read-able for each block */ \

+ for (pos = 1, uv_pos = 0; pos + 32 + 1 <= len; pos += 32, uv_pos += 16) { \

+ UPSAMPLE_32PIXELS(top_u + uv_pos, cur_u + uv_pos, r_u); \

+ UPSAMPLE_32PIXELS(top_v + uv_pos, cur_v + uv_pos, r_v); \

+ CONVERT2RGB_32(FUNC, XSTEP, top_y, bottom_y, top_dst, bottom_dst, pos); \

} \

- \

- for (block = 0; block < num_blocks; ++block) { \

- UPSAMPLE_32PIXELS(top_u, cur_u, r_uv + 0 * 32); \

- UPSAMPLE_32PIXELS(top_v, cur_v, r_uv + 1 * 32); \

- CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, r_uv, top_dst, bottom_dst, \

- 32 * block + 1, 32) \

- top_u += 16; \

- cur_u += 16; \

- top_v += 16; \

- cur_v += 16; \

+ if (len > 1) { \

+ const int left_over = ((len + 1) >> 1) - (pos >> 1); \

+ assert(left_over > 0); \

+ UPSAMPLE_LAST_BLOCK(top_u + uv_pos, cur_u + uv_pos, left_over, r_u); \

+ UPSAMPLE_LAST_BLOCK(top_v + uv_pos, cur_v + uv_pos, left_over, r_v); \

+ CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, top_dst, bottom_dst, \

+ pos, len - pos); \

} \

- \

- UPSAMPLE_LAST_BLOCK(top_u, cur_u, leftover, r_uv + 0 * 32); \

- UPSAMPLE_LAST_BLOCK(top_v, cur_v, leftover, r_uv + 1 * 32); \

- CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, r_uv, top_dst, bottom_dst, \

- last_pos, len - last_pos); \

}

// SSE2 variants of the fancy upsampler.

@@ -184,6 +179,7 @@ SSE2_UPSAMPLE_FUNC(UpsampleBgraLinePairSSE2, VP8YuvToBgra, 4)

#undef UPSAMPLE_32PIXELS

#undef UPSAMPLE_LAST_BLOCK

#undef CONVERT2RGB

+#undef CONVERT2RGB_32

#undef SSE2_UPSAMPLE_FUNC

#endif // FANCY_UPSAMPLING

@@ -192,10 +188,13 @@ SSE2_UPSAMPLE_FUNC(UpsampleBgraLinePairSSE2, VP8YuvToBgra, 4)

//------------------------------------------------------------------------------

+#ifdef FANCY_UPSAMPLING

extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];

void WebPInitUpsamplersSSE2(void) {

#if defined(WEBP_USE_SSE2)

+ VP8YUVInitSSE2();

WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePairSSE2;

WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePairSSE2;

WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePairSSE2;

@@ -210,8 +209,10 @@ void WebPInitPremultiplySSE2(void) {

#endif // WEBP_USE_SSE2

}

-#if defined(__cplusplus) || defined(c_plusplus)

-} // extern "C"

-#endif

+#else

+// this empty function is to avoid an empty .o

+void WebPInitPremultiplySSE2(void) {}

+#endif // FANCY_UPSAMPLING

« no previous file with comments | « third_party/libwebp/dsp/upsampling_neon.c ('k') | third_party/libwebp/dsp/yuv.h » ('j') | no next file with comments »