third_party/libwebp/dsp/common_sse2.h - Issue 2651883004: libwebp-0.6.0-rc1

Unified Diff: third_party/libwebp/dsp/common_sse2.h

Issue 2651883004: libwebp-0.6.0-rc1 (Closed)

Patch Set: Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: third_party/libwebp/dsp/common_sse2.h

diff --git a/third_party/libwebp/dsp/common_sse2.h b/third_party/libwebp/dsp/common_sse2.h

index 7cea13fb3cb580afedea46958c33af804fe4bc75..995d7cf4eafde1d74ee87ecc69864789df7d055a 100644

--- a/third_party/libwebp/dsp/common_sse2.h

+++ b/third_party/libwebp/dsp/common_sse2.h

@@ -100,6 +100,91 @@ static WEBP_INLINE void VP8Transpose_2_4x4_16b(

// a03 a13 a23 a33 b03 b13 b23 b33

}

+//------------------------------------------------------------------------------

+// Channel mixing.

+// Helper macro used several times in VP8PlanarTo24b.

+// It de-interleaves the six IN registers: the even-indexed bytes are stored in

+// OUT0..OUT2, while the odd-indexed bytes are stored in OUT3..OUT5.

+#define VP8PlanarTo24bHelper(IN, OUT) \

+ do { \

+ const __m128i v_mask = _mm_set1_epi16(0x00ff); \

+ /* Low byte of each 16b lane, i.e. the even-indexed 8b values. */ \

+ (OUT##0) = _mm_packus_epi16(_mm_and_si128((IN##0), v_mask), \

+ _mm_and_si128((IN##1), v_mask)); \

+ (OUT##1) = _mm_packus_epi16(_mm_and_si128((IN##2), v_mask), \

+ _mm_and_si128((IN##3), v_mask)); \

+ (OUT##2) = _mm_packus_epi16(_mm_and_si128((IN##4), v_mask), \

+ _mm_and_si128((IN##5), v_mask)); \

+ /* High byte of each 16b lane, i.e. the odd-indexed 8b values. */ \

+ (OUT##3) = _mm_packus_epi16(_mm_srli_epi16((IN##0), 8), \

+ _mm_srli_epi16((IN##1), 8)); \

+ (OUT##4) = _mm_packus_epi16(_mm_srli_epi16((IN##2), 8), \

+ _mm_srli_epi16((IN##3), 8)); \

+ (OUT##5) = _mm_packus_epi16(_mm_srli_epi16((IN##4), 8), \

+ _mm_srli_epi16((IN##5), 8)); \

+ } while (0)

+// Packs, in place, the six planar registers

+// rrrr... rrrr... gggg... gggg... bbbb... bbbb....

+// triplet by triplet into the interleaved layout rgbrgbrgbrgb ...

+static WEBP_INLINE void VP8PlanarTo24b(__m128i* const in0, __m128i* const in1,

+ __m128i* const in2, __m128i* const in3,

+ __m128i* const in4, __m128i* const in5) {

+ // The input is 6 registers of sixteen 8b values but, for the sake of

+ // explanation, consider 6 registers of four 8b values each.

+ // To pack, we repeatedly take every other 8b integer and move it

+ // around as follows:

+ // Input: r0r1r2r3 | r4r5r6r7 | g0g1g2g3 | g4g5g6g7 | b0b1b2b3 | b4b5b6b7

+ // Split the 6 registers into two sets of 3: even-indexed bytes first, then

+ // odd-indexed: r0r2r4r6 | g0g2g4g6 | b0b2b4b6 | r1r3r5r7 | g1g3g5g7 | b1b3b5b7

+ // Two more passes: r0g0b0r1 | g1b1r2g2 | b2r3g3b3 | r4g4b4r5 | g5b5r6g6 | b6r7g7b7

+ __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;

+ VP8PlanarTo24bHelper(*in, tmp);

+ VP8PlanarTo24bHelper(tmp, *in);

+ VP8PlanarTo24bHelper(*in, tmp);

+ // With sixteen bytes per register, two more passes are needed than in the example.

+ {

+ __m128i out0, out1, out2, out3, out4, out5;

+ VP8PlanarTo24bHelper(tmp, out);

+ VP8PlanarTo24bHelper(out, *in);

+ }

+#undef VP8PlanarTo24bHelper

+// Converts four registers of packed four-channel pixels (argbargbargbargb...)

+// in place into split channels: *in0 = aaaa..., *in1 = rrrr..., *in2 = gggg..., *in3 = bbbb...

+static WEBP_INLINE void VP8L32bToPlanar(__m128i* const in0,

+ __m128i* const in1,

+ __m128i* const in2,

+ __m128i* const in3) {

+ // Column-wise transpose: three rounds of byte interleaving.

+ const __m128i A0 = _mm_unpacklo_epi8(*in0, *in1);

+ const __m128i A1 = _mm_unpackhi_epi8(*in0, *in1);

+ const __m128i A2 = _mm_unpacklo_epi8(*in2, *in3);

+ const __m128i A3 = _mm_unpackhi_epi8(*in2, *in3);

+ const __m128i B0 = _mm_unpacklo_epi8(A0, A1);

+ const __m128i B1 = _mm_unpackhi_epi8(A0, A1);

+ const __m128i B2 = _mm_unpacklo_epi8(A2, A3);

+ const __m128i B3 = _mm_unpackhi_epi8(A2, A3);

+ // C0 = g7 g6 ... g1 g0 | b7 b6 ... b1 b0 (high 64b | low 64b)

+ // C1 = a7 a6 ... a1 a0 | r7 r6 ... r1 r0 (C2/C3: same for pixels 8..15)

+ const __m128i C0 = _mm_unpacklo_epi8(B0, B1);

+ const __m128i C1 = _mm_unpackhi_epi8(B0, B1);

+ const __m128i C2 = _mm_unpacklo_epi8(B2, B3);

+ const __m128i C3 = _mm_unpackhi_epi8(B2, B3);

+ // Gather the channels: high 64b halves give a and g, low halves r and b.

+ *in0 = _mm_unpackhi_epi64(C1, C3);

+ *in1 = _mm_unpacklo_epi64(C1, C3);

+ *in2 = _mm_unpackhi_epi64(C0, C2);

+ *in3 = _mm_unpacklo_epi64(C0, C2);

#endif // WEBP_USE_SSE2

#ifdef __cplusplus

« no previous file with comments | « third_party/libwebp/dsp/alpha_processing_sse2.c ('k') | third_party/libwebp/dsp/cost.c » ('j') | no next file with comments »