| Index: src/opts/SkColor_opts_SSE2.h
|
| diff --git a/src/opts/SkColor_opts_SSE2.h b/src/opts/SkColor_opts_SSE2.h
|
| index 741d1ab77732cdfb41c1a8f8a4aa2f8ca426484b..c52fc1e876ac34ce24ff1fa9ea54e936fd713463 100644
|
| --- a/src/opts/SkColor_opts_SSE2.h
|
| +++ b/src/opts/SkColor_opts_SSE2.h
|
| @@ -42,24 +42,42 @@ static inline __m128i SkAlphaMulAlpha_SSE2(const __m128i& a,
|
| return prod;
|
| }
|
|
|
| +static const __m128i rb_mask = _mm_set1_epi32(0x00FF00FF); // Low byte of every 16-bit word (the "rb" bytes of each 32-bit lane).
|
| +static const __m128i ag_mask = _mm_set1_epi32(0xFF00FF00); // High byte of every 16-bit word (the "ag" bytes); bitwise complement of rb_mask.
|
| +
|
| // Portable version SkAlphaMulQ is in SkColorPriv.h.
|
| static inline __m128i SkAlphaMulQ_SSE2(const __m128i& c, const __m128i& scale) {
|
| - __m128i mask = _mm_set1_epi32(0xFF00FF);
|
| __m128i s = _mm_or_si128(_mm_slli_epi32(scale, 16), scale);
|
|
|
| // uint32_t rb = ((c & mask) * scale) >> 8
|
| - __m128i rb = _mm_and_si128(mask, c);
|
| + __m128i rb = _mm_and_si128(rb_mask, c);
|
| rb = _mm_mullo_epi16(rb, s);
|
| rb = _mm_srli_epi16(rb, 8);
|
|
|
| // uint32_t ag = ((c >> 8) & mask) * scale
|
| __m128i ag = _mm_srli_epi16(c, 8);
|
| - ASSERT_EQ(ag, _mm_and_si128(mask, ag)); // ag = _mm_srli_epi16(c, 8) did this for us.
|
| + ASSERT_EQ(ag, _mm_and_si128(rb_mask, ag)); // The 16-bit shift in ag = _mm_srli_epi16(c, 8) already cleared the high byte of each word, so no explicit mask is needed.
|
| ag = _mm_mullo_epi16(ag, s);
|
|
|
| // (rb & mask) | (ag & ~mask)
|
| - ASSERT_EQ(rb, _mm_and_si128(mask, rb)); // rb = _mm_srli_epi16(rb, 8) did this for us.
|
| - ag = _mm_andnot_si128(mask, ag);
|
| + ASSERT_EQ(rb, _mm_and_si128(rb_mask, rb)); // The 16-bit shift in rb = _mm_srli_epi16(rb, 8) already cleared the high byte of each word, so no explicit mask is needed.
|
| + ag = _mm_and_si128(ag_mask, ag); // Keep only the high byte of each 16-bit product; ag_mask is the complement of rb_mask.
|
| + return _mm_or_si128(rb, ag); // Merge the rb and ag halves back into one vector.
|
| +}
|
| +
|
| +// Fast path for SkAlphaMulQ_SSE2 with a constant scale factor: each 8-bit channel v of c becomes (v * scale) >> 8.
|
| +static inline __m128i SkAlphaMulQ_SSE2(const __m128i& c, const unsigned scale) {
|
| + if (scale >= 256) { return c; } // scale << 8 must fit in 16 bits; 256 would wrap to 0 and zero the pixels instead of leaving them unchanged. Larger values are treated like 256.
|
| + __m128i s = _mm_set1_epi16(scale << 8); // Move scale factor to upper byte of word.
|
| + // With mulhi, red and blue values are already in the right place and
|
| + // don't need to be divided by 256.
|
| + __m128i rb = _mm_and_si128(rb_mask, c);
|
| + rb = _mm_mulhi_epu16(rb, s);
|
| +
|
| + __m128i ag = _mm_and_si128(ag_mask, c);
|
| + ag = _mm_mulhi_epu16(ag, s); // Alpha and green values are in the higher byte of each word.
|
| + ag = _mm_and_si128(ag_mask, ag); // Drop the low byte of each product, i.e. the fractional part of v * scale / 256.
|
| +
|
| return _mm_or_si128(rb, ag);
|
| }
|
|
|
| @@ -185,5 +203,34 @@ static inline __m128i SkPixel32ToPixel16_ToU16_SSE2(const __m128i& src_pixel1,
|
| return d_pixel;
|
| }
|
|
|
| +// Returns SkAlphaMulQ(src, src_scale) + SkAlphaMulQ(dst, dst_scale), with both scales derived from aa and src. Portable version SkBlendARGB32 is in SkColorPriv.h.
|
| +static inline __m128i SkBlendARGB32_SSE2(const __m128i& src, const __m128i& dst,
|
| + const __m128i& aa) {
|
| + __m128i src_scale = SkAlpha255To256_SSE2(aa);
|
| + // Portable form: SkAlpha255To256(255 - SkAlphaMul(SkGetPackedA32(src), src_scale)); computed below as 256 - ((a * src_scale) >> 8).
|
| + __m128i dst_scale = SkGetPackedA32_SSE2(src);
|
| + dst_scale = _mm_mullo_epi16(dst_scale, src_scale); // a * src_scale, low 16 bits of each 16-bit lane.
|
| + dst_scale = _mm_srli_epi16(dst_scale, 8); // >> 8
|
| + dst_scale = _mm_sub_epi32(_mm_set1_epi32(256), dst_scale); // 256 - the scaled value, per 32-bit lane.
|
| +
|
| + __m128i result = SkAlphaMulQ_SSE2(src, src_scale);
|
| + return _mm_add_epi8(result, SkAlphaMulQ_SSE2(dst, dst_scale)); // Byte-wise add; wraps rather than saturates on overflow.
|
| +}
|
| +
|
| +// Fast path for SkBlendARGB32_SSE2 with a constant alpha factor: the same aa is applied to every 32-bit lane.
|
| +static inline __m128i SkBlendARGB32_SSE2(const __m128i& src, const __m128i& dst,
|
| + const unsigned aa) {
|
| + unsigned alpha = SkAlpha255To256(aa);
|
| + __m128i src_scale = _mm_set1_epi32(alpha); // Broadcast the scalar scale so dst_scale can be computed per lane.
|
| + // Portable form: SkAlpha255To256(255 - SkAlphaMul(SkGetPackedA32(src), src_scale)); computed below as 256 - ((a * src_scale) >> 8).
|
| + __m128i dst_scale = SkGetPackedA32_SSE2(src);
|
| + dst_scale = _mm_mullo_epi16(dst_scale, src_scale); // a * src_scale, low 16 bits of each 16-bit lane.
|
| + dst_scale = _mm_srli_epi16(dst_scale, 8); // >> 8
|
| + dst_scale = _mm_sub_epi32(_mm_set1_epi32(256), dst_scale); // 256 - the scaled value, per 32-bit lane.
|
| +
|
| + __m128i result = SkAlphaMulQ_SSE2(src, alpha); // Scalar-scale overload. NOTE(review): if alpha can be 256 here, confirm that overload handles it (it forms scale << 8 in a 16-bit lane).
|
| + return _mm_add_epi8(result, SkAlphaMulQ_SSE2(dst, dst_scale)); // Byte-wise add; wraps rather than saturates on overflow.
|
| +}
|
| +
|
| #undef ASSERT_EQ
|
| #endif // SkColor_opts_SSE2_DEFINED
|
|
|