Chromium Code Reviews| Index: src/opts/SkColor_opts_SSE2.h | 
| diff --git a/src/opts/SkColor_opts_SSE2.h b/src/opts/SkColor_opts_SSE2.h | 
| index 24ab6f73c8da13653fecf560f7ca8b4fdd08103f..960c48a023126ee304f0d0505d1b09054c542b46 100644 | 
| --- a/src/opts/SkColor_opts_SSE2.h | 
| +++ b/src/opts/SkColor_opts_SSE2.h | 
| @@ -10,6 +10,10 @@ | 
| #include <emmintrin.h> | 
| +static inline __m128i SkAlpha255To256_SSE2(const __m128i& alpha) { | 
| + return _mm_add_epi32(alpha, _mm_set1_epi32(1)); | 
| +} | 
| + | 
| // See #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) in SkXfermode.cpp. | 
| static inline __m128i SkAlphaMulAlpha_SSE2(const __m128i& a, | 
| const __m128i& b) { | 
| @@ -21,6 +25,27 @@ static inline __m128i SkAlphaMulAlpha_SSE2(const __m128i& a, | 
| return prod; | 
| } | 
| +// Portable version SkAlphaMulQ is in SkColorPriv.h. | 
| +static inline __m128i SkAlphaMulQ_SSE2(const __m128i& c, const __m128i& scale) { | 
| 
 
mtklein
2014/04/11 18:41:54
Is this still the best way to do this with SSE2?
 
qiankun
2014/04/14 02:33:01
Unfortunately, there is no 8-bit multiply function
 
 | 
| + __m128i mask = _mm_set1_epi32(gMask_00FF00FF); | 
| + __m128i s = _mm_or_si128(_mm_slli_epi32(scale, 16), scale); | 
| + | 
| + // uint32_t rb = ((c & mask) * scale) >> 8 | 
| + __m128i rb = _mm_and_si128(mask, c); | 
| + rb = _mm_mullo_epi16(rb, s); | 
| + rb = _mm_srli_epi16(rb, 8); | 
| + | 
| + // uint32_t ag = ((c >> 8) & mask) * scale | 
| + __m128i ag = _mm_srli_epi16(c, 8); | 
| + ag = _mm_and_si128(ag, mask); | 
| + ag = _mm_mullo_epi16(ag, s); | 
| + | 
| + // (rb & mask) | (ag & ~mask) | 
| + rb = _mm_and_si128(mask, rb); | 
| + ag = _mm_andnot_si128(mask, ag); | 
| + return _mm_or_si128(rb, ag); | 
| +} | 
| + | 
| static inline __m128i SkGetPackedA32_SSE2(const __m128i& src) { | 
| __m128i a = _mm_slli_epi32(src, (24 - SK_A32_SHIFT)); | 
| return _mm_srli_epi32(a, 24); |