| Index: src/opts/SkBlurImage_opts_SSE2.cpp
|
| diff --git a/src/opts/SkBlurImage_opts_SSE2.cpp b/src/opts/SkBlurImage_opts_SSE2.cpp
|
| index d2f8882726c204325c3dc5eb3a71581e3904c461..bbc6a66462ea5bd278bcf20ef73e2f11ab80ad56 100644
|
| --- a/src/opts/SkBlurImage_opts_SSE2.cpp
|
| +++ b/src/opts/SkBlurImage_opts_SSE2.cpp
|
| @@ -55,13 +55,17 @@
|
| const SkPMColor* sptr = src;
|
| SkColor* dptr = dst;
|
| for (int x = 0; x < width; ++x) {
|
| - // SSE2 has no PMULLUD, so we must do AG and RB separately.
|
| +#if 0
|
| + // In SSE4.1, this would be
|
| + __m128i result = _mm_mullo_epi32(sum, scale);
|
| +#else
|
| + // But SSE2 has no PMULLD, so we must do AG and RB separately.
|
| __m128i tmp1 = _mm_mul_epu32(sum, scale);
|
| __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(sum, 4),
|
| _mm_srli_si128(scale, 4));
|
| __m128i result = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0,0,2,0)),
|
| _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0,0,2,0)));
|
| -
|
| +#endif
|
| // sumA*scale+.5 sumR*scale+.5 sumG*scale+.5 sumB*scale+.5
|
| result = _mm_add_epi32(result, half);
|
|
|
|
|