| Index: src/opts/SkBlurImage_opts_SSE2.cpp
|
| diff --git a/src/opts/SkBlurImage_opts_SSE2.cpp b/src/opts/SkBlurImage_opts_SSE2.cpp
|
| index bbc6a66462ea5bd278bcf20ef73e2f11ab80ad56..d2f8882726c204325c3dc5eb3a71581e3904c461 100644
|
| --- a/src/opts/SkBlurImage_opts_SSE2.cpp
|
| +++ b/src/opts/SkBlurImage_opts_SSE2.cpp
|
| @@ -55,17 +55,13 @@ void SkBoxBlur_SSE2(const SkPMColor* src, int srcStride, SkPMColor* dst, int ker
|
| const SkPMColor* sptr = src;
|
| SkColor* dptr = dst;
|
| for (int x = 0; x < width; ++x) {
|
| -#if 0
|
| - // In SSE4.1, this would be
|
| - __m128i result = _mm_mullo_epi32(sum, scale);
|
| -#else
|
| - // But SSE2 has no PMULLUD, so we must do AG and RB separately.
|
| + // SSE2 has no PMULLD (_mm_mullo_epi32 is SSE4.1), so we must do AG and RB separately.
|
| __m128i tmp1 = _mm_mul_epu32(sum, scale);
|
| __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(sum, 4),
|
| _mm_srli_si128(scale, 4));
|
| __m128i result = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0,0,2,0)),
|
| _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0,0,2,0)));
|
| -#endif
|
| +
|
| // sumA*scale+.5 sumB*scale+.5 sumG*scale+.5 sumB*scale+.5
|
| result = _mm_add_epi32(result, half);
|
|
|
|
|