| Index: src/opts/SkBlurImage_opts_SSE4.cpp
|
| diff --git a/src/opts/SkBlurImage_opts_SSE2.cpp b/src/opts/SkBlurImage_opts_SSE4.cpp
|
| similarity index 72%
|
| copy from src/opts/SkBlurImage_opts_SSE2.cpp
|
| copy to src/opts/SkBlurImage_opts_SSE4.cpp
|
| index bbc6a66462ea5bd278bcf20ef73e2f11ab80ad56..fb17aaca4d38e72a405e4044a830a1376a8c458a 100644
|
| --- a/src/opts/SkBlurImage_opts_SSE2.cpp
|
| +++ b/src/opts/SkBlurImage_opts_SSE4.cpp
|
| @@ -1,16 +1,24 @@
|
| /*
|
| - * Copyright 2013 The Android Open Source Project
|
| + * Copyright 2014 The Android Open Source Project
|
| *
|
| * Use of this source code is governed by a BSD-style license that can be
|
| * found in the LICENSE file.
|
| */
|
|
|
| -#include <emmintrin.h>
|
| #include "SkBitmap.h"
|
| -#include "SkBlurImage_opts_SSE2.h"
|
| +#include "SkBlurImage_opts_SSE4.h"
|
| #include "SkColorPriv.h"
|
| #include "SkRect.h"
|
|
|
| +/* With the exception of the Android framework we always build the SSE4 functions
|
| + * and enable the caller to determine SSE4 support. However, for the Android framework,
|
| + * if the device does not support SSE4x then the compiler will not supply the required
|
| + * -msse4* option needed to build this file, so instead we provide a stub implementation.
|
| + */
|
| +#if !defined(SK_BUILD_FOR_ANDROID_FRAMEWORK) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
|
| +
|
| +#include <smmintrin.h>
|
| +
|
| namespace {
|
| enum BlurDirection {
|
| kX, kY
|
| @@ -33,7 +41,7 @@ inline __m128i expand(int a) {
|
| }
|
|
|
| template<BlurDirection srcDirection, BlurDirection dstDirection>
|
| -void SkBoxBlur_SSE2(const SkPMColor* src, int srcStride, SkPMColor* dst, int kernelSize,
|
| +void SkBoxBlur_SSE4(const SkPMColor* src, int srcStride, SkPMColor* dst, int kernelSize,
|
| int leftOffset, int rightOffset, int width, int height)
|
| {
|
| const int rightBorder = SkMin32(rightOffset + 1, width);
|
| @@ -55,17 +63,8 @@ void SkBoxBlur_SSE2(const SkPMColor* src, int srcStride, SkPMColor* dst, int ker
|
| const SkPMColor* sptr = src;
|
| SkColor* dptr = dst;
|
| for (int x = 0; x < width; ++x) {
|
| -#if 0
|
| - // In SSE4.1, this would be
|
| __m128i result = _mm_mullo_epi32(sum, scale);
|
| -#else
|
| - // But SSE2 has no PMULLUD, so we must do AG and RB separately.
|
| - __m128i tmp1 = _mm_mul_epu32(sum, scale);
|
| - __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(sum, 4),
|
| - _mm_srli_si128(scale, 4));
|
| - __m128i result = _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0,0,2,0)),
|
| - _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0,0,2,0)));
|
| -#endif
|
| +
|
| // sumA*scale+.5 sumB*scale+.5 sumG*scale+.5 sumB*scale+.5
|
| result = _mm_add_epi32(result, half);
|
|
|
| @@ -100,13 +99,25 @@ void SkBoxBlur_SSE2(const SkPMColor* src, int srcStride, SkPMColor* dst, int ker
|
|
|
| } // namespace
|
|
|
| -bool SkBoxBlurGetPlatformProcs_SSE2(SkBoxBlurProc* boxBlurX,
|
| +bool SkBoxBlurGetPlatformProcs_SSE4(SkBoxBlurProc* boxBlurX,
|
| SkBoxBlurProc* boxBlurY,
|
| SkBoxBlurProc* boxBlurXY,
|
| SkBoxBlurProc* boxBlurYX) {
|
| - *boxBlurX = SkBoxBlur_SSE2<kX, kX>;
|
| - *boxBlurY = SkBoxBlur_SSE2<kY, kY>;
|
| - *boxBlurXY = SkBoxBlur_SSE2<kX, kY>;
|
| - *boxBlurYX = SkBoxBlur_SSE2<kY, kX>;
|
| + *boxBlurX = SkBoxBlur_SSE4<kX, kX>;
|
| + *boxBlurY = SkBoxBlur_SSE4<kY, kY>;
|
| + *boxBlurXY = SkBoxBlur_SSE4<kX, kY>;
|
| + *boxBlurYX = SkBoxBlur_SSE4<kY, kX>;
|
| return true;
|
| }
|
| +
|
| +#else // !defined(SK_BUILD_FOR_ANDROID_FRAMEWORK) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
|
| +
|
| +bool SkBoxBlurGetPlatformProcs_SSE4(SkBoxBlurProc* boxBlurX,
|
| + SkBoxBlurProc* boxBlurY,
|
| + SkBoxBlurProc* boxBlurXY,
|
| + SkBoxBlurProc* boxBlurYX) {
|
| + sk_throw();
|
| +}
|
| +
|
| +
|
| +#endif
|
|
|