| Index: src/opts/SkUtils_opts.h
|
| diff --git a/src/opts/SkOpts_neon.cpp b/src/opts/SkUtils_opts.h
|
| similarity index 53%
|
| copy from src/opts/SkOpts_neon.cpp
|
| copy to src/opts/SkUtils_opts.h
|
| index aa718272e15957f8e2243aab0a2f42b8d5ef4df2..44fe643276cb5139fca3ebfc8af14cbf03f369a3 100644
|
| --- a/src/opts/SkOpts_neon.cpp
|
| +++ b/src/opts/SkUtils_opts.h
|
| @@ -5,18 +5,51 @@
|
| * found in the LICENSE file.
|
| */
|
|
|
| -#include "SkFloatingPoint.h"
|
| -#include "SkOpts.h"
|
| -#define SK_OPTS_NS neon
|
| -#include "SkBlurImageFilter_opts.h"
|
| -#include "SkXfermode_opts.h"
|
| +#ifndef SkUtils_opts_DEFINED
|
| +#define SkUtils_opts_DEFINED
|
|
|
| -namespace neon { // This helps identify methods from this file when debugging / profiling.
|
| +namespace SK_OPTS_NS {
|
|
|
| -static float rsqrt(float x) {
|
| - return sk_float_rsqrt(x); // This sk_float_rsqrt copy will take the NEON compile-time path.
|
| +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
|
| +
|
| +static void memset16(uint16_t* dst, uint16_t val, int n) {  // SSE2: set dst[0..n) to val; n <= 0 writes nothing.
|
| + auto dst8 = (__m128i*)dst;
|
| + auto val8 = _mm_set1_epi16(val);  // val replicated into all eight 16-bit lanes
|
| + for (n = n > 0 ? n : 0; n >= 8; n -= 8) {  // clamp negative n so the bit tests below cannot fire on it
|
| + _mm_storeu_si128(dst8++, val8);  // unaligned 128-bit store: eight elements per iteration
|
| + }
|
| + dst = (uint16_t*)dst8;  // 0..7 elements remain; bits 2, 1, 0 of n select the tail stores
|
| + if (n & 4) {
|
| + _mm_storel_epi64((__m128i*)dst, val8);  // low 64 bits: four elements
|
| + dst += 4;
|
| + }
|
| + if (n & 2) {
|
| + dst[0] = dst[1] = val;  // two 16-bit stores; a uint32_t store through dst could be misaligned and violates aliasing rules
|
| + dst += 2;
|
| + }
|
| + if (n & 1) {
|
| + *dst = val;
|
| + }
|
| }
|
|
|
| +static void memset32(uint32_t* dst, uint32_t val, int n) {  // SSE2: set dst[0..n) to val; n <= 0 writes nothing.
|
| + auto dst4 = (__m128i*)dst;
|
| + auto val4 = _mm_set1_epi32(val);  // val replicated into all four 32-bit lanes
|
| + for (n = n > 0 ? n : 0; n >= 4; n -= 4) {  // clamp negative n so the bit tests below cannot fire on it
|
| + _mm_storeu_si128(dst4++, val4);  // unaligned 128-bit store: four elements per iteration
|
| + }
|
| + dst = (uint32_t*)dst4;  // 0..3 elements remain; bits 1 and 0 of n select the tail stores
|
| + if (n & 2) {
|
| + _mm_storel_epi64((__m128i*)dst, val4);  // low 64 bits: two elements
|
| + dst += 2;
|
| + }
|
| + if (n & 1) {
|
| + *dst = val;
|
| + }
|
| +}
|
| +
|
| +#elif defined(SK_ARM_HAS_NEON)
|
| +
|
| static void memset16(uint16_t* dst, uint16_t value, int n) {
|
| uint16x8_t v8 = vdupq_n_u16(value);
|
| uint16x8x4_t v32 = {{ v8, v8, v8, v8 }};
|
| @@ -65,18 +98,13 @@ static void memset32(uint32_t* dst, uint32_t value, int n) {
|
| }
|
| }
|
|
|
| -} // namespace neon
|
| +#else // Neither NEON nor SSE2.
|
|
|
| -namespace SkOpts {
|
| - void Init_neon() {
|
| - rsqrt = neon::rsqrt;
|
| - memset16 = neon::memset16;
|
| - memset32 = neon::memset32;
|
| - create_xfermode = SkCreate4pxXfermode;
|
| +static void memset16(uint16_t* dst, uint16_t val, int n) { for (int i = 0; i < n; ++i) { dst[i] = val; } }  // portable fill of dst[0..n); no-op when n <= 0
|
| +static void memset32(uint32_t* dst, uint32_t val, int n) { for (int i = 0; i < n; ++i) { dst[i] = val; } }  // portable fill of dst[0..n); no-op when n <= 0
|
|
|
| - static const auto x = neon::kX, y = neon::kY;
|
| - box_blur_xx = neon::box_blur<x,x>;
|
| - box_blur_xy = neon::box_blur<x,y>;
|
| - box_blur_yx = neon::box_blur<y,x>;
|
| - }
|
| -}
|
| +#endif
|
| +
|
| +} // namespace SK_OPTS_NS
|
| +
|
| +#endif//SkUtils_opts_DEFINED
|
|
|