Index: src/opts/SkUtils_opts.h |
diff --git a/src/opts/SkOpts_neon.cpp b/src/opts/SkUtils_opts.h |
similarity index 53% |
copy from src/opts/SkOpts_neon.cpp |
copy to src/opts/SkUtils_opts.h |
index aa718272e15957f8e2243aab0a2f42b8d5ef4df2..44fe643276cb5139fca3ebfc8af14cbf03f369a3 100644 |
--- a/src/opts/SkOpts_neon.cpp |
+++ b/src/opts/SkUtils_opts.h |
@@ -5,18 +5,51 @@ |
* found in the LICENSE file. |
*/ |
-#include "SkFloatingPoint.h" |
-#include "SkOpts.h" |
-#define SK_OPTS_NS neon |
-#include "SkBlurImageFilter_opts.h" |
-#include "SkXfermode_opts.h" |
+#ifndef SkUtils_opts_DEFINED |
+#define SkUtils_opts_DEFINED |
-namespace neon { // This helps identify methods from this file when debugging / profiling. |
+namespace SK_OPTS_NS { |
-static float rsqrt(float x) { |
- return sk_float_rsqrt(x); // This sk_float_rsqrt copy will take the NEON compile-time path. |
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
+ |
+static void memset16(uint16_t* dst, uint16_t val, int n) { /* Fill dst[0..n) with val. NOTE(review): tail tests read n's low bits, so n is presumably >= 0 - confirm with callers. */ |
+    auto dst8 = (__m128i*)dst; |
+    auto val8 = _mm_set1_epi16(val); /* val replicated into all eight 16-bit lanes */ |
+    for ( ; n >= 8; n -= 8) { |
+        _mm_storeu_si128(dst8++, val8); /* unaligned 128-bit store, eight elements per iteration */ |
+    } |
+    dst = (uint16_t*)dst8; |
+    if (n & 4) { |
+        _mm_storel_epi64((__m128i*)dst, val8); /* low 64 bits of val8 = four elements */ |
+        dst += 4; |
+    } |
+    if (n & 2) { |
+        dst[0] = dst[1] = val; /* two 16-bit stores; a uint32_t store through dst would violate aliasing and alignment rules */ |
+        dst += 2; |
+    } |
+    if (n & 1) { |
+        *dst = val; |
+    } |
} |
+static void memset32(uint32_t* dst, uint32_t val, int n) { /* Fill dst[0..n) with val using unaligned SSE2 stores. */ |
+    const __m128i splat = _mm_set1_epi32(val); /* val replicated into all four 32-bit lanes */ |
+    __m128i* wide = (__m128i*)dst; |
+    for ( ; n >= 4; n -= 4, wide++) { |
+        _mm_storeu_si128(wide, splat); /* four elements per iteration */ |
+    } |
+    uint32_t* tail = (uint32_t*)wide; /* first element not covered by the 128-bit stores */ |
+    if (n & 2) { |
+        _mm_storel_epi64((__m128i*)tail, splat); /* low 64 bits of splat = two elements */ |
+        tail += 2; |
+    } |
+    if (n & 1) { |
+        *tail = val; |
+    } |
+} |
+ |
+#elif defined(SK_ARM_HAS_NEON) |
+ |
static void memset16(uint16_t* dst, uint16_t value, int n) { |
uint16x8_t v8 = vdupq_n_u16(value); |
uint16x8x4_t v32 = {{ v8, v8, v8, v8 }}; |
@@ -65,18 +98,13 @@ static void memset32(uint32_t* dst, uint32_t value, int n) { |
} |
} |
-} // namespace neon |
+#else // Neither NEON nor SSE2. |
-namespace SkOpts { |
- void Init_neon() { |
- rsqrt = neon::rsqrt; |
- memset16 = neon::memset16; |
- memset32 = neon::memset32; |
- create_xfermode = SkCreate4pxXfermode; |
+static void memset16(uint16_t* dst, uint16_t val, int n) { for (int i = 0; i < n; i++) { dst[i] = val; } } /* Portable scalar fill of dst[0..n); does nothing when n <= 0. */ |
+static void memset32(uint32_t* dst, uint32_t val, int n) { for (int i = 0; i < n; i++) { dst[i] = val; } } /* Portable scalar fill of dst[0..n); does nothing when n <= 0. */ |
- static const auto x = neon::kX, y = neon::kY; |
- box_blur_xx = neon::box_blur<x,x>; |
- box_blur_xy = neon::box_blur<x,y>; |
- box_blur_yx = neon::box_blur<y,x>; |
- } |
-} |
+#endif |
+ |
+} // namespace SK_OPTS_NS |
+ |
+#endif//SkUtils_opts_DEFINED |