| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include "SkFloatingPoint.h" | 8 #ifndef SkUtils_opts_DEFINED |
| 9 #include "SkOpts.h" | 9 #define SkUtils_opts_DEFINED |
| 10 #define SK_OPTS_NS neon | |
| 11 #include "SkBlurImageFilter_opts.h" | |
| 12 #include "SkXfermode_opts.h" | |
| 13 | 10 |
| 14 namespace neon { // This helps identify methods from this file when debugging /
profiling. | 11 namespace SK_OPTS_NS { |
| 15 | 12 |
| 16 static float rsqrt(float x) { | 13 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
| 17 return sk_float_rsqrt(x); // This sk_float_rsqrt copy will take the NEON co
mpile-time path. | 14 |
| 15 static void memset16(uint16_t* dst, uint16_t val, int n) { |
| 16 auto dst8 = (__m128i*)dst; |
| 17 auto val8 = _mm_set1_epi16(val); |
| 18 for ( ; n >= 8; n -= 8) { |
| 19 _mm_storeu_si128(dst8++, val8); |
| 20 } |
| 21 dst = (uint16_t*)dst8; |
| 22 if (n & 4) { |
| 23 _mm_storel_epi64((__m128i*)dst, val8); |
| 24 dst += 4; |
| 25 } |
| 26 if (n & 2) { |
| 27 *(uint32_t*)dst = _mm_cvtsi128_si32(val8); |
| 28 dst += 2; |
| 29 } |
| 30 if (n & 1) { |
| 31 *dst = val; |
| 32 } |
| 18 } | 33 } |
| 19 | 34 |
| 35 static void memset32(uint32_t* dst, uint32_t val, int n) { |
| 36 auto dst4 = (__m128i*)dst; |
| 37 auto val4 = _mm_set1_epi32(val); |
| 38 for ( ; n >= 4; n -= 4) { |
| 39 _mm_storeu_si128(dst4++, val4); |
| 40 } |
| 41 dst = (uint32_t*)dst4; |
| 42 if (n & 2) { |
| 43 _mm_storel_epi64((__m128i*)dst, val4); |
| 44 dst += 2; |
| 45 } |
| 46 if (n & 1) { |
| 47 *dst = val; |
| 48 } |
| 49 } |
| 50 |
| 51 #elif defined(SK_ARM_HAS_NEON) |
| 52 |
| 20 static void memset16(uint16_t* dst, uint16_t value, int n) { | 53 static void memset16(uint16_t* dst, uint16_t value, int n) { |
| 21 uint16x8_t v8 = vdupq_n_u16(value); | 54 uint16x8_t v8 = vdupq_n_u16(value); |
| 22 uint16x8x4_t v32 = {{ v8, v8, v8, v8 }}; | 55 uint16x8x4_t v32 = {{ v8, v8, v8, v8 }}; |
| 23 | 56 |
| 24 while (n >= 32) { | 57 while (n >= 32) { |
| 25 vst4q_u16(dst, v32); // This swizzles, but we don't care: all lanes are
the same, value. | 58 vst4q_u16(dst, v32); // This swizzles, but we don't care: all lanes are
the same, value. |
| 26 dst += 32; | 59 dst += 32; |
| 27 n -= 32; | 60 n -= 32; |
| 28 } | 61 } |
| 29 switch (n / 8) { | 62 switch (n / 8) { |
| (...skipping 28 matching lines...) Expand all Loading... |
| 58 } | 91 } |
| 59 if (n & 2) { | 92 if (n & 2) { |
| 60 vst1_u32(dst, vget_low_u32(v4)); | 93 vst1_u32(dst, vget_low_u32(v4)); |
| 61 dst += 2; | 94 dst += 2; |
| 62 } | 95 } |
| 63 if (n & 1) { | 96 if (n & 1) { |
| 64 *dst = value; | 97 *dst = value; |
| 65 } | 98 } |
| 66 } | 99 } |
| 67 | 100 |
| 68 } // namespace neon | 101 #else // Neither NEON nor SSE2. |
| 69 | 102 |
| 70 namespace SkOpts { | 103 static void memset16(uint16_t* dst, uint16_t val, int n) { while (n --> 0) { *ds
t++ = val; } } |
| 71 void Init_neon() { | 104 static void memset32(uint32_t* dst, uint32_t val, int n) { while (n --> 0) { *ds
t++ = val; } } |
| 72 rsqrt = neon::rsqrt; | |
| 73 memset16 = neon::memset16; | |
| 74 memset32 = neon::memset32; | |
| 75 create_xfermode = SkCreate4pxXfermode; | |
| 76 | 105 |
| 77 static const auto x = neon::kX, y = neon::kY; | 106 #endif |
| 78 box_blur_xx = neon::box_blur<x,x>; | 107 |
| 79 box_blur_xy = neon::box_blur<x,y>; | 108 } // namespace SK_OPTS_NS |
| 80 box_blur_yx = neon::box_blur<y,x>; | 109 |
| 81 } | 110 #endif//SkUtils_opts_DEFINED |
| 82 } | |
| OLD | NEW |