| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright 2015 Google Inc. | |
| 3 * | |
| 4 * Use of this source code is governed by a BSD-style license that can be | |
| 5 * found in the LICENSE file. | |
| 6 */ | |
| 7 | |
| 8 #ifndef SkUtils_opts_DEFINED | |
| 9 #define SkUtils_opts_DEFINED | |
| 10 | |
| 11 namespace SK_OPTS_NS { | |
| 12 | |
| 13 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | |
| 14 | |
// Fills dst[0..n) with val using unaligned 128-bit SSE2 stores.
//
// dst only needs the natural alignment of uint16_t. A non-positive n writes
// nothing, the same as the portable fallback at the bottom of this file.
static void memset16(uint16_t* dst, uint16_t val, int n) {
    // The tail below tests individual bits of n. For a negative n those bits
    // can be set (e.g. n == -1), which would store up to 7 stray elements.
    if (n <= 0) {
        return;
    }

    const __m128i val8 = _mm_set1_epi16(val);  // 8 copies of val

    // Bulk: 8 values per unaligned 16-byte store.
    __m128i* dst8 = (__m128i*)dst;
    for ( ; n >= 8; n -= 8) {
        _mm_storeu_si128(dst8++, val8);
    }
    dst = (uint16_t*)dst8;

    // Fewer than 8 values remain; the low three bits of n select the pieces.
    if (n & 4) {
        _mm_storel_epi64((__m128i*)dst, val8);  // low 64 bits == 4 values
        dst += 4;
    }
    if (n & 2) {
        // Two plain 16-bit stores instead of a uint32_t store through a cast
        // pointer: dst is only 2-byte aligned and is not a uint32_t object.
        dst[0] = val;
        dst[1] = val;
        dst += 2;
    }
    if (n & 1) {
        *dst = val;
    }
}
| 34 | |
// Fills dst[0..n) with val using unaligned 128-bit SSE2 stores.
//
// dst only needs the natural alignment of uint32_t. A non-positive n writes
// nothing, the same as the portable fallback at the bottom of this file.
static void memset32(uint32_t* dst, uint32_t val, int n) {
    // The tail below tests individual bits of n. For a negative n those bits
    // can be set (e.g. n == -1), which would store up to 3 stray elements.
    if (n <= 0) {
        return;
    }

    const __m128i val4 = _mm_set1_epi32(val);  // 4 copies of val

    // Bulk: 4 values per unaligned 16-byte store.
    __m128i* dst4 = (__m128i*)dst;
    for ( ; n >= 4; n -= 4) {
        _mm_storeu_si128(dst4++, val4);
    }
    dst = (uint32_t*)dst4;

    // Fewer than 4 values remain; the low two bits of n select the pieces.
    if (n & 2) {
        _mm_storel_epi64((__m128i*)dst, val4);  // low 64 bits == 2 values
        dst += 2;
    }
    if (n & 1) {
        *dst = val;
    }
}
| 50 | |
| 51 #elif defined(SK_ARM_HAS_NEON) | |
| 52 | |
// Fills dst[0..n) with value using NEON stores.
//
// A non-positive n writes nothing, the same as the portable fallback at the
// bottom of this file.
static void memset16(uint16_t* dst, uint16_t value, int n) {
    // The tail below tests individual bits of n. For a negative n those bits
    // can be set (e.g. n == -1), which would store stray elements.
    if (n <= 0) {
        return;
    }

    uint16x8_t   v8  = vdupq_n_u16(value);      // 8 copies of value
    uint16x8x4_t v32 = {{ v8, v8, v8, v8 }};    // 32 copies of value

    // Bulk: 32 values per iteration.
    while (n >= 32) {
        // vst4q swizzles as it stores, but we don't care: all lanes hold the same value.
        vst4q_u16(dst, v32);
        dst += 32;
        n -= 32;
    }

    // 0..3 remaining groups of 8; each case deliberately falls into the next.
    switch (n / 8) {
        case 3: vst1q_u16(dst, v8); dst += 8;  // fall through
        case 2: vst1q_u16(dst, v8); dst += 8;  // fall through
        case 1: vst1q_u16(dst, v8); dst += 8;  // fall through
        default: break;
    }

    // One group of 4, if bit 2 of n is set.
    if (n & 4) {
        vst1_u16(dst, vget_low_u16(v8));
        dst += 4;
    }

    // 0..3 trailing scalars; again each case falls into the next.
    switch (n & 3) {
        case 3: *dst++ = value;  // fall through
        case 2: *dst++ = value;  // fall through
        case 1: *dst = value;    // fall through
        default: break;
    }
}
| 77 | |
// Fills dst[0..n) with value using NEON stores.
//
// A non-positive n writes nothing, the same as the portable fallback at the
// bottom of this file.
static void memset32(uint32_t* dst, uint32_t value, int n) {
    // The tail below tests individual bits of n. For a negative n those bits
    // can be set (e.g. n == -1), which would store stray elements.
    if (n <= 0) {
        return;
    }

    uint32x4_t   v4  = vdupq_n_u32(value);      // 4 copies of value
    uint32x4x4_t v16 = {{ v4, v4, v4, v4 }};    // 16 copies of value

    // Bulk: 16 values per iteration.
    while (n >= 16) {
        // vst4q swizzles as it stores, but we don't care: all lanes hold the same value.
        vst4q_u32(dst, v16);
        dst += 16;
        n -= 16;
    }

    // 0..3 remaining groups of 4; each case deliberately falls into the next.
    switch (n / 4) {
        case 3: vst1q_u32(dst, v4); dst += 4;  // fall through
        case 2: vst1q_u32(dst, v4); dst += 4;  // fall through
        case 1: vst1q_u32(dst, v4); dst += 4;  // fall through
        default: break;
    }

    // Fewer than 4 values remain; the low two bits of n select the pieces.
    if (n & 2) {
        vst1_u32(dst, vget_low_u32(v4));
        dst += 2;
    }
    if (n & 1) {
        *dst = value;
    }
}
| 100 | |
| 101 #else // Neither NEON nor SSE2. | |
| 102 | |
// Portable fallback: stores val into dst[0..n) one element at a time.
// Writes nothing when n <= 0.
static void memset16(uint16_t* dst, uint16_t val, int n) {
    for (int i = 0; i < n; ++i) {
        dst[i] = val;
    }
}
// Portable fallback: stores val into dst[0..n) one element at a time.
// Writes nothing when n <= 0.
static void memset32(uint32_t* dst, uint32_t val, int n) {
    for (int i = 0; i < n; ++i) {
        dst[i] = val;
    }
}
| 105 | |
| 106 #endif | |
| 107 | |
| 108 } // namespace SK_OPTS_NS | |
| 109 | |
| 110 #endif//SkUtils_opts_DEFINED | |
| OLD | NEW |