OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright 2015 Google Inc. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license that can be | |
5 * found in the LICENSE file. | |
6 */ | |
7 | |
8 #ifndef SkUtils_opts_DEFINED | |
9 #define SkUtils_opts_DEFINED | |
10 | |
11 namespace SK_OPTS_NS { | |
12 | |
13 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | |
14 | |
// Fills dst[0..n) with val using unaligned SSE2 stores.
//   dst: destination buffer; only uint16_t alignment is required.
//   val: 16-bit value to replicate.
//   n:   number of uint16_t elements to write; n <= 0 writes nothing.
static void memset16(uint16_t* dst, uint16_t val, int n) {
    // A negative count has low bits set in two's complement, which would make
    // the tail tests below write out of bounds. Treat it like an empty fill.
    if (n <= 0) {
        return;
    }

    const __m128i val8 = _mm_set1_epi16(val);

    // Bulk: eight 16-bit lanes per unaligned 128-bit store.
    for ( ; n >= 8; n -= 8) {
        _mm_storeu_si128((__m128i*)dst, val8);
        dst += 8;
    }

    // Tail: n is now in [0, 7]; each set bit selects one smaller store.
    if (n & 4) {
        _mm_storel_epi64((__m128i*)dst, val8);  // low 64 bits == four lanes
        dst += 4;
    }
    if (n & 2) {
        // Two plain 16-bit stores rather than one store through a uint32_t*:
        // dst is only guaranteed 2-byte alignment, and the punned store would
        // violate strict aliasing.
        dst[0] = val;
        dst[1] = val;
        dst += 2;
    }
    if (n & 1) {
        *dst = val;
    }
}
34 | |
// Fills dst[0..n) with val using unaligned SSE2 stores.
//   dst: destination buffer; only uint32_t alignment is required.
//   val: 32-bit value to replicate.
//   n:   number of uint32_t elements to write; n <= 0 writes nothing.
static void memset32(uint32_t* dst, uint32_t val, int n) {
    // A negative count has low bits set in two's complement, which would make
    // the tail tests below write out of bounds. Treat it like an empty fill.
    if (n <= 0) {
        return;
    }

    const __m128i val4 = _mm_set1_epi32(val);

    // Bulk: four 32-bit lanes per unaligned 128-bit store.
    for ( ; n >= 4; n -= 4) {
        _mm_storeu_si128((__m128i*)dst, val4);
        dst += 4;
    }

    // Tail: n is now in [0, 3].
    if (n & 2) {
        _mm_storel_epi64((__m128i*)dst, val4);  // low 64 bits == two lanes
        dst += 2;
    }
    if (n & 1) {
        *dst = val;
    }
}
50 | |
51 #elif defined(SK_ARM_HAS_NEON) | |
52 | |
// Fills dst[0..n) with value using NEON stores.
//   dst:   destination buffer.
//   value: 16-bit value to replicate.
//   n:     number of uint16_t elements to write; n <= 0 writes nothing.
static void memset16(uint16_t* dst, uint16_t value, int n) {
    // A negative count has low bits set in two's complement, which would make
    // the tail tests below write out of bounds. Treat it like an empty fill.
    if (n <= 0) {
        return;
    }

    uint16x8_t   v8  = vdupq_n_u16(value);
    uint16x8x4_t v32 = {{ v8, v8, v8, v8 }};

    // Bulk: 32 elements per iteration.
    while (n >= 32) {
        vst4q_u16(dst, v32);  // This swizzles, but we don't care: all lanes are the same value.
        dst += 32;
        n   -= 32;
    }

    // n is now in [0, 31]: up to three 8-element stores.
    switch (n / 8) {
        case 3: vst1q_u16(dst, v8); dst += 8;  // fallthrough
        case 2: vst1q_u16(dst, v8); dst += 8;  // fallthrough
        case 1: vst1q_u16(dst, v8); dst += 8;
                break;
        default: break;
    }

    // Then one optional 4-element store.
    if (n & 4) {
        vst1_u16(dst, vget_low_u16(v8));
        dst += 4;
    }

    // And up to three scalar stores.
    switch (n & 3) {
        case 3: *dst++ = value;  // fallthrough
        case 2: *dst++ = value;  // fallthrough
        case 1: *dst   = value;
                break;
        default: break;
    }
}
77 | |
// Fills dst[0..n) with value using NEON stores.
//   dst:   destination buffer.
//   value: 32-bit value to replicate.
//   n:     number of uint32_t elements to write; n <= 0 writes nothing.
static void memset32(uint32_t* dst, uint32_t value, int n) {
    // A negative count has low bits set in two's complement, which would make
    // the tail tests below write out of bounds. Treat it like an empty fill.
    if (n <= 0) {
        return;
    }

    uint32x4_t   v4  = vdupq_n_u32(value);
    uint32x4x4_t v16 = {{ v4, v4, v4, v4 }};

    // Bulk: 16 elements per iteration.
    while (n >= 16) {
        vst4q_u32(dst, v16);  // This swizzles, but we don't care: all lanes are the same value.
        dst += 16;
        n   -= 16;
    }

    // n is now in [0, 15]: up to three 4-element stores.
    switch (n / 4) {
        case 3: vst1q_u32(dst, v4); dst += 4;  // fallthrough
        case 2: vst1q_u32(dst, v4); dst += 4;  // fallthrough
        case 1: vst1q_u32(dst, v4); dst += 4;
                break;
        default: break;
    }

    // Then one optional 2-element store and one optional scalar store.
    if (n & 2) {
        vst1_u32(dst, vget_low_u32(v4));
        dst += 2;
    }
    if (n & 1) {
        *dst = value;
    }
}
100 | |
101 #else // Neither NEON nor SSE2. | |
102 | |
// Portable scalar fill: stores val into dst[0..n). Writes nothing when n <= 0.
static void memset16(uint16_t* dst, uint16_t val, int n) {
    for (int i = 0; i < n; ++i) {
        dst[i] = val;
    }
}
// Portable scalar fill: stores val into dst[0..n). Writes nothing when n <= 0.
static void memset32(uint32_t* dst, uint32_t val, int n) {
    for (int i = 0; i < n; ++i) {
        dst[i] = val;
    }
}
105 | |
106 #endif | |
107 | |
108 } // namespace SK_OPTS_NS | |
109 | |
110 #endif//SkUtils_opts_DEFINED | |
OLD | NEW |