Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(687)

Side by Side Diff: src/opts/SkUtils_opts.h

Issue 1639863002: try plain-old code for sk_memset16/32 now that NEON is compile-time (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: for Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkOpts_neon.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 * Copyright 2015 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #ifndef SkUtils_opts_DEFINED
9 #define SkUtils_opts_DEFINED
10
11 namespace SK_OPTS_NS {
12
13 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
14
15 static void memset16(uint16_t* dst, uint16_t val, int n) {  // SSE2 build: store val into n consecutive uint16_t slots starting at dst.
16 auto dst8 = (__m128i*)dst;  // Write cursor that advances 128 bits (8 uint16_t) per increment.
17 auto val8 = _mm_set1_epi16(val);  // val replicated into all eight 16-bit lanes.
18 for ( ; n >= 8; n -= 8) {  // Bulk phase: 8 elements per iteration.
19 _mm_storeu_si128(dst8++, val8);  // Unaligned 128-bit store, so dst needs no 16-byte alignment.
20 }
21 dst = (uint16_t*)dst8;  // Back to element addressing for the tail; n is 0..7 here if it was non-negative on entry.
22 if (n & 4) {  // NOTE(review): a negative n skips the loop but can still pass these bit tests; presumably callers always pass n >= 0 -- confirm.
23 _mm_storel_epi64((__m128i*)dst, val8);  // Store only the low 64 bits: 4 elements.
24 dst += 4;
25 }
26 if (n & 2) {  // Two more elements remain.
27 *(uint32_t*)dst = _mm_cvtsi128_si32(val8);  // Low 32 bits hold two copies of val. NOTE(review): type-punned 32-bit store through a uint16_t*; confirm the aliasing/alignment assumptions.
28 dst += 2;
29 }
30 if (n & 1) {  // One last element.
31 *dst = val;
32 }
33 }
34
35 static void memset32(uint32_t* dst, uint32_t val, int n) {  // SSE2 build: store val into n consecutive uint32_t slots starting at dst.
36 auto dst4 = (__m128i*)dst;  // Write cursor that advances 128 bits (4 uint32_t) per increment.
37 auto val4 = _mm_set1_epi32(val);  // val replicated into all four 32-bit lanes.
38 for ( ; n >= 4; n -= 4) {  // Bulk phase: 4 elements per iteration.
39 _mm_storeu_si128(dst4++, val4);  // Unaligned 128-bit store, so dst needs no 16-byte alignment.
40 }
41 dst = (uint32_t*)dst4;  // Back to element addressing for the tail; n is 0..3 here if it was non-negative on entry.
42 if (n & 2) {  // NOTE(review): a negative n skips the loop but can still pass these bit tests; presumably callers always pass n >= 0 -- confirm.
43 _mm_storel_epi64((__m128i*)dst, val4);  // Store only the low 64 bits: 2 elements.
44 dst += 2;
45 }
46 if (n & 1) {  // One last element.
47 *dst = val;
48 }
49 }
50
51 #elif defined(SK_ARM_HAS_NEON)
52
53 static void memset16(uint16_t* dst, uint16_t value, int n) {  // NEON build: store value into n consecutive uint16_t slots starting at dst.
54 uint16x8_t v8 = vdupq_n_u16(value);  // value replicated into all eight 16-bit lanes.
55 uint16x8x4_t v32 = {{ v8, v8, v8, v8 }};  // Four identical vectors: 32 elements per vst4q store.
56
57 while (n >= 32) {  // Bulk phase: 32 elements per iteration.
58 vst4q_u16(dst, v32); // This swizzles, but we don't care: all lanes are the same, value.
59 dst += 32;
60 n -= 32;
61 }
62 switch (n / 8) {  // Whole 8-element groups left (0..3 for non-negative n); n itself is not reduced here.
63 case 3: vst1q_u16(dst, v8); dst += 8;  // Intentional fallthrough.
64 case 2: vst1q_u16(dst, v8); dst += 8;  // Intentional fallthrough.
65 case 1: vst1q_u16(dst, v8); dst += 8;
66 }
67 if (n & 4) {  // The low bits of n still encode the remainder below 8.
68 vst1_u16(dst, vget_low_u16(v8));  // Store the low half of the vector: 4 elements.
69 dst += 4;
70 }
71 switch (n & 3) {  // Final 0..3 single elements.
72 case 3: *dst++ = value;  // Intentional fallthrough.
73 case 2: *dst++ = value;  // Intentional fallthrough.
74 case 1: *dst = value;
75 }
76 }
77
78 static void memset32(uint32_t* dst, uint32_t value, int n) {  // NEON build: store value into n consecutive uint32_t slots starting at dst.
79 uint32x4_t v4 = vdupq_n_u32(value);  // value replicated into all four 32-bit lanes.
80 uint32x4x4_t v16 = {{ v4, v4, v4, v4 }};  // Four identical vectors: 16 elements per vst4q store.
81
82 while (n >= 16) {  // Bulk phase: 16 elements per iteration.
83 vst4q_u32(dst, v16); // This swizzles, but we don't care: all lanes are the same, value.
84 dst += 16;
85 n -= 16;
86 }
87 switch (n / 4) {  // Whole 4-element groups left (0..3 for non-negative n); n itself is not reduced here.
88 case 3: vst1q_u32(dst, v4); dst += 4;  // Intentional fallthrough.
89 case 2: vst1q_u32(dst, v4); dst += 4;  // Intentional fallthrough.
90 case 1: vst1q_u32(dst, v4); dst += 4;
91 }
92 if (n & 2) {  // The low bits of n still encode the remainder below 4.
93 vst1_u32(dst, vget_low_u32(v4));  // Store the low half of the vector: 2 elements.
94 dst += 2;
95 }
96 if (n & 1) {  // One last element.
97 *dst = value;
98 }
99 }
100
101 #else // Neither NEON nor SSE2.
102
103 static void memset16(uint16_t* dst, uint16_t val, int n) { while (n --> 0) { *dst++ = val; } }
104 static void memset32(uint32_t* dst, uint32_t val, int n) { while (n --> 0) { *dst++ = val; } }
105
106 #endif
107
108 } // namespace SK_OPTS_NS
109
110 #endif//SkUtils_opts_DEFINED
OLDNEW
« no previous file with comments | « src/opts/SkOpts_neon.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698