OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2006 The Android Open Source Project | 2 * Copyright 2006 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkUtils_DEFINED | 8 #ifndef SkUtils_DEFINED |
9 #define SkUtils_DEFINED | 9 #define SkUtils_DEFINED |
10 | 10 |
11 #include "SkTypes.h" | 11 #include "SkTypes.h" |
12 #if defined(SK_BUILD_FOR_WIN) | |
13 #include <intrin.h> | |
14 #endif | |
15 | |
16 #if defined(SK_CPU_X86) | |
17 static inline void rep_stosw(uint16_t buffer[], uint16_t value, int count) { | |
18 #if defined(SK_BUILD_FOR_WIN) | |
19 __stosw(buffer, value, count); | |
20 #else | |
21 __asm__ __volatile__ ( "rep stosw" : "+D"(buffer), "+c"(count) : "a"(val
ue) ); | |
22 #endif | |
23 } | |
24 | |
25 static inline void rep_stosd(uint32_t buffer[], uint32_t value, int count) { | |
26 #if defined(SK_BUILD_FOR_WIN) | |
27 __stosd((PDWORD)buffer, value, count); | |
28 #else | |
29 __asm__ __volatile__ ( "rep stosl" : "+D"(buffer), "+c"(count) : "a"(val
ue) ); | |
30 #endif | |
31 } | |
32 #endif | |
33 | 12 |
34 namespace SkOpts { | 13 namespace SkOpts { |
35 extern void (*memset16)(uint16_t[], uint16_t, int); | 14 extern void (*memset16)(uint16_t[], uint16_t, int); |
36 extern void (*memset32)(uint32_t[], uint32_t, int); | 15 extern void (*memset32)(uint32_t[], uint32_t, int); |
37 } | 16 } |
38 | 17 |
39 /////////////////////////////////////////////////////////////////////////////// | 18 /////////////////////////////////////////////////////////////////////////////// |
40 | 19 |
41 // The stosw/d and inlining heuristics below were determined using | 20 // The inlining heuristics below were determined using bench/MemsetBench.cpp |
42 // bench/MemsetBench.cpp and perf.skia.org. | 21 // on an x86 desktop, a Nexus 7 with and without NEON, and a Nexus 9: |
| 22 // - on x86, inlining was never faster, |
| 23 // - on ARMv7, inlining was faster for N<=10. Putting this check inside the NEON |
| 24 // code was not helpful; it's got to be here outside. |
| 25 // - NEON code generation for ARMv8 with GCC 4.9 is terrible, |
| 26 // making the NEON code ~8x slower than just a serial loop. |
43 | 27 |
44 /** Similar to memset(), but it assigns a 16bit value into the buffer. | 28 /** Similar to memset(), but it assigns a 16bit value into the buffer. |
45 @param buffer The memory to have value copied into it | 29 @param buffer The memory to have value copied into it |
46 @param value The 16bit value to be copied into buffer | 30 @param value The 16bit value to be copied into buffer |
47 @param count The number of times value should be copied into the buffer. | 31 @param count The number of times value should be copied into the buffer. |
48 */ | 32 */ |
49 static inline void sk_memset16(uint16_t buffer[], uint16_t value, int count) { | 33 static inline void sk_memset16(uint16_t buffer[], uint16_t value, int count) { |
50 #if defined(SK_CPU_X86) | 34 #if defined(SK_CPU_ARM64) |
51 if (count > 30) { rep_stosw(buffer, value, count); return; } | |
52 #elif defined(SK_ARM_HAS_NEON) | |
53 while (count --> 0) { *buffer++ = value; } return; | 35 while (count --> 0) { *buffer++ = value; } return; |
| 36 #elif defined(SK_CPU_ARM32) |
| 37 if (count <= 10) { while (count --> 0) { *buffer++ = value; } return; } |
54 #endif | 38 #endif |
55 SkOpts::memset16(buffer, value, count); | 39 SkOpts::memset16(buffer, value, count); |
56 } | 40 } |
57 | 41 |
58 /** Similar to memset(), but it assigns a 32bit value into the buffer. | 42 /** Similar to memset(), but it assigns a 32bit value into the buffer. |
59 @param buffer The memory to have value copied into it | 43 @param buffer The memory to have value copied into it |
60 @param value The 32bit value to be copied into buffer | 44 @param value The 32bit value to be copied into buffer |
61 @param count The number of times value should be copied into the buffer. | 45 @param count The number of times value should be copied into the buffer. |
62 */ | 46 */ |
63 static inline void sk_memset32(uint32_t buffer[], uint32_t value, int count) { | 47 static inline void sk_memset32(uint32_t buffer[], uint32_t value, int count) { |
64 #if defined(SK_CPU_X86) | 48 #if defined(SK_CPU_ARM64) |
65 if (count > 30) { rep_stosd(buffer, value, count); return; } | |
66 #elif defined(SK_ARM_HAS_NEON) | |
67 while (count --> 0) { *buffer++ = value; } return; | 49 while (count --> 0) { *buffer++ = value; } return; |
| 50 #elif defined(SK_CPU_ARM32) |
| 51 if (count <= 10) { while (count --> 0) { *buffer++ = value; } return; } |
68 #endif | 52 #endif |
69 SkOpts::memset32(buffer, value, count); | 53 SkOpts::memset32(buffer, value, count); |
70 } | 54 } |
71 | 55 |
72 | 56 |
73 /////////////////////////////////////////////////////////////////////////////// | 57 /////////////////////////////////////////////////////////////////////////////// |
74 | 58 |
75 #define kMaxBytesInUTF8Sequence 4 | 59 #define kMaxBytesInUTF8Sequence 4 |
76 | 60 |
77 #ifdef SK_DEBUG | 61 #ifdef SK_DEBUG |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
140 } | 124 } |
141 ~SkAutoTrace() { | 125 ~SkAutoTrace() { |
142 SkDebugf("--- trace: %s Leave\n", fLabel); | 126 SkDebugf("--- trace: %s Leave\n", fLabel); |
143 } | 127 } |
144 private: | 128 private: |
145 const char* fLabel; | 129 const char* fLabel; |
146 }; | 130 }; |
147 #define SkAutoTrace(...) SK_REQUIRE_LOCAL_VAR(SkAutoTrace) | 131 #define SkAutoTrace(...) SK_REQUIRE_LOCAL_VAR(SkAutoTrace) |
148 | 132 |
149 #endif | 133 #endif |
OLD | NEW |