OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2006 The Android Open Source Project | 2 * Copyright 2006 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkUtils_DEFINED | 8 #ifndef SkUtils_DEFINED |
9 #define SkUtils_DEFINED | 9 #define SkUtils_DEFINED |
10 | 10 |
11 #include "SkTypes.h" | 11 #include "SkTypes.h" |
| 12 #if defined(SK_BUILD_FOR_WIN) |
| 13 #include <intrin.h> |
| 14 #endif |
| 15 |
| 16 #if defined(SK_CPU_X86) |
/** Fills a buffer with a 16-bit value using the x86 string-store instruction.
    @param buffer The memory to have value stored into it
    @param value  The 16-bit value written to every element
    @param count  The number of elements to write; zero or negative writes nothing.
*/
static inline void rep_stosw(uint16_t buffer[], uint16_t value, int count) {
    // A negative count would otherwise be reinterpreted as a huge repeat count.
    if (count <= 0) { return; }
#if defined(SK_BUILD_FOR_WIN)
    __stosw(buffer, value, count);
#else
    // REP counts in the full-width register (RCX on x86-64). Binding an int to
    // "c" only defines the low 32 bits, so widen the count first. unsigned long
    // is register-width on the ILP32/LP64 ABIs that reach this branch.
    unsigned long remaining = (unsigned long)count;
    // The "memory" clobber tells the compiler this asm writes through buffer;
    // without it, accesses to the buffer may be cached or moved across the asm.
    // NOTE(review): relies on the direction flag being clear, as the platform
    // ABIs require at function boundaries.
    __asm__ __volatile__ ( "rep stosw"
                           : "+D"(buffer), "+c"(remaining)
                           : "a"(value)
                           : "memory" );
#endif
}
| 24 |
/** Fills a buffer with a 32-bit value using the x86 string-store instruction.
    @param buffer The memory to have value stored into it
    @param value  The 32-bit value written to every element
    @param count  The number of elements to write; zero or negative writes nothing.
*/
static inline void rep_stosd(uint32_t buffer[], uint32_t value, int count) {
    // A negative count would otherwise be reinterpreted as a huge repeat count.
    if (count <= 0) { return; }
#if defined(SK_BUILD_FOR_WIN)
    // __stosd is declared in terms of unsigned long (32 bits wide with MSVC).
    // Casting to that type directly avoids PDWORD, which comes from the Windows
    // SDK headers rather than from <intrin.h>.
    __stosd((unsigned long*)buffer, value, count);
#else
    // REP counts in the full-width register (RCX on x86-64). Binding an int to
    // "c" only defines the low 32 bits, so widen the count first. unsigned long
    // is register-width on the ILP32/LP64 ABIs that reach this branch.
    unsigned long remaining = (unsigned long)count;
    // The "memory" clobber tells the compiler this asm writes through buffer;
    // without it, accesses to the buffer may be cached or moved across the asm.
    // NOTE(review): relies on the direction flag being clear, as the platform
    // ABIs require at function boundaries.
    __asm__ __volatile__ ( "rep stosl"
                           : "+D"(buffer), "+c"(remaining)
                           : "a"(value)
                           : "memory" );
#endif
}
| 32 #endif |
12 | 33 |
// SkOpts declares two function pointers, memset16 and memset32, that are
// defined elsewhere (extern). sk_memset16()/sk_memset32() in this header
// forward their (buffer, value, count) arguments to them.
13 namespace SkOpts { | 34 namespace SkOpts { |
14 extern void (*memset16)(uint16_t[], uint16_t, int); | 35 extern void (*memset16)(uint16_t[], uint16_t, int); |
15 extern void (*memset32)(uint32_t[], uint32_t, int); | 36 extern void (*memset32)(uint32_t[], uint32_t, int); |
16 } | 37 } |
17 | 38 |
18 /////////////////////////////////////////////////////////////////////////////// | 39 /////////////////////////////////////////////////////////////////////////////// |
19 | 40 |
20 // The inlining heuristics below were determined using bench/MemsetBench.cpp | 41 // The stosw/d and inlining heuristics below were determined using |
21 // on a x86 desktop, a Nexus 7 with and without NEON, and a Nexus 9: | 42 // bench/MemsetBench.cpp and perf.skia.org. |
22 // - on x86, inlining was never faster, | |
23 // - on ARMv7, inlining was faster for N<=10. Putting this check inside the NEON | |
24 // code was not helpful; it's got to be here outside. | |
25 // - NEON code generation for ARMv8 with GCC 4.9 is terrible, | |
26 // making the NEON code ~8x slower than just a serial loop. | |
27 | 43 |
28 /** Similar to memset(), but it assigns a 16bit value into the buffer. | 44 /** Similar to memset(), but it assigns a 16bit value into the buffer. |
29 @param buffer The memory to have value copied into it | 45 @param buffer The memory to have value copied into it |
30 @param value The 16bit value to be copied into buffer | 46 @param value The 16bit value to be copied into buffer |
31 @param count The number of times value should be copied into the buffer. | 47 @param count The number of times value should be copied into the buffer. |
32 */ | 48 */ |
// Review notes for sk_memset16 (rows below are the unmodified OLD | NEW diff):
//  - NEW, SK_CPU_X86: counts greater than 30 go to rep_stosw() and return;
//    smaller counts fall through to SkOpts::memset16.
//  - NEW, SK_ARM_HAS_NEON: always runs the serial store loop and returns, so
//    the SkOpts::memset16 call after #endif is never reached on that path.
//  - OLD: SK_CPU_ARM64 always used the serial loop; SK_CPU_ARM32 used it only
//    for count <= 10 and otherwise fell through to SkOpts::memset16.
//  - All other configurations call SkOpts::memset16 directly.
33 static inline void sk_memset16(uint16_t buffer[], uint16_t value, int count) { | 49 static inline void sk_memset16(uint16_t buffer[], uint16_t value, int count) { |
34 #if defined(SK_CPU_ARM64) | 50 #if defined(SK_CPU_X86) |
| 51 if (count > 30) { rep_stosw(buffer, value, count); return; } |
| 52 #elif defined(SK_ARM_HAS_NEON) |
35 while (count --> 0) { *buffer++ = value; } return; | 53 while (count --> 0) { *buffer++ = value; } return; |
36 #elif defined(SK_CPU_ARM32) | |
37 if (count <= 10) { while (count --> 0) { *buffer++ = value; } return; } | |
38 #endif | 54 #endif |
39 SkOpts::memset16(buffer, value, count); | 55 SkOpts::memset16(buffer, value, count); |
40 } | 56 } |
41 | 57 |
42 /** Similar to memset(), but it assigns a 32bit value into the buffer. | 58 /** Similar to memset(), but it assigns a 32bit value into the buffer. |
43 @param buffer The memory to have value copied into it | 59 @param buffer The memory to have value copied into it |
44 @param value The 32bit value to be copied into buffer | 60 @param value The 32bit value to be copied into buffer |
45 @param count The number of times value should be copied into the buffer. | 61 @param count The number of times value should be copied into the buffer. |
46 */ | 62 */ |
// Review notes for sk_memset32 (rows below are the unmodified OLD | NEW diff):
//  - NEW, SK_CPU_X86: counts greater than 30 go to rep_stosd() and return;
//    smaller counts fall through to SkOpts::memset32.
//  - NEW, SK_ARM_HAS_NEON: always runs the serial store loop and returns, so
//    the SkOpts::memset32 call after #endif is never reached on that path.
//  - OLD: SK_CPU_ARM64 always used the serial loop; SK_CPU_ARM32 used it only
//    for count <= 10 and otherwise fell through to SkOpts::memset32.
//  - All other configurations call SkOpts::memset32 directly.
47 static inline void sk_memset32(uint32_t buffer[], uint32_t value, int count) { | 63 static inline void sk_memset32(uint32_t buffer[], uint32_t value, int count) { |
48 #if defined(SK_CPU_ARM64) | 64 #if defined(SK_CPU_X86) |
| 65 if (count > 30) { rep_stosd(buffer, value, count); return; } |
| 66 #elif defined(SK_ARM_HAS_NEON) |
49 while (count --> 0) { *buffer++ = value; } return; | 67 while (count --> 0) { *buffer++ = value; } return; |
50 #elif defined(SK_CPU_ARM32) | |
51 if (count <= 10) { while (count --> 0) { *buffer++ = value; } return; } | |
52 #endif | 68 #endif |
53 SkOpts::memset32(buffer, value, count); | 69 SkOpts::memset32(buffer, value, count); |
54 } | 70 } |
55 | 71 |
56 | 72 |
57 /////////////////////////////////////////////////////////////////////////////// | 73 /////////////////////////////////////////////////////////////////////////////// |
58 | 74 |
// Expands to the literal 4. Presumably the largest number of bytes a single
// UTF-8 sequence may occupy, going by the name; its users are outside this
// view -- TODO confirm.
59 #define kMaxBytesInUTF8Sequence 4 | 75 #define kMaxBytesInUTF8Sequence 4 |
60 | 76 |
61 #ifdef SK_DEBUG | 77 #ifdef SK_DEBUG |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
124 } | 140 } |
125 ~SkAutoTrace() { | 141 ~SkAutoTrace() { |
126 SkDebugf("--- trace: %s Leave\n", fLabel); | 142 SkDebugf("--- trace: %s Leave\n", fLabel); |
127 } | 143 } |
128 private: | 144 private: |
129 const char* fLabel; | 145 const char* fLabel; |
130 }; | 146 }; |
// Function-like macro sharing the class's name: a use written as
// SkAutoTrace(...) expands to SK_REQUIRE_LOCAL_VAR(SkAutoTrace), while a
// declaration such as `SkAutoTrace name(...)` is not followed directly by '('
// and so is left alone. NOTE(review): presumably this rejects unnamed
// temporaries -- confirm against the definition of SK_REQUIRE_LOCAL_VAR.
131 #define SkAutoTrace(...) SK_REQUIRE_LOCAL_VAR(SkAutoTrace) | 147 #define SkAutoTrace(...) SK_REQUIRE_LOCAL_VAR(SkAutoTrace) |
132 | 148 |
133 #endif | 149 #endif |
OLD | NEW |