OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2006 The Android Open Source Project | 2 * Copyright 2006 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkUtils_DEFINED | 8 #ifndef SkUtils_DEFINED |
9 #define SkUtils_DEFINED | 9 #define SkUtils_DEFINED |
10 | 10 |
11 #include "SkTypes.h" | 11 #include "SkTypes.h" |
12 #if defined(SK_BUILD_FOR_WIN) | |
13 #include <intrin.h> | |
14 #endif | |
15 | 12 |
16 namespace SkOpts { | 13 namespace SkOpts { |
17 extern void (*memset16)(uint16_t[], uint16_t, int); | 14 extern void (*memset16)(uint16_t[], uint16_t, int); |
18 extern void (*memset32)(uint32_t[], uint32_t, int); | 15 extern void (*memset32)(uint32_t[], uint32_t, int); |
19 } | 16 } |
20 | 17 |
21 /////////////////////////////////////////////////////////////////////////////// | 18 /////////////////////////////////////////////////////////////////////////////// |
22 | 19 |
23 // The inlining heuristics below were determined using bench/MemsetBench.cpp | 20 // The inlining heuristics below were determined using bench/MemsetBench.cpp |
24 // on an x86 desktop, a Nexus 7 with and without NEON, and a Nexus 9: | 21 // on an x86 desktop, a Nexus 7 with and without NEON, and a Nexus 9: |
25 // - on x86, inlining was never faster, | 22 // - on x86, inlining was never faster, |
26 // - on ARMv7, inlining was faster for N<=10. Putting this check inside the NEON | 23 // - on ARMv7, inlining was faster for N<=10. Putting this check inside the NEON |
27 // code was not helpful; it's got to be here outside. | 24 // code was not helpful; it's got to be here outside. |
28 // - NEON code generation for ARMv8 with GCC 4.9 is terrible, | 25 // - NEON code generation for ARMv8 with GCC 4.9 is terrible, |
29 // making the NEON code ~8x slower than just a serial loop. | 26 // making the NEON code ~8x slower than just a serial loop. |
30 | 27 |
31 /** Similar to memset(), but it assigns a 16bit value into the buffer. | 28 /** Similar to memset(), but it assigns a 16bit value into the buffer. |
32 @param buffer The memory to have value copied into it | 29 @param buffer The memory to have value copied into it |
33 @param value The 16bit value to be copied into buffer | 30 @param value The 16bit value to be copied into buffer |
34 @param count The number of times value should be copied into the buffer. | 31 @param count The number of times value should be copied into the buffer. |
35 */ | 32 */ |
36 static inline void sk_memset16(uint16_t buffer[], uint16_t value, int count) { | 33 static inline void sk_memset16(uint16_t buffer[], uint16_t value, int count) { |
37 #if defined(SK_BUILD_FOR_WIN) | 34 #if defined(SK_CPU_ARM64) |
38 __stosw(buffer, value, count); | 35 while (count --> 0) { *buffer++ = value; } return; |
39 #elif defined(SK_CPU_X86) | 36 #elif defined(SK_CPU_ARM32) |
40 __asm__ __volatile__ ( "rep stosw" : "+D"(buffer), "+c"(count) : "a"(value)
); | 37 if (count <= 10) { while (count --> 0) { *buffer++ = value; } return; } |
41 #elif defined(SK_ARM_HAS_NEON) | 38 #endif |
42 while (count --> 0) { *buffer++ = value; } | |
43 #else | |
44 SkOpts::memset16(buffer, value, count); | 39 SkOpts::memset16(buffer, value, count); |
45 #endif | |
46 } | 40 } |
47 | 41 |
48 /** Similar to memset(), but it assigns a 32bit value into the buffer. | 42 /** Similar to memset(), but it assigns a 32bit value into the buffer. |
49 @param buffer The memory to have value copied into it | 43 @param buffer The memory to have value copied into it |
50 @param value The 32bit value to be copied into buffer | 44 @param value The 32bit value to be copied into buffer |
51 @param count The number of times value should be copied into the buffer. | 45 @param count The number of times value should be copied into the buffer. |
52 */ | 46 */ |
53 static inline void sk_memset32(uint32_t buffer[], uint32_t value, int count) { | 47 static inline void sk_memset32(uint32_t buffer[], uint32_t value, int count) { |
54 #if defined(SK_BUILD_FOR_WIN) | 48 #if defined(SK_CPU_ARM64) |
55 __stosd((PDWORD)buffer, value, count); | 49 while (count --> 0) { *buffer++ = value; } return; |
56 #elif defined(SK_CPU_X86) | 50 #elif defined(SK_CPU_ARM32) |
57 __asm__ __volatile__ ( "rep stosl" : "+D"(buffer), "+c"(count) : "a"(value)
); | 51 if (count <= 10) { while (count --> 0) { *buffer++ = value; } return; } |
58 #elif defined(SK_ARM_HAS_NEON) | 52 #endif |
59 while (count --> 0) { *buffer++ = value; } | |
60 #else | |
61 SkOpts::memset32(buffer, value, count); | 53 SkOpts::memset32(buffer, value, count); |
62 #endif | |
63 } | 54 } |
64 | 55 |
65 | 56 |
66 /////////////////////////////////////////////////////////////////////////////// | 57 /////////////////////////////////////////////////////////////////////////////// |
67 | 58 |
68 #define kMaxBytesInUTF8Sequence 4 | 59 #define kMaxBytesInUTF8Sequence 4 |
69 | 60 |
70 #ifdef SK_DEBUG | 61 #ifdef SK_DEBUG |
71 int SkUTF8_LeadByteToCount(unsigned c); | 62 int SkUTF8_LeadByteToCount(unsigned c); |
72 #else | 63 #else |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
133 } | 124 } |
134 ~SkAutoTrace() { | 125 ~SkAutoTrace() { |
135 SkDebugf("--- trace: %s Leave\n", fLabel); | 126 SkDebugf("--- trace: %s Leave\n", fLabel); |
136 } | 127 } |
137 private: | 128 private: |
138 const char* fLabel; | 129 const char* fLabel; |
139 }; | 130 }; |
140 #define SkAutoTrace(...) SK_REQUIRE_LOCAL_VAR(SkAutoTrace) | 131 #define SkAutoTrace(...) SK_REQUIRE_LOCAL_VAR(SkAutoTrace) |
141 | 132 |
142 #endif | 133 #endif |
OLD | NEW |