OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2006 The Android Open Source Project | 2 * Copyright 2006 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkUtils_DEFINED | 8 #ifndef SkUtils_DEFINED |
9 #define SkUtils_DEFINED | 9 #define SkUtils_DEFINED |
10 | 10 |
11 #include "SkTypes.h" | 11 #include "SkTypes.h" |
12 | 12 |
13 namespace SkOpts { | 13 namespace SkOpts { |
14 extern void (*memset16)(uint16_t[], uint16_t, int); | 14 extern void (*memset16)(uint16_t[], uint16_t, int); |
15 extern void (*memset32)(uint32_t[], uint32_t, int); | 15 extern void (*memset32)(uint32_t[], uint32_t, int); |
16 } | 16 } |
17 | 17 |
18 /////////////////////////////////////////////////////////////////////////////// | 18 /////////////////////////////////////////////////////////////////////////////// |
19 | 19 |
20 // The inlining heuristics below were determined using bench/MemsetBench.cpp | 20 // Inlining heuristics were determined by using perf.skia.org and bench/MemsetBench.cpp. |
21 // on a x86 desktop, a Nexus 7 with and without NEON, and a Nexus 9: | 21 // When using MSVC, inline is better >= 1K and worse <= 100. The Nexus Player was the opposite. |
22 // - on x86, inlining was never faster, | 22 // Otherwise, when NEON or SSE is available to GCC or Clang, they can handle it best. |
23 // - on ARMv7, inlining was faster for N<=10. Putting this check inside the NEON | 23 // See https://code.google.com/p/chromium/issues/detail?id=516426#c15 for more details. |
24 // code was not helpful; it's got to be here outside. | 24 // See also skia:4316; it might be a good idea to use rep stosw/stosd here. |
25 // - NEON code generation for ARMv8 with GCC 4.9 is terrible, | 25 #define INLINE_IF(cond) if (cond) { while (count --> 0) { *buffer++ = value; } return; }
Noel Gordon
2015/11/25 02:02:25
while (count --> 0)
I suppose it compiles, but ma
| |
26 // making the NEON code ~8x slower that just a serial loop. | |
27 | 26 |
28 /** Similar to memset(), but it assigns a 16bit value into the buffer. | 27 /** Similar to memset(), but it assigns a 16bit value into the buffer. |
29 @param buffer The memory to have value copied into it | 28 @param buffer The memory to have value copied into it |
30 @param value The 16bit value to be copied into buffer | 29 @param value The 16bit value to be copied into buffer |
31 @param count The number of times value should be copied into the buffer. | 30 @param count The number of times value should be copied into the buffer. |
32 */ | 31 */ |
33 static inline void sk_memset16(uint16_t buffer[], uint16_t value, int count) { | 32 static inline void sk_memset16(uint16_t buffer[], uint16_t value, int count) { |
34 #if defined(SK_CPU_ARM64) | 33 #if defined(_MSC_VER) |
35 while (count --> 0) { *buffer++ = value; } return; | 34 INLINE_IF(count > 300) |
36 #elif defined(SK_CPU_ARM32) | 35 #elif defined(SK_BUILD_FOR_ANDROID) && defined(SK_CPU_X86) |
37 if (count <= 10) { while (count --> 0) { *buffer++ = value; } return; } | 36 INLINE_IF(count < 300) |
37 #elif defined(SK_ARM_HAS_NEON) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | |
38 INLINE_IF(true) | |
39 #else | |
40 INLINE_IF(count <= 10) | |
38 #endif | 41 #endif |
39 SkOpts::memset16(buffer, value, count); | 42 SkOpts::memset16(buffer, value, count); |
40 } | 43 } |
41 | 44 |
42 /** Similar to memset(), but it assigns a 32bit value into the buffer. | 45 /** Similar to memset(), but it assigns a 32bit value into the buffer. |
43 @param buffer The memory to have value copied into it | 46 @param buffer The memory to have value copied into it |
44 @param value The 32bit value to be copied into buffer | 47 @param value The 32bit value to be copied into buffer |
45 @param count The number of times value should be copied into the buffer. | 48 @param count The number of times value should be copied into the buffer. |
46 */ | 49 */ |
47 static inline void sk_memset32(uint32_t buffer[], uint32_t value, int count) { | 50 static inline void sk_memset32(uint32_t buffer[], uint32_t value, int count) { |
48 #if defined(SK_CPU_ARM64) | 51 #if defined(_MSC_VER) |
49 while (count --> 0) { *buffer++ = value; } return; | 52 INLINE_IF(count > 300) |
50 #elif defined(SK_CPU_ARM32) | 53 #elif defined(SK_BUILD_FOR_ANDROID) && defined(SK_CPU_X86) |
51 if (count <= 10) { while (count --> 0) { *buffer++ = value; } return; } | 54 INLINE_IF(count < 300) |
55 #elif defined(SK_ARM_HAS_NEON) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | |
56 INLINE_IF(true) | |
57 #else | |
58 INLINE_IF(count <= 10) | |
52 #endif | 59 #endif |
53 SkOpts::memset32(buffer, value, count); | 60 SkOpts::memset32(buffer, value, count); |
54 } | 61 } |
55 | 62 |
63 #undef INLINE_IF | |
56 | 64 |
57 /////////////////////////////////////////////////////////////////////////////// | 65 /////////////////////////////////////////////////////////////////////////////// |
58 | 66 |
59 #define kMaxBytesInUTF8Sequence 4 | 67 #define kMaxBytesInUTF8Sequence 4 |
60 | 68 |
61 #ifdef SK_DEBUG | 69 #ifdef SK_DEBUG |
62 int SkUTF8_LeadByteToCount(unsigned c); | 70 int SkUTF8_LeadByteToCount(unsigned c); |
63 #else | 71 #else |
64 #define SkUTF8_LeadByteToCount(c) ((((0xE5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1) | 72 #define SkUTF8_LeadByteToCount(c) ((((0xE5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1) |
65 #endif | 73 #endif |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
124 } | 132 } |
125 ~SkAutoTrace() { | 133 ~SkAutoTrace() { |
126 SkDebugf("--- trace: %s Leave\n", fLabel); | 134 SkDebugf("--- trace: %s Leave\n", fLabel); |
127 } | 135 } |
128 private: | 136 private: |
129 const char* fLabel; | 137 const char* fLabel; |
130 }; | 138 }; |
131 #define SkAutoTrace(...) SK_REQUIRE_LOCAL_VAR(SkAutoTrace) | 139 #define SkAutoTrace(...) SK_REQUIRE_LOCAL_VAR(SkAutoTrace) |
132 | 140 |
133 #endif | 141 #endif |
OLD | NEW |