| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2006 The Android Open Source Project | 2 * Copyright 2006 The Android Open Source Project |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkUtils_DEFINED | 8 #ifndef SkUtils_DEFINED |
| 9 #define SkUtils_DEFINED | 9 #define SkUtils_DEFINED |
| 10 | 10 |
| 11 #include "SkTypes.h" | 11 #include "SkTypes.h" |
| 12 #include "../private/SkOpts.h" |
| 12 | 13 |
| 13 /////////////////////////////////////////////////////////////////////////////// | 14 /////////////////////////////////////////////////////////////////////////////// |
| 14 | 15 |
| 15 // Determined empirically using bench/MemsetBench.cpp on a Nexus 7, Nexus 9, and desktop. | 16 // The inlining heuristics below were determined using bench/MemsetBench.cpp |
| 16 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 || defined(SK_ARM_HAS_NEON) | 17 // on an x86 desktop, a Nexus 7 with and without NEON, and a Nexus 9: |
| 17 // Platforms where we can assume an autovectorizer will give us a good inline memset. | 18 // - on x86, inlining was never faster, |
| 18 #define SK_SMALL_MEMSET 1000 | 19 // - on ARMv7, inlining was faster for N<=10. Putting this check inside the NEON |
| 19 #else | 20 // code was not helpful; it's got to be here outside. |
| 20 // Platforms like Chrome on ARMv7 that don't typically compile with NEON globally. | 21 // - NEON code generation for ARMv8 with GCC 4.9 is terrible, |
| 21 #define SK_SMALL_MEMSET 10 | 22 // making the NEON code ~8x slower than just a serial loop. |
| 22 #endif | |
| 23 | |
| 24 | 23 |
| 25 /** Similar to memset(), but it assigns a 16bit value into the buffer. | 24 /** Similar to memset(), but it assigns a 16bit value into the buffer. |
| 26 @param buffer The memory to have value copied into it | 25 @param buffer The memory to have value copied into it |
| 27 @param value The 16bit value to be copied into buffer | 26 @param value The 16bit value to be copied into buffer |
| 28 @param count The number of times value should be copied into the buffer. | 27 @param count The number of times value should be copied into the buffer. |
| 29 */ | 28 */ |
| 30 void sk_memset16_large(uint16_t dst[], uint16_t value, int count); | 29 static inline void sk_memset16(uint16_t buffer[], uint16_t value, int count) { |
| 31 inline void sk_memset16(uint16_t dst[], uint16_t value, int count) { | 30 #if defined(SK_CPU_ARM64) |
| 32 if (count <= SK_SMALL_MEMSET) { | 31 while (count --> 0) { *buffer++ = value; } return; |
| 33 for (int i = 0; i < count; i++) { | 32 #elif defined(SK_CPU_ARM32) |
| 34 dst[i] = value; | 33 if (count <= 10) { while (count --> 0) { *buffer++ = value; } return; } |
| 35 } | 34 #endif |
| 36 } else { | 35 SkOpts::memset16(buffer, value, count); |
| 37 sk_memset16_large(dst, value, count); | |
| 38 } | |
| 39 } | 36 } |
| 40 typedef void (*SkMemset16Proc)(uint16_t dst[], uint16_t value, int count); | |
| 41 SkMemset16Proc SkMemset16GetPlatformProc(); | |
| 42 | 37 |
| 43 /** Similar to memset(), but it assigns a 32bit value into the buffer. | 38 /** Similar to memset(), but it assigns a 32bit value into the buffer. |
| 44 @param buffer The memory to have value copied into it | 39 @param buffer The memory to have value copied into it |
| 45 @param value The 32bit value to be copied into buffer | 40 @param value The 32bit value to be copied into buffer |
| 46 @param count The number of times value should be copied into the buffer. | 41 @param count The number of times value should be copied into the buffer. |
| 47 */ | 42 */ |
| 48 void sk_memset32_large(uint32_t dst[], uint32_t value, int count); | 43 static inline void sk_memset32(uint32_t buffer[], uint32_t value, int count) { |
| 49 inline void sk_memset32(uint32_t dst[], uint32_t value, int count) { | 44 #if defined(SK_CPU_ARM64) |
| 50 if (count <= SK_SMALL_MEMSET) { | 45 while (count --> 0) { *buffer++ = value; } return; |
| 51 for (int i = 0; i < count; i++) { | 46 #elif defined(SK_CPU_ARM32) |
| 52 dst[i] = value; | 47 if (count <= 10) { while (count --> 0) { *buffer++ = value; } return; } |
| 53 } | 48 #endif |
| 54 } else { | 49 SkOpts::memset32(buffer, value, count); |
| 55 sk_memset32_large(dst, value, count); | |
| 56 } | |
| 57 } | 50 } |
| 58 | 51 |
| 59 typedef void (*SkMemset32Proc)(uint32_t dst[], uint32_t value, int count); | |
| 60 SkMemset32Proc SkMemset32GetPlatformProc(); | |
| 61 | |
| 62 #undef SK_SMALL_MEMSET | |
| 63 | 52 |
| 64 /////////////////////////////////////////////////////////////////////////////// | 53 /////////////////////////////////////////////////////////////////////////////// |
| 65 | 54 |
| 66 #define kMaxBytesInUTF8Sequence 4 | 55 #define kMaxBytesInUTF8Sequence 4 |
| 67 | 56 |
| 68 #ifdef SK_DEBUG | 57 #ifdef SK_DEBUG |
| 69 int SkUTF8_LeadByteToCount(unsigned c); | 58 int SkUTF8_LeadByteToCount(unsigned c); |
| 70 #else | 59 #else |
| 71 #define SkUTF8_LeadByteToCount(c) ((((0xE5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1) | 60 #define SkUTF8_LeadByteToCount(c) ((((0xE5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1) |
| 72 #endif | 61 #endif |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 131 } | 120 } |
| 132 ~SkAutoTrace() { | 121 ~SkAutoTrace() { |
| 133 SkDebugf("--- trace: %s Leave\n", fLabel); | 122 SkDebugf("--- trace: %s Leave\n", fLabel); |
| 134 } | 123 } |
| 135 private: | 124 private: |
| 136 const char* fLabel; | 125 const char* fLabel; |
| 137 }; | 126 }; |
| 138 #define SkAutoTrace(...) SK_REQUIRE_LOCAL_VAR(SkAutoTrace) | 127 #define SkAutoTrace(...) SK_REQUIRE_LOCAL_VAR(SkAutoTrace) |
| 139 | 128 |
| 140 #endif | 129 #endif |
| OLD | NEW |