Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(70)

Side by Side Diff: include/core/SkUtils.h

Issue 1357193002: update memset16/32 inlining heuristics (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: dont forget armv7, INLINE_IF Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2006 The Android Open Source Project 2 * Copyright 2006 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkUtils_DEFINED 8 #ifndef SkUtils_DEFINED
9 #define SkUtils_DEFINED 9 #define SkUtils_DEFINED
10 10
11 #include "SkTypes.h" 11 #include "SkTypes.h"
12 12
13 namespace SkOpts { 13 namespace SkOpts {
14 extern void (*memset16)(uint16_t[], uint16_t, int); 14 extern void (*memset16)(uint16_t[], uint16_t, int);
15 extern void (*memset32)(uint32_t[], uint32_t, int); 15 extern void (*memset32)(uint32_t[], uint32_t, int);
16 } 16 }
17 17
18 /////////////////////////////////////////////////////////////////////////////// 18 ///////////////////////////////////////////////////////////////////////////////
19 19
20 // The inlining heuristics below were determined using bench/MemsetBench.cpp 20 // Inlining heuristics were determined by using perf.skia.org and bench/MemsetBench.cpp.
21 // on a x86 desktop, a Nexus 7 with and without NEON, and a Nexus 9: 21 // When using MSVC, inline is better >= 1K and worse <= 100. The Nexus Player was the opposite.
22 // - on x86, inlining was never faster, 22 // Otherwise, when NEON or SSE is available to GCC or Clang, they can handle it best.
23 // - on ARMv7, inlining was faster for N<=10. Putting this check inside the NEON 23 // See https://code.google.com/p/chromium/issues/detail?id=516426#c15 for more details.
24 // code was not helpful; it's got to be here outside. 24 // See also skia:4316; it might be a good idea to use rep stosw/stosd here.
25 // - NEON code generation for ARMv8 with GCC 4.9 is terrible, 25 #define INLINE_IF(cond) if (cond) { while (count --> 0) { *buffer++ = value; } return; }
Noel Gordon 2015/11/25 02:02:25 while (count --> 0) I suppose it compiles, but ma
26 // making the NEON code ~8x slower that just a serial loop.
27 26
28 /** Similar to memset(), but it assigns a 16bit value into the buffer. 27 /** Similar to memset(), but it assigns a 16bit value into the buffer.
29 @param buffer The memory to have value copied into it 28 @param buffer The memory to have value copied into it
30 @param value The 16bit value to be copied into buffer 29 @param value The 16bit value to be copied into buffer
31 @param count The number of times value should be copied into the buffer. 30 @param count The number of times value should be copied into the buffer.
32 */ 31 */
33 static inline void sk_memset16(uint16_t buffer[], uint16_t value, int count) { 32 static inline void sk_memset16(uint16_t buffer[], uint16_t value, int count) {
34 #if defined(SK_CPU_ARM64) 33 #if defined(_MSC_VER)
35 while (count --> 0) { *buffer++ = value; } return; 34 INLINE_IF(count > 300)
36 #elif defined(SK_CPU_ARM32) 35 #elif defined(SK_BUILD_FOR_ANDROID) && defined(SK_CPU_X86)
37 if (count <= 10) { while (count --> 0) { *buffer++ = value; } return; } 36 INLINE_IF(count < 300)
37 #elif defined(SK_ARM_HAS_NEON) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
38 INLINE_IF(true)
39 #else
40 INLINE_IF(count <= 10)
38 #endif 41 #endif
39 SkOpts::memset16(buffer, value, count); 42 SkOpts::memset16(buffer, value, count);
40 } 43 }
41 44
42 /** Similar to memset(), but it assigns a 32bit value into the buffer. 45 /** Similar to memset(), but it assigns a 32bit value into the buffer.
43 @param buffer The memory to have value copied into it 46 @param buffer The memory to have value copied into it
44 @param value The 32bit value to be copied into buffer 47 @param value The 32bit value to be copied into buffer
45 @param count The number of times value should be copied into the buffer. 48 @param count The number of times value should be copied into the buffer.
46 */ 49 */
47 static inline void sk_memset32(uint32_t buffer[], uint32_t value, int count) { 50 static inline void sk_memset32(uint32_t buffer[], uint32_t value, int count) {
48 #if defined(SK_CPU_ARM64) 51 #if defined(_MSC_VER)
49 while (count --> 0) { *buffer++ = value; } return; 52 INLINE_IF(count > 300)
50 #elif defined(SK_CPU_ARM32) 53 #elif defined(SK_BUILD_FOR_ANDROID) && defined(SK_CPU_X86)
51 if (count <= 10) { while (count --> 0) { *buffer++ = value; } return; } 54 INLINE_IF(count < 300)
55 #elif defined(SK_ARM_HAS_NEON) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
56 INLINE_IF(true)
57 #else
58 INLINE_IF(count <= 10)
52 #endif 59 #endif
53 SkOpts::memset32(buffer, value, count); 60 SkOpts::memset32(buffer, value, count);
54 } 61 }
55 62
63 #undef INLINE_IF
56 64
57 /////////////////////////////////////////////////////////////////////////////// 65 ///////////////////////////////////////////////////////////////////////////////
58 66
59 #define kMaxBytesInUTF8Sequence 4 67 #define kMaxBytesInUTF8Sequence 4
60 68
61 #ifdef SK_DEBUG 69 #ifdef SK_DEBUG
62 int SkUTF8_LeadByteToCount(unsigned c); 70 int SkUTF8_LeadByteToCount(unsigned c);
63 #else 71 #else
64 #define SkUTF8_LeadByteToCount(c) ((((0xE5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1) 72 #define SkUTF8_LeadByteToCount(c) ((((0xE5 << 24) >> ((unsigned)c >> 4 << 1)) & 3) + 1)
65 #endif 73 #endif
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
124 } 132 }
125 ~SkAutoTrace() { 133 ~SkAutoTrace() {
126 SkDebugf("--- trace: %s Leave\n", fLabel); 134 SkDebugf("--- trace: %s Leave\n", fLabel);
127 } 135 }
128 private: 136 private:
129 const char* fLabel; 137 const char* fLabel;
130 }; 138 };
131 #define SkAutoTrace(...) SK_REQUIRE_LOCAL_VAR(SkAutoTrace) 139 #define SkAutoTrace(...) SK_REQUIRE_LOCAL_VAR(SkAutoTrace)
132 140
133 #endif 141 #endif
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698