Index: include/core/SkUtils.h |
diff --git a/include/core/SkUtils.h b/include/core/SkUtils.h |
index d522ae0dea139555f6e8790ac06d8287e5771fd7..bb7214c7d9e6ac50ebd8472c3de3652cb31e75b0 100644 |
--- a/include/core/SkUtils.h |
+++ b/include/core/SkUtils.h |
@@ -12,12 +12,31 @@ |
/////////////////////////////////////////////////////////////////////////////// |
+// Determined empirically using bench/MemsetBench.cpp on a Nexus 7, Nexus 9, and desktop. |
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 || defined(SK_ARM_HAS_NEON) |
+ // Platforms where we can assume an autovectorizer will give us a good inline memset. |
+ #define SK_SMALL_MEMSET 1000 |
reed1
2015/04/09 20:30:24
I know I suggested this be a #define. Since this i
mtklein
2015/04/09 20:36:42
Hmm. Actually, I can just undef it when we're don
|
+#else |
+ // Platforms like Chrome on ARMv7 that don't typically compile with NEON globally. |
+ #define SK_SMALL_MEMSET 10 |
+#endif |
+ |
+ |
/** Similar to memset(), but it assigns a 16bit value into the buffer. |
@param buffer The memory to have value copied into it |
@param value The 16bit value to be copied into buffer |
@param count The number of times value should be copied into the buffer. |
*/ |
-void sk_memset16(uint16_t dst[], uint16_t value, int count); |
+void sk_memset16_large(uint16_t dst[], uint16_t value, int count); /* Declaration only; the inline sk_memset16 in this header forwards to it when count > SK_SMALL_MEMSET. */ |
reed1
2015/04/09 20:30:24
These two "large" functions. I know they have to b
mtklein
2015/04/09 20:36:42
Moving this code around sounds good to me. I hadn
|
+inline void sk_memset16(uint16_t dst[], uint16_t value, int count) { /* Writes value into dst[0..count-1], choosing a strategy by count. */ |
+ if (count <= SK_SMALL_MEMSET) { /* At or below the threshold: fill inline with a plain loop. */ |
+ for (int i = 0; i < count; i++) { /* Executes zero times when count <= 0. */ |
+ dst[i] = value; |
+ } |
+ } else { /* Above the threshold: hand off to the separately declared sk_memset16_large. */ |
+ sk_memset16_large(dst, value, count); |
+ } |
+} |
typedef void (*SkMemset16Proc)(uint16_t dst[], uint16_t value, int count); |
SkMemset16Proc SkMemset16GetPlatformProc(); |
@@ -26,7 +45,17 @@ SkMemset16Proc SkMemset16GetPlatformProc(); |
@param value The 32bit value to be copied into buffer |
@param count The number of times value should be copied into the buffer. |
*/ |
-void sk_memset32(uint32_t dst[], uint32_t value, int count); |
+void sk_memset32_large(uint32_t dst[], uint32_t value, int count); /* Declaration only; the inline sk_memset32 in this header forwards to it when count > SK_SMALL_MEMSET. */ |
+inline void sk_memset32(uint32_t dst[], uint32_t value, int count) { /* Writes value into dst[0..count-1], choosing a strategy by count. */ |
+ if (count <= SK_SMALL_MEMSET) { /* At or below the threshold: fill inline with a plain loop. */ |
+ for (int i = 0; i < count; i++) { /* Executes zero times when count <= 0. */ |
+ dst[i] = value; |
+ } |
+ } else { /* Above the threshold: hand off to the separately declared sk_memset32_large. */ |
+ sk_memset32_large(dst, value, count); |
+ } |
+} |
+ |
typedef void (*SkMemset32Proc)(uint32_t dst[], uint32_t value, int count); |
SkMemset32Proc SkMemset32GetPlatformProc(); |