Index: src/core/SkCpu.h |
diff --git a/src/core/SkCpu.h b/src/core/SkCpu.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..2a41d37b16dbc2d296c5449d160fc1f5d7a69fcf |
--- /dev/null |
+++ b/src/core/SkCpu.h |
@@ -0,0 +1,123 @@ |
+/* |
+ * Copyright 2016 Google Inc. |
+ * |
+ * Use of this source code is governed by a BSD-style license that can be |
+ * found in the LICENSE file. |
+ */ |
+ |
+#ifndef SkCpu_DEFINED |
+#define SkCpu_DEFINED |
+ |
+#include "SkTypes.h" |
+ |
+struct SkCpu { |
+ enum { |
+ SSE1 = 1 << 0, |
+ SSE2 = 1 << 1, |
+ SSE3 = 1 << 2, |
+ SSSE3 = 1 << 3, |
+ SSE41 = 1 << 4, |
+ SSE42 = 1 << 5, |
+ AVX = 1 << 6, |
+ F16C = 1 << 7, |
+ FMA = 1 << 8, |
+ AVX2 = 1 << 9, |
+ }; |
+ enum { |
+ NEON = 1 << 0, |
+ NEON_FMA = 1 << 1, |
+ VFP_FP16 = 1 << 2, |
+ }; |
+ |
+ static bool Supports(uint32_t); |
+ |
+private: |
+ // Consider a loop like this that expands 16-bit floats out to 32-bit, does math, and repacks: |
+ // for (int i = 0; i < N; i++) { |
+ // if (SkCpu::Supports(SkCpu::F16C)) { |
+ // f32s = SkCpu::F16C_cvtph_ps(f16s); |
+ // } else { |
+ // f32s = some_slower_f16_to_f32_routine(f16s); |
+ // } |
+ // |
+ // ... do some math with f32s ... |
+ // |
+ // if (SkCpu::Supports(SkCpu::F16C)) { |
+ // f16s = SkCpu::F16C_cvtps_ph(f32s); |
+ // } else { |
+ // f16s = some_slower_f32_to_f16_routine(f32s); |
+ // } |
+ // } |
+ // |
+ // We would like SkCpu::Supports() to participate in common sub-expression elimination, |
+ // so that it's called exactly 1 time, rather than N or 2N times. This is especially |
+ // important when the if-else blocks you see above are really inline functions. |
+ // |
+ // The key to this is to make sure to implement RuntimeCpuFeatures() with the same |
+ // capacity for common sub-expression elimination. |
+ // |
+ // __attribute__((const)) works perfectly when available. |
+ // |
+ // When it's not (MSVC), we fall back to a static initializer. |
+ // (Static intializers would work fine everywhere, but Chrome really dislikes them.) |
+ |
+#if defined(__GNUC__) || defined(__clang__) // i.e. GCC, Clang, or clang-cl |
+ __attribute__((const)) |
+ static uint32_t RuntimeCpuFeatures(); |
+#else |
+ static const uint32_t gCachedCpuFeatures; |
+ static uint32_t RuntimeCpuFeatures() { |
+ return gCachedCpuFeatures; |
+ } |
+#endif |
+}; |
+ |
+inline bool SkCpu::Supports(uint32_t mask) { |
+ uint32_t features = RuntimeCpuFeatures(); |
+ |
+ // If we mask in compile-time known lower limits, the compiler can completely |
+ // drop many calls to RuntimeCpuFeatures(). |
+#if SK_CPU_X86 |
+ #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1 |
+ features |= SSE1; |
+ #endif |
+ #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
+ features |= SSE2; |
+ #endif |
+ #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE3 |
+ features |= SSE3; |
+ #endif |
+ #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
+ features |= SSSE3; |
+ #endif |
+ #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 |
+ features |= SSE41; |
+ #endif |
+ #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 |
+ features |= SSE42; |
+ #endif |
+ #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX |
+ features |= AVX; |
+ #endif |
+ // F16C goes here if we add SK_CPU_SSE_LEVEL_F16C |
+ #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2 |
+ features |= AVX2; |
+ #endif |
+ // FMA doesn't fit neatly into this total ordering. |
+ // It's available on Haswell+ just like AVX2, but it's technically a different bit. |
+ // TODO: circle back on this if we find ourselves limited by lack of compile-time FMA |
+ |
+#else |
+ #if defined(SK_ARM_HAS_NEON) |
+ features |= NEON; |
+ #endif |
+ |
+ #if defined(SK_CPU_ARM64) |
+ features |= NEON|NEON_FMA|VFP_FP16; |
+ #endif |
+ |
+#endif |
+ return (features & mask) == mask; |
+} |
+ |
+#endif//SkCpu_DEFINED |