Chromium Code Reviews| Index: src/core/SkCpu.h |
| diff --git a/src/core/SkCpu.h b/src/core/SkCpu.h |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..2a41d37b16dbc2d296c5449d160fc1f5d7a69fcf |
| --- /dev/null |
| +++ b/src/core/SkCpu.h |
| @@ -0,0 +1,123 @@ |
| +/* |
| + * Copyright 2016 Google Inc. |
| + * |
| + * Use of this source code is governed by a BSD-style license that can be |
| + * found in the LICENSE file. |
| + */ |
| + |
| +#ifndef SkCpu_DEFINED |
| +#define SkCpu_DEFINED |
| + |
| +#include "SkTypes.h" |
| + |
| +struct SkCpu { |
| + enum { |
| + SSE1 = 1 << 0, |
| + SSE2 = 1 << 1, |
| + SSE3 = 1 << 2, |
| + SSSE3 = 1 << 3, |
| + SSE41 = 1 << 4, |
| + SSE42 = 1 << 5, |
| + AVX = 1 << 6, |
| + F16C = 1 << 7, |
| + FMA = 1 << 8, |
| + AVX2 = 1 << 9, |
| + }; |
| + enum { |
| + NEON = 1 << 0, |
| + NEON_FMA = 1 << 1, |
| + VFP_FP16 = 1 << 2, |
| + }; |
| + |
| + static bool Supports(uint32_t); |
| + |
| +private: |
| + // Consider a loop like this that expands 16-bit floats out to 32-bit, does math, and repacks: |
| + // for (int i = 0; i < N; i++) { |
| + // if (SkCpu::Supports(SkCpu::F16C)) { |
| + // f32s = SkCpu::F16C_cvtph_ps(f16s); |
| + // } else { |
| + // f32s = some_slower_f16_to_f32_routine(f16s); |
| + // } |
| + // |
| + // ... do some math with f32s ... |
| + // |
| + // if (SkCpu::Supports(SkCpu::F16C)) { |
| + // f16s = SkCpu::F16C_cvtps_ph(f32s); |
| + // } else { |
| + // f16s = some_slower_f32_to_f16_routine(f32s); |
| + // } |
| + // } |
| + // |
| + // We would like SkCpu::Supports() to participate in common sub-expression elimination, |
| + // so that it's called exactly 1 time, rather than N or 2N times. This is especially |
| + // important when the if-else blocks you see above are really inline functions. |
| + // |
| + // The key to this is to make sure RuntimeCpuFeatures(), which Supports() calls, is |
| + // implemented with the same capacity for common sub-expression elimination. |
| + // |
| + // __attribute__((const)) works perfectly when available (GCC, Clang, clang-cl). |
| + // |
| + // When it's not (MSVC), we fall back to a static initializer (gCachedCpuFeatures). |
| + // (Static initializers would work fine everywhere, but Chrome really dislikes them.) |

f(malita)
2016/04/14 15:40:35
Does this mean we'll have to sell a static initial
mtklein
2016/04/14 15:52:28
Yeah, maybe, for some values of "have to" and "sel
|
| + |
| +#if defined(__GNUC__) || defined(__clang__) // i.e. GCC, Clang, or clang-cl |
| + __attribute__((const)) |
| + static uint32_t RuntimeCpuFeatures(); |
| +#else |
| + static const uint32_t gCachedCpuFeatures; |
| + static uint32_t RuntimeCpuFeatures() { |
| + return gCachedCpuFeatures; |
| + } |
| +#endif |
| +}; |
| + |
| +inline bool SkCpu::Supports(uint32_t mask) { |
| + uint32_t features = RuntimeCpuFeatures(); |
| + |
| + // OR in the features that the compile-time target already guarantees. When those alone |
| + // cover the mask, the compiler can completely drop the call to RuntimeCpuFeatures(). |
| +#if SK_CPU_X86 |
| + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1 |
| + features |= SSE1; |
| + #endif |
| + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
| + features |= SSE2; |
| + #endif |
| + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE3 |
| + features |= SSE3; |
| + #endif |
| + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
| + features |= SSSE3; |
| + #endif |
| + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 |
| + features |= SSE41; |
| + #endif |
| + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 |
| + features |= SSE42; |
| + #endif |
| + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX |
| + features |= AVX; |
| + #endif |
| + // F16C would go here if we add SK_CPU_SSE_LEVEL_F16C. |
| + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2 |
| + features |= AVX2; |
| + #endif |
| + // FMA doesn't fit neatly into this total ordering. |
| + // It's available on Haswell+ just like AVX2, but it's technically a different bit. |
| + // TODO: circle back on this if we find ourselves limited by lack of compile-time FMA. |
| + |
| +#else |
| + #if defined(SK_ARM_HAS_NEON) |
| + features |= NEON; |
| + #endif |
| + |
| + #if defined(SK_CPU_ARM64) |
| + features |= NEON|NEON_FMA|VFP_FP16; |
| + #endif |
| + |
| +#endif |
| + return (features & mask) == mask; |
| +} |
| + |
| +#endif//SkCpu_DEFINED |