| Index: src/core/SkCpu.h
|
| diff --git a/src/core/SkCpu.h b/src/core/SkCpu.h
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..2a41d37b16dbc2d296c5449d160fc1f5d7a69fcf
|
| --- /dev/null
|
| +++ b/src/core/SkCpu.h
|
| @@ -0,0 +1,123 @@
|
| +/*
|
| + * Copyright 2016 Google Inc.
|
| + *
|
| + * Use of this source code is governed by a BSD-style license that can be
|
| + * found in the LICENSE file.
|
| + */
|
| +
|
| +#ifndef SkCpu_DEFINED
|
| +#define SkCpu_DEFINED
|
| +
|
| +#include "SkTypes.h"
|
| +
|
| +struct SkCpu {  // CPU feature bit flags plus Supports(), which tests a mask of those bits.
|
| + enum {  // Feature bits that Supports() fills in under SK_CPU_X86.
|
| + SSE1 = 1 << 0,
|
| + SSE2 = 1 << 1,
|
| + SSE3 = 1 << 2,
|
| + SSSE3 = 1 << 3,
|
| + SSE41 = 1 << 4,
|
| + SSE42 = 1 << 5,
|
| + AVX = 1 << 6,
|
| + F16C = 1 << 7,
|
| + FMA = 1 << 8,
|
| + AVX2 = 1 << 9,
|
| + };
|
| + enum {  // Feature bits used on the non-x86 path; they reuse the same low bit values as the enum above.
|
| + NEON = 1 << 0,
|
| + NEON_FMA = 1 << 1,
|
| + VFP_FP16 = 1 << 2,
|
| + };
|
| +
|
| + static bool Supports(uint32_t);  // True only when every bit of the given mask is available.
|
| +
|
| +private:
|
| + // Consider a loop like this that expands 16-bit floats out to 32-bit, does math, and repacks:
|
| + // for (int i = 0; i < N; i++) {
|
| + // if (SkCpu::Supports(SkCpu::F16C)) {
|
| + // f32s = SkCpu::F16C_cvtph_ps(f16s);
|
| + // } else {
|
| + // f32s = some_slower_f16_to_f32_routine(f16s);
|
| + // }
|
| + //
|
| + // ... do some math with f32s ...
|
| + //
|
| + // if (SkCpu::Supports(SkCpu::F16C)) {
|
| + // f16s = SkCpu::F16C_cvtps_ph(f32s);
|
| + // } else {
|
| + // f16s = some_slower_f32_to_f16_routine(f32s);
|
| + // }
|
| + // }
|
| + //
|
| + // We would like SkCpu::Supports() to participate in common sub-expression elimination,
|
| + // so that it's called exactly 1 time, rather than N or 2N times. This is especially
|
| + // important when the if-else blocks you see above are really inline functions.
|
| + //
|
| + // The key to this is to make sure to implement RuntimeCpuFeatures() with the same
|
| + // capacity for common sub-expression elimination.
|
| + //
|
| + // __attribute__((const)) works perfectly when available.
|
| + //
|
| + // When it's not (MSVC), we fall back to a static initializer.
|
| + // (Static initializers would work fine everywhere, but Chrome really dislikes them.)
|
| +
|
| +#if defined(__GNUC__) || defined(__clang__) // i.e. GCC, Clang, or clang-cl
|
| + __attribute__((const))
|
| + static uint32_t RuntimeCpuFeatures();
|
| +#else
|
| + static const uint32_t gCachedCpuFeatures;  // Declared only; no initializer is given here.
|
| + static uint32_t RuntimeCpuFeatures() {
|
| + return gCachedCpuFeatures;
|
| + }
|
| +#endif
|
| +};
|
| +
|
| +inline bool SkCpu::Supports(uint32_t mask) {  // mask: OR of SkCpu feature bits to test for.
|
| + uint32_t features = RuntimeCpuFeatures();
|
| +
|
| + // If we OR in the compile-time known lower limits, the compiler can completely
|
| + // drop many calls to RuntimeCpuFeatures().
|
| +#if SK_CPU_X86
|
| + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
|
| + features |= SSE1;
|
| + #endif
|
| + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
|
| + features |= SSE2;
|
| + #endif
|
| + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE3
|
| + features |= SSE3;
|
| + #endif
|
| + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
|
| + features |= SSSE3;
|
| + #endif
|
| + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
|
| + features |= SSE41;
|
| + #endif
|
| + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
|
| + features |= SSE42;
|
| + #endif
|
| + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX
|
| + features |= AVX;
|
| + #endif
|
| + // F16C goes here if we add SK_CPU_SSE_LEVEL_F16C.
|
| + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
|
| + features |= AVX2;
|
| + #endif
|
| + // FMA doesn't fit neatly into this total ordering.
|
| + // It's available on Haswell+ just like AVX2, but it's technically a different bit.
|
| + // TODO: circle back on this if we find ourselves limited by lack of compile-time FMA.
|
| +
|
| +#else
|
| + #if defined(SK_ARM_HAS_NEON)
|
| + features |= NEON;
|
| + #endif
|
| +
|
| + #if defined(SK_CPU_ARM64)
|
| + features |= NEON|NEON_FMA|VFP_FP16;  // All three bits are taken as given whenever SK_CPU_ARM64 is defined.
|
| + #endif
|
| +
|
| +#endif
|
| + return (features & mask) == mask;  // Every requested bit must be set, not just any one of them.
|
| +}
|
| +
|
| +#endif//SkCpu_DEFINED
|
|
|