OLD | NEW |
(Empty) | |
/*
 * Copyright 2016 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
| 7 |
| 8 #ifndef SkCpu_DEFINED |
| 9 #define SkCpu_DEFINED |
| 10 |
| 11 #include "SkTypes.h" |
| 12 |
// Namespace-like struct for querying CPU instruction-set features.
//
// Callers pass one or more of the feature bits below, OR'd together, to
// SkCpu::Supports().
struct SkCpu {
    // Feature bits for the SSE/AVX family.
    enum {
        SSE1  = 1 << 0,
        SSE2  = 1 << 1,
        SSE3  = 1 << 2,
        SSSE3 = 1 << 3,
        SSE41 = 1 << 4,
        SSE42 = 1 << 5,
        AVX   = 1 << 6,
        F16C  = 1 << 7,
        FMA   = 1 << 8,
        AVX2  = 1 << 9,
    };
    // Feature bits for the NEON/VFP family.  These deliberately reuse the same
    // bit positions as the enum above, so a mask is only meaningful for the
    // architecture the code is being compiled for.
    enum {
        NEON     = 1 << 0,
        NEON_FMA = 1 << 1,
        VFP_FP16 = 1 << 2,
    };

    // Returns true if every feature bit set in the argument is supported.
    static bool Supports(uint32_t);

private:
    // Consider a loop like this that expands 16-bit floats out to 32-bit, does math, and repacks:
    //    for (int i = 0; i < N; i++) {
    //        if (SkCpu::Supports(SkCpu::F16C)) {
    //            f32s = SkCpu::F16C_cvtph_ps(f16s);
    //        } else {
    //            f32s = some_slower_f16_to_f32_routine(f16s);
    //        }
    //
    //        ... do some math with f32s ...
    //
    //        if (SkCpu::Supports(SkCpu::F16C)) {
    //            f16s = SkCpu::F16C_cvtps_ph(f32s);
    //        } else {
    //            f16s = some_slower_f32_to_f16_routine(f32s);
    //        }
    //    }
    //
    // We would like SkCpu::Supports() to participate in common sub-expression elimination,
    // so that it's called exactly 1 time, rather than N or 2N times.  This is especially
    // important when the if-else blocks you see above are really inline functions.
    //
    // The key to this is to make sure to implement RuntimeCpuFeatures() with the same
    // capacity for common sub-expression elimination.
    //
    // __attribute__((const)) works perfectly when available.
    //
    // When it's not (MSVC), we fall back to a static initializer.
    // (Static initializers would work fine everywhere, but Chrome really dislikes them.)

#if defined(__GNUC__) || defined(__clang__)  // i.e. GCC, Clang, or clang-cl
    // Declared only; the definition is not in this header.
    __attribute__((const))
    static uint32_t RuntimeCpuFeatures();
#else
    // Declared without an initializer here, so the definition (and its
    // initial value) must be provided out of line.
    static const uint32_t gCachedCpuFeatures;
    static uint32_t RuntimeCpuFeatures() {
        return gCachedCpuFeatures;
    }
#endif
};
| 74 |
| 75 inline bool SkCpu::Supports(uint32_t mask) { |
| 76 uint32_t features = RuntimeCpuFeatures(); |
| 77 |
| 78 // If we mask in compile-time known lower limits, the compiler can completel
y |
| 79 // drop many calls to RuntimeCpuFeatures(). |
| 80 #if SK_CPU_X86 |
| 81 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1 |
| 82 features |= SSE1; |
| 83 #endif |
| 84 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
| 85 features |= SSE2; |
| 86 #endif |
| 87 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE3 |
| 88 features |= SSE3; |
| 89 #endif |
| 90 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
| 91 features |= SSSE3; |
| 92 #endif |
| 93 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 |
| 94 features |= SSE41; |
| 95 #endif |
| 96 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 |
| 97 features |= SSE42; |
| 98 #endif |
| 99 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX |
| 100 features |= AVX; |
| 101 #endif |
| 102 // F16C goes here if we add SK_CPU_SSE_LEVEL_F16C |
| 103 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2 |
| 104 features |= AVX2; |
| 105 #endif |
| 106 // FMA doesn't fit neatly into this total ordering. |
| 107 // It's available on Haswell+ just like AVX2, but it's technically a differe
nt bit. |
| 108 // TODO: circle back on this if we find ourselves limited by lack of compile
-time FMA |
| 109 |
| 110 #else |
| 111 #if defined(SK_ARM_HAS_NEON) |
| 112 features |= NEON; |
| 113 #endif |
| 114 |
| 115 #if defined(SK_CPU_ARM64) |
| 116 features |= NEON|NEON_FMA|VFP_FP16; |
| 117 #endif |
| 118 |
| 119 #endif |
| 120 return (features & mask) == mask; |
| 121 } |
| 122 |
| 123 #endif//SkCpu_DEFINED |
OLD | NEW |