| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkCpu_DEFINED | 8 #ifndef SkCpu_DEFINED |
| 9 #define SkCpu_DEFINED | 9 #define SkCpu_DEFINED |
| 10 | 10 |
| (...skipping 11 matching lines...) Expand all Loading... |
| 22 F16C = 1 << 7, | 22 F16C = 1 << 7, |
| 23 FMA = 1 << 8, | 23 FMA = 1 << 8, |
| 24 AVX2 = 1 << 9, | 24 AVX2 = 1 << 9, |
| 25 }; | 25 }; |
| 26 enum { | 26 enum { |
| 27 NEON = 1 << 0, | 27 NEON = 1 << 0, |
| 28 NEON_FMA = 1 << 1, | 28 NEON_FMA = 1 << 1, |
| 29 VFP_FP16 = 1 << 2, | 29 VFP_FP16 = 1 << 2, |
| 30 }; | 30 }; |
| 31 | 31 |
| 32 static void CacheRuntimeFeatures(); |
| 32 static bool Supports(uint32_t); | 33 static bool Supports(uint32_t); |
| 33 | |
| 34 private: | 34 private: |
| 35 // Consider a loop like this that expands 16-bit floats out to 32-bit, does
math, and repacks: | 35 #if defined(_MSC_VER) || !defined(SkCpu_IMPL) |
| 36 // for (int i = 0; i < N; i++) { | 36 static const uint32_t gCachedFeatures; |
| 37 // if (SkCpu::Supports(SkCpu::F16C)) { | |
| 38 // f32s = SkCpu::F16C_cvtph_ps(f16s); | |
| 39 // } else { | |
| 40 // f32s = some_slower_f16_to_f32_routine(f16s); | |
| 41 // } | |
| 42 // | |
| 43 // ... do some math with f32s ... | |
| 44 // | |
| 45 // if (SkCpu::Supports(SkCpu::F16C)) { | |
| 46 // f16s = SkCpu::F16C_cvtps_ph(f32s); | |
| 47 // } else { | |
| 48 // f16s = some_slower_f32_to_f16_routine(f32s); | |
| 49 // } | |
| 50 // } | |
| 51 // | |
| 52 // We would like SkCpu::Supports() to participate in common sub-expression e
limination, | |
| 53 // so that it's called exactly 1 time, rather than N or 2N times. This is e
specially | |
| 54 // important when the if-else blocks you see above are really inline functio
ns. | |
| 55 // | |
| 56 // The key to this is to make sure to implement RuntimeCpuFeatures() with th
e same | |
| 57 // capacity for common sub-expression elimination. | |
| 58 // | |
| 59 // __attribute__((const)) works perfectly when available. | |
| 60 // | |
| 61 // When it's not (MSVC), we fall back to a static initializer. | |
| 62 // (Static initializers would work fine everywhere, but Chrome really dislike
s them.) | |
| 63 | |
| 64 #if defined(__GNUC__) || defined(__clang__) // i.e. GCC, Clang, or clang-cl | |
| 65 __attribute__((const)) | |
| 66 static uint32_t RuntimeCpuFeatures(); | |
| 67 #else | 37 #else |
| 68 static const uint32_t gCachedCpuFeatures; | 38 static uint32_t gCachedFeatures; |
| 69 static uint32_t RuntimeCpuFeatures() { | |
| 70 return gCachedCpuFeatures; | |
| 71 } | |
| 72 #endif | 39 #endif |
| 73 }; | 40 }; |
| 74 | 41 |
| 75 inline bool SkCpu::Supports(uint32_t mask) { | 42 inline bool SkCpu::Supports(uint32_t mask) { |
| 76 uint32_t features = RuntimeCpuFeatures(); | 43 uint32_t features = gCachedFeatures; |
| 77 | 44 |
| 78 // If we mask in compile-time known lower limits, the compiler can completel
y | 45 // If we mask in compile-time known lower limits, the compiler can |
| 79 // drop many calls to RuntimeCpuFeatures(). | 46 // often compile away this entire function. |
| 80 #if SK_CPU_X86 | 47 #if SK_CPU_X86 |
| 81 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1 | 48 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1 |
| 82 features |= SSE1; | 49 features |= SSE1; |
| 83 #endif | 50 #endif |
| 84 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | 51 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
| 85 features |= SSE2; | 52 features |= SSE2; |
| 86 #endif | 53 #endif |
| 87 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE3 | 54 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE3 |
| 88 features |= SSE3; | 55 features |= SSE3; |
| 89 #endif | 56 #endif |
| (...skipping 24 matching lines...) Expand all Loading... |
| 114 | 81 |
| 115 #if defined(SK_CPU_ARM64) | 82 #if defined(SK_CPU_ARM64) |
| 116 features |= NEON|NEON_FMA|VFP_FP16; | 83 features |= NEON|NEON_FMA|VFP_FP16; |
| 117 #endif | 84 #endif |
| 118 | 85 |
| 119 #endif | 86 #endif |
| 120 return (features & mask) == mask; | 87 return (features & mask) == mask; |
| 121 } | 88 } |
| 122 | 89 |
| 123 #endif//SkCpu_DEFINED | 90 #endif//SkCpu_DEFINED |
| OLD | NEW |