OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkCpu_DEFINED | 8 #ifndef SkCpu_DEFINED |
9 #define SkCpu_DEFINED | 9 #define SkCpu_DEFINED |
10 | 10 |
(...skipping 11 matching lines...) Expand all Loading... |
22 F16C = 1 << 7, | 22 F16C = 1 << 7, |
23 FMA = 1 << 8, | 23 FMA = 1 << 8, |
24 AVX2 = 1 << 9, | 24 AVX2 = 1 << 9, |
25 }; | 25 }; |
26 enum { | 26 enum { |
27 NEON = 1 << 0, | 27 NEON = 1 << 0, |
28 NEON_FMA = 1 << 1, | 28 NEON_FMA = 1 << 1, |
29 VFP_FP16 = 1 << 2, | 29 VFP_FP16 = 1 << 2, |
30 }; | 30 }; |
31 | 31 |
| 32 static void CacheRuntimeFeatures(); |
32 static bool Supports(uint32_t); | 33 static bool Supports(uint32_t); |
33 | |
34 private: | 34 private: |
35 // Consider a loop like this that expands 16-bit floats out to 32-bit, does
math, and repacks: | 35 #if defined(_MSC_VER) || !defined(SkCpu_IMPL) |
36 // for (int i = 0; i < N; i++) { | 36 static const uint32_t gCachedFeatures; |
37 // if (SkCpu::Supports(SkCpu::F16C)) { | |
38 // f32s = SkCpu::F16C_cvtph_ps(f16s); | |
39 // } else { | |
40 // f32s = some_slower_f16_to_f32_routine(f16s); | |
41 // } | |
42 // | |
43 // ... do some math with f32s ... | |
44 // | |
45 // if (SkCpu::Supports(SkCpu::F16C)) { | |
46 // f16s = SkCpu::F16C_cvtps_ph(f32s); | |
47 // } else { | |
48 // f16s = some_slower_f32_to_f16_routine(f32s); | |
49 // } | |
50 // } | |
51 // | |
52 // We would like SkCpu::Supports() to participate in common sub-expression e
limination, | |
53 // so that it's called exactly 1 time, rather than N or 2N times. This is e
specially | |
54 // important when the if-else blocks you see above are really inline functio
ns. | |
55 // | |
56 // The key to this is to make sure to implement RuntimeCpuFeatures() with th
e same | |
57 // capacity for common sub-expression elimination. | |
58 // | |
59 // __attribute__((const)) works perfectly when available. | |
60 // | |
61 // When it's not (MSVC), we fall back to a static initializer. | |
62 // (Static initializers would work fine everywhere, but Chrome really dislike
s them.) | |
63 | |
64 #if defined(__GNUC__) || defined(__clang__) // i.e. GCC, Clang, or clang-cl | |
65 __attribute__((const)) | |
66 static uint32_t RuntimeCpuFeatures(); | |
67 #else | 37 #else |
68 static const uint32_t gCachedCpuFeatures; | 38 static uint32_t gCachedFeatures; |
69 static uint32_t RuntimeCpuFeatures() { | |
70 return gCachedCpuFeatures; | |
71 } | |
72 #endif | 39 #endif |
73 }; | 40 }; |
74 | 41 |
75 inline bool SkCpu::Supports(uint32_t mask) { | 42 inline bool SkCpu::Supports(uint32_t mask) { |
76 uint32_t features = RuntimeCpuFeatures(); | 43 uint32_t features = gCachedFeatures; |
77 | 44 |
78 // If we mask in compile-time known lower limits, the compiler can completel
y | 45 // If we mask in compile-time known lower limits, the compiler can |
79 // drop many calls to RuntimeCpuFeatures(). | 46 // often compile away this entire function. |
80 #if SK_CPU_X86 | 47 #if SK_CPU_X86 |
81 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1 | 48 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1 |
82 features |= SSE1; | 49 features |= SSE1; |
83 #endif | 50 #endif |
84 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | 51 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
85 features |= SSE2; | 52 features |= SSE2; |
86 #endif | 53 #endif |
87 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE3 | 54 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE3 |
88 features |= SSE3; | 55 features |= SSE3; |
89 #endif | 56 #endif |
(...skipping 24 matching lines...) Expand all Loading... |
114 | 81 |
115 #if defined(SK_CPU_ARM64) | 82 #if defined(SK_CPU_ARM64) |
116 features |= NEON|NEON_FMA|VFP_FP16; | 83 features |= NEON|NEON_FMA|VFP_FP16; |
117 #endif | 84 #endif |
118 | 85 |
119 #endif | 86 #endif |
120 return (features & mask) == mask; | 87 return (features & mask) == mask; |
121 } | 88 } |
122 | 89 |
123 #endif//SkCpu_DEFINED | 90 #endif//SkCpu_DEFINED |
OLD | NEW |