OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright 2016 Google Inc. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license that can be | |
5 * found in the LICENSE file. | |
6 */ | |
7 | |
8 #ifndef SkCpu_DEFINED | |
9 #define SkCpu_DEFINED | |
10 | |
11 #include "SkTypes.h" | |
12 | |
// Namespace-like struct for querying which CPU instruction-set extensions may be used.
struct SkCpu {
    // x86 feature bits. Combine with | and pass to Supports().
    enum {
        SSE1  = 1 << 0,
        SSE2  = 1 << 1,
        SSE3  = 1 << 2,
        SSSE3 = 1 << 3,
        SSE41 = 1 << 4,
        SSE42 = 1 << 5,
        AVX   = 1 << 6,
        F16C  = 1 << 7,
        FMA   = 1 << 8,
        AVX2  = 1 << 9,
    };
    // ARM feature bits. Note that these occupy the same low bit positions as the
    // x86 enumerators above.
    enum {
        NEON     = 1 << 0,
        NEON_FMA = 1 << 1,
        VFP_FP16 = 1 << 2,
    };

    // Returns true only when every feature bit set in the argument is available.
    static bool Supports(uint32_t);

private:
    // Why RuntimeCpuFeatures() is declared the way it is:
    //
    // Picture a loop that widens 16-bit floats to 32-bit, does some math, and narrows again:
    //
    //    for (int i = 0; i < N; i++) {
    //        if (SkCpu::Supports(SkCpu::F16C)) {
    //            f32s = SkCpu::F16C_cvtph_ps(f16s);
    //        } else {
    //            f32s = some_slower_f16_to_f32_routine(f16s);
    //        }
    //
    //        ... do some math with f32s ...
    //
    //        if (SkCpu::Supports(SkCpu::F16C)) {
    //            f16s = SkCpu::F16C_cvtps_ph(f32s);
    //        } else {
    //            f16s = some_slower_f32_to_f16_routine(f32s);
    //        }
    //    }
    //
    // We want SkCpu::Supports() to be eligible for common sub-expression elimination,
    // so that it is evaluated once instead of N or 2N times.  That matters most when the
    // if-else blocks above are actually hidden inside inline functions.
    //
    // To get there, RuntimeCpuFeatures() itself must be just as amenable to
    // common sub-expression elimination.
    //
    // Where __attribute__((const)) exists, it does the job perfectly.
    //
    // Where it does not (MSVC), a static initializer is used instead.
    // (Static initializers would work everywhere, but Chrome really dislikes them.)

#if defined(__clang__) || defined(__GNUC__)  // i.e. GCC, Clang, or clang-cl
    __attribute__((const))
    static uint32_t RuntimeCpuFeatures();
#else
    static const uint32_t gCachedCpuFeatures;
    static uint32_t RuntimeCpuFeatures() { return gCachedCpuFeatures; }
#endif
};
74 | |
75 inline bool SkCpu::Supports(uint32_t mask) { | |
76 uint32_t features = RuntimeCpuFeatures(); | |
77 | |
78 // If we mask in compile-time known lower limits, the compiler can completel
y | |
79 // drop many calls to RuntimeCpuFeatures(). | |
80 #if SK_CPU_X86 | |
81 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1 | |
82 features |= SSE1; | |
83 #endif | |
84 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | |
85 features |= SSE2; | |
86 #endif | |
87 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE3 | |
88 features |= SSE3; | |
89 #endif | |
90 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 | |
91 features |= SSSE3; | |
92 #endif | |
93 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 | |
94 features |= SSE41; | |
95 #endif | |
96 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 | |
97 features |= SSE42; | |
98 #endif | |
99 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX | |
100 features |= AVX; | |
101 #endif | |
102 // F16C goes here if we add SK_CPU_SSE_LEVEL_F16C | |
103 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2 | |
104 features |= AVX2; | |
105 #endif | |
106 // FMA doesn't fit neatly into this total ordering. | |
107 // It's available on Haswell+ just like AVX2, but it's technically a differe
nt bit. | |
108 // TODO: circle back on this if we find ourselves limited by lack of compile
-time FMA | |
109 | |
110 #else | |
111 #if defined(SK_ARM_HAS_NEON) | |
112 features |= NEON; | |
113 #endif | |
114 | |
115 #if defined(SK_CPU_ARM64) | |
116 features |= NEON|NEON_FMA|VFP_FP16; | |
117 #endif | |
118 | |
119 #endif | |
120 return (features & mask) == mask; | |
121 } | |
122 | |
123 #endif//SkCpu_DEFINED | |
OLD | NEW |