OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_DEFINED | 8 #ifndef SkNx_DEFINED |
9 #define SkNx_DEFINED | 9 #define SkNx_DEFINED |
10 | 10 |
11 | 11 |
12 #define SKNX_NO_SIMDx // Remove the x to disable SIMD for all SkNx types. | 12 //#define SKNX_NO_SIMD |
13 | |
14 | 13 |
15 #include "SkScalar.h" | 14 #include "SkScalar.h" |
16 #include "SkTypes.h" | 15 #include "SkTypes.h" |
17 #include <math.h> | 16 #include <math.h> |
18 #define REQUIRE(x) static_assert(x, #x) | 17 #define REQUIRE(x) static_assert(x, #x) |
19 | 18 |
20 // This file may be included multiple times by .cpp files with different flags,
leading | 19 // This file may be included multiple times by .cpp files with different flags,
leading |
21 // to different definitions. Usually that doesn't matter because it's all inlin
ed, but | 20 // to different definitions. Usually that doesn't matter because it's all inlin
ed, but |
22 // in Debug modes the compilers may not inline everything. So wrap everything i
n an | 21 // in Debug modes the compilers may not inline everything. So wrap everything i
n an |
23 // anonymous namespace to give each includer their own silo of this code (or the
linker | 22 // anonymous namespace to give each includer their own silo of this code (or the
linker |
24 // will probably pick one randomly for us, which is rarely correct). | 23 // will probably pick one randomly for us, which is rarely correct). |
25 namespace { | 24 namespace { |
26 | 25 |
27 // The default implementations just fall back on a pair of size N/2. | 26 // The default implementations just fall back on a pair of size N/2. |
28 | 27 // These support the union of operations we might do to ints and floats, but |
| 28 // platform specializations might support fewer (e.g. no float <<, no int /). |
29 template <int N, typename T> | 29 template <int N, typename T> |
30 class SkNx { | 30 class SkNx { |
31 public: | 31 public: |
32 SkNx() {} | 32 SkNx() {} |
33 SkNx(const SkNx<N/2, T>& lo, const SkNx<N/2, T>& hi) : fLo(lo), fHi(hi) {} | 33 SkNx(const SkNx<N/2, T>& lo, const SkNx<N/2, T>& hi) : fLo(lo), fHi(hi) {} |
34 SkNx(T val) : fLo(val), fHi(val) {} | 34 SkNx(T val) : fLo(val), fHi(val) {} |
35 static SkNx Load(const T vals[N]) { | 35 static SkNx Load(const T vals[N]) { |
36 return SkNx(SkNx<N/2,T>::Load(vals), SkNx<N/2,T>::Load(vals+N/2)); | 36 return SkNx(SkNx<N/2,T>::Load(vals), SkNx<N/2,T>::Load(vals+N/2)); |
37 } | 37 } |
38 | 38 |
39 SkNx(T a, T b) : fLo(a), fHi(b) {
REQUIRE(N==2); } | 39 SkNx(T a, T b) : fLo(a), fHi(b) {
REQUIRE(N==2); } |
40 SkNx(T a, T b, T c, T d) : fLo(a,b), fHi(c,d) {
REQUIRE(N==4); } | 40 SkNx(T a, T b, T c, T d) : fLo(a,b), fHi(c,d) {
REQUIRE(N==4); } |
41 SkNx(T a, T b, T c, T d, T e, T f, T g, T h) : fLo(a,b,c,d), fHi(e,f,g,h) {
REQUIRE(N==8); } | 41 SkNx(T a, T b, T c, T d, T e, T f, T g, T h) : fLo(a,b,c,d), fHi(e,f,g,h) {
REQUIRE(N==8); } |
42 SkNx(T a, T b, T c, T d, T e, T f, T g, T h, | 42 SkNx(T a, T b, T c, T d, T e, T f, T g, T h, |
43 T i, T j, T k, T l, T m, T n, T o, T p) | 43 T i, T j, T k, T l, T m, T n, T o, T p) |
44 : fLo(a,b,c,d, e,f,g,h), fHi(i,j,k,l, m,n,o,p) { REQUIRE(N==16); } | 44 : fLo(a,b,c,d, e,f,g,h), fHi(i,j,k,l, m,n,o,p) { REQUIRE(N==16); } |
45 | 45 |
46 void store(T vals[N]) const { | 46 void store(T vals[N]) const { |
47 fLo.store(vals); | 47 fLo.store(vals); |
48 fHi.store(vals+N/2); | 48 fHi.store(vals+N/2); |
49 } | 49 } |
50 | 50 |
51 SkNx saturatedAdd(const SkNx& o) const { | 51 SkNx saturatedAdd(const SkNx& o) const { |
52 return SkNx(fLo.saturatedAdd(o.fLo), fHi.saturatedAdd(o.fHi)); | 52 return SkNx(fLo.saturatedAdd(o.fLo), fHi.saturatedAdd(o.fHi)); |
53 } | 53 } |
54 | 54 |
55 SkNx operator + (const SkNx& o) const { return SkNx(fLo + o.fLo, fHi + o.fHi
); } | 55 SkNx operator + (const SkNx& o) const { return SkNx(fLo + o.fLo, fHi + o.fHi
); } |
56 SkNx operator - (const SkNx& o) const { return SkNx(fLo - o.fLo, fHi - o.fHi
); } | 56 SkNx operator - (const SkNx& o) const { return SkNx(fLo - o.fLo, fHi - o.fHi
); } |
57 SkNx operator * (const SkNx& o) const { return SkNx(fLo * o.fLo, fHi * o.fHi
); } | 57 SkNx operator * (const SkNx& o) const { return SkNx(fLo * o.fLo, fHi * o.fHi
); } |
58 | |
59 SkNx operator << (int bits) const { return SkNx(fLo << bits, fHi << bits); } | |
60 SkNx operator >> (int bits) const { return SkNx(fLo >> bits, fHi >> bits); } | |
61 | |
62 static SkNx Min(const SkNx& a, const SkNx& b) { | |
63 return SkNx(SkNx<N/2, T>::Min(a.fLo, b.fLo), SkNx<N/2, T>::Min(a.fHi, b.
fHi)); | |
64 } | |
65 SkNx operator < (const SkNx& o) const { return SkNx(fLo < o.fLo, fHi < o.fHi
); } | |
66 | |
67 template <int k> T kth() const { | |
68 SkASSERT(0 <= k && k < N); | |
69 return k < N/2 ? fLo.template kth<k>() : fHi.template kth<k-N/2>(); | |
70 } | |
71 | |
72 bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); } | |
73 bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); } | |
74 SkNx thenElse(const SkNx& t, const SkNx& e) const { | |
75 return SkNx(fLo.thenElse(t.fLo, e.fLo), fHi.thenElse(t.fHi, e.fHi)); | |
76 } | |
77 | |
78 protected: | |
79 REQUIRE(0 == (N & (N-1))); | |
80 | |
81 SkNx<N/2, T> fLo, fHi; | |
82 }; | |
83 | |
84 template <int N> | |
85 class SkNx<N,float> { | |
86 public: | |
87 SkNx() {} | |
88 SkNx(float val) : fLo(val), fHi(val) {} | |
89 static SkNx Load(const float vals[N]) { | |
90 return SkNx(SkNx<N/2, float>::Load(vals), SkNx<N/2, float>::Load(vals+N/
2)); | |
91 } | |
92 // FromBytes() and toBytes() specializations may assume their argument is N-
byte aligned. | |
93 // E.g. Sk4f::FromBytes() may assume it's reading from a 4-byte-aligned poin
ter. | |
94 // Converts [0,255] bytes to [0.0, 255.0] floats. | |
95 static SkNx FromBytes(const uint8_t bytes[N]) { | |
96 return SkNx(SkNx<N/2, float>::FromBytes(bytes), SkNx<N/2, float>::FromBy
tes(bytes+N/2)); | |
97 } | |
98 | |
99 SkNx(float a, float b) : fLo(a), fHi(b) { REQUIRE(N==2
); } | |
100 SkNx(float a, float b, float c, float d) : fLo(a,b), fHi(c,d) { REQUIRE(N==4
); } | |
101 SkNx(float a, float b, float c, float d, float e, float f, float g, float h) | |
102 : fLo(a,b,c,d) | |
103 , fHi(e,f,g,h) { REQUIRE(N==8); } | |
104 | |
105 void store(float vals[N]) const { | |
106 fLo.store(vals); | |
107 fHi.store(vals+N/2); | |
108 } | |
109 // Please see note on FromBytes(). | |
110 // Clamps to [0.0,255.0] floats and truncates to [0,255] bytes. | |
111 void toBytes(uint8_t bytes[N]) const { | |
112 fLo.toBytes(bytes); | |
113 fHi.toBytes(bytes+N/2); | |
114 } | |
115 | |
116 // Some implementations can do this faster. | |
117 static void ToBytes(uint8_t bytes[4*N], | |
118 const SkNx& a, const SkNx& b, const SkNx& c, const SkNx&
d) { | |
119 a.toBytes(bytes+0*N); | |
120 b.toBytes(bytes+1*N); | |
121 c.toBytes(bytes+2*N); | |
122 d.toBytes(bytes+3*N); | |
123 } | |
124 | |
125 SkNx operator + (const SkNx& o) const { return SkNx(fLo + o.fLo, fHi + o.fHi
); } | |
126 SkNx operator - (const SkNx& o) const { return SkNx(fLo - o.fLo, fHi - o.fHi
); } | |
127 SkNx operator * (const SkNx& o) const { return SkNx(fLo * o.fLo, fHi * o.fHi
); } | |
128 SkNx operator / (const SkNx& o) const { return SkNx(fLo / o.fLo, fHi / o.fHi
); } | 58 SkNx operator / (const SkNx& o) const { return SkNx(fLo / o.fLo, fHi / o.fHi
); } |
129 | 59 |
| 60 SkNx operator << (int bits) const { return SkNx(fLo << bits, fHi << bits); } |
| 61 SkNx operator >> (int bits) const { return SkNx(fLo >> bits, fHi >> bits); } |
| 62 |
130 SkNx operator == (const SkNx& o) const { return SkNx(fLo == o.fLo, fHi == o.
fHi); } | 63 SkNx operator == (const SkNx& o) const { return SkNx(fLo == o.fLo, fHi == o.
fHi); } |
131 SkNx operator != (const SkNx& o) const { return SkNx(fLo != o.fLo, fHi != o.
fHi); } | 64 SkNx operator != (const SkNx& o) const { return SkNx(fLo != o.fLo, fHi != o.
fHi); } |
132 SkNx operator < (const SkNx& o) const { return SkNx(fLo < o.fLo, fHi < o.
fHi); } | 65 SkNx operator < (const SkNx& o) const { return SkNx(fLo < o.fLo, fHi < o.
fHi); } |
133 SkNx operator > (const SkNx& o) const { return SkNx(fLo > o.fLo, fHi > o.
fHi); } | 66 SkNx operator > (const SkNx& o) const { return SkNx(fLo > o.fLo, fHi > o.
fHi); } |
134 SkNx operator <= (const SkNx& o) const { return SkNx(fLo <= o.fLo, fHi <= o.
fHi); } | 67 SkNx operator <= (const SkNx& o) const { return SkNx(fLo <= o.fLo, fHi <= o.
fHi); } |
135 SkNx operator >= (const SkNx& o) const { return SkNx(fLo >= o.fLo, fHi >= o.
fHi); } | 68 SkNx operator >= (const SkNx& o) const { return SkNx(fLo >= o.fLo, fHi >= o.
fHi); } |
136 | 69 |
137 static SkNx Min(const SkNx& l, const SkNx& r) { | 70 static SkNx Min(const SkNx& a, const SkNx& b) { |
138 return SkNx(SkNx<N/2, float>::Min(l.fLo, r.fLo), SkNx<N/2, float>::Min(l
.fHi, r.fHi)); | 71 return SkNx(SkNx<N/2, T>::Min(a.fLo, b.fLo), SkNx<N/2, T>::Min(a.fHi, b.
fHi)); |
139 } | 72 } |
140 static SkNx Max(const SkNx& l, const SkNx& r) { | 73 static SkNx Max(const SkNx& a, const SkNx& b) { |
141 return SkNx(SkNx<N/2, float>::Max(l.fLo, r.fLo), SkNx<N/2, float>::Max(l
.fHi, r.fHi)); | 74 return SkNx(SkNx<N/2, T>::Max(a.fLo, b.fLo), SkNx<N/2, T>::Max(a.fHi, b.
fHi)); |
142 } | 75 } |
143 | 76 |
144 SkNx sqrt() const { return SkNx(fLo. sqrt(), fHi. sqrt()); } | 77 SkNx sqrt() const { return SkNx(fLo.sqrt(), fHi.sqrt()); } |
145 | |
146 // Generally, increasing precision, increasing cost. | 78 // Generally, increasing precision, increasing cost. |
147 SkNx rsqrt0() const { return SkNx(fLo.rsqrt0(), fHi.rsqrt0()); } | 79 SkNx rsqrt0() const { return SkNx(fLo.rsqrt0(), fHi.rsqrt0()); } |
148 SkNx rsqrt1() const { return SkNx(fLo.rsqrt1(), fHi.rsqrt1()); } | 80 SkNx rsqrt1() const { return SkNx(fLo.rsqrt1(), fHi.rsqrt1()); } |
149 SkNx rsqrt2() const { return SkNx(fLo.rsqrt2(), fHi.rsqrt2()); } | 81 SkNx rsqrt2() const { return SkNx(fLo.rsqrt2(), fHi.rsqrt2()); } |
150 | 82 |
151 SkNx invert() const { return SkNx(fLo. invert(), fHi. invert
()); } | 83 SkNx invert() const { return SkNx(fLo. invert(), fHi. invert
()); } |
152 SkNx approxInvert() const { return SkNx(fLo.approxInvert(), fHi.approxInvert
()); } | 84 SkNx approxInvert() const { return SkNx(fLo.approxInvert(), fHi.approxInvert
()); } |
153 | 85 |
154 template <int k> float kth() const { | 86 template <int k> T kth() const { |
155 SkASSERT(0 <= k && k < N); | 87 SkASSERT(0 <= k && k < N); |
156 return k < N/2 ? fLo.template kth<k>() : fHi.template kth<k-N/2>(); | 88 return k < N/2 ? fLo.template kth<k>() : fHi.template kth<k-N/2>(); |
157 } | 89 } |
158 | 90 |
159 bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); } | 91 bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); } |
160 bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); } | 92 bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); } |
161 SkNx thenElse(const SkNx& t, const SkNx& e) const { | 93 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
162 return SkNx(fLo.thenElse(t.fLo, e.fLo), fHi.thenElse(t.fHi, e.fHi)); | 94 return SkNx(fLo.thenElse(t.fLo, e.fLo), fHi.thenElse(t.fHi, e.fHi)); |
163 } | 95 } |
164 | 96 |
165 protected: | 97 protected: |
166 REQUIRE(0 == (N & (N-1))); | 98 REQUIRE(0 == (N & (N-1))); |
167 SkNx(const SkNx<N/2, float>& lo, const SkNx<N/2, float>& hi) : fLo(lo), fHi(
hi) {} | |
168 | 99 |
169 SkNx<N/2, float> fLo, fHi; | 100 SkNx<N/2, T> fLo, fHi; |
170 }; | 101 }; |
171 | 102 |
172 | |
173 // Bottom out the default implementations with scalars when nothing's been speci
alized. | 103 // Bottom out the default implementations with scalars when nothing's been speci
alized. |
174 | |
175 template <typename T> | 104 template <typename T> |
176 class SkNx<1,T> { | 105 class SkNx<1,T> { |
177 public: | 106 public: |
178 SkNx() {} | 107 SkNx() {} |
179 SkNx(T val) : fVal(val) {} | 108 SkNx(T val) : fVal(val) {} |
180 static SkNx Load(const T vals[1]) { return SkNx(vals[0]); } | 109 static SkNx Load(const T vals[1]) { return SkNx(vals[0]); } |
181 | 110 |
182 void store(T vals[1]) const { vals[0] = fVal; } | 111 void store(T vals[1]) const { vals[0] = fVal; } |
183 | 112 |
184 SkNx saturatedAdd(const SkNx& o) const { | 113 SkNx saturatedAdd(const SkNx& o) const { |
185 SkASSERT((T)(~0) > 0); // TODO: support signed T | 114 SkASSERT((T)(~0) > 0); // TODO: support signed T |
186 T sum = fVal + o.fVal; | 115 T sum = fVal + o.fVal; |
187 return SkNx(sum < fVal ? (T)(~0) : sum); | 116 return SkNx(sum < fVal ? (T)(~0) : sum); |
188 } | 117 } |
189 | 118 |
190 SkNx operator + (const SkNx& o) const { return SkNx(fVal + o.fVal); } | 119 SkNx operator + (const SkNx& o) const { return SkNx(fVal + o.fVal); } |
191 SkNx operator - (const SkNx& o) const { return SkNx(fVal - o.fVal); } | 120 SkNx operator - (const SkNx& o) const { return SkNx(fVal - o.fVal); } |
192 SkNx operator * (const SkNx& o) const { return SkNx(fVal * o.fVal); } | 121 SkNx operator * (const SkNx& o) const { return SkNx(fVal * o.fVal); } |
193 | |
194 SkNx operator << (int bits) const { return SkNx(fVal << bits); } | |
195 SkNx operator >> (int bits) const { return SkNx(fVal >> bits); } | |
196 | |
197 static SkNx Min(const SkNx& a, const SkNx& b) { return SkNx(SkTMin(a.fVal, b
.fVal)); } | |
198 SkNx operator <(const SkNx& o) const { return SkNx(fVal < o.fVal); } | |
199 | |
200 template <int k> T kth() const { | |
201 SkASSERT(0 == k); | |
202 return fVal; | |
203 } | |
204 | |
205 bool allTrue() const { return fVal; } | |
206 bool anyTrue() const { return fVal; } | |
207 SkNx thenElse(const SkNx& t, const SkNx& e) const { return fVal ? t : e; } | |
208 | |
209 protected: | |
210 T fVal; | |
211 }; | |
212 | |
213 template <> | |
214 class SkNx<1,float> { | |
215 public: | |
216 SkNx() {} | |
217 SkNx(float val) : fVal(val) {} | |
218 static SkNx Load(const float vals[1]) { return SkNx(vals[0]); } | |
219 static SkNx FromBytes(const uint8_t bytes[1]) { return SkNx((float)bytes[0])
; } | |
220 | |
221 void store(float vals[1]) const { vals[0] = fVal; } | |
222 void toBytes(uint8_t bytes[1]) const { bytes[0] = (uint8_t)(SkTMin(fVal, 255
.0f)); } | |
223 | |
224 SkNx operator + (const SkNx& o) const { return SkNx(fVal + o.fVal); } | |
225 SkNx operator - (const SkNx& o) const { return SkNx(fVal - o.fVal); } | |
226 SkNx operator * (const SkNx& o) const { return SkNx(fVal * o.fVal); } | |
227 SkNx operator / (const SkNx& o) const { return SkNx(fVal / o.fVal); } | 122 SkNx operator / (const SkNx& o) const { return SkNx(fVal / o.fVal); } |
228 | 123 |
| 124 SkNx operator << (int bits) const { return SkNx(fVal << bits); } |
| 125 SkNx operator >> (int bits) const { return SkNx(fVal >> bits); } |
| 126 |
229 SkNx operator == (const SkNx& o) const { return SkNx(fVal == o.fVal); } | 127 SkNx operator == (const SkNx& o) const { return SkNx(fVal == o.fVal); } |
230 SkNx operator != (const SkNx& o) const { return SkNx(fVal != o.fVal); } | 128 SkNx operator != (const SkNx& o) const { return SkNx(fVal != o.fVal); } |
231 SkNx operator < (const SkNx& o) const { return SkNx(fVal < o.fVal); } | 129 SkNx operator < (const SkNx& o) const { return SkNx(fVal < o.fVal); } |
232 SkNx operator > (const SkNx& o) const { return SkNx(fVal > o.fVal); } | 130 SkNx operator > (const SkNx& o) const { return SkNx(fVal > o.fVal); } |
233 SkNx operator <= (const SkNx& o) const { return SkNx(fVal <= o.fVal); } | 131 SkNx operator <= (const SkNx& o) const { return SkNx(fVal <= o.fVal); } |
234 SkNx operator >= (const SkNx& o) const { return SkNx(fVal >= o.fVal); } | 132 SkNx operator >= (const SkNx& o) const { return SkNx(fVal >= o.fVal); } |
235 | 133 |
236 static SkNx Min(const SkNx& l, const SkNx& r) { return SkNx(SkTMin(l.fVal, r
.fVal)); } | 134 static SkNx Min(const SkNx& a, const SkNx& b) { return SkNx(SkTMin(a.fVal, b
.fVal)); } |
237 static SkNx Max(const SkNx& l, const SkNx& r) { return SkNx(SkTMax(l.fVal, r
.fVal)); } | 135 static SkNx Max(const SkNx& a, const SkNx& b) { return SkNx(SkTMax(a.fVal, b
.fVal)); } |
238 | 136 |
239 SkNx sqrt() const { return SkNx(sqrtf(fVal)); } | 137 SkNx sqrt () const { return SkNx(Sqrt(fVal)); } |
240 SkNx rsqrt0() const { return SkNx(1.0f / sqrtf(fVal)); } | 138 SkNx rsqrt0() const { return this->sqrt().invert(); } |
241 SkNx rsqrt1() const { return this->rsqrt0(); } | 139 SkNx rsqrt1() const { return this->rsqrt0(); } |
242 SkNx rsqrt2() const { return this->rsqrt1(); } | 140 SkNx rsqrt2() const { return this->rsqrt1(); } |
243 | 141 |
244 SkNx invert() const { return SkNx(1.0f / fVal); } | 142 SkNx invert() const { return SkNx(1) / SkNx(fVal); } |
245 SkNx approxInvert() const { return this->invert(); } | 143 SkNx approxInvert() const { return this->invert(); } |
246 | 144 |
247 template <int k> float kth() const { | 145 template <int k> T kth() const { |
248 SkASSERT(k == 0); | 146 SkASSERT(0 == k); |
249 return fVal; | 147 return fVal; |
250 } | 148 } |
251 | 149 |
252 bool allTrue() const { return this->pun() != 0; } | 150 bool allTrue() const { return fVal != 0; } |
253 bool anyTrue() const { return this->pun() != 0; } | 151 bool anyTrue() const { return fVal != 0; } |
254 SkNx thenElse(const SkNx& t, const SkNx& e) const { return this->pun() ? t :
e; } | 152 SkNx thenElse(const SkNx& t, const SkNx& e) const { return fVal != 0 ? t : e
; } |
255 | 153 |
256 protected: | 154 protected: |
257 uint32_t pun() const { | 155 static double Sqrt(double val) { return ::sqrt (val); } |
258 union { float f; uint32_t i; } pun = { fVal }; | 156 static float Sqrt(float val) { return ::sqrtf(val); } |
259 return pun.i; | |
260 } | |
261 | 157 |
262 float fVal; | 158 T fVal; |
263 }; | 159 }; |
264 | 160 |
265 // This default implementation can be specialized by ../opts/SkNx_foo.h | 161 // This default implementation can be specialized by ../opts/SkNx_foo.h |
266 // if there's a better platform-specific shuffle strategy. | 162 // if there's a better platform-specific shuffle strategy. |
267 template <typename Nx, int... Ix> | 163 template <typename Nx, int... Ix> |
268 inline Nx SkNx_shuffle_impl(const Nx& src) { return Nx( src.template kth<Ix>()..
. ); } | 164 inline Nx SkNx_shuffle_impl(const Nx& src) { return Nx( src.template kth<Ix>()..
. ); } |
269 | 165 |
270 // This generic shuffle can be called with 1 or N indices: | 166 // This generic shuffle can be called with 1 or N indices: |
271 // Sk4f f(a,b,c,d); | 167 // Sk4f f(a,b,c,d); |
272 // SkNx_shuffle<3>(f); // ~~~> Sk4f(d,d,d,d) | 168 // SkNx_shuffle<3>(f); // ~~~> Sk4f(d,d,d,d) |
(...skipping 16 matching lines...) Expand all Loading... |
289 template <> struct MakeSkIntSequence<16> : SkIntSequence<0,1,2,3,4,5,6,7,8,9,10,
11,12,13,14,15>{}; | 185 template <> struct MakeSkIntSequence<16> : SkIntSequence<0,1,2,3,4,5,6,7,8,9,10,
11,12,13,14,15>{}; |
290 | 186 |
291 // This is the default/fallback implementation for SkNx_cast. Best to specializ
e SkNx_cast! | 187 // This is the default/fallback implementation for SkNx_cast. Best to specializ
e SkNx_cast! |
292 template <typename D, typename S, int N, int... Ix> | 188 template <typename D, typename S, int N, int... Ix> |
293 SkNx<N,D> SkNx_cast_fallback(const SkNx<N,S>& src, SkIntSequence<Ix...>) { | 189 SkNx<N,D> SkNx_cast_fallback(const SkNx<N,S>& src, SkIntSequence<Ix...>) { |
294 return SkNx<N,D>( (D)src.template kth<Ix>()... ); | 190 return SkNx<N,D>( (D)src.template kth<Ix>()... ); |
295 } | 191 } |
296 | 192 |
297 // This is a generic cast between two SkNx with the same number of elements N.
E.g. | 193 // This is a generic cast between two SkNx with the same number of elements N.
E.g. |
298 // Sk4b bs = ...; // Load 4 bytes. | 194 // Sk4b bs = ...; // Load 4 bytes. |
299 // Sk4f fs = SkNx_cast<float>(bs); // (This will replace SkNf::FromBytes() o
ne day.) | 195 // Sk4f fs = SkNx_cast<float>(bs); // Cast each byte to a float. |
300 // Sk4i is = SkNx_cast<int>(fs); // Cast each float to int. | 196 // Sk4i is = SkNx_cast<int>(fs); // Cast each float to int. |
301 // This can be specialized in ../opts/SkNx_foo.h if there's a better platform-sp
ecific cast. | 197 // This can be specialized in ../opts/SkNx_foo.h if there's a better platform-sp
ecific cast. |
302 template <typename D, typename S, int N> | 198 template <typename D, typename S, int N> |
303 SkNx<N,D> SkNx_cast(const SkNx<N,S>& src) { | 199 SkNx<N,D> SkNx_cast(const SkNx<N,S>& src) { |
304 return SkNx_cast_fallback<D,S,N>(src, MakeSkIntSequence<N>()); | 200 return SkNx_cast_fallback<D,S,N>(src, MakeSkIntSequence<N>()); |
305 } | 201 } |
306 | 202 |
307 } // namespace | 203 } // namespace |
308 | 204 |
309 | |
310 // Include platform specific specializations if available. | |
311 #ifndef SKNX_NO_SIMD | |
312 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX | |
313 #include "../opts/SkNx_avx.h" | |
314 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | |
315 #include "../opts/SkNx_sse.h" | |
316 #elif defined(SK_ARM_HAS_NEON) | |
317 #include "../opts/SkNx_neon.h" | |
318 #endif | |
319 #endif | |
320 | |
321 #undef REQUIRE | |
322 | |
323 typedef SkNx<2, float> Sk2f; | 205 typedef SkNx<2, float> Sk2f; |
324 typedef SkNx<2, float> Sk2s; | 206 typedef SkNx<2, float> Sk2s; |
325 typedef SkNx<4, float> Sk4f; | 207 typedef SkNx<4, float> Sk4f; |
326 typedef SkNx<4, float> Sk4s; | 208 typedef SkNx<4, float> Sk4s; |
327 typedef SkNx<8, float> Sk8f; | 209 typedef SkNx<8, float> Sk8f; |
328 typedef SkNx<8, float> Sk8s; | 210 typedef SkNx<8, float> Sk8s; |
329 | 211 |
330 typedef SkNx<8, uint16_t> Sk8h; | 212 typedef SkNx< 4, uint16_t> Sk4h; |
| 213 typedef SkNx< 8, uint16_t> Sk8h; |
331 typedef SkNx<16, uint16_t> Sk16h; | 214 typedef SkNx<16, uint16_t> Sk16h; |
| 215 |
| 216 typedef SkNx< 4, uint8_t> Sk4b; |
| 217 typedef SkNx< 8, uint8_t> Sk8b; |
332 typedef SkNx<16, uint8_t> Sk16b; | 218 typedef SkNx<16, uint8_t> Sk16b; |
333 | 219 |
334 typedef SkNx<4, int> Sk4i; | 220 typedef SkNx<4, int> Sk4i; |
335 | 221 |
| 222 // Include platform specific specializations if available. |
| 223 #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX |
| 224 #include "../opts/SkNx_avx.h" |
| 225 #elif !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
| 226 #include "../opts/SkNx_sse.h" |
| 227 #elif !defined(SKNX_NO_SIMD) && defined(SK_ARM_HAS_NEON) |
| 228 #include "../opts/SkNx_neon.h" |
| 229 #else |
| 230 static inline |
| 231 void Sk4f_ToBytes(uint8_t p[16], const Sk4f& a, const Sk4f& b, const Sk4f& c
, const Sk4f& d) { |
| 232 SkNx_cast<uint8_t>(a).store(p+ 0); |
| 233 SkNx_cast<uint8_t>(b).store(p+ 4); |
| 234 SkNx_cast<uint8_t>(c).store(p+ 8); |
| 235 SkNx_cast<uint8_t>(d).store(p+12); |
| 236 } |
| 237 #endif |
| 238 |
| 239 #undef REQUIRE |
| 240 |
| 241 |
336 #endif//SkNx_DEFINED | 242 #endif//SkNx_DEFINED |
OLD | NEW |