OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
10 | 10 |
11 #include <arm_neon.h> | 11 #include <arm_neon.h> |
12 | 12 |
// Well, this is absurd. The shifts require compile-time constant arguments.
// NEON's vshlq_n_*/vshrq_n_* intrinsics take the shift count as an immediate,
// so a runtime `bits` must be dispatched through a switch that supplies every
// possible constant.  SHIFT8 covers counts 1-7, SHIFT16 chains to it and adds
// 8-15, SHIFT32 chains to SHIFT16 and adds 16-31.  Any count not matched
// (e.g. 0) falls through to `return fVec`, i.e. the vector is unshifted.
// NOTE: each macro expands to bare return statements, so it is only usable as
// the entire body of a member function of a class with an fVec field.

#define SHIFT8(op, v, bits) switch(bits) { \
    case  1: return op(v,  1);  case  2: return op(v,  2);  case  3: return op(v,  3); \
    case  4: return op(v,  4);  case  5: return op(v,  5);  case  6: return op(v,  6); \
    case  7: return op(v,  7); \
    } return fVec

#define SHIFT16(op, v, bits) if (bits < 8) { SHIFT8(op, v, bits); } switch(bits) { \
                               case  8: return op(v,  8);  case  9: return op(v,  9); \
    case 10: return op(v, 10); case 11: return op(v, 11); case 12: return op(v, 12); \
    case 13: return op(v, 13); case 14: return op(v, 14); case 15: return op(v, 15); \
    } return fVec

#define SHIFT32(op, v, bits) if (bits < 16) { SHIFT16(op, v, bits); } switch(bits) { \
    case 16: return op(v, 16); case 17: return op(v, 17); case 18: return op(v, 18); \
    case 19: return op(v, 19); case 20: return op(v, 20); case 21: return op(v, 21); \
    case 22: return op(v, 22); case 23: return op(v, 23); case 24: return op(v, 24); \
    case 25: return op(v, 25); case 26: return op(v, 26); case 27: return op(v, 27); \
    case 28: return op(v, 28); case 29: return op(v, 29); case 30: return op(v, 30); \
    case 31: return op(v, 31); } return fVec
| 34 |
// 2-wide boolean mask held in 32-bit lanes (NEON d-register).
template <>
class SkNb<2, 4> {
public:
    SkNb(uint32x2_t vec) : fVec(vec) {}
    SkNb() {}

    // True only when both lanes are non-zero.
    bool allTrue() const {
        const uint32_t lo = vget_lane_u32(fVec, 0),
                       hi = vget_lane_u32(fVec, 1);
        return lo && hi;
    }
    // True when at least one lane is non-zero.
    bool anyTrue() const {
        const uint32_t lo = vget_lane_u32(fVec, 0),
                       hi = vget_lane_u32(fVec, 1);
        return lo || hi;
    }

    uint32x2_t fVec;
};
24 | 46 |
// 4-wide boolean mask held in 32-bit lanes (NEON q-register).
template <>
class SkNb<4, 4> {
public:
    SkNb(uint32x4_t vec) : fVec(vec) {}
    SkNb() {}

    // True only when every lane is non-zero.
    bool allTrue() const {
        uint32_t lanes[4];
        vst1q_u32(lanes, fVec);
        return lanes[0] && lanes[1] && lanes[2] && lanes[3];
    }
    // True when at least one lane is non-zero.
    bool anyTrue() const {
        uint32_t lanes[4];
        vst1q_u32(lanes, fVec);
        return lanes[0] || lanes[1] || lanes[2] || lanes[3];
    }

    uint32x4_t fVec;
};
38 | 60 |
39 template <> | 61 template <> |
40 class SkNf<2, float> { | 62 class SkNf<2, float> { |
41 typedef SkNb<2, 4> Nb; | 63 typedef SkNb<2, 4> Nb; |
42 public: | 64 public: |
43 SkNf(float32x2_t vec) : fVec(vec) {} | 65 SkNf(float32x2_t vec) : fVec(vec) {} |
44 | 66 |
45 SkNf() {} | 67 SkNf() {} |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
97 #else | 119 #else |
98 return *this * this->rsqrt2(); | 120 return *this * this->rsqrt2(); |
99 #endif | 121 #endif |
100 } | 122 } |
101 | 123 |
102 template <int k> float kth() const { | 124 template <int k> float kth() const { |
103 SkASSERT(0 <= k && k < 2); | 125 SkASSERT(0 <= k && k < 2); |
104 return vget_lane_f32(fVec, k&1); | 126 return vget_lane_f32(fVec, k&1); |
105 } | 127 } |
106 | 128 |
107 private: | |
108 float32x2_t fVec; | 129 float32x2_t fVec; |
109 }; | 130 }; |
110 | 131 |
111 #if defined(SK_CPU_ARM64) | 132 #if defined(SK_CPU_ARM64) |
// 2-wide boolean mask held in 64-bit lanes (ARM64 only).
template <>
class SkNb<2, 8> {
public:
    SkNb(uint64x2_t vec) : fVec(vec) {}
    SkNb() {}

    // True only when both lanes are non-zero.
    bool allTrue() const {
        const uint64_t lo = vgetq_lane_u64(fVec, 0),
                       hi = vgetq_lane_u64(fVec, 1);
        return lo && hi;
    }
    // True when at least one lane is non-zero.
    bool anyTrue() const {
        const uint64_t lo = vgetq_lane_u64(fVec, 0),
                       hi = vgetq_lane_u64(fVec, 1);
        return lo || hi;
    }

    uint64x2_t fVec;
};
123 | 144 |
124 template <> | 145 template <> |
125 class SkNf<2, double> { | 146 class SkNf<2, double> { |
126 typedef SkNb<2, 8> Nb; | 147 typedef SkNb<2, 8> Nb; |
127 public: | 148 public: |
128 SkNf(float64x2_t vec) : fVec(vec) {} | 149 SkNf(float64x2_t vec) : fVec(vec) {} |
129 | 150 |
130 SkNf() {} | 151 SkNf() {} |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
174 est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1), | 195 est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1), |
175 est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2); | 196 est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2); |
176 return est3; | 197 return est3; |
177 } | 198 } |
178 | 199 |
179 template <int k> double kth() const { | 200 template <int k> double kth() const { |
180 SkASSERT(0 <= k && k < 2); | 201 SkASSERT(0 <= k && k < 2); |
181 return vgetq_lane_f64(fVec, k&1); | 202 return vgetq_lane_f64(fVec, k&1); |
182 } | 203 } |
183 | 204 |
184 private: | |
185 float64x2_t fVec; | 205 float64x2_t fVec; |
186 }; | 206 }; |
187 #endif//defined(SK_CPU_ARM64) | 207 #endif//defined(SK_CPU_ARM64) |
188 | 208 |
// 4-wide vector of 32-bit signed integers.
template <>
class SkNi<4, int> {
public:
    SkNi(const int32x4_t& vec) : fVec(vec) {}
    SkNi() {}

    // Broadcast one value into every lane.
    explicit SkNi(int val) : fVec(vdupq_n_s32(val)) {}
    // Build from four explicit lane values, lowest lane first.
    SkNi(int a, int b, int c, int d) : fVec((int32x4_t) { a, b, c, d }) {}

    static SkNi Load(const int vals[4]) { return vld1q_s32(vals); }
    void store(int vals[4]) const { vst1q_s32(vals, fVec); }

    SkNi operator + (const SkNi& o) const { return vaddq_s32(fVec, o.fVec); }
    SkNi operator - (const SkNi& o) const { return vsubq_s32(fVec, o.fVec); }
    SkNi operator * (const SkNi& o) const { return vmulq_s32(fVec, o.fVec); }

    // NEON shift intrinsics need an immediate count, so SHIFT32 dispatches
    // the runtime `bits` through a switch over all constants.
    SkNi operator << (int bits) const { SHIFT32(vshlq_n_s32, fVec, bits); }
    SkNi operator >> (int bits) const { SHIFT32(vshrq_n_s32, fVec, bits); }

    // Extract lane k (k must be a compile-time constant, 0 <= k < 4).
    template <int k> int kth() const {
        SkASSERT(0 <= k && k < 4);
        return vgetq_lane_s32(fVec, k&3);
    }

    int32x4_t fVec;
};
230 | 235 |
231 template <> | 236 template <> |
232 class SkNf<4, float> { | 237 class SkNf<4, float> { |
233 typedef SkNb<4, 4> Nb; | 238 typedef SkNb<4, 4> Nb; |
234 public: | 239 public: |
235 SkNf(float32x4_t vec) : fVec(vec) {} | 240 SkNf(float32x4_t vec) : fVec(vec) {} |
236 | 241 |
237 SkNf() {} | 242 SkNf() {} |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
291 #else | 296 #else |
292 return *this * this->rsqrt2(); | 297 return *this * this->rsqrt2(); |
293 #endif | 298 #endif |
294 } | 299 } |
295 | 300 |
296 template <int k> float kth() const { | 301 template <int k> float kth() const { |
297 SkASSERT(0 <= k && k < 4); | 302 SkASSERT(0 <= k && k < 4); |
298 return vgetq_lane_f32(fVec, k&3); | 303 return vgetq_lane_f32(fVec, k&3); |
299 } | 304 } |
300 | 305 |
301 protected: | |
302 float32x4_t fVec; | 306 float32x4_t fVec; |
303 }; | 307 }; |
304 | 308 |
// 8-wide vector of 16-bit unsigned integers.
template <>
class SkNi<8, uint16_t> {
public:
    SkNi(const uint16x8_t& vec) : fVec(vec) {}
    SkNi() {}

    // Broadcast one value into every lane.
    explicit SkNi(uint16_t val) : fVec(vdupq_n_u16(val)) {}
    // Build from eight explicit lane values, lowest lane first.
    SkNi(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
         uint16_t e, uint16_t f, uint16_t g, uint16_t h)
        : fVec((uint16x8_t) { a,b,c,d, e,f,g,h }) {}

    static SkNi Load(const uint16_t vals[8]) { return vld1q_u16(vals); }
    void store(uint16_t vals[8]) const { vst1q_u16(vals, fVec); }

    SkNi operator + (const SkNi& o) const { return vaddq_u16(fVec, o.fVec); }
    SkNi operator - (const SkNi& o) const { return vsubq_u16(fVec, o.fVec); }
    SkNi operator * (const SkNi& o) const { return vmulq_u16(fVec, o.fVec); }

    // NEON shift intrinsics need an immediate count, so SHIFT16 dispatches
    // the runtime `bits` through a switch over all constants.
    SkNi operator << (int bits) const { SHIFT16(vshlq_n_u16, fVec, bits); }
    SkNi operator >> (int bits) const { SHIFT16(vshrq_n_u16, fVec, bits); }

    // Extract lane k (k must be a compile-time constant, 0 <= k < 8).
    template <int k> uint16_t kth() const {
        SkASSERT(0 <= k && k < 8);
        return vgetq_lane_u16(fVec, k&7);
    }

    uint16x8_t fVec;
};
| 339 |
// 16-wide vector of 8-bit unsigned integers.
template <>
class SkNi<16, uint8_t> {
public:
    SkNi(const uint8x16_t& vec) : fVec(vec) {}

    SkNi() {}
    // Broadcast one value into every lane.
    explicit SkNi(uint8_t val) : fVec(vdupq_n_u8(val)) {}
    static SkNi Load(const uint8_t vals[16]) { return vld1q_u8(vals); }

    // Build from sixteen explicit lane values, lowest lane first.
    SkNi(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
         uint8_t e, uint8_t f, uint8_t g, uint8_t h,
         uint8_t i, uint8_t j, uint8_t k, uint8_t l,
         uint8_t m, uint8_t n, uint8_t o, uint8_t p) {
        fVec = (uint8x16_t) { a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p };
    }

    void store(uint8_t vals[16]) const { vst1q_u8(vals, fVec); }

    SkNi operator + (const SkNi& o) const { return vaddq_u8(fVec, o.fVec); }
    SkNi operator - (const SkNi& o) const { return vsubq_u8(fVec, o.fVec); }
    SkNi operator * (const SkNi& o) const { return vmulq_u8(fVec, o.fVec); }

    // NEON shift intrinsics need an immediate count, so SHIFT8 dispatches
    // the runtime `bits` through a switch over all constants.
    SkNi operator << (int bits) const { SHIFT8(vshlq_n_u8, fVec, bits); }
    SkNi operator >> (int bits) const { SHIFT8(vshrq_n_u8, fVec, bits); }

    // Extract lane k (k must be a compile-time constant, 0 <= k < 16).
    // Fixed: the assert excluded valid lane 15 (was k < 15), and the lane
    // index was masked with k&16 — which is 0 for every valid k, so every
    // call returned lane 0.  A 16-lane vector needs k&15.
    template <int k> uint8_t kth() const {
        SkASSERT(0 <= k && k < 16);
        return vgetq_lane_u8(fVec, k&15);
    }

    uint8x16_t fVec;
};
| 372 |
| 373 #undef SHIFT32 |
| 374 #undef SHIFT16 |
| 375 #undef SHIFT8 |
| 376 |
305 #endif//SkNx_neon_DEFINED | 377 #endif//SkNx_neon_DEFINED |
OLD | NEW |