| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
| 9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
| 10 | 10 |
| 11 #include <arm_neon.h> | 11 #include <arm_neon.h> |
| 12 | 12 |
| 13 // Well, this is absurd. The shifts require compile-time constant arguments. |
| 14 |
| 15 #define SHIFT8(op, v, bits) switch(bits) { \ |
| 16     case  1: return op(v,  1);  case  2: return op(v,  2);  case  3: return op(v,  3); \ |
| 17     case  4: return op(v,  4);  case  5: return op(v,  5);  case  6: return op(v,  6); \ |
| 18     case  7: return op(v,  7); \ |
| 19     } return fVec |
| 20 |
| 21 #define SHIFT16(op, v, bits) if (bits < 8) { SHIFT8(op, v, bits); } switch(bits) { \ |
| 22                                 case  8: return op(v,  8);  case  9: return op(v,  9); \ |
| 23     case 10: return op(v, 10);  case 11: return op(v, 11);  case 12: return op(v, 12); \ |
| 24     case 13: return op(v, 13);  case 14: return op(v, 14);  case 15: return op(v, 15); \ |
| 25     } return fVec |
| 26 |
| 27 #define SHIFT32(op, v, bits) if (bits < 16) { SHIFT16(op, v, bits); } switch(bits) { \ |
| 28     case 16: return op(v, 16);  case 17: return op(v, 17);  case 18: return op(v, 18); \ |
| 29     case 19: return op(v, 19);  case 20: return op(v, 20);  case 21: return op(v, 21); \ |
| 30     case 22: return op(v, 22);  case 23: return op(v, 23);  case 24: return op(v, 24); \ |
| 31     case 25: return op(v, 25);  case 26: return op(v, 26);  case 27: return op(v, 27); \ |
| 32     case 28: return op(v, 28);  case 29: return op(v, 29);  case 30: return op(v, 30); \ |
| 33 case 31: return op(v, 31); } return fVec |
| 34 |
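The macros above exist because NEON's `_n_` shift intrinsics encode the shift count as an immediate in the instruction, so the count must be a compile-time constant; a runtime `bits` has to be funneled through a switch whose cases each pass a literal. A minimal standalone sketch of the same dispatch (hypothetical free function `shl_u16`, cases 3 through 15 elided for brevity):

    #include <arm_neon.h>

    uint16x8_t shl_u16(uint16x8_t v, int bits) {
        // uint16x8_t bad = vshlq_n_u16(v, bits);   // won't compile: bits isn't an immediate
        switch (bits) {
            case 1: return vshlq_n_u16(v, 1);       // each case hands the intrinsic a literal
            case 2: return vshlq_n_u16(v, 2);
            // ... cases 3 through 15 continue the same pattern ...
        }
        return v;   // bits == 0 or out of range: identity, matching the macros' `return fVec`
    }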
| 13 template <> | 35 template <> |
| 14 class SkNb<2, 4> { | 36 class SkNb<2, 4> { |
| 15 public: | 37 public: |
| 16 SkNb(uint32x2_t vec) : fVec(vec) {} | 38 SkNb(uint32x2_t vec) : fVec(vec) {} |
| 17 | 39 |
| 18 SkNb() {} | 40 SkNb() {} |
| 19     bool allTrue() const { return vget_lane_u32(fVec, 0) && vget_lane_u32(fVec, 1); } | 41     bool allTrue() const { return vget_lane_u32(fVec, 0) && vget_lane_u32(fVec, 1); } |
| 20     bool anyTrue() const { return vget_lane_u32(fVec, 0) || vget_lane_u32(fVec, 1); } | 42     bool anyTrue() const { return vget_lane_u32(fVec, 0) || vget_lane_u32(fVec, 1); } |
| 21 private: | 43 |
| 22 uint32x2_t fVec; | 44 uint32x2_t fVec; |
| 23 }; | 45 }; |
| 24 | 46 |
| 25 template <> | 47 template <> |
| 26 class SkNb<4, 4> { | 48 class SkNb<4, 4> { |
| 27 public: | 49 public: |
| 28 SkNb(uint32x4_t vec) : fVec(vec) {} | 50 SkNb(uint32x4_t vec) : fVec(vec) {} |
| 29 | 51 |
| 30 SkNb() {} | 52 SkNb() {} |
| 31     bool allTrue() const { return vgetq_lane_u32(fVec, 0) && vgetq_lane_u32(fVec, 1) | 53     bool allTrue() const { return vgetq_lane_u32(fVec, 0) && vgetq_lane_u32(fVec, 1) |
| 32                                && vgetq_lane_u32(fVec, 2) && vgetq_lane_u32(fVec, 3); } | 54                                && vgetq_lane_u32(fVec, 2) && vgetq_lane_u32(fVec, 3); } |
| 33     bool anyTrue() const { return vgetq_lane_u32(fVec, 0) || vgetq_lane_u32(fVec, 1) | 55     bool anyTrue() const { return vgetq_lane_u32(fVec, 0) || vgetq_lane_u32(fVec, 1) |
| 34                                || vgetq_lane_u32(fVec, 2) || vgetq_lane_u32(fVec, 3); } | 56                                || vgetq_lane_u32(fVec, 2) || vgetq_lane_u32(fVec, 3); } |
| 35 private: | 57 |
| 36 uint32x4_t fVec; | 58 uint32x4_t fVec; |
| 37 }; | 59 }; |
| 38 | 60 |
| 39 template <> | 61 template <> |
| 40 class SkNf<2, float> { | 62 class SkNf<2, float> { |
| 41 typedef SkNb<2, 4> Nb; | 63 typedef SkNb<2, 4> Nb; |
| 42 public: | 64 public: |
| 43 SkNf(float32x2_t vec) : fVec(vec) {} | 65 SkNf(float32x2_t vec) : fVec(vec) {} |
| 44 | 66 |
| 45 SkNf() {} | 67 SkNf() {} |
| (...skipping 51 matching lines...) |
| 97 #else | 119 #else |
| 98 return *this * this->rsqrt2(); | 120 return *this * this->rsqrt2(); |
| 99 #endif | 121 #endif |
| 100 } | 122 } |
| 101 | 123 |
| 102 template <int k> float kth() const { | 124 template <int k> float kth() const { |
| 103 SkASSERT(0 <= k && k < 2); | 125 SkASSERT(0 <= k && k < 2); |
| 104 return vget_lane_f32(fVec, k&1); | 126 return vget_lane_f32(fVec, k&1); |
| 105 } | 127 } |
| 106 | 128 |
| 107 private: | |
| 108 float32x2_t fVec; | 129 float32x2_t fVec; |
| 109 }; | 130 }; |
| 110 | 131 |
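Note on the `#else` branch above: it computes sqrt(x) as x * (1/√x) using a refined reciprocal-square-root estimate. A minimal sketch of how such a two-step refinement is typically built from `vrsqrte_f32`/`vrsqrts_f32` (the standard Newton-Raphson idiom; not necessarily the exact body of the elided `rsqrt2()`):

    // vrsqrts_f32(a, b) returns (3 - a*b)/2, so each refinement line computes
    // est * (3 - x*est*est)/2, roughly doubling the estimate's precision.
    static inline float32x2_t rsqrt_refined(float32x2_t x) {
        float32x2_t est = vrsqrte_f32(x);                          // ~12-bit seed for 1/sqrt(x)
        est = vmul_f32(est, vrsqrts_f32(vmul_f32(x, est), est));   // first refinement
        est = vmul_f32(est, vrsqrts_f32(vmul_f32(x, est), est));   // second refinement
        return est;                                                // sqrt(x) ~= x * est
    }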
| 111 #if defined(SK_CPU_ARM64) | 132 #if defined(SK_CPU_ARM64) |
| 112 template <> | 133 template <> |
| 113 class SkNb<2, 8> { | 134 class SkNb<2, 8> { |
| 114 public: | 135 public: |
| 115 SkNb(uint64x2_t vec) : fVec(vec) {} | 136 SkNb(uint64x2_t vec) : fVec(vec) {} |
| 116 | 137 |
| 117 SkNb() {} | 138 SkNb() {} |
| 118     bool allTrue() const { return vgetq_lane_u64(fVec, 0) && vgetq_lane_u64(fVec, 1); } | 139     bool allTrue() const { return vgetq_lane_u64(fVec, 0) && vgetq_lane_u64(fVec, 1); } |
| 119     bool anyTrue() const { return vgetq_lane_u64(fVec, 0) || vgetq_lane_u64(fVec, 1); } | 140     bool anyTrue() const { return vgetq_lane_u64(fVec, 0) || vgetq_lane_u64(fVec, 1); } |
| 120 private: | 141 |
| 121 uint64x2_t fVec; | 142 uint64x2_t fVec; |
| 122 }; | 143 }; |
| 123 | 144 |
| 124 template <> | 145 template <> |
| 125 class SkNf<2, double> { | 146 class SkNf<2, double> { |
| 126 typedef SkNb<2, 8> Nb; | 147 typedef SkNb<2, 8> Nb; |
| 127 public: | 148 public: |
| 128 SkNf(float64x2_t vec) : fVec(vec) {} | 149 SkNf(float64x2_t vec) : fVec(vec) {} |
| 129 | 150 |
| 130 SkNf() {} | 151 SkNf() {} |
| (...skipping 43 matching lines...) |
| 174 est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1), | 195 est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1), |
| 175 est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2); | 196 est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2); |
| 176 return est3; | 197 return est3; |
| 177 } | 198 } |
| 178 | 199 |
| 179 template <int k> double kth() const { | 200 template <int k> double kth() const { |
| 180 SkASSERT(0 <= k && k < 2); | 201 SkASSERT(0 <= k && k < 2); |
| 181 return vgetq_lane_f64(fVec, k&1); | 202 return vgetq_lane_f64(fVec, k&1); |
| 182 } | 203 } |
| 183 | 204 |
| 184 private: | |
| 185 float64x2_t fVec; | 205 float64x2_t fVec; |
| 186 }; | 206 }; |
| 187 #endif//defined(SK_CPU_ARM64) | 207 #endif//defined(SK_CPU_ARM64) |
| 188 | 208 |
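The est1 -> est2 -> est3 chain above is Newton-Raphson reciprocal refinement: `vrecpsq_f64(e, x)` computes 2 - e*x, so each step e' = e * (2 - e*x) roughly doubles the number of correct bits in the estimate of 1/x. A scalar model of the recurrence with made-up numbers, just to show the convergence:

    // One Newton-Raphson step for 1/x: e' = e * (2 - e*x).
    static inline double recip_step(double e, double x) { return e * (2.0 - e * x); }

    // x = 3.0 with a rough seed e0 = 0.3:
    //   e1 = 0.3    * (2 - 0.9)    = 0.33
    //   e2 = 0.33   * (2 - 0.99)   = 0.3333
    //   e3 = 0.3333 * (2 - 0.9999) = 0.33333333  -> quadratic convergence to 1/3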
| 189 template <> | 209 template <> |
| 190 class SkNi<4, int> { | 210 class SkNi<4, int> { |
| 191 public: | 211 public: |
| 192 SkNi(const int32x4_t& vec) : fVec(vec) {} | 212 SkNi(const int32x4_t& vec) : fVec(vec) {} |
| 193 | 213 |
| 194 SkNi() {} | 214 SkNi() {} |
| 195 explicit SkNi(int val) : fVec(vdupq_n_s32(val)) {} | 215 explicit SkNi(int val) : fVec(vdupq_n_s32(val)) {} |
| 196 static SkNi Load(const int vals[4]) { return vld1q_s32(vals); } | 216 static SkNi Load(const int vals[4]) { return vld1q_s32(vals); } |
| 197 SkNi(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; } | 217 SkNi(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; } |
| 198 | 218 |
| 199 void store(int vals[4]) const { vst1q_s32(vals, fVec); } | 219 void store(int vals[4]) const { vst1q_s32(vals, fVec); } |
| 200 | 220 |
| 201 SkNi operator + (const SkNi& o) const { return vaddq_s32(fVec, o.fVec); } | 221 SkNi operator + (const SkNi& o) const { return vaddq_s32(fVec, o.fVec); } |
| 202 SkNi operator - (const SkNi& o) const { return vsubq_s32(fVec, o.fVec); } | 222 SkNi operator - (const SkNi& o) const { return vsubq_s32(fVec, o.fVec); } |
| 203 SkNi operator * (const SkNi& o) const { return vmulq_s32(fVec, o.fVec); } | 223 SkNi operator * (const SkNi& o) const { return vmulq_s32(fVec, o.fVec); } |
| 204 | 224 |
| 205     // Well, this is absurd. The shifts require compile-time constant arguments. | 225     SkNi operator << (int bits) const { SHIFT32(vshlq_n_s32, fVec, bits); } |
| 206 #define SHIFT(op, v, bits) switch(bits) { \ | 226 SkNi operator >> (int bits) const { SHIFT32(vshrq_n_s32, fVec, bits); } |
| 207     case  1: return op(v,  1);  case  2: return op(v,  2);  case  3: return op(v,  3); \ | |
| 208     case  4: return op(v,  4);  case  5: return op(v,  5);  case  6: return op(v,  6); \ | |
| 209     case  7: return op(v,  7);  case  8: return op(v,  8);  case  9: return op(v,  9); \ | |
| 210     case 10: return op(v, 10);  case 11: return op(v, 11);  case 12: return op(v, 12); \ | |
| 211     case 13: return op(v, 13);  case 14: return op(v, 14);  case 15: return op(v, 15); \ | |
| 212     case 16: return op(v, 16);  case 17: return op(v, 17);  case 18: return op(v, 18); \ | |
| 213     case 19: return op(v, 19);  case 20: return op(v, 20);  case 21: return op(v, 21); \ | |
| 214     case 22: return op(v, 22);  case 23: return op(v, 23);  case 24: return op(v, 24); \ | |
| 215     case 25: return op(v, 25);  case 26: return op(v, 26);  case 27: return op(v, 27); \ | |
| 216     case 28: return op(v, 28);  case 29: return op(v, 29);  case 30: return op(v, 30); \ | |
| 217 case 31: return op(v, 31); } return fVec | |
| 218 | |
| 219 SkNi operator << (int bits) const { SHIFT(vshlq_n_s32, fVec, bits); } | |
| 220 SkNi operator >> (int bits) const { SHIFT(vshrq_n_s32, fVec, bits); } | |
| 221 #undef SHIFT | |
| 222 | 227 |
| 223 template <int k> int kth() const { | 228 template <int k> int kth() const { |
| 224 SkASSERT(0 <= k && k < 4); | 229 SkASSERT(0 <= k && k < 4); |
| 225 return vgetq_lane_s32(fVec, k&3); | 230 return vgetq_lane_s32(fVec, k&3); |
| 226 } | 231 } |
| 227 protected: | 232 |
| 228 int32x4_t fVec; | 233 int32x4_t fVec; |
| 229 }; | 234 }; |
| 230 | 235 |
| 231 template <> | 236 template <> |
| 232 class SkNf<4, float> { | 237 class SkNf<4, float> { |
| 233 typedef SkNb<4, 4> Nb; | 238 typedef SkNb<4, 4> Nb; |
| 234 public: | 239 public: |
| 235 SkNf(float32x4_t vec) : fVec(vec) {} | 240 SkNf(float32x4_t vec) : fVec(vec) {} |
| 236 | 241 |
| 237 SkNf() {} | 242 SkNf() {} |
| (...skipping 53 matching lines...) |
| 291 #else | 296 #else |
| 292 return *this * this->rsqrt2(); | 297 return *this * this->rsqrt2(); |
| 293 #endif | 298 #endif |
| 294 } | 299 } |
| 295 | 300 |
| 296 template <int k> float kth() const { | 301 template <int k> float kth() const { |
| 297 SkASSERT(0 <= k && k < 4); | 302 SkASSERT(0 <= k && k < 4); |
| 298 return vgetq_lane_f32(fVec, k&3); | 303 return vgetq_lane_f32(fVec, k&3); |
| 299 } | 304 } |
| 300 | 305 |
| 301 protected: | |
| 302 float32x4_t fVec; | 306 float32x4_t fVec; |
| 303 }; | 307 }; |
| 304 | 308 |
| 309 template <> |
| 310 class SkNi<8, uint16_t> { |
| 311 public: |
| 312 SkNi(const uint16x8_t& vec) : fVec(vec) {} |
| 313 |
| 314 SkNi() {} |
| 315 explicit SkNi(uint16_t val) : fVec(vdupq_n_u16(val)) {} |
| 316 static SkNi Load(const uint16_t vals[8]) { return vld1q_u16(vals); } |
| 317 |
| 318 SkNi(uint16_t a, uint16_t b, uint16_t c, uint16_t d, |
| 319 uint16_t e, uint16_t f, uint16_t g, uint16_t h) { |
| 320 fVec = (uint16x8_t) { a,b,c,d, e,f,g,h }; |
| 321 } |
| 322 |
| 323 void store(uint16_t vals[8]) const { vst1q_u16(vals, fVec); } |
| 324 |
| 325 SkNi operator + (const SkNi& o) const { return vaddq_u16(fVec, o.fVec); } |
| 326 SkNi operator - (const SkNi& o) const { return vsubq_u16(fVec, o.fVec); } |
| 327 SkNi operator * (const SkNi& o) const { return vmulq_u16(fVec, o.fVec); } |
| 328 |
| 329 SkNi operator << (int bits) const { SHIFT16(vshlq_n_u16, fVec, bits); } |
| 330 SkNi operator >> (int bits) const { SHIFT16(vshrq_n_u16, fVec, bits); } |
| 331 |
| 332 template <int k> uint16_t kth() const { |
| 333 SkASSERT(0 <= k && k < 8); |
| 334 return vgetq_lane_u16(fVec, k&7); |
| 335 } |
| 336 |
| 337 uint16x8_t fVec; |
| 338 }; |
| 339 |
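For orientation, a quick usage sketch of the new eight-lane u16 class (hypothetical values, standalone):

    uint16_t in[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    SkNi<8, uint16_t> v = SkNi<8, uint16_t>::Load(in);
    v = (v + SkNi<8, uint16_t>(10)) << 4;   // lanewise add, then constant-dispatched shift
    uint16_t out[8];
    v.store(out);                           // out[0] == (1+10) << 4 == 176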
| 340 template <> |
| 341 class SkNi<16, uint8_t> { |
| 342 public: |
| 343 SkNi(const uint8x16_t& vec) : fVec(vec) {} |
| 344 |
| 345 SkNi() {} |
| 346 explicit SkNi(uint8_t val) : fVec(vdupq_n_u8(val)) {} |
| 347 static SkNi Load(const uint8_t vals[16]) { return vld1q_u8(vals); } |
| 348 |
| 349 SkNi(uint8_t a, uint8_t b, uint8_t c, uint8_t d, |
| 350 uint8_t e, uint8_t f, uint8_t g, uint8_t h, |
| 351 uint8_t i, uint8_t j, uint8_t k, uint8_t l, |
| 352 uint8_t m, uint8_t n, uint8_t o, uint8_t p) { |
| 353 fVec = (uint8x16_t) { a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p }; |
| 354 } |
| 355 |
| 356 void store(uint8_t vals[16]) const { vst1q_u8(vals, fVec); } |
| 357 |
| 358 SkNi operator + (const SkNi& o) const { return vaddq_u8(fVec, o.fVec); } |
| 359 SkNi operator - (const SkNi& o) const { return vsubq_u8(fVec, o.fVec); } |
| 360 SkNi operator * (const SkNi& o) const { return vmulq_u8(fVec, o.fVec); } |
| 361 |
| 362 SkNi operator << (int bits) const { SHIFT8(vshlq_n_u8, fVec, bits); } |
| 363 SkNi operator >> (int bits) const { SHIFT8(vshrq_n_u8, fVec, bits); } |
| 364 |
| 365 template <int k> uint8_t kth() const { |
| 366         SkASSERT(0 <= k && k < 16); |
| 367         return vgetq_lane_u8(fVec, k&15); |
| 368 } |
| 369 |
| 370 uint8x16_t fVec; |
| 371 }; |
| 372 |
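One behavior worth keeping in mind when reading the u8 `operator*` above: `vmulq_u8` keeps only the low 8 bits of each lane's product, i.e. multiplication wraps mod 256. A tiny illustration with made-up values:

    SkNi<16, uint8_t> a(200), b(2);
    uint8_t out[16];
    (a * b).store(out);   // every lane holds (200*2) & 0xFF == 144, not 400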
| 373 #undef SHIFT32 |
| 374 #undef SHIFT16 |
| 375 #undef SHIFT8 |
| 376 |
| 305 #endif//SkNx_neon_DEFINED | 377 #endif//SkNx_neon_DEFINED |