| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
| 9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
| 10 | 10 |
| 11 #include <arm_neon.h> | 11 #include <arm_neon.h> |
| 12 | 12 |
| 13 template <> | 13 template <> |
| 14 class SkNi<2, int32_t> { | 14 class SkNi<2, int32_t> { |
| 15 public: | 15 public: |
| 16 SkNi(int32x2_t vec) : fVec(vec) {} | 16 SkNi(int32x2_t vec) : fVec(vec) {} |
| 17 | 17 |
| 18 SkNi() {} | 18 SkNi() {} |
| 19 bool allTrue() const { return fVec[0] && fVec[1]; } | 19 bool allTrue() const { return vget_lane_s32(fVec, 0) && vget_lane_s32(fVec,
1); } |
| 20 bool anyTrue() const { return fVec[0] || fVec[1]; } | 20 bool anyTrue() const { return vget_lane_s32(fVec, 0) || vget_lane_s32(fVec,
1); } |
| 21 private: | 21 private: |
| 22 int32x2_t fVec; | 22 int32x2_t fVec; |
| 23 }; | 23 }; |
| 24 | 24 |
| 25 template <> | 25 template <> |
| 26 class SkNi<4, int32_t> { | 26 class SkNi<4, int32_t> { |
| 27 public: | 27 public: |
| 28 SkNi(int32x4_t vec) : fVec(vec) {} | 28 SkNi(int32x4_t vec) : fVec(vec) {} |
| 29 | 29 |
| 30 SkNi() {} | 30 SkNi() {} |
| 31 bool allTrue() const { return fVec[0] && fVec[1] && fVec[2] && fVec[3]; } | 31 bool allTrue() const { return vgetq_lane_s32(fVec, 0) && vgetq_lane_s32(fVec
, 1) |
| 32 bool anyTrue() const { return fVec[0] || fVec[1] || fVec[2] || fVec[3]; } | 32 && vgetq_lane_s32(fVec, 2) && vgetq_lane_s32(fVec
, 3); } |
| 33 bool anyTrue() const { return vgetq_lane_s32(fVec, 0) || vgetq_lane_s32(fVec
, 1) |
| 34 || vgetq_lane_s32(fVec, 2) || vgetq_lane_s32(fVec
, 3); } |
| 33 private: | 35 private: |
| 34 int32x4_t fVec; | 36 int32x4_t fVec; |
| 35 }; | 37 }; |
| 36 | 38 |
| 37 template <> | 39 template <> |
| 38 class SkNf<2, float> { | 40 class SkNf<2, float> { |
| 39 typedef SkNi<2, int32_t> Ni; | 41 typedef SkNi<2, int32_t> Ni; |
| 40 public: | 42 public: |
| 41 SkNf(float32x2_t vec) : fVec(vec) {} | 43 SkNf(float32x2_t vec) : fVec(vec) {} |
| 42 | 44 |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 91 #if defined(SK_CPU_ARM64) | 93 #if defined(SK_CPU_ARM64) |
| 92 return vsqrt_f32(fVec); | 94 return vsqrt_f32(fVec); |
| 93 #else | 95 #else |
| 94 float32x2_t est1 = this->rsqrt().fVec, | 96 float32x2_t est1 = this->rsqrt().fVec, |
| 95 // An extra step of Newton's method to refine the estimate of 1/sqrt(thi
s). | 97 // An extra step of Newton's method to refine the estimate of 1/sqrt(thi
s). |
| 96 est2 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est
1); | 98 est2 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est
1); |
| 97 return vmul_f32(fVec, est2); | 99 return vmul_f32(fVec, est2); |
| 98 #endif | 100 #endif |
| 99 } | 101 } |
| 100 | 102 |
| 101 float operator[] (int k) const { | 103 template <int k> float kth() const { |
| 102 SkASSERT(0 <= k && k < 2); | 104 SkASSERT(0 <= k && k < 2); |
| 103 return fVec[k]; | 105 return vget_lane_f32(fVec, k&1); |
| 104 } | 106 } |
| 105 | 107 |
| 106 private: | 108 private: |
| 107 float32x2_t fVec; | 109 float32x2_t fVec; |
| 108 }; | 110 }; |
| 109 | 111 |
| 110 #if defined(SK_CPU_ARM64) | 112 #if defined(SK_CPU_ARM64) |
| 111 template <> | 113 template <> |
| 112 class SkNi<2, int64_t> { | 114 class SkNi<2, int64_t> { |
| 113 public: | 115 public: |
| 114 SkNi(int64x2_t vec) : fVec(vec) {} | 116 SkNi(int64x2_t vec) : fVec(vec) {} |
| 115 | 117 |
| 116 SkNi() {} | 118 SkNi() {} |
| 117 bool allTrue() const { return fVec[0] && fVec[1]; } | 119 bool allTrue() const { return vgetq_lane_s64(fVec, 0) && vgetq_lane_s64(fVec
, 1); } |
| 118 bool anyTrue() const { return fVec[0] || fVec[1]; } | 120 bool anyTrue() const { return vgetq_lane_s64(fVec, 0) || vgetq_lane_s64(fVec
, 1); } |
| 119 private: | 121 private: |
| 120 int64x2_t fVec; | 122 int64x2_t fVec; |
| 121 }; | 123 }; |
| 122 | 124 |
| 123 template <> | 125 template <> |
| 124 class SkNf<2, double> { | 126 class SkNf<2, double> { |
| 125 typedef SkNi<2, int64_t> Ni; | 127 typedef SkNi<2, int64_t> Ni; |
| 126 public: | 128 public: |
| 127 SkNf(float64x2_t vec) : fVec(vec) {} | 129 SkNf(float64x2_t vec) : fVec(vec) {} |
| 128 | 130 |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 163 return est1; | 165 return est1; |
| 164 } | 166 } |
| 165 | 167 |
| 166 SkNf invert() const { | 168 SkNf invert() const { |
| 167 float64x2_t est1 = this->approxInvert().fVec, | 169 float64x2_t est1 = this->approxInvert().fVec, |
| 168 est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1), | 170 est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1), |
| 169 est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2); | 171 est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2); |
| 170 return est3; | 172 return est3; |
| 171 } | 173 } |
| 172 | 174 |
| 173 double operator[] (int k) const { | 175 template <int k> double kth() const { |
| 174 SkASSERT(0 <= k && k < 2); | 176 SkASSERT(0 <= k && k < 2); |
| 175 return fVec[k]; | 177 return vgetq_lane_f64(fVec, k&1); |
| 176 } | 178 } |
| 177 | 179 |
| 178 private: | 180 private: |
| 179 float64x2_t fVec; | 181 float64x2_t fVec; |
| 180 }; | 182 }; |
| 181 #endif//defined(SK_CPU_ARM64) | 183 #endif//defined(SK_CPU_ARM64) |
| 182 | 184 |
| 183 template <> | 185 template <> |
| 184 class SkNf<4, float> { | 186 class SkNf<4, float> { |
| 185 typedef SkNi<4, int32_t> Ni; | 187 typedef SkNi<4, int32_t> Ni; |
| (...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 238 #if defined(SK_CPU_ARM64) | 240 #if defined(SK_CPU_ARM64) |
| 239 return vsqrtq_f32(fVec); | 241 return vsqrtq_f32(fVec); |
| 240 #else | 242 #else |
| 241 float32x4_t est1 = this->rsqrt().fVec, | 243 float32x4_t est1 = this->rsqrt().fVec, |
| 242 // An extra step of Newton's method to refine the estimate of 1/sqrt(thi
s). | 244 // An extra step of Newton's method to refine the estimate of 1/sqrt(thi
s). |
| 243 est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)),
est1); | 245 est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)),
est1); |
| 244 return vmulq_f32(fVec, est2); | 246 return vmulq_f32(fVec, est2); |
| 245 #endif | 247 #endif |
| 246 } | 248 } |
| 247 | 249 |
| 248 float operator[] (int k) const { | 250 template <int k> float kth() const { |
| 249 SkASSERT(0 <= k && k < 4); | 251 SkASSERT(0 <= k && k < 4); |
| 250 return fVec[k]; | 252 return vgetq_lane_f32(fVec, k&3); |
| 251 } | 253 } |
| 252 | 254 |
| 253 private: | 255 private: |
| 254 float32x4_t fVec; | 256 float32x4_t fVec; |
| 255 }; | 257 }; |
| 256 | 258 |
| 257 #endif//SkNx_neon_DEFINED | 259 #endif//SkNx_neon_DEFINED |
| OLD | NEW |