| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
| 9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
| 10 | 10 |
| (...skipping 63 matching lines...) |
| 74 Nb operator == (const SkNf& o) const { return vceq_f32(fVec, o.fVec); } | 74 Nb operator == (const SkNf& o) const { return vceq_f32(fVec, o.fVec); } |
| 75 Nb operator < (const SkNf& o) const { return vclt_f32(fVec, o.fVec); } | 75 Nb operator < (const SkNf& o) const { return vclt_f32(fVec, o.fVec); } |
| 76 Nb operator > (const SkNf& o) const { return vcgt_f32(fVec, o.fVec); } | 76 Nb operator > (const SkNf& o) const { return vcgt_f32(fVec, o.fVec); } |
| 77 Nb operator <= (const SkNf& o) const { return vcle_f32(fVec, o.fVec); } | 77 Nb operator <= (const SkNf& o) const { return vcle_f32(fVec, o.fVec); } |
| 78 Nb operator >= (const SkNf& o) const { return vcge_f32(fVec, o.fVec); } | 78 Nb operator >= (const SkNf& o) const { return vcge_f32(fVec, o.fVec); } |
| 79 Nb operator != (const SkNf& o) const { return vmvn_u32(vceq_f32(fVec, o.fVec)); } | 79 Nb operator != (const SkNf& o) const { return vmvn_u32(vceq_f32(fVec, o.fVec)); } |
| 80 | 80 |
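
Note: the NEON compares used above (vceq_f32, vclt_f32, ...) return a per-lane mask, all bits set where the predicate holds and all bits clear otherwise, which is why operator!= is just vmvn (bitwise NOT) applied to the vceq result. A minimal scalar sketch of that mask convention, not part of this header and assuming 32-bit lanes:

    #include <cassert>
    #include <cstdint>

    // One lane of a vceq_f32-style compare: all-ones when equal, all-zeros otherwise.
    static uint32_t lane_eq(float a, float b) { return a == b ? 0xFFFFFFFFu : 0u; }

    int main() {
        assert(lane_eq(1.0f, 1.0f) == 0xFFFFFFFFu);
        assert(lane_eq(1.0f, 2.0f) == 0u);
        assert(~lane_eq(1.0f, 2.0f) == 0xFFFFFFFFu);  // the vmvn step behind operator!=
        return 0;
    }
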
| 81 static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fVec); } | 81 static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fVec); } |
| 82 static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fVec); } | 82 static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fVec); } |
| 83 | 83 |
| 84 SkNf rsqrt() const { | 84 SkNf rsqrt0() const { return vrsqrte_f32(fVec); } |
| 85 float32x2_t est0 = vrsqrte_f32(fVec), | 85 SkNf rsqrt1() const { |
| 86 est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); | 86 float32x2_t est0 = this->rsqrt0().fVec; |
| 87 return est1; | 87 return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); |
| 88 } |
| 89 SkNf rsqrt2() const { |
| 90 float32x2_t est1 = this->rsqrt1().fVec; |
| 91 return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1); |
| 88 } | 92 } |
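
Note: rsqrt0/rsqrt1/rsqrt2 expose zero, one, and two Newton-Raphson refinements of the hardware reciprocal-square-root estimate. VRSQRTS(a, b) evaluates (3 - a*b)/2, so each refinement is est' = est * (3 - x*est*est)/2. A scalar sketch of the same recurrence (plain C++, illustrative only; the coarse starting guess stands in for vrsqrte's table lookup):

    #include <cmath>
    #include <cstdio>

    // One Newton-Raphson step toward 1/sqrt(x), mirroring vmul_f32(vrsqrts_f32(x, est*est), est).
    static float rsqrt_step(float x, float est) { return est * (3.0f - x * est * est) * 0.5f; }

    int main() {
        float x = 2.0f;
        float est0 = 0.7f;                    // stand-in for the rough vrsqrte_f32 estimate
        float est1 = rsqrt_step(x, est0);     // ~ rsqrt1()
        float est2 = rsqrt_step(x, est1);     // ~ rsqrt2()
        std::printf("%.7f %.7f %.7f\n", est1, est2, 1.0f / std::sqrt(x));
        return 0;
    }
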
| 89 | 93 |
| 90 SkNf sqrt() const { | 94 SkNf sqrt() const { |
| 91 #if defined(SK_CPU_ARM64) | 95 #if defined(SK_CPU_ARM64) |
| 92 return vsqrt_f32(fVec); | 96 return vsqrt_f32(fVec); |
| 93 #else | 97 #else |
| 94 float32x2_t est1 = this->rsqrt().fVec, | 98 return *this * this->rsqrt2(); |
| 95 // An extra step of Newton's method to refine the estimate of 1/sqrt(this). | |
| 96 est2 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1); | |
| 97 return vmul_f32(fVec, est2); | |
| 98 #endif | 99 #endif |
| 99 } | 100 } |
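
Note: 32-bit ARM NEON has no vector square-root instruction, so the non-ARM64 path uses the identity sqrt(x) = x * (1/sqrt(x)) with the twice-refined rsqrt2(); one caveat of that identity is that it yields NaN at x == 0 (0 times an infinite reciprocal estimate) instead of 0. A quick scalar check of the identity, purely illustrative:

    #include <cassert>
    #include <cmath>

    int main() {
        float x = 9.0f;
        float approx = x * (1.0f / std::sqrt(x));  // what *this * this->rsqrt2() approximates
        assert(std::fabs(approx - std::sqrt(x)) < 1e-4f);
        return 0;
    }
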
| 100 | 101 |
| 101 template <int k> float kth() const { | 102 template <int k> float kth() const { |
| 102 SkASSERT(0 <= k && k < 2); | 103 SkASSERT(0 <= k && k < 2); |
| 103 return vget_lane_f32(fVec, k&1); | 104 return vget_lane_f32(fVec, k&1); |
| 104 } | 105 } |
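
Note: kth<k>() reads a single lane with vget_lane_f32; the lane index has to be a compile-time constant, which is why it is a template parameter, and k&1 keeps it in range for the two-lane vector. A standalone sketch of the underlying intrinsic (needs a NEON target and arm_neon.h; not tied to this class):

    #include <arm_neon.h>
    #include <cstdio>

    int main() {
        const float data[2] = {1.5f, 2.5f};
        float32x2_t v = vld1_f32(data);
        // The lane argument must be an immediate, just as in kth<k>().
        std::printf("%f %f\n", vget_lane_f32(v, 0), vget_lane_f32(v, 1));
        return 0;
    }
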
| 105 | 106 |
| 106 private: | 107 private: |
| 107 float32x2_t fVec; | 108 float32x2_t fVec; |
| (...skipping 36 matching lines...) |
| 144 Nb operator <= (const SkNf& o) const { return vcleq_f64(fVec, o.fVec); } | 145 Nb operator <= (const SkNf& o) const { return vcleq_f64(fVec, o.fVec); } |
| 145 Nb operator >= (const SkNf& o) const { return vcgeq_f64(fVec, o.fVec); } | 146 Nb operator >= (const SkNf& o) const { return vcgeq_f64(fVec, o.fVec); } |
| 146 Nb operator != (const SkNf& o) const { | 147 Nb operator != (const SkNf& o) const { |
| 147 return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(fVec, o.fVec)))); | 148 return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(fVec, o.fVec)))); |
| 148 } | 149 } |
| 149 | 150 |
| 150 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.fVec); } | 151 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.fVec); } |
| 151 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.fVec); } | 152 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.fVec); } |
| 152 | 153 |
| 153 SkNf sqrt() const { return vsqrtq_f64(fVec); } | 154 SkNf sqrt() const { return vsqrtq_f64(fVec); } |
| 154 SkNf rsqrt() const { | 155 |
| 155 float64x2_t est0 = vrsqrteq_f64(fVec), | 156 SkNf rsqrt0() const { return vrsqrteq_f64(fVec); } |
| 156 est1 = vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0); | 157 SkNf rsqrt1() const { |
| 157 return est1; | 158 float64x2_t est0 = this->rsqrt0().fVec; |
| 159 return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0); |
| 160 } |
| 161 SkNf rsqrt2() const { |
| 162 float64x2_t est1 = this->rsqrt1().fVec; |
| 163 return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est1, est1)), est1); |
| 158 } | 164 } |
| 159 | 165 |
| 160 SkNf approxInvert() const { | 166 SkNf approxInvert() const { |
| 161 float64x2_t est0 = vrecpeq_f64(fVec), | 167 float64x2_t est0 = vrecpeq_f64(fVec), |
| 162 est1 = vmulq_f64(vrecpsq_f64(est0, fVec), est0); | 168 est1 = vmulq_f64(vrecpsq_f64(est0, fVec), est0); |
| 163 return est1; | 169 return est1; |
| 164 } | 170 } |
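
Note: approxInvert() applies the same pattern to the reciprocal: vrecpeq_f64 gives a coarse 1/x estimate, and VRECPS(a, b) evaluates 2 - a*b, so each refinement is est' = est * (2 - x*est), the classic Newton-Raphson step for 1/x (invert(), just below, refines that estimate further). A scalar sketch of the recurrence, with an arbitrary starting guess standing in for the hardware estimate:

    #include <cstdio>

    // One Newton-Raphson step toward 1/x, mirroring vmulq_f64(vrecpsq_f64(est, x), est).
    static double recip_step(double x, double est) { return est * (2.0 - x * est); }

    int main() {
        double x = 3.0;
        double est = 0.3;                      // coarse stand-in for vrecpeq_f64(x)
        for (int i = 0; i < 3; ++i) {
            est = recip_step(x, est);
            std::printf("step %d: %.15f\n", i + 1, est);   // converges toward 1/3
        }
        return 0;
    }
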
| 165 | 171 |
| 166 SkNf invert() const { | 172 SkNf invert() const { |
| 167 float64x2_t est1 = this->approxInvert().fVec, | 173 float64x2_t est1 = this->approxInvert().fVec, |
| (...skipping 94 matching lines...) |
| 262 Nb operator == (const SkNf& o) const { return vceqq_f32(fVec, o.fVec); } | 268 Nb operator == (const SkNf& o) const { return vceqq_f32(fVec, o.fVec); } |
| 263 Nb operator < (const SkNf& o) const { return vcltq_f32(fVec, o.fVec); } | 269 Nb operator < (const SkNf& o) const { return vcltq_f32(fVec, o.fVec); } |
| 264 Nb operator > (const SkNf& o) const { return vcgtq_f32(fVec, o.fVec); } | 270 Nb operator > (const SkNf& o) const { return vcgtq_f32(fVec, o.fVec); } |
| 265 Nb operator <= (const SkNf& o) const { return vcleq_f32(fVec, o.fVec); } | 271 Nb operator <= (const SkNf& o) const { return vcleq_f32(fVec, o.fVec); } |
| 266 Nb operator >= (const SkNf& o) const { return vcgeq_f32(fVec, o.fVec); } | 272 Nb operator >= (const SkNf& o) const { return vcgeq_f32(fVec, o.fVec); } |
| 267 Nb operator != (const SkNf& o) const { return vmvnq_u32(vceqq_f32(fVec, o.fVec)); } | 273 Nb operator != (const SkNf& o) const { return vmvnq_u32(vceqq_f32(fVec, o.fVec)); } |
| 268 | 274 |
| 269 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.fVec); } | 275 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.fVec); } |
| 270 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.fVec); } | 276 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.fVec); } |
| 271 | 277 |
| 272 SkNf rsqrt() const { | 278 SkNf rsqrt0() const { return vrsqrteq_f32(fVec); } |
| 273 float32x4_t est0 = vrsqrteq_f32(fVec), | 279 SkNf rsqrt1() const { |
| 274 est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0); | 280 float32x4_t est0 = this->rsqrt0().fVec; |
| 275 return est1; | 281 return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0); |
| 282 } |
| 283 SkNf rsqrt2() const { |
| 284 float32x4_t est1 = this->rsqrt1().fVec; |
| 285 return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1); |
| 276 } | 286 } |
| 277 | 287 |
| 278 SkNf sqrt() const { | 288 SkNf sqrt() const { |
| 279 #if defined(SK_CPU_ARM64) | 289 #if defined(SK_CPU_ARM64) |
| 280 return vsqrtq_f32(fVec); | 290 return vsqrtq_f32(fVec); |
| 281 #else | 291 #else |
| 282 float32x4_t est1 = this->rsqrt().fVec, | 292 return *this * this->rsqrt2(); |
| 283 // An extra step of Newton's method to refine the estimate of 1/sqrt(this). | |
| 284 est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1); | |
| 285 return vmulq_f32(fVec, est2); | |
| 286 #endif | 293 #endif |
| 287 } | 294 } |
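
Note: putting the 32-bit path together for the four-lane case, sqrt() is now vrsqrteq_f32, two vrsqrtsq_f32 refinements, then a multiply by x. A standalone NEON sketch of that equivalent pipeline (requires arm_neon.h and a NEON target; results should track vsqrtq_f32 / std::sqrt to within a few ulps):

    #include <arm_neon.h>
    #include <cstdio>

    static float32x4_t approx_sqrt(float32x4_t x) {
        float32x4_t e = vrsqrteq_f32(x);                        // rsqrt0: rough 1/sqrt estimate
        e = vmulq_f32(vrsqrtsq_f32(x, vmulq_f32(e, e)), e);     // rsqrt1: first refinement
        e = vmulq_f32(vrsqrtsq_f32(x, vmulq_f32(e, e)), e);     // rsqrt2: second refinement
        return vmulq_f32(x, e);                                 // x * 1/sqrt(x) ~= sqrt(x)
    }

    int main() {
        const float in[4] = {1.0f, 2.0f, 9.0f, 100.0f};
        float out[4];
        vst1q_f32(out, approx_sqrt(vld1q_f32(in)));
        std::printf("%f %f %f %f\n", out[0], out[1], out[2], out[3]);
        return 0;
    }
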
| 288 | 295 |
| 289 template <int k> float kth() const { | 296 template <int k> float kth() const { |
| 290 SkASSERT(0 <= k && k < 4); | 297 SkASSERT(0 <= k && k < 4); |
| 291 return vgetq_lane_f32(fVec, k&3); | 298 return vgetq_lane_f32(fVec, k&3); |
| 292 } | 299 } |
| 293 | 300 |
| 294 protected: | 301 protected: |
| 295 float32x4_t fVec; | 302 float32x4_t fVec; |
| 296 }; | 303 }; |
| 297 | 304 |
| 298 #endif//SkNx_neon_DEFINED | 305 #endif//SkNx_neon_DEFINED |