OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
10 | 10 |
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
74 Nb operator == (const SkNf& o) const { return vceq_f32(fVec, o.fVec); } | 74 Nb operator == (const SkNf& o) const { return vceq_f32(fVec, o.fVec); } |
75 Nb operator < (const SkNf& o) const { return vclt_f32(fVec, o.fVec); } | 75 Nb operator < (const SkNf& o) const { return vclt_f32(fVec, o.fVec); } |
76 Nb operator > (const SkNf& o) const { return vcgt_f32(fVec, o.fVec); } | 76 Nb operator > (const SkNf& o) const { return vcgt_f32(fVec, o.fVec); } |
77 Nb operator <= (const SkNf& o) const { return vcle_f32(fVec, o.fVec); } | 77 Nb operator <= (const SkNf& o) const { return vcle_f32(fVec, o.fVec); } |
78 Nb operator >= (const SkNf& o) const { return vcge_f32(fVec, o.fVec); } | 78 Nb operator >= (const SkNf& o) const { return vcge_f32(fVec, o.fVec); } |
79 Nb operator != (const SkNf& o) const { return vmvn_u32(vceq_f32(fVec, o.fVec
)); } | 79 Nb operator != (const SkNf& o) const { return vmvn_u32(vceq_f32(fVec, o.fVec
)); } |
80 | 80 |
81 static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fV
ec); } | 81 static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fV
ec); } |
82 static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fV
ec); } | 82 static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fV
ec); } |
83 | 83 |
84 SkNf rsqrt0() const { return vrsqrte_f32(fVec); } | 84 SkNf rsqrt() const { |
85 SkNf rsqrt1() const { | 85 float32x2_t est0 = vrsqrte_f32(fVec), |
86 float32x2_t est0 = this->rsqrt0().fVec; | 86 est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est
0); |
87 return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); | 87 return est1; |
88 } | |
89 SkNf rsqrt2() const { | |
90 float32x2_t est1 = this->rsqrt1().fVec; | |
91 return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1); | |
92 } | 88 } |
93 | 89 |
94 SkNf sqrt() const { | 90 SkNf sqrt() const { |
95 #if defined(SK_CPU_ARM64) | 91 #if defined(SK_CPU_ARM64) |
96 return vsqrt_f32(fVec); | 92 return vsqrt_f32(fVec); |
97 #else | 93 #else |
98 return *this * this->rsqrt2(); | 94 float32x2_t est1 = this->rsqrt().fVec, |
| 95 // An extra step of Newton's method to refine the estimate of 1/sqrt(thi
s). |
| 96 est2 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est
1); |
| 97 return vmul_f32(fVec, est2); |
99 #endif | 98 #endif |
100 } | 99 } |
101 | 100 |
102 template <int k> float kth() const { | 101 template <int k> float kth() const { |
103 SkASSERT(0 <= k && k < 2); | 102 SkASSERT(0 <= k && k < 2); |
104 return vget_lane_f32(fVec, k&1); | 103 return vget_lane_f32(fVec, k&1); |
105 } | 104 } |
106 | 105 |
107 private: | 106 private: |
108 float32x2_t fVec; | 107 float32x2_t fVec; |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
145 Nb operator <= (const SkNf& o) const { return vcleq_f64(fVec, o.fVec); } | 144 Nb operator <= (const SkNf& o) const { return vcleq_f64(fVec, o.fVec); } |
146 Nb operator >= (const SkNf& o) const { return vcgeq_f64(fVec, o.fVec); } | 145 Nb operator >= (const SkNf& o) const { return vcgeq_f64(fVec, o.fVec); } |
147 Nb operator != (const SkNf& o) const { | 146 Nb operator != (const SkNf& o) const { |
148 return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(f
Vec, o.fVec)))); | 147 return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(f
Vec, o.fVec)))); |
149 } | 148 } |
150 | 149 |
151 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.f
Vec); } | 150 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.f
Vec); } |
152 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.f
Vec); } | 151 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.f
Vec); } |
153 | 152 |
154 SkNf sqrt() const { return vsqrtq_f64(fVec); } | 153 SkNf sqrt() const { return vsqrtq_f64(fVec); } |
155 | 154 SkNf rsqrt() const { |
156 SkNf rsqrt0() const { return vrsqrteq_f64(fVec); } | 155 float64x2_t est0 = vrsqrteq_f64(fVec), |
157 SkNf rsqrt1() const { | 156 est1 = vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)),
est0); |
158 float32x4_t est0 = this->rsqrt0().fVec; | 157 return est1; |
159 return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0); | |
160 } | |
161 SkNf rsqrt2() const { | |
162 float32x4_t est1 = this->rsqrt1().fVec; | |
163 return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est1, est1)), est1); | |
164 } | 158 } |
165 | 159 |
166 SkNf approxInvert() const { | 160 SkNf approxInvert() const { |
167 float64x2_t est0 = vrecpeq_f64(fVec), | 161 float64x2_t est0 = vrecpeq_f64(fVec), |
168 est1 = vmulq_f64(vrecpsq_f64(est0, fVec), est0); | 162 est1 = vmulq_f64(vrecpsq_f64(est0, fVec), est0); |
169 return est1; | 163 return est1; |
170 } | 164 } |
171 | 165 |
172 SkNf invert() const { | 166 SkNf invert() const { |
173 float64x2_t est1 = this->approxInvert().fVec, | 167 float64x2_t est1 = this->approxInvert().fVec, |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
268 Nb operator == (const SkNf& o) const { return vceqq_f32(fVec, o.fVec); } | 262 Nb operator == (const SkNf& o) const { return vceqq_f32(fVec, o.fVec); } |
269 Nb operator < (const SkNf& o) const { return vcltq_f32(fVec, o.fVec); } | 263 Nb operator < (const SkNf& o) const { return vcltq_f32(fVec, o.fVec); } |
270 Nb operator > (const SkNf& o) const { return vcgtq_f32(fVec, o.fVec); } | 264 Nb operator > (const SkNf& o) const { return vcgtq_f32(fVec, o.fVec); } |
271 Nb operator <= (const SkNf& o) const { return vcleq_f32(fVec, o.fVec); } | 265 Nb operator <= (const SkNf& o) const { return vcleq_f32(fVec, o.fVec); } |
272 Nb operator >= (const SkNf& o) const { return vcgeq_f32(fVec, o.fVec); } | 266 Nb operator >= (const SkNf& o) const { return vcgeq_f32(fVec, o.fVec); } |
273 Nb operator != (const SkNf& o) const { return vmvnq_u32(vceqq_f32(fVec, o.fV
ec)); } | 267 Nb operator != (const SkNf& o) const { return vmvnq_u32(vceqq_f32(fVec, o.fV
ec)); } |
274 | 268 |
275 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.f
Vec); } | 269 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.f
Vec); } |
276 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.f
Vec); } | 270 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.f
Vec); } |
277 | 271 |
278 SkNf rsqrt0() const { return vrsqrteq_f32(fVec); } | 272 SkNf rsqrt() const { |
279 SkNf rsqrt1() const { | 273 float32x4_t est0 = vrsqrteq_f32(fVec), |
280 float32x4_t est0 = this->rsqrt0().fVec; | 274 est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)),
est0); |
281 return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0); | 275 return est1; |
282 } | |
283 SkNf rsqrt2() const { | |
284 float32x4_t est1 = this->rsqrt1().fVec; | |
285 return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1); | |
286 } | 276 } |
287 | 277 |
288 SkNf sqrt() const { | 278 SkNf sqrt() const { |
289 #if defined(SK_CPU_ARM64) | 279 #if defined(SK_CPU_ARM64) |
290 return vsqrtq_f32(fVec); | 280 return vsqrtq_f32(fVec); |
291 #else | 281 #else |
292 return *this * this->rsqrt2(); | 282 float32x4_t est1 = this->rsqrt().fVec, |
| 283 // An extra step of Newton's method to refine the estimate of 1/sqrt(thi
s). |
| 284 est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)),
est1); |
| 285 return vmulq_f32(fVec, est2); |
293 #endif | 286 #endif |
294 } | 287 } |
295 | 288 |
296 template <int k> float kth() const { | 289 template <int k> float kth() const { |
297 SkASSERT(0 <= k && k < 4); | 290 SkASSERT(0 <= k && k < 4); |
298 return vgetq_lane_f32(fVec, k&3); | 291 return vgetq_lane_f32(fVec, k&3); |
299 } | 292 } |
300 | 293 |
301 protected: | 294 protected: |
302 float32x4_t fVec; | 295 float32x4_t fVec; |
303 }; | 296 }; |
304 | 297 |
305 #endif//SkNx_neon_DEFINED | 298 #endif//SkNx_neon_DEFINED |
OLD | NEW |