Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Side by Side Diff: src/opts/SkNx_neon.h

Issue 1109913002: Split rsqrt into rsqrt{0,1,2}, with increasing cost and precision on ARM (Closed) Base URL: https://skia.googlesource.com/skia@master
Patch Set: arm64 typos Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/effects/gradients/SkRadialGradient.cpp ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_neon_DEFINED 8 #ifndef SkNx_neon_DEFINED
9 #define SkNx_neon_DEFINED 9 #define SkNx_neon_DEFINED
10 10
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
74 Nb operator == (const SkNf& o) const { return vceq_f32(fVec, o.fVec); } 74 Nb operator == (const SkNf& o) const { return vceq_f32(fVec, o.fVec); }
75 Nb operator < (const SkNf& o) const { return vclt_f32(fVec, o.fVec); } 75 Nb operator < (const SkNf& o) const { return vclt_f32(fVec, o.fVec); }
76 Nb operator > (const SkNf& o) const { return vcgt_f32(fVec, o.fVec); } 76 Nb operator > (const SkNf& o) const { return vcgt_f32(fVec, o.fVec); }
77 Nb operator <= (const SkNf& o) const { return vcle_f32(fVec, o.fVec); } 77 Nb operator <= (const SkNf& o) const { return vcle_f32(fVec, o.fVec); }
78 Nb operator >= (const SkNf& o) const { return vcge_f32(fVec, o.fVec); } 78 Nb operator >= (const SkNf& o) const { return vcge_f32(fVec, o.fVec); }
79 Nb operator != (const SkNf& o) const { return vmvn_u32(vceq_f32(fVec, o.fVec)); } 79 Nb operator != (const SkNf& o) const { return vmvn_u32(vceq_f32(fVec, o.fVec)); }
80 80
81 static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fVec); } 81 static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fVec); }
82 static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fVec); } 82 static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fVec); }
83 83
84 SkNf rsqrt() const { 84 SkNf rsqrt0() const { return vrsqrte_f32(fVec); }
85 float32x2_t est0 = vrsqrte_f32(fVec), 85 SkNf rsqrt1() const {
86 est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); 86 float32x2_t est0 = this->rsqrt0().fVec;
87 return est1; 87 return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0);
88 }
89 SkNf rsqrt2() const {
90 float32x2_t est1 = this->rsqrt1().fVec;
91 return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1);
88 } 92 }
89 93
90 SkNf sqrt() const { 94 SkNf sqrt() const {
91 #if defined(SK_CPU_ARM64) 95 #if defined(SK_CPU_ARM64)
92 return vsqrt_f32(fVec); 96 return vsqrt_f32(fVec);
93 #else 97 #else
94 float32x2_t est1 = this->rsqrt().fVec, 98 return *this * this->rsqrt2();
95 // An extra step of Newton's method to refine the estimate of 1/sqrt(this).
96 est2 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1);
97 return vmul_f32(fVec, est2);
98 #endif 99 #endif
99 } 100 }
100 101
101 template <int k> float kth() const { 102 template <int k> float kth() const {
102 SkASSERT(0 <= k && k < 2); 103 SkASSERT(0 <= k && k < 2);
103 return vget_lane_f32(fVec, k&1); 104 return vget_lane_f32(fVec, k&1);
104 } 105 }
105 106
106 private: 107 private:
107 float32x2_t fVec; 108 float32x2_t fVec;
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
144 Nb operator <= (const SkNf& o) const { return vcleq_f64(fVec, o.fVec); } 145 Nb operator <= (const SkNf& o) const { return vcleq_f64(fVec, o.fVec); }
145 Nb operator >= (const SkNf& o) const { return vcgeq_f64(fVec, o.fVec); } 146 Nb operator >= (const SkNf& o) const { return vcgeq_f64(fVec, o.fVec); }
146 Nb operator != (const SkNf& o) const { 147 Nb operator != (const SkNf& o) const {
147 return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(fVec, o.fVec)))); 148 return vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(fVec, o.fVec))));
148 } 149 }
149 150
150 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.fVec); } 151 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.fVec); }
151 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.fVec); } 152 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.fVec); }
152 153
153 SkNf sqrt() const { return vsqrtq_f64(fVec); } 154 SkNf sqrt() const { return vsqrtq_f64(fVec); }
154 SkNf rsqrt() const { 155
155 float64x2_t est0 = vrsqrteq_f64(fVec), 156 SkNf rsqrt0() const { return vrsqrteq_f64(fVec); }
156 est1 = vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0); 157 SkNf rsqrt1() const {
157 return est1; 158 float64x2_t est0 = this->rsqrt0().fVec;
159 return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0);
160 }
161 SkNf rsqrt2() const {
162 float64x2_t est1 = this->rsqrt1().fVec;
163 return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est1, est1)), est1);
158 } 164 }
159 165
160 SkNf approxInvert() const { 166 SkNf approxInvert() const {
161 float64x2_t est0 = vrecpeq_f64(fVec), 167 float64x2_t est0 = vrecpeq_f64(fVec),
162 est1 = vmulq_f64(vrecpsq_f64(est0, fVec), est0); 168 est1 = vmulq_f64(vrecpsq_f64(est0, fVec), est0);
163 return est1; 169 return est1;
164 } 170 }
165 171
166 SkNf invert() const { 172 SkNf invert() const {
167 float64x2_t est1 = this->approxInvert().fVec, 173 float64x2_t est1 = this->approxInvert().fVec,
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
262 Nb operator == (const SkNf& o) const { return vceqq_f32(fVec, o.fVec); } 268 Nb operator == (const SkNf& o) const { return vceqq_f32(fVec, o.fVec); }
263 Nb operator < (const SkNf& o) const { return vcltq_f32(fVec, o.fVec); } 269 Nb operator < (const SkNf& o) const { return vcltq_f32(fVec, o.fVec); }
264 Nb operator > (const SkNf& o) const { return vcgtq_f32(fVec, o.fVec); } 270 Nb operator > (const SkNf& o) const { return vcgtq_f32(fVec, o.fVec); }
265 Nb operator <= (const SkNf& o) const { return vcleq_f32(fVec, o.fVec); } 271 Nb operator <= (const SkNf& o) const { return vcleq_f32(fVec, o.fVec); }
266 Nb operator >= (const SkNf& o) const { return vcgeq_f32(fVec, o.fVec); } 272 Nb operator >= (const SkNf& o) const { return vcgeq_f32(fVec, o.fVec); }
267 Nb operator != (const SkNf& o) const { return vmvnq_u32(vceqq_f32(fVec, o.fVec)); } 273 Nb operator != (const SkNf& o) const { return vmvnq_u32(vceqq_f32(fVec, o.fVec)); }
268 274
269 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.fVec); } 275 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.fVec); }
270 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.fVec); } 276 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.fVec); }
271 277
272 SkNf rsqrt() const { 278 SkNf rsqrt0() const { return vrsqrteq_f32(fVec); }
273 float32x4_t est0 = vrsqrteq_f32(fVec), 279 SkNf rsqrt1() const {
274 est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0); 280 float32x4_t est0 = this->rsqrt0().fVec;
275 return est1; 281 return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0);
282 }
283 SkNf rsqrt2() const {
284 float32x4_t est1 = this->rsqrt1().fVec;
285 return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1);
276 } 286 }
277 287
278 SkNf sqrt() const { 288 SkNf sqrt() const {
279 #if defined(SK_CPU_ARM64) 289 #if defined(SK_CPU_ARM64)
280 return vsqrtq_f32(fVec); 290 return vsqrtq_f32(fVec);
281 #else 291 #else
282 float32x4_t est1 = this->rsqrt().fVec, 292 return *this * this->rsqrt2();
283 // An extra step of Newton's method to refine the estimate of 1/sqrt(this).
284 est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1);
285 return vmulq_f32(fVec, est2);
286 #endif 293 #endif
287 } 294 }
288 295
289 template <int k> float kth() const { 296 template <int k> float kth() const {
290 SkASSERT(0 <= k && k < 4); 297 SkASSERT(0 <= k && k < 4);
291 return vgetq_lane_f32(fVec, k&3); 298 return vgetq_lane_f32(fVec, k&3);
292 } 299 }
293 300
294 protected: 301 protected:
295 float32x4_t fVec; 302 float32x4_t fVec;
296 }; 303 };
297 304
298 #endif//SkNx_neon_DEFINED 305 #endif//SkNx_neon_DEFINED
OLD NEW
« no previous file with comments | « src/effects/gradients/SkRadialGradient.cpp ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698