Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1880)

Unified Diff: src/opts/SkNx_neon.h

Issue 1109913002: Split rsqrt into rsqrt{0,1,2}, with increasing cost and precision on ARM (Closed). Base URL: https://skia.googlesource.com/skia@master
Patch Set: arm64 typos. Created 5 years, 8 months ago.
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/effects/gradients/SkRadialGradient.cpp ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/opts/SkNx_neon.h
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index 6b216827a8f007449258d18edf4d85811ac1bc96..f1deabc5febacb4d2f688c9dae61df581f2cfc2e 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -81,20 +81,21 @@ public:
static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fVec); }
static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fVec); }
- SkNf rsqrt() const {
- float32x2_t est0 = vrsqrte_f32(fVec),
- est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0);
- return est1;
+ SkNf rsqrt0() const { return vrsqrte_f32(fVec); }
+ SkNf rsqrt1() const {
+ float32x2_t est0 = this->rsqrt0().fVec;
+ return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0);
+ }
+ SkNf rsqrt2() const {
+ float32x2_t est1 = this->rsqrt1().fVec;
+ return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1);
}
SkNf sqrt() const {
#if defined(SK_CPU_ARM64)
return vsqrt_f32(fVec);
#else
- float32x2_t est1 = this->rsqrt().fVec,
- // An extra step of Newton's method to refine the estimate of 1/sqrt(this).
- est2 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1);
- return vmul_f32(fVec, est2);
+ return *this * this->rsqrt2();
#endif
}
@@ -151,10 +152,15 @@ public:
static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.fVec); }
SkNf sqrt() const { return vsqrtq_f64(fVec); }
- SkNf rsqrt() const {
- float64x2_t est0 = vrsqrteq_f64(fVec),
- est1 = vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0);
- return est1;
+
+ SkNf rsqrt0() const { return vrsqrteq_f64(fVec); }
+ SkNf rsqrt1() const {
+ float64x2_t est0 = this->rsqrt0().fVec;
+ return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0);
+ }
+ SkNf rsqrt2() const {
+ float64x2_t est1 = this->rsqrt1().fVec;
+ return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est1, est1)), est1);
}
SkNf approxInvert() const {
@@ -269,20 +275,21 @@ public:
static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.fVec); }
static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.fVec); }
- SkNf rsqrt() const {
- float32x4_t est0 = vrsqrteq_f32(fVec),
- est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0);
- return est1;
+ SkNf rsqrt0() const { return vrsqrteq_f32(fVec); }
+ SkNf rsqrt1() const {
+ float32x4_t est0 = this->rsqrt0().fVec;
+ return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0);
+ }
+ SkNf rsqrt2() const {
+ float32x4_t est1 = this->rsqrt1().fVec;
+ return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1);
}
SkNf sqrt() const {
#if defined(SK_CPU_ARM64)
return vsqrtq_f32(fVec);
#else
- float32x4_t est1 = this->rsqrt().fVec,
- // An extra step of Newton's method to refine the estimate of 1/sqrt(this).
- est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1);
- return vmulq_f32(fVec, est2);
+ return *this * this->rsqrt2();
#endif
}
« no previous file with comments | « src/effects/gradients/SkRadialGradient.cpp ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698