| Index: src/opts/SkNx_neon.h
|
| diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
|
| index 6b216827a8f007449258d18edf4d85811ac1bc96..f1deabc5febacb4d2f688c9dae61df581f2cfc2e 100644
|
| --- a/src/opts/SkNx_neon.h
|
| +++ b/src/opts/SkNx_neon.h
|
| @@ -81,20 +81,21 @@ public:
|
| static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fVec); }
|
| static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fVec); }
|
|
|
| - SkNf rsqrt() const {
|
| - float32x2_t est0 = vrsqrte_f32(fVec),
|
| - est1 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0);
|
| - return est1;
|
| + SkNf rsqrt0() const { return vrsqrte_f32(fVec); }  // Estimate of 1/sqrt(this) with no refinement steps.
|
| + SkNf rsqrt1() const {  // rsqrt0() refined by one step of Newton's method (what rsqrt() used to return).
|
| + float32x2_t est0 = this->rsqrt0().fVec;
|
| + return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0);  // est0 scaled by vrsqrts(this, est0*est0).
|
| + }
|
| + SkNf rsqrt2() const {  // rsqrt1() refined by one more Newton step, i.e. two steps in total.
|
| + float32x2_t est1 = this->rsqrt1().fVec;
|
| + return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1);  // Same update as rsqrt1(), seeded with est1.
|
| }
|
|
|
| SkNf sqrt() const {
|
| #if defined(SK_CPU_ARM64)
|
| return vsqrt_f32(fVec);
|
| #else
|
| - float32x2_t est1 = this->rsqrt().fVec,
|
| - // An extra step of Newton's method to refine the estimate of 1/sqrt(this).
|
| - est2 = vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1);
|
| - return vmul_f32(fVec, est2);
|
| + return *this * this->rsqrt2();  // sqrt(x) as x * (1/sqrt(x)), using the twice-refined estimate from rsqrt2().
|
| #endif
|
| }
|
|
|
| @@ -151,10 +152,15 @@ public:
|
| static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.fVec); }
|
|
|
| SkNf sqrt() const { return vsqrtq_f64(fVec); }
|
| - SkNf rsqrt() const {
|
| - float64x2_t est0 = vrsqrteq_f64(fVec),
|
| - est1 = vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0);
|
| - return est1;
|
| +
|
| + SkNf rsqrt0() const { return vrsqrteq_f64(fVec); }  // Estimate of 1/sqrt(this) with no refinement steps.
|
| + SkNf rsqrt1() const {  // rsqrt0() refined by one step of Newton's method (what rsqrt() used to return).
|
| + float64x2_t est0 = this->rsqrt0().fVec;
|
| + return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0);  // est0 scaled by vrsqrtsq(this, est0*est0).
|
| + }
|
| + SkNf rsqrt2() const {  // rsqrt1() refined by one more Newton step, i.e. two steps in total.
|
| + float64x2_t est1 = this->rsqrt1().fVec;
|
| + return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est1, est1)), est1);  // Same update as rsqrt1(), seeded with est1.
|
| }
|
|
|
| SkNf approxInvert() const {
|
| @@ -269,20 +275,21 @@ public:
|
| static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.fVec); }
|
| static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.fVec); }
|
|
|
| - SkNf rsqrt() const {
|
| - float32x4_t est0 = vrsqrteq_f32(fVec),
|
| - est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0);
|
| - return est1;
|
| + SkNf rsqrt0() const { return vrsqrteq_f32(fVec); }  // Estimate of 1/sqrt(this) with no refinement steps.
|
| + SkNf rsqrt1() const {  // rsqrt0() refined by one step of Newton's method (what rsqrt() used to return).
|
| + float32x4_t est0 = this->rsqrt0().fVec;
|
| + return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0);  // est0 scaled by vrsqrtsq(this, est0*est0).
|
| + }
|
| + SkNf rsqrt2() const {  // rsqrt1() refined by one more Newton step, i.e. two steps in total.
|
| + float32x4_t est1 = this->rsqrt1().fVec;
|
| + return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1);  // Same update as rsqrt1(), seeded with est1.
|
| }
|
|
|
| SkNf sqrt() const {
|
| #if defined(SK_CPU_ARM64)
|
| return vsqrtq_f32(fVec);
|
| #else
|
| - float32x4_t est1 = this->rsqrt().fVec,
|
| - // An extra step of Newton's method to refine the estimate of 1/sqrt(this).
|
| - est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1);
|
| - return vmulq_f32(fVec, est2);
|
| + return *this * this->rsqrt2();  // sqrt(x) as x * (1/sqrt(x)), using the twice-refined estimate from rsqrt2().
|
| #endif
|
| }
|
|
|
|
|