| Index: src/opts/Sk4x_neon.h
 | 
| diff --git a/src/opts/Sk4x_neon.h b/src/opts/Sk4x_neon.h
 | 
| index 2851fb31a4e7525f6885d89a2410fde3fa05018d..3f35fe785b9af2c39ae9ca948e5cacee698ca41f 100644
 | 
| --- a/src/opts/Sk4x_neon.h
 | 
| +++ b/src/opts/Sk4x_neon.h
 | 
| @@ -82,13 +82,17 @@ M(Sk4f) divide  (const Sk4f& o) const {
 | 
|  }
 | 
|  
 | 
|  M(Sk4f) rsqrt() const {
 | 
| -    float32x4_t est0 = vrsqrteq_f32(fVec);
 | 
| -    float32x4_t est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0);
 | 
| -    float32x4_t est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1);
 | 
| -    return est2;
 | 
| +    float32x4_t est0 = vrsqrteq_f32(fVec),  // initial estimate of 1/sqrt(this)
 | 
| +                est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0);  // one Newton's-method refinement of est0
 | 
| +    return est1;  // a single refinement step only; sqrt() applies one more on top of this
 | 
|  }
 | 
|  
 | 
| -M(Sk4f)  sqrt() const { return this->multiply(this->rsqrt()); }
 | 
| +M(Sk4f)  sqrt() const {
 | 
| +    float32x4_t est1 = this->rsqrt().fVec,  // 1/sqrt(this) after rsqrt()'s single Newton step
 | 
| +    // One more step of Newton's method, on top of rsqrt()'s, to refine the estimate of 1/sqrt(this).
 | 
| +                est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1);
 | 
| +    return vmulq_f32(fVec, est2);  // this * (1/sqrt(this)) ~= sqrt(this); NOTE(review): a zero lane may give 0 * inf = NaN -- confirm
 | 
| +}
 | 
|  
 | 
|  M(Sk4i) equal           (const Sk4f& o) const { return vreinterpretq_s32_u32(vceqq_f32(fVec, o.fVec)); }
 | 
|  M(Sk4i) notEqual        (const Sk4f& o) const { return vreinterpretq_s32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec))); }
 | 
| 
 |