| Index: include/core/SkFloatingPoint.h
 | 
| diff --git a/include/core/SkFloatingPoint.h b/include/core/SkFloatingPoint.h
 | 
| index 44a3eef98dc8c0272a324494e4ea0153dd4f7972..7dfa9d8680e82a025eb869d0b0ed7af13e6165ec 100644
 | 
| --- a/include/core/SkFloatingPoint.h
 | 
| +++ b/include/core/SkFloatingPoint.h
 | 
| @@ -96,4 +96,45 @@ extern const uint32_t gIEEENegativeInfinity;
 | 
|  #define SK_FloatNaN                 (*SkTCast<const float*>(&gIEEENotANumber))
 | 
|  #define SK_FloatInfinity            (*SkTCast<const float*>(&gIEEEInfinity))
 | 
|  #define SK_FloatNegativeInfinity    (*SkTCast<const float*>(&gIEEENegativeInfinity))
 | 
| +
 | 
| +#if defined(__SSE__)
 | 
| +#include <xmmintrin.h>
 | 
| +#elif defined(__ARM_NEON__)
 | 
| +#include <arm_neon.h>
 | 
| +#endif
 | 
| +
 | 
| +// Fast, approximate inverse square root: returns an estimate of 1/sqrt(x).
 | 
| +// Compare to name-brand "1.0f / sk_float_sqrt(x)".  Should be around 10x faster on SSE, 2x on NEON.
 | 
| +static inline float sk_float_rsqrt(const float x) {
 | 
| +// We want all this inlined, so we'll inline SIMD and just take the hit when we don't know we've got
 | 
| +// it at compile time.  This is going to be too fast to productively hide behind a function pointer.
 | 
| +//
 | 
| +// We do one step of Newton's method to refine the estimates in the NEON and fallback paths.
 | 
| +// No refinement is faster, but very inaccurate.  Two steps is more accurate, but slower than 1/sqrt.
 | 
| +#if defined(__SSE__)
 | 
| +    float result;
 | 
| +    _mm_store_ss(&result, _mm_rsqrt_ss(_mm_set_ss(x)));
 | 
| +    return result;
 | 
| +#elif defined(__ARM_NEON__)
 | 
| +    // Get initial estimate.
 | 
| +    const float32x2_t xx = vdup_n_f32(x);  // Clever readers will note we're doing everything 2x.
 | 
| +    float32x2_t estimate = vrsqrte_f32(xx);
 | 
| +
 | 
| +    // One step of Newton's method to refine.
 | 
| +    const float32x2_t estimate_sq = vmul_f32(estimate, estimate);
 | 
| +    estimate = vmul_f32(estimate, vrsqrts_f32(xx, estimate_sq));
 | 
| +    return vget_lane_f32(estimate, 0);  // 1 will work fine too; the answer's in both places.
 | 
| +#else
 | 
| +    // Get initial estimate by manipulating the float's bit pattern as an int.
 | 
| +    int i = *SkTCast<int*>(&x);
 | 
| +    i = 0x5f3759df - (i>>1);
 | 
| +    float estimate = *SkTCast<float*>(&i);
 | 
| +
 | 
| +    // One step of Newton's method to refine: estimate *= 1.5 - 0.5*x*estimate^2.
 | 
| +    const float estimate_sq = estimate*estimate;
 | 
| +    estimate *= (1.5f-0.5f*x*estimate_sq);
 | 
| +    return estimate;
 | 
| +#endif
 | 
| +}
 | 
| +
 | 
|  #endif
 | 
| 
 |