| Index: include/private/SkFloatingPoint.h
|
| diff --git a/include/private/SkFloatingPoint.h b/include/private/SkFloatingPoint.h
|
| index ffed5c07471709e3dd188e3a8d572fcbd7e6b7cd..f7ee816b12005a6377a34016e35f878696a1e270 100644
|
| --- a/include/private/SkFloatingPoint.h
|
| +++ b/include/private/SkFloatingPoint.h
|
| @@ -127,28 +127,20 @@
|
| #define SK_FloatInfinity (*SkTCast<const float*>(&gIEEEInfinity))
|
| #define SK_FloatNegativeInfinity (*SkTCast<const float*>(&gIEEENegativeInfinity))
|
|
|
| -static inline float sk_float_rsqrt_portable(float x) {
|
| - // Get initial estimate.
|
| - int i = *SkTCast<int*>(&x);
|
| - i = 0x5F1FFFF9 - (i>>1);
|
| - float estimate = *SkTCast<float*>(&i);
|
| -
|
| - // One step of Newton's method to refine.
|
| - const float estimate_sq = estimate*estimate;
|
| - estimate *= 0.703952253f*(2.38924456f-x*estimate_sq);
|
| - return estimate;
|
| -}
|
| +// We forward declare this to break an #include cycle.
|
| +// (SkScalar -> SkFloatingPoint -> SkOpts.h -> SkXfermode -> SkColor -> SkScalar)
|
| +namespace SkOpts { extern float (*rsqrt)(float); }
|
|
|
| // Fast, approximate inverse square root.
|
| // Compare to name-brand "1.0f / sk_float_sqrt(x)". Should be around 10x faster on SSE, 2x on NEON.
|
| -static inline float sk_float_rsqrt(float x) {
|
| +static inline float sk_float_rsqrt(const float x) {
|
| // We want all this inlined, so we'll inline SIMD and just take the hit when we don't know we've got
|
| // it at compile time. This is going to be too fast to productively hide behind a function pointer.
|
| //
|
| -// We do one step of Newton's method to refine the estimates in the NEON and portable paths. No
|
| +// We do one step of Newton's method to refine the estimates in the NEON and null paths. No
|
| // refinement is faster, but very innacurate. Two steps is more accurate, but slower than 1/sqrt.
|
| //
|
| -// Optimized constants in the portable path courtesy of http://rrrola.wz.cz/inv_sqrt.html
|
| +// Optimized constants in the null path courtesy of http://rrrola.wz.cz/inv_sqrt.html
|
| #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
|
| return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(x)));
|
| #elif defined(SK_ARM_HAS_NEON)
|
| @@ -161,7 +153,8 @@
|
| estimate = vmul_f32(estimate, vrsqrts_f32(xx, estimate_sq));
|
| return vget_lane_f32(estimate, 0); // 1 will work fine too; the answer's in both places.
|
| #else
|
| - return sk_float_rsqrt_portable(x);
|
| + // Perhaps runtime-detected NEON, or a portable fallback.
|
| + return SkOpts::rsqrt(x);
|
| #endif
|
| }
|
|
|
|
|