| Index: include/core/SkFloatingPoint.h
|
| diff --git a/include/core/SkFloatingPoint.h b/include/core/SkFloatingPoint.h
|
| index ad1669c4ee3ade7a0aaaaf9589bda4bc80fcb839..7c34706f7a05093007aebbb131c0e6cbf26ba298 100644
|
| --- a/include/core/SkFloatingPoint.h
|
| +++ b/include/core/SkFloatingPoint.h
|
| @@ -143,12 +143,6 @@ extern const uint32_t gIEEENegativeInfinity;
|
| #define SK_FloatInfinity (*SkTCast<const float*>(&gIEEEInfinity))
|
| #define SK_FloatNegativeInfinity (*SkTCast<const float*>(&gIEEENegativeInfinity))
|
|
|
| -#if defined(__SSE__)
|
| -#include <xmmintrin.h>
|
| -#elif defined(SK_ARM_HAS_NEON)
|
| -#include <arm_neon.h>
|
| -#endif
|
| -
|
| // Fast, approximate inverse square root.
|
| // Compare to name-brand "1.0f / sk_float_sqrt(x)". Should be around 10x faster on SSE, 2x on NEON.
|
| static inline float sk_float_rsqrt(const float x) {
|
| @@ -157,10 +151,8 @@ static inline float sk_float_rsqrt(const float x) {
|
| //
|
| // We do one step of Newton's method to refine the estimates in the NEON and null paths. No
|
| // refinement is faster, but very inaccurate. Two steps is more accurate, but slower than 1/sqrt.
|
| -#if defined(__SSE__)
|
| - float result;
|
| - _mm_store_ss(&result, _mm_rsqrt_ss(_mm_set_ss(x)));
|
| - return result;
|
| +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
|
| + return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(x)));
|
| #elif defined(SK_ARM_HAS_NEON)
|
| // Get initial estimate.
|
| const float32x2_t xx = vdup_n_f32(x); // Clever readers will note we're doing everything 2x.
|
|
|