Index: include/core/SkFloatingPoint.h |
diff --git a/include/core/SkFloatingPoint.h b/include/core/SkFloatingPoint.h |
index 44a3eef98dc8c0272a324494e4ea0153dd4f7972..7dfa9d8680e82a025eb869d0b0ed7af13e6165ec 100644 |
--- a/include/core/SkFloatingPoint.h |
+++ b/include/core/SkFloatingPoint.h |
@@ -96,4 +96,45 @@ extern const uint32_t gIEEENegativeInfinity; |
#define SK_FloatNaN (*SkTCast<const float*>(&gIEEENotANumber)) |
#define SK_FloatInfinity (*SkTCast<const float*>(&gIEEEInfinity)) |
#define SK_FloatNegativeInfinity (*SkTCast<const float*>(&gIEEENegativeInfinity)) |
+ |
+#if defined(__SSE__) |
+#include <xmmintrin.h> |
+#elif defined(__ARM_NEON__) |
+#include <arm_neon.h> |
+#endif |
+ |
+// Fast, approximate inverse square root. |
+// Compare to name-brand "1.0f / sk_float_sqrt(x)". Should be around 10x faster on SSE, 2x on NEON. |
+static inline float sk_float_rsqrt(const float x) { |
+// We want all this inlined, so we'll inline SIMD and just take the hit when we don't know we've got |
+// it at compile time. This is going to be too fast to productively hide behind a function pointer. |
+// |
+// We do one step of Newton's method to refine the estimates in the NEON and null paths. No |
+// refinement is faster, but very innacurate. Two steps is more accurate, but slower than 1/sqrt. |
+#if defined(__SSE__) |
+ float result; |
+ _mm_store_ss(&result, _mm_rsqrt_ss(_mm_set_ss(x))); |
+ return result; |
+#elif defined(__ARM_NEON__) |
+ // Get initial estimate. |
+ const float32x2_t xx = vdup_n_f32(x); // Clever readers will note we're doing everything 2x. |
+ float32x2_t estimate = vrsqrte_f32(xx); |
+ |
+ // One step of Newton's method to refine. |
+ const float32x2_t estimate_sq = vmul_f32(estimate, estimate); |
+ estimate = vmul_f32(estimate, vrsqrts_f32(xx, estimate_sq)); |
+ return vget_lane_f32(estimate, 0); // 1 will work fine too; the answer's in both places. |
+#else |
+ // Get initial estimate. |
+ int i = *SkTCast<int*>(&x); |
+ i = 0x5f3759df - (i>>1); |
+ float estimate = *SkTCast<float*>(&i); |
+ |
+ // One step of Newton's method to refine. |
+ const float estimate_sq = estimate*estimate; |
+ estimate *= (1.5f-0.5f*x*estimate_sq); |
+ return estimate; |
+#endif |
+} |
+ |
#endif |