Index: include/core/SkFloatingPoint.h |
diff --git a/include/core/SkFloatingPoint.h b/include/core/SkFloatingPoint.h |
index 44a3eef98dc8c0272a324494e4ea0153dd4f7972..acc5d5d9d85ddcdf0e9773ddb99cf87f035792a8 100644 |
--- a/include/core/SkFloatingPoint.h |
+++ b/include/core/SkFloatingPoint.h |
@@ -96,4 +96,30 @@ extern const uint32_t gIEEENegativeInfinity; |
#define SK_FloatNaN (*SkTCast<const float*>(&gIEEENotANumber)) |
#define SK_FloatInfinity (*SkTCast<const float*>(&gIEEEInfinity)) |
#define SK_FloatNegativeInfinity (*SkTCast<const float*>(&gIEEENegativeInfinity)) |
+ |
+#ifdef __SSE__ |
+#include <xmmintrin.h> |
+#endif |
+ |
+// Fast, approximate inverse square root: returns an estimate of 1/sqrt(x). |
+// Compare to name-brand "1.0f / sk_float_sqrt(x)". |
+// |
+// Declared static inline because the body is selected by __SSE__: giving every translation unit |
+// its own copy means each file gets the variant that matches the flags it was compiled with, |
+// instead of several different definitions of one external inline function. |
+// NOTE(review): neither path checks its input; presumably callers only pass finite x > 0 - confirm. |
+static inline float sk_float_rsqrt(float x) { |
reed1
2013/11/07 22:28:44
static ...
|
+// We want all this inlined, so we inline the SIMD directly and just take the hit in builds where |
+// we don't know at compile time that we've got it. This is going to be too fast to productively |
+// hide behind a function pointer. |
+#ifdef __SSE__ |
+    // Hardware path: put x in the low lane, take the scalar reciprocal-square-root estimate, |
+    // and store the low lane back out to a float. |
+    float result; |
+    _mm_store_ss(&result, _mm_rsqrt_ss(_mm_set_ss(x))); |
+    return result; |
+#else |
+// TODO(mtklein): NEON |
+    // Portable path: build an initial guess by treating the float's bit pattern as an integer, |
+    // then refine it with one Newton-Raphson step. |
+    // NOTE(review): this reads a float through an int pointer, so it assumes both are the same |
+    // size (32 bits) - confirm for any new target. |
+    float xhalf = 0.5f*x; |
+    int i = *SkTCast<int*>(&x);    // reinterpret the bits of x as an int |
+    i = 0x5f3759df - (i>>1);       // magic-constant initial guess |
+    x = *SkTCast<float*>(&i);      // reinterpret the guess back as a float |
+    x = x*(1.5f-xhalf*x*x);        // single Newton-Raphson refinement |
+    // A second, identical refinement step (x = x*(1.5f-xhalf*x*x)) is deliberately left out: |
+    // it takes err from 10^-3 to 10^-6, but at what cost!?!? |
+    return x; |
+#endif |
+} |
+ |
#endif |