Index: src/opts/Sk2x_sse.h |
diff --git a/src/opts/Sk2x_sse.h b/src/opts/Sk2x_sse.h |
index 111d3c230e51bfcee7dd0e9d06efee9823fcf6c1..1136f1d8568ecb47b66be01daa7c774410b5559c 100644 |
--- a/src/opts/Sk2x_sse.h |
+++ b/src/opts/Sk2x_sse.h |
@@ -46,6 +46,9 @@ M(Sk2f) Max(const Sk2f& a, const Sk2f& b) { return _mm_max_ps(a.fVec, b.fVec); } |
M(Sk2f) rsqrt() const { return _mm_rsqrt_ps(fVec); } |
M(Sk2f) sqrt() const { return _mm_sqrt_ps (fVec); } |
+M(Sk2f) invert() const { return Sk2f(1.0f) / *this; } /* reciprocal via a real divide: Sk2f(1.0f) / *this */ |
+M(Sk2f) approxInvert() const { return _mm_rcp_ps(fVec); } /* reciprocal estimate from _mm_rcp_ps; cheaper, less precise */ |
+ |
#undef M |
#define M(...) template <> inline __VA_ARGS__ Sk2x<double>:: |
@@ -70,6 +73,10 @@ M(Sk2d) Max(const Sk2d& a, const Sk2d& b) { return _mm_max_pd(a.fVec, b.fVec); } |
M(Sk2d) rsqrt() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec))); } |
M(Sk2d) sqrt() const { return _mm_sqrt_pd(fVec); } |
+// There is no _mm_rcp_pd, so Sk2d::approxInvert() (below invert()) narrows to floats, uses _mm_rcp_ps, then widens back. |
+M(Sk2d) invert() const { return Sk2d(1.0) / *this; } /* reciprocal via a real divide: Sk2d(1.0) / *this */ |
+M(Sk2d) approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec))); } /* estimate computed in float, so at most float precision */ |
+ |
#undef M |
#endif |