Index: src/opts/SkNx_sse.h |
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h |
index 34b58c10edaee21d87f235f494c0ec52db149a92..80c7f0e9ae79f1a0e0743d77eb50a0fdbfb9a499 100644 |
--- a/src/opts/SkNx_sse.h |
+++ b/src/opts/SkNx_sse.h |
@@ -52,13 +52,9 @@ public: |
static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.fVec); } |
static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.fVec); } |
- SkNx sqrt () const { return _mm_sqrt_ps (fVec); } |
- SkNx rsqrt0() const { return _mm_rsqrt_ps(fVec); } |
- SkNx rsqrt1() const { return this->rsqrt0(); } |
- SkNx rsqrt2() const { return this->rsqrt1(); } |
- |
- SkNx invert() const { return SkNx(1) / *this; } |
- SkNx approxInvert() const { return _mm_rcp_ps(fVec); } |
+ SkNx sqrt() const { return _mm_sqrt_ps (fVec); } /* per-lane square root of fVec */ |
+ SkNx rsqrt() const { return _mm_rsqrt_ps(fVec); } /* per-lane 1/sqrt(x) from the SSE estimate instruction; approximate, no refinement step */ |
+ SkNx invert() const { return _mm_rcp_ps(fVec); } /* per-lane 1/x from the SSE rcp estimate, not an exact divide; NOTE(review): confirm callers accept the approximation */ |
float operator[](int k) const { |
SkASSERT(0 <= k && k < 2); |
@@ -103,13 +99,9 @@ public: |
SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); } |
SkNx floor() const { return sse2_mm_floor_ps(fVec); } |
- SkNx sqrt () const { return _mm_sqrt_ps (fVec); } |
- SkNx rsqrt0() const { return _mm_rsqrt_ps(fVec); } |
- SkNx rsqrt1() const { return this->rsqrt0(); } |
- SkNx rsqrt2() const { return this->rsqrt1(); } |
- |
- SkNx invert() const { return SkNx(1) / *this; } |
- SkNx approxInvert() const { return _mm_rcp_ps(fVec); } |
+ SkNx sqrt() const { return _mm_sqrt_ps (fVec); } /* per-lane square root of fVec */ |
+ SkNx rsqrt() const { return _mm_rsqrt_ps(fVec); } /* per-lane 1/sqrt(x) from the SSE estimate instruction; approximate, no refinement step */ |
+ SkNx invert() const { return _mm_rcp_ps(fVec); } /* per-lane 1/x from the SSE rcp estimate, not an exact divide; NOTE(review): confirm callers accept the approximation */ |
float operator[](int k) const { |
SkASSERT(0 <= k && k < 4); |
@@ -346,13 +338,18 @@ template<> /*static*/ inline Sk4f SkNx_cast<float, uint16_t>(const Sk4h& src) { |
return _mm_cvtepi32_ps(_32); |
} |
-static inline void Sk4f_ToBytes(uint8_t bytes[16], |
- const Sk4f& a, const Sk4f& b, const Sk4f& c, const Sk4f& d) { |
- _mm_storeu_si128((__m128i*)bytes, |
- _mm_packus_epi16(_mm_packus_epi16(_mm_cvttps_epi32(a.fVec), |
- _mm_cvttps_epi32(b.fVec)), |
- _mm_packus_epi16(_mm_cvttps_epi32(c.fVec), |
- _mm_cvttps_epi32(d.fVec)))); |
+template<> /*static*/ inline Sk16b SkNx_cast<uint8_t, float>(const Sk16f& src) { |
+    // Converts 16 floats to 16 bytes: truncate toward zero, then clamp to [0,255] (32 -> 16 -> 8 bits). |
+    Sk8f ab, cd; |
+    SkNx_split(src, &ab, &cd); |
+    Sk4f a,b,c,d; |
+    SkNx_split(ab, &a, &b); |
+    SkNx_split(cd, &c, &d); |
+    // Narrow with packs_epi32 first: packus_epi16 on raw int32 lanes mis-clamps values outside int16 range. |
+    return _mm_packus_epi16(_mm_packs_epi32(_mm_cvttps_epi32(a.fVec), |
+                                            _mm_cvttps_epi32(b.fVec)), |
+                            _mm_packs_epi32(_mm_cvttps_epi32(c.fVec), |
+                                            _mm_cvttps_epi32(d.fVec))); |
} |
template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { |