| Index: src/opts/SkNx_sse.h
|
| diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
|
| index a4783c6302eec19f2231e852ced78ebdb67af588..a4594115e0ecff4e4223606da03c4ccba8fcbcd8 100644
|
| --- a/src/opts/SkNx_sse.h
|
| +++ b/src/opts/SkNx_sse.h
|
| @@ -544,14 +544,6 @@ public:
|
| __m256i fVec;
|
| };
|
|
|
| - // _mm256_unpack{lo,hi}_pd() auto-casting to and from __m256d.
|
| - AI static __m256 unpacklo_pd(__m256 x, __m256 y) {
|
| - return _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(x), _mm256_castps_pd(y)));
|
| - }
|
| - AI static __m256 unpackhi_pd(__m256 x, __m256 y) {
|
| - return _mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(x), _mm256_castps_pd(y)));
|
| - }
|
| -
|
| template <>
|
| class SkNx<8, float> {
|
| public:
|
| @@ -568,29 +560,6 @@ public:
|
| AI static SkNx Load(const void* ptr) { return _mm256_loadu_ps((const float*)ptr); }
|
| AI void store(void* ptr) const { _mm256_storeu_ps((float*)ptr, fVec); }
|
|
|
| - AI static void Store4(void* ptr,
|
| - const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) {
|
| - __m256 rg0145 = _mm256_unpacklo_ps(r.fVec, g.fVec), // r0 g0 r1 g1 | r4 g4 r5 g5
|
| - rg2367 = _mm256_unpackhi_ps(r.fVec, g.fVec), // r2 ... | r6 ...
|
| - ba0145 = _mm256_unpacklo_ps(b.fVec, a.fVec), // b0 a0 b1 a1 | b4 a4 b5 a5
|
| - ba2367 = _mm256_unpackhi_ps(b.fVec, a.fVec); // b2 ... | b6 ...
|
| -
|
| - __m256 _04 = unpacklo_pd(rg0145, ba0145), // r0 g0 b0 a0 | r4 g4 b4 a4
|
| - _15 = unpackhi_pd(rg0145, ba0145), // r1 ... | r5 ...
|
| - _26 = unpacklo_pd(rg2367, ba2367), // r2 ... | r6 ...
|
| - _37 = unpackhi_pd(rg2367, ba2367); // r3 ... | r7 ...
|
| -
|
| - __m256 _01 = _mm256_permute2f128_ps(_04, _15, 16), // 16 == 010 000 == lo, lo
|
| - _23 = _mm256_permute2f128_ps(_26, _37, 16),
|
| - _45 = _mm256_permute2f128_ps(_04, _15, 25), // 25 == 011 001 == hi, hi
|
| - _67 = _mm256_permute2f128_ps(_26, _37, 25);
|
| -
|
| - _mm256_storeu_ps((float*)ptr + 0*8, _01);
|
| - _mm256_storeu_ps((float*)ptr + 1*8, _23);
|
| - _mm256_storeu_ps((float*)ptr + 2*8, _45);
|
| - _mm256_storeu_ps((float*)ptr + 3*8, _67);
|
| - }
|
| -
|
| AI SkNx operator+(const SkNx& o) const { return _mm256_add_ps(fVec, o.fVec); }
|
| AI SkNx operator-(const SkNx& o) const { return _mm256_sub_ps(fVec, o.fVec); }
|
| AI SkNx operator*(const SkNx& o) const { return _mm256_mul_ps(fVec, o.fVec); }
|
|
|