| Index: src/opts/SkNx_sse.h
|
| diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
|
| index cbe624ba2dbaaaa6edca19b32989b3a8a7a3c4d4..b3339f9957a2ca5041f1a730c9f7f219a13b9fc2 100644
|
| --- a/src/opts/SkNx_sse.h
|
| +++ b/src/opts/SkNx_sse.h
|
| @@ -20,7 +20,6 @@ public:
|
| bool allTrue() const { return 0xff == (_mm_movemask_epi8(fVec) & 0xff); }
|
| bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(fVec) & 0xff); }
|
|
|
| -private:
|
| __m128i fVec;
|
| };
|
|
|
| @@ -33,7 +32,6 @@ public:
|
| bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); }
|
| bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); }
|
|
|
| -private:
|
| __m128i fVec;
|
| };
|
|
|
| @@ -46,7 +44,6 @@ public:
|
| bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); }
|
| bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); }
|
|
|
| -private:
|
| __m128i fVec;
|
| };
|
|
|
| @@ -95,7 +92,6 @@ public:
|
| return pun.fs[k&1];
|
| }
|
|
|
| -private:
|
| __m128 fVec;
|
| };
|
|
|
| @@ -141,7 +137,6 @@ public:
|
| return pun.ds[k&1];
|
| }
|
|
|
| -private:
|
| __m128d fVec;
|
| };
|
|
|
| @@ -179,7 +174,7 @@ public:
|
| default: SkASSERT(false); return 0;
|
| }
|
| }
|
| -protected:
|
| +
|
| __m128i fVec;
|
| };
|
|
|
| @@ -227,7 +222,6 @@ public:
|
| return pun.fs[k&3];
|
| }
|
|
|
| -protected:
|
| __m128 fVec;
|
| };
|
|
|
| @@ -254,7 +248,7 @@ public:
|
| SkASSERT(0 <= k && k < 4);
|
| return _mm_extract_epi16(fVec, k);
|
| }
|
| -protected:
|
| +
|
| __m128i fVec;
|
| };
|
|
|
| @@ -282,7 +276,41 @@ public:
|
| SkASSERT(0 <= k && k < 8);
|
| return _mm_extract_epi16(fVec, k);
|
| }
|
| -protected:
|
| +
|
| + __m128i fVec;
|
| +};
|
| +
|
| +template <>
|
| +class SkNi<16, uint8_t> {  // Specialization: 16 lanes of uint8_t held in a single __m128i (fVec).
|
| +public:
|
| + SkNi(const __m128i& vec) : fVec(vec) {}  // Implicit; the operators and Load() below return a raw __m128i and rely on this conversion.
|
| +
|
| + SkNi() {}  // Does not initialize fVec.
|
| + explicit SkNi(uint8_t val) : fVec(_mm_set1_epi8(val)) {}  // Broadcasts val into all 16 lanes.
|
| + static SkNi Load(const uint8_t vals[16]) { return _mm_loadu_si128((const __m128i*)vals); }  // Unaligned 16-byte load.
|
| + SkNi(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
|
| + uint8_t e, uint8_t f, uint8_t g, uint8_t h,
|
| + uint8_t i, uint8_t j, uint8_t k, uint8_t l,
|
| + uint8_t m, uint8_t n, uint8_t o, uint8_t p)
|
| + : fVec(_mm_setr_epi8(a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p)) {}  // setr: a lands in lane 0, p in lane 15.
|
| +
|
| + void store(uint8_t vals[16]) const { _mm_storeu_si128((__m128i*)vals, fVec); }  // Unaligned 16-byte store.
|
| +
|
| + SkNi operator + (const SkNi& o) const { return _mm_add_epi8(fVec, o.fVec); }  // Lane-wise add; each 8-bit lane wraps.
|
| + SkNi operator - (const SkNi& o) const { return _mm_sub_epi8(fVec, o.fVec); }  // Lane-wise subtract; each 8-bit lane wraps.
|
| +
|
| + // SSE cannot multiply or shift vectors of uint8_t, so these three trip SkASSERT(false) and return fVec unmodified.
|
| + SkNi operator * (const SkNi& o) const { SkASSERT(false); return fVec; }
|
| + SkNi operator << (int bits) const { SkASSERT(false); return fVec; }
|
| + SkNi operator >> (int bits) const { SkASSERT(false); return fVec; }
|
| +
|
| + template <int k> uint8_t kth() const {  // Returns lane k, where k is a compile-time index in [0, 16).
|
| + SkASSERT(0 <= k && k < 16);
|
| + // SSE4.1 would just `return _mm_extract_epi8(fVec, k)`. Without it, read the 16-bit lane that holds byte k and keep one half.
|
| + int pair = _mm_extract_epi16(fVec, k/2);
|
| + return k % 2 == 0 ? pair : (pair >> 8);  // Even k: low byte (the uint8_t return truncates); odd k: high byte.
|
| + }
|
| +
|
| __m128i fVec;
|
| };
|
|
|
|
|