| Index: src/opts/SkNx_sse.h
|
| diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
|
| index e165f587370c2af26caeb75b364a478c6abd3141..093cd4c14cfcf58bbb35c1b726082db8d461fd40 100644
|
| --- a/src/opts/SkNx_sse.h
|
| +++ b/src/opts/SkNx_sse.h
|
| @@ -155,9 +155,30 @@ public:
|
| SkNf() {}
|
| explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {}
|
| static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); }
|
| +
|
| + static SkNf FromBytes(const uint8_t bytes[4]) { // Widen 4 unsigned bytes into 4 float lanes, bytes[i] -> lane i.
|
| + __m128i fix8 = _mm_cvtsi32_si128(*(const int*)bytes); // 4 bytes into the low 32 bits, rest zero. NOTE(review): reads bytes[] through an int pointer, so this assumes an unaligned, type-punned load is acceptable here - confirm.
|
| + #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
|
| + const char _ = ~0; // Shuffle index with the high bit set: _mm_shuffle_epi8 writes zero to these bytes.
|
| + __m128i fix8_32 = _mm_shuffle_epi8(fix8, _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, 3,_,_,_)); // Source byte i becomes the low byte of 32-bit lane i.
|
| + #else // Below SSSE3: zero-extend in two interleave steps instead of one shuffle.
|
| + __m128i fix8_16 = _mm_unpacklo_epi8 (fix8, _mm_setzero_si128()), // Interleave with zero bytes: 8-bit -> 16-bit.
|
| + fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128()); // Interleave with zero words: 16-bit -> 32-bit.
|
| + #endif
|
| + return SkNf(_mm_cvtepi32_ps(fix8_32)); // int32 -> float; every lane is in 0..255 at this point.
|
| + // TODO: use _mm_cvtepu8_epi32 w/SSE4.1?
|
| + }
|
| +
|
| SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {}
|
|
|
| void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); }
|
| + void toBytes(uint8_t bytes[4]) const { // Truncate each float lane toward zero and write it to bytes[i], saturated to 0..255.
|
| + __m128i fix8_32 = _mm_cvttps_epi32(fVec), // float -> int32, truncating.
|
| + fix8_16 = _mm_packs_epi32 (fix8_32, fix8_32), // int32 -> int16 with signed saturation; a 16-bit pack here would split each lane in two and turn 32768..65535 into 0.
|
| + fix8 = _mm_packus_epi16(fix8_16, fix8_16); // int16 -> uint8 with unsigned saturation: negatives become 0, values above 255 become 255.
|
| + *(int*)bytes = _mm_cvtsi128_si32(fix8); // Low 32 bits hold the four result bytes. NOTE(review): writes bytes[] through an int pointer, so this assumes an unaligned, type-punned store is acceptable here - confirm.
|
| + // TODO: use _mm_shuffle_epi8 w/SSSE3? (A plain byte shuffle would not saturate.)
|
| + }
|
|
|
| SkNi<4, int> castTrunc() const { return _mm_cvttps_epi32(fVec); }
|
|
|
|
|