Index: src/opts/SkNx_sse.h |
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h |
index e165f587370c2af26caeb75b364a478c6abd3141..093cd4c14cfcf58bbb35c1b726082db8d461fd40 100644 |
--- a/src/opts/SkNx_sse.h |
+++ b/src/opts/SkNx_sse.h |
@@ -155,9 +155,30 @@ public: |
SkNf() {} |
explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {} |
static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); } |
+ |
+ // Widens four unsigned bytes to four float lanes: lane i holds (float)bytes[i]. |
+ static SkNf FromBytes(const uint8_t bytes[4]) { |
+     const __m128i packed = _mm_cvtsi32_si128(*(const int*)bytes);  // all 4 bytes in the low 32 bits |
+ #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
+     const char Z = ~0;  // A shuffle index with its high bit set produces a zero byte. |
+     const __m128i widened = _mm_shuffle_epi8(packed, _mm_setr_epi8(0,Z,Z,Z, 1,Z,Z,Z, 2,Z,Z,Z, 3,Z,Z,Z)); |
+ #else |
+     const __m128i zero    = _mm_setzero_si128(),  // Interleaving with zero zero-extends: 8 -> 16 -> 32 bits. |
+                   widened = _mm_unpacklo_epi16(_mm_unpacklo_epi8(packed, zero), zero); |
+ #endif |
+     return SkNf(_mm_cvtepi32_ps(widened));  // TODO: use _mm_cvtepu8_epi32 w/SSE4.1? |
+ } |
+ |
SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} |
void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } |
+ // Writes lane i to bytes[i], truncated toward zero and clamped to [0,255] (for lanes that fit in int32). |
+ void toBytes(uint8_t bytes[4]) const { |
+     __m128i fix8_32 = _mm_cvttps_epi32(fVec),              // float -> int32, truncating |
+             fix8_16 = _mm_packs_epi32 (fix8_32, fix8_32),  // int32 -> int16, signed saturation (an epi16 pack here would split each int32 in half) |
+             fix8    = _mm_packus_epi16(fix8_16, fix8_16);  // int16 -> uint8, unsigned saturation |
+     *(int*)bytes = _mm_cvtsi128_si32(fix8);  // TODO: use _mm_shuffle_epi8 w/SSSE3? |
+ } |
SkNi<4, int> castTrunc() const { return _mm_cvttps_epi32(fVec); } |