Index: src/opts/SkNx_sse.h |
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h |
index 71ecbfd89d9664ae6a4bded0cc44dac4dcd18618..ee6fdc5654b26838bf2e9f649741476d8d648055 100644 |
--- a/src/opts/SkNx_sse.h |
+++ b/src/opts/SkNx_sse.h |
@@ -348,6 +348,11 @@ template<> inline Sk4i SkNx_cast<int, float, 4>(const Sk4f& src) { |
return _mm_cvttps_epi32(src.fVec); |
} |
+template<> inline Sk4h SkNx_cast<uint16_t, float, 4>(const Sk4f& src) {  // Truncate 4 floats to ints, then narrow each to uint16_t, saturating to [0, 65535]. |
+    auto _32 = _mm_sub_epi32(_mm_cvttps_epi32(src.fVec), _mm_set1_epi32(0x8000));  // Bias by -0x8000 so the uint16_t range maps onto the int16_t range. |
+    return _mm_add_epi16(_mm_packs_epi32(_32, _32), _mm_set1_epi16((short)0x8000));  // Signed-saturating 32->16 pack, then undo the bias (SSE2 stand-in for SSE4.1 _mm_packus_epi32). |
+} |
+ |
template<> inline Sk4b SkNx_cast<uint8_t, float, 4>(const Sk4f& src) { |
auto _32 = _mm_cvttps_epi32(src.fVec); |
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
@@ -370,6 +375,11 @@ template<> inline Sk4f SkNx_cast<float, uint8_t, 4>(const Sk4b& src) { |
return _mm_cvtepi32_ps(_32); |
} |
+template<> inline Sk4f SkNx_cast<float, uint16_t, 4>(const Sk4h& src) { |
+    // Interleave the low four 16-bit lanes with zeros (zero-extending them to 32 bits), then convert to float. |
+    return _mm_cvtepi32_ps(_mm_unpacklo_epi16(src.fVec, _mm_setzero_si128())); |
+} |
+ |
static inline void Sk4f_ToBytes(uint8_t bytes[16], |
const Sk4f& a, const Sk4f& b, const Sk4f& c, const Sk4f& d) { |
_mm_storeu_si128((__m128i*)bytes, |