Index: src/core/SkHalf.h |
diff --git a/src/core/SkHalf.h b/src/core/SkHalf.h |
index 5f5575ae1aeaede7f1fd6bde72c8df549f8557bf..2b64d353437c9db870d2d8ebbc65e07a9b7a57cd 100644 |
--- a/src/core/SkHalf.h |
+++ b/src/core/SkHalf.h |
@@ -8,6 +8,7 @@ |
#ifndef SkHalf_DEFINED |
#define SkHalf_DEFINED |
+#include "SkOpts.h" |
#include "SkNx.h" |
#include "SkTypes.h" |
@@ -29,6 +30,37 @@ SkHalf SkFloatToHalf(float f); |
static inline Sk4f SkHalfToFloat_01(uint64_t); |
static inline uint64_t SkFloatToHalf_01(const Sk4f&); |
+struct SkFloatConvert { |
+ // The compiler can't hoist SkOpts::has_f16c out of loops, so we cache it locally. |
+ // TODO: similar has_fp16 for ARMv7 |
+ const bool has_f16c = SkOpts::has_f16c; |
+ |
+ Sk4f halfToFloat(uint64_t h) const { |
+ #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
+ if (has_f16c) { |
+ __m128 v = _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)&h)); |
+ asm("vcvtph2ps %[v], %[v]" : [v] "+x" (v)); |
+ return v; |
+ } |
+ #endif |
+ return SkHalfToFloat_01(h); |
+ } |
+ |
+ uint64_t floatToHalf(const Sk4f& f) const { |
+ #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
+ if (has_f16c) { |
+ __m128 v = f.fVec; |
+ asm("vcvtps2ph $0, %[v], %[v]" : [v] "+x" (v)); |
+ |
+ uint64_t h; |
+ _mm_storel_epi64((__m128i*)&h, _mm_castps_si128(v)); |
+ return h; |
+ } |
+ #endif |
+ return SkFloatToHalf_01(f); |
+ } |
+}; |
+ |
// ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ // |
// Like the serial versions in SkHalf.cpp, these are based on |