| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkHalf_DEFINED | 8 #ifndef SkHalf_DEFINED |
| 9 #define SkHalf_DEFINED | 9 #define SkHalf_DEFINED |
| 10 | 10 |
| 11 #include "SkOpts.h" |
| 11 #include "SkNx.h" | 12 #include "SkNx.h" |
| 12 #include "SkTypes.h" | 13 #include "SkTypes.h" |
| 13 | 14 |
| 14 // 16-bit floating point value | 15 // 16-bit floating point value |
| 15 // format is 1 bit sign, 5 bits exponent, 10 bits mantissa | 16 // format is 1 bit sign, 5 bits exponent, 10 bits mantissa |
| 16 // only used for storage | 17 // only used for storage |
| 17 typedef uint16_t SkHalf; | 18 typedef uint16_t SkHalf; |
| 18 | 19 |
| 19 #define SK_HalfMin 0x0400 // 2^-14 (minimum positive normal value) | 20 #define SK_HalfMin 0x0400 // 2^-14 (minimum positive normal value) |
| 20 #define SK_HalfMax 0x7bff // 65504 (maximum finite value) | 21 #define SK_HalfMax 0x7bff // 65504 (maximum finite value) |
| 21 #define SK_HalfEpsilon 0x1400 // 2^-10 (gap between 1.0 and the next representable half) | 22 #define SK_HalfEpsilon 0x1400 // 2^-10 (gap between 1.0 and the next representable half) |
| 22 | 23 |
| 23 // convert between half and single precision floating point | 24 // convert between half and single precision floating point |
| 24 float SkHalfToFloat(SkHalf h); | 25 float SkHalfToFloat(SkHalf h); |
| 25 SkHalf SkFloatToHalf(float f); | 26 SkHalf SkFloatToHalf(float f); |
| 26 | 27 |
| 27 // Convert between half and single precision floating point, but pull any dirty | 28 // Convert between half and single precision floating point, but pull any dirty |
| 28 // trick we can to make it faster as long as it's correct enough for values in [
0,1]. | 29 // trick we can to make it faster as long as it's correct enough for values in [
0,1]. |
| 29 static inline Sk4f SkHalfToFloat_01(uint64_t); | 30 static inline Sk4f SkHalfToFloat_01(uint64_t); |
| 30 static inline uint64_t SkFloatToHalf_01(const Sk4f&); | 31 static inline uint64_t SkFloatToHalf_01(const Sk4f&); |
| 31 | 32 |
| 33 struct SkFloatConvert { |
| 34 // The compiler can't hoist SkOpts::has_f16c out of loops, so we cache it lo
cally. |
| 35 // TODO: cache a similar has_fp16 capability flag for ARMv7. |
| 36 const bool has_f16c = SkOpts::has_f16c; |
| 37 |
| 38 Sk4f halfToFloat(uint64_t h) const { |
| 39 #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
| 40 if (has_f16c) { |
| 41 __m128 v = _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)&h)); |
| 42 asm("vcvtph2ps %[v], %[v]" : [v] "+x" (v)); |
| 43 return v; |
| 44 } |
| 45 #endif |
| 46 return SkHalfToFloat_01(h); |
| 47 } |
| 48 |
| 49 uint64_t floatToHalf(const Sk4f& f) const { |
| 50 #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
| 51 if (has_f16c) { |
| 52 __m128 v = f.fVec; |
| 53 asm("vcvtps2ph $0, %[v], %[v]" : [v] "+x" (v)); |
| 54 |
| 55 uint64_t h; |
| 56 _mm_storel_epi64((__m128i*)&h, _mm_castps_si128(v)); |
| 57 return h; |
| 58 } |
| 59 #endif |
| 60 return SkFloatToHalf_01(f); |
| 61 } |
| 62 }; |
| 63 |
| 32 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ // | 64 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ // |
| 33 | 65 |
| 34 // Like the serial versions in SkHalf.cpp, these are based on | 66 // Like the serial versions in SkHalf.cpp, these are based on |
| 35 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ | 67 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ |
| 36 | 68 |
| 37 // GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use i
nline assembly. | 69 // GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use i
nline assembly. |
| 38 | 70 |
| 39 static inline Sk4f SkHalfToFloat_01(uint64_t hs) { | 71 static inline Sk4f SkHalfToFloat_01(uint64_t hs) { |
| 40 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) | 72 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) |
| 41 float32x4_t fs; | 73 float32x4_t fs; |
| (...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 115 } | 147 } |
| 116 r = (uint64_t)hs[3] << 48 | 148 r = (uint64_t)hs[3] << 48 |
| 117 | (uint64_t)hs[2] << 32 | 149 | (uint64_t)hs[2] << 32 |
| 118 | (uint64_t)hs[1] << 16 | 150 | (uint64_t)hs[1] << 16 |
| 119 | (uint64_t)hs[0] << 0; | 151 | (uint64_t)hs[0] << 0; |
| 120 #endif | 152 #endif |
| 121 return r; | 153 return r; |
| 122 } | 154 } |
| 123 | 155 |
| 124 #endif | 156 #endif |
| OLD | NEW |