| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkHalf_DEFINED | 8 #ifndef SkHalf_DEFINED |
| 9 #define SkHalf_DEFINED | 9 #define SkHalf_DEFINED |
| 10 | 10 |
| 11 #include "SkNx.h" | |
| 12 #include "SkTypes.h" | 11 #include "SkTypes.h" |
| 13 | 12 |
| 14 // 16-bit floating point value | 13 // 16-bit floating point value |
| 15 // format is 1 bit sign, 5 bits exponent, 10 bits mantissa | 14 // format is 1 bit sign, 5 bits exponent, 10 bits mantissa |
| 16 // only used for storage | 15 // only used for storage |
| 17 typedef uint16_t SkHalf; | 16 typedef uint16_t SkHalf; |
| 18 | 17 |
| 19 #define SK_HalfMin 0x0400 // 2^-14 (minimum positive normal value) | 18 #define SK_HalfMin 0x0400 // 2^-14 (minimum positive normal value) |
| 20 #define SK_HalfMax 0x7bff // 65504 | 19 #define SK_HalfMax 0x7bff // 65504 |
| 21 #define SK_HalfEpsilon 0x1400 // 2^-10 | 20 #define SK_HalfEpsilon 0x1400 // 2^-10 |
| 22 | 21 |
| 23 // convert between half and single precision floating point | 22 // convert between half and single precision floating point |
| 24 float SkHalfToFloat(SkHalf h); | 23 float SkHalfToFloat(SkHalf h); |
| 25 SkHalf SkFloatToHalf(float f); | 24 SkHalf SkFloatToHalf(float f); |
| 26 | 25 |
| 27 // Convert between half and single precision floating point, but pull any dirty | |
| 28 // trick we can to make it faster as long as it's correct enough for values in [0,1]. | |
| 29 static inline Sk4f SkHalfToFloat_01(uint64_t); | |
| 30 static inline uint64_t SkFloatToHalf_01(const Sk4f&); | |
| 31 | |
| 32 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ // | |
| 33 | |
| 34 // Like the serial versions in SkHalf.cpp, these are based on | |
| 35 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ | |
| 36 | |
| 37 // TODO: NEON versions | |
| 38 static inline Sk4f SkHalfToFloat_01(uint64_t hs) { | |
| 39 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | |
| 40 // Load our 16-bit floats into the bottom 16 bits of each 32-bit lane, with zeroes on top. | |
| 41 __m128i h = _mm_unpacklo_epi16(_mm_loadl_epi64((const __m128i*)&hs), _mm_setzero_si128()); | |
| 42 | |
| 43 // Fork into two paths, depending on whether the 16-bit float is denormalized. | |
| 44 __m128 is_denorm = _mm_castsi128_ps(_mm_cmplt_epi32(h, _mm_set1_epi32(0x0400))); | |
| 45 | |
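| 46 // A denormal half has the value h * 2^-24. 0.5f (0x3f000000) has a mantissa ulp of 2^-24, so OR-ing h into its mantissa gives 0.5 + h * 2^-24, and subtracting 0.5 leaves exactly that value. | |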
| 47 const __m128 half = _mm_set1_ps(0.5f); | |
| 48 __m128 denorm = _mm_sub_ps(_mm_or_ps(_mm_castsi128_ps(h), half), half); | |
| 49 | |
| 50 // If we're normalized, just shift ourselves so the exponent/mantissa dividing line | |
| 51 // is correct, then re-bias the exponent from 15 to 127. | |
| 52 __m128 norm = _mm_castsi128_ps(_mm_add_epi32(_mm_slli_epi32(h, 13), | |
| 53 _mm_set1_epi32((127-15) << 23))); | |
| 54 | |
| 55 return _mm_or_ps(_mm_and_ps (is_denorm, denorm), | |
| 56 _mm_andnot_ps(is_denorm, norm)); | |
| 57 #else | |
| 58 float fs[4]; | |
| 59 for (int i = 0; i < 4; i++) { | |
| 60 fs[i] = SkHalfToFloat(hs >> (i*16)); | |
| 61 } | |
| 62 return Sk4f::Load(fs); | |
| 63 #endif | 26 #endif |
| 64 } | |
| 65 | |
| 66 static inline uint64_t SkFloatToHalf_01(const Sk4f& fs) { | |
| 67 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | |
| 68 // Scale our floats down by a tiny power of 2 to pull up our mantissa bits, | |
| 69 // then shift back down to 16-bit float layout. This doesn't round, so can be 1 bit small. | |
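| 70 // The scale's bit pattern (15 << 23) is the float 2^(15-127) = 2^-112: multiplying by it re-biases the exponent from 127 to 15, so after the shift it lands in the half's 5-bit exponent field. | |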
| 71 const __m128 scale = _mm_castsi128_ps(_mm_set1_epi32(15 << 23)); | |
| 72 __m128i h = _mm_srli_epi32(_mm_castps_si128(_mm_mul_ps(fs.fVec, scale)), 13); | |
| 73 | |
| 74 uint64_t r; | |
| 75 _mm_storel_epi64((__m128i*)&r, _mm_packs_epi32(h,h)); | |
| 76 return r; | |
| 77 #else | |
| 78 SkHalf hs[4]; | |
| 79 for (int i = 0; i < 4; i++) { | |
| 80 hs[i] = SkFloatToHalf(fs[i]); | |
| 81 } | |
| 82 return (uint64_t)hs[3] << 48 | |
| 83 | (uint64_t)hs[2] << 32 | |
| 84 | (uint64_t)hs[1] << 16 | |
| 85 | (uint64_t)hs[0] << 0; | |
| 86 #endif | |
| 87 } | |
| 88 | |
| 89 #endif | |
| OLD | NEW |