OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkHalf_DEFINED | 8 #ifndef SkHalf_DEFINED |
9 #define SkHalf_DEFINED | 9 #define SkHalf_DEFINED |
10 | 10 |
| 11 #include "SkNx.h" |
11 #include "SkTypes.h" | 12 #include "SkTypes.h" |
12 | 13 |
13 // 16-bit floating point value | 14 // 16-bit floating point value |
14 // format is 1 bit sign, 5 bits exponent, 10 bits mantissa | 15 // format is 1 bit sign, 5 bits exponent, 10 bits mantissa |
15 // only used for storage | 16 // only used for storage |
16 typedef uint16_t SkHalf; | 17 typedef uint16_t SkHalf; |
17 | 18 |
18 #define SK_HalfMin 0x0400 // 2^-14 (minimum positive normal value) | 19 #define SK_HalfMin 0x0400 // 2^-14 (minimum positive normal value) |
19 #define SK_HalfMax 0x7bff // 65504 | 20 #define SK_HalfMax 0x7bff // 65504 |
20 #define SK_HalfEpsilon 0x1400 // 2^-10 | 21 #define SK_HalfEpsilon 0x1400 // 2^-10 |
21 | 22 |
22 // convert between half and single precision floating point | 23 // convert between half and single precision floating point |
23 float SkHalfToFloat(SkHalf h); | 24 float SkHalfToFloat(SkHalf h); |
24 SkHalf SkFloatToHalf(float f); | 25 SkHalf SkFloatToHalf(float f); |
25 | 26 |
| 27 // Convert between half and single precision floating point, but pull any dirty |
| 28 // trick we can to make it faster as long as it's correct enough for values in [0,1]. |
| 29 static inline Sk4f SkHalfToFloat_01(uint64_t); |
| 30 static inline uint64_t SkFloatToHalf_01(const Sk4f&); |
| 31 |
| 32 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ // |
| 33 |
| 34 // Like the serial versions in SkHalf.cpp, these are based on |
| 35 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ |
| 36 |
| 37 // TODO: NEON versions |
| 38 static inline Sk4f SkHalfToFloat_01(uint64_t hs) { |
| 39 #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
| 40 // Load our 16-bit floats into the bottom 16 bits of each 32-bit lane, with
zeroes on top. |
| 41 __m128i h = _mm_unpacklo_epi16(_mm_loadl_epi64((const __m128i*)&hs), _mm_set
zero_si128()); |
| 42 |
| 43 // Fork into two paths, depending on whether the 16-bit float is denormalize
d. |
| 44 __m128 is_denorm = _mm_castsi128_ps(_mm_cmplt_epi32(h, _mm_set1_epi32(0x0400
))); |
| 45 |
| 46 // Denorm path: OR-ing h (< 0x0400) into the mantissa of 0.5f yields 0.5 + h*2^-24, so subtracting 0.5 leaves h*2^-24, the denormal's value. |
| 47 const __m128 half = _mm_set1_ps(0.5f); |
| 48 __m128 denorm = _mm_sub_ps(_mm_or_ps(_mm_castsi128_ps(h), half), half); |
| 49 |
| 50 // If we're normalized, just shift ourselves so the exponent/mantissa dividi
ng line |
| 51 // is correct, then re-bias the exponent from 15 to 127. |
| 52 __m128 norm = _mm_castsi128_ps(_mm_add_epi32(_mm_slli_epi32(h, 13), |
| 53 _mm_set1_epi32((127-15) << 23))
); |
| 54 |
| 55 return _mm_or_ps(_mm_and_ps (is_denorm, denorm), |
| 56 _mm_andnot_ps(is_denorm, norm)); |
| 57 #else |
| 58 float fs[4]; |
| 59 for (int i = 0; i < 4; i++) { |
| 60 fs[i] = SkHalfToFloat(hs >> (i*16)); |
| 61 } |
| 62 return Sk4f::Load(fs); |
26 #endif | 63 #endif |
| 64 } |
| 65 |
| 66 static inline uint64_t SkFloatToHalf_01(const Sk4f& fs) { |
| 67 #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
| 68 // Scale our floats down by a tiny power of 2 to pull up our mantissa bits, |
| 69 // then shift back down to 16-bit float layout. This doesn't round, so can
be 1 bit small. |
| 70 // Scale factor: the bit pattern 15 << 23 is the float 2^(15-127) = 2^-112, so the multiply re-biases the exponent from 127 to 15. |
| 71 const __m128 scale = _mm_castsi128_ps(_mm_set1_epi32(15 << 23)); |
| 72 __m128i h = _mm_srli_epi32(_mm_castps_si128(_mm_mul_ps(fs.fVec, scale)), 13)
; |
| 73 |
| 74 uint64_t r; |
| 75 _mm_storel_epi64((__m128i*)&r, _mm_packs_epi32(h,h)); |
| 76 return r; |
| 77 #else |
| 78 SkHalf hs[4]; |
| 79 for (int i = 0; i < 4; i++) { |
| 80 hs[i] = SkFloatToHalf(fs[i]); |
| 81 } |
| 82 return (uint64_t)hs[3] << 48 |
| 83 | (uint64_t)hs[2] << 32 |
| 84 | (uint64_t)hs[1] << 16 |
| 85 | (uint64_t)hs[0] << 0; |
| 86 #endif |
| 87 } |
| 88 |
| 89 #endif |
OLD | NEW |