OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkHalf_DEFINED | 8 #ifndef SkHalf_DEFINED |
9 #define SkHalf_DEFINED | 9 #define SkHalf_DEFINED |
10 | 10 |
11 #include "SkNx.h" | |
12 #include "SkTypes.h" | 11 #include "SkTypes.h" |
13 | 12 |
14 // 16-bit floating point value | 13 // 16-bit floating point value |
15 // format is 1 bit sign, 5 bits exponent, 10 bits mantissa | 14 // format is 1 bit sign, 5 bits exponent, 10 bits mantissa |
16 // only used for storage | 15 // only used for storage |
17 typedef uint16_t SkHalf; | 16 typedef uint16_t SkHalf; |
18 | 17 |
19 #define SK_HalfMin 0x0400 // 2^-14 (minimum positive normal value) | 18 #define SK_HalfMin 0x0400 // 2^-14 (minimum positive normal value) |
20 #define SK_HalfMax 0x7bff // 65504 | 19 #define SK_HalfMax 0x7bff // 65504 |
21 #define SK_HalfEpsilon 0x1400 // 2^-10 | 20 #define SK_HalfEpsilon 0x1400 // 2^-10 |
22 | 21 |
23 // convert between half and single precision floating point | 22 // convert between half and single precision floating point |
24 float SkHalfToFloat(SkHalf h); | 23 float SkHalfToFloat(SkHalf h); |
25 SkHalf SkFloatToHalf(float f); | 24 SkHalf SkFloatToHalf(float f); |
26 | 25 |
27 // Convert between half and single precision floating point, but pull any dirty | |
28 // trick we can to make it faster as long as it's correct enough for values in [
0,1]. | |
29 static inline Sk4f SkHalfToFloat_01(uint64_t); | |
30 static inline uint64_t SkFloatToHalf_01(const Sk4f&); | |
31 | |
32 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ // | |
33 | |
34 // Like the serial versions in SkHalf.cpp, these are based on | |
35 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ | |
36 | |
37 // TODO: NEON versions | |
38 static inline Sk4f SkHalfToFloat_01(uint64_t hs) { | |
39 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | |
40 // Load our 16-bit floats into the bottom 16 bits of each 32-bit lane, with
zeroes on top. | |
41 __m128i h = _mm_unpacklo_epi16(_mm_loadl_epi64((const __m128i*)&hs), _mm_set
zero_si128()); | |
42 | |
43 // Fork into two paths, depending on whether the 16-bit float is denormalize
d. | |
44 __m128 is_denorm = _mm_castsi128_ps(_mm_cmplt_epi32(h, _mm_set1_epi32(0x0400
))); | |
45 | |
46 // For denormal lanes (h < 0x0400), OR-ing h into the low mantissa bits of 0.5f gives 0.5 + h*2^-24; subtracting 0.5f leaves h*2^-24. | |
47 const __m128 half = _mm_set1_ps(0.5f); | |
48 __m128 denorm = _mm_sub_ps(_mm_or_ps(_mm_castsi128_ps(h), half), half); | |
49 | |
50 // If we're normalized, just shift ourselves so the exponent/mantissa dividi
ng line | |
51 // is correct, then re-bias the exponent from 15 to 127. | |
52 __m128 norm = _mm_castsi128_ps(_mm_add_epi32(_mm_slli_epi32(h, 13), | |
53 _mm_set1_epi32((127-15) << 23))
); | |
54 | |
55 return _mm_or_ps(_mm_and_ps (is_denorm, denorm), | |
56 _mm_andnot_ps(is_denorm, norm)); | |
57 #else | |
58 float fs[4]; | |
59 for (int i = 0; i < 4; i++) { | |
60 fs[i] = SkHalfToFloat(hs >> (i*16)); | |
61 } | |
62 return Sk4f::Load(fs); | |
63 #endif | 26 #endif |
64 } | |
65 | |
66 static inline uint64_t SkFloatToHalf_01(const Sk4f& fs) { | |
67 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | |
68 // Scale our floats down by a tiny power of 2 to pull up our mantissa bits, | |
69 // then shift back down to 16-bit float layout. This doesn't round, so can
be 1 bit small. | |
70 // The scale's bit pattern (15 << 23) is the float 2^-112, so the multiply re-biases the exponent from 127 to 15 before the shift by 13. | |
71 const __m128 scale = _mm_castsi128_ps(_mm_set1_epi32(15 << 23)); | |
72 __m128i h = _mm_srli_epi32(_mm_castps_si128(_mm_mul_ps(fs.fVec, scale)), 13)
; | |
73 | |
74 uint64_t r; | |
75 _mm_storel_epi64((__m128i*)&r, _mm_packs_epi32(h,h)); | |
76 return r; | |
77 #else | |
78 SkHalf hs[4]; | |
79 for (int i = 0; i < 4; i++) { | |
80 hs[i] = SkFloatToHalf(fs[i]); | |
81 } | |
82 return (uint64_t)hs[3] << 48 | |
83 | (uint64_t)hs[2] << 32 | |
84 | (uint64_t)hs[1] << 16 | |
85 | (uint64_t)hs[0] << 0; | |
86 #endif | |
87 } | |
88 | |
89 #endif | |
OLD | NEW |