OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkHalf_DEFINED | 8 #ifndef SkHalf_DEFINED |
9 #define SkHalf_DEFINED | 9 #define SkHalf_DEFINED |
10 | 10 |
11 #include "SkNx.h" | 11 #include "SkNx.h" |
12 #include "SkTypes.h" | 12 #include "SkTypes.h" |
13 | 13 |
14 // 16-bit floating point value | 14 // 16-bit floating point value |
15 // format is 1 bit sign, 5 bits exponent, 10 bits mantissa | 15 // format is 1 bit sign, 5 bits exponent, 10 bits mantissa |
16 // only used for storage | 16 // only used for storage |
17 typedef uint16_t SkHalf; | 17 typedef uint16_t SkHalf; |
18 | 18 |
19 static constexpr uint16_t SK_HalfMin = 0x0400; // 2^-14 (minimum positive n
ormal value) | 19 static constexpr uint16_t SK_HalfMin = 0x0400; // 2^-14 (minimum positive n
ormal value) |
20 static constexpr uint16_t SK_HalfMax = 0x7bff; // 65504 | 20 static constexpr uint16_t SK_HalfMax = 0x7bff; // 65504 |
21 static constexpr uint16_t SK_HalfEpsilon = 0x1400; // 2^-10 | 21 static constexpr uint16_t SK_HalfEpsilon = 0x1400; // 2^-10 |
22 static constexpr uint16_t SK_Half1 = 0x3C00; // 1 | 22 static constexpr uint16_t SK_Half1 = 0x3C00; // 1 |
23 | 23 |
24 // convert between half and single precision floating point | 24 // convert between half and single precision floating point |
25 float SkHalfToFloat(SkHalf h); | 25 float SkHalfToFloat(SkHalf h); |
26 SkHalf SkFloatToHalf(float f); | 26 SkHalf SkFloatToHalf(float f); |
27 | 27 |
28 // Convert between half and single precision floating point, | 28 // Convert between half and single precision floating point, |
29 // assuming inputs and outputs are both finite. | 29 // assuming inputs and outputs are both finite, and |
30 static inline Sk4f SkHalfToFloat_finite(uint64_t); | 30 // flushing values which would be denormal half floats to zero. |
31 static inline Sk4h SkFloatToHalf_finite(const Sk4f&); | 31 static inline Sk4f SkHalfToFloat_finite_ftz(uint64_t); |
| 32 static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f&); |
32 | 33 |
33 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ // | 34 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ // |
34 | 35 |
35 // Like the serial versions in SkHalf.cpp, these are based on | 36 // Like the serial versions in SkHalf.cpp, these are based on |
36 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ | 37 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ |
37 | 38 |
38 // GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use i
nline assembly. | 39 // GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use i
nline assembly. |
39 | 40 |
40 static inline Sk4f SkHalfToFloat_finite(const Sk4h& hs) { | 41 static inline Sk4f SkHalfToFloat_finite_ftz(const Sk4h& hs) { |
41 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) | 42 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) |
42 float32x4_t fs; | 43 float32x4_t fs; |
43 asm ("fcvtl %[fs].4s, %[hs].4h \n" // vcvt_f32_f16(...) | 44 asm ("fcvtl %[fs].4s, %[hs].4h \n" // vcvt_f32_f16(...) |
44 : [fs] "=w" (fs) // =w: write-only NEON register | 45 : [fs] "=w" (fs) // =w: write-only NEON register |
45 : [hs] "w" (hs.fVec)); // w: read-only NEON register | 46 : [hs] "w" (hs.fVec)); // w: read-only NEON register |
46 return fs; | 47 return fs; |
47 #else | 48 #else |
48 Sk4i bits = SkNx_cast<int>(hs), // Expand to 32 bit. | 49 Sk4i bits = SkNx_cast<int>(hs), // Expand to 32 bit. |
49 sign = bits & 0x00008000, // Save the sign bit for later... | 50 sign = bits & 0x00008000, // Save the sign bit for later... |
50 positive = bits ^ sign, // ...but strip it off for now. | 51 positive = bits ^ sign, // ...but strip it off for now. |
51 is_denorm = positive < (1<<10); // Exponent == 0? | 52 is_norm = 0x03ff < positive; // Exponent > 0? |
52 | 53 |
53 // For normal half floats, extend the mantissa by 13 zero bits, | 54 // For normal half floats, extend the mantissa by 13 zero bits, |
54 // then adjust the exponent from 15 bias to 127 bias. | 55 // then adjust the exponent from 15 bias to 127 bias. |
55 Sk4i norm = (positive << 13) + ((127 - 15) << 23); | 56 Sk4i norm = (positive << 13) + ((127 - 15) << 23); |
56 | 57 |
57 // For denorm half floats, mask in the exponent-only float K that turns our | 58 Sk4i merged = (sign << 16) | (norm & is_norm); |
58 // denorm value V*2^-14 into a normalized float K + V*2^-14. Then subtract
off K. | |
59 const Sk4i K = ((127-15) + (23-10) + 1) << 23; | |
60 Sk4i mask_K = positive | K; | |
61 Sk4f denorm = Sk4f::Load(&mask_K) - Sk4f::Load(&K); | |
62 | |
63 Sk4i merged = (sign << 16) | is_denorm.thenElse(Sk4i::Load(&denorm), norm); | |
64 return Sk4f::Load(&merged); | 59 return Sk4f::Load(&merged); |
65 #endif | 60 #endif |
66 } | 61 } |
67 | 62 |
68 static inline Sk4f SkHalfToFloat_finite(uint64_t hs) { | 63 static inline Sk4f SkHalfToFloat_finite_ftz(uint64_t hs) { |
69 return SkHalfToFloat_finite(Sk4h::Load(&hs)); | 64 return SkHalfToFloat_finite_ftz(Sk4h::Load(&hs)); |
70 } | 65 } |
71 | 66 |
72 static inline Sk4h SkFloatToHalf_finite(const Sk4f& fs) { | 67 static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f& fs) { |
73 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) | 68 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) |
74 float32x4_t vec = fs.fVec; | 69 float32x4_t vec = fs.fVec; |
75 asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec) | 70 asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec) |
76 : [vec] "+w" (vec)); // +w: read-write NEON register | 71 : [vec] "+w" (vec)); // +w: read-write NEON register |
77 return vreinterpret_u16_f32(vget_low_f32(vec)); | 72 return vreinterpret_u16_f32(vget_low_f32(vec)); |
78 #else | 73 #else |
79 Sk4i bits = Sk4i::Load(&fs), | 74 Sk4i bits = Sk4i::Load(&fs), |
80 sign = bits & 0x80000000, // Save the sign bit f
or later... | 75 sign = bits & 0x80000000, // Save the sign bit for later..
. |
81 positive = bits ^ sign, // ...but strip it off
for now. | 76 positive = bits ^ sign, // ...but strip it off for now. |
82 will_be_denorm = positive < ((127-15+1) << 23); // positve < smallest
normal half? | 77 will_be_norm = 0x387fdfff < positive; // greater than largest denorm h
alf? |
83 | 78 |
84 // For normal half floats, adjust the exponent from 127 bias to 15 bias, | 79 // For normal half floats, adjust the exponent from 127 bias to 15 bias, |
85 // then drop the bottom 13 mantissa bits. | 80 // then drop the bottom 13 mantissa bits. |
86 Sk4i norm = (positive - ((127 - 15) << 23)) >> 13; | 81 Sk4i norm = (positive - ((127 - 15) << 23)) >> 13; |
87 | 82 |
88 // This mechanically inverts the denorm half -> normal float conversion abov
e. | 83 Sk4i merged = (sign >> 16) | (will_be_norm & norm); |
89 // Knowning that and reading its explanation will leave you feeling more con
fident | |
90 // than reading my best attempt at explaining this directly. | |
91 const Sk4i K = ((127-15) + (23-10) + 1) << 23; | |
92 Sk4f plus_K = Sk4f::Load(&positive) + Sk4f::Load(&K); | |
93 Sk4i denorm = Sk4i::Load(&plus_K) ^ K; | |
94 | |
95 Sk4i merged = (sign >> 16) | will_be_denorm.thenElse(denorm, norm); | |
96 return SkNx_cast<uint16_t>(merged); | 84 return SkNx_cast<uint16_t>(merged); |
97 #endif | 85 #endif |
98 } | 86 } |
99 | 87 |
100 #endif | 88 #endif |
OLD | NEW |