OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkHalf_DEFINED | 8 #ifndef SkHalf_DEFINED |
9 #define SkHalf_DEFINED | 9 #define SkHalf_DEFINED |
10 | 10 |
(...skipping 19 matching lines...) Expand all Loading... |
30 static inline Sk4f SkHalfToFloat_finite(uint64_t); | 30 static inline Sk4f SkHalfToFloat_finite(uint64_t); |
31 static inline Sk4h SkFloatToHalf_finite(const Sk4f&); | 31 static inline Sk4h SkFloatToHalf_finite(const Sk4f&); |
32 | 32 |
33 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ // | 33 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ // |
34 | 34 |
35 // Like the serial versions in SkHalf.cpp, these are based on | 35 // Like the serial versions in SkHalf.cpp, these are based on |
36 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ | 36 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ |
37 | 37 |
38 // GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use i
nline assembly. | 38 // GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use i
nline assembly. |
39 | 39 |
40 static inline Sk4f SkHalfToFloat_finite(uint64_t hs) { | 40 static inline Sk4f SkHalfToFloat_finite(const Sk4h& hs) { |
41 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) | 41 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) |
42 float32x4_t fs; | 42 float32x4_t fs; |
43 asm ("fmov %d[fs], %[hs] \n" // vcreate_f16(hs) | 43 asm ("fcvtl %[fs].4s, %[hs].4h \n" // vcvt_f32_f16(...) |
44 "fcvtl %[fs].4s, %[fs].4h \n" // vcvt_f32_f16(...) | |
45 : [fs] "=w" (fs) // =w: write-only NEON register | 44 : [fs] "=w" (fs) // =w: write-only NEON register |
46 : [hs] "r" (hs)); // r: read-only 64-bit general regis
ter | 45 : [hs] "w" (hs.fVec)); // w: read-only NEON register |
47 return fs; | 46 return fs; |
48 #else | 47 #else |
49 Sk4i bits = SkNx_cast<int>(Sk4h::Load(&hs)), // Expand to 32 bit. | 48 Sk4i bits = SkNx_cast<int>(hs), // Expand to 32 bit. |
50 sign = bits & 0x00008000, // Save the sign bit for
later... | 49 sign = bits & 0x00008000, // Save the sign bit for later... |
51 positive = bits ^ sign, // ...but strip it off f
or now. | 50 positive = bits ^ sign, // ...but strip it off for now. |
52 is_denorm = positive < (1<<10); // Exponent == 0? | 51 is_denorm = positive < (1<<10); // Exponent == 0? |
53 | 52 |
54 // For normal half floats, extend the mantissa by 13 zero bits, | 53 // For normal half floats, extend the mantissa by 13 zero bits, |
55 // then adjust the exponent from 15 bias to 127 bias. | 54 // then adjust the exponent from 15 bias to 127 bias. |
56 Sk4i norm = (positive << 13) + ((127 - 15) << 23); | 55 Sk4i norm = (positive << 13) + ((127 - 15) << 23); |
57 | 56 |
58 // For denorm half floats, mask in the exponent-only float K that turns our | 57 // For denorm half floats, mask in the exponent-only float K that turns our |
59 // denorm value V*2^-14 into a normalized float K + V*2^-14. Then subtract
off K. | 58 // denorm value V*2^-14 into a normalized float K + V*2^-14. Then subtract
off K. |
60 const Sk4i K = ((127-15) + (23-10) + 1) << 23; | 59 const Sk4i K = ((127-15) + (23-10) + 1) << 23; |
61 Sk4i mask_K = positive | K; | 60 Sk4i mask_K = positive | K; |
62 Sk4f denorm = Sk4f::Load(&mask_K) - Sk4f::Load(&K); | 61 Sk4f denorm = Sk4f::Load(&mask_K) - Sk4f::Load(&K); |
63 | 62 |
64 Sk4i merged = (sign << 16) | is_denorm.thenElse(Sk4i::Load(&denorm), norm); | 63 Sk4i merged = (sign << 16) | is_denorm.thenElse(Sk4i::Load(&denorm), norm); |
65 return Sk4f::Load(&merged); | 64 return Sk4f::Load(&merged); |
66 #endif | 65 #endif |
67 } | 66 } |
68 | 67 |
| 68 static inline Sk4f SkHalfToFloat_finite(uint64_t hs) { |
| 69 return SkHalfToFloat_finite(Sk4h::Load(&hs)); |
| 70 } |
| 71 |
69 static inline Sk4h SkFloatToHalf_finite(const Sk4f& fs) { | 72 static inline Sk4h SkFloatToHalf_finite(const Sk4f& fs) { |
70 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) | 73 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) |
71 float32x4_t vec = fs.fVec; | 74 float32x4_t vec = fs.fVec; |
72 asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec) | 75 asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec) |
73 : [vec] "+w" (vec)); // +w: read-write NEON register | 76 : [vec] "+w" (vec)); // +w: read-write NEON register |
74 return vreinterpret_u16_f32(vget_low_f32(vec)); | 77 return vreinterpret_u16_f32(vget_low_f32(vec)); |
75 #else | 78 #else |
76 Sk4i bits = Sk4i::Load(&fs), | 79 Sk4i bits = Sk4i::Load(&fs), |
77 sign = bits & 0x80000000, // Save the sign bit f
or later... | 80 sign = bits & 0x80000000, // Save the sign bit f
or later... |
78 positive = bits ^ sign, // ...but strip it off
for now. | 81 positive = bits ^ sign, // ...but strip it off
for now. |
79 will_be_denorm = positive < ((127-15+1) << 23); // positive < smallest
normal half? | 82 will_be_denorm = positive < ((127-15+1) << 23); // positive < smallest
normal half? |
80 | 83 |
81 // For normal half floats, adjust the exponent from 127 bias to 15 bias, | 84 // For normal half floats, adjust the exponent from 127 bias to 15 bias, |
82 // then drop the bottom 13 mantissa bits. | 85 // then drop the bottom 13 mantissa bits. |
83 Sk4i norm = (positive - ((127 - 15) << 23)) >> 13; | 86 Sk4i norm = (positive - ((127 - 15) << 23)) >> 13; |
84 | 87 |
85 // This mechanically inverts the denorm half -> normal float conversion abov
e. | 88 // This mechanically inverts the denorm half -> normal float conversion abov
e. |
86 // Knowing that and reading its explanation will leave you feeling more con
fident | 89 // Knowing that and reading its explanation will leave you feeling more con
fident |
87 // than reading my best attempt at explaining this directly. | 90 // than reading my best attempt at explaining this directly. |
88 const Sk4i K = ((127-15) + (23-10) + 1) << 23; | 91 const Sk4i K = ((127-15) + (23-10) + 1) << 23; |
89 Sk4f plus_K = Sk4f::Load(&positive) + Sk4f::Load(&K); | 92 Sk4f plus_K = Sk4f::Load(&positive) + Sk4f::Load(&K); |
90 Sk4i denorm = Sk4i::Load(&plus_K) ^ K; | 93 Sk4i denorm = Sk4i::Load(&plus_K) ^ K; |
91 | 94 |
92 Sk4i merged = (sign >> 16) | will_be_denorm.thenElse(denorm, norm); | 95 Sk4i merged = (sign >> 16) | will_be_denorm.thenElse(denorm, norm); |
93 return SkNx_cast<uint16_t>(merged); | 96 return SkNx_cast<uint16_t>(merged); |
94 #endif | 97 #endif |
95 } | 98 } |
96 | 99 |
97 #endif | 100 #endif |
OLD | NEW |