Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkHalf_DEFINED | 8 #ifndef SkHalf_DEFINED |
| 9 #define SkHalf_DEFINED | 9 #define SkHalf_DEFINED |
| 10 | 10 |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 58 // denorm value V*2^-14 into a normalized float K + V*2^-14. Then subtract off K. | 58 // denorm value V*2^-14 into a normalized float K + V*2^-14. Then subtract off K. |
| 59 const Sk4i K = ((127-15) + (23-10) + 1) << 23; | 59 const Sk4i K = ((127-15) + (23-10) + 1) << 23; |
| 60 Sk4i mask_K = positive | K; | 60 Sk4i mask_K = positive | K; |
| 61 Sk4f denorm = Sk4f::Load(&mask_K) - Sk4f::Load(&K); | 61 Sk4f denorm = Sk4f::Load(&mask_K) - Sk4f::Load(&K); |
| 62 | 62 |
| 63 Sk4i merged = (sign << 16) | is_denorm.thenElse(Sk4i::Load(&denorm), norm); | 63 Sk4i merged = (sign << 16) | is_denorm.thenElse(Sk4i::Load(&denorm), norm); |
| 64 return Sk4f::Load(&merged); | 64 return Sk4f::Load(&merged); |
| 65 #endif | 65 #endif |
| 66 } | 66 } |
| 67 | 67 |
| 68 static inline uint64_t SkFloatToHalf_finite(const Sk4f& fs) { | 68 static inline Sk4h SkFloatToVectorHalf_finite(const Sk4f& fs) { |
|
msarett
2016/07/18 22:09:09
I'm seeing significant benefit from having a versi
mtklein
2016/07/19 12:45:45
Cool cool cool. Less assembly's always better.
msarett
2016/07/19 15:24:49
That's potentially true.
I only looked at the Int
| |
| 69 uint64_t r; | |
| 70 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) | 69 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) |
| 71 float32x4_t vec = fs.fVec; | 70 float32x4_t vec = fs.fVec; |
| 72 asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec) | 71 asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec) |
| 73 "fmov %[r], %d[vec] \n" // vst1_f16(&r, ...) | 72 : [vec] "+w" (vec)); // +w: read-write NEON register |
| 74 : [r] "=r" (r) // =r: write-only 64-bit general register | 73 return vreinterpret_u16_f32(vget_low_f32(vec)); |
| 75 , [vec] "+w" (vec)); // +w: read-write NEON register | |
| 76 #else | 74 #else |
| 77 Sk4i bits = Sk4i::Load(&fs), | 75 Sk4i bits = Sk4i::Load(&fs), |
| 78 sign = bits & 0x80000000, // Save the sign bit for later... | 76 sign = bits & 0x80000000, // Save the sign bit for later... |
| 79 positive = bits ^ sign, // ...but strip it off for now. | 77 positive = bits ^ sign, // ...but strip it off for now. |
| 80 will_be_denorm = positive < ((127-15+1) << 23); // positve < smallest normal half? | 78 will_be_denorm = positive < ((127-15+1) << 23); // positve < smallest normal half? |
| 81 | 79 |
| 82 // For normal half floats, adjust the exponent from 127 bias to 15 bias, | 80 // For normal half floats, adjust the exponent from 127 bias to 15 bias, |
| 83 // then drop the bottom 13 mantissa bits. | 81 // then drop the bottom 13 mantissa bits. |
| 84 Sk4i norm = (positive - ((127 - 15) << 23)) >> 13; | 82 Sk4i norm = (positive - ((127 - 15) << 23)) >> 13; |
| 85 | 83 |
| 86 // This mechanically inverts the denorm half -> normal float conversion above. | 84 // This mechanically inverts the denorm half -> normal float conversion above. |
| 87 // Knowning that and reading its explanation will leave you feeling more confident | 85 // Knowning that and reading its explanation will leave you feeling more confident |
| 88 // than reading my best attempt at explaining this directly. | 86 // than reading my best attempt at explaining this directly. |
| 89 const Sk4i K = ((127-15) + (23-10) + 1) << 23; | 87 const Sk4i K = ((127-15) + (23-10) + 1) << 23; |
| 90 Sk4f plus_K = Sk4f::Load(&positive) + Sk4f::Load(&K); | 88 Sk4f plus_K = Sk4f::Load(&positive) + Sk4f::Load(&K); |
| 91 Sk4i denorm = Sk4i::Load(&plus_K) ^ K; | 89 Sk4i denorm = Sk4i::Load(&plus_K) ^ K; |
| 92 | 90 |
| 93 Sk4i merged = (sign >> 16) | will_be_denorm.thenElse(denorm, norm); | 91 Sk4i merged = (sign >> 16) | will_be_denorm.thenElse(denorm, norm); |
| 94 SkNx_cast<uint16_t>(merged).store(&r); | 92 return SkNx_cast<uint16_t>(merged); |
| 95 #endif | 93 #endif |
| 94 } | |
| 95 | |
| 96 static inline uint64_t SkFloatToHalf_finite(const Sk4f& fs) { | |
| 97 uint64_t r; | |
| 98 Sk4h v = SkFloatToVectorHalf_finite(fs); | |
| 99 v.store(&r); | |
| 96 return r; | 100 return r; |
| 97 } | 101 } |
| 98 | 102 |
| 99 #endif | 103 #endif |
| OLD | NEW |