Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(581)

Side by Side Diff: src/core/SkHalf.h

Issue 2184753002: Add Sk4h_load4 for loading F16. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: typo Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | src/core/SkNx.h » ('j') | src/opts/SkNx_sse.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2014 Google Inc. 2 * Copyright 2014 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkHalf_DEFINED 8 #ifndef SkHalf_DEFINED
9 #define SkHalf_DEFINED 9 #define SkHalf_DEFINED
10 10
(...skipping 19 matching lines...) Expand all
30 static inline Sk4f SkHalfToFloat_finite(uint64_t); 30 static inline Sk4f SkHalfToFloat_finite(uint64_t);
31 static inline Sk4h SkFloatToHalf_finite(const Sk4f&); 31 static inline Sk4h SkFloatToHalf_finite(const Sk4f&);
32 32
33 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ // 33 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ //
34 34
35 // Like the serial versions in SkHalf.cpp, these are based on 35 // Like the serial versions in SkHalf.cpp, these are based on
36 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ 36 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
37 37
38 // GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use inline assembly. 38 // GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use inline assembly.
39 39
40 static inline Sk4f SkHalfToFloat_finite(uint64_t hs) { 40 static inline Sk4f SkHalfToFloat_finite(const Sk4h& hs) {
41 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) 41 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)
42 float32x4_t fs; 42 float32x4_t fs;
43 asm ("fmov %d[fs], %[hs] \n" // vcreate_f16(hs) 43 asm ("fcvtl %[fs].4s, %[hs].4h \n" // vcvt_f32_f16(...)
44 "fcvtl %[fs].4s, %[fs].4h \n" // vcvt_f32_f16(...)
45 : [fs] "=w" (fs) // =w: write-only NEON register 44 : [fs] "=w" (fs) // =w: write-only NEON register
46 : [hs] "r" (hs)); // r: read-only 64-bit general register 45 : [hs] "w" (hs.fVec)); // w: read-only NEON register
47 return fs; 46 return fs;
48 #else 47 #else
49 Sk4i bits = SkNx_cast<int>(Sk4h::Load(&hs)), // Expand to 32 bit. 48 Sk4i bits = SkNx_cast<int>(hs), // Expand to 32 bit.
50 sign = bits & 0x00008000, // Save the sign bit for later... 49 sign = bits & 0x00008000, // Save the sign bit for later...
51 positive = bits ^ sign, // ...but strip it off for now. 50 positive = bits ^ sign, // ...but strip it off for now.
52 is_denorm = positive < (1<<10); // Exponent == 0? 51 is_denorm = positive < (1<<10); // Exponent == 0?
53 52
54 // For normal half floats, extend the mantissa by 13 zero bits, 53 // For normal half floats, extend the mantissa by 13 zero bits,
55 // then adjust the exponent from 15 bias to 127 bias. 54 // then adjust the exponent from 15 bias to 127 bias.
56 Sk4i norm = (positive << 13) + ((127 - 15) << 23); 55 Sk4i norm = (positive << 13) + ((127 - 15) << 23);
57 56
58 // For denorm half floats, mask in the exponent-only float K that turns our 57 // For denorm half floats, mask in the exponent-only float K that turns our
59 // denorm value V*2^-14 into a normalized float K + V*2^-14. Then subtract off K. 58 // denorm value V*2^-14 into a normalized float K + V*2^-14. Then subtract off K.
60 const Sk4i K = ((127-15) + (23-10) + 1) << 23; 59 const Sk4i K = ((127-15) + (23-10) + 1) << 23;
61 Sk4i mask_K = positive | K; 60 Sk4i mask_K = positive | K;
62 Sk4f denorm = Sk4f::Load(&mask_K) - Sk4f::Load(&K); 61 Sk4f denorm = Sk4f::Load(&mask_K) - Sk4f::Load(&K);
63 62
64 Sk4i merged = (sign << 16) | is_denorm.thenElse(Sk4i::Load(&denorm), norm); 63 Sk4i merged = (sign << 16) | is_denorm.thenElse(Sk4i::Load(&denorm), norm);
65 return Sk4f::Load(&merged); 64 return Sk4f::Load(&merged);
66 #endif 65 #endif
67 } 66 }
68 67
68 static inline Sk4f SkHalfToFloat_finite(uint64_t hs) {
69 return SkHalfToFloat_finite(Sk4h::Load(&hs));
70 }
71
69 static inline Sk4h SkFloatToHalf_finite(const Sk4f& fs) { 72 static inline Sk4h SkFloatToHalf_finite(const Sk4f& fs) {
70 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) 73 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)
71 float32x4_t vec = fs.fVec; 74 float32x4_t vec = fs.fVec;
72 asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec) 75 asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec)
73 : [vec] "+w" (vec)); // +w: read-write NEON register 76 : [vec] "+w" (vec)); // +w: read-write NEON register
74 return vreinterpret_u16_f32(vget_low_f32(vec)); 77 return vreinterpret_u16_f32(vget_low_f32(vec));
75 #else 78 #else
76 Sk4i bits = Sk4i::Load(&fs), 79 Sk4i bits = Sk4i::Load(&fs),
77 sign = bits & 0x80000000, // Save the sign bit for later... 80 sign = bits & 0x80000000, // Save the sign bit for later...
78 positive = bits ^ sign, // ...but strip it off for now. 81 positive = bits ^ sign, // ...but strip it off for now.
79 will_be_denorm = positive < ((127-15+1) << 23); // positive < smallest normal half? 82 will_be_denorm = positive < ((127-15+1) << 23); // positive < smallest normal half?
80 83
81 // For normal half floats, adjust the exponent from 127 bias to 15 bias, 84 // For normal half floats, adjust the exponent from 127 bias to 15 bias,
82 // then drop the bottom 13 mantissa bits. 85 // then drop the bottom 13 mantissa bits.
83 Sk4i norm = (positive - ((127 - 15) << 23)) >> 13; 86 Sk4i norm = (positive - ((127 - 15) << 23)) >> 13;
84 87
85 // This mechanically inverts the denorm half -> normal float conversion above. 88 // This mechanically inverts the denorm half -> normal float conversion above.
86 // Knowing that and reading its explanation will leave you feeling more confident 89 // Knowing that and reading its explanation will leave you feeling more confident
87 // than reading my best attempt at explaining this directly. 90 // than reading my best attempt at explaining this directly.
88 const Sk4i K = ((127-15) + (23-10) + 1) << 23; 91 const Sk4i K = ((127-15) + (23-10) + 1) << 23;
89 Sk4f plus_K = Sk4f::Load(&positive) + Sk4f::Load(&K); 92 Sk4f plus_K = Sk4f::Load(&positive) + Sk4f::Load(&K);
90 Sk4i denorm = Sk4i::Load(&plus_K) ^ K; 93 Sk4i denorm = Sk4i::Load(&plus_K) ^ K;
91 94
92 Sk4i merged = (sign >> 16) | will_be_denorm.thenElse(denorm, norm); 95 Sk4i merged = (sign >> 16) | will_be_denorm.thenElse(denorm, norm);
93 return SkNx_cast<uint16_t>(merged); 96 return SkNx_cast<uint16_t>(merged);
94 #endif 97 #endif
95 } 98 }
96 99
97 #endif 100 #endif
OLDNEW
« no previous file with comments | « no previous file | src/core/SkNx.h » ('j') | src/opts/SkNx_sse.h » ('J')

Powered by Google App Engine
This is Rietveld 408576698