src/core/SkHalf.h - Issue 2256023002: Flush denorm half floats to zero.

Side by Side Diff: src/core/SkHalf.h

Issue 2256023002: Flush denorm half floats to zero. (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: fix threshold, clean up tests Created 4 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright 2014 Google Inc.	2 * Copyright 2014 Google Inc.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 #ifndef SkHalf_DEFINED	8 #ifndef SkHalf_DEFINED

9 #define SkHalf_DEFINED	9 #define SkHalf_DEFINED

10	10

11 #include "SkNx.h"	11 #include "SkNx.h"

12 #include "SkTypes.h"	12 #include "SkTypes.h"

13	13

14 // 16-bit floating point value	14 // 16-bit floating point value

15 // format is 1 bit sign, 5 bits exponent, 10 bits mantissa	15 // format is 1 bit sign, 5 bits exponent, 10 bits mantissa

16 // only used for storage	16 // only used for storage

17 typedef uint16_t SkHalf;	17 typedef uint16_t SkHalf;

18	18

19 static constexpr uint16_t SK_HalfMin = 0x0400; // 2^-24 (minimum positive normal value)	19 static constexpr uint16_t SK_HalfMin = 0x0400; // 2^-24 (minimum positive normal value)

20 static constexpr uint16_t SK_HalfMax = 0x7bff; // 65504	20 static constexpr uint16_t SK_HalfMax = 0x7bff; // 65504

21 static constexpr uint16_t SK_HalfEpsilon = 0x1400; // 2^-10	21 static constexpr uint16_t SK_HalfEpsilon = 0x1400; // 2^-10

22 static constexpr uint16_t SK_Half1 = 0x3C00; // 1	22 static constexpr uint16_t SK_Half1 = 0x3C00; // 1

23	23

24 // convert between half and single precision floating point	24 // convert between half and single precision floating point

25 float SkHalfToFloat(SkHalf h);	25 float SkHalfToFloat(SkHalf h);

26 SkHalf SkFloatToHalf(float f);	26 SkHalf SkFloatToHalf(float f);

27	27

28 // Convert between half and single precision floating point,	28 // Convert between half and single precision floating point,

29 // assuming inputs and outputs are both finite.	29 // assuming inputs and outputs are both finite, and

30 static inline Sk4f SkHalfToFloat_finite(uint64_t);	30 // flushing values which would be denormal half floats to zero.

31 static inline Sk4h SkFloatToHalf_finite(const Sk4f&);	31 static inline Sk4f SkHalfToFloat_finite_ftz(uint64_t);

	32 static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f&);

32	33

33 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ //	34 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ //

34	35

35 // Like the serial versions in SkHalf.cpp, these are based on	36 // Like the serial versions in SkHalf.cpp, these are based on

36 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/	37 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/

37	38

38 // GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use inline assembly.	39 // GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use inline assembly.

39	40

40 static inline Sk4f SkHalfToFloat_finite(const Sk4h& hs) {	41 static inline Sk4f SkHalfToFloat_finite_ftz(const Sk4h& hs) {

41 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)	42 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)

42 float32x4_t fs;	43 float32x4_t fs;

43 asm ("fcvtl %[fs].4s, %[hs].4h \n" // vcvt_f32_f16(...)	44 asm ("fcvtl %[fs].4s, %[hs].4h \n" // vcvt_f32_f16(...)

44 : [fs] "=w" (fs) // =w: write-only NEON register	45 : [fs] "=w" (fs) // =w: write-only NEON register

45 : [hs] "w" (hs.fVec)); // w: read-only NEON register	46 : [hs] "w" (hs.fVec)); // w: read-only NEON register

46 return fs;	47 return fs;

47 #else	48 #else

48 Sk4i bits = SkNx_cast<int>(hs), // Expand to 32 bit.	49 Sk4i bits = SkNx_cast<int>(hs), // Expand to 32 bit.

49 sign = bits & 0x00008000, // Save the sign bit for later...	50 sign = bits & 0x00008000, // Save the sign bit for later...

50 positive = bits ^ sign, // ...but strip it off for now.	51 positive = bits ^ sign, // ...but strip it off for now.

51 is_denorm = positive < (1<<10); // Exponent == 0?	52 is_norm = 0x03ff < positive; // Exponent > 0?

52	53

53 // For normal half floats, extend the mantissa by 13 zero bits,	54 // For normal half floats, extend the mantissa by 13 zero bits,

54 // then adjust the exponent from 15 bias to 127 bias.	55 // then adjust the exponent from 15 bias to 127 bias.

55 Sk4i norm = (positive << 13) + ((127 - 15) << 23);	56 Sk4i norm = (positive << 13) + ((127 - 15) << 23);

56	57

57 // For denorm half floats, mask in the exponent-only float K that turns our	58 Sk4i merged = (sign << 16) | (norm & is_norm);

58 // denorm value V*2^-14 into a normalized float K + V*2^-14. Then subtract off K.

59 const Sk4i K = ((127-15) + (23-10) + 1) << 23;

60 Sk4i mask_K = positive | K;

61 Sk4f denorm = Sk4f::Load(&mask_K) - Sk4f::Load(&K);

62

63 Sk4i merged = (sign << 16) | is_denorm.thenElse(Sk4i::Load(&denorm), norm);

64 return Sk4f::Load(&merged);	59 return Sk4f::Load(&merged);

65 #endif	60 #endif

66 }	61 }

67	62

68 static inline Sk4f SkHalfToFloat_finite(uint64_t hs) {	63 static inline Sk4f SkHalfToFloat_finite_ftz(uint64_t hs) {

69 return SkHalfToFloat_finite(Sk4h::Load(&hs));	64 return SkHalfToFloat_finite_ftz(Sk4h::Load(&hs));

70 }	65 }

71	66

72 static inline Sk4h SkFloatToHalf_finite(const Sk4f& fs) {	67 static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f& fs) {

73 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)	68 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)

74 float32x4_t vec = fs.fVec;	69 float32x4_t vec = fs.fVec;

75 asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec)	70 asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec)

76 : [vec] "+w" (vec)); // +w: read-write NEON register	71 : [vec] "+w" (vec)); // +w: read-write NEON register

77 return vreinterpret_u16_f32(vget_low_f32(vec));	72 return vreinterpret_u16_f32(vget_low_f32(vec));

78 #else	73 #else

79 Sk4i bits = Sk4i::Load(&fs),	74 Sk4i bits = Sk4i::Load(&fs),

80 sign = bits & 0x80000000, // Save the sign bit for later...	75 sign = bits & 0x80000000, // Save the sign bit for later...

81 positive = bits ^ sign, // ...but strip it off for now.	76 positive = bits ^ sign, // ...but strip it off for now.

82 will_be_denorm = positive < ((127-15+1) << 23); // positive < smallest normal half?	77 will_be_norm = 0x387fdfff < positive; // greater than largest denorm half?

83	78

84 // For normal half floats, adjust the exponent from 127 bias to 15 bias,	79 // For normal half floats, adjust the exponent from 127 bias to 15 bias,

85 // then drop the bottom 13 mantissa bits.	80 // then drop the bottom 13 mantissa bits.

86 Sk4i norm = (positive - ((127 - 15) << 23)) >> 13;	81 Sk4i norm = (positive - ((127 - 15) << 23)) >> 13;

87	82

88 // This mechanically inverts the denorm half -> normal float conversion above.	83 Sk4i merged = (sign >> 16) | (will_be_norm & norm);

89 // Knowing that and reading its explanation will leave you feeling more confident

90 // than reading my best attempt at explaining this directly.

91 const Sk4i K = ((127-15) + (23-10) + 1) << 23;

92 Sk4f plus_K = Sk4f::Load(&positive) + Sk4f::Load(&K);

93 Sk4i denorm = Sk4i::Load(&plus_K) ^ K;

94

95 Sk4i merged = (sign >> 16) | will_be_denorm.thenElse(denorm, norm);

96 return SkNx_cast<uint16_t>(merged);	84 return SkNx_cast<uint16_t>(merged);

97 #endif	85 #endif

98 }	86 }

99	87

100 #endif	88 #endif

OLD	NEW

« no previous file with comments | « src/core/SkColorSpaceXform.cpp ('k') | src/core/SkLinearBitmapPipeline_sample.h » ('j') | no next file with comments »