src/core/SkHalf.h - Issue 2256023002: Flush denorm half floats to zero.

Side by Side Diff: src/core/SkHalf.h

Issue 2256023002: Flush denorm half floats to zero. (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: names Created 4 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright 2014 Google Inc.	2 * Copyright 2014 Google Inc.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 #ifndef SkHalf_DEFINED	8 #ifndef SkHalf_DEFINED

9 #define SkHalf_DEFINED	9 #define SkHalf_DEFINED

10	10

11 #include "SkNx.h"	11 #include "SkNx.h"

12 #include "SkTypes.h"	12 #include "SkTypes.h"

13	13

14 // 16-bit floating point value	14 // 16-bit floating point value

15 // format is 1 bit sign, 5 bits exponent, 10 bits mantissa	15 // format is 1 bit sign, 5 bits exponent, 10 bits mantissa

16 // only used for storage	16 // only used for storage

17 typedef uint16_t SkHalf;	17 typedef uint16_t SkHalf;

18	18

19 static constexpr uint16_t SK_HalfMin = 0x0400; // 2^-24 (minimum positive normal value)	19 static constexpr uint16_t SK_HalfMin = 0x0400; // 2^-24 (minimum positive normal value)

20 static constexpr uint16_t SK_HalfMax = 0x7bff; // 65504	20 static constexpr uint16_t SK_HalfMax = 0x7bff; // 65504

21 static constexpr uint16_t SK_HalfEpsilon = 0x1400; // 2^-10	21 static constexpr uint16_t SK_HalfEpsilon = 0x1400; // 2^-10

22 static constexpr uint16_t SK_Half1 = 0x3C00; // 1	22 static constexpr uint16_t SK_Half1 = 0x3C00; // 1

23	23

24 // convert between half and single precision floating point	24 // convert between half and single precision floating point

25 float SkHalfToFloat(SkHalf h);	25 float SkHalfToFloat(SkHalf h);

26 SkHalf SkFloatToHalf(float f);	26 SkHalf SkFloatToHalf(float f);

27	27

28 // Convert between half and single precision floating point,	28 // Convert between half and single precision floating point,

29 // assuming inputs and outputs are both finite.	29 // assuming inputs and outputs are both ordinary, that is, zero or normal.

30 static inline Sk4f SkHalfToFloat_finite(uint64_t);	30 static inline Sk4f SkHalfToFloat_ordinary(uint64_t);

31 static inline Sk4h SkFloatToHalf_finite(const Sk4f&);	31 static inline Sk4h SkFloatToHalf_ordinary(const Sk4f&);

32	32

33 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ //	33 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ //

34	34

35 // Like the serial versions in SkHalf.cpp, these are based on	35 // Like the serial versions in SkHalf.cpp, these are based on

36 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/	36 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/

37	37

38 // GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use inline assembly.	38 // GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use inline assembly.

39	39

40 static inline Sk4f SkHalfToFloat_finite(const Sk4h& hs) {	40 static inline Sk4f SkHalfToFloat_ordinary(const Sk4h& hs) {

41 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)	41 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)

42 float32x4_t fs;	42 float32x4_t fs;

43 asm ("fcvtl %[fs].4s, %[hs].4h \n" // vcvt_f32_f16(...)	43 asm ("fcvtl %[fs].4s, %[hs].4h \n" // vcvt_f32_f16(...)

44 : [fs] "=w" (fs) // =w: write-only NEON register	44 : [fs] "=w" (fs) // =w: write-only NEON register

45 : [hs] "w" (hs.fVec)); // w: read-only NEON register	45 : [hs] "w" (hs.fVec)); // w: read-only NEON register

46 return fs;	46 return fs;

47 #else	47 #else

48 Sk4i bits = SkNx_cast<int>(hs), // Expand to 32 bit.	48 Sk4i bits = SkNx_cast<int>(hs), // Expand to 32 bit.

49 sign = bits & 0x00008000, // Save the sign bit for later...	49 sign = bits & 0x00008000, // Save the sign bit for later...

50 positive = bits ^ sign, // ...but strip it off for now.	50 positive = bits ^ sign, // ...but strip it off for now.

51 is_denorm = positive < (1<<10); // Exponent == 0?	51 is_norm = 0x03ff < positive; // Exponent > 0?

52	52

53 // For normal half floats, extend the mantissa by 13 zero bits,	53 // For normal half floats, extend the mantissa by 13 zero bits,

54 // then adjust the exponent from 15 bias to 127 bias.	54 // then adjust the exponent from 15 bias to 127 bias.

55 Sk4i norm = (positive << 13) + ((127 - 15) << 23);	55 Sk4i norm = (positive << 13) + ((127 - 15) << 23);

56	56

57 // For denorm half floats, mask in the exponent-only float K that turns our	57 Sk4i merged = (sign << 16) | (norm & is_norm);

58 // denorm value V2^-14 into a normalized float K + V2^-14. Then subtract off K.

59 const Sk4i K = ((127-15) + (23-10) + 1) << 23;

60 Sk4i mask_K = positive | K;

61 Sk4f denorm = Sk4f::Load(&mask_K) - Sk4f::Load(&K);

62

63 Sk4i merged = (sign << 16) | is_denorm.thenElse(Sk4i::Load(&denorm), norm);

64 return Sk4f::Load(&merged);	58 return Sk4f::Load(&merged);

65 #endif	59 #endif

66 }	60 }

67	61

68 static inline Sk4f SkHalfToFloat_finite(uint64_t hs) {	62 static inline Sk4f SkHalfToFloat_ordinary(uint64_t hs) {

69 return SkHalfToFloat_finite(Sk4h::Load(&hs));	63 return SkHalfToFloat_ordinary(Sk4h::Load(&hs));

70 }	64 }

71	65

72 static inline Sk4h SkFloatToHalf_finite(const Sk4f& fs) {	66 static inline Sk4h SkFloatToHalf_ordinary(const Sk4f& fs) {

73 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)	67 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)

74 float32x4_t vec = fs.fVec;	68 float32x4_t vec = fs.fVec;

75 asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec)	69 asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec)

76 : [vec] "+w" (vec)); // +w: read-write NEON register	70 : [vec] "+w" (vec)); // +w: read-write NEON register

77 return vreinterpret_u16_f32(vget_low_f32(vec));	71 return vreinterpret_u16_f32(vget_low_f32(vec));

78 #else	72 #else

79 Sk4i bits = Sk4i::Load(&fs),	73 Sk4i bits = Sk4i::Load(&fs),

80 sign = bits & 0x80000000, // Save the sign bit for later...	74 sign = bits & 0x80000000, // Save the sign bit for later...

81 positive = bits ^ sign, // ...but strip it off for now.	75 positive = bits ^ sign, // ...but strip it off for now.

82 will_be_denorm = positive < ((127-15+1) << 23); // positve < smallest normal half?	76 will_be_norm = 0x387fc000 < positive; // greater than largest denorm half?

83	77

84 // For normal half floats, adjust the exponent from 127 bias to 15 bias,	78 // For normal half floats, adjust the exponent from 127 bias to 15 bias,

85 // then drop the bottom 13 mantissa bits.	79 // then drop the bottom 13 mantissa bits.

86 Sk4i norm = (positive - ((127 - 15) << 23)) >> 13;	80 Sk4i norm = (positive - ((127 - 15) << 23)) >> 13;

87	81

88 // This mechanically inverts the denorm half -> normal float conversion above.	82 Sk4i merged = (sign >> 16) | (will_be_norm & norm);
	msarett 2016/08/18 13:19:55 Woohoo! This file gets a lot simpler!
89 // Knowning that and reading its explanation will leave you feeling more con fident

90 // than reading my best attempt at explaining this directly.

91 const Sk4i K = ((127-15) + (23-10) + 1) << 23;

92 Sk4f plus_K = Sk4f::Load(&positive) + Sk4f::Load(&K);

93 Sk4i denorm = Sk4i::Load(&plus_K) ^ K;

94

95 Sk4i merged = (sign >> 16) | will_be_denorm.thenElse(denorm, norm);

96 return SkNx_cast<uint16_t>(merged);	83 return SkNx_cast<uint16_t>(merged);

97 #endif	84 #endif

98 }	85 }

99	86

100 #endif	87 #endif

OLD	NEW

« no previous file with comments | « src/core/SkColorSpaceXform.cpp ('k') | src/core/SkLinearBitmapPipeline_sample.h » ('j') | tests/Float16Test.cpp » ('J')