src/core/SkHalf.h - Issue 2159993003: Improve naive SkColorXform to half floats

Side by Side Diff: src/core/SkHalf.h

Issue 2159993003: Improve naive SkColorXform to half floats (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright 2014 Google Inc.	2 * Copyright 2014 Google Inc.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 #ifndef SkHalf_DEFINED	8 #ifndef SkHalf_DEFINED

9 #define SkHalf_DEFINED	9 #define SkHalf_DEFINED

10	10

(...skipping 47 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
58 // denorm value V*2^-14 into a normalized float K + V*2^-14. Then subtract off K.	58 // denorm value V*2^-14 into a normalized float K + V*2^-14. Then subtract off K.

59 const Sk4i K = ((127-15) + (23-10) + 1) << 23;	59 const Sk4i K = ((127-15) + (23-10) + 1) << 23;

60 Sk4i mask_K = positive | K;	60 Sk4i mask_K = positive | K;

61 Sk4f denorm = Sk4f::Load(&mask_K) - Sk4f::Load(&K);	61 Sk4f denorm = Sk4f::Load(&mask_K) - Sk4f::Load(&K);

62	62

63 Sk4i merged = (sign << 16) | is_denorm.thenElse(Sk4i::Load(&denorm), norm);	63 Sk4i merged = (sign << 16) | is_denorm.thenElse(Sk4i::Load(&denorm), norm);

64 return Sk4f::Load(&merged);	64 return Sk4f::Load(&merged);

65 #endif	65 #endif

66 }	66 }

67	67

68 static inline uint64_t SkFloatToHalf_finite(const Sk4f& fs) {	68 static inline Sk4h SkFloatToVectorHalf_finite(const Sk4f& fs) {
	msarett 2016/07/18 22:09:09:
	I'm seeing significant benefit from having a version of this that keeps the result in the vector registers.
	mtklein 2016/07/19 12:45:45:
	On 2016/07/18 22:09:09, msarett wrote:
	> I'm seeing significant benefit from having a version of this that keeps the
	> result in the vector registers.
	Cool cool cool. Less assembly's always better. Must be the old code is always literally producing the fmov, where this new version can see it as a logical operation?
	msarett 2016/07/19 15:24:49:
	On 2016/07/19 12:45:45, mtklein wrote:
	> On 2016/07/18 22:09:09, msarett wrote:
	> > I'm seeing significant benefit from having a version of this that keeps the
	> > result in the vector registers.
	>
	> Cool cool cool. Less assembly's always better. Must be the old code is always
	> literally producing the fmov, where this new version can see it as a logical
	> operation?
	That's potentially true. I only looked at the Intel clang disassembly. I was having issues with: 4h -> store to uint64_t -> load to 4h -> unpack. This code was moving out of the vector registers, then reloading one lane at a time (instead of the unpack). It was ugly.
69 uint64_t r;

70 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)	69 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)

71 float32x4_t vec = fs.fVec;	70 float32x4_t vec = fs.fVec;

72 asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec)	71 asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec)

73 "fmov %[r], %d[vec] \n" // vst1_f16(&r, ...)	72 : [vec] "+w" (vec)); // +w: read-write NEON register

74 : [r] "=r" (r) // =r: write-only 64-bit general register	73 return vreinterpret_u16_f32(vget_low_f32(vec));

75 , [vec] "+w" (vec)); // +w: read-write NEON register

76 #else	74 #else

77 Sk4i bits = Sk4i::Load(&fs),	75 Sk4i bits = Sk4i::Load(&fs),

78 sign = bits & 0x80000000, // Save the sign bit for later...	76 sign = bits & 0x80000000, // Save the sign bit for later...

79 positive = bits ^ sign, // ...but strip it off for now.	77 positive = bits ^ sign, // ...but strip it off for now.

80 will_be_denorm = positive < ((127-15+1) << 23); // positive < smallest normal half?	78 will_be_denorm = positive < ((127-15+1) << 23); // positive < smallest normal half?

81	79

82 // For normal half floats, adjust the exponent from 127 bias to 15 bias,	80 // For normal half floats, adjust the exponent from 127 bias to 15 bias,

83 // then drop the bottom 13 mantissa bits.	81 // then drop the bottom 13 mantissa bits.

84 Sk4i norm = (positive - ((127 - 15) << 23)) >> 13;	82 Sk4i norm = (positive - ((127 - 15) << 23)) >> 13;

85	83

86 // This mechanically inverts the denorm half -> normal float conversion above.	84 // This mechanically inverts the denorm half -> normal float conversion above.

87 // Knowing that and reading its explanation will leave you feeling more confident	85 // Knowing that and reading its explanation will leave you feeling more confident

88 // than reading my best attempt at explaining this directly.	86 // than reading my best attempt at explaining this directly.

89 const Sk4i K = ((127-15) + (23-10) + 1) << 23;	87 const Sk4i K = ((127-15) + (23-10) + 1) << 23;

90 Sk4f plus_K = Sk4f::Load(&positive) + Sk4f::Load(&K);	88 Sk4f plus_K = Sk4f::Load(&positive) + Sk4f::Load(&K);

91 Sk4i denorm = Sk4i::Load(&plus_K) ^ K;	89 Sk4i denorm = Sk4i::Load(&plus_K) ^ K;

92	90

93 Sk4i merged = (sign >> 16) | will_be_denorm.thenElse(denorm, norm);	91 Sk4i merged = (sign >> 16) | will_be_denorm.thenElse(denorm, norm);

94 SkNx_cast<uint16_t>(merged).store(&r);	92 return SkNx_cast<uint16_t>(merged);

95 #endif	93 #endif

	94 }

	95

	96 static inline uint64_t SkFloatToHalf_finite(const Sk4f& fs) {

	97 uint64_t r;

	98 Sk4h v = SkFloatToVectorHalf_finite(fs);

	99 v.store(&r);

96 return r;	100 return r;

97 }	101 }

98	102

99 #endif	103 #endif

OLD	NEW

« no previous file with comments | « bench/ColorCodecBench.cpp ('k') | src/opts/SkColorXform_opts.h » ('j') | src/opts/SkColorXform_opts.h » ('J')