| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 /* | 8 /* |
| 9 ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; and ./out/Release/nanobench --samples 300 --nompd --match LinearSrcOver -q | 9 ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; and ./out/Release/nanobench --samples 300 --nompd --match LinearSrcOver -q |
| 10 */ | 10 */ |
| 11 | 11 |
| 12 #ifndef SkBlend_opts_DEFINED | 12 #ifndef SkBlend_opts_DEFINED |
| 13 #define SkBlend_opts_DEFINED | 13 #define SkBlend_opts_DEFINED |
| 14 | 14 |
| 15 #include "SkNx.h" | 15 #include "SkNx.h" |
| 16 #include "SkPM4fPriv.h" | 16 #include "SkPM4fPriv.h" |
| 17 | 17 |
| 18 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | 18 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
| 19 #include <immintrin.h> | 19 #include <immintrin.h> |
| 20 #endif | 20 #endif |
| 21 | 21 |
| 22 namespace SK_OPTS_NS { | 22 namespace SK_OPTS_NS { |
| 23 | 23 |
| 24 // An implementation of SrcOver from bytes to bytes in linear space that takes advantage of the | 24 static inline void srcover_srgb8888_srgb_1(uint32_t* dst, const uint32_t pixel) { |
| 25 // observation that the 255's cancel. | |
| 26 // invA = 1 - (As / 255); | |
| 27 // | |
| 28 // R = 255 * sqrt((Rs/255)^2 + (Rd/255)^2 * invA) | |
| 29 // => R = 255 * sqrt((Rs^2 + Rd^2 * invA)/255^2) | |
| 30 // => R = sqrt(Rs^2 + Rd^2 * invA) | |
| 31 static inline void blend_srgb_srgb_1(uint32_t* dst, const uint32_t pixel) { | |
| 32 Sk4f s = srgb_to_linear(to_4f(pixel)); | |
| 33 Sk4f d = srgb_to_linear(to_4f(*dst)); | |
| 34 Sk4f invAlpha = 1.0f - Sk4f{s[SkPM4f::A]} * (1.0f / 255.0f); | |
| 35 Sk4f r = linear_to_srgb(s + d * invAlpha) + 0.5f; | |
| 36 *dst = to_4b(r); | |
| 37 } | |
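Aside: the old-side comment's derivation, restated as a scalar sketch. This assumes the gamma-2.0 approximation of sRGB and premultiplied inputs; srcover_channel_gamma2 is a hypothetical helper, not part of this file.

    #include <math.h>
    #include <stdint.h>

    // One premultiplied color channel of SrcOver under linear ~= (v/255)^2:
    // 255 * sqrt((Rs/255)^2 + (Rd/255)^2 * invA) simplifies to
    // sqrt(Rs^2 + Rd^2 * invA) because the 255's cancel.
    static inline uint8_t srcover_channel_gamma2(uint8_t Rs, uint8_t Rd, uint8_t As) {
        float invA = 1.0f - As * (1.0f / 255.0f);
        return (uint8_t)(sqrtf((float)Rs * Rs + (float)Rd * Rd * invA) + 0.5f);
    }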
| 38 | |
| 39 static inline void srcover_srgb_srgb_1(uint32_t* dst, const uint32_t pixel) { | |
| 40 if ((~pixel & 0xFF000000) == 0) { | 25 if ((~pixel & 0xFF000000) == 0) { |
| 41 *dst = pixel; | 26 *dst = pixel; |
| 42 } else if ((pixel & 0xFF000000) != 0) { | 27 } else if ((pixel & 0xFF000000) != 0) { |
| 43 blend_srgb_srgb_1(dst, pixel); | 28 srcover_blend_srgb8888_srgb_1(dst, srgb_to_linear(to_4f(pixel))); |
| 44 } | 29 } |
| 45 } | 30 } |
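Aside: the two mask tests above, spelled out with illustrative pixel values (alpha lives in the top byte):

    // pixel = 0xFFxxxxxx: (~pixel & 0xFF000000) == 0 -> source opaque, copy it over
    // pixel = 0x80xxxxxx: ( pixel & 0xFF000000) != 0 -> translucent, take the blend path
    // pixel = 0x00xxxxxx: neither test fires         -> source transparent, dst left alone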
| 46 | 31 |
| 47 static inline void srcover_srgb_srgb_2(uint32_t* dst, const uint32_t* src) { | |
| 48 srcover_srgb_srgb_1(dst++, *src++); | |
| 49 srcover_srgb_srgb_1(dst, *src); | |
| 50 } | |
| 51 | |
| 52 static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { | 32 static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { |
| 53 srcover_srgb_srgb_1(dst++, *src++); | 33 srcover_srgb8888_srgb_1(dst++, *src++); |
| 54 srcover_srgb_srgb_1(dst++, *src++); | 34 srcover_srgb8888_srgb_1(dst++, *src++); |
| 55 srcover_srgb_srgb_1(dst++, *src++); | 35 srcover_srgb8888_srgb_1(dst++, *src++); |
| 56 srcover_srgb_srgb_1(dst, *src); | 36 srcover_srgb8888_srgb_1(dst, *src); |
| 57 } | |
| 58 | |
| 59 void best_non_simd_srcover_srgb_srgb( | |
| 60 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { | |
| 61 uint64_t* ddst = reinterpret_cast<uint64_t*>(dst); | |
| 62 | |
| 63 while (ndst > 0) { | |
| 64 int count = SkTMin(ndst, nsrc); | |
| 65 ndst -= count; | |
| 66 const uint64_t* dsrc = reinterpret_cast<const uint64_t*>(src); | |
| 67 const uint64_t* end = dsrc + (count >> 1); | |
| 68 do { | |
| 69 if ((~*dsrc & 0xFF000000FF000000) == 0) { | |
| 70 do { | |
| 71 *ddst++ = *dsrc++; | |
| 72 } while (dsrc < end && (~*dsrc & 0xFF000000FF000000) == 0); | |
| 73 } else if ((*dsrc & 0xFF000000FF000000) == 0) { | |
| 74 do { | |
| 75 dsrc++; | |
| 76 ddst++; | |
| 77 } while (dsrc < end && (*dsrc & 0xFF000000FF000000) == 0); | |
| 78 } else { | |
| 79 srcover_srgb_srgb_2(reinterpret_cast<uint32_t*>(ddst++), | |
| 80 reinterpret_cast<const uint32_t*>(dsrc++)); | |
| 81 } | |
| 82 } while (dsrc < end); | |
| 83 | |
| 84 if ((count & 1) != 0) { | |
| 85 srcover_srgb_srgb_1(reinterpret_cast<uint32_t*>(ddst), | |
| 86 *reinterpret_cast<const uint32_t*>(dsrc)); | |
| 87 } | |
| 88 } | |
| 89 } | |
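Aside: the 64-bit loads above classify two pixels at a time; the mask logic, spelled out (alpha sits in the top byte of each 32-bit half):

    // (~v & 0xFF000000FF000000) == 0 -> both alphas 0xFF: copy the opaque run
    // ( v & 0xFF000000FF000000) == 0 -> both alphas 0x00: skip the transparent run
    // otherwise                      -> mixed pair: srcover_srgb_srgb_2 blends it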
| 90 | |
| 91 void brute_force_srcover_srgb_srgb( | |
| 92 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { | |
| 93 while (ndst > 0) { | |
| 94 int n = SkTMin(ndst, nsrc); | |
| 95 | |
| 96 for (int i = 0; i < n; i++) { | |
| 97 blend_srgb_srgb_1(dst++, src[i]); | |
| 98 } | |
| 99 ndst -= n; | |
| 100 } | |
| 101 } | |
| 102 | |
| 103 void trivial_srcover_srgb_srgb( | |
| 104 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { | |
| 105 while (ndst > 0) { | |
| 106 int n = SkTMin(ndst, nsrc); | |
| 107 | |
| 108 for (int i = 0; i < n; i++) { | |
| 109 srcover_srgb_srgb_1(dst++, src[i]); | |
| 110 } | |
| 111 ndst -= n; | |
| 112 } | |
| 113 } | 37 } |
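Aside: all three loops tile the source, replaying the src strip from its start whenever ndst exceeds nsrc. A usage sketch with hypothetical buffers (assumed initialized before the call):

    uint32_t dstRow[256];   // destination pixels
    uint32_t srcStrip[64];  // repeating source strip
    // ... fill dstRow and srcStrip ...
    trivial_srcover_srgb_srgb(dstRow, srcStrip, 256, 64);  // strip applied 4 times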
| 114 | 38 |
| 115 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | 39 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
| 116 | 40 |
| 117 static inline __m128i load(const uint32_t* p) { | 41 static inline __m128i load(const uint32_t* p) { |
| 118 return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); | 42 return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); |
| 119 } | 43 } |
| 120 | 44 |
| 121 static inline void store(uint32_t* p, __m128i v) { | 45 static inline void store(uint32_t* p, __m128i v) { |
| 122 _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v); | 46 _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v); |
| (...skipping 33 matching lines...) |
| 156 srcover_srgb_srgb_4(dst, dst + delta); | 80 srcover_srgb_srgb_4(dst, dst + delta); |
| 157 dst += 4; | 81 dst += 4; |
| 158 } while (dst < end | 82 } while (dst < end |
| 159          && _mm_testnzc_si128(pixels = load(dst + delta), alphaMask)); | 83          && _mm_testnzc_si128(pixels = load(dst + delta), alphaMask)); |
| 160 src += dst - start; | 84 src += dst - start; |
| 161 } | 85 } |
| 162 } | 86 } |
| 163 | 87 |
| 164 count = count & 3; | 88 count = count & 3; |
| 165 while (count-- > 0) { | 89 while (count-- > 0) { |
| 166 srcover_srgb_srgb_1(dst++, *src++); | 90 srcover_srgb8888_srgb_1(dst++, *src++); |
| 167 } | 91 } |
| 168 } | 92 } |
| 169 } | 93 } |
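Aside: the loop condition above leans on SSE4.1 PTEST; a hedged sketch of what it computes (check_mixed_alphas is a hypothetical name, not from this file):

    #include <smmintrin.h>

    // Nonzero iff (pixels & mask) != 0 AND (~pixels & mask) != 0, i.e. the four
    // alpha bytes are neither all 0x00 nor all 0xFF, so the group still needs
    // the general blend path.
    static inline int check_mixed_alphas(__m128i pixels) {
        const __m128i alphaMask = _mm_set1_epi32((int)0xFF000000);
        return _mm_testnzc_si128(pixels, alphaMask);
    }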
| 170 #else | 94 #else |
| 171 // SSE2 versions | 95 // SSE2 versions |
| 172 | 96 |
| 173 // Note: In the next three comparisons a group of 4 pixels is converted to a group of | 97 // Note: In the next three comparisons a group of 4 pixels is converted to a group of |
| 174 // "signed" pixels because SSE2 does not have an unsigned comparison. | 98 // "signed" pixels because SSE2 does not have an unsigned comparison. |
| 175 // Make it so that we can use the signed comparison operators by biasing | 99 // Make it so that we can use the signed comparison operators by biasing |
| 176 // 0x00xxxxxx to 0x80xxxxxx, which is the smallest value, and biasing 0xffxxxxxx to | 100 // 0x00xxxxxx to 0x80xxxxxx, which is the smallest value, and biasing 0xffxxxxxx to |
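Aside: a minimal sketch of the bias trick this comment describes (as_signed_pixels is a hypothetical name; SSE2 intrinsics only):

    #include <emmintrin.h>

    // Flip the sign bit of each 32-bit pixel: 0x00xxxxxx -> 0x80xxxxxx (most
    // negative) and 0xFFxxxxxx -> 0x7Fxxxxxx (most positive), so SSE2's signed
    // compares order pixels by their unsigned alpha byte.
    static inline __m128i as_signed_pixels(__m128i pixels) {
        return _mm_xor_si128(pixels, _mm_set1_epi32((int)0x80000000));
    }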
| (...skipping 51 matching lines...) |
| 228 do { | 152 do { |
| 229 srcover_srgb_srgb_4(dst, dst + delta); | 153 srcover_srgb_srgb_4(dst, dst + delta); |
| 230 dst += 4; | 154 dst += 4; |
| 231             } while (dst < end && check_partial_alphas(pixels = load(dst + delta))); | 155             } while (dst < end && check_partial_alphas(pixels = load(dst + delta))); |
| 232 src += dst - start; | 156 src += dst - start; |
| 233 } | 157 } |
| 234 } while (dst < end); | 158 } while (dst < end); |
| 235 | 159 |
| 236 count = count & 3; | 160 count = count & 3; |
| 237 while (count-- > 0) { | 161 while (count-- > 0) { |
| 238 srcover_srgb_srgb_1(dst++, *src++); | 162 srcover_srgb8888_srgb_1(dst++, *src++); |
| 239 } | 163 } |
| 240 } | 164 } |
| 241 } | 165 } |
| 242 #endif | 166 #endif |
| 243 #else | 167 #else |
| 244 | 168 |
| 245 void srcover_srgb_srgb( | 169 void srcover_srgb_srgb( |
| 246 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { | 170 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { |
| 247 trivial_srcover_srgb_srgb(dst, src, ndst, nsrc); | 171 while (ndst > 0) { |
| 172 int n = SkTMin(ndst, nsrc); |
| 173 |
| 174 for (int i = 0; i < n; i++) { |
| 175 srcover_srgb8888_srgb_1(dst++, src[i]); |
| 176 } |
| 177 ndst -= n; |
| 178 } |
| 248 } | 179 } |
| 249 | 180 |
| 250 #endif | 181 #endif |
| 251 | 182 |
| 252 } // namespace SK_OPTS_NS | 183 } // namespace SK_OPTS_NS |
| 253 | 184 |
| 254 #endif // SkBlend_opts_DEFINED | 185 #endif // SkBlend_opts_DEFINED |