| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 /* | |
| 9 ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; and ./out/Release/nanobench --samples 300 --nompd --match LinearSrcOver -q | |
| 10 */ | |
| 11 | |
| 12 #ifndef SkBlend_opts_DEFINED | 8 #ifndef SkBlend_opts_DEFINED |
| 13 #define SkBlend_opts_DEFINED | 9 #define SkBlend_opts_DEFINED |
| 14 | 10 |
| 15 #include "SkNx.h" | |
| 16 #include "SkPM4fPriv.h" | |
| 17 | |
| 18 namespace SK_OPTS_NS { | 11 namespace SK_OPTS_NS { |
| 19 | 12 |
| 20 // An implementation of SrcOver from bytes to bytes in linear space that takes advantage of the | 13 #if 0 |
| 21 // observation that the 255's cancel. | |
| 22 // invA = 1 - (As / 255); | |
| 23 // | |
| 24 // R = 255 * sqrt((Rs/255)^2 + (Rd/255)^2 * invA) | |
| 25 // => R = 255 * sqrt((Rs^2 + Rd^2 * invA)/255^2) | |
| 26 // => R = sqrt(Rs^2 + Rd^2 * invA) | |
| 27 static inline void blend_srgb_srgb_1(uint32_t* dst, const uint32_t pixel) { | |
| 28 Sk4f s = srgb_to_linear(to_4f(pixel)); | |
| 29 Sk4f d = srgb_to_linear(to_4f(*dst)); | |
| 30 Sk4f invAlpha = 1.0f - Sk4f{s[SkPM4f::A]} * (1.0f / 255.0f); | |
| 31 Sk4f r = linear_to_srgb(s + d * invAlpha) + 0.5f; | |
| 32 *dst = to_4b(r); | |
| 33 } | |
| 34 | 14 |
| 35 static inline void srcover_srgb_srgb_1(uint32_t* dst, const uint32_t pixel) { | 15 #else |
| 36 if ((~pixel & 0xFF000000) == 0) { | 16 |
| 37 *dst = pixel; | 17 static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) { |
| 38 } else if ((pixel & 0xFF000000) != 0) { | 18 switch (src >> 24) { |
| 39 blend_srgb_srgb_1(dst, pixel); | 19 case 0x00: return; |
| | 20 case 0xff: *dst = src; return; |
| | 21 } |
| | 22 |
| | 23 Sk4f d = SkNx_cast<float>(Sk4b::Load( dst)), |
| | 24 s = SkNx_cast<float>(Sk4b::Load(&src)); |
| | 25 |
| | 26 // Approximate sRGB gamma as 2.0. |
| | 27 Sk4f d_sq = d*d, |
| | 28 s_sq = s*s; |
| | 29 d = Sk4f{d_sq[0], d_sq[1], d_sq[2], d[3]}; |
| | 30 s = Sk4f{s_sq[0], s_sq[1], s_sq[2], s[3]}; |
| | 31 |
| | 32 // SrcOver. |
| | 33 Sk4f invA = 1.0f - s[3]*(1/255.0f); |
| | 34 d = s + d * invA; |
| | 35 |
| | 36 // Re-apply approximate sRGB gamma. |
| | 37 Sk4f d_sqrt = d.sqrt(); |
| | 38 d = Sk4f{d_sqrt[0], d_sqrt[1], d_sqrt[2], d[3]}; |
| | 39 |
| | 40 SkNx_cast<uint8_t>(d).store(dst); |
| 40 } | 41 } |
| 41 } | |
| 42 | 42 |
| 43 static inline void srcover_srgb_srgb_2(uint32_t* dst, const uint32_t* src) { | 43 static inline void srcover_srgb_srgb(uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { |
| 44 srcover_srgb_srgb_1(dst++, *src++); | 44 while (ndst > 0) { |
| 45 srcover_srgb_srgb_1(dst, *src); | 45 int n = SkTMin(ndst, nsrc); |
| 46 } | |
| 47 | 46 |
| 48 static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { | 47 for (int i = 0; i < n; i++) { |
| 49 srcover_srgb_srgb_1(dst++, *src++); | 48 srcover_srgb_srgb_1(dst++, src[i]); |
| 50 srcover_srgb_srgb_1(dst++, *src++); | |
| 51 srcover_srgb_srgb_1(dst++, *src++); | |
| 52 srcover_srgb_srgb_1(dst, *src); | |
| 53 } | |
| 54 | |
| 55 void best_non_simd_srcover_srgb_srgb( | |
| 56 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { | |
| 57 uint64_t* ddst = reinterpret_cast<uint64_t*>(dst); | |
| 58 | |
| 59 while (ndst >0) { | |
| 60 int count = SkTMin(ndst, nsrc); | |
| 61 ndst -= count; | |
| 62 const uint64_t* dsrc = reinterpret_cast<const uint64_t*>(src); | |
| 63 const uint64_t* end = dsrc + (count >> 1); | |
| 64 do { | |
| 65 if ((~*dsrc & 0xFF000000FF000000) == 0) { | |
| 66 do { | |
| 67 *ddst++ = *dsrc++; | |
| 68 } while (dsrc < end && (~*dsrc & 0xFF000000FF000000) == 0); | |
| 69 } else if ((*dsrc & 0xFF000000FF000000) == 0) { | |
| 70 do { | |
| 71 dsrc++; | |
| 72 ddst++; | |
| 73 } while (dsrc < end && (*dsrc & 0xFF000000FF000000) == 0); | |
| 74 } else { | |
| 75 srcover_srgb_srgb_2(reinterpret_cast<uint32_t*>(ddst++), | |
| 76 reinterpret_cast<const uint32_t*>(dsrc++)); | |
| 77 } | 49 } |
| 78 } while (dsrc < end); | 50 ndst -= n; |
| 79 | |
| 80 if ((count & 1) != 0) { | |
| 81 srcover_srgb_srgb_1(reinterpret_cast<uint32_t*>(ddst), | |
| 82 *reinterpret_cast<const uint32_t*>(dsrc)); | |
| 83 } | 51 } |
| 84 } | 52 } |
| 85 } | 53 |
| 86 | |
| 87 void brute_force_srcover_srgb_srgb( | |
| 88 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { | |
| 89 while (ndst > 0) { | |
| 90 int n = SkTMin(ndst, nsrc); | |
| 91 | |
| 92 for (int i = 0; i < n; i++) { | |
| 93 blend_srgb_srgb_1(dst++, src[i]); | |
| 94 } | |
| 95 ndst -= n; | |
| 96 } | |
| 97 } | |
| 98 | |
| 99 void trivial_srcover_srgb_srgb( | |
| 100 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { | |
| 101 while (ndst > 0) { | |
| 102 int n = SkTMin(ndst, nsrc); | |
| 103 | |
| 104 for (int i = 0; i < n; i++) { | |
| 105 srcover_srgb_srgb_1(dst++, src[i]); | |
| 106 } | |
| 107 ndst -= n; | |
| 108 } | |
| 109 } | |
| 110 | |
| 111 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | |
| 112 | |
| 113 static inline __m128i load(const uint32_t* p) { | |
| 114 return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); | |
| 115 } | |
| 116 | |
| 117 static inline void store(uint32_t* p, __m128i v) { | |
| 118 _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v); | |
| 119 } | |
| 120 | |
| 121 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 | |
| 122 | |
| 123 void srcover_srgb_srgb( | |
| 124 uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) { | |
| 125 const __m128i alphaMask = _mm_set1_epi32(0xFF000000); | |
| 126 while (ndst > 0) { | |
| 127 int count = SkTMin(ndst, nsrc); | |
| 128 ndst -= count; | |
| 129 const uint32_t* src = srcStart; | |
| 130 const uint32_t* end = src + (count & ~3); | |
| 131 | |
| 132 while (src < end) { | |
| 133 __m128i pixels = load(src); | |
| 134 if (_mm_testc_si128(pixels, alphaMask)) { | |
| 135 do { | |
| 136 store(dst, pixels); | |
| 137 dst += 4; | |
| 138 src += 4; | |
| 139 } while (src < end && _mm_testc_si128(pixels = load(src), alphaMask)); | |
| 140 } else if (_mm_testz_si128(pixels, alphaMask)) { | |
| 141 do { | |
| 142 dst += 4; | |
| 143 src += 4; | |
| 144 } while (src < end && _mm_testz_si128(pixels = load(src), alphaMask)); | |
| 145 } else { | |
| 146 do { | |
| 147 srcover_srgb_srgb_4(dst, src); | |
| 148 dst += 4; | |
| 149 src += 4; | |
| 150 } while (src < end && _mm_testnzc_si128(pixels = load(src), alphaMask)); | |
| 151 } | |
| 152 } | |
| 153 | |
| 154 count = count & 3; | |
| 155 while (count-- > 0) { | |
| 156 srcover_srgb_srgb_1(dst++, *src++); | |
| 157 } | |
| 158 } | |
| 159 } | |
| 160 #else | |
| 161 // SSE2 versions | |
| 162 static inline bool check_opaque_alphas(__m128i pixels) { | |
| 163 int mask = | |
| 164 _mm_movemask_epi8( | |
| 165 _mm_cmpeq_epi32( | |
| 166 _mm_andnot_si128(pixels, _mm_set1_epi32(0xFF000000)), | |
| 167 _mm_setzero_si128())); | |
| 168 return mask == 0xFFFF; | |
| 169 } | |
| 170 | |
| 171 static inline bool check_transparent_alphas(__m128i pixels) { | |
| 172 int mask = | |
| 173 _mm_movemask_epi8( | |
| 174 _mm_cmpeq_epi32( | |
| 175 _mm_and_si128(pixels, _mm_set1_epi32(0xFF000000)), | |
| 176 _mm_setzero_si128())); | |
| 177 return mask == 0xFFFF; | |
| 178 } | |
| 179 | |
| 180 static inline bool check_partial_alphas(__m128i pixels) { | |
| 181 __m128i alphas = _mm_and_si128(pixels, _mm_set1_epi32(0xFF000000)); | |
| 182 int mask = | |
| 183 _mm_movemask_epi8( | |
| 184 _mm_cmpeq_epi8( | |
| 185 _mm_srai_epi32(alphas, 8), | |
| 186 alphas)); | |
| 187 return mask == 0xFFFF; | |
| 188 } | |
| 189 | |
| 190 void srcover_srgb_srgb( | |
| 191 uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) { | |
| 192 while (ndst > 0) { | |
| 193 int count = SkTMin(ndst, nsrc); | |
| 194 ndst -= count; | |
| 195 const uint32_t* src = srcStart; | |
| 196 const uint32_t* end = src + (count & ~3); | |
| 197 | |
| 198 __m128i pixels = load(src); | |
| 199 do { | |
| 200 if (check_opaque_alphas(pixels)) { | |
| 201 do { | |
| 202 store(dst, pixels); | |
| 203 dst += 4; | |
| 204 src += 4; | |
| 205 } while (src < end && check_opaque_alphas(pixels = load(src))); | |
| 206 } else if (check_transparent_alphas(pixels)) { | |
| 207 const uint32_t* start = src; | |
| 208 do { | |
| 209 src += 4; | |
| 210 } while (src < end && check_transparent_alphas(pixels = load(src))); | |
| 211 dst += src - start; | |
| 212 } else { | |
| 213 do { | |
| 214 srcover_srgb_srgb_4(dst, src); | |
| 215 dst += 4; | |
| 216 src += 4; | |
| 217 } while (src < end && check_partial_alphas(pixels = load(src))); | |
| 218 } | |
| 219 } while (src < end); | |
| 220 | |
| 221 count = count & 3; | |
| 222 while (count-- > 0) { | |
| 223 srcover_srgb_srgb_1(dst++, *src++); | |
| 224 } | |
| 225 } | |
| 226 } | |
| 227 #endif | |
| 228 #else | |
| 229 | |
| 230 void srcover_srgb_srgb( | |
| 231 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { | |
| 232 trivial_srcover_srgb_srgb(dst, src, ndst, nsrc); | |
| 233 } | |
| 234 | |
| 235 #endif | 54 #endif |
| 236 | 55 |
| 237 } // namespace SK_OPTS_NS | 56 } // namespace SK_OPTS_NS |
| 238 | 57 |
| 239 #endif//SkBlend_opts_DEFINED | 58 #endif//SkBlend_opts_DEFINED |
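The new srcover_srgb_srgb_1() (NEW lines 17-41) drops the srgb_to_linear()/linear_to_srgb() based blend_srgb_srgb_1() in favor of a cheaper approximation: it treats the sRGB transfer curve as gamma 2.0, so decoding is a squaring, SrcOver runs on the squared color channels while alpha stays linear, and re-encoding is a square root. A minimal scalar sketch of that idea follows, for reference only; it is not part of this CL, the name srcover_gamma2_sketch is made up, and it assumes only what the diff shows (8888 pixels with alpha in the top byte).

    #include <cmath>
    #include <cstdint>

    // Scalar illustration of the gamma-2.0 SrcOver used by srcover_srgb_srgb_1().
    // Hypothetical helper for explanation, not Skia code.
    static inline void srcover_gamma2_sketch(uint32_t* dst, uint32_t src) {
        uint32_t sa = src >> 24;
        if (sa == 0x00) { return; }              // fully transparent src: dst unchanged
        if (sa == 0xff) { *dst = src; return; }  // fully opaque src: straight copy

        float    invA = 1.0f - sa * (1.0f / 255.0f);
        uint32_t d    = *dst,
                 out  = 0;

        for (int shift = 0; shift < 32; shift += 8) {
            float sc = float((src >> shift) & 0xff),
                  dc = float((d   >> shift) & 0xff),
                  r;
            if (shift == 24) {
                r = sc + dc * invA;                   // alpha blends linearly, no gamma
            } else {
                r = std::sqrt(sc*sc + dc*dc * invA);  // x^2 to decode, blend, sqrt to re-encode
            }
            uint32_t b = r > 255.0f ? 255u : uint32_t(r);  // clamp defensively to a byte
            out |= b << shift;
        }
        *dst = out;
    }

Compared with the removed code, the gamma-2.0 shortcut costs one multiply and one square root per color channel; the Sk4f version in the NEW column does the same arithmetic across all four lanes at once and may round or clamp slightly differently when packing back to bytes.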