src/core/Sk4px.h - Issue 1242973004: Revert of 565 support for SIMD xfermodes

Side by Side Diff: src/core/Sk4px.h

Issue 1242973004: Revert of 565 support for SIMD xfermodes (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: Created 5 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright 2015 Google Inc.	2 * Copyright 2015 Google Inc.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 #ifndef Sk4px_DEFINED	8 #ifndef Sk4px_DEFINED

9 #define Sk4px_DEFINED	9 #define Sk4px_DEFINED

10	10

11 #include "SkNx.h"	11 #include "SkNx.h"

12 #include "SkColor.h"	12 #include "SkColor.h"

13 #include "SkColorPriv.h"

14	13

15 // This file may be included multiple times by .cpp files with different flags, leading	14 // This file may be included multiple times by .cpp files with different flags, leading

16 // to different definitions. Usually that doesn't matter because it's all inlined, but	15 // to different definitions. Usually that doesn't matter because it's all inlined, but

17 // in Debug modes the compilers may not inline everything. So wrap everything in an	16 // in Debug modes the compilers may not inline everything. So wrap everything in an

18 // anonymous namespace to give each includer their own silo of this code (or the linker	17 // anonymous namespace to give each includer their own silo of this code (or the linker

19 // will probably pick one randomly for us, which is rarely correct).	18 // will probably pick one randomly for us, which is rarely correct).

20 namespace {	19 namespace {

21	20

22 // 1, 2 or 4 SkPMColors, generally vectorized.	21 // 1, 2 or 4 SkPMColors, generally vectorized.

23 class Sk4px : public Sk16b {	22 class Sk4px : public Sk16b {

(...skipping 17 matching lines...) Expand all Loading...
41 static Sk4px Load1(const SkPMColor[1]); // PMColor[1] -> ARGB ???? ???? ????	40 static Sk4px Load1(const SkPMColor[1]); // PMColor[1] -> ARGB ???? ???? ????

42	41

43 // Ditto for Alphas... Load2Alphas fills the low two lanes of Sk4px.	42 // Ditto for Alphas... Load2Alphas fills the low two lanes of Sk4px.

44 static Sk4px Load4Alphas(const SkAlpha[4]); // AaXx -> AAAA aaaa XXXX xxxx	43 static Sk4px Load4Alphas(const SkAlpha[4]); // AaXx -> AAAA aaaa XXXX xxxx

45 static Sk4px Load2Alphas(const SkAlpha[2]); // Aa -> AAAA aaaa ???? ????	44 static Sk4px Load2Alphas(const SkAlpha[2]); // Aa -> AAAA aaaa ???? ????

46	45

47 void store4(SkPMColor[4]) const;	46 void store4(SkPMColor[4]) const;

48 void store2(SkPMColor[2]) const;	47 void store2(SkPMColor[2]) const;

49 void store1(SkPMColor[1]) const;	48 void store1(SkPMColor[1]) const;

50	49

51 // Same as above for 565.

52 static Sk4px Load4(const SkPMColor16 src[4]);

53 static Sk4px Load2(const SkPMColor16 src[2]);

54 static Sk4px Load1(const SkPMColor16 src[1]);

55 void store4(SkPMColor16 dst[4]) const;

56 void store2(SkPMColor16 dst[2]) const;

57 void store1(SkPMColor16 dst[1]) const;

58

59 // 1, 2, or 4 SkPMColors with 16-bit components.	50 // 1, 2, or 4 SkPMColors with 16-bit components.

60 // This is most useful as the result of a multiply, e.g. from mulWiden().	51 // This is most useful as the result of a multiply, e.g. from mulWiden().

61 class Wide : public Sk16h {	52 class Wide : public Sk16h {

62 public:	53 public:

63 Wide(const Sk16h& v) : Sk16h(v) {}	54 Wide(const Sk16h& v) : Sk16h(v) {}

64	55

65 // Pack the top byte of each component back down into 4 SkPMColors.	56 // Pack the top byte of each component back down into 4 SkPMColors.

66 Sk4px addNarrowHi(const Sk16h&) const;	57 Sk4px addNarrowHi(const Sk16h&) const;

67	58

68 // Rounds, i.e. (x+127) / 255.	59 // Rounds, i.e. (x+127) / 255.

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
101	92

102 // Generally faster than (*this * o).div255().	93 // Generally faster than (*this * o).div255().

103 // May be incorrect by +-1, but is always exactly correct when *this or o is 0 or 255.	94 // May be incorrect by +-1, but is always exactly correct when *this or o is 0 or 255.

104 Sk4px approxMulDiv255(const Sk16b& o) const {	95 Sk4px approxMulDiv255(const Sk16b& o) const {

105 // (x*y + x) / 256 meets these criteria. (As of course does (x*y + y) / 256 by symmetry.)	96 // (x*y + x) / 256 meets these criteria. (As of course does (x*y + y) / 256 by symmetry.)

106 return this->widenLo().addNarrowHi(*this * o);	97 return this->widenLo().addNarrowHi(*this * o);

107 }	98 }

108	99

109 // A generic driver that maps fn over a src array into a dst array.	100 // A generic driver that maps fn over a src array into a dst array.

110 // fn should take an Sk4px (4 src pixels) and return an Sk4px (4 dst pixels).	101 // fn should take an Sk4px (4 src pixels) and return an Sk4px (4 dst pixels).

111 template <typename Fn, typename Dst>	102 template <typename Fn>

112 static void MapSrc(int n, Dst* dst, const SkPMColor* src, const Fn& fn) {	103 static void MapSrc(int n, SkPMColor* dst, const SkPMColor* src, const Fn& fn) {

113 // This looks a bit odd, but it helps loop-invariant hoisting across different calls to fn.	104 // This looks a bit odd, but it helps loop-invariant hoisting across different calls to fn.

114 // Basically, we need to make sure we keep things inside a single loop.	105 // Basically, we need to make sure we keep things inside a single loop.

115 while (n > 0) {	106 while (n > 0) {

116 if (n >= 8) {	107 if (n >= 8) {

117 Sk4px dst0 = fn(Load4(src+0)),	108 Sk4px dst0 = fn(Load4(src+0)),

118 dst4 = fn(Load4(src+4));	109 dst4 = fn(Load4(src+4));

119 dst0.store4(dst+0);	110 dst0.store4(dst+0);

120 dst4.store4(dst+4);	111 dst4.store4(dst+4);

121 dst += 8; src += 8; n -= 8;	112 dst += 8; src += 8; n -= 8;

122 continue; // Keep our stride at 8 pixels as long as possible.	113 continue; // Keep our stride at 8 pixels as long as possible.

123 }	114 }

124 SkASSERT(n <= 7);	115 SkASSERT(n <= 7);

125 if (n >= 4) {	116 if (n >= 4) {

126 fn(Load4(src)).store4(dst);	117 fn(Load4(src)).store4(dst);

127 dst += 4; src += 4; n -= 4;	118 dst += 4; src += 4; n -= 4;

128 }	119 }

129 if (n >= 2) {	120 if (n >= 2) {

130 fn(Load2(src)).store2(dst);	121 fn(Load2(src)).store2(dst);

131 dst += 2; src += 2; n -= 2;	122 dst += 2; src += 2; n -= 2;

132 }	123 }

133 if (n >= 1) {	124 if (n >= 1) {

134 fn(Load1(src)).store1(dst);	125 fn(Load1(src)).store1(dst);

135 }	126 }

136 break;	127 break;

137 }	128 }

138 }	129 }

139	130

140 // As above, but with dst4' = fn(dst4, src4).	131 // As above, but with dst4' = fn(dst4, src4).

141 template <typename Fn, typename Dst>	132 template <typename Fn>

142 static void MapDstSrc(int n, Dst* dst, const SkPMColor* src, const Fn& fn) {	133 static void MapDstSrc(int n, SkPMColor* dst, const SkPMColor* src, const Fn& fn) {

143 while (n > 0) {	134 while (n > 0) {

144 if (n >= 8) {	135 if (n >= 8) {

145 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0)),	136 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0)),

146 dst4 = fn(Load4(dst+4), Load4(src+4));	137 dst4 = fn(Load4(dst+4), Load4(src+4));

147 dst0.store4(dst+0);	138 dst0.store4(dst+0);

148 dst4.store4(dst+4);	139 dst4.store4(dst+4);

149 dst += 8; src += 8; n -= 8;	140 dst += 8; src += 8; n -= 8;

150 continue; // Keep our stride at 8 pixels as long as possible.	141 continue; // Keep our stride at 8 pixels as long as possible.

151 }	142 }

152 SkASSERT(n <= 7);	143 SkASSERT(n <= 7);

153 if (n >= 4) {	144 if (n >= 4) {

154 fn(Load4(dst), Load4(src)).store4(dst);	145 fn(Load4(dst), Load4(src)).store4(dst);

155 dst += 4; src += 4; n -= 4;	146 dst += 4; src += 4; n -= 4;

156 }	147 }

157 if (n >= 2) {	148 if (n >= 2) {

158 fn(Load2(dst), Load2(src)).store2(dst);	149 fn(Load2(dst), Load2(src)).store2(dst);

159 dst += 2; src += 2; n -= 2;	150 dst += 2; src += 2; n -= 2;

160 }	151 }

161 if (n >= 1) {	152 if (n >= 1) {

162 fn(Load1(dst), Load1(src)).store1(dst);	153 fn(Load1(dst), Load1(src)).store1(dst);

163 }	154 }

164 break;	155 break;

165 }	156 }

166 }	157 }

167	158

168 // As above, but with dst4' = fn(dst4, src4, alpha4).	159 // As above, but with dst4' = fn(dst4, src4, alpha4).

169 template <typename Fn, typename Dst>	160 template <typename Fn>

170 static void MapDstSrcAlpha(int n, Dst* dst, const SkPMColor* src, const SkAlpha* a,	161 static void MapDstSrcAlpha(int n, SkPMColor* dst, const SkPMColor* src, const SkAlpha* a,

171 const Fn& fn) {	162 const Fn& fn) {

172 while (n > 0) {	163 while (n > 0) {

173 if (n >= 8) {	164 if (n >= 8) {

174 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0), Load4Alphas(a+0)),	165 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0), Load4Alphas(a+0)),

175 dst4 = fn(Load4(dst+4), Load4(src+4), Load4Alphas(a+4));	166 dst4 = fn(Load4(dst+4), Load4(src+4), Load4Alphas(a+4));

176 dst0.store4(dst+0);	167 dst0.store4(dst+0);

177 dst4.store4(dst+4);	168 dst4.store4(dst+4);

178 dst += 8; src += 8; a += 8; n -= 8;	169 dst += 8; src += 8; a += 8; n -= 8;

179 continue; // Keep our stride at 8 pixels as long as possible.	170 continue; // Keep our stride at 8 pixels as long as possible.

180 }	171 }

(...skipping 10 matching lines...) Expand all Loading...
191 fn(Load1(dst), Load1(src), DupAlpha(*a)).store1(dst);	182 fn(Load1(dst), Load1(src), DupAlpha(*a)).store1(dst);

192 }	183 }

193 break;	184 break;

194 }	185 }

195 }	186 }

196	187

197 private:	188 private:

198 typedef Sk16b INHERITED;	189 typedef Sk16b INHERITED;

199 };	190 };

200	191

201 // TODO: specialize these per-backend

202

203 inline Sk4px Sk4px::Load4(const SkPMColor16 src[4]) {

204 SkPMColor src32[4];

205 for (int i = 0; i < 4; i++) { src32[i] = SkPixel16ToPixel32(src[i]); }

206 return Load4(src32);

207 }

208 inline Sk4px Sk4px::Load2(const SkPMColor16 src[2]) {

209 SkPMColor src32[2];

210 for (int i = 0; i < 2; i++) { src32[i] = SkPixel16ToPixel32(src[i]); }

211 return Load2(src32);

212 }

213 inline Sk4px Sk4px::Load1(const SkPMColor16 src[1]) {

214 SkPMColor src32 = SkPixel16ToPixel32(src[0]);

215 return Load1(&src32);

216 }

217

218 inline void Sk4px::store4(SkPMColor16 dst[4]) const {

219 SkPMColor dst32[4];

220 this->store4(dst32);

221 for (int i = 0; i < 4; i++) { dst[i] = SkPixel32ToPixel16(dst32[i]); }

222 }

223 inline void Sk4px::store2(SkPMColor16 dst[2]) const {

224 SkPMColor dst32[2];

225 this->store2(dst32);

226 for (int i = 0; i < 2; i++) { dst[i] = SkPixel32ToPixel16(dst32[i]); }

227 }

228 inline void Sk4px::store1(SkPMColor16 dst[1]) const {

229 SkPMColor dst32;

230 this->store1(&dst32);

231 dst[0] = SkPixel32ToPixel16(dst32);

232 }

233

234 } // namespace	192 } // namespace

235	193

236 #ifdef SKNX_NO_SIMD	194 #ifdef SKNX_NO_SIMD

237 #include "../opts/Sk4px_none.h"	195 #include "../opts/Sk4px_none.h"

238 #else	196 #else

239 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2	197 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2

240 #include "../opts/Sk4px_SSE2.h"	198 #include "../opts/Sk4px_SSE2.h"

241 #elif defined(SK_ARM_HAS_NEON)	199 #elif defined(SK_ARM_HAS_NEON)

242 #include "../opts/Sk4px_NEON.h"	200 #include "../opts/Sk4px_NEON.h"

243 #else	201 #else

244 #include "../opts/Sk4px_none.h"	202 #include "../opts/Sk4px_none.h"

245 #endif	203 #endif

246 #endif	204 #endif

247	205

248 #endif//Sk4px_DEFINED	206 #endif//Sk4px_DEFINED

OLD	NEW

« no previous file with comments | « no previous file | src/core/Sk4pxXfermode.h » ('j') | no next file with comments »