OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright 2015 Google Inc. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license that can be | |
5 * found in the LICENSE file. | |
6 */ | |
7 | |
8 #ifndef Sk4pxXfermode_DEFINED | |
9 #define Sk4pxXfermode_DEFINED | |
10 | |
11 #include "Sk4px.h" | |
12 #include "SkPMFloat.h" | |
13 #include "SkXfermode_proccoeff.h" | |
14 | |
// This file may be included into multiple .cpp files.
// Each one gets its own independent instantiation because everything here is wrapped in an
// anonymous namespace.
17 namespace { | |
18 | |
19 #if defined(SK_CPU_ARM32) && !defined(SK_ARM_HAS_NEON) | |
20 // Signals SkXfermode.cpp to look for runtime-detected NEON. | |
21 static SkProcCoeffXfermode* SkCreate4pxXfermode(const ProcCoeff& rec, SkXfer
mode::Mode mode) { | |
22 return nullptr; | |
23 } | |
24 #else | |
25 | |
26 // Most xfermodes can be done most efficiently 4 pixels at a time in 8 or 16-bit
fixed point. | |
27 #define XFERMODE(Name) static Sk4px SK_VECTORCALL Name(Sk4px s, Sk4px d) | |
28 | |
29 XFERMODE(Clear) { return Sk4px::DupPMColor(0); } | |
30 XFERMODE(Src) { return s; } | |
31 XFERMODE(Dst) { return d; } | |
32 XFERMODE(SrcIn) { return s.approxMulDiv255(d.alphas() ); } | |
33 XFERMODE(SrcOut) { return s.approxMulDiv255(d.alphas().inv()); } | |
34 XFERMODE(SrcOver) { return s + d.approxMulDiv255(s.alphas().inv()); } | |
35 XFERMODE(DstIn) { return SrcIn (d,s); } | |
36 XFERMODE(DstOut) { return SrcOut (d,s); } | |
37 XFERMODE(DstOver) { return SrcOver(d,s); } | |
38 | |
39 // [ S * Da + (1 - Sa) * D] | |
40 XFERMODE(SrcATop) { return (s * d.alphas() + d * s.alphas().inv()).div255(); } | |
41 XFERMODE(DstATop) { return SrcATop(d,s); } | |
42 //[ S * (1 - Da) + (1 - Sa) * D ] | |
43 XFERMODE(Xor) { return (s * d.alphas().inv() + d * s.alphas().inv()).div255(); } | |
44 // [S + D ] | |
45 XFERMODE(Plus) { return s.saturatedAdd(d); } | |
46 // [S * D ] | |
47 XFERMODE(Modulate) { return s.approxMulDiv255(d); } | |
48 // [S + D - S * D] | |
49 XFERMODE(Screen) { | |
50 // Doing the math as S + (1-S)*D or S + (D - S*D) means the add and subtract
can be done | |
51 // in 8-bit space without overflow. S + (1-S)*D is a touch faster because i
nv() is cheap. | |
52 return s + d.approxMulDiv255(s.inv()); | |
53 } | |
54 XFERMODE(Multiply) { return (s * d.alphas().inv() + d * s.alphas().inv() + s*d).
div255(); } | |
55 // [ Sa + Da - Sa*Da, Sc + Dc - 2*min(Sc*Da, Dc*Sa) ] (And notice Sa*Da == min(
Sa*Da, Da*Sa).) | |
56 XFERMODE(Difference) { | |
57 auto m = Sk4px::Wide::Min(s * d.alphas(), d * s.alphas()).div255(); | |
58 // There's no chance of underflow, and if we subtract m before adding s+d, n
o overflow. | |
59 return (s - m) + (d - m.zeroAlphas()); | |
60 } | |
61 // [ Sa + Da - Sa*Da, Sc + Dc - 2*Sc*Dc ] | |
62 XFERMODE(Exclusion) { | |
63 auto p = s.approxMulDiv255(d); | |
64 // There's no chance of underflow, and if we subtract p before adding src+ds
t, no overflow. | |
65 return (s - p) + (d - p.zeroAlphas()); | |
66 } | |
67 | |
68 // We take care to use exact math for these next few modes where alphas | |
69 // and colors are calculated using significantly different math. We need | |
70 // to preserve premul invariants, and exact math makes this easier. | |
71 // | |
72 // TODO: Some of these implementations might be able to be sped up a bit | |
73 // while maintaining exact math, but let's follow up with that. | |
74 | |
75 XFERMODE(HardLight) { | |
76 auto sa = s.alphas(), | |
77 da = d.alphas(); | |
78 | |
79 auto srcover = s + (d * sa.inv()).div255(); | |
80 | |
81 auto isLite = ((sa-s) < s).widenLoHi(); | |
82 | |
83 auto lite = sa*da - ((da-d)*(sa-s) << 1), | |
84 dark = s*d << 1, | |
85 both = s*da.inv() + d*sa.inv(); | |
86 | |
87 auto alphas = srcover; | |
88 auto colors = (both + isLite.thenElse(lite, dark)).div255(); | |
89 return alphas.zeroColors() + colors.zeroAlphas(); | |
90 } | |
91 XFERMODE(Overlay) { return HardLight(d,s); } | |
92 | |
93 XFERMODE(Darken) { | |
94 auto sa = s.alphas(), | |
95 da = d.alphas(); | |
96 | |
97 auto sda = (s*da).div255(), | |
98 dsa = (d*sa).div255(); | |
99 | |
100 auto srcover = s + (d * sa.inv()).div255(), | |
101 dstover = d + (s * da.inv()).div255(); | |
102 auto alphas = srcover, | |
103 colors = (sda < dsa).thenElse(srcover, dstover); | |
104 return alphas.zeroColors() + colors.zeroAlphas(); | |
105 } | |
106 XFERMODE(Lighten) { | |
107 auto sa = s.alphas(), | |
108 da = d.alphas(); | |
109 | |
110 auto sda = (s*da).div255(), | |
111 dsa = (d*sa).div255(); | |
112 | |
113 auto srcover = s + (d * sa.inv()).div255(), | |
114 dstover = d + (s * da.inv()).div255(); | |
115 auto alphas = srcover, | |
116 colors = (dsa < sda).thenElse(srcover, dstover); | |
117 return alphas.zeroColors() + colors.zeroAlphas(); | |
118 } | |
119 #undef XFERMODE | |
120 | |
121 // Some xfermodes use math like divide or sqrt that's best done in floats 1 pixe
l at a time. | |
122 #define XFERMODE(Name) static SkPMFloat SK_VECTORCALL Name(SkPMFloat s, SkPMFloa
t d) | |
123 | |
124 XFERMODE(ColorDodge) { | |
125 auto sa = s.alphas(), | |
126 da = d.alphas(), | |
127 isa = Sk4f(1)-sa, | |
128 ida = Sk4f(1)-da; | |
129 | |
130 auto srcover = s + d*isa, | |
131 dstover = d + s*ida, | |
132 otherwise = sa * Sk4f::Min(da, (d*sa)*(sa-s).approxInvert()) + s*ida +
d*isa; | |
133 | |
134 // Order matters here, preferring d==0 over s==sa. | |
135 auto colors = (d == Sk4f(0)).thenElse(dstover, | |
136 (s == sa).thenElse(srcover, | |
137 otherwise)); | |
138 return srcover * SkPMFloat(1,0,0,0) + colors * SkPMFloat(0,1,1,1); | |
139 } | |
140 XFERMODE(ColorBurn) { | |
141 auto sa = s.alphas(), | |
142 da = d.alphas(), | |
143 isa = Sk4f(1)-sa, | |
144 ida = Sk4f(1)-da; | |
145 | |
146 auto srcover = s + d*isa, | |
147 dstover = d + s*ida, | |
148 otherwise = sa*(da-Sk4f::Min(da, (da-d)*sa*s.approxInvert())) + s*ida +
d*isa; | |
149 | |
150 // Order matters here, preferring d==da over s==0. | |
151 auto colors = (d == da).thenElse(dstover, | |
152 (s == Sk4f(0)).thenElse(srcover, | |
153 otherwise)); | |
154 return srcover * SkPMFloat(1,0,0,0) + colors * SkPMFloat(0,1,1,1); | |
155 } | |
156 XFERMODE(SoftLight) { | |
157 auto sa = s.alphas(), | |
158 da = d.alphas(), | |
159 isa = Sk4f(1)-sa, | |
160 ida = Sk4f(1)-da; | |
161 | |
162 // Some common terms. | |
163 auto m = (da > Sk4f(0)).thenElse(d / da, Sk4f(0)), | |
164 s2 = Sk4f(2)*s, | |
165 m4 = Sk4f(4)*m; | |
166 | |
167 // The logic forks three ways: | |
168 // 1. dark src? | |
169 // 2. light src, dark dst? | |
170 // 3. light src, light dst? | |
171 auto darkSrc = d*(sa + (s2 - sa)*(Sk4f(1) - m)), // Used in case 1. | |
172 darkDst = (m4*m4 + m4)*(m - Sk4f(1)) + Sk4f(7)*m, // Used in case 2. | |
173 liteDst = m.sqrt() - m, // Used in case 3. | |
174 liteSrc = d*sa + da*(s2-sa)*(Sk4f(4)*d <= da).thenElse(darkDst, liteDst
); // Case 2 or 3? | |
175 | |
176 auto alpha = s + d*isa; | |
177 auto colors = s*ida + d*isa + (s2 <= sa).thenElse(darkSrc, liteSrc);
// Case 1 or 2/3? | |
178 | |
179 return alpha * SkPMFloat(1,0,0,0) + colors * SkPMFloat(0,1,1,1); | |
180 } | |
181 #undef XFERMODE | |
182 | |
183 // A reasonable fallback mode for doing AA is to simply apply the transfermode f
irst, | |
184 // then linearly interpolate the AA. | |
185 template <Sk4px (SK_VECTORCALL *Mode)(Sk4px, Sk4px)> | |
186 static Sk4px SK_VECTORCALL xfer_aa(Sk4px s, Sk4px d, Sk4px aa) { | |
187 Sk4px bw = Mode(s, d); | |
188 return (bw * aa + d * aa.inv()).div255(); | |
189 } | |
190 | |
191 // For some transfermodes we specialize AA, either for correctness or performanc
e. | |
192 #define XFERMODE_AA(Name) \ | |
193 template <> Sk4px SK_VECTORCALL xfer_aa<Name>(Sk4px s, Sk4px d, Sk4px aa) | |
194 | |
195 // Plus' clamp needs to happen after AA. skia:3852 | |
196 XFERMODE_AA(Plus) { // [ clamp( (1-AA)D + (AA)(S+D) ) == clamp(D + AA*S) ] | |
197 return d.saturatedAdd(s.approxMulDiv255(aa)); | |
198 } | |
199 | |
200 #undef XFERMODE_AA | |
201 | |
202 class Sk4pxXfermode : public SkProcCoeffXfermode { | |
203 public: | |
204 typedef Sk4px (SK_VECTORCALL *Proc4)(Sk4px, Sk4px); | |
205 typedef Sk4px (SK_VECTORCALL *AAProc4)(Sk4px, Sk4px, Sk4px); | |
206 | |
207 Sk4pxXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, Proc4 proc4, AAPr
oc4 aaproc4) | |
208 : INHERITED(rec, mode) | |
209 , fProc4(proc4) | |
210 , fAAProc4(aaproc4) {} | |
211 | |
212 void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[
]) const override { | |
213 if (NULL == aa) { | |
214 Sk4px::MapDstSrc(n, dst, src, [&](const Sk4px& dst4, const Sk4px& sr
c4) { | |
215 return fProc4(src4, dst4); | |
216 }); | |
217 } else { | |
218 Sk4px::MapDstSrcAlpha(n, dst, src, aa, | |
219 [&](const Sk4px& dst4, const Sk4px& src4, const Sk4px& alpha
) { | |
220 return fAAProc4(src4, dst4, alpha); | |
221 }); | |
222 } | |
223 } | |
224 | |
225 void xfer16(uint16_t dst[], const SkPMColor src[], int n, const SkAlpha aa[]
) const override { | |
226 if (NULL == aa) { | |
227 Sk4px::MapDstSrc(n, dst, src, [&](const Sk4px& dst4, const Sk4px& sr
c4) { | |
228 return fProc4(src4, dst4); | |
229 }); | |
230 } else { | |
231 Sk4px::MapDstSrcAlpha(n, dst, src, aa, | |
232 [&](const Sk4px& dst4, const Sk4px& src4, const Sk4px& alpha
) { | |
233 return fAAProc4(src4, dst4, alpha); | |
234 }); | |
235 } | |
236 } | |
237 | |
238 private: | |
239 Proc4 fProc4; | |
240 AAProc4 fAAProc4; | |
241 typedef SkProcCoeffXfermode INHERITED; | |
242 }; | |
243 | |
244 class SkPMFloatXfermode : public SkProcCoeffXfermode { | |
245 public: | |
246 typedef SkPMFloat (SK_VECTORCALL *ProcF)(SkPMFloat, SkPMFloat); | |
247 SkPMFloatXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, ProcF procf) | |
248 : INHERITED(rec, mode) | |
249 , fProcF(procf) {} | |
250 | |
251 void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[
]) const override { | |
252 for (int i = 0; i < n; i++) { | |
253 dst[i] = aa ? this->xfer32(dst[i], src[i], aa[i]) | |
254 : this->xfer32(dst[i], src[i]); | |
255 } | |
256 } | |
257 | |
258 void xfer16(uint16_t dst[], const SkPMColor src[], int n, const SkAlpha aa[]
) const override { | |
259 for (int i = 0; i < n; i++) { | |
260 SkPMColor dst32 = SkPixel16ToPixel32(dst[i]); | |
261 dst32 = aa ? this->xfer32(dst32, src[i], aa[i]) | |
262 : this->xfer32(dst32, src[i]); | |
263 dst[i] = SkPixel32ToPixel16(dst32); | |
264 } | |
265 } | |
266 | |
267 private: | |
268 inline SkPMColor xfer32(SkPMColor dst, SkPMColor src) const { | |
269 return fProcF(SkPMFloat(src), SkPMFloat(dst)).round(); | |
270 } | |
271 | |
272 inline SkPMColor xfer32(SkPMColor dst, SkPMColor src, SkAlpha aa) const { | |
273 SkPMFloat s(src), | |
274 d(dst), | |
275 b(fProcF(s,d)); | |
276 // We do aa in full float precision before going back down to bytes, bec
ause we can! | |
277 SkPMFloat a = Sk4f(aa) * Sk4f(1.0f/255); | |
278 b = b*a + d*(Sk4f(1)-a); | |
279 return b.round(); | |
280 } | |
281 | |
282 ProcF fProcF; | |
283 typedef SkProcCoeffXfermode INHERITED; | |
284 }; | |
285 | |
286 static SkProcCoeffXfermode* SkCreate4pxXfermode(const ProcCoeff& rec, SkXfermode
::Mode mode) { | |
287 switch (mode) { | |
288 #define CASE(Mode) case SkXfermode::k##Mode##_Mode: \ | |
289 return SkNEW_ARGS(Sk4pxXfermode, (rec, mode, &Mode, &xfer_aa<Mode>)) | |
290 CASE(Clear); | |
291 CASE(Src); | |
292 CASE(Dst); | |
293 CASE(SrcOver); | |
294 CASE(DstOver); | |
295 CASE(SrcIn); | |
296 CASE(DstIn); | |
297 CASE(SrcOut); | |
298 CASE(DstOut); | |
299 CASE(SrcATop); | |
300 CASE(DstATop); | |
301 CASE(Xor); | |
302 CASE(Plus); | |
303 CASE(Modulate); | |
304 CASE(Screen); | |
305 CASE(Multiply); | |
306 CASE(Difference); | |
307 CASE(Exclusion); | |
308 CASE(HardLight); | |
309 CASE(Overlay); | |
310 CASE(Darken); | |
311 CASE(Lighten); | |
312 #undef CASE | |
313 | |
314 #define CASE(Mode) case SkXfermode::k##Mode##_Mode: \ | |
315 return SkNEW_ARGS(SkPMFloatXfermode, (rec, mode, &Mode)) | |
316 CASE(ColorDodge); | |
317 CASE(ColorBurn); | |
318 CASE(SoftLight); | |
319 #undef CASE | |
320 | |
321 default: break; | |
322 } | |
323 return nullptr; | |
324 } | |
325 | |
326 #endif | |
327 | |
328 } // namespace | |
329 | |
330 #endif//Sk4pxXfermode_DEFINED | |
OLD | NEW |