Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1210)

Side by Side Diff: src/opts/SkXfermode_opts.h

Issue 1432903002: float xfermodes (burn, dodge, softlight) in Sk8f, possibly using AVX. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: 1.0f/255 Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« src/opts/SkNx_avx.h ('K') | « src/opts/SkOpts_avx.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef Sk4pxXfermode_DEFINED 8 #ifndef Sk4pxXfermode_DEFINED
9 #define Sk4pxXfermode_DEFINED 9 #define Sk4pxXfermode_DEFINED
10 10
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
102 dsa = (d*sa).div255(); 102 dsa = (d*sa).div255();
103 103
104 auto srcover = s + (d * sa.inv()).div255(), 104 auto srcover = s + (d * sa.inv()).div255(),
105 dstover = d + (s * da.inv()).div255(); 105 dstover = d + (s * da.inv()).div255();
106 auto alphas = srcover, 106 auto alphas = srcover,
107 colors = (dsa < sda).thenElse(srcover, dstover); 107 colors = (dsa < sda).thenElse(srcover, dstover);
108 return alphas.zeroColors() + colors.zeroAlphas(); 108 return alphas.zeroColors() + colors.zeroAlphas();
109 } 109 }
110 #undef XFERMODE 110 #undef XFERMODE
111 111
112 // Some xfermodes use math like divide or sqrt that's best done in floats 1 pixel at a time. 112 // Some xfermodes use math like divide or sqrt that's best done in floats.
113 #define XFERMODE(Name) static Sk4f SK_VECTORCALL Name(Sk4f d, Sk4f s) 113 // We write it generically, then call it 1 or 2 pixels at a time (T == Sk4f or Sk8f).
114 #define XFERMODE(Name) struct Name { template <typename T> T operator()(const T&, const T&); }; \
115 template <typename T> T Name::operator()(const T& d, const T& s)
114 116
117 static_assert(SK_A32_SHIFT == 24, "");
115 static inline Sk4f a_rgb(const Sk4f& a, const Sk4f& rgb) { 118 static inline Sk4f a_rgb(const Sk4f& a, const Sk4f& rgb) {
116 static_assert(SK_A32_SHIFT == 24, "");
117 return a * Sk4f(0,0,0,1) + rgb * Sk4f(1,1,1,0); 119 return a * Sk4f(0,0,0,1) + rgb * Sk4f(1,1,1,0);
118 } 120 }
119 static inline Sk4f alphas(const Sk4f& f) { 121 static inline Sk8f a_rgb(const Sk8f& a, const Sk8f& rgb) {
120 return SkNx_dup<SK_A32_SHIFT/8>(f); 122 // TODO: SkNx_blend<0,0,0,1,0,0,0,1>(a, rgb) to let us use _mm256_blend_ps?
123 return a * Sk8f(0,0,0,1,0,0,0,1) + rgb * Sk8f(1,1,1,0,1,1,1,0);
121 } 124 }
125 static inline Sk4f alphas(const Sk4f& f) { return SkNx_shuffle<3,3,3,3> ( f); }
126 static inline Sk8f alphas(const Sk8f& f) { return SkNx_shuffle<3,3,3,3,7,7,7,7>( f); }
msarett 2015/11/09 23:25:06 Where is shuffle defined for AVX? Oh, it looks li
mtklein 2015/11/10 00:22:05 Right, default implementation. This compiles into
msarett 2015/11/10 14:54:16 Great, cool instruction! I'm impressed by the com
mtklein 2015/11/11 19:27:13 Yeah, Clang's pretty impressive. Mostly SkNx_shuf
122 127
123 XFERMODE(ColorDodge) { 128 XFERMODE(ColorDodge) {
124 auto sa = alphas(s), 129 auto sa = alphas(s),
125 da = alphas(d), 130 da = alphas(d),
126 isa = Sk4f(1)-sa, 131 isa = T(1)-sa,
127 ida = Sk4f(1)-da; 132 ida = T(1)-da;
128 133
129 auto srcover = s + d*isa, 134 auto srcover = s + d*isa,
130 dstover = d + s*ida, 135 dstover = d + s*ida,
131 otherwise = sa * Sk4f::Min(da, (d*sa)*(sa-s).approxInvert()) + s*ida + d*isa; 136 otherwise = sa * T::Min(da, (d*sa)*(sa-s).approxInvert()) + s*ida + d*isa;
132 137
133 // Order matters here, preferring d==0 over s==sa. 138 // Order matters here, preferring d==0 over s==sa.
134 auto colors = (d == Sk4f(0)).thenElse(dstover, 139 auto colors = (d == 0).thenElse(dstover,
135 (s == sa).thenElse(srcover, 140 (s == sa).thenElse(srcover,
136 otherwise)); 141 otherwise));
137 return a_rgb(srcover, colors); 142 return a_rgb(srcover, colors);
138 } 143 }
139 XFERMODE(ColorBurn) { 144 XFERMODE(ColorBurn) {
140 auto sa = alphas(s), 145 auto sa = alphas(s),
141 da = alphas(d), 146 da = alphas(d),
142 isa = Sk4f(1)-sa, 147 isa = T(1)-sa,
143 ida = Sk4f(1)-da; 148 ida = T(1)-da;
144 149
145 auto srcover = s + d*isa, 150 auto srcover = s + d*isa,
146 dstover = d + s*ida, 151 dstover = d + s*ida,
147 otherwise = sa*(da-Sk4f::Min(da, (da-d)*sa*s.approxInvert())) + s*ida + d*isa; 152 otherwise = sa*(da-T::Min(da, (da-d)*sa*s.approxInvert())) + s*ida + d*isa;
148 153
149 // Order matters here, preferring d==da over s==0. 154 // Order matters here, preferring d==da over s==0.
150 auto colors = (d == da).thenElse(dstover, 155 auto colors = (d == da).thenElse(dstover,
151 (s == Sk4f(0)).thenElse(srcover, 156 (s == 0).thenElse(srcover,
152 otherwise)); 157 otherwise));
153 return a_rgb(srcover, colors); 158 return a_rgb(srcover, colors);
154 } 159 }
155 XFERMODE(SoftLight) { 160 XFERMODE(SoftLight) {
156 auto sa = alphas(s), 161 auto sa = alphas(s),
157 da = alphas(d), 162 da = alphas(d),
158 isa = Sk4f(1)-sa, 163 isa = T(1)-sa,
159 ida = Sk4f(1)-da; 164 ida = T(1)-da;
160 165
161 // Some common terms. 166 // Some common terms.
162 auto m = (da > Sk4f(0)).thenElse(d / da, Sk4f(0)), 167 auto m = (da > 0).thenElse(d / da, 0),
163 s2 = Sk4f(2)*s, 168 s2 = s*2,
164 m4 = Sk4f(4)*m; 169 m4 = m*4;
165 170
166 // The logic forks three ways: 171 // The logic forks three ways:
167 // 1. dark src? 172 // 1. dark src?
168 // 2. light src, dark dst? 173 // 2. light src, dark dst?
169 // 3. light src, light dst? 174 // 3. light src, light dst?
170 auto darkSrc = d*(sa + (s2 - sa)*(Sk4f(1) - m)), // Used in case 1. 175 auto darkSrc = d*(sa + (s2 - sa)*(T(1) - m)), // Used in case 1.
171 darkDst = (m4*m4 + m4)*(m - Sk4f(1)) + Sk4f(7)*m, // Used in case 2. 176 darkDst = (m4*m4 + m4)*(m - 1) + m*7, // Used in case 2.
172 liteDst = m.sqrt() - m, // Used in case 3. 177 liteDst = m.sqrt() - m, // Used in case 3.
173 liteSrc = d*sa + da*(s2-sa)*(Sk4f(4)*d <= da).thenElse(darkDst, liteDst); // Case 2 or 3? 178 liteSrc = d*sa + da*(s2-sa)*(d*4 <= da).thenElse(darkDst, liteDst); // Case 2 or 3?
174 179
175 auto alpha = s + d*isa; 180 auto alpha = s + d*isa;
176 auto colors = s*ida + d*isa + (s2 <= sa).thenElse(darkSrc, liteSrc); // Case 1 or 2/3? 181 auto colors = s*ida + d*isa + (s2 <= sa).thenElse(darkSrc, liteSrc); // Case 1 or 2/3?
177 182
178 return a_rgb(alpha, colors); 183 return a_rgb(alpha, colors);
179 } 184 }
180 #undef XFERMODE 185 #undef XFERMODE
181 186
182 // A reasonable fallback mode for doing AA is to simply apply the transfermode first, 187 // A reasonable fallback mode for doing AA is to simply apply the transfermode first,
183 // then linearly interpolate the AA. 188 // then linearly interpolate the AA.
184 template <Sk4px (SK_VECTORCALL *Mode)(Sk4px, Sk4px)> 189 template <Sk4px (SK_VECTORCALL *Mode)(Sk4px, Sk4px)>
185 static Sk4px SK_VECTORCALL xfer_aa(Sk4px s, Sk4px d, Sk4px aa) { 190 static Sk4px SK_VECTORCALL xfer_aa(Sk4px s, Sk4px d, Sk4px aa) {
186 Sk4px bw = Mode(s, d); 191 Sk4px bw = Mode(s, d);
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
233 }); 238 });
234 } 239 }
235 } 240 }
236 241
237 private: 242 private:
238 Proc4 fProc4; 243 Proc4 fProc4;
239 AAProc4 fAAProc4; 244 AAProc4 fAAProc4;
240 typedef SkProcCoeffXfermode INHERITED; 245 typedef SkProcCoeffXfermode INHERITED;
241 }; 246 };
242 247
243 class Sk4fXfermode : public SkProcCoeffXfermode { 248 template <typename BlendFn>
249 class FloatXfermode : public SkProcCoeffXfermode {
244 public: 250 public:
245 typedef Sk4f (SK_VECTORCALL *ProcF)(Sk4f, Sk4f); 251 FloatXfermode(const ProcCoeff& rec, SkXfermode::Mode mode)
246 Sk4fXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, ProcF procf) 252 : INHERITED(rec, mode) {}
247 : INHERITED(rec, mode)
248 , fProcF(procf) {}
249 253
250 void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override { 254 void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override {
msarett 2015/11/09 23:25:06 It looks like you've deleted some private helper f
mtklein 2015/11/10 00:22:05 Yep, just moving things around.
msarett 2015/11/10 14:54:16 Acknowledged.
251 for (int i = 0; i < n; i++) { 255 BlendFn blend;
252 dst[i] = aa ? this->xfer32(dst[i], src[i], aa[i]) 256 while (n >= 2) {
253 : this->xfer32(dst[i], src[i]); 257 auto d = Sk8f::FromBytes((const uint8_t*)dst) * (1.0f/255),
msarett 2015/11/09 23:25:06 nit: Any reason not to write 1.0f / 255.0f?
mtklein 2015/11/10 00:22:05 Nope, either way works the same and gets to the sa
258 s = Sk8f::FromBytes((const uint8_t*)src) * (1.0f/255),
259 b = blend(d, s);
260 if (aa) {
261 auto a255 = Sk8f(aa[0],aa[0],aa[0],aa[0], aa[1],aa[1],aa[1],aa[1]);
262 (b*a255 + d*(Sk8f(255)-a255) + 0.5).toBytes((uint8_t*)dst);
263 aa += 2;
264 } else {
265 (b * 255 + 0.5).toBytes((uint8_t*)dst);
266 }
267 dst += 2;
268 src += 2;
269 n -= 2;
270 }
271 if (n) {
272 auto d = Sk4f::FromBytes((const uint8_t*)dst) * (1.0f/255),
273 s = Sk4f::FromBytes((const uint8_t*)src) * (1.0f/255),
274 b = blend(d, s);
275 if (aa) {
276 auto a255 = Sk4f(aa[0],aa[0],aa[0],aa[0]);
277 (b*a255 + d*(Sk4f(255)-a255) + 0.5).toBytes((uint8_t*)dst);
278 aa++;
279 } else {
280 (b * 255 + 0.5).toBytes((uint8_t*)dst);
281 }
254 } 282 }
255 } 283 }
256 284
257 void xfer16(uint16_t dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override { 285 void xfer16(uint16_t dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override {
258 for (int i = 0; i < n; i++) { 286 for (int i = 0; i < n; i++) {
259 SkPMColor dst32 = SkPixel16ToPixel32(dst[i]); 287 SkPMColor dst32 = SkPixel16ToPixel32(dst[i]); // Convert dst up to 8888.
260 dst32 = aa ? this->xfer32(dst32, src[i], aa[i]) 288 this->xfer32(&dst32, src+i, 1, aa ? aa+i : nullptr); // Blend 1 pixel.
261 : this->xfer32(dst32, src[i]); 289 dst[i] = SkPixel32ToPixel16(dst32); // Repack dst to 565 and store.
msarett 2015/11/09 23:25:06 This seems slow? Although not different from befo
mtklein 2015/11/10 00:22:05 Right. No different from before. Just always 1 p
262 dst[i] = SkPixel32ToPixel16(dst32);
263 } 290 }
264 } 291 }
265 292
266 private: 293 private:
267 static Sk4f Load(SkPMColor c) {
268 return Sk4f::FromBytes((uint8_t*)&c) * Sk4f(1.0f/255);
269 }
270 static SkPMColor Round(const Sk4f& f) {
271 SkPMColor c;
272 (f * Sk4f(255) + Sk4f(0.5f)).toBytes((uint8_t*)&c);
273 return c;
274 }
275 inline SkPMColor xfer32(SkPMColor dst, SkPMColor src) const {
276 return Round(fProcF(Load(dst), Load(src)));
277 }
278
279 inline SkPMColor xfer32(SkPMColor dst, SkPMColor src, SkAlpha aa) const {
280 Sk4f s(Load(src)),
281 d(Load(dst)),
282 b(fProcF(d,s));
283 // We do aa in full float precision before going back down to bytes, because we can!
284 Sk4f a = Sk4f(aa) * Sk4f(1.0f/255);
285 b = b*a + d*(Sk4f(1)-a);
286 return Round(b);
287 }
288
289 ProcF fProcF;
290 typedef SkProcCoeffXfermode INHERITED; 294 typedef SkProcCoeffXfermode INHERITED;
291 }; 295 };
292 296
293 } // namespace 297 } // namespace
294 298
295 namespace SK_OPTS_NS { 299 namespace SK_OPTS_NS {
296 300
297 static SkXfermode* create_xfermode(const ProcCoeff& rec, SkXfermode::Mode mode) { 301 static SkXfermode* create_xfermode(const ProcCoeff& rec, SkXfermode::Mode mode) {
298 switch (mode) { 302 switch (mode) {
299 #define CASE(Mode) \ 303 #define CASE(Mode) \
(...skipping 16 matching lines...) Expand all
316 CASE(Multiply); 320 CASE(Multiply);
317 CASE(Difference); 321 CASE(Difference);
318 CASE(Exclusion); 322 CASE(Exclusion);
319 CASE(HardLight); 323 CASE(HardLight);
320 CASE(Overlay); 324 CASE(Overlay);
321 CASE(Darken); 325 CASE(Darken);
322 CASE(Lighten); 326 CASE(Lighten);
323 #undef CASE 327 #undef CASE
324 328
325 #define CASE(Mode) \ 329 #define CASE(Mode) \
326 case SkXfermode::k##Mode##_Mode: return new Sk4fXfermode(rec, mode, &Mode) 330 case SkXfermode::k##Mode##_Mode: return new FloatXfermode<Mode>(rec, mode)
327 CASE(ColorDodge); 331 CASE(ColorDodge);
328 CASE(ColorBurn); 332 CASE(ColorBurn);
329 CASE(SoftLight); 333 CASE(SoftLight);
330 #undef CASE 334 #undef CASE
331 335
332 default: break; 336 default: break;
333 } 337 }
334 return nullptr; 338 return nullptr;
335 } 339 }
336 340
337 } // namespace SK_OPTS_NS 341 } // namespace SK_OPTS_NS
338 342
339 #endif//Sk4pxXfermode_DEFINED 343 #endif//Sk4pxXfermode_DEFINED
OLDNEW
« src/opts/SkNx_avx.h ('K') | « src/opts/SkOpts_avx.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698