 Chromium Code Reviews
 Chromium Code Reviews Issue 1432903002:
  float xfermodes (burn, dodge, softlight) in Sk8f, possibly using AVX.  (Closed) 
  Base URL: https://skia.googlesource.com/skia.git@master
    
  
    Issue 1432903002:
  float xfermodes (burn, dodge, softlight) in Sk8f, possibly using AVX.  (Closed) 
  Base URL: https://skia.googlesource.com/skia.git@master| OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. | 
| 3 * | 3 * | 
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be | 
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. | 
| 6 */ | 6 */ | 
| 7 | 7 | 
| 8 #ifndef Sk4pxXfermode_DEFINED | 8 #ifndef Sk4pxXfermode_DEFINED | 
| 9 #define Sk4pxXfermode_DEFINED | 9 #define Sk4pxXfermode_DEFINED | 
| 10 | 10 | 
| (...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 102 dsa = (d*sa).div255(); | 102 dsa = (d*sa).div255(); | 
| 103 | 103 | 
| 104 auto srcover = s + (d * sa.inv()).div255(), | 104 auto srcover = s + (d * sa.inv()).div255(), | 
| 105 dstover = d + (s * da.inv()).div255(); | 105 dstover = d + (s * da.inv()).div255(); | 
| 106 auto alphas = srcover, | 106 auto alphas = srcover, | 
| 107 colors = (dsa < sda).thenElse(srcover, dstover); | 107 colors = (dsa < sda).thenElse(srcover, dstover); | 
| 108 return alphas.zeroColors() + colors.zeroAlphas(); | 108 return alphas.zeroColors() + colors.zeroAlphas(); | 
| 109 } | 109 } | 
| 110 #undef XFERMODE | 110 #undef XFERMODE | 
| 111 | 111 | 
| 112 // Some xfermodes use math like divide or sqrt that's best done in floats 1 pixel at a time. | 112 // Some xfermodes use math like divide or sqrt that's best done in floats. | 
| 113 #define XFERMODE(Name) static Sk4f SK_VECTORCALL Name(Sk4f d, Sk4f s) | 113 // We write it generically, then call it 1 or 2 pixels at a time (T == Sk4f or Sk8f). | 
| 114 #define XFERMODE(Name) struct Name { template <typename T> T operator()(const T& , const T&); }; \ | |
| 115 template <typename T> T Name::operator()(const T& d, const T& s) | |
| 114 | 116 | 
| 117 static_assert(SK_A32_SHIFT == 24, ""); | |
| 115 static inline Sk4f a_rgb(const Sk4f& a, const Sk4f& rgb) { | 118 static inline Sk4f a_rgb(const Sk4f& a, const Sk4f& rgb) { | 
| 116 static_assert(SK_A32_SHIFT == 24, ""); | |
| 117 return a * Sk4f(0,0,0,1) + rgb * Sk4f(1,1,1,0); | 119 return a * Sk4f(0,0,0,1) + rgb * Sk4f(1,1,1,0); | 
| 118 } | 120 } | 
| 119 static inline Sk4f alphas(const Sk4f& f) { | 121 static inline Sk8f a_rgb(const Sk8f& a, const Sk8f& rgb) { | 
| 120 return SkNx_dup<SK_A32_SHIFT/8>(f); | 122 // TODO: SkNx_blend<0,0,0,1,0,0,0,1>(a, rgb) to let us use _mm256_blend_ps? | 
| 123 return a * Sk8f(0,0,0,1,0,0,0,1) + rgb * Sk8f(1,1,1,0,1,1,1,0); | |
| 121 } | 124 } | 
| 125 static inline Sk4f alphas(const Sk4f& f) { return SkNx_shuffle<3,3,3,3> ( f); } | |
| 126 static inline Sk8f alphas(const Sk8f& f) { return SkNx_shuffle<3,3,3,3,7,7,7,7>( f); } | |
| 
msarett
2015/11/09 23:25:06
Where is shuffle defined for AVX?
Oh, it looks li
 
mtklein
2015/11/10 00:22:05
Right, default implementation.  This compiles into
 
msarett
2015/11/10 14:54:16
Great, cool instruction!
I'm impressed by the com
 
mtklein
2015/11/11 19:27:13
Yeah, Clang's pretty impressive.  Mostly SkNx_shuf
 | |
| 122 | 127 | 
| 123 XFERMODE(ColorDodge) { | 128 XFERMODE(ColorDodge) { | 
| 124 auto sa = alphas(s), | 129 auto sa = alphas(s), | 
| 125 da = alphas(d), | 130 da = alphas(d), | 
| 126 isa = Sk4f(1)-sa, | 131 isa = T(1)-sa, | 
| 127 ida = Sk4f(1)-da; | 132 ida = T(1)-da; | 
| 128 | 133 | 
| 129 auto srcover = s + d*isa, | 134 auto srcover = s + d*isa, | 
| 130 dstover = d + s*ida, | 135 dstover = d + s*ida, | 
| 131 otherwise = sa * Sk4f::Min(da, (d*sa)*(sa-s).approxInvert()) + s*ida + d*isa; | 136 otherwise = sa * T::Min(da, (d*sa)*(sa-s).approxInvert()) + s*ida + d*isa; | 
| 132 | 137 | 
| 133 // Order matters here, preferring d==0 over s==sa. | 138 // Order matters here, preferring d==0 over s==sa. | 
| 134 auto colors = (d == Sk4f(0)).thenElse(dstover, | 139 auto colors = (d == 0).thenElse(dstover, | 
| 135 (s == sa).thenElse(srcover, | 140 (s == sa).thenElse(srcover, | 
| 136 otherwise)); | 141 otherwise)); | 
| 137 return a_rgb(srcover, colors); | 142 return a_rgb(srcover, colors); | 
| 138 } | 143 } | 
| 139 XFERMODE(ColorBurn) { | 144 XFERMODE(ColorBurn) { | 
| 140 auto sa = alphas(s), | 145 auto sa = alphas(s), | 
| 141 da = alphas(d), | 146 da = alphas(d), | 
| 142 isa = Sk4f(1)-sa, | 147 isa = T(1)-sa, | 
| 143 ida = Sk4f(1)-da; | 148 ida = T(1)-da; | 
| 144 | 149 | 
| 145 auto srcover = s + d*isa, | 150 auto srcover = s + d*isa, | 
| 146 dstover = d + s*ida, | 151 dstover = d + s*ida, | 
| 147 otherwise = sa*(da-Sk4f::Min(da, (da-d)*sa*s.approxInvert())) + s*ida + d*isa; | 152 otherwise = sa*(da-T::Min(da, (da-d)*sa*s.approxInvert())) + s*ida + d*isa; | 
| 148 | 153 | 
| 149 // Order matters here, preferring d==da over s==0. | 154 // Order matters here, preferring d==da over s==0. | 
| 150 auto colors = (d == da).thenElse(dstover, | 155 auto colors = (d == da).thenElse(dstover, | 
| 151 (s == Sk4f(0)).thenElse(srcover, | 156 (s == 0).thenElse(srcover, | 
| 152 otherwise)); | 157 otherwise)); | 
| 153 return a_rgb(srcover, colors); | 158 return a_rgb(srcover, colors); | 
| 154 } | 159 } | 
| 155 XFERMODE(SoftLight) { | 160 XFERMODE(SoftLight) { | 
| 156 auto sa = alphas(s), | 161 auto sa = alphas(s), | 
| 157 da = alphas(d), | 162 da = alphas(d), | 
| 158 isa = Sk4f(1)-sa, | 163 isa = T(1)-sa, | 
| 159 ida = Sk4f(1)-da; | 164 ida = T(1)-da; | 
| 160 | 165 | 
| 161 // Some common terms. | 166 // Some common terms. | 
| 162 auto m = (da > Sk4f(0)).thenElse(d / da, Sk4f(0)), | 167 auto m = (da > 0).thenElse(d / da, 0), | 
| 163 s2 = Sk4f(2)*s, | 168 s2 = s*2, | 
| 164 m4 = Sk4f(4)*m; | 169 m4 = m*4; | 
| 165 | 170 | 
| 166 // The logic forks three ways: | 171 // The logic forks three ways: | 
| 167 // 1. dark src? | 172 // 1. dark src? | 
| 168 // 2. light src, dark dst? | 173 // 2. light src, dark dst? | 
| 169 // 3. light src, light dst? | 174 // 3. light src, light dst? | 
| 170 auto darkSrc = d*(sa + (s2 - sa)*(Sk4f(1) - m)), // Used in case 1. | 175 auto darkSrc = d*(sa + (s2 - sa)*(T(1) - m)), // Used in case 1. | 
| 171 darkDst = (m4*m4 + m4)*(m - Sk4f(1)) + Sk4f(7)*m, // Used in case 2. | 176 darkDst = (m4*m4 + m4)*(m - 1) + m*7, // Used in case 2. | 
| 172 liteDst = m.sqrt() - m, // Used in case 3. | 177 liteDst = m.sqrt() - m, // Used in case 3. | 
| 173 liteSrc = d*sa + da*(s2-sa)*(Sk4f(4)*d <= da).thenElse(darkDst, liteDst ); // Case 2 or 3? | 178 liteSrc = d*sa + da*(s2-sa)*(d*4 <= da).thenElse(darkDst, liteDst); // Case 2 or 3? | 
| 174 | 179 | 
| 175 auto alpha = s + d*isa; | 180 auto alpha = s + d*isa; | 
| 176 auto colors = s*ida + d*isa + (s2 <= sa).thenElse(darkSrc, liteSrc); // Case 1 or 2/3? | 181 auto colors = s*ida + d*isa + (s2 <= sa).thenElse(darkSrc, liteSrc); // Case 1 or 2/3? | 
| 177 | 182 | 
| 178 return a_rgb(alpha, colors); | 183 return a_rgb(alpha, colors); | 
| 179 } | 184 } | 
| 180 #undef XFERMODE | 185 #undef XFERMODE | 
| 181 | 186 | 
| 182 // A reasonable fallback mode for doing AA is to simply apply the transfermode first, | 187 // A reasonable fallback mode for doing AA is to simply apply the transfermode first, | 
| 183 // then linearly interpolate the AA. | 188 // then linearly interpolate the AA. | 
| 184 template <Sk4px (SK_VECTORCALL *Mode)(Sk4px, Sk4px)> | 189 template <Sk4px (SK_VECTORCALL *Mode)(Sk4px, Sk4px)> | 
| 185 static Sk4px SK_VECTORCALL xfer_aa(Sk4px s, Sk4px d, Sk4px aa) { | 190 static Sk4px SK_VECTORCALL xfer_aa(Sk4px s, Sk4px d, Sk4px aa) { | 
| 186 Sk4px bw = Mode(s, d); | 191 Sk4px bw = Mode(s, d); | 
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 233 }); | 238 }); | 
| 234 } | 239 } | 
| 235 } | 240 } | 
| 236 | 241 | 
| 237 private: | 242 private: | 
| 238 Proc4 fProc4; | 243 Proc4 fProc4; | 
| 239 AAProc4 fAAProc4; | 244 AAProc4 fAAProc4; | 
| 240 typedef SkProcCoeffXfermode INHERITED; | 245 typedef SkProcCoeffXfermode INHERITED; | 
| 241 }; | 246 }; | 
| 242 | 247 | 
| 243 class Sk4fXfermode : public SkProcCoeffXfermode { | 248 template <typename BlendFn> | 
| 249 class FloatXfermode : public SkProcCoeffXfermode { | |
| 244 public: | 250 public: | 
| 245 typedef Sk4f (SK_VECTORCALL *ProcF)(Sk4f, Sk4f); | 251 FloatXfermode(const ProcCoeff& rec, SkXfermode::Mode mode) | 
| 246 Sk4fXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, ProcF procf) | 252 : INHERITED(rec, mode) {} | 
| 247 : INHERITED(rec, mode) | |
| 248 , fProcF(procf) {} | |
| 249 | 253 | 
| 250 void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[ ]) const override { | 254 void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[ ]) const override { | 
| 
msarett
2015/11/09 23:25:06
It looks like you've deleted some private helper f
 
mtklein
2015/11/10 00:22:05
Yep, just moving things around.
 
msarett
2015/11/10 14:54:16
Acknowledged.
 | |
| 251 for (int i = 0; i < n; i++) { | 255 BlendFn blend; | 
| 252 dst[i] = aa ? this->xfer32(dst[i], src[i], aa[i]) | 256 while (n >= 2) { | 
| 253 : this->xfer32(dst[i], src[i]); | 257 auto d = Sk8f::FromBytes((const uint8_t*)dst) * (1.0f/255), | 
| 
msarett
2015/11/09 23:25:06
nit:  Any reason not to write 1.0f / 255.0f?
 
mtklein
2015/11/10 00:22:05
Nope, either way works the same and gets to the sa
 | |
| 258 s = Sk8f::FromBytes((const uint8_t*)src) * (1.0f/255), | |
| 259 b = blend(d, s); | |
| 260 if (aa) { | |
| 261 auto a255 = Sk8f(aa[0],aa[0],aa[0],aa[0], aa[1],aa[1],aa[1],aa[1 ]); | |
| 262 (b*a255 + d*(Sk8f(255)-a255) + 0.5).toBytes((uint8_t*)dst); | |
| 263 aa += 2; | |
| 264 } else { | |
| 265 (b * 255 + 0.5).toBytes((uint8_t*)dst); | |
| 266 } | |
| 267 dst += 2; | |
| 268 src += 2; | |
| 269 n -= 2; | |
| 270 } | |
| 271 if (n) { | |
| 272 auto d = Sk4f::FromBytes((const uint8_t*)dst) * (1.0f/255), | |
| 273 s = Sk4f::FromBytes((const uint8_t*)src) * (1.0f/255), | |
| 274 b = blend(d, s); | |
| 275 if (aa) { | |
| 276 auto a255 = Sk4f(aa[0],aa[0],aa[0],aa[0]); | |
| 277 (b*a255 + d*(Sk4f(255)-a255) + 0.5).toBytes((uint8_t*)dst); | |
| 278 aa++; | |
| 279 } else { | |
| 280 (b * 255 + 0.5).toBytes((uint8_t*)dst); | |
| 281 } | |
| 254 } | 282 } | 
| 255 } | 283 } | 
| 256 | 284 | 
| 257 void xfer16(uint16_t dst[], const SkPMColor src[], int n, const SkAlpha aa[] ) const override { | 285 void xfer16(uint16_t dst[], const SkPMColor src[], int n, const SkAlpha aa[] ) const override { | 
| 258 for (int i = 0; i < n; i++) { | 286 for (int i = 0; i < n; i++) { | 
| 259 SkPMColor dst32 = SkPixel16ToPixel32(dst[i]); | 287 SkPMColor dst32 = SkPixel16ToPixel32(dst[i]); // Convert dst up to 8888. | 
| 260 dst32 = aa ? this->xfer32(dst32, src[i], aa[i]) | 288 this->xfer32(&dst32, src+i, 1, aa ? aa+i : nullptr); // Blend 1 pixel. | 
| 261 : this->xfer32(dst32, src[i]); | 289 dst[i] = SkPixel32ToPixel16(dst32); // Repack dst to 565 and store. | 
| 
msarett
2015/11/09 23:25:06
This seems slow?  Although not different from befo
 
mtklein
2015/11/10 00:22:05
Right.  No different from before.  Just always 1 p
 | |
| 262 dst[i] = SkPixel32ToPixel16(dst32); | |
| 263 } | 290 } | 
| 264 } | 291 } | 
| 265 | 292 | 
| 266 private: | 293 private: | 
| 267 static Sk4f Load(SkPMColor c) { | |
| 268 return Sk4f::FromBytes((uint8_t*)&c) * Sk4f(1.0f/255); | |
| 269 } | |
| 270 static SkPMColor Round(const Sk4f& f) { | |
| 271 SkPMColor c; | |
| 272 (f * Sk4f(255) + Sk4f(0.5f)).toBytes((uint8_t*)&c); | |
| 273 return c; | |
| 274 } | |
| 275 inline SkPMColor xfer32(SkPMColor dst, SkPMColor src) const { | |
| 276 return Round(fProcF(Load(dst), Load(src))); | |
| 277 } | |
| 278 | |
| 279 inline SkPMColor xfer32(SkPMColor dst, SkPMColor src, SkAlpha aa) const { | |
| 280 Sk4f s(Load(src)), | |
| 281 d(Load(dst)), | |
| 282 b(fProcF(d,s)); | |
| 283 // We do aa in full float precision before going back down to bytes, because we can! | |
| 284 Sk4f a = Sk4f(aa) * Sk4f(1.0f/255); | |
| 285 b = b*a + d*(Sk4f(1)-a); | |
| 286 return Round(b); | |
| 287 } | |
| 288 | |
| 289 ProcF fProcF; | |
| 290 typedef SkProcCoeffXfermode INHERITED; | 294 typedef SkProcCoeffXfermode INHERITED; | 
| 291 }; | 295 }; | 
| 292 | 296 | 
| 293 } // namespace | 297 } // namespace | 
| 294 | 298 | 
| 295 namespace SK_OPTS_NS { | 299 namespace SK_OPTS_NS { | 
| 296 | 300 | 
| 297 static SkXfermode* create_xfermode(const ProcCoeff& rec, SkXfermode::Mode mode) { | 301 static SkXfermode* create_xfermode(const ProcCoeff& rec, SkXfermode::Mode mode) { | 
| 298 switch (mode) { | 302 switch (mode) { | 
| 299 #define CASE(Mode) \ | 303 #define CASE(Mode) \ | 
| (...skipping 16 matching lines...) Expand all Loading... | |
| 316 CASE(Multiply); | 320 CASE(Multiply); | 
| 317 CASE(Difference); | 321 CASE(Difference); | 
| 318 CASE(Exclusion); | 322 CASE(Exclusion); | 
| 319 CASE(HardLight); | 323 CASE(HardLight); | 
| 320 CASE(Overlay); | 324 CASE(Overlay); | 
| 321 CASE(Darken); | 325 CASE(Darken); | 
| 322 CASE(Lighten); | 326 CASE(Lighten); | 
| 323 #undef CASE | 327 #undef CASE | 
| 324 | 328 | 
| 325 #define CASE(Mode) \ | 329 #define CASE(Mode) \ | 
| 326 case SkXfermode::k##Mode##_Mode: return new Sk4fXfermode(rec, mode, &Mode) | 330 case SkXfermode::k##Mode##_Mode: return new FloatXfermode<Mode>(rec, mode) | 
| 327 CASE(ColorDodge); | 331 CASE(ColorDodge); | 
| 328 CASE(ColorBurn); | 332 CASE(ColorBurn); | 
| 329 CASE(SoftLight); | 333 CASE(SoftLight); | 
| 330 #undef CASE | 334 #undef CASE | 
| 331 | 335 | 
| 332 default: break; | 336 default: break; | 
| 333 } | 337 } | 
| 334 return nullptr; | 338 return nullptr; | 
| 335 } | 339 } | 
| 336 | 340 | 
| 337 } // namespace SK_OPTS_NS | 341 } // namespace SK_OPTS_NS | 
| 338 | 342 | 
| 339 #endif//Sk4pxXfermode_DEFINED | 343 #endif//Sk4pxXfermode_DEFINED | 
| OLD | NEW |