Chromium Code Reviews| Index: src/opts/SkXfermode_opts.h |
| diff --git a/src/opts/SkXfermode_opts.h b/src/opts/SkXfermode_opts.h |
| index 69f2b420f5f4e6fe2166d2175803322b622a0906..a40faec21a9289c16eeb4aca19f4875917eb767e 100644 |
| --- a/src/opts/SkXfermode_opts.h |
| +++ b/src/opts/SkXfermode_opts.h |
| @@ -109,71 +109,76 @@ XFERMODE(Lighten) { |
| } |
| #undef XFERMODE |
| -// Some xfermodes use math like divide or sqrt that's best done in floats 1 pixel at a time. |
| -#define XFERMODE(Name) static Sk4f SK_VECTORCALL Name(Sk4f d, Sk4f s) |
| +// Some xfermodes use math like divide or sqrt that's best done in floats. |
| +// We write it generically, then call it 1 or 2 pixels at a time (T == Sk4f or Sk8f). |
| +#define XFERMODE(Name) struct Name { template <typename T> T operator()(const T&, const T&); }; \ |
| + template <typename T> T Name::operator()(const T& d, const T& s) |
| +static_assert(SK_A32_SHIFT == 24, ""); |
| static inline Sk4f a_rgb(const Sk4f& a, const Sk4f& rgb) { |
| - static_assert(SK_A32_SHIFT == 24, ""); |
| return a * Sk4f(0,0,0,1) + rgb * Sk4f(1,1,1,0); |
| } |
| -static inline Sk4f alphas(const Sk4f& f) { |
| - return SkNx_dup<SK_A32_SHIFT/8>(f); |
| +static inline Sk8f a_rgb(const Sk8f& a, const Sk8f& rgb) { |
| + // TODO: SkNx_blend<0,0,0,1,0,0,0,1>(a, rgb) to let us use _mm256_blend_ps? |
| + return a * Sk8f(0,0,0,1,0,0,0,1) + rgb * Sk8f(1,1,1,0,1,1,1,0); |
| } |
| +static inline Sk4f alphas(const Sk4f& f) { return SkNx_shuffle<3,3,3,3> (f); } |
| +static inline Sk8f alphas(const Sk8f& f) { return SkNx_shuffle<3,3,3,3,7,7,7,7>(f); } |
|
msarett
2015/11/09 23:25:06
Where is shuffle defined for AVX?
Oh, it looks li
mtklein
2015/11/10 00:22:05
Right, default implementation. This compiles into
msarett
2015/11/10 14:54:16
Great, cool instruction!
I'm impressed by the com
mtklein
2015/11/11 19:27:13
Yeah, Clang's pretty impressive. Mostly SkNx_shuf
|
| XFERMODE(ColorDodge) { |
| auto sa = alphas(s), |
| da = alphas(d), |
| - isa = Sk4f(1)-sa, |
| - ida = Sk4f(1)-da; |
| + isa = T(1)-sa, |
| + ida = T(1)-da; |
| auto srcover = s + d*isa, |
| dstover = d + s*ida, |
| - otherwise = sa * Sk4f::Min(da, (d*sa)*(sa-s).approxInvert()) + s*ida + d*isa; |
| + otherwise = sa * T::Min(da, (d*sa)*(sa-s).approxInvert()) + s*ida + d*isa; |
| // Order matters here, preferring d==0 over s==sa. |
| - auto colors = (d == Sk4f(0)).thenElse(dstover, |
| - (s == sa).thenElse(srcover, |
| - otherwise)); |
| + auto colors = (d == 0).thenElse(dstover, |
| + (s == sa).thenElse(srcover, |
| + otherwise)); |
| return a_rgb(srcover, colors); |
| } |
| XFERMODE(ColorBurn) { |
| auto sa = alphas(s), |
| da = alphas(d), |
| - isa = Sk4f(1)-sa, |
| - ida = Sk4f(1)-da; |
| + isa = T(1)-sa, |
| + ida = T(1)-da; |
| auto srcover = s + d*isa, |
| dstover = d + s*ida, |
| - otherwise = sa*(da-Sk4f::Min(da, (da-d)*sa*s.approxInvert())) + s*ida + d*isa; |
| + otherwise = sa*(da-T::Min(da, (da-d)*sa*s.approxInvert())) + s*ida + d*isa; |
| // Order matters here, preferring d==da over s==0. |
| - auto colors = (d == da).thenElse(dstover, |
| - (s == Sk4f(0)).thenElse(srcover, |
| - otherwise)); |
| + auto colors = (d == da).thenElse(dstover, |
| + (s == 0).thenElse(srcover, |
| + otherwise)); |
| return a_rgb(srcover, colors); |
| } |
| XFERMODE(SoftLight) { |
| auto sa = alphas(s), |
| da = alphas(d), |
| - isa = Sk4f(1)-sa, |
| - ida = Sk4f(1)-da; |
| + isa = T(1)-sa, |
| + ida = T(1)-da; |
| // Some common terms. |
| - auto m = (da > Sk4f(0)).thenElse(d / da, Sk4f(0)), |
| - s2 = Sk4f(2)*s, |
| - m4 = Sk4f(4)*m; |
| + auto m = (da > 0).thenElse(d / da, 0), |
| + s2 = s*2, |
| + m4 = m*4; |
| // The logic forks three ways: |
| // 1. dark src? |
| // 2. light src, dark dst? |
| // 3. light src, light dst? |
| - auto darkSrc = d*(sa + (s2 - sa)*(Sk4f(1) - m)), // Used in case 1. |
| - darkDst = (m4*m4 + m4)*(m - Sk4f(1)) + Sk4f(7)*m, // Used in case 2. |
| - liteDst = m.sqrt() - m, // Used in case 3. |
| - liteSrc = d*sa + da*(s2-sa)*(Sk4f(4)*d <= da).thenElse(darkDst, liteDst); // Case 2 or 3? |
| + auto darkSrc = d*(sa + (s2 - sa)*(T(1) - m)), // Used in case 1. |
| + darkDst = (m4*m4 + m4)*(m - 1) + m*7, // Used in case 2. |
| + liteDst = m.sqrt() - m, // Used in case 3. |
| + liteSrc = d*sa + da*(s2-sa)*(d*4 <= da).thenElse(darkDst, liteDst); // Case 2 or 3? |
| auto alpha = s + d*isa; |
| - auto colors = s*ida + d*isa + (s2 <= sa).thenElse(darkSrc, liteSrc); // Case 1 or 2/3? |
| + auto colors = s*ida + d*isa + (s2 <= sa).thenElse(darkSrc, liteSrc); // Case 1 or 2/3? |
| return a_rgb(alpha, colors); |
| } |
| @@ -240,53 +245,52 @@ private: |
| typedef SkProcCoeffXfermode INHERITED; |
| }; |
| -class Sk4fXfermode : public SkProcCoeffXfermode { |
| +template <typename BlendFn> |
| +class FloatXfermode : public SkProcCoeffXfermode { |
| public: |
| - typedef Sk4f (SK_VECTORCALL *ProcF)(Sk4f, Sk4f); |
| - Sk4fXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, ProcF procf) |
| - : INHERITED(rec, mode) |
| - , fProcF(procf) {} |
| + FloatXfermode(const ProcCoeff& rec, SkXfermode::Mode mode) |
| + : INHERITED(rec, mode) {} |
| void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override { |
|
msarett
2015/11/09 23:25:06
It looks like you've deleted some private helper f
mtklein
2015/11/10 00:22:05
Yep, just moving things around.
msarett
2015/11/10 14:54:16
Acknowledged.
|
| - for (int i = 0; i < n; i++) { |
| - dst[i] = aa ? this->xfer32(dst[i], src[i], aa[i]) |
| - : this->xfer32(dst[i], src[i]); |
| + BlendFn blend; |
| + while (n >= 2) { |
| + auto d = Sk8f::FromBytes((const uint8_t*)dst) * (1.0f/255), |
|
msarett
2015/11/09 23:25:06
nit: Any reason not to write 1.0f / 255.0f?
mtklein
2015/11/10 00:22:05
Nope, either way works the same and gets to the sa
|
| + s = Sk8f::FromBytes((const uint8_t*)src) * (1.0f/255), |
| + b = blend(d, s); |
| + if (aa) { |
| + auto a255 = Sk8f(aa[0],aa[0],aa[0],aa[0], aa[1],aa[1],aa[1],aa[1]); |
| + (b*a255 + d*(Sk8f(255)-a255) + 0.5).toBytes((uint8_t*)dst); |
| + aa += 2; |
| + } else { |
| + (b * 255 + 0.5).toBytes((uint8_t*)dst); |
| + } |
| + dst += 2; |
| + src += 2; |
| + n -= 2; |
| + } |
| + if (n) { |
| + auto d = Sk4f::FromBytes((const uint8_t*)dst) * (1.0f/255), |
| + s = Sk4f::FromBytes((const uint8_t*)src) * (1.0f/255), |
| + b = blend(d, s); |
| + if (aa) { |
| + auto a255 = Sk4f(aa[0],aa[0],aa[0],aa[0]); |
| + (b*a255 + d*(Sk4f(255)-a255) + 0.5).toBytes((uint8_t*)dst); |
| + aa++; |
| + } else { |
| + (b * 255 + 0.5).toBytes((uint8_t*)dst); |
| + } |
| } |
| } |
| void xfer16(uint16_t dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override { |
| for (int i = 0; i < n; i++) { |
| - SkPMColor dst32 = SkPixel16ToPixel32(dst[i]); |
| - dst32 = aa ? this->xfer32(dst32, src[i], aa[i]) |
| - : this->xfer32(dst32, src[i]); |
| - dst[i] = SkPixel32ToPixel16(dst32); |
| + SkPMColor dst32 = SkPixel16ToPixel32(dst[i]); // Convert dst up to 8888. |
| + this->xfer32(&dst32, src+i, 1, aa ? aa+i : nullptr); // Blend 1 pixel. |
| + dst[i] = SkPixel32ToPixel16(dst32); // Repack dst to 565 and store. |
|
msarett
2015/11/09 23:25:06
This seems slow? Although not different from befo
mtklein
2015/11/10 00:22:05
Right. No different from before. Just always 1 p
|
| } |
| } |
| private: |
| - static Sk4f Load(SkPMColor c) { |
| - return Sk4f::FromBytes((uint8_t*)&c) * Sk4f(1.0f/255); |
| - } |
| - static SkPMColor Round(const Sk4f& f) { |
| - SkPMColor c; |
| - (f * Sk4f(255) + Sk4f(0.5f)).toBytes((uint8_t*)&c); |
| - return c; |
| - } |
| - inline SkPMColor xfer32(SkPMColor dst, SkPMColor src) const { |
| - return Round(fProcF(Load(dst), Load(src))); |
| - } |
| - |
| - inline SkPMColor xfer32(SkPMColor dst, SkPMColor src, SkAlpha aa) const { |
| - Sk4f s(Load(src)), |
| - d(Load(dst)), |
| - b(fProcF(d,s)); |
| - // We do aa in full float precision before going back down to bytes, because we can! |
| - Sk4f a = Sk4f(aa) * Sk4f(1.0f/255); |
| - b = b*a + d*(Sk4f(1)-a); |
| - return Round(b); |
| - } |
| - |
| - ProcF fProcF; |
| typedef SkProcCoeffXfermode INHERITED; |
| }; |
| @@ -323,7 +327,7 @@ static SkXfermode* create_xfermode(const ProcCoeff& rec, SkXfermode::Mode mode) |
| #undef CASE |
| #define CASE(Mode) \ |
| - case SkXfermode::k##Mode##_Mode: return new Sk4fXfermode(rec, mode, &Mode) |
| + case SkXfermode::k##Mode##_Mode: return new FloatXfermode<Mode>(rec, mode) |
| CASE(ColorDodge); |
| CASE(ColorBurn); |
| CASE(SoftLight); |