| Index: src/opts/SkXfermode_opts.h
|
| diff --git a/src/opts/SkXfermode_opts.h b/src/opts/SkXfermode_opts.h
|
| index 93559ab8eb52d30a45ec183a0c3b6fd77ed86329..16c7daec65210751b8a68b4c1188c90f9ad6264d 100644
|
| --- a/src/opts/SkXfermode_opts.h
|
| +++ b/src/opts/SkXfermode_opts.h
|
| @@ -15,7 +15,9 @@
|
| namespace {
|
|
|
| // Most xfermodes can be done most efficiently 4 pixels at a time in 8 or 16-bit fixed point.
|
| -#define XFERMODE(Name) static Sk4px SK_VECTORCALL Name(Sk4px s, Sk4px d)
|
| +#define XFERMODE(Xfermode) \
|
| + struct Xfermode { Sk4px operator()(const Sk4px&, const Sk4px&) const; }; \
|
| + inline Sk4px Xfermode::operator()(const Sk4px& s, const Sk4px& d) const
|
|
|
| XFERMODE(Clear) { return Sk4px::DupPMColor(0); }
|
| XFERMODE(Src) { return s; }
|
| @@ -23,13 +25,13 @@ XFERMODE(Dst) { return d; }
|
| XFERMODE(SrcIn) { return s.approxMulDiv255(d.alphas() ); }
|
| XFERMODE(SrcOut) { return s.approxMulDiv255(d.alphas().inv()); }
|
| XFERMODE(SrcOver) { return s + d.approxMulDiv255(s.alphas().inv()); }
|
| -XFERMODE(DstIn) { return SrcIn (d,s); }
|
| -XFERMODE(DstOut) { return SrcOut (d,s); }
|
| -XFERMODE(DstOver) { return SrcOver(d,s); }
|
| +XFERMODE(DstIn) { return SrcIn ()(d,s); }
|
| +XFERMODE(DstOut) { return SrcOut ()(d,s); }
|
| +XFERMODE(DstOver) { return SrcOver()(d,s); }
|
|
|
| // [ S * Da + (1 - Sa) * D]
|
| XFERMODE(SrcATop) { return (s * d.alphas() + d * s.alphas().inv()).div255(); }
|
| -XFERMODE(DstATop) { return SrcATop(d,s); }
|
| +XFERMODE(DstATop) { return SrcATop()(d,s); }
|
| //[ S * (1 - Da) + (1 - Sa) * D ]
|
| XFERMODE(Xor) { return (s * d.alphas().inv() + d * s.alphas().inv()).div255(); }
|
| // [S + D ]
|
| @@ -79,7 +81,7 @@ XFERMODE(HardLight) {
|
| auto colors = (both + isLite.thenElse(lite, dark)).div255();
|
| return alphas.zeroColors() + colors.zeroAlphas();
|
| }
|
| -XFERMODE(Overlay) { return HardLight(d,s); }
|
| +XFERMODE(Overlay) { return HardLight()(d,s); }
|
|
|
| XFERMODE(Darken) {
|
| auto sa = s.alphas(),
|
| @@ -110,7 +112,9 @@ XFERMODE(Lighten) {
|
| #undef XFERMODE
|
|
|
| // Some xfermodes use math like divide or sqrt that's best done in floats 1 pixel at a time.
|
| -#define XFERMODE(Name) static Sk4f SK_VECTORCALL Name(Sk4f d, Sk4f s)
|
| +#define XFERMODE(Xfermode) \
|
| + struct Xfermode { Sk4f operator()(const Sk4f&, const Sk4f&) const; }; \
|
| + inline Sk4f Xfermode::operator()(const Sk4f& d, const Sk4f& s) const
|
|
|
| static inline Sk4f a_rgb(const Sk4f& a, const Sk4f& rgb) {
|
| static_assert(SK_A32_SHIFT == 24, "");
|
| @@ -181,15 +185,15 @@ XFERMODE(SoftLight) {
|
|
|
| // A reasonable fallback mode for doing AA is to simply apply the transfermode first,
|
| // then linearly interpolate the AA.
|
| -template <Sk4px (SK_VECTORCALL *Mode)(Sk4px, Sk4px)>
|
| -static Sk4px SK_VECTORCALL xfer_aa(Sk4px s, Sk4px d, Sk4px aa) {
|
| - Sk4px bw = Mode(s, d);
|
| +template <typename Xfermode>
|
| +static Sk4px xfer_aa(const Sk4px& s, const Sk4px& d, const Sk4px& aa) {
|
| + Sk4px bw = Xfermode()(s, d);
|
| return (bw * aa + d * aa.inv()).div255();
|
| }
|
|
|
| // For some transfermodes we specialize AA, either for correctness or performance.
|
| -#define XFERMODE_AA(Name) \
|
| - template <> Sk4px SK_VECTORCALL xfer_aa<Name>(Sk4px s, Sk4px d, Sk4px aa)
|
| +#define XFERMODE_AA(Xfermode) \
|
| + template <> Sk4px xfer_aa<Xfermode>(const Sk4px& s, const Sk4px& d, const Sk4px& aa)
|
|
|
| // Plus' clamp needs to happen after AA. skia:3852
|
| XFERMODE_AA(Plus) { // [ clamp( (1-AA)D + (AA)(S+D) ) == clamp(D + AA*S) ]
|
| @@ -198,95 +202,103 @@ XFERMODE_AA(Plus) { // [ clamp( (1-AA)D + (AA)(S+D) ) == clamp(D + AA*S) ]
|
|
|
| #undef XFERMODE_AA
|
|
|
| +template <typename Xfermode>
|
| class Sk4pxXfermode : public SkProcCoeffXfermode {
|
| public:
|
| - typedef Sk4px (SK_VECTORCALL *Proc4)(Sk4px, Sk4px);
|
| - typedef Sk4px (SK_VECTORCALL *AAProc4)(Sk4px, Sk4px, Sk4px);
|
| -
|
| - Sk4pxXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, Proc4 proc4, AAProc4 aaproc4)
|
| - : INHERITED(rec, mode)
|
| - , fProc4(proc4)
|
| - , fAAProc4(aaproc4) {}
|
| + Sk4pxXfermode(const ProcCoeff& rec, SkXfermode::Mode mode)
|
| + : INHERITED(rec, mode) {}
|
|
|
| void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override {
|
| if (nullptr == aa) {
|
| Sk4px::MapDstSrc(n, dst, src, [&](const Sk4px& dst4, const Sk4px& src4) {
|
| - return fProc4(src4, dst4);
|
| + return Xfermode()(src4, dst4);
|
| });
|
| } else {
|
| Sk4px::MapDstSrcAlpha(n, dst, src, aa,
|
| [&](const Sk4px& dst4, const Sk4px& src4, const Sk4px& alpha) {
|
| - return fAAProc4(src4, dst4, alpha);
|
| - });
|
| + return xfer_aa<Xfermode>(src4, dst4, alpha);
|
| + });
|
| }
|
| }
|
|
|
| void xfer16(uint16_t dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override {
|
| - if (nullptr == aa) {
|
| - Sk4px::MapDstSrc(n, dst, src, [&](const Sk4px& dst4, const Sk4px& src4) {
|
| - return fProc4(src4, dst4);
|
| - });
|
| - } else {
|
| - Sk4px::MapDstSrcAlpha(n, dst, src, aa,
|
| - [&](const Sk4px& dst4, const Sk4px& src4, const Sk4px& alpha) {
|
| - return fAAProc4(src4, dst4, alpha);
|
| - });
|
| + SkPMColor dst32[4];
|
| + while (n >= 4) {
|
| + dst32[0] = SkPixel16ToPixel32(dst[0]);
|
| + dst32[1] = SkPixel16ToPixel32(dst[1]);
|
| + dst32[2] = SkPixel16ToPixel32(dst[2]);
|
| + dst32[3] = SkPixel16ToPixel32(dst[3]);
|
| +
|
| + this->xfer32(dst32, src, 4, aa);
|
| +
|
| + dst[0] = SkPixel32ToPixel16(dst32[0]);
|
| + dst[1] = SkPixel32ToPixel16(dst32[1]);
|
| + dst[2] = SkPixel32ToPixel16(dst32[2]);
|
| + dst[3] = SkPixel32ToPixel16(dst32[3]);
|
| +
|
| + dst += 4;
|
| + src += 4;
|
| + aa += aa ? 4 : 0;
|
| + n -= 4;
|
| + }
|
| + while (n) {
|
| + SkPMColor dst32 = SkPixel16ToPixel32(*dst);
|
| + this->xfer32(&dst32, src, 1, aa);
|
| + *dst = SkPixel32ToPixel16(dst32);
|
| +
|
| + dst += 1;
|
| + src += 1;
|
| + aa += aa ? 1 : 0;
|
| + n -= 1;
|
| }
|
| }
|
|
|
| private:
|
| - Proc4 fProc4;
|
| - AAProc4 fAAProc4;
|
| typedef SkProcCoeffXfermode INHERITED;
|
| };
|
|
|
| +template <typename Xfermode>
|
| class Sk4fXfermode : public SkProcCoeffXfermode {
|
| public:
|
| - typedef Sk4f (SK_VECTORCALL *ProcF)(Sk4f, Sk4f);
|
| - Sk4fXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, ProcF procf)
|
| - : INHERITED(rec, mode)
|
| - , fProcF(procf) {}
|
| + Sk4fXfermode(const ProcCoeff& rec, SkXfermode::Mode mode)
|
| + : INHERITED(rec, mode) {}
|
|
|
| void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override {
|
| for (int i = 0; i < n; i++) {
|
| - dst[i] = aa ? this->xfer32(dst[i], src[i], aa[i])
|
| - : this->xfer32(dst[i], src[i]);
|
| + dst[i] = Xfer32_1(dst[i], src[i], aa ? aa+i : nullptr);
|
| }
|
| }
|
|
|
| void xfer16(uint16_t dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override {
|
| for (int i = 0; i < n; i++) {
|
| SkPMColor dst32 = SkPixel16ToPixel32(dst[i]);
|
| - dst32 = aa ? this->xfer32(dst32, src[i], aa[i])
|
| - : this->xfer32(dst32, src[i]);
|
| + dst32 = Xfer32_1(dst32, src[i], aa ? aa+i : nullptr);
|
| dst[i] = SkPixel32ToPixel16(dst32);
|
| }
|
| }
|
|
|
| private:
|
| + static SkPMColor Xfer32_1(SkPMColor dst, const SkPMColor src, const SkAlpha* aa) {
|
| + Sk4f d = Load(dst),
|
| + s = Load(src),
|
| + b = Xfermode()(d, s);
|
| + if (aa) {
|
| + Sk4f a = Sk4f(*aa) * Sk4f(1.0f/255);
|
| + b = b*a + d*(Sk4f(1)-a);
|
| + }
|
| + return Round(b);
|
| + }
|
| +
|
| static Sk4f Load(SkPMColor c) {
|
| return SkNx_cast<float>(Sk4b::Load((uint8_t*)&c)) * Sk4f(1.0f/255);
|
| }
|
| +
|
| static SkPMColor Round(const Sk4f& f) {
|
| SkPMColor c;
|
| SkNx_cast<uint8_t>(f * Sk4f(255) + Sk4f(0.5f)).store((uint8_t*)&c);
|
| return c;
|
| }
|
| - inline SkPMColor xfer32(SkPMColor dst, SkPMColor src) const {
|
| - return Round(fProcF(Load(dst), Load(src)));
|
| - }
|
| -
|
| - inline SkPMColor xfer32(SkPMColor dst, SkPMColor src, SkAlpha aa) const {
|
| - Sk4f s(Load(src)),
|
| - d(Load(dst)),
|
| - b(fProcF(d,s));
|
| - // We do aa in full float precision before going back down to bytes, because we can!
|
| - Sk4f a = Sk4f(aa) * Sk4f(1.0f/255);
|
| - b = b*a + d*(Sk4f(1)-a);
|
| - return Round(b);
|
| - }
|
|
|
| - ProcF fProcF;
|
| typedef SkProcCoeffXfermode INHERITED;
|
| };
|
|
|
| @@ -296,8 +308,8 @@ namespace SK_OPTS_NS {
|
|
|
| static SkXfermode* create_xfermode(const ProcCoeff& rec, SkXfermode::Mode mode) {
|
| switch (mode) {
|
| -#define CASE(Mode) \
|
| - case SkXfermode::k##Mode##_Mode: return new Sk4pxXfermode(rec, mode, &Mode, &xfer_aa<Mode>)
|
| +#define CASE(Xfermode) \
|
| + case SkXfermode::k##Xfermode##_Mode: return new Sk4pxXfermode<Xfermode>(rec, mode)
|
| CASE(Clear);
|
| CASE(Src);
|
| CASE(Dst);
|
| @@ -322,8 +334,8 @@ static SkXfermode* create_xfermode(const ProcCoeff& rec, SkXfermode::Mode mode)
|
| CASE(Lighten);
|
| #undef CASE
|
|
|
| -#define CASE(Mode) \
|
| - case SkXfermode::k##Mode##_Mode: return new Sk4fXfermode(rec, mode, &Mode)
|
| +#define CASE(Xfermode) \
|
| + case SkXfermode::k##Xfermode##_Mode: return new Sk4fXfermode<Xfermode>(rec, mode)
|
| CASE(ColorDodge);
|
| CASE(ColorBurn);
|
| CASE(SoftLight);
|
|
|