| Index: src/core/SkXfermode.cpp
|
| diff --git a/src/core/SkXfermode.cpp b/src/core/SkXfermode.cpp
|
| index 4f0f9f3d4c575f005f396cb97a618849984c5b87..928fa5001eaa655fa577310505b46a127eef122e 100644
|
| --- a/src/core/SkXfermode.cpp
|
| +++ b/src/core/SkXfermode.cpp
|
| @@ -19,13 +19,12 @@
|
| #include "SkUtilsArm.h"
|
| #include "SkWriteBuffer.h"
|
|
|
| -// When implemented, the Sk4f and Sk4px xfermodes beat src/opts/SkXfermodes_opts_SSE2's.
|
| -// When implemented, the Sk4px, but not Sk4f, xfermodes beat src/opts/SkXfermodes_arm_neon's.
|
| -#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
|
| - #define SK_4F_XFERMODES_ARE_FAST
|
| - #define SK_4PX_XFERMODES_ARE_FAST
|
| -#elif defined(SK_ARM_HAS_NEON)
|
| - #define SK_4PX_XFERMODES_ARE_FAST
|
| +#if SK_CPU_X86 && SK_CPU_SSE_LEVEL < SK_CPU_SSE_LEVEL_SSE2
|
| + #warning "SkXfermode will be much faster if you compile with support for SSE2."
|
| +#endif
|
| +
|
| +#if SK_CPU_X86 || defined(SK_ARM_HAS_NEON)
|
| + #define SK_USE_4PX_XFERMODES
|
| #endif
|
|
|
| #if !SK_ARM_NEON_IS_NONE
|
| @@ -1182,56 +1181,6 @@ void SkDstInXfermode::toString(SkString* str) const {
|
|
|
| ///////////////////////////////////////////////////////////////////////////////
|
|
|
| -/* These modes can merge coverage into src-alpha
|
| - *
|
| -{ dst_modeproc, SkXfermode::kZero_Coeff, SkXfermode::kOne_Coeff },
|
| -{ srcover_modeproc, SkXfermode::kOne_Coeff, SkXfermode::kISA_Coeff },
|
| -{ dstover_modeproc, SkXfermode::kIDA_Coeff, SkXfermode::kOne_Coeff },
|
| -{ dstout_modeproc, SkXfermode::kZero_Coeff, SkXfermode::kISA_Coeff },
|
| -{ srcatop_modeproc, SkXfermode::kDA_Coeff, SkXfermode::kISA_Coeff },
|
| -{ xor_modeproc, SkXfermode::kIDA_Coeff, SkXfermode::kISA_Coeff },
|
| -{ plus_modeproc, SkXfermode::kOne_Coeff, SkXfermode::kOne_Coeff },
|
| -{ screen_modeproc, SkXfermode::kOne_Coeff, SkXfermode::kISC_Coeff },
|
| -*/
|
| -
|
| -static const float gInv255 = 0.0039215683f; // (1.0f / 255) - ULP == SkBits2Float(0x3B808080)
|
| -
|
| -static Sk4f ramp(const Sk4f& v0, const Sk4f& v1, const Sk4f& t) {
|
| - return v0 + (v1 - v0) * t;
|
| -}
|
| -
|
| -static Sk4f clamp_255(const Sk4f& value) {
|
| - return Sk4f::Min(Sk4f(255), value);
|
| -}
|
| -
|
| -static Sk4f clamp_0_255(const Sk4f& value) {
|
| - return Sk4f::Max(Sk4f(0), Sk4f::Min(Sk4f(255), value));
|
| -}
|
| -
|
| -/**
|
| - * Some modes can, due to very slight numerical error, generate "invalid" pmcolors...
|
| - *
|
| - * e.g.
|
| - * alpha = 100.9999
|
| - * red = 101
|
| - *
|
| - * or
|
| - * alpha = 255.0001
|
| - *
|
| - * If we know we're going to write-out the values as bytes, we can relax these somewhat,
|
| - * since we only really need to enforce that the bytes are valid premul...
|
| - *
|
| - * To that end, this method asserts that the resulting pmcolor will be valid, but does not call
|
| - * SkPMFloat::isValid(), as that would fire sometimes, but not result in a bad pixel.
|
| - */
|
| -static inline SkPMFloat check_as_pmfloat(const Sk4f& value) {
|
| - SkPMFloat pm = value;
|
| -#ifdef SK_DEBUG
|
| - (void)pm.round();
|
| -#endif
|
| - return pm;
|
| -}
|
| -
|
| #define XFERMODE(Name) \
|
| struct Name { \
|
| static Sk4px Xfer(const Sk4px&, const Sk4px&); \
|
| @@ -1249,184 +1198,48 @@ XFERMODE(DstIn) { return SrcIn ::Xfer(d,s); }
|
| XFERMODE(DstOut) { return SrcOut ::Xfer(d,s); }
|
| XFERMODE(DstOver) { return SrcOver::Xfer(d,s); }
|
|
|
| -#undef XFERMODE
|
| -
|
| -// kSrcATop_Mode, //!< [Da, Sc * Da + (1 - Sa) * Dc]
|
| -struct SrcATop4f {
|
| - static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
|
| - const Sk4f inv255(gInv255);
|
| - return check_as_pmfloat(dst + (src * Sk4f(dst.a()) - dst * Sk4f(src.a())) * inv255);
|
| - }
|
| - static Sk4px Xfer(const Sk4px& src, const Sk4px& dst) {
|
| - return Sk4px::Wide(src.mulWiden(dst.alphas()) + dst.mulWiden(src.alphas().inv()))
|
| - .div255RoundNarrow();
|
| - }
|
| - static const bool kFoldCoverageIntoSrcAlpha = true;
|
| - static const SkXfermode::Mode kMode = SkXfermode::kSrcATop_Mode;
|
| -};
|
| -
|
| -// kDstATop_Mode, //!< [Sa, Sa * Dc + Sc * (1 - Da)]
|
| -struct DstATop4f {
|
| - static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
|
| - return SrcATop4f::Xfer(dst, src);
|
| - }
|
| - static Sk4px Xfer(const Sk4px& src, const Sk4px& dst) {
|
| - return SrcATop4f::Xfer(dst, src);
|
| - }
|
| - static const bool kFoldCoverageIntoSrcAlpha = false;
|
| - static const SkXfermode::Mode kMode = SkXfermode::kDstATop_Mode;
|
| -};
|
| -
|
| -// kXor_Mode [Sa + Da - 2 * Sa * Da, Sc * (1 - Da) + (1 - Sa) * Dc]
|
| -struct Xor4f {
|
| - static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
|
| - const Sk4f inv255(gInv255);
|
| - return check_as_pmfloat(src + dst - (src * Sk4f(dst.a()) + dst * Sk4f(src.a())) * inv255);
|
| - }
|
| - static Sk4px Xfer(const Sk4px& src, const Sk4px& dst) {
|
| - return Sk4px::Wide(src.mulWiden(dst.alphas().inv()) + dst.mulWiden(src.alphas().inv()))
|
| - .div255RoundNarrow();
|
| - }
|
| - static const bool kFoldCoverageIntoSrcAlpha = true;
|
| - static const SkXfermode::Mode kMode = SkXfermode::kXor_Mode;
|
| -};
|
| -
|
| -// kPlus_Mode [Sa + Da, Sc + Dc]
|
| -struct Plus4f {
|
| - static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
|
| - return check_as_pmfloat(clamp_255(src + dst));
|
| - }
|
| - static Sk4px Xfer(const Sk4px& src, const Sk4px& dst) {
|
| - return src.saturatedAdd(dst);
|
| - }
|
| - static const bool kFoldCoverageIntoSrcAlpha = false;
|
| - static const SkXfermode::Mode kMode = SkXfermode::kPlus_Mode;
|
| -};
|
| -
|
| -// kModulate_Mode [Sa * Da, Sc * Dc]
|
| -struct Modulate4f {
|
| - static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
|
| - const Sk4f inv255(gInv255);
|
| - return check_as_pmfloat(src * dst * inv255);
|
| - }
|
| - static Sk4px Xfer(const Sk4px& src, const Sk4px& dst) {
|
| - return src.fastMulDiv255Round(dst);
|
| - }
|
| - static const bool kFoldCoverageIntoSrcAlpha = false;
|
| - static const SkXfermode::Mode kMode = SkXfermode::kModulate_Mode;
|
| -};
|
| -
|
| -// kScreen_Mode [S + D - S * D]
|
| -struct Screen4f {
|
| - static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
|
| - const Sk4f inv255(gInv255);
|
| - return check_as_pmfloat(src + dst - src * dst * inv255);
|
| - }
|
| - static Sk4px Xfer(const Sk4px& src, const Sk4px& dst) {
|
| - // Doing the math as S + (1-S)*D or S + (D - S*D) means the add and subtract can be done
|
| - // in 8-bit space without overflow. S + (1-S)*D is a touch faster because inv() is cheap.
|
| - return src + dst.fastMulDiv255Round(src.inv());
|
| - }
|
| - static const bool kFoldCoverageIntoSrcAlpha = true;
|
| - static const SkXfermode::Mode kMode = SkXfermode::kScreen_Mode;
|
| -};
|
| -
|
| -struct Multiply4f {
|
| - static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
|
| - const Sk4f inv255(gInv255);
|
| - Sk4f sa = Sk4f(src.a());
|
| - Sk4f da = Sk4f(dst.a());
|
| - Sk4f sc = src;
|
| - Sk4f dc = dst;
|
| - Sk4f rc = sc + dc + (sc * (dc - da) - dc * sa) * inv255;
|
| - // ra = srcover(sa, da), but the calc for rc happens to accomplish this for us
|
| - return check_as_pmfloat(clamp_0_255(rc));
|
| - }
|
| - static Sk4px Xfer(const Sk4px& src, const Sk4px& dst) {
|
| - return Sk4px::Wide(src.mulWiden(dst.alphas().inv()) +
|
| - dst.mulWiden(src.alphas().inv()) +
|
| - src.mulWiden(dst))
|
| - .div255RoundNarrow();
|
| - }
|
| - static const bool kFoldCoverageIntoSrcAlpha = false;
|
| - static const SkXfermode::Mode kMode = SkXfermode::kMultiply_Mode;
|
| -};
|
| -
|
| -// [ sa + da - sa*da, sc + dc - 2*min(sc*da, dc*sa) ] (And notice sa*da == min(sa*da, da*sa).)
|
| -struct Difference4f {
|
| - static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
|
| - const Sk4f inv255(gInv255);
|
| - Sk4f sa = Sk4f(src.a());
|
| - Sk4f da = Sk4f(dst.a());
|
| - Sk4f sc = src;
|
| - Sk4f dc = dst;
|
| - Sk4f min = Sk4f::Min(sc * da, dc * sa) * inv255;
|
| - Sk4f ra = sc + dc - min;
|
| - return check_as_pmfloat(ra - min * SkPMFloat(0, 1, 1, 1));
|
| - }
|
| - static Sk4px Xfer(const Sk4px& src, const Sk4px& dst) {
|
| - auto m = Sk4px::Wide(Sk16h::Min(src.mulWiden(dst.alphas()), dst.mulWiden(src.alphas())))
|
| - .div255RoundNarrow();
|
| - // There's no chance of underflow, and if we subtract m before adding src+dst, no overflow.
|
| - return (src - m) + (dst - m.zeroAlphas());
|
| - }
|
| - static const bool kFoldCoverageIntoSrcAlpha = false;
|
| - static const SkXfermode::Mode kMode = SkXfermode::kDifference_Mode;
|
| -};
|
| -
|
| -// [ sa + da - sa*da, sc + dc - 2*sc*dc ]
|
| -struct Exclusion4f {
|
| - static SkPMFloat Xfer(const SkPMFloat& src, const SkPMFloat& dst) {
|
| - const Sk4f inv255(gInv255);
|
| - Sk4f sc = src;
|
| - Sk4f dc = dst;
|
| - Sk4f prod = sc * dc * inv255;
|
| - Sk4f ra = sc + dc - prod;
|
| - return check_as_pmfloat(ra - prod * SkPMFloat(0, 1, 1, 1));
|
| - }
|
| - static Sk4px Xfer(const Sk4px& src, const Sk4px& dst) {
|
| - auto p = src.fastMulDiv255Round(dst);
|
| - // There's no chance of underflow, and if we subtract p before adding src+dst, no overflow.
|
| - return (src - p) + (dst - p.zeroAlphas());
|
| - }
|
| - static const bool kFoldCoverageIntoSrcAlpha = false;
|
| - static const SkXfermode::Mode kMode = SkXfermode::kExclusion_Mode;
|
| -};
|
| -
|
| -template <typename ProcType>
|
| -class SkT4fXfermode : public SkProcCoeffXfermode {
|
| -public:
|
| - static SkXfermode* Create(const ProcCoeff& rec) {
|
| - return SkNEW_ARGS(SkT4fXfermode, (rec));
|
| - }
|
| -
|
| - void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override {
|
| - if (NULL == aa) {
|
| - for (int i = 0; i < n; ++i) {
|
| - dst[i] = ProcType::Xfer(SkPMFloat(src[i]), SkPMFloat(dst[i])).round();
|
| - }
|
| - } else {
|
| - for (int i = 0; i < n; ++i) {
|
| - const Sk4f aa4 = Sk4f(aa[i] * gInv255);
|
| - SkPMFloat dstF(dst[i]);
|
| - SkPMFloat srcF(src[i]);
|
| - Sk4f res;
|
| - if (ProcType::kFoldCoverageIntoSrcAlpha) {
|
| - Sk4f src4 = srcF;
|
| - res = ProcType::Xfer(src4 * aa4, dstF);
|
| - } else {
|
| - res = ramp(dstF, ProcType::Xfer(srcF, dstF), aa4);
|
| - }
|
| - dst[i] = SkPMFloat(res).round();
|
| - }
|
| - }
|
| - }
|
| -
|
| -private:
|
| - SkT4fXfermode(const ProcCoeff& rec) : SkProcCoeffXfermode(rec, ProcType::kMode) {}
|
| +// [ S * Da + (1 - Sa) * D ]
|
| +XFERMODE(SrcATop) {
|
| + return Sk4px::Wide(s.mulWiden(d.alphas()) + d.mulWiden(s.alphas().inv()))
|
| + .div255RoundNarrow();
|
| +}
|
| +XFERMODE(DstATop) { return SrcATop::Xfer(d,s); }
|
| +// [ S * (1 - Da) + (1 - Sa) * D ]
|
| +XFERMODE(Xor) {
|
| + return Sk4px::Wide(s.mulWiden(d.alphas().inv()) + d.mulWiden(s.alphas().inv()))
|
| + .div255RoundNarrow();
|
| +}
|
| +// [ S + D ]
|
| +XFERMODE(Plus) { return s.saturatedAdd(d); }
|
| +// [ S * D ]
|
| +XFERMODE(Modulate) { return s.fastMulDiv255Round(d); }
|
| +// [S + D - S * D]
|
| +XFERMODE(Screen) {
|
| + // Doing the math as S + (1-S)*D or S + (D - S*D) means the add and subtract can be done
|
| + // in 8-bit space without overflow. S + (1-S)*D is a touch faster because inv() is cheap.
|
| + return s + d.fastMulDiv255Round(s.inv());
|
| +}
|
| +XFERMODE(Multiply) {
|
| + return Sk4px::Wide(s.mulWiden(d.alphas().inv()) +
|
| + d.mulWiden(s.alphas().inv()) +
|
| + s.mulWiden(d))
|
| + .div255RoundNarrow();
|
| +}
|
| +// [ Sa + Da - Sa*Da, Sc + Dc - 2*min(Sc*Da, Dc*Sa) ] (And notice Sa*Da == min(Sa*Da, Da*Sa).)
|
| +XFERMODE(Difference) {
|
| + auto m = Sk4px::Wide(Sk16h::Min(s.mulWiden(d.alphas()), d.mulWiden(s.alphas())))
|
| + .div255RoundNarrow();
|
| + // There's no chance of underflow, and if we subtract m before adding s+d, no overflow.
|
| + return (s - m) + (d - m.zeroAlphas());
|
| +}
|
| +// [ Sa + Da - Sa*Da, Sc + Dc - 2*Sc*Dc ]
|
| +XFERMODE(Exclusion) {
|
| + auto p = s.fastMulDiv255Round(d);
|
| + // There's no chance of underflow, and if we subtract p before adding s+d, no overflow.
|
| + return (s - p) + (d - p.zeroAlphas());
|
| +}
|
|
|
| - typedef SkProcCoeffXfermode INHERITED;
|
| -};
|
| +#undef XFERMODE
|
|
|
| template <typename ProcType>
|
| class SkT4pxXfermode : public SkProcCoeffXfermode {
|
| @@ -1443,7 +1256,6 @@ public:
|
| } else {
|
| Sk4px::MapDstSrcAlpha(n, dst, src, aa,
|
| [&](const Sk4px& dst4, const Sk4px& src4, const Sk16b& alpha) {
|
| - // We can't exploit kFoldCoverageIntoSrcAlpha. That requires >=24-bit intermediates.
|
| Sk4px res4 = ProcType::Xfer(src4, dst4);
|
| return Sk4px::Wide(res4.mulWiden(alpha) + dst4.mulWiden(Sk4px(alpha).inv()))
|
| .div255RoundNarrow();
|
| @@ -1517,7 +1329,7 @@ SkXfermode* create_mode(int iMode) {
|
| rec.fProc = pp;
|
| }
|
|
|
| -#if defined(SK_4PX_XFERMODES_ARE_FAST) && !defined(SK_PREFER_LEGACY_FLOAT_XFERMODES)
|
| +#if defined(SK_USE_4PX_XFERMODES)
|
| switch (mode) {
|
| case SkXfermode::kClear_Mode: return SkT4pxXfermode<Clear>::Create(rec);
|
| case SkXfermode::kSrc_Mode: return SkT4pxXfermode<Src>::Create(rec);
|
| @@ -1528,31 +1340,15 @@ SkXfermode* create_mode(int iMode) {
|
| case SkXfermode::kDstIn_Mode: return SkT4pxXfermode<DstIn>::Create(rec);
|
| case SkXfermode::kSrcOut_Mode: return SkT4pxXfermode<SrcOut>::Create(rec);
|
| case SkXfermode::kDstOut_Mode: return SkT4pxXfermode<DstOut>::Create(rec);
|
| -
|
| - case SkXfermode::kSrcATop_Mode: return SkT4pxXfermode<SrcATop4f>::Create(rec);
|
| - case SkXfermode::kDstATop_Mode: return SkT4pxXfermode<DstATop4f>::Create(rec);
|
| - case SkXfermode::kXor_Mode: return SkT4pxXfermode<Xor4f>::Create(rec);
|
| - case SkXfermode::kPlus_Mode: return SkT4pxXfermode<Plus4f>::Create(rec);
|
| - case SkXfermode::kModulate_Mode: return SkT4pxXfermode<Modulate4f>::Create(rec);
|
| - case SkXfermode::kScreen_Mode: return SkT4pxXfermode<Screen4f>::Create(rec);
|
| - case SkXfermode::kMultiply_Mode: return SkT4pxXfermode<Multiply4f>::Create(rec);
|
| - case SkXfermode::kDifference_Mode: return SkT4pxXfermode<Difference4f>::Create(rec);
|
| - case SkXfermode::kExclusion_Mode: return SkT4pxXfermode<Exclusion4f>::Create(rec);
|
| - default: break;
|
| - }
|
| -#endif
|
| -
|
| -#if defined(SK_4F_XFERMODES_ARE_FAST)
|
| - switch (mode) {
|
| - case SkXfermode::kSrcATop_Mode: return SkT4fXfermode<SrcATop4f>::Create(rec);
|
| - case SkXfermode::kDstATop_Mode: return SkT4fXfermode<DstATop4f>::Create(rec);
|
| - case SkXfermode::kXor_Mode: return SkT4fXfermode<Xor4f>::Create(rec);
|
| - case SkXfermode::kPlus_Mode: return SkT4fXfermode<Plus4f>::Create(rec);
|
| - case SkXfermode::kModulate_Mode: return SkT4fXfermode<Modulate4f>::Create(rec);
|
| - case SkXfermode::kScreen_Mode: return SkT4fXfermode<Screen4f>::Create(rec);
|
| - case SkXfermode::kMultiply_Mode: return SkT4fXfermode<Multiply4f>::Create(rec);
|
| - case SkXfermode::kDifference_Mode: return SkT4fXfermode<Difference4f>::Create(rec);
|
| - case SkXfermode::kExclusion_Mode: return SkT4fXfermode<Exclusion4f>::Create(rec);
|
| + case SkXfermode::kSrcATop_Mode: return SkT4pxXfermode<SrcATop>::Create(rec);
|
| + case SkXfermode::kDstATop_Mode: return SkT4pxXfermode<DstATop>::Create(rec);
|
| + case SkXfermode::kXor_Mode: return SkT4pxXfermode<Xor>::Create(rec);
|
| + case SkXfermode::kPlus_Mode: return SkT4pxXfermode<Plus>::Create(rec);
|
| + case SkXfermode::kModulate_Mode: return SkT4pxXfermode<Modulate>::Create(rec);
|
| + case SkXfermode::kScreen_Mode: return SkT4pxXfermode<Screen>::Create(rec);
|
| + case SkXfermode::kMultiply_Mode: return SkT4pxXfermode<Multiply>::Create(rec);
|
| + case SkXfermode::kDifference_Mode: return SkT4pxXfermode<Difference>::Create(rec);
|
| + case SkXfermode::kExclusion_Mode: return SkT4pxXfermode<Exclusion>::Create(rec);
|
| default: break;
|
| }
|
| #endif
|
|
|