| Index: src/effects/gradients/SkRadialGradient.cpp
|
| diff --git a/src/effects/gradients/SkRadialGradient.cpp b/src/effects/gradients/SkRadialGradient.cpp
|
| index b25a8750a2329cf799528f3c3c217151c2858e78..bf3c821f6b7a463cdb0eab7a31f7aea2c5ebeece 100644
|
| --- a/src/effects/gradients/SkRadialGradient.cpp
|
| +++ b/src/effects/gradients/SkRadialGradient.cpp
|
| @@ -8,6 +8,7 @@
|
|
|
| #include "SkRadialGradient.h"
|
| #include "SkRadialGradient_Table.h"
|
| +#include "SkNx.h"
|
|
|
| #define kSQRT_TABLE_BITS 11
|
| #define kSQRT_TABLE_SIZE (1 << kSQRT_TABLE_BITS)
|
| @@ -270,13 +271,16 @@ void SkRadialGradient::flatten(SkWriteBuffer& buffer) const {
|
| namespace {
|
|
|
| inline bool radial_completely_pinned(int fx, int dx, int fy, int dy) {
|
| - // fast, overly-conservative test: checks unit square instead
|
| - // of unit circle
|
| - bool xClamped = (fx >= SK_FixedHalf && dx >= 0) ||
|
| - (fx <= -SK_FixedHalf && dx <= 0);
|
| - bool yClamped = (fy >= SK_FixedHalf && dy >= 0) ||
|
| - (fy <= -SK_FixedHalf && dy <= 0);
|
| + // fast, overly-conservative test: checks unit square instead of unit circle
|
| + bool xClamped = (fx >= SK_FixedHalf && dx >= 0) || (fx <= -SK_FixedHalf && dx <= 0);
|
| + bool yClamped = (fy >= SK_FixedHalf && dy >= 0) || (fy <= -SK_FixedHalf && dy <= 0);
|
| + return xClamped || yClamped;
|
| +}
|
|
|
| +inline bool radial_completely_pinned(SkScalar fx, SkScalar dx, SkScalar fy, SkScalar dy) {
|
| + // fast, overly-conservative test: checks unit square instead of unit circle
|
| + bool xClamped = (fx >= 1 && dx >= 0) || (fx <= -1 && dx <= 0);
|
| + bool yClamped = (fy >= 1 && dy >= 0) || (fy <= -1 && dy <= 0);
|
| return xClamped || yClamped;
|
| }
|
|
|
| @@ -373,6 +377,70 @@ void shadeSpan_radial_clamp(SkScalar sfx, SkScalar sdx,
|
| }
|
| }
|
|
|
| +// TODO: can we get away with 0th approximatino of inverse-sqrt (i.e. faster than rsqrt)?
|
| +// seems like ~10bits is more than enough for our use, since we want a byte-index
|
| +static inline Sk4f fast_sqrt(const Sk4f& R) {
|
| + return R * R.rsqrt();
|
| +}
|
| +
|
| +static inline Sk4f sum_squares(const Sk4f& a, const Sk4f& b) {
|
| + return a * a + b * b;
|
| +}
|
| +
|
| +void shadeSpan_radial_clamp2(SkScalar sfx, SkScalar sdx, SkScalar sfy, SkScalar sdy,
|
| + SkPMColor* SK_RESTRICT dstC, const SkPMColor* SK_RESTRICT cache,
|
| + int count, int toggle) {
|
| + if (radial_completely_pinned(sfx, sdx, sfy, sdy)) {
|
| + unsigned fi = SkGradientShaderBase::kCache32Count - 1;
|
| + sk_memset32_dither(dstC,
|
| + cache[toggle + fi],
|
| + cache[next_dither_toggle(toggle) + fi],
|
| + count);
|
| + } else {
|
| + const Sk4f max(255);
|
| + const float scale = 255;
|
| + sfx *= scale;
|
| + sfy *= scale;
|
| + sdx *= scale;
|
| + sdy *= scale;
|
| + const Sk4f fx4(sfx, sfx + sdx, sfx + 2*sdx, sfx + 3*sdx);
|
| + const Sk4f fy4(sfy, sfy + sdy, sfy + 2*sdy, sfy + 3*sdy);
|
| + const Sk4f dx4(sdx * 4);
|
| + const Sk4f dy4(sdy * 4);
|
| +
|
| + Sk4f tmpxy = fx4 * dx4 + fy4 * dy4;
|
| + Sk4f tmpdxdy = sum_squares(dx4, dy4);
|
| + Sk4f R = sum_squares(fx4, fy4);
|
| + Sk4f dR = tmpxy + tmpxy + tmpdxdy;
|
| + const Sk4f ddR = tmpdxdy + tmpdxdy;
|
| +
|
| + for (int i = 0; i < (count >> 2); ++i) {
|
| + Sk4f dist = Sk4f::Min(fast_sqrt(R), max);
|
| + R += dR;
|
| + dR += ddR;
|
| +
|
| + int fi[4];
|
| + dist.castTrunc().store(fi);
|
| +
|
| + for (int i = 0; i < 4; i++) {
|
| + *dstC++ = cache[toggle + fi[i]];
|
| + toggle = next_dither_toggle(toggle);
|
| + }
|
| + }
|
| + count &= 3;
|
| + if (count) {
|
| + Sk4f dist = Sk4f::Min(fast_sqrt(R), max);
|
| +
|
| + int fi[4];
|
| + dist.castTrunc().store(fi);
|
| + for (int i = 0; i < count; i++) {
|
| + *dstC++ = cache[toggle + fi[i]];
|
| + toggle = next_dither_toggle(toggle);
|
| + }
|
| + }
|
| + }
|
| +}
|
| +
|
| // Unrolling this loop doesn't seem to help (when float); we're stalling to
|
| // get the results of the sqrt (?), and don't have enough extra registers to
|
| // have many in flight.
|
| @@ -407,6 +475,11 @@ void shadeSpan_radial_repeat(SkScalar fx, SkScalar dx, SkScalar fy, SkScalar dy,
|
|
|
| void SkRadialGradient::RadialGradientContext::shadeSpan(int x, int y,
|
| SkPMColor* SK_RESTRICT dstC, int count) {
|
| +#ifdef SK_SUPPORT_LEGACY_RADIAL_GRADIENT_SQRT
|
| + const bool use_new_proc = false;
|
| +#else
|
| + const bool use_new_proc = true;
|
| +#endif
|
| SkASSERT(count > 0);
|
|
|
| const SkRadialGradient& radialGradient = static_cast<const SkRadialGradient&>(fShader);
|
| @@ -435,7 +508,7 @@ void SkRadialGradient::RadialGradientContext::shadeSpan(int x, int y,
|
|
|
| RadialShadeProc shadeProc = shadeSpan_radial_repeat;
|
| if (SkShader::kClamp_TileMode == radialGradient.fTileMode) {
|
| - shadeProc = shadeSpan_radial_clamp;
|
| + shadeProc = use_new_proc ? shadeSpan_radial_clamp2 : shadeSpan_radial_clamp;
|
| } else if (SkShader::kMirror_TileMode == radialGradient.fTileMode) {
|
| shadeProc = shadeSpan_radial_mirror;
|
| } else {
|
|
|