| Index: src/effects/SkBlurImageFilter.cpp
|
| diff --git a/src/effects/SkBlurImageFilter.cpp b/src/effects/SkBlurImageFilter.cpp
|
| index 17f8d3a337397de0a3b3c83afa3e43b4f6619c34..a867971a0c289bb5196be40d56691004a0a7d1da 100644
|
| --- a/src/effects/SkBlurImageFilter.cpp
|
| +++ b/src/effects/SkBlurImageFilter.cpp
|
| @@ -8,10 +8,10 @@
|
| #include "SkBitmap.h"
|
| #include "SkBlurImageFilter.h"
|
| #include "SkColorPriv.h"
|
| +#include "SkGpuBlurUtils.h"
|
| +#include "SkOpts.h"
|
| #include "SkReadBuffer.h"
|
| #include "SkWriteBuffer.h"
|
| -#include "SkGpuBlurUtils.h"
|
| -#include "SkBlurImage_opts.h"
|
| #if SK_SUPPORT_GPU
|
| #include "GrContext.h"
|
| #endif
|
| @@ -51,83 +51,6 @@ void SkBlurImageFilter::flatten(SkWriteBuffer& buffer) const {
|
| buffer.writeScalar(fSigma.fHeight);
|
| }
|
|
|
| -enum BlurDirection {
|
| - kX, kY
|
| -};
|
| -
|
| -/**
|
| - *
|
| - * In order to make memory accesses cache-friendly, we reorder the passes to
|
| - * use contiguous memory reads wherever possible.
|
| - *
|
| - * For example, the 6 passes of the X-and-Y blur case are rewritten as
|
| - * follows. Instead of 3 passes in X and 3 passes in Y, we perform
|
| - * 2 passes in X, 1 pass in X transposed to Y on write, 2 passes in X,
|
| - * then 1 pass in X transposed to Y on write.
|
| - *
|
| - * +----+       +----+       +----+        +---+       +---+       +---+        +----+
|
| - * + AB + ----> | AB | ----> | AB | -----> | A | ----> | A | ----> | A | -----> | AB |
|
| - * +----+ blurX +----+ blurX +----+ blurXY | B | blurX | B | blurX | B | blurXY +----+
|
| - *                                         +---+       +---+       +---+
|
| - *
|
| - * In this way, two of the y-blurs become x-blurs applied to transposed
|
| - * images, and all memory reads are contiguous.
|
| - */
|
| -
|
| -template<BlurDirection srcDirection, BlurDirection dstDirection>
|
| -static void boxBlur(const SkPMColor* src, int srcStride, SkPMColor* dst, int kernelSize,
|
| - int leftOffset, int rightOffset, int width, int height)
|
| -{
|
| - int rightBorder = SkMin32(rightOffset + 1, width);
|
| - int srcStrideX = srcDirection == kX ? 1 : srcStride;
|
| - int dstStrideX = dstDirection == kX ? 1 : height;
|
| - int srcStrideY = srcDirection == kX ? srcStride : 1;
|
| - int dstStrideY = dstDirection == kX ? width : 1;
|
| - uint32_t scale = (1 << 24) / kernelSize;
|
| - uint32_t half = 1 << 23;
|
| - for (int y = 0; y < height; ++y) {
|
| - int sumA = 0, sumR = 0, sumG = 0, sumB = 0;
|
| - const SkPMColor* p = src;
|
| - for (int i = 0; i < rightBorder; ++i) {
|
| - sumA += SkGetPackedA32(*p);
|
| - sumR += SkGetPackedR32(*p);
|
| - sumG += SkGetPackedG32(*p);
|
| - sumB += SkGetPackedB32(*p);
|
| - p += srcStrideX;
|
| - }
|
| -
|
| - const SkPMColor* sptr = src;
|
| - SkColor* dptr = dst;
|
| - for (int x = 0; x < width; ++x) {
|
| - *dptr = SkPackARGB32((sumA * scale + half) >> 24,
|
| - (sumR * scale + half) >> 24,
|
| - (sumG * scale + half) >> 24,
|
| - (sumB * scale + half) >> 24);
|
| - if (x >= leftOffset) {
|
| - SkColor l = *(sptr - leftOffset * srcStrideX);
|
| - sumA -= SkGetPackedA32(l);
|
| - sumR -= SkGetPackedR32(l);
|
| - sumG -= SkGetPackedG32(l);
|
| - sumB -= SkGetPackedB32(l);
|
| - }
|
| - if (x + rightOffset + 1 < width) {
|
| - SkColor r = *(sptr + (rightOffset + 1) * srcStrideX);
|
| - sumA += SkGetPackedA32(r);
|
| - sumR += SkGetPackedR32(r);
|
| - sumG += SkGetPackedG32(r);
|
| - sumB += SkGetPackedB32(r);
|
| - }
|
| - sptr += srcStrideX;
|
| - if (srcDirection == kY) {
|
| - SK_PREFETCH(sptr + (rightOffset + 1) * srcStrideX);
|
| - }
|
| - dptr += dstStrideX;
|
| - }
|
| - src += srcStrideY;
|
| - dst += dstStrideY;
|
| - }
|
| -}
|
| -
|
| static void getBox3Params(SkScalar s, int *kernelSize, int* kernelSize3, int *lowOffset,
|
| int *highOffset)
|
| {
|
| @@ -204,28 +127,40 @@ bool SkBlurImageFilter::onFilterImage(Proxy* proxy,
|
| SkPMColor* d = dst->getAddr32(0, 0);
|
| int w = dstBounds.width(), h = dstBounds.height();
|
| int sw = src.rowBytesAsPixels();
|
| - SkBoxBlurProc boxBlurX, boxBlurXY, boxBlurYX;
|
| - if (!SkBoxBlurGetPlatformProcs(&boxBlurX, &boxBlurXY, &boxBlurYX)) {
|
| - boxBlurX = boxBlur<kX, kX>;
|
| - boxBlurXY = boxBlur<kX, kY>;
|
| - boxBlurYX = boxBlur<kY, kX>;
|
| - }
|
|
|
| + /**
|
| + *
|
| + * In order to make memory accesses cache-friendly, we reorder the passes to
|
| + * use contiguous memory reads wherever possible.
|
| + *
|
| + * For example, the 6 passes of the X-and-Y blur case are rewritten as
|
| + * follows. Instead of 3 passes in X and 3 passes in Y, we perform
|
| + * 2 passes in X, 1 pass in X transposed to Y on write, 2 passes in X,
|
| + * then 1 pass in X transposed to Y on write.
|
| + *
|
| + * +----+       +----+       +----+        +---+       +---+       +---+        +----+
|
| + * | AB | ----> | AB | ----> | AB | -----> | A | ----> | A | ----> | A | -----> | AB |
|
| + * +----+ blurX +----+ blurX +----+ blurXY | B | blurX | B | blurX | B | blurXY +----+
|
| + *                                         +---+       +---+       +---+
|
| + *
|
| + * In this way, two of the y-blurs become x-blurs applied to transposed
|
| + * images, and all memory reads are contiguous.
|
| + */
|
| if (kernelSizeX > 0 && kernelSizeY > 0) {
|
| - boxBlurX(s, sw, t, kernelSizeX, lowOffsetX, highOffsetX, w, h);
|
| - boxBlurX(t, w, d, kernelSizeX, highOffsetX, lowOffsetX, w, h);
|
| - boxBlurXY(d, w, t, kernelSizeX3, highOffsetX, highOffsetX, w, h);
|
| - boxBlurX(t, h, d, kernelSizeY, lowOffsetY, highOffsetY, h, w);
|
| - boxBlurX(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h, w);
|
| - boxBlurXY(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h, w);
|
| + SkOpts::box_blur_xx(s, sw, t, kernelSizeX, lowOffsetX, highOffsetX, w, h);
|
| + SkOpts::box_blur_xx(t, w, d, kernelSizeX, highOffsetX, lowOffsetX, w, h);
|
| + SkOpts::box_blur_xy(d, w, t, kernelSizeX3, highOffsetX, highOffsetX, w, h);
|
| + SkOpts::box_blur_xx(t, h, d, kernelSizeY, lowOffsetY, highOffsetY, h, w);
|
| + SkOpts::box_blur_xx(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h, w);
|
| + SkOpts::box_blur_xy(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h, w);
|
| } else if (kernelSizeX > 0) {
|
| - boxBlurX(s, sw, d, kernelSizeX, lowOffsetX, highOffsetX, w, h);
|
| - boxBlurX(d, w, t, kernelSizeX, highOffsetX, lowOffsetX, w, h);
|
| - boxBlurX(t, w, d, kernelSizeX3, highOffsetX, highOffsetX, w, h);
|
| + SkOpts::box_blur_xx(s, sw, d, kernelSizeX, lowOffsetX, highOffsetX, w, h);
|
| + SkOpts::box_blur_xx(d, w, t, kernelSizeX, highOffsetX, lowOffsetX, w, h);
|
| + SkOpts::box_blur_xx(t, w, d, kernelSizeX3, highOffsetX, highOffsetX, w, h);
|
| } else if (kernelSizeY > 0) {
|
| - boxBlurYX(s, sw, d, kernelSizeY, lowOffsetY, highOffsetY, h, w);
|
| - boxBlurX(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h, w);
|
| - boxBlurXY(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h, w);
|
| + SkOpts::box_blur_yx(s, sw, d, kernelSizeY, lowOffsetY, highOffsetY, h, w);
|
| + SkOpts::box_blur_xx(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h, w);
|
| + SkOpts::box_blur_xy(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h, w);
|
| }
|
| return true;
|
| }
|
| @@ -260,7 +195,7 @@ bool SkBlurImageFilter::filterImageGPU(Proxy* proxy, const SkBitmap& src, const
|
| #if SK_SUPPORT_GPU
|
| SkBitmap input = src;
|
| SkIPoint srcOffset = SkIPoint::Make(0, 0);
|
| - if (this->getInput(0) &&
|
| + if (this->getInput(0) &&
|
| !this->getInput(0)->getInputResultGPU(proxy, src, ctx, &input, &srcOffset)) {
|
| return false;
|
| }
|
|
|