Index: src/effects/SkBlurImageFilter.cpp |
diff --git a/src/effects/SkBlurImageFilter.cpp b/src/effects/SkBlurImageFilter.cpp |
index 17f8d3a337397de0a3b3c83afa3e43b4f6619c34..a867971a0c289bb5196be40d56691004a0a7d1da 100644 |
--- a/src/effects/SkBlurImageFilter.cpp |
+++ b/src/effects/SkBlurImageFilter.cpp |
@@ -8,10 +8,10 @@ |
#include "SkBitmap.h" |
#include "SkBlurImageFilter.h" |
#include "SkColorPriv.h" |
+#include "SkGpuBlurUtils.h" |
+#include "SkOpts.h" |
#include "SkReadBuffer.h" |
#include "SkWriteBuffer.h" |
-#include "SkGpuBlurUtils.h" |
-#include "SkBlurImage_opts.h" |
#if SK_SUPPORT_GPU |
#include "GrContext.h" |
#endif |
@@ -51,83 +51,6 @@ void SkBlurImageFilter::flatten(SkWriteBuffer& buffer) const { |
buffer.writeScalar(fSigma.fHeight); |
} |
-enum BlurDirection { |
- kX, kY |
-}; |
- |
-/** |
- * |
- * In order to make memory accesses cache-friendly, we reorder the passes to |
- * use contiguous memory reads wherever possible. |
- * |
- * For example, the 6 passes of the X-and-Y blur case are rewritten as |
- * follows. Instead of 3 passes in X and 3 passes in Y, we perform |
- * 2 passes in X, 1 pass in X transposed to Y on write, 2 passes in X, |
- * then 1 pass in X transposed to Y on write. |
- * |
- * +----+ +----+ +----+ +---+ +---+ +---+ +----+ |
- * + AB + ----> | AB | ----> | AB | -----> | A | ----> | A | ----> | A | -----> | AB | |
- * +----+ blurX +----+ blurX +----+ blurXY | B | blurX | B | blurX | B | blurXY +----+ |
- * +---+ +---+ +---+ |
- * |
- * In this way, two of the y-blurs become x-blurs applied to transposed |
- * images, and all memory reads are contiguous. |
- */ |
- |
-template<BlurDirection srcDirection, BlurDirection dstDirection> |
-static void boxBlur(const SkPMColor* src, int srcStride, SkPMColor* dst, int kernelSize, |
- int leftOffset, int rightOffset, int width, int height) |
-{ |
- int rightBorder = SkMin32(rightOffset + 1, width); |
- int srcStrideX = srcDirection == kX ? 1 : srcStride; |
- int dstStrideX = dstDirection == kX ? 1 : height; |
- int srcStrideY = srcDirection == kX ? srcStride : 1; |
- int dstStrideY = dstDirection == kX ? width : 1; |
- uint32_t scale = (1 << 24) / kernelSize; |
- uint32_t half = 1 << 23; |
- for (int y = 0; y < height; ++y) { |
- int sumA = 0, sumR = 0, sumG = 0, sumB = 0; |
- const SkPMColor* p = src; |
- for (int i = 0; i < rightBorder; ++i) { |
- sumA += SkGetPackedA32(*p); |
- sumR += SkGetPackedR32(*p); |
- sumG += SkGetPackedG32(*p); |
- sumB += SkGetPackedB32(*p); |
- p += srcStrideX; |
- } |
- |
- const SkPMColor* sptr = src; |
- SkColor* dptr = dst; |
- for (int x = 0; x < width; ++x) { |
- *dptr = SkPackARGB32((sumA * scale + half) >> 24, |
- (sumR * scale + half) >> 24, |
- (sumG * scale + half) >> 24, |
- (sumB * scale + half) >> 24); |
- if (x >= leftOffset) { |
- SkColor l = *(sptr - leftOffset * srcStrideX); |
- sumA -= SkGetPackedA32(l); |
- sumR -= SkGetPackedR32(l); |
- sumG -= SkGetPackedG32(l); |
- sumB -= SkGetPackedB32(l); |
- } |
- if (x + rightOffset + 1 < width) { |
- SkColor r = *(sptr + (rightOffset + 1) * srcStrideX); |
- sumA += SkGetPackedA32(r); |
- sumR += SkGetPackedR32(r); |
- sumG += SkGetPackedG32(r); |
- sumB += SkGetPackedB32(r); |
- } |
- sptr += srcStrideX; |
- if (srcDirection == kY) { |
- SK_PREFETCH(sptr + (rightOffset + 1) * srcStrideX); |
- } |
- dptr += dstStrideX; |
- } |
- src += srcStrideY; |
- dst += dstStrideY; |
- } |
-} |
- |
static void getBox3Params(SkScalar s, int *kernelSize, int* kernelSize3, int *lowOffset, |
int *highOffset) |
{ |
@@ -204,28 +127,40 @@ bool SkBlurImageFilter::onFilterImage(Proxy* proxy, |
SkPMColor* d = dst->getAddr32(0, 0); |
int w = dstBounds.width(), h = dstBounds.height(); |
int sw = src.rowBytesAsPixels(); |
- SkBoxBlurProc boxBlurX, boxBlurXY, boxBlurYX; |
- if (!SkBoxBlurGetPlatformProcs(&boxBlurX, &boxBlurXY, &boxBlurYX)) { |
- boxBlurX = boxBlur<kX, kX>; |
- boxBlurXY = boxBlur<kX, kY>; |
- boxBlurYX = boxBlur<kY, kX>; |
- } |
+ /** |
+ * |
+ * In order to make memory accesses cache-friendly, we reorder the passes to |
+ * use contiguous memory reads wherever possible. |
+ * |
+ * For example, the 6 passes of the X-and-Y blur case are rewritten as |
+ * follows. Instead of 3 passes in X and 3 passes in Y, we perform |
+ * 2 passes in X, 1 pass in X transposed to Y on write, 2 passes in X, |
+ * then 1 pass in X transposed to Y on write. |
+ * |
+ * +----+ +----+ +----+ +---+ +---+ +---+ +----+ |
+ * | AB | ----> | AB | ----> | AB | -----> | A | ----> | A | ----> | A | -----> | AB | |
+ * +----+ blurX +----+ blurX +----+ blurXY | B | blurX | B | blurX | B | blurXY +----+ |
+ * +---+ +---+ +---+ |
+ * |
+ * In this way, two of the y-blurs become x-blurs applied to transposed |
+ * images, and all memory reads are contiguous. |
+ */ |
if (kernelSizeX > 0 && kernelSizeY > 0) { |
- boxBlurX(s, sw, t, kernelSizeX, lowOffsetX, highOffsetX, w, h); |
- boxBlurX(t, w, d, kernelSizeX, highOffsetX, lowOffsetX, w, h); |
- boxBlurXY(d, w, t, kernelSizeX3, highOffsetX, highOffsetX, w, h); |
- boxBlurX(t, h, d, kernelSizeY, lowOffsetY, highOffsetY, h, w); |
- boxBlurX(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h, w); |
- boxBlurXY(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h, w); |
+ SkOpts::box_blur_xx(s, sw, t, kernelSizeX, lowOffsetX, highOffsetX, w, h); |
+ SkOpts::box_blur_xx(t, w, d, kernelSizeX, highOffsetX, lowOffsetX, w, h); |
+ SkOpts::box_blur_xy(d, w, t, kernelSizeX3, highOffsetX, highOffsetX, w, h); |
+ SkOpts::box_blur_xx(t, h, d, kernelSizeY, lowOffsetY, highOffsetY, h, w); |
+ SkOpts::box_blur_xx(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h, w); |
+ SkOpts::box_blur_xy(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h, w); |
} else if (kernelSizeX > 0) { |
- boxBlurX(s, sw, d, kernelSizeX, lowOffsetX, highOffsetX, w, h); |
- boxBlurX(d, w, t, kernelSizeX, highOffsetX, lowOffsetX, w, h); |
- boxBlurX(t, w, d, kernelSizeX3, highOffsetX, highOffsetX, w, h); |
+ SkOpts::box_blur_xx(s, sw, d, kernelSizeX, lowOffsetX, highOffsetX, w, h); |
+ SkOpts::box_blur_xx(d, w, t, kernelSizeX, highOffsetX, lowOffsetX, w, h); |
+ SkOpts::box_blur_xx(t, w, d, kernelSizeX3, highOffsetX, highOffsetX, w, h); |
} else if (kernelSizeY > 0) { |
- boxBlurYX(s, sw, d, kernelSizeY, lowOffsetY, highOffsetY, h, w); |
- boxBlurX(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h, w); |
- boxBlurXY(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h, w); |
+ SkOpts::box_blur_yx(s, sw, d, kernelSizeY, lowOffsetY, highOffsetY, h, w); |
+ SkOpts::box_blur_xx(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h, w); |
+ SkOpts::box_blur_xy(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h, w); |
} |
return true; |
} |
@@ -260,7 +195,7 @@ bool SkBlurImageFilter::filterImageGPU(Proxy* proxy, const SkBitmap& src, const |
#if SK_SUPPORT_GPU |
SkBitmap input = src; |
SkIPoint srcOffset = SkIPoint::Make(0, 0); |
- if (this->getInput(0) && |
+ if (this->getInput(0) && |
!this->getInput(0)->getInputResultGPU(proxy, src, ctx, &input, &srcOffset)) { |
return false; |
} |