Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2011 The Android Open Source Project | 2 * Copyright 2011 The Android Open Source Project |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include "SkBitmap.h" | 8 #include "SkBitmap.h" |
| 9 #include "SkBlurImageFilter.h" | 9 #include "SkBlurImageFilter.h" |
| 10 #include "SkColorPriv.h" | 10 #include "SkColorPriv.h" |
| (...skipping 21 matching lines...) Expand all Loading... | |
| 32 : INHERITED(input, cropRect), fSigma(SkSize::Make(sigmaX, sigmaY)) { | 32 : INHERITED(input, cropRect), fSigma(SkSize::Make(sigmaX, sigmaY)) { |
| 33 SkASSERT(sigmaX >= 0 && sigmaY >= 0); | 33 SkASSERT(sigmaX >= 0 && sigmaY >= 0); |
| 34 } | 34 } |
| 35 | 35 |
| 36 void SkBlurImageFilter::flatten(SkFlattenableWriteBuffer& buffer) const { | 36 void SkBlurImageFilter::flatten(SkFlattenableWriteBuffer& buffer) const { |
| 37 this->INHERITED::flatten(buffer); | 37 this->INHERITED::flatten(buffer); |
| 38 buffer.writeScalar(fSigma.fWidth); | 38 buffer.writeScalar(fSigma.fWidth); |
| 39 buffer.writeScalar(fSigma.fHeight); | 39 buffer.writeScalar(fSigma.fHeight); |
| 40 } | 40 } |
| 41 | 41 |
| 42 static void boxBlurX(const SkBitmap& src, SkBitmap* dst, int kernelSize, | 42 enum BlurDirection { |
| 43 int leftOffset, int rightOffset, const SkIRect& bounds) | 43 kX, kY |
| 44 }; | |
| 45 | |
| 46 /** | |
| 47 * | |
| 48 * In order to make memory accesses cache-friendly, we reorder the passes to | |
| 49 * use contiguous memory reads wherever possible. | |
| 50 * | |
| 51 * For example, the 6 passes of the X-and-Y blur case are rewritten as | |
| 52 * follows. Instead of 3 passes in X and 3 passes in Y, we perform | |
| 53 * 2 passes in X, 1 pass in X transposed to Y on write, 2 passes in X, | |
| 54 * then 1 pass in X transposed to Y on write. | |
| 55 * | |
| 56 * +----+ +----+ +----+ +---+ +---+ +---+ +----+ | |
| 57 * + AB + ----> | AB | ----> | AB | -----> | A | ----> | A | ----> | A | -----> | AB | | |
| 58 * +----+ blurX +----+ blurX +----+ blurXY | B | blurX | B | blurX | B | blurXY +----+ | |
| 59 * +---+ +---+ +---+ | |
|
mtklein
2013/11/06 15:18:56
This is awesome. Thank you.
| |
| 60 */ | |
| 61 | |
| 62 template<BlurDirection srcDirection, BlurDirection dstDirection> | |
| 63 static void boxBlur(const SkPMColor* src, int srcStride, SkPMColor* dst, int kernelSize, | |
| 64 int leftOffset, int rightOffset, int width, int height) | |
| 44 { | 65 { |
| 45 int width = bounds.width(), height = bounds.height(); | |
| 46 int rightBorder = SkMin32(rightOffset + 1, width); | 66 int rightBorder = SkMin32(rightOffset + 1, width); |
| 67 int srcStrideX = srcDirection == kX ? 1 : srcStride; | |
| 68 int dstStrideX = dstDirection == kX ? 1 : height; | |
| 69 int srcStrideY = srcDirection == kX ? srcStride : 1; | |
| 70 int dstStrideY = dstDirection == kX ? width : 1; | |
| 47 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION | 71 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION |
| 48 uint32_t scale = (1 << 24) / kernelSize; | 72 uint32_t scale = (1 << 24) / kernelSize; |
| 49 uint32_t half = 1 << 23; | 73 uint32_t half = 1 << 23; |
| 50 #endif | 74 #endif |
| 51 for (int y = 0; y < height; ++y) { | 75 for (int y = 0; y < height; ++y) { |
| 52 int sumA = 0, sumR = 0, sumG = 0, sumB = 0; | 76 int sumA = 0, sumR = 0, sumG = 0, sumB = 0; |
| 53 SkPMColor* p = src.getAddr32(bounds.fLeft, y + bounds.fTop); | 77 const SkPMColor* p = src; |
| 54 for (int i = 0; i < rightBorder; ++i) { | 78 for (int i = 0; i < rightBorder; ++i) { |
| 55 sumA += SkGetPackedA32(*p); | 79 sumA += SkGetPackedA32(*p); |
| 56 sumR += SkGetPackedR32(*p); | 80 sumR += SkGetPackedR32(*p); |
| 57 sumG += SkGetPackedG32(*p); | 81 sumG += SkGetPackedG32(*p); |
| 58 sumB += SkGetPackedB32(*p); | 82 sumB += SkGetPackedB32(*p); |
| 59 p++; | 83 p += srcStrideX; |
| 60 } | 84 } |
| 61 | 85 |
| 62 const SkColor* sptr = src.getAddr32(bounds.fLeft, bounds.fTop + y); | 86 const SkPMColor* sptr = src; |
| 63 SkColor* dptr = dst->getAddr32(0, y); | 87 SkColor* dptr = dst; |
| 64 for (int x = 0; x < width; ++x) { | 88 for (int x = 0; x < width; ++x) { |
| 65 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION | 89 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION |
| 66 *dptr = SkPackARGB32((sumA * scale + half) >> 24, | 90 *dptr = SkPackARGB32((sumA * scale + half) >> 24, |
| 67 (sumR * scale + half) >> 24, | 91 (sumR * scale + half) >> 24, |
| 68 (sumG * scale + half) >> 24, | 92 (sumG * scale + half) >> 24, |
| 69 (sumB * scale + half) >> 24); | 93 (sumB * scale + half) >> 24); |
| 70 #else | 94 #else |
| 71 *dptr = SkPackARGB32(sumA / kernelSize, | 95 *dptr = SkPackARGB32(sumA / kernelSize, |
| 72 sumR / kernelSize, | 96 sumR / kernelSize, |
| 73 sumG / kernelSize, | 97 sumG / kernelSize, |
| 74 sumB / kernelSize); | 98 sumB / kernelSize); |
| 75 #endif | 99 #endif |
| 76 if (x >= leftOffset) { | 100 if (x >= leftOffset) { |
| 77 SkColor l = *(sptr - leftOffset); | 101 SkColor l = *(sptr - leftOffset * srcStrideX); |
| 78 sumA -= SkGetPackedA32(l); | 102 sumA -= SkGetPackedA32(l); |
| 79 sumR -= SkGetPackedR32(l); | 103 sumR -= SkGetPackedR32(l); |
| 80 sumG -= SkGetPackedG32(l); | 104 sumG -= SkGetPackedG32(l); |
| 81 sumB -= SkGetPackedB32(l); | 105 sumB -= SkGetPackedB32(l); |
| 82 } | 106 } |
| 83 if (x + rightOffset + 1 < width) { | 107 if (x + rightOffset + 1 < width) { |
| 84 SkColor r = *(sptr + rightOffset + 1); | 108 SkColor r = *(sptr + (rightOffset + 1) * srcStrideX); |
| 85 sumA += SkGetPackedA32(r); | 109 sumA += SkGetPackedA32(r); |
| 86 sumR += SkGetPackedR32(r); | 110 sumR += SkGetPackedR32(r); |
| 87 sumG += SkGetPackedG32(r); | 111 sumG += SkGetPackedG32(r); |
| 88 sumB += SkGetPackedB32(r); | 112 sumB += SkGetPackedB32(r); |
| 89 } | 113 } |
| 90 sptr++; | 114 sptr += srcStrideX; |
| 91 dptr++; | 115 if (srcDirection == kY) { |
| 116 SK_PREFETCH(sptr + (rightOffset + 1) * srcStrideX); | |
| 117 } | |
| 118 dptr += dstStrideX; | |
| 92 } | 119 } |
| 120 src += srcStrideY; | |
| 121 dst += dstStrideY; | |
| 93 } | 122 } |
| 94 } | 123 } |
| 95 | 124 |
| 96 static void boxBlurY(const SkBitmap& src, SkBitmap* dst, int kernelSize, | 125 static void boxBlurX(const SkPMColor* src, int srcStride, SkPMColor* dst, int kernelSize, |
| 97 int topOffset, int bottomOffset, const SkIRect& bounds) | 126 int leftOffset, int rightOffset, int width, int height) |
| 98 { | 127 { |
| 99 int width = bounds.width(), height = bounds.height(); | 128 boxBlur<kX, kX>(src, srcStride, dst, kernelSize, leftOffset, rightOffset, width, height); |
| 100 int bottomBorder = SkMin32(bottomOffset + 1, height); | 129 } |
| 101 int srcStride = src.rowBytesAsPixels(); | 130 |
| 102 int dstStride = dst->rowBytesAsPixels(); | |
| 103 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION | 131 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION |
| 104 uint32_t scale = (1 << 24) / kernelSize; | 132 static void boxBlurXY(const SkPMColor* src, int srcStride, SkPMColor* dst, int kernelSize, |
| 105 uint32_t half = 1 << 23; | 133 int leftOffset, int rightOffset, int width, int height) |
| 134 { | |
| 135 boxBlur<kX, kY>(src, srcStride, dst, kernelSize, leftOffset, rightOffset, width, height); | |
| 136 } | |
| 106 #endif | 137 #endif |
| 107 for (int x = 0; x < width; ++x) { | |
| 108 int sumA = 0, sumR = 0, sumG = 0, sumB = 0; | |
| 109 SkColor* p = src.getAddr32(bounds.fLeft + x, bounds.fTop); | |
| 110 for (int i = 0; i < bottomBorder; ++i) { | |
| 111 sumA += SkGetPackedA32(*p); | |
| 112 sumR += SkGetPackedR32(*p); | |
| 113 sumG += SkGetPackedG32(*p); | |
| 114 sumB += SkGetPackedB32(*p); | |
| 115 p += srcStride; | |
| 116 } | |
| 117 | 138 |
| 118 const SkColor* sptr = src.getAddr32(bounds.fLeft + x, bounds.fTop); | 139 static void boxBlurY(const SkPMColor* src, int srcStride, SkPMColor* dst, int kernelSize, |
| 119 SkColor* dptr = dst->getAddr32(x, 0); | 140 int topOffset, int bottomOffset, int width, int height) |
| 120 for (int y = 0; y < height; ++y) { | 141 { |
| 121 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION | 142 boxBlur<kY, kY>(src, srcStride, dst, kernelSize, topOffset, bottomOffset, width, height); |
| 122 *dptr = SkPackARGB32((sumA * scale + half) >> 24, | |
| 123 (sumR * scale + half) >> 24, | |
| 124 (sumG * scale + half) >> 24, | |
| 125 (sumB * scale + half) >> 24); | |
| 126 #else | |
| 127 *dptr = SkPackARGB32(sumA / kernelSize, | |
| 128 sumR / kernelSize, | |
| 129 sumG / kernelSize, | |
| 130 sumB / kernelSize); | |
| 131 #endif | |
| 132 if (y >= topOffset) { | |
| 133 SkColor l = *(sptr - topOffset * srcStride); | |
| 134 sumA -= SkGetPackedA32(l); | |
| 135 sumR -= SkGetPackedR32(l); | |
| 136 sumG -= SkGetPackedG32(l); | |
| 137 sumB -= SkGetPackedB32(l); | |
| 138 } | |
| 139 if (y + bottomOffset + 1 < height) { | |
| 140 SkColor r = *(sptr + (bottomOffset + 1) * srcStride); | |
| 141 sumA += SkGetPackedA32(r); | |
| 142 sumR += SkGetPackedR32(r); | |
| 143 sumG += SkGetPackedG32(r); | |
| 144 sumB += SkGetPackedB32(r); | |
| 145 } | |
| 146 sptr += srcStride; | |
| 147 // The next leading pixel seems to be too hard to predict. Hint the fetch. | |
| 148 SK_PREFETCH(sptr + (bottomOffset + 1) * srcStride); | |
| 149 dptr += dstStride; | |
| 150 } | |
| 151 } | |
| 152 } | 143 } |
| 153 | 144 |
| 154 static void getBox3Params(SkScalar s, int *kernelSize, int* kernelSize3, int *lowOffset, | 145 static void getBox3Params(SkScalar s, int *kernelSize, int* kernelSize3, int *lowOffset, |
| 155 int *highOffset) | 146 int *highOffset) |
| 156 { | 147 { |
| 157 float pi = SkScalarToFloat(SK_ScalarPI); | 148 float pi = SkScalarToFloat(SK_ScalarPI); |
| 158 int d = static_cast<int>(floorf(SkScalarToFloat(s) * 3.0f * sqrtf(2.0f * pi) / 4.0f + 0.5f)); | 149 int d = static_cast<int>(floorf(SkScalarToFloat(s) * 3.0f * sqrtf(2.0f * pi) / 4.0f + 0.5f)); |
| 159 *kernelSize = d; | 150 *kernelSize = d; |
| 160 if (d % 2 == 1) { | 151 if (d % 2 == 1) { |
| 161 *lowOffset = *highOffset = (d - 1) / 2; | 152 *lowOffset = *highOffset = (d - 1) / 2; |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 206 src.copyTo(dst, dst->config()); | 197 src.copyTo(dst, dst->config()); |
| 207 return true; | 198 return true; |
| 208 } | 199 } |
| 209 | 200 |
| 210 SkBitmap temp; | 201 SkBitmap temp; |
| 211 temp.setConfig(dst->config(), dst->width(), dst->height()); | 202 temp.setConfig(dst->config(), dst->width(), dst->height()); |
| 212 if (!temp.allocPixels()) { | 203 if (!temp.allocPixels()) { |
| 213 return false; | 204 return false; |
| 214 } | 205 } |
| 215 | 206 |
| 207 const SkPMColor* s = src.getAddr32(srcBounds.left(), srcBounds.top()); | |
| 208 SkPMColor* t = temp.getAddr32(0, 0); | |
| 209 SkPMColor* d = dst->getAddr32(0, 0); | |
| 210 int w = dstBounds.width(), h = dstBounds.height(); | |
| 211 int sw = src.rowBytesAsPixels(); | |
| 216 if (kernelSizeX > 0 && kernelSizeY > 0) { | 212 if (kernelSizeX > 0 && kernelSizeY > 0) { |
| 217 boxBlurX(src, &temp, kernelSizeX, lowOffsetX, highOffsetX, srcBounds); | 213 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION |
| 218 boxBlurY(temp, dst, kernelSizeY, lowOffsetY, highOffsetY, dstBounds); | 214 boxBlurX(s, sw, t, kernelSizeX, lowOffsetX, highOffsetX, w, h); |
| 219 boxBlurX(*dst, &temp, kernelSizeX, highOffsetX, lowOffsetX, dstBounds); | 215 boxBlurX(t, w, d, kernelSizeX, highOffsetX, lowOffsetX, w, h); |
| 220 boxBlurY(temp, dst, kernelSizeY, highOffsetY, lowOffsetY, dstBounds); | 216 boxBlurXY(d, w, t, kernelSizeX3, highOffsetX, highOffsetX, w, h); |
| 221 boxBlurX(*dst, &temp, kernelSizeX3, highOffsetX, highOffsetX, dstBounds); | 217 boxBlurX(t, h, d, kernelSizeY, lowOffsetY, highOffsetY, h, w); |
| 222 boxBlurY(temp, dst, kernelSizeY3, highOffsetY, highOffsetY, dstBounds); | 218 boxBlurX(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h, w); |
| 219 boxBlurXY(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h, w); | |
| 220 #else | |
| 221 boxBlurX(s, sw, t, kernelSizeX, lowOffsetX, highOffsetX, w, h); | |
| 222 boxBlurY(t, w, d, kernelSizeY, lowOffsetY, highOffsetY, h, w); | |
| 223 boxBlurX(d, w, t, kernelSizeX, highOffsetX, lowOffsetX, w, h); | |
| 224 boxBlurY(t, w, d, kernelSizeY, highOffsetY, lowOffsetY, h, w); | |
| 225 boxBlurX(d, w, t, kernelSizeX3, highOffsetX, highOffsetX, w, h); | |
| 226 boxBlurY(t, w, d, kernelSizeY3, highOffsetY, highOffsetY, h, w); | |
| 227 #endif | |
| 223 } else if (kernelSizeX > 0) { | 228 } else if (kernelSizeX > 0) { |
| 224 boxBlurX(src, dst, kernelSizeX, lowOffsetX, highOffsetX, srcBounds); | 229 boxBlurX(s, sw, d, kernelSizeX, lowOffsetX, highOffsetX, w, h); |
| 225 boxBlurX(*dst, &temp, kernelSizeX, highOffsetX, lowOffsetX, dstBounds); | 230 boxBlurX(d, w, t, kernelSizeX, highOffsetX, lowOffsetX, w, h); |
| 226 boxBlurX(temp, dst, kernelSizeX3, highOffsetX, highOffsetX, dstBounds); | 231 boxBlurX(t, w, d, kernelSizeX3, highOffsetX, highOffsetX, w, h); |
| 227 } else if (kernelSizeY > 0) { | 232 } else if (kernelSizeY > 0) { |
| 228 boxBlurY(src, dst, kernelSizeY, lowOffsetY, highOffsetY, srcBounds); | 233 boxBlurY(s, sw, d, kernelSizeY, lowOffsetY, highOffsetY, h, w); |
| 229 boxBlurY(*dst, &temp, kernelSizeY, highOffsetY, lowOffsetY, dstBounds); | 234 boxBlurY(d, w, t, kernelSizeY, highOffsetY, lowOffsetY, h, w); |
| 230 boxBlurY(temp, dst, kernelSizeY3, highOffsetY, highOffsetY, dstBounds); | 235 boxBlurY(t, w, d, kernelSizeY3, highOffsetY, highOffsetY, h, w); |
| 231 } | 236 } |
| 232 offset->fX += srcBounds.fLeft; | 237 offset->fX += srcBounds.fLeft; |
| 233 offset->fY += srcBounds.fTop; | 238 offset->fY += srcBounds.fTop; |
| 234 return true; | 239 return true; |
| 235 } | 240 } |
| 236 | 241 |
| 237 bool SkBlurImageFilter::filterImageGPU(Proxy* proxy, const SkBitmap& src, const SkMatrix& ctm, | 242 bool SkBlurImageFilter::filterImageGPU(Proxy* proxy, const SkBitmap& src, const SkMatrix& ctm, |
| 238 SkBitmap* result, SkIPoint* offset) { | 243 SkBitmap* result, SkIPoint* offset) { |
| 239 #if SK_SUPPORT_GPU | 244 #if SK_SUPPORT_GPU |
| 240 SkBitmap input; | 245 SkBitmap input; |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 255 fSigma.width(), | 260 fSigma.width(), |
| 256 fSigma.height())); | 261 fSigma.height())); |
| 257 offset->fX += rect.fLeft; | 262 offset->fX += rect.fLeft; |
| 258 offset->fY += rect.fTop; | 263 offset->fY += rect.fTop; |
| 259 return SkImageFilterUtils::WrapTexture(tex, rect.width(), rect.height(), result); | 264 return SkImageFilterUtils::WrapTexture(tex, rect.width(), rect.height(), result); |
| 260 #else | 265 #else |
| 261 SkDEBUGFAIL("Should not call in GPU-less build"); | 266 SkDEBUGFAIL("Should not call in GPU-less build"); |
| 262 return false; | 267 return false; |
| 263 #endif | 268 #endif |
| 264 } | 269 } |
| OLD | NEW |