OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The Android Open Source Project | 2 * Copyright 2011 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkBitmap.h" | 8 #include "SkBitmap.h" |
9 #include "SkBlurImageFilter.h" | 9 #include "SkBlurImageFilter.h" |
10 #include "SkColorPriv.h" | 10 #include "SkColorPriv.h" |
(...skipping 21 matching lines...) Expand all Loading... |
32 : INHERITED(input, cropRect), fSigma(SkSize::Make(sigmaX, sigmaY)) { | 32 : INHERITED(input, cropRect), fSigma(SkSize::Make(sigmaX, sigmaY)) { |
33 SkASSERT(sigmaX >= 0 && sigmaY >= 0); | 33 SkASSERT(sigmaX >= 0 && sigmaY >= 0); |
34 } | 34 } |
35 | 35 |
36 void SkBlurImageFilter::flatten(SkFlattenableWriteBuffer& buffer) const { | 36 void SkBlurImageFilter::flatten(SkFlattenableWriteBuffer& buffer) const { |
37 this->INHERITED::flatten(buffer); | 37 this->INHERITED::flatten(buffer); |
38 buffer.writeScalar(fSigma.fWidth); | 38 buffer.writeScalar(fSigma.fWidth); |
39 buffer.writeScalar(fSigma.fHeight); | 39 buffer.writeScalar(fSigma.fHeight); |
40 } | 40 } |
41 | 41 |
42 static void boxBlurX(const SkBitmap& src, SkBitmap* dst, int kernelSize, | 42 enum BlurDirection { |
43 int leftOffset, int rightOffset, const SkIRect& bounds) | 43 kX, kY |
| 44 }; |
| 45 |
| 46 /** |
| 47 * |
| 48 * In order to make memory accesses cache-friendly, we reorder the passes to |
| 49 * use contiguous memory reads wherever possible. |
| 50 * |
| 51 * For example, the 6 passes of the X-and-Y blur case are rewritten as |
| 52 * follows. Instead of 3 passes in X and 3 passes in Y, we perform |
| 53 * 2 passes in X, 1 pass in X transposed to Y on write, 2 passes in X, |
| 54 * then 1 pass in X transposed to Y on write. |
| 55 * |
| 56 * +----+       +----+       +----+        +---+       +---+       +---+        +----+ |
| 57 * | AB | ----> | AB | ----> | AB | -----> | A | ----> | A | ----> | A | -----> | AB | |
| 58 * +----+ blurX +----+ blurX +----+ blurXY | B | blurX | B | blurX | B | blurXY +----+ |
| 59 *                                         +---+       +---+       +---+ |
| 60 * |
| 61 * In this way, two of the y-blurs become x-blurs applied to transposed |
| 62 * images, and all memory reads are contiguous. |
| 63 */ |
| 64 |
| 65 template<BlurDirection srcDirection, BlurDirection dstDirection> |
| 66 static void boxBlur(const SkPMColor* src, int srcStride, SkPMColor* dst, int ker
nelSize, |
| 67 int leftOffset, int rightOffset, int width, int height) |
44 { | 68 { |
45 int width = bounds.width(), height = bounds.height(); | |
46 int rightBorder = SkMin32(rightOffset + 1, width); | 69 int rightBorder = SkMin32(rightOffset + 1, width); |
| 70 int srcStrideX = srcDirection == kX ? 1 : srcStride; |
| 71 int dstStrideX = dstDirection == kX ? 1 : height; |
| 72 int srcStrideY = srcDirection == kX ? srcStride : 1; |
| 73 int dstStrideY = dstDirection == kX ? width : 1; |
47 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION | 74 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION |
48 uint32_t scale = (1 << 24) / kernelSize; | 75 uint32_t scale = (1 << 24) / kernelSize; |
49 uint32_t half = 1 << 23; | 76 uint32_t half = 1 << 23; |
50 #endif | 77 #endif |
51 for (int y = 0; y < height; ++y) { | 78 for (int y = 0; y < height; ++y) { |
52 int sumA = 0, sumR = 0, sumG = 0, sumB = 0; | 79 int sumA = 0, sumR = 0, sumG = 0, sumB = 0; |
53 SkPMColor* p = src.getAddr32(bounds.fLeft, y + bounds.fTop); | 80 const SkPMColor* p = src; |
54 for (int i = 0; i < rightBorder; ++i) { | 81 for (int i = 0; i < rightBorder; ++i) { |
55 sumA += SkGetPackedA32(*p); | 82 sumA += SkGetPackedA32(*p); |
56 sumR += SkGetPackedR32(*p); | 83 sumR += SkGetPackedR32(*p); |
57 sumG += SkGetPackedG32(*p); | 84 sumG += SkGetPackedG32(*p); |
58 sumB += SkGetPackedB32(*p); | 85 sumB += SkGetPackedB32(*p); |
59 p++; | 86 p += srcStrideX; |
60 } | 87 } |
61 | 88 |
62 const SkColor* sptr = src.getAddr32(bounds.fLeft, bounds.fTop + y); | 89 const SkPMColor* sptr = src; |
63 SkColor* dptr = dst->getAddr32(0, y); | 90 SkColor* dptr = dst; |
64 for (int x = 0; x < width; ++x) { | 91 for (int x = 0; x < width; ++x) { |
65 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION | 92 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION |
66 *dptr = SkPackARGB32((sumA * scale + half) >> 24, | 93 *dptr = SkPackARGB32((sumA * scale + half) >> 24, |
67 (sumR * scale + half) >> 24, | 94 (sumR * scale + half) >> 24, |
68 (sumG * scale + half) >> 24, | 95 (sumG * scale + half) >> 24, |
69 (sumB * scale + half) >> 24); | 96 (sumB * scale + half) >> 24); |
70 #else | 97 #else |
71 *dptr = SkPackARGB32(sumA / kernelSize, | 98 *dptr = SkPackARGB32(sumA / kernelSize, |
72 sumR / kernelSize, | 99 sumR / kernelSize, |
73 sumG / kernelSize, | 100 sumG / kernelSize, |
74 sumB / kernelSize); | 101 sumB / kernelSize); |
75 #endif | 102 #endif |
76 if (x >= leftOffset) { | 103 if (x >= leftOffset) { |
77 SkColor l = *(sptr - leftOffset); | 104 SkColor l = *(sptr - leftOffset * srcStrideX); |
78 sumA -= SkGetPackedA32(l); | 105 sumA -= SkGetPackedA32(l); |
79 sumR -= SkGetPackedR32(l); | 106 sumR -= SkGetPackedR32(l); |
80 sumG -= SkGetPackedG32(l); | 107 sumG -= SkGetPackedG32(l); |
81 sumB -= SkGetPackedB32(l); | 108 sumB -= SkGetPackedB32(l); |
82 } | 109 } |
83 if (x + rightOffset + 1 < width) { | 110 if (x + rightOffset + 1 < width) { |
84 SkColor r = *(sptr + rightOffset + 1); | 111 SkColor r = *(sptr + (rightOffset + 1) * srcStrideX); |
85 sumA += SkGetPackedA32(r); | 112 sumA += SkGetPackedA32(r); |
86 sumR += SkGetPackedR32(r); | 113 sumR += SkGetPackedR32(r); |
87 sumG += SkGetPackedG32(r); | 114 sumG += SkGetPackedG32(r); |
88 sumB += SkGetPackedB32(r); | 115 sumB += SkGetPackedB32(r); |
89 } | 116 } |
90 sptr++; | 117 sptr += srcStrideX; |
91 dptr++; | 118 if (srcDirection == kY) { |
| 119 SK_PREFETCH(sptr + (rightOffset + 1) * srcStrideX); |
| 120 } |
| 121 dptr += dstStrideX; |
92 } | 122 } |
| 123 src += srcStrideY; |
| 124 dst += dstStrideY; |
93 } | 125 } |
94 } | 126 } |
95 | 127 |
96 static void boxBlurY(const SkBitmap& src, SkBitmap* dst, int kernelSize, | 128 static void boxBlurX(const SkPMColor* src, int srcStride, SkPMColor* dst, int ke
rnelSize, |
97 int topOffset, int bottomOffset, const SkIRect& bounds) | 129 int leftOffset, int rightOffset, int width, int height) |
98 { | 130 { |
99 int width = bounds.width(), height = bounds.height(); | 131 boxBlur<kX, kX>(src, srcStride, dst, kernelSize, leftOffset, rightOffset, wi
dth, height); |
100 int bottomBorder = SkMin32(bottomOffset + 1, height); | 132 } |
101 int srcStride = src.rowBytesAsPixels(); | 133 |
102 int dstStride = dst->rowBytesAsPixels(); | |
103 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION | 134 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION |
104 uint32_t scale = (1 << 24) / kernelSize; | 135 static void boxBlurXY(const SkPMColor* src, int srcStride, SkPMColor* dst, int k
ernelSize, |
105 uint32_t half = 1 << 23; | 136 int leftOffset, int rightOffset, int width, int he
ight) |
| 137 { |
| 138 boxBlur<kX, kY>(src, srcStride, dst, kernelSize, leftOffset, rightOffset, wi
dth, height); |
| 139 } |
106 #endif | 140 #endif |
107 for (int x = 0; x < width; ++x) { | |
108 int sumA = 0, sumR = 0, sumG = 0, sumB = 0; | |
109 SkColor* p = src.getAddr32(bounds.fLeft + x, bounds.fTop); | |
110 for (int i = 0; i < bottomBorder; ++i) { | |
111 sumA += SkGetPackedA32(*p); | |
112 sumR += SkGetPackedR32(*p); | |
113 sumG += SkGetPackedG32(*p); | |
114 sumB += SkGetPackedB32(*p); | |
115 p += srcStride; | |
116 } | |
117 | 141 |
118 const SkColor* sptr = src.getAddr32(bounds.fLeft + x, bounds.fTop); | 142 static void boxBlurY(const SkPMColor* src, int srcStride, SkPMColor* dst, int ke
rnelSize, |
119 SkColor* dptr = dst->getAddr32(x, 0); | 143 int topOffset, int bottomOffset, int width, int height) |
120 for (int y = 0; y < height; ++y) { | 144 { |
121 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION | 145 boxBlur<kY, kY>(src, srcStride, dst, kernelSize, topOffset, bottomOffset, wi
dth, height); |
122 *dptr = SkPackARGB32((sumA * scale + half) >> 24, | |
123 (sumR * scale + half) >> 24, | |
124 (sumG * scale + half) >> 24, | |
125 (sumB * scale + half) >> 24); | |
126 #else | |
127 *dptr = SkPackARGB32(sumA / kernelSize, | |
128 sumR / kernelSize, | |
129 sumG / kernelSize, | |
130 sumB / kernelSize); | |
131 #endif | |
132 if (y >= topOffset) { | |
133 SkColor l = *(sptr - topOffset * srcStride); | |
134 sumA -= SkGetPackedA32(l); | |
135 sumR -= SkGetPackedR32(l); | |
136 sumG -= SkGetPackedG32(l); | |
137 sumB -= SkGetPackedB32(l); | |
138 } | |
139 if (y + bottomOffset + 1 < height) { | |
140 SkColor r = *(sptr + (bottomOffset + 1) * srcStride); | |
141 sumA += SkGetPackedA32(r); | |
142 sumR += SkGetPackedR32(r); | |
143 sumG += SkGetPackedG32(r); | |
144 sumB += SkGetPackedB32(r); | |
145 } | |
146 sptr += srcStride; | |
147 // The next leading pixel seems to be too hard to predict. Hint the
fetch. | |
148 SK_PREFETCH(sptr + (bottomOffset + 1) * srcStride); | |
149 dptr += dstStride; | |
150 } | |
151 } | |
152 } | 146 } |
153 | 147 |
154 static void getBox3Params(SkScalar s, int *kernelSize, int* kernelSize3, int *lo
wOffset, | 148 static void getBox3Params(SkScalar s, int *kernelSize, int* kernelSize3, int *lo
wOffset, |
155 int *highOffset) | 149 int *highOffset) |
156 { | 150 { |
157 float pi = SkScalarToFloat(SK_ScalarPI); | 151 float pi = SkScalarToFloat(SK_ScalarPI); |
158 int d = static_cast<int>(floorf(SkScalarToFloat(s) * 3.0f * sqrtf(2.0f * pi)
/ 4.0f + 0.5f)); | 152 int d = static_cast<int>(floorf(SkScalarToFloat(s) * 3.0f * sqrtf(2.0f * pi)
/ 4.0f + 0.5f)); |
159 *kernelSize = d; | 153 *kernelSize = d; |
160 if (d % 2 == 1) { | 154 if (d % 2 == 1) { |
161 *lowOffset = *highOffset = (d - 1) / 2; | 155 *lowOffset = *highOffset = (d - 1) / 2; |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
206 src.copyTo(dst, dst->config()); | 200 src.copyTo(dst, dst->config()); |
207 return true; | 201 return true; |
208 } | 202 } |
209 | 203 |
210 SkBitmap temp; | 204 SkBitmap temp; |
211 temp.setConfig(dst->config(), dst->width(), dst->height()); | 205 temp.setConfig(dst->config(), dst->width(), dst->height()); |
212 if (!temp.allocPixels()) { | 206 if (!temp.allocPixels()) { |
213 return false; | 207 return false; |
214 } | 208 } |
215 | 209 |
| 210 const SkPMColor* s = src.getAddr32(srcBounds.left(), srcBounds.top()); |
| 211 SkPMColor* t = temp.getAddr32(0, 0); |
| 212 SkPMColor* d = dst->getAddr32(0, 0); |
| 213 int w = dstBounds.width(), h = dstBounds.height(); |
| 214 int sw = src.rowBytesAsPixels(); |
216 if (kernelSizeX > 0 && kernelSizeY > 0) { | 215 if (kernelSizeX > 0 && kernelSizeY > 0) { |
217 boxBlurX(src, &temp, kernelSizeX, lowOffsetX, highOffsetX, srcBounds)
; | 216 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION |
218 boxBlurY(temp, dst, kernelSizeY, lowOffsetY, highOffsetY, dstBounds)
; | 217 boxBlurX(s, sw, t, kernelSizeX, lowOffsetX, highOffsetX, w, h); |
219 boxBlurX(*dst, &temp, kernelSizeX, highOffsetX, lowOffsetX, dstBounds); | 218 boxBlurX(t, w, d, kernelSizeX, highOffsetX, lowOffsetX, w, h); |
220 boxBlurY(temp, dst, kernelSizeY, highOffsetY, lowOffsetY, dstBounds); | 219 boxBlurXY(d, w, t, kernelSizeX3, highOffsetX, highOffsetX, w, h); |
221 boxBlurX(*dst, &temp, kernelSizeX3, highOffsetX, highOffsetX, dstBounds)
; | 220 boxBlurX(t, h, d, kernelSizeY, lowOffsetY, highOffsetY, h, w); |
222 boxBlurY(temp, dst, kernelSizeY3, highOffsetY, highOffsetY, dstBounds)
; | 221 boxBlurX(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h, w); |
| 222 boxBlurXY(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h, w); |
| 223 #else |
| 224 boxBlurX(s, sw, t, kernelSizeX, lowOffsetX, highOffsetX, w, h); |
| 225 boxBlurY(t, w, d, kernelSizeY, lowOffsetY, highOffsetY, h, w); |
| 226 boxBlurX(d, w, t, kernelSizeX, highOffsetX, lowOffsetX, w, h); |
| 227 boxBlurY(t, w, d, kernelSizeY, highOffsetY, lowOffsetY, h, w); |
| 228 boxBlurX(d, w, t, kernelSizeX3, highOffsetX, highOffsetX, w, h); |
| 229 boxBlurY(t, w, d, kernelSizeY3, highOffsetY, highOffsetY, h, w); |
| 230 #endif |
223 } else if (kernelSizeX > 0) { | 231 } else if (kernelSizeX > 0) { |
224 boxBlurX(src, dst, kernelSizeX, lowOffsetX, highOffsetX, srcBounds)
; | 232 boxBlurX(s, sw, d, kernelSizeX, lowOffsetX, highOffsetX, w, h); |
225 boxBlurX(*dst, &temp, kernelSizeX, highOffsetX, lowOffsetX, dstBounds); | 233 boxBlurX(d, w, t, kernelSizeX, highOffsetX, lowOffsetX, w, h); |
226 boxBlurX(temp, dst, kernelSizeX3, highOffsetX, highOffsetX, dstBounds)
; | 234 boxBlurX(t, w, d, kernelSizeX3, highOffsetX, highOffsetX, w, h); |
227 } else if (kernelSizeY > 0) { | 235 } else if (kernelSizeY > 0) { |
228 boxBlurY(src, dst, kernelSizeY, lowOffsetY, highOffsetY, srcBounds)
; | 236 boxBlurY(s, sw, d, kernelSizeY, lowOffsetY, highOffsetY, h, w); |
229 boxBlurY(*dst, &temp, kernelSizeY, highOffsetY, lowOffsetY, dstBounds); | 237 boxBlurY(d, w, t, kernelSizeY, highOffsetY, lowOffsetY, h, w); |
230 boxBlurY(temp, dst, kernelSizeY3, highOffsetY, highOffsetY, dstBounds)
; | 238 boxBlurY(t, w, d, kernelSizeY3, highOffsetY, highOffsetY, h, w); |
231 } | 239 } |
232 offset->fX += srcBounds.fLeft; | 240 offset->fX += srcBounds.fLeft; |
233 offset->fY += srcBounds.fTop; | 241 offset->fY += srcBounds.fTop; |
234 return true; | 242 return true; |
235 } | 243 } |
236 | 244 |
237 bool SkBlurImageFilter::filterImageGPU(Proxy* proxy, const SkBitmap& src, const
SkMatrix& ctm, | 245 bool SkBlurImageFilter::filterImageGPU(Proxy* proxy, const SkBitmap& src, const
SkMatrix& ctm, |
238 SkBitmap* result, SkIPoint* offset) { | 246 SkBitmap* result, SkIPoint* offset) { |
239 #if SK_SUPPORT_GPU | 247 #if SK_SUPPORT_GPU |
240 SkBitmap input; | 248 SkBitmap input; |
(...skipping 14 matching lines...) Expand all Loading... |
255 fSigma.width(), | 263 fSigma.width(), |
256 fSigma.height())); | 264 fSigma.height())); |
257 offset->fX += rect.fLeft; | 265 offset->fX += rect.fLeft; |
258 offset->fY += rect.fTop; | 266 offset->fY += rect.fTop; |
259 return SkImageFilterUtils::WrapTexture(tex, rect.width(), rect.height(), res
ult); | 267 return SkImageFilterUtils::WrapTexture(tex, rect.width(), rect.height(), res
ult); |
260 #else | 268 #else |
261 SkDEBUGFAIL("Should not call in GPU-less build"); | 269 SkDEBUGFAIL("Should not call in GPU-less build"); |
262 return false; | 270 return false; |
263 #endif | 271 #endif |
264 } | 272 } |
OLD | NEW |