OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The Android Open Source Project | 2 * Copyright 2011 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkBitmap.h" | 8 #include "SkBitmap.h" |
9 #include "SkBlurImageFilter.h" | 9 #include "SkBlurImageFilter.h" |
10 #include "SkColorPriv.h" | 10 #include "SkColorPriv.h" |
| 11 #include "SkGpuBlurUtils.h" |
| 12 #include "SkOpts.h" |
11 #include "SkReadBuffer.h" | 13 #include "SkReadBuffer.h" |
12 #include "SkWriteBuffer.h" | 14 #include "SkWriteBuffer.h" |
13 #include "SkGpuBlurUtils.h" | |
14 #include "SkBlurImage_opts.h" | |
15 #if SK_SUPPORT_GPU | 15 #if SK_SUPPORT_GPU |
16 #include "GrContext.h" | 16 #include "GrContext.h" |
17 #endif | 17 #endif |
18 | 18 |
19 // This rather arbitrary-looking value results in a maximum box blur kernel size | 19 // This rather arbitrary-looking value results in a maximum box blur kernel size |
20 // of 1000 pixels on the raster path, which matches the WebKit and Firefox | 20 // of 1000 pixels on the raster path, which matches the WebKit and Firefox |
21 // implementations. Since the GPU path does not compute a box blur, putting | 21 // implementations. Since the GPU path does not compute a box blur, putting |
22 // the limit on sigma ensures consistent behaviour between the GPU and | 22 // the limit on sigma ensures consistent behaviour between the GPU and |
23 // raster paths. | 23 // raster paths. |
24 #define MAX_SIGMA SkIntToScalar(532) | 24 #define MAX_SIGMA SkIntToScalar(532) |
(...skipping 19 matching lines...) Expand all Loading... |
44 SkScalar sigmaY = buffer.readScalar(); | 44 SkScalar sigmaY = buffer.readScalar(); |
45 return Create(sigmaX, sigmaY, common.getInput(0), &common.cropRect()); | 45 return Create(sigmaX, sigmaY, common.getInput(0), &common.cropRect()); |
46 } | 46 } |
47 | 47 |
// Serializes this blur filter into |buffer|.
// The inherited state is written first via INHERITED::flatten(), followed by
// the two blur sigmas as scalars: fSigma.fWidth, then fSigma.fHeight.
// NOTE(review): the deserialization code visible above reads sigmaY with
// readScalar() and passes (sigmaX, sigmaY) to Create(); presumably it reads
// sigmaX first so the order matches what is written here -- confirm.
48 void SkBlurImageFilter::flatten(SkWriteBuffer& buffer) const { | 48 void SkBlurImageFilter::flatten(SkWriteBuffer& buffer) const { |
49 this->INHERITED::flatten(buffer); | 49 this->INHERITED::flatten(buffer); |
50 buffer.writeScalar(fSigma.fWidth); | 50 buffer.writeScalar(fSigma.fWidth); |
51 buffer.writeScalar(fSigma.fHeight); | 51 buffer.writeScalar(fSigma.fHeight); |
52 } | 52 } |
53 | 53 |
54 enum BlurDirection { | |
55 kX, kY | |
56 }; | |
57 | |
58 /** | |
59 * | |
60 * In order to make memory accesses cache-friendly, we reorder the passes to | |
61 * use contiguous memory reads wherever possible. | |
62 * | |
63 * For example, the 6 passes of the X-and-Y blur case are rewritten as | |
64 * follows. Instead of 3 passes in X and 3 passes in Y, we perform | |
65 * 2 passes in X, 1 pass in X transposed to Y on write, 2 passes in X, | |
66 * then 1 pass in X transposed to Y on write. | |
67 * | |
68 * +----+       +----+       +----+        +---+       +---+       +---+        +----+ | |
69 * | AB | ----> | AB | ----> | AB | -----> | A | ----> | A | ----> | A | -----> | AB | | |
70 * +----+ blurX +----+ blurX +----+ blurXY | B | blurX | B | blurX | B | blurXY +----+ | |
71 *                                         +---+       +---+       +---+ | |
72 * | |
73 * In this way, two of the y-blurs become x-blurs applied to transposed | |
74 * images, and all memory reads are contiguous. | |
75 */ | |
76 | |
// Single-pass box blur over a block of 32-bit premultiplied pixels, using a
// running (sliding-window) sum per channel.
//
// Template parameters:
//   srcDirection - kX: consecutive samples of a line are 1 pixel apart in
//                  |src| and successive lines are |srcStride| apart;
//                  kY: the two strides are swapped.
//   dstDirection - kX: consecutive outputs are 1 pixel apart in |dst| and
//                  successive lines are |width| apart;
//                  kY: consecutive outputs are |height| apart and successive
//                  lines are 1 apart, i.e. the result is written transposed.
//
// Parameters:
//   src, srcStride - source pixels and the source stride, in pixels.
//   dst            - destination pixels; indexed with |width| / |height| as
//                    described above.
//   kernelSize     - divisor used to normalize the window sum. It is used as
//                    the denominator of an integer division, so it must be
//                    non-zero.
//   leftOffset     - distance behind the current position at which a sample
//                    leaves the window.
//   rightOffset    - distance ahead of the current position covered by the
//                    window; the sample at rightOffset + 1 enters next.
//   width, height  - number of outputs per line, and number of lines.
77 template<BlurDirection srcDirection, BlurDirection dstDirection> | |
78 static void boxBlur(const SkPMColor* src, int srcStride, SkPMColor* dst, int ker
nelSize, | |
79 int leftOffset, int rightOffset, int width, int height) | |
80 { | |
// Number of samples in the initial window, clamped to the line length.
81 int rightBorder = SkMin32(rightOffset + 1, width); | |
// "X" strides step along a line; "Y" strides step from one line to the next.
82 int srcStrideX = srcDirection == kX ? 1 : srcStride; | |
83 int dstStrideX = dstDirection == kX ? 1 : height; | |
84 int srcStrideY = srcDirection == kX ? srcStride : 1; | |
85 int dstStrideY = dstDirection == kX ? width : 1; | |
// Fixed-point reciprocal of kernelSize with 24 fractional bits; |half| is 0.5
// in the same format, so (sum * scale + half) >> 24 is a rounded sum / kernelSize.
86 uint32_t scale = (1 << 24) / kernelSize; | |
87 uint32_t half = 1 << 23; | |
88 for (int y = 0; y < height; ++y) { | |
// Prime the per-channel sums with the first |rightBorder| samples of the line.
89 int sumA = 0, sumR = 0, sumG = 0, sumB = 0; | |
90 const SkPMColor* p = src; | |
91 for (int i = 0; i < rightBorder; ++i) { | |
92 sumA += SkGetPackedA32(*p); | |
93 sumR += SkGetPackedR32(*p); | |
94 sumG += SkGetPackedG32(*p); | |
95 sumB += SkGetPackedB32(*p); | |
96 p += srcStrideX; | |
97 } | |
98 | |
99 const SkPMColor* sptr = src; | |
100 SkColor* dptr = dst; | |
101 for (int x = 0; x < width; ++x) { | |
// Emit the normalized, rounded window sum for the current position.
102 *dptr = SkPackARGB32((sumA * scale + half) >> 24, | |
103 (sumR * scale + half) >> 24, | |
104 (sumG * scale + half) >> 24, | |
105 (sumB * scale + half) >> 24); | |
// Slide the window: drop the sample leftOffset behind the current position,
// once such a sample exists on this line...
106 if (x >= leftOffset) { | |
107 SkColor l = *(sptr - leftOffset * srcStrideX); | |
108 sumA -= SkGetPackedA32(l); | |
109 sumR -= SkGetPackedR32(l); | |
110 sumG -= SkGetPackedG32(l); | |
111 sumB -= SkGetPackedB32(l); | |
112 } | |
// ...and add the sample rightOffset + 1 ahead, while it is still inside the line.
113 if (x + rightOffset + 1 < width) { | |
114 SkColor r = *(sptr + (rightOffset + 1) * srcStrideX); | |
115 sumA += SkGetPackedA32(r); | |
116 sumR += SkGetPackedR32(r); | |
117 sumG += SkGetPackedG32(r); | |
118 sumB += SkGetPackedB32(r); | |
119 } | |
120 sptr += srcStrideX; | |
// Only the strided (kY) read pattern issues a prefetch hint, for the sample
// that will enter the window on the next iteration.
121 if (srcDirection == kY) { | |
122 SK_PREFETCH(sptr + (rightOffset + 1) * srcStrideX); | |
123 } | |
124 dptr += dstStrideX; | |
125 } | |
// Advance to the start of the next source line and the next output line.
126 src += srcStrideY; | |
127 dst += dstStrideY; | |
128 } | |
129 } | |
130 | |
131 static void getBox3Params(SkScalar s, int *kernelSize, int* kernelSize3, int *lo
wOffset, | 54 static void getBox3Params(SkScalar s, int *kernelSize, int* kernelSize3, int *lo
wOffset, |
132 int *highOffset) | 55 int *highOffset) |
133 { | 56 { |
134 float pi = SkScalarToFloat(SK_ScalarPI); | 57 float pi = SkScalarToFloat(SK_ScalarPI); |
135 int d = static_cast<int>(floorf(SkScalarToFloat(s) * 3.0f * sqrtf(2.0f * pi)
/ 4.0f + 0.5f)); | 58 int d = static_cast<int>(floorf(SkScalarToFloat(s) * 3.0f * sqrtf(2.0f * pi)
/ 4.0f + 0.5f)); |
136 *kernelSize = d; | 59 *kernelSize = d; |
137 if (d % 2 == 1) { | 60 if (d % 2 == 1) { |
138 *lowOffset = *highOffset = (d - 1) / 2; | 61 *lowOffset = *highOffset = (d - 1) / 2; |
139 *kernelSize3 = d; | 62 *kernelSize3 = d; |
140 } else { | 63 } else { |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
197 } | 120 } |
198 | 121 |
199 offset->fX = srcBounds.fLeft; | 122 offset->fX = srcBounds.fLeft; |
200 offset->fY = srcBounds.fTop; | 123 offset->fY = srcBounds.fTop; |
201 srcBounds.offset(-srcOffset); | 124 srcBounds.offset(-srcOffset); |
202 const SkPMColor* s = src.getAddr32(srcBounds.left(), srcBounds.top()); | 125 const SkPMColor* s = src.getAddr32(srcBounds.left(), srcBounds.top()); |
203 SkPMColor* t = temp.getAddr32(0, 0); | 126 SkPMColor* t = temp.getAddr32(0, 0); |
204 SkPMColor* d = dst->getAddr32(0, 0); | 127 SkPMColor* d = dst->getAddr32(0, 0); |
205 int w = dstBounds.width(), h = dstBounds.height(); | 128 int w = dstBounds.width(), h = dstBounds.height(); |
206 int sw = src.rowBytesAsPixels(); | 129 int sw = src.rowBytesAsPixels(); |
207 SkBoxBlurProc boxBlurX, boxBlurXY, boxBlurYX; | |
208 if (!SkBoxBlurGetPlatformProcs(&boxBlurX, &boxBlurXY, &boxBlurYX)) { | |
209 boxBlurX = boxBlur<kX, kX>; | |
210 boxBlurXY = boxBlur<kX, kY>; | |
211 boxBlurYX = boxBlur<kY, kX>; | |
212 } | |
213 | 130 |
| 131 /** |
| 132 * |
| 133 * In order to make memory accesses cache-friendly, we reorder the passes to |
| 134 * use contiguous memory reads wherever possible. |
| 135 * |
| 136 * For example, the 6 passes of the X-and-Y blur case are rewritten as |
| 137 * follows. Instead of 3 passes in X and 3 passes in Y, we perform |
| 138 * 2 passes in X, 1 pass in X transposed to Y on write, 2 passes in X, |
| 139 * then 1 pass in X transposed to Y on write. |
| 140 * |
| 141 * +----+       +----+       +----+        +---+       +---+       +---+        +----+ |
| 142 * | AB | ----> | AB | ----> | AB | -----> | A | ----> | A | ----> | A | -----> | AB | |
| 143 * +----+ blurX +----+ blurX +----+ blurXY | B | blurX | B | blurX | B | blurXY +----+ |
| 144 *                                         +---+       +---+       +---+ |
| 145 * |
| 146 * In this way, two of the y-blurs become x-blurs applied to transposed |
| 147 * images, and all memory reads are contiguous. |
| 148 */ |
214 if (kernelSizeX > 0 && kernelSizeY > 0) { | 149 if (kernelSizeX > 0 && kernelSizeY > 0) { |
215 boxBlurX(s, sw, t, kernelSizeX, lowOffsetX, highOffsetX, w, h); | 150 SkOpts::box_blur_xx(s, sw, t, kernelSizeX, lowOffsetX, highOffsetX, w
, h); |
216 boxBlurX(t, w, d, kernelSizeX, highOffsetX, lowOffsetX, w, h); | 151 SkOpts::box_blur_xx(t, w, d, kernelSizeX, highOffsetX, lowOffsetX, w
, h); |
217 boxBlurXY(d, w, t, kernelSizeX3, highOffsetX, highOffsetX, w, h); | 152 SkOpts::box_blur_xy(d, w, t, kernelSizeX3, highOffsetX, highOffsetX, w
, h); |
218 boxBlurX(t, h, d, kernelSizeY, lowOffsetY, highOffsetY, h, w); | 153 SkOpts::box_blur_xx(t, h, d, kernelSizeY, lowOffsetY, highOffsetY, h
, w); |
219 boxBlurX(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h, w); | 154 SkOpts::box_blur_xx(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h
, w); |
220 boxBlurXY(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h, w); | 155 SkOpts::box_blur_xy(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h
, w); |
221 } else if (kernelSizeX > 0) { | 156 } else if (kernelSizeX > 0) { |
222 boxBlurX(s, sw, d, kernelSizeX, lowOffsetX, highOffsetX, w, h); | 157 SkOpts::box_blur_xx(s, sw, d, kernelSizeX, lowOffsetX, highOffsetX, w
, h); |
223 boxBlurX(d, w, t, kernelSizeX, highOffsetX, lowOffsetX, w, h); | 158 SkOpts::box_blur_xx(d, w, t, kernelSizeX, highOffsetX, lowOffsetX, w
, h); |
224 boxBlurX(t, w, d, kernelSizeX3, highOffsetX, highOffsetX, w, h); | 159 SkOpts::box_blur_xx(t, w, d, kernelSizeX3, highOffsetX, highOffsetX, w
, h); |
225 } else if (kernelSizeY > 0) { | 160 } else if (kernelSizeY > 0) { |
226 boxBlurYX(s, sw, d, kernelSizeY, lowOffsetY, highOffsetY, h, w); | 161 SkOpts::box_blur_yx(s, sw, d, kernelSizeY, lowOffsetY, highOffsetY, h
, w); |
227 boxBlurX(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h, w); | 162 SkOpts::box_blur_xx(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h
, w); |
228 boxBlurXY(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h, w); | 163 SkOpts::box_blur_xy(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h
, w); |
229 } | 164 } |
230 return true; | 165 return true; |
231 } | 166 } |
232 | 167 |
233 | 168 |
234 void SkBlurImageFilter::computeFastBounds(const SkRect& src, SkRect* dst) const
{ | 169 void SkBlurImageFilter::computeFastBounds(const SkRect& src, SkRect* dst) const
{ |
235 if (this->getInput(0)) { | 170 if (this->getInput(0)) { |
236 this->getInput(0)->computeFastBounds(src, dst); | 171 this->getInput(0)->computeFastBounds(src, dst); |
237 } else { | 172 } else { |
238 *dst = src; | 173 *dst = src; |
(...skipping 14 matching lines...) Expand all Loading... |
253 } | 188 } |
254 *dst = bounds; | 189 *dst = bounds; |
255 return true; | 190 return true; |
256 } | 191 } |
257 | 192 |
258 bool SkBlurImageFilter::filterImageGPU(Proxy* proxy, const SkBitmap& src, const
Context& ctx, | 193 bool SkBlurImageFilter::filterImageGPU(Proxy* proxy, const SkBitmap& src, const
Context& ctx, |
259 SkBitmap* result, SkIPoint* offset) const
{ | 194 SkBitmap* result, SkIPoint* offset) const
{ |
260 #if SK_SUPPORT_GPU | 195 #if SK_SUPPORT_GPU |
261 SkBitmap input = src; | 196 SkBitmap input = src; |
262 SkIPoint srcOffset = SkIPoint::Make(0, 0); | 197 SkIPoint srcOffset = SkIPoint::Make(0, 0); |
263 if (this->getInput(0) && | 198 if (this->getInput(0) && |
264 !this->getInput(0)->getInputResultGPU(proxy, src, ctx, &input, &srcOffse
t)) { | 199 !this->getInput(0)->getInputResultGPU(proxy, src, ctx, &input, &srcOffse
t)) { |
265 return false; | 200 return false; |
266 } | 201 } |
267 SkIRect rect; | 202 SkIRect rect; |
268 if (!this->applyCropRect(ctx, proxy, input, &srcOffset, &rect, &input)) { | 203 if (!this->applyCropRect(ctx, proxy, input, &srcOffset, &rect, &input)) { |
269 return false; | 204 return false; |
270 } | 205 } |
271 GrTexture* source = input.getTexture(); | 206 GrTexture* source = input.getTexture(); |
272 SkVector sigma = mapSigma(fSigma, ctx.ctm()); | 207 SkVector sigma = mapSigma(fSigma, ctx.ctm()); |
273 offset->fX = rect.fLeft; | 208 offset->fX = rect.fLeft; |
(...skipping 22 matching lines...) Expand all Loading... |
296 str->appendf("SkBlurImageFilter: ("); | 231 str->appendf("SkBlurImageFilter: ("); |
297 str->appendf("sigma: (%f, %f) input (", fSigma.fWidth, fSigma.fHeight); | 232 str->appendf("sigma: (%f, %f) input (", fSigma.fWidth, fSigma.fHeight); |
298 | 233 |
299 if (this->getInput(0)) { | 234 if (this->getInput(0)) { |
300 this->getInput(0)->toString(str); | 235 this->getInput(0)->toString(str); |
301 } | 236 } |
302 | 237 |
303 str->append("))"); | 238 str->append("))"); |
304 } | 239 } |
305 #endif | 240 #endif |
OLD | NEW |