Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1504)

Side by Side Diff: src/effects/SkBlurImageFilter.cpp

Issue 61643011: SSE2 implementation of RGBA box blurs. This yields ~2X perf improvement on (Closed) Base URL: https://skia.googlecode.com/svn/trunk
Patch Set: Add another missing file. Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * Copyright 2011 The Android Open Source Project 2 * Copyright 2011 The Android Open Source Project
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkBitmap.h" 8 #include "SkBitmap.h"
9 #include "SkBlurImageFilter.h" 9 #include "SkBlurImageFilter.h"
10 #include "SkColorPriv.h" 10 #include "SkColorPriv.h"
11 #include "SkFlattenableBuffers.h" 11 #include "SkFlattenableBuffers.h"
12 #include "SkGpuBlurUtils.h" 12 #include "SkGpuBlurUtils.h"
13 #include "SkBlurImage_opts.h"
13 #if SK_SUPPORT_GPU 14 #if SK_SUPPORT_GPU
14 #include "GrContext.h" 15 #include "GrContext.h"
15 #include "SkImageFilterUtils.h" 16 #include "SkImageFilterUtils.h"
16 #endif 17 #endif
17 18
18 SkBlurImageFilter::SkBlurImageFilter(SkFlattenableReadBuffer& buffer) 19 SkBlurImageFilter::SkBlurImageFilter(SkFlattenableReadBuffer& buffer)
19 : INHERITED(buffer) { 20 : INHERITED(buffer) {
20 fSigma.fWidth = buffer.readScalar(); 21 fSigma.fWidth = buffer.readScalar();
21 fSigma.fHeight = buffer.readScalar(); 22 fSigma.fHeight = buffer.readScalar();
22 buffer.validate(SkScalarIsFinite(fSigma.fWidth) && 23 buffer.validate(SkScalarIsFinite(fSigma.fWidth) &&
23 SkScalarIsFinite(fSigma.fHeight) && 24 SkScalarIsFinite(fSigma.fHeight) &&
24 (fSigma.fWidth >= 0) && 25 (fSigma.fWidth >= 0) &&
25 (fSigma.fHeight >= 0)); 26 (fSigma.fHeight >= 0));
26 } 27 }
27 28
28 SkBlurImageFilter::SkBlurImageFilter(SkScalar sigmaX, 29 SkBlurImageFilter::SkBlurImageFilter(SkScalar sigmaX,
29 SkScalar sigmaY, 30 SkScalar sigmaY,
30 SkImageFilter* input, 31 SkImageFilter* input,
31 const CropRect* cropRect) 32 const CropRect* cropRect)
32 : INHERITED(input, cropRect), fSigma(SkSize::Make(sigmaX, sigmaY)) { 33 : INHERITED(input, cropRect), fSigma(SkSize::Make(sigmaX, sigmaY)) {
33 SkASSERT(sigmaX >= 0 && sigmaY >= 0); 34 SkASSERT(sigmaX >= 0 && sigmaY >= 0);
34 } 35 }
35 36
36 void SkBlurImageFilter::flatten(SkFlattenableWriteBuffer& buffer) const { 37 void SkBlurImageFilter::flatten(SkFlattenableWriteBuffer& buffer) const {
37 this->INHERITED::flatten(buffer); 38 this->INHERITED::flatten(buffer);
38 buffer.writeScalar(fSigma.fWidth); 39 buffer.writeScalar(fSigma.fWidth);
39 buffer.writeScalar(fSigma.fHeight); 40 buffer.writeScalar(fSigma.fHeight);
40 } 41 }
41 42
42 enum BlurDirection {
43 kX, kY
44 };
45
46 /** 43 /**
47 * 44 *
48 * In order to make memory accesses cache-friendly, we reorder the passes to 45 * In order to make memory accesses cache-friendly, we reorder the passes to
49 * use contiguous memory reads wherever possible. 46 * use contiguous memory reads wherever possible.
50 * 47 *
51 * For example, the 6 passes of the X-and-Y blur case are rewritten as 48 * For example, the 6 passes of the X-and-Y blur case are rewritten as
52 * follows. Instead of 3 passes in X and 3 passes in Y, we perform 49 * follows. Instead of 3 passes in X and 3 passes in Y, we perform
53 * 2 passes in X, 1 pass in X transposed to Y on write, 2 passes in X, 50 * 2 passes in X, 1 pass in X transposed to Y on write, 2 passes in X,
54 * then 1 pass in X transposed to Y on write. 51 * then 1 pass in X transposed to Y on write.
55 * 52 *
56 * +----+ +----+ +----+ +---+ +---+ +---+ +----+ 53 * +----+ +----+ +----+ +---+ +---+ +---+ +----+
57 * + AB + ----> | AB | ----> | AB | -----> | A | ----> | A | ----> | A | -----> | AB | 54 * + AB + ----> | AB | ----> | AB | -----> | A | ----> | A | ----> | A | -----> | AB |
58 * +----+ blurX +----+ blurX +----+ blurXY | B | blurX | B | blurX | B | blurXY +----+ 55 * +----+ blurX +----+ blurX +----+ blurXY | B | blurX | B | blurX | B | blurXY +----+
59 * +---+ +---+ +---+ 56 * +---+ +---+ +---+
60 * 57 *
61 * In this way, two of the y-blurs become x-blurs applied to transposed 58 * In this way, two of the y-blurs become x-blurs applied to transposed
62 * images, and all memory reads are contiguous. 59 * images, and all memory reads are contiguous.
63 */ 60 */
64 61
65 template<BlurDirection srcDirection, BlurDirection dstDirection> 62 template<SkBlurDirection srcDirection, SkBlurDirection dstDirection>
66 static void boxBlur(const SkPMColor* src, int srcStride, SkPMColor* dst, int kernelSize, 63 static void boxBlur(const SkPMColor* src, int srcStride, SkPMColor* dst, int kernelSize,
67 int leftOffset, int rightOffset, int width, int height) 64 int leftOffset, int rightOffset, int width, int height)
68 { 65 {
69 int rightBorder = SkMin32(rightOffset + 1, width); 66 int rightBorder = SkMin32(rightOffset + 1, width);
70 int srcStrideX = srcDirection == kX ? 1 : srcStride; 67 int srcStrideX = srcDirection == kX_BlurDirection ? 1 : srcStride;
71 int dstStrideX = dstDirection == kX ? 1 : height; 68 int dstStrideX = dstDirection == kX_BlurDirection ? 1 : height;
72 int srcStrideY = srcDirection == kX ? srcStride : 1; 69 int srcStrideY = srcDirection == kX_BlurDirection ? srcStride : 1;
73 int dstStrideY = dstDirection == kX ? width : 1; 70 int dstStrideY = dstDirection == kX_BlurDirection ? width : 1;
74 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION 71 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION
75 uint32_t scale = (1 << 24) / kernelSize; 72 uint32_t scale = (1 << 24) / kernelSize;
76 uint32_t half = 1 << 23; 73 uint32_t half = 1 << 23;
77 #endif 74 #endif
78 for (int y = 0; y < height; ++y) { 75 for (int y = 0; y < height; ++y) {
79 int sumA = 0, sumR = 0, sumG = 0, sumB = 0; 76 int sumA = 0, sumR = 0, sumG = 0, sumB = 0;
80 const SkPMColor* p = src; 77 const SkPMColor* p = src;
81 for (int i = 0; i < rightBorder; ++i) { 78 for (int i = 0; i < rightBorder; ++i) {
82 sumA += SkGetPackedA32(*p); 79 sumA += SkGetPackedA32(*p);
83 sumR += SkGetPackedR32(*p); 80 sumR += SkGetPackedR32(*p);
(...skipping 24 matching lines...) Expand all
108 sumB -= SkGetPackedB32(l); 105 sumB -= SkGetPackedB32(l);
109 } 106 }
110 if (x + rightOffset + 1 < width) { 107 if (x + rightOffset + 1 < width) {
111 SkColor r = *(sptr + (rightOffset + 1) * srcStrideX); 108 SkColor r = *(sptr + (rightOffset + 1) * srcStrideX);
112 sumA += SkGetPackedA32(r); 109 sumA += SkGetPackedA32(r);
113 sumR += SkGetPackedR32(r); 110 sumR += SkGetPackedR32(r);
114 sumG += SkGetPackedG32(r); 111 sumG += SkGetPackedG32(r);
115 sumB += SkGetPackedB32(r); 112 sumB += SkGetPackedB32(r);
116 } 113 }
117 sptr += srcStrideX; 114 sptr += srcStrideX;
118 if (srcDirection == kY) { 115 if (srcDirection == kY_BlurDirection) {
119 SK_PREFETCH(sptr + (rightOffset + 1) * srcStrideX); 116 SK_PREFETCH(sptr + (rightOffset + 1) * srcStrideX);
120 } 117 }
121 dptr += dstStrideX; 118 dptr += dstStrideX;
122 } 119 }
123 src += srcStrideY; 120 src += srcStrideY;
124 dst += dstStrideY; 121 dst += dstStrideY;
125 } 122 }
126 } 123 }
127 124
128 static void boxBlurX(const SkPMColor* src, int srcStride, SkPMColor* dst, int kernelSize,
129 int leftOffset, int rightOffset, int width, int height)
130 {
131 boxBlur<kX, kX>(src, srcStride, dst, kernelSize, leftOffset, rightOffset, width, height);
132 }
133
134 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION
135 static void boxBlurXY(const SkPMColor* src, int srcStride, SkPMColor* dst, int kernelSize,
136 int leftOffset, int rightOffset, int width, int height)
137 {
138 boxBlur<kX, kY>(src, srcStride, dst, kernelSize, leftOffset, rightOffset, width, height);
139 }
140 #endif
141
142 static void boxBlurY(const SkPMColor* src, int srcStride, SkPMColor* dst, int kernelSize,
143 int topOffset, int bottomOffset, int width, int height)
144 {
145 boxBlur<kY, kY>(src, srcStride, dst, kernelSize, topOffset, bottomOffset, width, height);
146 }
147
148 static void getBox3Params(SkScalar s, int *kernelSize, int* kernelSize3, int *lowOffset, 125 static void getBox3Params(SkScalar s, int *kernelSize, int* kernelSize3, int *lowOffset,
149 int *highOffset) 126 int *highOffset)
150 { 127 {
151 float pi = SkScalarToFloat(SK_ScalarPI); 128 float pi = SkScalarToFloat(SK_ScalarPI);
152 int d = static_cast<int>(floorf(SkScalarToFloat(s) * 3.0f * sqrtf(2.0f * pi) / 4.0f + 0.5f)); 129 int d = static_cast<int>(floorf(SkScalarToFloat(s) * 3.0f * sqrtf(2.0f * pi) / 4.0f + 0.5f));
153 *kernelSize = d; 130 *kernelSize = d;
154 if (d % 2 == 1) { 131 if (d % 2 == 1) {
155 *lowOffset = *highOffset = (d - 1) / 2; 132 *lowOffset = *highOffset = (d - 1) / 2;
156 *kernelSize3 = d; 133 *kernelSize3 = d;
157 } else { 134 } else {
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
205 temp.setConfig(dst->config(), dst->width(), dst->height()); 182 temp.setConfig(dst->config(), dst->width(), dst->height());
206 if (!temp.allocPixels()) { 183 if (!temp.allocPixels()) {
207 return false; 184 return false;
208 } 185 }
209 186
210 const SkPMColor* s = src.getAddr32(srcBounds.left(), srcBounds.top()); 187 const SkPMColor* s = src.getAddr32(srcBounds.left(), srcBounds.top());
211 SkPMColor* t = temp.getAddr32(0, 0); 188 SkPMColor* t = temp.getAddr32(0, 0);
212 SkPMColor* d = dst->getAddr32(0, 0); 189 SkPMColor* d = dst->getAddr32(0, 0);
213 int w = dstBounds.width(), h = dstBounds.height(); 190 int w = dstBounds.width(), h = dstBounds.height();
214 int sw = src.rowBytesAsPixels(); 191 int sw = src.rowBytesAsPixels();
192 SkBoxBlurProc boxBlurX = SkBoxBlurGetPlatformProc(kX_BlurDirection, kX_BlurDirection);
mtklein 2013/11/08 02:11:03 This feels weird, like unnecessary degrees of free
Stephen White 2013/11/08 15:47:47 Done.
193 if (!boxBlurX) {
194 boxBlurX = boxBlur<kX_BlurDirection, kX_BlurDirection>;
195 }
196 SkBoxBlurProc boxBlurY = SkBoxBlurGetPlatformProc(kY_BlurDirection, kY_BlurDirection);
197 if (!boxBlurY) {
198 boxBlurY = boxBlur<kY_BlurDirection, kY_BlurDirection>;
199 }
200 SkBoxBlurProc boxBlurXY = SkBoxBlurGetPlatformProc(kX_BlurDirection, kY_BlurDirection);
201 if (!boxBlurY) {
mtklein 2013/11/08 02:11:03 oh dear. -> boxBlurXY?
Stephen White 2013/11/08 15:47:47 Thanks! Good catch. Should be obsoleted by new cod
202 boxBlurXY = boxBlur<kX_BlurDirection, kY_BlurDirection>;
203 }
204
215 if (kernelSizeX > 0 && kernelSizeY > 0) { 205 if (kernelSizeX > 0 && kernelSizeY > 0) {
216 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION 206 #ifndef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION
217 boxBlurX(s, sw, t, kernelSizeX, lowOffsetX, highOffsetX, w, h); 207 boxBlurX(s, sw, t, kernelSizeX, lowOffsetX, highOffsetX, w, h);
218 boxBlurX(t, w, d, kernelSizeX, highOffsetX, lowOffsetX, w, h); 208 boxBlurX(t, w, d, kernelSizeX, highOffsetX, lowOffsetX, w, h);
219 boxBlurXY(d, w, t, kernelSizeX3, highOffsetX, highOffsetX, w, h); 209 boxBlurXY(d, w, t, kernelSizeX3, highOffsetX, highOffsetX, w, h);
220 boxBlurX(t, h, d, kernelSizeY, lowOffsetY, highOffsetY, h, w); 210 boxBlurX(t, h, d, kernelSizeY, lowOffsetY, highOffsetY, h, w);
221 boxBlurX(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h, w); 211 boxBlurX(d, h, t, kernelSizeY, highOffsetY, lowOffsetY, h, w);
222 boxBlurXY(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h, w); 212 boxBlurXY(t, h, d, kernelSizeY3, highOffsetY, highOffsetY, h, w);
223 #else 213 #else
224 boxBlurX(s, sw, t, kernelSizeX, lowOffsetX, highOffsetX, w, h); 214 boxBlurX(s, sw, t, kernelSizeX, lowOffsetX, highOffsetX, w, h);
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
263 fSigma.width(), 253 fSigma.width(),
264 fSigma.height())); 254 fSigma.height()));
265 offset->fX += rect.fLeft; 255 offset->fX += rect.fLeft;
266 offset->fY += rect.fTop; 256 offset->fY += rect.fTop;
267 return SkImageFilterUtils::WrapTexture(tex, rect.width(), rect.height(), result); 257 return SkImageFilterUtils::WrapTexture(tex, rect.width(), rect.height(), result);
268 #else 258 #else
269 SkDEBUGFAIL("Should not call in GPU-less build"); 259 SkDEBUGFAIL("Should not call in GPU-less build");
270 return false; 260 return false;
271 #endif 261 #endif
272 } 262 }
OLDNEW
« no previous file with comments | « gyp/opts.gyp ('k') | src/opts/SkBlurImage_opts.h » ('j') | src/opts/SkBlurImage_opts_SSE2.h » ('J')

Powered by Google App Engine
This is Rietveld 408576698