src/effects/GrCircleBlurFragmentProcessor.cpp - Issue 1991413002: When building circle blur profile evaluate kernel vertically once per column

Side by Side Diff: src/effects/GrCircleBlurFragmentProcessor.cpp

Issue 1991413002: When building circle blur profile evaluate kernel vertically once per column (Closed) Base URL: https://skia.googlesource.com/skia.git@blursep

Patch Set: cleanup Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright 2015 Google Inc.	2 * Copyright 2015 Google Inc.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 #include "GrCircleBlurFragmentProcessor.h"	8 #include "GrCircleBlurFragmentProcessor.h"

9	9

10 #if SK_SUPPORT_GPU	10 #if SK_SUPPORT_GPU

(...skipping 118 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
129 }	129 }

130 float sum = 0.f;	130 float sum = 0.f;

131 // The half kernel should sum to 0.5 not 1.0.	131 // The half kernel should sum to 0.5 not 1.0.

132 tot *= 2.f;	132 tot *= 2.f;

133 for (int i = 0; i < halfKernelSize; ++i) {	133 for (int i = 0; i < halfKernelSize; ++i) {

134 halfKernel[i] /= tot;	134 halfKernel[i] /= tot;

135 sum += halfKernel[i];	135 sum += halfKernel[i];

136 summedHalfKernel[i] = sum;	136 summedHalfKernel[i] = sum;

137 }	137 }

138 }	138 }

139	139
	robertphillips 2016/05/20 14:44:18: "at a points"? Maybe something more like: "Create a table to store the ..."? Should the name of this method be changed? bsalomon 2016/05/20 15:17:14 (in reply): Done.
140 // Applies the 1D half kernel vertically at a point (x, 0) to a circle centered at the origin with	140 // Applies the 1D half kernel vertically at a points (x, 0) to a circle centered at the origin with

141 // radius circleR.	141 // radius circleR.

142 static float eval_vertically(float x, float circleR, const float* summedHalfKern elTable,	142 void apply_kernel_in_y(float* results, int numSteps, float firstX, float circleR ,

143 int halfKernelSize) {	143 int halfKernelSize, const float* summedHalfKernelTable) {

144 // Given x find the positive y that is on the edge of the circle.	144 float x = firstX;

145 float y = sqrtf(fabs(circleR * circleR - x * x));	145 for (int i = 0; i < numSteps; ++i, x += 1.f) {

146 // In the column at x we exit the circle at +y and -y	146 if (x < -circleR \|\| x > circleR) {
	bsalomon 2016/05/20 00:24:45: I tried breaking this loop up into five parts: 1) x fully out of the circle to the left; 2) x inside the circle, half kernel extends vertically outside the circle; 3) x inside the circle, half kernel contained in y; 4) 2 again; 5) x fully outside the circle to the right. However, I wasn't able to measure a speedup even with nullgpu.
147 // table entry j is actually the kernel evaluated at j + 0.5.	147 results[i] = 0;

148 y -= 0.5f;	148 continue;

149 int yInt = SkScalarFloorToInt(y);	149 }

150 SkASSERT(yInt >= -1);	150 float y = sqrtf(circleR * circleR - x * x);

151 if (y < 0) {	151 // In the column at x we exit the circle at +y and -y

152 return (y + 0.5f) * summedHalfKernelTable[0];	152 // The summed table entry j is actually reflects an offset of j + 0.5.

153 } else if (yInt >= halfKernelSize - 1) {	153 y -= 0.5f;

154 return 0.5f;	154 int yInt = SkScalarFloorToInt(y);

155 } else {	155 SkASSERT(yInt >= -1);

156 float yFrac = y - yInt;	156 if (y < 0) {

157 return (1.f - yFrac) * summedHalfKernelTable[yInt] +	157 results[i] = (y + 0.5f) * summedHalfKernelTable[0];

158 yFrac * summedHalfKernelTable[yInt + 1];	158 } else if (yInt >= halfKernelSize - 1) {

	159 results[i] = 0.5f;

	160 } else {

	161 float yFrac = y - yInt;

	162 results[i] = (1.f - yFrac) * summedHalfKernelTable[yInt] +

	163 yFrac * summedHalfKernelTable[yInt + 1];

	164 }

159 }	165 }

160 }	166 }

161	167
	robertphillips 2016/05/20 14:44:19: Suggested comment: "// Conceptually combine the 2D Gaussian kernel positioned at (evalX, 0) with a circle centered at the origin with radius circleR. In reality, combine the look ups in the halfKernel table (for X) with look ups from the yKernelEvaluations (for Y)." bsalomon 2016/05/20 15:17:14 (in reply): Done.
162 // Apply the kernel at point (t, 0) to a circle centered at the origin with radi us circleR.	168 // Apply the kernel at point (evalX, 0) to a circle centered at the origin with radius circleR.

163 static uint8_t eval_at(float t, float circleR, const float* halfKernel,	169 static uint8_t eval_at(float evalX, float circleR, const float* halfKernel, int halfKernelSize,

164 const float* summedHalfKernelTable, int halfKernelSize) {	170 const float* yKernelEvaluations) {

165 float acc = 0;	171 float acc = 0;

166	172

167 for (int i = 0; i < halfKernelSize; ++i) {	173 float x = evalX - halfKernelSize;

168 float x = t - i - 0.5f;	174 for (int i = 0; i < halfKernelSize; ++i, x += 1.f) {

169 if (x < -circleR \|\| x > circleR) {	175 if (x < -circleR \|\| x > circleR) {

170 continue;	176 continue;

171 }	177 }

172 float verticalEval = eval_vertically(x, circleR, summedHalfKernelTable, halfKernelSize);	178 float verticalEval = yKernelEvaluations[i];

173 acc += verticalEval * halfKernel[i];	179 acc += verticalEval * halfKernel[halfKernelSize - i - 1];

174 }	180 }

175 for (int i = 0; i < halfKernelSize; ++i) {	181 for (int i = 0; i < halfKernelSize; ++i, x += 1.f) {

176 float x = t + i + 0.5f;

177 if (x < -circleR \|\| x > circleR) {	182 if (x < -circleR \|\| x > circleR) {

178 continue;	183 continue;

179 }	184 }

180 float verticalEval = eval_vertically(x, circleR, summedHalfKernelTable, halfKernelSize);	185 float verticalEval = yKernelEvaluations[i + halfKernelSize];

181 acc += verticalEval * halfKernel[i];	186 acc += verticalEval * halfKernel[i];

182 }	187 }

183 // Since we applied a half kernel in y we multiply acc by 2 (the circle is s ymmetric about the	188 // Since we applied a half kernel in y we multiply acc by 2 (the circle is s ymmetric about the

184 // x axis).	189 // x axis).

185 return SkUnitScalarClampToByte(2.f * acc);	190 return SkUnitScalarClampToByte(2.f * acc);

186 }	191 }

187	192

188 static inline void compute_profile_offset_and_size(float circleR, float sigma,	193 static inline void compute_profile_offset_and_size(float circleR, float sigma,

189 float* offset, int* size) {	194 float* offset, int* size) {

190 if (3*sigma <= circleR) {	195 if (3*sigma <= circleR) {

191 // The circle is bigger than the Gaussian. In this case we know the inte rior of the	196 // The circle is bigger than the Gaussian. In this case we know the inte rior of the

192 // blurred circle is solid.	197 // blurred circle is solid.

193 *offset = circleR - 3 * sigma; // This location maps to 0.5f in the weights texture.	198 *offset = circleR - 3 * sigma; // This location maps to 0.5f in the weights texture.

194 // It should always be 255.	199 // It should always be 255.

195 *size = SkScalarCeilToInt(6*sigma);	200 *size = SkScalarCeilToInt(6*sigma);

196 } else {	201 } else {

197 // The Gaussian is bigger than the circle.	202 // The Gaussian is bigger than the circle.

198 *offset = 0.0f;	203 *offset = 0.0f;

199 *size = SkScalarCeilToInt(circleR + 3*sigma);	204 *size = SkScalarCeilToInt(circleR + 3*sigma);

200 }	205 }

201 }	206 }

202	207
	robertphillips 2016/05/20 14:44:19: Seems like this might need updating. bsalomon 2016/05/20 15:17:14 (in reply): Done.
203 // This function creates a profile of a blurred circle. It does this by computin g a kernel for	208 // This function creates a profile of a blurred circle. It does this by computin g a kernel for

204 // half the Gaussian and a matching summed area table. To compute a profile valu e at x = r it steps	209 // half the Gaussian and a matching summed area table. To compute a profile valu e at x = r it steps

205 // outward in x from (r, 0) in both directions. There is a step for each directi on for each entry	210 // outward in x from (r, 0) in both directions. There is a step for each directi on for each entry

206 // in the half kernel. The y contribution at each step is computed from the summ ed area table using	211 // in the half kernel. The y contribution at each step is computed from the summ ed area table using

207 // the height of the circle above the step point. Each y contribution is multipl ied by the half	212 // the height of the circle above the step point. Each y contribution is multipl ied by the half

208 // kernel value corresponding to the step in x.	213 // kernel value corresponding to the step in x.

209 static uint8_t* create_profile(float circleR, float sigma) {	214 static uint8_t* create_profile(float circleR, float sigma) {

210 float offset;	215 float offset;

211 int numSteps;	216 int numSteps;

212 compute_profile_offset_and_size(circleR, sigma, &offset, &numSteps);	217 compute_profile_offset_and_size(circleR, sigma, &offset, &numSteps);

213	218

214 uint8_t* weights = new uint8_t[numSteps];	219 uint8_t* weights = new uint8_t[numSteps];

215	220

216 // The full kernel is 6 sigmas wide.	221 // The full kernel is 6 sigmas wide.

217 int halfKernelSize = SkScalarCeilToInt(6.0f*sigma);	222 int halfKernelSize = SkScalarCeilToInt(6.0f*sigma);

218 // round up to next multiple of 2 and then divide by 2	223 // round up to next multiple of 2 and then divide by 2

219 halfKernelSize = ((halfKernelSize + 1) & ~1) >> 1;	224 halfKernelSize = ((halfKernelSize + 1) & ~1) >> 1;

220 SkAutoTArray<float> halfKernel(halfKernelSize);	225

221 SkAutoTArray<float> summedKernel(halfKernelSize);	226 // Number of x steps at which to apply kernel in y to cover all the profile samples in x.

222 make_half_kernel_and_summed_table(halfKernel.get(), summedKernel.get(), half KernelSize,	227 int numYSteps = numSteps + 2 * halfKernelSize;

223 sigma);	228

	229 SkAutoTArray<float> bulkAlloc(halfKernelSize + halfKernelSize + numYSteps);

	230 float* halfKernel = bulkAlloc.get();

	231 float* summedKernel = bulkAlloc.get() + halfKernelSize;

	232 float* yEvals = bulkAlloc.get() + 2 * halfKernelSize;

	233 make_half_kernel_and_summed_table(halfKernel, summedKernel, halfKernelSize, sigma);

	234

	235 float firstX = offset - halfKernelSize + 0.5;

	236 apply_kernel_in_y(yEvals, numYSteps, firstX, circleR, halfKernelSize, summed Kernel);

	237

224 for (int i = 0; i < numSteps - 1; ++i) {	238 for (int i = 0; i < numSteps - 1; ++i) {

225 weights[i] = eval_at(offset+i, circleR, halfKernel.get(), summedKernel.g et(),	239 float evalX = offset + i + 0.5f;

226 halfKernelSize);	240 weights[i] = eval_at(evalX, circleR, halfKernel, halfKernelSize, yEvals + i);

227 }	241 }

228 // Ensure the tail of the Gaussian goes to zero.	242 // Ensure the tail of the Gaussian goes to zero.

229 weights[numSteps - 1] = 0;	243 weights[numSteps - 1] = 0;

230 return weights;	244 return weights;

231 }	245 }

232	246

233 GrTexture* GrCircleBlurFragmentProcessor::CreateCircleBlurProfileTexture(	247 GrTexture* GrCircleBlurFragmentProcessor::CreateCircleBlurProfileTexture(

234 GrTextureProvide r* textureProvider,	248 GrTextureProvide r* textureProvider,

235 const SkRect& ci rcle,	249 const SkRect& ci rcle,

236 float sigma,	250 float sigma,

(...skipping 35 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
272 GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrCircleBlurFragmentProcessor);	286 GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrCircleBlurFragmentProcessor);

273	287

274 const GrFragmentProcessor* GrCircleBlurFragmentProcessor::TestCreate(GrProcessor TestData* d) {	288 const GrFragmentProcessor* GrCircleBlurFragmentProcessor::TestCreate(GrProcessor TestData* d) {

275 SkScalar wh = d->fRandom->nextRangeScalar(100.f, 1000.f);	289 SkScalar wh = d->fRandom->nextRangeScalar(100.f, 1000.f);

276 SkScalar sigma = d->fRandom->nextRangeF(1.f,10.f);	290 SkScalar sigma = d->fRandom->nextRangeF(1.f,10.f);

277 SkRect circle = SkRect::MakeWH(wh, wh);	291 SkRect circle = SkRect::MakeWH(wh, wh);

278 return GrCircleBlurFragmentProcessor::Create(d->fContext->textureProvider(), circle, sigma);	292 return GrCircleBlurFragmentProcessor::Create(d->fContext->textureProvider(), circle, sigma);

279 }	293 }

280	294

281 #endif	295 #endif

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »