| Index: src/gpu/effects/GrConvolutionEffect.cpp
|
| diff --git a/src/gpu/effects/GrConvolutionEffect.cpp b/src/gpu/effects/GrConvolutionEffect.cpp
|
| index 1fb2e9598f9dc3ac4cd12c92b96aa9aa8016948e..4f091384935f9c9199880d8151104162f6b4a6f6 100644
|
| --- a/src/gpu/effects/GrConvolutionEffect.cpp
|
| +++ b/src/gpu/effects/GrConvolutionEffect.cpp
|
| @@ -46,9 +46,12 @@ void GrGLConvolutionEffect::emitCode(EmitArgs& args) {
|
|
|
| int width = Gr1DKernelEffect::WidthFromRadius(ce.radius());
|
|
|
| + int arrayCount = (width + 3) / 4;
|
| + SkASSERT(4 * arrayCount >= width);
|
| +
|
| fKernelUni = uniformHandler->addUniformArray(kFragment_GrShaderFlag,
|
| - kFloat_GrSLType, kDefault_GrSLPrecision,
|
| - "Kernel", width);
|
| + kVec4f_GrSLType, kDefault_GrSLPrecision,
|
| + "Kernel", arrayCount);
|
|
|
| GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
|
| SkString coords2D = fragBuilder->ensureFSCoords2D(args.fCoords, 0);
|
| @@ -61,11 +64,13 @@ void GrGLConvolutionEffect::emitCode(EmitArgs& args) {
|
| fragBuilder->codeAppendf("vec2 coord = %s - %d.0 * %s;", coords2D.c_str(), ce.radius(), imgInc);
|
|
|
| // Manually unroll loop because some drivers don't; yields 20-30% speedup.
|
| + const char* kVecSuffix[4] = { ".x", ".y", ".z", ".w" };
|
| for (int i = 0; i < width; i++) {
|
| SkString index;
|
| SkString kernelIndex;
|
| - index.appendS32(i);
|
| + index.appendS32(i/4);
|
| kernel.appendArrayAccess(index.c_str(), &kernelIndex);
|
| + kernelIndex.append(kVecSuffix[i & 0x3]);
|
|
|
| if (ce.useBounds()) {
|
| // We used to compute a bool indicating whether we're in bounds or not, cast it to a
|
| @@ -119,7 +124,9 @@ void GrGLConvolutionEffect::onSetData(const GrGLSLProgramDataManager& pdman,
|
| }
|
| int width = Gr1DKernelEffect::WidthFromRadius(conv.radius());
|
|
|
| - pdman.set1fv(fKernelUni, width, conv.kernel());
|
| + int arrayCount = (width + 3) / 4;
|
| + SkASSERT(4 * arrayCount >= width);
|
| + pdman.set4fv(fKernelUni, arrayCount, conv.kernel());
|
| }
|
|
|
| void GrGLConvolutionEffect::GenKey(const GrProcessor& processor, const GrGLSLCaps&,
|
|
|