| Index: src/gpu/effects/GrConvolutionEffect.cpp
|
| diff --git a/src/gpu/effects/GrConvolutionEffect.cpp b/src/gpu/effects/GrConvolutionEffect.cpp
|
| index 4f091384935f9c9199880d8151104162f6b4a6f6..33920ac7ba09217d7c7d9afd115580e4a8e7649b 100644
|
| --- a/src/gpu/effects/GrConvolutionEffect.cpp
|
| +++ b/src/gpu/effects/GrConvolutionEffect.cpp
|
| @@ -14,13 +14,61 @@
|
| // For brevity
|
| typedef GrGLSLProgramDataManager::UniformHandle UniformHandle;
|
|
|
| +/**
|
| + * Base class with shared functionality for GrGLBoundedConvolutionEffect and
|
| + * GrGLLerpConvolutionEffect.
|
| + */
|
| class GrGLConvolutionEffect : public GrGLSLFragmentProcessor {
|
| public:
|
| - void emitCode(EmitArgs&) override;
|
| -
|
| static inline void GenKey(const GrProcessor&, const GrGLSLCaps&, GrProcessorKeyBuilder*);
|
|
|
| protected:
|
| + void getImageIncrement(const GrConvolutionEffect&, float (*)[2]) const;
|
| +
|
| +private:
|
| + typedef GrGLSLFragmentProcessor INHERITED;
|
| +};
|
| +
|
| +void GrGLConvolutionEffect::GenKey(const GrProcessor& processor,
|
| + const GrGLSLCaps&,
|
| + GrProcessorKeyBuilder* b) {
|
| + const GrConvolutionEffect& ce = processor.cast<GrConvolutionEffect>();
|
| + uint32_t key = ce.radius();
|
| + key <<= 2;
|
| + if (ce.useBounds()) {
|
| + key |= 0x2;
|
| + key |= GrConvolutionEffect::kY_Direction == ce.direction() ? 0x1 : 0x0;
|
| + }
|
| + b->add32(key);
|
| +}
|
| +
|
| +void GrGLConvolutionEffect::getImageIncrement(const GrConvolutionEffect& ce,
|
| + float (*imageIncrement)[2]) const {
|
| + GrTexture& texture = *ce.texture(0);
|
| + (*imageIncrement)[0] = (*imageIncrement)[1] = 0;
|
| + float ySign = texture.origin() != kTopLeft_GrSurfaceOrigin ? 1.0f : -1.0f;
|
| + switch (ce.direction()) {
|
| + case Gr1DKernelEffect::kX_Direction:
|
| + (*imageIncrement)[0] = 1.0f / texture.width();
|
| + break;
|
| + case Gr1DKernelEffect::kY_Direction:
|
| + (*imageIncrement)[1] = ySign / texture.height();
|
| + break;
|
| + default:
|
| + SkFAIL("Unknown filter direction.");
|
| + }
|
| +}
|
| +
|
| +///////////////////////////////////////////////////////////////////////////////
|
| +
|
| +/**
|
| + * Applies a convolution effect which restricts samples to the provided bounds
|
| + * using shader logic.
|
| + */
|
| +class GrGLBoundedConvolutionEffect : public GrGLConvolutionEffect {
|
| +public:
|
| + virtual void emitCode(EmitArgs&) override;
|
| +
|
| void onSetData(const GrGLSLProgramDataManager& pdman, const GrProcessor&) override;
|
|
|
| private:
|
| @@ -28,66 +76,52 @@ private:
|
| UniformHandle fImageIncrementUni;
|
| UniformHandle fBoundsUni;
|
|
|
| - typedef GrGLSLFragmentProcessor INHERITED;
|
| + typedef GrGLConvolutionEffect INHERITED;
|
| };
|
|
|
| -void GrGLConvolutionEffect::emitCode(EmitArgs& args) {
|
| +void GrGLBoundedConvolutionEffect::emitCode(EmitArgs& args) {
|
| const GrConvolutionEffect& ce = args.fFp.cast<GrConvolutionEffect>();
|
| -
|
| +
|
| GrGLSLUniformHandler* uniformHandler = args.fUniformHandler;
|
| - fImageIncrementUni = uniformHandler->addUniform(kFragment_GrShaderFlag,
|
| - kVec2f_GrSLType, kDefault_GrSLPrecision,
|
| - "ImageIncrement");
|
| - if (ce.useBounds()) {
|
| - fBoundsUni = uniformHandler->addUniform(kFragment_GrShaderFlag,
|
| - kVec2f_GrSLType, kDefault_GrSLPrecision,
|
| - "Bounds");
|
| - }
|
| + fImageIncrementUni =
|
| + uniformHandler->addUniform(kFragment_GrShaderFlag, kVec2f_GrSLType,
|
| + kDefault_GrSLPrecision, "ImageIncrement");
|
| + fBoundsUni = uniformHandler->addUniform(kFragment_GrShaderFlag, kVec2f_GrSLType,
|
| + kDefault_GrSLPrecision, "Bounds");
|
|
|
| int width = Gr1DKernelEffect::WidthFromRadius(ce.radius());
|
| + fKernelUni = uniformHandler->addUniformArray(kFragment_GrShaderFlag, kFloat_GrSLType,
|
| + kDefault_GrSLPrecision, "Kernel", width);
|
|
|
| - int arrayCount = (width + 3) / 4;
|
| - SkASSERT(4 * arrayCount >= width);
|
| -
|
| - fKernelUni = uniformHandler->addUniformArray(kFragment_GrShaderFlag,
|
| - kVec4f_GrSLType, kDefault_GrSLPrecision,
|
| - "Kernel", arrayCount);
|
| -
|
| - GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
|
| + GrGLSLFragmentBuilder* fragBuilder = args.fFragBuilder;
|
| SkString coords2D = fragBuilder->ensureFSCoords2D(args.fCoords, 0);
|
|
|
| - fragBuilder->codeAppendf("%s = vec4(0, 0, 0, 0);", args.fOutputColor);
|
| + fragBuilder->codeAppendf("%s = vec4(0, 0, 0, 0);\n", args.fOutputColor);
|
|
|
| const GrGLSLShaderVar& kernel = uniformHandler->getUniformVariable(fKernelUni);
|
| const char* imgInc = uniformHandler->getUniformCStr(fImageIncrementUni);
|
|
|
| - fragBuilder->codeAppendf("vec2 coord = %s - %d.0 * %s;", coords2D.c_str(), ce.radius(), imgInc);
|
| + fragBuilder->codeAppendf("vec2 coord = %s - %d.0 * %s;\n", coords2D.c_str(), ce.radius(),
|
| + imgInc);
|
|
|
| // Manually unroll loop because some drivers don't; yields 20-30% speedup.
|
| - const char* kVecSuffix[4] = { ".x", ".y", ".z", ".w" };
|
| for (int i = 0; i < width; i++) {
|
| SkString index;
|
| SkString kernelIndex;
|
| - index.appendS32(i/4);
|
| + index.appendS32(i);
|
| kernel.appendArrayAccess(index.c_str(), &kernelIndex);
|
| - kernelIndex.append(kVecSuffix[i & 0x3]);
|
| -
|
| - if (ce.useBounds()) {
|
| - // We used to compute a bool indicating whether we're in bounds or not, cast it to a
|
| - // float, and then mul weight*texture_sample by the float. However, the Adreno 430 seems
|
| - // to have a bug that caused corruption.
|
| - const char* bounds = uniformHandler->getUniformCStr(fBoundsUni);
|
| - const char* component = ce.direction() == Gr1DKernelEffect::kY_Direction ? "y" : "x";
|
| - fragBuilder->codeAppendf("if (coord.%s >= %s.x && coord.%s <= %s.y) {",
|
| - component, bounds, component, bounds);
|
| - }
|
| - fragBuilder->codeAppendf("\t\t%s += ", args.fOutputColor);
|
| + // We used to compute a bool indicating whether we're in bounds or not, cast it to a
|
| + // float, and then mul weight*texture_sample by the float. However, the Adreno 430 seems
|
| + // to have a bug that caused corruption.
|
| + const char* bounds = uniformHandler->getUniformCStr(fBoundsUni);
|
| + const char* component = ce.direction() == Gr1DKernelEffect::kY_Direction ? "y" : "x";
|
| + fragBuilder->codeAppendf("if (coord.%s >= %s.x && coord.%s <= %s.y) {",
|
| + component, bounds, component, bounds);
|
| + fragBuilder->codeAppendf("%s += ", args.fOutputColor);
|
| fragBuilder->appendTextureLookup(args.fSamplers[0], "coord");
|
| fragBuilder->codeAppendf(" * %s;\n", kernelIndex.c_str());
|
| - if (ce.useBounds()) {
|
| - fragBuilder->codeAppend("}");
|
| - }
|
| - fragBuilder->codeAppendf("\t\tcoord += %s;\n", imgInc);
|
| + fragBuilder->codeAppend("}");
|
| + fragBuilder->codeAppendf("coord += %s;\n", imgInc);
|
| }
|
|
|
| SkString modulate;
|
| @@ -95,50 +129,154 @@ void GrGLConvolutionEffect::emitCode(EmitArgs& args) {
|
| fragBuilder->codeAppend(modulate.c_str());
|
| }
|
|
|
| -void GrGLConvolutionEffect::onSetData(const GrGLSLProgramDataManager& pdman,
|
| - const GrProcessor& processor) {
|
| - const GrConvolutionEffect& conv = processor.cast<GrConvolutionEffect>();
|
| - GrTexture& texture = *conv.texture(0);
|
| +void GrGLBoundedConvolutionEffect::onSetData(const GrGLSLProgramDataManager& pdman,
|
| + const GrProcessor& processor) {
|
| + const GrConvolutionEffect& ce = processor.cast<GrConvolutionEffect>();
|
|
|
| - float imageIncrement[2] = { 0 };
|
| - float ySign = texture.origin() != kTopLeft_GrSurfaceOrigin ? 1.0f : -1.0f;
|
| - switch (conv.direction()) {
|
| - case Gr1DKernelEffect::kX_Direction:
|
| - imageIncrement[0] = 1.0f / texture.width();
|
| - break;
|
| - case Gr1DKernelEffect::kY_Direction:
|
| - imageIncrement[1] = ySign / texture.height();
|
| - break;
|
| - default:
|
| - SkFAIL("Unknown filter direction.");
|
| - }
|
| + // the code we generated was for a specific bounding mode.
|
| + SkASSERT(ce.useBounds());
|
| +
|
| + GrTexture& texture = *ce.texture(0);
|
| + float imageIncrement[2];
|
| + getImageIncrement(ce, &imageIncrement);
|
| pdman.set2fv(fImageIncrementUni, 1, imageIncrement);
|
| - if (conv.useBounds()) {
|
| - const float* bounds = conv.bounds();
|
| - if (Gr1DKernelEffect::kY_Direction == conv.direction() &&
|
| - texture.origin() != kTopLeft_GrSurfaceOrigin) {
|
| - pdman.set2f(fBoundsUni, 1.0f - bounds[1], 1.0f - bounds[0]);
|
| - } else {
|
| - pdman.set2f(fBoundsUni, bounds[0], bounds[1]);
|
| - }
|
| + const float* bounds = ce.bounds();
|
| + if (Gr1DKernelEffect::kY_Direction == ce.direction() &&
|
| + texture.origin() != kTopLeft_GrSurfaceOrigin) {
|
| + pdman.set2f(fBoundsUni, 1.0f - bounds[1], 1.0f - bounds[0]);
|
| + } else {
|
| + pdman.set2f(fBoundsUni, bounds[0], bounds[1]);
|
| }
|
| - int width = Gr1DKernelEffect::WidthFromRadius(conv.radius());
|
|
|
| - int arrayCount = (width + 3) / 4;
|
| - SkASSERT(4 * arrayCount >= width);
|
| - pdman.set4fv(fKernelUni, arrayCount, conv.kernel());
|
| + int width = Gr1DKernelEffect::WidthFromRadius(ce.radius());
|
| + pdman.set1fv(fKernelUni, width, ce.kernel());
|
| }
|
|
|
| -void GrGLConvolutionEffect::GenKey(const GrProcessor& processor, const GrGLSLCaps&,
|
| - GrProcessorKeyBuilder* b) {
|
| - const GrConvolutionEffect& conv = processor.cast<GrConvolutionEffect>();
|
| - uint32_t key = conv.radius();
|
| - key <<= 2;
|
| - if (conv.useBounds()) {
|
| - key |= 0x2;
|
| - key |= GrConvolutionEffect::kY_Direction == conv.direction() ? 0x1 : 0x0;
|
| +///////////////////////////////////////////////////////////////////////////////
|
| +
|
| +/**
|
| + * Applies a convolution effect which applies the convolution using a linear
|
| + * interpolation optimization to use half as many samples.
|
| + */
|
| +class GrGLLerpConvolutionEffect : public GrGLConvolutionEffect {
|
| +public:
|
| + virtual void emitCode(EmitArgs&) override;
|
| +
|
| + void onSetData(const GrGLSLProgramDataManager& pdman, const GrProcessor&) override;
|
| +
|
| +private:
|
| + int bilerpSampleCount(int width) const;
|
| +
|
| + // Lerp uniforms: per-sample weights and coordinate offsets
|
| + UniformHandle fSampleWeightUni;
|
| + UniformHandle fSampleOffsetUni;
|
| +
|
| + typedef GrGLConvolutionEffect INHERITED;
|
| +};
|
| +
|
| +void GrGLLerpConvolutionEffect::emitCode(EmitArgs& args) {
|
| + const GrConvolutionEffect& ce = args.fFp.cast<GrConvolutionEffect>();
|
| +
|
| + int width = Gr1DKernelEffect::WidthFromRadius(ce.radius());
|
| + int sampleCount = bilerpSampleCount(width);
|
| + // We use 2 * sampleCount uniforms. The maximum allowed by PS2.0 is 32, so
|
| + // ensure we don't exceed this. Note that it is currently impossible to
|
| + // exceed this as bilerpSampleCount = (kernelWidth + 1) / 2, and kernelWidth
|
| + // maxes out at 25, resulting in a max sampleCount of 13.
|
| + SkASSERT(sampleCount < 16);
|
| +
|
| +
|
| + GrGLSLUniformHandler* uniformHandler = args.fUniformHandler;
|
| + fSampleOffsetUni =
|
| + uniformHandler->addUniformArray(kFragment_GrShaderFlag, kVec2f_GrSLType,
|
| + kDefault_GrSLPrecision, "SampleOffset", sampleCount);
|
| + fSampleWeightUni =
|
| + uniformHandler->addUniformArray(kFragment_GrShaderFlag, kFloat_GrSLType,
|
| + kDefault_GrSLPrecision, "SampleWeight", sampleCount);
|
| +
|
| + GrGLSLFragmentBuilder* fragBuilder = args.fFragBuilder;
|
| + SkString coords2D = fragBuilder->ensureFSCoords2D(args.fCoords, 0);
|
| +
|
| + fragBuilder->codeAppendf("%s = vec4(0, 0, 0, 0);\n", args.fOutputColor);
|
| +
|
| + const GrGLSLShaderVar& kernel = uniformHandler->getUniformVariable(fSampleWeightUni);
|
| + const GrGLSLShaderVar& imgInc = uniformHandler->getUniformVariable(fSampleOffsetUni);
|
| +
|
| + fragBuilder->codeAppendf("vec2 coord; \n");
|
| +
|
| + // Manually unroll loop because some drivers don't; yields 20-30% speedup.
|
| + for (int i = 0; i < sampleCount; i++) {
|
| + SkString index;
|
| + SkString weightIndex;
|
| + SkString offsetIndex;
|
| + index.appendS32(i);
|
| + kernel.appendArrayAccess(index.c_str(), &weightIndex);
|
| + imgInc.appendArrayAccess(index.c_str(), &offsetIndex);
|
| + fragBuilder->codeAppendf("coord = %s + %s;\n", coords2D.c_str(), offsetIndex.c_str());
|
| + fragBuilder->codeAppendf("%s += ", args.fOutputColor);
|
| + fragBuilder->appendTextureLookup(args.fSamplers[0], "coord");
|
| + fragBuilder->codeAppendf(" * %s;\n", weightIndex.c_str());
|
| }
|
| - b->add32(key);
|
| +
|
| + SkString modulate;
|
| + GrGLSLMulVarBy4f(&modulate, args.fOutputColor, args.fInputColor);
|
| + fragBuilder->codeAppend(modulate.c_str());
|
| +}
|
| +
|
| +void GrGLLerpConvolutionEffect::onSetData(const GrGLSLProgramDataManager& pdman,
|
| + const GrProcessor& processor) {
|
| + const GrConvolutionEffect& ce = processor.cast<GrConvolutionEffect>();
|
| +
|
| + // the code we generated was for a specific kernel radius
|
| + SkASSERT(ce.radius() == this->radius());
|
| +
|
| + // the code we generated was for a specific bounding mode.
|
| + SkASSERT(!ce.useBounds());
|
| +
|
| + int width = Gr1DKernelEffect::WidthFromRadius(ce.radius());
|
| + int sampleCount = bilerpSampleCount(width);
|
| + SkAutoTArray<float> imageIncrements(sampleCount * 2); // X and Y floats per sample.
|
| + SkAutoTArray<float> kernel(sampleCount);
|
| +
|
| + float baseImageIncrement[2];
|
| + getImageIncrement(ce, &baseImageIncrement);
|
| +
|
| + for (int i = 0; i < sampleCount; i++) {
|
| + int sampleIndex1 = i * 2;
|
| + int sampleIndex2 = sampleIndex1 + 1;
|
| +
|
| + // If we have an odd number of samples in our filter, the last sample won't use
|
| + // the linear interpolation optimization (it will be pixel aligned).
|
| + if (sampleIndex2 >= width) {
|
| + sampleIndex2 = sampleIndex1;
|
| + }
|
| +
|
| + float kernelWeight1 = ce.kernel()[sampleIndex1];
|
| + float kernelWeight2 = ce.kernel()[sampleIndex2];
|
| +
|
| + float totalKernelWeight =
|
| + (sampleIndex1 == sampleIndex2) ? kernelWeight1 : (kernelWeight1 + kernelWeight2);
|
| +
|
| + float sampleRatio =
|
| + (sampleIndex1 == sampleIndex2) ? 0 : kernelWeight2 / (kernelWeight1 + kernelWeight2);
|
| +
|
| + imageIncrements[i * 2] = (-ce.radius() + i * 2 + sampleRatio) * baseImageIncrement[0];
|
| + imageIncrements[i * 2 + 1] =
|
| + (-ce.radius() + i * 2 + sampleRatio) * baseImageIncrement[1];
|
| +
|
| + kernel[i] = totalKernelWeight;
|
| + }
|
| + pdman.set2fv(fSampleOffsetUni, sampleCount, imageIncrements.get());
|
| + pdman.set1fv(fSampleWeightUni, sampleCount, kernel.get());
|
| +}
|
| +
|
| +int GrGLLerpConvolutionEffect::bilerpSampleCount(int width) const {
|
| + // We use a linear interpolation optimization to only sample once for each
|
| + // two pixel aligned samples in the kernel. If we have an odd number of
|
| + // samples, we will have to skip this optimization for the last sample.
|
| + // Because of this we always round up our sample count (by adding 1 before
|
| + // dividing).
|
| + return (width + 1) / 2;
|
| }
|
|
|
| ///////////////////////////////////////////////////////////////////////////////
|
| @@ -149,7 +287,12 @@ GrConvolutionEffect::GrConvolutionEffect(GrTexture* texture,
|
| const float* kernel,
|
| bool useBounds,
|
| float bounds[2])
|
| - : INHERITED(texture, direction, radius), fUseBounds(useBounds) {
|
| + : INHERITED(texture,
|
| + direction,
|
| + radius,
|
| + useBounds ? GrTextureParams::FilterMode::kNone_FilterMode
|
| + : GrTextureParams::FilterMode::kBilerp_FilterMode)
|
| + , fUseBounds(useBounds) {
|
| this->initClassID<GrConvolutionEffect>();
|
| SkASSERT(radius <= kMaxKernelRadius);
|
| SkASSERT(kernel);
|
| @@ -166,7 +309,12 @@ GrConvolutionEffect::GrConvolutionEffect(GrTexture* texture,
|
| float gaussianSigma,
|
| bool useBounds,
|
| float bounds[2])
|
| - : INHERITED(texture, direction, radius), fUseBounds(useBounds) {
|
| + : INHERITED(texture,
|
| + direction,
|
| + radius,
|
| + useBounds ? GrTextureParams::FilterMode::kNone_FilterMode
|
| + : GrTextureParams::FilterMode::kBilerp_FilterMode)
|
| + , fUseBounds(useBounds) {
|
| this->initClassID<GrConvolutionEffect>();
|
| SkASSERT(radius <= kMaxKernelRadius);
|
| int width = this->width();
|
| @@ -192,12 +340,20 @@ GrConvolutionEffect::~GrConvolutionEffect() {
|
| }
|
|
|
| void GrConvolutionEffect::onGetGLSLProcessorKey(const GrGLSLCaps& caps,
|
| - GrProcessorKeyBuilder* b) const {
|
| + GrProcessorKeyBuilder* b) const {
|
| GrGLConvolutionEffect::GenKey(*this, caps, b);
|
| }
|
|
|
| GrGLSLFragmentProcessor* GrConvolutionEffect::onCreateGLSLInstance() const {
|
| - return new GrGLConvolutionEffect;
|
| + // We support a linear interpolation optimization which (when feasible) uses
|
| + // half the number of samples to apply the kernel. This is not always
|
| + // applicable, as the linear interpolation optimization does not support
|
| + // bounded sampling.
|
| + if (this->useBounds()) {
|
| + return new GrGLBoundedConvolutionEffect;
|
| + } else {
|
| + return new GrGLLerpConvolutionEffect;
|
| + }
|
| }
|
|
|
| bool GrConvolutionEffect::onIsEqual(const GrFragmentProcessor& sBase) const {
|
|
|