src/gpu/effects/GrConvolutionEffect.cpp - Issue 1231383005: Revert of Bilinear optimization for 1D convolution.

Unified Diff: src/gpu/effects/GrConvolutionEffect.cpp

Issue 1231383005: Revert of Bilinear optimization for 1D convolution. (Closed) Base URL: https://chromium.googlesource.com/skia.git@master

Patch Set: Created 5 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/gpu/effects/GrConvolutionEffect.cpp

diff --git a/src/gpu/effects/GrConvolutionEffect.cpp b/src/gpu/effects/GrConvolutionEffect.cpp

index 1e1c477cd5bf6da947a6b4022be99404e054c860..f5b5e22ce15dfe127bb4b32308eb086fbe0477a8 100644

--- a/src/gpu/effects/GrConvolutionEffect.cpp

+++ b/src/gpu/effects/GrConvolutionEffect.cpp

@@ -13,24 +13,32 @@

// For brevity

typedef GrGLProgramDataManager::UniformHandle UniformHandle;

-/**

- * Base class with shared functionality for GrGLBoundedConvolutionEffect and

- * GrGLLerpConvolutionEffect.

- */

class GrGLConvolutionEffect : public GrGLFragmentProcessor {

public:

GrGLConvolutionEffect(const GrProcessor&);

+ virtual void emitCode(GrGLFPBuilder*,

+ const GrFragmentProcessor&,

+ const char* outputColor,

+ const char* inputColor,

+ const TransformedCoordsArray&,

+ const TextureSamplerArray&) override;

+ void setData(const GrGLProgramDataManager& pdman, const GrProcessor&) override;

static inline void GenKey(const GrProcessor&, const GrGLSLCaps&, GrProcessorKeyBuilder*);

-protected:

- int radius() const { return fRadius; }

+private:

int width() const { return Gr1DKernelEffect::WidthFromRadius(fRadius); }

+ bool useBounds() const { return fUseBounds; }

Gr1DKernelEffect::Direction direction() const { return fDirection; }

- void getImageIncrement(const GrConvolutionEffect&, float (*)[2]) const;

-private:

- int fRadius;

- Gr1DKernelEffect::Direction fDirection;

+ int fRadius;

+ bool fUseBounds;

+ Gr1DKernelEffect::Direction fDirection;

+ UniformHandle fKernelUni;

+ UniformHandle fImageIncrementUni;

+ UniformHandle fBoundsUni;

typedef GrGLFragmentProcessor INHERITED;

};

@@ -38,11 +46,101 @@

GrGLConvolutionEffect::GrGLConvolutionEffect(const GrProcessor& processor) {

const GrConvolutionEffect& c = processor.cast<GrConvolutionEffect>();

fRadius = c.radius();

+ fUseBounds = c.useBounds();

fDirection = c.direction();

}

-void GrGLConvolutionEffect::GenKey(const GrProcessor& processor,

- const GrGLSLCaps&,

+void GrGLConvolutionEffect::emitCode(GrGLFPBuilder* builder,

+ const GrFragmentProcessor&,

+ const char* outputColor,

+ const char* inputColor,

+ const TransformedCoordsArray& coords,

+ const TextureSamplerArray& samplers) {

+ fImageIncrementUni = builder->addUniform(GrGLProgramBuilder::kFragment_Visibility,

+ kVec2f_GrSLType, kDefault_GrSLPrecision,

+ "ImageIncrement");

+ if (this->useBounds()) {

+ fBoundsUni = builder->addUniform(GrGLProgramBuilder::kFragment_Visibility,

+ kVec2f_GrSLType, kDefault_GrSLPrecision,

+ "Bounds");

+ }

+ fKernelUni = builder->addUniformArray(GrGLProgramBuilder::kFragment_Visibility,

+ kFloat_GrSLType, kDefault_GrSLPrecision,

+ "Kernel", this->width());

+ GrGLFragmentBuilder* fsBuilder = builder->getFragmentShaderBuilder();

+ SkString coords2D = fsBuilder->ensureFSCoords2D(coords, 0);

+ fsBuilder->codeAppendf("\t\t%s = vec4(0, 0, 0, 0);\n", outputColor);

+ int width = this->width();

+ const GrGLShaderVar& kernel = builder->getUniformVariable(fKernelUni);

+ const char* imgInc = builder->getUniformCStr(fImageIncrementUni);

+ fsBuilder->codeAppendf("\t\tvec2 coord = %s - %d.0 * %s;\n", coords2D.c_str(), fRadius, imgInc);

+ // Manually unroll loop because some drivers don't; yields 20-30% speedup.

+ for (int i = 0; i < width; i++) {

+ SkString index;

+ SkString kernelIndex;

+ index.appendS32(i);

+ kernel.appendArrayAccess(index.c_str(), &kernelIndex);

+ if (this->useBounds()) {

+ // We used to compute a bool indicating whether we're in bounds or not, cast it to a

+ // float, and then mul weight*texture_sample by the float. However, the Adreno 430 seems

+ // to have a bug that caused corruption.

+ const char* bounds = builder->getUniformCStr(fBoundsUni);

+ const char* component = this->direction() == Gr1DKernelEffect::kY_Direction ? "y" : "x";

+ fsBuilder->codeAppendf("if (coord.%s >= %s.x && coord.%s <= %s.y) {",

+ component, bounds, component, bounds);

+ }

+ fsBuilder->codeAppendf("\t\t%s += ", outputColor);

+ fsBuilder->appendTextureLookup(samplers[0], "coord");

+ fsBuilder->codeAppendf(" * %s;\n", kernelIndex.c_str());

+ if (this->useBounds()) {

+ fsBuilder->codeAppend("}");

+ }

+ fsBuilder->codeAppendf("\t\tcoord += %s;\n", imgInc);

+ }

+ SkString modulate;

+ GrGLSLMulVarBy4f(&modulate, outputColor, inputColor);

+ fsBuilder->codeAppend(modulate.c_str());

+void GrGLConvolutionEffect::setData(const GrGLProgramDataManager& pdman,

+ const GrProcessor& processor) {

+ const GrConvolutionEffect& conv = processor.cast<GrConvolutionEffect>();

+ GrTexture& texture = *conv.texture(0);

+ // the code we generated was for a specific kernel radius

+ SkASSERT(conv.radius() == fRadius);

+ float imageIncrement[2] = { 0 };

+ float ySign = texture.origin() != kTopLeft_GrSurfaceOrigin ? 1.0f : -1.0f;

+ switch (conv.direction()) {

+ case Gr1DKernelEffect::kX_Direction:

+ imageIncrement[0] = 1.0f / texture.width();

+ break;

+ case Gr1DKernelEffect::kY_Direction:

+ imageIncrement[1] = ySign / texture.height();

+ break;

+ default:

+ SkFAIL("Unknown filter direction.");

+ }

+ pdman.set2fv(fImageIncrementUni, 1, imageIncrement);

+ if (conv.useBounds()) {

+ const float* bounds = conv.bounds();

+ if (Gr1DKernelEffect::kY_Direction == conv.direction() &&

+ texture.origin() != kTopLeft_GrSurfaceOrigin) {

+ pdman.set2f(fBoundsUni, 1.0f - bounds[1], 1.0f - bounds[0]);

+ } else {

+ pdman.set2f(fBoundsUni, bounds[0], bounds[1]);

+ }

+ pdman.set1fv(fKernelUni, this->width(), conv.kernel());

+void GrGLConvolutionEffect::GenKey(const GrProcessor& processor, const GrGLSLCaps&,

GrProcessorKeyBuilder* b) {

const GrConvolutionEffect& conv = processor.cast<GrConvolutionEffect>();

uint32_t key = conv.radius();

@@ -54,261 +152,6 @@

b->add32(key);

}

-void GrGLConvolutionEffect::getImageIncrement(const GrConvolutionEffect& conv,

- float (*imageIncrement)[2]) const {

- GrTexture& texture = *conv.texture(0);

- (*imageIncrement)[0] = (*imageIncrement)[1] = 0;

- float ySign = texture.origin() != kTopLeft_GrSurfaceOrigin ? 1.0f : -1.0f;

- switch (conv.direction()) {

- case Gr1DKernelEffect::kX_Direction:

- (*imageIncrement)[0] = 1.0f / texture.width();

- break;

- case Gr1DKernelEffect::kY_Direction:

- (*imageIncrement)[1] = ySign / texture.height();

- break;

- default:

- SkFAIL("Unknown filter direction.");

- }

-///////////////////////////////////////////////////////////////////////////////

-/**

- * Applies a convolution effect which restricts samples to the provided bounds

- * using shader logic.

- */

-class GrGLBoundedConvolutionEffect : public GrGLConvolutionEffect {

-public:

- GrGLBoundedConvolutionEffect(const GrProcessor& processor) : INHERITED(processor) {}

- virtual void emitCode(GrGLFPBuilder*,

- const GrFragmentProcessor&,

- const char* outputColor,

- const char* inputColor,

- const TransformedCoordsArray&,

- const TextureSamplerArray&) override;

- void setData(const GrGLProgramDataManager& pdman, const GrProcessor&) override;

-private:

- UniformHandle fKernelUni;

- UniformHandle fImageIncrementUni;

- UniformHandle fBoundsUni;

- typedef GrGLConvolutionEffect INHERITED;

-};

-void GrGLBoundedConvolutionEffect::emitCode(GrGLFPBuilder* builder,

- const GrFragmentProcessor& processor,

- const char* outputColor,

- const char* inputColor,

- const TransformedCoordsArray& coords,

- const TextureSamplerArray& samplers) {

- fImageIncrementUni =

- builder->addUniform(GrGLProgramBuilder::kFragment_Visibility, kVec2f_GrSLType,

- kDefault_GrSLPrecision, "ImageIncrement");

- fBoundsUni = builder->addUniform(GrGLProgramBuilder::kFragment_Visibility, kVec2f_GrSLType,

- kDefault_GrSLPrecision, "Bounds");

- fKernelUni = builder->addUniformArray(GrGLProgramBuilder::kFragment_Visibility, kFloat_GrSLType,

- kDefault_GrSLPrecision, "Kernel", this->width());

- GrGLFragmentBuilder* fsBuilder = builder->getFragmentShaderBuilder();

- SkString coords2D = fsBuilder->ensureFSCoords2D(coords, 0);

- fsBuilder->codeAppendf("%s = vec4(0, 0, 0, 0);\n", outputColor);

- int width = this->width();

- const GrGLShaderVar& kernel = builder->getUniformVariable(fKernelUni);

- const char* imgInc = builder->getUniformCStr(fImageIncrementUni);

- fsBuilder->codeAppendf("vec2 coord = %s - %d.0 * %s;\n", coords2D.c_str(), this->radius(),

- imgInc);

- // Manually unroll loop because some drivers don't; yields 20-30% speedup.

- for (int i = 0; i < width; i++) {

- SkString index;

- SkString kernelIndex;

- index.appendS32(i);

- kernel.appendArrayAccess(index.c_str(), &kernelIndex);

- // We used to compute a bool indicating whether we're in bounds or not, cast it to a

- // float, and then mul weight*texture_sample by the float. However, the Adreno 430 seems

- // to have a bug that caused corruption.

- const char* bounds = builder->getUniformCStr(fBoundsUni);

- const char* component = this->direction() == Gr1DKernelEffect::kY_Direction ? "y" : "x";

- fsBuilder->codeAppendf("if (coord.%s >= %s.x && coord.%s <= %s.y) {",

- component, bounds, component, bounds);

- fsBuilder->codeAppendf("%s += ", outputColor);

- fsBuilder->appendTextureLookup(samplers[0], "coord");

- fsBuilder->codeAppendf(" * %s;\n", kernelIndex.c_str());

- fsBuilder->codeAppend("}");

- fsBuilder->codeAppendf("coord += %s;\n", imgInc);

- }

- SkString modulate;

- GrGLSLMulVarBy4f(&modulate, outputColor, inputColor);

- fsBuilder->codeAppend(modulate.c_str());

-void GrGLBoundedConvolutionEffect::setData(const GrGLProgramDataManager& pdman,

- const GrProcessor& processor) {

- const GrConvolutionEffect& conv = processor.cast<GrConvolutionEffect>();

- // the code we generated was for a specific kernel radius

- SkASSERT(conv.radius() == this->radius());

- // the code we generated was for a specific bounding mode.

- SkASSERT(conv.useBounds());

- GrTexture& texture = *conv.texture(0);

- float imageIncrement[2];

- getImageIncrement(conv, &imageIncrement);

- pdman.set2fv(fImageIncrementUni, 1, imageIncrement);

- const float* bounds = conv.bounds();

- if (Gr1DKernelEffect::kY_Direction == conv.direction() &&

- texture.origin() != kTopLeft_GrSurfaceOrigin) {

- pdman.set2f(fBoundsUni, 1.0f - bounds[1], 1.0f - bounds[0]);

- } else {

- pdman.set2f(fBoundsUni, bounds[0], bounds[1]);

- }

- pdman.set1fv(fKernelUni, this->width(), conv.kernel());

-///////////////////////////////////////////////////////////////////////////////

-/**

- * Applies a convolution effect which applies the convolution using a linear

- * interpolation optimization to use half as many samples.

- */

-class GrGLLerpConvolutionEffect : public GrGLConvolutionEffect {

-public:

- GrGLLerpConvolutionEffect(const GrProcessor& processor) : INHERITED(processor) {}

- virtual void emitCode(GrGLFPBuilder*,

- const GrFragmentProcessor&,

- const char* outputColor,

- const char* inputColor,

- const TransformedCoordsArray&,

- const TextureSamplerArray&) override;

- void setData(const GrGLProgramDataManager& pdman, const GrProcessor&) override;

-private:

- int bilerpSampleCount() const;

- // Lerp sampling uniforms (per-sample weights and offsets)

- UniformHandle fSampleWeightUni;

- UniformHandle fSampleOffsetUni;

- typedef GrGLConvolutionEffect INHERITED;

-};

-void GrGLLerpConvolutionEffect::emitCode(GrGLFPBuilder* builder,

- const GrFragmentProcessor& processor,

- const char* outputColor,

- const char* inputColor,

- const TransformedCoordsArray& coords,

- const TextureSamplerArray& samplers) {

- int sampleCount = bilerpSampleCount();

- // We use 2 * sampleCount uniforms. The maximum allowed by PS2.0 is 32, so

- // ensure we don't exceed this. Note that it is currently impossible to

- // exceed this as bilerpSampleCount = (kernelWidth + 1) / 2, and kernelWidth

- // maxes out at 25, resulting in a max sampleCount of 13 (i.e. 26 uniforms).

- SkASSERT(sampleCount < 16);

- fSampleOffsetUni =

- builder->addUniformArray(GrGLProgramBuilder::kFragment_Visibility, kVec2f_GrSLType,

- kDefault_GrSLPrecision, "SampleOffset", sampleCount);

- fSampleWeightUni =

- builder->addUniformArray(GrGLProgramBuilder::kFragment_Visibility, kFloat_GrSLType,

- kDefault_GrSLPrecision, "SampleWeight", sampleCount);

- GrGLFragmentBuilder* fsBuilder = builder->getFragmentShaderBuilder();

- SkString coords2D = fsBuilder->ensureFSCoords2D(coords, 0);

- fsBuilder->codeAppendf("%s = vec4(0, 0, 0, 0);\n", outputColor);

- const GrGLShaderVar& kernel = builder->getUniformVariable(fSampleWeightUni);

- const GrGLShaderVar& imgInc = builder->getUniformVariable(fSampleOffsetUni);

- fsBuilder->codeAppendf("vec2 coord; \n");

- // Manually unroll loop because some drivers don't; yields 20-30% speedup.

- for (int i = 0; i < sampleCount; i++) {

- SkString index;

- SkString weightIndex;

- SkString offsetIndex;

- index.appendS32(i);

- kernel.appendArrayAccess(index.c_str(), &weightIndex);

- imgInc.appendArrayAccess(index.c_str(), &offsetIndex);

- fsBuilder->codeAppendf("coord = %s + %s;\n", coords2D.c_str(), offsetIndex.c_str());

- fsBuilder->codeAppendf("%s += ", outputColor);

- fsBuilder->appendTextureLookup(samplers[0], "coord");

- fsBuilder->codeAppendf(" * %s;\n", weightIndex.c_str());

- }

- SkString modulate;

- GrGLSLMulVarBy4f(&modulate, outputColor, inputColor);

- fsBuilder->codeAppend(modulate.c_str());

-void GrGLLerpConvolutionEffect::setData(const GrGLProgramDataManager& pdman,

- const GrProcessor& processor) {

- const GrConvolutionEffect& conv = processor.cast<GrConvolutionEffect>();

- // the code we generated was for a specific kernel radius

- SkASSERT(conv.radius() == this->radius());

- // the code we generated was for a specific bounding mode.

- SkASSERT(!conv.useBounds());

- int sampleCount = bilerpSampleCount();

- SkAutoTArray<float> imageIncrements(sampleCount * 2); // X and Y floats per sample.

- SkAutoTArray<float> kernel(sampleCount);

- float baseImageIncrement[2];

- getImageIncrement(conv, &baseImageIncrement);

- for (int i = 0; i < sampleCount; i++) {

- int sampleIndex1 = i * 2;

- int sampleIndex2 = sampleIndex1 + 1;

- // If we have an odd number of samples in our filter, the last sample won't use

- // the linear interpolation optimization (it will be pixel aligned).

- if (sampleIndex2 >= this->width()) {

- sampleIndex2 = sampleIndex1;

- }

- float kernelWeight1 = conv.kernel()[sampleIndex1];

- float kernelWeight2 = conv.kernel()[sampleIndex2];

- float totalKernelWeight =

- (sampleIndex1 == sampleIndex2) ? kernelWeight1 : (kernelWeight1 + kernelWeight2);

- float sampleRatio =

- (sampleIndex1 == sampleIndex2) ? 0 : kernelWeight2 / (kernelWeight1 + kernelWeight2);

- imageIncrements[i * 2] = (-this->radius() + i * 2 + sampleRatio) * baseImageIncrement[0];

- imageIncrements[i * 2 + 1] =

- (-this->radius() + i * 2 + sampleRatio) * baseImageIncrement[1];

- kernel[i] = totalKernelWeight;

- }

- pdman.set2fv(fSampleOffsetUni, sampleCount, imageIncrements.get());

- pdman.set1fv(fSampleWeightUni, sampleCount, kernel.get());

-int GrGLLerpConvolutionEffect::bilerpSampleCount() const {

- // We use a linear interpolation optimization to only sample once for each

- // two pixel aligned samples in the kernel. If we have an odd number of

- // samples, we will have to skip this optimization for the last sample.

- // Because of this we always round up our sample count (by adding 1 before

- // dividing).

- return (this->width() + 1) / 2;

///////////////////////////////////////////////////////////////////////////////

GrConvolutionEffect::GrConvolutionEffect(GrProcessorDataManager* procDataManager,

@@ -318,13 +161,7 @@

const float* kernel,

bool useBounds,

float bounds[2])

- : INHERITED(procDataManager,

- texture,

- direction,

- radius,

- useBounds ? GrTextureParams::FilterMode::kNone_FilterMode

- : GrTextureParams::FilterMode::kBilerp_FilterMode)

- , fUseBounds(useBounds) {

+ : INHERITED(procDataManager, texture, direction, radius), fUseBounds(useBounds) {

this->initClassID<GrConvolutionEffect>();

SkASSERT(radius <= kMaxKernelRadius);

SkASSERT(kernel);

@@ -342,13 +179,7 @@

float gaussianSigma,

bool useBounds,

float bounds[2])

- : INHERITED(procDataManager,

- texture,

- direction,

- radius,

- useBounds ? GrTextureParams::FilterMode::kNone_FilterMode

- : GrTextureParams::FilterMode::kBilerp_FilterMode)

- , fUseBounds(useBounds) {

+ : INHERITED(procDataManager, texture, direction, radius), fUseBounds(useBounds) {

this->initClassID<GrConvolutionEffect>();

SkASSERT(radius <= kMaxKernelRadius);

int width = this->width();

@@ -379,15 +210,7 @@

}

GrGLFragmentProcessor* GrConvolutionEffect::createGLInstance() const {

- // We support a linear interpolation optimization which (when feasible) uses

- // half the number of samples to apply the kernel. This is not always

- // applicable, as the linear interpolation optimization does not support

- // bounded sampling.

- if (this->useBounds()) {

- return SkNEW_ARGS(GrGLBoundedConvolutionEffect, (*this));

- } else {

- return SkNEW_ARGS(GrGLLerpConvolutionEffect, (*this));

- }

+ return SkNEW_ARGS(GrGLConvolutionEffect, (*this));

}

bool GrConvolutionEffect::onIsEqual(const GrFragmentProcessor& sBase) const {

« no previous file with comments | « src/gpu/effects/Gr1DKernelEffect.h ('k') | no next file » | no next file with comments »