Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(255)

Unified Diff: src/gpu/effects/GrConvolutionEffect.cpp

Issue 1216623003: Bilinear optimization for 1D convolution. Base URL: https://chromium.googlesource.com/skia.git@master
Patch Set: Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/gpu/effects/Gr1DKernelEffect.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/gpu/effects/GrConvolutionEffect.cpp
diff --git a/src/gpu/effects/GrConvolutionEffect.cpp b/src/gpu/effects/GrConvolutionEffect.cpp
index 4f091384935f9c9199880d8151104162f6b4a6f6..33920ac7ba09217d7c7d9afd115580e4a8e7649b 100644
--- a/src/gpu/effects/GrConvolutionEffect.cpp
+++ b/src/gpu/effects/GrConvolutionEffect.cpp
@@ -14,13 +14,61 @@
// For brevity
typedef GrGLSLProgramDataManager::UniformHandle UniformHandle;
+/**
+ * Base class with shared functionality for GrGLBoundedConvolutionEffect and
+ * GrGLLerpConvolutionEffect.
+ */
class GrGLConvolutionEffect : public GrGLSLFragmentProcessor {
public:
- void emitCode(EmitArgs&) override;
-
static inline void GenKey(const GrProcessor&, const GrGLSLCaps&, GrProcessorKeyBuilder*);
protected:
+ void getImageIncrement(const GrConvolutionEffect&, float (*)[2]) const;
+
+private:
+ typedef GrGLSLFragmentProcessor INHERITED;
+};
+
+void GrGLConvolutionEffect::GenKey(const GrProcessor& processor,
+ const GrGLSLCaps&,
+ GrProcessorKeyBuilder* b) {
+ const GrConvolutionEffect& ce = processor.cast<GrConvolutionEffect>();
+ uint32_t key = ce.radius();
+ key <<= 2;
+ if (ce.useBounds()) {
+ key |= 0x2;
+ key |= GrConvolutionEffect::kY_Direction == ce.direction() ? 0x1 : 0x0;
+ }
+ b->add32(key);
+}
+
+void GrGLConvolutionEffect::getImageIncrement(const GrConvolutionEffect& ce,
+ float (*imageIncrement)[2]) const {
+ GrTexture& texture = *ce.texture(0);
+ (*imageIncrement)[0] = (*imageIncrement)[1] = 0;
+ float ySign = texture.origin() != kTopLeft_GrSurfaceOrigin ? 1.0f : -1.0f;
+ switch (ce.direction()) {
+ case Gr1DKernelEffect::kX_Direction:
+ (*imageIncrement)[0] = 1.0f / texture.width();
+ break;
+ case Gr1DKernelEffect::kY_Direction:
+ (*imageIncrement)[1] = ySign / texture.height();
+ break;
+ default:
+ SkFAIL("Unknown filter direction.");
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Applies a convolution effect which restricts samples to the provided bounds
+ * using shader logic.
+ */
+class GrGLBoundedConvolutionEffect : public GrGLConvolutionEffect {
+public:
+ virtual void emitCode(EmitArgs&) override;
+
void onSetData(const GrGLSLProgramDataManager& pdman, const GrProcessor&) override;
private:
@@ -28,66 +76,52 @@ private:
UniformHandle fImageIncrementUni;
UniformHandle fBoundsUni;
- typedef GrGLSLFragmentProcessor INHERITED;
+ typedef GrGLConvolutionEffect INHERITED;
};
-void GrGLConvolutionEffect::emitCode(EmitArgs& args) {
+void GrGLBoundedConvolutionEffect::emitCode(EmitArgs& args) {
const GrConvolutionEffect& ce = args.fFp.cast<GrConvolutionEffect>();
-
+
GrGLSLUniformHandler* uniformHandler = args.fUniformHandler;
- fImageIncrementUni = uniformHandler->addUniform(kFragment_GrShaderFlag,
- kVec2f_GrSLType, kDefault_GrSLPrecision,
- "ImageIncrement");
- if (ce.useBounds()) {
- fBoundsUni = uniformHandler->addUniform(kFragment_GrShaderFlag,
- kVec2f_GrSLType, kDefault_GrSLPrecision,
- "Bounds");
- }
+ fImageIncrementUni =
+ uniformHandler->addUniform(kFragment_GrShaderFlag, kVec2f_GrSLType,
+ kDefault_GrSLPrecision, "ImageIncrement");
+ fBoundsUni = uniformHandler->addUniform(kFragment_GrShaderFlag, kVec2f_GrSLType,
+ kDefault_GrSLPrecision, "Bounds");
int width = Gr1DKernelEffect::WidthFromRadius(ce.radius());
+ fKernelUni = uniformHandler->addUniformArray(kFragment_GrShaderFlag, kFloat_GrSLType,
+ kDefault_GrSLPrecision, "Kernel", width);
- int arrayCount = (width + 3) / 4;
- SkASSERT(4 * arrayCount >= width);
-
- fKernelUni = uniformHandler->addUniformArray(kFragment_GrShaderFlag,
- kVec4f_GrSLType, kDefault_GrSLPrecision,
- "Kernel", arrayCount);
-
- GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
+ GrGLSLFragmentBuilder* fragBuilder = args.fFragBuilder;
SkString coords2D = fragBuilder->ensureFSCoords2D(args.fCoords, 0);
- fragBuilder->codeAppendf("%s = vec4(0, 0, 0, 0);", args.fOutputColor);
+ fragBuilder->codeAppendf("%s = vec4(0, 0, 0, 0);\n", args.fOutputColor);
const GrGLSLShaderVar& kernel = uniformHandler->getUniformVariable(fKernelUni);
const char* imgInc = uniformHandler->getUniformCStr(fImageIncrementUni);
- fragBuilder->codeAppendf("vec2 coord = %s - %d.0 * %s;", coords2D.c_str(), ce.radius(), imgInc);
+ fragBuilder->codeAppendf("vec2 coord = %s - %d.0 * %s;\n", coords2D.c_str(), ce.radius(),
+ imgInc);
// Manually unroll loop because some drivers don't; yields 20-30% speedup.
- const char* kVecSuffix[4] = { ".x", ".y", ".z", ".w" };
for (int i = 0; i < width; i++) {
SkString index;
SkString kernelIndex;
- index.appendS32(i/4);
+ index.appendS32(i);
kernel.appendArrayAccess(index.c_str(), &kernelIndex);
- kernelIndex.append(kVecSuffix[i & 0x3]);
-
- if (ce.useBounds()) {
- // We used to compute a bool indicating whether we're in bounds or not, cast it to a
- // float, and then mul weight*texture_sample by the float. However, the Adreno 430 seems
- // to have a bug that caused corruption.
- const char* bounds = uniformHandler->getUniformCStr(fBoundsUni);
- const char* component = ce.direction() == Gr1DKernelEffect::kY_Direction ? "y" : "x";
- fragBuilder->codeAppendf("if (coord.%s >= %s.x && coord.%s <= %s.y) {",
- component, bounds, component, bounds);
- }
- fragBuilder->codeAppendf("\t\t%s += ", args.fOutputColor);
+ // We used to compute a bool indicating whether we're in bounds or not, cast it to a
+ // float, and then mul weight*texture_sample by the float. However, the Adreno 430 seems
+ // to have a bug that caused corruption.
+ const char* bounds = uniformHandler->getUniformCStr(fBoundsUni);
+ const char* component = ce.direction() == Gr1DKernelEffect::kY_Direction ? "y" : "x";
+ fragBuilder->codeAppendf("if (coord.%s >= %s.x && coord.%s <= %s.y) {",
+ component, bounds, component, bounds);
+ fragBuilder->codeAppendf("%s += ", args.fOutputColor);
fragBuilder->appendTextureLookup(args.fSamplers[0], "coord");
fragBuilder->codeAppendf(" * %s;\n", kernelIndex.c_str());
- if (ce.useBounds()) {
- fragBuilder->codeAppend("}");
- }
- fragBuilder->codeAppendf("\t\tcoord += %s;\n", imgInc);
+ fragBuilder->codeAppend("}");
+ fragBuilder->codeAppendf("coord += %s;\n", imgInc);
}
SkString modulate;
@@ -95,50 +129,154 @@ void GrGLConvolutionEffect::emitCode(EmitArgs& args) {
fragBuilder->codeAppend(modulate.c_str());
}
-void GrGLConvolutionEffect::onSetData(const GrGLSLProgramDataManager& pdman,
- const GrProcessor& processor) {
- const GrConvolutionEffect& conv = processor.cast<GrConvolutionEffect>();
- GrTexture& texture = *conv.texture(0);
+void GrGLBoundedConvolutionEffect::onSetData(const GrGLSLProgramDataManager& pdman,
+ const GrProcessor& processor) {
+ const GrConvolutionEffect& ce = processor.cast<GrConvolutionEffect>();
- float imageIncrement[2] = { 0 };
- float ySign = texture.origin() != kTopLeft_GrSurfaceOrigin ? 1.0f : -1.0f;
- switch (conv.direction()) {
- case Gr1DKernelEffect::kX_Direction:
- imageIncrement[0] = 1.0f / texture.width();
- break;
- case Gr1DKernelEffect::kY_Direction:
- imageIncrement[1] = ySign / texture.height();
- break;
- default:
- SkFAIL("Unknown filter direction.");
- }
+ // the code we generated was for a specific bounding mode.
+ SkASSERT(ce.useBounds());
+
+ GrTexture& texture = *ce.texture(0);
+ float imageIncrement[2];
+ getImageIncrement(ce, &imageIncrement);
pdman.set2fv(fImageIncrementUni, 1, imageIncrement);
- if (conv.useBounds()) {
- const float* bounds = conv.bounds();
- if (Gr1DKernelEffect::kY_Direction == conv.direction() &&
- texture.origin() != kTopLeft_GrSurfaceOrigin) {
- pdman.set2f(fBoundsUni, 1.0f - bounds[1], 1.0f - bounds[0]);
- } else {
- pdman.set2f(fBoundsUni, bounds[0], bounds[1]);
- }
+ const float* bounds = ce.bounds();
+ if (Gr1DKernelEffect::kY_Direction == ce.direction() &&
+ texture.origin() != kTopLeft_GrSurfaceOrigin) {
+ pdman.set2f(fBoundsUni, 1.0f - bounds[1], 1.0f - bounds[0]);
+ } else {
+ pdman.set2f(fBoundsUni, bounds[0], bounds[1]);
}
- int width = Gr1DKernelEffect::WidthFromRadius(conv.radius());
- int arrayCount = (width + 3) / 4;
- SkASSERT(4 * arrayCount >= width);
- pdman.set4fv(fKernelUni, arrayCount, conv.kernel());
+ int width = Gr1DKernelEffect::WidthFromRadius(ce.radius());
+ pdman.set1fv(fKernelUni, width, ce.kernel());
}
-void GrGLConvolutionEffect::GenKey(const GrProcessor& processor, const GrGLSLCaps&,
- GrProcessorKeyBuilder* b) {
- const GrConvolutionEffect& conv = processor.cast<GrConvolutionEffect>();
- uint32_t key = conv.radius();
- key <<= 2;
- if (conv.useBounds()) {
- key |= 0x2;
- key |= GrConvolutionEffect::kY_Direction == conv.direction() ? 0x1 : 0x0;
+///////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Applies a convolution effect which performs the convolution using a linear
+ * interpolation optimization to use half as many samples.
+ */
+class GrGLLerpConvolutionEffect : public GrGLConvolutionEffect {
+public:
+ virtual void emitCode(EmitArgs&) override;
+
+ void onSetData(const GrGLSLProgramDataManager& pdman, const GrProcessor&) override;
+
+private:
+ int bilerpSampleCount(int width) const;
+
+ // Per-sample weight and offset uniforms for the lerp (unbounded) path
+ UniformHandle fSampleWeightUni;
+ UniformHandle fSampleOffsetUni;
+
+ typedef GrGLConvolutionEffect INHERITED;
+};
+
+void GrGLLerpConvolutionEffect::emitCode(EmitArgs& args) {
+ const GrConvolutionEffect& ce = args.fFp.cast<GrConvolutionEffect>();
+
+ int width = Gr1DKernelEffect::WidthFromRadius(ce.radius());
+ int sampleCount = bilerpSampleCount(width);
+ // We use 2 * sampleCount uniforms. The maximum allowed by PS2.0 is 32, so
+ // ensure we don't exceed this. Note that it is currently impossible to
+ // exceed this as bilerpSampleCount = (kernelWidth + 1) / 2, and kernelWidth
+ // maxes out at 25, resulting in a max sampleCount of 13 (26 uniforms).
+ SkASSERT(sampleCount < 16);
+
+
+ GrGLSLUniformHandler* uniformHandler = args.fUniformHandler;
+ fSampleOffsetUni =
+ uniformHandler->addUniformArray(kFragment_GrShaderFlag, kVec2f_GrSLType,
+ kDefault_GrSLPrecision, "SampleOffset", sampleCount);
+ fSampleWeightUni =
+ uniformHandler->addUniformArray(kFragment_GrShaderFlag, kFloat_GrSLType,
+ kDefault_GrSLPrecision, "SampleWeight", sampleCount);
+
+ GrGLSLFragmentBuilder* fragBuilder = args.fFragBuilder;
+ SkString coords2D = fragBuilder->ensureFSCoords2D(args.fCoords, 0);
+
+ fragBuilder->codeAppendf("%s = vec4(0, 0, 0, 0);\n", args.fOutputColor);
+
+ const GrGLSLShaderVar& kernel = uniformHandler->getUniformVariable(fSampleWeightUni);
+ const GrGLSLShaderVar& imgInc = uniformHandler->getUniformVariable(fSampleOffsetUni);
+
+ fragBuilder->codeAppendf("vec2 coord; \n");
+
+ // Manually unroll loop because some drivers don't; yields 20-30% speedup.
+ for (int i = 0; i < sampleCount; i++) {
+ SkString index;
+ SkString weightIndex;
+ SkString offsetIndex;
+ index.appendS32(i);
+ kernel.appendArrayAccess(index.c_str(), &weightIndex);
+ imgInc.appendArrayAccess(index.c_str(), &offsetIndex);
+ fragBuilder->codeAppendf("coord = %s + %s;\n", coords2D.c_str(), offsetIndex.c_str());
+ fragBuilder->codeAppendf("%s += ", args.fOutputColor);
+ fragBuilder->appendTextureLookup(args.fSamplers[0], "coord");
+ fragBuilder->codeAppendf(" * %s;\n", weightIndex.c_str());
}
- b->add32(key);
+
+ SkString modulate;
+ GrGLSLMulVarBy4f(&modulate, args.fOutputColor, args.fInputColor);
+ fragBuilder->codeAppend(modulate.c_str());
+}
+
+void GrGLLerpConvolutionEffect::onSetData(const GrGLSLProgramDataManager& pdman,
+ const GrProcessor& processor) {
+ const GrConvolutionEffect& ce = processor.cast<GrConvolutionEffect>();
+
+ // the code we generated was for a specific kernel radius
+ SkASSERT(ce.radius() == this->radius());
+
+ // the code we generated was for a specific bounding mode.
+ SkASSERT(!ce.useBounds());
+
+ int width = Gr1DKernelEffect::WidthFromRadius(ce.radius());
+ int sampleCount = bilerpSampleCount(width);
+ SkAutoTArray<float> imageIncrements(sampleCount * 2); // X and Y floats per sample.
+ SkAutoTArray<float> kernel(sampleCount);
+
+ float baseImageIncrement[2];
+ getImageIncrement(ce, &baseImageIncrement);
+
+ for (int i = 0; i < sampleCount; i++) {
+ int sampleIndex1 = i * 2;
+ int sampleIndex2 = sampleIndex1 + 1;
+
+ // If we have an odd number of samples in our filter, the last sample won't use
+ // the linear interpolation optimization (it will be pixel aligned).
+ if (sampleIndex2 >= width) {
+ sampleIndex2 = sampleIndex1;
+ }
+
+ float kernelWeight1 = ce.kernel()[sampleIndex1];
+ float kernelWeight2 = ce.kernel()[sampleIndex2];
+
+ float totalKernelWeight =
+ (sampleIndex1 == sampleIndex2) ? kernelWeight1 : (kernelWeight1 + kernelWeight2);
+
+ float sampleRatio =
+ (sampleIndex1 == sampleIndex2) ? 0 : kernelWeight2 / (kernelWeight1 + kernelWeight2);
+
+ imageIncrements[i * 2] = (-ce.radius() + i * 2 + sampleRatio) * baseImageIncrement[0];
+ imageIncrements[i * 2 + 1] =
+ (-ce.radius() + i * 2 + sampleRatio) * baseImageIncrement[1];
+
+ kernel[i] = totalKernelWeight;
+ }
+ pdman.set2fv(fSampleOffsetUni, sampleCount, imageIncrements.get());
+ pdman.set1fv(fSampleWeightUni, sampleCount, kernel.get());
+}
+
+int GrGLLerpConvolutionEffect::bilerpSampleCount(int width) const {
+ // We use a linear interpolation optimization to only sample once for each
+ // two pixel aligned samples in the kernel. If we have an odd number of
+ // samples, we will have to skip this optimization for the last sample.
+ // Because of this we always round up our sample count (by adding 1 before
+ // dividing).
+ return (width + 1) / 2;
}
///////////////////////////////////////////////////////////////////////////////
@@ -149,7 +287,12 @@ GrConvolutionEffect::GrConvolutionEffect(GrTexture* texture,
const float* kernel,
bool useBounds,
float bounds[2])
- : INHERITED(texture, direction, radius), fUseBounds(useBounds) {
+ : INHERITED(texture,
+ direction,
+ radius,
+ useBounds ? GrTextureParams::FilterMode::kNone_FilterMode
+ : GrTextureParams::FilterMode::kBilerp_FilterMode)
+ , fUseBounds(useBounds) {
this->initClassID<GrConvolutionEffect>();
SkASSERT(radius <= kMaxKernelRadius);
SkASSERT(kernel);
@@ -166,7 +309,12 @@ GrConvolutionEffect::GrConvolutionEffect(GrTexture* texture,
float gaussianSigma,
bool useBounds,
float bounds[2])
- : INHERITED(texture, direction, radius), fUseBounds(useBounds) {
+ : INHERITED(texture,
+ direction,
+ radius,
+ useBounds ? GrTextureParams::FilterMode::kNone_FilterMode
+ : GrTextureParams::FilterMode::kBilerp_FilterMode)
+ , fUseBounds(useBounds) {
this->initClassID<GrConvolutionEffect>();
SkASSERT(radius <= kMaxKernelRadius);
int width = this->width();
@@ -192,12 +340,20 @@ GrConvolutionEffect::~GrConvolutionEffect() {
}
void GrConvolutionEffect::onGetGLSLProcessorKey(const GrGLSLCaps& caps,
- GrProcessorKeyBuilder* b) const {
+ GrProcessorKeyBuilder* b) const {
GrGLConvolutionEffect::GenKey(*this, caps, b);
}
GrGLSLFragmentProcessor* GrConvolutionEffect::onCreateGLSLInstance() const {
- return new GrGLConvolutionEffect;
+ // We support a linear interpolation optimization which (when feasible) uses
+ // half the number of samples to apply the kernel. This is not always
+ // applicable, as the linear interpolation optimization does not support
+ // bounded sampling.
+ if (this->useBounds()) {
+ return new GrGLBoundedConvolutionEffect;
+ } else {
+ return new GrGLLerpConvolutionEffect;
+ }
}
bool GrConvolutionEffect::onIsEqual(const GrFragmentProcessor& sBase) const {
« no previous file with comments | « src/gpu/effects/Gr1DKernelEffect.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698