Index: src/gpu/effects/GrConvolutionEffect.cpp |
diff --git a/src/gpu/effects/GrConvolutionEffect.cpp b/src/gpu/effects/GrConvolutionEffect.cpp |
index 4f091384935f9c9199880d8151104162f6b4a6f6..33920ac7ba09217d7c7d9afd115580e4a8e7649b 100644 |
--- a/src/gpu/effects/GrConvolutionEffect.cpp |
+++ b/src/gpu/effects/GrConvolutionEffect.cpp |
@@ -14,13 +14,61 @@ |
// For brevity |
typedef GrGLSLProgramDataManager::UniformHandle UniformHandle; |
+/** |
+ * Base class with shared functionality for GrGLBoundedConvolutionEffect and |
+ * GrGLLerpConvolutionEffect. |
+ */ |
class GrGLConvolutionEffect : public GrGLSLFragmentProcessor { |
public: |
- void emitCode(EmitArgs&) override; |
- |
static inline void GenKey(const GrProcessor&, const GrGLSLCaps&, GrProcessorKeyBuilder*); |
protected: |
+ void getImageIncrement(const GrConvolutionEffect&, float (*)[2]) const; |
+ |
+private: |
+ typedef GrGLSLFragmentProcessor INHERITED; |
+}; |
+ |
+void GrGLConvolutionEffect::GenKey(const GrProcessor& processor, |
+ const GrGLSLCaps&, |
+ GrProcessorKeyBuilder* b) { |
+ const GrConvolutionEffect& ce = processor.cast<GrConvolutionEffect>(); |
+ uint32_t key = ce.radius(); |
+ key <<= 2; |
+ if (ce.useBounds()) { |
+ key |= 0x2; |
+ key |= GrConvolutionEffect::kY_Direction == ce.direction() ? 0x1 : 0x0; |
+ } |
+ b->add32(key); |
+} |
+ |
+void GrGLConvolutionEffect::getImageIncrement(const GrConvolutionEffect& ce, |
+ float (*imageIncrement)[2]) const { |
+ GrTexture& texture = *ce.texture(0); |
+ (*imageIncrement)[0] = (*imageIncrement)[1] = 0; |
+ float ySign = texture.origin() != kTopLeft_GrSurfaceOrigin ? 1.0f : -1.0f; |
+ switch (ce.direction()) { |
+ case Gr1DKernelEffect::kX_Direction: |
+ (*imageIncrement)[0] = 1.0f / texture.width(); |
+ break; |
+ case Gr1DKernelEffect::kY_Direction: |
+ (*imageIncrement)[1] = ySign / texture.height(); |
+ break; |
+ default: |
+ SkFAIL("Unknown filter direction."); |
+ } |
+} |
+ |
+/////////////////////////////////////////////////////////////////////////////// |
+ |
+/** |
+ * Applies a convolution effect which restricts samples to the provided bounds |
+ * using shader logic. |
+ */ |
+class GrGLBoundedConvolutionEffect : public GrGLConvolutionEffect { |
+public: |
+ virtual void emitCode(EmitArgs&) override; |
+ |
void onSetData(const GrGLSLProgramDataManager& pdman, const GrProcessor&) override; |
private: |
@@ -28,66 +76,52 @@ private: |
UniformHandle fImageIncrementUni; |
UniformHandle fBoundsUni; |
- typedef GrGLSLFragmentProcessor INHERITED; |
+ typedef GrGLConvolutionEffect INHERITED; |
}; |
-void GrGLConvolutionEffect::emitCode(EmitArgs& args) { |
+void GrGLBoundedConvolutionEffect::emitCode(EmitArgs& args) { |
const GrConvolutionEffect& ce = args.fFp.cast<GrConvolutionEffect>(); |
- |
+ |
GrGLSLUniformHandler* uniformHandler = args.fUniformHandler; |
- fImageIncrementUni = uniformHandler->addUniform(kFragment_GrShaderFlag, |
- kVec2f_GrSLType, kDefault_GrSLPrecision, |
- "ImageIncrement"); |
- if (ce.useBounds()) { |
- fBoundsUni = uniformHandler->addUniform(kFragment_GrShaderFlag, |
- kVec2f_GrSLType, kDefault_GrSLPrecision, |
- "Bounds"); |
- } |
+ fImageIncrementUni = |
+ uniformHandler->addUniform(kFragment_GrShaderFlag, kVec2f_GrSLType, |
+ kDefault_GrSLPrecision, "ImageIncrement"); |
+ fBoundsUni = uniformHandler->addUniform(kFragment_GrShaderFlag, kVec2f_GrSLType, |
+ kDefault_GrSLPrecision, "Bounds"); |
int width = Gr1DKernelEffect::WidthFromRadius(ce.radius()); |
+ fKernelUni = uniformHandler->addUniformArray(kFragment_GrShaderFlag, kFloat_GrSLType, |
+ kDefault_GrSLPrecision, "Kernel", width); |
- int arrayCount = (width + 3) / 4; |
- SkASSERT(4 * arrayCount >= width); |
- |
- fKernelUni = uniformHandler->addUniformArray(kFragment_GrShaderFlag, |
- kVec4f_GrSLType, kDefault_GrSLPrecision, |
- "Kernel", arrayCount); |
- |
- GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder; |
+ GrGLSLFragmentBuilder* fragBuilder = args.fFragBuilder; |
SkString coords2D = fragBuilder->ensureFSCoords2D(args.fCoords, 0); |
- fragBuilder->codeAppendf("%s = vec4(0, 0, 0, 0);", args.fOutputColor); |
+ fragBuilder->codeAppendf("%s = vec4(0, 0, 0, 0);\n", args.fOutputColor); |
const GrGLSLShaderVar& kernel = uniformHandler->getUniformVariable(fKernelUni); |
const char* imgInc = uniformHandler->getUniformCStr(fImageIncrementUni); |
- fragBuilder->codeAppendf("vec2 coord = %s - %d.0 * %s;", coords2D.c_str(), ce.radius(), imgInc); |
+ fragBuilder->codeAppendf("vec2 coord = %s - %d.0 * %s;\n", coords2D.c_str(), ce.radius(), |
+ imgInc); |
// Manually unroll loop because some drivers don't; yields 20-30% speedup. |
- const char* kVecSuffix[4] = { ".x", ".y", ".z", ".w" }; |
for (int i = 0; i < width; i++) { |
SkString index; |
SkString kernelIndex; |
- index.appendS32(i/4); |
+ index.appendS32(i); |
kernel.appendArrayAccess(index.c_str(), &kernelIndex); |
- kernelIndex.append(kVecSuffix[i & 0x3]); |
- |
- if (ce.useBounds()) { |
- // We used to compute a bool indicating whether we're in bounds or not, cast it to a |
- // float, and then mul weight*texture_sample by the float. However, the Adreno 430 seems |
- // to have a bug that caused corruption. |
- const char* bounds = uniformHandler->getUniformCStr(fBoundsUni); |
- const char* component = ce.direction() == Gr1DKernelEffect::kY_Direction ? "y" : "x"; |
- fragBuilder->codeAppendf("if (coord.%s >= %s.x && coord.%s <= %s.y) {", |
- component, bounds, component, bounds); |
- } |
- fragBuilder->codeAppendf("\t\t%s += ", args.fOutputColor); |
+ // We used to compute a bool indicating whether we're in bounds or not, cast it to a |
+ // float, and then mul weight*texture_sample by the float. However, the Adreno 430 seems |
+ // to have a bug that caused corruption. |
+ const char* bounds = uniformHandler->getUniformCStr(fBoundsUni); |
+ const char* component = ce.direction() == Gr1DKernelEffect::kY_Direction ? "y" : "x"; |
+ fragBuilder->codeAppendf("if (coord.%s >= %s.x && coord.%s <= %s.y) {", |
+ component, bounds, component, bounds); |
+ fragBuilder->codeAppendf("%s += ", args.fOutputColor); |
fragBuilder->appendTextureLookup(args.fSamplers[0], "coord"); |
fragBuilder->codeAppendf(" * %s;\n", kernelIndex.c_str()); |
- if (ce.useBounds()) { |
- fragBuilder->codeAppend("}"); |
- } |
- fragBuilder->codeAppendf("\t\tcoord += %s;\n", imgInc); |
+ fragBuilder->codeAppend("}"); |
+ fragBuilder->codeAppendf("coord += %s;\n", imgInc); |
} |
SkString modulate; |
@@ -95,50 +129,154 @@ void GrGLConvolutionEffect::emitCode(EmitArgs& args) { |
fragBuilder->codeAppend(modulate.c_str()); |
} |
-void GrGLConvolutionEffect::onSetData(const GrGLSLProgramDataManager& pdman, |
- const GrProcessor& processor) { |
- const GrConvolutionEffect& conv = processor.cast<GrConvolutionEffect>(); |
- GrTexture& texture = *conv.texture(0); |
+void GrGLBoundedConvolutionEffect::onSetData(const GrGLSLProgramDataManager& pdman, |
+ const GrProcessor& processor) { |
+ const GrConvolutionEffect& ce = processor.cast<GrConvolutionEffect>(); |
- float imageIncrement[2] = { 0 }; |
- float ySign = texture.origin() != kTopLeft_GrSurfaceOrigin ? 1.0f : -1.0f; |
- switch (conv.direction()) { |
- case Gr1DKernelEffect::kX_Direction: |
- imageIncrement[0] = 1.0f / texture.width(); |
- break; |
- case Gr1DKernelEffect::kY_Direction: |
- imageIncrement[1] = ySign / texture.height(); |
- break; |
- default: |
- SkFAIL("Unknown filter direction."); |
- } |
+ // the code we generated was for a specific bounding mode. |
+ SkASSERT(ce.useBounds()); |
+ |
+ GrTexture& texture = *ce.texture(0); |
+ float imageIncrement[2]; |
+ getImageIncrement(ce, &imageIncrement); |
pdman.set2fv(fImageIncrementUni, 1, imageIncrement); |
- if (conv.useBounds()) { |
- const float* bounds = conv.bounds(); |
- if (Gr1DKernelEffect::kY_Direction == conv.direction() && |
- texture.origin() != kTopLeft_GrSurfaceOrigin) { |
- pdman.set2f(fBoundsUni, 1.0f - bounds[1], 1.0f - bounds[0]); |
- } else { |
- pdman.set2f(fBoundsUni, bounds[0], bounds[1]); |
- } |
+ const float* bounds = ce.bounds(); |
+ if (Gr1DKernelEffect::kY_Direction == ce.direction() && |
+ texture.origin() != kTopLeft_GrSurfaceOrigin) { |
+ pdman.set2f(fBoundsUni, 1.0f - bounds[1], 1.0f - bounds[0]); |
+ } else { |
+ pdman.set2f(fBoundsUni, bounds[0], bounds[1]); |
} |
- int width = Gr1DKernelEffect::WidthFromRadius(conv.radius()); |
- int arrayCount = (width + 3) / 4; |
- SkASSERT(4 * arrayCount >= width); |
- pdman.set4fv(fKernelUni, arrayCount, conv.kernel()); |
+ int width = Gr1DKernelEffect::WidthFromRadius(ce.radius()); |
+ pdman.set1fv(fKernelUni, width, ce.kernel()); |
} |
-void GrGLConvolutionEffect::GenKey(const GrProcessor& processor, const GrGLSLCaps&, |
- GrProcessorKeyBuilder* b) { |
- const GrConvolutionEffect& conv = processor.cast<GrConvolutionEffect>(); |
- uint32_t key = conv.radius(); |
- key <<= 2; |
- if (conv.useBounds()) { |
- key |= 0x2; |
- key |= GrConvolutionEffect::kY_Direction == conv.direction() ? 0x1 : 0x0; |
+/////////////////////////////////////////////////////////////////////////////// |
+ |
+/** |
+ * Applies a convolution effect using a linear interpolation optimization |
+ * that needs only half as many samples. |
+ */ |
+class GrGLLerpConvolutionEffect : public GrGLConvolutionEffect { |
+public: |
+ virtual void emitCode(EmitArgs&) override; |
+ |
+ void onSetData(const GrGLSLProgramDataManager& pdman, const GrProcessor&) override; |
+ |
+private: |
+ int bilerpSampleCount(int width) const; |
+ |
+ // Per-sample uniforms used by the lerp (unbounded) path |
+ UniformHandle fSampleWeightUni; |
+ UniformHandle fSampleOffsetUni; |
+ |
+ typedef GrGLConvolutionEffect INHERITED; |
+}; |
+ |
+void GrGLLerpConvolutionEffect::emitCode(EmitArgs& args) { |
+ const GrConvolutionEffect& ce = args.fFp.cast<GrConvolutionEffect>(); |
+ |
+ int width = Gr1DKernelEffect::WidthFromRadius(ce.radius()); |
+ int sampleCount = bilerpSampleCount(width); |
+ // We use 2 * sampleCount uniforms. The maximum allowed by PS2.0 is 32, so |
+ // ensure we don't exceed this. Note that it is currently impossible to |
+ // exceed this as bilerpSampleCount = (kernelWidth + 1) / 2, and kernelWidth |
+ // maxes out at 25, resulting in a max sampleCount of 13 (26 uniforms). |
+ SkASSERT(sampleCount < 16); |
+ |
+ |
+ GrGLSLUniformHandler* uniformHandler = args.fUniformHandler; |
+ fSampleOffsetUni = |
+ uniformHandler->addUniformArray(kFragment_GrShaderFlag, kVec2f_GrSLType, |
+ kDefault_GrSLPrecision, "SampleOffset", sampleCount); |
+ fSampleWeightUni = |
+ uniformHandler->addUniformArray(kFragment_GrShaderFlag, kFloat_GrSLType, |
+ kDefault_GrSLPrecision, "SampleWeight", sampleCount); |
+ |
+ GrGLSLFragmentBuilder* fragBuilder = args.fFragBuilder; |
+ SkString coords2D = fragBuilder->ensureFSCoords2D(args.fCoords, 0); |
+ |
+ fragBuilder->codeAppendf("%s = vec4(0, 0, 0, 0);\n", args.fOutputColor); |
+ |
+ const GrGLSLShaderVar& kernel = uniformHandler->getUniformVariable(fSampleWeightUni); |
+ const GrGLSLShaderVar& imgInc = uniformHandler->getUniformVariable(fSampleOffsetUni); |
+ |
+ fragBuilder->codeAppendf("vec2 coord; \n"); |
+ |
+ // Manually unroll loop because some drivers don't; yields 20-30% speedup. |
+ for (int i = 0; i < sampleCount; i++) { |
+ SkString index; |
+ SkString weightIndex; |
+ SkString offsetIndex; |
+ index.appendS32(i); |
+ kernel.appendArrayAccess(index.c_str(), &weightIndex); |
+ imgInc.appendArrayAccess(index.c_str(), &offsetIndex); |
+ fragBuilder->codeAppendf("coord = %s + %s;\n", coords2D.c_str(), offsetIndex.c_str()); |
+ fragBuilder->codeAppendf("%s += ", args.fOutputColor); |
+ fragBuilder->appendTextureLookup(args.fSamplers[0], "coord"); |
+ fragBuilder->codeAppendf(" * %s;\n", weightIndex.c_str()); |
} |
- b->add32(key); |
+ |
+ SkString modulate; |
+ GrGLSLMulVarBy4f(&modulate, args.fOutputColor, args.fInputColor); |
+ fragBuilder->codeAppend(modulate.c_str()); |
+} |
+ |
+void GrGLLerpConvolutionEffect::onSetData(const GrGLSLProgramDataManager& pdman, |
+ const GrProcessor& processor) { |
+ const GrConvolutionEffect& ce = processor.cast<GrConvolutionEffect>(); |
+ |
+ // Uploads one (offset, weight) pair per bilerp tap. No radius check is made here: this |
+ // class stores no radius of its own (GenKey() folds ce.radius() into the program key). |
+ |
+ // the code we generated was for a specific bounding mode. |
+ SkASSERT(!ce.useBounds()); |
+ |
+ int width = Gr1DKernelEffect::WidthFromRadius(ce.radius()); |
+ int sampleCount = bilerpSampleCount(width); |
+ SkAutoTArray<float> imageIncrements(sampleCount * 2); // X and Y floats per sample. |
+ SkAutoTArray<float> kernel(sampleCount); |
+ |
+ float baseImageIncrement[2]; |
+ getImageIncrement(ce, &baseImageIncrement); |
+ |
+ for (int i = 0; i < sampleCount; i++) { |
+ int sampleIndex1 = i * 2; |
+ int sampleIndex2 = sampleIndex1 + 1; |
+ |
+ // If we have an odd number of samples in our filter, the last sample won't use |
+ // the linear interpolation optimization (it will be pixel aligned). |
+ if (sampleIndex2 >= width) { |
+ sampleIndex2 = sampleIndex1; |
+ } |
+ |
+ float kernelWeight1 = ce.kernel()[sampleIndex1]; |
+ float kernelWeight2 = ce.kernel()[sampleIndex2]; |
+ |
+ float totalKernelWeight = |
+ (sampleIndex1 == sampleIndex2) ? kernelWeight1 : (kernelWeight1 + kernelWeight2); |
+ // Guard the division: a pair whose weights sum to zero would yield a NaN/inf offset. |
+ float sampleRatio = (sampleIndex1 == sampleIndex2 || 0.0f == totalKernelWeight) |
+ ? 0.0f : kernelWeight2 / totalKernelWeight; |
+ |
+ imageIncrements[i * 2] = (-ce.radius() + i * 2 + sampleRatio) * baseImageIncrement[0]; |
+ imageIncrements[i * 2 + 1] = |
+ (-ce.radius() + i * 2 + sampleRatio) * baseImageIncrement[1]; |
+ |
+ kernel[i] = totalKernelWeight; |
+ } |
+ pdman.set2fv(fSampleOffsetUni, sampleCount, imageIncrements.get()); |
+ pdman.set1fv(fSampleWeightUni, sampleCount, kernel.get()); |
+} |
+ |
+int GrGLLerpConvolutionEffect::bilerpSampleCount(int width) const { |
+ // We use a linear interpolation optimization to only sample once for each |
+ // two pixel aligned samples in the kernel. If we have an odd number of |
+ // samples, we will have to skip this optimization for the last sample. |
+ // Because of this we always round up our sample count (by adding 1 before |
+ // dividing). |
+ return (width + 1) / 2; |
} |
/////////////////////////////////////////////////////////////////////////////// |
@@ -149,7 +287,12 @@ GrConvolutionEffect::GrConvolutionEffect(GrTexture* texture, |
const float* kernel, |
bool useBounds, |
float bounds[2]) |
- : INHERITED(texture, direction, radius), fUseBounds(useBounds) { |
+ : INHERITED(texture, |
+ direction, |
+ radius, |
+ useBounds ? GrTextureParams::FilterMode::kNone_FilterMode |
+ : GrTextureParams::FilterMode::kBilerp_FilterMode) |
+ , fUseBounds(useBounds) { |
this->initClassID<GrConvolutionEffect>(); |
SkASSERT(radius <= kMaxKernelRadius); |
SkASSERT(kernel); |
@@ -166,7 +309,12 @@ GrConvolutionEffect::GrConvolutionEffect(GrTexture* texture, |
float gaussianSigma, |
bool useBounds, |
float bounds[2]) |
- : INHERITED(texture, direction, radius), fUseBounds(useBounds) { |
+ : INHERITED(texture, |
+ direction, |
+ radius, |
+ useBounds ? GrTextureParams::FilterMode::kNone_FilterMode |
+ : GrTextureParams::FilterMode::kBilerp_FilterMode) |
+ , fUseBounds(useBounds) { |
this->initClassID<GrConvolutionEffect>(); |
SkASSERT(radius <= kMaxKernelRadius); |
int width = this->width(); |
@@ -192,12 +340,20 @@ GrConvolutionEffect::~GrConvolutionEffect() { |
} |
void GrConvolutionEffect::onGetGLSLProcessorKey(const GrGLSLCaps& caps, |
- GrProcessorKeyBuilder* b) const { |
+ GrProcessorKeyBuilder* b) const { |
GrGLConvolutionEffect::GenKey(*this, caps, b); |
} |
GrGLSLFragmentProcessor* GrConvolutionEffect::onCreateGLSLInstance() const { |
- return new GrGLConvolutionEffect; |
+ // We support a linear interpolation optimization which (when feasible) uses |
+ // half the number of samples to apply the kernel. This is not always |
+ // applicable, as the linear interpolation optimization does not support |
+ // bounded sampling. |
+ if (this->useBounds()) { |
+ return new GrGLBoundedConvolutionEffect; |
+ } else { |
+ return new GrGLLerpConvolutionEffect; |
+ } |
} |
bool GrConvolutionEffect::onIsEqual(const GrFragmentProcessor& sBase) const { |