Index: src/opts/SkMatrixConvolutionImageFilter_opts.h |
diff --git a/src/opts/SkMatrixConvolutionImageFilter_opts.h b/src/opts/SkMatrixConvolutionImageFilter_opts.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..c472e851295c8032c794ba5b167c1b7f1e747d4c |
--- /dev/null |
+++ b/src/opts/SkMatrixConvolutionImageFilter_opts.h |
@@ -0,0 +1,71 @@ |
+/* |
+ * Copyright 2016 Google Inc. |
+ * |
+ * Use of this source code is governed by a BSD-style license that can be |
+ * found in the LICENSE file. |
+ */ |
+ |
+#ifndef SkMatrixConvolutionImageFilter_opts_DEFINED |
+#define SkMatrixConvolutionImageFilter_opts_DEFINED |
+ |
+#include "SkScalar.h" |
+#include "SkSize.h" |
+#include "SkPoint.h" |
+ |
+ |
+namespace SK_OPTS_NS { |
+ /* Convolves each pixel in the intersection of r and bounds with kernel, applies gain and bias, and stores the packed color in result. */ |
+static void SkMatrixConvolutionImageFilter_filterPixels_none( |
+ const SkBitmap& src, /* source pixels, read with getAddr32() at the kernel-tap coordinates */ |
+ SkBitmap* result, /* destination; pixel (x, y) is written at (x - bounds.fLeft, y - bounds.fTop) */ |
+ const SkIRect& r, /* region to filter; clipped against bounds before use */ |
+ const SkIRect& bounds, /* clip for r and origin for addressing result */ |
+ bool convolveAlpha, /* true: alpha is convolved like the colors; false: alpha is copied from src at (x, y) */ |
+ SkScalar* kernel, /* kernelSize.fWidth * kernelSize.fHeight weights, indexed row by row */ |
+ const SkISize& kernelSize, /* kernel dimensions in taps */ |
+ const SkIPoint& kernelOffset, /* subtracted from the tap coordinates (x + cx, y + cy) */ |
+ SkScalar gain, /* multiplies every channel sum */ |
+ SkScalar bias) /* added after gain, before flooring to int */ |
+{ |
+ SkIRect rect(r); |
+ if (!rect.intersect(bounds)) { /* intersect() reported no overlap: nothing to write */ |
+ return; |
+ } |
+ for (int y = rect.fTop; y < rect.fBottom; ++y) { |
+ SkPMColor* dptr = result->getAddr32(rect.fLeft - bounds.fLeft, y - bounds.fTop); /* start of this output row, relative to bounds' top-left */ |
+ for (int x = rect.fLeft; x < rect.fRight; ++x) { |
+ SkScalar sumA = 0, sumR = 0, sumG = 0, sumB = 0; |
+ for (int cy = 0; cy < kernelSize.fHeight; cy++) { |
+ for (int cx = 0; cx < kernelSize.fWidth; cx++) { |
+ SkPMColor s = *src.getAddr32(x + cx - kernelOffset.fX, |
Stephen White
2016/04/12 20:01:58
Rather than maintain a no-opts and SSE2 flavour, a
|
+ y + cy - kernelOffset.fY); /* NOTE(review): no range check is done here - confirm callers keep every tap inside src */ |
+ SkScalar k = kernel[cy * kernelSize.fWidth + cx]; /* weight for tap (cx, cy) */ |
+ |
+ // Accumulate sumA regardless of convolveAlpha. Keeping all four |
+ // channel sums uniform makes it more likely that the compiler applies |
+ // SLP vectorization than if convolveAlpha were a template argument. |
+ // When convolveAlpha is false, sumA is simply unused after the loop. |
+ sumA += SkScalarMul(SkIntToScalar(SkGetPackedA32(s)), k); |
+ sumR += SkScalarMul(SkIntToScalar(SkGetPackedR32(s)), k); |
+ sumG += SkScalarMul(SkIntToScalar(SkGetPackedG32(s)), k); |
+ sumB += SkScalarMul(SkIntToScalar(SkGetPackedB32(s)), k); |
+ } |
+ } |
+ int a = convolveAlpha |
+ ? SkClampMax(SkScalarFloorToInt(SkScalarMul(sumA, gain) + bias), 255) |
+ : 255; /* with convolveAlpha off, 255 only serves as the color clamp limit below */ |
+ int r = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumR, gain) + bias), a); /* a is the upper limit passed to SkClampMax for each color channel */ |
+ int g = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumG, gain) + bias), a); |
+ int b = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumB, gain) + bias), a); |
+ if (!convolveAlpha) { |
+ a = SkGetPackedA32(*src.getAddr32(x, y)); /* output alpha is taken unchanged from the source pixel at (x, y) */ |
+ *dptr++ = SkPreMultiplyARGB(a, r, g, b); /* presumably premultiplies r, g, b by the source alpha - confirm against SkPreMultiplyARGB */ |
+ } else { |
+ *dptr++ = SkPackARGB32(a, r, g, b); /* channels were already limited by a above; packed as-is */ |
+ } |
+ } |
+ } |
+} |
+ |
+} |
+#endif |