Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(98)

Unified Diff: src/opts/SkMatrixConvolutionImageFilter_opts_AVX2.cpp

Issue 1881903004: Rewriting MatrixConvolution image filter with SSE and AVX2 Base URL: https://skia.googlesource.com/skia@master
Patch Set: Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/opts/SkMatrixConvolutionImageFilter_opts_AVX2.cpp
diff --git a/src/opts/SkMatrixConvolutionImageFilter_opts_AVX2.cpp b/src/opts/SkMatrixConvolutionImageFilter_opts_AVX2.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..53ce2ff8cc8e43de80b23e2bd24827eff6e4cb8d
--- /dev/null
+++ b/src/opts/SkMatrixConvolutionImageFilter_opts_AVX2.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkColor.h"
+#include "SkBitmap.h"
+#include "SkPoint.h"
+#include "SkColorPriv.h"
+
+#include <immintrin.h>
+
+
+namespace sk_avx2 {
+
+/**
+ * AVX2 matrix convolution: filters the pixels of `src` inside `r` (clipped to `bounds`) and
+ * writes them to `result` at coordinates relative to the top-left corner of `bounds`.
+ *
+ * Each channel is SkClampMax(floor(sum * gain + bias), limit): limit is 255 for alpha and the
+ * clamped alpha for colors. With convolveAlpha false the color limit is 255 and the colors
+ * are then premultiplied by the alpha of the source pixel at (x, y). Source coordinates are
+ * not bounds-checked here. NOTE(review): presumably callers pass interior rects -- confirm.
+ */
+void SkMatrixConvolutionImageFilter_filterPixels_AVX2(
+        const SkBitmap& src,
+        SkBitmap* result,
+        const SkIRect& r,
+        const SkIRect& bounds,
+        bool convolveAlpha,
+        SkScalar* kernel,
+        const SkISize& kernelSize,
+        const SkIPoint& kernelOffset,
+        SkScalar gain,
+        SkScalar bias)
+{
+    SkIRect rect(r);
+    if (!rect.intersect(bounds)) {
+        return;
+    }
+
+    // Byte-shuffle control: in each 128-bit lane, byte i of the lane's low dword becomes the
+    // low byte of dword i; an index with the top bit set (0x80) writes a zero byte.
+    const char Z = static_cast<char>(0x80);  // a bare 128 is out of range for a signed char
+    const __m256i widen = _mm256_set_epi8(Z,Z,Z,3, Z,Z,Z,2, Z,Z,Z,1, Z,Z,Z,0,
+                                          Z,Z,Z,3, Z,Z,Z,2, Z,Z,Z,1, Z,Z,Z,0);
+    const int kw = kernelSize.fWidth;
+    const int pairedWidth = 2 * (kw / 2);  // columns that can be consumed two at a time
+
+    for (int y = rect.fTop; y < rect.fBottom; ++y) {
+        SkPMColor* dptr = result->getAddr32(rect.fLeft - bounds.fLeft, y - bounds.fTop);
+        for (int x = rect.fLeft; x < rect.fRight; ++x) {
+            // Low lane accumulates the even kernel columns, high lane the odd ones.
+            __m256 acc = _mm256_setzero_ps();
+            for (int cy = 0; cy < kernelSize.fHeight; cy++) {
+                const SkScalar* krow = kernel + cy * kw;
+                const int sx = x - kernelOffset.fX;
+                const int sy = y + cy - kernelOffset.fY;
+                int cx = 0;
+                for (; cx < pairedWidth; cx += 2) {
+                    const SkPMColor s1 = *src.getAddr32(sx + cx, sy);
+                    const SkPMColor s2 = *src.getAddr32(sx + cx + 1, sy);
+                    const __m256i px = _mm256_set_epi32(0, 0, 0, s2, 0, 0, 0, s1);
+                    const __m256 pxf = _mm256_cvtepi32_ps(_mm256_shuffle_epi8(px, widen));
+                    const SkScalar k1 = krow[cx];
+                    const SkScalar k2 = krow[cx + 1];
+                    const __m256 pk = _mm256_set_ps(k2, k2, k2, k2, k1, k1, k1, k1);
+                    acc = _mm256_add_ps(acc, _mm256_mul_ps(pxf, pk));
+                }
+                if (cx < kw) {
+                    // Odd width: the last column has no partner. The high lane of px is
+                    // zero, so the same shuffle control leaves it zero.
+                    const SkPMColor s1 = *src.getAddr32(sx + cx, sy);
+                    const __m256i px = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, s1);
+                    const __m256 pxf = _mm256_cvtepi32_ps(_mm256_shuffle_epi8(px, widen));
+                    const __m256 pk = _mm256_set1_ps(krow[cx]);
+                    acc = _mm256_add_ps(acc, _mm256_mul_ps(pxf, pk));
+                }
+            }
+
+            // Fold the high lane onto the low lane and spill through a store instead of
+            // reading an inactive union member.
+            float sum[4];
+            _mm_storeu_ps(sum, _mm_add_ps(_mm256_castps256_ps128(acc),
+                                          _mm256_extractf128_ps(acc, 1)));
+
+            // sum[i] belongs to the byte at bit offset 8*i of SkPMColor, so pick channels
+            // with the SK_*32_SHIFT macros rather than assuming a fixed byte order.
+            const SkScalar sumA = sum[SK_A32_SHIFT / 8];
+            const SkScalar sumR = sum[SK_R32_SHIFT / 8];
+            const SkScalar sumG = sum[SK_G32_SHIFT / 8];
+            const SkScalar sumB = sum[SK_B32_SHIFT / 8];
+
+            int a = convolveAlpha
+                  ? SkClampMax(SkScalarFloorToInt(SkScalarMul(sumA, gain) + bias), 255)
+                  : 255;
+            const int red = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumR, gain) + bias), a);
+            const int grn = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumG, gain) + bias), a);
+            const int blu = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumB, gain) + bias), a);
+            if (!convolveAlpha) {
+                a = SkGetPackedA32(*src.getAddr32(x, y));
+                *dptr++ = SkPreMultiplyARGB(a, red, grn, blu);
+            } else {
+                *dptr++ = SkPackARGB32(a, red, grn, blu);
+            }
+        }
+    }
+}
+
+}

Powered by Google App Engine
This is Rietveld 408576698