Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(86)

Side by Side Diff: src/opts/SkMatrixConvolutionImageFilter_opts_AVX2.cpp

Issue 1881903004: Rewriting MatrixConvolution image filter with SSE and AVX2 Base URL: https://skia.googlesource.com/skia@master
Patch Set: Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright 2016 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "SkColor.h"
9 #include "SkBitmap.h"
10 #include "SkPoint.h"
11 #include "SkColorPriv.h"
12
13 #include <immintrin.h>
14
15
16 namespace sk_avx2 {
17
18 void SkMatrixConvolutionImageFilter_filterPixels_AVX2(
19 const SkBitmap& src,
20 SkBitmap* result,
21 const SkIRect& r,
22 const SkIRect& bounds,
23 bool convolveAlpha,
24 SkScalar* kernel,
25 const SkISize& kernelSize,
26 const SkIPoint& kernelOffset,
27 SkScalar gain,
28 SkScalar bias)
29 {
30 SkIRect rect(r);
31 if (!rect.intersect(bounds)) {
32 return;
33 }
34
35 __m256i mask = _mm256_set_epi8(128,128,128,3, 128,128,128,2, 128,128,128,1, 128,128,128,0,
36 128,128,128,3, 128,128,128,2, 128,128,128,1, 128,128,128,0);
37 __m256i mask_odd = _mm256_set_epi8(128,128,128,128, 128,128,128,128, 128,128 ,128,128, 128,128,128,128,
38 128,128,128,3, 128,128,128,2, 128,128,128 ,1, 128,128,128,0);
39 int width_even = 2*(kernelSize.fWidth/2);
40
41 for (int y = rect.fTop; y < rect.fBottom; ++y) {
42 SkPMColor* dptr = result->getAddr32(rect.fLeft - bounds.fLeft, y - bound s.fTop);
43 for (int x = rect.fLeft; x < rect.fRight; ++x) {
44 __m256 psum = _mm256_setzero_ps();
45 for (int cy = 0; cy < kernelSize.fHeight; cy++) {
46 int cx;
47 for (cx = 0; cx < width_even ; cx+=2) {
48 SkPMColor s1 = *src.getAddr32(
49 x + cx - kernelOffset.fX,
50 y + cy - kernelOffset.fY);
51 SkPMColor s2 = *src.getAddr32(
52 x + cx - kernelOffset.fX + 1,
53 y + cy - kernelOffset.fY);
54 __m256i ps = _mm256_set_epi32(0, 0, 0, s2, 0, 0, 0, s1);
55 __m256i ps_sh = _mm256_shuffle_epi8(ps, mask);
56 __m256 pss = _mm256_cvtepi32_ps(ps_sh);
57
58 SkScalar k1 = kernel[cy * kernelSize.fWidth + cx];
59 SkScalar k2 = kernel[cy * kernelSize.fWidth + cx + 1];
60 __m256 pk = _mm256_set_ps(k2,k2,k2,k2, k1,k1,k1,k1);
61
62 __m256 pmul = _mm256_mul_ps(pss, pk);
63
64 psum = _mm256_add_ps(psum, pmul);
65 }
66 if (cx < kernelSize.fWidth) {
67 SkPMColor s1 = *src.getAddr32(
68 x + cx - kernelOffset.fX,
69 y + cy - kernelOffset.fY);
70
71 __m256i ps = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, s1);
72 __m256i ps_sh = _mm256_shuffle_epi8(ps, mask_odd);
73
74 SkScalar k = kernel[cy * kernelSize.fWidth + cx];
75 __m256 pk = _mm256_set1_ps(k);
76
77 __m256 pss = _mm256_cvtepi32_ps(ps_sh);
78 __m256 pmul = _mm256_mul_ps(pss, pk);
79
80 psum = _mm256_add_ps(psum, pmul);
81 }
82 }
83
84 union {
85 __m256 m256;
86 float f[8];
87 } conv = {psum};
88
89 SkScalar sumA, sumR, sumG, sumB;
90
91 sumA = conv.f[3] + conv.f[7];
92 sumR = conv.f[2] + conv.f[6];
93 sumG = conv.f[1] + conv.f[5];
94 sumB = conv.f[0] + conv.f[4];
95
96 int a = convolveAlpha
97 ? SkClampMax(SkScalarFloorToInt(SkScalarMul(sumA, gain) + bias), 255)
98 : 255;
99 int r = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumR, gain) + bias ), a);
100 int g = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumG, gain) + bias ), a);
101 int b = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumB, gain) + bias ), a);
102 if (!convolveAlpha) {
103 a = SkGetPackedA32(*src.getAddr32(x, y));
104 *dptr++ = SkPreMultiplyARGB(a, r, g, b);
105 } else {
106 *dptr++ = SkPackARGB32(a, r, g, b);
107 }
108 }
109 }
110 }
111
112 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698