Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(227)

Side by Side Diff: src/opts/SkMatrixConvolutionImageFilter_opts_SSE.cpp

Issue 1881903004: Rewriting MatrixConvolution image filter with SSE and AVX2 Base URL: https://skia.googlesource.com/skia@master
Patch Set: Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright 2016 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
#include <emmintrin.h>

#include "SkBitmap.h"
#include "SkColor.h"
#include "SkColorPriv.h"
#include "SkPoint.h"
12
13
14 namespace sk_sse {
15
16 void SkMatrixConvolutionImageFilter_filterPixels_SSE(const SkBitmap& src,
17 SkBitmap* result,
18 const SkIRect& r,
19 const SkIRect& bounds,
20 bool convolveAlpha,
21 SkScalar* kernel,
22 const SkISize& kernelSize,
23 const SkIPoint& kernelOffs et,
24 SkScalar gain,
25 SkScalar bias)
26 {
27 SkIRect rect(r);
28 if (!rect.intersect(bounds)) {
29 return;
30 }
31 for (int y = rect.fTop; y < rect.fBottom; ++y) {
32 SkPMColor* dptr = result->getAddr32(rect.fLeft - bounds.fLeft, y - bound s.fTop);
33 for (int x = rect.fLeft; x < rect.fRight; ++x) {
34 __m128 psum = _mm_setzero_ps();
35 for (int cy = 0; cy < kernelSize.fHeight; cy++) {
36 for (int cx = 0; cx < kernelSize.fWidth; cx++) {
37 SkPMColor s = *src.getAddr32(
38 x + cx - kernelOffset.fX,
39 y + cy - kernelOffset.fY);
40 __m128 pss = _mm_cvtpu8_ps(_mm_set_pi32 (0, s));
41
42 __m128 pk = _mm_set1_ps(kernel[cy * kernelSize.fWidth + cx]) ;
43
44 psum = _mm_add_ps(psum, _mm_mul_ps(pss, pk));
45 }
46 }
47
48 union {
49 __m128 m128;
50 float f[4];
51 } conv = {psum};
52
53 SkScalar sumA, sumR, sumG, sumB;
54
55 sumA = conv.f[3];
56 sumR = conv.f[2];
57 sumG = conv.f[1];
58 sumB = conv.f[0];
59
60 int a = convolveAlpha
61 ? SkClampMax(SkScalarFloorToInt(SkScalarMul(sumA, gain) + bias), 255)
Stephen White 2016/04/12 20:01:59 Using Sk4/Sk4f might also allow us to benefit from
62 : 255;
63 int r = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumR, gain) + bias ), a);
64 int g = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumG, gain) + bias ), a);
65 int b = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumB, gain) + bias ), a);
66 if (!convolveAlpha) {
67 a = SkGetPackedA32(*src.getAddr32(x, y));
68 *dptr++ = SkPreMultiplyARGB(a, r, g, b);
69 } else {
70 *dptr++ = SkPackARGB32(a, r, g, b);
71 }
72 }
73 }
74 }
75
76 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698