src/opts/SkMatrixConvolutionImageFilter_opts_SSE.cpp - Issue 1881903004: Rewriting MatrixConvolution image filter with SSE and AVX2

Side by Side Diff

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Side by Side Diff: src/opts/SkMatrixConvolutionImageFilter_opts_SSE.cpp

Issue 1881903004: Rewriting MatrixConvolution image filter with SSE and AVX2 Base URL: https://skia.googlesource.com/skia@master

Patch Set: Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 /*

	2 * Copyright 2016 Google Inc.

	3 *

	4 * Use of this source code is governed by a BSD-style license that can be

	5 * found in the LICENSE file.

	6 */

	7

	8 #include "SkColor.h"

	9 #include "SkBitmap.h"

	10 #include "SkPoint.h"

	11 #include "SkColorPriv.h"

	12

	13

	14 namespace sk_sse {

	15

	16 void SkMatrixConvolutionImageFilter_filterPixels_SSE(const SkBitmap& src,

	17 SkBitmap* result,

	18 const SkIRect& r,

	19 const SkIRect& bounds,

	20 bool convolveAlpha,

	21 SkScalar* kernel,

	22 const SkISize& kernelSize,

	23 const SkIPoint& kernelOffs et,

	24 SkScalar gain,

	25 SkScalar bias)

	26 {

	27 SkIRect rect(r);

	28 if (!rect.intersect(bounds)) {

	29 return;

	30 }

	31 for (int y = rect.fTop; y < rect.fBottom; ++y) {

	32 SkPMColor* dptr = result->getAddr32(rect.fLeft - bounds.fLeft, y - bound s.fTop);

	33 for (int x = rect.fLeft; x < rect.fRight; ++x) {

	34 __m128 psum = _mm_setzero_ps();

	35 for (int cy = 0; cy < kernelSize.fHeight; cy++) {

	36 for (int cx = 0; cx < kernelSize.fWidth; cx++) {

	37 SkPMColor s = *src.getAddr32(

	38 x + cx - kernelOffset.fX,

	39 y + cy - kernelOffset.fY);

	40 __m128 pss = _mm_cvtpu8_ps(_mm_set_pi32 (0, s));

	41

	42 __m128 pk = _mm_set1_ps(kernel[cy * kernelSize.fWidth + cx]) ;

	43

	44 psum = _mm_add_ps(psum, _mm_mul_ps(pss, pk));

	45 }

	46 }

	47

	48 union {

	49 __m128 m128;

	50 float f[4];

	51 } conv = {psum};

	52

	53 SkScalar sumA, sumR, sumG, sumB;

	54

	55 sumA = conv.f[3];

	56 sumR = conv.f[2];

	57 sumG = conv.f[1];

	58 sumB = conv.f[0];

	59

	60 int a = convolveAlpha

	61 ? SkClampMax(SkScalarFloorToInt(SkScalarMul(sumA, gain) + bias), 255)
	Stephen White 2016/04/12 20:01:59 Using Sk4/Sk4f might also allow us to benefit from Using Sk4/Sk4f might also allow us to benefit from SSE2 integer ops here, instead of resorting to scalar.
	62 : 255;

	63 int r = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumR, gain) + bias ), a);

	64 int g = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumG, gain) + bias ), a);

	65 int b = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumB, gain) + bias ), a);

	66 if (!convolveAlpha) {

	67 a = SkGetPackedA32(*src.getAddr32(x, y));

	68 *dptr++ = SkPreMultiplyARGB(a, r, g, b);

	69 } else {

	70 *dptr++ = SkPackARGB32(a, r, g, b);

	71 }

	72 }

	73 }

	74 }

	75

	76 }

OLD	NEW

« src/opts/SkMatrixConvolutionImageFilter_opts.h ('K') | « src/opts/SkMatrixConvolutionImageFilter_opts_SSE.h ('k') | src/opts/SkOpts_avx2.cpp » ('j') | no next file with comments »