src/opts/SkMatrixConvolutionImageFilter_opts.h - Issue 1881903004: Rewriting MatrixConvolution image filter with SSE and AVX2

Side by Side Diff

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Side by Side Diff: src/opts/SkMatrixConvolutionImageFilter_opts.h

Issue 1881903004: Rewriting MatrixConvolution image filter with SSE and AVX2 Base URL: https://skia.googlesource.com/skia@master

Patch Set: Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« src/effects/SkMatrixConvolutionImageFilter.cpp ('K') | « src/effects/SkMatrixConvolutionImageFilter.cpp ('k') | src/opts/SkMatrixConvolutionImageFilter_opts_AVX2.h » ('j') | src/opts/SkMatrixConvolutionImageFilter_opts_SSE.cpp » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 /*

	2 * Copyright 2016 Google Inc.

	3 *

	4 * Use of this source code is governed by a BSD-style license that can be

	5 * found in the LICENSE file.

	6 */

	7

	8 #ifndef SkMatrixConvolutionImageFilter_opts_DEFINED

	9 #define SkMatrixConvolutionImageFilter_opts_DEFINED

	10

	11 #include "SkScalar.h"

	12 #include "SkSize.h"

	13 #include "SkPoint.h"

	14

	15

	16 namespace SK_OPTS_NS {

	17

	18 static void SkMatrixConvolutionImageFilter_filterPixels_none(

	19 const SkBitmap& src,

	20 SkBitmap* result,

	21 const SkIRect& r,

	22 const SkIRect& bounds,

	23 bool convolveAlpha,

	24 SkScalar* kernel,

	25 const SkISize& kernelSize,

	26 const SkIPoint& kernelOffset,

	27 SkScalar gain,

	28 SkScalar bias)

	29 {

	30 SkIRect rect(r);

	31 if (!rect.intersect(bounds)) {

	32 return;

	33 }

	34 for (int y = rect.fTop; y < rect.fBottom; ++y) {

	35 SkPMColor* dptr = result->getAddr32(rect.fLeft - bounds.fLeft, y - bounds.fTop);

	36 for (int x = rect.fLeft; x < rect.fRight; ++x) {

	37 SkScalar sumA = 0, sumR = 0, sumG = 0, sumB = 0;

	38 for (int cy = 0; cy < kernelSize.fHeight; cy++) {

	39 for (int cx = 0; cx < kernelSize.fWidth; cx++) {

	40 SkPMColor s = *src.getAddr32(x + cx - kernelOffset.fX,
	Stephen White 2016/04/12 20:01:58 Rather than maintain a no-opts and SSE2 flavour, another way to do this might be to use the Sk4f classes, which give SSE2 and NEON automatically. The Sk4f version could be done directly in SkMatrixConvolutionImageFilter, and the AVX2 version could be switched at runtime.
	41 y + cy - kernelOffset.fY);

	42 SkScalar k = kernel[cy * kernelSize.fWidth + cx];

	43

	44 // Calculate sumA in spite of convolveAlpha value. It's more

	45 // likely a compiler does SLP than if convolveAlpha is a

	46 // template argument. if alpha isn't really necessary

	47 // (convolveAlpha == false), case will be handled after the loop.

	48 sumA += SkScalarMul(SkIntToScalar(SkGetPackedA32(s)), k);

	49 sumR += SkScalarMul(SkIntToScalar(SkGetPackedR32(s)), k);

	50 sumG += SkScalarMul(SkIntToScalar(SkGetPackedG32(s)), k);

	51 sumB += SkScalarMul(SkIntToScalar(SkGetPackedB32(s)), k);

	52 }

	53 }

	54 int a = convolveAlpha

	55 ? SkClampMax(SkScalarFloorToInt(SkScalarMul(sumA, gain) + bias), 255)

	56 : 255;

	57 int r = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumR, gain) + bias), a);

	58 int g = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumG, gain) + bias), a);

	59 int b = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumB, gain) + bias), a);

	60 if (!convolveAlpha) {

	61 a = SkGetPackedA32(*src.getAddr32(x, y));

	62 *dptr++ = SkPreMultiplyARGB(a, r, g, b);

	63 } else {

	64 *dptr++ = SkPackARGB32(a, r, g, b);

	65 }

	66 }

	67 }

	68 }

	69

	70 }

	71 #endif

OLD	NEW