src/opts/SkMatrixConvolutionImageFilter_opts_AVX2.cpp - Issue 1881903004: Rewriting MatrixConvolution image filter with SSE and AVX2

Side by Side Diff: src/opts/SkMatrixConvolutionImageFilter_opts_AVX2.cpp

Issue 1881903004: Rewriting MatrixConvolution image filter with SSE and AVX2 Base URL: https://skia.googlesource.com/skia@master

Patch Set: Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« src/opts/SkMatrixConvolutionImageFilter_opts.h ('K') | « src/opts/SkMatrixConvolutionImageFilter_opts_AVX2.h ('k') | src/opts/SkMatrixConvolutionImageFilter_opts_SSE.h » ('j') | src/opts/SkMatrixConvolutionImageFilter_opts_SSE.cpp » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 /*

	2 * Copyright 2016 Google Inc.

	3 *

	4 * Use of this source code is governed by a BSD-style license that can be

	5 * found in the LICENSE file.

	6 */

	7

	8 #include "SkColor.h"

	9 #include "SkBitmap.h"

	10 #include "SkPoint.h"

	11 #include "SkColorPriv.h"

	12

	13 #include <immintrin.h>

	14

	15

	16 namespace sk_avx2 {

	17

	18 void SkMatrixConvolutionImageFilter_filterPixels_AVX2(

	19 const SkBitmap& src,

	20 SkBitmap* result,

	21 const SkIRect& r,

	22 const SkIRect& bounds,

	23 bool convolveAlpha,

	24 SkScalar* kernel,

	25 const SkISize& kernelSize,

	26 const SkIPoint& kernelOffset,

	27 SkScalar gain,

	28 SkScalar bias)

	29 {

	30 SkIRect rect(r);

	31 if (!rect.intersect(bounds)) {

	32 return;

	33 }

	34

	35 __m256i mask = _mm256_set_epi8(128,128,128,3, 128,128,128,2, 128,128,128,1, 128,128,128,0,

	36 128,128,128,3, 128,128,128,2, 128,128,128,1, 128,128,128,0);

	37 __m256i mask_odd = _mm256_set_epi8(128,128,128,128, 128,128,128,128, 128,128 ,128,128, 128,128,128,128,

	38 128,128,128,3, 128,128,128,2, 128,128,128 ,1, 128,128,128,0);

	39 int width_even = 2*(kernelSize.fWidth/2);

	40

	41 for (int y = rect.fTop; y < rect.fBottom; ++y) {

	42 SkPMColor* dptr = result->getAddr32(rect.fLeft - bounds.fLeft, y - bound s.fTop);

	43 for (int x = rect.fLeft; x < rect.fRight; ++x) {

	44 __m256 psum = _mm256_setzero_ps();

	45 for (int cy = 0; cy < kernelSize.fHeight; cy++) {

	46 int cx;

	47 for (cx = 0; cx < width_even ; cx+=2) {

	48 SkPMColor s1 = *src.getAddr32(

	49 x + cx - kernelOffset.fX,

	50 y + cy - kernelOffset.fY);

	51 SkPMColor s2 = *src.getAddr32(

	52 x + cx - kernelOffset.fX + 1,

	53 y + cy - kernelOffset.fY);

	54 __m256i ps = _mm256_set_epi32(0, 0, 0, s2, 0, 0, 0, s1);

	55 __m256i ps_sh = _mm256_shuffle_epi8(ps, mask);

	56 __m256 pss = _mm256_cvtepi32_ps(ps_sh);

	57

	58 SkScalar k1 = kernel[cy * kernelSize.fWidth + cx];

	59 SkScalar k2 = kernel[cy * kernelSize.fWidth + cx + 1];

	60 __m256 pk = _mm256_set_ps(k2,k2,k2,k2, k1,k1,k1,k1);

	61

	62 __m256 pmul = _mm256_mul_ps(pss, pk);

	63

	64 psum = _mm256_add_ps(psum, pmul);

	65 }

	66 if (cx < kernelSize.fWidth) {

	67 SkPMColor s1 = *src.getAddr32(

	68 x + cx - kernelOffset.fX,

	69 y + cy - kernelOffset.fY);

	70

	71 __m256i ps = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, s1);

	72 __m256i ps_sh = _mm256_shuffle_epi8(ps, mask_odd);

	73

	74 SkScalar k = kernel[cy * kernelSize.fWidth + cx];

	75 __m256 pk = _mm256_set1_ps(k);

	76

	77 __m256 pss = _mm256_cvtepi32_ps(ps_sh);

	78 __m256 pmul = _mm256_mul_ps(pss, pk);

	79

	80 psum = _mm256_add_ps(psum, pmul);

	81 }

	82 }

	83

	84 union {

	85 __m256 m256;

	86 float f[8];

	87 } conv = {psum};

	88

	89 SkScalar sumA, sumR, sumG, sumB;

	90

	91 sumA = conv.f[3] + conv.f[7];

	92 sumR = conv.f[2] + conv.f[6];

	93 sumG = conv.f[1] + conv.f[5];

	94 sumB = conv.f[0] + conv.f[4];

	95

	96 int a = convolveAlpha

	97 ? SkClampMax(SkScalarFloorToInt(SkScalarMul(sumA, gain) + bias), 255)

	98 : 255;

	99 int r = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumR, gain) + bias ), a);

	100 int g = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumG, gain) + bias ), a);

	101 int b = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumB, gain) + bias ), a);

	102 if (!convolveAlpha) {

	103 a = SkGetPackedA32(*src.getAddr32(x, y));

	104 *dptr++ = SkPreMultiplyARGB(a, r, g, b);

	105 } else {

	106 *dptr++ = SkPackARGB32(a, r, g, b);

	107 }

	108 }

	109 }

	110 }

	111

	112 }

OLD	NEW