src/opts/SkNx_sse.h - Issue 2480823002: skrpb: evaluate color filters for constant shaders once.

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: src/opts/SkNx_sse.h

Issue 2480823002: skrpb: evaluate color filters for constant shaders once. (Closed)

Patch Set: Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/opts/SkNx_sse.h

diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h

index a4594115e0ecff4e4223606da03c4ccba8fcbcd8..a4783c6302eec19f2231e852ced78ebdb67af588 100644

--- a/src/opts/SkNx_sse.h

+++ b/src/opts/SkNx_sse.h

@@ -544,6 +544,14 @@ public:

__m256i fVec;

};

+ // _mm256_unpack{lo,hi}_pd() auto-casting to and from __m256d.

+ AI static __m256 unpacklo_pd(__m256 x, __m256 y) {

+ return _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(x), _mm256_castps_pd(y)));

+ }

+ AI static __m256 unpackhi_pd(__m256 x, __m256 y) {

+ return _mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(x), _mm256_castps_pd(y)));

+ }

template <>

class SkNx<8, float> {

public:

@@ -560,6 +568,29 @@ public:

AI static SkNx Load(const void* ptr) { return _mm256_loadu_ps((const float*)ptr); }

AI void store(void* ptr) const { _mm256_storeu_ps((float*)ptr, fVec); }

+ AI static void Store4(void* ptr,

+ const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) {

+ __m256 rg0145 = _mm256_unpacklo_ps(r.fVec, g.fVec), // r0 g0 r1 g1 | r4 g4 r5 g5

+ rg2367 = _mm256_unpackhi_ps(r.fVec, g.fVec), // r2 ... | r6 ...

+ ba0145 = _mm256_unpacklo_ps(b.fVec, a.fVec), // b0 a0 b1 a1 | b4 a4 b5 a5

+ ba2367 = _mm256_unpackhi_ps(b.fVec, a.fVec); // b2 ... | b6 ...

+ __m256 _04 = unpacklo_pd(rg0145, ba0145), // r0 g0 b0 a0 | r4 g4 b4 a4

+ _15 = unpackhi_pd(rg0145, ba0145), // r1 ... | r5 ...

+ _26 = unpacklo_pd(rg2367, ba2367), // r2 ... | r6 ...

+ _37 = unpackhi_pd(rg2367, ba2367); // r3 ... | r7 ...

+ __m256 _01 = _mm256_permute2f128_ps(_04, _15, 16), // 16 == 010 000 == lo, lo

+ _23 = _mm256_permute2f128_ps(_26, _37, 16),

+ _45 = _mm256_permute2f128_ps(_04, _15, 25), // 25 == 011 001 == hi, hi

+ _67 = _mm256_permute2f128_ps(_26, _37, 25);

+ _mm256_storeu_ps((float*)ptr + 0*8, _01);

+ _mm256_storeu_ps((float*)ptr + 1*8, _23);

+ _mm256_storeu_ps((float*)ptr + 2*8, _45);

+ _mm256_storeu_ps((float*)ptr + 3*8, _67);

+ }

AI SkNx operator+(const SkNx& o) const { return _mm256_add_ps(fVec, o.fVec); }

AI SkNx operator-(const SkNx& o) const { return _mm256_sub_ps(fVec, o.fVec); }

AI SkNx operator*(const SkNx& o) const { return _mm256_mul_ps(fVec, o.fVec); }

« no previous file with comments | « src/core/SkRasterPipelineBlitter.cpp ('k') | src/opts/SkRasterPipeline_opts.h » ('j') | no next file with comments »