Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(944)

Unified Diff: src/opts/SkNx_sse.h

Issue 2480823002: skrpb: evaluate color filters for constant shaders once. (Closed)
Patch Set: Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/core/SkRasterPipelineBlitter.cpp ('k') | src/opts/SkRasterPipeline_opts.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/opts/SkNx_sse.h
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index a4594115e0ecff4e4223606da03c4ccba8fcbcd8..a4783c6302eec19f2231e852ced78ebdb67af588 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -544,6 +544,14 @@ public:
__m256i fVec;
};
+ // _mm256_unpack{lo,hi}_pd() auto-casting to and from __m256d.
+ // Applies _mm256_unpacklo_pd() to two float vectors: both inputs are
+ // reinterpreted as __m256d, unpacked, and the result reinterpreted as __m256.
+ AI static __m256 unpacklo_pd(__m256 x, __m256 y) {
+     const __m256d xd = _mm256_castps_pd(x),
+                   yd = _mm256_castps_pd(y);
+     const __m256d lo = _mm256_unpacklo_pd(xd, yd);
+     return _mm256_castpd_ps(lo);
+ }
+ // Applies _mm256_unpackhi_pd() to two float vectors: both inputs are
+ // reinterpreted as __m256d, unpacked, and the result reinterpreted as __m256.
+ AI static __m256 unpackhi_pd(__m256 x, __m256 y) {
+     const __m256d xd = _mm256_castps_pd(x),
+                   yd = _mm256_castps_pd(y);
+     const __m256d hi = _mm256_unpackhi_pd(xd, yd);
+     return _mm256_castpd_ps(hi);
+ }
+
template <>
class SkNx<8, float> {
public:
@@ -560,6 +568,29 @@ public:
// Builds an SkNx from the floats at ptr using _mm256_loadu_ps (unaligned load).
AI static SkNx Load(const void* ptr) {
    const float* src = (const float*)ptr;
    return _mm256_loadu_ps(src);
}
// Writes fVec to ptr using _mm256_storeu_ps (unaligned store).
AI void store(void* ptr) const {
    float* dst = (float*)ptr;
    _mm256_storeu_ps(dst, fVec);
}
+ // Interleaves the planar vectors r, g, b, a and writes them to ptr as
+ // consecutive {r,g,b,a} groups: pixel 0 first, pixel 7 last, 32 floats in
+ // total, using four unaligned 256-bit stores.
+ AI static void Store4(void* ptr,
+                       const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) {
+     // Step 1: pair r with g and b with a inside each 128-bit lane.
+     __m256 rg0145 = _mm256_unpacklo_ps(r.fVec, g.fVec),  // r0 g0 r1 g1 | r4 g4 r5 g5
+            rg2367 = _mm256_unpackhi_ps(r.fVec, g.fVec),  // r2 ...      | r6 ...
+            ba0145 = _mm256_unpacklo_ps(b.fVec, a.fVec),  // b0 a0 b1 a1 | b4 a4 b5 a5
+            ba2367 = _mm256_unpackhi_ps(b.fVec, a.fVec);  // b2 ...      | b6 ...
+
+     // Step 2: join each rg pair with its ba pair to form whole pixels, still
+     // split across lanes (pixel n in the low lane, pixel n+4 in the high lane).
+     __m256 _04 = unpacklo_pd(rg0145, ba0145),  // r0 g0 b0 a0 | r4 g4 b4 a4
+            _15 = unpackhi_pd(rg0145, ba0145),  // r1 ...      | r5 ...
+            _26 = unpacklo_pd(rg2367, ba2367),  // r2 ...      | r6 ...
+            _37 = unpackhi_pd(rg2367, ba2367);  // r3 ...      | r7 ...
+
+     // Step 3: regroup the 128-bit halves so adjacent pixels share a register.
+     // The selector is two 4-bit fields: imm[3:0] picks the low half of the
+     // result and imm[7:4] the high half.  Field values 0/1 mean lo/hi of the
+     // first operand, 2/3 mean lo/hi of the second, and bit 3 of a field zeroes
+     // that half.  (The fields are nibbles, not 3-bit groups: 16 would return the
+     // first operand unchanged and 25 would zero the low half.)
+     __m256 _01 = _mm256_permute2f128_ps(_04, _15, 0x20),  // 0x20 == 0010 0000 == lo, lo
+            _23 = _mm256_permute2f128_ps(_26, _37, 0x20),
+            _45 = _mm256_permute2f128_ps(_04, _15, 0x31),  // 0x31 == 0011 0001 == hi, hi
+            _67 = _mm256_permute2f128_ps(_26, _37, 0x31);
+
+     // Step 4: each register holds two pixels (8 floats); store them in order.
+     _mm256_storeu_ps((float*)ptr + 0*8, _01);
+     _mm256_storeu_ps((float*)ptr + 1*8, _23);
+     _mm256_storeu_ps((float*)ptr + 2*8, _45);
+     _mm256_storeu_ps((float*)ptr + 3*8, _67);
+ }
+
// Lane-wise sum of this vector and o, computed with _mm256_add_ps.
AI SkNx operator+(const SkNx& o) const {
    const __m256 sum = _mm256_add_ps(fVec, o.fVec);
    return sum;
}
// Lane-wise difference (this minus o), computed with _mm256_sub_ps.
AI SkNx operator-(const SkNx& o) const {
    const __m256 difference = _mm256_sub_ps(fVec, o.fVec);
    return difference;
}
// Lane-wise product of this vector and o, computed with _mm256_mul_ps.
AI SkNx operator*(const SkNx& o) const {
    const __m256 product = _mm256_mul_ps(fVec, o.fVec);
    return product;
}
« no previous file with comments | « src/core/SkRasterPipelineBlitter.cpp ('k') | src/opts/SkRasterPipeline_opts.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698