Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(551)

Side by Side Diff: src/opts/SkNx_sse.h

Issue 2480823002: skrpb: evaluate color filters for constant shaders once. (Closed)
Patch Set: Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/core/SkRasterPipelineBlitter.cpp ('k') | src/opts/SkRasterPipeline_opts.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_sse_DEFINED 8 #ifndef SkNx_sse_DEFINED
9 #define SkNx_sse_DEFINED 9 #define SkNx_sse_DEFINED
10 10
(...skipping 526 matching lines...) Expand 10 before | Expand all | Expand 10 after
537 537
538 AI uint32_t operator[](int k) const { 538 AI uint32_t operator[](int k) const {
539 SkASSERT(0 <= k && k < 8); 539 SkASSERT(0 <= k && k < 8);
540 union { __m256i v; uint32_t us[8]; } pun = {fVec}; 540 union { __m256i v; uint32_t us[8]; } pun = {fVec};
541 return pun.us[k&7]; 541 return pun.us[k&7];
542 } 542 }
543 543
544 __m256i fVec; 544 __m256i fVec;
545 }; 545 };
546 546
547 // _mm256_unpack{lo,hi}_pd() auto-casting to and from __m256d.
548 AI static __m256 unpacklo_pd(__m256 x, __m256 y) {
549 return _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(x), _mm256_c astps_pd(y)));
550 }
551 AI static __m256 unpackhi_pd(__m256 x, __m256 y) {
552 return _mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(x), _mm256_c astps_pd(y)));
553 }
554
547 template <> 555 template <>
548 class SkNx<8, float> { 556 class SkNx<8, float> {
549 public: 557 public:
550 AI SkNx(const __m256& vec) : fVec(vec) {} 558 AI SkNx(const __m256& vec) : fVec(vec) {}
551 559
552 AI SkNx() {} 560 AI SkNx() {}
553 AI SkNx(float val) : fVec(_mm256_set1_ps(val)) {} 561 AI SkNx(float val) : fVec(_mm256_set1_ps(val)) {}
554 AI SkNx(float a, float b, float c, float d, 562 AI SkNx(float a, float b, float c, float d,
555 float e, float f, float g, float h) : fVec(_mm256_setr_ps(a,b,c,d,e,f,g,h)) {} 563 float e, float f, float g, float h) : fVec(_mm256_setr_ps(a,b,c,d,e,f,g,h)) {}
556 564
557 AI SkNx(const SkNx_abi<8,float>& a) : fVec(a.vec) {} 565 AI SkNx(const SkNx_abi<8,float>& a) : fVec(a.vec) {}
558 AI operator SkNx_abi<8,float>() const { return { fVec }; } 566 AI operator SkNx_abi<8,float>() const { return { fVec }; }
559 567
560 AI static SkNx Load(const void* ptr) { return _mm256_loadu_ps((const float*)ptr); } 568 AI static SkNx Load(const void* ptr) { return _mm256_loadu_ps((const float*)ptr); }
561 AI void store(void* ptr) const { _mm256_storeu_ps((float*)ptr, fVec); } 569 AI void store(void* ptr) const { _mm256_storeu_ps((float*)ptr, fVec); }
562 570
571 AI static void Store4(void* ptr,
572 const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) {
573 __m256 rg0145 = _mm256_unpacklo_ps(r.fVec, g.fVec), // r0 g0 r1 g1 | r4 g4 r5 g5
574 rg2367 = _mm256_unpackhi_ps(r.fVec, g.fVec), // r2 ... | r6 ...
575 ba0145 = _mm256_unpacklo_ps(b.fVec, a.fVec), // b0 a0 b1 a1 | b4 a4 b5 a5
576 ba2367 = _mm256_unpackhi_ps(b.fVec, a.fVec); // b2 ... | b6 ...
577
578 __m256 _04 = unpacklo_pd(rg0145, ba0145), // r0 g0 b0 a0 | r4 g4 b4 a4
579 _15 = unpackhi_pd(rg0145, ba0145), // r1 ... | r5 ...
580 _26 = unpacklo_pd(rg2367, ba2367), // r2 ... | r6 ...
581 _37 = unpackhi_pd(rg2367, ba2367); // r3 ... | r7 ...
582
583 __m256 _01 = _mm256_permute2f128_ps(_04, _15, 16), // 16 == 010 000 == lo, lo
584 _23 = _mm256_permute2f128_ps(_26, _37, 16),
585 _45 = _mm256_permute2f128_ps(_04, _15, 25), // 25 == 011 001 == hi, hi
586 _67 = _mm256_permute2f128_ps(_26, _37, 25);
587
588 _mm256_storeu_ps((float*)ptr + 0*8, _01);
589 _mm256_storeu_ps((float*)ptr + 1*8, _23);
590 _mm256_storeu_ps((float*)ptr + 2*8, _45);
591 _mm256_storeu_ps((float*)ptr + 3*8, _67);
592 }
593
563 AI SkNx operator+(const SkNx& o) const { return _mm256_add_ps(fVec, o.fVec); } 594 AI SkNx operator+(const SkNx& o) const { return _mm256_add_ps(fVec, o.fVec); }
564 AI SkNx operator-(const SkNx& o) const { return _mm256_sub_ps(fVec, o.fVec); } 595 AI SkNx operator-(const SkNx& o) const { return _mm256_sub_ps(fVec, o.fVec); }
565 AI SkNx operator*(const SkNx& o) const { return _mm256_mul_ps(fVec, o.fVec); } 596 AI SkNx operator*(const SkNx& o) const { return _mm256_mul_ps(fVec, o.fVec); }
566 AI SkNx operator/(const SkNx& o) const { return _mm256_div_ps(fVec, o.fVec); } 597 AI SkNx operator/(const SkNx& o) const { return _mm256_div_ps(fVec, o.fVec); }
567 598
568 AI SkNx operator==(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_EQ_OQ); } 599 AI SkNx operator==(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_EQ_OQ); }
569 AI SkNx operator!=(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_NEQ_OQ); } 600 AI SkNx operator!=(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_NEQ_OQ); }
570 AI SkNx operator <(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_LT_OQ); } 601 AI SkNx operator <(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_LT_OQ); }
571 AI SkNx operator >(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_GT_OQ); } 602 AI SkNx operator >(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_GT_OQ); }
572 AI SkNx operator<=(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_LE_OQ); } 603 AI SkNx operator<=(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_LE_OQ); }
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after
719 return src.fVec; 750 return src.fVec;
720 } 751 }
721 752
722 AI static Sk4i Sk4f_round(const Sk4f& x) { 753 AI static Sk4i Sk4f_round(const Sk4f& x) {
723 return _mm_cvtps_epi32(x.fVec); 754 return _mm_cvtps_epi32(x.fVec);
724 } 755 }
725 756
726 } // namespace 757 } // namespace
727 758
728 #endif//SkNx_sse_DEFINED 759 #endif//SkNx_sse_DEFINED
OLDNEW
« no previous file with comments | « src/core/SkRasterPipelineBlitter.cpp ('k') | src/opts/SkRasterPipeline_opts.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698