OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
10 | 10 |
(...skipping 526 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
537 | 537 |
// Scalar read of lane k from the 8 x uint32_t vector.
AI uint32_t operator[](int k) const {
    SkASSERT(0 <= k && k < 8);
    // Type-pun the AVX register through a union to pick out one 32-bit lane.
    union { __m256i v; uint32_t us[8]; } pun = {fVec};
    // k&7 keeps the index in bounds even when SkASSERT compiles away.
    return pun.us[k&7];
}
543 | 543 |
544 __m256i fVec; | 544 __m256i fVec; |
545 }; | 545 }; |
546 | 546 |
// _mm256_unpack{lo,hi}_pd() auto-casting to and from __m256d.
// Viewing each float vector as 4 doubles makes unpack interleave 64-bit
// chunks, i.e. adjacent float *pairs*, within each 128-bit half.
AI static __m256 unpacklo_pd(__m256 x, __m256 y) {
    return _mm256_castpd_ps(_mm256_unpacklo_pd(_mm256_castps_pd(x), _mm256_castps_pd(y)));
}
// As unpacklo_pd, but interleaves the high 64-bit chunk (high float pair)
// of each 128-bit half instead of the low one.
AI static __m256 unpackhi_pd(__m256 x, __m256 y) {
    return _mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(x), _mm256_castps_pd(y)));
}
547 template <> | 555 template <> |
548 class SkNx<8, float> { | 556 class SkNx<8, float> { |
549 public: | 557 public: |
// Wrap a raw AVX register.
AI SkNx(const __m256& vec) : fVec(vec) {}

AI SkNx() {}  // intentionally uninitialized
AI SkNx(float val) : fVec(_mm256_set1_ps(val)) {}  // broadcast val to all 8 lanes
// Lane order: a is lane 0 ... h is lane 7 (setr == "reversed" set, i.e. memory order).
AI SkNx(float a, float b, float c, float d,
        float e, float f, float g, float h) : fVec(_mm256_setr_ps(a,b,c,d,e,f,g,h)) {}

// Conversions to/from the ABI-stable wrapper struct.
AI SkNx(const SkNx_abi<8,float>& a) : fVec(a.vec) {}
AI operator SkNx_abi<8,float>() const { return { fVec }; }
559 | 567 |
// Unaligned load/store of 8 floats (32 bytes) — ptr need not be 32-byte aligned.
AI static SkNx Load(const void* ptr) { return _mm256_loadu_ps((const float*)ptr); }
AI void store(void* ptr) const { _mm256_storeu_ps((float*)ptr, fVec); }
562 | 570 |
// Interleaved store of four 8-wide planes (an 8x4 transpose): writes
// r0 g0 b0 a0 r1 g1 b1 a1 ... r7 g7 b7 a7 into 32 consecutive floats at ptr.
AI static void Store4(void* ptr,
                      const SkNx& r, const SkNx& g, const SkNx& b, const SkNx& a) {
    // Step 1: interleave r/g and b/a 32-bit lanes within each 128-bit half.
    __m256 rg0145 = _mm256_unpacklo_ps(r.fVec, g.fVec),  // r0 g0 r1 g1 | r4 g4 r5 g5
           rg2367 = _mm256_unpackhi_ps(r.fVec, g.fVec),  // r2 ...      | r6 ...
           ba0145 = _mm256_unpacklo_ps(b.fVec, a.fVec),  // b0 a0 b1 a1 | b4 a4 b5 a5
           ba2367 = _mm256_unpackhi_ps(b.fVec, a.fVec);  // b2 ...      | b6 ...

    // Step 2: pair up rg/ba 64-bit chunks into whole pixels, still split lo|hi half.
    __m256 _04 = unpacklo_pd(rg0145, ba0145),  // r0 g0 b0 a0 | r4 g4 b4 a4
           _15 = unpackhi_pd(rg0145, ba0145),  // r1 ...      | r5 ...
           _26 = unpacklo_pd(rg2367, ba2367),  // r2 ...      | r6 ...
           _37 = unpackhi_pd(rg2367, ba2367);  // r3 ...      | r7 ...

    // Step 3: shuffle 128-bit halves into final memory order.
    __m256 _01 = _mm256_permute2f128_ps(_04, _15, 16),  // 16 == 010 000 == lo, lo
           _23 = _mm256_permute2f128_ps(_26, _37, 16),
           _45 = _mm256_permute2f128_ps(_04, _15, 25),  // 25 == 011 001 == hi, hi
           _67 = _mm256_permute2f128_ps(_26, _37, 25);

    _mm256_storeu_ps((float*)ptr + 0*8, _01);
    _mm256_storeu_ps((float*)ptr + 1*8, _23);
    _mm256_storeu_ps((float*)ptr + 2*8, _45);
    _mm256_storeu_ps((float*)ptr + 3*8, _67);
}
| 593 |
// Lane-wise arithmetic on all 8 float lanes.
AI SkNx operator+(const SkNx& o) const { return _mm256_add_ps(fVec, o.fVec); }
AI SkNx operator-(const SkNx& o) const { return _mm256_sub_ps(fVec, o.fVec); }
AI SkNx operator*(const SkNx& o) const { return _mm256_mul_ps(fVec, o.fVec); }
AI SkNx operator/(const SkNx& o) const { return _mm256_div_ps(fVec, o.fVec); }
567 | 598 |
// Lane-wise equality: all-1s mask in lanes where equal, all-0s otherwise
// (ordered-quiet predicate: NaN in either lane compares false).
AI SkNx operator==(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_EQ_OQ); }
569 AI SkNx operator!=(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.f
Vec, _CMP_NEQ_OQ); } | 600 AI SkNx operator!=(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.f
Vec, _CMP_NEQ_OQ); } |
// Lane-wise ordered comparisons: all-1s mask where the relation holds,
// all-0s otherwise (ordered-quiet predicates: NaN lanes compare false).
AI SkNx operator <(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_LT_OQ); }
AI SkNx operator >(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_GT_OQ); }
AI SkNx operator<=(const SkNx& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_LE_OQ); }
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
719 return src.fVec; | 750 return src.fVec; |
720 } | 751 } |
721 | 752 |
// Convert each float lane to int32 using the current MXCSR rounding mode
// (normally round-to-nearest-even) — cvt, not truncate.
AI static Sk4i Sk4f_round(const Sk4f& x) {
    return _mm_cvtps_epi32(x.fVec);
}
725 | 756 |
726 } // namespace | 757 } // namespace |
727 | 758 |
728 #endif//SkNx_sse_DEFINED | 759 #endif//SkNx_sse_DEFINED |
OLD | NEW |