Index: src/opts/SkPx_sse.h
diff --git a/src/opts/SkPx_sse.h b/src/opts/SkPx_sse.h
deleted file mode 100644
index bc5ccd11ea7a2673128cf098a696cd987863a9d4..0000000000000000000000000000000000000000
--- a/src/opts/SkPx_sse.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright 2015 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkPx_sse_DEFINED
-#define SkPx_sse_DEFINED
-
-// SkPx_sse's sweet spot is to work with 4 pixels at a time,
-// stored interlaced, just as they sit in memory: rgba rgba rgba rgba.
-
-// SkPx_sse's best way to work with alphas is similar,
-// replicating the 4 alphas 4 times each across the pixel: aaaa aaaa aaaa aaaa.
-
-// When working with fewer than 4 pixels, we load the pixels in the low lanes,
-// usually filling the top lanes with zeros (but who cares, might be junk).
-
-struct SkPx_sse {
-    static const int N = 4;
-
-    __m128i fVec;
-    SkPx_sse(__m128i vec) : fVec(vec) {}
-
-    static SkPx_sse Dup(uint32_t px) { return _mm_set1_epi32(px); }
-    static SkPx_sse LoadN(const uint32_t* px) { return _mm_loadu_si128((const __m128i*)px); }
-    static SkPx_sse Load(int n, const uint32_t* px) {
-        SkASSERT(n > 0 && n < 4);
-        switch (n) {
-            case 1: return _mm_cvtsi32_si128(px[0]);
-            case 2: return _mm_loadl_epi64((const __m128i*)px);
-            case 3: return _mm_or_si128(_mm_loadl_epi64((const __m128i*)px),
-                                        _mm_slli_si128(_mm_cvtsi32_si128(px[2]), 8));
-        }
-        return _mm_setzero_si128();  // Not actually reachable.
-    }
-
-    void storeN(uint32_t* px) const { _mm_storeu_si128((__m128i*)px, fVec); }
-    void store(int n, uint32_t* px) const {
-        SkASSERT(n > 0 && n < 4);
-        __m128i v = fVec;
-        if (n & 1) {
-            *px++ = _mm_cvtsi128_si32(v);
-            v = _mm_srli_si128(v, 4);
-        }
-        if (n & 2) {
-            _mm_storel_epi64((__m128i*)px, v);
-        }
-    }
-
-    struct Alpha {
-        __m128i fVec;
-        Alpha(__m128i vec) : fVec(vec) {}
-
-        static Alpha Dup(uint8_t a) { return _mm_set1_epi8(a); }
-        static Alpha LoadN(const uint8_t* a) {
-            __m128i as = _mm_cvtsi32_si128(*(const uint32_t*)a);  // ____ ____ ____ 3210
-        #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
-            return _mm_shuffle_epi8(as, _mm_set_epi8(3,3,3,3, 2,2,2,2, 1,1,1,1, 0,0,0,0));
-        #else
-            as = _mm_unpacklo_epi8 (as, _mm_setzero_si128());  // ____ ____ _3_2 _1_0
-            as = _mm_unpacklo_epi16(as, _mm_setzero_si128());  // ___3 ___2 ___1 ___0
-            as = _mm_or_si128(as, _mm_slli_si128(as, 1));      // __33 __22 __11 __00
-            return _mm_or_si128(as, _mm_slli_si128(as, 2));    // 3333 2222 1111 0000
-        #endif
-        }
-        static Alpha Load(int n, const uint8_t* a) {
-            SkASSERT(n > 0 && n < 4);
-            uint8_t a4[] = { 0,0,0,0 };
-            switch (n) {
-                case 3: a4[2] = a[2];  // fall through
-                case 2: a4[1] = a[1];  // fall through
-                case 1: a4[0] = a[0];
-            }
-            return LoadN(a4);
-        }
-
-        Alpha inv() const { return _mm_sub_epi8(_mm_set1_epi8(~0), fVec); }
-    };
-
-    struct Wide {
-        __m128i fLo, fHi;
-        Wide(__m128i lo, __m128i hi) : fLo(lo), fHi(hi) {}
-
-        Wide operator+(const Wide& o) const {
-            return Wide(_mm_add_epi16(fLo, o.fLo), _mm_add_epi16(fHi, o.fHi));
-        }
-        Wide operator-(const Wide& o) const {
-            return Wide(_mm_sub_epi16(fLo, o.fLo), _mm_sub_epi16(fHi, o.fHi));
-        }
-        Wide operator<<(int bits) const {
-            return Wide(_mm_slli_epi16(fLo, bits), _mm_slli_epi16(fHi, bits));
-        }
-        Wide operator>>(int bits) const {
-            return Wide(_mm_srli_epi16(fLo, bits), _mm_srli_epi16(fHi, bits));
-        }
-
-        SkPx_sse addNarrowHi(const SkPx_sse& o) const {
-            Wide sum = (*this + o.widenLo()) >> 8;
-            return _mm_packus_epi16(sum.fLo, sum.fHi);
-        }
-    };
-
-    Alpha alpha() const {
-    #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
-        return _mm_shuffle_epi8(fVec, _mm_set_epi8(15,15,15,15, 11,11,11,11, 7,7,7,7, 3,3,3,3));
-    #else
-        __m128i as = _mm_srli_epi32(fVec, 24);           // ___3 ___2 ___1 ___0
-        as = _mm_or_si128(as, _mm_slli_si128(as, 1));    // __33 __22 __11 __00
-        return _mm_or_si128(as, _mm_slli_si128(as, 2));  // 3333 2222 1111 0000
-    #endif
-    }
-
-    Wide widenLo() const {
-        return Wide(_mm_unpacklo_epi8(fVec, _mm_setzero_si128()),
-                    _mm_unpackhi_epi8(fVec, _mm_setzero_si128()));
-    }
-    Wide widenHi() const {
-        return Wide(_mm_unpacklo_epi8(_mm_setzero_si128(), fVec),
-                    _mm_unpackhi_epi8(_mm_setzero_si128(), fVec));
-    }
-    Wide widenLoHi() const {
-        return Wide(_mm_unpacklo_epi8(fVec, fVec),
-                    _mm_unpackhi_epi8(fVec, fVec));
-    }
-
-    SkPx_sse operator+(const SkPx_sse& o) const { return _mm_add_epi8(fVec, o.fVec); }
-    SkPx_sse operator-(const SkPx_sse& o) const { return _mm_sub_epi8(fVec, o.fVec); }
-    SkPx_sse saturatedAdd(const SkPx_sse& o) const { return _mm_adds_epi8(fVec, o.fVec); }
-
-    Wide operator*(const Alpha& a) const {
-        __m128i pLo = _mm_unpacklo_epi8( fVec, _mm_setzero_si128()),
-                aLo = _mm_unpacklo_epi8(a.fVec, _mm_setzero_si128()),
-                pHi = _mm_unpackhi_epi8( fVec, _mm_setzero_si128()),
-                aHi = _mm_unpackhi_epi8(a.fVec, _mm_setzero_si128());
-        return Wide(_mm_mullo_epi16(pLo, aLo), _mm_mullo_epi16(pHi, aHi));
-    }
-    SkPx_sse approxMulDiv255(const Alpha& a) const {
-        return (*this * a).addNarrowHi(*this);
-    }
-
-    SkPx_sse addAlpha(const Alpha& a) const {
-        return _mm_add_epi8(fVec, _mm_and_si128(a.fVec, _mm_set1_epi32(0xFF000000)));
-    }
-};
-
-typedef SkPx_sse SkPx;
-
-#endif//SkPx_sse_DEFINED
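
Notes on the deleted helpers, kept here for reference:

Load(n)/store(n) cover the 1-3 pixel tail of a row, while LoadN/storeN handle full groups of four. A minimal driver sketch, assuming the header above were still in scope (scale_row and its parameters are hypothetical, not from this file):

    #include <stdint.h>

    // Sketch only: modulate a row of n RGBA pixels by per-pixel coverage,
    // four pixels per iteration, handing any 1-3 pixel tail to the
    // masked Load/store paths.
    static void scale_row(uint32_t* dst, const uint32_t* src, int n,
                          const uint8_t* cov) {
        while (n >= 4) {
            SkPx::LoadN(src).approxMulDiv255(SkPx::Alpha::LoadN(cov)).storeN(dst);
            src += 4; dst += 4; cov += 4; n -= 4;
        }
        if (n > 0) {
            SkPx::Load(n, src).approxMulDiv255(SkPx::Alpha::Load(n, cov)).store(n, dst);
        }
    }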
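
Alpha::LoadN's SSE2 fallback builds the aaaa aaaa aaaa aaaa layout with two widening unpacks followed by two shift-and-OR passes. A standalone sketch of just that splat (splat_alphas_sse2 is a hypothetical name, and memcpy stands in for the file's aliasing cast):

    #include <emmintrin.h>  // SSE2
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static __m128i splat_alphas_sse2(const uint8_t a[4]) {
        int bits;
        memcpy(&bits, a, 4);                               // load the 4 coverage bytes
        __m128i as = _mm_cvtsi32_si128(bits);              // ____ ____ ____ 3210
        as = _mm_unpacklo_epi8 (as, _mm_setzero_si128());  // ____ ____ _3_2 _1_0
        as = _mm_unpacklo_epi16(as, _mm_setzero_si128());  // ___3 ___2 ___1 ___0
        as = _mm_or_si128(as, _mm_slli_si128(as, 1));      // __33 __22 __11 __00
        return _mm_or_si128(as, _mm_slli_si128(as, 2));    // 3333 2222 1111 0000
    }

    int main() {
        const uint8_t a[4] = {0x10, 0x20, 0x30, 0x40};
        uint8_t out[16];
        _mm_storeu_si128((__m128i*)out, splat_alphas_sse2(a));
        for (int i = 0; i < 16; i++) {
            printf("%02x ", out[i]);  // 10 10 10 10 20 20 20 20 30 ...
        }
        printf("\n");
        return 0;
    }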
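
alpha() goes the other way, pulling the four alpha bytes out of interlaced pixels. On SSSE3 the whole extraction is one pshufb whose index vector broadcasts bytes 3, 7, 11, and 15 (each pixel's alpha) across their own lanes; restated standalone (extract_alphas_ssse3 is a hypothetical name):

    #include <tmmintrin.h>  // SSSE3

    static __m128i extract_alphas_ssse3(__m128i px) {
        // One shuffle replicates each pixel's alpha byte across its lane.
        return _mm_shuffle_epi8(px,
            _mm_set_epi8(15,15,15,15, 11,11,11,11, 7,7,7,7, 3,3,3,3));
    }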
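
The three widen helpers map each 8-bit channel x into a 16-bit lane as x (widenLo), x*256 (widenHi), and x*257 (widenLoHi, the byte paired with itself). That makes approxMulDiv255 easy to read: (*this * a) is x*a in 16-bit lanes, and addNarrowHi adds x back in and shifts right by 8, i.e. (x*a + x) >> 8 == x*(a+1)/256, the usual cheap stand-in for x*a/255. A scalar model plus an exhaustive check (function names are hypothetical):

    #include <assert.h>
    #include <stdint.h>

    // (x*a + x) >> 8 == x*(a+1)/256: exact at a = 0 and a = 255,
    // and within 1 of the rounded x*a/255 everywhere else.
    static uint8_t approx_mul_div_255(uint8_t x, uint8_t a) {
        return (uint8_t)((x * a + x) >> 8);
    }

    int main() {
        assert(approx_mul_div_255(255, 255) == 255);  // full coverage is lossless
        assert(approx_mul_div_255(255,   0) ==   0);  // zero coverage clears
        for (int x = 0; x < 256; x++) {
            for (int a = 0; a < 256; a++) {
                int exact = (x * a + 127) / 255;      // rounded reference
                int diff  = approx_mul_div_255((uint8_t)x, (uint8_t)a) - exact;
                assert(-1 <= diff && diff <= 1);
            }
        }
        return 0;
    }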
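
Finally, addAlpha relies on the replicated-alpha layout: ANDing with 0xFF000000 keeps only byte 3 of each lane, so the add touches nothing but the destination's alpha channel. Per pixel it behaves like this scalar model (add_alpha is a hypothetical name; like _mm_add_epi8, the alpha byte wraps rather than saturates):

    #include <stdint.h>

    static uint32_t add_alpha(uint32_t px, uint8_t a) {
        return px + ((uint32_t)a << 24);  // only bits 24-31 change
    }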