OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkPx_sse_DEFINED | 8 #ifndef SkPx_sse_DEFINED |
9 #define SkPx_sse_DEFINED | 9 #define SkPx_sse_DEFINED |
10 | 10 |
11 // SkPx_sse's sweet spot is to work with 4 pixels at a time, | 11 // sse::SkPx's sweet spot is to work with 4 pixels at a time, |
12 // stored interlaced, just as they sit in memory: rgba rgba rgba rgba. | 12 // stored interlaced, just as they sit in memory: rgba rgba rgba rgba. |
13 | 13 |
14 // SkPx_sse's best way to work with alphas is similar, | 14 // sse::SkPx's best way to work with alphas is similar, |
15 // replicating the 4 alphas 4 times each across the pixel: aaaa aaaa aaaa aaaa. | 15 // replicating the 4 alphas 4 times each across the pixel: aaaa aaaa aaaa aaaa. |
16 | 16 |
17 // When working with fewer than 4 pixels, we load the pixels in the low lanes, | 17 // When working with fewer than 4 pixels, we load the pixels in the low lanes, |
18 // usually filling the top lanes with zeros (but who cares, might be junk). | 18 // usually filling the top lanes with zeros (but who cares, might be junk). |
19 | 19 |
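A quick scalar picture of the two layouts described in the comments above (an editor's sketch, not part of this CL; replicate_alphas is an invented name used only for illustration):

    // Editor's sketch: scalar view of the interlaced pixel and replicated alpha layouts.
    #include <stdint.h>

    // Four pixels as they sit in memory:  r0 g0 b0 a0  r1 g1 b1 a1  r2 g2 b2 a2  r3 g3 b3 a3
    // The matching Alpha layout repeats each alpha across its pixel's four lanes:
    //                                     a0 a0 a0 a0  a1 a1 a1 a1  a2 a2 a2 a2  a3 a3 a3 a3
    // This hypothetical helper does in scalar code what alpha() below does with
    // _mm_shuffle_epi8 (or the shift-and-or fallback).
    static void replicate_alphas(const uint8_t px[16], uint8_t alphas[16]) {
        for (int i = 0; i < 4; i++) {           // each of the 4 pixels
            for (int j = 0; j < 4; j++) {       // each of that pixel's 4 lanes
                alphas[4*i + j] = px[4*i + 3];  // byte 3 of an rgba pixel is its alpha
            }
        }
    }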
20 struct SkPx_sse { | 20 namespace sse { |
| 21 |
| 22 struct SkPx { |
21 static const int N = 4; | 23 static const int N = 4; |
22 | 24 |
23 __m128i fVec; | 25 __m128i fVec; |
24 SkPx_sse(__m128i vec) : fVec(vec) {} | 26 SkPx(__m128i vec) : fVec(vec) {} |
25 | 27 |
26 static SkPx_sse Dup(uint32_t px) { return _mm_set1_epi32(px); } | 28 static SkPx Dup(uint32_t px) { return _mm_set1_epi32(px); } |
27     static SkPx_sse Load(const uint32_t* px) { return _mm_loadu_si128((const __m128i*)px); } | 29     static SkPx Load(const uint32_t* px) { return _mm_loadu_si128((const __m128i*)px); } |
28 static SkPx_sse Load(const uint32_t* px, int n) { | 30 static SkPx Load(const uint32_t* px, int n) { |
29 SkASSERT(n > 0 && n < 4); | 31 SkASSERT(n > 0 && n < 4); |
30 switch (n) { | 32 switch (n) { |
31 case 1: return _mm_cvtsi32_si128(px[0]); | 33 case 1: return _mm_cvtsi32_si128(px[0]); |
32 case 2: return _mm_loadl_epi64((const __m128i*)px); | 34 case 2: return _mm_loadl_epi64((const __m128i*)px); |
33 case 3: return _mm_or_si128(_mm_loadl_epi64((const __m128i*)px), | 35 case 3: return _mm_or_si128(_mm_loadl_epi64((const __m128i*)px), |
34                                         _mm_slli_si128(_mm_cvtsi32_si128(px[2]), 8)); | 36                                         _mm_slli_si128(_mm_cvtsi32_si128(px[2]), 8)); |
35 } | 37 } |
36 return _mm_setzero_si128(); // Not actually reachable. | 38 return _mm_setzero_si128(); // Not actually reachable. |
37 } | 39 } |
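For context, a caller might drive the full and partial loads above like this (editor's sketch, not part of this CL; for_each_4_pixels and process are hypothetical names, the header is assumed to be included, and storing results is omitted since SkPx's store helpers sit in the elided lines below):

    // Editor's sketch: process 4 pixels at a time, then a 1-3 pixel tail.
    #include <stdint.h>

    template <typename Fn>
    void for_each_4_pixels(const uint32_t* px, int count, Fn process) {
        while (count >= 4) {
            process(SkPx::Load(px));         // full 4-pixel load
            px += 4;
            count -= 4;
        }
        if (count > 0) {
            process(SkPx::Load(px, count));  // partial load of the remaining 1-3 pixels
        }
    }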
38 | 40 |
(...skipping 50 matching lines...)
89 Wide operator-(const Wide& o) const { | 91 Wide operator-(const Wide& o) const { |
90 return Wide(_mm_sub_epi16(fLo, o.fLo), _mm_sub_epi16(fHi, o.fHi)); | 92 return Wide(_mm_sub_epi16(fLo, o.fLo), _mm_sub_epi16(fHi, o.fHi)); |
91 } | 93 } |
92 template <int bits> Wide shl() const { | 94 template <int bits> Wide shl() const { |
93 return Wide(_mm_slli_epi16(fLo, bits), _mm_slli_epi16(fHi, bits)); | 95 return Wide(_mm_slli_epi16(fLo, bits), _mm_slli_epi16(fHi, bits)); |
94 } | 96 } |
95 template <int bits> Wide shr() const { | 97 template <int bits> Wide shr() const { |
96 return Wide(_mm_srli_epi16(fLo, bits), _mm_srli_epi16(fHi, bits)); | 98 return Wide(_mm_srli_epi16(fLo, bits), _mm_srli_epi16(fHi, bits)); |
97 } | 99 } |
98 | 100 |
99 SkPx_sse addNarrowHi(const SkPx_sse& o) const { | 101 SkPx addNarrowHi(const SkPx& o) const { |
100 Wide sum = (*this + o.widenLo()).shr<8>(); | 102 Wide sum = (*this + o.widenLo()).shr<8>(); |
101 return _mm_packus_epi16(sum.fLo, sum.fHi); | 103 return _mm_packus_epi16(sum.fLo, sum.fHi); |
102 } | 104 } |
103 }; | 105 }; |
104 | 106 |
105 Alpha alpha() const { | 107 Alpha alpha() const { |
106 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 | 108 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
107         return _mm_shuffle_epi8(fVec, _mm_set_epi8(15,15,15,15, 11,11,11,11, 7,7,7,7, 3,3,3,3)); | 109         return _mm_shuffle_epi8(fVec, _mm_set_epi8(15,15,15,15, 11,11,11,11, 7,7,7,7, 3,3,3,3)); |
108 #else | 110 #else |
109 __m128i as = _mm_srli_epi32(fVec, 24); // ___3 ___2 ___1 ___0 | 111 __m128i as = _mm_srli_epi32(fVec, 24); // ___3 ___2 ___1 ___0 |
110 as = _mm_or_si128(as, _mm_slli_si128(as, 1)); // __33 __22 __11 __00 | 112 as = _mm_or_si128(as, _mm_slli_si128(as, 1)); // __33 __22 __11 __00 |
111 return _mm_or_si128(as, _mm_slli_si128(as, 2)); // 3333 2222 1111 0000 | 113 return _mm_or_si128(as, _mm_slli_si128(as, 2)); // 3333 2222 1111 0000 |
112 #endif | 114 #endif |
113 } | 115 } |
114 | 116 |
115 Wide widenLo() const { | 117 Wide widenLo() const { |
116 return Wide(_mm_unpacklo_epi8(fVec, _mm_setzero_si128()), | 118 return Wide(_mm_unpacklo_epi8(fVec, _mm_setzero_si128()), |
117 _mm_unpackhi_epi8(fVec, _mm_setzero_si128())); | 119 _mm_unpackhi_epi8(fVec, _mm_setzero_si128())); |
118 } | 120 } |
119 Wide widenHi() const { | 121 Wide widenHi() const { |
120 return Wide(_mm_unpacklo_epi8(_mm_setzero_si128(), fVec), | 122 return Wide(_mm_unpacklo_epi8(_mm_setzero_si128(), fVec), |
121 _mm_unpackhi_epi8(_mm_setzero_si128(), fVec)); | 123 _mm_unpackhi_epi8(_mm_setzero_si128(), fVec)); |
122 } | 124 } |
123 Wide widenLoHi() const { | 125 Wide widenLoHi() const { |
124 return Wide(_mm_unpacklo_epi8(fVec, fVec), | 126 return Wide(_mm_unpacklo_epi8(fVec, fVec), |
125 _mm_unpackhi_epi8(fVec, fVec)); | 127 _mm_unpackhi_epi8(fVec, fVec)); |
126 } | 128 } |
127 | 129 |
128     SkPx_sse operator+(const SkPx_sse& o) const { return _mm_add_epi8(fVec, o.fVec); } | 130     SkPx operator+(const SkPx& o) const { return _mm_add_epi8(fVec, o.fVec); } |
129     SkPx_sse operator-(const SkPx_sse& o) const { return _mm_sub_epi8(fVec, o.fVec); } | 131     SkPx operator-(const SkPx& o) const { return _mm_sub_epi8(fVec, o.fVec); } |
130     SkPx_sse saturatedAdd(const SkPx_sse& o) const { return _mm_adds_epi8(fVec, o.fVec); } | 132     SkPx saturatedAdd(const SkPx& o) const { return _mm_adds_epi8(fVec, o.fVec); } |
131 | 133 |
132 Wide operator*(const Alpha& a) const { | 134 Wide operator*(const Alpha& a) const { |
133 __m128i pLo = _mm_unpacklo_epi8( fVec, _mm_setzero_si128()), | 135 __m128i pLo = _mm_unpacklo_epi8( fVec, _mm_setzero_si128()), |
134 aLo = _mm_unpacklo_epi8(a.fVec, _mm_setzero_si128()), | 136 aLo = _mm_unpacklo_epi8(a.fVec, _mm_setzero_si128()), |
135 pHi = _mm_unpackhi_epi8( fVec, _mm_setzero_si128()), | 137 pHi = _mm_unpackhi_epi8( fVec, _mm_setzero_si128()), |
136 aHi = _mm_unpackhi_epi8(a.fVec, _mm_setzero_si128()); | 138 aHi = _mm_unpackhi_epi8(a.fVec, _mm_setzero_si128()); |
137 return Wide(_mm_mullo_epi16(pLo, aLo), _mm_mullo_epi16(pHi, aHi)); | 139 return Wide(_mm_mullo_epi16(pLo, aLo), _mm_mullo_epi16(pHi, aHi)); |
138 } | 140 } |
139 SkPx_sse approxMulDiv255(const Alpha& a) const { | 141 SkPx approxMulDiv255(const Alpha& a) const { |
140 return (*this * a).addNarrowHi(*this); | 142 return (*this * a).addNarrowHi(*this); |
141 } | 143 } |
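The trick here: addNarrowHi() adds the zero-widened original pixel to the 16-bit products, shifts right by 8, and packs back down to bytes, so approxMulDiv255() computes (p*a + p) >> 8, i.e. (p*(a+1)) >> 8, the usual cheap stand-in for p*a/255. A scalar equivalent (editor's sketch, not part of this CL):

    // Editor's sketch: scalar approxMulDiv255.  Exact when a is 0 or 255,
    // and within 1 of the true p*a/255 everywhere else.
    #include <stdint.h>

    static uint8_t approx_mul_div_255(uint8_t p, uint8_t a) {
        return (uint8_t)((p * a + p) >> 8);
    }

    // e.g. approx_mul_div_255(255, 255) == 255, approx_mul_div_255(200, 128) == 100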
142 | 144 |
143 SkPx_sse addAlpha(const Alpha& a) const { | 145 SkPx addAlpha(const Alpha& a) const { |
144         return _mm_add_epi8(fVec, _mm_and_si128(a.fVec, _mm_set1_epi32(0xFF000000))); | 146         return _mm_add_epi8(fVec, _mm_and_si128(a.fVec, _mm_set1_epi32(0xFF000000))); |
145 } | 147 } |
146 }; | 148 }; |
147 | 149 |
148 typedef SkPx_sse SkPx; | 150 } // namespace sse |
| 151 |
| 152 typedef sse::SkPx SkPx; |
149 | 153 |
150 #endif//SkPx_sse_DEFINED | 154 #endif//SkPx_sse_DEFINED |