src/opts/SkPx_sse.h - Issue 1521623003: archive skpx... currently dead code

Side by Side Diff: src/opts/SkPx_sse.h

Issue 1521623003: archive skpx... currently dead code (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: Created 5 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 /*

2 * Copyright 2015 Google Inc.

3 *

4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.

6 */

7

8 #ifndef SkPx_sse_DEFINED

9 #define SkPx_sse_DEFINED

10

11 // sse::SkPx's sweet spot is to work with 4 pixels at a time,

12 // stored interlaced, just as they sit in memory: rgba rgba rgba rgba.

13

14 // sse::SkPx's best way to work with alphas is similar,

15 // replicating the 4 alphas 4 times each across the pixel: aaaa aaaa aaaa aaaa.

16

17 // When working with fewer than 4 pixels, we load the pixels in the low lanes,

18 // usually filling the top lanes with zeros (but who cares, might be junk).

19

20 namespace sse {

21

22 struct SkPx {

23 static const int N = 4;

24

25 __m128i fVec;

26 SkPx(__m128i vec) : fVec(vec) {}

27

28 static SkPx Dup(uint32_t px) { return _mm_set1_epi32(px); }

29 static SkPx Load(const uint32_t* px) { return _mm_loadu_si128((const __m128i *)px); }

30 static SkPx Load(const uint32_t* px, int n) {

31 SkASSERT(n > 0 && n < 4);

32 switch (n) {

33 case 1: return _mm_cvtsi32_si128(px[0]);

34 case 2: return _mm_loadl_epi64((const __m128i*)px);

35 case 3: return _mm_or_si128(_mm_loadl_epi64((const __m128i*)px),

36 _mm_slli_si128(_mm_cvtsi32_si128(px[2]), 8));

37 }

38 return _mm_setzero_si128(); // Not actually reachable.

39 }

40

41 void store(uint32_t* px) const { _mm_storeu_si128((__m128i*)px, fVec); }

42 void store(uint32_t* px, int n) const {

43 SkASSERT(n > 0 && n < 4);

44 __m128i v = fVec;

45 if (n & 1) {

46 *px++ = _mm_cvtsi128_si32(v);

47 v = _mm_srli_si128(v, 4);

48 }

49 if (n & 2) {

50 _mm_storel_epi64((__m128i*)px, v);

51 }

52 }

53

54 struct Alpha {

55 __m128i fVec;

56 Alpha(__m128i vec) : fVec(vec) {}

57

58 static Alpha Dup(uint8_t a) { return _mm_set1_epi8(a); }

59 static Alpha Load(const uint8_t* a) {

60 __m128i as = _mm_cvtsi32_si128((const uint32_t)a); // ____ ____ ____ 3210

61 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3

62 return _mm_shuffle_epi8(as, _mm_set_epi8(3,3,3,3, 2,2,2,2, 1,1,1,1, 0,0,0,0));

63 #else

64 as = _mm_unpacklo_epi8 (as, as); // ____ ____ 3322 1100

65 as = _mm_unpacklo_epi16(as, as); // 3333 2222 1111 0000

66 return as;

67 #endif

68 }

69 static Alpha Load(const uint8_t* a, int n) {

70 SkASSERT(n > 0 && n < 4);

71 uint8_t a4[] = { 0,0,0,0 };

72 switch (n) {

73 case 3: a4[2] = a[2]; // fall through

74 case 2: a4[1] = a[1]; // fall through

75 case 1: a4[0] = a[0];

76 }

77 return Load(a4);

78 }

79

80 Alpha inv() const { return _mm_sub_epi8(_mm_set1_epi8(~0), fVec); }

81 };

82

83 struct Wide {

84 __m128i fLo, fHi;

85 Wide(__m128i lo, __m128i hi) : fLo(lo), fHi(hi) {}

86

87 Wide operator+(const Wide& o) const {

88 return Wide(_mm_add_epi16(fLo, o.fLo), _mm_add_epi16(fHi, o.fHi));

89 }

90 Wide operator-(const Wide& o) const {

91 return Wide(_mm_sub_epi16(fLo, o.fLo), _mm_sub_epi16(fHi, o.fHi));

92 }

93 template <int bits> Wide shl() const {

94 return Wide(_mm_slli_epi16(fLo, bits), _mm_slli_epi16(fHi, bits));

95 }

96 template <int bits> Wide shr() const {

97 return Wide(_mm_srli_epi16(fLo, bits), _mm_srli_epi16(fHi, bits));

98 }

99

100 SkPx addNarrowHi(const SkPx& o) const {

101 Wide sum = (*this + o.widenLo()).shr<8>();

102 return _mm_packus_epi16(sum.fLo, sum.fHi);

103 }

104 };

105

106 Alpha alpha() const {

107 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3

108 return _mm_shuffle_epi8(fVec, _mm_set_epi8(15,15,15,15, 11,11,11,11, 7,7 ,7,7, 3,3,3,3));

109 #else

110 // We exploit that A >= rgb for any premul pixel.

111 __m128i as = fVec; // 3xxx 2xxx 1xxx 0xxx

112 as = _mm_max_epu8(as, _mm_srli_epi32(as, 8)); // 33xx 22xx 11xx 00xx

113 as = _mm_max_epu8(as, _mm_srli_epi32(as, 16)); // 3333 2222 1111 0000

114 return as;

115 #endif

116 }

117

118 Wide widenLo() const {

119 return Wide(_mm_unpacklo_epi8(fVec, _mm_setzero_si128()),

120 _mm_unpackhi_epi8(fVec, _mm_setzero_si128()));

121 }

122 Wide widenHi() const {

123 return Wide(_mm_unpacklo_epi8(_mm_setzero_si128(), fVec),

124 _mm_unpackhi_epi8(_mm_setzero_si128(), fVec));

125 }

126 Wide widenLoHi() const {

127 return Wide(_mm_unpacklo_epi8(fVec, fVec),

128 _mm_unpackhi_epi8(fVec, fVec));

129 }

130

131 SkPx operator+(const SkPx& o) const { return _mm_add_epi8(fVec, o.fVec); }

132 SkPx operator-(const SkPx& o) const { return _mm_sub_epi8(fVec, o.fVec); }

133 SkPx saturatedAdd(const SkPx& o) const { return _mm_adds_epi8(fVec, o.fVec); }

134

135 Wide operator*(const Alpha& a) const {

136 __m128i pLo = _mm_unpacklo_epi8( fVec, _mm_setzero_si128()),

137 aLo = _mm_unpacklo_epi8(a.fVec, _mm_setzero_si128()),

138 pHi = _mm_unpackhi_epi8( fVec, _mm_setzero_si128()),

139 aHi = _mm_unpackhi_epi8(a.fVec, _mm_setzero_si128());

140 return Wide(_mm_mullo_epi16(pLo, aLo), _mm_mullo_epi16(pHi, aHi));

141 }

142 SkPx approxMulDiv255(const Alpha& a) const {

143 return (this a).addNarrowHi(*this);

144 }

145

146 SkPx addAlpha(const Alpha& a) const {

147 return _mm_add_epi8(fVec, _mm_and_si128(a.fVec, _mm_set1_epi32(0xFF00000 0)));

148 }

149 };

150

151 } // namespace sse

152

153 typedef sse::SkPx SkPx;  // Expose the SSE implementation at global scope under the name SkPx.

154

155 #endif//SkPx_sse_DEFINED

OLD	NEW

« no previous file with comments | « src/opts/SkPx_none.h ('k') | no next file » | no next file with comments »