Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(197)

Side by Side Diff: src/opts/SkPx_sse.h

Issue 1436513002: SkPx: use namespaces as namespaces (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkPx_none.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkPx_sse_DEFINED 8 #ifndef SkPx_sse_DEFINED
9 #define SkPx_sse_DEFINED 9 #define SkPx_sse_DEFINED
10 10
11 // SkPx_sse's sweet spot is to work with 4 pixels at a time, 11 // sse::SkPx's sweet spot is to work with 4 pixels at a time,
12 // stored interlaced, just as they sit in memory: rgba rgba rgba rgba. 12 // stored interlaced, just as they sit in memory: rgba rgba rgba rgba.
13 13
14 // SkPx_sse's best way to work with alphas is similar, 14 // sse::SkPx's best way to work with alphas is similar,
15 // replicating the 4 alphas 4 times each across the pixel: aaaa aaaa aaaa aaaa. 15 // replicating the 4 alphas 4 times each across the pixel: aaaa aaaa aaaa aaaa.
16 16
17 // When working with fewer than 4 pixels, we load the pixels in the low lanes, 17 // When working with fewer than 4 pixels, we load the pixels in the low lanes,
18 // usually filling the top lanes with zeros (but who cares, might be junk). 18 // usually filling the top lanes with zeros (but who cares, might be junk).
19 19
20 struct SkPx_sse { 20 namespace sse {
21
22 struct SkPx {
21 static const int N = 4; 23 static const int N = 4;
22 24
23 __m128i fVec; 25 __m128i fVec;
24 SkPx_sse(__m128i vec) : fVec(vec) {} 26 SkPx(__m128i vec) : fVec(vec) {}
25 27
26 static SkPx_sse Dup(uint32_t px) { return _mm_set1_epi32(px); } 28 static SkPx Dup(uint32_t px) { return _mm_set1_epi32(px); }
27 static SkPx_sse Load(const uint32_t* px) { return _mm_loadu_si128((const __m128i*)px); } 29 static SkPx Load(const uint32_t* px) { return _mm_loadu_si128((const __m128i*)px); }
28 static SkPx_sse Load(const uint32_t* px, int n) { 30 static SkPx Load(const uint32_t* px, int n) {
29 SkASSERT(n > 0 && n < 4); 31 SkASSERT(n > 0 && n < 4);
30 switch (n) { 32 switch (n) {
31 case 1: return _mm_cvtsi32_si128(px[0]); 33 case 1: return _mm_cvtsi32_si128(px[0]);
32 case 2: return _mm_loadl_epi64((const __m128i*)px); 34 case 2: return _mm_loadl_epi64((const __m128i*)px);
33 case 3: return _mm_or_si128(_mm_loadl_epi64((const __m128i*)px), 35 case 3: return _mm_or_si128(_mm_loadl_epi64((const __m128i*)px),
34 _mm_slli_si128(_mm_cvtsi32_si128(px[2]), 8)); 36 _mm_slli_si128(_mm_cvtsi32_si128(px[2]), 8));
35 } 37 }
36 return _mm_setzero_si128(); // Not actually reachable. 38 return _mm_setzero_si128(); // Not actually reachable.
37 } 39 }
38 40
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
89 Wide operator-(const Wide& o) const { 91 Wide operator-(const Wide& o) const {
90 return Wide(_mm_sub_epi16(fLo, o.fLo), _mm_sub_epi16(fHi, o.fHi)); 92 return Wide(_mm_sub_epi16(fLo, o.fLo), _mm_sub_epi16(fHi, o.fHi));
91 } 93 }
92 template <int bits> Wide shl() const { 94 template <int bits> Wide shl() const {
93 return Wide(_mm_slli_epi16(fLo, bits), _mm_slli_epi16(fHi, bits)); 95 return Wide(_mm_slli_epi16(fLo, bits), _mm_slli_epi16(fHi, bits));
94 } 96 }
95 template <int bits> Wide shr() const { 97 template <int bits> Wide shr() const {
96 return Wide(_mm_srli_epi16(fLo, bits), _mm_srli_epi16(fHi, bits)); 98 return Wide(_mm_srli_epi16(fLo, bits), _mm_srli_epi16(fHi, bits));
97 } 99 }
98 100
99 SkPx_sse addNarrowHi(const SkPx_sse& o) const { 101 SkPx addNarrowHi(const SkPx& o) const {
100 Wide sum = (*this + o.widenLo()).shr<8>(); 102 Wide sum = (*this + o.widenLo()).shr<8>();
101 return _mm_packus_epi16(sum.fLo, sum.fHi); 103 return _mm_packus_epi16(sum.fLo, sum.fHi);
102 } 104 }
103 }; 105 };
104 106
105 Alpha alpha() const { 107 Alpha alpha() const {
106 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 108 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
107 return _mm_shuffle_epi8(fVec, _mm_set_epi8(15,15,15,15, 11,11,11,11, 7,7,7,7, 3,3,3,3)); 109 return _mm_shuffle_epi8(fVec, _mm_set_epi8(15,15,15,15, 11,11,11,11, 7,7,7,7, 3,3,3,3));
108 #else 110 #else
109 __m128i as = _mm_srli_epi32(fVec, 24); // ___3 ___2 ___1 ___0 111 __m128i as = _mm_srli_epi32(fVec, 24); // ___3 ___2 ___1 ___0
110 as = _mm_or_si128(as, _mm_slli_si128(as, 1)); // __33 __22 __11 __00 112 as = _mm_or_si128(as, _mm_slli_si128(as, 1)); // __33 __22 __11 __00
111 return _mm_or_si128(as, _mm_slli_si128(as, 2)); // 3333 2222 1111 0000 113 return _mm_or_si128(as, _mm_slli_si128(as, 2)); // 3333 2222 1111 0000
112 #endif 114 #endif
113 } 115 }
114 116
115 Wide widenLo() const { 117 Wide widenLo() const {
116 return Wide(_mm_unpacklo_epi8(fVec, _mm_setzero_si128()), 118 return Wide(_mm_unpacklo_epi8(fVec, _mm_setzero_si128()),
117 _mm_unpackhi_epi8(fVec, _mm_setzero_si128())); 119 _mm_unpackhi_epi8(fVec, _mm_setzero_si128()));
118 } 120 }
119 Wide widenHi() const { 121 Wide widenHi() const {
120 return Wide(_mm_unpacklo_epi8(_mm_setzero_si128(), fVec), 122 return Wide(_mm_unpacklo_epi8(_mm_setzero_si128(), fVec),
121 _mm_unpackhi_epi8(_mm_setzero_si128(), fVec)); 123 _mm_unpackhi_epi8(_mm_setzero_si128(), fVec));
122 } 124 }
123 Wide widenLoHi() const { 125 Wide widenLoHi() const {
124 return Wide(_mm_unpacklo_epi8(fVec, fVec), 126 return Wide(_mm_unpacklo_epi8(fVec, fVec),
125 _mm_unpackhi_epi8(fVec, fVec)); 127 _mm_unpackhi_epi8(fVec, fVec));
126 } 128 }
127 129
128 SkPx_sse operator+(const SkPx_sse& o) const { return _mm_add_epi8(fVec, o.fVec); } 130 SkPx operator+(const SkPx& o) const { return _mm_add_epi8(fVec, o.fVec); }
129 SkPx_sse operator-(const SkPx_sse& o) const { return _mm_sub_epi8(fVec, o.fVec); } 131 SkPx operator-(const SkPx& o) const { return _mm_sub_epi8(fVec, o.fVec); }
130 SkPx_sse saturatedAdd(const SkPx_sse& o) const { return _mm_adds_epi8(fVec, o.fVec); } 132 SkPx saturatedAdd(const SkPx& o) const { return _mm_adds_epi8(fVec, o.fVec); }
131 133
132 Wide operator*(const Alpha& a) const { 134 Wide operator*(const Alpha& a) const {
133 __m128i pLo = _mm_unpacklo_epi8( fVec, _mm_setzero_si128()), 135 __m128i pLo = _mm_unpacklo_epi8( fVec, _mm_setzero_si128()),
134 aLo = _mm_unpacklo_epi8(a.fVec, _mm_setzero_si128()), 136 aLo = _mm_unpacklo_epi8(a.fVec, _mm_setzero_si128()),
135 pHi = _mm_unpackhi_epi8( fVec, _mm_setzero_si128()), 137 pHi = _mm_unpackhi_epi8( fVec, _mm_setzero_si128()),
136 aHi = _mm_unpackhi_epi8(a.fVec, _mm_setzero_si128()); 138 aHi = _mm_unpackhi_epi8(a.fVec, _mm_setzero_si128());
137 return Wide(_mm_mullo_epi16(pLo, aLo), _mm_mullo_epi16(pHi, aHi)); 139 return Wide(_mm_mullo_epi16(pLo, aLo), _mm_mullo_epi16(pHi, aHi));
138 } 140 }
139 SkPx_sse approxMulDiv255(const Alpha& a) const { 141 SkPx approxMulDiv255(const Alpha& a) const {
140 return (*this * a).addNarrowHi(*this); 142 return (*this * a).addNarrowHi(*this);
141 } 143 }
142 144
143 SkPx_sse addAlpha(const Alpha& a) const { 145 SkPx addAlpha(const Alpha& a) const {
144 return _mm_add_epi8(fVec, _mm_and_si128(a.fVec, _mm_set1_epi32(0xFF000000))); 146 return _mm_add_epi8(fVec, _mm_and_si128(a.fVec, _mm_set1_epi32(0xFF000000)));
145 } 147 }
146 }; 148 };
147 149
148 typedef SkPx_sse SkPx; 150 } // namespace sse
151
152 typedef sse::SkPx SkPx;
149 153
150 #endif//SkPx_sse_DEFINED 154 #endif//SkPx_sse_DEFINED
OLDNEW
« no previous file with comments | « src/opts/SkPx_none.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698