Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(472)

Side by Side Diff: src/opts/SkNx_sse.h

Issue 1132273004: Sk4px (Closed) Base URL: https://skia.googlesource.com/skia@master
Patch Set: a smidge faster Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkNx_neon.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_sse_DEFINED 8 #ifndef SkNx_sse_DEFINED
9 #define SkNx_sse_DEFINED 9 #define SkNx_sse_DEFINED
10 10
11 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent. 11 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent.
12 #include <immintrin.h> 12 #include <immintrin.h>
13 13
14 template <> 14 template <>
15 class SkNb<2, 4> { 15 class SkNb<2, 4> {
16 public: 16 public:
17 SkNb(const __m128i& vec) : fVec(vec) {} 17 SkNb(const __m128i& vec) : fVec(vec) {}
18 18
19 SkNb() {} 19 SkNb() {}
20 bool allTrue() const { return 0xff == (_mm_movemask_epi8(fVec) & 0xff); } 20 bool allTrue() const { return 0xff == (_mm_movemask_epi8(fVec) & 0xff); }
21 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(fVec) & 0xff); } 21 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(fVec) & 0xff); }
22 22
23 private:
24 __m128i fVec; 23 __m128i fVec;
25 }; 24 };
26 25
27 template <> 26 template <>
28 class SkNb<4, 4> { 27 class SkNb<4, 4> {
29 public: 28 public:
30 SkNb(const __m128i& vec) : fVec(vec) {} 29 SkNb(const __m128i& vec) : fVec(vec) {}
31 30
32 SkNb() {} 31 SkNb() {}
33 bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); } 32 bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); }
34 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); } 33 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); }
35 34
36 private:
37 __m128i fVec; 35 __m128i fVec;
38 }; 36 };
39 37
40 template <> 38 template <>
41 class SkNb<2, 8> { 39 class SkNb<2, 8> {
42 public: 40 public:
43 SkNb(const __m128i& vec) : fVec(vec) {} 41 SkNb(const __m128i& vec) : fVec(vec) {}
44 42
45 SkNb() {} 43 SkNb() {}
46 bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); } 44 bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); }
47 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); } 45 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); }
48 46
49 private:
50 __m128i fVec; 47 __m128i fVec;
51 }; 48 };
52 49
53 50
54 template <> 51 template <>
55 class SkNf<2, float> { 52 class SkNf<2, float> {
56 typedef SkNb<2, 4> Nb; 53 typedef SkNb<2, 4> Nb;
57 public: 54 public:
58 SkNf(const __m128& vec) : fVec(vec) {} 55 SkNf(const __m128& vec) : fVec(vec) {}
59 56
(...skipping 28 matching lines...) Expand all
88 85
89 SkNf invert() const { return SkNf(1) / *this; } 86 SkNf invert() const { return SkNf(1) / *this; }
90 SkNf approxInvert() const { return _mm_rcp_ps(fVec); } 87 SkNf approxInvert() const { return _mm_rcp_ps(fVec); }
91 88
92 template <int k> float kth() const { 89 template <int k> float kth() const {
93 SkASSERT(0 <= k && k < 2); 90 SkASSERT(0 <= k && k < 2);
94 union { __m128 v; float fs[4]; } pun = {fVec}; 91 union { __m128 v; float fs[4]; } pun = {fVec};
95 return pun.fs[k&1]; 92 return pun.fs[k&1];
96 } 93 }
97 94
98 private:
99 __m128 fVec; 95 __m128 fVec;
100 }; 96 };
101 97
102 template <> 98 template <>
103 class SkNf<2, double> { 99 class SkNf<2, double> {
104 typedef SkNb<2, 8> Nb; 100 typedef SkNb<2, 8> Nb;
105 public: 101 public:
106 SkNf(const __m128d& vec) : fVec(vec) {} 102 SkNf(const __m128d& vec) : fVec(vec) {}
107 103
108 SkNf() {} 104 SkNf() {}
(...skipping 25 matching lines...) Expand all
134 130
135 SkNf invert() const { return SkNf(1) / *this; } 131 SkNf invert() const { return SkNf(1) / *this; }
136 SkNf approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec ))); } 132 SkNf approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec ))); }
137 133
138 template <int k> double kth() const { 134 template <int k> double kth() const {
139 SkASSERT(0 <= k && k < 2); 135 SkASSERT(0 <= k && k < 2);
140 union { __m128d v; double ds[2]; } pun = {fVec}; 136 union { __m128d v; double ds[2]; } pun = {fVec};
141 return pun.ds[k&1]; 137 return pun.ds[k&1];
142 } 138 }
143 139
144 private:
145 __m128d fVec; 140 __m128d fVec;
146 }; 141 };
147 142
148 template <> 143 template <>
149 class SkNi<4, int> { 144 class SkNi<4, int> {
150 public: 145 public:
151 SkNi(const __m128i& vec) : fVec(vec) {} 146 SkNi(const __m128i& vec) : fVec(vec) {}
152 147
153 SkNi() {} 148 SkNi() {}
154 explicit SkNi(int val) : fVec(_mm_set1_epi32(val)) {} 149 explicit SkNi(int val) : fVec(_mm_set1_epi32(val)) {}
(...skipping 17 matching lines...) Expand all
172 template <int k> int kth() const { 167 template <int k> int kth() const {
173 SkASSERT(0 <= k && k < 4); 168 SkASSERT(0 <= k && k < 4);
174 switch (k) { 169 switch (k) {
175 case 0: return _mm_cvtsi128_si32(fVec); 170 case 0: return _mm_cvtsi128_si32(fVec);
176 case 1: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 4)); 171 case 1: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 4));
177 case 2: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 8)); 172 case 2: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 8));
178 case 3: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 12)); 173 case 3: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 12));
179 default: SkASSERT(false); return 0; 174 default: SkASSERT(false); return 0;
180 } 175 }
181 } 176 }
182 protected: 177
183 __m128i fVec; 178 __m128i fVec;
184 }; 179 };
185 180
186 template <> 181 template <>
187 class SkNf<4, float> { 182 class SkNf<4, float> {
188 typedef SkNb<4, 4> Nb; 183 typedef SkNb<4, 4> Nb;
189 public: 184 public:
190 SkNf(const __m128& vec) : fVec(vec) {} 185 SkNf(const __m128& vec) : fVec(vec) {}
191 186
192 SkNf() {} 187 SkNf() {}
(...skipping 27 matching lines...) Expand all
220 215
221 SkNf invert() const { return SkNf(1) / *this; } 216 SkNf invert() const { return SkNf(1) / *this; }
222 SkNf approxInvert() const { return _mm_rcp_ps(fVec); } 217 SkNf approxInvert() const { return _mm_rcp_ps(fVec); }
223 218
224 template <int k> float kth() const { 219 template <int k> float kth() const {
225 SkASSERT(0 <= k && k < 4); 220 SkASSERT(0 <= k && k < 4);
226 union { __m128 v; float fs[4]; } pun = {fVec}; 221 union { __m128 v; float fs[4]; } pun = {fVec};
227 return pun.fs[k&3]; 222 return pun.fs[k&3];
228 } 223 }
229 224
230 protected:
231 __m128 fVec; 225 __m128 fVec;
232 }; 226 };
233 227
234 template <> 228 template <>
235 class SkNi<4, uint16_t> { 229 class SkNi<4, uint16_t> {
236 public: 230 public:
237 SkNi(const __m128i& vec) : fVec(vec) {} 231 SkNi(const __m128i& vec) : fVec(vec) {}
238 232
239 SkNi() {} 233 SkNi() {}
240 explicit SkNi(uint16_t val) : fVec(_mm_set1_epi16(val)) {} 234 explicit SkNi(uint16_t val) : fVec(_mm_set1_epi16(val)) {}
241 static SkNi Load(const uint16_t vals[4]) { return _mm_loadl_epi64((const __m128i*)vals); } 235 static SkNi Load(const uint16_t vals[4]) { return _mm_loadl_epi64((const __m128i*)vals); }
242 SkNi(uint16_t a, uint16_t b, uint16_t c, uint16_t d) : fVec(_mm_setr_epi16(a ,b,c,d,0,0,0,0)) {} 236 SkNi(uint16_t a, uint16_t b, uint16_t c, uint16_t d) : fVec(_mm_setr_epi16(a ,b,c,d,0,0,0,0)) {}
243 237
244 void store(uint16_t vals[4]) const { _mm_storel_epi64((__m128i*)vals, fVec); } 238 void store(uint16_t vals[4]) const { _mm_storel_epi64((__m128i*)vals, fVec); }
245 239
246 SkNi operator + (const SkNi& o) const { return _mm_add_epi16(fVec, o.fVec); } 240 SkNi operator + (const SkNi& o) const { return _mm_add_epi16(fVec, o.fVec); }
247 SkNi operator - (const SkNi& o) const { return _mm_sub_epi16(fVec, o.fVec); } 241 SkNi operator - (const SkNi& o) const { return _mm_sub_epi16(fVec, o.fVec); }
248 SkNi operator * (const SkNi& o) const { return _mm_mullo_epi16(fVec, o.fVec) ; } 242 SkNi operator * (const SkNi& o) const { return _mm_mullo_epi16(fVec, o.fVec) ; }
249 243
250 SkNi operator << (int bits) const { return _mm_slli_epi16(fVec, bits); } 244 SkNi operator << (int bits) const { return _mm_slli_epi16(fVec, bits); }
251 SkNi operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); } 245 SkNi operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); }
252 246
253 template <int k> uint16_t kth() const { 247 template <int k> uint16_t kth() const {
254 SkASSERT(0 <= k && k < 4); 248 SkASSERT(0 <= k && k < 4);
255 return _mm_extract_epi16(fVec, k); 249 return _mm_extract_epi16(fVec, k);
256 } 250 }
257 protected: 251
258 __m128i fVec; 252 __m128i fVec;
259 }; 253 };
260 254
261 template <> 255 template <>
262 class SkNi<8, uint16_t> { 256 class SkNi<8, uint16_t> {
263 public: 257 public:
264 SkNi(const __m128i& vec) : fVec(vec) {} 258 SkNi(const __m128i& vec) : fVec(vec) {}
265 259
266 SkNi() {} 260 SkNi() {}
267 explicit SkNi(uint16_t val) : fVec(_mm_set1_epi16(val)) {} 261 explicit SkNi(uint16_t val) : fVec(_mm_set1_epi16(val)) {}
268 static SkNi Load(const uint16_t vals[8]) { return _mm_loadu_si128((const __m128i*)vals); } 262 static SkNi Load(const uint16_t vals[8]) { return _mm_loadu_si128((const __m128i*)vals); }
269 SkNi(uint16_t a, uint16_t b, uint16_t c, uint16_t d, 263 SkNi(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
270 uint16_t e, uint16_t f, uint16_t g, uint16_t h) : fVec(_mm_setr_epi16(a ,b,c,d,e,f,g,h)) {} 264 uint16_t e, uint16_t f, uint16_t g, uint16_t h) : fVec(_mm_setr_epi16(a ,b,c,d,e,f,g,h)) {}
271 265
272 void store(uint16_t vals[8]) const { _mm_storeu_si128((__m128i*)vals, fVec); } 266 void store(uint16_t vals[8]) const { _mm_storeu_si128((__m128i*)vals, fVec); }
273 267
274 SkNi operator + (const SkNi& o) const { return _mm_add_epi16(fVec, o.fVec); } 268 SkNi operator + (const SkNi& o) const { return _mm_add_epi16(fVec, o.fVec); }
275 SkNi operator - (const SkNi& o) const { return _mm_sub_epi16(fVec, o.fVec); } 269 SkNi operator - (const SkNi& o) const { return _mm_sub_epi16(fVec, o.fVec); }
276 SkNi operator * (const SkNi& o) const { return _mm_mullo_epi16(fVec, o.fVec) ; } 270 SkNi operator * (const SkNi& o) const { return _mm_mullo_epi16(fVec, o.fVec) ; }
277 271
278 SkNi operator << (int bits) const { return _mm_slli_epi16(fVec, bits); } 272 SkNi operator << (int bits) const { return _mm_slli_epi16(fVec, bits); }
279 SkNi operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); } 273 SkNi operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); }
280 274
281 template <int k> uint16_t kth() const { 275 template <int k> uint16_t kth() const {
282 SkASSERT(0 <= k && k < 8); 276 SkASSERT(0 <= k && k < 8);
283 return _mm_extract_epi16(fVec, k); 277 return _mm_extract_epi16(fVec, k);
284 } 278 }
285 protected: 279
286 __m128i fVec; 280 __m128i fVec;
287 }; 281 };
288 282
283 template <>
284 class SkNi<16, uint8_t> {
285 public:
286 SkNi(const __m128i& vec) : fVec(vec) {}
287
288 SkNi() {}
289 explicit SkNi(uint8_t val) : fVec(_mm_set1_epi8(val)) {}
290 static SkNi Load(const uint8_t vals[16]) { return _mm_loadu_si128((const __m128i*)vals); }
291 SkNi(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
292 uint8_t e, uint8_t f, uint8_t g, uint8_t h,
293 uint8_t i, uint8_t j, uint8_t k, uint8_t l,
294 uint8_t m, uint8_t n, uint8_t o, uint8_t p)
295 : fVec(_mm_setr_epi8(a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p)) {}
296
297 void store(uint8_t vals[16]) const { _mm_storeu_si128((__m128i*)vals, fVec); }
298
299 SkNi operator + (const SkNi& o) const { return _mm_add_epi8(fVec, o.fVec); }
300 SkNi operator - (const SkNi& o) const { return _mm_sub_epi8(fVec, o.fVec); }
301
302 // SSE cannot multiply or shift vectors of uint8_t.
303 SkNi operator * (const SkNi& o) const { SkASSERT(false); return fVec; }
304 SkNi operator << (int bits) const { SkASSERT(false); return fVec; }
305 SkNi operator >> (int bits) const { SkASSERT(false); return fVec; }
306
307 template <int k> uint8_t kth() const {
308 SkASSERT(0 <= k && k < 16);
309 // SSE4.1 would just `return _mm_extract_epi8(fVec, k)`. We have to read 16-bits instead.
310 int pair = _mm_extract_epi16(fVec, k/2);
311 return k % 2 == 0 ? pair : (pair >> 8);
312 }
313
314 __m128i fVec;
315 };
316
289 #endif//SkNx_sse_DEFINED 317 #endif//SkNx_sse_DEFINED
OLD NEW
« no previous file with comments | « src/opts/SkNx_neon.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698