Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(334)

Side by Side Diff: src/opts/SkNx_sse.h

Issue 1526523003: Unify some SkNx code (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: typo Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkNx_neon.h ('k') | src/opts/SkXfermode_opts.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_sse_DEFINED 8 #ifndef SkNx_sse_DEFINED
9 #define SkNx_sse_DEFINED 9 #define SkNx_sse_DEFINED
10 10
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
104 104
105 template <> 105 template <>
106 class SkNx<4, float> { 106 class SkNx<4, float> {
107 public: 107 public:
108 SkNx(const __m128& vec) : fVec(vec) {} 108 SkNx(const __m128& vec) : fVec(vec) {}
109 109
110 SkNx() {} 110 SkNx() {}
111 SkNx(float val) : fVec( _mm_set1_ps(val) ) {} 111 SkNx(float val) : fVec( _mm_set1_ps(val) ) {}
112 static SkNx Load(const float vals[4]) { return _mm_loadu_ps(vals); } 112 static SkNx Load(const float vals[4]) { return _mm_loadu_ps(vals); }
113 113
114 static SkNx FromBytes(const uint8_t bytes[4]) {
115 __m128i fix8 = _mm_cvtsi32_si128(*(const int*)bytes);
116 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
117 const char _ = ~0; // Zero these bytes.
118 __m128i fix8_32 = _mm_shuffle_epi8(fix8, _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, 3,_,_,_));
119 #else
120 __m128i fix8_16 = _mm_unpacklo_epi8 (fix8, _mm_setzero_si128()),
121 fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128());
122 #endif
123 return SkNx(_mm_cvtepi32_ps(fix8_32));
124 // TODO: use _mm_cvtepu8_epi32 w/SSE4.1?
125 }
126
127 SkNx(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} 114 SkNx(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {}
128 115
129 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } 116 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); }
130 void toBytes(uint8_t bytes[4]) const {
131 __m128i fix8_32 = _mm_cvttps_epi32(fVec),
132 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),
133 fix8 = _mm_packus_epi16(fix8_16, fix8_16);
134 *(int*)bytes = _mm_cvtsi128_si32(fix8);
135 }
136
137 static void ToBytes(uint8_t bytes[16],
138 const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) {
139 _mm_storeu_si128((__m128i*)bytes,
140 _mm_packus_epi16(_mm_packus_epi16(_mm_cvttps_epi32(a.fVec),
141 _mm_cvttps_epi32(b.fVec)),
142 _mm_packus_epi16(_mm_cvttps_epi32(c.fVec),
143 _mm_cvttps_epi32(d.fVec))));
144 }
145 117
146 SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); } 118 SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); }
147 SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); } 119 SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); }
148 SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); } 120 SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); }
149 SkNx operator / (const SkNx& o) const { return _mm_div_ps(fVec, o.fVec); } 121 SkNx operator / (const SkNx& o) const { return _mm_div_ps(fVec, o.fVec); }
150 122
151 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec); } 123 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec); }
152 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec); } 124 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec); }
153 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec); } 125 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec); }
154 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } 126 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec); }
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
246 218
247 template <int k> uint16_t kth() const { 219 template <int k> uint16_t kth() const {
248 SkASSERT(0 <= k && k < 8); 220 SkASSERT(0 <= k && k < 8);
249 return _mm_extract_epi16(fVec, k); 221 return _mm_extract_epi16(fVec, k);
250 } 222 }
251 223
252 __m128i fVec; 224 __m128i fVec;
253 }; 225 };
254 226
255 template <> 227 template <>
228 class SkNx<4, uint8_t> {
229 public:
230 SkNx(const __m128i& vec) : fVec(vec) {}
231
232 SkNx() {}
233 static SkNx Load(const uint8_t vals[4]) { return _mm_cvtsi32_si128(*(const int*)vals); }
234 void store(uint8_t vals[4]) const { *(int*)vals = _mm_cvtsi128_si32(fVec); }
235
236 // TODO as needed
237
238 __m128i fVec;
239 };
240
241 template <>
242 class SkNx<8, uint8_t> {
243 public:
244 SkNx(const __m128i& vec) : fVec(vec) {}
245
246 SkNx() {}
247 static SkNx Load(const uint8_t vals[8]) { return _mm_loadl_epi64((const __m128i*)vals); }
248 void store(uint8_t vals[8]) const { _mm_storel_epi64((__m128i*)vals, fVec); }
249
250 // TODO as needed
251
252 __m128i fVec;
253 };
254
255 template <>
256 class SkNx<16, uint8_t> { 256 class SkNx<16, uint8_t> {
257 public: 257 public:
258 SkNx(const __m128i& vec) : fVec(vec) {} 258 SkNx(const __m128i& vec) : fVec(vec) {}
259 259
260 SkNx() {} 260 SkNx() {}
261 SkNx(uint8_t val) : fVec(_mm_set1_epi8(val)) {} 261 SkNx(uint8_t val) : fVec(_mm_set1_epi8(val)) {}
262 static SkNx Load(const uint8_t vals[16]) { return _mm_loadu_si128((const __m128i*)vals); } 262 static SkNx Load(const uint8_t vals[16]) { return _mm_loadu_si128((const __m128i*)vals); }
263 SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d, 263 SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
264 uint8_t e, uint8_t f, uint8_t g, uint8_t h, 264 uint8_t e, uint8_t f, uint8_t g, uint8_t h,
265 uint8_t i, uint8_t j, uint8_t k, uint8_t l, 265 uint8_t i, uint8_t j, uint8_t k, uint8_t l,
(...skipping 23 matching lines...) Expand all
289 289
290 SkNx thenElse(const SkNx& t, const SkNx& e) const { 290 SkNx thenElse(const SkNx& t, const SkNx& e) const {
291 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), 291 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec),
292 _mm_andnot_si128(fVec, e.fVec)); 292 _mm_andnot_si128(fVec, e.fVec));
293 } 293 }
294 294
295 __m128i fVec; 295 __m128i fVec;
296 }; 296 };
297 297
298 298
299 template<> 299 template<> inline Sk4i SkNx_cast<int, float, 4>(const Sk4f& src) {
300 inline SkNx<4, int> SkNx_cast<int, float, 4>(const SkNx<4, float>& src) {
301 return _mm_cvttps_epi32(src.fVec); 300 return _mm_cvttps_epi32(src.fVec);
302 } 301 }
303 302
303 template<> inline Sk4b SkNx_cast<uint8_t, float, 4>(const Sk4f& src) {
304 auto _32 = _mm_cvttps_epi32(src.fVec);
305 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
306 const int _ = ~0;
307 return _mm_shuffle_epi8(_32, _mm_setr_epi8(0,4,8,12, _,_,_,_, _,_,_,_, _,_,_,_));
308 #else
309 auto _16 = _mm_packus_epi16(_32, _32);
310 return _mm_packus_epi16(_16, _16);
311 #endif
312 }
313
314 template<> inline Sk4f SkNx_cast<float, uint8_t, 4>(const Sk4b& src) {
315 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
316 const int _ = ~0;
317 auto _32 = _mm_shuffle_epi8(src.fVec, _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, 3,_,_,_));
318 #else
319 auto _16 = _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128()),
320 _32 = _mm_unpacklo_epi16(_16, _mm_setzero_si128());
321 #endif
322 return _mm_cvtepi32_ps(_32);
323 }
324
325 static inline void Sk4f_ToBytes(uint8_t bytes[16],
326 const Sk4f& a, const Sk4f& b, const Sk4f& c, const Sk4f& d) {
327 _mm_storeu_si128((__m128i*)bytes,
328 _mm_packus_epi16(_mm_packus_epi16(_mm_cvttps_epi32(a.fVec),
329 _mm_cvttps_epi32(b.fVec)),
330 _mm_packus_epi16(_mm_cvttps_epi32(c.fVec),
331 _mm_cvttps_epi32(d.fVec))));
332 }
333
334
304 } // namespace 335 } // namespace
305 336
306 #endif//SkNx_sse_DEFINED 337 #endif//SkNx_sse_DEFINED
OLDNEW
« no previous file with comments | « src/opts/SkNx_neon.h ('k') | src/opts/SkXfermode_opts.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698