Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(28)

Side by Side Diff: src/opts/SkNx_sse.h

Issue 2151023003: Revert of Expand _01 half<->float limitation to _finite. Simplify. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkNx_neon.h ('k') | tests/Float16Test.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_sse_DEFINED 8 #ifndef SkNx_sse_DEFINED
9 #define SkNx_sse_DEFINED 9 #define SkNx_sse_DEFINED
10 10
(...skipping 134 matching lines...) Expand 10 before | Expand all | Expand 10 after
145 SkNx operator - (const SkNx& o) const { return _mm_sub_epi32(fVec, o.fVec); } 145 SkNx operator - (const SkNx& o) const { return _mm_sub_epi32(fVec, o.fVec); }
146 SkNx operator * (const SkNx& o) const { 146 SkNx operator * (const SkNx& o) const {
147 __m128i mul20 = _mm_mul_epu32(fVec, o.fVec), 147 __m128i mul20 = _mm_mul_epu32(fVec, o.fVec),
148 mul31 = _mm_mul_epu32(_mm_srli_si128(fVec, 4), _mm_srli_si128(o.fVec, 4)); 148 mul31 = _mm_mul_epu32(_mm_srli_si128(fVec, 4), _mm_srli_si128(o.fVec, 4));
149 return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul20, _MM_SHUFFLE(0,0,2,0)), 149 return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul20, _MM_SHUFFLE(0,0,2,0)),
150 _mm_shuffle_epi32(mul31, _MM_SHUFFLE(0,0,2,0))); 150 _mm_shuffle_epi32(mul31, _MM_SHUFFLE(0,0,2,0)));
151 } 151 }
152 152
153 SkNx operator & (const SkNx& o) const { return _mm_and_si128(fVec, o.fVec); } 153 SkNx operator & (const SkNx& o) const { return _mm_and_si128(fVec, o.fVec); }
154 SkNx operator | (const SkNx& o) const { return _mm_or_si128(fVec, o.fVec); } 154 SkNx operator | (const SkNx& o) const { return _mm_or_si128(fVec, o.fVec); }
155 SkNx operator ^ (const SkNx& o) const { return _mm_xor_si128(fVec, o.fVec); }
156 155
157 SkNx operator << (int bits) const { return _mm_slli_epi32(fVec, bits); } 156 SkNx operator << (int bits) const { return _mm_slli_epi32(fVec, bits); }
158 SkNx operator >> (int bits) const { return _mm_srai_epi32(fVec, bits); } 157 SkNx operator >> (int bits) const { return _mm_srai_epi32(fVec, bits); }
159 158
160 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_epi32 (fVec, o.fVec); }
161 SkNx operator < (const SkNx& o) const { return _mm_cmplt_epi32 (fVec, o.fVec); }
162 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_epi32 (fVec, o.fVec); }
163
164 int operator[](int k) const { 159 int operator[](int k) const {
165 SkASSERT(0 <= k && k < 4); 160 SkASSERT(0 <= k && k < 4);
166 union { __m128i v; int is[4]; } pun = {fVec}; 161 union { __m128i v; int is[4]; } pun = {fVec};
167 return pun.is[k&3]; 162 return pun.is[k&3];
168 } 163 }
169 164
170 SkNx thenElse(const SkNx& t, const SkNx& e) const {
171 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
172 return _mm_blendv_epi8(e.fVec, t.fVec, fVec);
173 #else
174 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec),
175 _mm_andnot_si128(fVec, e.fVec));
176 #endif
177 }
178
179 __m128i fVec; 165 __m128i fVec;
180 }; 166 };
181 167
182 template <> 168 template <>
183 class SkNx<4, uint16_t> { 169 class SkNx<4, uint16_t> {
184 public: 170 public:
185 SkNx(const __m128i& vec) : fVec(vec) {} 171 SkNx(const __m128i& vec) : fVec(vec) {}
186 172
187 SkNx() {} 173 SkNx() {}
188 SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {} 174 SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {}
(...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after
379 } 365 }
380 366
381 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { 367 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) {
382 return _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128()); 368 return _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128());
383 } 369 }
384 370
385 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { 371 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) {
386 return _mm_packus_epi16(src.fVec, src.fVec); 372 return _mm_packus_epi16(src.fVec, src.fVec);
387 } 373 }
388 374
389 template<> /*static*/ inline Sk4i SkNx_cast<int, uint16_t>(const Sk4h& src) { 375 template<> inline Sk4b SkNx_cast<uint8_t, int>(const Sk4i& src) {
390 return _mm_unpacklo_epi16(src.fVec, _mm_setzero_si128());
391 }
392
393 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, int>(const Sk4i& src) {
394 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
395 return _mm_packus_epi32(src.fVec, src.fVec);
396 #else
397 // Sign extend to trick _mm_packs_epi32() into doing the pack we want.
398 __m128i x = _mm_srai_epi32(_mm_slli_epi32(src.fVec, 16), 16);
399 return _mm_packs_epi32(x,x);
400 #endif
401 }
402
403 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, int>(const Sk4i& src) {
404 return _mm_packus_epi16(_mm_packus_epi16(src.fVec, src.fVec), src.fVec); 376 return _mm_packus_epi16(_mm_packus_epi16(src.fVec, src.fVec), src.fVec);
405 } 377 }
406 378
407 static inline Sk4i Sk4f_round(const Sk4f& x) { 379 static inline Sk4i Sk4f_round(const Sk4f& x) {
408 return _mm_cvtps_epi32(x.fVec); 380 return _mm_cvtps_epi32(x.fVec);
409 } 381 }
410 382
411 #endif//SkNx_sse_DEFINED 383 #endif//SkNx_sse_DEFINED
OLDNEW
« no previous file with comments | « src/opts/SkNx_neon.h ('k') | tests/Float16Test.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698