| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
| 9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
| 10 | 10 |
| (...skipping 134 matching lines...) |
| 145 SkNx operator - (const SkNx& o) const { return _mm_sub_epi32(fVec, o.fVec); } | 145 SkNx operator - (const SkNx& o) const { return _mm_sub_epi32(fVec, o.fVec); } |
| 146 SkNx operator * (const SkNx& o) const { | 146 SkNx operator * (const SkNx& o) const { |
| 147 __m128i mul20 = _mm_mul_epu32(fVec, o.fVec), | 147 __m128i mul20 = _mm_mul_epu32(fVec, o.fVec), |
| 148 mul31 = _mm_mul_epu32(_mm_srli_si128(fVec, 4), _mm_srli_si128(o.fVec, 4)); | 148 mul31 = _mm_mul_epu32(_mm_srli_si128(fVec, 4), _mm_srli_si128(o.fVec, 4)); |
| 149 return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul20, _MM_SHUFFLE(0,0,2,0)), | 149 return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul20, _MM_SHUFFLE(0,0,2,0)), |
| 150 _mm_shuffle_epi32(mul31, _MM_SHUFFLE(0,0,2,0))); | 150 _mm_shuffle_epi32(mul31, _MM_SHUFFLE(0,0,2,0))); |
| 151 } | 151 } |
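Note on operator* above: _mm_mullo_epi32 requires SSE4.1, so this SSE2 path forms the four 64-bit products of the even and odd lanes separately and then gathers the low 32 bits of each. A standalone sketch of the same trick (hypothetical helper name, not part of this file):

    #include <emmintrin.h>  // SSE2

    static inline __m128i mullo_epi32_sse2(__m128i a, __m128i b) {
        __m128i even = _mm_mul_epu32(a, b);                  // 64-bit products of lanes 0 and 2
        __m128i odd  = _mm_mul_epu32(_mm_srli_si128(a, 4),   // shift lanes 1 and 3 down,
                                     _mm_srli_si128(b, 4));  // then multiply those too
        // _MM_SHUFFLE(0,0,2,0) moves words 0 and 2 (the low half of each
        // 64-bit product) into the bottom two lanes; unpacklo interleaves
        // the two results back into order [a0*b0, a1*b1, a2*b2, a3*b3].
        return _mm_unpacklo_epi32(_mm_shuffle_epi32(even, _MM_SHUFFLE(0,0,2,0)),
                                  _mm_shuffle_epi32(odd,  _MM_SHUFFLE(0,0,2,0)));
    }

Because the low 32 bits of a product are the same for signed and unsigned inputs, the unsigned _mm_mul_epu32 is safe here even though the lanes are ints.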
| 152 | 152 |
| 153 SkNx operator & (const SkNx& o) const { return _mm_and_si128(fVec, o.fVec); } | 153 SkNx operator & (const SkNx& o) const { return _mm_and_si128(fVec, o.fVec); } |
| 154 SkNx operator | (const SkNx& o) const { return _mm_or_si128(fVec, o.fVec); } | 154 SkNx operator | (const SkNx& o) const { return _mm_or_si128(fVec, o.fVec); } |
| 155 SkNx operator ^ (const SkNx& o) const { return _mm_xor_si128(fVec, o.fVec); } |
| 155 | 156 |
| 156 SkNx operator << (int bits) const { return _mm_slli_epi32(fVec, bits); } | 157 SkNx operator << (int bits) const { return _mm_slli_epi32(fVec, bits); } |
| 157 SkNx operator >> (int bits) const { return _mm_srai_epi32(fVec, bits); } | 158 SkNx operator >> (int bits) const { return _mm_srai_epi32(fVec, bits); } |
| 158 | 159 |
| 160 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_epi32 (fVec, o.fVec); } |
| 161 SkNx operator < (const SkNx& o) const { return _mm_cmplt_epi32 (fVec, o.fVec); } |
| 162 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_epi32 (fVec, o.fVec); } |
| 163 |
| 159 int operator[](int k) const { | 164 int operator[](int k) const { |
| 160 SkASSERT(0 <= k && k < 4); | 165 SkASSERT(0 <= k && k < 4); |
| 161 union { __m128i v; int is[4]; } pun = {fVec}; | 166 union { __m128i v; int is[4]; } pun = {fVec}; |
| 162 return pun.is[k&3]; | 167 return pun.is[k&3]; |
| 163 } | 168 } |
| 164 | 169 |
| 170 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
| 171 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 |
| 172 return _mm_blendv_epi8(e.fVec, t.fVec, fVec); |
| 173 #else |
| 174 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), |
| 175 _mm_andnot_si128(fVec, e.fVec)); |
| 176 #endif |
| 177 } |
| 178 |
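The comparison operators added at lines 160-162 return lane masks of all ones or all zeros, which is exactly what thenElse consumes: on SSE4.1 it blends with _mm_blendv_epi8, and the SSE2 fallback is the classic (mask & t) | (~mask & e) select. A minimal usage sketch, assuming Sk4i's scalar constructor works like the other specializations in this file:

    // Branchless per-lane max(v, 0): lanes where v < 0 take the 'then'
    // value (zero); all other lanes keep their original value.
    Sk4i clamp_negatives_to_zero(const Sk4i& v) {
        return (v < Sk4i(0)).thenElse(Sk4i(0), v);
    }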
| 165 __m128i fVec; | 179 __m128i fVec; |
| 166 }; | 180 }; |
| 167 | 181 |
| 168 template <> | 182 template <> |
| 169 class SkNx<4, uint16_t> { | 183 class SkNx<4, uint16_t> { |
| 170 public: | 184 public: |
| 171 SkNx(const __m128i& vec) : fVec(vec) {} | 185 SkNx(const __m128i& vec) : fVec(vec) {} |
| 172 | 186 |
| 173 SkNx() {} | 187 SkNx() {} |
| 174 SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {} | 188 SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {} |
| (...skipping 190 matching lines...) |
| 365 } | 379 } |
| 366 | 380 |
| 367 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { | 381 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { |
| 368 return _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128()); | 382 return _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128()); |
| 369 } | 383 } |
| 370 | 384 |
| 371 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { | 385 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { |
| 372 return _mm_packus_epi16(src.fVec, src.fVec); | 386 return _mm_packus_epi16(src.fVec, src.fVec); |
| 373 } | 387 } |
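One caveat on the pair of casts above: the widening direction is a plain zero-extend (interleave with a zero register), but the narrowing _mm_packus_epi16 reads its 16-bit lanes as signed, so uint16_t values of 0x8000 and up look negative and clamp to 0 rather than 255. That is harmless for pixel data already in [0, 255]. A scalar model of the behavior, as a sketch:

    #include <cstdint>

    static inline uint16_t widen_u8(uint8_t b) {
        return b;                 // _mm_unpacklo_epi8 with zero: zero-extend
    }
    static inline uint8_t narrow_u16(uint16_t v) {
        int16_t s = (int16_t)v;   // _mm_packus_epi16 reads lanes as signed,
        if (s < 0) return 0;      // so 0x8000..0xFFFF clamp to 0, not 255
        return s > 255 ? 255 : (uint8_t)s;
    }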
| 374 | 388 |
| 375 template<> inline Sk4b SkNx_cast<uint8_t, int>(const Sk4i& src) { | 389 template<> /*static*/ inline Sk4i SkNx_cast<int, uint16_t>(const Sk4h& src) { |
| 390 return _mm_unpacklo_epi16(src.fVec, _mm_setzero_si128()); |
| 391 } |
| 392 |
| 393 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, int>(const Sk4i& src) { |
| 394 // TODO: merge with other work exploring best int -> uint16_t conversion. |
| 395 |
| 396 // Sign extend to trick _mm_packs_epi32() into doing the pack we want. |
| 397 __m128i x = _mm_srai_epi32(_mm_slli_epi32(src.fVec, 16), 16); |
| 398 return _mm_packs_epi32(x,x); |
| 399 } |
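Why the shift pair at line 397 works: _mm_packs_epi32 narrows with signed saturation, so a lane like 0xFFFF would clamp to 0x7FFF instead of keeping its low 16 bits. Shifting left then arithmetically right by 16 sign-extends the low 16 bits of each lane, putting every lane in [-32768, 32767] so the pack passes those bits through verbatim. A scalar model of the trick, as a sketch:

    #include <cstdint>

    static inline uint16_t low16_via_signed_pack(int32_t v) {
        // Sign-extend the low 16 bits; the unsigned left shift avoids
        // signed-overflow UB, and the right shift is arithmetic, matching
        // what _mm_srai_epi32 does per lane.
        int32_t x = (int32_t)((uint32_t)v << 16) >> 16;
        // x now fits in int16, so a signed-saturating pack reproduces it
        // exactly; reinterpreted as uint16_t, that is the low 16 bits of v.
        return (uint16_t)x;
    }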
| 400 |
| 401 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, int>(const Sk4i& src) { |
| 376 return _mm_packus_epi16(_mm_packus_epi16(src.fVec, src.fVec), src.fVec); | 402 return _mm_packus_epi16(_mm_packus_epi16(src.fVec, src.fVec), src.fVec); |
| 377 } | 403 } |
| 378 | 404 |
| 379 static inline Sk4i Sk4f_round(const Sk4f& x) { | 405 static inline Sk4i Sk4f_round(const Sk4f& x) { |
| 380 return _mm_cvtps_epi32(x.fVec); | 406 return _mm_cvtps_epi32(x.fVec); |
| 381 } | 407 } |
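A behavioral note on Sk4f_round: _mm_cvtps_epi32 rounds according to the current MXCSR rounding mode, which defaults to round-to-nearest-even, so halfway cases go to the even neighbor. A sketch, assuming Sk4f has the usual scalar-broadcast constructor:

    Sk4i r = Sk4f_round(Sk4f(2.5f));   // every lane is 2, not 3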
| 382 | 408 |
| 383 #endif//SkNx_sse_DEFINED | 409 #endif//SkNx_sse_DEFINED |