OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
10 | 10 |
(...skipping 429 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
440 } | 440 } |
441 | 441 |
// SkNx_cast<int32_t, uint16_t>: widens the four low 16-bit lanes of src to 32-bit lanes.
// _mm_unpacklo_epi16 interleaves each low 16-bit element of src.fVec with a 16-bit zero
// taken from _mm_setzero_si128(), so every resulting 32-bit lane is the zero-extended
// source element. The upper four 16-bit lanes of src.fVec are not read.
442 template<> /*static*/ inline Sk4i SkNx_cast<int32_t, uint16_t>(const Sk4h& src)
{ | 442 template<> /*static*/ inline Sk4i SkNx_cast<int32_t, uint16_t>(const Sk4h& src)
{ |
443 return _mm_unpacklo_epi16(src.fVec, _mm_setzero_si128()); | 443 return _mm_unpacklo_epi16(src.fVec, _mm_setzero_si128()); |
444 } | 444 } |
445 | 445 |
// SkNx_cast<uint8_t, int32_t>: narrows 32-bit lanes to bytes with two rounds of
// _mm_packus_epi16 (signed 16-bit -> unsigned 8-bit pack with saturation).
// NOTE(review): _mm_packus_epi16 reads its operands as 16-bit lanes, not 32-bit ones, so
// the low four result bytes equal the source lanes only when every 32-bit lane is already
// in [0, 255]; other values are not clamped per 32-bit lane -- confirm callers guarantee
// that range.
// The outer pack's second operand (src.fVec) only fills the upper 8 result bytes;
// presumably Sk4b consumers look at the low 4 bytes only -- TODO confirm.
446 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, int32_t>(const Sk4i& src) { | 446 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, int32_t>(const Sk4i& src) { |
447 return _mm_packus_epi16(_mm_packus_epi16(src.fVec, src.fVec), src.fVec); | 447 return _mm_packus_epi16(_mm_packus_epi16(src.fVec, src.fVec), src.fVec); |
448 } | 448 } |
449 | 449 |
// SkNx_cast<int32_t, uint32_t>: reinterprets the lanes of src as signed 32-bit values.
// The underlying vector is returned unchanged (no conversion intrinsic is applied), so
// the bit pattern of every lane is preserved. This specialization exists only on the
// NEW side of the diff.
| 450 template<> /*static*/ inline Sk4i SkNx_cast<int32_t, uint32_t>(const Sk4u& src)
{ |
| 451 return src.fVec; |
| 452 } |
| 453 |
// Sk4f_round: converts the four float lanes of x to 32-bit integers with
// _mm_cvtps_epi32. That intrinsic rounds according to the current MXCSR rounding mode
// (round-to-nearest-even unless the mode has been changed) rather than truncating.
450 static inline Sk4i Sk4f_round(const Sk4f& x) { | 454 static inline Sk4i Sk4f_round(const Sk4f& x) { |
451 return _mm_cvtps_epi32(x.fVec); | 455 return _mm_cvtps_epi32(x.fVec); |
452 } | 456 } |
453 | 457 |
// Sk4h_load4: reads two unaligned 128-bit vectors (32 bytes) starting at ptr and
// de-interleaves them into four per-channel vectors using two rounds of 16-bit unpacks.
// Per the lane comments below, the input is treated as four interleaved r,g,b,a pixels of
// 16 bits per channel. After the second unpack, rg holds r0..r3 followed by g0..g3 and
// ba holds b0..b3 followed by a0..a3.
// *r and *b receive rg and ba as-is; *g and *a receive the same vectors shifted right by
// 8 bytes so that the g / a lanes land in the low half.
// NOTE(review): the upper four 16-bit lanes of *r and *b still carry the g / a data;
// presumably Sk4h only uses its low four lanes -- confirm.
454 static inline void Sk4h_load4(const void* ptr, Sk4h* r, Sk4h* g, Sk4h* b, Sk4h*
a) { | 458 static inline void Sk4h_load4(const void* ptr, Sk4h* r, Sk4h* g, Sk4h* b, Sk4h*
a) { |
455 __m128i lo = _mm_loadu_si128(((__m128i*)ptr) + 0), | 459 __m128i lo = _mm_loadu_si128(((__m128i*)ptr) + 0), |
456 hi = _mm_loadu_si128(((__m128i*)ptr) + 1); | 460 hi = _mm_loadu_si128(((__m128i*)ptr) + 1); |
457 __m128i even = _mm_unpacklo_epi16(lo, hi), // r0 r2 g0 g2 b0 b2 a0 a2 | 461 __m128i even = _mm_unpacklo_epi16(lo, hi), // r0 r2 g0 g2 b0 b2 a0 a2 |
458 odd = _mm_unpackhi_epi16(lo, hi); // r1 r3 ... | 462 odd = _mm_unpackhi_epi16(lo, hi); // r1 r3 ... |
459 __m128i rg = _mm_unpacklo_epi16(even, odd), // r0 r1 r2 r3 g0 g1 g2 g3 | 463 __m128i rg = _mm_unpacklo_epi16(even, odd), // r0 r1 r2 r3 g0 g1 g2 g3 |
460 ba = _mm_unpackhi_epi16(even, odd); // b0 b1 ... a0 a1 ... | 464 ba = _mm_unpackhi_epi16(even, odd); // b0 b1 ... a0 a1 ... |
461 *r = rg; | 465 *r = rg; |
462 *g = _mm_srli_si128(rg, 8); | 466 *g = _mm_srli_si128(rg, 8); |
463 *b = ba; | 467 *b = ba; |
464 *a = _mm_srli_si128(ba, 8); | 468 *a = _mm_srli_si128(ba, 8); |
465 } | 469 } |
466 | 470 |
// Sk4h_store4: interleaves the r, g, b, a vectors and writes the result to dst as two
// unaligned 128-bit stores (32 bytes total).
// The 16-bit unpacks pair the low four lanes of r with g (r0 g0 r1 g1 ...) and of b with
// a (b0 a0 b1 a1 ...). The 32-bit unpacks then interleave those pairs, so lo holds the
// first two r,g,b,a groups and hi holds the last two.
// Only the low four 16-bit lanes of each input vector contribute to the output.
467 static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk
4h& b, | 471 static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk
4h& b, |
468 const Sk4h& a) { | 472 const Sk4h& a) { |
469 __m128i rg = _mm_unpacklo_epi16(r.fVec, g.fVec); | 473 __m128i rg = _mm_unpacklo_epi16(r.fVec, g.fVec); |
470 __m128i ba = _mm_unpacklo_epi16(b.fVec, a.fVec); | 474 __m128i ba = _mm_unpacklo_epi16(b.fVec, a.fVec); |
471 __m128i lo = _mm_unpacklo_epi32(rg, ba); | 475 __m128i lo = _mm_unpacklo_epi32(rg, ba); |
472 __m128i hi = _mm_unpackhi_epi32(rg, ba); | 476 __m128i hi = _mm_unpackhi_epi32(rg, ba); |
473 _mm_storeu_si128(((__m128i*) dst) + 0, lo); | 477 _mm_storeu_si128(((__m128i*) dst) + 0, lo); |
474 _mm_storeu_si128(((__m128i*) dst) + 1, hi); | 478 _mm_storeu_si128(((__m128i*) dst) + 1, hi); |
475 } | 479 } |
476 | 480 |
477 #endif//SkNx_sse_DEFINED | 481 #endif//SkNx_sse_DEFINED |
OLD | NEW |