OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
10 | 10 |
(...skipping 385 matching lines...) | |
396 } | 396 } |
397 | 397 |
398 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, int>(const Sk4i& src) { | 398 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, int>(const Sk4i& src) { |
399 return _mm_packus_epi16(_mm_packus_epi16(src.fVec, src.fVec), src.fVec); | 399 return _mm_packus_epi16(_mm_packus_epi16(src.fVec, src.fVec), src.fVec); |
400 } | 400 } |
401 | 401 |
402 static inline Sk4i Sk4f_round(const Sk4f& x) { | 402 static inline Sk4i Sk4f_round(const Sk4f& x) { |
403 return _mm_cvtps_epi32(x.fVec); | 403 return _mm_cvtps_epi32(x.fVec); |
404 } | 404 } |
405 | 405 |
406 static inline void Sk4h_load4(const void* ptr, Sk4h* r, Sk4h* g, Sk4h* b, Sk4h* a) { | |
msarett
2016/07/26 14:25:07
The fact that we are doing a transpose on loads an... [comment truncated]
| |
407 __m128i lo = _mm_loadu_si128(((__m128i*)ptr) + 0), | |
408 hi = _mm_loadu_si128(((__m128i*)ptr) + 1); | |
409 __m128i even = _mm_unpacklo_epi16(lo, hi), // r0 r2 g0 g2 b0 b2 a0 a2 | |
410 odd = _mm_unpackhi_epi16(lo, hi); // r1 r3 ... | |
411 __m128i rg = _mm_unpacklo_epi16(even, odd), // r0 r1 r2 r3 g0 g1 g2 g3 | |
412 ba = _mm_unpackhi_epi16(even, odd); // b0 b1 ... a0 a1 ... | |
413 *r = rg; | |
414 *g = _mm_srli_si128(rg, 8); | |
415 *b = ba; | |
416 *a = _mm_srli_si128(ba, 8); | |
417 } | |
418 | |
406 static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk4h& b, | 419 static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk4h& b, |
407 const Sk4h& a) { | 420 const Sk4h& a) { |
408 __m128i rg = _mm_unpacklo_epi16(r.fVec, g.fVec); | 421 __m128i rg = _mm_unpacklo_epi16(r.fVec, g.fVec); |
409 __m128i ba = _mm_unpacklo_epi16(b.fVec, a.fVec); | 422 __m128i ba = _mm_unpacklo_epi16(b.fVec, a.fVec); |
410 __m128i lo = _mm_unpacklo_epi32(rg, ba); | 423 __m128i lo = _mm_unpacklo_epi32(rg, ba); |
411 __m128i hi = _mm_unpackhi_epi32(rg, ba); | 424 __m128i hi = _mm_unpackhi_epi32(rg, ba); |
412 _mm_storeu_si128(((__m128i*) dst) + 0, lo); | 425 _mm_storeu_si128(((__m128i*) dst) + 0, lo); |
413 _mm_storeu_si128(((__m128i*) dst) + 1, hi); | 426 _mm_storeu_si128(((__m128i*) dst) + 1, hi); |
414 } | 427 } |
415 | 428 |
416 #endif//SkNx_sse_DEFINED | 429 #endif//SkNx_sse_DEFINED |
OLD | NEW |