Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
| 9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
| 10 | 10 |
| (...skipping 460 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 471 static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk 4h& b, | 471 static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk 4h& b, |
| 472 const Sk4h& a) { | 472 const Sk4h& a) { |
| 473 __m128i rg = _mm_unpacklo_epi16(r.fVec, g.fVec); | 473 __m128i rg = _mm_unpacklo_epi16(r.fVec, g.fVec); |
| 474 __m128i ba = _mm_unpacklo_epi16(b.fVec, a.fVec); | 474 __m128i ba = _mm_unpacklo_epi16(b.fVec, a.fVec); |
| 475 __m128i lo = _mm_unpacklo_epi32(rg, ba); | 475 __m128i lo = _mm_unpacklo_epi32(rg, ba); |
| 476 __m128i hi = _mm_unpackhi_epi32(rg, ba); | 476 __m128i hi = _mm_unpackhi_epi32(rg, ba); |
| 477 _mm_storeu_si128(((__m128i*) dst) + 0, lo); | 477 _mm_storeu_si128(((__m128i*) dst) + 0, lo); |
| 478 _mm_storeu_si128(((__m128i*) dst) + 1, hi); | 478 _mm_storeu_si128(((__m128i*) dst) + 1, hi); |
| 479 } | 479 } |
| 480 | 480 |
| 481 static inline void Sk4f_load4(const void* ptr, Sk4f* r, Sk4f* g, Sk4f* b, Sk4f* a) { | |
|
mtklein
2016/09/14 20:30:36
May want to call _MM_TRANSPOSE_PS, or crib off it:
msarett
2016/09/14 21:09:43
Cool! Done.
| |
| 482 __m128 v0 = _mm_loadu_ps(((float*)ptr) + 0), | |
| 483 v1 = _mm_loadu_ps(((float*)ptr) + 4), | |
| 484 v2 = _mm_loadu_ps(((float*)ptr) + 8), | |
| 485 v3 = _mm_loadu_ps(((float*)ptr) + 12); | |
| 486 __m128 rg01 = _mm_unpacklo_ps(v0, v1), // r0 r1 g0 g1 | |
| 487 ba01 = _mm_unpackhi_ps(v0, v1), // b0 b1 a0 a1 | |
| 488 rg23 = _mm_unpacklo_ps(v2, v3), // r2 r3 g2 g3 | |
| 489 ba23 = _mm_unpacklo_ps(v2, v3); // b2 b3 a2 a3 | |
| 490 *r = _mm_shuffle_ps(rg01, rg23, 0x88); // 00 01 00 01 | |
| 491 *g = _mm_shuffle_ps(rg01, rg23, 0xEE); // 10 11 10 11 | |
| 492 *b = _mm_shuffle_ps(ba01, ba23, 0x88); // 00 01 00 01 | |
| 493 *a = _mm_shuffle_ps(ba01, ba23, 0xEE); // 10 11 10 11 | |
| 494 } | |
| 495 | |
| 496 static inline void Sk4f_store4(void* dst, const Sk4f& r, const Sk4f& g, const Sk 4f& b, | |
| 497 const Sk4f& a) { | |
| 498 __m128 rg01 = _mm_unpacklo_ps(r.fVec, g.fVec); | |
| 499 __m128 rg23 = _mm_unpackhi_ps(r.fVec, g.fVec); | |
| 500 __m128 ba01 = _mm_unpacklo_ps(b.fVec, a.fVec); | |
| 501 __m128 ba23 = _mm_unpacklo_ps(b.fVec, a.fVec); | |
| 502 __m128 v0 = _mm_shuffle_ps(rg01, ba01, 0x88); | |
| 503 __m128 v1 = _mm_shuffle_ps(rg01, ba01, 0xEE); | |
| 504 __m128 v2 = _mm_shuffle_ps(rg23, ba23, 0x88); | |
| 505 __m128 v3 = _mm_shuffle_ps(rg23, ba23, 0xEE); | |
| 506 _mm_storeu_ps(((float*) dst) + 0, v0); | |
| 507 _mm_storeu_ps(((float*) dst) + 4, v1); | |
| 508 _mm_storeu_ps(((float*) dst) + 8, v2); | |
| 509 _mm_storeu_ps(((float*) dst) + 12, v3); | |
| 510 } | |
| 511 | |
| 481 #endif//SkNx_sse_DEFINED | 512 #endif//SkNx_sse_DEFINED |
| OLD | NEW |