OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
10 | 10 |
(...skipping 460 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
471 static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk
4h& b, | 471 static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk
4h& b, |
472 const Sk4h& a) { | 472 const Sk4h& a) { |
473 __m128i rg = _mm_unpacklo_epi16(r.fVec, g.fVec); | 473 __m128i rg = _mm_unpacklo_epi16(r.fVec, g.fVec); |
474 __m128i ba = _mm_unpacklo_epi16(b.fVec, a.fVec); | 474 __m128i ba = _mm_unpacklo_epi16(b.fVec, a.fVec); |
475 __m128i lo = _mm_unpacklo_epi32(rg, ba); | 475 __m128i lo = _mm_unpacklo_epi32(rg, ba); |
476 __m128i hi = _mm_unpackhi_epi32(rg, ba); | 476 __m128i hi = _mm_unpackhi_epi32(rg, ba); |
477 _mm_storeu_si128(((__m128i*) dst) + 0, lo); | 477 _mm_storeu_si128(((__m128i*) dst) + 0, lo); |
478 _mm_storeu_si128(((__m128i*) dst) + 1, hi); | 478 _mm_storeu_si128(((__m128i*) dst) + 1, hi); |
479 } | 479 } |
480 | 480 |
| 481 static inline void Sk4f_load4(const void* ptr, Sk4f* r, Sk4f* g, Sk4f* b, Sk4f*
a) { |
| 482 __m128 v0 = _mm_loadu_ps(((float*)ptr) + 0), |
| 483 v1 = _mm_loadu_ps(((float*)ptr) + 4), |
| 484 v2 = _mm_loadu_ps(((float*)ptr) + 8), |
| 485 v3 = _mm_loadu_ps(((float*)ptr) + 12); |
| 486 _MM_TRANSPOSE4_PS(v0, v1, v2, v3); |
| 487 *r = v0; |
| 488 *g = v1; |
| 489 *b = v2; |
| 490 *a = v3; |
| 491 } |
| 492 |
| 493 static inline void Sk4f_store4(void* dst, const Sk4f& r, const Sk4f& g, const Sk
4f& b, |
| 494 const Sk4f& a) { |
| 495 __m128 v0 = r.fVec, |
| 496 v1 = g.fVec, |
| 497 v2 = b.fVec, |
| 498 v3 = a.fVec; |
| 499 _MM_TRANSPOSE4_PS(v0, v1, v2, v3); |
| 500 _mm_storeu_ps(((float*) dst) + 0, v0); |
| 501 _mm_storeu_ps(((float*) dst) + 4, v1); |
| 502 _mm_storeu_ps(((float*) dst) + 8, v2); |
| 503 _mm_storeu_ps(((float*) dst) + 12, v3); |
| 504 } |
| 505 |
481 #endif//SkNx_sse_DEFINED | 506 #endif//SkNx_sse_DEFINED |
OLD | NEW |