Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(406)

Side by Side Diff: src/opts/SkNx_sse.h

Issue 2339233003: Support Float32 output from SkColorSpaceXform (Closed)
Patch Set: Some fixes Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_sse_DEFINED 8 #ifndef SkNx_sse_DEFINED
9 #define SkNx_sse_DEFINED 9 #define SkNx_sse_DEFINED
10 10
(...skipping 460 matching lines...) Expand 10 before | Expand all | Expand 10 after
471 static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk4h& b, 471 static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk4h& b,
472 const Sk4h& a) { 472 const Sk4h& a) {
473 __m128i rg = _mm_unpacklo_epi16(r.fVec, g.fVec); 473 __m128i rg = _mm_unpacklo_epi16(r.fVec, g.fVec);
474 __m128i ba = _mm_unpacklo_epi16(b.fVec, a.fVec); 474 __m128i ba = _mm_unpacklo_epi16(b.fVec, a.fVec);
475 __m128i lo = _mm_unpacklo_epi32(rg, ba); 475 __m128i lo = _mm_unpacklo_epi32(rg, ba);
476 __m128i hi = _mm_unpackhi_epi32(rg, ba); 476 __m128i hi = _mm_unpackhi_epi32(rg, ba);
477 _mm_storeu_si128(((__m128i*) dst) + 0, lo); 477 _mm_storeu_si128(((__m128i*) dst) + 0, lo);
478 _mm_storeu_si128(((__m128i*) dst) + 1, hi); 478 _mm_storeu_si128(((__m128i*) dst) + 1, hi);
479 } 479 }
480 480
481 static inline void Sk4f_load4(const void* ptr, Sk4f* r, Sk4f* g, Sk4f* b, Sk4f* a) { // Reads 16 floats (4 interleaved RGBA pixels, unaligned loads) from ptr and de-interleaves them into one vector per channel.
mtklein 2016/09/14 20:30:36 May want to call _MM_TRANSPOSE_PS, or crib off it:
msarett 2016/09/14 21:09:43 Cool! Done.
482 __m128 v0 = _mm_loadu_ps(((float*)ptr) + 0), // r0 g0 b0 a0
483 v1 = _mm_loadu_ps(((float*)ptr) + 4), // r1 g1 b1 a1
484 v2 = _mm_loadu_ps(((float*)ptr) + 8), // r2 g2 b2 a2
485 v3 = _mm_loadu_ps(((float*)ptr) + 12); // r3 g3 b3 a3
486 __m128 rg01 = _mm_unpacklo_ps(v0, v1), // r0 r1 g0 g1
487 ba01 = _mm_unpackhi_ps(v0, v1), // b0 b1 a0 a1
488 rg23 = _mm_unpacklo_ps(v2, v3), // r2 r3 g2 g3
489 ba23 = _mm_unpackhi_ps(v2, v3); // b2 b3 a2 a3 (high halves; unpacklo here would just duplicate rg23)
490 *r = _mm_shuffle_ps(rg01, rg23, 0x44); // lane selectors 00 01 00 01 (low lane first) -> r0 r1 r2 r3
491 *g = _mm_shuffle_ps(rg01, rg23, 0xEE); // lane selectors 10 11 10 11 (low lane first) -> g0 g1 g2 g3
492 *b = _mm_shuffle_ps(ba01, ba23, 0x44); // lane selectors 00 01 00 01 (low lane first) -> b0 b1 b2 b3
493 *a = _mm_shuffle_ps(ba01, ba23, 0xEE); // lane selectors 10 11 10 11 (low lane first) -> a0 a1 a2 a3
494 }
495
496 static inline void Sk4f_store4(void* dst, const Sk4f& r, const Sk4f& g, const Sk4f& b, // Interleaves four per-channel vectors into 4 RGBA pixels and writes 16 floats to dst (unaligned stores).
497 const Sk4f& a) {
498 __m128 rg01 = _mm_unpacklo_ps(r.fVec, g.fVec); // r0 g0 r1 g1
499 __m128 rg23 = _mm_unpackhi_ps(r.fVec, g.fVec); // r2 g2 r3 g3
500 __m128 ba01 = _mm_unpacklo_ps(b.fVec, a.fVec); // b0 a0 b1 a1
501 __m128 ba23 = _mm_unpackhi_ps(b.fVec, a.fVec); // b2 a2 b3 a3 (high halves; unpacklo here would just duplicate ba01)
502 __m128 v0 = _mm_shuffle_ps(rg01, ba01, 0x44); // lane selectors 00 01 00 01 (low lane first) -> r0 g0 b0 a0
503 __m128 v1 = _mm_shuffle_ps(rg01, ba01, 0xEE); // lane selectors 10 11 10 11 (low lane first) -> r1 g1 b1 a1
504 __m128 v2 = _mm_shuffle_ps(rg23, ba23, 0x44); // lane selectors 00 01 00 01 (low lane first) -> r2 g2 b2 a2
505 __m128 v3 = _mm_shuffle_ps(rg23, ba23, 0xEE); // lane selectors 10 11 10 11 (low lane first) -> r3 g3 b3 a3
506 _mm_storeu_ps(((float*) dst) + 0, v0);
507 _mm_storeu_ps(((float*) dst) + 4, v1);
508 _mm_storeu_ps(((float*) dst) + 8, v2);
509 _mm_storeu_ps(((float*) dst) + 12, v3);
510 }
511
481 #endif//SkNx_sse_DEFINED 512 #endif//SkNx_sse_DEFINED
OLDNEW
« src/opts/SkNx_neon.h ('K') | « src/opts/SkNx_neon.h ('k') | tests/ColorSpaceXformTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698