Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(506)

Side by Side Diff: src/opts/SkNx_sse.h

Issue 2184753002: Add Sk4h_load4 for loading F16. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: typo Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkNx_neon.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_sse_DEFINED 8 #ifndef SkNx_sse_DEFINED
9 #define SkNx_sse_DEFINED 9 #define SkNx_sse_DEFINED
10 10
(...skipping 385 matching lines...) Expand 10 before | Expand all | Expand 10 after
396 } 396 }
397 397
398 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, int>(const Sk4i& src) { 398 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, int>(const Sk4i& src) {
399 return _mm_packus_epi16(_mm_packus_epi16(src.fVec, src.fVec), src.fVec); 399 return _mm_packus_epi16(_mm_packus_epi16(src.fVec, src.fVec), src.fVec);
400 } 400 }
401 401
402 static inline Sk4i Sk4f_round(const Sk4f& x) { 402 static inline Sk4i Sk4f_round(const Sk4f& x) {
403 return _mm_cvtps_epi32(x.fVec); 403 return _mm_cvtps_epi32(x.fVec);
404 } 404 }
405 405
406 static inline void Sk4h_load4(const void* ptr, Sk4h* r, Sk4h* g, Sk4h* b, Sk4h* a) {
msarett 2016/07/26 14:25:07 The fact that we are doing a transpose on loads an
407 __m128i lo = _mm_loadu_si128(((__m128i*)ptr) + 0),
408 hi = _mm_loadu_si128(((__m128i*)ptr) + 1);
409 __m128i even = _mm_unpacklo_epi16(lo, hi), // r0 r2 g0 g2 b0 b2 a0 a2
410 odd = _mm_unpackhi_epi16(lo, hi); // r1 r3 ...
411 __m128i rg = _mm_unpacklo_epi16(even, odd), // r0 r1 r2 r3 g0 g1 g2 g3
412 ba = _mm_unpackhi_epi16(even, odd); // b0 b1 ... a0 a1 ...
413 *r = rg;
414 *g = _mm_srli_si128(rg, 8);
415 *b = ba;
416 *a = _mm_srli_si128(ba, 8);
417 }
418
406 static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk4h& b, 419 static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk4h& b,
407 const Sk4h& a) { 420 const Sk4h& a) {
408 __m128i rg = _mm_unpacklo_epi16(r.fVec, g.fVec); 421 __m128i rg = _mm_unpacklo_epi16(r.fVec, g.fVec);
409 __m128i ba = _mm_unpacklo_epi16(b.fVec, a.fVec); 422 __m128i ba = _mm_unpacklo_epi16(b.fVec, a.fVec);
410 __m128i lo = _mm_unpacklo_epi32(rg, ba); 423 __m128i lo = _mm_unpacklo_epi32(rg, ba);
411 __m128i hi = _mm_unpackhi_epi32(rg, ba); 424 __m128i hi = _mm_unpackhi_epi32(rg, ba);
412 _mm_storeu_si128(((__m128i*) dst) + 0, lo); 425 _mm_storeu_si128(((__m128i*) dst) + 0, lo);
413 _mm_storeu_si128(((__m128i*) dst) + 1, hi); 426 _mm_storeu_si128(((__m128i*) dst) + 1, hi);
414 } 427 }
415 428
416 #endif//SkNx_sse_DEFINED 429 #endif//SkNx_sse_DEFINED
OLD | NEW
« no previous file with comments | « src/opts/SkNx_neon.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698