src/opts/SkNx_sse.h - Issue 1707673002: fast sk4f <-> sk4i SSE methods

Side by Side Diff

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Side by Side Diff: src/opts/SkNx_sse.h

Issue 1707673002: fast sk4f <-> sk4i SSE methods (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: ffff Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright 2015 Google Inc.	2 * Copyright 2015 Google Inc.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 #ifndef SkNx_sse_DEFINED	8 #ifndef SkNx_sse_DEFINED

9 #define SkNx_sse_DEFINED	9 #define SkNx_sse_DEFINED

10	10

(...skipping 276 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
287 template <int k> uint8_t kth() const { return (*this)[k]; }	287 template <int k> uint8_t kth() const { return (*this)[k]; }

288	288

289 SkNx thenElse(const SkNx& t, const SkNx& e) const {	289 SkNx thenElse(const SkNx& t, const SkNx& e) const {

290 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec),	290 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec),

291 _mm_andnot_si128(fVec, e.fVec));	291 _mm_andnot_si128(fVec, e.fVec));

292 }	292 }

293	293

294 __m128i fVec;	294 __m128i fVec;

295 };	295 };

296	296

	297 template<> /*static*/ inline Sk4f SkNx_cast<float, int>(const Sk4i& src) {

	298 return _mm_cvtepi32_ps(src.fVec);

	299 }

	300

	301 template <> /*static*/ inline Sk4i SkNx_cast<int, float>(const Sk4f& src) {

	302 return _mm_cvttps_epi32(src.fVec);

	303 }

297	304

298 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, float>(const Sk4f& src) {	305 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, float>(const Sk4f& src) {

299 auto _32 = _mm_cvttps_epi32(src.fVec);	306 auto _32 = _mm_cvttps_epi32(src.fVec);

300 // Ideally we'd use _mm_packus_epi32 here. But that's SSE4.1+.	307 // Ideally we'd use _mm_packus_epi32 here. But that's SSE4.1+.

301 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3	308 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3

302 // With SSSE3, we can just shuffle the low 2 bytes from each lane right into place.	309 // With SSSE3, we can just shuffle the low 2 bytes from each lane right into place.

303 const int _ = ~0;	310 const int _ = ~0;

304 return _mm_shuffle_epi8(_32, _mm_setr_epi8(0,1, 4,5, 8,9, 12,13, _,_,_,_,_,_ ,_,_));	311 return _mm_shuffle_epi8(_32, _mm_setr_epi8(0,1, 4,5, 8,9, 12,13, _,_,_,_,_,_ ,_,_));

305 #else	312 #else

306 // With SSE2, we have to emulate _mm_packus_epi32 with _mm_packs_epi32:	313 // With SSE2, we have to emulate _mm_packus_epi32 with _mm_packs_epi32:

(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
347	354

348 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) {	355 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) {

349 return _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128());	356 return _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128());

350 }	357 }

351	358

352 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) {	359 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) {

353 return _mm_packus_epi16(src.fVec, src.fVec);	360 return _mm_packus_epi16(src.fVec, src.fVec);

354 }	361 }

355	362

356 #endif//SkNx_sse_DEFINED	363 #endif//SkNx_sse_DEFINED

OLD	NEW

« no previous file with comments | « no previous file | tests/SkNxTest.cpp » ('j') | no next file with comments »