OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
10 | 10 |
(...skipping 264 matching lines...) | |
275 void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); } | 275 void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); } |
276 | 276 |
277 SkNx saturatedAdd(const SkNx& o) const { return _mm_adds_epu8(fVec, o.fVec); } | 277 SkNx saturatedAdd(const SkNx& o) const { return _mm_adds_epu8(fVec, o.fVec); } |
278 | 278 |
279 SkNx operator + (const SkNx& o) const { return _mm_add_epi8(fVec, o.fVec); } | 279 SkNx operator + (const SkNx& o) const { return _mm_add_epi8(fVec, o.fVec); } |
280 SkNx operator - (const SkNx& o) const { return _mm_sub_epi8(fVec, o.fVec); } | 280 SkNx operator - (const SkNx& o) const { return _mm_sub_epi8(fVec, o.fVec); } |
281 | 281 |
282 static SkNx Min(const SkNx& a, const SkNx& b) { return _mm_min_epu8(a.fVec, b.fVec); } | 282 static SkNx Min(const SkNx& a, const SkNx& b) { return _mm_min_epu8(a.fVec, b.fVec); } |
283 SkNx operator < (const SkNx& o) const { | 283 SkNx operator < (const SkNx& o) const { |
284 // There's no unsigned _mm_cmplt_epu8, so we flip the sign bits then use a signed compare. | 284 // There's no unsigned _mm_cmplt_epu8, so we flip the sign bits then use a signed compare. |
285 auto flip = _mm_set1_epi8(char(0x80)); | 285 auto flip = _mm_set1_epi8(char(0x80)); |
bungeman-skia
2016/07/11 22:07:49
Heh, maybe something like the following would keep
286 return _mm_cmplt_epi8(_mm_xor_si128(flip, fVec), _mm_xor_si128(flip, o.fVec)); | 286 return _mm_cmplt_epi8(_mm_xor_si128(flip, fVec), _mm_xor_si128(flip, o.fVec)); |
287 } | 287 } |
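[Editor's note, not part of the patch: a minimal standalone sketch of the sign-flip trick the comment above describes. XORing each byte with 0x80 maps the unsigned range [0,255] onto the signed range [-128,127] while preserving order, so the signed SSE2 compare yields the unsigned result. The helper name cmplt_epu8 is illustrative only.]

    #include <emmintrin.h>  // SSE2

    // Unsigned per-lane "a < b" for 8-bit lanes using only SSE2:
    // flip the sign bit of each lane, then compare as signed bytes.
    static inline __m128i cmplt_epu8(__m128i a, __m128i b) {
        const __m128i flip = _mm_set1_epi8(char(0x80));
        return _mm_cmplt_epi8(_mm_xor_si128(a, flip), _mm_xor_si128(b, flip));
    }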
288 | 288 |
289 uint8_t operator[](int k) const { | 289 uint8_t operator[](int k) const { |
290 SkASSERT(0 <= k && k < 16); | 290 SkASSERT(0 <= k && k < 16); |
291 union { __m128i v; uint8_t us[16]; } pun = {fVec}; | 291 union { __m128i v; uint8_t us[16]; } pun = {fVec}; |
292 return pun.us[k&15]; | 292 return pun.us[k&15]; |
293 } | 293 } |
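[Editor's note, not part of the patch: the union above is the usual SSE2-only way to read one byte lane, since _mm_extract_epi8 requires SSE4.1. A minimal sketch of the same idea, with an illustrative helper name (extract_u8_lane):]

    #include <emmintrin.h>
    #include <stdint.h>

    // Read lane k of a 16-byte vector by punning it to a byte array,
    // matching what operator[] above does; k is masked to stay in range.
    static inline uint8_t extract_u8_lane(__m128i vec, int k) {
        union { __m128i v; uint8_t us[16]; } pun = {vec};
        return pun.us[k & 15];
    }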
294 | 294 |
295 SkNx thenElse(const SkNx& t, const SkNx& e) const { | 295 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
(...skipping 69 matching lines...) | |
365 | 365 |
366 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { | 366 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { |
367 return _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128()); | 367 return _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128()); |
368 } | 368 } |
369 | 369 |
370 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { | 370 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { |
371 return _mm_packus_epi16(src.fVec, src.fVec); | 371 return _mm_packus_epi16(src.fVec, src.fVec); |
372 } | 372 } |
373 | 373 |
374 #endif//SkNx_sse_DEFINED | 374 #endif//SkNx_sse_DEFINED |