src/opts/SkNx_sse.h - Issue 2151023003: Revert of Expand _01 half<->float limitation to _finite. Simplify.

Side by Side Diff: src/opts/SkNx_sse.h

Issue 2151023003: Revert of Expand _01 half<->float limitation to _finite. Simplify. (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright 2015 Google Inc.	2 * Copyright 2015 Google Inc.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 #ifndef SkNx_sse_DEFINED	8 #ifndef SkNx_sse_DEFINED

9 #define SkNx_sse_DEFINED	9 #define SkNx_sse_DEFINED

10	10

(...skipping 134 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
145 SkNx operator - (const SkNx& o) const { return _mm_sub_epi32(fVec, o.fVec); }	145 SkNx operator - (const SkNx& o) const { return _mm_sub_epi32(fVec, o.fVec); }

146 SkNx operator * (const SkNx& o) const {	146 SkNx operator * (const SkNx& o) const {

147 __m128i mul20 = _mm_mul_epu32(fVec, o.fVec),	147 __m128i mul20 = _mm_mul_epu32(fVec, o.fVec),

148 mul31 = _mm_mul_epu32(_mm_srli_si128(fVec, 4), _mm_srli_si128(o.fVec, 4));	148 mul31 = _mm_mul_epu32(_mm_srli_si128(fVec, 4), _mm_srli_si128(o.fVec, 4));

149 return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul20, _MM_SHUFFLE(0,0,2,0)) ,	149 return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul20, _MM_SHUFFLE(0,0,2,0)) ,

150 _mm_shuffle_epi32(mul31, _MM_SHUFFLE(0,0,2,0)) );	150 _mm_shuffle_epi32(mul31, _MM_SHUFFLE(0,0,2,0)) );

151 }	151 }

152	152

153 SkNx operator & (const SkNx& o) const { return _mm_and_si128(fVec, o.fVec); }	153 SkNx operator & (const SkNx& o) const { return _mm_and_si128(fVec, o.fVec); }

154 SkNx operator | (const SkNx& o) const { return _mm_or_si128(fVec, o.fVec); }	154 SkNx operator | (const SkNx& o) const { return _mm_or_si128(fVec, o.fVec); }

155 SkNx operator ^ (const SkNx& o) const { return _mm_xor_si128(fVec, o.fVec); }

156	155

157 SkNx operator << (int bits) const { return _mm_slli_epi32(fVec, bits); }	156 SkNx operator << (int bits) const { return _mm_slli_epi32(fVec, bits); }

158 SkNx operator >> (int bits) const { return _mm_srai_epi32(fVec, bits); }	157 SkNx operator >> (int bits) const { return _mm_srai_epi32(fVec, bits); }

159	158

160 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_epi32 (fVec, o.fVec); }

161 SkNx operator < (const SkNx& o) const { return _mm_cmplt_epi32 (fVec, o.fVec); }

162 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_epi32 (fVec, o.fVec); }

163

164 int operator[](int k) const {	159 int operator[](int k) const {

165 SkASSERT(0 <= k && k < 4);	160 SkASSERT(0 <= k && k < 4);

166 union { __m128i v; int is[4]; } pun = {fVec};	161 union { __m128i v; int is[4]; } pun = {fVec};

167 return pun.is[k&3];	162 return pun.is[k&3];

168 }	163 }

169	164

170 SkNx thenElse(const SkNx& t, const SkNx& e) const {

171 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41

172 return _mm_blendv_epi8(e.fVec, t.fVec, fVec);

173 #else

174 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec),

175 _mm_andnot_si128(fVec, e.fVec));

176 #endif

177 }

178

179 __m128i fVec;	165 __m128i fVec;

180 };	166 };

181	167

182 template <>	168 template <>

183 class SkNx<4, uint16_t> {	169 class SkNx<4, uint16_t> {

184 public:	170 public:

185 SkNx(const __m128i& vec) : fVec(vec) {}	171 SkNx(const __m128i& vec) : fVec(vec) {}

186	172

187 SkNx() {}	173 SkNx() {}

188 SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {}	174 SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {}

(...skipping 190 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
379 }	365 }

380	366

381 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) {	367 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) {

382 return _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128());	368 return _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128());

383 }	369 }

384	370

385 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) {	371 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) {

386 return _mm_packus_epi16(src.fVec, src.fVec);	372 return _mm_packus_epi16(src.fVec, src.fVec);

387 }	373 }

388	374

389 template<> /*static*/ inline Sk4i SkNx_cast<int, uint16_t>(const Sk4h& src) {	375 template<> inline Sk4b SkNx_cast<uint8_t, int>(const Sk4i& src) {

390 return _mm_unpacklo_epi16(src.fVec, _mm_setzero_si128());

391 }

392

393 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, int>(const Sk4i& src) {

394 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41

395 return _mm_packus_epi32(src.fVec, src.fVec);

396 #else

397 // Sign extend to trick _mm_packs_epi32() into doing the pack we want.

398 __m128i x = _mm_srai_epi32(_mm_slli_epi32(src.fVec, 16), 16);

399 return _mm_packs_epi32(x,x);

400 #endif

401 }

402

403 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, int>(const Sk4i& src) {

404 return _mm_packus_epi16(_mm_packus_epi16(src.fVec, src.fVec), src.fVec);	376 return _mm_packus_epi16(_mm_packus_epi16(src.fVec, src.fVec), src.fVec);

405 }	377 }

406	378

407 static inline Sk4i Sk4f_round(const Sk4f& x) {	379 static inline Sk4i Sk4f_round(const Sk4f& x) {

408 return _mm_cvtps_epi32(x.fVec);	380 return _mm_cvtps_epi32(x.fVec);

409 }	381 }

410	382

411 #endif//SkNx_sse_DEFINED	383 #endif//SkNx_sse_DEFINED

OLD	NEW

« no previous file with comments | « src/opts/SkNx_neon.h ('k') | tests/Float16Test.cpp » ('j') | no next file with comments »