src/opts/SkNx_sse.h - Issue 1526523003: Unify some SkNx code

Side by Side Diff: src/opts/SkNx_sse.h

Issue 1526523003: Unify some SkNx code (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: typo Created 5 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * Copyright 2015 Google Inc.	2 * Copyright 2015 Google Inc.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 #ifndef SkNx_sse_DEFINED	8 #ifndef SkNx_sse_DEFINED

9 #define SkNx_sse_DEFINED	9 #define SkNx_sse_DEFINED

10	10

(...skipping 93 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
104	104

105 template <>	105 template <>

106 class SkNx<4, float> {	106 class SkNx<4, float> {

107 public:	107 public:

108 SkNx(const __m128& vec) : fVec(vec) {}	108 SkNx(const __m128& vec) : fVec(vec) {}

109	109

110 SkNx() {}	110 SkNx() {}

111 SkNx(float val) : fVec( _mm_set1_ps(val) ) {}	111 SkNx(float val) : fVec( _mm_set1_ps(val) ) {}

112 static SkNx Load(const float vals[4]) { return _mm_loadu_ps(vals); }	112 static SkNx Load(const float vals[4]) { return _mm_loadu_ps(vals); }

113	113

114 static SkNx FromBytes(const uint8_t bytes[4]) {

115 __m128i fix8 = _mm_cvtsi32_si128(*(const int*)bytes);

116 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3

117 const char _ = ~0; // Zero these bytes.

118 __m128i fix8_32 = _mm_shuffle_epi8(fix8, _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, 3,_,_,_));

119 #else

120 __m128i fix8_16 = _mm_unpacklo_epi8 (fix8, _mm_setzero_si128()),

121 fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128());

122 #endif

123 return SkNx(_mm_cvtepi32_ps(fix8_32));

124 // TODO: use _mm_cvtepu8_epi32 w/SSE4.1?

125 }

126

127 SkNx(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {}	114 SkNx(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {}

128	115

129 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); }	116 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); }

130 void toBytes(uint8_t bytes[4]) const {

131 __m128i fix8_32 = _mm_cvttps_epi32(fVec),

132 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),

133 fix8 = _mm_packus_epi16(fix8_16, fix8_16);

134 *(int*)bytes = _mm_cvtsi128_si32(fix8);

135 }

136

137 static void ToBytes(uint8_t bytes[16],

138 const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) {

139 _mm_storeu_si128((__m128i*)bytes,

140 _mm_packus_epi16(_mm_packus_epi16(_mm_cvttps_epi32(a.fVec),

141 _mm_cvttps_epi32(b.fVec)),

142 _mm_packus_epi16(_mm_cvttps_epi32(c.fVec),

143 _mm_cvttps_epi32(d.fVec))));

144 }

145	117

146 SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); }	118 SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); }

147 SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); }	119 SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); }

148 SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); }	120 SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); }

149 SkNx operator / (const SkNx& o) const { return _mm_div_ps(fVec, o.fVec); }	121 SkNx operator / (const SkNx& o) const { return _mm_div_ps(fVec, o.fVec); }

150	122

151 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec); }	123 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec); }

152 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec); }	124 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec); }

153 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec); }	125 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec); }

154 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec); }	126 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec); }

(...skipping 91 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
246	218

247 template <int k> uint16_t kth() const {	219 template <int k> uint16_t kth() const {

248 SkASSERT(0 <= k && k < 8);	220 SkASSERT(0 <= k && k < 8);

249 return _mm_extract_epi16(fVec, k);	221 return _mm_extract_epi16(fVec, k);

250 }	222 }

251	223

252 __m128i fVec;	224 __m128i fVec;

253 };	225 };

254	226

255 template <>	227 template <>

	228 class SkNx<4, uint8_t> {

	229 public:

	230 SkNx(const __m128i& vec) : fVec(vec) {}

	231

	232 SkNx() {}

	233 static SkNx Load(const uint8_t vals[4]) { return _mm_cvtsi32_si128(*(const int*)vals); }

	234 void store(uint8_t vals[4]) const { *(int*)vals = _mm_cvtsi128_si32(fVec); }

	235

	236 // TODO as needed

	237

	238 __m128i fVec;

	239 };

	240

	241 template <>

	242 class SkNx<8, uint8_t> {

	243 public:

	244 SkNx(const __m128i& vec) : fVec(vec) {}

	245

	246 SkNx() {}

	247 static SkNx Load(const uint8_t vals[8]) { return _mm_loadl_epi64((const __m128i*)vals); }

	248 void store(uint8_t vals[8]) const { _mm_storel_epi64((__m128i*)vals, fVec); }

	249

	250 // TODO as needed

	251

	252 __m128i fVec;

	253 };

	254

	255 template <>

256 class SkNx<16, uint8_t> {	256 class SkNx<16, uint8_t> {

257 public:	257 public:

258 SkNx(const __m128i& vec) : fVec(vec) {}	258 SkNx(const __m128i& vec) : fVec(vec) {}

259	259

260 SkNx() {}	260 SkNx() {}

261 SkNx(uint8_t val) : fVec(_mm_set1_epi8(val)) {}	261 SkNx(uint8_t val) : fVec(_mm_set1_epi8(val)) {}

262 static SkNx Load(const uint8_t vals[16]) { return _mm_loadu_si128((const __m128i*)vals); }	262 static SkNx Load(const uint8_t vals[16]) { return _mm_loadu_si128((const __m128i*)vals); }

263 SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d,	263 SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d,

264 uint8_t e, uint8_t f, uint8_t g, uint8_t h,	264 uint8_t e, uint8_t f, uint8_t g, uint8_t h,

265 uint8_t i, uint8_t j, uint8_t k, uint8_t l,	265 uint8_t i, uint8_t j, uint8_t k, uint8_t l,

(...skipping 23 matching lines...) Expand all Loading...
289	289

290 SkNx thenElse(const SkNx& t, const SkNx& e) const {	290 SkNx thenElse(const SkNx& t, const SkNx& e) const {

291 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec),	291 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec),

292 _mm_andnot_si128(fVec, e.fVec));	292 _mm_andnot_si128(fVec, e.fVec));

293 }	293 }

294	294

295 __m128i fVec;	295 __m128i fVec;

296 };	296 };

297	297

298	298

299 template<>	299 template<> inline Sk4i SkNx_cast<int, float, 4>(const Sk4f& src) {

300 inline SkNx<4, int> SkNx_cast<int, float, 4>(const SkNx<4, float>& src) {

301 return _mm_cvttps_epi32(src.fVec);	300 return _mm_cvttps_epi32(src.fVec);

302 }	301 }

303	302

	303 template<> inline Sk4b SkNx_cast<uint8_t, float, 4>(const Sk4f& src) {

	304 auto _32 = _mm_cvttps_epi32(src.fVec);

	305 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3

	306 const int _ = ~0;

	307 return _mm_shuffle_epi8(_32, _mm_setr_epi8(0,4,8,12, _,_,_,_, _,_,_,_, _,_,_,_));

	308 #else

	309 auto _16 = _mm_packus_epi16(_32, _32);

	310 return _mm_packus_epi16(_16, _16);

	311 #endif

	312 }

	313

	314 template<> inline Sk4f SkNx_cast<float, uint8_t, 4>(const Sk4b& src) {

	315 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3

	316 const int _ = ~0;

	317 auto _32 = _mm_shuffle_epi8(src.fVec, _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, 3,_,_,_));

	318 #else

	319 auto _16 = _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128()),

	320 _32 = _mm_unpacklo_epi16(_16, _mm_setzero_si128());

	321 #endif

	322 return _mm_cvtepi32_ps(_32);

	323 }

	324

	325 static inline void Sk4f_ToBytes(uint8_t bytes[16],

	326 const Sk4f& a, const Sk4f& b, const Sk4f& c, const Sk4f& d) {

	327 _mm_storeu_si128((__m128i*)bytes,

	328 _mm_packus_epi16(_mm_packus_epi16(_mm_cvttps_epi32(a.fVec),

	329 _mm_cvttps_epi32(b.fVec)),

	330 _mm_packus_epi16(_mm_cvttps_epi32(c.fVec),

	331 _mm_cvttps_epi32(d.fVec))));

	332 }

	333

	334

304 } // namespace	335 } // namespace

305	336

306 #endif//SkNx_sse_DEFINED	337 #endif//SkNx_sse_DEFINED

OLD	NEW

« no previous file with comments | « src/opts/SkNx_neon.h ('k') | src/opts/SkXfermode_opts.h » ('j') | no next file with comments »