Chromium Code Reviews

Unified Diff: src/opts/SkNx_sse.h

Issue 1432903002: float xfermodes (burn, dodge, softlight) in Sk8f, possibly using AVX. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: 1.0f/255 Created 5 years, 1 month ago
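The point of this patch set appears to be letting scalar constants like 1.0f/255 participate directly in SkNf/SkNi expressions: the diff below drops `explicit` from the single-value constructors, so a plain float or int converts implicitly to a vector. A hedged caller-side sketch (not part of this CL; it assumes SkNf<4> provides the same arithmetic operators that SkNf<2> shows below):

    // Hypothetical helper, assuming SkNf<4>::FromBytes and operator* as in the diff.
    static inline SkNf<4> byte_to_unit_float(const uint8_t px[4]) {
        SkNf<4> v = SkNf<4>::FromBytes(px);  // four bytes widened to floats, 0..255
        return v * (1.0f/255);               // scalar converts via the now-implicit SkNf(float)
    }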
/*
 * Copyright 2015 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkNx_sse_DEFINED
#define SkNx_sse_DEFINED

// This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent.

namespace {  // See SkNx.h


template <>
class SkNf<2> {
public:
    SkNf(const __m128& vec) : fVec(vec) {}

    SkNf() {}
-    explicit SkNf(float val) : fVec(_mm_set1_ps(val)) {}
+    SkNf(float val) : fVec(_mm_set1_ps(val)) {}
    static SkNf Load(const float vals[2]) {
        return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals));
    }
    SkNf(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {}

    void store(float vals[2]) const { _mm_storel_pi((__m64*)vals, fVec); }

    SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); }
    SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); }
    SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); }
(...skipping 28 matching lines...)

    __m128 fVec;
};
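Worth noting on SkNf<2>: Load goes through _mm_loadl_epi64 plus _mm_castsi128_ps, i.e. an 8-byte load reinterpreted as two floats, and store uses _mm_storel_pi, so exactly the two floats are touched and no adjacent memory is read or written. A minimal usage sketch (hypothetical, not from the CL):

    float a[2] = {0.25f, 0.5f}, b[2] = {1.0f, 2.0f}, sum[2];
    (SkNf<2>::Load(a) + SkNf<2>::Load(b)).store(sum);  // sum == {1.25f, 2.5f}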

template <>
class SkNi<4, int> {
public:
    SkNi(const __m128i& vec) : fVec(vec) {}

    SkNi() {}
-    explicit SkNi(int val) : fVec(_mm_set1_epi32(val)) {}
+    SkNi(int val) : fVec(_mm_set1_epi32(val)) {}
    static SkNi Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*)vals); }
    SkNi(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {}

    void store(int vals[4]) const { _mm_storeu_si128((__m128i*)vals, fVec); }

    SkNi operator + (const SkNi& o) const { return _mm_add_epi32(fVec, o.fVec); }
    SkNi operator - (const SkNi& o) const { return _mm_sub_epi32(fVec, o.fVec); }
    SkNi operator * (const SkNi& o) const {
        __m128i mul20 = _mm_mul_epu32(fVec, o.fVec),
                mul31 = _mm_mul_epu32(_mm_srli_si128(fVec, 4), _mm_srli_si128(o.fVec, 4));
(...skipping 17 matching lines...)

    __m128i fVec;
};
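The operator* above works around SSE2's lack of a 32-bit low multiply (_mm_mullo_epi32 needs SSE4.1): _mm_mul_epu32 multiplies lanes 0/2 and, after a 4-byte shift, lanes 1/3, producing 64-bit products. The recombination lines are elided above; the usual pattern, shown standalone as an assumption about the skipped code:

    // Sketch of the common SSE2 recombination; the low 32 bits of each product are
    // the same for signed and unsigned inputs, so this is a valid low multiply.
    static inline __m128i mullo_epi32_sse2(__m128i a, __m128i b) {
        __m128i mul20 = _mm_mul_epu32(a, b),                                        // products of lanes 0,2
                mul31 = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4));  // products of lanes 1,3
        return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul20, _MM_SHUFFLE(0,0,2,0)),   // keep the low halves
                                  _mm_shuffle_epi32(mul31, _MM_SHUFFLE(0,0,2,0)));
    }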

template <>
class SkNf<4> {
public:
    SkNf(const __m128& vec) : fVec(vec) {}

    SkNf() {}
-    explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {}
+    SkNf(float val) : fVec( _mm_set1_ps(val) ) {}
    static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); }

    static SkNf FromBytes(const uint8_t bytes[4]) {
        __m128i fix8 = _mm_cvtsi32_si128(*(const int*)bytes);
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
        const char _ = ~0;  // Zero these bytes.
        __m128i fix8_32 = _mm_shuffle_epi8(fix8, _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, 3,_,_,_));
#else
        __m128i fix8_16 = _mm_unpacklo_epi8 (fix8, _mm_setzero_si128()),
                fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128());
(...skipping 51 matching lines...)

    __m128 fVec;
};
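FromBytes widens four packed bytes to four 32-bit lanes: a single _mm_shuffle_epi8 on SSSE3 (the index ~0, i.e. 0xFF, has its high bit set, which makes pshufb zero that destination byte), or two zero-extending unpacks on plain SSE2. The int-to-float conversion falls in the elided lines; presumably it is _mm_cvtepi32_ps. An end-to-end sketch of the SSE2 path under that assumption:

    static inline __m128 bytes_to_floats_sse2(const uint8_t bytes[4]) {
        __m128i fix8    = _mm_cvtsi32_si128(*(const int*)bytes);             // four bytes in lane 0
        __m128i fix8_16 = _mm_unpacklo_epi8 (fix8,    _mm_setzero_si128());  // zero-extend to 16-bit
        __m128i fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128());  // zero-extend to 32-bit
        return _mm_cvtepi32_ps(fix8_32);                                     // 0..255 as floats
    }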

template <>
class SkNi<4, uint16_t> {
public:
    SkNi(const __m128i& vec) : fVec(vec) {}

    SkNi() {}
-    explicit SkNi(uint16_t val) : fVec(_mm_set1_epi16(val)) {}
+    SkNi(uint16_t val) : fVec(_mm_set1_epi16(val)) {}
    static SkNi Load(const uint16_t vals[4]) { return _mm_loadl_epi64((const __m128i*)vals); }
    SkNi(uint16_t a, uint16_t b, uint16_t c, uint16_t d) : fVec(_mm_setr_epi16(a,b,c,d,0,0,0,0)) {}

    void store(uint16_t vals[4]) const { _mm_storel_epi64((__m128i*)vals, fVec); }

    SkNi operator + (const SkNi& o) const { return _mm_add_epi16(fVec, o.fVec); }
    SkNi operator - (const SkNi& o) const { return _mm_sub_epi16(fVec, o.fVec); }
    SkNi operator * (const SkNi& o) const { return _mm_mullo_epi16(fVec, o.fVec); }

    SkNi operator << (int bits) const { return _mm_slli_epi16(fVec, bits); }
    SkNi operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); }

    template <int k> uint16_t kth() const {
        SkASSERT(0 <= k && k < 4);
        return _mm_extract_epi16(fVec, k);
    }

    __m128i fVec;
};

template <>
class SkNi<8, uint16_t> {
public:
    SkNi(const __m128i& vec) : fVec(vec) {}

    SkNi() {}
-    explicit SkNi(uint16_t val) : fVec(_mm_set1_epi16(val)) {}
+    SkNi(uint16_t val) : fVec(_mm_set1_epi16(val)) {}
    static SkNi Load(const uint16_t vals[8]) { return _mm_loadu_si128((const __m128i*)vals); }
    SkNi(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
         uint16_t e, uint16_t f, uint16_t g, uint16_t h) : fVec(_mm_setr_epi16(a,b,c,d,e,f,g,h)) {}

    void store(uint16_t vals[8]) const { _mm_storeu_si128((__m128i*)vals, fVec); }

    SkNi operator + (const SkNi& o) const { return _mm_add_epi16(fVec, o.fVec); }
    SkNi operator - (const SkNi& o) const { return _mm_sub_epi16(fVec, o.fVec); }
    SkNi operator * (const SkNi& o) const { return _mm_mullo_epi16(fVec, o.fVec); }

(...skipping 21 matching lines...)

    __m128i fVec;
};
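A hedged example of how an 8-lane uint16_t vector is typically used in pixel math (not from this CL, and Sk8h is a hypothetical alias here): multiply two byte-valued lane sets and divide by 255 with rounding, using only the +, *, and >> operators shown above. For x in 0..255*255, ((x + 128) + ((x + 128) >> 8)) >> 8 equals x/255 rounded to nearest.

    typedef SkNi<8, uint16_t> Sk8h;  // hypothetical alias for this sketch

    static inline Sk8h mul_div255_round(const Sk8h& a, const Sk8h& b) {
        Sk8h prod = a * b + Sk8h(128);     // 128 could also convert implicitly now that the ctor isn't explicit
        return (prod + (prod >> 8)) >> 8;  // rounded divide by 255 for byte-range inputs
    }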

template <>
class SkNi<16, uint8_t> {
public:
    SkNi(const __m128i& vec) : fVec(vec) {}

    SkNi() {}
-    explicit SkNi(uint8_t val) : fVec(_mm_set1_epi8(val)) {}
+    SkNi(uint8_t val) : fVec(_mm_set1_epi8(val)) {}
    static SkNi Load(const uint8_t vals[16]) { return _mm_loadu_si128((const __m128i*)vals); }
    SkNi(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
         uint8_t e, uint8_t f, uint8_t g, uint8_t h,
         uint8_t i, uint8_t j, uint8_t k, uint8_t l,
         uint8_t m, uint8_t n, uint8_t o, uint8_t p)
        : fVec(_mm_setr_epi8(a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p)) {}

    void store(uint8_t vals[16]) const { _mm_storeu_si128((__m128i*)vals, fVec); }

    SkNi saturatedAdd(const SkNi& o) const { return _mm_adds_epu8(fVec, o.fVec); }
(...skipping 19 matching lines...)
        return _mm_or_si128(_mm_and_si128 (fVec, t.fVec),
                            _mm_andnot_si128(fVec, e.fVec));
    }

    __m128i fVec;
};
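The thenElse tail above is the branch-free SSE2 select: and the mask with t, andnot it with e, then or the halves, assuming each lane of the mask is all-ones or all-zeros (as a compare produces). On SSE4.1 the same select is a single _mm_blendv_epi8, which is the kind of upgrade the SK_CPU_SSE_LEVEL comment at the top of the file asks to guard. A standalone sketch, not taken verbatim from the CL:

    static inline __m128i select_epi8(__m128i mask, __m128i t, __m128i e) {
    #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
        return _mm_blendv_epi8(e, t, mask);                // picks t where the mask byte's high bit is set
    #else
        return _mm_or_si128(_mm_and_si128   (mask, t),     // keep t where mask is all-ones
                            _mm_andnot_si128(mask, e));    // keep e where mask is all-zeros
    #endif
    }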

}  // namespace

#endif//SkNx_sse_DEFINED