OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
10 | 10 |
11 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything mo
re recent. | 11 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything mo
re recent. |
12 | 12 |
13 #define SKNX_IS_FAST | 13 #define SKNX_IS_FAST |
14 | 14 |
15 namespace { // See SkNx.h | 15 namespace { // See SkNx.h |
16 | 16 |
17 | 17 |
18 template <> | 18 template <> |
19 class SkNx<2, float> { | 19 class SkNx<2, float> { |
20 public: | 20 public: |
21 SkNx(const __m128& vec) : fVec(vec) {} | 21 SkNx(const __m128& vec) : fVec(vec) {} |
22 | 22 |
23 SkNx() {} | 23 SkNx() {} |
24 SkNx(float val) : fVec(_mm_set1_ps(val)) {} | 24 SkNx(float val) : fVec(_mm_set1_ps(val)) {} |
25 static SkNx Load(const float vals[2]) { | 25 static SkNx Load(const void* ptr) { |
26 return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals)); | 26 return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)ptr)); |
27 } | 27 } |
28 SkNx(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {} | 28 SkNx(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {} |
29 | 29 |
30 void store(float vals[2]) const { _mm_storel_pi((__m64*)vals, fVec); } | 30 void store(void* ptr) const { _mm_storel_pi((__m64*)ptr, fVec); } |
31 | 31 |
32 SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); } | 32 SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); } |
33 SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); } | 33 SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); } |
34 SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); } | 34 SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); } |
35 SkNx operator / (const SkNx& o) const { return _mm_div_ps(fVec, o.fVec); } | 35 SkNx operator / (const SkNx& o) const { return _mm_div_ps(fVec, o.fVec); } |
36 | 36 |
37 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec);
} | 37 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec);
} |
38 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec);
} | 38 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec);
} |
39 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec);
} | 39 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec);
} |
40 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec);
} | 40 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec);
} |
(...skipping 23 matching lines...) Expand all Loading... |
64 __m128 fVec; | 64 __m128 fVec; |
65 }; | 65 }; |
66 | 66 |
67 template <> | 67 template <> |
68 class SkNx<2, double> { | 68 class SkNx<2, double> { |
69 public: | 69 public: |
70 SkNx(const __m128d& vec) : fVec(vec) {} | 70 SkNx(const __m128d& vec) : fVec(vec) {} |
71 | 71 |
72 SkNx() {} | 72 SkNx() {} |
73 SkNx(double val) : fVec(_mm_set1_pd(val)) {} | 73 SkNx(double val) : fVec(_mm_set1_pd(val)) {} |
74 static SkNx Load(const double vals[2]) { return _mm_loadu_pd(vals); } | 74 static SkNx Load(const void* ptr) { return _mm_loadu_pd((const double*)ptr);
} |
75 SkNx(double a, double b) : fVec(_mm_setr_pd(a,b)) {} | 75 SkNx(double a, double b) : fVec(_mm_setr_pd(a,b)) {} |
76 | 76 |
77 void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); } | 77 void store(void* ptr) const { _mm_storeu_pd((double*)ptr, fVec); } |
78 | 78 |
79 SkNx operator + (const SkNx& o) const { return _mm_add_pd(fVec, o.fVec); } | 79 SkNx operator + (const SkNx& o) const { return _mm_add_pd(fVec, o.fVec); } |
80 SkNx operator - (const SkNx& o) const { return _mm_sub_pd(fVec, o.fVec); } | 80 SkNx operator - (const SkNx& o) const { return _mm_sub_pd(fVec, o.fVec); } |
81 SkNx operator * (const SkNx& o) const { return _mm_mul_pd(fVec, o.fVec); } | 81 SkNx operator * (const SkNx& o) const { return _mm_mul_pd(fVec, o.fVec); } |
82 SkNx operator / (const SkNx& o) const { return _mm_div_pd(fVec, o.fVec); } | 82 SkNx operator / (const SkNx& o) const { return _mm_div_pd(fVec, o.fVec); } |
83 | 83 |
84 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_pd (fVec, o.fVec);
} | 84 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_pd (fVec, o.fVec);
} |
85 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_pd(fVec, o.fVec);
} | 85 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_pd(fVec, o.fVec);
} |
86 SkNx operator < (const SkNx& o) const { return _mm_cmplt_pd (fVec, o.fVec);
} | 86 SkNx operator < (const SkNx& o) const { return _mm_cmplt_pd (fVec, o.fVec);
} |
87 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_pd (fVec, o.fVec);
} | 87 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_pd (fVec, o.fVec);
} |
(...skipping 22 matching lines...) Expand all Loading... |
110 __m128d fVec; | 110 __m128d fVec; |
111 }; | 111 }; |
112 | 112 |
113 template <> | 113 template <> |
114 class SkNx<4, int> { | 114 class SkNx<4, int> { |
115 public: | 115 public: |
116 SkNx(const __m128i& vec) : fVec(vec) {} | 116 SkNx(const __m128i& vec) : fVec(vec) {} |
117 | 117 |
118 SkNx() {} | 118 SkNx() {} |
119 SkNx(int val) : fVec(_mm_set1_epi32(val)) {} | 119 SkNx(int val) : fVec(_mm_set1_epi32(val)) {} |
120 static SkNx Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*
)vals); } | 120 static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)p
tr); } |
121 SkNx(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {} | 121 SkNx(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {} |
122 | 122 |
123 void store(int vals[4]) const { _mm_storeu_si128((__m128i*)vals, fVec); } | 123 void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); } |
124 | 124 |
125 SkNx operator + (const SkNx& o) const { return _mm_add_epi32(fVec, o.fVec);
} | 125 SkNx operator + (const SkNx& o) const { return _mm_add_epi32(fVec, o.fVec);
} |
126 SkNx operator - (const SkNx& o) const { return _mm_sub_epi32(fVec, o.fVec);
} | 126 SkNx operator - (const SkNx& o) const { return _mm_sub_epi32(fVec, o.fVec);
} |
127 SkNx operator * (const SkNx& o) const { | 127 SkNx operator * (const SkNx& o) const { |
128 __m128i mul20 = _mm_mul_epu32(fVec, o.fVec), | 128 __m128i mul20 = _mm_mul_epu32(fVec, o.fVec), |
129 mul31 = _mm_mul_epu32(_mm_srli_si128(fVec, 4), _mm_srli_si128(o.
fVec, 4)); | 129 mul31 = _mm_mul_epu32(_mm_srli_si128(fVec, 4), _mm_srli_si128(o.
fVec, 4)); |
130 return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul20, _MM_SHUFFLE(0,0,2,0))
, | 130 return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul20, _MM_SHUFFLE(0,0,2,0))
, |
131 _mm_shuffle_epi32(mul31, _MM_SHUFFLE(0,0,2,0))
); | 131 _mm_shuffle_epi32(mul31, _MM_SHUFFLE(0,0,2,0))
); |
132 } | 132 } |
133 | 133 |
(...skipping 14 matching lines...) Expand all Loading... |
148 __m128i fVec; | 148 __m128i fVec; |
149 }; | 149 }; |
150 | 150 |
151 template <> | 151 template <> |
152 class SkNx<4, float> { | 152 class SkNx<4, float> { |
153 public: | 153 public: |
154 SkNx(const __m128& vec) : fVec(vec) {} | 154 SkNx(const __m128& vec) : fVec(vec) {} |
155 | 155 |
156 SkNx() {} | 156 SkNx() {} |
157 SkNx(float val) : fVec( _mm_set1_ps(val) ) {} | 157 SkNx(float val) : fVec( _mm_set1_ps(val) ) {} |
158 static SkNx Load(const float vals[4]) { return _mm_loadu_ps(vals); } | 158 static SkNx Load(const void* ptr) { return _mm_loadu_ps((const float*)ptr);
} |
159 | 159 |
160 SkNx(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} | 160 SkNx(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} |
161 | 161 |
162 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } | 162 void store(void* ptr) const { _mm_storeu_ps((float*)ptr, fVec); } |
163 | 163 |
164 SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); } | 164 SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); } |
165 SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); } | 165 SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); } |
166 SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); } | 166 SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); } |
167 SkNx operator / (const SkNx& o) const { return _mm_div_ps(fVec, o.fVec); } | 167 SkNx operator / (const SkNx& o) const { return _mm_div_ps(fVec, o.fVec); } |
168 | 168 |
169 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec);
} | 169 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec);
} |
170 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec);
} | 170 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec);
} |
171 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec);
} | 171 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec);
} |
172 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec);
} | 172 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec);
} |
(...skipping 30 matching lines...) Expand all Loading... |
203 __m128 fVec; | 203 __m128 fVec; |
204 }; | 204 }; |
205 | 205 |
206 template <> | 206 template <> |
207 class SkNx<4, uint16_t> { | 207 class SkNx<4, uint16_t> { |
208 public: | 208 public: |
209 SkNx(const __m128i& vec) : fVec(vec) {} | 209 SkNx(const __m128i& vec) : fVec(vec) {} |
210 | 210 |
211 SkNx() {} | 211 SkNx() {} |
212 SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {} | 212 SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {} |
213 static SkNx Load(const uint16_t vals[4]) { return _mm_loadl_epi64((const __m
128i*)vals); } | 213 static SkNx Load(const void* ptr) { return _mm_loadl_epi64((const __m128i*)p
tr); } |
214 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d) : fVec(_mm_setr_epi16(a
,b,c,d,0,0,0,0)) {} | 214 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d) : fVec(_mm_setr_epi16(a
,b,c,d,0,0,0,0)) {} |
215 | 215 |
216 void store(uint16_t vals[4]) const { _mm_storel_epi64((__m128i*)vals, fVec);
} | 216 void store(void* ptr) const { _mm_storel_epi64((__m128i*)ptr, fVec); } |
217 | 217 |
218 SkNx operator + (const SkNx& o) const { return _mm_add_epi16(fVec, o.fVec);
} | 218 SkNx operator + (const SkNx& o) const { return _mm_add_epi16(fVec, o.fVec);
} |
219 SkNx operator - (const SkNx& o) const { return _mm_sub_epi16(fVec, o.fVec);
} | 219 SkNx operator - (const SkNx& o) const { return _mm_sub_epi16(fVec, o.fVec);
} |
220 SkNx operator * (const SkNx& o) const { return _mm_mullo_epi16(fVec, o.fVec)
; } | 220 SkNx operator * (const SkNx& o) const { return _mm_mullo_epi16(fVec, o.fVec)
; } |
221 | 221 |
222 SkNx operator << (int bits) const { return _mm_slli_epi16(fVec, bits); } | 222 SkNx operator << (int bits) const { return _mm_slli_epi16(fVec, bits); } |
223 SkNx operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); } | 223 SkNx operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); } |
224 | 224 |
225 template <int k> uint16_t kth() const { | 225 template <int k> uint16_t kth() const { |
226 SkASSERT(0 <= k && k < 4); | 226 SkASSERT(0 <= k && k < 4); |
227 return _mm_extract_epi16(fVec, k); | 227 return _mm_extract_epi16(fVec, k); |
228 } | 228 } |
229 | 229 |
230 __m128i fVec; | 230 __m128i fVec; |
231 }; | 231 }; |
232 | 232 |
233 template <> | 233 template <> |
234 class SkNx<8, uint16_t> { | 234 class SkNx<8, uint16_t> { |
235 public: | 235 public: |
236 SkNx(const __m128i& vec) : fVec(vec) {} | 236 SkNx(const __m128i& vec) : fVec(vec) {} |
237 | 237 |
238 SkNx() {} | 238 SkNx() {} |
239 SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {} | 239 SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {} |
240 static SkNx Load(const uint16_t vals[8]) { return _mm_loadu_si128((const __m
128i*)vals); } | 240 static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)p
tr); } |
241 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d, | 241 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d, |
242 uint16_t e, uint16_t f, uint16_t g, uint16_t h) : fVec(_mm_setr_epi16(a
,b,c,d,e,f,g,h)) {} | 242 uint16_t e, uint16_t f, uint16_t g, uint16_t h) : fVec(_mm_setr_epi16(a
,b,c,d,e,f,g,h)) {} |
243 | 243 |
244 void store(uint16_t vals[8]) const { _mm_storeu_si128((__m128i*)vals, fVec);
} | 244 void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); } |
245 | 245 |
246 SkNx operator + (const SkNx& o) const { return _mm_add_epi16(fVec, o.fVec);
} | 246 SkNx operator + (const SkNx& o) const { return _mm_add_epi16(fVec, o.fVec);
} |
247 SkNx operator - (const SkNx& o) const { return _mm_sub_epi16(fVec, o.fVec);
} | 247 SkNx operator - (const SkNx& o) const { return _mm_sub_epi16(fVec, o.fVec);
} |
248 SkNx operator * (const SkNx& o) const { return _mm_mullo_epi16(fVec, o.fVec)
; } | 248 SkNx operator * (const SkNx& o) const { return _mm_mullo_epi16(fVec, o.fVec)
; } |
249 | 249 |
250 SkNx operator << (int bits) const { return _mm_slli_epi16(fVec, bits); } | 250 SkNx operator << (int bits) const { return _mm_slli_epi16(fVec, bits); } |
251 SkNx operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); } | 251 SkNx operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); } |
252 | 252 |
253 static SkNx Min(const SkNx& a, const SkNx& b) { | 253 static SkNx Min(const SkNx& a, const SkNx& b) { |
254 // No unsigned _mm_min_epu16, so we'll shift into a space where we can u
se the | 254 // No unsigned _mm_min_epu16, so we'll shift into a space where we can u
se the |
(...skipping 16 matching lines...) Expand all Loading... |
271 | 271 |
272 __m128i fVec; | 272 __m128i fVec; |
273 }; | 273 }; |
274 | 274 |
275 template <> | 275 template <> |
276 class SkNx<4, uint8_t> { | 276 class SkNx<4, uint8_t> { |
277 public: | 277 public: |
278 SkNx(const __m128i& vec) : fVec(vec) {} | 278 SkNx(const __m128i& vec) : fVec(vec) {} |
279 | 279 |
280 SkNx() {} | 280 SkNx() {} |
281 static SkNx Load(const uint8_t vals[4]) { return _mm_cvtsi32_si128(*(const i
nt*)vals); } | 281 static SkNx Load(const void* ptr) { return _mm_cvtsi32_si128(*(const int*)pt
r); } |
282 void store(uint8_t vals[4]) const { *(int*)vals = _mm_cvtsi128_si32(fVec); } | 282 void store(void* ptr) const { *(int*)ptr = _mm_cvtsi128_si32(fVec); } |
283 | 283 |
284 // TODO as needed | 284 // TODO as needed |
285 | 285 |
286 __m128i fVec; | 286 __m128i fVec; |
287 }; | 287 }; |
288 | 288 |
289 template <> | 289 template <> |
290 class SkNx<8, uint8_t> { | 290 class SkNx<8, uint8_t> { |
291 public: | 291 public: |
292 SkNx(const __m128i& vec) : fVec(vec) {} | 292 SkNx(const __m128i& vec) : fVec(vec) {} |
293 | 293 |
294 SkNx() {} | 294 SkNx() {} |
295 static SkNx Load(const uint8_t vals[8]) { return _mm_loadl_epi64((const __m1
28i*)vals); } | 295 static SkNx Load(const void* ptr) { return _mm_loadl_epi64((const __m128i*)p
tr); } |
296 void store(uint8_t vals[8]) const { _mm_storel_epi64((__m128i*)vals, fVec);
} | 296 void store(void* ptr) const { _mm_storel_epi64((__m128i*)ptr, fVec); } |
297 | 297 |
298 // TODO as needed | 298 // TODO as needed |
299 | 299 |
300 __m128i fVec; | 300 __m128i fVec; |
301 }; | 301 }; |
302 | 302 |
303 template <> | 303 template <> |
304 class SkNx<16, uint8_t> { | 304 class SkNx<16, uint8_t> { |
305 public: | 305 public: |
306 SkNx(const __m128i& vec) : fVec(vec) {} | 306 SkNx(const __m128i& vec) : fVec(vec) {} |
307 | 307 |
308 SkNx() {} | 308 SkNx() {} |
309 SkNx(uint8_t val) : fVec(_mm_set1_epi8(val)) {} | 309 SkNx(uint8_t val) : fVec(_mm_set1_epi8(val)) {} |
310 static SkNx Load(const uint8_t vals[16]) { return _mm_loadu_si128((const __m
128i*)vals); } | 310 static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)p
tr); } |
311 SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d, | 311 SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d, |
312 uint8_t e, uint8_t f, uint8_t g, uint8_t h, | 312 uint8_t e, uint8_t f, uint8_t g, uint8_t h, |
313 uint8_t i, uint8_t j, uint8_t k, uint8_t l, | 313 uint8_t i, uint8_t j, uint8_t k, uint8_t l, |
314 uint8_t m, uint8_t n, uint8_t o, uint8_t p) | 314 uint8_t m, uint8_t n, uint8_t o, uint8_t p) |
315 : fVec(_mm_setr_epi8(a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p)) {} | 315 : fVec(_mm_setr_epi8(a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p)) {} |
316 | 316 |
317 void store(uint8_t vals[16]) const { _mm_storeu_si128((__m128i*)vals, fVec);
} | 317 void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); } |
318 | 318 |
319 SkNx saturatedAdd(const SkNx& o) const { return _mm_adds_epu8(fVec, o.fVec);
} | 319 SkNx saturatedAdd(const SkNx& o) const { return _mm_adds_epu8(fVec, o.fVec);
} |
320 | 320 |
321 SkNx operator + (const SkNx& o) const { return _mm_add_epi8(fVec, o.fVec); } | 321 SkNx operator + (const SkNx& o) const { return _mm_add_epi8(fVec, o.fVec); } |
322 SkNx operator - (const SkNx& o) const { return _mm_sub_epi8(fVec, o.fVec); } | 322 SkNx operator - (const SkNx& o) const { return _mm_sub_epi8(fVec, o.fVec); } |
323 | 323 |
324 static SkNx Min(const SkNx& a, const SkNx& b) { return _mm_min_epu8(a.fVec,
b.fVec); } | 324 static SkNx Min(const SkNx& a, const SkNx& b) { return _mm_min_epu8(a.fVec,
b.fVec); } |
325 SkNx operator < (const SkNx& o) const { | 325 SkNx operator < (const SkNx& o) const { |
326 // There's no unsigned _mm_cmplt_epu8, so we flip the sign bits then use
a signed compare. | 326 // There's no unsigned _mm_cmplt_epu8, so we flip the sign bits then use
a signed compare. |
327 auto flip = _mm_set1_epi8(char(0x80)); | 327 auto flip = _mm_set1_epi8(char(0x80)); |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
384 } | 384 } |
385 | 385 |
386 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t, 4>(const Sk4h& src) { | 386 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t, 4>(const Sk4h& src) { |
387 return _mm_packus_epi16(src.fVec, src.fVec); | 387 return _mm_packus_epi16(src.fVec, src.fVec); |
388 } | 388 } |
389 | 389 |
390 | 390 |
391 } // namespace | 391 } // namespace |
392 | 392 |
393 #endif//SkNx_sse_DEFINED | 393 #endif//SkNx_sse_DEFINED |
OLD | NEW |