OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
10 | 10 |
(...skipping 278 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
289 explicit SkNi(uint8_t val) : fVec(_mm_set1_epi8(val)) {} | 289 explicit SkNi(uint8_t val) : fVec(_mm_set1_epi8(val)) {} |
290 static SkNi Load(const uint8_t vals[16]) { return _mm_loadu_si128((const __m
128i*)vals); } | 290 static SkNi Load(const uint8_t vals[16]) { return _mm_loadu_si128((const __m
128i*)vals); } |
291 SkNi(uint8_t a, uint8_t b, uint8_t c, uint8_t d, | 291 SkNi(uint8_t a, uint8_t b, uint8_t c, uint8_t d, |
292 uint8_t e, uint8_t f, uint8_t g, uint8_t h, | 292 uint8_t e, uint8_t f, uint8_t g, uint8_t h, |
293 uint8_t i, uint8_t j, uint8_t k, uint8_t l, | 293 uint8_t i, uint8_t j, uint8_t k, uint8_t l, |
294 uint8_t m, uint8_t n, uint8_t o, uint8_t p) | 294 uint8_t m, uint8_t n, uint8_t o, uint8_t p) |
295 : fVec(_mm_setr_epi8(a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p)) {} | 295 : fVec(_mm_setr_epi8(a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p)) {} |
296 | 296 |
297 void store(uint8_t vals[16]) const { _mm_storeu_si128((__m128i*)vals, fVec);
} | 297 void store(uint8_t vals[16]) const { _mm_storeu_si128((__m128i*)vals, fVec);
} |
298 | 298 |
| 299 SkNi saturatedAdd(const SkNi& o) const { return _mm_adds_epu8(fVec, o.fVec);
} |
| 300 |
299 SkNi operator + (const SkNi& o) const { return _mm_add_epi8(fVec, o.fVec); } | 301 SkNi operator + (const SkNi& o) const { return _mm_add_epi8(fVec, o.fVec); } |
300 SkNi operator - (const SkNi& o) const { return _mm_sub_epi8(fVec, o.fVec); } | 302 SkNi operator - (const SkNi& o) const { return _mm_sub_epi8(fVec, o.fVec); } |
301 | 303 |
302 // SSE cannot multiply or shift vectors of uint8_t. | 304 // SSE cannot multiply or shift vectors of uint8_t. |
303 SkNi operator * (const SkNi& o) const { SkASSERT(false); return fVec; } | 305 SkNi operator * (const SkNi& o) const { SkASSERT(false); return fVec; } |
304 SkNi operator << (int bits) const { SkASSERT(false); return fVec; } | 306 SkNi operator << (int bits) const { SkASSERT(false); return fVec; } |
305 SkNi operator >> (int bits) const { SkASSERT(false); return fVec; } | 307 SkNi operator >> (int bits) const { SkASSERT(false); return fVec; } |
306 | 308 |
307 template <int k> uint8_t kth() const { | 309 template <int k> uint8_t kth() const { |
308 SkASSERT(0 <= k && k < 16); | 310 SkASSERT(0 <= k && k < 16); |
309 // SSE4.1 would just `return _mm_extract_epi8(fVec, k)`. We have to rea
d 16-bits instead. | 311 // SSE4.1 would just `return _mm_extract_epi8(fVec, k)`. We have to rea
d 16-bits instead. |
310 int pair = _mm_extract_epi16(fVec, k/2); | 312 int pair = _mm_extract_epi16(fVec, k/2); |
311 return k % 2 == 0 ? pair : (pair >> 8); | 313 return k % 2 == 0 ? pair : (pair >> 8); |
312 } | 314 } |
313 | 315 |
// Underlying 128-bit SSE value; the methods of this struct treat it as 16 uint8_t lanes.
314 __m128i fVec; | 316 __m128i fVec; |
315 }; | 317 }; |
316 | 318 |
317 #endif//SkNx_sse_DEFINED | 319 #endif//SkNx_sse_DEFINED |
OLD | NEW |