OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
10 | 10 |
(...skipping 254 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
265 | 265 |
266 void store(uint16_t vals[8]) const { _mm_storeu_si128((__m128i*)vals, fVec);
} | 266 void store(uint16_t vals[8]) const { _mm_storeu_si128((__m128i*)vals, fVec);
} |
267 | 267 |
268 SkNi operator + (const SkNi& o) const { return _mm_add_epi16(fVec, o.fVec);
} | 268 SkNi operator + (const SkNi& o) const { return _mm_add_epi16(fVec, o.fVec);
} |
269 SkNi operator - (const SkNi& o) const { return _mm_sub_epi16(fVec, o.fVec);
} | 269 SkNi operator - (const SkNi& o) const { return _mm_sub_epi16(fVec, o.fVec);
} |
270 SkNi operator * (const SkNi& o) const { return _mm_mullo_epi16(fVec, o.fVec)
; } | 270 SkNi operator * (const SkNi& o) const { return _mm_mullo_epi16(fVec, o.fVec)
; } |
271 | 271 |
272 SkNi operator << (int bits) const { return _mm_slli_epi16(fVec, bits); } | 272 SkNi operator << (int bits) const { return _mm_slli_epi16(fVec, bits); } |
273 SkNi operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); } | 273 SkNi operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); } |
274 | 274 |
| 275 static SkNi Min(const SkNi& a, const SkNi& b) { |
| 276 // No unsigned _mm_min_epu16, so we'll shift into a space where we can u
se the |
| 277 // signed version, _mm_min_epi16, then shift back. |
| 278 const uint16_t top = 0x8000; // Keep this separate from _mm_set1_epi16 o
r MSVC will whine. |
| 279 const __m128i top_8x = _mm_set1_epi16(top); |
| 280 return _mm_add_epi8(top_8x, _mm_min_epi16(_mm_sub_epi8(a.fVec, top_8x), |
| 281 _mm_sub_epi8(b.fVec, top_8x)))
; |
| 282 } |
| 283 |
275 template <int k> uint16_t kth() const { | 284 template <int k> uint16_t kth() const { |
276 SkASSERT(0 <= k && k < 8); | 285 SkASSERT(0 <= k && k < 8); |
277 return _mm_extract_epi16(fVec, k); | 286 return _mm_extract_epi16(fVec, k); |
278 } | 287 } |
279 | 288 |
280 __m128i fVec; | 289 __m128i fVec; |
281 }; | 290 }; |
282 | 291 |
283 template <> | 292 template <> |
284 class SkNi<16, uint8_t> { | 293 class SkNi<16, uint8_t> { |
(...skipping 14 matching lines...) Expand all Loading... |
299 SkNi saturatedAdd(const SkNi& o) const { return _mm_adds_epu8(fVec, o.fVec);
} | 308 SkNi saturatedAdd(const SkNi& o) const { return _mm_adds_epu8(fVec, o.fVec);
} |
300 | 309 |
301 SkNi operator + (const SkNi& o) const { return _mm_add_epi8(fVec, o.fVec); } | 310 SkNi operator + (const SkNi& o) const { return _mm_add_epi8(fVec, o.fVec); } |
302 SkNi operator - (const SkNi& o) const { return _mm_sub_epi8(fVec, o.fVec); } | 311 SkNi operator - (const SkNi& o) const { return _mm_sub_epi8(fVec, o.fVec); } |
303 | 312 |
304 // SSE cannot multiply or shift vectors of uint8_t. | 313 // SSE cannot multiply or shift vectors of uint8_t. |
305 SkNi operator * (const SkNi& o) const { SkASSERT(false); return fVec; } | 314 SkNi operator * (const SkNi& o) const { SkASSERT(false); return fVec; } |
306 SkNi operator << (int bits) const { SkASSERT(false); return fVec; } | 315 SkNi operator << (int bits) const { SkASSERT(false); return fVec; } |
307 SkNi operator >> (int bits) const { SkASSERT(false); return fVec; } | 316 SkNi operator >> (int bits) const { SkASSERT(false); return fVec; } |
308 | 317 |
| 318 static SkNi Min(const SkNi& a, const SkNi& b) { return _mm_min_epu8(a.fVec,
b.fVec); } |
| 319 |
309 template <int k> uint8_t kth() const { | 320 template <int k> uint8_t kth() const { |
310 SkASSERT(0 <= k && k < 16); | 321 SkASSERT(0 <= k && k < 16); |
311 // SSE4.1 would just `return _mm_extract_epi8(fVec, k)`. We have to rea
d 16-bits instead. | 322 // SSE4.1 would just `return _mm_extract_epi8(fVec, k)`. We have to rea
d 16-bits instead. |
312 int pair = _mm_extract_epi16(fVec, k/2); | 323 int pair = _mm_extract_epi16(fVec, k/2); |
313 return k % 2 == 0 ? pair : (pair >> 8); | 324 return k % 2 == 0 ? pair : (pair >> 8); |
314 } | 325 } |
315 | 326 |
316 __m128i fVec; | 327 __m128i fVec; |
317 }; | 328 }; |
318 | 329 |
319 #endif//SkNx_sse_DEFINED | 330 #endif//SkNx_sse_DEFINED |
OLD | NEW |