Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(545)

Side by Side Diff: src/opts/SkNx_sse.h

Issue 1133933004: add Min to SkNi, specialized for u8 and u16 on SSE and NEON (Closed) Base URL: https://skia.googlesource.com/skia@master
Patch Set: fixes Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkNx_neon.h ('k') | tests/SkNxTest.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_sse_DEFINED 8 #ifndef SkNx_sse_DEFINED
9 #define SkNx_sse_DEFINED 9 #define SkNx_sse_DEFINED
10 10
(...skipping 254 matching lines...) Expand 10 before | Expand all | Expand 10 after
265 265
266 void store(uint16_t vals[8]) const { _mm_storeu_si128((__m128i*)vals, fVec); } 266 void store(uint16_t vals[8]) const { _mm_storeu_si128((__m128i*)vals, fVec); }
267 267
268 SkNi operator + (const SkNi& o) const { return _mm_add_epi16(fVec, o.fVec); } 268 SkNi operator + (const SkNi& o) const { return _mm_add_epi16(fVec, o.fVec); }
269 SkNi operator - (const SkNi& o) const { return _mm_sub_epi16(fVec, o.fVec); } 269 SkNi operator - (const SkNi& o) const { return _mm_sub_epi16(fVec, o.fVec); }
270 SkNi operator * (const SkNi& o) const { return _mm_mullo_epi16(fVec, o.fVec); } 270 SkNi operator * (const SkNi& o) const { return _mm_mullo_epi16(fVec, o.fVec); }
271 271
272 SkNi operator << (int bits) const { return _mm_slli_epi16(fVec, bits); } 272 SkNi operator << (int bits) const { return _mm_slli_epi16(fVec, bits); }
273 SkNi operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); } 273 SkNi operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); }
274 274
275 static SkNi Min(const SkNi& a, const SkNi& b) {
276 // No unsigned _mm_min_epu16, so we'll shift into a space where we can use the
277 // signed version, _mm_min_epi16, then shift back.
278 const uint16_t top = 0x8000; // Keep this separate from _mm_set1_epi16 or MSVC will whine.
279 const __m128i top_8x = _mm_set1_epi16(top);
280 return _mm_add_epi8(top_8x, _mm_min_epi16(_mm_sub_epi8(a.fVec, top_8x),
281 _mm_sub_epi8(b.fVec, top_8x)));
282 }
283
275 template <int k> uint16_t kth() const { 284 template <int k> uint16_t kth() const {
276 SkASSERT(0 <= k && k < 8); 285 SkASSERT(0 <= k && k < 8);
277 return _mm_extract_epi16(fVec, k); 286 return _mm_extract_epi16(fVec, k);
278 } 287 }
279 288
280 __m128i fVec; 289 __m128i fVec;
281 }; 290 };
282 291
283 template <> 292 template <>
284 class SkNi<16, uint8_t> { 293 class SkNi<16, uint8_t> {
(...skipping 14 matching lines...) Expand all
299 SkNi saturatedAdd(const SkNi& o) const { return _mm_adds_epu8(fVec, o.fVec); } 308 SkNi saturatedAdd(const SkNi& o) const { return _mm_adds_epu8(fVec, o.fVec); }
300 309
301 SkNi operator + (const SkNi& o) const { return _mm_add_epi8(fVec, o.fVec); } 310 SkNi operator + (const SkNi& o) const { return _mm_add_epi8(fVec, o.fVec); }
302 SkNi operator - (const SkNi& o) const { return _mm_sub_epi8(fVec, o.fVec); } 311 SkNi operator - (const SkNi& o) const { return _mm_sub_epi8(fVec, o.fVec); }
303 312
304 // SSE cannot multiply or shift vectors of uint8_t. 313 // SSE cannot multiply or shift vectors of uint8_t.
305 SkNi operator * (const SkNi& o) const { SkASSERT(false); return fVec; } 314 SkNi operator * (const SkNi& o) const { SkASSERT(false); return fVec; }
306 SkNi operator << (int bits) const { SkASSERT(false); return fVec; } 315 SkNi operator << (int bits) const { SkASSERT(false); return fVec; }
307 SkNi operator >> (int bits) const { SkASSERT(false); return fVec; } 316 SkNi operator >> (int bits) const { SkASSERT(false); return fVec; }
308 317
318 static SkNi Min(const SkNi& a, const SkNi& b) { return _mm_min_epu8(a.fVec, b.fVec); }
319
309 template <int k> uint8_t kth() const { 320 template <int k> uint8_t kth() const {
310 SkASSERT(0 <= k && k < 16); 321 SkASSERT(0 <= k && k < 16);
311 // SSE4.1 would just `return _mm_extract_epi8(fVec, k)`. We have to read 16-bits instead. 322 // SSE4.1 would just `return _mm_extract_epi8(fVec, k)`. We have to read 16-bits instead.
312 int pair = _mm_extract_epi16(fVec, k/2); 323 int pair = _mm_extract_epi16(fVec, k/2);
313 return k % 2 == 0 ? pair : (pair >> 8); 324 return k % 2 == 0 ? pair : (pair >> 8);
314 } 325 }
315 326
316 __m128i fVec; 327 __m128i fVec;
317 }; 328 };
318 329
319 #endif//SkNx_sse_DEFINED 330 #endif//SkNx_sse_DEFINED
OLDNEW
« no previous file with comments | « src/opts/SkNx_neon.h ('k') | tests/SkNxTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698