Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(276)

Side by Side Diff: src/opts/SkNx_sse.h

Issue 2138073002: Clean up hyper-local SkCpu feature test experiment. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/core/SkXfermodeF16.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_sse_DEFINED 8 #ifndef SkNx_sse_DEFINED
9 #define SkNx_sse_DEFINED 9 #define SkNx_sse_DEFINED
10 10
11 #include "SkCpu.h"
12 #include <immintrin.h> 11 #include <immintrin.h>
13 12
14 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent. 13 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent.
15 // If you do, make sure this is in a static inline function... anywhere else risks violating ODR. 14 // If you do, make sure this is in a static inline function... anywhere else risks violating ODR.
16 15
17 #define SKNX_IS_FAST 16 #define SKNX_IS_FAST
18 17
19 template <> 18 template <>
20 class SkNx<2, float> { 19 class SkNx<2, float> {
21 public: 20 public:
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
84 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec); } 83 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec); }
85 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } 84 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec); }
86 SkNx operator <= (const SkNx& o) const { return _mm_cmple_ps (fVec, o.fVec); } 85 SkNx operator <= (const SkNx& o) const { return _mm_cmple_ps (fVec, o.fVec); }
87 SkNx operator >= (const SkNx& o) const { return _mm_cmpge_ps (fVec, o.fVec); } 86 SkNx operator >= (const SkNx& o) const { return _mm_cmpge_ps (fVec, o.fVec); }
88 87
89 static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r. fVec); } 88 static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r. fVec); }
90 static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r. fVec); } 89 static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r. fVec); }
91 90
92 SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); } 91 SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); }
93 SkNx floor() const { 92 SkNx floor() const {
94 if (SkCpu::Supports(SkCpu::SSE41)) { 93 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
95 __m128 r; 94 return _mm_floor_ps(fVec);
96 #if defined(__GNUC__) || defined(__clang__) 95 #else
97 asm("roundps $0x1, %[fVec], %[r]" : [r]"=x"(r) : [fVec]"x"(fVec));
98 #else
99 r = _mm_floor_ps(fVec);
100 #endif
101 return r;
102 }
103 // Emulate _mm_floor_ps() with SSE2: 96 // Emulate _mm_floor_ps() with SSE2:
104 // - roundtrip through integers via truncation 97 // - roundtrip through integers via truncation
105 // - subtract 1 if that's too big (possible for negative values). 98 // - subtract 1 if that's too big (possible for negative values).
106 // This restricts the domain of our inputs to a maximum somewhere around 2^31. 99 // This restricts the domain of our inputs to a maximum somewhere around 2^31.
107 // Seems plenty big. 100 // Seems plenty big.
108 __m128 roundtrip = _mm_cvtepi32_ps(_mm_cvttps_epi32(fVec)); 101 __m128 roundtrip = _mm_cvtepi32_ps(_mm_cvttps_epi32(fVec));
109 __m128 too_big = _mm_cmpgt_ps(roundtrip, fVec); 102 __m128 too_big = _mm_cmpgt_ps(roundtrip, fVec);
110 return _mm_sub_ps(roundtrip, _mm_and_ps(too_big, _mm_set1_ps(1.0f))); 103 return _mm_sub_ps(roundtrip, _mm_and_ps(too_big, _mm_set1_ps(1.0f)));
104 #endif
111 } 105 }
112 106
113 SkNx sqrt() const { return _mm_sqrt_ps (fVec); } 107 SkNx sqrt() const { return _mm_sqrt_ps (fVec); }
114 SkNx rsqrt() const { return _mm_rsqrt_ps(fVec); } 108 SkNx rsqrt() const { return _mm_rsqrt_ps(fVec); }
115 SkNx invert() const { return _mm_rcp_ps(fVec); } 109 SkNx invert() const { return _mm_rcp_ps(fVec); }
116 110
117 float operator[](int k) const { 111 float operator[](int k) const {
118 SkASSERT(0 <= k && k < 4); 112 SkASSERT(0 <= k && k < 4);
119 union { __m128 v; float fs[4]; } pun = {fVec}; 113 union { __m128 v; float fs[4]; } pun = {fVec};
120 return pun.fs[k&3]; 114 return pun.fs[k&3];
121 } 115 }
122 116
123 bool allTrue() const { return 0xffff == _mm_movemask_epi8(_mm_castps_si128(f Vec)); } 117 bool allTrue() const { return 0xffff == _mm_movemask_epi8(_mm_castps_si128(f Vec)); }
124 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castps_si128(f Vec)); } 118 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castps_si128(f Vec)); }
125 119
126 SkNx thenElse(const SkNx& t, const SkNx& e) const { 120 SkNx thenElse(const SkNx& t, const SkNx& e) const {
127 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 121 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
128 return _mm_blendv_ps(e.fVec, t.fVec, fVec); 122 return _mm_blendv_ps(e.fVec, t.fVec, fVec);
129 #else 123 #else
130 return _mm_or_ps(_mm_and_ps (fVec, t.fVec), 124 return _mm_or_ps(_mm_and_ps (fVec, t.fVec),
131 _mm_andnot_ps(fVec, e.fVec)); 125 _mm_andnot_ps(fVec, e.fVec));
132 #endif 126 #endif
133 } 127 }
134 128
135 __m128 fVec; 129 __m128 fVec;
136 }; 130 };
137 131
138 template <> 132 template <>
139 class SkNx<4, int> { 133 class SkNx<4, int> {
140 public: 134 public:
141 SkNx(const __m128i& vec) : fVec(vec) {} 135 SkNx(const __m128i& vec) : fVec(vec) {}
142 136
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after
371 365
372 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { 366 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) {
373 return _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128()); 367 return _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128());
374 } 368 }
375 369
376 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { 370 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) {
377 return _mm_packus_epi16(src.fVec, src.fVec); 371 return _mm_packus_epi16(src.fVec, src.fVec);
378 } 372 }
379 373
380 #endif//SkNx_sse_DEFINED 374 #endif//SkNx_sse_DEFINED
OLDNEW
« no previous file with comments | « src/core/SkXfermodeF16.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698