OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
10 | 10 |
11 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent. | 11 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent. |
12 | 12 |
13 namespace { // See SkNx.h | 13 namespace { // See SkNx.h |
14 | 14 |
// NEW column only: SkNb<2, 4> specialization, a thin wrapper around a __m128i (fVec).
// SkNf<2, float> names this type Nb (typedef SkNb<2, 4> Nb) and returns it from its
// comparison operators on the NEW side.
| 15 template <> |
| 16 class SkNb<2, 4> { |
| 17 public: |
| 18 SkNb(const __m128i& vec) : fVec(vec) {} |
| 19 |
| 20 SkNb() {} |
// Both tests keep only the low 8 bits of the _mm_movemask_epi8 byte mask (& 0xff),
// i.e. the low 8 bytes of fVec; the upper half of the register is ignored.
| 21 bool allTrue() const { return 0xff == (_mm_movemask_epi8(fVec) & 0xff); } |
| 22 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(fVec) & 0xff); } |
| 23 |
| 24 __m128i fVec; |
| 25 }; |
| 26 |
// NEW column only: SkNb<4, 4> specialization, a thin wrapper around a __m128i (fVec).
// SkNf<4, float> names this type Nb (typedef SkNb<4, 4> Nb) and returns it from its
// comparison operators on the NEW side.
| 27 template <> |
| 28 class SkNb<4, 4> { |
| 29 public: |
| 30 SkNb(const __m128i& vec) : fVec(vec) {} |
| 31 |
| 32 SkNb() {} |
// Unlike SkNb<2, 4>, the full 16-bit _mm_movemask_epi8 result is compared (no masking).
| 33 bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); } |
| 34 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); } |
| 35 |
| 36 __m128i fVec; |
| 37 }; |
| 38 |
// NEW column only: SkNb<2, 8> specialization, a thin wrapper around a __m128i (fVec).
// SkNf<2, double> names this type Nb (typedef SkNb<2, 8> Nb) and returns it from its
// comparison operators on the NEW side.
| 39 template <> |
| 40 class SkNb<2, 8> { |
| 41 public: |
| 42 SkNb(const __m128i& vec) : fVec(vec) {} |
| 43 |
| 44 SkNb() {} |
// The full 16-bit _mm_movemask_epi8 result is compared, so the whole register takes part.
| 45 bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); } |
| 46 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); } |
| 47 |
| 48 __m128i fVec; |
| 49 }; |
| 50 |
15 | 51 |
// SkNf<2, float>: two floats carried in a __m128 (fVec). Rows read OLD | NEW.
// NEW adds the private typedef Nb = SkNb<2, 4>, makes the six comparison operators
// return Nb instead of SkNf, and drops allTrue()/anyTrue() from this class.
16 template <> | 52 template <> |
17 class SkNf<2, float> { | 53 class SkNf<2, float> { |
| 54 typedef SkNb<2, 4> Nb; |
18 public: | 55 public: |
19 SkNf(const __m128& vec) : fVec(vec) {} | 56 SkNf(const __m128& vec) : fVec(vec) {} |
20 | 57 |
21 SkNf() {} | 58 SkNf() {} |
22 explicit SkNf(float val) : fVec(_mm_set1_ps(val)) {} | 59 explicit SkNf(float val) : fVec(_mm_set1_ps(val)) {} |
// Load() goes through the 64-bit integer load _mm_loadl_epi64 and casts the result
// back to float lanes, matching the float[2] parameter.
23 static SkNf Load(const float vals[2]) { | 60 static SkNf Load(const float vals[2]) { |
24 return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals)); | 61 return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals)); |
25 } | 62 } |
// The two lanes not supplied by the caller are set to 0.
26 SkNf(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {} | 63 SkNf(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {} |
27 | 64 |
// store() writes through _mm_storel_pi, the low-half store, matching float[2].
28 void store(float vals[2]) const { _mm_storel_pi((__m64*)vals, fVec); } | 65 void store(float vals[2]) const { _mm_storel_pi((__m64*)vals, fVec); } |
29 | 66 |
30 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } | 67 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } |
31 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } | 68 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } |
32 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } | 69 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } |
33 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } | 70 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } |
34 | 71 |
// Comparisons. OLD returns the _mm_cmp*_ps result as an SkNf; NEW reinterprets the
// same result with _mm_castps_si128 and returns it as Nb.
35 SkNf operator == (const SkNf& o) const { return _mm_cmpeq_ps (fVec, o.fVec);
} | 72 Nb operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps
(fVec, o.fVec)); } |
36 SkNf operator != (const SkNf& o) const { return _mm_cmpneq_ps(fVec, o.fVec);
} | 73 Nb operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps
(fVec, o.fVec)); } |
37 SkNf operator < (const SkNf& o) const { return _mm_cmplt_ps (fVec, o.fVec);
} | 74 Nb operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps
(fVec, o.fVec)); } |
38 SkNf operator > (const SkNf& o) const { return _mm_cmpgt_ps (fVec, o.fVec);
} | 75 Nb operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps
(fVec, o.fVec)); } |
39 SkNf operator <= (const SkNf& o) const { return _mm_cmple_ps (fVec, o.fVec);
} | 76 Nb operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps
(fVec, o.fVec)); } |
40 SkNf operator >= (const SkNf& o) const { return _mm_cmpge_ps (fVec, o.fVec);
} | 77 Nb operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps
(fVec, o.fVec)); } |
41 | 78 |
42 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.
fVec); } | 79 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.
fVec); } |
43 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.
fVec); } | 80 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.
fVec); } |
44 | 81 |
// rsqrt1() and rsqrt2() simply forward to rsqrt0() (_mm_rsqrt_ps); no additional
// refinement step is performed in this specialization.
45 SkNf sqrt() const { return _mm_sqrt_ps (fVec); } | 82 SkNf sqrt() const { return _mm_sqrt_ps (fVec); } |
46 SkNf rsqrt0() const { return _mm_rsqrt_ps(fVec); } | 83 SkNf rsqrt0() const { return _mm_rsqrt_ps(fVec); } |
47 SkNf rsqrt1() const { return this->rsqrt0(); } | 84 SkNf rsqrt1() const { return this->rsqrt0(); } |
48 SkNf rsqrt2() const { return this->rsqrt1(); } | 85 SkNf rsqrt2() const { return this->rsqrt1(); } |
49 | 86 |
// invert() is a full division by *this; approxInvert() uses _mm_rcp_ps instead.
50 SkNf invert() const { return SkNf(1) / *this; } | 87 SkNf invert() const { return SkNf(1) / *this; } |
51 SkNf approxInvert() const { return _mm_rcp_ps(fVec); } | 88 SkNf approxInvert() const { return _mm_rcp_ps(fVec); } |
52 | 89 |
// kth<k>() reads lane k through a union; besides the SkASSERT, the index is
// masked with k&1 so the array access stays within the first two floats.
53 template <int k> float kth() const { | 90 template <int k> float kth() const { |
54 SkASSERT(0 <= k && k < 2); | 91 SkASSERT(0 <= k && k < 2); |
55 union { __m128 v; float fs[4]; } pun = {fVec}; | 92 union { __m128 v; float fs[4]; } pun = {fVec}; |
56 return pun.fs[k&1]; | 93 return pun.fs[k&1]; |
57 } | 94 } |
58 | 95 |
// OLD only (NEW column is empty): allTrue()/anyTrue() tested the low 8 movemask
// bits of fVec directly on this class.
59 bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fV
ec)) & 0xff); } | |
60 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fV
ec)) & 0xff); } | |
61 | |
62 __m128 fVec; | 96 __m128 fVec; |
63 }; | 97 }; |
64 | 98 |
// SkNf<2, double>: two doubles carried in a __m128d (fVec). Rows read OLD | NEW.
// NEW adds the private typedef Nb = SkNb<2, 8>, makes the six comparison operators
// return Nb instead of SkNf, and drops allTrue()/anyTrue() from this class.
65 template <> | 99 template <> |
66 class SkNf<2, double> { | 100 class SkNf<2, double> { |
| 101 typedef SkNb<2, 8> Nb; |
67 public: | 102 public: |
68 SkNf(const __m128d& vec) : fVec(vec) {} | 103 SkNf(const __m128d& vec) : fVec(vec) {} |
69 | 104 |
70 SkNf() {} | 105 SkNf() {} |
71 explicit SkNf(double val) : fVec( _mm_set1_pd(val) ) {} | 106 explicit SkNf(double val) : fVec( _mm_set1_pd(val) ) {} |
// Load()/store() use the unaligned forms (_mm_loadu_pd / _mm_storeu_pd).
72 static SkNf Load(const double vals[2]) { return _mm_loadu_pd(vals); } | 107 static SkNf Load(const double vals[2]) { return _mm_loadu_pd(vals); } |
73 SkNf(double a, double b) : fVec(_mm_setr_pd(a,b)) {} | 108 SkNf(double a, double b) : fVec(_mm_setr_pd(a,b)) {} |
74 | 109 |
75 void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); } | 110 void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); } |
76 | 111 |
77 SkNf operator + (const SkNf& o) const { return _mm_add_pd(fVec, o.fVec); } | 112 SkNf operator + (const SkNf& o) const { return _mm_add_pd(fVec, o.fVec); } |
78 SkNf operator - (const SkNf& o) const { return _mm_sub_pd(fVec, o.fVec); } | 113 SkNf operator - (const SkNf& o) const { return _mm_sub_pd(fVec, o.fVec); } |
79 SkNf operator * (const SkNf& o) const { return _mm_mul_pd(fVec, o.fVec); } | 114 SkNf operator * (const SkNf& o) const { return _mm_mul_pd(fVec, o.fVec); } |
80 SkNf operator / (const SkNf& o) const { return _mm_div_pd(fVec, o.fVec); } | 115 SkNf operator / (const SkNf& o) const { return _mm_div_pd(fVec, o.fVec); } |
81 | 116 |
// Comparisons. OLD returns the _mm_cmp*_pd result as an SkNf; NEW reinterprets the
// same result with _mm_castpd_si128 and returns it as Nb.
82 SkNf operator == (const SkNf& o) const { return _mm_cmpeq_pd (fVec, o.fVec);
} | 117 Nb operator == (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpeq_pd
(fVec, o.fVec)); } |
83 SkNf operator != (const SkNf& o) const { return _mm_cmpneq_pd(fVec, o.fVec);
} | 118 Nb operator != (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpneq_pd
(fVec, o.fVec)); } |
84 SkNf operator < (const SkNf& o) const { return _mm_cmplt_pd (fVec, o.fVec);
} | 119 Nb operator < (const SkNf& o) const { return _mm_castpd_si128(_mm_cmplt_pd
(fVec, o.fVec)); } |
85 SkNf operator > (const SkNf& o) const { return _mm_cmpgt_pd (fVec, o.fVec);
} | 120 Nb operator > (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpgt_pd
(fVec, o.fVec)); } |
86 SkNf operator <= (const SkNf& o) const { return _mm_cmple_pd (fVec, o.fVec);
} | 121 Nb operator <= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmple_pd
(fVec, o.fVec)); } |
87 SkNf operator >= (const SkNf& o) const { return _mm_cmpge_pd (fVec, o.fVec);
} | 122 Nb operator >= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpge_pd
(fVec, o.fVec)); } |
88 | 123 |
89 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_pd(l.fVec, r.
fVec); } | 124 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_pd(l.fVec, r.
fVec); } |
90 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_pd(l.fVec, r.
fVec); } | 125 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_pd(l.fVec, r.
fVec); } |
91 | 126 |
// rsqrt0() and approxInvert() take a round trip through single precision:
// _mm_cvtpd_ps, then the float estimate (_mm_rsqrt_ps / _mm_rcp_ps), then _mm_cvtps_pd.
// rsqrt1() and rsqrt2() simply forward to rsqrt0().
92 SkNf sqrt() const { return _mm_sqrt_pd(fVec); } | 127 SkNf sqrt() const { return _mm_sqrt_pd(fVec); } |
93 SkNf rsqrt0() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec)));
} | 128 SkNf rsqrt0() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec)));
} |
94 SkNf rsqrt1() const { return this->rsqrt0(); } | 129 SkNf rsqrt1() const { return this->rsqrt0(); } |
95 SkNf rsqrt2() const { return this->rsqrt1(); } | 130 SkNf rsqrt2() const { return this->rsqrt1(); } |
96 | 131 |
97 SkNf invert() const { return SkNf(1) / *this; } | 132 SkNf invert() const { return SkNf(1) / *this; } |
98 SkNf approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec
))); } | 133 SkNf approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec
))); } |
99 | 134 |
// kth<k>() reads lane k through a union; besides the SkASSERT, the index is
// masked with k&1 so the array access stays inside ds[2].
100 template <int k> double kth() const { | 135 template <int k> double kth() const { |
101 SkASSERT(0 <= k && k < 2); | 136 SkASSERT(0 <= k && k < 2); |
102 union { __m128d v; double ds[2]; } pun = {fVec}; | 137 union { __m128d v; double ds[2]; } pun = {fVec}; |
103 return pun.ds[k&1]; | 138 return pun.ds[k&1]; |
104 } | 139 } |
105 | 140 |
// OLD only (NEW column is empty): allTrue()/anyTrue() tested all 16 movemask
// bits of fVec directly on this class.
106 bool allTrue() const { return 0xffff == _mm_movemask_epi8(_mm_castpd_si128(f
Vec)); } | |
107 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castpd_si128(f
Vec)); } | |
108 | |
109 __m128d fVec; | 141 __m128d fVec; |
110 }; | 142 }; |
111 | 143 |
112 template <> | 144 template <> |
113 class SkNi<4, int> { | 145 class SkNi<4, int> { |
114 public: | 146 public: |
115 SkNi(const __m128i& vec) : fVec(vec) {} | 147 SkNi(const __m128i& vec) : fVec(vec) {} |
116 | 148 |
117 SkNi() {} | 149 SkNi() {} |
118 explicit SkNi(int val) : fVec(_mm_set1_epi32(val)) {} | 150 explicit SkNi(int val) : fVec(_mm_set1_epi32(val)) {} |
(...skipping 23 matching lines...) Expand all Loading... |
142 case 3: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 12)); | 174 case 3: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 12)); |
143 default: SkASSERT(false); return 0; | 175 default: SkASSERT(false); return 0; |
144 } | 176 } |
145 } | 177 } |
146 | 178 |
147 __m128i fVec; | 179 __m128i fVec; |
148 }; | 180 }; |
149 | 181 |
// SkNf<4, float>: four floats carried in a __m128 (fVec). Rows read OLD | NEW.
// NEW adds the private typedef Nb = SkNb<4, 4>, makes the six comparison operators
// return Nb instead of SkNf, and drops allTrue()/anyTrue() from this class.
150 template <> | 182 template <> |
151 class SkNf<4, float> { | 183 class SkNf<4, float> { |
| 184 typedef SkNb<4, 4> Nb; |
152 public: | 185 public: |
153 SkNf(const __m128& vec) : fVec(vec) {} | 186 SkNf(const __m128& vec) : fVec(vec) {} |
154 | 187 |
155 SkNf() {} | 188 SkNf() {} |
156 explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {} | 189 explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {} |
// Load()/store() use the unaligned forms (_mm_loadu_ps / _mm_storeu_ps).
157 static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); } | 190 static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); } |
158 SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} | 191 SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} |
159 | 192 |
160 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } | 193 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } |
161 | 194 |
// castTrunc() converts to SkNi<4, int> with _mm_cvttps_epi32 (the truncating conversion).
162 SkNi<4, int> castTrunc() const { return _mm_cvttps_epi32(fVec); } | 195 SkNi<4, int> castTrunc() const { return _mm_cvttps_epi32(fVec); } |
163 | 196 |
164 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } | 197 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } |
165 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } | 198 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } |
166 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } | 199 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } |
167 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } | 200 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } |
168 | 201 |
// Comparisons. OLD returns the _mm_cmp*_ps result as an SkNf; NEW reinterprets the
// same result with _mm_castps_si128 and returns it as Nb.
169 SkNf operator == (const SkNf& o) const { return _mm_cmpeq_ps (fVec, o.fVec);
} | 202 Nb operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps
(fVec, o.fVec)); } |
170 SkNf operator != (const SkNf& o) const { return _mm_cmpneq_ps(fVec, o.fVec);
} | 203 Nb operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps
(fVec, o.fVec)); } |
171 SkNf operator < (const SkNf& o) const { return _mm_cmplt_ps (fVec, o.fVec);
} | 204 Nb operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps
(fVec, o.fVec)); } |
172 SkNf operator > (const SkNf& o) const { return _mm_cmpgt_ps (fVec, o.fVec);
} | 205 Nb operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps
(fVec, o.fVec)); } |
173 SkNf operator <= (const SkNf& o) const { return _mm_cmple_ps (fVec, o.fVec);
} | 206 Nb operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps
(fVec, o.fVec)); } |
174 SkNf operator >= (const SkNf& o) const { return _mm_cmpge_ps (fVec, o.fVec);
} | 207 Nb operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps
(fVec, o.fVec)); } |
175 | 208 |
176 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.
fVec); } | 209 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.
fVec); } |
177 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.
fVec); } | 210 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.
fVec); } |
178 | 211 |
// rsqrt1() and rsqrt2() simply forward to rsqrt0() (_mm_rsqrt_ps); no additional
// refinement step is performed in this specialization.
179 SkNf sqrt() const { return _mm_sqrt_ps (fVec); } | 212 SkNf sqrt() const { return _mm_sqrt_ps (fVec); } |
180 SkNf rsqrt0() const { return _mm_rsqrt_ps(fVec); } | 213 SkNf rsqrt0() const { return _mm_rsqrt_ps(fVec); } |
181 SkNf rsqrt1() const { return this->rsqrt0(); } | 214 SkNf rsqrt1() const { return this->rsqrt0(); } |
182 SkNf rsqrt2() const { return this->rsqrt1(); } | 215 SkNf rsqrt2() const { return this->rsqrt1(); } |
183 | 216 |
// invert() is a full division by *this; approxInvert() uses _mm_rcp_ps instead.
184 SkNf invert() const { return SkNf(1) / *this; } | 217 SkNf invert() const { return SkNf(1) / *this; } |
185 SkNf approxInvert() const { return _mm_rcp_ps(fVec); } | 218 SkNf approxInvert() const { return _mm_rcp_ps(fVec); } |
186 | 219 |
// kth<k>() reads lane k through a union; besides the SkASSERT, the index is
// masked with k&3 so the array access stays inside fs[4].
187 template <int k> float kth() const { | 220 template <int k> float kth() const { |
188 SkASSERT(0 <= k && k < 4); | 221 SkASSERT(0 <= k && k < 4); |
189 union { __m128 v; float fs[4]; } pun = {fVec}; | 222 union { __m128 v; float fs[4]; } pun = {fVec}; |
190 return pun.fs[k&3]; | 223 return pun.fs[k&3]; |
191 } | 224 } |
192 | 225 |
// OLD only (NEW column is empty): allTrue()/anyTrue() tested all 16 movemask
// bits of fVec directly on this class.
193 bool allTrue() const { return 0xffff == _mm_movemask_epi8(_mm_castps_si128(f
Vec)); } | |
194 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castps_si128(f
Vec)); } | |
195 | |
196 __m128 fVec; | 226 __m128 fVec; |
197 }; | 227 }; |
198 | 228 |
199 template <> | 229 template <> |
200 class SkNi<4, uint16_t> { | 230 class SkNi<4, uint16_t> { |
201 public: | 231 public: |
202 SkNi(const __m128i& vec) : fVec(vec) {} | 232 SkNi(const __m128i& vec) : fVec(vec) {} |
203 | 233 |
204 SkNi() {} | 234 SkNi() {} |
205 explicit SkNi(uint16_t val) : fVec(_mm_set1_epi16(val)) {} | 235 explicit SkNi(uint16_t val) : fVec(_mm_set1_epi16(val)) {} |
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
275 : fVec(_mm_setr_epi8(a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p)) {} | 305 : fVec(_mm_setr_epi8(a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p)) {} |
276 | 306 |
277 void store(uint8_t vals[16]) const { _mm_storeu_si128((__m128i*)vals, fVec);
} | 307 void store(uint8_t vals[16]) const { _mm_storeu_si128((__m128i*)vals, fVec);
} |
278 | 308 |
279 SkNi saturatedAdd(const SkNi& o) const { return _mm_adds_epu8(fVec, o.fVec);
} | 309 SkNi saturatedAdd(const SkNi& o) const { return _mm_adds_epu8(fVec, o.fVec);
} |
280 | 310 |
281 SkNi operator + (const SkNi& o) const { return _mm_add_epi8(fVec, o.fVec); } | 311 SkNi operator + (const SkNi& o) const { return _mm_add_epi8(fVec, o.fVec); } |
282 SkNi operator - (const SkNi& o) const { return _mm_sub_epi8(fVec, o.fVec); } | 312 SkNi operator - (const SkNi& o) const { return _mm_sub_epi8(fVec, o.fVec); } |
283 | 313 |
284 static SkNi Min(const SkNi& a, const SkNi& b) { return _mm_min_epu8(a.fVec,
b.fVec); } | 314 static SkNi Min(const SkNi& a, const SkNi& b) { return _mm_min_epu8(a.fVec,
b.fVec); } |
285 SkNi operator < (const SkNi& o) const { | |
286 // There's no unsigned _mm_cmplt_epu8, so we flip the sign bits then use
a signed compare. | |
287 auto flip = _mm_set1_epi8(char(0x80)); | |
288 return _mm_cmplt_epi8(_mm_xor_si128(flip, fVec), _mm_xor_si128(flip, o.f
Vec)); | |
289 } | |
290 | 315 |
291 template <int k> uint8_t kth() const { | 316 template <int k> uint8_t kth() const { |
292 SkASSERT(0 <= k && k < 16); | 317 SkASSERT(0 <= k && k < 16); |
293 // SSE4.1 would just `return _mm_extract_epi8(fVec, k)`. We have to rea
d 16-bits instead. | 318 // SSE4.1 would just `return _mm_extract_epi8(fVec, k)`. We have to rea
d 16-bits instead. |
294 int pair = _mm_extract_epi16(fVec, k/2); | 319 int pair = _mm_extract_epi16(fVec, k/2); |
295 return k % 2 == 0 ? pair : (pair >> 8); | 320 return k % 2 == 0 ? pair : (pair >> 8); |
296 } | 321 } |
297 | 322 |
298 SkNi thenElse(const SkNi& t, const SkNi& e) const { | |
299 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), | |
300 _mm_andnot_si128(fVec, e.fVec)); | |
301 } | |
302 | |
303 __m128i fVec; | 323 __m128i fVec; |
304 }; | 324 }; |
305 | 325 |
306 } // namespace | 326 } // namespace |
307 | 327 |
308 #endif//SkNx_sse_DEFINED | 328 #endif//SkNx_sse_DEFINED |
OLD | NEW |