OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
10 | 10 |
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
135 SkASSERT(0 <= k && k < 2); | 135 SkASSERT(0 <= k && k < 2); |
136 union { __m128d v; double ds[2]; } pun = {fVec}; | 136 union { __m128d v; double ds[2]; } pun = {fVec}; |
137 return pun.ds[k&1]; | 137 return pun.ds[k&1]; |
138 } | 138 } |
139 | 139 |
140 private: | 140 private: |
141 __m128d fVec; | 141 __m128d fVec; |
142 }; | 142 }; |
143 | 143 |
144 template <> | 144 template <> |
145 class SkNi<4, int> { | |
146 public: | |
147 SkNi(const __m128i& vec) : fVec(vec) {} | |
148 | |
149 SkNi() {} | |
150 explicit SkNi(int val) : fVec(_mm_set1_epi32(val)) {} | |
151 static SkNi Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*
)vals); } | |
152 SkNi(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {} | |
153 | |
154 void store(int vals[4]) const { _mm_storeu_si128((__m128i*)vals, fVec); } | |
155 | |
156 SkNi operator + (const SkNi& o) const { return _mm_add_epi32(fVec, o.fVec);
} | |
157 SkNi operator - (const SkNi& o) const { return _mm_sub_epi32(fVec, o.fVec);
} | |
158 SkNi operator * (const SkNi& o) const { | |
159 __m128i mul20 = _mm_mul_epu32(fVec, o.fVec), | |
160 mul31 = _mm_mul_epu32(_mm_srli_si128(fVec, 4), _mm_srli_si128(o.
fVec, 4)); | |
161 return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul20, _MM_SHUFFLE(0,0,2,0))
, | |
162 _mm_shuffle_epi32(mul31, _MM_SHUFFLE(0,0,2,0))
); | |
163 } | |
164 | |
165 SkNi operator << (int bits) const { return _mm_slli_epi32(fVec, bits); } | |
166 SkNi operator >> (int bits) const { return _mm_srai_epi32(fVec, bits); } | |
167 | |
168 template <int k> int kth() const { | |
169 SkASSERT(0 <= k && k < 4); | |
170 switch (k) { | |
171 case 0: return _mm_cvtsi128_si32(fVec); | |
172 case 1: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 4)); | |
173 case 2: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 8)); | |
174 case 3: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 12)); | |
175 default: SkASSERT(false); return 0; | |
176 } | |
177 } | |
178 protected: | |
179 __m128i fVec; | |
180 }; | |
181 | |
182 template <> | |
183 class SkNf<4, float> { | 145 class SkNf<4, float> { |
184 typedef SkNb<4, 4> Nb; | 146 typedef SkNb<4, 4> Nb; |
185 public: | 147 public: |
186 SkNf(const __m128& vec) : fVec(vec) {} | 148 SkNf(const __m128& vec) : fVec(vec) {} |
187 | 149 |
188 SkNf() {} | 150 SkNf() {} |
189 explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {} | 151 explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {} |
190 static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); } | 152 static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); } |
191 SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} | 153 SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} |
192 | 154 |
193 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } | 155 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } |
194 | 156 |
195 SkNi<4, int> castTrunc() const { return _mm_cvttps_epi32(fVec); } | |
196 | |
197 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } | 157 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } |
198 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } | 158 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } |
199 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } | 159 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } |
200 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } | 160 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } |
201 | 161 |
202 Nb operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps
(fVec, o.fVec)); } | 162 Nb operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps
(fVec, o.fVec)); } |
203 Nb operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps
(fVec, o.fVec)); } | 163 Nb operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps
(fVec, o.fVec)); } |
204 Nb operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps
(fVec, o.fVec)); } | 164 Nb operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps
(fVec, o.fVec)); } |
205 Nb operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps
(fVec, o.fVec)); } | 165 Nb operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps
(fVec, o.fVec)); } |
206 Nb operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps
(fVec, o.fVec)); } | 166 Nb operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps
(fVec, o.fVec)); } |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
274 | 234 |
    // Read lane k of the eight 16-bit lanes as a uint16_t.  k must be a
    // compile-time constant: _mm_extract_epi16 takes its lane index as an
    // immediate operand.
    template <int k> uint16_t kth() const {
        SkASSERT(0 <= k && k < 8);
        return _mm_extract_epi16(fVec, k);
    }
279 protected: | 239 protected: |
280 __m128i fVec; | 240 __m128i fVec; |
281 }; | 241 }; |
282 | 242 |
283 #endif//SkNx_sse_DEFINED | 243 #endif//SkNx_sse_DEFINED |
OLD | NEW |