OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
10 | 10 |
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
104 #else | 104 #else |
105 return *this * this->rsqrt2(); | 105 return *this * this->rsqrt2(); |
106 #endif | 106 #endif |
107 } | 107 } |
108 | 108 |
109 float operator[](int k) const { | 109 float operator[](int k) const { |
110 SkASSERT(0 <= k && k < 2); | 110 SkASSERT(0 <= k && k < 2); |
111 union { float32x2_t v; float fs[2]; } pun = {fVec}; | 111 union { float32x2_t v; float fs[2]; } pun = {fVec}; |
112 return pun.fs[k&1]; | 112 return pun.fs[k&1]; |
113 } | 113 } |
114 template <int k> float kth() const { return (*this)[k]; } | |
115 | 114 |
116 bool allTrue() const { | 115 bool allTrue() const { |
117 auto v = vreinterpret_u32_f32(fVec); | 116 auto v = vreinterpret_u32_f32(fVec); |
118 return vget_lane_u32(v,0) && vget_lane_u32(v,1); | 117 return vget_lane_u32(v,0) && vget_lane_u32(v,1); |
119 } | 118 } |
120 bool anyTrue() const { | 119 bool anyTrue() const { |
121 auto v = vreinterpret_u32_f32(fVec); | 120 auto v = vreinterpret_u32_f32(fVec); |
122 return vget_lane_u32(v,0) || vget_lane_u32(v,1); | 121 return vget_lane_u32(v,0) || vget_lane_u32(v,1); |
123 } | 122 } |
124 | 123 |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
196 #else | 195 #else |
197 return *this * this->rsqrt2(); | 196 return *this * this->rsqrt2(); |
198 #endif | 197 #endif |
199 } | 198 } |
200 | 199 |
201 float operator[](int k) const { | 200 float operator[](int k) const { |
202 SkASSERT(0 <= k && k < 4); | 201 SkASSERT(0 <= k && k < 4); |
203 union { float32x4_t v; float fs[4]; } pun = {fVec}; | 202 union { float32x4_t v; float fs[4]; } pun = {fVec}; |
204 return pun.fs[k&3]; | 203 return pun.fs[k&3]; |
205 } | 204 } |
206 template <int k> float kth() const { return (*this)[k]; } | |
207 | 205 |
208 bool allTrue() const { | 206 bool allTrue() const { |
209 auto v = vreinterpretq_u32_f32(fVec); | 207 auto v = vreinterpretq_u32_f32(fVec); |
210 return vgetq_lane_u32(v,0) && vgetq_lane_u32(v,1) | 208 return vgetq_lane_u32(v,0) && vgetq_lane_u32(v,1) |
211 && vgetq_lane_u32(v,2) && vgetq_lane_u32(v,3); | 209 && vgetq_lane_u32(v,2) && vgetq_lane_u32(v,3); |
212 } | 210 } |
213 bool anyTrue() const { | 211 bool anyTrue() const { |
214 auto v = vreinterpretq_u32_f32(fVec); | 212 auto v = vreinterpretq_u32_f32(fVec); |
215 return vgetq_lane_u32(v,0) || vgetq_lane_u32(v,1) | 213 return vgetq_lane_u32(v,0) || vgetq_lane_u32(v,1) |
216 || vgetq_lane_u32(v,2) || vgetq_lane_u32(v,3); | 214 || vgetq_lane_u32(v,2) || vgetq_lane_u32(v,3); |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
248 SkNx operator << (int bits) const { SHIFT16(vshl_n_u16, fVec, bits); } | 246 SkNx operator << (int bits) const { SHIFT16(vshl_n_u16, fVec, bits); } |
249 SkNx operator >> (int bits) const { SHIFT16(vshr_n_u16, fVec, bits); } | 247 SkNx operator >> (int bits) const { SHIFT16(vshr_n_u16, fVec, bits); } |
250 | 248 |
251 static SkNx Min(const SkNx& a, const SkNx& b) { return vmin_u16(a.fVec, b.fV
ec); } | 249 static SkNx Min(const SkNx& a, const SkNx& b) { return vmin_u16(a.fVec, b.fV
ec); } |
252 | 250 |
253 uint16_t operator[](int k) const { | 251 uint16_t operator[](int k) const { |
254 SkASSERT(0 <= k && k < 4); | 252 SkASSERT(0 <= k && k < 4); |
255 union { uint16x4_t v; uint16_t us[4]; } pun = {fVec}; | 253 union { uint16x4_t v; uint16_t us[4]; } pun = {fVec}; |
256 return pun.us[k&3]; | 254 return pun.us[k&3]; |
257 } | 255 } |
258 template <int k> uint16_t kth() const { return (*this)[k]; } | |
259 | 256 |
260 SkNx thenElse(const SkNx& t, const SkNx& e) const { | 257 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
261 return vbsl_u16(fVec, t.fVec, e.fVec); | 258 return vbsl_u16(fVec, t.fVec, e.fVec); |
262 } | 259 } |
263 | 260 |
264 uint16x4_t fVec; | 261 uint16x4_t fVec; |
265 }; | 262 }; |
266 | 263 |
267 template <> | 264 template <> |
268 class SkNx<8, uint16_t> { | 265 class SkNx<8, uint16_t> { |
(...skipping 18 matching lines...) Expand all Loading... |
287 SkNx operator << (int bits) const { SHIFT16(vshlq_n_u16, fVec, bits); } | 284 SkNx operator << (int bits) const { SHIFT16(vshlq_n_u16, fVec, bits); } |
288 SkNx operator >> (int bits) const { SHIFT16(vshrq_n_u16, fVec, bits); } | 285 SkNx operator >> (int bits) const { SHIFT16(vshrq_n_u16, fVec, bits); } |
289 | 286 |
290 static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u16(a.fVec, b.f
Vec); } | 287 static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u16(a.fVec, b.f
Vec); } |
291 | 288 |
292 uint16_t operator[](int k) const { | 289 uint16_t operator[](int k) const { |
293 SkASSERT(0 <= k && k < 8); | 290 SkASSERT(0 <= k && k < 8); |
294 union { uint16x8_t v; uint16_t us[8]; } pun = {fVec}; | 291 union { uint16x8_t v; uint16_t us[8]; } pun = {fVec}; |
295 return pun.us[k&7]; | 292 return pun.us[k&7]; |
296 } | 293 } |
297 template <int k> uint16_t kth() const { return (*this)[k]; } | |
298 | 294 |
299 SkNx thenElse(const SkNx& t, const SkNx& e) const { | 295 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
300 return vbslq_u16(fVec, t.fVec, e.fVec); | 296 return vbslq_u16(fVec, t.fVec, e.fVec); |
301 } | 297 } |
302 | 298 |
303 uint16x8_t fVec; | 299 uint16x8_t fVec; |
304 }; | 300 }; |
305 | 301 |
306 template <> | 302 template <> |
307 class SkNx<4, uint8_t> { | 303 class SkNx<4, uint8_t> { |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
345 SkNx operator - (const SkNx& o) const { return vsubq_u8(fVec, o.fVec); } | 341 SkNx operator - (const SkNx& o) const { return vsubq_u8(fVec, o.fVec); } |
346 | 342 |
347 static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u8(a.fVec, b.fV
ec); } | 343 static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u8(a.fVec, b.fV
ec); } |
348 SkNx operator < (const SkNx& o) const { return vcltq_u8(fVec, o.fVec); } | 344 SkNx operator < (const SkNx& o) const { return vcltq_u8(fVec, o.fVec); } |
349 | 345 |
350 uint8_t operator[](int k) const { | 346 uint8_t operator[](int k) const { |
351 SkASSERT(0 <= k && k < 16); | 347 SkASSERT(0 <= k && k < 16); |
352 union { uint8x16_t v; uint8_t us[16]; } pun = {fVec}; | 348 union { uint8x16_t v; uint8_t us[16]; } pun = {fVec}; |
353 return pun.us[k&15]; | 349 return pun.us[k&15]; |
354 } | 350 } |
355 template <int k> uint8_t kth() const { return (*this)[k]; } | |
356 | 351 |
357 SkNx thenElse(const SkNx& t, const SkNx& e) const { | 352 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
358 return vbslq_u8(fVec, t.fVec, e.fVec); | 353 return vbslq_u8(fVec, t.fVec, e.fVec); |
359 } | 354 } |
360 | 355 |
361 uint8x16_t fVec; | 356 uint8x16_t fVec; |
362 }; | 357 }; |
363 | 358 |
364 #undef SHIFT32 | 359 #undef SHIFT32 |
365 #undef SHIFT16 | 360 #undef SHIFT16 |
(...skipping 29 matching lines...) Expand all Loading... |
395 | 390 |
396 template<> inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { | 391 template<> inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { |
397 return vget_low_u16(vmovl_u8(src.fVec)); | 392 return vget_low_u16(vmovl_u8(src.fVec)); |
398 } | 393 } |
399 | 394 |
400 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { | 395 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { |
401 return vmovn_u16(vcombine_u16(src.fVec, src.fVec)); | 396 return vmovn_u16(vcombine_u16(src.fVec, src.fVec)); |
402 } | 397 } |
403 | 398 |
404 #endif//SkNx_neon_DEFINED | 399 #endif//SkNx_neon_DEFINED |
OLD | NEW |